1 /******************************************************************************
2
3   Copyright (c) 2001-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41
42 #include "ixgbe.h"
43
44 #ifdef  RSS
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
47 #endif
48
49 #ifdef DEV_NETMAP
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
53
54 extern int ix_crcstrip;
55 #endif
56
57 /*
58 ** HW RSC control:
59 **  This feature only works with
60 **  IPv4, and only on the 82599 and later.
61 **  It also causes IP forwarding to
62 **  fail, and unlike LRO it cannot be
63 **  controlled by the stack. For all these
64 **  reasons it is left disabled, with no
65 **  tunable interface; enabling it requires
66 **  changing this value and recompiling
67 **  the driver.
68 */
69 static bool ixgbe_rsc_enable = FALSE;
70
71 #ifdef IXGBE_FDIR
72 /*
73 ** For Flow Director: this is the
74 ** number of TX packets between samples
75 ** taken for the filter pool, so by
76 ** default every 20th packet is probed.
77 **
78 ** This feature can be disabled by
79 ** setting this to 0.
80 */
81 static int atr_sample_rate = 20;
82 #endif
83
84 /* Shared PCI config read/write */
85 inline u16
86 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
87 {
88         u16 value;
89
90         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
91             reg, 2);
92
93         return (value);
94 }
95
96 inline void
97 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
98 {
99         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
100             reg, value, 2);
101
102         return;
103 }
104
105 /*********************************************************************
106  *  Local Function prototypes
107  *********************************************************************/
108 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
109 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
110 static int      ixgbe_setup_receive_ring(struct rx_ring *);
111 static void     ixgbe_free_receive_buffers(struct rx_ring *);
112
113 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
114 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
116 static int      ixgbe_tx_ctx_setup(struct tx_ring *,
117                     struct mbuf *, u32 *, u32 *);
118 static int      ixgbe_tso_setup(struct tx_ring *,
119                     struct mbuf *, u32 *, u32 *);
120 #ifdef IXGBE_FDIR
121 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
122 #endif
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125                     struct mbuf *, u32);
126
127 #ifdef IXGBE_LEGACY_TX
128 /*********************************************************************
129  *  Transmit entry point
130  *
131  *  ixgbe_start is called by the stack to initiate a transmit.
132  *  The driver will remain in this routine as long as there are
133  *  packets to transmit and transmit resources are available.
134  *  In case resources are not available, the stack is notified
135  *  and the packet is requeued.
136  **********************************************************************/
137
138 void
139 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
140 {
141         struct mbuf    *m_head;
142         struct adapter *adapter = txr->adapter;
143
144         IXGBE_TX_LOCK_ASSERT(txr);
145
146         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
147                 return;
148         if (!adapter->link_active)
149                 return;
150
151         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
152                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
153                         break;
154
155                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
156                 if (m_head == NULL)
157                         break;
158
159                 if (ixgbe_xmit(txr, &m_head)) {
160                         if (m_head != NULL)
161                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
162                         break;
163                 }
164                 /* Send a copy of the frame to the BPF listener */
165                 ETHER_BPF_MTAP(ifp, m_head);
166         }
167         return;
168 }
169
170 /*
171  * Legacy TX start - called by the stack, this
172  * always uses the first tx ring, and should
173  * not be used with multiqueue tx enabled.
174  */
175 void
176 ixgbe_start(struct ifnet *ifp)
177 {
178         struct adapter *adapter = ifp->if_softc;
179         struct tx_ring  *txr = adapter->tx_rings;
180
181         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
182                 IXGBE_TX_LOCK(txr);
183                 ixgbe_start_locked(txr, ifp);
184                 IXGBE_TX_UNLOCK(txr);
185         }
186         return;
187 }
188
189 #else /* ! IXGBE_LEGACY_TX */
190
191 /*
192 ** Multiqueue Transmit driver
193 **
194 */
195 int
196 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
197 {
198         struct adapter  *adapter = ifp->if_softc;
199         struct ix_queue *que;
200         struct tx_ring  *txr;
201         int             i, err = 0;
202 #ifdef  RSS
203         uint32_t bucket_id;
204 #endif
205
206         /*
207          * When doing RSS, map it to the same outbound queue
208          * as the incoming flow would be mapped to.
209          *
210          * If everything is set up correctly, this should be the
211          * same bucket as the one the current CPU is assigned to.
212          */
213 #if __FreeBSD_version < 1100054
214         if (m->m_flags & M_FLOWID) {
215 #else
216         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
217 #endif
218 #ifdef  RSS
219                 if (rss_hash2bucket(m->m_pkthdr.flowid,
220                     M_HASHTYPE_GET(m), &bucket_id) == 0)
221                         /* TODO: spit out something if bucket_id > num_queues? */
222                         i = bucket_id % adapter->num_queues;
223                 else 
224 #endif
225                         i = m->m_pkthdr.flowid % adapter->num_queues;
226         } else
227                 i = curcpu % adapter->num_queues;
228
229         /* Check for a hung queue and pick alternative */
230         if (((1 << i) & adapter->active_queues) == 0)
231                 i = ffsl(adapter->active_queues);
232
233         txr = &adapter->tx_rings[i];
234         que = &adapter->queues[i];
235
236         err = drbr_enqueue(ifp, txr->br, m);
237         if (err)
238                 return (err);
239         if (IXGBE_TX_TRYLOCK(txr)) {
240                 ixgbe_mq_start_locked(ifp, txr);
241                 IXGBE_TX_UNLOCK(txr);
242         } else
243                 taskqueue_enqueue(que->tq, &txr->txq_task);
244
245         return (0);
246 }
247
248 int
249 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
250 {
251         struct adapter  *adapter = txr->adapter;
252         struct mbuf     *next;
253         int             enqueued = 0, err = 0;
254
255         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
256             adapter->link_active == 0)
257                 return (ENETDOWN);
258
259         /* Process the queue */
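            /*
             * Note: on kernels older than 901504 only drbr_dequeue() exists,
             * so a failed transmit re-enqueues the mbuf at the tail; newer
             * kernels use drbr_peek()/drbr_advance()/drbr_putback(), which
             * keeps an unsent mbuf at the head of the ring.
             */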
260 #if __FreeBSD_version < 901504
261         next = drbr_dequeue(ifp, txr->br);
262         while (next != NULL) {
263                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
264                         if (next != NULL)
265                                 err = drbr_enqueue(ifp, txr->br, next);
266 #else
267         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
268                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
269                         if (next == NULL) {
270                                 drbr_advance(ifp, txr->br);
271                         } else {
272                                 drbr_putback(ifp, txr->br, next);
273                         }
274 #endif
275                         break;
276                 }
277 #if __FreeBSD_version >= 901504
278                 drbr_advance(ifp, txr->br);
279 #endif
280                 enqueued++;
281 #if 0 // this is VF-only
282 #if __FreeBSD_version >= 1100036
283                 /*
284                  * Since we're looking at the tx ring, we can check
285                  * to see if we're a VF by examing our tail register
286                  * address.
287                  */
288                 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
289                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
290 #endif
291 #endif
292                 /* Send a copy of the frame to the BPF listener */
293                 ETHER_BPF_MTAP(ifp, next);
294                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
295                         break;
296 #if __FreeBSD_version < 901504
297                 next = drbr_dequeue(ifp, txr->br);
298 #endif
299         }
300
301         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
302                 ixgbe_txeof(txr);
303
304         return (err);
305 }
306
307 /*
308  * Called from a taskqueue to drain queued transmit packets.
309  */
310 void
311 ixgbe_deferred_mq_start(void *arg, int pending)
312 {
313         struct tx_ring *txr = arg;
314         struct adapter *adapter = txr->adapter;
315         struct ifnet *ifp = adapter->ifp;
316
317         IXGBE_TX_LOCK(txr);
318         if (!drbr_empty(ifp, txr->br))
319                 ixgbe_mq_start_locked(ifp, txr);
320         IXGBE_TX_UNLOCK(txr);
321 }
322
323 /*
324  * Flush all ring buffers
325  */
326 void
327 ixgbe_qflush(struct ifnet *ifp)
328 {
329         struct adapter  *adapter = ifp->if_softc;
330         struct tx_ring  *txr = adapter->tx_rings;
331         struct mbuf     *m;
332
333         for (int i = 0; i < adapter->num_queues; i++, txr++) {
334                 IXGBE_TX_LOCK(txr);
335                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
336                         m_freem(m);
337                 IXGBE_TX_UNLOCK(txr);
338         }
339         if_qflush(ifp);
340 }
341 #endif /* IXGBE_LEGACY_TX */
342
343
344 /*********************************************************************
345  *
346  *  This routine maps the mbufs to tx descriptors, allowing the
347  *  TX engine to transmit the packets. 
348  *      - returns 0 on success, a positive errno on failure
349  *
350  **********************************************************************/
351
352 static int
353 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
354 {
355         struct adapter  *adapter = txr->adapter;
356         u32             olinfo_status = 0, cmd_type_len;
357         int             i, j, error, nsegs;
358         int             first;
359         bool            remap = TRUE;
360         struct mbuf     *m_head;
361         bus_dma_segment_t segs[adapter->num_segs];
362         bus_dmamap_t    map;
363         struct ixgbe_tx_buf *txbuf;
364         union ixgbe_adv_tx_desc *txd = NULL;
365
366         m_head = *m_headp;
367
368         /* Basic descriptor defines */
369         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
370             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
371
372         if (m_head->m_flags & M_VLANTAG)
373                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
374
375         /*
376          * Important to capture the first descriptor
377          * used because it will contain the index of
378          * the one we tell the hardware to report back
379          */
380         first = txr->next_avail_desc;
381         txbuf = &txr->tx_buffers[first];
382         map = txbuf->map;
383
384         /*
385          * Map the packet for DMA.
386          */
387 retry:
388         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
389             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
390
391         if (__predict_false(error)) {
392                 struct mbuf *m;
393
394                 switch (error) {
395                 case EFBIG:
396                         /* Try it again? - one try */
397                         if (remap == TRUE) {
398                                 remap = FALSE;
399                                 /*
400                                  * XXX: m_defrag will choke on
401                                  * non-MCLBYTES-sized clusters
402                                  */
403                                 m = m_defrag(*m_headp, M_NOWAIT);
404                                 if (m == NULL) {
405                                         adapter->mbuf_defrag_failed++;
406                                         m_freem(*m_headp);
407                                         *m_headp = NULL;
408                                         return (ENOBUFS);
409                                 }
410                                 *m_headp = m;
411                                 goto retry;
412                         } else
413                                 return (error);
414                 case ENOMEM:
415                         txr->no_tx_dma_setup++;
416                         return (error);
417                 default:
418                         txr->no_tx_dma_setup++;
419                         m_freem(*m_headp);
420                         *m_headp = NULL;
421                         return (error);
422                 }
423         }
424
425         /* Make certain there are enough descriptors */
426         if (nsegs > txr->tx_avail - 2) {
427                 txr->no_desc_avail++;
428                 bus_dmamap_unload(txr->txtag, map);
429                 return (ENOBUFS);
430         }
431         m_head = *m_headp;
432
433         /*
434          * Set up the appropriate offload context
435          * this will consume the first descriptor
436          */
437         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
438         if (__predict_false(error)) {
439                 if (error == ENOBUFS)
440                         *m_headp = NULL;
441                 return (error);
442         }
443
444 #ifdef IXGBE_FDIR
445         /* Do the flow director magic */
446         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
447                 ++txr->atr_count;
448                 if (txr->atr_count >= atr_sample_rate) {
449                         ixgbe_atr(txr, m_head);
450                         txr->atr_count = 0;
451                 }
452         }
453 #endif
454
455         i = txr->next_avail_desc;
456         for (j = 0; j < nsegs; j++) {
457                 bus_size_t seglen;
458                 bus_addr_t segaddr;
459
460                 txbuf = &txr->tx_buffers[i];
461                 txd = &txr->tx_base[i];
462                 seglen = segs[j].ds_len;
463                 segaddr = htole64(segs[j].ds_addr);
464
465                 txd->read.buffer_addr = segaddr;
466                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
467                     cmd_type_len | seglen);
468                 txd->read.olinfo_status = htole32(olinfo_status);
469
470                 if (++i == txr->num_desc)
471                         i = 0;
472         }
473
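            /*
             * Only the final descriptor of the frame carries EOP and RS:
             * EOP marks the end of the packet and RS asks the hardware to
             * write back status, which ixgbe_txeof() later checks via the
             * DD bit on this descriptor.
             */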
474         txd->read.cmd_type_len |=
475             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
476         txr->tx_avail -= nsegs;
477         txr->next_avail_desc = i;
478
479         txbuf->m_head = m_head;
480         /*
481          * Here we swap the map so the last descriptor,
482          * which gets the completion interrupt, has the
483          * real map, and the first descriptor gets the
484          * unused map from this descriptor.
485          */
486         txr->tx_buffers[first].map = txbuf->map;
487         txbuf->map = map;
488         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
489
490         /* Set the EOP descriptor that will be marked done */
491         txbuf = &txr->tx_buffers[first];
492         txbuf->eop = txd;
493
494         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
495             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
496         /*
497          * Advance the Transmit Descriptor Tail (TDT); this tells the
498          * hardware that this frame is available to transmit.
499          */
500         ++txr->total_packets;
501         IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
502
503         /* Mark queue as having work */
504         if (txr->busy == 0)
505                 txr->busy = 1;
506
507         return (0);
508 }
509
510
511 /*********************************************************************
512  *
513  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
514  *  the information needed to transmit a packet on the wire. This is
515  *  called only once at attach, setup is done every reset.
516  *
517  **********************************************************************/
518 int
519 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
520 {
521         struct adapter *adapter = txr->adapter;
522         device_t dev = adapter->dev;
523         struct ixgbe_tx_buf *txbuf;
524         int error, i;
525
526         /*
527          * Setup DMA descriptor areas.
528          */
529         if ((error = bus_dma_tag_create(
530                                bus_get_dma_tag(adapter->dev),   /* parent */
531                                1, 0,            /* alignment, bounds */
532                                BUS_SPACE_MAXADDR,       /* lowaddr */
533                                BUS_SPACE_MAXADDR,       /* highaddr */
534                                NULL, NULL,              /* filter, filterarg */
535                                IXGBE_TSO_SIZE,          /* maxsize */
536                                adapter->num_segs,       /* nsegments */
537                                PAGE_SIZE,               /* maxsegsize */
538                                0,                       /* flags */
539                                NULL,                    /* lockfunc */
540                                NULL,                    /* lockfuncarg */
541                                &txr->txtag))) {
542                 device_printf(dev,"Unable to allocate TX DMA tag\n");
543                 goto fail;
544         }
545
546         if (!(txr->tx_buffers =
547             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
548             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
549                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
550                 error = ENOMEM;
551                 goto fail;
552         }
553
554         /* Create the descriptor buffer dma maps */
555         txbuf = txr->tx_buffers;
556         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
557                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
558                 if (error != 0) {
559                         device_printf(dev, "Unable to create TX DMA map\n");
560                         goto fail;
561                 }
562         }
563
564         return 0;
565 fail:
566         /* We free everything; this handles the case where setup failed partway through */
567         ixgbe_free_transmit_structures(adapter);
568         return (error);
569 }
570
571 /*********************************************************************
572  *
573  *  Initialize a transmit ring.
574  *
575  **********************************************************************/
576 static void
577 ixgbe_setup_transmit_ring(struct tx_ring *txr)
578 {
579         struct adapter *adapter = txr->adapter;
580         struct ixgbe_tx_buf *txbuf;
581 #ifdef DEV_NETMAP
582         struct netmap_adapter *na = NA(adapter->ifp);
583         struct netmap_slot *slot;
584 #endif /* DEV_NETMAP */
585
586         /* Clear the old ring contents */
587         IXGBE_TX_LOCK(txr);
588 #ifdef DEV_NETMAP
589         /*
590          * (under lock): if in netmap mode, do some consistency
591          * checks and set slot to entry 0 of the netmap ring.
592          */
593         slot = netmap_reset(na, NR_TX, txr->me, 0);
594 #endif /* DEV_NETMAP */
595         bzero((void *)txr->tx_base,
596               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
597         /* Reset indices */
598         txr->next_avail_desc = 0;
599         txr->next_to_clean = 0;
600
601         /* Free any existing tx buffers. */
602         txbuf = txr->tx_buffers;
603         for (int i = 0; i < txr->num_desc; i++, txbuf++) {
604                 if (txbuf->m_head != NULL) {
605                         bus_dmamap_sync(txr->txtag, txbuf->map,
606                             BUS_DMASYNC_POSTWRITE);
607                         bus_dmamap_unload(txr->txtag, txbuf->map);
608                         m_freem(txbuf->m_head);
609                         txbuf->m_head = NULL;
610                 }
611 #ifdef DEV_NETMAP
612                 /*
613                  * In netmap mode, set the map for the packet buffer.
614                  * NOTE: Some drivers (not this one) also need to set
615                  * the physical buffer address in the NIC ring.
616                  * Slots in the netmap ring (indexed by "si") are
617                  * kring->nkr_hwofs positions "ahead" wrt the
618                  * corresponding slot in the NIC ring. In some drivers
619                  * (not here) nkr_hwofs can be negative. Function
620                  * netmap_idx_n2k() handles wraparounds properly.
621                  */
622                 if (slot) {
623                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
624                         netmap_load_map(na, txr->txtag,
625                             txbuf->map, NMB(na, slot + si));
626                 }
627 #endif /* DEV_NETMAP */
628                 /* Clear the EOP descriptor pointer */
629                 txbuf->eop = NULL;
630         }
631
632 #ifdef IXGBE_FDIR
633         /* Set the rate at which we sample packets */
634         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
635                 txr->atr_sample = atr_sample_rate;
636 #endif
637
638         /* Set number of descriptors available */
639         txr->tx_avail = adapter->num_tx_desc;
640
641         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
642             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
643         IXGBE_TX_UNLOCK(txr);
644 }
645
646 /*********************************************************************
647  *
648  *  Initialize all transmit rings.
649  *
650  **********************************************************************/
651 int
652 ixgbe_setup_transmit_structures(struct adapter *adapter)
653 {
654         struct tx_ring *txr = adapter->tx_rings;
655
656         for (int i = 0; i < adapter->num_queues; i++, txr++)
657                 ixgbe_setup_transmit_ring(txr);
658
659         return (0);
660 }
661
662 /*********************************************************************
663  *
664  *  Free all transmit rings.
665  *
666  **********************************************************************/
667 void
668 ixgbe_free_transmit_structures(struct adapter *adapter)
669 {
670         struct tx_ring *txr = adapter->tx_rings;
671
672         for (int i = 0; i < adapter->num_queues; i++, txr++) {
673                 IXGBE_TX_LOCK(txr);
674                 ixgbe_free_transmit_buffers(txr);
675                 ixgbe_dma_free(adapter, &txr->txdma);
676                 IXGBE_TX_UNLOCK(txr);
677                 IXGBE_TX_LOCK_DESTROY(txr);
678         }
679         free(adapter->tx_rings, M_DEVBUF);
680 }
681
682 /*********************************************************************
683  *
684  *  Free transmit ring related data structures.
685  *
686  **********************************************************************/
687 static void
688 ixgbe_free_transmit_buffers(struct tx_ring *txr)
689 {
690         struct adapter *adapter = txr->adapter;
691         struct ixgbe_tx_buf *tx_buffer;
692         int             i;
693
694         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
695
696         if (txr->tx_buffers == NULL)
697                 return;
698
699         tx_buffer = txr->tx_buffers;
700         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
701                 if (tx_buffer->m_head != NULL) {
702                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
703                             BUS_DMASYNC_POSTWRITE);
704                         bus_dmamap_unload(txr->txtag,
705                             tx_buffer->map);
706                         m_freem(tx_buffer->m_head);
707                         tx_buffer->m_head = NULL;
708                         if (tx_buffer->map != NULL) {
709                                 bus_dmamap_destroy(txr->txtag,
710                                     tx_buffer->map);
711                                 tx_buffer->map = NULL;
712                         }
713                 } else if (tx_buffer->map != NULL) {
714                         bus_dmamap_unload(txr->txtag,
715                             tx_buffer->map);
716                         bus_dmamap_destroy(txr->txtag,
717                             tx_buffer->map);
718                         tx_buffer->map = NULL;
719                 }
720         }
721 #ifdef IXGBE_LEGACY_TX
722         if (txr->br != NULL)
723                 buf_ring_free(txr->br, M_DEVBUF);
724 #endif
725         if (txr->tx_buffers != NULL) {
726                 free(txr->tx_buffers, M_DEVBUF);
727                 txr->tx_buffers = NULL;
728         }
729         if (txr->txtag != NULL) {
730                 bus_dma_tag_destroy(txr->txtag);
731                 txr->txtag = NULL;
732         }
733         return;
734 }
735
736 /*********************************************************************
737  *
738  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
739  *
740  **********************************************************************/
741
742 static int
743 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
744     u32 *cmd_type_len, u32 *olinfo_status)
745 {
746         struct adapter *adapter = txr->adapter;
747         struct ixgbe_adv_tx_context_desc *TXD;
748         struct ether_vlan_header *eh;
749         struct ip *ip;
750         struct ip6_hdr *ip6;
751         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
752         int     ehdrlen, ip_hlen = 0;
753         u16     etype;
754         u8      ipproto = 0;
755         int     offload = TRUE;
756         int     ctxd = txr->next_avail_desc;
757         u16     vtag = 0;
758
759         /* First check if TSO is to be used */
760         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
761                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
762
763         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
764                 offload = FALSE;
765
766         /* Indicate the whole packet as payload when not doing TSO */
767         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
768
769         /* Now ready a context descriptor */
770         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
771
772         /*
773         ** In advanced descriptors the vlan tag must 
774         ** be placed into the context descriptor. Hence
775         ** we need to make one even if not doing offloads.
776         */
777         if (mp->m_flags & M_VLANTAG) {
778                 vtag = htole16(mp->m_pkthdr.ether_vtag);
779                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
780         } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
781                 return (0);
782
783         /*
784          * Determine where frame payload starts.
785          * Jump over vlan headers if already present,
786          * helpful for QinQ too.
787          */
788         eh = mtod(mp, struct ether_vlan_header *);
789         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
790                 etype = ntohs(eh->evl_proto);
791                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
792         } else {
793                 etype = ntohs(eh->evl_encap_proto);
794                 ehdrlen = ETHER_HDR_LEN;
795         }
796
797         /* Set the ether header length */
798         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
799
800         if (offload == FALSE)
801                 goto no_offloads;
802
803         switch (etype) {
804                 case ETHERTYPE_IP:
805                         ip = (struct ip *)(mp->m_data + ehdrlen);
806                         ip_hlen = ip->ip_hl << 2;
807                         ipproto = ip->ip_p;
808                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
809                         break;
810                 case ETHERTYPE_IPV6:
811                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
812                         ip_hlen = sizeof(struct ip6_hdr);
813                         /* XXX-BZ this will go badly in case of ext hdrs. */
814                         ipproto = ip6->ip6_nxt;
815                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
816                         break;
817                 default:
818                         offload = FALSE;
819                         break;
820         }
821
822         vlan_macip_lens |= ip_hlen;
823
824         switch (ipproto) {
825                 case IPPROTO_TCP:
826                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
827                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
828                         break;
829
830                 case IPPROTO_UDP:
831                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
832                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
833                         break;
834
835 #if __FreeBSD_version >= 800000
836                 case IPPROTO_SCTP:
837                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
838                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
839                         break;
840 #endif
841                 default:
842                         offload = FALSE;
843                         break;
844         }
845
846         if (offload) /* For the TX descriptor setup */
847                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
848
849 no_offloads:
850         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
851
852         /* Now copy bits into descriptor */
853         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
854         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
855         TXD->seqnum_seed = htole32(0);
856         TXD->mss_l4len_idx = htole32(0);
857
858         /* We've consumed the first desc, adjust counters */
859         if (++ctxd == txr->num_desc)
860                 ctxd = 0;
861         txr->next_avail_desc = ctxd;
862         --txr->tx_avail;
863
864         return (0);
865 }
866
867 /**********************************************************************
868  *
869  *  Setup work for hardware segmentation offload (TSO) on
870  *  adapters using advanced tx descriptors
871  *
872  **********************************************************************/
873 static int
874 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
875     u32 *cmd_type_len, u32 *olinfo_status)
876 {
877         struct ixgbe_adv_tx_context_desc *TXD;
878         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
879         u32 mss_l4len_idx = 0, paylen;
880         u16 vtag = 0, eh_type;
881         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
882         struct ether_vlan_header *eh;
883 #ifdef INET6
884         struct ip6_hdr *ip6;
885 #endif
886 #ifdef INET
887         struct ip *ip;
888 #endif
889         struct tcphdr *th;
890
891
892         /*
893          * Determine where frame payload starts.
894          * Jump over vlan headers if already present
895          */
896         eh = mtod(mp, struct ether_vlan_header *);
897         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
898                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
899                 eh_type = eh->evl_proto;
900         } else {
901                 ehdrlen = ETHER_HDR_LEN;
902                 eh_type = eh->evl_encap_proto;
903         }
904
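            /*
             * For TSO the hardware expects the TCP checksum field to be
             * seeded with the pseudo-header checksum (computed with a zero
             * length); for IPv4 the IP checksum is also cleared so the
             * hardware can insert it. The cases below set this up.
             */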
905         switch (ntohs(eh_type)) {
906 #ifdef INET6
907         case ETHERTYPE_IPV6:
908                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
909                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
910                 if (ip6->ip6_nxt != IPPROTO_TCP)
911                         return (ENXIO);
912                 ip_hlen = sizeof(struct ip6_hdr);
913                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
914                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
915                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
916                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
917                 break;
918 #endif
919 #ifdef INET
920         case ETHERTYPE_IP:
921                 ip = (struct ip *)(mp->m_data + ehdrlen);
922                 if (ip->ip_p != IPPROTO_TCP)
923                         return (ENXIO);
924                 ip->ip_sum = 0;
925                 ip_hlen = ip->ip_hl << 2;
926                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
927                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
928                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
929                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
930                 /* Tell transmit desc to also do IPv4 checksum. */
931                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
932                 break;
933 #endif
934         default:
935                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
936                     __func__, ntohs(eh_type));
937                 break;
938         }
939
940         ctxd = txr->next_avail_desc;
941         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
942
943         tcp_hlen = th->th_off << 2;
944
945         /* This is used in the transmit desc in encap */
946         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
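            /*
             * paylen is the TCP payload length only (total frame length
             * minus the L2, L3 and L4 headers); it is reported to the
             * hardware in olinfo_status below.
             */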
947
948         /* VLAN MACLEN IPLEN */
949         if (mp->m_flags & M_VLANTAG) {
950                 vtag = htole16(mp->m_pkthdr.ether_vtag);
951                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
952         }
953
954         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
955         vlan_macip_lens |= ip_hlen;
956         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
957
958         /* ADV DTYPE TUCMD */
959         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
960         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
961         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
962
963         /* MSS L4LEN IDX */
964         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
965         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
966         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
967
968         TXD->seqnum_seed = htole32(0);
969
970         if (++ctxd == txr->num_desc)
971                 ctxd = 0;
972
973         txr->tx_avail--;
974         txr->next_avail_desc = ctxd;
975         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
976         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
977         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
978         ++txr->tso_tx;
979         return (0);
980 }
981
982
983 /**********************************************************************
984  *
985  *  Examine each tx_buffer in the used queue. If the hardware is done
986  *  processing the packet then free associated resources. The
987  *  tx_buffer is put back on the free queue.
988  *
989  **********************************************************************/
990 void
991 ixgbe_txeof(struct tx_ring *txr)
992 {
993 #ifdef DEV_NETMAP
994         struct adapter          *adapter = txr->adapter;
995         struct ifnet            *ifp = adapter->ifp;
996 #endif
997         u32                     work, processed = 0;
998         u16                     limit = txr->process_limit;
999         struct ixgbe_tx_buf     *buf;
1000         union ixgbe_adv_tx_desc *txd;
1001
1002         mtx_assert(&txr->tx_mtx, MA_OWNED);
1003
1004 #ifdef DEV_NETMAP
1005         if (ifp->if_capenable & IFCAP_NETMAP) {
1006                 struct netmap_adapter *na = NA(ifp);
1007                 struct netmap_kring *kring = &na->tx_rings[txr->me];
1008                 txd = txr->tx_base;
1009                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1010                     BUS_DMASYNC_POSTREAD);
1011                 /*
1012                  * In netmap mode, all the work is done in the context
1013                  * of the client thread. Interrupt handlers only wake up
1014                  * clients, which may be sleeping on individual rings
1015                  * or on a global resource for all rings.
1016                  * To implement tx interrupt mitigation, we wake up the client
1017                  * thread roughly every half ring, even if the NIC interrupts
1018                  * more frequently. This is implemented as follows:
1019                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
1020                  *   the slot that should wake up the thread (nkr_num_slots
1021                  *   means the user thread should not be woken up);
1022                  * - the driver ignores tx interrupts unless netmap_mitigate=0
1023                  *   or the slot has the DD bit set.
1024                  */
1025                 if (!netmap_mitigate ||
1026                     (kring->nr_kflags < kring->nkr_num_slots &&
1027                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1028                         netmap_tx_irq(ifp, txr->me);
1029                 }
1030                 return;
1031         }
1032 #endif /* DEV_NETMAP */
1033
1034         if (txr->tx_avail == txr->num_desc) {
1035                 txr->busy = 0;
1036                 return;
1037         }
1038
1039         /* Get work starting point */
1040         work = txr->next_to_clean;
1041         buf = &txr->tx_buffers[work];
1042         txd = &txr->tx_base[work];
1043         work -= txr->num_desc; /* The distance to ring end */
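             /*
              * "work" is now a negative offset from the end of the ring;
              * the increments below bring it toward zero, and reaching zero
              * means we have hit the end of the ring and must wrap back to
              * the first descriptor.
              */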
1044         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1045             BUS_DMASYNC_POSTREAD);
1046
1047         do {
1048                 union ixgbe_adv_tx_desc *eop = buf->eop;
1049                 if (eop == NULL) /* No work */
1050                         break;
1051
1052                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1053                         break;  /* I/O not complete */
1054
1055                 if (buf->m_head) {
1056                         txr->bytes +=
1057                             buf->m_head->m_pkthdr.len;
1058                         bus_dmamap_sync(txr->txtag,
1059                             buf->map,
1060                             BUS_DMASYNC_POSTWRITE);
1061                         bus_dmamap_unload(txr->txtag,
1062                             buf->map);
1063                         m_freem(buf->m_head);
1064                         buf->m_head = NULL;
1065                 }
1066                 buf->eop = NULL;
1067                 ++txr->tx_avail;
1068
1069                 /* We clean the range if multi segment */
1070                 while (txd != eop) {
1071                         ++txd;
1072                         ++buf;
1073                         ++work;
1074                         /* wrap the ring? */
1075                         if (__predict_false(!work)) {
1076                                 work -= txr->num_desc;
1077                                 buf = txr->tx_buffers;
1078                                 txd = txr->tx_base;
1079                         }
1080                         if (buf->m_head) {
1081                                 txr->bytes +=
1082                                     buf->m_head->m_pkthdr.len;
1083                                 bus_dmamap_sync(txr->txtag,
1084                                     buf->map,
1085                                     BUS_DMASYNC_POSTWRITE);
1086                                 bus_dmamap_unload(txr->txtag,
1087                                     buf->map);
1088                                 m_freem(buf->m_head);
1089                                 buf->m_head = NULL;
1090                         }
1091                         ++txr->tx_avail;
1092                         buf->eop = NULL;
1093
1094                 }
1095                 ++txr->packets;
1096                 ++processed;
1097
1098                 /* Try the next packet */
1099                 ++txd;
1100                 ++buf;
1101                 ++work;
1102                 /* reset with a wrap */
1103                 if (__predict_false(!work)) {
1104                         work -= txr->num_desc;
1105                         buf = txr->tx_buffers;
1106                         txd = txr->tx_base;
1107                 }
1108                 prefetch(txd);
1109         } while (__predict_true(--limit));
1110
1111         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1112             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1113
1114         work += txr->num_desc;
1115         txr->next_to_clean = work;
1116
1117         /*
1118         ** Queue hang detection: we know there is
1119         ** work outstanding or the first return above
1120         ** would have been taken, so if nothing got
1121         ** cleaned we increment busy; local_timer then
1122         ** checks this count and marks the queue HUNG
1123         ** if it exceeds the maximum number of attempts.
1124         */
1125         if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1126                 ++txr->busy;
1127         /*
1128         ** If anything was cleaned we reset the state to 1;
1129         ** note this clears HUNG if it is set.
1130         */
1131         if (processed)
1132                 txr->busy = 1;
1133
1134         if (txr->tx_avail == txr->num_desc)
1135                 txr->busy = 0;
1136
1137         return;
1138 }
1139
1140
1141 #ifdef IXGBE_FDIR
1142 /*
1143 ** This routine parses packet headers so that Flow
1144 ** Director can make a hashed filter table entry,
1145 ** allowing traffic flows to be identified and kept
1146 ** on the same CPU. Doing this for every packet
1147 ** would be a performance hit, so we only sample
1148 ** one in every IXGBE_FDIR_RATE packets.
1149 */
1150 static void
1151 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1152 {
1153         struct adapter                  *adapter = txr->adapter;
1154         struct ix_queue                 *que;
1155         struct ip                       *ip;
1156         struct tcphdr                   *th;
1157         struct udphdr                   *uh;
1158         struct ether_vlan_header        *eh;
1159         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
1160         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
1161         int                             ehdrlen, ip_hlen;
1162         u16                             etype;
1163
1164         eh = mtod(mp, struct ether_vlan_header *);
1165         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1166                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1167                 etype = eh->evl_proto;
1168         } else {
1169                 ehdrlen = ETHER_HDR_LEN;
1170                 etype = eh->evl_encap_proto;
1171         }
1172
1173         /* Only handling IPv4 */
1174         if (etype != htons(ETHERTYPE_IP))
1175                 return;
1176
1177         ip = (struct ip *)(mp->m_data + ehdrlen);
1178         ip_hlen = ip->ip_hl << 2;
1179
1180         /* check if we're UDP or TCP */
1181         switch (ip->ip_p) {
1182         case IPPROTO_TCP:
1183                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1184                 /* src and dst are inverted */
1185                 common.port.dst ^= th->th_sport;
1186                 common.port.src ^= th->th_dport;
1187                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1188                 break;
1189         case IPPROTO_UDP:
1190                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1191                 /* src and dst are inverted */
1192                 common.port.dst ^= uh->uh_sport;
1193                 common.port.src ^= uh->uh_dport;
1194                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1195                 break;
1196         default:
1197                 return;
1198         }
1199
1200         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1201         if (mp->m_pkthdr.ether_vtag)
1202                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1203         else
1204                 common.flex_bytes ^= etype;
1205         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1206
1207         que = &adapter->queues[txr->me];
1208         /*
1209         ** This assumes the Rx queue and Tx
1210         ** queue are bound to the same CPU
1211         */
1212         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1213             input, common, que->msix);
1214 }
1215 #endif /* IXGBE_FDIR */
1216
1217 /*
1218 ** Used to detect a descriptor that has
1219 ** been merged by Hardware RSC.
1220 */
1221 static inline u32
1222 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1223 {
1224         return (le32toh(rx->wb.lower.lo_dword.data) &
1225             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1226 }
1227
1228 /*********************************************************************
1229  *
1230  *  Initialize the Hardware RSC (LRO) feature on the 82599
1231  *  for an RX ring; it is toggled by the LRO capability
1232  *  even though it is transparent to the stack.
1233  *
1234  *  NOTE: since this HW feature only works with IPv4 and
1235  *        our testing has shown software LRO to be just as
1236  *        effective, it is disabled by default.
1237  *
1238  **********************************************************************/
1239 static void
1240 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1241 {
1242         struct  adapter         *adapter = rxr->adapter;
1243         struct  ixgbe_hw        *hw = &adapter->hw;
1244         u32                     rscctrl, rdrxctl;
1245
1246         /* If turning LRO/RSC off we need to disable it */
1247         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1248                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1249                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                     /* Write it back, otherwise RSC is never actually disabled */
                     IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1250                 return;
1251         }
1252
1253         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1254         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1255 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1256         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1257 #endif /* DEV_NETMAP */
1258         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
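             /*
              * Note: when DEV_NETMAP is defined, the CRCSTRIP assignment
              * above is the body of the preceding if statement; otherwise
              * it always executes.
              */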
1259         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1260         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1261
1262         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1263         rscctrl |= IXGBE_RSCCTL_RSCEN;
1264         /*
1265         ** Limit the total number of descriptors that
1266         ** can be combined, so it does not exceed 64K
1267         */
1268         if (rxr->mbuf_sz == MCLBYTES)
1269                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1270         else if (rxr->mbuf_sz == MJUMPAGESIZE)
1271                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1272         else if (rxr->mbuf_sz == MJUM9BYTES)
1273                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1274         else  /* Using 16K cluster */
1275                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1276
1277         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1278
1279         /* Enable TCP header recognition */
1280         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1281             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1282             IXGBE_PSRTYPE_TCPHDR));
1283
1284         /* Disable RSC for ACK packets */
1285         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1286             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1287
1288         rxr->hw_rsc = TRUE;
1289 }
1290 /*********************************************************************
1291  *
1292  *  Refresh mbuf buffers for RX descriptor rings
1293  *   - now keeps its own state, so discards due to resource
1294  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1295  *     it just returns, keeping its placeholder, and can simply
1296  *     be called again later to retry.
1297  *
1298  **********************************************************************/
1299 static void
1300 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1301 {
1302         struct adapter          *adapter = rxr->adapter;
1303         bus_dma_segment_t       seg[1];
1304         struct ixgbe_rx_buf     *rxbuf;
1305         struct mbuf             *mp;
1306         int                     i, j, nsegs, error;
1307         bool                    refreshed = FALSE;
1308
1309         i = j = rxr->next_to_refresh;
1310         /* Control the loop with one beyond */
1311         if (++j == rxr->num_desc)
1312                 j = 0;
1313
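             /*
              * "j" always runs one slot ahead of "i": the buffer at "i" is
              * refreshed and only then is next_to_refresh advanced. Because
              * the loop exits once "j" reaches "limit", refreshing always
              * stops one descriptor short of it.
              */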
1314         while (j != limit) {
1315                 rxbuf = &rxr->rx_buffers[i];
1316                 if (rxbuf->buf == NULL) {
1317                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1318                             M_PKTHDR, rxr->mbuf_sz);
1319                         if (mp == NULL)
1320                                 goto update;
1321                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1322                                 m_adj(mp, ETHER_ALIGN);
1323                 } else
1324                         mp = rxbuf->buf;
1325
1326                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1327
1328                 /* If we're dealing with an mbuf that was copied rather
1329                  * than replaced, there's no need to go through busdma.
1330                  */
1331                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1332                         /* Get the memory mapping */
1333                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1334                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1335                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1336                         if (error != 0) {
1337                                 printf("Refresh mbufs: payload dmamap load"
1338                                     " failure - %d\n", error);
1339                                 m_free(mp);
1340                                 rxbuf->buf = NULL;
1341                                 goto update;
1342                         }
1343                         rxbuf->buf = mp;
1344                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1345                             BUS_DMASYNC_PREREAD);
1346                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1347                             htole64(seg[0].ds_addr);
1348                 } else {
1349                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1350                         rxbuf->flags &= ~IXGBE_RX_COPY;
1351                 }
1352
1353                 refreshed = TRUE;
1354                 /* Next is precalculated */
1355                 i = j;
1356                 rxr->next_to_refresh = i;
1357                 if (++j == rxr->num_desc)
1358                         j = 0;
1359         }
1360 update:
1361         if (refreshed) /* Update hardware tail index */
1362                 IXGBE_WRITE_REG(&adapter->hw,
1363                     rxr->tail, rxr->next_to_refresh);
1364         return;
1365 }
1366
1367 /*********************************************************************
1368  *
1369  *  Allocate memory for rx_buffer structures. Since we use one
1370  *  rx_buffer per received packet, the maximum number of rx_buffer's
1371  *  that we'll need is equal to the number of receive descriptors
1372  *  that we've allocated.
1373  *
1374  **********************************************************************/
1375 int
1376 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1377 {
1378         struct  adapter         *adapter = rxr->adapter;
1379         device_t                dev = adapter->dev;
1380         struct ixgbe_rx_buf     *rxbuf;
1381         int                     bsize, error;
1382
1383         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1384         if (!(rxr->rx_buffers =
1385             (struct ixgbe_rx_buf *) malloc(bsize,
1386             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1387                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1388                 error = ENOMEM;
1389                 goto fail;
1390         }
1391
1392         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1393                                    1, 0,        /* alignment, bounds */
1394                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1395                                    BUS_SPACE_MAXADDR,   /* highaddr */
1396                                    NULL, NULL,          /* filter, filterarg */
1397                                    MJUM16BYTES,         /* maxsize */
1398                                    1,                   /* nsegments */
1399                                    MJUM16BYTES,         /* maxsegsize */
1400                                    0,                   /* flags */
1401                                    NULL,                /* lockfunc */
1402                                    NULL,                /* lockfuncarg */
1403                                    &rxr->ptag))) {
1404                 device_printf(dev, "Unable to create RX DMA tag\n");
1405                 goto fail;
1406         }
1407
1408         for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1409                 rxbuf = &rxr->rx_buffers[i];
1410                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1411                 if (error) {
1412                         device_printf(dev, "Unable to create RX dma map\n");
1413                         goto fail;
1414                 }
1415         }
1416
1417         return (0);
1418
1419 fail:
1420         /* Frees all, but can handle partial completion */
1421         ixgbe_free_receive_structures(adapter);
1422         return (error);
1423 }
1424
1425
1426 static void     
1427 ixgbe_free_receive_ring(struct rx_ring *rxr)
1428 {
1429         struct ixgbe_rx_buf       *rxbuf;
1430
1431         for (int i = 0; i < rxr->num_desc; i++) {
1432                 rxbuf = &rxr->rx_buffers[i];
1433                 if (rxbuf->buf != NULL) {
1434                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1435                             BUS_DMASYNC_POSTREAD);
1436                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1437                         rxbuf->buf->m_flags |= M_PKTHDR;
1438                         m_freem(rxbuf->buf);
1439                         rxbuf->buf = NULL;
1440                         rxbuf->flags = 0;
1441                 }
1442         }
1443 }
1444
1445
1446 /*********************************************************************
1447  *
1448  *  Initialize a receive ring and its buffers.
1449  *
1450  **********************************************************************/
1451 static int
1452 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1453 {
1454         struct  adapter         *adapter;
1455         struct ifnet            *ifp;
1456         device_t                dev;
1457         struct ixgbe_rx_buf     *rxbuf;
1458         bus_dma_segment_t       seg[1];
1459         struct lro_ctrl         *lro = &rxr->lro;
1460         int                     rsize, nsegs, error = 0;
1461 #ifdef DEV_NETMAP
1462         struct netmap_adapter *na = NA(rxr->adapter->ifp);
1463         struct netmap_slot *slot;
1464 #endif /* DEV_NETMAP */
1465
1466         adapter = rxr->adapter;
1467         ifp = adapter->ifp;
1468         dev = adapter->dev;
1469
1470         /* Clear the ring contents */
1471         IXGBE_RX_LOCK(rxr);
1472 #ifdef DEV_NETMAP
1473         /* same as in ixgbe_setup_transmit_ring() */
1474         slot = netmap_reset(na, NR_RX, rxr->me, 0);
1475 #endif /* DEV_NETMAP */
1476         rsize = roundup2(adapter->num_rx_desc *
1477             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1478         bzero((void *)rxr->rx_base, rsize);
1479         /* Cache the size */
1480         rxr->mbuf_sz = adapter->rx_mbuf_sz;
1481
1482         /* Free current RX buffer structs and their mbufs */
1483         ixgbe_free_receive_ring(rxr);
1484
1485         /* Now replenish the mbufs */
1486         for (int j = 0; j != rxr->num_desc; ++j) {
1487                 struct mbuf     *mp;
1488
1489                 rxbuf = &rxr->rx_buffers[j];
1490 #ifdef DEV_NETMAP
1491                 /*
1492                  * In netmap mode, fill the map and set the buffer
1493                  * address in the NIC ring, considering the offset
1494                  * between the netmap and NIC rings (see comment in
1495                  * ixgbe_setup_transmit_ring() ). No need to allocate
1496                  * an mbuf, so end the block with a continue;
1497                  */
1498                 if (slot) {
1499                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1500                         uint64_t paddr;
1501                         void *addr;
1502
1503                         addr = PNMB(na, slot + sj, &paddr);
1504                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1505                         /* Update descriptor and the cached value */
1506                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1507                         rxbuf->addr = htole64(paddr);
1508                         continue;
1509                 }
1510 #endif /* DEV_NETMAP */
1511                 rxbuf->flags = 0; 
1512                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1513                     M_PKTHDR, adapter->rx_mbuf_sz);
1514                 if (rxbuf->buf == NULL) {
1515                         error = ENOBUFS;
1516                         goto fail;
1517                 }
1518                 mp = rxbuf->buf;
1519                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1520                 /* Get the memory mapping */
1521                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1522                     rxbuf->pmap, mp, seg,
1523                     &nsegs, BUS_DMA_NOWAIT);
1524                 if (error != 0)
1525                         goto fail;
1526                 bus_dmamap_sync(rxr->ptag,
1527                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
1528                 /* Update the descriptor and the cached value */
1529                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1530                 rxbuf->addr = htole64(seg[0].ds_addr);
1531         }
1532
1533
1534         /* Setup our descriptor indices */
1535         rxr->next_to_check = 0;
1536         rxr->next_to_refresh = 0;
1537         rxr->lro_enabled = FALSE;
1538         rxr->rx_copies = 0;
1539         rxr->rx_bytes = 0;
1540         rxr->vtag_strip = FALSE;
1541
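        /* Push the freshly initialized descriptor ring out to the device */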
1542         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1543             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1544
1545         /*
1546         ** Now set up the LRO interface:
1547         */
1548         if (ixgbe_rsc_enable)
1549                 ixgbe_setup_hw_rsc(rxr);
1550         else if (ifp->if_capenable & IFCAP_LRO) {
1551                 int err = tcp_lro_init(lro);
1552                 if (err) {
1553                         device_printf(dev, "LRO Initialization failed!\n");
1554                         goto fail;
1555                 }
1556                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1557                 rxr->lro_enabled = TRUE;
1558                 lro->ifp = adapter->ifp;
1559         }
1560
1561         IXGBE_RX_UNLOCK(rxr);
1562         return (0);
1563
1564 fail:
1565         ixgbe_free_receive_ring(rxr);
1566         IXGBE_RX_UNLOCK(rxr);
1567         return (error);
1568 }
1569
1570 /*********************************************************************
1571  *
1572  *  Initialize all receive rings.
1573  *
1574  **********************************************************************/
1575 int
1576 ixgbe_setup_receive_structures(struct adapter *adapter)
1577 {
1578         struct rx_ring *rxr = adapter->rx_rings;
1579         int j;
1580
1581         for (j = 0; j < adapter->num_queues; j++, rxr++)
1582                 if (ixgbe_setup_receive_ring(rxr))
1583                         goto fail;
1584
1585         return (0);
1586 fail:
1587         /*
1588          * Free the RX buffers allocated so far; we only handle the
1589          * rings that completed, since the failing ring has already
1590          * cleaned up after itself. 'j' is the failing ring's index.
1591          */
1592         for (int i = 0; i < j; ++i) {
1593                 rxr = &adapter->rx_rings[i];
1594                 ixgbe_free_receive_ring(rxr);
1595         }
1596
1597         return (ENOBUFS);
1598 }
1599
1600
1601 /*********************************************************************
1602  *
1603  *  Free all receive rings.
1604  *
1605  **********************************************************************/
1606 void
1607 ixgbe_free_receive_structures(struct adapter *adapter)
1608 {
1609         struct rx_ring *rxr = adapter->rx_rings;
1610
1611         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1612
1613         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1614                 struct lro_ctrl         *lro = &rxr->lro;
1615                 ixgbe_free_receive_buffers(rxr);
1616                 /* Free LRO memory */
1617                 tcp_lro_free(lro);
1618                 /* Free the ring memory as well */
1619                 ixgbe_dma_free(adapter, &rxr->rxdma);
1620         }
1621
1622         free(adapter->rx_rings, M_DEVBUF);
1623 }
1624
1625
1626 /*********************************************************************
1627  *
1628  *  Free a receive ring's buffers, DMA maps and DMA tag
1629  *
1630  **********************************************************************/
1631 void
1632 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1633 {
1634         struct adapter          *adapter = rxr->adapter;
1635         struct ixgbe_rx_buf     *rxbuf;
1636
1637         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1638
1639         /* Cleanup any existing buffers */
1640         if (rxr->rx_buffers != NULL) {
1641                 for (int i = 0; i < adapter->num_rx_desc; i++) {
1642                         rxbuf = &rxr->rx_buffers[i];
1643                         if (rxbuf->buf != NULL) {
1644                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1645                                     BUS_DMASYNC_POSTREAD);
1646                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1647                                 rxbuf->buf->m_flags |= M_PKTHDR;
1648                                 m_freem(rxbuf->buf);
1649                         }
1650                         rxbuf->buf = NULL;
1651                         if (rxbuf->pmap != NULL) {
1652                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1653                                 rxbuf->pmap = NULL;
1654                         }
1655                 }
1656                 if (rxr->rx_buffers != NULL) {
1657                         free(rxr->rx_buffers, M_DEVBUF);
1658                         rxr->rx_buffers = NULL;
1659                 }
1660         }
1661
1662         if (rxr->ptag != NULL) {
1663                 bus_dma_tag_destroy(rxr->ptag);
1664                 rxr->ptag = NULL;
1665         }
1666
1667         return;
1668 }
1669
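/*
** Hand a received packet up: if soft LRO is enabled and the frame
** qualifies (TCP over IPv4/IPv6, hardware-verified checksum, no ETQF
** match), try to enqueue it with tcp_lro_rx(); otherwise drop the RX
** lock around the call into if_input() and reacquire it afterwards.
*/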
1670 static __inline void
1671 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1672 {
1673                  
1674         /*
1675          * At the moment LRO is only for IP/TCP packets whose TCP checksum
1676          * has been verified by hardware, and which carry no VLAN tag in the
1677          * Ethernet header.  For IPv6 we do not yet support extension headers.
1678          */
1679         if (rxr->lro_enabled &&
1680             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1681             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1682             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1683             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1684             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1685             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1686             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1687             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1688                 /*
1689                  * Send to the stack if:
1690                  *  - LRO not enabled, or
1691                  *  - no LRO resources, or
1692                  *  - lro enqueue fails
1693                  */
1694                 if (rxr->lro.lro_cnt != 0)
1695                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1696                                 return;
1697         }
1698         IXGBE_RX_UNLOCK(rxr);
1699         (*ifp->if_input)(ifp, m);
1700         IXGBE_RX_LOCK(rxr);
1701 }
1702
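/*
** Drop the frame held at ring slot 'i': free any partially assembled
** mbuf chain, unload the DMA map and clear the buffer flags so the
** normal refresh path re-arms the slot.
*/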
1703 static __inline void
1704 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1705 {
1706         struct ixgbe_rx_buf     *rbuf;
1707
1708         rbuf = &rxr->rx_buffers[i];
1709
1710
1711         /*
1712         ** With advanced descriptors the writeback
1713         ** clobbers the buffer addresses, so it's easier
1714         ** to just free the existing mbufs and take
1715         ** the normal refresh path to get new buffers
1716         ** and mappings.
1717         */
1718
1719         if (rbuf->fmp != NULL) {/* Partial chain ? */
1720                 rbuf->fmp->m_flags |= M_PKTHDR;
1721                 m_freem(rbuf->fmp);
1722                 rbuf->fmp = NULL;
1723                 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1724         } else if (rbuf->buf) {
1725                 m_free(rbuf->buf);
1726                 rbuf->buf = NULL;
1727         }
1728         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1729
1730         rbuf->flags = 0;
1731  
1732         return;
1733 }
1734
1735
1736 /*********************************************************************
1737  *
1738  *  This routine executes in interrupt context. It replenishes
1739  *  the mbufs in the descriptor ring and passes data that has been
1740  *  DMA'd into host memory up to the upper layers.
1741  *
1742  *  Return TRUE for more work, FALSE for all clean.
1743  *********************************************************************/
1744 bool
1745 ixgbe_rxeof(struct ix_queue *que)
1746 {
1747         struct adapter          *adapter = que->adapter;
1748         struct rx_ring          *rxr = que->rxr;
1749         struct ifnet            *ifp = adapter->ifp;
1750         struct lro_ctrl         *lro = &rxr->lro;
1751         struct lro_entry        *queued;
1752         int                     i, nextp, processed = 0;
1753         u32                     staterr = 0;
1754         u16                     count = rxr->process_limit;
1755         union ixgbe_adv_rx_desc *cur;
1756         struct ixgbe_rx_buf     *rbuf, *nbuf;
1757         u16                     pkt_info;
1758
1759         IXGBE_RX_LOCK(rxr);
1760
1761 #ifdef DEV_NETMAP
1762         /* Same as the txeof routine: wakeup clients on intr. */
1763         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1764                 IXGBE_RX_UNLOCK(rxr);
1765                 return (FALSE);
1766         }
1767 #endif /* DEV_NETMAP */
1768
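        /*
        ** Main cleanup loop: walk the ring from next_to_check until we
        ** hit a descriptor the hardware has not written back yet (DD
        ** bit clear) or the per-call process limit is exhausted.
        */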
1769         for (i = rxr->next_to_check; count != 0;) {
1770                 struct mbuf     *sendmp, *mp;
1771                 u32             rsc, ptype;
1772                 u16             len;
1773                 u16             vtag = 0;
1774                 bool            eop;
1775  
1776                 /* Sync the ring. */
1777                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1778                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1779
1780                 cur = &rxr->rx_base[i];
1781                 staterr = le32toh(cur->wb.upper.status_error);
1782                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1783
1784                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1785                         break;
1786                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1787                         break;
1788
1789                 count--;
1790                 sendmp = NULL;
1791                 nbuf = NULL;
1792                 rsc = 0;
1793                 cur->wb.upper.status_error = 0;
1794                 rbuf = &rxr->rx_buffers[i];
1795                 mp = rbuf->buf;
1796
1797                 len = le16toh(cur->wb.upper.length);
1798                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1799                     IXGBE_RXDADV_PKTTYPE_MASK;
1800                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1801
1802                 /* Make sure bad packets are discarded */
1803                 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1804 #if __FreeBSD_version >= 1100036
1805                         if (IXGBE_IS_VF(adapter))
1806                                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1807 #endif
1808                         rxr->rx_discarded++;
1809                         ixgbe_rx_discard(rxr, i);
1810                         goto next_desc;
1811                 }
1812
1813                 /*
1814                 ** On the 82599, which supports a hardware
1815                 ** LRO (called HW RSC), packets need not be
1816                 ** fragmented across sequential descriptors;
1817                 ** rather, the next descriptor is indicated
1818                 ** in bits of the current descriptor.
1819                 ** This also means that we might process
1820                 ** more than one packet at a time, something
1821                 ** that has never been true before; it
1822                 ** required eliminating global chain pointers
1823                 ** in favor of what we are doing here.  -jfv
1824                 */
1825                 if (!eop) {
1826                         /*
1827                         ** Figure out the next descriptor
1828                         ** of this frame.
1829                         */
1830                         if (rxr->hw_rsc == TRUE) {
1831                                 rsc = ixgbe_rsc_count(cur);
1832                                 rxr->rsc_num += (rsc - 1);
1833                         }
1834                         if (rsc) { /* Get hardware index */
1835                                 nextp = ((staterr &
1836                                     IXGBE_RXDADV_NEXTP_MASK) >>
1837                                     IXGBE_RXDADV_NEXTP_SHIFT);
1838                         } else { /* Just sequential */
1839                                 nextp = i + 1;
1840                                 if (nextp == adapter->num_rx_desc)
1841                                         nextp = 0;
1842                         }
1843                         nbuf = &rxr->rx_buffers[nextp];
1844                         prefetch(nbuf);
1845                 }
1846                 /*
1847                 ** Rather than using the fmp/lmp global pointers
1848                 ** we now keep the head of a packet chain in the
1849                 ** buffer struct and pass this along from one
1850                 ** descriptor to the next, until we get EOP.
1851                 */
1852                 mp->m_len = len;
1853                 /*
1854                 ** See if a previous descriptor stored the
1855                 ** head of this packet chain.
1856                 */
1857                 sendmp = rbuf->fmp;
1858                 if (sendmp != NULL) {  /* secondary frag */
1859                         rbuf->buf = rbuf->fmp = NULL;
1860                         mp->m_flags &= ~M_PKTHDR;
1861                         sendmp->m_pkthdr.len += mp->m_len;
1862                 } else {
1863                         /*
1864                          * Optimize.  This might be a small packet,
1865                          * maybe just a TCP ACK.  Do a fast copy that
1866                          * is cache aligned into a new mbuf, and
1867                          * leave the old mbuf+cluster for re-use.
1868                          */
1869                         if (eop && len <= IXGBE_RX_COPY_LEN) {
1870                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1871                                 if (sendmp != NULL) {
1872                                         sendmp->m_data +=
1873                                             IXGBE_RX_COPY_ALIGN;
1874                                         ixgbe_bcopy(mp->m_data,
1875                                             sendmp->m_data, len);
1876                                         sendmp->m_len = len;
1877                                         rxr->rx_copies++;
1878                                         rbuf->flags |= IXGBE_RX_COPY;
1879                                 }
1880                         }
1881                         if (sendmp == NULL) {
1882                                 rbuf->buf = rbuf->fmp = NULL;
1883                                 sendmp = mp;
1884                         }
1885
1886                         /* first desc of a non-ps chain */
1887                         sendmp->m_flags |= M_PKTHDR;
1888                         sendmp->m_pkthdr.len = mp->m_len;
1889                 }
1890                 ++processed;
1891
1892                 /* Pass the head pointer on */
1893                 if (eop == 0) {
1894                         nbuf->fmp = sendmp;
1895                         sendmp = NULL;
1896                         mp->m_next = nbuf->buf;
1897                 } else { /* Sending this frame */
1898                         sendmp->m_pkthdr.rcvif = ifp;
1899                         rxr->rx_packets++;
1900                         /* capture data for AIM */
1901                         rxr->bytes += sendmp->m_pkthdr.len;
1902                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1903                         /* Process vlan info */
1904                         if ((rxr->vtag_strip) &&
1905                             (staterr & IXGBE_RXD_STAT_VP))
1906                                 vtag = le16toh(cur->wb.upper.vlan);
1907                         if (vtag) {
1908                                 sendmp->m_pkthdr.ether_vtag = vtag;
1909                                 sendmp->m_flags |= M_VLANTAG;
1910                         }
1911                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1912                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
1913 #if __FreeBSD_version >= 800000
1914 #ifdef RSS
1915                         sendmp->m_pkthdr.flowid =
1916                             le32toh(cur->wb.lower.hi_dword.rss);
1917 #if __FreeBSD_version < 1100054
1918                         sendmp->m_flags |= M_FLOWID;
1919 #endif
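                        /*
                        ** Map the descriptor's RSS type field onto the
                        ** corresponding mbuf hash type so the stack can
                        ** reuse the hardware-computed hash.
                        */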
1920                         switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1921                         case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1922                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
1923                                 break;
1924                         case IXGBE_RXDADV_RSSTYPE_IPV4:
1925                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
1926                                 break;
1927                         case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1928                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
1929                                 break;
1930                         case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1931                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
1932                                 break;
1933                         case IXGBE_RXDADV_RSSTYPE_IPV6:
1934                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
1935                                 break;
1936                         case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1937                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
1938                                 break;
1939                         case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1940                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
1941                                 break;
1942                         case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1943                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
1944                                 break;
1945                         case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1946                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
1947                                 break;
1948                         default:
1949                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1950                         }
1951 #else /* RSS */
1952                         sendmp->m_pkthdr.flowid = que->msix;
1953 #if __FreeBSD_version >= 1100054
1954                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1955 #else
1956                         sendmp->m_flags |= M_FLOWID;
1957 #endif
1958 #endif /* RSS */
1959 #endif /* FreeBSD_version */
1960                 }
1961 next_desc:
1962                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1963                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1964
1965                 /* Advance our pointers to the next descriptor. */
1966                 if (++i == rxr->num_desc)
1967                         i = 0;
1968
1969                 /* Now send to the stack or do LRO */
1970                 if (sendmp != NULL) {
1971                         rxr->next_to_check = i;
1972                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1973                         i = rxr->next_to_check;
1974                 }
1975
1976                 /* Every 8 processed descriptors, refresh the mbufs */
1977                 if (processed == 8) {
1978                         ixgbe_refresh_mbufs(rxr, i);
1979                         processed = 0;
1980                 }
1981         }
1982
1983         /* Refresh any remaining buf structs */
1984         if (ixgbe_rx_unrefreshed(rxr))
1985                 ixgbe_refresh_mbufs(rxr, i);
1986
1987         rxr->next_to_check = i;
1988
1989         /*
1990          * Flush any outstanding LRO work
1991          */
1992         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1993                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1994                 tcp_lro_flush(lro, queued);
1995         }
1996
1997         IXGBE_RX_UNLOCK(rxr);
1998
1999         /*
2000         ** Still have cleaning to do?
2001         */
2002         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2003                 return (TRUE);
2004         else
2005                 return (FALSE);
2006 }
2007
2008
2009 /*********************************************************************
2010  *
2011  *  Verify that the hardware indicated that the checksum is valid.
2012  *  Inform the stack of the checksum status so that it does not
2013  *  spend time re-verifying the checksum.
2014  *
2015  *********************************************************************/
2016 static void
2017 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2018 {
2019         u16     status = (u16) staterr;
2020         u8      errors = (u8) (staterr >> 24);
2021         bool    sctp = FALSE;
2022
2023         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2024             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2025                 sctp = TRUE;
2026
2027         if (status & IXGBE_RXD_STAT_IPCS) {
2028                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2029                         /* IP Checksum Good */
2030                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
2031                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2032
2033                 } else
2034                         mp->m_pkthdr.csum_flags = 0;
2035         }
2036         if (status & IXGBE_RXD_STAT_L4CS) {
2037                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2038 #if __FreeBSD_version >= 800000
2039                 if (sctp)
2040                         type = CSUM_SCTP_VALID;
2041 #endif
2042                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2043                         mp->m_pkthdr.csum_flags |= type;
2044                         if (!sctp)
2045                                 mp->m_pkthdr.csum_data = htons(0xffff);
2046                 } 
2047         }
2048         return;
2049 }
2050
2051 /********************************************************************
2052  * Manage DMA'able memory.
2053  *******************************************************************/
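/*
** bus_dmamap_load() callback: the descriptor memory is allocated as a
** single segment, so simply record its bus address for the caller.
*/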
2054 static void
2055 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2056 {
2057         if (error)
2058                 return;
2059         *(bus_addr_t *) arg = segs->ds_addr;
2060         return;
2061 }
2062
2063 int
2064 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2065                 struct ixgbe_dma_alloc *dma, int mapflags)
2066 {
2067         device_t dev = adapter->dev;
2068         int             r;
2069
2070         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2071                                DBA_ALIGN, 0,    /* alignment, bounds */
2072                                BUS_SPACE_MAXADDR,       /* lowaddr */
2073                                BUS_SPACE_MAXADDR,       /* highaddr */
2074                                NULL, NULL,      /* filter, filterarg */
2075                                size,    /* maxsize */
2076                                1,       /* nsegments */
2077                                size,    /* maxsegsize */
2078                                BUS_DMA_ALLOCNOW,        /* flags */
2079                                NULL,    /* lockfunc */
2080                                NULL,    /* lockfuncarg */
2081                                &dma->dma_tag);
2082         if (r != 0) {
2083                 device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create failed; "
2084                        "error %u\n", r);
2085                 goto fail_0;
2086         }
2087         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2088                              BUS_DMA_NOWAIT, &dma->dma_map);
2089         if (r != 0) {
2090                 device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2091                        "error %u\n", r);
2092                 goto fail_1;
2093         }
2094         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2095                             size,
2096                             ixgbe_dmamap_cb,
2097                             &dma->dma_paddr,
2098                             mapflags | BUS_DMA_NOWAIT);
2099         if (r != 0) {
2100                 device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load failed; "
2101                        "error %u\n", r);
2102                 goto fail_2;
2103         }
2104         dma->dma_size = size;
2105         return (0);
2106 fail_2:
2107         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2108 fail_1:
2109         bus_dma_tag_destroy(dma->dma_tag);
2110 fail_0:
2111         dma->dma_tag = NULL;
2112         return (r);
2113 }
2114
2115 void
2116 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2117 {
2118         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2119             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2120         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2121         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2122         bus_dma_tag_destroy(dma->dma_tag);
2123 }
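
/*
** Illustrative only, not part of the driver: a minimal sketch of how the
** ixgbe_dma_malloc()/ixgbe_dma_free() pair is used for descriptor rings
** elsewhere in this file (see ixgbe_allocate_queues() below).  The
** function name example_alloc_ring is hypothetical.
*/
#if 0
static int
example_alloc_ring(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
        bus_size_t size;

        /* Descriptor rings are sized and aligned as in ixgbe_allocate_queues() */
        size = roundup2(adapter->num_rx_desc *
            sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
        if (ixgbe_dma_malloc(adapter, size, dma, BUS_DMA_NOWAIT))
                return (ENOMEM);
        bzero((void *)dma->dma_vaddr, size);
        /* ... hand dma->dma_paddr to the hardware, use dma->dma_vaddr ... */
        ixgbe_dma_free(adapter, dma);
        return (0);
}
#endif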
2124
2125
2126 /*********************************************************************
2127  *
2128  *  Allocate memory for the transmit and receive rings, and then
2129  *  the descriptors associated with each, called only once at attach.
2130  *
2131  **********************************************************************/
2132 int
2133 ixgbe_allocate_queues(struct adapter *adapter)
2134 {
2135         device_t        dev = adapter->dev;
2136         struct ix_queue *que;
2137         struct tx_ring  *txr;
2138         struct rx_ring  *rxr;
2139         int rsize, tsize, error = IXGBE_SUCCESS;
2140         int txconf = 0, rxconf = 0;
2141 #ifdef PCI_IOV
2142         enum ixgbe_iov_mode iov_mode;
2143 #endif
2144
2145         /* First allocate the top level queue structs */
2146         if (!(adapter->queues =
2147             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2148             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2149                 device_printf(dev, "Unable to allocate queue memory\n");
2150                 error = ENOMEM;
2151                 goto fail;
2152         }
2153
2154         /* First allocate the TX ring struct memory */
2155         if (!(adapter->tx_rings =
2156             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2157             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2158                 device_printf(dev, "Unable to allocate TX ring memory\n");
2159                 error = ENOMEM;
2160                 goto tx_fail;
2161         }
2162
2163         /* Next allocate the RX */
2164         if (!(adapter->rx_rings =
2165             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2166             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2167                 device_printf(dev, "Unable to allocate RX ring memory\n");
2168                 error = ENOMEM;
2169                 goto rx_fail;
2170         }
2171
2172         /* For the ring itself */
2173         tsize = roundup2(adapter->num_tx_desc *
2174             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2175
2176 #ifdef PCI_IOV
2177         iov_mode = ixgbe_get_iov_mode(adapter);
2178         adapter->pool = ixgbe_max_vfs(iov_mode);
2179 #else
2180         adapter->pool = 0;
2181 #endif
2182         /*
2183          * Now set up the TX queues; txconf is needed to handle the
2184          * possibility that things fail midcourse and we need to
2185          * undo the memory allocations gracefully.
2186          */
2187         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2188                 /* Set up some basics */
2189                 txr = &adapter->tx_rings[i];
2190                 txr->adapter = adapter;
2191 #ifdef PCI_IOV
2192                 txr->me = ixgbe_pf_que_index(iov_mode, i);
2193 #else
2194                 txr->me = i;
2195 #endif
2196                 txr->num_desc = adapter->num_tx_desc;
2197
2198                 /* Initialize the TX side lock */
2199                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2200                     device_get_nameunit(dev), txr->me);
2201                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2202
2203                 if (ixgbe_dma_malloc(adapter, tsize,
2204                         &txr->txdma, BUS_DMA_NOWAIT)) {
2205                         device_printf(dev,
2206                             "Unable to allocate TX Descriptor memory\n");
2207                         error = ENOMEM;
2208                         goto err_tx_desc;
2209                 }
2210                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2211                 bzero((void *)txr->tx_base, tsize);
2212
2213                 /* Now allocate transmit buffers for the ring */
2214                 if (ixgbe_allocate_transmit_buffers(txr)) {
2215                         device_printf(dev,
2216                             "Critical Failure setting up transmit buffers\n");
2217                         error = ENOMEM;
2218                         goto err_tx_desc;
2219                 }
2220 #ifndef IXGBE_LEGACY_TX
2221                 /* Allocate a buf ring */
2222                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2223                     M_WAITOK, &txr->tx_mtx);
2224                 if (txr->br == NULL) {
2225                         device_printf(dev,
2226                             "Critical Failure setting up buf ring\n");
2227                         error = ENOMEM;
2228                         goto err_tx_desc;
2229                 }
2230 #endif
2231         }
2232
2233         /*
2234          * Next the RX queues...
2235          */ 
2236         rsize = roundup2(adapter->num_rx_desc *
2237             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2238         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2239                 rxr = &adapter->rx_rings[i];
2240                 /* Set up some basics */
2241                 rxr->adapter = adapter;
2242 #ifdef PCI_IOV
2243                 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2244 #else
2245                 rxr->me = i;
2246 #endif
2247                 rxr->num_desc = adapter->num_rx_desc;
2248
2249                 /* Initialize the RX side lock */
2250                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2251                     device_get_nameunit(dev), rxr->me);
2252                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2253
2254                 if (ixgbe_dma_malloc(adapter, rsize,
2255                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2256                         device_printf(dev,
2257                             "Unable to allocate RX Descriptor memory\n");
2258                         error = ENOMEM;
2259                         goto err_rx_desc;
2260                 }
2261                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2262                 bzero((void *)rxr->rx_base, rsize);
2263
2264                 /* Allocate receive buffers for the ring*/
2265                 if (ixgbe_allocate_receive_buffers(rxr)) {
2266                         device_printf(dev,
2267                             "Critical Failure setting up receive buffers\n");
2268                         error = ENOMEM;
2269                         goto err_rx_desc;
2270                 }
2271         }
2272
2273         /*
2274         ** Finally set up the queue holding structs
2275         */
2276         for (int i = 0; i < adapter->num_queues; i++) {
2277                 que = &adapter->queues[i];
2278                 que->adapter = adapter;
2279                 que->me = i;
2280                 que->txr = &adapter->tx_rings[i];
2281                 que->rxr = &adapter->rx_rings[i];
2282         }
2283
2284         return (0);
2285
2286 err_rx_desc:
2287         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2288                 ixgbe_dma_free(adapter, &rxr->rxdma);
2289 err_tx_desc:
2290         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2291                 ixgbe_dma_free(adapter, &txr->txdma);
2292         free(adapter->rx_rings, M_DEVBUF);
2293 rx_fail:
2294         free(adapter->tx_rings, M_DEVBUF);
2295 tx_fail:
2296         free(adapter->queues, M_DEVBUF);
2297 fail:
2298         return (error);
2299 }