1 /******************************************************************************
2
3   Copyright (c) 2001-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41
42 #include "ixgbe.h"
43
44 #ifdef  RSS
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
47 #endif
48
49 #ifdef DEV_NETMAP
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
53
54 extern int ix_crcstrip;
55 #endif
56
57 /*
58 ** HW RSC control:
59 **  this feature only works with
60 **  IPv4, and only on 82599 and later.
61 **  It also breaks IP forwarding, which
62 **  the stack cannot work around the way
63 **  it can with software LRO. For these
64 **  reasons it is left off by default and
65 **  no tunable interface is provided;
66 **  enabling it requires changing this
67 **  flag and recompiling.
68 */
69 static bool ixgbe_rsc_enable = FALSE;
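/*
 * Illustration (not part of the driver logic): since no tunable is
 * exposed, experimenting with hardware RSC means editing the
 * initializer above, e.g.
 *
 *     static bool ixgbe_rsc_enable = TRUE;    -- hypothetical local change
 *
 * and rebuilding ixgbe(4).  Even then, ixgbe_setup_hw_rsc() below only
 * programs RSC on a ring when LRO (IFCAP_LRO) is enabled on the
 * interface.
 */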
70
71 #ifdef IXGBE_FDIR
72 /*
73 ** For Flow Director: this is the
74 ** rate at which we sample TX packets
75 ** for the filter pool; with the default
76 ** of 20, every 20th packet is probed.
77 **
78 ** This feature can be disabled by
79 ** setting this to 0.
80 */
81 static int atr_sample_rate = 20;
82 #endif
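/*
 * Illustration (hypothetical numbers): ixgbe_setup_transmit_ring()
 * copies atr_sample_rate into txr->atr_sample (except on 82598), and
 * ixgbe_xmit() increments txr->atr_count per packet, calling
 * ixgbe_atr() and resetting the count each time it reaches that rate.
 * With the default of 20, a flow sending 1000 packets on a ring
 * therefore triggers roughly 1000 / 20 = 50 signature-filter updates.
 * Setting the rate to 0 leaves txr->atr_sample at 0, so the sampling
 * branch in ixgbe_xmit() is never taken.
 */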
83
84 /*********************************************************************
85  *  Local Function prototypes
86  *********************************************************************/
87 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
88 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
89 static int      ixgbe_setup_receive_ring(struct rx_ring *);
90 static void     ixgbe_free_receive_buffers(struct rx_ring *);
91
92 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
93 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
94 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
95 static int      ixgbe_tx_ctx_setup(struct tx_ring *,
96                     struct mbuf *, u32 *, u32 *);
97 static int      ixgbe_tso_setup(struct tx_ring *,
98                     struct mbuf *, u32 *, u32 *);
99 #ifdef IXGBE_FDIR
100 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
101 #endif
102 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
103 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
104                     struct mbuf *, u32);
105
106 #ifdef IXGBE_LEGACY_TX
107 /*********************************************************************
108  *  Transmit entry point
109  *
110  *  ixgbe_start is called by the stack to initiate a transmit.
111  *  The driver will remain in this routine as long as there are
112  *  packets to transmit and transmit resources are available.
113  *  In case resources are not available, the stack is notified
114  *  and the packet is requeued.
115  **********************************************************************/
116
117 void
118 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
119 {
120         struct mbuf    *m_head;
121         struct adapter *adapter = txr->adapter;
122
123         IXGBE_TX_LOCK_ASSERT(txr);
124
125         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
126                 return;
127         if (!adapter->link_active)
128                 return;
129
130         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
131                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
132                         break;
133
134                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
135                 if (m_head == NULL)
136                         break;
137
138                 if (ixgbe_xmit(txr, &m_head)) {
139                         if (m_head != NULL)
140                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
141                         break;
142                 }
143                 /* Send a copy of the frame to the BPF listener */
144                 ETHER_BPF_MTAP(ifp, m_head);
145         }
146         return;
147 }
148
149 /*
150  * Legacy TX start - called by the stack, this
151  * always uses the first tx ring, and should
152  * not be used with multiqueue tx enabled.
153  */
154 void
155 ixgbe_start(struct ifnet *ifp)
156 {
157         struct adapter *adapter = ifp->if_softc;
158         struct tx_ring  *txr = adapter->tx_rings;
159
160         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
161                 IXGBE_TX_LOCK(txr);
162                 ixgbe_start_locked(txr, ifp);
163                 IXGBE_TX_UNLOCK(txr);
164         }
165         return;
166 }
167
168 #else /* ! IXGBE_LEGACY_TX */
169
170 /*
171 ** Multiqueue Transmit Entry Point
172 ** (if_transmit function)
173 */
174 int
175 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
176 {
177         struct adapter  *adapter = ifp->if_softc;
178         struct ix_queue *que;
179         struct tx_ring  *txr;
180         int             i, err = 0;
181 #ifdef  RSS
182         uint32_t bucket_id;
183 #endif
184
185         /*
186          * When doing RSS, map it to the same outbound queue
187          * as the incoming flow would be mapped to.
188          *
189          * If everything is set up correctly, it should be the
190          * same bucket as the one the current CPU maps to.
191          */
192         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
193 #ifdef  RSS
194                 if (rss_hash2bucket(m->m_pkthdr.flowid,
195                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
196                         i = bucket_id % adapter->num_queues;
197 #ifdef IXGBE_DEBUG
198                         if (bucket_id > adapter->num_queues)
199                                 if_printf(ifp, "bucket_id (%d) > num_queues "
200                                     "(%d)\n", bucket_id, adapter->num_queues);
201 #endif
202                 } else 
203 #endif
204                         i = m->m_pkthdr.flowid % adapter->num_queues;
205         } else
206                 i = curcpu % adapter->num_queues;
207
208         /* Check for a hung queue and pick alternative */
209         if (((1 << i) & adapter->active_queues) == 0)
210                 i = ffsl(adapter->active_queues);
211
212         txr = &adapter->tx_rings[i];
213         que = &adapter->queues[i];
214
215         err = drbr_enqueue(ifp, txr->br, m);
216         if (err)
217                 return (err);
218         if (IXGBE_TX_TRYLOCK(txr)) {
219                 ixgbe_mq_start_locked(ifp, txr);
220                 IXGBE_TX_UNLOCK(txr);
221         } else
222                 taskqueue_enqueue(que->tq, &txr->txq_task);
223
224         return (0);
225 }
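/*
 * Illustration (hypothetical values): with adapter->num_queues = 8,
 * an RSS-hashed mbuf carrying flowid 0x9f37 is mapped (absent the
 * kernel RSS option) to ring 0x9f37 % 8 = 7, while an unhashed mbuf
 * transmitted from CPU 10 lands on ring 10 % 8 = 2.  If the chosen
 * queue's bit is clear in adapter->active_queues (i.e. it is marked
 * hung), the ffsl() fallback steers the packet to a still-active
 * queue before it is enqueued on that ring's buf_ring.
 */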
226
227 int
228 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
229 {
230         struct adapter  *adapter = txr->adapter;
231         struct mbuf     *next;
232         int             enqueued = 0, err = 0;
233
234         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
235             adapter->link_active == 0)
236                 return (ENETDOWN);
237
238         /* Process the queue */
239 #if __FreeBSD_version < 901504
240         next = drbr_dequeue(ifp, txr->br);
241         while (next != NULL) {
242                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
243                         if (next != NULL)
244                                 err = drbr_enqueue(ifp, txr->br, next);
245 #else
246         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
247                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
248                         if (next == NULL) {
249                                 drbr_advance(ifp, txr->br);
250                         } else {
251                                 drbr_putback(ifp, txr->br, next);
252                         }
253 #endif
254                         break;
255                 }
256 #if __FreeBSD_version >= 901504
257                 drbr_advance(ifp, txr->br);
258 #endif
259                 enqueued++;
260 #if 0 // this is VF-only
261 #if __FreeBSD_version >= 1100036
262                 /*
263                  * Since we're looking at the tx ring, we can check
264                  * to see if we're a VF by examing our tail register
265                  * address.
266                  */
267                 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
268                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
269 #endif
270 #endif
271                 /* Send a copy of the frame to the BPF listener */
272                 ETHER_BPF_MTAP(ifp, next);
273                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
274                         break;
275 #if __FreeBSD_version < 901504
276                 next = drbr_dequeue(ifp, txr->br);
277 #endif
278         }
279
280         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
281                 ixgbe_txeof(txr);
282
283         return (err);
284 }
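/*
 * Sketch of the drbr contract used above (newer-FreeBSD path): the
 * mbuf returned by drbr_peek() is still owned by the buf_ring, so on
 * a successful ixgbe_xmit() we must drbr_advance() to consume it, and
 * on failure either drbr_putback() the (possibly defragged) mbuf or,
 * if ixgbe_xmit() already freed it and set the pointer to NULL,
 * drbr_advance() past the now-empty slot.  The pre-901504 path simply
 * dequeues first and re-enqueues on failure.
 */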
285
286 /*
287  * Called from a taskqueue to drain queued transmit packets.
288  */
289 void
290 ixgbe_deferred_mq_start(void *arg, int pending)
291 {
292         struct tx_ring *txr = arg;
293         struct adapter *adapter = txr->adapter;
294         struct ifnet *ifp = adapter->ifp;
295
296         IXGBE_TX_LOCK(txr);
297         if (!drbr_empty(ifp, txr->br))
298                 ixgbe_mq_start_locked(ifp, txr);
299         IXGBE_TX_UNLOCK(txr);
300 }
301
302 /*
303  * Flush all ring buffers
304  */
305 void
306 ixgbe_qflush(struct ifnet *ifp)
307 {
308         struct adapter  *adapter = ifp->if_softc;
309         struct tx_ring  *txr = adapter->tx_rings;
310         struct mbuf     *m;
311
312         for (int i = 0; i < adapter->num_queues; i++, txr++) {
313                 IXGBE_TX_LOCK(txr);
314                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
315                         m_freem(m);
316                 IXGBE_TX_UNLOCK(txr);
317         }
318         if_qflush(ifp);
319 }
320 #endif /* IXGBE_LEGACY_TX */
321
322
323 /*********************************************************************
324  *
325  *  This routine maps the mbufs to tx descriptors, allowing the
326  *  TX engine to transmit the packets. 
327  *      - return 0 on success, positive on failure
328  *
329  **********************************************************************/
330
331 static int
332 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
333 {
334         struct adapter  *adapter = txr->adapter;
335         u32             olinfo_status = 0, cmd_type_len;
336         int             i, j, error, nsegs;
337         int             first;
338         bool            remap = TRUE;
339         struct mbuf     *m_head;
340         bus_dma_segment_t segs[adapter->num_segs];
341         bus_dmamap_t    map;
342         struct ixgbe_tx_buf *txbuf;
343         union ixgbe_adv_tx_desc *txd = NULL;
344
345         m_head = *m_headp;
346
347         /* Basic descriptor defines */
348         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
349             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
350
351         if (m_head->m_flags & M_VLANTAG)
352                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
353
354         /*
355          * Important to capture the first descriptor
356          * used because it will contain the index of
357          * the one we tell the hardware to report back
358          */
359         first = txr->next_avail_desc;
360         txbuf = &txr->tx_buffers[first];
361         map = txbuf->map;
362
363         /*
364          * Map the packet for DMA.
365          */
366 retry:
367         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
368             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
369
370         if (__predict_false(error)) {
371                 struct mbuf *m;
372
373                 switch (error) {
374                 case EFBIG:
375                         /* Try it again? - one try */
376                         if (remap == TRUE) {
377                                 remap = FALSE;
378                                 /*
379                                  * XXX: m_defrag will choke on
380                                  * non-MCLBYTES-sized clusters
381                                  */
382                                 m = m_defrag(*m_headp, M_NOWAIT);
383                                 if (m == NULL) {
384                                         adapter->mbuf_defrag_failed++;
385                                         m_freem(*m_headp);
386                                         *m_headp = NULL;
387                                         return (ENOBUFS);
388                                 }
389                                 *m_headp = m;
390                                 goto retry;
391                         } else
392                                 return (error);
393                 case ENOMEM:
394                         txr->no_tx_dma_setup++;
395                         return (error);
396                 default:
397                         txr->no_tx_dma_setup++;
398                         m_freem(*m_headp);
399                         *m_headp = NULL;
400                         return (error);
401                 }
402         }
403
404         /* Make certain there are enough descriptors */
405         if (nsegs > txr->tx_avail - 2) {
406                 txr->no_desc_avail++;
407                 bus_dmamap_unload(txr->txtag, map);
408                 return (ENOBUFS);
409         }
410         m_head = *m_headp;
411
412         /*
413          * Set up the appropriate offload context;
414          * this will consume the first descriptor.
415          */
416         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
417         if (__predict_false(error)) {
418                 if (error == ENOBUFS)
419                         *m_headp = NULL;
420                 return (error);
421         }
422
423 #ifdef IXGBE_FDIR
424         /* Do the flow director magic */
425         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
426                 ++txr->atr_count;
427                 if (txr->atr_count >= atr_sample_rate) {
428                         ixgbe_atr(txr, m_head);
429                         txr->atr_count = 0;
430                 }
431         }
432 #endif
433
434         olinfo_status |= IXGBE_ADVTXD_CC;
435         i = txr->next_avail_desc;
436         for (j = 0; j < nsegs; j++) {
437                 bus_size_t seglen;
438                 bus_addr_t segaddr;
439
440                 txbuf = &txr->tx_buffers[i];
441                 txd = &txr->tx_base[i];
442                 seglen = segs[j].ds_len;
443                 segaddr = htole64(segs[j].ds_addr);
444
445                 txd->read.buffer_addr = segaddr;
446                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
447                     cmd_type_len |seglen);
448                 txd->read.olinfo_status = htole32(olinfo_status);
449
450                 if (++i == txr->num_desc)
451                         i = 0;
452         }
453
454         txd->read.cmd_type_len |=
455             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
456         txr->tx_avail -= nsegs;
457         txr->next_avail_desc = i;
458
459         txbuf->m_head = m_head;
460         /*
461          * Here we swap the maps so that the last descriptor,
462          * which gets the completion interrupt, keeps the
463          * real map, and the first descriptor gets the
464          * unused map from this descriptor.
465          */
466         txr->tx_buffers[first].map = txbuf->map;
467         txbuf->map = map;
468         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
469
470         /* Set the EOP descriptor that will be marked done */
471         txbuf = &txr->tx_buffers[first];
472         txbuf->eop = txd;
473
474         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
475             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
476         /*
477          * Advance the Transmit Descriptor Tail (TDT); this tells the
478          * hardware that this frame is available to transmit.
479          */
480         ++txr->total_packets;
481         IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
482
483         /* Mark queue as having work */
484         if (txr->busy == 0)
485                 txr->busy = 1;
486
487         return (0);
488 }
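/*
 * Descriptor accounting example (hypothetical numbers): a checksum-
 * offloaded packet whose mbuf chain maps to nsegs = 3 DMA segments
 * consumes one context descriptor in ixgbe_tx_ctx_setup() plus three
 * data descriptors above, so txr->tx_avail drops by 4 in total.  Only
 * the last data descriptor gets EOP/RS set, and its tx_buffer keeps
 * the loaded DMA map (the maps are swapped so the first slot holds
 * the spare).
 */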
489
490
491 /*********************************************************************
492  *
493  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
494  *  the information needed to transmit a packet on the wire. This is
495  *  called only once at attach; setup is done on every reset.
496  *
497  **********************************************************************/
498 int
499 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
500 {
501         struct adapter *adapter = txr->adapter;
502         device_t dev = adapter->dev;
503         struct ixgbe_tx_buf *txbuf;
504         int error, i;
505
506         /*
507          * Setup DMA descriptor areas.
508          */
509         if ((error = bus_dma_tag_create(
510                                bus_get_dma_tag(adapter->dev),   /* parent */
511                                1, 0,            /* alignment, bounds */
512                                BUS_SPACE_MAXADDR,       /* lowaddr */
513                                BUS_SPACE_MAXADDR,       /* highaddr */
514                                NULL, NULL,              /* filter, filterarg */
515                                IXGBE_TSO_SIZE,          /* maxsize */
516                                adapter->num_segs,       /* nsegments */
517                                PAGE_SIZE,               /* maxsegsize */
518                                0,                       /* flags */
519                                NULL,                    /* lockfunc */
520                                NULL,                    /* lockfuncarg */
521                                &txr->txtag))) {
522                 device_printf(dev,"Unable to allocate TX DMA tag\n");
523                 goto fail;
524         }
525
526         if (!(txr->tx_buffers =
527             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
530                 error = ENOMEM;
531                 goto fail;
532         }
533
534         /* Create the descriptor buffer dma maps */
535         txbuf = txr->tx_buffers;
536         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
538                 if (error != 0) {
539                         device_printf(dev, "Unable to create TX DMA map\n");
540                         goto fail;
541                 }
542         }
543
544         return 0;
545 fail:
546         /* Free everything; this handles the case where we failed partway */
547         ixgbe_free_transmit_structures(adapter);
548         return (error);
549 }
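/*
 * A note on the DMA tag above (descriptive sketch, not normative): it
 * permits one mapping of up to IXGBE_TSO_SIZE bytes, split into at
 * most adapter->num_segs segments of at most PAGE_SIZE each, which is
 * what lets ixgbe_xmit() hand a full TSO frame to the hardware as a
 * chain of data descriptors; lowaddr = BUS_SPACE_MAXADDR means any
 * host address is acceptable, so busdma never has to bounce TX
 * buffers for this tag.
 */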
550
551 /*********************************************************************
552  *
553  *  Initialize a transmit ring.
554  *
555  **********************************************************************/
556 static void
557 ixgbe_setup_transmit_ring(struct tx_ring *txr)
558 {
559         struct adapter *adapter = txr->adapter;
560         struct ixgbe_tx_buf *txbuf;
561 #ifdef DEV_NETMAP
562         struct netmap_adapter *na = NA(adapter->ifp);
563         struct netmap_slot *slot;
564 #endif /* DEV_NETMAP */
565
566         /* Clear the old ring contents */
567         IXGBE_TX_LOCK(txr);
568 #ifdef DEV_NETMAP
569         /*
570          * (under lock): if in netmap mode, do some consistency
571          * checks and set slot to entry 0 of the netmap ring.
572          */
573         slot = netmap_reset(na, NR_TX, txr->me, 0);
574 #endif /* DEV_NETMAP */
575         bzero((void *)txr->tx_base,
576               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
577         /* Reset indices */
578         txr->next_avail_desc = 0;
579         txr->next_to_clean = 0;
580
581         /* Free any existing tx buffers. */
582         txbuf = txr->tx_buffers;
583         for (int i = 0; i < txr->num_desc; i++, txbuf++) {
584                 if (txbuf->m_head != NULL) {
585                         bus_dmamap_sync(txr->txtag, txbuf->map,
586                             BUS_DMASYNC_POSTWRITE);
587                         bus_dmamap_unload(txr->txtag, txbuf->map);
588                         m_freem(txbuf->m_head);
589                         txbuf->m_head = NULL;
590                 }
591 #ifdef DEV_NETMAP
592                 /*
593                  * In netmap mode, set the map for the packet buffer.
594                  * NOTE: Some drivers (not this one) also need to set
595                  * the physical buffer address in the NIC ring.
596                  * Slots in the netmap ring (indexed by "si") are
597                  * kring->nkr_hwofs positions "ahead" wrt the
598                  * corresponding slot in the NIC ring. In some drivers
599                  * (not here) nkr_hwofs can be negative. Function
600                  * netmap_idx_n2k() handles wraparounds properly.
601                  */
602                 if (slot) {
603                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
604                         netmap_load_map(na, txr->txtag,
605                             txbuf->map, NMB(na, slot + si));
606                 }
607 #endif /* DEV_NETMAP */
608                 /* Clear the EOP descriptor pointer */
609                 txbuf->eop = NULL;
610         }
611
612 #ifdef IXGBE_FDIR
613         /* Set the rate at which we sample packets */
614         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615                 txr->atr_sample = atr_sample_rate;
616 #endif
617
618         /* Set number of descriptors available */
619         txr->tx_avail = adapter->num_tx_desc;
620
621         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623         IXGBE_TX_UNLOCK(txr);
624 }
625
626 /*********************************************************************
627  *
628  *  Initialize all transmit rings.
629  *
630  **********************************************************************/
631 int
632 ixgbe_setup_transmit_structures(struct adapter *adapter)
633 {
634         struct tx_ring *txr = adapter->tx_rings;
635
636         for (int i = 0; i < adapter->num_queues; i++, txr++)
637                 ixgbe_setup_transmit_ring(txr);
638
639         return (0);
640 }
641
642 /*********************************************************************
643  *
644  *  Free all transmit rings.
645  *
646  **********************************************************************/
647 void
648 ixgbe_free_transmit_structures(struct adapter *adapter)
649 {
650         struct tx_ring *txr = adapter->tx_rings;
651
652         for (int i = 0; i < adapter->num_queues; i++, txr++) {
653                 IXGBE_TX_LOCK(txr);
654                 ixgbe_free_transmit_buffers(txr);
655                 ixgbe_dma_free(adapter, &txr->txdma);
656                 IXGBE_TX_UNLOCK(txr);
657                 IXGBE_TX_LOCK_DESTROY(txr);
658         }
659         free(adapter->tx_rings, M_DEVBUF);
660 }
661
662 /*********************************************************************
663  *
664  *  Free transmit ring related data structures.
665  *
666  **********************************************************************/
667 static void
668 ixgbe_free_transmit_buffers(struct tx_ring *txr)
669 {
670         struct adapter *adapter = txr->adapter;
671         struct ixgbe_tx_buf *tx_buffer;
672         int             i;
673
674         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
675
676         if (txr->tx_buffers == NULL)
677                 return;
678
679         tx_buffer = txr->tx_buffers;
680         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
681                 if (tx_buffer->m_head != NULL) {
682                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
683                             BUS_DMASYNC_POSTWRITE);
684                         bus_dmamap_unload(txr->txtag,
685                             tx_buffer->map);
686                         m_freem(tx_buffer->m_head);
687                         tx_buffer->m_head = NULL;
688                         if (tx_buffer->map != NULL) {
689                                 bus_dmamap_destroy(txr->txtag,
690                                     tx_buffer->map);
691                                 tx_buffer->map = NULL;
692                         }
693                 } else if (tx_buffer->map != NULL) {
694                         bus_dmamap_unload(txr->txtag,
695                             tx_buffer->map);
696                         bus_dmamap_destroy(txr->txtag,
697                             tx_buffer->map);
698                         tx_buffer->map = NULL;
699                 }
700         }
701 #ifdef IXGBE_LEGACY_TX
702         if (txr->br != NULL)
703                 buf_ring_free(txr->br, M_DEVBUF);
704 #endif
705         if (txr->tx_buffers != NULL) {
706                 free(txr->tx_buffers, M_DEVBUF);
707                 txr->tx_buffers = NULL;
708         }
709         if (txr->txtag != NULL) {
710                 bus_dma_tag_destroy(txr->txtag);
711                 txr->txtag = NULL;
712         }
713         return;
714 }
715
716 /*********************************************************************
717  *
718  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
719  *
720  **********************************************************************/
721
722 static int
723 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724     u32 *cmd_type_len, u32 *olinfo_status)
725 {
726         struct adapter *adapter = txr->adapter;
727         struct ixgbe_adv_tx_context_desc *TXD;
728         struct ether_vlan_header *eh;
729 #ifdef INET
730         struct ip *ip;
731 #endif
732 #ifdef INET6
733         struct ip6_hdr *ip6;
734 #endif
735         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
736         int     ehdrlen, ip_hlen = 0;
737         u16     etype;
738         u8      ipproto = 0;
739         int     offload = TRUE;
740         int     ctxd = txr->next_avail_desc;
741         u16     vtag = 0;
742         caddr_t l3d;
743
744
745         /* First check if TSO is to be used */
746         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO|CSUM_IP6_TSO))
747                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
748
749         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
750                 offload = FALSE;
751
752         /* Indicate the whole packet as payload when not doing TSO */
753         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
754
755         /* Now ready a context descriptor */
756         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
757
758         /*
759         ** In advanced descriptors the vlan tag must 
760         ** be placed into the context descriptor. Hence
761         ** we need to make one even if not doing offloads.
762         */
763         if (mp->m_flags & M_VLANTAG) {
764                 vtag = htole16(mp->m_pkthdr.ether_vtag);
765                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
766         } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
767                 return (0);
768
769         /*
770          * Determine where frame payload starts.
771          * Jump over vlan headers if already present,
772          * helpful for QinQ too.
773          */
774         eh = mtod(mp, struct ether_vlan_header *);
775         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
776                 etype = ntohs(eh->evl_proto);
777                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
778         } else {
779                 etype = ntohs(eh->evl_encap_proto);
780                 ehdrlen = ETHER_HDR_LEN;
781         }
782
783         /* Set the ether header length */
784         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
785
786         if (offload == FALSE)
787                 goto no_offloads;
788
789         /*
790          * If the first mbuf only includes the ethernet header, jump to the next one
791          * XXX: This assumes the stack splits mbufs containing headers on header boundaries
792          * XXX: And assumes the entire IP header is contained in one mbuf
793          */
794         if (mp->m_len == ehdrlen && mp->m_next)
795                 l3d = mtod(mp->m_next, caddr_t);
796         else
797                 l3d = mtod(mp, caddr_t) + ehdrlen;
798
799         switch (etype) {
800                 case ETHERTYPE_IP:
801                         ip = (struct ip *)(l3d);
802                         ip_hlen = ip->ip_hl << 2;
803                         ipproto = ip->ip_p;
804                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
805                         /* Insert IPv4 checksum into data descriptors */
806                         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
807                                 ip->ip_sum = 0;
808                                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
809                         }
810                         break;
811                 case ETHERTYPE_IPV6:
812                         ip6 = (struct ip6_hdr *)(l3d);
813                         ip_hlen = sizeof(struct ip6_hdr);
814                         ipproto = ip6->ip6_nxt;
815                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
816                         break;
817                 default:
818                         offload = FALSE;
819                         break;
820         }
821
822         vlan_macip_lens |= ip_hlen;
823
824         /* No support for offloads for non-L4 next headers */
825         switch (ipproto) {
826                 case IPPROTO_TCP:
827                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
828                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
829                         else
830                                 offload = false;
831                         break;
832                 case IPPROTO_UDP:
833                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
834                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
835                         else
836                                 offload = false;
837                         break;
838                 case IPPROTO_SCTP:
839                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
840                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
841                         else
842                                 offload = false;
843                         break;
844                 default:
845                         offload = false;
846                         break;
847         }
848
849         if (offload) /* Insert L4 checksum into data descriptors */
850                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
851
852 no_offloads:
853         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
854
855         /* Now copy bits into descriptor */
856         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
857         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
858         TXD->seqnum_seed = htole32(0);
859         TXD->mss_l4len_idx = htole32(0);
860
861         /* We've consumed the first desc, adjust counters */
862         if (++ctxd == txr->num_desc)
863                 ctxd = 0;
864         txr->next_avail_desc = ctxd;
865         --txr->tx_avail;
866
867         return (0);
868 }
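/*
 * Field-packing example (hypothetical frame): for an untagged IPv4/TCP
 * packet with a 14-byte Ethernet header and a 20-byte IP header, the
 * code above builds
 *
 *     vlan_macip_lens = (14 << IXGBE_ADVTXD_MACLEN_SHIFT) | 20;
 *     type_tucmd_mlhl = IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT |
 *                       IXGBE_ADVTXD_TUCMD_IPV4 | IXGBE_ADVTXD_TUCMD_L4T_TCP;
 *
 * and, when CSUM_IP / CSUM_IP_TCP are requested, sets the IXSM/TXSM
 * bits in olinfo_status so the data descriptors ask the hardware to
 * insert the IP and TCP checksums.
 */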
869
870 /**********************************************************************
871  *
872  *  Setup work for hardware segmentation offload (TSO) on
873  *  adapters using advanced tx descriptors
874  *
875  **********************************************************************/
876 static int
877 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
878     u32 *cmd_type_len, u32 *olinfo_status)
879 {
880         struct ixgbe_adv_tx_context_desc *TXD;
881         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
882         u32 mss_l4len_idx = 0, paylen;
883         u16 vtag = 0, eh_type;
884         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
885         struct ether_vlan_header *eh;
886 #ifdef INET6
887         struct ip6_hdr *ip6;
888 #endif
889 #ifdef INET
890         struct ip *ip;
891 #endif
892         struct tcphdr *th;
893
894         /*
895          * Determine where frame payload starts.
896          * Jump over vlan headers if already present
897          */
898         eh = mtod(mp, struct ether_vlan_header *);
899         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
900                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
901                 eh_type = eh->evl_proto;
902         } else {
903                 ehdrlen = ETHER_HDR_LEN;
904                 eh_type = eh->evl_encap_proto;
905         }
906
907         switch (ntohs(eh_type)) {
908 #ifdef INET6
909         case ETHERTYPE_IPV6:
910                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
911                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
912                 if (ip6->ip6_nxt != IPPROTO_TCP)
913                         return (ENXIO);
914                 ip_hlen = sizeof(struct ip6_hdr);
915                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
916                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
917                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
918                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
919                 break;
920 #endif
921 #ifdef INET
922         case ETHERTYPE_IP:
923                 ip = (struct ip *)(mp->m_data + ehdrlen);
924                 if (ip->ip_p != IPPROTO_TCP)
925                         return (ENXIO);
926                 ip->ip_sum = 0;
927                 ip_hlen = ip->ip_hl << 2;
928                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
929                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
930                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
931                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
932                 /* Tell transmit desc to also do IPv4 checksum. */
933                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
934                 break;
935 #endif
936         default:
937                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
938                     __func__, ntohs(eh_type));
939                 break;
940         }
941
942         ctxd = txr->next_avail_desc;
943         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
944
945         tcp_hlen = th->th_off << 2;
946
947         /* This is used in the transmit desc in encap */
948         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
949
950         /* VLAN MACLEN IPLEN */
951         if (mp->m_flags & M_VLANTAG) {
952                 vtag = htole16(mp->m_pkthdr.ether_vtag);
953                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
954         }
955
956         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
957         vlan_macip_lens |= ip_hlen;
958         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
959
960         /* ADV DTYPE TUCMD */
961         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
962         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
963         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
964
965         /* MSS L4LEN IDX */
966         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
967         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
968         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
969
970         TXD->seqnum_seed = htole32(0);
971
972         if (++ctxd == txr->num_desc)
973                 ctxd = 0;
974
975         txr->tx_avail--;
976         txr->next_avail_desc = ctxd;
977         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
978         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
979         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
980         ++txr->tso_tx;
981         return (0);
982 }
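/*
 * TSO arithmetic example (hypothetical sizes): for a 32774-byte
 * IPv4/TCP send with 14 + 20 + 20 bytes of headers, paylen =
 * 32774 - 54 = 32720, which is what goes into olinfo_status above;
 * with tso_segsz = 1460 the hardware cuts this into 22 full segments
 * plus one 600-byte tail (23 frames on the wire), replicating the
 * headers and fixing up the IP/TCP checksums per segment.
 */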
983
984
985 /**********************************************************************
986  *
987  *  Examine each tx_buffer in the used queue. If the hardware is done
988  *  processing the packet then free associated resources. The
989  *  tx_buffer is put back on the free queue.
990  *
991  **********************************************************************/
992 void
993 ixgbe_txeof(struct tx_ring *txr)
994 {
995         struct adapter          *adapter = txr->adapter;
996 #ifdef DEV_NETMAP
997         struct ifnet            *ifp = adapter->ifp;
998 #endif
999         u32                     work, processed = 0;
1000         u32                     limit = adapter->tx_process_limit;
1001         struct ixgbe_tx_buf     *buf;
1002         union ixgbe_adv_tx_desc *txd;
1003
1004         mtx_assert(&txr->tx_mtx, MA_OWNED);
1005
1006 #ifdef DEV_NETMAP
1007         if (ifp->if_capenable & IFCAP_NETMAP) {
1008                 struct netmap_adapter *na = NA(ifp);
1009                 struct netmap_kring *kring = &na->tx_rings[txr->me];
1010                 txd = txr->tx_base;
1011                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1012                     BUS_DMASYNC_POSTREAD);
1013                 /*
1014                  * In netmap mode, all the work is done in the context
1015                  * of the client thread. Interrupt handlers only wake up
1016                  * clients, which may be sleeping on individual rings
1017                  * or on a global resource for all rings.
1018                  * To implement tx interrupt mitigation, we wake up the client
1019                  * thread roughly every half ring, even if the NIC interrupts
1020                  * more frequently. This is implemented as follows:
1021                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
1022                  *   the slot that should wake up the thread (nkr_num_slots
1023                  *   means the user thread should not be woken up);
1024                  * - the driver ignores tx interrupts unless netmap_mitigate=0
1025                  *   or the slot has the DD bit set.
1026                  */
1027                 if (!netmap_mitigate ||
1028                     (kring->nr_kflags < kring->nkr_num_slots &&
1029                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1030                         netmap_tx_irq(ifp, txr->me);
1031                 }
1032                 return;
1033         }
1034 #endif /* DEV_NETMAP */
1035
1036         if (txr->tx_avail == txr->num_desc) {
1037                 txr->busy = 0;
1038                 return;
1039         }
1040
1041         /* Get work starting point */
1042         work = txr->next_to_clean;
1043         buf = &txr->tx_buffers[work];
1044         txd = &txr->tx_base[work];
1045         work -= txr->num_desc; /* The distance to ring end */
1046         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1047             BUS_DMASYNC_POSTREAD);
1048
1049         do {
1050                 union ixgbe_adv_tx_desc *eop = buf->eop;
1051                 if (eop == NULL) /* No work */
1052                         break;
1053
1054                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1055                         break;  /* I/O not complete */
1056
1057                 if (buf->m_head) {
1058                         txr->bytes +=
1059                             buf->m_head->m_pkthdr.len;
1060                         bus_dmamap_sync(txr->txtag,
1061                             buf->map,
1062                             BUS_DMASYNC_POSTWRITE);
1063                         bus_dmamap_unload(txr->txtag,
1064                             buf->map);
1065                         m_freem(buf->m_head);
1066                         buf->m_head = NULL;
1067                 }
1068                 buf->eop = NULL;
1069                 ++txr->tx_avail;
1070
1071                 /* We clean the range if multi segment */
1072                 while (txd != eop) {
1073                         ++txd;
1074                         ++buf;
1075                         ++work;
1076                         /* wrap the ring? */
1077                         if (__predict_false(!work)) {
1078                                 work -= txr->num_desc;
1079                                 buf = txr->tx_buffers;
1080                                 txd = txr->tx_base;
1081                         }
1082                         if (buf->m_head) {
1083                                 txr->bytes +=
1084                                     buf->m_head->m_pkthdr.len;
1085                                 bus_dmamap_sync(txr->txtag,
1086                                     buf->map,
1087                                     BUS_DMASYNC_POSTWRITE);
1088                                 bus_dmamap_unload(txr->txtag,
1089                                     buf->map);
1090                                 m_freem(buf->m_head);
1091                                 buf->m_head = NULL;
1092                         }
1093                         ++txr->tx_avail;
1094                         buf->eop = NULL;
1095
1096                 }
1097                 ++txr->packets;
1098                 ++processed;
1099
1100                 /* Try the next packet */
1101                 ++txd;
1102                 ++buf;
1103                 ++work;
1104                 /* reset with a wrap */
1105                 if (__predict_false(!work)) {
1106                         work -= txr->num_desc;
1107                         buf = txr->tx_buffers;
1108                         txd = txr->tx_base;
1109                 }
1110                 prefetch(txd);
1111         } while (__predict_true(--limit));
1112
1113         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1114             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1115
1116         work += txr->num_desc;
1117         txr->next_to_clean = work;
1118
1119         /*
1120         ** Queue hang detection: we know there is
1121         ** work outstanding or the first return
1122         ** above would have been taken, so increment
1123         ** busy if nothing was cleaned; the local
1124         ** timer checks this counter and marks the
1125         ** queue HUNG once it exceeds a maximum.
1126         */
1127         if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1128                 ++txr->busy;
1129         /*
1130         ** If anything was cleaned, reset the state to 1;
1131         ** note this will clear HUNG if it was set.
1132         */
1133         if (processed)
1134                 txr->busy = 1;
1135
1136         if (txr->tx_avail == txr->num_desc)
1137                 txr->busy = 0;
1138
1139         return;
1140 }
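/*
 * Index arithmetic example (hypothetical values): with num_desc = 1024
 * and next_to_clean = 1020, the loop above starts with work =
 * 1020 - 1024 = -4; after stepping past four descriptors, ++work
 * reaches 0, which is the wrap signal that resets buf/txd to the start
 * of the ring and work back to -1024.  The final
 * "work += txr->num_desc" converts the offset back into the new
 * next_to_clean index.
 */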
1141
1142
1143 #ifdef IXGBE_FDIR
1144 /*
1145 ** This routine parses packet headers so that Flow
1146 ** Director can make a hashed filter table entry,
1147 ** allowing traffic flows to be identified and kept
1148 ** on the same cpu.  Doing this for every packet
1149 ** would be a performance hit, so we only sample
1150 ** at the IXGBE_FDIR_RATE.
1151 */
1152 static void
1153 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1154 {
1155         struct adapter                  *adapter = txr->adapter;
1156         struct ix_queue                 *que;
1157         struct ip                       *ip;
1158         struct tcphdr                   *th;
1159         struct udphdr                   *uh;
1160         struct ether_vlan_header        *eh;
1161         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
1162         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
1163         int                             ehdrlen, ip_hlen;
1164         u16                             etype;
1165
1166         eh = mtod(mp, struct ether_vlan_header *);
1167         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1168                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1169                 etype = eh->evl_proto;
1170         } else {
1171                 ehdrlen = ETHER_HDR_LEN;
1172                 etype = eh->evl_encap_proto;
1173         }
1174
1175         /* Only handling IPv4 */
1176         if (etype != htons(ETHERTYPE_IP))
1177                 return;
1178
1179         ip = (struct ip *)(mp->m_data + ehdrlen);
1180         ip_hlen = ip->ip_hl << 2;
1181
1182         /* check if we're UDP or TCP */
1183         switch (ip->ip_p) {
1184         case IPPROTO_TCP:
1185                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1186                 /* src and dst are inverted */
1187                 common.port.dst ^= th->th_sport;
1188                 common.port.src ^= th->th_dport;
1189                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1190                 break;
1191         case IPPROTO_UDP:
1192                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1193                 /* src and dst are inverted */
1194                 common.port.dst ^= uh->uh_sport;
1195                 common.port.src ^= uh->uh_dport;
1196                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1197                 break;
1198         default:
1199                 return;
1200         }
1201
1202         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1203         if (mp->m_pkthdr.ether_vtag)
1204                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1205         else
1206                 common.flex_bytes ^= etype;
1207         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1208
1209         que = &adapter->queues[txr->me];
1210         /*
1211         ** This assumes the Rx queue and Tx
1212         ** queue are bound to the same CPU
1213         */
1214         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1215             input, common, que->msix);
1216 }
1217 #endif /* IXGBE_FDIR */
1218
1219 /*
1220 ** Used to detect a descriptor that has
1221 ** been merged by Hardware RSC.
1222 */
1223 static inline u32
1224 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1225 {
1226         return (le32toh(rx->wb.lower.lo_dword.data) &
1227             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1228 }
1229
1230 /*********************************************************************
1231  *
1232  *  Initialize the Hardware RSC (LRO) feature on 82599
1233  *  for an RX ring; it is toggled by the LRO capability
1234  *  even though it is transparent to the stack.
1235  *
1236  *  NOTE: since this HW feature only works with IPv4 and
1237  *        our testing has shown software LRO to be as
1238  *        effective, it is disabled by default.
1239  *
1240  **********************************************************************/
1241 static void
1242 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1243 {
1244         struct  adapter         *adapter = rxr->adapter;
1245         struct  ixgbe_hw        *hw = &adapter->hw;
1246         u32                     rscctrl, rdrxctl;
1247
1248         /* If turning LRO/RSC off we need to disable it */
1249         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1250                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1251                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                     /* Write back the cleared enable bit so RSC is actually disabled */
                     IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1252                 return;
1253         }
1254
1255         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1256         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1257 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1258         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1259 #endif /* DEV_NETMAP */
1260         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1261         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1262         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1263
1264         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1265         rscctrl |= IXGBE_RSCCTL_RSCEN;
1266         /*
1267         ** Limit the total number of descriptors that
1268         ** can be combined, so it does not exceed 64K
1269         */
1270         if (rxr->mbuf_sz == MCLBYTES)
1271                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1272         else if (rxr->mbuf_sz == MJUMPAGESIZE)
1273                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1274         else if (rxr->mbuf_sz == MJUM9BYTES)
1275                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1276         else  /* Using 16K cluster */
1277                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1278
1279         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1280
1281         /* Enable TCP header recognition */
1282         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1283             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1284             IXGBE_PSRTYPE_TCPHDR));
1285
1286         /* Disable RSC for ACK packets */
1287         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1288             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1289
1290         rxr->hw_rsc = TRUE;
1291 }
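/*
 * Worked numbers for the MAXDESC choice above (standard FreeBSD cluster
 * sizes assumed): 16 x 2KB (MCLBYTES) = 32KB, 8 x 4KB (MJUMPAGESIZE on
 * most platforms) = 32KB, 4 x 9KB (MJUM9BYTES) = 36KB, and 1 x 16KB =
 * 16KB, so every combination stays under the 64KB aggregation limit
 * mentioned in the comment.
 */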
1292
1293 /*********************************************************************
1294  *
1295  *  Refresh mbuf buffers for RX descriptor rings
1296  *   - now keeps its own state, so discards due to resource
1297  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1298  *     the routine just returns, keeping its placeholder, and can
1299  *     simply be called again later to retry.
1300  *
1301  **********************************************************************/
1302 static void
1303 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1304 {
1305         struct adapter          *adapter = rxr->adapter;
1306         bus_dma_segment_t       seg[1];
1307         struct ixgbe_rx_buf     *rxbuf;
1308         struct mbuf             *mp;
1309         int                     i, j, nsegs, error;
1310         bool                    refreshed = FALSE;
1311
1312         i = j = rxr->next_to_refresh;
1313         /* Control the loop with one beyond */
1314         if (++j == rxr->num_desc)
1315                 j = 0;
1316
1317         while (j != limit) {
1318                 rxbuf = &rxr->rx_buffers[i];
1319                 if (rxbuf->buf == NULL) {
1320                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1321                             M_PKTHDR, rxr->mbuf_sz);
1322                         if (mp == NULL)
1323                                 goto update;
1324                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1325                                 m_adj(mp, ETHER_ALIGN);
1326                 } else
1327                         mp = rxbuf->buf;
1328
1329                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1330
1331                 /* If we're dealing with an mbuf that was copied rather
1332                  * than replaced, there's no need to go through busdma.
1333                  */
1334                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1335                         /* Get the memory mapping */
1336                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1337                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1338                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1339                         if (error != 0) {
1340                                 printf("Refresh mbufs: payload dmamap load"
1341                                     " failure - %d\n", error);
1342                                 m_free(mp);
1343                                 rxbuf->buf = NULL;
1344                                 goto update;
1345                         }
1346                         rxbuf->buf = mp;
1347                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1348                             BUS_DMASYNC_PREREAD);
1349                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1350                             htole64(seg[0].ds_addr);
1351                 } else {
1352                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1353                         rxbuf->flags &= ~IXGBE_RX_COPY;
1354                 }
1355
1356                 refreshed = TRUE;
1357                 /* Next is precalculated */
1358                 i = j;
1359                 rxr->next_to_refresh = i;
1360                 if (++j == rxr->num_desc)
1361                         j = 0;
1362         }
1363 update:
1364         if (refreshed) /* Update hardware tail index */
1365                 IXGBE_WRITE_REG(&adapter->hw,
1366                     rxr->tail, rxr->next_to_refresh);
1367         return;
1368 }
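/*
 * Refresh-loop example (hypothetical ring of num_desc = 8): if
 * next_to_refresh is 6 and the caller passes limit = 2, the loop above
 * refreshes buffers 6, 7 and 0, leaves next_to_refresh at 1 (one past
 * the last slot refreshed), and writes that value to the tail
 * register, handing descriptors 6, 7 and 0 back to the hardware.  If
 * an mbuf allocation fails partway, the function simply returns and
 * the same slots are retried on the next call.
 */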
1369
1370 /*********************************************************************
1371  *
1372  *  Allocate memory for rx_buffer structures. Since we use one
1373  *  rx_buffer per received packet, the maximum number of rx_buffers
1374  *  that we'll need is equal to the number of receive descriptors
1375  *  that we've allocated.
1376  *
1377  **********************************************************************/
1378 int
1379 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1380 {
1381         struct  adapter         *adapter = rxr->adapter;
1382         device_t                dev = adapter->dev;
1383         struct ixgbe_rx_buf     *rxbuf;
1384         int                     bsize, error;
1385
1386         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1387         if (!(rxr->rx_buffers =
1388             (struct ixgbe_rx_buf *) malloc(bsize,
1389             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1390                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1391                 error = ENOMEM;
1392                 goto fail;
1393         }
1394
1395         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1396                                    1, 0,        /* alignment, bounds */
1397                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1398                                    BUS_SPACE_MAXADDR,   /* highaddr */
1399                                    NULL, NULL,          /* filter, filterarg */
1400                                    MJUM16BYTES,         /* maxsize */
1401                                    1,                   /* nsegments */
1402                                    MJUM16BYTES,         /* maxsegsize */
1403                                    0,                   /* flags */
1404                                    NULL,                /* lockfunc */
1405                                    NULL,                /* lockfuncarg */
1406                                    &rxr->ptag))) {
1407                 device_printf(dev, "Unable to create RX DMA tag\n");
1408                 goto fail;
1409         }
1410
1411         for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1412                 rxbuf = &rxr->rx_buffers[i];
1413                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1414                 if (error) {
1415                         device_printf(dev, "Unable to create RX dma map\n");
1416                         goto fail;
1417                 }
1418         }
1419
1420         return (0);
1421
1422 fail:
1423         /* Frees all, but can handle partial completion */
1424         ixgbe_free_receive_structures(adapter);
1425         return (error);
1426 }
1427
1428 static void
1429 ixgbe_free_receive_ring(struct rx_ring *rxr)
1430 {
1431         struct ixgbe_rx_buf       *rxbuf;
1432
1433         for (int i = 0; i < rxr->num_desc; i++) {
1434                 rxbuf = &rxr->rx_buffers[i];
1435                 if (rxbuf->buf != NULL) {
1436                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1437                             BUS_DMASYNC_POSTREAD);
1438                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
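                        /*
                        ** M_PKTHDR may have been cleared while this mbuf
                        ** was linked as a secondary fragment; restore it
                        ** so m_freem() releases it as a packet header mbuf.
                        */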
1439                         rxbuf->buf->m_flags |= M_PKTHDR;
1440                         m_freem(rxbuf->buf);
1441                         rxbuf->buf = NULL;
1442                         rxbuf->flags = 0;
1443                 }
1444         }
1445 }
1446
1447 /*********************************************************************
1448  *
1449  *  Initialize a receive ring and its buffers.
1450  *
1451  **********************************************************************/
1452 static int
1453 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1454 {
1455         struct  adapter         *adapter;
1456         struct ifnet            *ifp;
1457         device_t                dev;
1458         struct ixgbe_rx_buf     *rxbuf;
1459         bus_dma_segment_t       seg[1];
1460         struct lro_ctrl         *lro = &rxr->lro;
1461         int                     rsize, nsegs, error = 0;
1462 #ifdef DEV_NETMAP
1463         struct netmap_adapter *na = NA(rxr->adapter->ifp);
1464         struct netmap_slot *slot;
1465 #endif /* DEV_NETMAP */
1466
1467         adapter = rxr->adapter;
1468         ifp = adapter->ifp;
1469         dev = adapter->dev;
1470
1471         /* Clear the ring contents */
1472         IXGBE_RX_LOCK(rxr);
1473 #ifdef DEV_NETMAP
1474         /* same as in ixgbe_setup_transmit_ring() */
1475         slot = netmap_reset(na, NR_RX, rxr->me, 0);
1476 #endif /* DEV_NETMAP */
1477         rsize = roundup2(adapter->num_rx_desc *
1478             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1479         bzero((void *)rxr->rx_base, rsize);
1480         /* Cache the size */
1481         rxr->mbuf_sz = adapter->rx_mbuf_sz;
1482
1483         /* Free current RX buffer structs and their mbufs */
1484         ixgbe_free_receive_ring(rxr);
1485
1486         /* Now replenish the mbufs */
1487         for (int j = 0; j != rxr->num_desc; ++j) {
1488                 struct mbuf     *mp;
1489
1490                 rxbuf = &rxr->rx_buffers[j];
1491 #ifdef DEV_NETMAP
1492                 /*
1493                  * In netmap mode, fill the map and set the buffer
1494                  * address in the NIC ring, considering the offset
1495                  * between the netmap and NIC rings (see comment in
1496                  * ixgbe_setup_transmit_ring() ). No need to allocate
1497                  * an mbuf, so end the block with a continue;
1498                  */
1499                 if (slot) {
1500                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1501                         uint64_t paddr;
1502                         void *addr;
1503
1504                         addr = PNMB(na, slot + sj, &paddr);
1505                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1506                         /* Update descriptor and the cached value */
1507                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1508                         rxbuf->addr = htole64(paddr);
1509                         continue;
1510                 }
1511 #endif /* DEV_NETMAP */
1512                 rxbuf->flags = 0; 
1513                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1514                     M_PKTHDR, adapter->rx_mbuf_sz);
1515                 if (rxbuf->buf == NULL) {
1516                         error = ENOBUFS;
1517                         goto fail;
1518                 }
1519                 mp = rxbuf->buf;
1520                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1521                 /* Get the memory mapping */
1522                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1523                     rxbuf->pmap, mp, seg,
1524                     &nsegs, BUS_DMA_NOWAIT);
1525                 if (error != 0)
1526                         goto fail;
1527                 bus_dmamap_sync(rxr->ptag,
1528                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
1529                 /* Update the descriptor and the cached value */
1530                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1531                 rxbuf->addr = htole64(seg[0].ds_addr);
1532         }
1533
1534
1535         /* Setup our descriptor indices */
1536         rxr->next_to_check = 0;
1537         rxr->next_to_refresh = 0;
1538         rxr->lro_enabled = FALSE;
1539         rxr->rx_copies = 0;
1540         rxr->rx_bytes = 0;
1541         rxr->vtag_strip = FALSE;
1542
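        /*
        ** Flush the freshly initialized descriptor ring out to memory so
        ** the hardware sees a consistent view before the ring is enabled.
        */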
1543         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1544             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1545
1546         /*
1547         ** Now set up the LRO interface:
1548         */
1549         if (ixgbe_rsc_enable)
1550                 ixgbe_setup_hw_rsc(rxr);
1551         else if (ifp->if_capenable & IFCAP_LRO) {
1552                 int err = tcp_lro_init(lro);
1553                 if (err) {
1554                         device_printf(dev, "LRO Initialization failed!\n");
1555                         goto fail;
1556                 }
1557                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1558                 rxr->lro_enabled = TRUE;
1559                 lro->ifp = adapter->ifp;
1560         }
1561
1562         IXGBE_RX_UNLOCK(rxr);
1563         return (0);
1564
1565 fail:
1566         ixgbe_free_receive_ring(rxr);
1567         IXGBE_RX_UNLOCK(rxr);
1568         return (error);
1569 }
1570
1571 /*********************************************************************
1572  *
1573  *  Initialize all receive rings.
1574  *
1575  **********************************************************************/
1576 int
1577 ixgbe_setup_receive_structures(struct adapter *adapter)
1578 {
1579         struct rx_ring *rxr = adapter->rx_rings;
1580         int j;
1581
1582         for (j = 0; j < adapter->num_queues; j++, rxr++)
1583                 if (ixgbe_setup_receive_ring(rxr))
1584                         goto fail;
1585
1586         return (0);
1587 fail:
1588         /*
1589          * Free the RX buffers allocated so far; we only handle
1590          * the rings that completed, since the failing case will have
1591          * cleaned up after itself. 'j' failed, so it is the terminus.
1592          */
1593         for (int i = 0; i < j; ++i) {
1594                 rxr = &adapter->rx_rings[i];
1595                 ixgbe_free_receive_ring(rxr);
1596         }
1597
1598         return (ENOBUFS);
1599 }
1600
1601
1602 /*********************************************************************
1603  *
1604  *  Free all receive rings.
1605  *
1606  **********************************************************************/
1607 void
1608 ixgbe_free_receive_structures(struct adapter *adapter)
1609 {
1610         struct rx_ring *rxr = adapter->rx_rings;
1611
1612         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1613
1614         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1615                 struct lro_ctrl         *lro = &rxr->lro;
1616                 ixgbe_free_receive_buffers(rxr);
1617                 /* Free LRO memory */
1618                 tcp_lro_free(lro);
1619                 /* Free the ring memory as well */
1620                 ixgbe_dma_free(adapter, &rxr->rxdma);
1621         }
1622
1623         free(adapter->rx_rings, M_DEVBUF);
1624 }
1625
1626
1627 /*********************************************************************
1628  *
1629  *  Free receive ring data structures
1630  *
1631  **********************************************************************/
1632 void
1633 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1634 {
1635         struct adapter          *adapter = rxr->adapter;
1636         struct ixgbe_rx_buf     *rxbuf;
1637
1638         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1639
1640         /* Cleanup any existing buffers */
1641         if (rxr->rx_buffers != NULL) {
1642                 for (int i = 0; i < adapter->num_rx_desc; i++) {
1643                         rxbuf = &rxr->rx_buffers[i];
1644                         if (rxbuf->buf != NULL) {
1645                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1646                                     BUS_DMASYNC_POSTREAD);
1647                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1648                                 rxbuf->buf->m_flags |= M_PKTHDR;
1649                                 m_freem(rxbuf->buf);
1650                         }
1651                         rxbuf->buf = NULL;
1652                         if (rxbuf->pmap != NULL) {
1653                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1654                                 rxbuf->pmap = NULL;
1655                         }
1656                 }
1657                 if (rxr->rx_buffers != NULL) {
1658                         free(rxr->rx_buffers, M_DEVBUF);
1659                         rxr->rx_buffers = NULL;
1660                 }
1661         }
1662
1663         if (rxr->ptag != NULL) {
1664                 bus_dma_tag_destroy(rxr->ptag);
1665                 rxr->ptag = NULL;
1666         }
1667
1668         return;
1669 }
1670
1671 static __inline void
1672 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1673 {
1674                  
1675         /*
1676          * At the moment LRO is only for IP/TCP packets whose TCP checksum has
1677          * been computed by hardware, and which carry no VLAN tag in the
1678          * Ethernet header.  For IPv6 we do not yet support extension headers.
1679          */
1680         if (rxr->lro_enabled &&
1681             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1682             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1683             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1684             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1685             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1686             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1687             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1688             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1689                 /*
1690                  * Send to the stack if:
1691                  *  - LRO not enabled, or
1692                  *  - no LRO resources, or
1693                  *  - lro enqueue fails
1694                  */
1695                 if (rxr->lro.lro_cnt != 0)
1696                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1697                                 return;
1698         }
1699         IXGBE_RX_UNLOCK(rxr);
1700         (*ifp->if_input)(ifp, m);
1701         IXGBE_RX_LOCK(rxr);
1702 }
1703
1704 static __inline void
1705 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1706 {
1707         struct ixgbe_rx_buf     *rbuf;
1708
1709         rbuf = &rxr->rx_buffers[i];
1710
1711
1712         /*
1713         ** With advanced descriptors the writeback
1714         ** clobbers the buffer addrs, so it is easier
1715         ** to just free the existing mbufs and take
1716         ** the normal refresh path to get new buffers
1717         ** and mapping.
1718         */
1719
1720         if (rbuf->fmp != NULL) {/* Partial chain ? */
1721                 rbuf->fmp->m_flags |= M_PKTHDR;
1722                 m_freem(rbuf->fmp);
1723                 rbuf->fmp = NULL;
1724                 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1725         } else if (rbuf->buf) {
1726                 m_free(rbuf->buf);
1727                 rbuf->buf = NULL;
1728         }
1729         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1730
1731         rbuf->flags = 0;
1732  
1733         return;
1734 }
1735
1736
1737 /*********************************************************************
1738  *
1739  *  This routine executes in interrupt context. It replenishes
1740  *  the mbufs in the descriptor ring and sends data which has been
1741  *  DMA'ed into host memory to the upper layer.
1742  *
1743  *  Return TRUE for more work, FALSE for all clean.
1744  *********************************************************************/
1745 bool
1746 ixgbe_rxeof(struct ix_queue *que)
1747 {
1748         struct adapter          *adapter = que->adapter;
1749         struct rx_ring          *rxr = que->rxr;
1750         struct ifnet            *ifp = adapter->ifp;
1751         struct lro_ctrl         *lro = &rxr->lro;
1752         struct lro_entry        *queued;
1753         int                     i, nextp, processed = 0;
1754         u32                     staterr = 0;
1755         u32                     count = adapter->rx_process_limit;
1756         union ixgbe_adv_rx_desc *cur;
1757         struct ixgbe_rx_buf     *rbuf, *nbuf;
1758         u16                     pkt_info;
1759
1760         IXGBE_RX_LOCK(rxr);
1761
1762 #ifdef DEV_NETMAP
1763         /* Same as the txeof routine: wakeup clients on intr. */
1764         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1765                 IXGBE_RX_UNLOCK(rxr);
1766                 return (FALSE);
1767         }
1768 #endif /* DEV_NETMAP */
1769
1770         for (i = rxr->next_to_check; count != 0;) {
1771                 struct mbuf     *sendmp, *mp;
1772                 u32             rsc, ptype;
1773                 u16             len;
1774                 u16             vtag = 0;
1775                 bool            eop;
1776  
1777                 /* Sync the ring. */
1778                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1779                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1780
1781                 cur = &rxr->rx_base[i];
1782                 staterr = le32toh(cur->wb.upper.status_error);
1783                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1784
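                /*
                ** If the descriptor done (DD) bit is not set, the hardware
                ** has not finished with this descriptor; stop cleaning.
                */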
1785                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1786                         break;
1787                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1788                         break;
1789
1790                 count--;
1791                 sendmp = NULL;
1792                 nbuf = NULL;
1793                 rsc = 0;
1794                 cur->wb.upper.status_error = 0;
1795                 rbuf = &rxr->rx_buffers[i];
1796                 mp = rbuf->buf;
1797
1798                 len = le16toh(cur->wb.upper.length);
1799                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1800                     IXGBE_RXDADV_PKTTYPE_MASK;
1801                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1802
1803                 /* Make sure bad packets are discarded */
1804                 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1805 #if __FreeBSD_version >= 1100036
1806                         if (IXGBE_IS_VF(adapter))
1807                                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1808 #endif
1809                         rxr->rx_discarded++;
1810                         ixgbe_rx_discard(rxr, i);
1811                         goto next_desc;
1812                 }
1813
1814                 /*
1815                 ** On the 82599, which supports a hardware
1816                 ** LRO (called HW RSC), packets need not
1817                 ** be fragmented across sequential
1818                 ** descriptors; rather, the next descriptor
1819                 ** is indicated in bits of the descriptor.
1820                 ** This also means that we might process
1821                 ** more than one packet at a time, something
1822                 ** that has never been true before; it
1823                 ** required eliminating global chain pointers
1824                 ** in favor of what we are doing here.  -jfv
1825                 */
1826                 if (!eop) {
1827                         /*
1828                         ** Figure out the next descriptor
1829                         ** of this frame.
1830                         */
1831                         if (rxr->hw_rsc == TRUE) {
1832                                 rsc = ixgbe_rsc_count(cur);
1833                                 rxr->rsc_num += (rsc - 1);
1834                         }
1835                         if (rsc) { /* Get hardware index */
1836                                 nextp = ((staterr &
1837                                     IXGBE_RXDADV_NEXTP_MASK) >>
1838                                     IXGBE_RXDADV_NEXTP_SHIFT);
1839                         } else { /* Just sequential */
1840                                 nextp = i + 1;
1841                                 if (nextp == adapter->num_rx_desc)
1842                                         nextp = 0;
1843                         }
1844                         nbuf = &rxr->rx_buffers[nextp];
1845                         prefetch(nbuf);
1846                 }
1847                 /*
1848                 ** Rather than using the fmp/lmp global pointers
1849                 ** we now keep the head of a packet chain in the
1850                 ** buffer struct and pass this along from one
1851                 ** descriptor to the next, until we get EOP.
1852                 */
1853                 mp->m_len = len;
1854                 /*
1855                 ** See if there is a stored head mbuf
1856                 ** that determines how we handle this frag
1857                 */
1858                 sendmp = rbuf->fmp;
1859                 if (sendmp != NULL) {  /* secondary frag */
1860                         rbuf->buf = rbuf->fmp = NULL;
1861                         mp->m_flags &= ~M_PKTHDR;
1862                         sendmp->m_pkthdr.len += mp->m_len;
1863                 } else {
1864                         /*
1865                          * Optimize.  This might be a small packet,
1866                          * maybe just a TCP ACK.  Do a fast copy that
1867                          * is cache aligned into a new mbuf, and
1868                          * leave the old mbuf+cluster for re-use.
1869                          */
1870                         if (eop && len <= IXGBE_RX_COPY_LEN) {
1871                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1872                                 if (sendmp != NULL) {
1873                                         sendmp->m_data +=
1874                                             IXGBE_RX_COPY_ALIGN;
1875                                         ixgbe_bcopy(mp->m_data,
1876                                             sendmp->m_data, len);
1877                                         sendmp->m_len = len;
1878                                         rxr->rx_copies++;
1879                                         rbuf->flags |= IXGBE_RX_COPY;
1880                                 }
1881                         }
1882                         if (sendmp == NULL) {
1883                                 rbuf->buf = rbuf->fmp = NULL;
1884                                 sendmp = mp;
1885                         }
1886
1887                         /* first desc of a non-ps chain */
1888                         sendmp->m_flags |= M_PKTHDR;
1889                         sendmp->m_pkthdr.len = mp->m_len;
1890                 }
1891                 ++processed;
1892
1893                 /* Pass the head pointer on */
1894                 if (eop == 0) {
1895                         nbuf->fmp = sendmp;
1896                         sendmp = NULL;
1897                         mp->m_next = nbuf->buf;
1898                 } else { /* Sending this frame */
1899                         sendmp->m_pkthdr.rcvif = ifp;
1900                         rxr->rx_packets++;
1901                         /* capture data for AIM */
1902                         rxr->bytes += sendmp->m_pkthdr.len;
1903                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1904                         /* Process vlan info */
1905                         if ((rxr->vtag_strip) &&
1906                             (staterr & IXGBE_RXD_STAT_VP))
1907                                 vtag = le16toh(cur->wb.upper.vlan);
1908                         if (vtag) {
1909                                 sendmp->m_pkthdr.ether_vtag = vtag;
1910                                 sendmp->m_flags |= M_VLANTAG;
1911                         }
1912                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1913                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
1914
1915                         /*
1916                          * In case of multiqueue, we have RXCSUM.PCSD bit set
1917                          * and never cleared. This means we have RSS hash
1918                          * available to be used.   
1919                          */
1920                         if (adapter->num_queues > 1) {
1921                                 sendmp->m_pkthdr.flowid =
1922                                     le32toh(cur->wb.lower.hi_dword.rss);
1923                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {  
1924                                     case IXGBE_RXDADV_RSSTYPE_IPV4:
1925                                         M_HASHTYPE_SET(sendmp,
1926                                             M_HASHTYPE_RSS_IPV4);
1927                                         break;
1928                                     case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1929                                         M_HASHTYPE_SET(sendmp,
1930                                             M_HASHTYPE_RSS_TCP_IPV4);
1931                                         break;
1932                                     case IXGBE_RXDADV_RSSTYPE_IPV6:
1933                                         M_HASHTYPE_SET(sendmp,
1934                                             M_HASHTYPE_RSS_IPV6);
1935                                         break;
1936                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1937                                         M_HASHTYPE_SET(sendmp,
1938                                             M_HASHTYPE_RSS_TCP_IPV6);
1939                                         break;
1940                                     case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1941                                         M_HASHTYPE_SET(sendmp,
1942                                             M_HASHTYPE_RSS_IPV6_EX);
1943                                         break;
1944                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1945                                         M_HASHTYPE_SET(sendmp,
1946                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
1947                                         break;
1948 #if __FreeBSD_version > 1100000
1949                                     case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1950                                         M_HASHTYPE_SET(sendmp,
1951                                             M_HASHTYPE_RSS_UDP_IPV4);
1952                                         break;
1953                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1954                                         M_HASHTYPE_SET(sendmp,
1955                                             M_HASHTYPE_RSS_UDP_IPV6);
1956                                         break;
1957                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1958                                         M_HASHTYPE_SET(sendmp,
1959                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
1960                                         break;
1961 #endif
1962                                     default:
1963                                         M_HASHTYPE_SET(sendmp,
1964                                             M_HASHTYPE_OPAQUE);
1965                                 }
1966                         } else {
1967                                 sendmp->m_pkthdr.flowid = que->msix;
1968                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1969                         }
1970                 }
1971 next_desc:
1972                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1973                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1974
1975                 /* Advance our pointers to the next descriptor. */
1976                 if (++i == rxr->num_desc)
1977                         i = 0;
1978
1979                 /* Now send to the stack or do LRO */
1980                 if (sendmp != NULL) {
1981                         rxr->next_to_check = i;
1982                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1983                         i = rxr->next_to_check;
1984                 }
1985
1986                 /* Refresh mbufs every eight processed descriptors */
1987                 if (processed == 8) {
1988                         ixgbe_refresh_mbufs(rxr, i);
1989                         processed = 0;
1990                 }
1991         }
1992
1993         /* Refresh any remaining buf structs */
1994         if (ixgbe_rx_unrefreshed(rxr))
1995                 ixgbe_refresh_mbufs(rxr, i);
1996
1997         rxr->next_to_check = i;
1998
1999         /*
2000          * Flush any outstanding LRO work
2001          */
2002         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
2003                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
2004                 tcp_lro_flush(lro, queued);
2005         }
2006
2007         IXGBE_RX_UNLOCK(rxr);
2008
2009         /*
2010         ** Still have cleaning to do?
2011         */
2012         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2013                 return (TRUE);
2014         else
2015                 return (FALSE);
2016 }
2017
2018
2019 /*********************************************************************
2020  *
2021  *  Verify that the hardware indicated that the checksum is valid.
2022  *  Inform the stack about the status of the checksum so that the
2023  *  stack doesn't spend time verifying it.
2024  *
2025  *********************************************************************/
2026 static void
2027 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2028 {
2029         u16     status = (u16) staterr;
2030         u8      errors = (u8) (staterr >> 24);
2031         bool    sctp = false;
2032
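        /*
        ** The status bits indicate which checksums the hardware attempted
        ** (IPCS for the IP header, L4CS for TCP/UDP/SCTP) and the error
        ** bits whether they failed (IPE, TCPE).  For a valid L4 checksum
        ** we also report 0xffff in csum_data; SCTP uses a CRC rather than
        ** a 16-bit checksum, so no value is reported for it.
        */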
2033         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2034             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2035                 sctp = true;
2036
2037         /* IPv4 checksum */
2038         if (status & IXGBE_RXD_STAT_IPCS) {
2039                 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
2040                 /* IP Checksum Good */
2041                 if (!(errors & IXGBE_RXD_ERR_IPE))
2042                         mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
2043         }
2044         /* TCP/UDP/SCTP checksum */
2045         if (status & IXGBE_RXD_STAT_L4CS) {
2046                 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
2047                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2048                         mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
2049                         if (!sctp)
2050                                 mp->m_pkthdr.csum_data = htons(0xffff);
2051                 }
2052         }
2053 }
2054
2055 /********************************************************************
2056  * Manage DMA'able memory.
2057  *******************************************************************/
2058 static void
2059 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2060 {
2061         if (error)
2062                 return;
2063         *(bus_addr_t *) arg = segs->ds_addr;
2064         return;
2065 }
2066
2067 int
2068 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2069                 struct ixgbe_dma_alloc *dma, int mapflags)
2070 {
2071         device_t dev = adapter->dev;
2072         int             r;
2073
2074         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2075                                DBA_ALIGN, 0,    /* alignment, bounds */
2076                                BUS_SPACE_MAXADDR,       /* lowaddr */
2077                                BUS_SPACE_MAXADDR,       /* highaddr */
2078                                NULL, NULL,      /* filter, filterarg */
2079                                size,    /* maxsize */
2080                                1,       /* nsegments */
2081                                size,    /* maxsegsize */
2082                                BUS_DMA_ALLOCNOW,        /* flags */
2083                                NULL,    /* lockfunc */
2084                                NULL,    /* lockfuncarg */
2085                                &dma->dma_tag);
2086         if (r != 0) {
2087                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2088                        "error %u\n", r);
2089                 goto fail_0;
2090         }
2091         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2092                              BUS_DMA_NOWAIT, &dma->dma_map);
2093         if (r != 0) {
2094                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2095                        "error %u\n", r);
2096                 goto fail_1;
2097         }
2098         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2099                             size,
2100                             ixgbe_dmamap_cb,
2101                             &dma->dma_paddr,
2102                             mapflags | BUS_DMA_NOWAIT);
2103         if (r != 0) {
2104                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2105                        "error %u\n", r);
2106                 goto fail_2;
2107         }
2108         dma->dma_size = size;
2109         return (0);
2110 fail_2:
2111         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2112 fail_1:
2113         bus_dma_tag_destroy(dma->dma_tag);
2114 fail_0:
2115         dma->dma_tag = NULL;
2116         return (r);
2117 }
2118
2119 void
2120 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2121 {
2122         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2123             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2124         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2125         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2126         bus_dma_tag_destroy(dma->dma_tag);
2127 }
2128
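/*
** Illustrative pairing of the two helpers above (a sketch only; the real
** callers are the descriptor ring setup paths in ixgbe_allocate_queues()):
**
**      struct ixgbe_dma_alloc dma;
**
**      if (ixgbe_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) == 0) {
**              ... use dma.dma_vaddr (KVA) and dma.dma_paddr (bus addr) ...
**              ixgbe_dma_free(adapter, &dma);
**      }
*/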
2129
2130 /*********************************************************************
2131  *
2132  *  Allocate memory for the transmit and receive rings, and then
2133  *  the descriptors associated with each; called only once at attach.
2134  *
2135  **********************************************************************/
2136 int
2137 ixgbe_allocate_queues(struct adapter *adapter)
2138 {
2139         device_t        dev = adapter->dev;
2140         struct ix_queue *que;
2141         struct tx_ring  *txr;
2142         struct rx_ring  *rxr;
2143         int rsize, tsize, error = IXGBE_SUCCESS;
2144         int txconf = 0, rxconf = 0;
2145 #ifdef PCI_IOV
2146         enum ixgbe_iov_mode iov_mode;
2147 #endif
2148
2149         /* First allocate the top level queue structs */
2150         if (!(adapter->queues =
2151             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2152             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2153                 device_printf(dev, "Unable to allocate queue memory\n");
2154                 error = ENOMEM;
2155                 goto fail;
2156         }
2157
2158         /* First allocate the TX ring struct memory */
2159         if (!(adapter->tx_rings =
2160             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2161             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2162                 device_printf(dev, "Unable to allocate TX ring memory\n");
2163                 error = ENOMEM;
2164                 goto tx_fail;
2165         }
2166
2167         /* Next allocate the RX */
2168         if (!(adapter->rx_rings =
2169             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2170             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2171                 device_printf(dev, "Unable to allocate RX ring memory\n");
2172                 error = ENOMEM;
2173                 goto rx_fail;
2174         }
2175
2176         /* For the ring itself */
2177         tsize = roundup2(adapter->num_tx_desc *
2178             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2179
2180 #ifdef PCI_IOV
2181         iov_mode = ixgbe_get_iov_mode(adapter);
2182         adapter->pool = ixgbe_max_vfs(iov_mode);
2183 #else
2184         adapter->pool = 0;
2185 #endif
2186         /*
2187          * Now set up the TX queues.  txconf is needed to handle the
2188          * possibility that things fail midcourse and we need to
2189          * undo the memory allocations gracefully.
2190          */
2191         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2192                 /* Set up some basics */
2193                 txr = &adapter->tx_rings[i];
2194                 txr->adapter = adapter;
2195 #ifdef PCI_IOV
2196                 txr->me = ixgbe_pf_que_index(iov_mode, i);
2197 #else
2198                 txr->me = i;
2199 #endif
2200                 txr->num_desc = adapter->num_tx_desc;
2201
2202                 /* Initialize the TX side lock */
2203                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2204                     device_get_nameunit(dev), txr->me);
2205                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2206
2207                 if (ixgbe_dma_malloc(adapter, tsize,
2208                         &txr->txdma, BUS_DMA_NOWAIT)) {
2209                         device_printf(dev,
2210                             "Unable to allocate TX Descriptor memory\n");
2211                         error = ENOMEM;
2212                         goto err_tx_desc;
2213                 }
2214                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2215                 bzero((void *)txr->tx_base, tsize);
2216
2217                 /* Now allocate transmit buffers for the ring */
2218                 if (ixgbe_allocate_transmit_buffers(txr)) {
2219                         device_printf(dev,
2220                             "Critical Failure setting up transmit buffers\n");
2221                         error = ENOMEM;
2222                         goto err_tx_desc;
2223                 }
2224 #ifndef IXGBE_LEGACY_TX
2225                 /* Allocate a buf ring */
2226                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2227                     M_WAITOK, &txr->tx_mtx);
2228                 if (txr->br == NULL) {
2229                         device_printf(dev,
2230                             "Critical Failure setting up buf ring\n");
2231                         error = ENOMEM;
2232                         goto err_tx_desc;
2233                 }
2234 #endif
2235         }
2236
2237         /*
2238          * Next the RX queues...
2239          */ 
2240         rsize = roundup2(adapter->num_rx_desc *
2241             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2242         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2243                 rxr = &adapter->rx_rings[i];
2244                 /* Set up some basics */
2245                 rxr->adapter = adapter;
2246 #ifdef PCI_IOV
2247                 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2248 #else
2249                 rxr->me = i;
2250 #endif
2251                 rxr->num_desc = adapter->num_rx_desc;
2252
2253                 /* Initialize the RX side lock */
2254                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2255                     device_get_nameunit(dev), rxr->me);
2256                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2257
2258                 if (ixgbe_dma_malloc(adapter, rsize,
2259                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2260                         device_printf(dev,
2261                             "Unable to allocate RX Descriptor memory\n");
2262                         error = ENOMEM;
2263                         goto err_rx_desc;
2264                 }
2265                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2266                 bzero((void *)rxr->rx_base, rsize);
2267
2268                 /* Allocate receive buffers for the ring*/
2269                 if (ixgbe_allocate_receive_buffers(rxr)) {
2270                         device_printf(dev,
2271                             "Critical Failure setting up receive buffers\n");
2272                         error = ENOMEM;
2273                         goto err_rx_desc;
2274                 }
2275         }
2276
2277         /*
2278         ** Finally set up the queue holding structs
2279         */
2280         for (int i = 0; i < adapter->num_queues; i++) {
2281                 que = &adapter->queues[i];
2282                 que->adapter = adapter;
2283                 que->me = i;
2284                 que->txr = &adapter->tx_rings[i];
2285                 que->rxr = &adapter->rx_rings[i];
2286         }
2287
2288         return (0);
2289
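        /*
        ** Error unwinding: fall through from the deepest failure point,
        ** freeing the descriptor DMA memory of every ring that was fully
        ** set up (rxconf and txconf count them), then the RX/TX ring
        ** arrays and the queue array.
        */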
2290 err_rx_desc:
2291         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2292                 ixgbe_dma_free(adapter, &rxr->rxdma);
2293 err_tx_desc:
2294         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2295                 ixgbe_dma_free(adapter, &txr->txdma);
2296         free(adapter->rx_rings, M_DEVBUF);
2297 rx_fail:
2298         free(adapter->tx_rings, M_DEVBUF);
2299 tx_fail:
2300         free(adapter->queues, M_DEVBUF);
2301 fail:
2302         return (error);
2303 }