sys/dev/ixgbe/ix_txrx.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41
42 #include "ixgbe.h"
43
44 #ifdef  RSS
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
47 #endif
48
49 #ifdef DEV_NETMAP
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
53
54 extern int ix_crcstrip;
55 #endif
56
57 /*
58 ** HW RSC control:
59 **  this feature only works with
60 **  IPv4, and only on 82599 and later.
61 **  Also this will cause IP forwarding to
62 **  fail and that can't be controlled by
63 **  the stack as LRO can. For all these
64 **  reasons I've deemed it best to leave
65 **  this off and not bother with a tunable
66 **  interface; enabling it requires
67 **  recompiling with this flag set.
68 */
69 static bool ixgbe_rsc_enable = FALSE;
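/*
** There is intentionally no sysctl or loader tunable for this flag;
** enabling HW RSC requires setting it to TRUE here and rebuilding
** the driver.
*/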
70
71 #ifdef IXGBE_FDIR
72 /*
73 ** For Flow Director: this is the
74 ** number of TX packets we sample
75 ** for the filter pool; this means
76 ** every 20th packet will be probed.
77 **
78 ** This feature can be disabled by
79 ** setting this to 0.
80 */
81 static int atr_sample_rate = 20;
82 #endif
83
84 /*********************************************************************
85  *  Local Function prototypes
86  *********************************************************************/
87 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
88 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
89 static int      ixgbe_setup_receive_ring(struct rx_ring *);
90 static void     ixgbe_free_receive_buffers(struct rx_ring *);
91
92 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
93 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
94 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
95 static int      ixgbe_tx_ctx_setup(struct tx_ring *,
96                     struct mbuf *, u32 *, u32 *);
97 static int      ixgbe_tso_setup(struct tx_ring *,
98                     struct mbuf *, u32 *, u32 *);
99 #ifdef IXGBE_FDIR
100 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
101 #endif
102 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
103 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
104                     struct mbuf *, u32);
105
106 #ifdef IXGBE_LEGACY_TX
107 /*********************************************************************
108  *  Transmit entry point
109  *
110  *  ixgbe_start is called by the stack to initiate a transmit.
111  *  The driver will remain in this routine as long as there are
112  *  packets to transmit and transmit resources are available.
113  *  In case resources are not available, the stack is notified
114  *  and the packet is requeued.
115  **********************************************************************/
116
117 void
118 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
119 {
120         struct mbuf    *m_head;
121         struct adapter *adapter = txr->adapter;
122
123         IXGBE_TX_LOCK_ASSERT(txr);
124
125         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
126                 return;
127         if (!adapter->link_active)
128                 return;
129
130         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
131                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
132                         break;
133
134                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
135                 if (m_head == NULL)
136                         break;
137
138                 if (ixgbe_xmit(txr, &m_head)) {
139                         if (m_head != NULL)
140                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
141                         break;
142                 }
143                 /* Send a copy of the frame to the BPF listener */
144                 ETHER_BPF_MTAP(ifp, m_head);
145         }
146         return;
147 }
148
149 /*
150  * Legacy TX start - called by the stack, this
151  * always uses the first tx ring, and should
152  * not be used with multiqueue tx enabled.
153  */
154 void
155 ixgbe_start(struct ifnet *ifp)
156 {
157         struct adapter *adapter = ifp->if_softc;
158         struct tx_ring  *txr = adapter->tx_rings;
159
160         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
161                 IXGBE_TX_LOCK(txr);
162                 ixgbe_start_locked(txr, ifp);
163                 IXGBE_TX_UNLOCK(txr);
164         }
165         return;
166 }
167
168 #else /* ! IXGBE_LEGACY_TX */
169
170 /*
171 ** Multiqueue Transmit Entry Point
172 ** (if_transmit function)
173 */
174 int
175 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
176 {
177         struct adapter  *adapter = ifp->if_softc;
178         struct ix_queue *que;
179         struct tx_ring  *txr;
180         int             i, err = 0;
181 #ifdef  RSS
182         uint32_t bucket_id;
183 #endif
184
185         /*
186          * When doing RSS, map it to the same outbound queue
187          * as the incoming flow would be mapped to.
188          *
189          * If everything is set up correctly, it should be the
190          * same bucket that the current CPU is mapped to.
191          */
192         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
193 #ifdef  RSS
194                 if (rss_hash2bucket(m->m_pkthdr.flowid,
195                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
196                         i = bucket_id % adapter->num_queues;
197 #ifdef IXGBE_DEBUG
198                         if (bucket_id > adapter->num_queues)
199                                 if_printf(ifp, "bucket_id (%d) > num_queues "
200                                     "(%d)\n", bucket_id, adapter->num_queues);
201 #endif
202                 } else 
203 #endif
204                         i = m->m_pkthdr.flowid % adapter->num_queues;
205         } else
206                 i = curcpu % adapter->num_queues;
207
208         /* Check for a hung queue and pick alternative */
209         if (((1 << i) & adapter->active_queues) == 0)
210                 i = ffsl(adapter->active_queues);
211
212         txr = &adapter->tx_rings[i];
213         que = &adapter->queues[i];
214
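        /*
         * Enqueue on the selected ring's buf_ring; if the TX lock is
         * already held by another context, the actual transmit is
         * deferred to this queue's taskqueue.
         */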
215         err = drbr_enqueue(ifp, txr->br, m);
216         if (err)
217                 return (err);
218         if (IXGBE_TX_TRYLOCK(txr)) {
219                 ixgbe_mq_start_locked(ifp, txr);
220                 IXGBE_TX_UNLOCK(txr);
221         } else
222                 taskqueue_enqueue(que->tq, &txr->txq_task);
223
224         return (0);
225 }
226
227 int
228 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
229 {
230         struct adapter  *adapter = txr->adapter;
231         struct mbuf     *next;
232         int             enqueued = 0, err = 0;
233
234         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
235             adapter->link_active == 0)
236                 return (ENETDOWN);
237
238         /* Process the queue */
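        /*
         * Kernels >= 901504 provide drbr_peek()/drbr_advance()/
         * drbr_putback(), which let a packet stay at the head of the
         * buf_ring if the hardware runs out of descriptors; older
         * kernels must dequeue first and re-enqueue on failure.
         */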
239 #if __FreeBSD_version < 901504
240         next = drbr_dequeue(ifp, txr->br);
241         while (next != NULL) {
242                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
243                         if (next != NULL)
244                                 err = drbr_enqueue(ifp, txr->br, next);
245 #else
246         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
247                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
248                         if (next == NULL) {
249                                 drbr_advance(ifp, txr->br);
250                         } else {
251                                 drbr_putback(ifp, txr->br, next);
252                         }
253 #endif
254                         break;
255                 }
256 #if __FreeBSD_version >= 901504
257                 drbr_advance(ifp, txr->br);
258 #endif
259                 enqueued++;
260 #if 0 // this is VF-only
261 #if __FreeBSD_version >= 1100036
262                 /*
263                  * Since we're looking at the tx ring, we can check
264                  * to see if we're a VF by examining our tail register
265                  * address.
266                  */
267                 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
268                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
269 #endif
270 #endif
271                 /* Send a copy of the frame to the BPF listener */
272                 ETHER_BPF_MTAP(ifp, next);
273                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
274                         break;
275 #if __FreeBSD_version < 901504
276                 next = drbr_dequeue(ifp, txr->br);
277 #endif
278         }
279
280         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
281                 ixgbe_txeof(txr);
282
283         return (err);
284 }
285
286 /*
287  * Called from a taskqueue to drain queued transmit packets.
288  */
289 void
290 ixgbe_deferred_mq_start(void *arg, int pending)
291 {
292         struct tx_ring *txr = arg;
293         struct adapter *adapter = txr->adapter;
294         struct ifnet *ifp = adapter->ifp;
295
296         IXGBE_TX_LOCK(txr);
297         if (!drbr_empty(ifp, txr->br))
298                 ixgbe_mq_start_locked(ifp, txr);
299         IXGBE_TX_UNLOCK(txr);
300 }
301
302 /*
303  * Flush all ring buffers
304  */
305 void
306 ixgbe_qflush(struct ifnet *ifp)
307 {
308         struct adapter  *adapter = ifp->if_softc;
309         struct tx_ring  *txr = adapter->tx_rings;
310         struct mbuf     *m;
311
312         for (int i = 0; i < adapter->num_queues; i++, txr++) {
313                 IXGBE_TX_LOCK(txr);
314                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
315                         m_freem(m);
316                 IXGBE_TX_UNLOCK(txr);
317         }
318         if_qflush(ifp);
319 }
320 #endif /* IXGBE_LEGACY_TX */
321
322
323 /*********************************************************************
324  *
325  *  This routine maps the mbufs to tx descriptors, allowing the
326  *  TX engine to transmit the packets. 
327  *      - return 0 on success, positive on failure
328  *
329  **********************************************************************/
330
331 static int
332 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
333 {
334         struct adapter  *adapter = txr->adapter;
335         u32             olinfo_status = 0, cmd_type_len;
336         int             i, j, error, nsegs;
337         int             first;
338         bool            remap = TRUE;
339         struct mbuf     *m_head;
340         bus_dma_segment_t segs[adapter->num_segs];
341         bus_dmamap_t    map;
342         struct ixgbe_tx_buf *txbuf;
343         union ixgbe_adv_tx_desc *txd = NULL;
344
345         m_head = *m_headp;
346
347         /* Basic descriptor defines */
348         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
349             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
350
351         if (m_head->m_flags & M_VLANTAG)
352                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
353
354         /*
355          * Important to capture the first descriptor
356          * used because it will contain the index of
357          * the one we tell the hardware to report back
358          */
359         first = txr->next_avail_desc;
360         txbuf = &txr->tx_buffers[first];
361         map = txbuf->map;
362
363         /*
364          * Map the packet for DMA.
365          */
366 retry:
367         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
368             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
369
370         if (__predict_false(error)) {
371                 struct mbuf *m;
372
373                 switch (error) {
374                 case EFBIG:
375                         /* Try it again? - one try */
376                         if (remap == TRUE) {
377                                 remap = FALSE;
378                                 /*
379                                  * XXX: m_defrag will choke on
380                                  * non-MCLBYTES-sized clusters
381                                  */
382                                 m = m_defrag(*m_headp, M_NOWAIT);
383                                 if (m == NULL) {
384                                         adapter->mbuf_defrag_failed++;
385                                         m_freem(*m_headp);
386                                         *m_headp = NULL;
387                                         return (ENOBUFS);
388                                 }
389                                 *m_headp = m;
390                                 goto retry;
391                         } else
392                                 return (error);
393                 case ENOMEM:
394                         txr->no_tx_dma_setup++;
395                         return (error);
396                 default:
397                         txr->no_tx_dma_setup++;
398                         m_freem(*m_headp);
399                         *m_headp = NULL;
400                         return (error);
401                 }
402         }
403
404         /* Make certain there are enough descriptors */
405         if (nsegs > txr->tx_avail - 2) {
406                 txr->no_desc_avail++;
407                 bus_dmamap_unload(txr->txtag, map);
408                 return (ENOBUFS);
409         }
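        /*
         * The "- 2" above keeps headroom for the offload context
         * descriptor that ixgbe_tx_ctx_setup() may consume below.
         */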
410         m_head = *m_headp;
411
412         /*
413          * Set up the appropriate offload context;
414          * this will consume the first descriptor.
415          */
416         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
417         if (__predict_false(error)) {
418                 if (error == ENOBUFS)
419                         *m_headp = NULL;
420                 return (error);
421         }
422
423 #ifdef IXGBE_FDIR
424         /* Do the flow director magic */
425         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
426                 ++txr->atr_count;
427                 if (txr->atr_count >= atr_sample_rate) {
428                         ixgbe_atr(txr, m_head);
429                         txr->atr_count = 0;
430                 }
431         }
432 #endif
433
434         olinfo_status |= IXGBE_ADVTXD_CC;
435         i = txr->next_avail_desc;
436         for (j = 0; j < nsegs; j++) {
437                 bus_size_t seglen;
438                 bus_addr_t segaddr;
439
440                 txbuf = &txr->tx_buffers[i];
441                 txd = &txr->tx_base[i];
442                 seglen = segs[j].ds_len;
443                 segaddr = htole64(segs[j].ds_addr);
444
445                 txd->read.buffer_addr = segaddr;
446                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
447                     cmd_type_len |seglen);
448                 txd->read.olinfo_status = htole32(olinfo_status);
449
450                 if (++i == txr->num_desc)
451                         i = 0;
452         }
453
454         txd->read.cmd_type_len |=
455             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
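        /*
         * EOP marks the last segment of the frame; RS asks the hardware
         * to write back completion status (DD) for this descriptor,
         * which ixgbe_txeof() later checks through buf->eop.
         */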
456         txr->tx_avail -= nsegs;
457         txr->next_avail_desc = i;
458
459         txbuf->m_head = m_head;
460         /*
461          * Here we swap the map so the last descriptor,
462          * which gets the completion interrupt, has the
463          * real map, and the first descriptor gets the
464          * unused map from this descriptor.
465          */
466         txr->tx_buffers[first].map = txbuf->map;
467         txbuf->map = map;
468         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
469
470         /* Set the EOP descriptor that will be marked done */
471         txbuf = &txr->tx_buffers[first];
472         txbuf->eop = txd;
473
474         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
475             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
476         /*
477          * Advance the Transmit Descriptor Tail (TDT); this tells the
478          * hardware that this frame is available to transmit.
479          */
480         ++txr->total_packets;
481         IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
482
483         /* Mark queue as having work */
484         if (txr->busy == 0)
485                 txr->busy = 1;
486
487         return (0);
488 }
489
490
491 /*********************************************************************
492  *
493  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
494  *  the information needed to transmit a packet on the wire. This is
495  *  called only once at attach, setup is done every reset.
496  *
497  **********************************************************************/
498 int
499 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
500 {
501         struct adapter *adapter = txr->adapter;
502         device_t dev = adapter->dev;
503         struct ixgbe_tx_buf *txbuf;
504         int error, i;
505
506         /*
507          * Setup DMA descriptor areas.
508          */
509         if ((error = bus_dma_tag_create(
510                                bus_get_dma_tag(adapter->dev),   /* parent */
511                                1, 0,            /* alignment, bounds */
512                                BUS_SPACE_MAXADDR,       /* lowaddr */
513                                BUS_SPACE_MAXADDR,       /* highaddr */
514                                NULL, NULL,              /* filter, filterarg */
515                                IXGBE_TSO_SIZE,          /* maxsize */
516                                adapter->num_segs,       /* nsegments */
517                                PAGE_SIZE,               /* maxsegsize */
518                                0,                       /* flags */
519                                NULL,                    /* lockfunc */
520                                NULL,                    /* lockfuncarg */
521                                &txr->txtag))) {
522                 device_printf(dev, "Unable to allocate TX DMA tag\n");
523                 goto fail;
524         }
525
526         if (!(txr->tx_buffers =
527             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
530                 error = ENOMEM;
531                 goto fail;
532         }
533
534         /* Create the descriptor buffer dma maps */
535         txbuf = txr->tx_buffers;
536         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
538                 if (error != 0) {
539                         device_printf(dev, "Unable to create TX DMA map\n");
540                         goto fail;
541                 }
542         }
543
544         return 0;
545 fail:
546         /* We free everything; this handles the case where we fail partway through */
547         ixgbe_free_transmit_structures(adapter);
548         return (error);
549 }
550
551 /*********************************************************************
552  *
553  *  Initialize a transmit ring.
554  *
555  **********************************************************************/
556 static void
557 ixgbe_setup_transmit_ring(struct tx_ring *txr)
558 {
559         struct adapter *adapter = txr->adapter;
560         struct ixgbe_tx_buf *txbuf;
561 #ifdef DEV_NETMAP
562         struct netmap_adapter *na = NA(adapter->ifp);
563         struct netmap_slot *slot;
564 #endif /* DEV_NETMAP */
565
566         /* Clear the old ring contents */
567         IXGBE_TX_LOCK(txr);
568 #ifdef DEV_NETMAP
569         /*
570          * (under lock): if in netmap mode, do some consistency
571          * checks and set slot to entry 0 of the netmap ring.
572          */
573         slot = netmap_reset(na, NR_TX, txr->me, 0);
574 #endif /* DEV_NETMAP */
575         bzero((void *)txr->tx_base,
576               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
577         /* Reset indices */
578         txr->next_avail_desc = 0;
579         txr->next_to_clean = 0;
580
581         /* Free any existing tx buffers. */
582         txbuf = txr->tx_buffers;
583         for (int i = 0; i < txr->num_desc; i++, txbuf++) {
584                 if (txbuf->m_head != NULL) {
585                         bus_dmamap_sync(txr->txtag, txbuf->map,
586                             BUS_DMASYNC_POSTWRITE);
587                         bus_dmamap_unload(txr->txtag, txbuf->map);
588                         m_freem(txbuf->m_head);
589                         txbuf->m_head = NULL;
590                 }
591 #ifdef DEV_NETMAP
592                 /*
593                  * In netmap mode, set the map for the packet buffer.
594                  * NOTE: Some drivers (not this one) also need to set
595                  * the physical buffer address in the NIC ring.
596                  * Slots in the netmap ring (indexed by "si") are
597                  * kring->nkr_hwofs positions "ahead" wrt the
598                  * corresponding slot in the NIC ring. In some drivers
599                  * (not here) nkr_hwofs can be negative. Function
600                  * netmap_idx_n2k() handles wraparounds properly.
601                  */
602                 if (slot) {
603                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
604                         netmap_load_map(na, txr->txtag,
605                             txbuf->map, NMB(na, slot + si));
606                 }
607 #endif /* DEV_NETMAP */
608                 /* Clear the EOP descriptor pointer */
609                 txbuf->eop = NULL;
610         }
611
612 #ifdef IXGBE_FDIR
613         /* Set the rate at which we sample packets */
614         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615                 txr->atr_sample = atr_sample_rate;
616 #endif
617
618         /* Set number of descriptors available */
619         txr->tx_avail = adapter->num_tx_desc;
620
621         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623         IXGBE_TX_UNLOCK(txr);
624 }
625
626 /*********************************************************************
627  *
628  *  Initialize all transmit rings.
629  *
630  **********************************************************************/
631 int
632 ixgbe_setup_transmit_structures(struct adapter *adapter)
633 {
634         struct tx_ring *txr = adapter->tx_rings;
635
636         for (int i = 0; i < adapter->num_queues; i++, txr++)
637                 ixgbe_setup_transmit_ring(txr);
638
639         return (0);
640 }
641
642 /*********************************************************************
643  *
644  *  Free all transmit rings.
645  *
646  **********************************************************************/
647 void
648 ixgbe_free_transmit_structures(struct adapter *adapter)
649 {
650         struct tx_ring *txr = adapter->tx_rings;
651
652         for (int i = 0; i < adapter->num_queues; i++, txr++) {
653                 IXGBE_TX_LOCK(txr);
654                 ixgbe_free_transmit_buffers(txr);
655                 ixgbe_dma_free(adapter, &txr->txdma);
656                 IXGBE_TX_UNLOCK(txr);
657                 IXGBE_TX_LOCK_DESTROY(txr);
658         }
659         free(adapter->tx_rings, M_DEVBUF);
660 }
661
662 /*********************************************************************
663  *
664  *  Free transmit ring related data structures.
665  *
666  **********************************************************************/
667 static void
668 ixgbe_free_transmit_buffers(struct tx_ring *txr)
669 {
670         struct adapter *adapter = txr->adapter;
671         struct ixgbe_tx_buf *tx_buffer;
672         int             i;
673
674         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
675
676         if (txr->tx_buffers == NULL)
677                 return;
678
679         tx_buffer = txr->tx_buffers;
680         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
681                 if (tx_buffer->m_head != NULL) {
682                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
683                             BUS_DMASYNC_POSTWRITE);
684                         bus_dmamap_unload(txr->txtag,
685                             tx_buffer->map);
686                         m_freem(tx_buffer->m_head);
687                         tx_buffer->m_head = NULL;
688                         if (tx_buffer->map != NULL) {
689                                 bus_dmamap_destroy(txr->txtag,
690                                     tx_buffer->map);
691                                 tx_buffer->map = NULL;
692                         }
693                 } else if (tx_buffer->map != NULL) {
694                         bus_dmamap_unload(txr->txtag,
695                             tx_buffer->map);
696                         bus_dmamap_destroy(txr->txtag,
697                             tx_buffer->map);
698                         tx_buffer->map = NULL;
699                 }
700         }
701 #ifdef IXGBE_LEGACY_TX
702         if (txr->br != NULL)
703                 buf_ring_free(txr->br, M_DEVBUF);
704 #endif
705         if (txr->tx_buffers != NULL) {
706                 free(txr->tx_buffers, M_DEVBUF);
707                 txr->tx_buffers = NULL;
708         }
709         if (txr->txtag != NULL) {
710                 bus_dma_tag_destroy(txr->txtag);
711                 txr->txtag = NULL;
712         }
713         return;
714 }
715
716 /*********************************************************************
717  *
718  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
719  *
720  **********************************************************************/
721
722 static int
723 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724     u32 *cmd_type_len, u32 *olinfo_status)
725 {
726         struct adapter *adapter = txr->adapter;
727         struct ixgbe_adv_tx_context_desc *TXD;
728         struct ether_vlan_header *eh;
729 #ifdef INET
730         struct ip *ip;
731 #endif
732 #ifdef INET6
733         struct ip6_hdr *ip6;
734 #endif
735         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
736         int     ehdrlen, ip_hlen = 0;
737         u16     etype;
738         u8      ipproto = 0;
739         int     offload = TRUE;
740         int     ctxd = txr->next_avail_desc;
741         u16     vtag = 0;
742         caddr_t l3d;
743
744
745         /* First check if TSO is to be used */
746         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO|CSUM_IP6_TSO))
747                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
748
749         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
750                 offload = FALSE;
751
752         /* Indicate the whole packet as payload when not doing TSO */
753         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
754
755         /* Now ready a context descriptor */
756         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
757
758         /*
759         ** In advanced descriptors the vlan tag must 
760         ** be placed into the context descriptor. Hence
761         ** we need to make one even if not doing offloads.
762         */
763         if (mp->m_flags & M_VLANTAG) {
764                 vtag = htole16(mp->m_pkthdr.ether_vtag);
765                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
766         } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
767                 return (0);
768
769         /*
770          * Determine where frame payload starts.
771          * Jump over vlan headers if already present,
772          * helpful for QinQ too.
773          */
774         eh = mtod(mp, struct ether_vlan_header *);
775         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
776                 etype = ntohs(eh->evl_proto);
777                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
778         } else {
779                 etype = ntohs(eh->evl_encap_proto);
780                 ehdrlen = ETHER_HDR_LEN;
781         }
782
783         /* Set the ether header length */
784         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
785
786         if (offload == FALSE)
787                 goto no_offloads;
788
789         /*
790          * If the first mbuf only includes the ethernet header, jump to the next one
791          * XXX: This assumes the stack splits mbufs containing headers on header boundaries
792          * XXX: And assumes the entire IP header is contained in one mbuf
793          */
794         if (mp->m_len == ehdrlen && mp->m_next)
795                 l3d = mtod(mp->m_next, caddr_t);
796         else
797                 l3d = mtod(mp, caddr_t) + ehdrlen;
798
799         switch (etype) {
800 #ifdef INET
801                 case ETHERTYPE_IP:
802                         ip = (struct ip *)(l3d);
803                         ip_hlen = ip->ip_hl << 2;
804                         ipproto = ip->ip_p;
805                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
806                         /* Insert IPv4 checksum into data descriptors */
807                         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
808                                 ip->ip_sum = 0;
809                                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
810                         }
811                         break;
812 #endif
813 #ifdef INET6
814                 case ETHERTYPE_IPV6:
815                         ip6 = (struct ip6_hdr *)(l3d);
816                         ip_hlen = sizeof(struct ip6_hdr);
817                         ipproto = ip6->ip6_nxt;
818                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
819                         break;
820 #endif
821                 default:
822                         offload = FALSE;
823                         break;
824         }
825
826         vlan_macip_lens |= ip_hlen;
827
828         /* No support for offloads for non-L4 next headers */
829         switch (ipproto) {
830                 case IPPROTO_TCP:
831                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
832                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
833                         else
834                                 offload = false;
835                         break;
836                 case IPPROTO_UDP:
837                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
838                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
839                         else
840                                 offload = false;
841                         break;
842                 case IPPROTO_SCTP:
843                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
844                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
845                         else
846                                 offload = false;
847                         break;
848                 default:
849                         offload = false;
850                         break;
851         }
852
853         if (offload) /* Insert L4 checksum into data descriptors */
854                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
855
856 no_offloads:
857         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
858
859         /* Now copy bits into descriptor */
860         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
861         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
862         TXD->seqnum_seed = htole32(0);
863         TXD->mss_l4len_idx = htole32(0);
864
865         /* We've consumed the first desc, adjust counters */
866         if (++ctxd == txr->num_desc)
867                 ctxd = 0;
868         txr->next_avail_desc = ctxd;
869         --txr->tx_avail;
870
871         return (0);
872 }
873
874 /**********************************************************************
875  *
876  *  Setup work for hardware segmentation offload (TSO) on
877  *  adapters using advanced tx descriptors
878  *
879  **********************************************************************/
880 static int
881 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
882     u32 *cmd_type_len, u32 *olinfo_status)
883 {
884         struct ixgbe_adv_tx_context_desc *TXD;
885         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
886         u32 mss_l4len_idx = 0, paylen;
887         u16 vtag = 0, eh_type;
888         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
889         struct ether_vlan_header *eh;
890 #ifdef INET6
891         struct ip6_hdr *ip6;
892 #endif
893 #ifdef INET
894         struct ip *ip;
895 #endif
896         struct tcphdr *th;
897
898         /*
899          * Determine where frame payload starts.
900          * Jump over vlan headers if already present
901          */
902         eh = mtod(mp, struct ether_vlan_header *);
903         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
904                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
905                 eh_type = eh->evl_proto;
906         } else {
907                 ehdrlen = ETHER_HDR_LEN;
908                 eh_type = eh->evl_encap_proto;
909         }
910
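        /*
         * Each case below seeds the TCP checksum field with a
         * pseudo-header checksum computed over a zero length, so the
         * hardware can complete the checksum for each generated
         * segment, and records the IP header length for the context
         * descriptor.
         */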
911         switch (ntohs(eh_type)) {
912 #ifdef INET6
913         case ETHERTYPE_IPV6:
914                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
915                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
916                 if (ip6->ip6_nxt != IPPROTO_TCP)
917                         return (ENXIO);
918                 ip_hlen = sizeof(struct ip6_hdr);
919                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
920                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
921                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
922                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
923                 break;
924 #endif
925 #ifdef INET
926         case ETHERTYPE_IP:
927                 ip = (struct ip *)(mp->m_data + ehdrlen);
928                 if (ip->ip_p != IPPROTO_TCP)
929                         return (ENXIO);
930                 ip->ip_sum = 0;
931                 ip_hlen = ip->ip_hl << 2;
932                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
933                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
934                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
935                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
936                 /* Tell transmit desc to also do IPv4 checksum. */
937                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
938                 break;
939 #endif
940         default:
941                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
942                     __func__, ntohs(eh_type));
943                 break;
944         }
945
946         ctxd = txr->next_avail_desc;
947         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
948
949         tcp_hlen = th->th_off << 2;
950
951         /* This is used in the transmit desc in encap */
952         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
953
954         /* VLAN MACLEN IPLEN */
955         if (mp->m_flags & M_VLANTAG) {
956                 vtag = htole16(mp->m_pkthdr.ether_vtag);
957                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
958         }
959
960         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
961         vlan_macip_lens |= ip_hlen;
962         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
963
964         /* ADV DTYPE TUCMD */
965         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
966         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
967         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
968
969         /* MSS L4LEN IDX */
970         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
971         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
972         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
973
974         TXD->seqnum_seed = htole32(0);
975
976         if (++ctxd == txr->num_desc)
977                 ctxd = 0;
978
979         txr->tx_avail--;
980         txr->next_avail_desc = ctxd;
981         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
982         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
983         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
984         ++txr->tso_tx;
985         return (0);
986 }
987
988
989 /**********************************************************************
990  *
991  *  Examine each tx_buffer in the used queue. If the hardware is done
992  *  processing the packet then free associated resources. The
993  *  tx_buffer is put back on the free queue.
994  *
995  **********************************************************************/
996 void
997 ixgbe_txeof(struct tx_ring *txr)
998 {
999         struct adapter          *adapter = txr->adapter;
1000 #ifdef DEV_NETMAP
1001         struct ifnet            *ifp = adapter->ifp;
1002 #endif
1003         u32                     work, processed = 0;
1004         u32                     limit = adapter->tx_process_limit;
1005         struct ixgbe_tx_buf     *buf;
1006         union ixgbe_adv_tx_desc *txd;
1007
1008         mtx_assert(&txr->tx_mtx, MA_OWNED);
1009
1010 #ifdef DEV_NETMAP
1011         if (ifp->if_capenable & IFCAP_NETMAP) {
1012                 struct netmap_adapter *na = NA(ifp);
1013                 struct netmap_kring *kring = &na->tx_rings[txr->me];
1014                 txd = txr->tx_base;
1015                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1016                     BUS_DMASYNC_POSTREAD);
1017                 /*
1018                  * In netmap mode, all the work is done in the context
1019                  * of the client thread. Interrupt handlers only wake up
1020                  * clients, which may be sleeping on individual rings
1021                  * or on a global resource for all rings.
1022                  * To implement tx interrupt mitigation, we wake up the client
1023                  * thread roughly every half ring, even if the NIC interrupts
1024                  * more frequently. This is implemented as follows:
1025                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
1026                  *   the slot that should wake up the thread (nkr_num_slots
1027                  *   means the user thread should not be woken up);
1028                  * - the driver ignores tx interrupts unless netmap_mitigate=0
1029                  *   or the slot has the DD bit set.
1030                  */
1031                 if (!netmap_mitigate ||
1032                     (kring->nr_kflags < kring->nkr_num_slots &&
1033                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1034                         netmap_tx_irq(ifp, txr->me);
1035                 }
1036                 return;
1037         }
1038 #endif /* DEV_NETMAP */
1039
1040         if (txr->tx_avail == txr->num_desc) {
1041                 txr->busy = 0;
1042                 return;
1043         }
1044
1045         /* Get work starting point */
1046         work = txr->next_to_clean;
1047         buf = &txr->tx_buffers[work];
1048         txd = &txr->tx_base[work];
1049         work -= txr->num_desc; /* The distance to ring end */
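        /*
         * 'work' is kept as a negative offset from the end of the ring,
         * so it reaches zero exactly when the scan index wraps; the
         * "!work" checks below use that to reset to the ring start.
         */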
1050         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1051             BUS_DMASYNC_POSTREAD);
1052
1053         do {
1054                 union ixgbe_adv_tx_desc *eop = buf->eop;
1055                 if (eop == NULL) /* No work */
1056                         break;
1057
1058                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1059                         break;  /* I/O not complete */
1060
1061                 if (buf->m_head) {
1062                         txr->bytes +=
1063                             buf->m_head->m_pkthdr.len;
1064                         bus_dmamap_sync(txr->txtag,
1065                             buf->map,
1066                             BUS_DMASYNC_POSTWRITE);
1067                         bus_dmamap_unload(txr->txtag,
1068                             buf->map);
1069                         m_freem(buf->m_head);
1070                         buf->m_head = NULL;
1071                 }
1072                 buf->eop = NULL;
1073                 ++txr->tx_avail;
1074
1075                 /* Clean the whole range if this was a multi-segment packet */
1076                 while (txd != eop) {
1077                         ++txd;
1078                         ++buf;
1079                         ++work;
1080                         /* wrap the ring? */
1081                         if (__predict_false(!work)) {
1082                                 work -= txr->num_desc;
1083                                 buf = txr->tx_buffers;
1084                                 txd = txr->tx_base;
1085                         }
1086                         if (buf->m_head) {
1087                                 txr->bytes +=
1088                                     buf->m_head->m_pkthdr.len;
1089                                 bus_dmamap_sync(txr->txtag,
1090                                     buf->map,
1091                                     BUS_DMASYNC_POSTWRITE);
1092                                 bus_dmamap_unload(txr->txtag,
1093                                     buf->map);
1094                                 m_freem(buf->m_head);
1095                                 buf->m_head = NULL;
1096                         }
1097                         ++txr->tx_avail;
1098                         buf->eop = NULL;
1099
1100                 }
1101                 ++txr->packets;
1102                 ++processed;
1103
1104                 /* Try the next packet */
1105                 ++txd;
1106                 ++buf;
1107                 ++work;
1108                 /* reset with a wrap */
1109                 if (__predict_false(!work)) {
1110                         work -= txr->num_desc;
1111                         buf = txr->tx_buffers;
1112                         txd = txr->tx_base;
1113                 }
1114                 prefetch(txd);
1115         } while (__predict_true(--limit));
1116
1117         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1118             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1119
1120         work += txr->num_desc;
1121         txr->next_to_clean = work;
1122
1123         /*
1124         ** Queue hang detection: we know there's
1125         ** work outstanding or the first return
1126         ** would have been taken, so increment busy
1127         ** if nothing managed to get cleaned; then
1128         ** in local_timer it will be checked and 
1129         ** marked as HUNG if it exceeds a MAX attempt.
1130         */
1131         if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1132                 ++txr->busy;
1133         /*
1134         ** If anything gets cleaned we reset state to 1;
1135         ** note this will turn off HUNG if it's set.
1136         */
1137         if (processed)
1138                 txr->busy = 1;
1139
1140         if (txr->tx_avail == txr->num_desc)
1141                 txr->busy = 0;
1142
1143         return;
1144 }
1145
1146
1147 #ifdef IXGBE_FDIR
1148 /*
1149 ** This routine parses packet headers so that Flow
1150 ** Director can make a hashed filter table entry 
1151 ** allowing traffic flows to be identified and kept
1152 ** on the same CPU.  Doing this for every packet
1153 ** would be a performance hit, so we only do it
1154 ** for one in IXGBE_FDIR_RATE packets.
1155 */
1156 static void
1157 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1158 {
1159         struct adapter                  *adapter = txr->adapter;
1160         struct ix_queue                 *que;
1161         struct ip                       *ip;
1162         struct tcphdr                   *th;
1163         struct udphdr                   *uh;
1164         struct ether_vlan_header        *eh;
1165         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
1166         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
1167         int                             ehdrlen, ip_hlen;
1168         u16                             etype;
1169
1170         eh = mtod(mp, struct ether_vlan_header *);
1171         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1172                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1173                 etype = eh->evl_proto;
1174         } else {
1175                 ehdrlen = ETHER_HDR_LEN;
1176                 etype = eh->evl_encap_proto;
1177         }
1178
1179         /* Only handling IPv4 */
1180         if (etype != htons(ETHERTYPE_IP))
1181                 return;
1182
1183         ip = (struct ip *)(mp->m_data + ehdrlen);
1184         ip_hlen = ip->ip_hl << 2;
1185
1186         /* check if we're UDP or TCP */
1187         switch (ip->ip_p) {
1188         case IPPROTO_TCP:
1189                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1190                 /* src and dst are inverted */
1191                 common.port.dst ^= th->th_sport;
1192                 common.port.src ^= th->th_dport;
1193                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1194                 break;
1195         case IPPROTO_UDP:
1196                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1197                 /* src and dst are inverted */
1198                 common.port.dst ^= uh->uh_sport;
1199                 common.port.src ^= uh->uh_dport;
1200                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1201                 break;
1202         default:
1203                 return;
1204         }
1205
1206         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1207         if (mp->m_pkthdr.ether_vtag)
1208                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1209         else
1210                 common.flex_bytes ^= etype;
1211         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1212
1213         que = &adapter->queues[txr->me];
1214         /*
1215         ** This assumes the Rx queue and Tx
1216         ** queue are bound to the same CPU
1217         */
1218         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1219             input, common, que->msix);
1220 }
1221 #endif /* IXGBE_FDIR */
1222
1223 /*
1224 ** Used to detect a descriptor that has
1225 ** been merged by Hardware RSC.
1226 */
1227 static inline u32
1228 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1229 {
1230         return (le32toh(rx->wb.lower.lo_dword.data) &
1231             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1232 }
1233
1234 /*********************************************************************
1235  *
1236  *  Initialize Hardware RSC (LRO) feature on 82599
1237  *  for an RX ring; this is toggled by the LRO capability
1238  *  even though it is transparent to the stack.
1239  *
1240  *  NOTE: since this HW feature only works with IPV4 and 
1241  *        our testing has shown soft LRO to be as effective,
1242  *        I have decided to disable this by default.
1243  *
1244  **********************************************************************/
1245 static void
1246 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1247 {
1248         struct  adapter         *adapter = rxr->adapter;
1249         struct  ixgbe_hw        *hw = &adapter->hw;
1250         u32                     rscctrl, rdrxctl;
1251
1252         /* If turning LRO/RSC off we need to disable it */
1253         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1254                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1255                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                     IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); /* write the cleared bit back */
1256                 return;
1257         }
1258
1259         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1260         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1261 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1262         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1263 #endif /* DEV_NETMAP */
1264         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1265         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1266         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1267
1268         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1269         rscctrl |= IXGBE_RSCCTL_RSCEN;
1270         /*
1271         ** Limit the total number of descriptors that
1272         ** can be combined, so it does not exceed 64K
1273         */
1274         if (rxr->mbuf_sz == MCLBYTES)
1275                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1276         else if (rxr->mbuf_sz == MJUMPAGESIZE)
1277                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1278         else if (rxr->mbuf_sz == MJUM9BYTES)
1279                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1280         else  /* Using 16K cluster */
1281                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
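        /*
         * Worst case with these settings: 16 x 2KB clusters, 8 x
         * page-sized clusters, 4 x 9KB clusters or a single 16KB
         * cluster per aggregation, all under the 64K limit noted above.
         */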
1282
1283         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1284
1285         /* Enable TCP header recognition */
1286         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1287             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1288             IXGBE_PSRTYPE_TCPHDR));
1289
1290         /* Disable RSC for ACK packets */
1291         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1292             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1293
1294         rxr->hw_rsc = TRUE;
1295 }
1296
1297 /*********************************************************************
1298  *
1299  *  Refresh mbuf buffers for RX descriptor rings
1300  *   - now keeps its own state so discards due to resource
1301  *     exhaustion are unnecessary, if an mbuf cannot be obtained
1302  *     it just returns, keeping its placeholder, thus it can simply
1303  *     be recalled to try again.
1304  *
1305  **********************************************************************/
1306 static void
1307 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1308 {
1309         struct adapter          *adapter = rxr->adapter;
1310         bus_dma_segment_t       seg[1];
1311         struct ixgbe_rx_buf     *rxbuf;
1312         struct mbuf             *mp;
1313         int                     i, j, nsegs, error;
1314         bool                    refreshed = FALSE;
1315
1316         i = j = rxr->next_to_refresh;
1317         /* Control the loop with one beyond */
1318         if (++j == rxr->num_desc)
1319                 j = 0;
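        /*
         * 'j' always runs one slot ahead of 'i': slot 'i' is refreshed
         * while 'j' controls loop termination, so refreshing stops
         * short of 'limit' and the tail written below stays behind the
         * caller's cleanup position.
         */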
1320
1321         while (j != limit) {
1322                 rxbuf = &rxr->rx_buffers[i];
1323                 if (rxbuf->buf == NULL) {
1324                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1325                             M_PKTHDR, rxr->mbuf_sz);
1326                         if (mp == NULL)
1327                                 goto update;
1328                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1329                                 m_adj(mp, ETHER_ALIGN);
1330                 } else
1331                         mp = rxbuf->buf;
1332
1333                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1334
1335                 /* If we're dealing with an mbuf that was copied rather
1336                  * than replaced, there's no need to go through busdma.
1337                  */
1338                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1339                         /* Get the memory mapping */
1340                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1341                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1342                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1343                         if (error != 0) {
1344                                 printf("Refresh mbufs: payload dmamap load"
1345                                     " failure - %d\n", error);
1346                                 m_free(mp);
1347                                 rxbuf->buf = NULL;
1348                                 goto update;
1349                         }
1350                         rxbuf->buf = mp;
1351                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1352                             BUS_DMASYNC_PREREAD);
1353                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1354                             htole64(seg[0].ds_addr);
1355                 } else {
1356                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1357                         rxbuf->flags &= ~IXGBE_RX_COPY;
1358                 }
1359
1360                 refreshed = TRUE;
1361                 /* Next is precalculated */
1362                 i = j;
1363                 rxr->next_to_refresh = i;
1364                 if (++j == rxr->num_desc)
1365                         j = 0;
1366         }
1367 update:
1368         if (refreshed) /* Update hardware tail index */
1369                 IXGBE_WRITE_REG(&adapter->hw,
1370                     rxr->tail, rxr->next_to_refresh);
1371         return;
1372 }
1373
1374 /*********************************************************************
1375  *
1376  *  Allocate memory for rx_buffer structures. Since we use one
1377  *  rx_buffer per received packet, the maximum number of rx_buffer's
1378  *  that we'll need is equal to the number of receive descriptors
1379  *  that we've allocated.
1380  *
1381  **********************************************************************/
1382 int
1383 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1384 {
1385         struct  adapter         *adapter = rxr->adapter;
1386         device_t                dev = adapter->dev;
1387         struct ixgbe_rx_buf     *rxbuf;
1388         int                     bsize, error;
1389
1390         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1391         if (!(rxr->rx_buffers =
1392             (struct ixgbe_rx_buf *) malloc(bsize,
1393             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1394                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1395                 error = ENOMEM;
1396                 goto fail;
1397         }
1398
1399         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1400                                    1, 0,        /* alignment, bounds */
1401                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1402                                    BUS_SPACE_MAXADDR,   /* highaddr */
1403                                    NULL, NULL,          /* filter, filterarg */
1404                                    MJUM16BYTES,         /* maxsize */
1405                                    1,                   /* nsegments */
1406                                    MJUM16BYTES,         /* maxsegsize */
1407                                    0,                   /* flags */
1408                                    NULL,                /* lockfunc */
1409                                    NULL,                /* lockfuncarg */
1410                                    &rxr->ptag))) {
1411                 device_printf(dev, "Unable to create RX DMA tag\n");
1412                 goto fail;
1413         }
1414
1415         for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1416                 rxbuf = &rxr->rx_buffers[i];
1417                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1418                 if (error) {
1419                         device_printf(dev, "Unable to create RX dma map\n");
1420                         goto fail;
1421                 }
1422         }
1423
1424         return (0);
1425
1426 fail:
1427         /* Frees everything allocated so far; handles partial completion */
1428         ixgbe_free_receive_structures(adapter);
1429         return (error);
1430 }
1431
1432 static void
1433 ixgbe_free_receive_ring(struct rx_ring *rxr)
1434 {
1435         struct ixgbe_rx_buf       *rxbuf;
1436
1437         for (int i = 0; i < rxr->num_desc; i++) {
1438                 rxbuf = &rxr->rx_buffers[i];
1439                 if (rxbuf->buf != NULL) {
1440                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1441                             BUS_DMASYNC_POSTREAD);
1442                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1443                         rxbuf->buf->m_flags |= M_PKTHDR;
1444                         m_freem(rxbuf->buf);
1445                         rxbuf->buf = NULL;
1446                         rxbuf->flags = 0;
1447                 }
1448         }
1449 }
1450
1451 /*********************************************************************
1452  *
1453  *  Initialize a receive ring and its buffers.
1454  *
1455  **********************************************************************/
1456 static int
1457 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1458 {
1459         struct  adapter         *adapter;
1460         struct ifnet            *ifp;
1461         device_t                dev;
1462         struct ixgbe_rx_buf     *rxbuf;
1463         bus_dma_segment_t       seg[1];
1464         struct lro_ctrl         *lro = &rxr->lro;
1465         int                     rsize, nsegs, error = 0;
1466 #ifdef DEV_NETMAP
1467         struct netmap_adapter *na = NA(rxr->adapter->ifp);
1468         struct netmap_slot *slot;
1469 #endif /* DEV_NETMAP */
1470
1471         adapter = rxr->adapter;
1472         ifp = adapter->ifp;
1473         dev = adapter->dev;
1474
1475         /* Clear the ring contents */
1476         IXGBE_RX_LOCK(rxr);
1477 #ifdef DEV_NETMAP
1478         /* same as in ixgbe_setup_transmit_ring() */
1479         slot = netmap_reset(na, NR_RX, rxr->me, 0);
1480 #endif /* DEV_NETMAP */
1481         rsize = roundup2(adapter->num_rx_desc *
1482             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1483         bzero((void *)rxr->rx_base, rsize);
1484         /* Cache the size */
1485         rxr->mbuf_sz = adapter->rx_mbuf_sz;
1486
1487         /* Free current RX buffer structs and their mbufs */
1488         ixgbe_free_receive_ring(rxr);
1489
1490         /* Now replenish the mbufs */
1491         for (int j = 0; j != rxr->num_desc; ++j) {
1492                 struct mbuf     *mp;
1493
1494                 rxbuf = &rxr->rx_buffers[j];
1495 #ifdef DEV_NETMAP
1496                 /*
1497                  * In netmap mode, fill the map and set the buffer
1498                  * address in the NIC ring, considering the offset
1499                  * between the netmap and NIC rings (see comment in
1500                  * ixgbe_setup_transmit_ring() ). No need to allocate
1501                  * an mbuf, so end the block with a continue;
1502                  */
1503                 if (slot) {
1504                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1505                         uint64_t paddr;
1506                         void *addr;
1507
1508                         addr = PNMB(na, slot + sj, &paddr);
1509                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1510                         /* Update descriptor and the cached value */
1511                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1512                         rxbuf->addr = htole64(paddr);
1513                         continue;
1514                 }
1515 #endif /* DEV_NETMAP */
1516                 rxbuf->flags = 0; 
1517                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1518                     M_PKTHDR, adapter->rx_mbuf_sz);
1519                 if (rxbuf->buf == NULL) {
1520                         error = ENOBUFS;
1521                         goto fail;
1522                 }
1523                 mp = rxbuf->buf;
1524                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1525                 /* Get the memory mapping */
1526                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1527                     rxbuf->pmap, mp, seg,
1528                     &nsegs, BUS_DMA_NOWAIT);
1529                 if (error != 0)
1530                         goto fail;
1531                 bus_dmamap_sync(rxr->ptag,
1532                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
1533                 /* Update the descriptor and the cached value */
1534                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1535                 rxbuf->addr = htole64(seg[0].ds_addr);
1536         }
1537
1538
1539         /* Setup our descriptor indices */
1540         rxr->next_to_check = 0;
1541         rxr->next_to_refresh = 0;
1542         rxr->lro_enabled = FALSE;
1543         rxr->rx_copies = 0;
1544         rxr->rx_bytes = 0;
1545         rxr->vtag_strip = FALSE;
1546
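             /*
              * Push the freshly initialized descriptors out to memory so
              * they are visible to the hardware before the ring is used.
              */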
1547         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1548             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1549
1550         /*
1551         ** Now set up the LRO interface:
1552         */
1553         if (ixgbe_rsc_enable)
1554                 ixgbe_setup_hw_rsc(rxr);
1555         else if (ifp->if_capenable & IFCAP_LRO) {
1556                 int err = tcp_lro_init(lro);
1557                 if (err) {
1558                         device_printf(dev, "LRO Initialization failed!\n");
1559                         goto fail;
1560                 }
1561                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1562                 rxr->lro_enabled = TRUE;
1563                 lro->ifp = adapter->ifp;
1564         }
1565
1566         IXGBE_RX_UNLOCK(rxr);
1567         return (0);
1568
1569 fail:
1570         ixgbe_free_receive_ring(rxr);
1571         IXGBE_RX_UNLOCK(rxr);
1572         return (error);
1573 }
1574
1575 /*********************************************************************
1576  *
1577  *  Initialize all receive rings.
1578  *
1579  **********************************************************************/
1580 int
1581 ixgbe_setup_receive_structures(struct adapter *adapter)
1582 {
1583         struct rx_ring *rxr = adapter->rx_rings;
1584         int j;
1585
1586         for (j = 0; j < adapter->num_queues; j++, rxr++)
1587                 if (ixgbe_setup_receive_ring(rxr))
1588                         goto fail;
1589
1590         return (0);
1591 fail:
1592         /*
1593          * Free the RX buffers allocated so far; we only need to handle
1594          * the rings that completed, since the failing ring has already
1595          * cleaned up after itself. Ring 'j' failed, so it is the terminus.
1596          */
1597         for (int i = 0; i < j; ++i) {
1598                 rxr = &adapter->rx_rings[i];
1599                 ixgbe_free_receive_ring(rxr);
1600         }
1601
1602         return (ENOBUFS);
1603 }
1604
1605
1606 /*********************************************************************
1607  *
1608  *  Free all receive rings.
1609  *
1610  **********************************************************************/
1611 void
1612 ixgbe_free_receive_structures(struct adapter *adapter)
1613 {
1614         struct rx_ring *rxr = adapter->rx_rings;
1615
1616         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1617
1618         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1619                 struct lro_ctrl         *lro = &rxr->lro;
1620                 ixgbe_free_receive_buffers(rxr);
1621                 /* Free LRO memory */
1622                 tcp_lro_free(lro);
1623                 /* Free the ring memory as well */
1624                 ixgbe_dma_free(adapter, &rxr->rxdma);
1625         }
1626
1627         free(adapter->rx_rings, M_DEVBUF);
1628 }
1629
1630
1631 /*********************************************************************
1632  *
1633  *  Free receive ring data structures
1634  *
1635  **********************************************************************/
1636 void
1637 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1638 {
1639         struct adapter          *adapter = rxr->adapter;
1640         struct ixgbe_rx_buf     *rxbuf;
1641
1642         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1643
1644         /* Cleanup any existing buffers */
1645         if (rxr->rx_buffers != NULL) {
1646                 for (int i = 0; i < adapter->num_rx_desc; i++) {
1647                         rxbuf = &rxr->rx_buffers[i];
1648                         if (rxbuf->buf != NULL) {
1649                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1650                                     BUS_DMASYNC_POSTREAD);
1651                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1652                                 rxbuf->buf->m_flags |= M_PKTHDR;
1653                                 m_freem(rxbuf->buf);
1654                         }
1655                         rxbuf->buf = NULL;
1656                         if (rxbuf->pmap != NULL) {
1657                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1658                                 rxbuf->pmap = NULL;
1659                         }
1660                 }
1661                 if (rxr->rx_buffers != NULL) {
1662                         free(rxr->rx_buffers, M_DEVBUF);
1663                         rxr->rx_buffers = NULL;
1664                 }
1665         }
1666
1667         if (rxr->ptag != NULL) {
1668                 bus_dma_tag_destroy(rxr->ptag);
1669                 rxr->ptag = NULL;
1670         }
1671
1672         return;
1673 }
1674
1675 static __inline void
1676 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1677 {
1678                  
1679         /*
1680          * At the moment LRO is only for IPv4/IPv6 TCP packets whose TCP
1681          * checksum has been verified by hardware and which carry no VLAN
1682          * tag in the Ethernet header; for IPv6 we do not yet support ext. hdrs.
1683          */
1684         if (rxr->lro_enabled &&
1685             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1686             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1687             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1688             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1689             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1690             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1691             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1692             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1693                 /*
1694                  * Send to the stack if:
1695                  *  - LRO not enabled, or
1696                  *  - no LRO resources, or
1697                  *  - lro enqueue fails
1698                  */
1699                 if (rxr->lro.lro_cnt != 0)
1700                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1701                                 return;
1702         }
1703         IXGBE_RX_UNLOCK(rxr);
1704         (*ifp->if_input)(ifp, m);
1705         IXGBE_RX_LOCK(rxr);
1706 }
1707
1708 static __inline void
1709 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1710 {
1711         struct ixgbe_rx_buf     *rbuf;
1712
1713         rbuf = &rxr->rx_buffers[i];
1714
1715
1716         /*
1717         ** With advanced descriptors the writeback
1718         ** clobbers the buffer addresses, so it's easier
1719         ** to just free the existing mbufs and take
1720         ** the normal refresh path to get new buffers
1721         ** and mapping.
1722         */
1723
1724         if (rbuf->fmp != NULL) {/* Partial chain ? */
1725                 rbuf->fmp->m_flags |= M_PKTHDR;
1726                 m_freem(rbuf->fmp);
1727                 rbuf->fmp = NULL;
1728                 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1729         } else if (rbuf->buf) {
1730                 m_free(rbuf->buf);
1731                 rbuf->buf = NULL;
1732         }
1733         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1734
1735         rbuf->flags = 0;
1736  
1737         return;
1738 }
1739
1740
1741 /*********************************************************************
1742  *
1743  *  This routine executes in interrupt context. It replenishes
1744  *  the mbufs in the descriptor ring and passes data that has been
1745  *  DMA'ed into host memory up to the network stack.
1746  *
1747  *  Return TRUE for more work, FALSE for all clean.
1748  *********************************************************************/
1749 bool
1750 ixgbe_rxeof(struct ix_queue *que)
1751 {
1752         struct adapter          *adapter = que->adapter;
1753         struct rx_ring          *rxr = que->rxr;
1754         struct ifnet            *ifp = adapter->ifp;
1755         struct lro_ctrl         *lro = &rxr->lro;
1756         int                     i, nextp, processed = 0;
1757         u32                     staterr = 0;
1758         u32                     count = adapter->rx_process_limit;
1759         union ixgbe_adv_rx_desc *cur;
1760         struct ixgbe_rx_buf     *rbuf, *nbuf;
1761         u16                     pkt_info;
1762
1763         IXGBE_RX_LOCK(rxr);
1764
1765 #ifdef DEV_NETMAP
1766         /* Same as the txeof routine: wakeup clients on intr. */
1767         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1768                 IXGBE_RX_UNLOCK(rxr);
1769                 return (FALSE);
1770         }
1771 #endif /* DEV_NETMAP */
1772
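             /*
              * Main cleanup loop: starting at next_to_check, consume
              * completed descriptors (DD set) until the ring is empty or
              * the rx_process_limit budget runs out, assembling mbuf
              * chains and handing finished packets to LRO or if_input().
              */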
1773         for (i = rxr->next_to_check; count != 0;) {
1774                 struct mbuf     *sendmp, *mp;
1775                 u32             rsc, ptype;
1776                 u16             len;
1777                 u16             vtag = 0;
1778                 bool            eop;
1779  
1780                 /* Sync the ring. */
1781                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1782                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1783
1784                 cur = &rxr->rx_base[i];
1785                 staterr = le32toh(cur->wb.upper.status_error);
1786                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1787
1788                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1789                         break;
1790                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1791                         break;
1792
1793                 count--;
1794                 sendmp = NULL;
1795                 nbuf = NULL;
1796                 rsc = 0;
1797                 cur->wb.upper.status_error = 0;
1798                 rbuf = &rxr->rx_buffers[i];
1799                 mp = rbuf->buf;
1800
1801                 len = le16toh(cur->wb.upper.length);
1802                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1803                     IXGBE_RXDADV_PKTTYPE_MASK;
1804                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1805
1806                 /* Make sure bad packets are discarded */
1807                 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1808 #if __FreeBSD_version >= 1100036
1809                         if (IXGBE_IS_VF(adapter))
1810                                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1811 #endif
1812                         rxr->rx_discarded++;
1813                         ixgbe_rx_discard(rxr, i);
1814                         goto next_desc;
1815                 }
1816
1817                 /*
1818                 ** On 82599, which supports hardware
1819                 ** LRO (called HW RSC), packets need
1820                 ** not be fragmented across sequential
1821                 ** descriptors; rather, the next descriptor
1822                 ** is indicated in bits of the descriptor.
1823                 ** This also means we might process more
1824                 ** than one packet at a time, which
1825                 ** required eliminating the global chain
1826                 ** pointers in favor of the per-buffer
1827                 ** head pointers used here.  -jfv
1828                 */
1829                 if (!eop) {
1830                         /*
1831                         ** Figure out the next descriptor
1832                         ** of this frame.
1833                         */
1834                         if (rxr->hw_rsc == TRUE) {
1835                                 rsc = ixgbe_rsc_count(cur);
1836                                 rxr->rsc_num += (rsc - 1);
1837                         }
1838                         if (rsc) { /* Get hardware index */
1839                                 nextp = ((staterr &
1840                                     IXGBE_RXDADV_NEXTP_MASK) >>
1841                                     IXGBE_RXDADV_NEXTP_SHIFT);
1842                         } else { /* Just sequential */
1843                                 nextp = i + 1;
1844                                 if (nextp == adapter->num_rx_desc)
1845                                         nextp = 0;
1846                         }
1847                         nbuf = &rxr->rx_buffers[nextp];
1848                         prefetch(nbuf);
1849                 }
1850                 /*
1851                 ** Rather than using the fmp/lmp global pointers
1852                 ** we now keep the head of a packet chain in the
1853                 ** buffer struct and pass this along from one
1854                 ** descriptor to the next, until we get EOP.
1855                 */
1856                 mp->m_len = len;
1857                 /*
1858                 ** See if a chain head was stored for this
1859                 ** buffer; if so, this is a continuation frag.
1860                 */
1861                 sendmp = rbuf->fmp;
1862                 if (sendmp != NULL) {  /* secondary frag */
1863                         rbuf->buf = rbuf->fmp = NULL;
1864                         mp->m_flags &= ~M_PKTHDR;
1865                         sendmp->m_pkthdr.len += mp->m_len;
1866                 } else {
1867                         /*
1868                          * Optimize.  This might be a small packet,
1869                          * maybe just a TCP ACK.  Do a fast copy that
1870                          * is cache aligned into a new mbuf, and
1871                          * leave the old mbuf+cluster for re-use.
1872                          */
1873                         if (eop && len <= IXGBE_RX_COPY_LEN) {
1874                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1875                                 if (sendmp != NULL) {
1876                                         sendmp->m_data +=
1877                                             IXGBE_RX_COPY_ALIGN;
1878                                         ixgbe_bcopy(mp->m_data,
1879                                             sendmp->m_data, len);
1880                                         sendmp->m_len = len;
1881                                         rxr->rx_copies++;
1882                                         rbuf->flags |= IXGBE_RX_COPY;
1883                                 }
1884                         }
1885                         if (sendmp == NULL) {
1886                                 rbuf->buf = rbuf->fmp = NULL;
1887                                 sendmp = mp;
1888                         }
1889
1890                         /* first desc of a non-ps chain */
1891                         sendmp->m_flags |= M_PKTHDR;
1892                         sendmp->m_pkthdr.len = mp->m_len;
1893                 }
1894                 ++processed;
1895
1896                 /* Pass the head pointer on */
1897                 if (eop == 0) {
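                             /*
                              * Not EOP yet: stash the chain head in the next
                              * buffer and pre-link this mbuf to the next
                              * buffer's mbuf so the chain continues there.
                              */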
1898                         nbuf->fmp = sendmp;
1899                         sendmp = NULL;
1900                         mp->m_next = nbuf->buf;
1901                 } else { /* Sending this frame */
1902                         sendmp->m_pkthdr.rcvif = ifp;
1903                         rxr->rx_packets++;
1904                         /* capture data for AIM */
1905                         rxr->bytes += sendmp->m_pkthdr.len;
1906                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1907                         /* Process vlan info */
1908                         if ((rxr->vtag_strip) &&
1909                             (staterr & IXGBE_RXD_STAT_VP))
1910                                 vtag = le16toh(cur->wb.upper.vlan);
1911                         if (vtag) {
1912                                 sendmp->m_pkthdr.ether_vtag = vtag;
1913                                 sendmp->m_flags |= M_VLANTAG;
1914                         }
1915                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1916                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
1917
1918                         /*
1919                          * In the multiqueue case the RXCSUM.PCSD bit is set
1920                          * and never cleared, which means an RSS hash is
1921                          * available for use.
1922                          */
1923                         if (adapter->num_queues > 1) {
1924                                 sendmp->m_pkthdr.flowid =
1925                                     le32toh(cur->wb.lower.hi_dword.rss);
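                                     /*
                                      * Map the descriptor's RSS type field
                                      * onto the matching mbuf hash type so
                                      * the stack can reuse the hardware hash.
                                      */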
1926                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {  
1927                                     case IXGBE_RXDADV_RSSTYPE_IPV4:
1928                                         M_HASHTYPE_SET(sendmp,
1929                                             M_HASHTYPE_RSS_IPV4);
1930                                         break;
1931                                     case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1932                                         M_HASHTYPE_SET(sendmp,
1933                                             M_HASHTYPE_RSS_TCP_IPV4);
1934                                         break;
1935                                     case IXGBE_RXDADV_RSSTYPE_IPV6:
1936                                         M_HASHTYPE_SET(sendmp,
1937                                             M_HASHTYPE_RSS_IPV6);
1938                                         break;
1939                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1940                                         M_HASHTYPE_SET(sendmp,
1941                                             M_HASHTYPE_RSS_TCP_IPV6);
1942                                         break;
1943                                     case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1944                                         M_HASHTYPE_SET(sendmp,
1945                                             M_HASHTYPE_RSS_IPV6_EX);
1946                                         break;
1947                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1948                                         M_HASHTYPE_SET(sendmp,
1949                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
1950                                         break;
1951 #if __FreeBSD_version > 1100000
1952                                     case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1953                                         M_HASHTYPE_SET(sendmp,
1954                                             M_HASHTYPE_RSS_UDP_IPV4);
1955                                         break;
1956                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1957                                         M_HASHTYPE_SET(sendmp,
1958                                             M_HASHTYPE_RSS_UDP_IPV6);
1959                                         break;
1960                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1961                                         M_HASHTYPE_SET(sendmp,
1962                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
1963                                         break;
1964 #endif
1965                                     default:
1966                                         M_HASHTYPE_SET(sendmp,
1967                                             M_HASHTYPE_OPAQUE);
1968                                 }
1969                         } else {
1970                                 sendmp->m_pkthdr.flowid = que->msix;
1971                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1972                         }
1973                 }
1974 next_desc:
1975                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1976                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1977
1978                 /* Advance our pointers to the next descriptor. */
1979                 if (++i == rxr->num_desc)
1980                         i = 0;
1981
1982                 /* Now send to the stack or do LRO */
1983                 if (sendmp != NULL) {
1984                         rxr->next_to_check = i;
1985                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1986                         i = rxr->next_to_check;
1987                 }
1988
1989                 /* Every 8 descriptors we go to refresh mbufs */
1990                 if (processed == 8) {
1991                         ixgbe_refresh_mbufs(rxr, i);
1992                         processed = 0;
1993                 }
1994         }
1995
1996         /* Refresh any remaining buf structs */
1997         if (ixgbe_rx_unrefreshed(rxr))
1998                 ixgbe_refresh_mbufs(rxr, i);
1999
2000         rxr->next_to_check = i;
2001
2002         /*
2003          * Flush any outstanding LRO work
2004          */
2005         tcp_lro_flush_all(lro);
2006
2007         IXGBE_RX_UNLOCK(rxr);
2008
2009         /*
2010         ** Still have cleaning to do?
2011         */
2012         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2013                 return (TRUE);
2014         else
2015                 return (FALSE);
2016 }
2017
2018
2019 /*********************************************************************
2020  *
2021  *  Verify that the hardware indicated that the checksum is valid.
2022  *  Inform the stack about the status of the checksum so that the
2023  *  stack doesn't spend time verifying it.
2024  *
2025  *********************************************************************/
2026 static void
2027 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2028 {
2029         u16     status = (u16) staterr;
2030         u8      errors = (u8) (staterr >> 24);
2031         bool    sctp = false;
2032
2033         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2034             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2035                 sctp = true;
2036
2037         /* IPv4 checksum */
2038         if (status & IXGBE_RXD_STAT_IPCS) {
2039                 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
2040                 /* IP Checksum Good */
2041                 if (!(errors & IXGBE_RXD_ERR_IPE))
2042                         mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
2043         }
2044         /* TCP/UDP/SCTP checksum */
2045         if (status & IXGBE_RXD_STAT_L4CS) {
2046                 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
2047                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2048                         mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
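                             /*
                              * A csum_data of 0xffff is the customary way to
                              * tell the stack the L4 checksum verified clean;
                              * SCTP uses CRC32c, so no value is set for it.
                              */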
2049                         if (!sctp)
2050                                 mp->m_pkthdr.csum_data = htons(0xffff);
2051                 }
2052         }
2053 }
2054
2055 /********************************************************************
2056  * Manage DMA'able memory.
2057  *******************************************************************/
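     /*
      * bus_dmamap_load() callback: record the bus address of the single
      * mapped segment for the caller, e.g.
      *
      *     bus_dmamap_load(tag, map, vaddr, size, ixgbe_dmamap_cb,
      *         &dma->dma_paddr, BUS_DMA_NOWAIT);
      *
      * as is done in ixgbe_dma_malloc() below.
      */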
2058 static void
2059 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2060 {
2061         if (error)
2062                 return;
2063         *(bus_addr_t *) arg = segs->ds_addr;
2064         return;
2065 }
2066
2067 int
2068 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2069                 struct ixgbe_dma_alloc *dma, int mapflags)
2070 {
2071         device_t dev = adapter->dev;
2072         int             r;
2073
2074         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2075                                DBA_ALIGN, 0,    /* alignment, bounds */
2076                                BUS_SPACE_MAXADDR,       /* lowaddr */
2077                                BUS_SPACE_MAXADDR,       /* highaddr */
2078                                NULL, NULL,      /* filter, filterarg */
2079                                size,    /* maxsize */
2080                                1,       /* nsegments */
2081                                size,    /* maxsegsize */
2082                                BUS_DMA_ALLOCNOW,        /* flags */
2083                                NULL,    /* lockfunc */
2084                                NULL,    /* lockfuncarg */
2085                                &dma->dma_tag);
2086         if (r != 0) {
2087                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2088                        "error %u\n", r);
2089                 goto fail_0;
2090         }
2091         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2092                              BUS_DMA_NOWAIT, &dma->dma_map);
2093         if (r != 0) {
2094                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2095                        "error %u\n", r);
2096                 goto fail_1;
2097         }
2098         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2099                             size,
2100                             ixgbe_dmamap_cb,
2101                             &dma->dma_paddr,
2102                             mapflags | BUS_DMA_NOWAIT);
2103         if (r != 0) {
2104                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2105                        "error %u\n", r);
2106                 goto fail_2;
2107         }
2108         dma->dma_size = size;
2109         return (0);
2110 fail_2:
2111         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2112 fail_1:
2113         bus_dma_tag_destroy(dma->dma_tag);
2114 fail_0:
2115         dma->dma_tag = NULL;
2116         return (r);
2117 }
2118
2119 void
2120 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2121 {
2122         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2123             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2124         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2125         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2126         bus_dma_tag_destroy(dma->dma_tag);
2127 }
2128
2129
2130 /*********************************************************************
2131  *
2132  *  Allocate memory for the transmit and receive rings, and then
2133  *  the descriptors associated with each, called only once at attach.
2134  *
2135  **********************************************************************/
2136 int
2137 ixgbe_allocate_queues(struct adapter *adapter)
2138 {
2139         device_t        dev = adapter->dev;
2140         struct ix_queue *que;
2141         struct tx_ring  *txr;
2142         struct rx_ring  *rxr;
2143         int rsize, tsize, error = IXGBE_SUCCESS;
2144         int txconf = 0, rxconf = 0;
2145 #ifdef PCI_IOV
2146         enum ixgbe_iov_mode iov_mode;
2147 #endif
2148
2149         /* First allocate the top level queue structs */
2150         if (!(adapter->queues =
2151             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2152             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2153                 device_printf(dev, "Unable to allocate queue memory\n");
2154                 error = ENOMEM;
2155                 goto fail;
2156         }
2157
2158         /* First allocate the TX ring struct memory */
2159         if (!(adapter->tx_rings =
2160             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2161             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2162                 device_printf(dev, "Unable to allocate TX ring memory\n");
2163                 error = ENOMEM;
2164                 goto tx_fail;
2165         }
2166
2167         /* Next allocate the RX */
2168         if (!(adapter->rx_rings =
2169             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2170             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2171                 device_printf(dev, "Unable to allocate RX ring memory\n");
2172                 error = ENOMEM;
2173                 goto rx_fail;
2174         }
2175
2176         /* For the ring itself */
2177         tsize = roundup2(adapter->num_tx_desc *
2178             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2179
2180 #ifdef PCI_IOV
2181         iov_mode = ixgbe_get_iov_mode(adapter);
2182         adapter->pool = ixgbe_max_vfs(iov_mode);
2183 #else
2184         adapter->pool = 0;
2185 #endif
2186         /*
2187          * Now set up the TX queues. txconf is needed to handle the
2188          * possibility that things fail midcourse and we need to
2189          * unwind the memory allocations gracefully.
2190          */
2191         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2192                 /* Set up some basics */
2193                 txr = &adapter->tx_rings[i];
2194                 txr->adapter = adapter;
2195 #ifdef PCI_IOV
2196                 txr->me = ixgbe_pf_que_index(iov_mode, i);
2197 #else
2198                 txr->me = i;
2199 #endif
2200                 txr->num_desc = adapter->num_tx_desc;
2201
2202                 /* Initialize the TX side lock */
2203                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2204                     device_get_nameunit(dev), txr->me);
2205                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2206
2207                 if (ixgbe_dma_malloc(adapter, tsize,
2208                         &txr->txdma, BUS_DMA_NOWAIT)) {
2209                         device_printf(dev,
2210                             "Unable to allocate TX Descriptor memory\n");
2211                         error = ENOMEM;
2212                         goto err_tx_desc;
2213                 }
2214                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2215                 bzero((void *)txr->tx_base, tsize);
2216
2217                 /* Now allocate transmit buffers for the ring */
2218                 if (ixgbe_allocate_transmit_buffers(txr)) {
2219                         device_printf(dev,
2220                             "Critical Failure setting up transmit buffers\n");
2221                         error = ENOMEM;
2222                         goto err_tx_desc;
2223                 }
2224 #ifndef IXGBE_LEGACY_TX
2225                 /* Allocate a buf ring */
2226                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2227                     M_WAITOK, &txr->tx_mtx);
2228                 if (txr->br == NULL) {
2229                         device_printf(dev,
2230                             "Critical Failure setting up buf ring\n");
2231                         error = ENOMEM;
2232                         goto err_tx_desc;
2233                 }
2234 #endif
2235         }
2236
2237         /*
2238          * Next the RX queues...
2239          */ 
2240         rsize = roundup2(adapter->num_rx_desc *
2241             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2242         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2243                 rxr = &adapter->rx_rings[i];
2244                 /* Set up some basics */
2245                 rxr->adapter = adapter;
2246 #ifdef PCI_IOV
2247                 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2248 #else
2249                 rxr->me = i;
2250 #endif
2251                 rxr->num_desc = adapter->num_rx_desc;
2252
2253                 /* Initialize the RX side lock */
2254                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2255                     device_get_nameunit(dev), rxr->me);
2256                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2257
2258                 if (ixgbe_dma_malloc(adapter, rsize,
2259                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2260                         device_printf(dev,
2261                             "Unable to allocate RxDescriptor memory\n");
2262                         error = ENOMEM;
2263                         goto err_rx_desc;
2264                 }
2265                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2266                 bzero((void *)rxr->rx_base, rsize);
2267
2268                 /* Allocate receive buffers for the ring*/
2269                 if (ixgbe_allocate_receive_buffers(rxr)) {
2270                         device_printf(dev,
2271                             "Critical Failure setting up receive buffers\n");
2272                         error = ENOMEM;
2273                         goto err_rx_desc;
2274                 }
2275         }
2276
2277         /*
2278         ** Finally set up the queue holding structs
2279         */
2280         for (int i = 0; i < adapter->num_queues; i++) {
2281                 que = &adapter->queues[i];
2282                 que->adapter = adapter;
2283                 que->me = i;
2284                 que->txr = &adapter->tx_rings[i];
2285                 que->rxr = &adapter->rx_rings[i];
2286         }
2287
2288         return (0);
2289
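     /*
      * Error unwind: rxconf and txconf count the rings whose descriptor
      * DMA areas were successfully allocated, so only those are freed
      * below before the ring arrays themselves are released.
      */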
2290 err_rx_desc:
2291         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2292                 ixgbe_dma_free(adapter, &rxr->rxdma);
2293 err_tx_desc:
2294         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2295                 ixgbe_dma_free(adapter, &txr->txdma);
2296         free(adapter->rx_rings, M_DEVBUF);
2297 rx_fail:
2298         free(adapter->tx_rings, M_DEVBUF);
2299 tx_fail:
2300         free(adapter->queues, M_DEVBUF);
2301 fail:
2302         return (error);
2303 }