/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifndef IXGBE_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixgbe.h"

#ifdef  RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef DEV_NETMAP
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <dev/netmap/netmap_kern.h>

extern int ix_crcstrip;
#endif
/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  It also causes IP forwarding to
**  fail, and unlike LRO that cannot be
**  controlled by the stack. For all these
**  reasons it is best left off by default;
**  there is no tunable interface, so
**  enabling it requires changing the value
**  below and recompiling.
*/
static bool ixgbe_rsc_enable = FALSE;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool; with the
** default of 20, every 20th packet
** will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
#endif

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static void     ixgbe_setup_transmit_ring(struct tx_ring *);
static void     ixgbe_free_transmit_buffers(struct tx_ring *);
static int      ixgbe_setup_receive_ring(struct rx_ring *);
static void     ixgbe_free_receive_buffers(struct rx_ring *);

static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int      ixgbe_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      ixgbe_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
#ifdef IXGBE_FDIR
static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
#endif
static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
                    struct mbuf *, u32);

#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
{
        struct mbuf    *m_head;
        struct adapter *adapter = txr->adapter;

        IXGBE_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                return;
        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
                        break;

                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;

                if (ixgbe_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);
        }
        return;
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
void
ixgbe_start(struct ifnet *ifp)
{
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IXGBE_TX_LOCK(txr);
                ixgbe_start_locked(txr, ifp);
                IXGBE_TX_UNLOCK(txr);
        }
        return;
}

#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit Entry Point
** (if_transmit function)
*/
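/*
** A sketch of how this is wired up during attach (the actual
** assignments live in the core ixgbe attach code, not here):
**      ifp->if_transmit = ixgbe_mq_start;
**      ifp->if_qflush = ixgbe_qflush;
*/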
int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ix_queue *que;
        struct tx_ring  *txr;
        int             i, err = 0;
#ifdef  RSS
        uint32_t bucket_id;
#endif

        /*
         * When doing RSS, map it to the same outbound queue
         * as the incoming flow would be mapped to.
         *
         * If everything is set up correctly, it should be the
         * same bucket that the current CPU maps to.
         */
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef  RSS
                if (rss_hash2bucket(m->m_pkthdr.flowid,
                    M_HASHTYPE_GET(m), &bucket_id) == 0) {
                        i = bucket_id % adapter->num_queues;
#ifdef IXGBE_DEBUG
                        if (bucket_id > adapter->num_queues)
                                if_printf(ifp, "bucket_id (%d) > num_queues "
                                    "(%d)\n", bucket_id, adapter->num_queues);
#endif
                } else
#endif
                        i = m->m_pkthdr.flowid % adapter->num_queues;
        } else
                i = curcpu % adapter->num_queues;

        /* Check for a hung queue and pick alternative */
        if (((1 << i) & adapter->active_queues) == 0)
                i = ffsl(adapter->active_queues);

        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IXGBE_TX_TRYLOCK(txr)) {
                ixgbe_mq_start_locked(ifp, txr);
                IXGBE_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (0);
}

int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             enqueued = 0, err = 0;

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
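        /*
         * On newer kernels drbr_peek() leaves the mbuf on the ring:
         * after a successful ixgbe_xmit() we drbr_advance() to
         * consume the slot, while on failure drbr_putback() restores
         * the (possibly defragged) mbuf, or drbr_advance() drops the
         * slot if ixgbe_xmit() already freed it. Older kernels use
         * the dequeue/re-enqueue protocol instead.
         */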
#if __FreeBSD_version < 901504
        next = drbr_dequeue(ifp, txr->br);
        while (next != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
#else
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                drbr_advance(ifp, txr->br);
                        } else {
                                drbr_putback(ifp, txr->br, next);
                        }
#endif
                        break;
                }
#if __FreeBSD_version >= 901504
                drbr_advance(ifp, txr->br);
#endif
                enqueued++;
#if 0 // this is VF-only
#if __FreeBSD_version >= 1100036
                /*
                 * Since we're looking at the tx ring, we can check
                 * to see if we're a VF by examining our tail register
                 * address.
                 */
                if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
#endif
#endif
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
#if __FreeBSD_version < 901504
                next = drbr_dequeue(ifp, txr->br);
#endif
        }

        if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
                ixgbe_txeof(txr);

        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixgbe_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IXGBE_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                ixgbe_mq_start_locked(ifp, txr);
        IXGBE_TX_UNLOCK(txr);
}

/*
 * Flush all ring buffers
 */
void
ixgbe_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IXGBE_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IXGBE_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *      - return 0 on success, positive on failure
 *
 **********************************************************************/

static int
ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
        struct adapter  *adapter = txr->adapter;
        u32             olinfo_status = 0, cmd_type_len;
        int             i, j, error, nsegs;
        int             first;
        bool            remap = TRUE;
        struct mbuf     *m_head;
        bus_dma_segment_t segs[adapter->num_segs];
        bus_dmamap_t    map;
        struct ixgbe_tx_buf *txbuf;
        union ixgbe_adv_tx_desc *txd = NULL;

        m_head = *m_headp;

        /* Basic descriptor defines */
        cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
            IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

        if (m_head->m_flags & M_VLANTAG)
                cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

        /*
         * Important to capture the first descriptor
         * used, because it will contain the index of
         * the one we tell the hardware to report back.
         */
        first = txr->next_avail_desc;
        txbuf = &txr->tx_buffers[first];
        map = txbuf->map;

        /*
         * Map the packet for DMA.
         */
retry:
        error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
            *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

        if (__predict_false(error)) {
                struct mbuf *m;

                switch (error) {
                case EFBIG:
                        /* Try it again? - one try */
                        if (remap == TRUE) {
                                remap = FALSE;
                                /*
                                 * XXX: m_defrag will choke on
                                 * non-MCLBYTES-sized clusters
                                 */
                                m = m_defrag(*m_headp, M_NOWAIT);
                                if (m == NULL) {
                                        adapter->mbuf_defrag_failed++;
                                        m_freem(*m_headp);
                                        *m_headp = NULL;
                                        return (ENOBUFS);
                                }
                                *m_headp = m;
                                goto retry;
                        } else
                                return (error);
                case ENOMEM:
                        txr->no_tx_dma_setup++;
                        return (error);
                default:
                        txr->no_tx_dma_setup++;
                        m_freem(*m_headp);
                        *m_headp = NULL;
                        return (error);
                }
        }

        /* Make certain there are enough descriptors */
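        /*
         * nsegs + 2: one slot per DMA segment, one for the offload
         * context descriptor consumed by ixgbe_tx_ctx_setup(), and
         * (likely) one slot of slack so the ring never fills up
         * completely.
         */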
        if (txr->tx_avail < (nsegs + 2)) {
                txr->no_desc_avail++;
                bus_dmamap_unload(txr->txtag, map);
                return (ENOBUFS);
        }
        m_head = *m_headp;

        /*
         * Set up the appropriate offload context;
         * this will consume the first descriptor.
         */
        error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
        if (__predict_false(error)) {
                if (error == ENOBUFS)
                        *m_headp = NULL;
                return (error);
        }

#ifdef IXGBE_FDIR
        /* Do the flow director magic */
        if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
                ++txr->atr_count;
                if (txr->atr_count >= atr_sample_rate) {
                        ixgbe_atr(txr, m_head);
                        txr->atr_count = 0;
                }
        }
#endif

        olinfo_status |= IXGBE_ADVTXD_CC;
        i = txr->next_avail_desc;
        for (j = 0; j < nsegs; j++) {
                bus_size_t seglen;
                bus_addr_t segaddr;

                txbuf = &txr->tx_buffers[i];
                txd = &txr->tx_base[i];
                seglen = segs[j].ds_len;
                segaddr = htole64(segs[j].ds_addr);

                txd->read.buffer_addr = segaddr;
                txd->read.cmd_type_len = htole32(txr->txd_cmd |
                    cmd_type_len | seglen);
                txd->read.olinfo_status = htole32(olinfo_status);

                if (++i == txr->num_desc)
                        i = 0;
        }

        txd->read.cmd_type_len |=
            htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
        txr->tx_avail -= nsegs;
        txr->next_avail_desc = i;

        txbuf->m_head = m_head;
        /*
         * Here we swap the map so the last descriptor,
         * which gets the completion interrupt, has the
         * real map, and the first descriptor gets the
         * unused map from this descriptor.
         */
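        /*
         * e.g. with first = 5 and nsegs = 3, descriptors 5..7 are
         * used: the mbuf and its loaded map end up on tx_buffers[7],
         * while tx_buffers[5] keeps the spare map and (below) the
         * pointer to EOP descriptor 7 that txeof checks for DD.
         */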
        txr->tx_buffers[first].map = txbuf->map;
        txbuf->map = map;
        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

        /* Set the EOP descriptor that will be marked done */
        txbuf = &txr->tx_buffers[first];
        txbuf->eop = txd;

        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
        /*
         * Advance the Transmit Descriptor Tail (TDT); this tells the
         * hardware that this frame is available to transmit.
         */
        ++txr->total_packets;
        IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);

        /* Mark queue as having work */
        if (txr->busy == 0)
                txr->busy = 1;

        return (0);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach; setup is done on every reset.
 *
 **********************************************************************/
int
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
{
        struct adapter *adapter = txr->adapter;
        device_t dev = adapter->dev;
        struct ixgbe_tx_buf *txbuf;
        int error, i;

        /*
         * Setup DMA descriptor areas.
         */
        if ((error = bus_dma_tag_create(
                               bus_get_dma_tag(adapter->dev),   /* parent */
                               1, 0,            /* alignment, bounds */
                               BUS_SPACE_MAXADDR,       /* lowaddr */
                               BUS_SPACE_MAXADDR,       /* highaddr */
                               NULL, NULL,              /* filter, filterarg */
                               IXGBE_TSO_SIZE,          /* maxsize */
                               adapter->num_segs,       /* nsegments */
                               PAGE_SIZE,               /* maxsegsize */
                               0,                       /* flags */
                               NULL,                    /* lockfunc */
                               NULL,                    /* lockfuncarg */
                               &txr->txtag))) {
                device_printf(dev, "Unable to allocate TX DMA tag\n");
                goto fail;
        }

        if (!(txr->tx_buffers =
            (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
            adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
                device_printf(dev, "Unable to allocate tx_buffer memory\n");
                error = ENOMEM;
                goto fail;
        }

        /* Create the descriptor buffer dma maps */
        txbuf = txr->tx_buffers;
        for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
                error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
                if (error != 0) {
                        device_printf(dev, "Unable to create TX DMA map\n");
                        goto fail;
                }
        }

        return 0;
fail:
        /* We free all; this handles the case where we failed partway */
        ixgbe_free_transmit_structures(adapter);
        return (error);
}

/*********************************************************************
 *
 *  Initialize a transmit ring.
 *
 **********************************************************************/
static void
ixgbe_setup_transmit_ring(struct tx_ring *txr)
{
        struct adapter *adapter = txr->adapter;
        struct ixgbe_tx_buf *txbuf;
#ifdef DEV_NETMAP
        struct netmap_adapter *na = NA(adapter->ifp);
        struct netmap_slot *slot;
#endif /* DEV_NETMAP */

        /* Clear the old ring contents */
        IXGBE_TX_LOCK(txr);
#ifdef DEV_NETMAP
        /*
         * (under lock): if in netmap mode, do some consistency
         * checks and set slot to entry 0 of the netmap ring.
         */
        slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */
        bzero((void *)txr->tx_base,
              (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc;
        /* Reset indices */
        txr->next_avail_desc = 0;
        txr->next_to_clean = 0;

        /* Free any existing tx buffers. */
        txbuf = txr->tx_buffers;
        for (int i = 0; i < txr->num_desc; i++, txbuf++) {
                if (txbuf->m_head != NULL) {
                        bus_dmamap_sync(txr->txtag, txbuf->map,
                            BUS_DMASYNC_POSTWRITE);
                        bus_dmamap_unload(txr->txtag, txbuf->map);
                        m_freem(txbuf->m_head);
                        txbuf->m_head = NULL;
                }
#ifdef DEV_NETMAP
                /*
                 * In netmap mode, set the map for the packet buffer.
                 * NOTE: Some drivers (not this one) also need to set
                 * the physical buffer address in the NIC ring.
                 * Slots in the netmap ring (indexed by "si") are
                 * kring->nkr_hwofs positions "ahead" wrt the
                 * corresponding slot in the NIC ring. In some drivers
                 * (not here) nkr_hwofs can be negative. Function
                 * netmap_idx_n2k() handles wraparounds properly.
                 */
                if (slot) {
                        int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
                        netmap_load_map(na, txr->txtag,
                            txbuf->map, NMB(na, slot + si));
                }
#endif /* DEV_NETMAP */
                /* Clear the EOP descriptor pointer */
                txbuf->eop = NULL;
        }

#ifdef IXGBE_FDIR
        /* Set the rate at which we sample packets */
        if (adapter->hw.mac.type != ixgbe_mac_82598EB)
                txr->atr_sample = atr_sample_rate;
#endif

        /* Set number of descriptors available */
        txr->tx_avail = adapter->num_tx_desc;

        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
        IXGBE_TX_UNLOCK(txr);
}

/*********************************************************************
 *
 *  Initialize all transmit rings.
 *
 **********************************************************************/
int
ixgbe_setup_transmit_structures(struct adapter *adapter)
{
        struct tx_ring *txr = adapter->tx_rings;

        for (int i = 0; i < adapter->num_queues; i++, txr++)
                ixgbe_setup_transmit_ring(txr);

        return (0);
}

/*********************************************************************
 *
 *  Free all transmit rings.
 *
 **********************************************************************/
void
ixgbe_free_transmit_structures(struct adapter *adapter)
{
        struct tx_ring *txr = adapter->tx_rings;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IXGBE_TX_LOCK(txr);
                ixgbe_free_transmit_buffers(txr);
                ixgbe_dma_free(adapter, &txr->txdma);
                IXGBE_TX_UNLOCK(txr);
                IXGBE_TX_LOCK_DESTROY(txr);
        }
        free(adapter->tx_rings, M_DEVBUF);
}

/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
static void
ixgbe_free_transmit_buffers(struct tx_ring *txr)
{
        struct adapter *adapter = txr->adapter;
        struct ixgbe_tx_buf *tx_buffer;
        int             i;

        INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");

        if (txr->tx_buffers == NULL)
                return;

        tx_buffer = txr->tx_buffers;
        for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
                if (tx_buffer->m_head != NULL) {
                        bus_dmamap_sync(txr->txtag, tx_buffer->map,
                            BUS_DMASYNC_POSTWRITE);
                        bus_dmamap_unload(txr->txtag,
                            tx_buffer->map);
                        m_freem(tx_buffer->m_head);
                        tx_buffer->m_head = NULL;
                        if (tx_buffer->map != NULL) {
                                bus_dmamap_destroy(txr->txtag,
                                    tx_buffer->map);
                                tx_buffer->map = NULL;
                        }
                } else if (tx_buffer->map != NULL) {
                        bus_dmamap_unload(txr->txtag,
                            tx_buffer->map);
                        bus_dmamap_destroy(txr->txtag,
                            tx_buffer->map);
                        tx_buffer->map = NULL;
                }
        }
#ifdef IXGBE_LEGACY_TX
        if (txr->br != NULL)
                buf_ring_free(txr->br, M_DEVBUF);
#endif
        if (txr->tx_buffers != NULL) {
                free(txr->tx_buffers, M_DEVBUF);
                txr->tx_buffers = NULL;
        }
        if (txr->txtag != NULL) {
                bus_dma_tag_destroy(txr->txtag);
                txr->txtag = NULL;
        }
        return;
}

/*********************************************************************
 *
 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
 *
 **********************************************************************/

static int
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
        struct adapter *adapter = txr->adapter;
        struct ixgbe_adv_tx_context_desc *TXD;
        struct ether_vlan_header *eh;
#ifdef INET
        struct ip *ip;
#endif
#ifdef INET6
        struct ip6_hdr *ip6;
#endif
        u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
        int     ehdrlen, ip_hlen = 0;
        u16     etype;
        u8      ipproto = 0;
        int     offload = TRUE;
        int     ctxd = txr->next_avail_desc;
        u16     vtag = 0;
        caddr_t l3d;


        /* First check if TSO is to be used */
        if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO|CSUM_IP6_TSO))
                return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));

        if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
                offload = FALSE;

        /* Indicate the whole packet as payload when not doing TSO */
        *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;

        /* Now ready a context descriptor */
        TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

        /*
        ** In advanced descriptors the vlan tag must
        ** be placed into the context descriptor. Hence
        ** we need to make one even if not doing offloads.
        */
        if (mp->m_flags & M_VLANTAG) {
                vtag = htole16(mp->m_pkthdr.ether_vtag);
                vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
        } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
                return (0);

        /*
         * Determine where frame payload starts.
         * Jump over vlan headers if already present,
         * helpful for QinQ too.
         */
        eh = mtod(mp, struct ether_vlan_header *);
        if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
                etype = ntohs(eh->evl_proto);
                ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
        } else {
                etype = ntohs(eh->evl_encap_proto);
                ehdrlen = ETHER_HDR_LEN;
        }

        /* Set the ether header length */
        vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

        if (offload == FALSE)
                goto no_offloads;

        /*
         * If the first mbuf only includes the ethernet header, jump to the next one
         * XXX: This assumes the stack splits mbufs containing headers on header boundaries
         * XXX: And assumes the entire IP header is contained in one mbuf
         */
        if (mp->m_len == ehdrlen && mp->m_next)
                l3d = mtod(mp->m_next, caddr_t);
        else
                l3d = mtod(mp, caddr_t) + ehdrlen;

        switch (etype) {
#ifdef INET
                case ETHERTYPE_IP:
                        ip = (struct ip *)(l3d);
                        ip_hlen = ip->ip_hl << 2;
                        ipproto = ip->ip_p;
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
                        /* Insert IPv4 checksum into data descriptors */
                        if (mp->m_pkthdr.csum_flags & CSUM_IP) {
                                ip->ip_sum = 0;
                                *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
                        }
                        break;
#endif
#ifdef INET6
                case ETHERTYPE_IPV6:
                        ip6 = (struct ip6_hdr *)(l3d);
                        ip_hlen = sizeof(struct ip6_hdr);
                        ipproto = ip6->ip6_nxt;
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
                        break;
#endif
                default:
                        offload = FALSE;
                        break;
        }

        vlan_macip_lens |= ip_hlen;

        /* No support for offloads for non-L4 next headers */
        switch (ipproto) {
                case IPPROTO_TCP:
                        if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
                                type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
                        else
                                offload = false;
                        break;
                case IPPROTO_UDP:
                        if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
                                type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
                        else
                                offload = false;
                        break;
                case IPPROTO_SCTP:
                        if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
                                type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
                        else
                                offload = false;
                        break;
                default:
                        offload = false;
                        break;
        }

        if (offload) /* Insert L4 checksum into data descriptors */
                *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

no_offloads:
        type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

        /* Now copy bits into descriptor */
        TXD->vlan_macip_lens = htole32(vlan_macip_lens);
        TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
        TXD->seqnum_seed = htole32(0);
        TXD->mss_l4len_idx = htole32(0);

        /* We've consumed the first desc, adjust counters */
        if (++ctxd == txr->num_desc)
                ctxd = 0;
        txr->next_avail_desc = ctxd;
        --txr->tx_avail;

        return (0);
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
        struct ixgbe_adv_tx_context_desc *TXD;
        u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
        u32 mss_l4len_idx = 0, paylen;
        u16 vtag = 0, eh_type;
        int ctxd, ehdrlen, ip_hlen, tcp_hlen;
        struct ether_vlan_header *eh;
#ifdef INET6
        struct ip6_hdr *ip6;
#endif
#ifdef INET
        struct ip *ip;
#endif
        struct tcphdr *th;

        /*
         * Determine where frame payload starts.
         * Jump over vlan headers if already present
         */
        eh = mtod(mp, struct ether_vlan_header *);
        if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
                ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
                eh_type = eh->evl_proto;
        } else {
                ehdrlen = ETHER_HDR_LEN;
                eh_type = eh->evl_encap_proto;
        }

        switch (ntohs(eh_type)) {
#ifdef INET6
        case ETHERTYPE_IPV6:
                ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
                /* XXX-BZ For now we do not pretend to support ext. hdrs. */
                if (ip6->ip6_nxt != IPPROTO_TCP)
                        return (ENXIO);
                ip_hlen = sizeof(struct ip6_hdr);
                th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
                th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
                type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
                break;
#endif
#ifdef INET
        case ETHERTYPE_IP:
                ip = (struct ip *)(mp->m_data + ehdrlen);
                if (ip->ip_p != IPPROTO_TCP)
                        return (ENXIO);
                ip->ip_sum = 0;
                ip_hlen = ip->ip_hl << 2;
                th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
                th->th_sum = in_pseudo(ip->ip_src.s_addr,
                    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
                type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
                /* Tell transmit desc to also do IPv4 checksum. */
                *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
                break;
#endif
        default:
                panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
                    __func__, ntohs(eh_type));
                break;
        }

        ctxd = txr->next_avail_desc;
        TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

        tcp_hlen = th->th_off << 2;

        /* This is used in the transmit desc in encap */
        paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
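        /*
         * e.g. a 9014-byte pkthdr with a 14-byte Ethernet header and
         * 20-byte IP and TCP headers gives paylen = 9014 - 14 - 20 -
         * 20 = 8960 bytes of TCP payload to be segmented.
         */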

        /* VLAN MACLEN IPLEN */
        if (mp->m_flags & M_VLANTAG) {
                vtag = htole16(mp->m_pkthdr.ether_vtag);
                vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
        }

        vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
        vlan_macip_lens |= ip_hlen;
        TXD->vlan_macip_lens = htole32(vlan_macip_lens);

        /* ADV DTYPE TUCMD */
        type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
        TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);

        /* MSS L4LEN IDX */
        mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
        mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
        TXD->mss_l4len_idx = htole32(mss_l4len_idx);

        TXD->seqnum_seed = htole32(0);

        if (++ctxd == txr->num_desc)
                ctxd = 0;

        txr->tx_avail--;
        txr->next_avail_desc = ctxd;
        *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
        *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
        *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
        ++txr->tso_tx;
        return (0);
}


/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
void
ixgbe_txeof(struct tx_ring *txr)
{
        struct adapter          *adapter = txr->adapter;
#ifdef DEV_NETMAP
        struct ifnet            *ifp = adapter->ifp;
#endif
        u32                     work, processed = 0;
        u32                     limit = adapter->tx_process_limit;
        struct ixgbe_tx_buf     *buf;
        union ixgbe_adv_tx_desc *txd;

        mtx_assert(&txr->tx_mtx, MA_OWNED);

#ifdef DEV_NETMAP
        if (ifp->if_capenable & IFCAP_NETMAP) {
                struct netmap_adapter *na = NA(ifp);
                struct netmap_kring *kring = &na->tx_rings[txr->me];
                txd = txr->tx_base;
                bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
                    BUS_DMASYNC_POSTREAD);
                /*
                 * In netmap mode, all the work is done in the context
                 * of the client thread. Interrupt handlers only wake up
                 * clients, which may be sleeping on individual rings
                 * or on a global resource for all rings.
                 * To implement tx interrupt mitigation, we wake up the client
                 * thread roughly every half ring, even if the NIC interrupts
                 * more frequently. This is implemented as follows:
                 * - ixgbe_txsync() sets kring->nr_kflags with the index of
                 *   the slot that should wake up the thread (nkr_num_slots
                 *   means the user thread should not be woken up);
                 * - the driver ignores tx interrupts unless netmap_mitigate=0
                 *   or the slot has the DD bit set.
                 */
                if (!netmap_mitigate ||
                    (kring->nr_kflags < kring->nkr_num_slots &&
                    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
                        netmap_tx_irq(ifp, txr->me);
                }
                return;
        }
#endif /* DEV_NETMAP */

        if (txr->tx_avail == txr->num_desc) {
                txr->busy = 0;
                return;
        }

        /* Get work starting point */
        work = txr->next_to_clean;
        buf = &txr->tx_buffers[work];
        txd = &txr->tx_base[work];
        work -= txr->num_desc; /* The distance to ring end */
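        /*
         * e.g. with num_desc = 1024 and next_to_clean = 1000, work is
         * now -24; it is incremented once per descriptor scanned, and
         * reaching zero means the scan wrapped to the ring start.
         */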
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_POSTREAD);

        do {
                union ixgbe_adv_tx_desc *eop = buf->eop;
                if (eop == NULL) /* No work */
                        break;

                if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
                        break;  /* I/O not complete */

                if (buf->m_head) {
                        txr->bytes +=
                            buf->m_head->m_pkthdr.len;
                        bus_dmamap_sync(txr->txtag,
                            buf->map,
                            BUS_DMASYNC_POSTWRITE);
                        bus_dmamap_unload(txr->txtag,
                            buf->map);
                        m_freem(buf->m_head);
                        buf->m_head = NULL;
                }
                buf->eop = NULL;
                ++txr->tx_avail;

                /* We clean the range if multi segment */
                while (txd != eop) {
                        ++txd;
                        ++buf;
                        ++work;
                        /* wrap the ring? */
                        if (__predict_false(!work)) {
                                work -= txr->num_desc;
                                buf = txr->tx_buffers;
                                txd = txr->tx_base;
                        }
                        if (buf->m_head) {
                                txr->bytes +=
                                    buf->m_head->m_pkthdr.len;
                                bus_dmamap_sync(txr->txtag,
                                    buf->map,
                                    BUS_DMASYNC_POSTWRITE);
                                bus_dmamap_unload(txr->txtag,
                                    buf->map);
                                m_freem(buf->m_head);
                                buf->m_head = NULL;
                        }
                        ++txr->tx_avail;
                        buf->eop = NULL;
                }
                ++txr->packets;
                ++processed;

                /* Try the next packet */
                ++txd;
                ++buf;
                ++work;
                /* reset with a wrap */
                if (__predict_false(!work)) {
                        work -= txr->num_desc;
                        buf = txr->tx_buffers;
                        txd = txr->tx_base;
                }
                prefetch(txd);
        } while (__predict_true(--limit));

        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        work += txr->num_desc;
        txr->next_to_clean = work;

        /*
        ** Queue Hang detection: we know there's work
        ** outstanding or the first return would have
        ** been taken, so increment busy if nothing
        ** managed to get cleaned; then in local_timer
        ** it will be checked, and marked as HUNG if it
        ** exceeds a MAX attempt.
        */
        if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
                ++txr->busy;
        /*
        ** If anything gets cleaned we reset state to 1;
        ** note this will turn off HUNG if it's set.
        */
        if (processed)
                txr->busy = 1;

        if (txr->tx_avail == txr->num_desc)
                txr->busy = 0;

        return;
}


#ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry
** allowing traffic flows to be identified and kept
** on the same cpu.  Doing this for every packet
** would be a performance hit, so we only do it
** for one in IXGBE_FDIR_RATE packets.
*/
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
        struct adapter                  *adapter = txr->adapter;
        struct ix_queue                 *que;
        struct ip                       *ip;
        struct tcphdr                   *th;
        struct udphdr                   *uh;
        struct ether_vlan_header        *eh;
        union ixgbe_atr_hash_dword      input = {.dword = 0};
        union ixgbe_atr_hash_dword      common = {.dword = 0};
        int                             ehdrlen, ip_hlen;
        u16                             etype;

        eh = mtod(mp, struct ether_vlan_header *);
        if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
                ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
                etype = eh->evl_proto;
        } else {
                ehdrlen = ETHER_HDR_LEN;
                etype = eh->evl_encap_proto;
        }

        /* Only handling IPv4 */
        if (etype != htons(ETHERTYPE_IP))
                return;

        ip = (struct ip *)(mp->m_data + ehdrlen);
        ip_hlen = ip->ip_hl << 2;

        /* check if we're UDP or TCP */
        switch (ip->ip_p) {
        case IPPROTO_TCP:
                th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
                /* src and dst are inverted */
                common.port.dst ^= th->th_sport;
                common.port.src ^= th->th_dport;
                input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
                break;
        case IPPROTO_UDP:
                uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
                /* src and dst are inverted */
                common.port.dst ^= uh->uh_sport;
                common.port.src ^= uh->uh_dport;
                input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
                break;
        default:
                return;
        }

        input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
        if (mp->m_pkthdr.ether_vtag)
                common.flex_bytes ^= htons(ETHERTYPE_VLAN);
        else
                common.flex_bytes ^= etype;
        common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;

        que = &adapter->queues[txr->me];
        /*
        ** This assumes the Rx queue and Tx
        ** queue are bound to the same CPU
        */
        ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
            input, common, que->msix);
}
#endif /* IXGBE_FDIR */

/*
** Used to detect a descriptor that has
** been merged by Hardware RSC.
*/
static inline u32
ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
{
        return (le32toh(rx->wb.lower.lo_dword.data) &
            IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
}

/*********************************************************************
 *
 *  Initialize Hardware RSC (LRO) feature on 82599
 *  for an RX ring; it is toggled by the LRO capability
 *  even though it is transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPV4 and
 *        our testing has shown soft LRO to be as effective,
 *        I have decided to disable it by default.
 *
 **********************************************************************/
static void
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
{
        struct  adapter         *adapter = rxr->adapter;
        struct  ixgbe_hw        *hw = &adapter->hw;
        u32                     rscctrl, rdrxctl;

        /* If turning LRO/RSC off we need to disable it */
        if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
                rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
                rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                /* Write the cleared bit back, or RSC stays enabled */
                IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
                return;
        }

        rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
        rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
        if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
#endif /* DEV_NETMAP */
        rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
        rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
        IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

        rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
        rscctrl |= IXGBE_RSCCTL_RSCEN;
        /*
        ** Limit the total number of descriptors that
        ** can be combined, so it does not exceed 64K
        */
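        /*
        ** e.g. 16 x 2K (MCLBYTES) = 32K, 8 x 4K (MJUMPAGESIZE on
        ** 4K-page systems) = 32K, 4 x 9K (MJUM9BYTES) = 36K; each
        ** choice stays under the 64K limit.
        */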
        if (rxr->mbuf_sz == MCLBYTES)
                rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
        else if (rxr->mbuf_sz == MJUMPAGESIZE)
                rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
        else if (rxr->mbuf_sz == MJUM9BYTES)
                rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
        else  /* Using 16K cluster */
                rscctrl |= IXGBE_RSCCTL_MAXDESC_1;

        IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

        /* Enable TCP header recognition */
        IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
            (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
            IXGBE_PSRTYPE_TCPHDR));

        /* Disable RSC for ACK packets */
        IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
            (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));

        rxr->hw_rsc = TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings.
 *   - Now keeps its own state, so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, and can simply
 *     be called again later to try again.
 *
 **********************************************************************/
static void
ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
{
        struct adapter          *adapter = rxr->adapter;
        bus_dma_segment_t       seg[1];
        struct ixgbe_rx_buf     *rxbuf;
        struct mbuf             *mp;
        int                     i, j, nsegs, error;
        bool                    refreshed = FALSE;

        i = j = rxr->next_to_refresh;
        /* Control the loop with one beyond */
        if (++j == rxr->num_desc)
                j = 0;
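        /*
         * e.g. with num_desc = 8 and next_to_refresh = 2, i starts at
         * 2 and j one beyond at 3; i trails j by one, so on exit
         * next_to_refresh points at the slot after the last one
         * refreshed, where the next call will resume.
         */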

        while (j != limit) {
                rxbuf = &rxr->rx_buffers[i];
                if (rxbuf->buf == NULL) {
                        mp = m_getjcl(M_NOWAIT, MT_DATA,
                            M_PKTHDR, rxr->mbuf_sz);
                        if (mp == NULL)
                                goto update;
                        if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
                                m_adj(mp, ETHER_ALIGN);
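                        /*
                         * ETHER_ALIGN (2 bytes) shifts the start of
                         * the frame so the IP header lands 32-bit
                         * aligned; only safe while the frame still
                         * fits in the cluster.
                         */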
                } else
                        mp = rxbuf->buf;

                mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;

                /* If we're dealing with an mbuf that was copied rather
                 * than replaced, there's no need to go through busdma.
                 */
                if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
                        /* Get the memory mapping */
                        bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
                        error = bus_dmamap_load_mbuf_sg(rxr->ptag,
                            rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
                        if (error != 0) {
                                printf("Refresh mbufs: payload dmamap load"
                                    " failure - %d\n", error);
                                m_free(mp);
                                rxbuf->buf = NULL;
                                goto update;
                        }
                        rxbuf->buf = mp;
                        bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
                            BUS_DMASYNC_PREREAD);
                        rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
                            htole64(seg[0].ds_addr);
                } else {
                        rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
                        rxbuf->flags &= ~IXGBE_RX_COPY;
                }

                refreshed = TRUE;
                /* Next is precalculated */
                i = j;
                rxr->next_to_refresh = i;
                if (++j == rxr->num_desc)
                        j = 0;
        }
update:
        if (refreshed) /* Update hardware tail index */
                IXGBE_WRITE_REG(&adapter->hw,
                    rxr->tail, rxr->next_to_refresh);
        return;
}
1373
1374 /*********************************************************************
1375  *
1376  *  Allocate memory for rx_buffer structures. Since we use one
1377  *  rx_buffer per received packet, the maximum number of rx_buffer's
1378  *  that we'll need is equal to the number of receive descriptors
1379  *  that we've allocated.
1380  *
1381  **********************************************************************/
1382 int
1383 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1384 {
1385         struct  adapter         *adapter = rxr->adapter;
1386         device_t                dev = adapter->dev;
1387         struct ixgbe_rx_buf     *rxbuf;
1388         int                     bsize, error;
1389
1390         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1391         if (!(rxr->rx_buffers =
1392             (struct ixgbe_rx_buf *) malloc(bsize,
1393             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1394                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1395                 error = ENOMEM;
1396                 goto fail;
1397         }
1398
1399         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1400                                    1, 0,        /* alignment, bounds */
1401                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1402                                    BUS_SPACE_MAXADDR,   /* highaddr */
1403                                    NULL, NULL,          /* filter, filterarg */
1404                                    MJUM16BYTES,         /* maxsize */
1405                                    1,                   /* nsegments */
1406                                    MJUM16BYTES,         /* maxsegsize */
1407                                    0,                   /* flags */
1408                                    NULL,                /* lockfunc */
1409                                    NULL,                /* lockfuncarg */
1410                                    &rxr->ptag))) {
1411                 device_printf(dev, "Unable to create RX DMA tag\n");
1412                 goto fail;
1413         }
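
        /*
         * The tag above permits one contiguous segment of up to
         * MJUM16BYTES (a 16KB jumbo cluster), so every receive buffer
         * maps to a single DMA segment; each descriptor now gets its
         * own dmamap created from that shared tag.
         */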
1414
        for (int i = 0; i < rxr->num_desc; i++) {
                rxbuf = &rxr->rx_buffers[i];
1417                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1418                 if (error) {
1419                         device_printf(dev, "Unable to create RX dma map\n");
1420                         goto fail;
1421                 }
1422         }
1423
1424         return (0);
1425
1426 fail:
1427         /* Frees all, but can handle partial completion */
1428         ixgbe_free_receive_structures(adapter);
1429         return (error);
1430 }
1431
static void
1433 ixgbe_free_receive_ring(struct rx_ring *rxr)
1434 {
1435
1436         for (int i = 0; i < rxr->num_desc; i++) {
1437                 ixgbe_rx_discard(rxr, i);
1438         }
1439 }
1440
1441 /*********************************************************************
1442  *
1443  *  Initialize a receive ring and its buffers.
1444  *
1445  **********************************************************************/
1446 static int
1447 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1448 {
1449         struct  adapter         *adapter;
1450         struct ifnet            *ifp;
1451         device_t                dev;
1452         struct ixgbe_rx_buf     *rxbuf;
1453         bus_dma_segment_t       seg[1];
1454         struct lro_ctrl         *lro = &rxr->lro;
1455         int                     rsize, nsegs, error = 0;
1456 #ifdef DEV_NETMAP
1457         struct netmap_adapter *na = NA(rxr->adapter->ifp);
1458         struct netmap_slot *slot;
1459 #endif /* DEV_NETMAP */
1460
1461         adapter = rxr->adapter;
1462         ifp = adapter->ifp;
1463         dev = adapter->dev;
1464
1465         /* Clear the ring contents */
1466         IXGBE_RX_LOCK(rxr);
1467 #ifdef DEV_NETMAP
1468         /* same as in ixgbe_setup_transmit_ring() */
1469         slot = netmap_reset(na, NR_RX, rxr->me, 0);
1470 #endif /* DEV_NETMAP */
1471         rsize = roundup2(adapter->num_rx_desc *
1472             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1473         bzero((void *)rxr->rx_base, rsize);
1474         /* Cache the size */
1475         rxr->mbuf_sz = adapter->rx_mbuf_sz;
1476
1477         /* Free current RX buffer structs and their mbufs */
1478         ixgbe_free_receive_ring(rxr);
1479
1480         /* Now replenish the mbufs */
1481         for (int j = 0; j != rxr->num_desc; ++j) {
1482                 struct mbuf     *mp;
1483
1484                 rxbuf = &rxr->rx_buffers[j];
1485 #ifdef DEV_NETMAP
1486                 /*
1487                  * In netmap mode, fill the map and set the buffer
1488                  * address in the NIC ring, considering the offset
1489                  * between the netmap and NIC rings (see comment in
1490                  * ixgbe_setup_transmit_ring() ). No need to allocate
1491                  * an mbuf, so end the block with a continue;
1492                  */
1493                 if (slot) {
1494                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1495                         uint64_t paddr;
1496                         void *addr;
1497
1498                         addr = PNMB(na, slot + sj, &paddr);
1499                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1500                         /* Update descriptor and the cached value */
1501                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1502                         rxbuf->addr = htole64(paddr);
1503                         continue;
1504                 }
1505 #endif /* DEV_NETMAP */
1506                 rxbuf->flags = 0; 
1507                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1508                     M_PKTHDR, adapter->rx_mbuf_sz);
1509                 if (rxbuf->buf == NULL) {
1510                         error = ENOBUFS;
1511                         goto fail;
1512                 }
1513                 mp = rxbuf->buf;
1514                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1515                 /* Get the memory mapping */
1516                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1517                     rxbuf->pmap, mp, seg,
1518                     &nsegs, BUS_DMA_NOWAIT);
1519                 if (error != 0)
1520                         goto fail;
1521                 bus_dmamap_sync(rxr->ptag,
1522                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
1523                 /* Update the descriptor and the cached value */
1524                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1525                 rxbuf->addr = htole64(seg[0].ds_addr);
1526         }
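
        /*
         * Note: pkt_addr in the descriptor and the rxbuf->addr cache are
         * kept in sync so that ixgbe_refresh_mbufs() can rewrite a
         * descriptor from the cached value, without a dmamap reload, when
         * IXGBE_RX_COPY allowed the mbuf to be reused in place.
         */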
1527
1528
1529         /* Setup our descriptor indices */
1530         rxr->next_to_check = 0;
1531         rxr->next_to_refresh = 0;
1532         rxr->lro_enabled = FALSE;
1533         rxr->rx_copies = 0;
1534         rxr->rx_bytes = 0;
1535         rxr->vtag_strip = FALSE;
1536
1537         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1538             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1539
1540         /*
1541         ** Now set up the LRO interface:
1542         */
1543         if (ixgbe_rsc_enable)
1544                 ixgbe_setup_hw_rsc(rxr);
1545         else if (ifp->if_capenable & IFCAP_LRO) {
1546                 int err = tcp_lro_init(lro);
1547                 if (err) {
1548                         device_printf(dev, "LRO Initialization failed!\n");
1549                         goto fail;
1550                 }
1551                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1552                 rxr->lro_enabled = TRUE;
1553                 lro->ifp = adapter->ifp;
1554         }
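
        /*
         * Software LRO is used only when the interface has IFCAP_LRO
         * enabled (typically toggled administratively, e.g. via
         * ifconfig's lro/-lro options); HW RSC, when compiled in,
         * takes precedence above.
         */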
1555
1556         IXGBE_RX_UNLOCK(rxr);
1557         return (0);
1558
1559 fail:
1560         ixgbe_free_receive_ring(rxr);
1561         IXGBE_RX_UNLOCK(rxr);
1562         return (error);
1563 }
1564
1565 /*********************************************************************
1566  *
1567  *  Initialize all receive rings.
1568  *
1569  **********************************************************************/
1570 int
1571 ixgbe_setup_receive_structures(struct adapter *adapter)
1572 {
1573         struct rx_ring *rxr = adapter->rx_rings;
1574         int j;
1575
1576         for (j = 0; j < adapter->num_queues; j++, rxr++)
1577                 if (ixgbe_setup_receive_ring(rxr))
1578                         goto fail;
1579
1580         return (0);
1581 fail:
        /*
         * Free the RX buffers allocated so far; we only handle the
         * rings that completed, since the failing ring will have
         * cleaned up for itself. Ring 'j' failed, so it is the
         * terminus.
         */
1587         for (int i = 0; i < j; ++i) {
1588                 rxr = &adapter->rx_rings[i];
1589                 IXGBE_RX_LOCK(rxr);
1590                 ixgbe_free_receive_ring(rxr);
1591                 IXGBE_RX_UNLOCK(rxr);
1592         }
1593
1594         return (ENOBUFS);
1595 }
1596
1597
1598 /*********************************************************************
1599  *
1600  *  Free all receive rings.
1601  *
1602  **********************************************************************/
1603 void
1604 ixgbe_free_receive_structures(struct adapter *adapter)
1605 {
1606         struct rx_ring *rxr = adapter->rx_rings;
1607
1608         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1609
1610         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1611                 struct lro_ctrl         *lro = &rxr->lro;
1612                 ixgbe_free_receive_buffers(rxr);
1613                 /* Free LRO memory */
1614                 tcp_lro_free(lro);
1615                 /* Free the ring memory as well */
1616                 ixgbe_dma_free(adapter, &rxr->rxdma);
1617         }
1618
1619         free(adapter->rx_rings, M_DEVBUF);
1620 }
1621
1622
1623 /*********************************************************************
1624  *
1625  *  Free receive ring data structures
1626  *
1627  **********************************************************************/
1628 void
1629 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1630 {
1631         struct adapter          *adapter = rxr->adapter;
1632         struct ixgbe_rx_buf     *rxbuf;
1633
1634         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1635
        /* Cleanup any existing buffers */
        if (rxr->rx_buffers != NULL) {
                for (int i = 0; i < adapter->num_rx_desc; i++) {
                        rxbuf = &rxr->rx_buffers[i];
                        ixgbe_rx_discard(rxr, i);
                        if (rxbuf->pmap != NULL) {
                                bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
                                rxbuf->pmap = NULL;
                        }
                }
                free(rxr->rx_buffers, M_DEVBUF);
                rxr->rx_buffers = NULL;
        }
1651
1652         if (rxr->ptag != NULL) {
1653                 bus_dma_tag_destroy(rxr->ptag);
1654                 rxr->ptag = NULL;
1655         }
1656
1657         return;
1658 }
1659
1660 static __inline void
1661 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1662 {

        /*
         * At the moment, LRO is only for IP/TCP packets, and the TCP
         * checksum of the packet must have been computed by hardware.
         * The packet also must not have a VLAN tag in its Ethernet
         * header. For IPv6 we do not yet support extension headers.
         */
1669         if (rxr->lro_enabled &&
1670             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1671             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1672             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1673             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1674             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1675             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1676             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1677             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
                /*
                 * Send to the stack if:
                 *  - LRO is not enabled, or
                 *  - there are no LRO resources, or
                 *  - the LRO enqueue fails.
                 */
1684                 if (rxr->lro.lro_cnt != 0)
1685                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1686                                 return;
1687         }
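        /*
         * Drop the RX lock across if_input(): the stack may process the
         * packet synchronously and could re-enter driver paths that take
         * this lock, so holding it here would risk recursion and long
         * hold times.
         */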
1688         IXGBE_RX_UNLOCK(rxr);
1689         (*ifp->if_input)(ifp, m);
1690         IXGBE_RX_LOCK(rxr);
1691 }
1692
1693 static __inline void
1694 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1695 {
1696         struct ixgbe_rx_buf     *rbuf;
1697
1698         rbuf = &rxr->rx_buffers[i];
1699
        /*
        ** With advanced descriptors the writeback clobbers the buffer
        ** addresses, so it is easier to just free the existing mbufs
        ** and take the normal refresh path to get new buffers and
        ** mappings.
        */
1708
        if (rbuf->fmp != NULL) {        /* Partial chain? */
1710                 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1711                 m_freem(rbuf->fmp);
1712                 rbuf->fmp = NULL;
1713                 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1714         } else if (rbuf->buf) {
1715                 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1716                 m_free(rbuf->buf);
1717                 rbuf->buf = NULL;
1718         }
1719         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1720
1721         rbuf->flags = 0;
1722  
1723         return;
1724 }
1725
1726
/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptors and passes frames that have been
 *  DMA'd into host memory up to the upper layers.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
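
/*
 * Rough shape of the loop below (summary of the code, not new logic):
 *   - sync the descriptor ring and read status/error for slot i
 *   - stop when DD (descriptor done) is clear or the budget runs out
 *   - chain multi-descriptor frames through rbuf->fmp until EOP
 *   - hand completed frames to LRO or (*ifp->if_input)()
 *   - refresh consumed mbufs every eight processed descriptors
 */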
1735 bool
1736 ixgbe_rxeof(struct ix_queue *que)
1737 {
1738         struct adapter          *adapter = que->adapter;
1739         struct rx_ring          *rxr = que->rxr;
1740         struct ifnet            *ifp = adapter->ifp;
1741         struct lro_ctrl         *lro = &rxr->lro;
1742         int                     i, nextp, processed = 0;
1743         u32                     staterr = 0;
1744         u32                     count = adapter->rx_process_limit;
1745         union ixgbe_adv_rx_desc *cur;
1746         struct ixgbe_rx_buf     *rbuf, *nbuf;
1747         u16                     pkt_info;
1748
1749         IXGBE_RX_LOCK(rxr);
1750
1751 #ifdef DEV_NETMAP
1752         /* Same as the txeof routine: wakeup clients on intr. */
1753         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1754                 IXGBE_RX_UNLOCK(rxr);
1755                 return (FALSE);
1756         }
1757 #endif /* DEV_NETMAP */
1758
1759         for (i = rxr->next_to_check; count != 0;) {
1760                 struct mbuf     *sendmp, *mp;
1761                 u32             rsc, ptype;
1762                 u16             len;
1763                 u16             vtag = 0;
1764                 bool            eop;
1765  
1766                 /* Sync the ring. */
1767                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1768                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1769
1770                 cur = &rxr->rx_base[i];
1771                 staterr = le32toh(cur->wb.upper.status_error);
1772                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1773
1774                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1775                         break;
1776                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1777                         break;
1778
1779                 count--;
1780                 sendmp = NULL;
1781                 nbuf = NULL;
1782                 rsc = 0;
1783                 cur->wb.upper.status_error = 0;
1784                 rbuf = &rxr->rx_buffers[i];
1785                 mp = rbuf->buf;
1786
1787                 len = le16toh(cur->wb.upper.length);
1788                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1789                     IXGBE_RXDADV_PKTTYPE_MASK;
1790                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1791
1792                 /* Make sure bad packets are discarded */
1793                 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1794 #if __FreeBSD_version >= 1100036
1795                         if (IXGBE_IS_VF(adapter))
1796                                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1797 #endif
1798                         rxr->rx_discarded++;
1799                         ixgbe_rx_discard(rxr, i);
1800                         goto next_desc;
1801                 }
1802
1803                 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1804
                /*
                ** On the 82599 and later, which support a hardware
                ** LRO (called HW RSC), packets need not be fragmented
                ** across sequential descriptors; rather, the next
                ** descriptor of a frame is indicated in bits of the
                ** current descriptor. This also means that we might
                ** process more than one packet at a time, something
                ** that has never been true before; it required
                ** eliminating global chain pointers in favor of what
                ** we are doing here.  -jfv
                */
1817                 if (!eop) {
1818                         /*
1819                         ** Figure out the next descriptor
1820                         ** of this frame.
1821                         */
1822                         if (rxr->hw_rsc == TRUE) {
1823                                 rsc = ixgbe_rsc_count(cur);
1824                                 rxr->rsc_num += (rsc - 1);
1825                         }
1826                         if (rsc) { /* Get hardware index */
1827                                 nextp = ((staterr &
1828                                     IXGBE_RXDADV_NEXTP_MASK) >>
1829                                     IXGBE_RXDADV_NEXTP_SHIFT);
1830                         } else { /* Just sequential */
1831                                 nextp = i + 1;
1832                                 if (nextp == adapter->num_rx_desc)
1833                                         nextp = 0;
1834                         }
1835                         nbuf = &rxr->rx_buffers[nextp];
1836                         prefetch(nbuf);
1837                 }
1838                 /*
1839                 ** Rather than using the fmp/lmp global pointers
1840                 ** we now keep the head of a packet chain in the
1841                 ** buffer struct and pass this along from one
1842                 ** descriptor to the next, until we get EOP.
1843                 */
1844                 mp->m_len = len;
                /*
                ** See if there is a stored head; if so, this buffer
                ** continues a packet chain already in progress.
                */
1849                 sendmp = rbuf->fmp;
1850                 if (sendmp != NULL) {  /* secondary frag */
1851                         rbuf->buf = rbuf->fmp = NULL;
1852                         mp->m_flags &= ~M_PKTHDR;
1853                         sendmp->m_pkthdr.len += mp->m_len;
1854                 } else {
1855                         /*
1856                          * Optimize.  This might be a small packet,
1857                          * maybe just a TCP ACK.  Do a fast copy that
1858                          * is cache aligned into a new mbuf, and
1859                          * leave the old mbuf+cluster for re-use.
1860                          */
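                        /*
                         * IXGBE_RX_COPY_LEN is presumably sized so the
                         * payload fits in an ordinary mbuf from
                         * m_gethdr() after the alignment offset below,
                         * so only small, ACK-sized frames take this
                         * copy path.
                         */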
1861                         if (eop && len <= IXGBE_RX_COPY_LEN) {
1862                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1863                                 if (sendmp != NULL) {
1864                                         sendmp->m_data +=
1865                                             IXGBE_RX_COPY_ALIGN;
1866                                         ixgbe_bcopy(mp->m_data,
1867                                             sendmp->m_data, len);
1868                                         sendmp->m_len = len;
1869                                         rxr->rx_copies++;
1870                                         rbuf->flags |= IXGBE_RX_COPY;
1871                                 }
1872                         }
1873                         if (sendmp == NULL) {
1874                                 rbuf->buf = rbuf->fmp = NULL;
1875                                 sendmp = mp;
1876                         }
1877
1878                         /* first desc of a non-ps chain */
1879                         sendmp->m_flags |= M_PKTHDR;
1880                         sendmp->m_pkthdr.len = mp->m_len;
1881                 }
1882                 ++processed;
1883
1884                 /* Pass the head pointer on */
1885                 if (eop == 0) {
1886                         nbuf->fmp = sendmp;
1887                         sendmp = NULL;
1888                         mp->m_next = nbuf->buf;
1889                 } else { /* Sending this frame */
1890                         sendmp->m_pkthdr.rcvif = ifp;
1891                         rxr->rx_packets++;
1892                         /* capture data for AIM */
1893                         rxr->bytes += sendmp->m_pkthdr.len;
1894                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1895                         /* Process vlan info */
1896                         if ((rxr->vtag_strip) &&
1897                             (staterr & IXGBE_RXD_STAT_VP))
1898                                 vtag = le16toh(cur->wb.upper.vlan);
1899                         if (vtag) {
1900                                 sendmp->m_pkthdr.ether_vtag = vtag;
1901                                 sendmp->m_flags |= M_VLANTAG;
1902                         }
1903                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1904                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
1905
                        /*
                         * In the multiqueue case, the RXCSUM.PCSD bit is
                         * set and never cleared, which means an RSS hash
                         * is available for us to use.
                         */
1911                         if (adapter->num_queues > 1) {
1912                                 sendmp->m_pkthdr.flowid =
1913                                     le32toh(cur->wb.lower.hi_dword.rss);
1914                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {  
1915                                     case IXGBE_RXDADV_RSSTYPE_IPV4:
1916                                         M_HASHTYPE_SET(sendmp,
1917                                             M_HASHTYPE_RSS_IPV4);
1918                                         break;
1919                                     case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1920                                         M_HASHTYPE_SET(sendmp,
1921                                             M_HASHTYPE_RSS_TCP_IPV4);
1922                                         break;
1923                                     case IXGBE_RXDADV_RSSTYPE_IPV6:
1924                                         M_HASHTYPE_SET(sendmp,
1925                                             M_HASHTYPE_RSS_IPV6);
1926                                         break;
1927                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1928                                         M_HASHTYPE_SET(sendmp,
1929                                             M_HASHTYPE_RSS_TCP_IPV6);
1930                                         break;
1931                                     case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1932                                         M_HASHTYPE_SET(sendmp,
1933                                             M_HASHTYPE_RSS_IPV6_EX);
1934                                         break;
1935                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1936                                         M_HASHTYPE_SET(sendmp,
1937                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
1938                                         break;
1939 #if __FreeBSD_version > 1100000
1940                                     case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1941                                         M_HASHTYPE_SET(sendmp,
1942                                             M_HASHTYPE_RSS_UDP_IPV4);
1943                                         break;
1944                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1945                                         M_HASHTYPE_SET(sendmp,
1946                                             M_HASHTYPE_RSS_UDP_IPV6);
1947                                         break;
1948                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1949                                         M_HASHTYPE_SET(sendmp,
1950                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
1951                                         break;
1952 #endif
1953                                     default:
1954                                         M_HASHTYPE_SET(sendmp,
1955                                             M_HASHTYPE_OPAQUE_HASH);
1956                                 }
1957                         } else {
1958                                 sendmp->m_pkthdr.flowid = que->msix;
1959                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1960                         }
1961                 }
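                /*
                 * Note: with a single queue there is no RSS hash, so
                 * above the MSI-X vector number stands in as a stable
                 * flowid and the hash type is marked opaque.
                 */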
1962 next_desc:
1963                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1964                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1965
1966                 /* Advance our pointers to the next descriptor. */
1967                 if (++i == rxr->num_desc)
1968                         i = 0;
1969
1970                 /* Now send to the stack or do LRO */
1971                 if (sendmp != NULL) {
1972                         rxr->next_to_check = i;
1973                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1974                         i = rxr->next_to_check;
1975                 }
1976
                /* Every 8 descriptors we go to refresh mbufs */
1978                 if (processed == 8) {
1979                         ixgbe_refresh_mbufs(rxr, i);
1980                         processed = 0;
1981                 }
1982         }
1983
1984         /* Refresh any remaining buf structs */
1985         if (ixgbe_rx_unrefreshed(rxr))
1986                 ixgbe_refresh_mbufs(rxr, i);
1987
1988         rxr->next_to_check = i;
1989
1990         /*
1991          * Flush any outstanding LRO work
1992          */
1993         tcp_lro_flush_all(lro);
1994
1995         IXGBE_RX_UNLOCK(rxr);
1996
        /*
        ** Still have cleaning to do?
        */
        return ((staterr & IXGBE_RXD_STAT_DD) != 0);
2004 }
2005
2006
2007 /*********************************************************************
2008  *
2009  *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  does not spend time verifying it again.
2012  *
2013  *********************************************************************/
2014 static void
2015 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2016 {
2017         u16     status = (u16) staterr;
2018         u8      errors = (u8) (staterr >> 24);
2019         bool    sctp = false;
2020
2021         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2022             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2023                 sctp = true;
2024
2025         /* IPv4 checksum */
2026         if (status & IXGBE_RXD_STAT_IPCS) {
2027                 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
2028                 /* IP Checksum Good */
2029                 if (!(errors & IXGBE_RXD_ERR_IPE))
2030                         mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
2031         }
2032         /* TCP/UDP/SCTP checksum */
2033         if (status & IXGBE_RXD_STAT_L4CS) {
2034                 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
2035                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2036                         mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
2037                         if (!sctp)
2038                                 mp->m_pkthdr.csum_data = htons(0xffff);
2039                 }
2040         }
2041 }
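
/*
 * Illustrative outcome: for a good IPv4/TCP frame the mbuf leaves here
 * with CSUM_L3_CALC | CSUM_L3_VALID | CSUM_L4_CALC | CSUM_L4_VALID set
 * and csum_data = 0xffff, so the stack skips both checksum checks.
 */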
2042
2043 /********************************************************************
2044  * Manage DMA'able memory.
2045  *******************************************************************/
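/*
 * ixgbe_dmamap_cb() is the bus_dmamap_load() callback: busdma invokes it
 * with the resolved segment list, and on success we stash the single
 * segment's bus address through the opaque arg pointer for the caller.
 */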
2046 static void
2047 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2048 {
2049         if (error)
2050                 return;
2051         *(bus_addr_t *) arg = segs->ds_addr;
2052         return;
2053 }
2054
2055 int
2056 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2057                 struct ixgbe_dma_alloc *dma, int mapflags)
2058 {
2059         device_t dev = adapter->dev;
2060         int             r;
2061
2062         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2063                                DBA_ALIGN, 0,    /* alignment, bounds */
2064                                BUS_SPACE_MAXADDR,       /* lowaddr */
2065                                BUS_SPACE_MAXADDR,       /* highaddr */
2066                                NULL, NULL,      /* filter, filterarg */
2067                                size,    /* maxsize */
2068                                1,       /* nsegments */
2069                                size,    /* maxsegsize */
2070                                BUS_DMA_ALLOCNOW,        /* flags */
2071                                NULL,    /* lockfunc */
2072                                NULL,    /* lockfuncarg */
2073                                &dma->dma_tag);
2074         if (r != 0) {
                device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create "
                    "failed; error %u\n", r);
2077                 goto fail_0;
2078         }
2079         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2080                              BUS_DMA_NOWAIT, &dma->dma_map);
2081         if (r != 0) {
                device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc "
                    "failed; error %u\n", r);
2084                 goto fail_1;
2085         }
2086         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2087                             size,
2088                             ixgbe_dmamap_cb,
2089                             &dma->dma_paddr,
2090                             mapflags | BUS_DMA_NOWAIT);
2091         if (r != 0) {
                device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load "
                    "failed; error %u\n", r);
2094                 goto fail_2;
2095         }
2096         dma->dma_size = size;
2097         return (0);
2098 fail_2:
2099         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2100 fail_1:
2101         bus_dma_tag_destroy(dma->dma_tag);
2102 fail_0:
2103         dma->dma_tag = NULL;
2104         return (r);
2105 }
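
/*
 * Typical usage (a sketch, mirroring ixgbe_allocate_queues() below):
 *
 *      if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *              return (ENOMEM);
 *      txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
 *      ...
 *      ixgbe_dma_free(adapter, &txr->txdma);
 */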
2106
2107 void
2108 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2109 {
2110         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2111             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2112         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2113         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2114         bus_dma_tag_destroy(dma->dma_tag);
2115 }
2116
2117
2118 /*********************************************************************
2119  *
2120  *  Allocate memory for the transmit and receive rings, and then
2121  *  the descriptors associated with each, called only once at attach.
2122  *
2123  **********************************************************************/
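/*
 * Allocation order matters for the unwind paths at the bottom: queue
 * structs, then TX rings, then RX rings, then per-ring descriptor DMA
 * areas; each failure label frees what was allocated before it, in
 * reverse order.
 */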
2124 int
2125 ixgbe_allocate_queues(struct adapter *adapter)
2126 {
2127         device_t        dev = adapter->dev;
2128         struct ix_queue *que;
2129         struct tx_ring  *txr;
2130         struct rx_ring  *rxr;
2131         int rsize, tsize, error = IXGBE_SUCCESS;
2132         int txconf = 0, rxconf = 0;
2133 #ifdef PCI_IOV
2134         enum ixgbe_iov_mode iov_mode;
2135 #endif
2136
2137         /* First allocate the top level queue structs */
2138         if (!(adapter->queues =
2139             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2140             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2141                 device_printf(dev, "Unable to allocate queue memory\n");
2142                 error = ENOMEM;
2143                 goto fail;
2144         }
2145
        /* Next allocate the TX ring struct memory */
2147         if (!(adapter->tx_rings =
2148             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2149             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2150                 device_printf(dev, "Unable to allocate TX ring memory\n");
2151                 error = ENOMEM;
2152                 goto tx_fail;
2153         }
2154
        /* Then allocate the RX ring struct memory */
2156         if (!(adapter->rx_rings =
2157             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2158             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2159                 device_printf(dev, "Unable to allocate RX ring memory\n");
2160                 error = ENOMEM;
2161                 goto rx_fail;
2162         }
2163
2164         /* For the ring itself */
2165         tsize = roundup2(adapter->num_tx_desc *
2166             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2167
2168 #ifdef PCI_IOV
2169         iov_mode = ixgbe_get_iov_mode(adapter);
2170         adapter->pool = ixgbe_max_vfs(iov_mode);
2171 #else
2172         adapter->pool = 0;
2173 #endif
        /*
         * Now set up the TX queues. txconf is needed to handle the
         * possibility that things fail midcourse, in which case we
         * need to unwind the memory allocations gracefully.
         */
2179         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2180                 /* Set up some basics */
2181                 txr = &adapter->tx_rings[i];
2182                 txr->adapter = adapter;
2183 #ifdef PCI_IOV
2184                 txr->me = ixgbe_pf_que_index(iov_mode, i);
2185 #else
2186                 txr->me = i;
2187 #endif
2188                 txr->num_desc = adapter->num_tx_desc;
2189
2190                 /* Initialize the TX side lock */
2191                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2192                     device_get_nameunit(dev), txr->me);
2193                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2194
2195                 if (ixgbe_dma_malloc(adapter, tsize,
2196                         &txr->txdma, BUS_DMA_NOWAIT)) {
2197                         device_printf(dev,
2198                             "Unable to allocate TX Descriptor memory\n");
2199                         error = ENOMEM;
2200                         goto err_tx_desc;
2201                 }
2202                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2203                 bzero((void *)txr->tx_base, tsize);
2204
2205                 /* Now allocate transmit buffers for the ring */
2206                 if (ixgbe_allocate_transmit_buffers(txr)) {
2207                         device_printf(dev,
2208                             "Critical Failure setting up transmit buffers\n");
2209                         error = ENOMEM;
2210                         goto err_tx_desc;
2211                 }
2212 #ifndef IXGBE_LEGACY_TX
2213                 /* Allocate a buf ring */
2214                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2215                     M_WAITOK, &txr->tx_mtx);
2216                 if (txr->br == NULL) {
2217                         device_printf(dev,
2218                             "Critical Failure setting up buf ring\n");
2219                         error = ENOMEM;
2220                         goto err_tx_desc;
2221                 }
2222 #endif
2223         }
2224
2225         /*
2226          * Next the RX queues...
2227          */ 
2228         rsize = roundup2(adapter->num_rx_desc *
2229             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2230         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2231                 rxr = &adapter->rx_rings[i];
2232                 /* Set up some basics */
2233                 rxr->adapter = adapter;
2234 #ifdef PCI_IOV
2235                 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2236 #else
2237                 rxr->me = i;
2238 #endif
2239                 rxr->num_desc = adapter->num_rx_desc;
2240
2241                 /* Initialize the RX side lock */
2242                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2243                     device_get_nameunit(dev), rxr->me);
2244                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2245
2246                 if (ixgbe_dma_malloc(adapter, rsize,
2247                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2248                         device_printf(dev,
                            "Unable to allocate RX Descriptor memory\n");
2250                         error = ENOMEM;
2251                         goto err_rx_desc;
2252                 }
2253                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2254                 bzero((void *)rxr->rx_base, rsize);
2255
                /* Allocate receive buffers for the ring */
2257                 if (ixgbe_allocate_receive_buffers(rxr)) {
2258                         device_printf(dev,
2259                             "Critical Failure setting up receive buffers\n");
2260                         error = ENOMEM;
2261                         goto err_rx_desc;
2262                 }
2263         }
2264
2265         /*
2266         ** Finally set up the queue holding structs
2267         */
2268         for (int i = 0; i < adapter->num_queues; i++) {
2269                 que = &adapter->queues[i];
2270                 que->adapter = adapter;
2271                 que->me = i;
2272                 que->txr = &adapter->tx_rings[i];
2273                 que->rxr = &adapter->rx_rings[i];
2274         }
2275
2276         return (0);
2277
2278 err_rx_desc:
2279         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2280                 ixgbe_dma_free(adapter, &rxr->rxdma);
2281 err_tx_desc:
2282         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2283                 ixgbe_dma_free(adapter, &txr->txdma);
2284         free(adapter->rx_rings, M_DEVBUF);
2285 rx_fail:
2286         free(adapter->tx_rings, M_DEVBUF);
2287 tx_fail:
2288         free(adapter->queues, M_DEVBUF);
2289 fail:
2290         return (error);
2291 }