sys/dev/ixgbe/ix_txrx.c (FreeBSD releng/10.3)
1 /******************************************************************************
2
3   Copyright (c) 2001-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40
41 #include "ixgbe.h"
42
43 #ifdef  RSS
44 #include <net/rss_config.h>
45 #include <netinet/in_rss.h>
46 #endif
47
48 #ifdef DEV_NETMAP
49 #include <net/netmap.h>
50 #include <sys/selinfo.h>
51 #include <dev/netmap/netmap_kern.h>
52
53 extern int ix_crcstrip;
54 #endif
55
56 /*
57 ** HW RSC control:
58 **  This feature only works with
59 **  IPv4 and only on 82599 and later
60 **  hardware.  It also breaks IP
61 **  forwarding, and unlike software LRO
62 **  the stack cannot control it.  For
63 **  these reasons it is left off by
64 **  default, with no tunable; enabling
65 **  it requires setting the flag below
66 **  and recompiling.
67 */
68 static bool ixgbe_rsc_enable = FALSE;
69
70 #ifdef IXGBE_FDIR
71 /*
72 ** For Flow Director: this is the
73 ** TX packet sample rate for the
74 ** filter pool; at the default of 20,
75 ** every 20th packet is probed.
76 **
77 ** Setting this to 0 disables the
78 ** feature.
79 */
80 static int atr_sample_rate = 20;
81 #endif
82
83 /*********************************************************************
84  *  Local Function prototypes
85  *********************************************************************/
86 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
87 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
88 static int      ixgbe_setup_receive_ring(struct rx_ring *);
89 static void     ixgbe_free_receive_buffers(struct rx_ring *);
90
91 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
92 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
93 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
94 static int      ixgbe_tx_ctx_setup(struct tx_ring *,
95                     struct mbuf *, u32 *, u32 *);
96 static int      ixgbe_tso_setup(struct tx_ring *,
97                     struct mbuf *, u32 *, u32 *);
98 #ifdef IXGBE_FDIR
99 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
100 #endif
101 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
102 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
103                     struct mbuf *, u32);
104
105 #ifdef IXGBE_LEGACY_TX
106 /*********************************************************************
107  *  Transmit entry point
108  *
109  *  ixgbe_start is called by the stack to initiate a transmit.
110  *  The driver will remain in this routine as long as there are
111  *  packets to transmit and transmit resources are available.
112  *  If resources are not available, the stack is notified and
113  *  the packet is requeued.
114  **********************************************************************/
115
116 void
117 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
118 {
119         struct mbuf    *m_head;
120         struct adapter *adapter = txr->adapter;
121
122         IXGBE_TX_LOCK_ASSERT(txr);
123
124         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
125                 return;
126         if (!adapter->link_active)
127                 return;
128
129         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
130                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
131                         break;
132
133                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
134                 if (m_head == NULL)
135                         break;
136
137                 if (ixgbe_xmit(txr, &m_head)) {
138                         if (m_head != NULL)
139                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
140                         break;
141                 }
142                 /* Send a copy of the frame to the BPF listener */
143                 ETHER_BPF_MTAP(ifp, m_head);
144         }
145         return;
146 }
147
148 /*
149  * Legacy TX start - called by the stack; this
150  * always uses the first tx ring, and should
151  * not be used with multiqueue tx enabled.
152  */
153 void
154 ixgbe_start(struct ifnet *ifp)
155 {
156         struct adapter *adapter = ifp->if_softc;
157         struct tx_ring  *txr = adapter->tx_rings;
158
159         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
160                 IXGBE_TX_LOCK(txr);
161                 ixgbe_start_locked(txr, ifp);
162                 IXGBE_TX_UNLOCK(txr);
163         }
164         return;
165 }
166
167 #else /* ! IXGBE_LEGACY_TX */
168
169 /*
170 ** Multiqueue Transmit Entry Point
171 ** (if_transmit function)
172 */
173 int
174 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
175 {
176         struct adapter  *adapter = ifp->if_softc;
177         struct ix_queue *que;
178         struct tx_ring  *txr;
179         int             i, err = 0;
180 #ifdef  RSS
181         uint32_t bucket_id;
182 #endif
183
184         /*
185          * When doing RSS, map it to the same outbound queue
186          * as the incoming flow would be mapped to.
187          *
188          * If everything is set up correctly, it should be the
189          * same bucket that the current CPU is mapped to.
190          */
191         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
192 #ifdef  RSS
193                 if (rss_hash2bucket(m->m_pkthdr.flowid,
194                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
195                         i = bucket_id % adapter->num_queues;
196 #ifdef IXGBE_DEBUG
197                         if (bucket_id >= adapter->num_queues)
198                                 if_printf(ifp, "bucket_id (%d) >= num_queues "
199                                     "(%d)\n", bucket_id, adapter->num_queues);
200 #endif
201                 } else 
202 #endif
203                         i = m->m_pkthdr.flowid % adapter->num_queues;
204         } else
205                 i = curcpu % adapter->num_queues;
206
207         /* Check for a hung queue and pick alternative */
208         if (((1 << i) & adapter->active_queues) == 0)
209                 i = ffsl(adapter->active_queues) - 1; /* ffsl() is 1-based */
210
211         txr = &adapter->tx_rings[i];
212         que = &adapter->queues[i];
213
214         err = drbr_enqueue(ifp, txr->br, m);
215         if (err)
216                 return (err);
217         if (IXGBE_TX_TRYLOCK(txr)) {
218                 ixgbe_mq_start_locked(ifp, txr);
219                 IXGBE_TX_UNLOCK(txr);
220         } else
221                 taskqueue_enqueue(que->tq, &txr->txq_task);
222
223         return (0);
224 }
225
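/*
** Drain the buf_ring for this queue: packets are pulled off and handed
** to ixgbe_xmit() until the ring is empty, descriptors run out, or the
** interface stops running.  On newer FreeBSD (the >= 901504 path below)
** the drbr peek/advance/putback protocol is used, so a packet that fails
** to map is put back on the ring rather than lost.
*/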
226 int
227 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
228 {
229         struct adapter  *adapter = txr->adapter;
230         struct mbuf     *next;
231         int             enqueued = 0, err = 0;
232
233         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
234             adapter->link_active == 0)
235                 return (ENETDOWN);
236
237         /* Process the queue */
238 #if __FreeBSD_version < 901504
239         next = drbr_dequeue(ifp, txr->br);
240         while (next != NULL) {
241                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
242                         if (next != NULL)
243                                 err = drbr_enqueue(ifp, txr->br, next);
244 #else
245         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
246                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
247                         if (next == NULL) {
248                                 drbr_advance(ifp, txr->br);
249                         } else {
250                                 drbr_putback(ifp, txr->br, next);
251                         }
252 #endif
253                         break;
254                 }
255 #if __FreeBSD_version >= 901504
256                 drbr_advance(ifp, txr->br);
257 #endif
258                 enqueued++;
259 #if 0 // this is VF-only
260 #if __FreeBSD_version >= 1100036
261                 /*
262                  * Since we're looking at the tx ring, we can check
263                  * to see if we're a VF by examining our tail register
264                  * address.
265                  */
266                 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
267                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
268 #endif
269 #endif
270                 /* Send a copy of the frame to the BPF listener */
271                 ETHER_BPF_MTAP(ifp, next);
272                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
273                         break;
274 #if __FreeBSD_version < 901504
275                 next = drbr_dequeue(ifp, txr->br);
276 #endif
277         }
278
279         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
280                 ixgbe_txeof(txr);
281
282         return (err);
283 }
284
285 /*
286  * Called from a taskqueue to drain queued transmit packets.
287  */
288 void
289 ixgbe_deferred_mq_start(void *arg, int pending)
290 {
291         struct tx_ring *txr = arg;
292         struct adapter *adapter = txr->adapter;
293         struct ifnet *ifp = adapter->ifp;
294
295         IXGBE_TX_LOCK(txr);
296         if (!drbr_empty(ifp, txr->br))
297                 ixgbe_mq_start_locked(ifp, txr);
298         IXGBE_TX_UNLOCK(txr);
299 }
300
301 /*
302  * Flush all ring buffers
303  */
304 void
305 ixgbe_qflush(struct ifnet *ifp)
306 {
307         struct adapter  *adapter = ifp->if_softc;
308         struct tx_ring  *txr = adapter->tx_rings;
309         struct mbuf     *m;
310
311         for (int i = 0; i < adapter->num_queues; i++, txr++) {
312                 IXGBE_TX_LOCK(txr);
313                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
314                         m_freem(m);
315                 IXGBE_TX_UNLOCK(txr);
316         }
317         if_qflush(ifp);
318 }
319 #endif /* IXGBE_LEGACY_TX */
320
321
322 /*********************************************************************
323  *
324  *  This routine maps the mbufs to tx descriptors, allowing the
325  *  TX engine to transmit the packets. 
326  *      - return 0 on success, positive on failure
327  *
328  **********************************************************************/
329
330 static int
331 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
332 {
333         struct adapter  *adapter = txr->adapter;
334         u32             olinfo_status = 0, cmd_type_len;
335         int             i, j, error, nsegs;
336         int             first;
337         bool            remap = TRUE;
338         struct mbuf     *m_head;
339         bus_dma_segment_t segs[adapter->num_segs];
340         bus_dmamap_t    map;
341         struct ixgbe_tx_buf *txbuf;
342         union ixgbe_adv_tx_desc *txd = NULL;
343
344         m_head = *m_headp;
345
346         /* Basic descriptor defines */
347         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
348             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
349
350         if (m_head->m_flags & M_VLANTAG)
351                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
352
353         /*
354          * Important to capture the first descriptor
355          * used, because its tx_buffer entry will later
356          * record the EOP descriptor the hardware reports back on
357          */
358         first = txr->next_avail_desc;
359         txbuf = &txr->tx_buffers[first];
360         map = txbuf->map;
361
362         /*
363          * Map the packet for DMA.
364          */
365 retry:
366         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
367             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
368
369         if (__predict_false(error)) {
370                 struct mbuf *m;
371
372                 switch (error) {
373                 case EFBIG:
374                         /* Try it again? - one try */
375                         if (remap == TRUE) {
376                                 remap = FALSE;
377                                 /*
378                                  * XXX: m_defrag will choke on
379                                  * non-MCLBYTES-sized clusters
380                                  */
381                                 m = m_defrag(*m_headp, M_NOWAIT);
382                                 if (m == NULL) {
383                                         adapter->mbuf_defrag_failed++;
384                                         m_freem(*m_headp);
385                                         *m_headp = NULL;
386                                         return (ENOBUFS);
387                                 }
388                                 *m_headp = m;
389                                 goto retry;
390                         } else
391                                 return (error);
392                 case ENOMEM:
393                         txr->no_tx_dma_setup++;
394                         return (error);
395                 default:
396                         txr->no_tx_dma_setup++;
397                         m_freem(*m_headp);
398                         *m_headp = NULL;
399                         return (error);
400                 }
401         }
402
403         /* Make certain there are enough descriptors */
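        /*
         * Two slots of slack: one covers the offload context descriptor
         * consumed by ixgbe_tx_ctx_setup() below, and the second is
         * (presumably) headroom so the ring is never filled completely.
         */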
404         if (nsegs > txr->tx_avail - 2) {
405                 txr->no_desc_avail++;
406                 bus_dmamap_unload(txr->txtag, map);
407                 return (ENOBUFS);
408         }
409         m_head = *m_headp;
410
411         /*
412          * Set up the appropriate offload context;
413          * this will consume the first descriptor.
414          */
415         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
416         if (__predict_false(error)) {
417                 if (error == ENOBUFS)
418                         *m_headp = NULL;
419                 return (error);
420         }
421
422 #ifdef IXGBE_FDIR
423         /* Do the flow director magic */
424         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
425                 ++txr->atr_count;
426                 if (txr->atr_count >= atr_sample_rate) {
427                         ixgbe_atr(txr, m_head);
428                         txr->atr_count = 0;
429                 }
430         }
431 #endif
432
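        /*
         * IXGBE_ADVTXD_CC ("check context") ties the data descriptors to
         * the most recently programmed offload context descriptor (see
         * ixgbe_tx_ctx_setup() above); the driver sets it unconditionally.
         */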
433         olinfo_status |= IXGBE_ADVTXD_CC;
434         i = txr->next_avail_desc;
435         for (j = 0; j < nsegs; j++) {
436                 bus_size_t seglen;
437                 bus_addr_t segaddr;
438
439                 txbuf = &txr->tx_buffers[i];
440                 txd = &txr->tx_base[i];
441                 seglen = segs[j].ds_len;
442                 segaddr = htole64(segs[j].ds_addr);
443
444                 txd->read.buffer_addr = segaddr;
445                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
446                     cmd_type_len |seglen);
447                 txd->read.olinfo_status = htole32(olinfo_status);
448
449                 if (++i == txr->num_desc)
450                         i = 0;
451         }
452
453         txd->read.cmd_type_len |=
454             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
455         txr->tx_avail -= nsegs;
456         txr->next_avail_desc = i;
457
458         txbuf->m_head = m_head;
459         /*
460          * Here we swap the maps so that the last descriptor,
461          * which gets the completion interrupt, keeps the real
462          * (loaded) map, and the first descriptor gets the unused
463          * map from this entry.
464          */
465         txr->tx_buffers[first].map = txbuf->map;
466         txbuf->map = map;
467         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
468
469         /* Set the EOP descriptor that will be marked done */
470         txbuf = &txr->tx_buffers[first];
471         txbuf->eop = txd;
472
473         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
474             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
475         /*
476          * Advance the Transmit Descriptor Tail (TDT); this tells the
477          * hardware that this frame is available to transmit.
478          */
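        /*
         * Note that the PREWRITE sync of the descriptor ring above happens
         * before this tail write, so the hardware never fetches a
         * half-written descriptor from the newly exposed slots.
         */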
479         ++txr->total_packets;
480         IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
481
482         /* Mark queue as having work */
483         if (txr->busy == 0)
484                 txr->busy = 1;
485
486         return (0);
487 }
488
489
490 /*********************************************************************
491  *
492  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
493  *  the information needed to transmit a packet on the wire. This is
494  *  called only once at attach, setup is done every reset.
495  *
496  **********************************************************************/
497 int
498 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
499 {
500         struct adapter *adapter = txr->adapter;
501         device_t dev = adapter->dev;
502         struct ixgbe_tx_buf *txbuf;
503         int error, i;
504
505         /*
506          * Setup DMA descriptor areas.
507          */
508         if ((error = bus_dma_tag_create(
509                                bus_get_dma_tag(adapter->dev),   /* parent */
510                                1, 0,            /* alignment, bounds */
511                                BUS_SPACE_MAXADDR,       /* lowaddr */
512                                BUS_SPACE_MAXADDR,       /* highaddr */
513                                NULL, NULL,              /* filter, filterarg */
514                                IXGBE_TSO_SIZE,          /* maxsize */
515                                adapter->num_segs,       /* nsegments */
516                                PAGE_SIZE,               /* maxsegsize */
517                                0,                       /* flags */
518                                NULL,                    /* lockfunc */
519                                NULL,                    /* lockfuncarg */
520                                &txr->txtag))) {
521                 device_printf(dev,"Unable to allocate TX DMA tag\n");
522                 goto fail;
523         }
524
525         if (!(txr->tx_buffers =
526             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
527             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
528                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
529                 error = ENOMEM;
530                 goto fail;
531         }
532
533         /* Create the descriptor buffer dma maps */
534         txbuf = txr->tx_buffers;
535         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
536                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
537                 if (error != 0) {
538                         device_printf(dev, "Unable to create TX DMA map\n");
539                         goto fail;
540                 }
541         }
542
543         return 0;
544 fail:
545         /* We free all, it handles case where we are in the middle */
546         ixgbe_free_transmit_structures(adapter);
547         return (error);
548 }
549
550 /*********************************************************************
551  *
552  *  Initialize a transmit ring.
553  *
554  **********************************************************************/
555 static void
556 ixgbe_setup_transmit_ring(struct tx_ring *txr)
557 {
558         struct adapter *adapter = txr->adapter;
559         struct ixgbe_tx_buf *txbuf;
560 #ifdef DEV_NETMAP
561         struct netmap_adapter *na = NA(adapter->ifp);
562         struct netmap_slot *slot;
563 #endif /* DEV_NETMAP */
564
565         /* Clear the old ring contents */
566         IXGBE_TX_LOCK(txr);
567 #ifdef DEV_NETMAP
568         /*
569          * (under lock): if in netmap mode, do some consistency
570          * checks and set slot to entry 0 of the netmap ring.
571          */
572         slot = netmap_reset(na, NR_TX, txr->me, 0);
573 #endif /* DEV_NETMAP */
574         bzero((void *)txr->tx_base,
575               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
576         /* Reset indices */
577         txr->next_avail_desc = 0;
578         txr->next_to_clean = 0;
579
580         /* Free any existing tx buffers. */
581         txbuf = txr->tx_buffers;
582         for (int i = 0; i < txr->num_desc; i++, txbuf++) {
583                 if (txbuf->m_head != NULL) {
584                         bus_dmamap_sync(txr->txtag, txbuf->map,
585                             BUS_DMASYNC_POSTWRITE);
586                         bus_dmamap_unload(txr->txtag, txbuf->map);
587                         m_freem(txbuf->m_head);
588                         txbuf->m_head = NULL;
589                 }
590 #ifdef DEV_NETMAP
591                 /*
592                  * In netmap mode, set the map for the packet buffer.
593                  * NOTE: Some drivers (not this one) also need to set
594                  * the physical buffer address in the NIC ring.
595                  * Slots in the netmap ring (indexed by "si") are
596                  * kring->nkr_hwofs positions "ahead" wrt the
597                  * corresponding slot in the NIC ring. In some drivers
598                  * (not here) nkr_hwofs can be negative. Function
599                  * netmap_idx_n2k() handles wraparounds properly.
600                  */
601                 if (slot) {
602                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
603                         netmap_load_map(na, txr->txtag,
604                             txbuf->map, NMB(na, slot + si));
605                 }
606 #endif /* DEV_NETMAP */
607                 /* Clear the EOP descriptor pointer */
608                 txbuf->eop = NULL;
609         }
610
611 #ifdef IXGBE_FDIR
612         /* Set the rate at which we sample packets */
613         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
614                 txr->atr_sample = atr_sample_rate;
615 #endif
616
617         /* Set number of descriptors available */
618         txr->tx_avail = adapter->num_tx_desc;
619
620         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
621             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
622         IXGBE_TX_UNLOCK(txr);
623 }
624
625 /*********************************************************************
626  *
627  *  Initialize all transmit rings.
628  *
629  **********************************************************************/
630 int
631 ixgbe_setup_transmit_structures(struct adapter *adapter)
632 {
633         struct tx_ring *txr = adapter->tx_rings;
634
635         for (int i = 0; i < adapter->num_queues; i++, txr++)
636                 ixgbe_setup_transmit_ring(txr);
637
638         return (0);
639 }
640
641 /*********************************************************************
642  *
643  *  Free all transmit rings.
644  *
645  **********************************************************************/
646 void
647 ixgbe_free_transmit_structures(struct adapter *adapter)
648 {
649         struct tx_ring *txr = adapter->tx_rings;
650
651         for (int i = 0; i < adapter->num_queues; i++, txr++) {
652                 IXGBE_TX_LOCK(txr);
653                 ixgbe_free_transmit_buffers(txr);
654                 ixgbe_dma_free(adapter, &txr->txdma);
655                 IXGBE_TX_UNLOCK(txr);
656                 IXGBE_TX_LOCK_DESTROY(txr);
657         }
658         free(adapter->tx_rings, M_DEVBUF);
659 }
660
661 /*********************************************************************
662  *
663  *  Free transmit ring related data structures.
664  *
665  **********************************************************************/
666 static void
667 ixgbe_free_transmit_buffers(struct tx_ring *txr)
668 {
669         struct adapter *adapter = txr->adapter;
670         struct ixgbe_tx_buf *tx_buffer;
671         int             i;
672
673         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
674
675         if (txr->tx_buffers == NULL)
676                 return;
677
678         tx_buffer = txr->tx_buffers;
679         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
680                 if (tx_buffer->m_head != NULL) {
681                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
682                             BUS_DMASYNC_POSTWRITE);
683                         bus_dmamap_unload(txr->txtag,
684                             tx_buffer->map);
685                         m_freem(tx_buffer->m_head);
686                         tx_buffer->m_head = NULL;
687                         if (tx_buffer->map != NULL) {
688                                 bus_dmamap_destroy(txr->txtag,
689                                     tx_buffer->map);
690                                 tx_buffer->map = NULL;
691                         }
692                 } else if (tx_buffer->map != NULL) {
693                         bus_dmamap_unload(txr->txtag,
694                             tx_buffer->map);
695                         bus_dmamap_destroy(txr->txtag,
696                             tx_buffer->map);
697                         tx_buffer->map = NULL;
698                 }
699         }
700 #ifdef IXGBE_LEGACY_TX
701         if (txr->br != NULL)
702                 buf_ring_free(txr->br, M_DEVBUF);
703 #endif
704         if (txr->tx_buffers != NULL) {
705                 free(txr->tx_buffers, M_DEVBUF);
706                 txr->tx_buffers = NULL;
707         }
708         if (txr->txtag != NULL) {
709                 bus_dma_tag_destroy(txr->txtag);
710                 txr->txtag = NULL;
711         }
712         return;
713 }
714
715 /*********************************************************************
716  *
717  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
718  *
719  **********************************************************************/
720
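/*
** Field packing assumed below, going by the shift macros used in this
** file (a sketch, not a datasheet quote):
**   vlan_macip_lens: [31:16] VLAN tag, [15:9] MAC hdr len, [8:0] IP hdr len
**   type_tucmd_mlhl: descriptor type (CTXT), DEXT and the L3/L4 TUCMD flags
**   mss_l4len_idx:   MSS and L4 header length (used only for TSO)
*/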
721 static int
722 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
723     u32 *cmd_type_len, u32 *olinfo_status)
724 {
725         struct adapter *adapter = txr->adapter;
726         struct ixgbe_adv_tx_context_desc *TXD;
727         struct ether_vlan_header *eh;
728 #ifdef INET
729         struct ip *ip;
730 #endif
731 #ifdef INET6
732         struct ip6_hdr *ip6;
733 #endif
734         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
735         int     ehdrlen, ip_hlen = 0;
736         u16     etype;
737         u8      ipproto = 0;
738         int     offload = TRUE;
739         int     ctxd = txr->next_avail_desc;
740         u16     vtag = 0;
741         caddr_t l3d;
742
743
744         /* First check if TSO is to be used */
745         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO|CSUM_IP6_TSO))
746                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
747
748         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
749                 offload = FALSE;
750
751         /* Indicate the whole packet as payload when not doing TSO */
752         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
753
754         /* Now ready a context descriptor */
755         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
756
757         /*
758         ** In advanced descriptors the vlan tag must 
759         ** be placed into the context descriptor. Hence
760         ** we need to make one even if not doing offloads.
761         */
762         if (mp->m_flags & M_VLANTAG) {
763                 vtag = htole16(mp->m_pkthdr.ether_vtag);
764                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
765         } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
766                 return (0);
767
768         /*
769          * Determine where frame payload starts.
770          * Jump over vlan headers if already present,
771          * helpful for QinQ too.
772          */
773         eh = mtod(mp, struct ether_vlan_header *);
774         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
775                 etype = ntohs(eh->evl_proto);
776                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
777         } else {
778                 etype = ntohs(eh->evl_encap_proto);
779                 ehdrlen = ETHER_HDR_LEN;
780         }
781
782         /* Set the ether header length */
783         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
784
785         if (offload == FALSE)
786                 goto no_offloads;
787
788         /*
789          * If the first mbuf only includes the ethernet header, jump to the next one
790          * XXX: This assumes the stack splits mbufs containing headers on header boundaries
791          * XXX: And assumes the entire IP header is contained in one mbuf
792          */
793         if (mp->m_len == ehdrlen && mp->m_next)
794                 l3d = mtod(mp->m_next, caddr_t);
795         else
796                 l3d = mtod(mp, caddr_t) + ehdrlen;
797
798         switch (etype) {
799 #ifdef INET
800                 case ETHERTYPE_IP:
801                         ip = (struct ip *)(l3d);
802                         ip_hlen = ip->ip_hl << 2;
803                         ipproto = ip->ip_p;
804                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
805                         /* Insert IPv4 checksum into data descriptors */
806                         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
807                                 ip->ip_sum = 0;
808                                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
809                         }
810                         break;
811 #endif
812 #ifdef INET6
813                 case ETHERTYPE_IPV6:
814                         ip6 = (struct ip6_hdr *)(l3d);
815                         ip_hlen = sizeof(struct ip6_hdr);
816                         ipproto = ip6->ip6_nxt;
817                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
818                         break;
819 #endif
820                 default:
821                         offload = FALSE;
822                         break;
823         }
824
825         vlan_macip_lens |= ip_hlen;
826
827         /* No support for offloads for non-L4 next headers */
828         switch (ipproto) {
829                 case IPPROTO_TCP:
830                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
831                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
832                         else
833                                 offload = false;
834                         break;
835                 case IPPROTO_UDP:
836                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
837                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
838                         else
839                                 offload = false;
840                         break;
841                 case IPPROTO_SCTP:
842                         if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
843                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
844                         else
845                                 offload = false;
846                         break;
847                 default:
848                         offload = false;
849                         break;
850         }
851
852         if (offload) /* Insert L4 checksum into data descriptors */
853                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
854
855 no_offloads:
856         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
857
858         /* Now copy bits into descriptor */
859         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
860         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
861         TXD->seqnum_seed = htole32(0);
862         TXD->mss_l4len_idx = htole32(0);
863
864         /* We've consumed the first desc, adjust counters */
865         if (++ctxd == txr->num_desc)
866                 ctxd = 0;
867         txr->next_avail_desc = ctxd;
868         --txr->tx_avail;
869
870         return (0);
871 }
872
873 /**********************************************************************
874  *
875  *  Setup work for hardware segmentation offload (TSO) on
876  *  adapters using advanced tx descriptors
877  *
878  **********************************************************************/
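/*
** Unlike ixgbe_tx_ctx_setup(), this path reads the IP and TCP headers
** directly at mp->m_data + ehdrlen, so it assumes the whole
** Ethernet/IP/TCP header chain is resident in the first mbuf.
*/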
879 static int
880 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
881     u32 *cmd_type_len, u32 *olinfo_status)
882 {
883         struct ixgbe_adv_tx_context_desc *TXD;
884         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
885         u32 mss_l4len_idx = 0, paylen;
886         u16 vtag = 0, eh_type;
887         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
888         struct ether_vlan_header *eh;
889 #ifdef INET6
890         struct ip6_hdr *ip6;
891 #endif
892 #ifdef INET
893         struct ip *ip;
894 #endif
895         struct tcphdr *th;
896
897         /*
898          * Determine where frame payload starts.
899          * Jump over vlan headers if already present
900          */
901         eh = mtod(mp, struct ether_vlan_header *);
902         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
903                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
904                 eh_type = eh->evl_proto;
905         } else {
906                 ehdrlen = ETHER_HDR_LEN;
907                 eh_type = eh->evl_encap_proto;
908         }
909
910         switch (ntohs(eh_type)) {
911 #ifdef INET6
912         case ETHERTYPE_IPV6:
913                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
914                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
915                 if (ip6->ip6_nxt != IPPROTO_TCP)
916                         return (ENXIO);
917                 ip_hlen = sizeof(struct ip6_hdr);
918                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
919                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
920                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
921                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
922                 break;
923 #endif
924 #ifdef INET
925         case ETHERTYPE_IP:
926                 ip = (struct ip *)(mp->m_data + ehdrlen);
927                 if (ip->ip_p != IPPROTO_TCP)
928                         return (ENXIO);
929                 ip->ip_sum = 0;
930                 ip_hlen = ip->ip_hl << 2;
931                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
932                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
933                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
934                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
935                 /* Tell transmit desc to also do IPv4 checksum. */
936                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
937                 break;
938 #endif
939         default:
940                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
941                     __func__, ntohs(eh_type));
942                 break;
943         }
944
945         ctxd = txr->next_avail_desc;
946         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
947
948         tcp_hlen = th->th_off << 2;
949
950         /* This is used in the transmit desc in encap */
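        /*
         * For TSO, PAYLEN carries only the TCP payload length; the
         * non-TSO path in ixgbe_tx_ctx_setup() reports the whole
         * packet length instead.
         */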
951         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
952
953         /* VLAN MACLEN IPLEN */
954         if (mp->m_flags & M_VLANTAG) {
955                 vtag = htole16(mp->m_pkthdr.ether_vtag);
956                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
957         }
958
959         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
960         vlan_macip_lens |= ip_hlen;
961         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
962
963         /* ADV DTYPE TUCMD */
964         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
965         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
966         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
967
968         /* MSS L4LEN IDX */
969         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
970         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
971         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
972
973         TXD->seqnum_seed = htole32(0);
974
975         if (++ctxd == txr->num_desc)
976                 ctxd = 0;
977
978         txr->tx_avail--;
979         txr->next_avail_desc = ctxd;
980         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
981         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
982         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
983         ++txr->tso_tx;
984         return (0);
985 }
986
987
988 /**********************************************************************
989  *
990  *  Examine each tx_buffer in the used queue. If the hardware is done
991  *  processing the packet then free associated resources. The
992  *  tx_buffer is put back on the free queue.
993  *
994  **********************************************************************/
995 void
996 ixgbe_txeof(struct tx_ring *txr)
997 {
998         struct adapter          *adapter = txr->adapter;
999 #ifdef DEV_NETMAP
1000         struct ifnet            *ifp = adapter->ifp;
1001 #endif
1002         u32                     work, processed = 0;
1003         u32                     limit = adapter->tx_process_limit;
1004         struct ixgbe_tx_buf     *buf;
1005         union ixgbe_adv_tx_desc *txd;
1006
1007         mtx_assert(&txr->tx_mtx, MA_OWNED);
1008
1009 #ifdef DEV_NETMAP
1010         if (ifp->if_capenable & IFCAP_NETMAP) {
1011                 struct netmap_adapter *na = NA(ifp);
1012                 struct netmap_kring *kring = &na->tx_rings[txr->me];
1013                 txd = txr->tx_base;
1014                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1015                     BUS_DMASYNC_POSTREAD);
1016                 /*
1017                  * In netmap mode, all the work is done in the context
1018                  * of the client thread. Interrupt handlers only wake up
1019                  * clients, which may be sleeping on individual rings
1020                  * or on a global resource for all rings.
1021                  * To implement tx interrupt mitigation, we wake up the client
1022                  * thread roughly every half ring, even if the NIC interrupts
1023                  * more frequently. This is implemented as follows:
1024                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
1025                  *   the slot that should wake up the thread (nkr_num_slots
1026                  *   means the user thread should not be woken up);
1027                  * - the driver ignores tx interrupts unless netmap_mitigate=0
1028                  *   or the slot has the DD bit set.
1029                  */
1030                 if (!netmap_mitigate ||
1031                     (kring->nr_kflags < kring->nkr_num_slots &&
1032                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1033                         netmap_tx_irq(ifp, txr->me);
1034                 }
1035                 return;
1036         }
1037 #endif /* DEV_NETMAP */
1038
1039         if (txr->tx_avail == txr->num_desc) {
1040                 txr->busy = 0;
1041                 return;
1042         }
1043
1044         /* Get work starting point */
1045         work = txr->next_to_clean;
1046         buf = &txr->tx_buffers[work];
1047         txd = &txr->tx_base[work];
1048         work -= txr->num_desc; /* The distance to ring end */
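        /*
         * From here on "work" is a negative offset from the end of the
         * ring; it hits zero exactly when the index wraps, which is what
         * the "if (!work)" checks below rely on.
         */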
1049         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1050             BUS_DMASYNC_POSTREAD);
1051
1052         do {
1053                 union ixgbe_adv_tx_desc *eop = buf->eop;
1054                 if (eop == NULL) /* No work */
1055                         break;
1056
1057                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1058                         break;  /* I/O not complete */
1059
1060                 if (buf->m_head) {
1061                         txr->bytes +=
1062                             buf->m_head->m_pkthdr.len;
1063                         bus_dmamap_sync(txr->txtag,
1064                             buf->map,
1065                             BUS_DMASYNC_POSTWRITE);
1066                         bus_dmamap_unload(txr->txtag,
1067                             buf->map);
1068                         m_freem(buf->m_head);
1069                         buf->m_head = NULL;
1070                 }
1071                 buf->eop = NULL;
1072                 ++txr->tx_avail;
1073
1074                 /* We clean the range if multi segment */
1075                 while (txd != eop) {
1076                         ++txd;
1077                         ++buf;
1078                         ++work;
1079                         /* wrap the ring? */
1080                         if (__predict_false(!work)) {
1081                                 work -= txr->num_desc;
1082                                 buf = txr->tx_buffers;
1083                                 txd = txr->tx_base;
1084                         }
1085                         if (buf->m_head) {
1086                                 txr->bytes +=
1087                                     buf->m_head->m_pkthdr.len;
1088                                 bus_dmamap_sync(txr->txtag,
1089                                     buf->map,
1090                                     BUS_DMASYNC_POSTWRITE);
1091                                 bus_dmamap_unload(txr->txtag,
1092                                     buf->map);
1093                                 m_freem(buf->m_head);
1094                                 buf->m_head = NULL;
1095                         }
1096                         ++txr->tx_avail;
1097                         buf->eop = NULL;
1098
1099                 }
1100                 ++txr->packets;
1101                 ++processed;
1102
1103                 /* Try the next packet */
1104                 ++txd;
1105                 ++buf;
1106                 ++work;
1107                 /* reset with a wrap */
1108                 if (__predict_false(!work)) {
1109                         work -= txr->num_desc;
1110                         buf = txr->tx_buffers;
1111                         txd = txr->tx_base;
1112                 }
1113                 prefetch(txd);
1114         } while (__predict_true(--limit));
1115
1116         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1117             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1118
1119         work += txr->num_desc;
1120         txr->next_to_clean = work;
1121
1122         /*
1123         ** Queue hang detection: we know there is
1124         ** work outstanding, or the early return
1125         ** above would have been taken.  If nothing
1126         ** was cleaned, increment busy; the local
1127         ** timer will check it and mark the queue
1128         ** HUNG once it exceeds the maximum count.
1129         */
1130         if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1131                 ++txr->busy;
1132         /*
1133         ** If anything was cleaned, reset the state to 1;
1134         ** note this clears HUNG if it was set.
1135         */
1136         if (processed)
1137                 txr->busy = 1;
1138
1139         if (txr->tx_avail == txr->num_desc)
1140                 txr->busy = 0;
1141
1142         return;
1143 }
1144
1145
1146 #ifdef IXGBE_FDIR
1147 /*
1148 ** This routine parses packet headers so that Flow
1149 ** Director can add a hashed filter table entry,
1150 ** allowing a traffic flow to be identified and kept
1151 ** on the same CPU.  Doing this for every frame would
1152 ** be a performance hit, so only one in every
1153 ** atr_sample_rate packets is examined.
1154 */
1155 static void
1156 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1157 {
1158         struct adapter                  *adapter = txr->adapter;
1159         struct ix_queue                 *que;
1160         struct ip                       *ip;
1161         struct tcphdr                   *th;
1162         struct udphdr                   *uh;
1163         struct ether_vlan_header        *eh;
1164         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
1165         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
1166         int                             ehdrlen, ip_hlen;
1167         u16                             etype;
1168
1169         eh = mtod(mp, struct ether_vlan_header *);
1170         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1171                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1172                 etype = eh->evl_proto;
1173         } else {
1174                 ehdrlen = ETHER_HDR_LEN;
1175                 etype = eh->evl_encap_proto;
1176         }
1177
1178         /* Only handling IPv4 */
1179         if (etype != htons(ETHERTYPE_IP))
1180                 return;
1181
1182         ip = (struct ip *)(mp->m_data + ehdrlen);
1183         ip_hlen = ip->ip_hl << 2;
1184
1185         /* check if we're UDP or TCP */
1186         switch (ip->ip_p) {
1187         case IPPROTO_TCP:
1188                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1189                 /* src and dst are inverted */
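                /*
                 * Inverted so the signature describes the flow as it will
                 * be seen on receive, letting returning traffic land on
                 * the same queue.
                 */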
1190                 common.port.dst ^= th->th_sport;
1191                 common.port.src ^= th->th_dport;
1192                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1193                 break;
1194         case IPPROTO_UDP:
1195                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1196                 /* src and dst are inverted */
1197                 common.port.dst ^= uh->uh_sport;
1198                 common.port.src ^= uh->uh_dport;
1199                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1200                 break;
1201         default:
1202                 return;
1203         }
1204
1205         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1206         if (mp->m_pkthdr.ether_vtag)
1207                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1208         else
1209                 common.flex_bytes ^= etype;
1210         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1211
1212         que = &adapter->queues[txr->me];
1213         /*
1214         ** This assumes the Rx queue and Tx
1215         ** queue are bound to the same CPU
1216         */
1217         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1218             input, common, que->msix);
1219 }
1220 #endif /* IXGBE_FDIR */
1221
1222 /*
1223 ** Used to detect a descriptor that has
1224 ** been merged by Hardware RSC.
1225 */
1226 static inline u32
1227 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1228 {
1229         return (le32toh(rx->wb.lower.lo_dword.data) &
1230             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1231 }
1232
1233 /*********************************************************************
1234  *
1235  *  Initialize the Hardware RSC (LRO) feature on 82599
1236  *  for an RX ring; it is toggled by the LRO capability
1237  *  even though it is transparent to the stack.
1238  *
1239  *  NOTE: since this HW feature only works with IPv4 and
1240  *        our testing has shown software LRO to be just as
1241  *        effective, it is disabled by default.
1242  *
1243  **********************************************************************/
1244 static void
1245 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1246 {
1247         struct  adapter         *adapter = rxr->adapter;
1248         struct  ixgbe_hw        *hw = &adapter->hw;
1249         u32                     rscctrl, rdrxctl;
1250
1251         /* If turning LRO/RSC off we need to disable it */
1252         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1253                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1254                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1255                 return;
1256         }
1257
1258         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1259         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1260 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1261         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1262 #endif /* DEV_NETMAP */
1263         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1264         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1265         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1266
1267         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1268         rscctrl |= IXGBE_RSCCTL_RSCEN;
1269         /*
1270         ** Limit the total number of descriptors that
1271         ** can be combined, so it does not exceed 64K
1272         */
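        /*
         * With the standard cluster sizes this caps the merged size at:
         *   16 * 2KB (MCLBYTES)     = 32KB
         *    8 * 4KB (MJUMPAGESIZE) = 32KB  (assuming 4KB pages)
         *    4 * 9KB (MJUM9BYTES)   = 36KB
         *    1 * 16KB               = 16KB
         * all comfortably under the 64KB limit noted above.
         */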
1273         if (rxr->mbuf_sz == MCLBYTES)
1274                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1275         else if (rxr->mbuf_sz == MJUMPAGESIZE)
1276                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1277         else if (rxr->mbuf_sz == MJUM9BYTES)
1278                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1279         else  /* Using 16K cluster */
1280                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1281
1282         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1283
1284         /* Enable TCP header recognition */
1285         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1286             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1287             IXGBE_PSRTYPE_TCPHDR));
1288
1289         /* Disable RSC for ACK packets */
1290         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1291             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1292
1293         rxr->hw_rsc = TRUE;
1294 }
1295
1296 /*********************************************************************
1297  *
1298  *  Refresh mbuf buffers for RX descriptor rings
1299  *   - keeps its own state, so discards due to resource
1300  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1301  *     it simply returns, keeping its placeholder, and can be
1302  *     called again later to retry.
1303  *
1304  **********************************************************************/
1305 static void
1306 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1307 {
1308         struct adapter          *adapter = rxr->adapter;
1309         bus_dma_segment_t       seg[1];
1310         struct ixgbe_rx_buf     *rxbuf;
1311         struct mbuf             *mp;
1312         int                     i, j, nsegs, error;
1313         bool                    refreshed = FALSE;
1314
1315         i = j = rxr->next_to_refresh;
1316         /* Control the loop with one beyond */
1317         if (++j == rxr->num_desc)
1318                 j = 0;
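        /*
         * "i" is the slot being refreshed while "j" runs one slot ahead;
         * the loop stops once j reaches the caller-supplied limit, so the
         * refresh index never laps the descriptors still being cleaned.
         */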
1319
1320         while (j != limit) {
1321                 rxbuf = &rxr->rx_buffers[i];
1322                 if (rxbuf->buf == NULL) {
1323                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1324                             M_PKTHDR, rxr->mbuf_sz);
1325                         if (mp == NULL)
1326                                 goto update;
1327                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1328                                 m_adj(mp, ETHER_ALIGN);
1329                 } else
1330                         mp = rxbuf->buf;
1331
1332                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1333
1334                 /* If we're dealing with an mbuf that was copied rather
1335                  * than replaced, there's no need to go through busdma.
1336                  */
1337                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1338                         /* Get the memory mapping */
1339                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1340                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1341                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1342                         if (error != 0) {
1343                                 printf("Refresh mbufs: payload dmamap load"
1344                                     " failure - %d\n", error);
1345                                 m_free(mp);
1346                                 rxbuf->buf = NULL;
1347                                 goto update;
1348                         }
1349                         rxbuf->buf = mp;
1350                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1351                             BUS_DMASYNC_PREREAD);
1352                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1353                             htole64(seg[0].ds_addr);
1354                 } else {
1355                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1356                         rxbuf->flags &= ~IXGBE_RX_COPY;
1357                 }
1358
1359                 refreshed = TRUE;
1360                 /* Next is precalculated */
1361                 i = j;
1362                 rxr->next_to_refresh = i;
1363                 if (++j == rxr->num_desc)
1364                         j = 0;
1365         }
1366 update:
1367         if (refreshed) /* Update hardware tail index */
1368                 IXGBE_WRITE_REG(&adapter->hw,
1369                     rxr->tail, rxr->next_to_refresh);
1370         return;
1371 }
1372
1373 /*********************************************************************
1374  *
1375  *  Allocate memory for rx_buffer structures. Since we use one
1376  *  rx_buffer per received packet, the maximum number of rx_buffer's
1377  *  that we'll need is equal to the number of receive descriptors
1378  *  that we've allocated.
1379  *
1380  **********************************************************************/
1381 int
1382 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1383 {
1384         struct  adapter         *adapter = rxr->adapter;
1385         device_t                dev = adapter->dev;
1386         struct ixgbe_rx_buf     *rxbuf;
1387         int                     bsize, error;
1388
1389         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1390         if (!(rxr->rx_buffers =
1391             (struct ixgbe_rx_buf *) malloc(bsize,
1392             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1393                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1394                 error = ENOMEM;
1395                 goto fail;
1396         }
1397
1398         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1399                                    1, 0,        /* alignment, bounds */
1400                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1401                                    BUS_SPACE_MAXADDR,   /* highaddr */
1402                                    NULL, NULL,          /* filter, filterarg */
1403                                    MJUM16BYTES,         /* maxsize */
1404                                    1,                   /* nsegments */
1405                                    MJUM16BYTES,         /* maxsegsize */
1406                                    0,                   /* flags */
1407                                    NULL,                /* lockfunc */
1408                                    NULL,                /* lockfuncarg */
1409                                    &rxr->ptag))) {
1410                 device_printf(dev, "Unable to create RX DMA tag\n");
1411                 goto fail;
1412         }
1413
1414         for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1415                 rxbuf = &rxr->rx_buffers[i];
1416                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1417                 if (error) {
1418                         device_printf(dev, "Unable to create RX dma map\n");
1419                         goto fail;
1420                 }
1421         }
1422
1423         return (0);
1424
1425 fail:
1426         /* Frees all, but can handle partial completion */
1427         ixgbe_free_receive_structures(adapter);
1428         return (error);
1429 }
1430
1431 static void     
1432 ixgbe_free_receive_ring(struct rx_ring *rxr)
1433 {
1434         struct ixgbe_rx_buf       *rxbuf;
1435
1436         for (int i = 0; i < rxr->num_desc; i++) {
1437                 rxbuf = &rxr->rx_buffers[i];
1438                 if (rxbuf->buf != NULL) {
1439                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1440                             BUS_DMASYNC_POSTREAD);
1441                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1442                         rxbuf->buf->m_flags |= M_PKTHDR;
1443                         m_freem(rxbuf->buf);
1444                         rxbuf->buf = NULL;
1445                         rxbuf->flags = 0;
1446                 }
1447         }
1448 }
1449
1450 /*********************************************************************
1451  *
1452  *  Initialize a receive ring and its buffers.
1453  *
1454  **********************************************************************/
1455 static int
1456 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1457 {
1458         struct  adapter         *adapter;
1459         struct ifnet            *ifp;
1460         device_t                dev;
1461         struct ixgbe_rx_buf     *rxbuf;
1462         bus_dma_segment_t       seg[1];
1463         struct lro_ctrl         *lro = &rxr->lro;
1464         int                     rsize, nsegs, error = 0;
1465 #ifdef DEV_NETMAP
1466         struct netmap_adapter *na = NA(rxr->adapter->ifp);
1467         struct netmap_slot *slot;
1468 #endif /* DEV_NETMAP */
1469
1470         adapter = rxr->adapter;
1471         ifp = adapter->ifp;
1472         dev = adapter->dev;
1473
1474         /* Clear the ring contents */
1475         IXGBE_RX_LOCK(rxr);
1476 #ifdef DEV_NETMAP
1477         /* same as in ixgbe_setup_transmit_ring() */
1478         slot = netmap_reset(na, NR_RX, rxr->me, 0);
1479 #endif /* DEV_NETMAP */
1480         rsize = roundup2(adapter->num_rx_desc *
1481             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1482         bzero((void *)rxr->rx_base, rsize);
1483         /* Cache the size */
1484         rxr->mbuf_sz = adapter->rx_mbuf_sz;
1485
1486         /* Free current RX buffer structs and their mbufs */
1487         ixgbe_free_receive_ring(rxr);
1488
1489         /* Now replenish the mbufs */
1490         for (int j = 0; j != rxr->num_desc; ++j) {
1491                 struct mbuf     *mp;
1492
1493                 rxbuf = &rxr->rx_buffers[j];
1494 #ifdef DEV_NETMAP
1495                 /*
1496                  * In netmap mode, fill the map and set the buffer
1497                  * address in the NIC ring, considering the offset
1498                  * between the netmap and NIC rings (see comment in
1499                  * ixgbe_setup_transmit_ring() ). No need to allocate
1500                  * an mbuf, so end the block with a continue;
1501                  */
1502                 if (slot) {
1503                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1504                         uint64_t paddr;
1505                         void *addr;
1506
1507                         addr = PNMB(na, slot + sj, &paddr);
1508                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1509                         /* Update descriptor and the cached value */
1510                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1511                         rxbuf->addr = htole64(paddr);
1512                         continue;
1513                 }
1514 #endif /* DEV_NETMAP */
1515                 rxbuf->flags = 0; 
1516                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1517                     M_PKTHDR, adapter->rx_mbuf_sz);
1518                 if (rxbuf->buf == NULL) {
1519                         error = ENOBUFS;
1520                         goto fail;
1521                 }
1522                 mp = rxbuf->buf;
1523                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1524                 /* Get the memory mapping */
1525                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1526                     rxbuf->pmap, mp, seg,
1527                     &nsegs, BUS_DMA_NOWAIT);
1528                 if (error != 0)
1529                         goto fail;
1530                 bus_dmamap_sync(rxr->ptag,
1531                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
1532                 /* Update the descriptor and the cached value */
1533                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1534                 rxbuf->addr = htole64(seg[0].ds_addr);
1535         }
1536
1537
1538         /* Setup our descriptor indices */
1539         rxr->next_to_check = 0;
1540         rxr->next_to_refresh = 0;
1541         rxr->lro_enabled = FALSE;
1542         rxr->rx_copies = 0;
1543         rxr->rx_bytes = 0;
1544         rxr->vtag_strip = FALSE;
1545
1546         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1547             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1548
1549         /*
1550         ** Now set up the LRO interface:
1551         */
1552         if (ixgbe_rsc_enable)
1553                 ixgbe_setup_hw_rsc(rxr);
1554         else if (ifp->if_capenable & IFCAP_LRO) {
1555                 int err = tcp_lro_init(lro);
1556                 if (err) {
1557                         device_printf(dev, "LRO Initialization failed!\n");
1558                         goto fail;
1559                 }
1560                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1561                 rxr->lro_enabled = TRUE;
1562                 lro->ifp = adapter->ifp;
1563         }
1564
1565         IXGBE_RX_UNLOCK(rxr);
1566         return (0);
1567
1568 fail:
1569         ixgbe_free_receive_ring(rxr);
1570         IXGBE_RX_UNLOCK(rxr);
1571         return (error);
1572 }
1573
1574 /*********************************************************************
1575  *
1576  *  Initialize all receive rings.
1577  *
1578  **********************************************************************/
1579 int
1580 ixgbe_setup_receive_structures(struct adapter *adapter)
1581 {
1582         struct rx_ring *rxr = adapter->rx_rings;
1583         int j;
1584
1585         for (j = 0; j < adapter->num_queues; j++, rxr++)
1586                 if (ixgbe_setup_receive_ring(rxr))
1587                         goto fail;
1588
1589         return (0);
1590 fail:
1591         /*
1592          * Free the RX buffers allocated so far; we only handle
1593          * the rings that completed, since the failing case will have
1594          * cleaned up after itself. 'j' failed, so it's the terminus.
1595          */
1596         for (int i = 0; i < j; ++i) {
1597                 rxr = &adapter->rx_rings[i];
1598                 ixgbe_free_receive_ring(rxr);
1599         }
1600
1601         return (ENOBUFS);
1602 }
1603
1604
1605 /*********************************************************************
1606  *
1607  *  Free all receive rings.
1608  *
1609  **********************************************************************/
1610 void
1611 ixgbe_free_receive_structures(struct adapter *adapter)
1612 {
1613         struct rx_ring *rxr = adapter->rx_rings;
1614
1615         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1616
1617         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1618                 struct lro_ctrl         *lro = &rxr->lro;
1619                 ixgbe_free_receive_buffers(rxr);
1620                 /* Free LRO memory */
1621                 tcp_lro_free(lro);
1622                 /* Free the ring memory as well */
1623                 ixgbe_dma_free(adapter, &rxr->rxdma);
1624         }
1625
1626         free(adapter->rx_rings, M_DEVBUF);
1627 }
1628
1629
1630 /*********************************************************************
1631  *
1632  *  Free receive ring data structures
1633  *
1634  **********************************************************************/
1635 void
1636 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1637 {
1638         struct adapter          *adapter = rxr->adapter;
1639         struct ixgbe_rx_buf     *rxbuf;
1640
1641         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1642
1643         /* Cleanup any existing buffers */
1644         if (rxr->rx_buffers != NULL) {
1645                 for (int i = 0; i < adapter->num_rx_desc; i++) {
1646                         rxbuf = &rxr->rx_buffers[i];
1647                         if (rxbuf->buf != NULL) {
1648                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1649                                     BUS_DMASYNC_POSTREAD);
1650                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1651                                 rxbuf->buf->m_flags |= M_PKTHDR;
1652                                 m_freem(rxbuf->buf);
1653                         }
1654                         rxbuf->buf = NULL;
1655                         if (rxbuf->pmap != NULL) {
1656                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1657                                 rxbuf->pmap = NULL;
1658                         }
1659                 }
1660                 if (rxr->rx_buffers != NULL) {
1661                         free(rxr->rx_buffers, M_DEVBUF);
1662                         rxr->rx_buffers = NULL;
1663                 }
1664         }
1665
1666         if (rxr->ptag != NULL) {
1667                 bus_dma_tag_destroy(rxr->ptag);
1668                 rxr->ptag = NULL;
1669         }
1670
1671         return;
1672 }
1673
1674 static __inline void
1675 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1676 {
1677                  
1678         /*
1679          * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
1680          * should be computed by hardware. Also it should not have VLAN tag in
1681          * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
1682          */
1683         if (rxr->lro_enabled &&
1684             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1685             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1686             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1687             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1688             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1689             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1690             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1691             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1692                 /*
1693                  * Send to the stack if:
1694                  *  - LRO not enabled, or
1695                  *  - no LRO resources, or
1696                  *  - lro enqueue fails
1697                  */
1698                 if (rxr->lro.lro_cnt != 0)
1699                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1700                                 return;
1701         }
1702         IXGBE_RX_UNLOCK(rxr);
1703         (*ifp->if_input)(ifp, m);
1704         IXGBE_RX_LOCK(rxr);
1705 }
1706
1707 static __inline void
1708 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1709 {
1710         struct ixgbe_rx_buf     *rbuf;
1711
1712         rbuf = &rxr->rx_buffers[i];
1713
1714
1715         /*
1716         ** With advanced descriptors the writeback
1717         ** clobbers the buffer addrs, so it's easier
1718         ** to just free the existing mbufs and take
1719         ** the normal refresh path to get new buffers
1720         ** and mapping.
1721         */
1722
1723         if (rbuf->fmp != NULL) {/* Partial chain ? */
1724                 rbuf->fmp->m_flags |= M_PKTHDR;
1725                 m_freem(rbuf->fmp);
1726                 rbuf->fmp = NULL;
1727                 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1728         } else if (rbuf->buf) {
1729                 m_free(rbuf->buf);
1730                 rbuf->buf = NULL;
1731         }
1732         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1733
1734         rbuf->flags = 0;
1735  
1736         return;
1737 }
1738
1739
1740 /*********************************************************************
1741  *
1742  *  This routine executes in interrupt context. It replenishes
1743  *  the mbufs in the descriptor ring and sends data that has been
1744  *  DMA'ed into host memory to the upper layer.
1745  *
1746  *  Return TRUE for more work, FALSE for all clean.
1747  *********************************************************************/
1748 bool
1749 ixgbe_rxeof(struct ix_queue *que)
1750 {
1751         struct adapter          *adapter = que->adapter;
1752         struct rx_ring          *rxr = que->rxr;
1753         struct ifnet            *ifp = adapter->ifp;
1754         struct lro_ctrl         *lro = &rxr->lro;
1755         struct lro_entry        *queued;
1756         int                     i, nextp, processed = 0;
1757         u32                     staterr = 0;
1758         u32                     count = adapter->rx_process_limit;
1759         union ixgbe_adv_rx_desc *cur;
1760         struct ixgbe_rx_buf     *rbuf, *nbuf;
1761         u16                     pkt_info;
1762
1763         IXGBE_RX_LOCK(rxr);
1764
1765 #ifdef DEV_NETMAP
1766         /* Same as the txeof routine: wakeup clients on intr. */
1767         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1768                 IXGBE_RX_UNLOCK(rxr);
1769                 return (FALSE);
1770         }
1771 #endif /* DEV_NETMAP */
1772
1773         for (i = rxr->next_to_check; count != 0;) {
1774                 struct mbuf     *sendmp, *mp;
1775                 u32             rsc, ptype;
1776                 u16             len;
1777                 u16             vtag = 0;
1778                 bool            eop;
1779  
1780                 /* Sync the ring. */
1781                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1782                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1783
1784                 cur = &rxr->rx_base[i];
1785                 staterr = le32toh(cur->wb.upper.status_error);
1786                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1787
1788                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1789                         break;
1790                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1791                         break;
1792
1793                 count--;
1794                 sendmp = NULL;
1795                 nbuf = NULL;
1796                 rsc = 0;
1797                 cur->wb.upper.status_error = 0;
1798                 rbuf = &rxr->rx_buffers[i];
1799                 mp = rbuf->buf;
1800
1801                 len = le16toh(cur->wb.upper.length);
1802                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1803                     IXGBE_RXDADV_PKTTYPE_MASK;
1804                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1805
1806                 /* Make sure bad packets are discarded */
1807                 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1808 #if __FreeBSD_version >= 1100036
1809                         if (IXGBE_IS_VF(adapter))
1810                                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1811 #endif
1812                         rxr->rx_discarded++;
1813                         ixgbe_rx_discard(rxr, i);
1814                         goto next_desc;
1815                 }
1816
1817                 /*
1818                 ** On 82599 which supports a hardware
1819                 ** LRO (called HW RSC), packets need
1820                 ** not be fragmented across sequential
1821                 ** descriptors; rather, the next descriptor
1822                 ** is indicated in bits of the descriptor.
1823                 ** This also means that we might process
1824                 ** more than one packet at a time, something
1825                 ** that was never true before; it
1826                 ** required eliminating the global chain pointers
1827                 ** in favor of what we are doing here.  -jfv
1828                 */
1829                 if (!eop) {
1830                         /*
1831                         ** Figure out the next descriptor
1832                         ** of this frame.
1833                         */
1834                         if (rxr->hw_rsc == TRUE) {
1835                                 rsc = ixgbe_rsc_count(cur);
1836                                 rxr->rsc_num += (rsc - 1);
1837                         }
1838                         if (rsc) { /* Get hardware index */
1839                                 nextp = ((staterr &
1840                                     IXGBE_RXDADV_NEXTP_MASK) >>
1841                                     IXGBE_RXDADV_NEXTP_SHIFT);
1842                         } else { /* Just sequential */
1843                                 nextp = i + 1;
1844                                 if (nextp == adapter->num_rx_desc)
1845                                         nextp = 0;
1846                         }
1847                         nbuf = &rxr->rx_buffers[nextp];
1848                         prefetch(nbuf);
1849                 }
1850                 /*
1851                 ** Rather than using the fmp/lmp global pointers
1852                 ** we now keep the head of a packet chain in the
1853                 ** buffer struct and pass this along from one
1854                 ** descriptor to the next, until we get EOP.
1855                 */
1856                 mp->m_len = len;
1857                 /*
1858                 ** See if there is a stored head
1859                 ** that determines what we are
1860                 */
1861                 sendmp = rbuf->fmp;
1862                 if (sendmp != NULL) {  /* secondary frag */
1863                         rbuf->buf = rbuf->fmp = NULL;
1864                         mp->m_flags &= ~M_PKTHDR;
1865                         sendmp->m_pkthdr.len += mp->m_len;
1866                 } else {
1867                         /*
1868                          * Optimize.  This might be a small packet,
1869                          * maybe just a TCP ACK.  Do a fast copy that
1870                          * is cache aligned into a new mbuf, and
1871                          * leave the old mbuf+cluster for re-use.
1872                          */
1873                         if (eop && len <= IXGBE_RX_COPY_LEN) {
1874                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1875                                 if (sendmp != NULL) {
1876                                         sendmp->m_data +=
1877                                             IXGBE_RX_COPY_ALIGN;
1878                                         ixgbe_bcopy(mp->m_data,
1879                                             sendmp->m_data, len);
1880                                         sendmp->m_len = len;
1881                                         rxr->rx_copies++;
1882                                         rbuf->flags |= IXGBE_RX_COPY;
1883                                 }
1884                         }
1885                         if (sendmp == NULL) {
1886                                 rbuf->buf = rbuf->fmp = NULL;
1887                                 sendmp = mp;
1888                         }
1889
1890                         /* first desc of a non-ps chain */
1891                         sendmp->m_flags |= M_PKTHDR;
1892                         sendmp->m_pkthdr.len = mp->m_len;
1893                 }
1894                 ++processed;
1895
1896                 /* Pass the head pointer on */
1897                 if (eop == 0) {
1898                         nbuf->fmp = sendmp;
1899                         sendmp = NULL;
1900                         mp->m_next = nbuf->buf;
1901                 } else { /* Sending this frame */
1902                         sendmp->m_pkthdr.rcvif = ifp;
1903                         rxr->rx_packets++;
1904                         /* capture data for AIM */
1905                         rxr->bytes += sendmp->m_pkthdr.len;
1906                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1907                         /* Process vlan info */
1908                         if ((rxr->vtag_strip) &&
1909                             (staterr & IXGBE_RXD_STAT_VP))
1910                                 vtag = le16toh(cur->wb.upper.vlan);
1911                         if (vtag) {
1912                                 sendmp->m_pkthdr.ether_vtag = vtag;
1913                                 sendmp->m_flags |= M_VLANTAG;
1914                         }
1915                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1916                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
1917
1918                         /*
1919                          * In case of multiqueue, we have RXCSUM.PCSD bit set
1920                          * and never cleared. This means we have RSS hash
1921                          * available to be used.   
1922                          */
1923                         if (adapter->num_queues > 1) {
1924                                 sendmp->m_pkthdr.flowid =
1925                                     le32toh(cur->wb.lower.hi_dword.rss);
1926                                 /*
1927                                  * Full RSS support is not available in
1928                                  * FreeBSD 10, so set the hash type to
1929                                  * OPAQUE.
1930                                  */
1931                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1932                         } else {
1933                                 sendmp->m_pkthdr.flowid = que->msix;
1934                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1935                         }
1936                 }
1937 next_desc:
1938                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1939                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1940
1941                 /* Advance our pointers to the next descriptor. */
1942                 if (++i == rxr->num_desc)
1943                         i = 0;
1944
1945                 /* Now send to the stack or do LRO */
1946                 if (sendmp != NULL) {
1947                         rxr->next_to_check = i;
1948                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1949                         i = rxr->next_to_check;
1950                 }
1951
1952                /* Every 8 descriptors we go to refresh mbufs */
1953                 if (processed == 8) {
1954                         ixgbe_refresh_mbufs(rxr, i);
1955                         processed = 0;
1956                 }
1957         }
1958
1959         /* Refresh any remaining buf structs */
1960         if (ixgbe_rx_unrefreshed(rxr))
1961                 ixgbe_refresh_mbufs(rxr, i);
1962
1963         rxr->next_to_check = i;
1964
1965         /*
1966          * Flush any outstanding LRO work
1967          */
1968         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1969                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1970                 tcp_lro_flush(lro, queued);
1971         }
1972
1973         IXGBE_RX_UNLOCK(rxr);
1974
1975         /*
1976         ** Still have cleaning to do?
1977         */
1978         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1979                 return (TRUE);
1980         else
1981                 return (FALSE);
1982 }
1983
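/*
 * Editorial sketch (not part of the driver): the IXGBE_RX_COPY_LEN test in
 * ixgbe_rxeof() above implements the classic small-packet copy break-even:
 * below the threshold it is cheaper to copy the payload into a small header
 * mbuf and leave the receive cluster in place for reuse than to hand the
 * whole cluster up the stack and allocate a replacement.  The standalone
 * model below uses hypothetical names (COPY_LEN, deliver_small_or_large)
 * and malloc'ed buffers in place of mbufs and clusters.
 */
#if 0   /* illustrative model only; never compiled */
#include <stdlib.h>
#include <string.h>

#define COPY_LEN 160    /* break-even threshold, analogous to IXGBE_RX_COPY_LEN */

struct pkt {
        char    *data;
        size_t   len;
};

/*
 * Returns a packet for the stack; *cluster_reused tells the caller whether
 * the receive cluster can stay on the ring (no replacement needed).
 */
static struct pkt *
deliver_small_or_large(char *cluster, size_t len, int *cluster_reused)
{
        struct pkt *p = malloc(sizeof(*p));

        if (p == NULL)
                return (NULL);
        if (len <= COPY_LEN) {
                /* Small frame: copy out, keep the cluster for the ring. */
                p->data = malloc(len);
                if (p->data == NULL) {
                        free(p);
                        return (NULL);
                }
                memcpy(p->data, cluster, len);
                *cluster_reused = 1;
        } else {
                /* Large frame: hand the cluster itself up the stack. */
                p->data = cluster;
                *cluster_reused = 0;
        }
        p->len = len;
        return (p);
}

int
main(void)
{
        static char cluster[2048] = "tiny payload";
        int reused;
        struct pkt *p = deliver_small_or_large(cluster, 12, &reused);

        /* reused == 1 here: 12 bytes is below COPY_LEN, so we copied. */
        if (p != NULL && reused)
                free(p->data);
        free(p);
        return (0);
}
#endif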
1984
1985 /*********************************************************************
1986  *
1987  *  Verify that the hardware indicated that the checksum is valid.
1988  *  Inform the stack about the status of the checksum so that the stack
1989  *  doesn't spend time verifying it.
1990  *
1991  *********************************************************************/
1992 static void
1993 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1994 {
1995         u16     status = (u16) staterr;
1996         u8      errors = (u8) (staterr >> 24);
1997         bool    sctp = false;
1998
1999         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2000             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2001                 sctp = true;
2002
2003         /* IPv4 checksum */
2004         if (status & IXGBE_RXD_STAT_IPCS) {
2005                 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
2006                 /* IP Checksum Good */
2007                 if (!(errors & IXGBE_RXD_ERR_IPE))
2008                         mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
2009         }
2010         /* TCP/UDP/SCTP checksum */
2011         if (status & IXGBE_RXD_STAT_L4CS) {
2012                 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
2013                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2014                         mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
2015                         if (!sctp)
2016                                 mp->m_pkthdr.csum_data = htons(0xffff);
2017                 }
2018         }
2019 }
2020
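/*
 * Editorial sketch (not part of the driver): a consumer only has to test the
 * mbuf flags that ixgbe_rx_checksum() sets; it never re-runs the checksum.
 * The LRO eligibility test in ixgbe_rx_input() above does exactly this with
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR, which the CSUM_L4_* names used here
 * appear to map onto in this tree.  The standalone model below uses
 * hypothetical flag values and a hypothetical rx_l4_checksum_ok() helper.
 */
#if 0   /* illustrative model only; never compiled */
#include <stdbool.h>
#include <stdint.h>

#define MODEL_CSUM_L4_CALC   0x01       /* "hardware looked at the L4 checksum" */
#define MODEL_CSUM_L4_VALID  0x02       /* "and it was good" */

struct model_mbuf {
        uint32_t csum_flags;
};

static bool
rx_l4_checksum_ok(const struct model_mbuf *m)
{
        uint32_t want = MODEL_CSUM_L4_CALC | MODEL_CSUM_L4_VALID;

        /* Both bits must be set: checked by hardware and found valid. */
        return ((m->csum_flags & want) == want);
}

int
main(void)
{
        struct model_mbuf m = { MODEL_CSUM_L4_CALC | MODEL_CSUM_L4_VALID };

        return (rx_l4_checksum_ok(&m) ? 0 : 1);
}
#endif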
2021 /********************************************************************
2022  * Manage DMA'able memory.
2023  *******************************************************************/
2024 static void
2025 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2026 {
2027         if (error)
2028                 return;
2029         *(bus_addr_t *) arg = segs->ds_addr;
2030         return;
2031 }
2032
2033 int
2034 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2035                 struct ixgbe_dma_alloc *dma, int mapflags)
2036 {
2037         device_t dev = adapter->dev;
2038         int             r;
2039
2040         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2041                                DBA_ALIGN, 0,    /* alignment, bounds */
2042                                BUS_SPACE_MAXADDR,       /* lowaddr */
2043                                BUS_SPACE_MAXADDR,       /* highaddr */
2044                                NULL, NULL,      /* filter, filterarg */
2045                                size,    /* maxsize */
2046                                1,       /* nsegments */
2047                                size,    /* maxsegsize */
2048                                BUS_DMA_ALLOCNOW,        /* flags */
2049                                NULL,    /* lockfunc */
2050                                NULL,    /* lockfuncarg */
2051                                &dma->dma_tag);
2052         if (r != 0) {
2053                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2054                        "error %u\n", r);
2055                 goto fail_0;
2056         }
2057         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2058                              BUS_DMA_NOWAIT, &dma->dma_map);
2059         if (r != 0) {
2060                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2061                        "error %u\n", r);
2062                 goto fail_1;
2063         }
2064         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2065                             size,
2066                             ixgbe_dmamap_cb,
2067                             &dma->dma_paddr,
2068                             mapflags | BUS_DMA_NOWAIT);
2069         if (r != 0) {
2070                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2071                        "error %u\n", r);
2072                 goto fail_2;
2073         }
2074         dma->dma_size = size;
2075         return (0);
2076 fail_2:
2077         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2078 fail_1:
2079         bus_dma_tag_destroy(dma->dma_tag);
2080 fail_0:
2081         dma->dma_tag = NULL;
2082         return (r);
2083 }
2084
2085 void
2086 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2087 {
2088         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2089             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2090         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2091         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2092         bus_dma_tag_destroy(dma->dma_tag);
2093 }
2094
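/*
 * Editorial sketch (not part of the driver): ixgbe_dma_malloc() above is the
 * usual three-step busdma setup (create the tag, allocate memory, load the
 * map) with a goto ladder that unwinds exactly the steps that succeeded, and
 * ixgbe_dma_free() releases the same resources in reverse order.  The
 * standalone model below reproduces just the ladder shape with hypothetical
 * acquire_a/b/c and release_a/b steps.
 */
#if 0   /* illustrative model only; never compiled */
#include <stdio.h>

static int acquire_a(void) { return (0); }      /* e.g. bus_dma_tag_create()  */
static int acquire_b(void) { return (0); }      /* e.g. bus_dmamem_alloc()    */
static int acquire_c(void) { return (-1); }     /* e.g. bus_dmamap_load()     */
static void release_a(void) { puts("release a"); }
static void release_b(void) { puts("release b"); }

static int
setup(void)
{
        int r;

        if ((r = acquire_a()) != 0)
                goto fail_0;
        if ((r = acquire_b()) != 0)
                goto fail_1;
        if ((r = acquire_c()) != 0)
                goto fail_2;
        return (0);

fail_2:
        release_b();    /* undo step b */
fail_1:
        release_a();    /* undo step a */
fail_0:
        return (r);     /* nothing acquired, or everything already undone */
}

int
main(void)
{
        return (setup() != 0);
}
#endif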
2095
2096 /*********************************************************************
2097  *
2098  *  Allocate memory for the transmit and receive rings, and then
2099  *  the descriptors associated with each, called only once at attach.
2100  *
2101  **********************************************************************/
2102 int
2103 ixgbe_allocate_queues(struct adapter *adapter)
2104 {
2105         device_t        dev = adapter->dev;
2106         struct ix_queue *que;
2107         struct tx_ring  *txr;
2108         struct rx_ring  *rxr;
2109         int rsize, tsize, error = IXGBE_SUCCESS;
2110         int txconf = 0, rxconf = 0;
2111 #ifdef PCI_IOV
2112         enum ixgbe_iov_mode iov_mode;
2113 #endif
2114
2115         /* First allocate the top level queue structs */
2116         if (!(adapter->queues =
2117             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2118             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2119                 device_printf(dev, "Unable to allocate queue memory\n");
2120                 error = ENOMEM;
2121                 goto fail;
2122         }
2123
2124         /* First allocate the TX ring struct memory */
2125         if (!(adapter->tx_rings =
2126             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2127             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2128                 device_printf(dev, "Unable to allocate TX ring memory\n");
2129                 error = ENOMEM;
2130                 goto tx_fail;
2131         }
2132
2133         /* Next allocate the RX */
2134         if (!(adapter->rx_rings =
2135             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2136             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2137                 device_printf(dev, "Unable to allocate RX ring memory\n");
2138                 error = ENOMEM;
2139                 goto rx_fail;
2140         }
2141
2142         /* For the ring itself */
2143         tsize = roundup2(adapter->num_tx_desc *
2144             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2145
2146 #ifdef PCI_IOV
2147         iov_mode = ixgbe_get_iov_mode(adapter);
2148         adapter->pool = ixgbe_max_vfs(iov_mode);
2149 #else
2150         adapter->pool = 0;
2151 #endif
2152         /*
2153          * Now set up the TX queues, txconf is needed to handle the
2154          * possibility that things fail midcourse and we need to
2155          * undo memory gracefully
2156          */ 
2157         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2158                 /* Set up some basics */
2159                 txr = &adapter->tx_rings[i];
2160                 txr->adapter = adapter;
2161 #ifdef PCI_IOV
2162                 txr->me = ixgbe_pf_que_index(iov_mode, i);
2163 #else
2164                 txr->me = i;
2165 #endif
2166                 txr->num_desc = adapter->num_tx_desc;
2167
2168                 /* Initialize the TX side lock */
2169                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2170                     device_get_nameunit(dev), txr->me);
2171                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2172
2173                 if (ixgbe_dma_malloc(adapter, tsize,
2174                         &txr->txdma, BUS_DMA_NOWAIT)) {
2175                         device_printf(dev,
2176                             "Unable to allocate TX Descriptor memory\n");
2177                         error = ENOMEM;
2178                         goto err_tx_desc;
2179                 }
2180                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2181                 bzero((void *)txr->tx_base, tsize);
2182
2183                 /* Now allocate transmit buffers for the ring */
2184                 if (ixgbe_allocate_transmit_buffers(txr)) {
2185                         device_printf(dev,
2186                             "Critical Failure setting up transmit buffers\n");
2187                         error = ENOMEM;
2188                         goto err_tx_desc;
2189                 }
2190 #ifndef IXGBE_LEGACY_TX
2191                 /* Allocate a buf ring */
2192                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2193                     M_WAITOK, &txr->tx_mtx);
2194                 if (txr->br == NULL) {
2195                         device_printf(dev,
2196                             "Critical Failure setting up buf ring\n");
2197                         error = ENOMEM;
2198                         goto err_tx_desc;
2199                 }
2200 #endif
2201         }
2202
2203         /*
2204          * Next the RX queues...
2205          */ 
2206         rsize = roundup2(adapter->num_rx_desc *
2207             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2208         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2209                 rxr = &adapter->rx_rings[i];
2210                 /* Set up some basics */
2211                 rxr->adapter = adapter;
2212 #ifdef PCI_IOV
2213                 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2214 #else
2215                 rxr->me = i;
2216 #endif
2217                 rxr->num_desc = adapter->num_rx_desc;
2218
2219                 /* Initialize the RX side lock */
2220                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2221                     device_get_nameunit(dev), rxr->me);
2222                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2223
2224                 if (ixgbe_dma_malloc(adapter, rsize,
2225                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2226                         device_printf(dev,
2227                             "Unable to allocate RX Descriptor memory\n");
2228                         error = ENOMEM;
2229                         goto err_rx_desc;
2230                 }
2231                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2232                 bzero((void *)rxr->rx_base, rsize);
2233
2234                 /* Allocate receive buffers for the ring*/
2235                 if (ixgbe_allocate_receive_buffers(rxr)) {
2236                         device_printf(dev,
2237                             "Critical Failure setting up receive buffers\n");
2238                         error = ENOMEM;
2239                         goto err_rx_desc;
2240                 }
2241         }
2242
2243         /*
2244         ** Finally set up the queue holding structs
2245         */
2246         for (int i = 0; i < adapter->num_queues; i++) {
2247                 que = &adapter->queues[i];
2248                 que->adapter = adapter;
2249                 que->me = i;
2250                 que->txr = &adapter->tx_rings[i];
2251                 que->rxr = &adapter->rx_rings[i];
2252         }
2253
2254         return (0);
2255
2256 err_rx_desc:
2257         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2258                 ixgbe_dma_free(adapter, &rxr->rxdma);
2259 err_tx_desc:
2260         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2261                 ixgbe_dma_free(adapter, &txr->txdma);
2262         free(adapter->rx_rings, M_DEVBUF);
2263 rx_fail:
2264         free(adapter->tx_rings, M_DEVBUF);
2265 tx_fail:
2266         free(adapter->queues, M_DEVBUF);
2267 fail:
2268         return (error);
2269 }
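/*
 * Editorial sketch (not part of the driver): the err_tx_desc/err_rx_desc
 * paths above rely on txconf and rxconf counting how many rings were fully
 * set up, so the error path frees exactly that many and no more.  The
 * standalone model below shows the same "count successes, unwind that many"
 * pattern with a hypothetical setup_one()/teardown_one() pair.
 */
#if 0   /* illustrative model only; never compiled */
#include <stdio.h>

#define NQUEUES 4

static int
setup_one(int i)
{
        return (i == 2 ? -1 : 0);       /* pretend the third ring fails */
}

static void
teardown_one(int i)
{
        printf("teardown ring %d\n", i);
}

static int
setup_all(void)
{
        int conf, error = 0;

        for (conf = 0; conf < NQUEUES; conf++) {
                if ((error = setup_one(conf)) != 0)
                        goto fail;
        }
        return (0);
fail:
        /* 'conf' rings completed before the failure; undo only those. */
        while (conf-- > 0)
                teardown_one(conf);
        return (error);
}

int
main(void)
{
        return (setup_all() != 0);
}
#endif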