1 /******************************************************************************
2
3   Copyright (c) 2001-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40
41 #include "ixgbe.h"
42
43 #ifdef DEV_NETMAP
44 #include <net/netmap.h>
45 #include <sys/selinfo.h>
46 #include <dev/netmap/netmap_kern.h>
47
48 extern int ix_crcstrip;
49 #endif
50
51 /*
52 ** HW RSC control:
53 **  This feature only works with
54 **  IPv4, and only on 82599 and later
55 **  parts.  It also breaks IP forwarding,
56 **  which, unlike software LRO, cannot be
57 **  controlled from the stack.  For all
58 **  these reasons it is left disabled by
59 **  default, with no tunable interface;
60 **  enabling it requires changing this
61 **  flag and recompiling.
62 */
63 static bool ixgbe_rsc_enable = FALSE;
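
/*
 * Illustrative sketch only (not part of this driver): if a runtime knob were
 * ever wanted instead of the compile-time flag above, the conventional
 * FreeBSD 10 approach would be a loader tunable tied to an int shadow of the
 * flag; the "hw.ix.rsc_enable" name below is hypothetical.
 */
#if 0
static int ix_rsc_enable = 0;			/* shadow of ixgbe_rsc_enable */
TUNABLE_INT("hw.ix.rsc_enable", &ix_rsc_enable);
#endif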
64
65 #ifdef IXGBE_FDIR
66 /*
67 ** For Flow Director: the sampling
68 ** rate for TX packets fed into the
69 ** filter pool; with the default of
70 ** 20, every 20th packet is probed.
71 **
72 ** Setting this to 0 disables the
73 ** feature.
74 */
75 static int atr_sample_rate = 20;
76 #endif
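
/*
 * How the sample rate is consumed (a restatement of the logic in
 * ixgbe_xmit() below, shown here only for clarity): a per-ring counter is
 * bumped for each transmitted packet and the ATR filter is probed once the
 * counter reaches atr_sample_rate.  A rate of 0 leaves txr->atr_sample at 0
 * during ring setup, so the whole block is skipped:
 *
 *     if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
 *             if (++txr->atr_count >= atr_sample_rate) {
 *                     ixgbe_atr(txr, m_head);
 *                     txr->atr_count = 0;
 *             }
 *     }
 */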
77
78 /* Shared PCI config read/write */
79 inline u16
80 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
81 {
82         u16 value;
83
84         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
85             reg, 2);
86
87         return (value);
88 }
89
90 inline void
91 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
92 {
93         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
94             reg, value, 2);
95
96         return;
97 }
98
99 /*********************************************************************
100  *  Local Function prototypes
101  *********************************************************************/
102 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
103 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
104 static int      ixgbe_setup_receive_ring(struct rx_ring *);
105 static void     ixgbe_free_receive_buffers(struct rx_ring *);
106
107 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
108 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
109 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
110 static int      ixgbe_tx_ctx_setup(struct tx_ring *,
111                     struct mbuf *, u32 *, u32 *);
112 static int      ixgbe_tso_setup(struct tx_ring *,
113                     struct mbuf *, u32 *, u32 *);
114 #ifdef IXGBE_FDIR
115 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
116 #endif
117 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
118 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
119                     struct mbuf *, u32);
120
121 #ifdef IXGBE_LEGACY_TX
122 /*********************************************************************
123  *  Transmit entry point
124  *
125  *  ixgbe_start is called by the stack to initiate a transmit.
126  *  The driver will remain in this routine as long as there are
127  *  packets to transmit and transmit resources are available.
128  *  If resources are not available, the stack is notified and
129  *  the packet is requeued.
130  **********************************************************************/
131
132 void
133 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
134 {
135         struct mbuf    *m_head;
136         struct adapter *adapter = txr->adapter;
137
138         IXGBE_TX_LOCK_ASSERT(txr);
139
140         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
141                 return;
142         if (!adapter->link_active)
143                 return;
144
145         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
146                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
147                         break;
148
149                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
150                 if (m_head == NULL)
151                         break;
152
153                 if (ixgbe_xmit(txr, &m_head)) {
154                         if (m_head != NULL)
155                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
156                         break;
157                 }
158                 /* Send a copy of the frame to the BPF listener */
159                 ETHER_BPF_MTAP(ifp, m_head);
160         }
161         return;
162 }
163
164 /*
165  * Legacy TX start - called by the stack, this
166  * always uses the first tx ring, and should
167  * not be used with multiqueue tx enabled.
168  */
169 void
170 ixgbe_start(struct ifnet *ifp)
171 {
172         struct adapter *adapter = ifp->if_softc;
173         struct tx_ring  *txr = adapter->tx_rings;
174
175         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
176                 IXGBE_TX_LOCK(txr);
177                 ixgbe_start_locked(txr, ifp);
178                 IXGBE_TX_UNLOCK(txr);
179         }
180         return;
181 }
182
183 #else /* ! IXGBE_LEGACY_TX */
184
185 /*
186 ** Multiqueue Transmit driver
187 **
188 */
189 int
190 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
191 {
192         struct adapter  *adapter = ifp->if_softc;
193         struct ix_queue *que;
194         struct tx_ring  *txr;
195         int             i, err = 0;
196
197         /*
198          * When doing RSS, map it to the same outbound queue
199          * as the incoming flow would be mapped to.
200          *
201          * If everything is set up correctly, that should be the same
202          * bucket the current CPU is on; see the example after this function.
203          */
204         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
205                 i = m->m_pkthdr.flowid % adapter->num_queues;
206         else
207                 i = curcpu % adapter->num_queues;
208
209         /* Check for a hung queue and pick alternative */
210         if (((1 << i) & adapter->active_queues) == 0)
211                 i = ffsl(adapter->active_queues);
212
213         txr = &adapter->tx_rings[i];
214         que = &adapter->queues[i];
215
216         err = drbr_enqueue(ifp, txr->br, m);
217         if (err)
218                 return (err);
219         if (IXGBE_TX_TRYLOCK(txr)) {
220                 ixgbe_mq_start_locked(ifp, txr);
221                 IXGBE_TX_UNLOCK(txr);
222         } else
223                 taskqueue_enqueue(que->tq, &txr->txq_task);
224
225         return (0);
226 }
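
/*
 * Worked example of the queue selection above, with illustrative numbers:
 * given num_queues = 8 and an RSS flowid of 0x00c3a5f1, the frame is
 * enqueued on ring 0x00c3a5f1 % 8 == 1.  Assuming the RSS indirection
 * table steers the receive side of the same flow to bucket 1 as well,
 * TX and RX work for that flow stays on one queue pair and, ideally, on
 * the CPU already taking its receive interrupts.
 */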
227
228 int
229 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
230 {
231         struct adapter  *adapter = txr->adapter;
232         struct mbuf     *next;
233         int             enqueued = 0, err = 0;
234
235         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
236             adapter->link_active == 0)
237                 return (ENETDOWN);
238
239         /* Process the queue */
240 #if __FreeBSD_version < 901504
241         next = drbr_dequeue(ifp, txr->br);
242         while (next != NULL) {
243                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
244                         if (next != NULL)
245                                 err = drbr_enqueue(ifp, txr->br, next);
246 #else
247         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
248                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
249                         if (next == NULL) {
250                                 drbr_advance(ifp, txr->br);
251                         } else {
252                                 drbr_putback(ifp, txr->br, next);
253                         }
254 #endif
255                         break;
256                 }
257 #if __FreeBSD_version >= 901504
258                 drbr_advance(ifp, txr->br);
259 #endif
260                 enqueued++;
261 #if 0 // this is VF-only
262 #if __FreeBSD_version >= 1100036
263                 /*
264                  * Since we're looking at the tx ring, we can check
265                  * to see if we're a VF by examing our tail register
266                  * address.
267                  */
268                 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
269                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
270 #endif
271 #endif
272                 /* Send a copy of the frame to the BPF listener */
273                 ETHER_BPF_MTAP(ifp, next);
274                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
275                         break;
276 #if __FreeBSD_version < 901504
277                 next = drbr_dequeue(ifp, txr->br);
278 #endif
279         }
280
281         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
282                 ixgbe_txeof(txr);
283
284         return (err);
285 }
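
/*
 * Skeleton of the drbr(9) protocol used above on FreeBSD >= 901504, shown
 * here for clarity only and kept out of the build: drbr_peek() looks at the
 * head without removing it; if the transmit fails and consumed the mbuf the
 * slot is advanced, if it failed but left the mbuf intact it is put back for
 * a later retry, and only a successful transmit advances the ring normally.
 */
#if 0
static void
drbr_drain_sketch(struct ifnet *ifp, struct tx_ring *txr)
{
	struct mbuf *m;

	while ((m = drbr_peek(ifp, txr->br)) != NULL) {
		if (ixgbe_xmit(txr, &m) != 0) {
			if (m == NULL)			/* consumed on failure */
				drbr_advance(ifp, txr->br);
			else				/* intact: retry later */
				drbr_putback(ifp, txr->br, m);
			break;
		}
		drbr_advance(ifp, txr->br);		/* success */
	}
}
#endif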
286
287 /*
288  * Called from a taskqueue to drain queued transmit packets.
289  */
290 void
291 ixgbe_deferred_mq_start(void *arg, int pending)
292 {
293         struct tx_ring *txr = arg;
294         struct adapter *adapter = txr->adapter;
295         struct ifnet *ifp = adapter->ifp;
296
297         IXGBE_TX_LOCK(txr);
298         if (!drbr_empty(ifp, txr->br))
299                 ixgbe_mq_start_locked(ifp, txr);
300         IXGBE_TX_UNLOCK(txr);
301 }
302
303 /*
304  * Flush all ring buffers
305  */
306 void
307 ixgbe_qflush(struct ifnet *ifp)
308 {
309         struct adapter  *adapter = ifp->if_softc;
310         struct tx_ring  *txr = adapter->tx_rings;
311         struct mbuf     *m;
312
313         for (int i = 0; i < adapter->num_queues; i++, txr++) {
314                 IXGBE_TX_LOCK(txr);
315                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
316                         m_freem(m);
317                 IXGBE_TX_UNLOCK(txr);
318         }
319         if_qflush(ifp);
320 }
321 #endif /* IXGBE_LEGACY_TX */
322
323
324 /*********************************************************************
325  *
326  *  This routine maps the mbufs to tx descriptors, allowing the
327  *  TX engine to transmit the packets. 
328  *      - return 0 on success, positive on failure
329  *
330  **********************************************************************/
331
332 static int
333 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
334 {
335         struct adapter  *adapter = txr->adapter;
336         u32             olinfo_status = 0, cmd_type_len;
337         int             i, j, error, nsegs;
338         int             first;
339         bool            remap = TRUE;
340         struct mbuf     *m_head;
341         bus_dma_segment_t segs[adapter->num_segs];
342         bus_dmamap_t    map;
343         struct ixgbe_tx_buf *txbuf;
344         union ixgbe_adv_tx_desc *txd = NULL;
345
346         m_head = *m_headp;
347
348         /* Basic descriptor defines */
349         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
350             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
351
352         if (m_head->m_flags & M_VLANTAG)
353                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
354
355         /*
356          * Capture the first descriptor used, because its tx_buffer
357          * will hold the EOP descriptor whose completion we
358          * tell the hardware to report back.
359          */
360         first = txr->next_avail_desc;
361         txbuf = &txr->tx_buffers[first];
362         map = txbuf->map;
363
364         /*
365          * Map the packet for DMA.
366          */
367 retry:
368         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
369             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
370
371         if (__predict_false(error)) {
372                 struct mbuf *m;
373
374                 switch (error) {
375                 case EFBIG:
376                         /* Try it again? - one try */
377                         if (remap == TRUE) {
378                                 remap = FALSE;
379                                 /*
380                                  * XXX: m_defrag will choke on
381                                  * non-MCLBYTES-sized clusters
382                                  */
383                                 m = m_defrag(*m_headp, M_NOWAIT);
384                                 if (m == NULL) {
385                                         adapter->mbuf_defrag_failed++;
386                                         m_freem(*m_headp);
387                                         *m_headp = NULL;
388                                         return (ENOBUFS);
389                                 }
390                                 *m_headp = m;
391                                 goto retry;
392                         } else
393                                 return (error);
394                 case ENOMEM:
395                         txr->no_tx_dma_setup++;
396                         return (error);
397                 default:
398                         txr->no_tx_dma_setup++;
399                         m_freem(*m_headp);
400                         *m_headp = NULL;
401                         return (error);
402                 }
403         }
404
405         /* Make certain there are enough descriptors */
406         if (nsegs > txr->tx_avail - 2) {
407                 txr->no_desc_avail++;
408                 bus_dmamap_unload(txr->txtag, map);
409                 return (ENOBUFS);
410         }
411         m_head = *m_headp;
412
413         /*
414          * Set up the appropriate offload context;
415          * this will consume the first descriptor.
416          */
417         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
418         if (__predict_false(error)) {
419                 if (error == ENOBUFS)
420                         *m_headp = NULL;
421                 return (error);
422         }
423
424 #ifdef IXGBE_FDIR
425         /* Do the flow director magic */
426         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
427                 ++txr->atr_count;
428                 if (txr->atr_count >= atr_sample_rate) {
429                         ixgbe_atr(txr, m_head);
430                         txr->atr_count = 0;
431                 }
432         }
433 #endif
434
435         i = txr->next_avail_desc;
436         for (j = 0; j < nsegs; j++) {
437                 bus_size_t seglen;
438                 bus_addr_t segaddr;
439
440                 txbuf = &txr->tx_buffers[i];
441                 txd = &txr->tx_base[i];
442                 seglen = segs[j].ds_len;
443                 segaddr = htole64(segs[j].ds_addr);
444
445                 txd->read.buffer_addr = segaddr;
446                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
447                     cmd_type_len |seglen);
448                 txd->read.olinfo_status = htole32(olinfo_status);
449
450                 if (++i == txr->num_desc)
451                         i = 0;
452         }
453
454         txd->read.cmd_type_len |=
455             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
456         txr->tx_avail -= nsegs;
457         txr->next_avail_desc = i;
458
459         txbuf->m_head = m_head;
460         /*
461          * Here we swap the maps so the last descriptor,
462          * which gets the completion interrupt, has the
463          * real map, and the first descriptor gets the
464          * unused map from this descriptor.
465          */
466         txr->tx_buffers[first].map = txbuf->map;
467         txbuf->map = map;
468         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
469
470         /* Set the EOP descriptor that will be marked done */
471         txbuf = &txr->tx_buffers[first];
472         txbuf->eop = txd;
473
474         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
475             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
476         /*
477          * Advance the Transmit Descriptor Tail (TDT); this tells the
478          * hardware that this frame is available to transmit.
479          */
480         ++txr->total_packets;
481         IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
482
483         /* Mark queue as having work */
484         if (txr->busy == 0)
485                 txr->busy = 1;
486
487         return (0);
488 }
489
490
491 /*********************************************************************
492  *
493  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
494  *  the information needed to transmit a packet on the wire. This is
495  *  called only once at attach; setup is done on every reset.
496  *
497  **********************************************************************/
498 int
499 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
500 {
501         struct adapter *adapter = txr->adapter;
502         device_t dev = adapter->dev;
503         struct ixgbe_tx_buf *txbuf;
504         int error, i;
505
506         /*
507          * Setup DMA descriptor areas.
508          */
509         if ((error = bus_dma_tag_create(
510                                bus_get_dma_tag(adapter->dev),   /* parent */
511                                1, 0,            /* alignment, bounds */
512                                BUS_SPACE_MAXADDR,       /* lowaddr */
513                                BUS_SPACE_MAXADDR,       /* highaddr */
514                                NULL, NULL,              /* filter, filterarg */
515                                IXGBE_TSO_SIZE,          /* maxsize */
516                                adapter->num_segs,       /* nsegments */
517                                PAGE_SIZE,               /* maxsegsize */
518                                0,                       /* flags */
519                                NULL,                    /* lockfunc */
520                                NULL,                    /* lockfuncarg */
521                                &txr->txtag))) {
522                 device_printf(dev,"Unable to allocate TX DMA tag\n");
523                 goto fail;
524         }
525
526         if (!(txr->tx_buffers =
527             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
530                 error = ENOMEM;
531                 goto fail;
532         }
533
534         /* Create the descriptor buffer dma maps */
535         txbuf = txr->tx_buffers;
536         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
538                 if (error != 0) {
539                         device_printf(dev, "Unable to create TX DMA map\n");
540                         goto fail;
541                 }
542         }
543
544         return 0;
545 fail:
546         /* Free everything; this handles the case where we failed partway through */
547         ixgbe_free_transmit_structures(adapter);
548         return (error);
549 }
550
551 /*********************************************************************
552  *
553  *  Initialize a transmit ring.
554  *
555  **********************************************************************/
556 static void
557 ixgbe_setup_transmit_ring(struct tx_ring *txr)
558 {
559         struct adapter *adapter = txr->adapter;
560         struct ixgbe_tx_buf *txbuf;
561         int i;
562 #ifdef DEV_NETMAP
563         struct netmap_adapter *na = NA(adapter->ifp);
564         struct netmap_slot *slot;
565 #endif /* DEV_NETMAP */
566
567         /* Clear the old ring contents */
568         IXGBE_TX_LOCK(txr);
569 #ifdef DEV_NETMAP
570         /*
571          * (under lock): if in netmap mode, do some consistency
572          * checks and set slot to entry 0 of the netmap ring.
573          */
574         slot = netmap_reset(na, NR_TX, txr->me, 0);
575 #endif /* DEV_NETMAP */
576         bzero((void *)txr->tx_base,
577               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
578         /* Reset indices */
579         txr->next_avail_desc = 0;
580         txr->next_to_clean = 0;
581
582         /* Free any existing tx buffers. */
583         txbuf = txr->tx_buffers;
584         for (i = 0; i < txr->num_desc; i++, txbuf++) {
585                 if (txbuf->m_head != NULL) {
586                         bus_dmamap_sync(txr->txtag, txbuf->map,
587                             BUS_DMASYNC_POSTWRITE);
588                         bus_dmamap_unload(txr->txtag, txbuf->map);
589                         m_freem(txbuf->m_head);
590                         txbuf->m_head = NULL;
591                 }
592 #ifdef DEV_NETMAP
593                 /*
594                  * In netmap mode, set the map for the packet buffer.
595                  * NOTE: Some drivers (not this one) also need to set
596                  * the physical buffer address in the NIC ring.
597                  * Slots in the netmap ring (indexed by "si") are
598                  * kring->nkr_hwofs positions "ahead" wrt the
599                  * corresponding slot in the NIC ring. In some drivers
600                  * (not here) nkr_hwofs can be negative. Function
601                  * netmap_idx_n2k() handles wraparounds properly.
602                  */
603                 if (slot) {
604                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
605                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
606                 }
607 #endif /* DEV_NETMAP */
608                 /* Clear the EOP descriptor pointer */
609                 txbuf->eop = NULL;
610         }
611
612 #ifdef IXGBE_FDIR
613         /* Set the rate at which we sample packets */
614         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615                 txr->atr_sample = atr_sample_rate;
616 #endif
617
618         /* Set number of descriptors available */
619         txr->tx_avail = adapter->num_tx_desc;
620
621         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623         IXGBE_TX_UNLOCK(txr);
624 }
625
626 /*********************************************************************
627  *
628  *  Initialize all transmit rings.
629  *
630  **********************************************************************/
631 int
632 ixgbe_setup_transmit_structures(struct adapter *adapter)
633 {
634         struct tx_ring *txr = adapter->tx_rings;
635
636         for (int i = 0; i < adapter->num_queues; i++, txr++)
637                 ixgbe_setup_transmit_ring(txr);
638
639         return (0);
640 }
641
642 /*********************************************************************
643  *
644  *  Free all transmit rings.
645  *
646  **********************************************************************/
647 void
648 ixgbe_free_transmit_structures(struct adapter *adapter)
649 {
650         struct tx_ring *txr = adapter->tx_rings;
651
652         for (int i = 0; i < adapter->num_queues; i++, txr++) {
653                 IXGBE_TX_LOCK(txr);
654                 ixgbe_free_transmit_buffers(txr);
655                 ixgbe_dma_free(adapter, &txr->txdma);
656                 IXGBE_TX_UNLOCK(txr);
657                 IXGBE_TX_LOCK_DESTROY(txr);
658         }
659         free(adapter->tx_rings, M_DEVBUF);
660 }
661
662 /*********************************************************************
663  *
664  *  Free transmit ring related data structures.
665  *
666  **********************************************************************/
667 static void
668 ixgbe_free_transmit_buffers(struct tx_ring *txr)
669 {
670         struct adapter *adapter = txr->adapter;
671         struct ixgbe_tx_buf *tx_buffer;
672         int             i;
673
674         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
675
676         if (txr->tx_buffers == NULL)
677                 return;
678
679         tx_buffer = txr->tx_buffers;
680         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
681                 if (tx_buffer->m_head != NULL) {
682                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
683                             BUS_DMASYNC_POSTWRITE);
684                         bus_dmamap_unload(txr->txtag,
685                             tx_buffer->map);
686                         m_freem(tx_buffer->m_head);
687                         tx_buffer->m_head = NULL;
688                         if (tx_buffer->map != NULL) {
689                                 bus_dmamap_destroy(txr->txtag,
690                                     tx_buffer->map);
691                                 tx_buffer->map = NULL;
692                         }
693                 } else if (tx_buffer->map != NULL) {
694                         bus_dmamap_unload(txr->txtag,
695                             tx_buffer->map);
696                         bus_dmamap_destroy(txr->txtag,
697                             tx_buffer->map);
698                         tx_buffer->map = NULL;
699                 }
700         }
701 #ifdef IXGBE_LEGACY_TX
702         if (txr->br != NULL)
703                 buf_ring_free(txr->br, M_DEVBUF);
704 #endif
705         if (txr->tx_buffers != NULL) {
706                 free(txr->tx_buffers, M_DEVBUF);
707                 txr->tx_buffers = NULL;
708         }
709         if (txr->txtag != NULL) {
710                 bus_dma_tag_destroy(txr->txtag);
711                 txr->txtag = NULL;
712         }
713         return;
714 }
715
716 /*********************************************************************
717  *
718  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
719  *
720  **********************************************************************/
721
722 static int
723 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724     u32 *cmd_type_len, u32 *olinfo_status)
725 {
726         struct adapter *adapter = txr->adapter;
727         struct ixgbe_adv_tx_context_desc *TXD;
728         struct ether_vlan_header *eh;
729         struct ip *ip;
730         struct ip6_hdr *ip6;
731         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
732         int     ehdrlen, ip_hlen = 0;
733         u16     etype;
734         u8      ipproto = 0;
735         int     offload = TRUE;
736         int     ctxd = txr->next_avail_desc;
737         u16     vtag = 0;
738
739         /* First check if TSO is to be used */
740         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
741                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
742
743         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
744                 offload = FALSE;
745
746         /* Indicate the whole packet as payload when not doing TSO */
747         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
748
749         /* Now ready a context descriptor */
750         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
751
752         /*
753         ** In advanced descriptors the vlan tag must 
754         ** be placed into the context descriptor. Hence
755         ** we need to make one even if not doing offloads.
756         */
757         if (mp->m_flags & M_VLANTAG) {
758                 vtag = htole16(mp->m_pkthdr.ether_vtag);
759                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
760         } 
761         else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
762                 return (0);
763
764         /*
765          * Determine where frame payload starts.
766          * Jump over vlan headers if already present,
767          * helpful for QinQ too.
768          */
769         eh = mtod(mp, struct ether_vlan_header *);
770         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
771                 etype = ntohs(eh->evl_proto);
772                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
773         } else {
774                 etype = ntohs(eh->evl_encap_proto);
775                 ehdrlen = ETHER_HDR_LEN;
776         }
777
778         /* Set the ether header length */
779         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
780
781         if (offload == FALSE)
782                 goto no_offloads;
783
784         switch (etype) {
785                 case ETHERTYPE_IP:
786                         ip = (struct ip *)(mp->m_data + ehdrlen);
787                         ip_hlen = ip->ip_hl << 2;
788                         ipproto = ip->ip_p;
789                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
790                         break;
791                 case ETHERTYPE_IPV6:
792                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
793                         ip_hlen = sizeof(struct ip6_hdr);
794                         /* XXX-BZ this will go badly in case of ext hdrs. */
795                         ipproto = ip6->ip6_nxt;
796                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
797                         break;
798                 default:
799                         offload = FALSE;
800                         break;
801         }
802
803         vlan_macip_lens |= ip_hlen;
804
805         switch (ipproto) {
806                 case IPPROTO_TCP:
807                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
808                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
809                         break;
810
811                 case IPPROTO_UDP:
812                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
813                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
814                         break;
815
816 #if __FreeBSD_version >= 800000
817                 case IPPROTO_SCTP:
818                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
819                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
820                         break;
821 #endif
822                 default:
823                         offload = FALSE;
824                         break;
825         }
826
827         if (offload) /* For the TX descriptor setup */
828                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
829
830 no_offloads:
831         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
832
833         /* Now copy bits into descriptor */
834         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
835         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
836         TXD->seqnum_seed = htole32(0);
837         TXD->mss_l4len_idx = htole32(0);
838
839         /* We've consumed the first desc, adjust counters */
840         if (++ctxd == txr->num_desc)
841                 ctxd = 0;
842         txr->next_avail_desc = ctxd;
843         --txr->tx_avail;
844
845         return (0);
846 }
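
/*
 * Worked example of the field packing above, with illustrative values: for
 * an untagged IPv4/TCP frame with CSUM_TCP requested, ehdrlen = ETHER_HDR_LEN
 * (14) and ip_hlen = 20, the context descriptor ends up with
 *
 *     vlan_macip_lens = (14 << IXGBE_ADVTXD_MACLEN_SHIFT) | 20
 *     type_tucmd_mlhl = IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT |
 *                       IXGBE_ADVTXD_TUCMD_IPV4 | IXGBE_ADVTXD_TUCMD_L4T_TCP
 *
 * and olinfo_status carries IXGBE_TXD_POPTS_TXSM << 8, so the data
 * descriptors that follow request L4 checksum insertion.
 */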
847
848 /**********************************************************************
849  *
850  *  Setup work for hardware segmentation offload (TSO) on
851  *  adapters using advanced tx descriptors
852  *
853  **********************************************************************/
854 static int
855 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
856     u32 *cmd_type_len, u32 *olinfo_status)
857 {
858         struct ixgbe_adv_tx_context_desc *TXD;
859         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
860         u32 mss_l4len_idx = 0, paylen;
861         u16 vtag = 0, eh_type;
862         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
863         struct ether_vlan_header *eh;
864 #ifdef INET6
865         struct ip6_hdr *ip6;
866 #endif
867 #ifdef INET
868         struct ip *ip;
869 #endif
870         struct tcphdr *th;
871
872
873         /*
874          * Determine where frame payload starts.
875          * Jump over vlan headers if already present
876          */
877         eh = mtod(mp, struct ether_vlan_header *);
878         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
879                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
880                 eh_type = eh->evl_proto;
881         } else {
882                 ehdrlen = ETHER_HDR_LEN;
883                 eh_type = eh->evl_encap_proto;
884         }
885
886         switch (ntohs(eh_type)) {
887 #ifdef INET6
888         case ETHERTYPE_IPV6:
889                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
890                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
891                 if (ip6->ip6_nxt != IPPROTO_TCP)
892                         return (ENXIO);
893                 ip_hlen = sizeof(struct ip6_hdr);
894                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
895                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
896                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
897                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
898                 break;
899 #endif
900 #ifdef INET
901         case ETHERTYPE_IP:
902                 ip = (struct ip *)(mp->m_data + ehdrlen);
903                 if (ip->ip_p != IPPROTO_TCP)
904                         return (ENXIO);
905                 ip->ip_sum = 0;
906                 ip_hlen = ip->ip_hl << 2;
907                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
908                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
909                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
910                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
911                 /* Tell transmit desc to also do IPv4 checksum. */
912                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
913                 break;
914 #endif
915         default:
916                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
917                     __func__, ntohs(eh_type));
918                 break;
919         }
920
921         ctxd = txr->next_avail_desc;
922         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
923
924         tcp_hlen = th->th_off << 2;
925
926         /* This is used in the transmit desc in encap */
927         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
928
929         /* VLAN MACLEN IPLEN */
930         if (mp->m_flags & M_VLANTAG) {
931                 vtag = htole16(mp->m_pkthdr.ether_vtag);
932                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
933         }
934
935         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
936         vlan_macip_lens |= ip_hlen;
937         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
938
939         /* ADV DTYPE TUCMD */
940         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
941         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
942         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
943
944         /* MSS L4LEN IDX */
945         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
946         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
947         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
948
949         TXD->seqnum_seed = htole32(0);
950
951         if (++ctxd == txr->num_desc)
952                 ctxd = 0;
953
954         txr->tx_avail--;
955         txr->next_avail_desc = ctxd;
956         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
957         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
958         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
959         ++txr->tso_tx;
960         return (0);
961 }
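
/*
 * Worked example of the TSO arithmetic above, with illustrative numbers:
 * for an untagged IPv4/TCP chain with m_pkthdr.len = 32834, ehdrlen = 14,
 * ip_hlen = 20 and tcp_hlen = 20, paylen = 32834 - 14 - 20 - 20 = 32780.
 * With tso_segsz = 1460 the hardware cuts this into howmany(32780, 1460)
 * = 23 wire frames, replicating the headers and completing the checksums
 * that were seeded above for each segment.
 */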
962
963
964 /**********************************************************************
965  *
966  *  Examine each tx_buffer in the used queue. If the hardware is done
967  *  processing the packet then free associated resources. The
968  *  tx_buffer is put back on the free queue.
969  *
970  **********************************************************************/
971 void
972 ixgbe_txeof(struct tx_ring *txr)
973 {
974 #ifdef DEV_NETMAP
975         struct adapter          *adapter = txr->adapter;
976         struct ifnet            *ifp = adapter->ifp;
977 #endif
978         u32                     work, processed = 0;
979         u16                     limit = txr->process_limit;
980         struct ixgbe_tx_buf     *buf;
981         union ixgbe_adv_tx_desc *txd;
982
983         mtx_assert(&txr->tx_mtx, MA_OWNED);
984
985 #ifdef DEV_NETMAP
986         if (ifp->if_capenable & IFCAP_NETMAP) {
987                 struct netmap_adapter *na = NA(ifp);
988                 struct netmap_kring *kring = &na->tx_rings[txr->me];
989                 txd = txr->tx_base;
990                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
991                     BUS_DMASYNC_POSTREAD);
992                 /*
993                  * In netmap mode, all the work is done in the context
994                  * of the client thread. Interrupt handlers only wake up
995                  * clients, which may be sleeping on individual rings
996                  * or on a global resource for all rings.
997                  * To implement tx interrupt mitigation, we wake up the client
998                  * thread roughly every half ring, even if the NIC interrupts
999                  * more frequently. This is implemented as follows:
1000                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
1001                  *   the slot that should wake up the thread (nkr_num_slots
1002                  *   means the user thread should not be woken up);
1003                  * - the driver ignores tx interrupts unless netmap_mitigate=0
1004                  *   or the slot has the DD bit set.
1005                  */
1006                 if (!netmap_mitigate ||
1007                     (kring->nr_kflags < kring->nkr_num_slots &&
1008                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1009                         netmap_tx_irq(ifp, txr->me);
1010                 }
1011                 return;
1012         }
1013 #endif /* DEV_NETMAP */
1014
1015         if (txr->tx_avail == txr->num_desc) {
1016                 txr->busy = 0;
1017                 return;
1018         }
1019
1020         /* Get work starting point */
1021         work = txr->next_to_clean;
1022         buf = &txr->tx_buffers[work];
1023         txd = &txr->tx_base[work];
1024         work -= txr->num_desc; /* The distance to ring end */
1025         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1026             BUS_DMASYNC_POSTREAD);
1027
1028         do {
1029                 union ixgbe_adv_tx_desc *eop= buf->eop;
1030                 if (eop == NULL) /* No work */
1031                         break;
1032
1033                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1034                         break;  /* I/O not complete */
1035
1036                 if (buf->m_head) {
1037                         txr->bytes +=
1038                             buf->m_head->m_pkthdr.len;
1039                         bus_dmamap_sync(txr->txtag,
1040                             buf->map,
1041                             BUS_DMASYNC_POSTWRITE);
1042                         bus_dmamap_unload(txr->txtag,
1043                             buf->map);
1044                         m_freem(buf->m_head);
1045                         buf->m_head = NULL;
1046                 }
1047                 buf->eop = NULL;
1048                 ++txr->tx_avail;
1049
1050                 /* We clean the range if multi segment */
1051                 while (txd != eop) {
1052                         ++txd;
1053                         ++buf;
1054                         ++work;
1055                         /* wrap the ring? */
1056                         if (__predict_false(!work)) {
1057                                 work -= txr->num_desc;
1058                                 buf = txr->tx_buffers;
1059                                 txd = txr->tx_base;
1060                         }
1061                         if (buf->m_head) {
1062                                 txr->bytes +=
1063                                     buf->m_head->m_pkthdr.len;
1064                                 bus_dmamap_sync(txr->txtag,
1065                                     buf->map,
1066                                     BUS_DMASYNC_POSTWRITE);
1067                                 bus_dmamap_unload(txr->txtag,
1068                                     buf->map);
1069                                 m_freem(buf->m_head);
1070                                 buf->m_head = NULL;
1071                         }
1072                         ++txr->tx_avail;
1073                         buf->eop = NULL;
1074
1075                 }
1076                 ++txr->packets;
1077                 ++processed;
1078
1079                 /* Try the next packet */
1080                 ++txd;
1081                 ++buf;
1082                 ++work;
1083                 /* reset with a wrap */
1084                 if (__predict_false(!work)) {
1085                         work -= txr->num_desc;
1086                         buf = txr->tx_buffers;
1087                         txd = txr->tx_base;
1088                 }
1089                 prefetch(txd);
1090         } while (__predict_true(--limit));
1091
1092         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1093             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1094
1095         work += txr->num_desc;
1096         txr->next_to_clean = work;
1097
1098         /*
1099         ** Queue hang detection: we know there is
1100         ** work outstanding or the early return above
1101         ** would have been taken, so if nothing was
1102         ** cleaned, increment busy; local_timer will
1103         ** check this count and mark the queue HUNG
1104         ** once it exceeds the maximum attempts.
1105         */
1106         if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1107                 ++txr->busy;
1108         /*
1109         ** If anything was cleaned, reset the state to 1;
1110         ** note this clears HUNG if it was set.
1111         */
1112         if (processed)
1113                 txr->busy = 1;
1114
1115         if (txr->tx_avail == txr->num_desc)
1116                 txr->busy = 0;
1117
1118         return;
1119 }
1120
1121
1122 #ifdef IXGBE_FDIR
1123 /*
1124 ** This routine parses packet headers so that Flow
1125 ** Director can build a hashed filter table entry,
1126 ** allowing traffic flows to be identified and kept
1127 ** on the same CPU.  Doing this for every packet
1128 ** would be a performance hit, so only one packet
1129 ** in atr_sample_rate is probed.
1130 */
1131 static void
1132 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1133 {
1134         struct adapter                  *adapter = txr->adapter;
1135         struct ix_queue                 *que;
1136         struct ip                       *ip;
1137         struct tcphdr                   *th;
1138         struct udphdr                   *uh;
1139         struct ether_vlan_header        *eh;
1140         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
1141         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
1142         int                             ehdrlen, ip_hlen;
1143         u16                             etype;
1144
1145         eh = mtod(mp, struct ether_vlan_header *);
1146         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1147                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1148                 etype = eh->evl_proto;
1149         } else {
1150                 ehdrlen = ETHER_HDR_LEN;
1151                 etype = eh->evl_encap_proto;
1152         }
1153
1154         /* Only handling IPv4 */
1155         if (etype != htons(ETHERTYPE_IP))
1156                 return;
1157
1158         ip = (struct ip *)(mp->m_data + ehdrlen);
1159         ip_hlen = ip->ip_hl << 2;
1160
1161         /* check if we're UDP or TCP */
1162         switch (ip->ip_p) {
1163         case IPPROTO_TCP:
1164                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1165                 /* src and dst are inverted */
1166                 common.port.dst ^= th->th_sport;
1167                 common.port.src ^= th->th_dport;
1168                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1169                 break;
1170         case IPPROTO_UDP:
1171                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1172                 /* src and dst are inverted */
1173                 common.port.dst ^= uh->uh_sport;
1174                 common.port.src ^= uh->uh_dport;
1175                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1176                 break;
1177         default:
1178                 return;
1179         }
1180
1181         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1182         if (mp->m_pkthdr.ether_vtag)
1183                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1184         else
1185                 common.flex_bytes ^= etype;
1186         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1187
1188         que = &adapter->queues[txr->me];
1189         /*
1190         ** This assumes the Rx queue and Tx
1191         ** queue are bound to the same CPU
1192         */
1193         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1194             input, common, que->msix);
1195 }
1196 #endif /* IXGBE_FDIR */
1197
1198 /*
1199 ** Used to detect a descriptor that has
1200 ** been merged by Hardware RSC.
1201 */
1202 static inline u32
1203 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1204 {
1205         return (le32toh(rx->wb.lower.lo_dword.data) &
1206             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1207 }
1208
1209 /*********************************************************************
1210  *
1211  *  Initialize the Hardware RSC (LRO) feature on 82599
1212  *  for an RX ring; this is toggled by the LRO capability
1213  *  even though it is transparent to the stack.
1214  *
1215  *  NOTE: since this HW feature only works with IPv4 and
1216  *        our testing has shown software LRO to be equally
1217  *        effective, it is disabled by default.
1218  *
1219  **********************************************************************/
1220 static void
1221 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1222 {
1223         struct  adapter         *adapter = rxr->adapter;
1224         struct  ixgbe_hw        *hw = &adapter->hw;
1225         u32                     rscctrl, rdrxctl;
1226
1227         /* If turning LRO/RSC off we need to disable it */
1228         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1229                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1230                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                     /* Write the cleared bit back; the read-modify above is otherwise a no-op */
                     IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1231                 return;
1232         }
1233
1234         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1235         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1236 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1237         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1238 #endif /* DEV_NETMAP */
1239         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1240         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1241         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1242
1243         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1244         rscctrl |= IXGBE_RSCCTL_RSCEN;
1245         /*
1246         ** Limit the number of descriptors that can be merged so the
1247         ** combined size never exceeds 64KB; see the arithmetic after this function.
1248         */
1249         if (rxr->mbuf_sz == MCLBYTES)
1250                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1251         else if (rxr->mbuf_sz == MJUMPAGESIZE)
1252                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1253         else if (rxr->mbuf_sz == MJUM9BYTES)
1254                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1255         else  /* Using 16K cluster */
1256                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1257
1258         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1259
1260         /* Enable TCP header recognition */
1261         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1262             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1263             IXGBE_PSRTYPE_TCPHDR));
1264
1265         /* Disable RSC for ACK packets */
1266         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1267             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1268
1269         rxr->hw_rsc = TRUE;
1270 }
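
/*
 * The MAXDESC selection above keeps the largest possible RSC merge under the
 * 64KB limit.  With the standard cluster sizes the worst cases work out to
 * (illustrative arithmetic; MJUMPAGESIZE assumed to be 4KB here):
 *
 *     MCLBYTES     (2048)  * 16 = 32KB
 *     MJUMPAGESIZE (4096)  *  8 = 32KB
 *     MJUM9BYTES   (9216)  *  4 = 36KB
 *     MJUM16BYTES  (16384) *  1 = 16KB
 */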
1271 /*********************************************************************
1272  *
1273  *  Refresh mbuf buffers for RX descriptor rings
1274  *   - keeps its own state, so discards due to resource
1275  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1276  *     it simply returns, keeping its placeholder, and can
1277  *     be called again later to retry.
1278  *
1279  **********************************************************************/
1280 static void
1281 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1282 {
1283         struct adapter          *adapter = rxr->adapter;
1284         bus_dma_segment_t       seg[1];
1285         struct ixgbe_rx_buf     *rxbuf;
1286         struct mbuf             *mp;
1287         int                     i, j, nsegs, error;
1288         bool                    refreshed = FALSE;
1289
1290         i = j = rxr->next_to_refresh;
1291         /* Control the loop with one beyond */
1292         if (++j == rxr->num_desc)
1293                 j = 0;
1294
1295         while (j != limit) {
1296                 rxbuf = &rxr->rx_buffers[i];
1297                 if (rxbuf->buf == NULL) {
1298                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1299                             M_PKTHDR, rxr->mbuf_sz);
1300                         if (mp == NULL)
1301                                 goto update;
1302                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1303                                 m_adj(mp, ETHER_ALIGN);
1304                 } else
1305                         mp = rxbuf->buf;
1306
1307                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1308
1309                 /* If we're dealing with an mbuf that was copied rather
1310                  * than replaced, there's no need to go through busdma.
1311                  */
1312                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1313                         /* Get the memory mapping */
1314                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1315                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1316                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1317                         if (error != 0) {
1318                                 printf("Refresh mbufs: payload dmamap load"
1319                                     " failure - %d\n", error);
1320                                 m_free(mp);
1321                                 rxbuf->buf = NULL;
1322                                 goto update;
1323                         }
1324                         rxbuf->buf = mp;
1325                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1326                             BUS_DMASYNC_PREREAD);
1327                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1328                             htole64(seg[0].ds_addr);
1329                 } else {
1330                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1331                         rxbuf->flags &= ~IXGBE_RX_COPY;
1332                 }
1333
1334                 refreshed = TRUE;
1335                 /* Next is precalculated */
1336                 i = j;
1337                 rxr->next_to_refresh = i;
1338                 if (++j == rxr->num_desc)
1339                         j = 0;
1340         }
1341 update:
1342         if (refreshed) /* Update hardware tail index */
1343                 IXGBE_WRITE_REG(&adapter->hw,
1344                     rxr->tail, rxr->next_to_refresh);
1345         return;
1346 }
1347
1348 /*********************************************************************
1349  *
1350  *  Allocate memory for rx_buffer structures. Since we use one
1351  *  rx_buffer per received packet, the maximum number of rx_buffer's
1352  *  that we'll need is equal to the number of receive descriptors
1353  *  that we've allocated.
1354  *
1355  **********************************************************************/
1356 int
1357 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1358 {
1359         struct  adapter         *adapter = rxr->adapter;
1360         device_t                dev = adapter->dev;
1361         struct ixgbe_rx_buf     *rxbuf;
1362         int                     i, bsize, error;
1363
1364         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1365         if (!(rxr->rx_buffers =
1366             (struct ixgbe_rx_buf *) malloc(bsize,
1367             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1368                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1369                 error = ENOMEM;
1370                 goto fail;
1371         }
1372
1373         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1374                                    1, 0,        /* alignment, bounds */
1375                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1376                                    BUS_SPACE_MAXADDR,   /* highaddr */
1377                                    NULL, NULL,          /* filter, filterarg */
1378                                    MJUM16BYTES,         /* maxsize */
1379                                    1,                   /* nsegments */
1380                                    MJUM16BYTES,         /* maxsegsize */
1381                                    0,                   /* flags */
1382                                    NULL,                /* lockfunc */
1383                                    NULL,                /* lockfuncarg */
1384                                    &rxr->ptag))) {
1385                 device_printf(dev, "Unable to create RX DMA tag\n");
1386                 goto fail;
1387         }
1388
1389         for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
1390                 rxbuf = &rxr->rx_buffers[i];
1391                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1392                 if (error) {
1393                         device_printf(dev, "Unable to create RX dma map\n");
1394                         goto fail;
1395                 }
1396         }
1397
1398         return (0);
1399
1400 fail:
1401         /* Frees all, but can handle partial completion */
1402         ixgbe_free_receive_structures(adapter);
1403         return (error);
1404 }
1405
1406
1407 static void     
1408 ixgbe_free_receive_ring(struct rx_ring *rxr)
1409 {
1410         struct ixgbe_rx_buf       *rxbuf;
1411         int i;
1412
1413         for (i = 0; i < rxr->num_desc; i++) {
1414                 rxbuf = &rxr->rx_buffers[i];
1415                 if (rxbuf->buf != NULL) {
1416                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1417                             BUS_DMASYNC_POSTREAD);
1418                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1419                         rxbuf->buf->m_flags |= M_PKTHDR;
1420                         m_freem(rxbuf->buf);
1421                         rxbuf->buf = NULL;
1422                         rxbuf->flags = 0;
1423                 }
1424         }
1425 }
1426
1427
1428 /*********************************************************************
1429  *
1430  *  Initialize a receive ring and its buffers.
1431  *
1432  **********************************************************************/
1433 static int
1434 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1435 {
1436         struct  adapter         *adapter;
1437         struct ifnet            *ifp;
1438         device_t                dev;
1439         struct ixgbe_rx_buf     *rxbuf;
1440         bus_dma_segment_t       seg[1];
1441         struct lro_ctrl         *lro = &rxr->lro;
1442         int                     rsize, nsegs, error = 0;
1443 #ifdef DEV_NETMAP
1444         struct netmap_adapter *na = NA(rxr->adapter->ifp);
1445         struct netmap_slot *slot;
1446 #endif /* DEV_NETMAP */
1447
1448         adapter = rxr->adapter;
1449         ifp = adapter->ifp;
1450         dev = adapter->dev;
1451
1452         /* Clear the ring contents */
1453         IXGBE_RX_LOCK(rxr);
1454 #ifdef DEV_NETMAP
1455         /* same as in ixgbe_setup_transmit_ring() */
1456         slot = netmap_reset(na, NR_RX, rxr->me, 0);
1457 #endif /* DEV_NETMAP */
1458         rsize = roundup2(adapter->num_rx_desc *
1459             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1460         bzero((void *)rxr->rx_base, rsize);
1461         /* Cache the size */
1462         rxr->mbuf_sz = adapter->rx_mbuf_sz;
1463
1464         /* Free current RX buffer structs and their mbufs */
1465         ixgbe_free_receive_ring(rxr);
1466
1467         /* Now replenish the mbufs */
1468         for (int j = 0; j != rxr->num_desc; ++j) {
1469                 struct mbuf     *mp;
1470
1471                 rxbuf = &rxr->rx_buffers[j];
1472 #ifdef DEV_NETMAP
1473                 /*
1474                  * In netmap mode, fill the map and set the buffer
1475                  * address in the NIC ring, considering the offset
1476                  * between the netmap and NIC rings (see comment in
1477                  * ixgbe_setup_transmit_ring() ). No need to allocate
1478                  * an mbuf, so end the block with a continue;
1479                  */
1480                 if (slot) {
1481                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1482                         uint64_t paddr;
1483                         void *addr;
1484
1485                         addr = PNMB(na, slot + sj, &paddr);
1486                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1487                         /* Update descriptor and the cached value */
1488                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1489                         rxbuf->addr = htole64(paddr);
1490                         continue;
1491                 }
1492 #endif /* DEV_NETMAP */
1493                 rxbuf->flags = 0; 
1494                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1495                     M_PKTHDR, adapter->rx_mbuf_sz);
1496                 if (rxbuf->buf == NULL) {
1497                         error = ENOBUFS;
1498                         goto fail;
1499                 }
1500                 mp = rxbuf->buf;
1501                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1502                 /* Get the memory mapping */
1503                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1504                     rxbuf->pmap, mp, seg,
1505                     &nsegs, BUS_DMA_NOWAIT);
1506                 if (error != 0)
1507                         goto fail;
1508                 bus_dmamap_sync(rxr->ptag,
1509                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
1510                 /* Update the descriptor and the cached value */
1511                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1512                 rxbuf->addr = htole64(seg[0].ds_addr);
1513         }
1514
1515
1516         /* Setup our descriptor indices */
1517         rxr->next_to_check = 0;
1518         rxr->next_to_refresh = 0;
1519         rxr->lro_enabled = FALSE;
1520         rxr->rx_copies = 0;
1521         rxr->rx_bytes = 0;
1522         rxr->vtag_strip = FALSE;
1523
1524         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1525             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1526
1527         /*
1528         ** Now set up the LRO interface:
1529         */
1530         if (ixgbe_rsc_enable)
1531                 ixgbe_setup_hw_rsc(rxr);
1532         else if (ifp->if_capenable & IFCAP_LRO) {
1533                 int err = tcp_lro_init(lro);
1534                 if (err) {
1535                         device_printf(dev, "LRO Initialization failed!\n");
1536                         goto fail;
1537                 }
1538                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1539                 rxr->lro_enabled = TRUE;
1540                 lro->ifp = adapter->ifp;
1541         }
1542
1543         IXGBE_RX_UNLOCK(rxr);
1544         return (0);
1545
1546 fail:
1547         ixgbe_free_receive_ring(rxr);
1548         IXGBE_RX_UNLOCK(rxr);
1549         return (error);
1550 }
1551
1552 /*********************************************************************
1553  *
1554  *  Initialize all receive rings.
1555  *
1556  **********************************************************************/
1557 int
1558 ixgbe_setup_receive_structures(struct adapter *adapter)
1559 {
1560         struct rx_ring *rxr = adapter->rx_rings;
1561         int j;
1562
1563         for (j = 0; j < adapter->num_queues; j++, rxr++)
1564                 if (ixgbe_setup_receive_ring(rxr))
1565                         goto fail;
1566
1567         return (0);
1568 fail:
1569         /*
1570          * Free the RX buffers allocated so far; we only handle the
1571          * rings that completed, since the failing case has cleaned
1572          * up after itself. Ring 'j' failed, so it is the terminus.
1573          */
1574         for (int i = 0; i < j; ++i) {
1575                 rxr = &adapter->rx_rings[i];
1576                 ixgbe_free_receive_ring(rxr);
1577         }
1578
1579         return (ENOBUFS);
1580 }
1581
1582
1583 /*********************************************************************
1584  *
1585  *  Free all receive rings.
1586  *
1587  **********************************************************************/
1588 void
1589 ixgbe_free_receive_structures(struct adapter *adapter)
1590 {
1591         struct rx_ring *rxr = adapter->rx_rings;
1592
1593         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1594
1595         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1596                 struct lro_ctrl         *lro = &rxr->lro;
1597                 ixgbe_free_receive_buffers(rxr);
1598                 /* Free LRO memory */
1599                 tcp_lro_free(lro);
1600                 /* Free the ring memory as well */
1601                 ixgbe_dma_free(adapter, &rxr->rxdma);
1602         }
1603
1604         free(adapter->rx_rings, M_DEVBUF);
1605 }
1606
1607
1608 /*********************************************************************
1609  *
1610  *  Free receive ring data structures
1611  *
1612  **********************************************************************/
1613 void
1614 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1615 {
1616         struct adapter          *adapter = rxr->adapter;
1617         struct ixgbe_rx_buf     *rxbuf;
1618
1619         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1620
1621         /* Cleanup any existing buffers */
1622         if (rxr->rx_buffers != NULL) {
1623                 for (int i = 0; i < adapter->num_rx_desc; i++) {
1624                         rxbuf = &rxr->rx_buffers[i];
1625                         if (rxbuf->buf != NULL) {
1626                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1627                                     BUS_DMASYNC_POSTREAD);
1628                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1629                                 rxbuf->buf->m_flags |= M_PKTHDR;
1630                                 m_freem(rxbuf->buf);
1631                         }
1632                         rxbuf->buf = NULL;
1633                         if (rxbuf->pmap != NULL) {
1634                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1635                                 rxbuf->pmap = NULL;
1636                         }
1637                 }
1638                 if (rxr->rx_buffers != NULL) {
1639                         free(rxr->rx_buffers, M_DEVBUF);
1640                         rxr->rx_buffers = NULL;
1641                 }
1642         }
1643
1644         if (rxr->ptag != NULL) {
1645                 bus_dma_tag_destroy(rxr->ptag);
1646                 rxr->ptag = NULL;
1647         }
1648
1649         return;
1650 }
1651
1652 static __inline void
1653 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1654 {
1655
1656         /*
1657          * At the moment LRO is only for IP/TCP packets whose TCP checksum
1658          * has been computed by hardware, and which have no VLAN tag in the
1659          * Ethernet header.  For IPv6 we do not yet support extension headers.
1660          */
1661         if (rxr->lro_enabled &&
1662             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1663             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1664             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1665             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1666             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1667             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1668             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1669             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1670                 /*
1671                  * Try to queue the packet for LRO; fall through
1672                  * and send it to the stack if:
1673                  *  - there are no LRO resources, or
1674                  *  - the LRO enqueue fails
1675                  */
1676                 if (rxr->lro.lro_cnt != 0)
1677                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1678                                 return;
1679         }
1680         IXGBE_RX_UNLOCK(rxr);
1681         (*ifp->if_input)(ifp, m);
1682         IXGBE_RX_LOCK(rxr);
1683 }
1684
1685 static __inline void
1686 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1687 {
1688         struct ixgbe_rx_buf     *rbuf;
1689
1690         rbuf = &rxr->rx_buffers[i];
1691
1692
1693         /*
1694         ** With advanced descriptors the writeback
1695         ** clobbers the buffer addresses, so it's easier
1696         ** to just free the existing mbufs and take
1697         ** the normal refresh path to get new buffers
1698         ** and mappings.
1699         */
1700
1701         if (rbuf->fmp != NULL) {  /* Partial chain? */
1702                 rbuf->fmp->m_flags |= M_PKTHDR;
1703                 m_freem(rbuf->fmp);
1704                 rbuf->fmp = NULL;
1705                 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1706         } else if (rbuf->buf) {
1707                 m_free(rbuf->buf);
1708                 rbuf->buf = NULL;
1709         }
1710         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1711
1712         rbuf->flags = 0;
1713  
1714         return;
1715 }
1716
1717
1718 /*********************************************************************
1719  *
1720  *  This routine executes in interrupt context. It replenishes
1721  *  the mbufs in the descriptor ring and sends data that has
1722  *  been DMA'ed into host memory to the upper layers.
1723  *
1724  *  Return TRUE for more work, FALSE for all clean.
1725  *********************************************************************/
1726 bool
1727 ixgbe_rxeof(struct ix_queue *que)
1728 {
1729         struct adapter          *adapter = que->adapter;
1730         struct rx_ring          *rxr = que->rxr;
1731         struct ifnet            *ifp = adapter->ifp;
1732         struct lro_ctrl         *lro = &rxr->lro;
1733         struct lro_entry        *queued;
1734         int                     i, nextp, processed = 0;
1735         u32                     staterr = 0;
1736         u16                     count = rxr->process_limit;
1737         union ixgbe_adv_rx_desc *cur;
1738         struct ixgbe_rx_buf     *rbuf, *nbuf;
1739         u16                     pkt_info;
1740
1741         IXGBE_RX_LOCK(rxr);
1742
1743 #ifdef DEV_NETMAP
1744         /* Same as the txeof routine: wakeup clients on intr. */
1745         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1746                 IXGBE_RX_UNLOCK(rxr);
1747                 return (FALSE);
1748         }
1749 #endif /* DEV_NETMAP */
1750
1751         for (i = rxr->next_to_check; count != 0;) {
1752                 struct mbuf     *sendmp, *mp;
1753                 u32             rsc, ptype;
1754                 u16             len;
1755                 u16             vtag = 0;
1756                 bool            eop;
1757  
1758                 /* Sync the ring. */
1759                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1760                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1761
1762                 cur = &rxr->rx_base[i];
1763                 staterr = le32toh(cur->wb.upper.status_error);
1764                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1765
1766                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1767                         break;
1768                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1769                         break;
1770
1771                 count--;
1772                 sendmp = NULL;
1773                 nbuf = NULL;
1774                 rsc = 0;
1775                 cur->wb.upper.status_error = 0;
1776                 rbuf = &rxr->rx_buffers[i];
1777                 mp = rbuf->buf;
1778
1779                 len = le16toh(cur->wb.upper.length);
1780                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1781                     IXGBE_RXDADV_PKTTYPE_MASK;
1782                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1783
1784                 /* Make sure bad packets are discarded */
1785                 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1786 #if __FreeBSD_version >= 1100036
1787                         if (IXGBE_IS_VF(adapter))
1788                                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1789 #endif
1790                         rxr->rx_discarded++;
1791                         ixgbe_rx_discard(rxr, i);
1792                         goto next_desc;
1793                 }
1794
1795                 /*
1796                 ** On the 82599, which supports a hardware
1797                 ** LRO (called HW RSC), packets need not be
1798                 ** fragmented across sequential descriptors;
1799                 ** instead, the next descriptor index is
1800                 ** encoded in bits of the descriptor itself.
1801                 ** This also means that we may process more
1802                 ** than one packet at a time, which required
1803                 ** eliminating the global chain pointers in
1804                 ** favor of keeping the chain head in the
1805                 ** buffer struct, as done here.  -jfv
1806                 */
1807                 if (!eop) {
1808                         /*
1809                         ** Figure out the next descriptor
1810                         ** of this frame.
1811                         */
1812                         if (rxr->hw_rsc == TRUE) {
1813                                 rsc = ixgbe_rsc_count(cur);
1814                                 rxr->rsc_num += (rsc - 1);
1815                         }
1816                         if (rsc) { /* Get hardware index */
1817                                 nextp = ((staterr &
1818                                     IXGBE_RXDADV_NEXTP_MASK) >>
1819                                     IXGBE_RXDADV_NEXTP_SHIFT);
1820                         } else { /* Just sequential */
1821                                 nextp = i + 1;
1822                                 if (nextp == adapter->num_rx_desc)
1823                                         nextp = 0;
1824                         }
1825                         nbuf = &rxr->rx_buffers[nextp];
1826                         prefetch(nbuf);
1827                 }
1828                 /*
1829                 ** Rather than using the fmp/lmp global pointers
1830                 ** we now keep the head of a packet chain in the
1831                 ** buffer struct and pass this along from one
1832                 ** descriptor to the next, until we get EOP.
1833                 */
1834                 mp->m_len = len;
1835                 /*
1836                 ** See if there is a stored head; if so, this
1837                 ** is a secondary fragment of an existing chain.
1838                 */
1839                 sendmp = rbuf->fmp;
1840                 if (sendmp != NULL) {  /* secondary frag */
1841                         rbuf->buf = rbuf->fmp = NULL;
1842                         mp->m_flags &= ~M_PKTHDR;
1843                         sendmp->m_pkthdr.len += mp->m_len;
1844                 } else {
1845                         /*
1846                          * Optimize.  This might be a small packet,
1847                          * maybe just a TCP ACK.  Do a fast copy that
1848                          * is cache aligned into a new mbuf, and
1849                          * leave the old mbuf+cluster for re-use.
1850                          */
1851                         if (eop && len <= IXGBE_RX_COPY_LEN) {
1852                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1853                                 if (sendmp != NULL) {
1854                                         sendmp->m_data +=
1855                                             IXGBE_RX_COPY_ALIGN;
1856                                         ixgbe_bcopy(mp->m_data,
1857                                             sendmp->m_data, len);
1858                                         sendmp->m_len = len;
1859                                         rxr->rx_copies++;
1860                                         rbuf->flags |= IXGBE_RX_COPY;
1861                                 }
1862                         }
1863                         if (sendmp == NULL) {
1864                                 rbuf->buf = rbuf->fmp = NULL;
1865                                 sendmp = mp;
1866                         }
1867
1868                         /* first desc of a non-ps chain */
1869                         sendmp->m_flags |= M_PKTHDR;
1870                         sendmp->m_pkthdr.len = mp->m_len;
1871                 }
1872                 ++processed;
1873
1874                 /* Pass the head pointer on */
1875                 if (eop == 0) {
1876                         nbuf->fmp = sendmp;
1877                         sendmp = NULL;
1878                         mp->m_next = nbuf->buf;
1879                 } else { /* Sending this frame */
1880                         sendmp->m_pkthdr.rcvif = ifp;
1881                         rxr->rx_packets++;
1882                         /* capture data for AIM */
1883                         rxr->bytes += sendmp->m_pkthdr.len;
1884                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1885                         /* Process vlan info */
1886                         if ((rxr->vtag_strip) &&
1887                             (staterr & IXGBE_RXD_STAT_VP))
1888                                 vtag = le16toh(cur->wb.upper.vlan);
1889                         if (vtag) {
1890                                 sendmp->m_pkthdr.ether_vtag = vtag;
1891                                 sendmp->m_flags |= M_VLANTAG;
1892                         }
1893                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1894                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
1895
1896                         /*
1897                          * In the multiqueue case the RXCSUM.PCSD bit is
1898                          * set and never cleared, which means the RSS
1899                          * hash is available for use.
1900                          */
1901                         if (adapter->num_queues > 1) {
1902                                 sendmp->m_pkthdr.flowid =
1903                                         le32toh(cur->wb.lower.hi_dword.rss);
1904                                 /*
1905                                  * Full RSS support is not available in
1906                                  * FreeBSD 10, so set the hash type
1907                                  * to OPAQUE.
1908                                  */
1909                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1910                         } else {
1911 #if __FreeBSD_version >= 800000
1912                                 sendmp->m_pkthdr.flowid = que->msix;
1913                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1914 #endif /* FreeBSD_version */
1915                         }
1916                 }
1917 next_desc:
1918                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1919                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1920
1921                 /* Advance our pointers to the next descriptor. */
1922                 if (++i == rxr->num_desc)
1923                         i = 0;
1924
1925                 /* Now send to the stack or do LRO */
1926                 if (sendmp != NULL) {
1927                         rxr->next_to_check = i;
1928                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1929                         i = rxr->next_to_check;
1930                 }
1931
1932                 /* Every 8 descriptors we go to refresh mbufs */
1933                 if (processed == 8) {
1934                         ixgbe_refresh_mbufs(rxr, i);
1935                         processed = 0;
1936                 }
1937         }
1938
1939         /* Refresh any remaining buf structs */
1940         if (ixgbe_rx_unrefreshed(rxr))
1941                 ixgbe_refresh_mbufs(rxr, i);
1942
1943         rxr->next_to_check = i;
1944
1945         /*
1946          * Flush any outstanding LRO work
1947          */
1948         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1949                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1950                 tcp_lro_flush(lro, queued);
1951         }
1952
1953         IXGBE_RX_UNLOCK(rxr);
1954
1955         /*
1956         ** Still have cleaning to do?
1957         */
1958         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1959                 return (TRUE);
1960         else
1961                 return (FALSE);
1962 }
1963
1964
1965 /*********************************************************************
1966  *
1967  *  Verify that the hardware indicated that the checksum is valid.
1968  *  Inform the stack about the status of checksum so that stack
1969  *  doesn't spend time verifying the checksum.
1970  *
1971  *********************************************************************/
1972 static void
1973 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1974 {
1975         u16     status = (u16) staterr;
1976         u8      errors = (u8) (staterr >> 24);
1977         bool    sctp = FALSE;
1978
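        /*
         * Note whether this is an SCTP packet (and not an EtherType
         * filter match), so the SCTP checksum flag can be used below.
         */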
1979         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1980             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1981                 sctp = TRUE;
1982
1983         if (status & IXGBE_RXD_STAT_IPCS) {
1984                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
1985                         /* IP Checksum Good */
1986                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1987                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1988
1989                 } else
1990                         mp->m_pkthdr.csum_flags = 0;
1991         }
1992         if (status & IXGBE_RXD_STAT_L4CS) {
1993                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1994 #if __FreeBSD_version >= 800000
1995                 if (sctp)
1996                         type = CSUM_SCTP_VALID;
1997 #endif
1998                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1999                         mp->m_pkthdr.csum_flags |= type;
2000                         if (!sctp)
2001                                 mp->m_pkthdr.csum_data = htons(0xffff);
2002                 } 
2003         }
2004         return;
2005 }
2006
2007 /********************************************************************
2008  * Manage DMA'able memory.
2009  *******************************************************************/
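/*
 * bus_dmamap_load() callback: on success, hand the bus address of the
 * single DMA segment back to the caller through 'arg'.
 */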
2010 static void
2011 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2012 {
2013         if (error)
2014                 return;
2015         *(bus_addr_t *) arg = segs->ds_addr;
2016         return;
2017 }
2018
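/*
 * Allocate a DMA-able memory region of 'size' bytes: create a tag,
 * allocate and map the memory, and record the virtual and bus
 * addresses in 'dma' for the caller.
 */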
2019 int
2020 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2021                 struct ixgbe_dma_alloc *dma, int mapflags)
2022 {
2023         device_t dev = adapter->dev;
2024         int             r;
2025
2026         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2027                                DBA_ALIGN, 0,    /* alignment, bounds */
2028                                BUS_SPACE_MAXADDR,       /* lowaddr */
2029                                BUS_SPACE_MAXADDR,       /* highaddr */
2030                                NULL, NULL,      /* filter, filterarg */
2031                                size,    /* maxsize */
2032                                1,       /* nsegments */
2033                                size,    /* maxsegsize */
2034                                BUS_DMA_ALLOCNOW,        /* flags */
2035                                NULL,    /* lockfunc */
2036                                NULL,    /* lockfuncarg */
2037                                &dma->dma_tag);
2038         if (r != 0) {
2039                 device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create failed; "
2040                        "error %u\n", r);
2041                 goto fail_0;
2042         }
2043         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2044                              BUS_DMA_NOWAIT, &dma->dma_map);
2045         if (r != 0) {
2046                 device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2047                        "error %u\n", r);
2048                 goto fail_1;
2049         }
2050         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2051                             size,
2052                             ixgbe_dmamap_cb,
2053                             &dma->dma_paddr,
2054                             mapflags | BUS_DMA_NOWAIT);
2055         if (r != 0) {
2056                 device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load failed; "
2057                        "error %u\n", r);
2058                 goto fail_2;
2059         }
2060         dma->dma_size = size;
2061         return (0);
2062 fail_2:
2063         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2064 fail_1:
2065         bus_dma_tag_destroy(dma->dma_tag);
2066 fail_0:
2067         dma->dma_tag = NULL;
2068         return (r);
2069 }
2070
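/*
 * Release a region obtained from ixgbe_dma_malloc(): sync and unload
 * the map, free the memory, and destroy the tag.
 */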
2071 void
2072 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2073 {
2074         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2075             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2076         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2077         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2078         bus_dma_tag_destroy(dma->dma_tag);
2079 }
2080
2081
2082 /*********************************************************************
2083  *
2084  *  Allocate memory for the transmit and receive rings, and then
2085  *  the descriptors associated with each, called only once at attach.
2086  *
2087  **********************************************************************/
2088 int
2089 ixgbe_allocate_queues(struct adapter *adapter)
2090 {
2091         device_t        dev = adapter->dev;
2092         struct ix_queue *que;
2093         struct tx_ring  *txr;
2094         struct rx_ring  *rxr;
2095         int rsize, tsize, error = IXGBE_SUCCESS;
2096         int txconf = 0, rxconf = 0;
2097
2098         /* First allocate the top level queue structs */
2099         if (!(adapter->queues =
2100             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2101             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2102                 device_printf(dev, "Unable to allocate queue memory\n");
2103                 error = ENOMEM;
2104                 goto fail;
2105         }
2106
2107         /* Next allocate the TX ring struct memory */
2108         if (!(adapter->tx_rings =
2109             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2110             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2111                 device_printf(dev, "Unable to allocate TX ring memory\n");
2112                 error = ENOMEM;
2113                 goto tx_fail;
2114         }
2115
2116         /* Then allocate the RX ring struct memory */
2117         if (!(adapter->rx_rings =
2118             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2119             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2120                 device_printf(dev, "Unable to allocate RX ring memory\n");
2121                 error = ENOMEM;
2122                 goto rx_fail;
2123         }
2124
2125         /* For the ring itself */
2126         tsize = roundup2(adapter->num_tx_desc *
2127             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2128
2129         /*
2130          * Now set up the TX queues; txconf is needed to handle the
2131          * possibility that things fail midcourse and we need to
2132          * undo the memory allocations gracefully.
2133          */
2134         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2135                 /* Set up some basics */
2136                 txr = &adapter->tx_rings[i];
2137                 txr->adapter = adapter;
2138                 txr->me = i;
2139                 txr->num_desc = adapter->num_tx_desc;
2140
2141                 /* Initialize the TX side lock */
2142                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2143                     device_get_nameunit(dev), txr->me);
2144                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2145
2146                 if (ixgbe_dma_malloc(adapter, tsize,
2147                         &txr->txdma, BUS_DMA_NOWAIT)) {
2148                         device_printf(dev,
2149                             "Unable to allocate TX Descriptor memory\n");
2150                         error = ENOMEM;
2151                         goto err_tx_desc;
2152                 }
2153                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2154                 bzero((void *)txr->tx_base, tsize);
2155
2156                 /* Now allocate transmit buffers for the ring */
2157                 if (ixgbe_allocate_transmit_buffers(txr)) {
2158                         device_printf(dev,
2159                             "Critical Failure setting up transmit buffers\n");
2160                         error = ENOMEM;
2161                         goto err_tx_desc;
2162                 }
2163 #ifndef IXGBE_LEGACY_TX
2164                 /* Allocate a buf ring */
2165                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2166                     M_WAITOK, &txr->tx_mtx);
2167                 if (txr->br == NULL) {
2168                         device_printf(dev,
2169                             "Critical Failure setting up buf ring\n");
2170                         error = ENOMEM;
2171                         goto err_tx_desc;
2172                 }
2173 #endif
2174         }
2175
2176         /*
2177          * Next the RX queues...
2178          */ 
2179         rsize = roundup2(adapter->num_rx_desc *
2180             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2181         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2182                 rxr = &adapter->rx_rings[i];
2183                 /* Set up some basics */
2184                 rxr->adapter = adapter;
2185                 rxr->me = i;
2186                 rxr->num_desc = adapter->num_rx_desc;
2187
2188                 /* Initialize the RX side lock */
2189                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2190                     device_get_nameunit(dev), rxr->me);
2191                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2192
2193                 if (ixgbe_dma_malloc(adapter, rsize,
2194                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2195                         device_printf(dev,
2196                             "Unable to allocate RX Descriptor memory\n");
2197                         error = ENOMEM;
2198                         goto err_rx_desc;
2199                 }
2200                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2201                 bzero((void *)rxr->rx_base, rsize);
2202
2203                 /* Allocate receive buffers for the ring */
2204                 if (ixgbe_allocate_receive_buffers(rxr)) {
2205                         device_printf(dev,
2206                             "Critical Failure setting up receive buffers\n");
2207                         error = ENOMEM;
2208                         goto err_rx_desc;
2209                 }
2210         }
2211
2212         /*
2213         ** Finally set up the queue holding structs
2214         */
2215         for (int i = 0; i < adapter->num_queues; i++) {
2216                 que = &adapter->queues[i];
2217                 que->adapter = adapter;
2218                 que->me = i;
2219                 que->txr = &adapter->tx_rings[i];
2220                 que->rxr = &adapter->rx_rings[i];
2221         }
2222
2223         return (0);
2224
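/*
 * Error unwind: txconf/rxconf count how many rings were fully set up,
 * so free the descriptor areas for just those rings, then release the
 * ring and queue arrays allocated earlier.
 */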
2225 err_rx_desc:
2226         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2227                 ixgbe_dma_free(adapter, &rxr->rxdma);
2228 err_tx_desc:
2229         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2230                 ixgbe_dma_free(adapter, &txr->txdma);
2231         free(adapter->rx_rings, M_DEVBUF);
2232 rx_fail:
2233         free(adapter->tx_rings, M_DEVBUF);
2234 tx_fail:
2235         free(adapter->queues, M_DEVBUF);
2236 fail:
2237         return (error);
2238 }