1 /******************************************************************************
2
3   Copyright (c) 2001-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41
42 #include "ixgbe.h"
43
44 #ifdef  RSS
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
47 #endif
48
49 #ifdef DEV_NETMAP
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
53
54 extern int ix_crcstrip;
55 #endif
56
57 /*
58 ** HW RSC control:
59 **  This feature only works with
60 **  IPv4, and only on 82599 and later.
61 **  It also causes IP forwarding to fail,
62 **  and unlike LRO that cannot be worked
63 **  around by the stack. For these reasons
64 **  it is left off by default and has no
65 **  tuneable interface; enabling it
66 **  requires changing this to TRUE and
67 **  recompiling.
68 */
69 static bool ixgbe_rsc_enable = FALSE;
70
71 #ifdef IXGBE_FDIR
72 /*
73 ** For Flow Director: this is the
74 ** number of TX packets between
75 ** samples taken for the filter pool,
76 ** i.e. every 20th packet is probed.
77 **
78 ** This feature can be disabled by
79 ** setting this to 0.
80 */
81 static int atr_sample_rate = 20;
82 #endif
83
84 /* Shared PCI config read/write */
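/*
 * Note: hw->back is the shared code's opaque back-pointer; in this
 * driver it holds the ixgbe_osdep, whose device_t is what the
 * pci_read_config()/pci_write_config() calls below operate on.
 */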
85 inline u16
86 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
87 {
88         u16 value;
89
90         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
91             reg, 2);
92
93         return (value);
94 }
95
96 inline void
97 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
98 {
99         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
100             reg, value, 2);
101
102         return;
103 }
104
105 /*********************************************************************
106  *  Local Function prototypes
107  *********************************************************************/
108 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
109 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
110 static int      ixgbe_setup_receive_ring(struct rx_ring *);
111 static void     ixgbe_free_receive_buffers(struct rx_ring *);
112
113 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
114 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
116 static int      ixgbe_tx_ctx_setup(struct tx_ring *,
117                     struct mbuf *, u32 *, u32 *);
118 static int      ixgbe_tso_setup(struct tx_ring *,
119                     struct mbuf *, u32 *, u32 *);
120 #ifdef IXGBE_FDIR
121 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
122 #endif
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125                     struct mbuf *, u32);
126
127 #ifdef IXGBE_LEGACY_TX
128 /*********************************************************************
129  *  Transmit entry point
130  *
131  *  ixgbe_start is called by the stack to initiate a transmit.
132  *  The driver will remain in this routine as long as there are
133  *  packets to transmit and transmit resources are available.
134  *  In case resources are not available, the stack is notified
135  *  and the packet is requeued.
136  **********************************************************************/
137
138 void
139 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
140 {
141         struct mbuf    *m_head;
142         struct adapter *adapter = txr->adapter;
143
144         IXGBE_TX_LOCK_ASSERT(txr);
145
146         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
147                 return;
148         if (!adapter->link_active)
149                 return;
150
151         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
152                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
153                         break;
154
155                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
156                 if (m_head == NULL)
157                         break;
158
159                 if (ixgbe_xmit(txr, &m_head)) {
160                         if (m_head != NULL)
161                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
162                         break;
163                 }
164                 /* Send a copy of the frame to the BPF listener */
165                 ETHER_BPF_MTAP(ifp, m_head);
166         }
167         return;
168 }
169
170 /*
171  * Legacy TX start - called by the stack; this
172  * always uses the first tx ring, and should
173  * not be used with multiqueue tx enabled.
174  */
175 void
176 ixgbe_start(struct ifnet *ifp)
177 {
178         struct adapter *adapter = ifp->if_softc;
179         struct tx_ring  *txr = adapter->tx_rings;
180
181         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
182                 IXGBE_TX_LOCK(txr);
183                 ixgbe_start_locked(txr, ifp);
184                 IXGBE_TX_UNLOCK(txr);
185         }
186         return;
187 }
188
189 #else /* ! IXGBE_LEGACY_TX */
190
191 /*
192 ** Multiqueue Transmit driver
193 **
194 */
195 int
196 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
197 {
198         struct adapter  *adapter = ifp->if_softc;
199         struct ix_queue *que;
200         struct tx_ring  *txr;
201         int             i, err = 0;
202 #ifdef  RSS
203         uint32_t bucket_id;
204 #endif
205
206         /*
207          * When doing RSS, map it to the same outbound queue
208          * as the incoming flow would be mapped to.
209          *
210          * If everything is set up correctly, it should be the
211          * same bucket as the one the current CPU is in.
212          */
213 #if __FreeBSD_version < 1100054
214         if (m->m_flags & M_FLOWID) {
215 #else
216         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
217 #endif
218 #ifdef  RSS
219                 if (rss_hash2bucket(m->m_pkthdr.flowid,
220                     M_HASHTYPE_GET(m), &bucket_id) == 0)
221                         /* TODO: spit out something if bucket_id > num_queues? */
222                         i = bucket_id % adapter->num_queues;
223                 else 
224 #endif
225                         i = m->m_pkthdr.flowid % adapter->num_queues;
226         } else
227                 i = curcpu % adapter->num_queues;
228
229         /* Check for a hung queue and pick alternative */
230         if (((1 << i) & adapter->active_queues) == 0)
231                 i = ffsl(adapter->active_queues);
232
233         txr = &adapter->tx_rings[i];
234         que = &adapter->queues[i];
235
236         err = drbr_enqueue(ifp, txr->br, m);
237         if (err)
238                 return (err);
239         if (IXGBE_TX_TRYLOCK(txr)) {
240                 ixgbe_mq_start_locked(ifp, txr);
241                 IXGBE_TX_UNLOCK(txr);
242         } else
243                 taskqueue_enqueue(que->tq, &txr->txq_task);
244
245         return (0);
246 }
247
248 int
249 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
250 {
251         struct adapter  *adapter = txr->adapter;
252         struct mbuf     *next;
253         int             enqueued = 0, err = 0;
254
255         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
256             adapter->link_active == 0)
257                 return (ENETDOWN);
258
259         /* Process the queue */
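        /*
         * On newer kernels drbr_peek() leaves the mbuf at the head of
         * the ring: after a successful ixgbe_xmit() we drbr_advance()
         * to consume it, on failure we drbr_putback() the (possibly
         * reallocated) mbuf, or drbr_advance() if ixgbe_xmit() freed it.
         */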
260 #if __FreeBSD_version < 901504
261         next = drbr_dequeue(ifp, txr->br);
262         while (next != NULL) {
263                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
264                         if (next != NULL)
265                                 err = drbr_enqueue(ifp, txr->br, next);
266 #else
267         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
268                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
269                         if (next == NULL) {
270                                 drbr_advance(ifp, txr->br);
271                         } else {
272                                 drbr_putback(ifp, txr->br, next);
273                         }
274 #endif
275                         break;
276                 }
277 #if __FreeBSD_version >= 901504
278                 drbr_advance(ifp, txr->br);
279 #endif
280                 enqueued++;
281 #if 0 // this is VF-only
282 #if __FreeBSD_version >= 1100036
283                 /*
284                  * Since we're looking at the tx ring, we can check
285                  * to see if we're a VF by examining our tail register
286                  * address.
287                  */
288                 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
289                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
290 #endif
291 #endif
292                 /* Send a copy of the frame to the BPF listener */
293                 ETHER_BPF_MTAP(ifp, next);
294                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
295                         break;
296 #if __FreeBSD_version < 901504
297                 next = drbr_dequeue(ifp, txr->br);
298 #endif
299         }
300
301         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
302                 ixgbe_txeof(txr);
303
304         return (err);
305 }
306
307 /*
308  * Called from a taskqueue to drain queued transmit packets.
309  */
310 void
311 ixgbe_deferred_mq_start(void *arg, int pending)
312 {
313         struct tx_ring *txr = arg;
314         struct adapter *adapter = txr->adapter;
315         struct ifnet *ifp = adapter->ifp;
316
317         IXGBE_TX_LOCK(txr);
318         if (!drbr_empty(ifp, txr->br))
319                 ixgbe_mq_start_locked(ifp, txr);
320         IXGBE_TX_UNLOCK(txr);
321 }
322
323 /*
324  * Flush all ring buffers
325  */
326 void
327 ixgbe_qflush(struct ifnet *ifp)
328 {
329         struct adapter  *adapter = ifp->if_softc;
330         struct tx_ring  *txr = adapter->tx_rings;
331         struct mbuf     *m;
332
333         for (int i = 0; i < adapter->num_queues; i++, txr++) {
334                 IXGBE_TX_LOCK(txr);
335                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
336                         m_freem(m);
337                 IXGBE_TX_UNLOCK(txr);
338         }
339         if_qflush(ifp);
340 }
341 #endif /* IXGBE_LEGACY_TX */
342
343
344 /*********************************************************************
345  *
346  *  This routine maps the mbufs to tx descriptors, allowing the
347  *  TX engine to transmit the packets. 
348  *      - return 0 on success, positive on failure
349  *
350  **********************************************************************/
351
352 static int
353 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
354 {
355         struct adapter  *adapter = txr->adapter;
356         u32             olinfo_status = 0, cmd_type_len;
357         int             i, j, error, nsegs;
358         int             first;
359         bool            remap = TRUE;
360         struct mbuf     *m_head;
361         bus_dma_segment_t segs[adapter->num_segs];
362         bus_dmamap_t    map;
363         struct ixgbe_tx_buf *txbuf;
364         union ixgbe_adv_tx_desc *txd = NULL;
365
366         m_head = *m_headp;
367
368         /* Basic descriptor defines */
369         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
370             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
371
372         if (m_head->m_flags & M_VLANTAG)
373                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
374
375         /*
376          * Important to capture the first descriptor
377          * used because it will contain the index of
378          * the one we tell the hardware to report back
379          */
380         first = txr->next_avail_desc;
381         txbuf = &txr->tx_buffers[first];
382         map = txbuf->map;
383
384         /*
385          * Map the packet for DMA.
386          */
387 retry:
388         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
389             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
390
391         if (__predict_false(error)) {
392                 struct mbuf *m;
393
394                 switch (error) {
395                 case EFBIG:
396                         /* Try it again? - one try */
397                         if (remap == TRUE) {
398                                 remap = FALSE;
399                                 /*
400                                  * XXX: m_defrag will choke on
401                                  * non-MCLBYTES-sized clusters
402                                  */
403                                 m = m_defrag(*m_headp, M_NOWAIT);
404                                 if (m == NULL) {
405                                         adapter->mbuf_defrag_failed++;
406                                         m_freem(*m_headp);
407                                         *m_headp = NULL;
408                                         return (ENOBUFS);
409                                 }
410                                 *m_headp = m;
411                                 goto retry;
412                         } else
413                                 return (error);
414                 case ENOMEM:
415                         txr->no_tx_dma_setup++;
416                         return (error);
417                 default:
418                         txr->no_tx_dma_setup++;
419                         m_freem(*m_headp);
420                         *m_headp = NULL;
421                         return (error);
422                 }
423         }
424
425         /* Make certain there are enough descriptors */
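        /*
         * The "- 2" reserves space for the offload context descriptor
         * consumed by ixgbe_tx_ctx_setup() below and leaves at least
         * one descriptor free after this packet is queued.
         */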
426         if (nsegs > txr->tx_avail - 2) {
427                 txr->no_desc_avail++;
428                 bus_dmamap_unload(txr->txtag, map);
429                 return (ENOBUFS);
430         }
431         m_head = *m_headp;
432
433         /*
434          * Set up the appropriate offload context
435          * this will consume the first descriptor
436          */
437         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
438         if (__predict_false(error)) {
439                 if (error == ENOBUFS)
440                         *m_headp = NULL;
441                 return (error);
442         }
443
444 #ifdef IXGBE_FDIR
445         /* Do the flow director magic */
446         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
447                 ++txr->atr_count;
448                 if (txr->atr_count >= atr_sample_rate) {
449                         ixgbe_atr(txr, m_head);
450                         txr->atr_count = 0;
451                 }
452         }
453 #endif
454
455         i = txr->next_avail_desc;
456         for (j = 0; j < nsegs; j++) {
457                 bus_size_t seglen;
458                 bus_addr_t segaddr;
459
460                 txbuf = &txr->tx_buffers[i];
461                 txd = &txr->tx_base[i];
462                 seglen = segs[j].ds_len;
463                 segaddr = htole64(segs[j].ds_addr);
464
465                 txd->read.buffer_addr = segaddr;
466                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
467                     cmd_type_len | seglen);
468                 txd->read.olinfo_status = htole32(olinfo_status);
469
470                 if (++i == txr->num_desc)
471                         i = 0;
472         }
473
474         txd->read.cmd_type_len |=
475             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
476         txr->tx_avail -= nsegs;
477         txr->next_avail_desc = i;
478
479         txbuf->m_head = m_head;
480         /*
481          * Here we swap the maps so the last descriptor,
482          * which gets the completion interrupt, holds the
483          * real map, and the first descriptor gets the
484          * unused map from this last descriptor.
485          */
486         txr->tx_buffers[first].map = txbuf->map;
487         txbuf->map = map;
488         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
489
490         /* Set the EOP descriptor that will be marked done */
491         txbuf = &txr->tx_buffers[first];
492         txbuf->eop = txd;
493
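        /* Flush descriptor writes to memory before bumping the tail register */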
494         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
495             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
496         /*
497          * Advance the Transmit Descriptor Tail (TDT); this tells the
498          * hardware that this frame is available to transmit.
499          */
500         ++txr->total_packets;
501         IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
502
503         /* Mark queue as having work */
504         if (txr->busy == 0)
505                 txr->busy = 1;
506
507         return (0);
508 }
509
510
511 /*********************************************************************
512  *
513  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
514  *  the information needed to transmit a packet on the wire. This is
515  *  called only once at attach; setup is done on every reset.
516  *
517  **********************************************************************/
518 int
519 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
520 {
521         struct adapter *adapter = txr->adapter;
522         device_t dev = adapter->dev;
523         struct ixgbe_tx_buf *txbuf;
524         int error, i;
525
526         /*
527          * Setup DMA descriptor areas.
528          */
529         if ((error = bus_dma_tag_create(
530                                bus_get_dma_tag(adapter->dev),   /* parent */
531                                1, 0,            /* alignment, bounds */
532                                BUS_SPACE_MAXADDR,       /* lowaddr */
533                                BUS_SPACE_MAXADDR,       /* highaddr */
534                                NULL, NULL,              /* filter, filterarg */
535                                IXGBE_TSO_SIZE,          /* maxsize */
536                                adapter->num_segs,       /* nsegments */
537                                PAGE_SIZE,               /* maxsegsize */
538                                0,                       /* flags */
539                                NULL,                    /* lockfunc */
540                                NULL,                    /* lockfuncarg */
541                                &txr->txtag))) {
542                 device_printf(dev,"Unable to allocate TX DMA tag\n");
543                 goto fail;
544         }
545
546         if (!(txr->tx_buffers =
547             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
548             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
549                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
550                 error = ENOMEM;
551                 goto fail;
552         }
553
554         /* Create the descriptor buffer dma maps */
555         txbuf = txr->tx_buffers;
556         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
557                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
558                 if (error != 0) {
559                         device_printf(dev, "Unable to create TX DMA map\n");
560                         goto fail;
561                 }
562         }
563
564         return 0;
565 fail:
566         /* We free all; this handles the case where we fail partway through */
567         ixgbe_free_transmit_structures(adapter);
568         return (error);
569 }
570
571 /*********************************************************************
572  *
573  *  Initialize a transmit ring.
574  *
575  **********************************************************************/
576 static void
577 ixgbe_setup_transmit_ring(struct tx_ring *txr)
578 {
579         struct adapter *adapter = txr->adapter;
580         struct ixgbe_tx_buf *txbuf;
581         int i;
582 #ifdef DEV_NETMAP
583         struct netmap_adapter *na = NA(adapter->ifp);
584         struct netmap_slot *slot;
585 #endif /* DEV_NETMAP */
586
587         /* Clear the old ring contents */
588         IXGBE_TX_LOCK(txr);
589 #ifdef DEV_NETMAP
590         /*
591          * (under lock): if in netmap mode, do some consistency
592          * checks and set slot to entry 0 of the netmap ring.
593          */
594         slot = netmap_reset(na, NR_TX, txr->me, 0);
595 #endif /* DEV_NETMAP */
596         bzero((void *)txr->tx_base,
597               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
598         /* Reset indices */
599         txr->next_avail_desc = 0;
600         txr->next_to_clean = 0;
601
602         /* Free any existing tx buffers. */
603         txbuf = txr->tx_buffers;
604         for (i = 0; i < txr->num_desc; i++, txbuf++) {
605                 if (txbuf->m_head != NULL) {
606                         bus_dmamap_sync(txr->txtag, txbuf->map,
607                             BUS_DMASYNC_POSTWRITE);
608                         bus_dmamap_unload(txr->txtag, txbuf->map);
609                         m_freem(txbuf->m_head);
610                         txbuf->m_head = NULL;
611                 }
612 #ifdef DEV_NETMAP
613                 /*
614                  * In netmap mode, set the map for the packet buffer.
615                  * NOTE: Some drivers (not this one) also need to set
616                  * the physical buffer address in the NIC ring.
617                  * Slots in the netmap ring (indexed by "si") are
618                  * kring->nkr_hwofs positions "ahead" wrt the
619                  * corresponding slot in the NIC ring. In some drivers
620                  * (not here) nkr_hwofs can be negative. Function
621                  * netmap_idx_n2k() handles wraparounds properly.
622                  */
623                 if (slot) {
624                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
625                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
626                 }
627 #endif /* DEV_NETMAP */
628                 /* Clear the EOP descriptor pointer */
629                 txbuf->eop = NULL;
630         }
631
632 #ifdef IXGBE_FDIR
633         /* Set the rate at which we sample packets */
634         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
635                 txr->atr_sample = atr_sample_rate;
636 #endif
637
638         /* Set number of descriptors available */
639         txr->tx_avail = adapter->num_tx_desc;
640
641         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
642             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
643         IXGBE_TX_UNLOCK(txr);
644 }
645
646 /*********************************************************************
647  *
648  *  Initialize all transmit rings.
649  *
650  **********************************************************************/
651 int
652 ixgbe_setup_transmit_structures(struct adapter *adapter)
653 {
654         struct tx_ring *txr = adapter->tx_rings;
655
656         for (int i = 0; i < adapter->num_queues; i++, txr++)
657                 ixgbe_setup_transmit_ring(txr);
658
659         return (0);
660 }
661
662 /*********************************************************************
663  *
664  *  Free all transmit rings.
665  *
666  **********************************************************************/
667 void
668 ixgbe_free_transmit_structures(struct adapter *adapter)
669 {
670         struct tx_ring *txr = adapter->tx_rings;
671
672         for (int i = 0; i < adapter->num_queues; i++, txr++) {
673                 IXGBE_TX_LOCK(txr);
674                 ixgbe_free_transmit_buffers(txr);
675                 ixgbe_dma_free(adapter, &txr->txdma);
676                 IXGBE_TX_UNLOCK(txr);
677                 IXGBE_TX_LOCK_DESTROY(txr);
678         }
679         free(adapter->tx_rings, M_DEVBUF);
680 }
681
682 /*********************************************************************
683  *
684  *  Free transmit ring related data structures.
685  *
686  **********************************************************************/
687 static void
688 ixgbe_free_transmit_buffers(struct tx_ring *txr)
689 {
690         struct adapter *adapter = txr->adapter;
691         struct ixgbe_tx_buf *tx_buffer;
692         int             i;
693
694         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
695
696         if (txr->tx_buffers == NULL)
697                 return;
698
699         tx_buffer = txr->tx_buffers;
700         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
701                 if (tx_buffer->m_head != NULL) {
702                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
703                             BUS_DMASYNC_POSTWRITE);
704                         bus_dmamap_unload(txr->txtag,
705                             tx_buffer->map);
706                         m_freem(tx_buffer->m_head);
707                         tx_buffer->m_head = NULL;
708                         if (tx_buffer->map != NULL) {
709                                 bus_dmamap_destroy(txr->txtag,
710                                     tx_buffer->map);
711                                 tx_buffer->map = NULL;
712                         }
713                 } else if (tx_buffer->map != NULL) {
714                         bus_dmamap_unload(txr->txtag,
715                             tx_buffer->map);
716                         bus_dmamap_destroy(txr->txtag,
717                             tx_buffer->map);
718                         tx_buffer->map = NULL;
719                 }
720         }
721 #ifdef IXGBE_LEGACY_TX
722         if (txr->br != NULL)
723                 buf_ring_free(txr->br, M_DEVBUF);
724 #endif
725         if (txr->tx_buffers != NULL) {
726                 free(txr->tx_buffers, M_DEVBUF);
727                 txr->tx_buffers = NULL;
728         }
729         if (txr->txtag != NULL) {
730                 bus_dma_tag_destroy(txr->txtag);
731                 txr->txtag = NULL;
732         }
733         return;
734 }
735
736 /*********************************************************************
737  *
738  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
739  *
740  **********************************************************************/
741
742 static int
743 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
744     u32 *cmd_type_len, u32 *olinfo_status)
745 {
746         struct adapter *adapter = txr->adapter;
747         struct ixgbe_adv_tx_context_desc *TXD;
748         struct ether_vlan_header *eh;
749         struct ip *ip;
750         struct ip6_hdr *ip6;
751         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
752         int     ehdrlen, ip_hlen = 0;
753         u16     etype;
754         u8      ipproto = 0;
755         int     offload = TRUE;
756         int     ctxd = txr->next_avail_desc;
757         u16     vtag = 0;
758
759         /* First check if TSO is to be used */
760         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
761                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
762
763         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
764                 offload = FALSE;
765
766         /* Indicate the whole packet as payload when not doing TSO */
767         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
768
769         /* Now ready a context descriptor */
770         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
771
772         /*
773         ** In advanced descriptors the vlan tag must 
774         ** be placed into the context descriptor. Hence
775         ** we need to make one even if not doing offloads.
776         */
777         if (mp->m_flags & M_VLANTAG) {
778                 vtag = htole16(mp->m_pkthdr.ether_vtag);
779                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
780         } 
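        /*
         * Note: for X550 VF interfaces we fall through and still build
         * a context descriptor even with no VLAN tag and no offloads
         * requested; other MACs can return without one.
         */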
781         else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
782                 return (0);
783
784         /*
785          * Determine where frame payload starts.
786          * Jump over vlan headers if already present,
787          * helpful for QinQ too.
788          */
789         eh = mtod(mp, struct ether_vlan_header *);
790         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
791                 etype = ntohs(eh->evl_proto);
792                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
793         } else {
794                 etype = ntohs(eh->evl_encap_proto);
795                 ehdrlen = ETHER_HDR_LEN;
796         }
797
798         /* Set the ether header length */
799         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
800
801         if (offload == FALSE)
802                 goto no_offloads;
803
804         switch (etype) {
805                 case ETHERTYPE_IP:
806                         ip = (struct ip *)(mp->m_data + ehdrlen);
807                         ip_hlen = ip->ip_hl << 2;
808                         ipproto = ip->ip_p;
809                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
810                         break;
811                 case ETHERTYPE_IPV6:
812                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
813                         ip_hlen = sizeof(struct ip6_hdr);
814                         /* XXX-BZ this will go badly in case of ext hdrs. */
815                         ipproto = ip6->ip6_nxt;
816                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
817                         break;
818                 default:
819                         offload = FALSE;
820                         break;
821         }
822
823         vlan_macip_lens |= ip_hlen;
824
825         switch (ipproto) {
826                 case IPPROTO_TCP:
827                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
828                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
829                         break;
830
831                 case IPPROTO_UDP:
832                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
833                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
834                         break;
835
836 #if __FreeBSD_version >= 800000
837                 case IPPROTO_SCTP:
838                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
839                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
840                         break;
841 #endif
842                 default:
843                         offload = FALSE;
844                         break;
845         }
846
847         if (offload) /* For the TX descriptor setup */
848                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
849
850 no_offloads:
851         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
852
853         /* Now copy bits into descriptor */
854         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
855         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
856         TXD->seqnum_seed = htole32(0);
857         TXD->mss_l4len_idx = htole32(0);
858
859         /* We've consumed the first desc, adjust counters */
860         if (++ctxd == txr->num_desc)
861                 ctxd = 0;
862         txr->next_avail_desc = ctxd;
863         --txr->tx_avail;
864
865         return (0);
866 }
867
868 /**********************************************************************
869  *
870  *  Setup work for hardware segmentation offload (TSO) on
871  *  adapters using advanced tx descriptors
872  *
873  **********************************************************************/
874 static int
875 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
876     u32 *cmd_type_len, u32 *olinfo_status)
877 {
878         struct ixgbe_adv_tx_context_desc *TXD;
879         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
880         u32 mss_l4len_idx = 0, paylen;
881         u16 vtag = 0, eh_type;
882         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
883         struct ether_vlan_header *eh;
884 #ifdef INET6
885         struct ip6_hdr *ip6;
886 #endif
887 #ifdef INET
888         struct ip *ip;
889 #endif
890         struct tcphdr *th;
891
892
893         /*
894          * Determine where frame payload starts.
895          * Jump over vlan headers if already present
896          */
897         eh = mtod(mp, struct ether_vlan_header *);
898         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
899                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
900                 eh_type = eh->evl_proto;
901         } else {
902                 ehdrlen = ETHER_HDR_LEN;
903                 eh_type = eh->evl_encap_proto;
904         }
905
906         switch (ntohs(eh_type)) {
907 #ifdef INET6
908         case ETHERTYPE_IPV6:
909                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
910                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
911                 if (ip6->ip6_nxt != IPPROTO_TCP)
912                         return (ENXIO);
913                 ip_hlen = sizeof(struct ip6_hdr);
914                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
915                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
916                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
917                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
918                 break;
919 #endif
920 #ifdef INET
921         case ETHERTYPE_IP:
922                 ip = (struct ip *)(mp->m_data + ehdrlen);
923                 if (ip->ip_p != IPPROTO_TCP)
924                         return (ENXIO);
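                /*
                 * Zero the IP checksum and seed th_sum with the
                 * pseudo-header checksum (length omitted) so the
                 * hardware can insert the final IP and TCP checksums
                 * in each segment it generates.
                 */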
925                 ip->ip_sum = 0;
926                 ip_hlen = ip->ip_hl << 2;
927                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
928                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
929                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
930                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
931                 /* Tell transmit desc to also do IPv4 checksum. */
932                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
933                 break;
934 #endif
935         default:
936                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
937                     __func__, ntohs(eh_type));
938                 break;
939         }
940
941         ctxd = txr->next_avail_desc;
942         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
943
944         tcp_hlen = th->th_off << 2;
945
946         /* TCP payload length (headers excluded); used in the TSO transmit descriptor */
947         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
948
949         /* VLAN MACLEN IPLEN */
950         if (mp->m_flags & M_VLANTAG) {
951                 vtag = htole16(mp->m_pkthdr.ether_vtag);
952                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
953         }
954
955         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
956         vlan_macip_lens |= ip_hlen;
957         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
958
959         /* ADV DTYPE TUCMD */
960         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
961         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
962         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
963
964         /* MSS L4LEN IDX */
965         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
966         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
967         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
968
969         TXD->seqnum_seed = htole32(0);
970
971         if (++ctxd == txr->num_desc)
972                 ctxd = 0;
973
974         txr->tx_avail--;
975         txr->next_avail_desc = ctxd;
976         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
977         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
978         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
979         ++txr->tso_tx;
980         return (0);
981 }
982
983
984 /**********************************************************************
985  *
986  *  Examine each tx_buffer in the used queue. If the hardware is done
987  *  processing the packet then free associated resources. The
988  *  tx_buffer is put back on the free queue.
989  *
990  **********************************************************************/
991 void
992 ixgbe_txeof(struct tx_ring *txr)
993 {
994 #ifdef DEV_NETMAP
995         struct adapter          *adapter = txr->adapter;
996         struct ifnet            *ifp = adapter->ifp;
997 #endif
998         u32                     work, processed = 0;
999         u16                     limit = txr->process_limit;
1000         struct ixgbe_tx_buf     *buf;
1001         union ixgbe_adv_tx_desc *txd;
1002
1003         mtx_assert(&txr->tx_mtx, MA_OWNED);
1004
1005 #ifdef DEV_NETMAP
1006         if (ifp->if_capenable & IFCAP_NETMAP) {
1007                 struct netmap_adapter *na = NA(ifp);
1008                 struct netmap_kring *kring = &na->tx_rings[txr->me];
1009                 txd = txr->tx_base;
1010                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1011                     BUS_DMASYNC_POSTREAD);
1012                 /*
1013                  * In netmap mode, all the work is done in the context
1014                  * of the client thread. Interrupt handlers only wake up
1015                  * clients, which may be sleeping on individual rings
1016                  * or on a global resource for all rings.
1017                  * To implement tx interrupt mitigation, we wake up the client
1018                  * thread roughly every half ring, even if the NIC interrupts
1019                  * more frequently. This is implemented as follows:
1020                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
1021                  *   the slot that should wake up the thread (nkr_num_slots
1022                  *   means the user thread should not be woken up);
1023                  * - the driver ignores tx interrupts unless netmap_mitigate=0
1024                  *   or the slot has the DD bit set.
1025                  */
1026                 if (!netmap_mitigate ||
1027                     (kring->nr_kflags < kring->nkr_num_slots &&
1028                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1029                         netmap_tx_irq(ifp, txr->me);
1030                 }
1031                 return;
1032         }
1033 #endif /* DEV_NETMAP */
1034
1035         if (txr->tx_avail == txr->num_desc) {
1036                 txr->busy = 0;
1037                 return;
1038         }
1039
1040         /* Get work starting point */
1041         work = txr->next_to_clean;
1042         buf = &txr->tx_buffers[work];
1043         txd = &txr->tx_base[work];
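        /*
         * "work" is biased below by -num_desc so it runs from
         * -num_desc up to 0; hitting 0 means we have wrapped past
         * the end of the ring (see the !work tests in the loop).
         */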
1044         work -= txr->num_desc; /* The distance to ring end */
1045         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1046             BUS_DMASYNC_POSTREAD);
1047
1048         do {
1049                 union ixgbe_adv_tx_desc *eop = buf->eop;
1050                 if (eop == NULL) /* No work */
1051                         break;
1052
1053                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1054                         break;  /* I/O not complete */
1055
1056                 if (buf->m_head) {
1057                         txr->bytes +=
1058                             buf->m_head->m_pkthdr.len;
1059                         bus_dmamap_sync(txr->txtag,
1060                             buf->map,
1061                             BUS_DMASYNC_POSTWRITE);
1062                         bus_dmamap_unload(txr->txtag,
1063                             buf->map);
1064                         m_freem(buf->m_head);
1065                         buf->m_head = NULL;
1066                 }
1067                 buf->eop = NULL;
1068                 ++txr->tx_avail;
1069
1070                 /* We clean the range if multi segment */
1071                 while (txd != eop) {
1072                         ++txd;
1073                         ++buf;
1074                         ++work;
1075                         /* wrap the ring? */
1076                         if (__predict_false(!work)) {
1077                                 work -= txr->num_desc;
1078                                 buf = txr->tx_buffers;
1079                                 txd = txr->tx_base;
1080                         }
1081                         if (buf->m_head) {
1082                                 txr->bytes +=
1083                                     buf->m_head->m_pkthdr.len;
1084                                 bus_dmamap_sync(txr->txtag,
1085                                     buf->map,
1086                                     BUS_DMASYNC_POSTWRITE);
1087                                 bus_dmamap_unload(txr->txtag,
1088                                     buf->map);
1089                                 m_freem(buf->m_head);
1090                                 buf->m_head = NULL;
1091                         }
1092                         ++txr->tx_avail;
1093                         buf->eop = NULL;
1094
1095                 }
1096                 ++txr->packets;
1097                 ++processed;
1098
1099                 /* Try the next packet */
1100                 ++txd;
1101                 ++buf;
1102                 ++work;
1103                 /* reset with a wrap */
1104                 if (__predict_false(!work)) {
1105                         work -= txr->num_desc;
1106                         buf = txr->tx_buffers;
1107                         txd = txr->tx_base;
1108                 }
1109                 prefetch(txd);
1110         } while (__predict_true(--limit));
1111
1112         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1113             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1114
1115         work += txr->num_desc;
1116         txr->next_to_clean = work;
1117
1118         /*
1119         ** Queue hang detection: we know there is work
1120         ** outstanding or the early return above would
1121         ** have been taken, so increment busy if nothing
1122         ** was cleaned; the local timer checks this count
1123         ** and marks the queue HUNG if it exceeds the
1124         ** maximum number of attempts.
1125         */
1126         if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1127                 ++txr->busy;
1128         /*
1129         ** If anything gets cleaned we reset the state to 1;
1130         ** note this will clear HUNG if it is set.
1131         */
1132         if (processed)
1133                 txr->busy = 1;
1134
1135         if (txr->tx_avail == txr->num_desc)
1136                 txr->busy = 0;
1137
1138         return;
1139 }
1140
1141
1142 #ifdef IXGBE_FDIR
1143 /*
1144 ** This routine parses packet headers so that Flow
1145 ** Director can make a hashed filter table entry 
1146 ** allowing traffic flows to be identified and kept
1147 ** on the same cpu.  Doing this for every packet
1148 ** would be a performance hit, so we only do it
1149 ** for one in every IXGBE_FDIR_RATE packets.
1150 */
1151 static void
1152 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1153 {
1154         struct adapter                  *adapter = txr->adapter;
1155         struct ix_queue                 *que;
1156         struct ip                       *ip;
1157         struct tcphdr                   *th;
1158         struct udphdr                   *uh;
1159         struct ether_vlan_header        *eh;
1160         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
1161         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
1162         int                             ehdrlen, ip_hlen;
1163         u16                             etype;
1164
1165         eh = mtod(mp, struct ether_vlan_header *);
1166         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1167                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1168                 etype = eh->evl_proto;
1169         } else {
1170                 ehdrlen = ETHER_HDR_LEN;
1171                 etype = eh->evl_encap_proto;
1172         }
1173
1174         /* Only handling IPv4 */
1175         if (etype != htons(ETHERTYPE_IP))
1176                 return;
1177
1178         ip = (struct ip *)(mp->m_data + ehdrlen);
1179         ip_hlen = ip->ip_hl << 2;
1180
1181         /* check if we're UDP or TCP */
1182         switch (ip->ip_p) {
1183         case IPPROTO_TCP:
1184                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1185                 /* src and dst are inverted */
1186                 common.port.dst ^= th->th_sport;
1187                 common.port.src ^= th->th_dport;
1188                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1189                 break;
1190         case IPPROTO_UDP:
1191                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1192                 /* src and dst are inverted */
1193                 common.port.dst ^= uh->uh_sport;
1194                 common.port.src ^= uh->uh_dport;
1195                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1196                 break;
1197         default:
1198                 return;
1199         }
1200
1201         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1202         if (mp->m_pkthdr.ether_vtag)
1203                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1204         else
1205                 common.flex_bytes ^= etype;
1206         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1207
1208         que = &adapter->queues[txr->me];
1209         /*
1210         ** This assumes the Rx queue and Tx
1211         ** queue are bound to the same CPU
1212         */
1213         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1214             input, common, que->msix);
1215 }
1216 #endif /* IXGBE_FDIR */
1217
1218 /*
1219 ** Used to detect a descriptor that has
1220 ** been merged by Hardware RSC.
1221 */
1222 static inline u32
1223 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1224 {
1225         return (le32toh(rx->wb.lower.lo_dword.data) &
1226             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1227 }
1228
1229 /*********************************************************************
1230  *
1231  *  Initialize Hardware RSC (LRO) feature on 82599
1232  *  for an RX ring; it is toggled by the LRO capability
1233  *  even though it is transparent to the stack.
1234  *
1235  *  NOTE: since this HW feature only works with IPv4 and
1236  *        our testing has shown soft LRO to be as effective,
1237  *        it is disabled by default.
1238  *
1239  **********************************************************************/
1240 static void
1241 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1242 {
1243         struct  adapter         *adapter = rxr->adapter;
1244         struct  ixgbe_hw        *hw = &adapter->hw;
1245         u32                     rscctrl, rdrxctl;
1246
1247         /* If turning LRO/RSC off we need to disable it */
1248         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1249                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1250                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                     /* Write back the cleared enable bit so the disable takes effect */
                     IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1251                 return;
1252         }
1253
1254         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1255         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1256 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1257         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1258 #endif /* DEV_NETMAP */
1259         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1260         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1261         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1262
1263         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1264         rscctrl |= IXGBE_RSCCTL_RSCEN;
1265         /*
1266         ** Limit the total number of descriptors that
1267         ** can be combined, so it does not exceed 64K
1268         */
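        /* e.g. 16 x 2KB, 8 x PAGE_SIZE (typically 4KB), 4 x 9KB, or 1 x 16KB clusters */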
1269         if (rxr->mbuf_sz == MCLBYTES)
1270                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1271         else if (rxr->mbuf_sz == MJUMPAGESIZE)
1272                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1273         else if (rxr->mbuf_sz == MJUM9BYTES)
1274                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1275         else  /* Using 16K cluster */
1276                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1277
1278         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1279
1280         /* Enable TCP header recognition */
1281         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1282             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1283             IXGBE_PSRTYPE_TCPHDR));
1284
1285         /* Disable RSC for ACK packets */
1286         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1287             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1288
1289         rxr->hw_rsc = TRUE;
1290 }
1291 /*********************************************************************
1292  *
1293  *  Refresh mbuf buffers for RX descriptor rings
1294  *   - now keeps its own state, so discards due to resource
1295  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1296  *     it just returns, keeping its placeholder, and can simply
1297  *     be called again later to retry.
1298  *
1299  **********************************************************************/
1300 static void
1301 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1302 {
1303         struct adapter          *adapter = rxr->adapter;
1304         bus_dma_segment_t       seg[1];
1305         struct ixgbe_rx_buf     *rxbuf;
1306         struct mbuf             *mp;
1307         int                     i, j, nsegs, error;
1308         bool                    refreshed = FALSE;
1309
1310         i = j = rxr->next_to_refresh;
1311         /* Control the loop with one beyond */
1312         if (++j == rxr->num_desc)
1313                 j = 0;
1314
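        /*
         * j runs one slot ahead of i, so the loop stops before
         * touching the 'limit' descriptor and next_to_refresh always
         * records the last slot actually refreshed.
         */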
1315         while (j != limit) {
1316                 rxbuf = &rxr->rx_buffers[i];
1317                 if (rxbuf->buf == NULL) {
1318                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1319                             M_PKTHDR, rxr->mbuf_sz);
1320                         if (mp == NULL)
1321                                 goto update;
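                        /*
                         * m_adj() by ETHER_ALIGN (2 bytes) puts the IP
                         * header on a 32-bit boundary; only done when the
                         * frame still fits in a standard cluster.
                         */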
1322                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1323                                 m_adj(mp, ETHER_ALIGN);
1324                 } else
1325                         mp = rxbuf->buf;
1326
1327                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1328
1329                 /* If we're dealing with an mbuf that was copied rather
1330                  * than replaced, there's no need to go through busdma.
1331                  */
1332                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1333                         /* Get the memory mapping */
1334                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1335                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1336                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1337                         if (error != 0) {
1338                                 printf("Refresh mbufs: payload dmamap load"
1339                                     " failure - %d\n", error);
1340                                 m_free(mp);
1341                                 rxbuf->buf = NULL;
1342                                 goto update;
1343                         }
1344                         rxbuf->buf = mp;
1345                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1346                             BUS_DMASYNC_PREREAD);
1347                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1348                             htole64(seg[0].ds_addr);
1349                 } else {
1350                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1351                         rxbuf->flags &= ~IXGBE_RX_COPY;
1352                 }
1353
1354                 refreshed = TRUE;
1355                 /* Next is precalculated */
1356                 i = j;
1357                 rxr->next_to_refresh = i;
1358                 if (++j == rxr->num_desc)
1359                         j = 0;
1360         }
1361 update:
1362         if (refreshed) /* Update hardware tail index */
1363                 IXGBE_WRITE_REG(&adapter->hw,
1364                     rxr->tail, rxr->next_to_refresh);
1365         return;
1366 }
1367
1368 /*********************************************************************
1369  *
1370  *  Allocate memory for rx_buffer structures. Since we use one
1371  *  rx_buffer per received packet, the maximum number of rx_buffer's
1372  *  that we'll need is equal to the number of receive descriptors
1373  *  that we've allocated.
1374  *
1375  **********************************************************************/
1376 int
1377 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1378 {
1379         struct  adapter         *adapter = rxr->adapter;
1380         device_t                dev = adapter->dev;
1381         struct ixgbe_rx_buf     *rxbuf;
1382         int                     i, bsize, error;
1383
1384         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1385         if (!(rxr->rx_buffers =
1386             (struct ixgbe_rx_buf *) malloc(bsize,
1387             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1388                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1389                 error = ENOMEM;
1390                 goto fail;
1391         }
1392
1393         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1394                                    1, 0,        /* alignment, bounds */
1395                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1396                                    BUS_SPACE_MAXADDR,   /* highaddr */
1397                                    NULL, NULL,          /* filter, filterarg */
1398                                    MJUM16BYTES,         /* maxsize */
1399                                    1,                   /* nsegments */
1400                                    MJUM16BYTES,         /* maxsegsize */
1401                                    0,                   /* flags */
1402                                    NULL,                /* lockfunc */
1403                                    NULL,                /* lockfuncarg */
1404                                    &rxr->ptag))) {
1405                 device_printf(dev, "Unable to create RX DMA tag\n");
1406                 goto fail;
1407         }
1408
1409         for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
1410                 rxbuf = &rxr->rx_buffers[i];
1411                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1412                 if (error) {
1413                         device_printf(dev, "Unable to create RX dma map\n");
1414                         goto fail;
1415                 }
1416         }
1417
1418         return (0);
1419
1420 fail:
1421         /* Frees all, but can handle partial completion */
1422         ixgbe_free_receive_structures(adapter);
1423         return (error);
1424 }
1425
1426
1427 static void     
1428 ixgbe_free_receive_ring(struct rx_ring *rxr)
1429 {
1430         struct ixgbe_rx_buf       *rxbuf;
1431         int i;
1432
1433         for (i = 0; i < rxr->num_desc; i++) {
1434                 rxbuf = &rxr->rx_buffers[i];
1435                 if (rxbuf->buf != NULL) {
1436                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1437                             BUS_DMASYNC_POSTREAD);
1438                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1439                         rxbuf->buf->m_flags |= M_PKTHDR;
1440                         m_freem(rxbuf->buf);
1441                         rxbuf->buf = NULL;
1442                         rxbuf->flags = 0;
1443                 }
1444         }
1445 }
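/*
 * Note: unlike ixgbe_free_receive_buffers() below, the helper above only
 * unloads the DMA maps and frees the mbufs; the maps, the rx_buffers array
 * and the DMA tag are kept so the ring can be repopulated by
 * ixgbe_setup_receive_ring().
 */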
1446
1447
1448 /*********************************************************************
1449  *
1450  *  Initialize a receive ring and its buffers.
1451  *
1452  **********************************************************************/
1453 static int
1454 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1455 {
1456         struct  adapter         *adapter;
1457         struct ifnet            *ifp;
1458         device_t                dev;
1459         struct ixgbe_rx_buf     *rxbuf;
1460         bus_dma_segment_t       seg[1];
1461         struct lro_ctrl         *lro = &rxr->lro;
1462         int                     rsize, nsegs, error = 0;
1463 #ifdef DEV_NETMAP
1464         struct netmap_adapter *na = NA(rxr->adapter->ifp);
1465         struct netmap_slot *slot;
1466 #endif /* DEV_NETMAP */
1467
1468         adapter = rxr->adapter;
1469         ifp = adapter->ifp;
1470         dev = adapter->dev;
1471
1472         /* Clear the ring contents */
1473         IXGBE_RX_LOCK(rxr);
1474 #ifdef DEV_NETMAP
1475         /* same as in ixgbe_setup_transmit_ring() */
1476         slot = netmap_reset(na, NR_RX, rxr->me, 0);
1477 #endif /* DEV_NETMAP */
1478         rsize = roundup2(adapter->num_rx_desc *
1479             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1480         bzero((void *)rxr->rx_base, rsize);
1481         /* Cache the size */
1482         rxr->mbuf_sz = adapter->rx_mbuf_sz;
1483
1484         /* Free current RX buffer structs and their mbufs */
1485         ixgbe_free_receive_ring(rxr);
1486
1487         /* Now replenish the mbufs */
1488         for (int j = 0; j != rxr->num_desc; ++j) {
1489                 struct mbuf     *mp;
1490
1491                 rxbuf = &rxr->rx_buffers[j];
1492 #ifdef DEV_NETMAP
1493                 /*
1494                  * In netmap mode, fill the map and set the buffer
1495                  * address in the NIC ring, considering the offset
1496                  * between the netmap and NIC rings (see comment in
1497                  * ixgbe_setup_transmit_ring() ). No need to allocate
1498                  * an mbuf, so end the block with a continue;
1499                  */
1500                 if (slot) {
1501                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1502                         uint64_t paddr;
1503                         void *addr;
1504
1505                         addr = PNMB(na, slot + sj, &paddr);
1506                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1507                         /* Update descriptor and the cached value */
1508                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1509                         rxbuf->addr = htole64(paddr);
1510                         continue;
1511                 }
1512 #endif /* DEV_NETMAP */
1513                 rxbuf->flags = 0; 
1514                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1515                     M_PKTHDR, adapter->rx_mbuf_sz);
1516                 if (rxbuf->buf == NULL) {
1517                         error = ENOBUFS;
1518                         goto fail;
1519                 }
1520                 mp = rxbuf->buf;
1521                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1522                 /* Get the memory mapping */
1523                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1524                     rxbuf->pmap, mp, seg,
1525                     &nsegs, BUS_DMA_NOWAIT);
1526                 if (error != 0)
1527                         goto fail;
1528                 bus_dmamap_sync(rxr->ptag,
1529                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
1530                 /* Update the descriptor and the cached value */
1531                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1532                 rxbuf->addr = htole64(seg[0].ds_addr);
1533         }
1534
1535
1536         /* Setup our descriptor indices */
1537         rxr->next_to_check = 0;
1538         rxr->next_to_refresh = 0;
1539         rxr->lro_enabled = FALSE;
1540         rxr->rx_copies = 0;
1541         rxr->rx_bytes = 0;
1542         rxr->vtag_strip = FALSE;
1543
1544         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1545             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1546
1547         /*
1548         ** Now set up the LRO interface:
1549         */
1550         if (ixgbe_rsc_enable)
1551                 ixgbe_setup_hw_rsc(rxr);
1552         else if (ifp->if_capenable & IFCAP_LRO) {
1553                 int err = tcp_lro_init(lro);
1554                 if (err) {
1555                         device_printf(dev, "LRO Initialization failed!\n");
1556                         goto fail;
1557                 }
1558                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1559                 rxr->lro_enabled = TRUE;
1560                 lro->ifp = adapter->ifp;
1561         }
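        /*
         * Note: hardware RSC and software LRO are set up mutually
         * exclusively here; RSC is attempted only when the ixgbe_rsc_enable
         * knob is set, otherwise the stack's tcp_lro code is used whenever
         * IFCAP_LRO is enabled on the interface.
         */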
1562
1563         IXGBE_RX_UNLOCK(rxr);
1564         return (0);
1565
1566 fail:
1567         ixgbe_free_receive_ring(rxr);
1568         IXGBE_RX_UNLOCK(rxr);
1569         return (error);
1570 }
1571
1572 /*********************************************************************
1573  *
1574  *  Initialize all receive rings.
1575  *
1576  **********************************************************************/
1577 int
1578 ixgbe_setup_receive_structures(struct adapter *adapter)
1579 {
1580         struct rx_ring *rxr = adapter->rx_rings;
1581         int j;
1582
1583         for (j = 0; j < adapter->num_queues; j++, rxr++)
1584                 if (ixgbe_setup_receive_ring(rxr))
1585                         goto fail;
1586
1587         return (0);
1588 fail:
1589         /*
1590          * Free the RX buffers allocated so far; we only handle
1591          * the rings that completed, since the failing ring will
1592          * have cleaned up after itself. 'j' failed, so it is the terminus.
1593          */
1594         for (int i = 0; i < j; ++i) {
1595                 rxr = &adapter->rx_rings[i];
1596                 ixgbe_free_receive_ring(rxr);
1597         }
1598
1599         return (ENOBUFS);
1600 }
1601
1602
1603 /*********************************************************************
1604  *
1605  *  Free all receive rings.
1606  *
1607  **********************************************************************/
1608 void
1609 ixgbe_free_receive_structures(struct adapter *adapter)
1610 {
1611         struct rx_ring *rxr = adapter->rx_rings;
1612
1613         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1614
1615         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1616                 struct lro_ctrl         *lro = &rxr->lro;
1617                 ixgbe_free_receive_buffers(rxr);
1618                 /* Free LRO memory */
1619                 tcp_lro_free(lro);
1620                 /* Free the ring memory as well */
1621                 ixgbe_dma_free(adapter, &rxr->rxdma);
1622         }
1623
1624         free(adapter->rx_rings, M_DEVBUF);
1625 }
1626
1627
1628 /*********************************************************************
1629  *
1630  *  Free receive ring data structures
1631  *
1632  **********************************************************************/
1633 void
1634 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1635 {
1636         struct adapter          *adapter = rxr->adapter;
1637         struct ixgbe_rx_buf     *rxbuf;
1638
1639         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1640
1641         /* Cleanup any existing buffers */
1642         if (rxr->rx_buffers != NULL) {
1643                 for (int i = 0; i < adapter->num_rx_desc; i++) {
1644                         rxbuf = &rxr->rx_buffers[i];
1645                         if (rxbuf->buf != NULL) {
1646                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1647                                     BUS_DMASYNC_POSTREAD);
1648                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1649                                 rxbuf->buf->m_flags |= M_PKTHDR;
1650                                 m_freem(rxbuf->buf);
1651                         }
1652                         rxbuf->buf = NULL;
1653                         if (rxbuf->pmap != NULL) {
1654                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1655                                 rxbuf->pmap = NULL;
1656                         }
1657                 }
1659                 free(rxr->rx_buffers, M_DEVBUF);
1660                 rxr->rx_buffers = NULL;
1662         }
1663
1664         if (rxr->ptag != NULL) {
1665                 bus_dma_tag_destroy(rxr->ptag);
1666                 rxr->ptag = NULL;
1667         }
1668
1669         return;
1670 }
1671
1672 static __inline void
1673 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1674 {
1675                  
1676         /*
1677          * At the moment LRO is only for IP/TCP packets whose TCP checksum
1678          * was verified by hardware and which carry no VLAN tag in the
1679          * ethernet header.  For IPv6 we do not yet support extension headers.
1680          */
1681         if (rxr->lro_enabled &&
1682             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1683             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1684             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1685             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1686             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1687             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1688             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1689             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1690                 /*
1691                  * Send to the stack if:
1692                  *  - LRO not enabled, or
1693                  *  - no LRO resources, or
1694                  *  - LRO enqueue fails
1695                  */
1696                 if (rxr->lro.lro_cnt != 0)
1697                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1698                                 return;
1699         }
1700         IXGBE_RX_UNLOCK(rxr);
1701         (*ifp->if_input)(ifp, m);
1702         IXGBE_RX_LOCK(rxr);
1703 }
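/*
 * Note: the RX lock is dropped around the if_input() call above, presumably
 * so the stack can process (and possibly respond to) the packet without
 * holding the ring lock; keeping it held across that call could recurse
 * into the driver or stall other work on this ring.
 */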
1704
1705 static __inline void
1706 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1707 {
1708         struct ixgbe_rx_buf     *rbuf;
1709
1710         rbuf = &rxr->rx_buffers[i];
1711
1712
1713         /*
1714         ** With advanced descriptors the writeback
1715         ** clobbers the buffer addresses, so it's easier
1716         ** to just free the existing mbufs and take
1717         ** the normal refresh path to get new buffers
1718         ** and mapping.
1719         */
1720
1721         if (rbuf->fmp != NULL) { /* Partial chain? */
1722                 rbuf->fmp->m_flags |= M_PKTHDR;
1723                 m_freem(rbuf->fmp);
1724                 rbuf->fmp = NULL;
1725                 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1726         } else if (rbuf->buf) {
1727                 m_free(rbuf->buf);
1728                 rbuf->buf = NULL;
1729         }
1730         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1731
1732         rbuf->flags = 0;
1733  
1734         return;
1735 }
1736
1737
1738 /*********************************************************************
1739  *
1740  *  This routine executes in interrupt context. It replenishes
1741  *  the mbufs in the descriptor ring and sends data which has been
1742  *  DMA'ed into host memory up to the network stack.
1743  *
1744  *  Return TRUE for more work, FALSE for all clean.
1745  *********************************************************************/
1746 bool
1747 ixgbe_rxeof(struct ix_queue *que)
1748 {
1749         struct adapter          *adapter = que->adapter;
1750         struct rx_ring          *rxr = que->rxr;
1751         struct ifnet            *ifp = adapter->ifp;
1752         struct lro_ctrl         *lro = &rxr->lro;
1753         struct lro_entry        *queued;
1754         int                     i, nextp, processed = 0;
1755         u32                     staterr = 0;
1756         u16                     count = rxr->process_limit;
1757         union ixgbe_adv_rx_desc *cur;
1758         struct ixgbe_rx_buf     *rbuf, *nbuf;
1759         u16                     pkt_info;
1760
1761         IXGBE_RX_LOCK(rxr);
1762
1763 #ifdef DEV_NETMAP
1764         /* Same as the txeof routine: wakeup clients on intr. */
1765         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1766                 IXGBE_RX_UNLOCK(rxr);
1767                 return (FALSE);
1768         }
1769 #endif /* DEV_NETMAP */
1770
1771         for (i = rxr->next_to_check; count != 0;) {
1772                 struct mbuf     *sendmp, *mp;
1773                 u32             rsc, ptype;
1774                 u16             len;
1775                 u16             vtag = 0;
1776                 bool            eop;
1777  
1778                 /* Sync the ring. */
1779                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1780                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1781
1782                 cur = &rxr->rx_base[i];
1783                 staterr = le32toh(cur->wb.upper.status_error);
1784                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1785
1786                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1787                         break;
1788                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1789                         break;
1790
1791                 count--;
1792                 sendmp = NULL;
1793                 nbuf = NULL;
1794                 rsc = 0;
1795                 cur->wb.upper.status_error = 0;
1796                 rbuf = &rxr->rx_buffers[i];
1797                 mp = rbuf->buf;
1798
1799                 len = le16toh(cur->wb.upper.length);
1800                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1801                     IXGBE_RXDADV_PKTTYPE_MASK;
1802                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1803
1804                 /* Make sure bad packets are discarded */
1805                 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1806 #if __FreeBSD_version >= 1100036
1807                         if (IXGBE_IS_VF(adapter))
1808                                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1809 #endif
1810                         rxr->rx_discarded++;
1811                         ixgbe_rx_discard(rxr, i);
1812                         goto next_desc;
1813                 }
1814
1815                 /*
1816                 ** On the 82599, which supports a hardware
1817                 ** LRO (called HW RSC), packets need
1818                 ** not be fragmented across sequential
1819                 ** descriptors; rather, the next descriptor
1820                 ** is indicated in bits of the descriptor.
1821                 ** This also means that we might process
1822                 ** more than one packet at a time, something
1823                 ** that has never been true before; it
1824                 ** required eliminating global chain pointers
1825                 ** in favor of what we are doing here.  -jfv
1826                 */
1827                 if (!eop) {
1828                         /*
1829                         ** Figure out the next descriptor
1830                         ** of this frame.
1831                         */
1832                         if (rxr->hw_rsc == TRUE) {
1833                                 rsc = ixgbe_rsc_count(cur);
1834                                 rxr->rsc_num += (rsc - 1);
1835                         }
1836                         if (rsc) { /* Get hardware index */
1837                                 nextp = ((staterr &
1838                                     IXGBE_RXDADV_NEXTP_MASK) >>
1839                                     IXGBE_RXDADV_NEXTP_SHIFT);
1840                         } else { /* Just sequential */
1841                                 nextp = i + 1;
1842                                 if (nextp == adapter->num_rx_desc)
1843                                         nextp = 0;
1844                         }
1845                         nbuf = &rxr->rx_buffers[nextp];
1846                         prefetch(nbuf);
1847                 }
1848                 /*
1849                 ** Rather than using the fmp/lmp global pointers
1850                 ** we now keep the head of a packet chain in the
1851                 ** buffer struct and pass this along from one
1852                 ** descriptor to the next, until we get EOP.
1853                 */
1854                 mp->m_len = len;
1855                 /*
1856                 ** See if there is a stored head from a
1857                 ** previous descriptor to chain onto.
1858                 */
1859                 sendmp = rbuf->fmp;
1860                 if (sendmp != NULL) {  /* secondary frag */
1861                         rbuf->buf = rbuf->fmp = NULL;
1862                         mp->m_flags &= ~M_PKTHDR;
1863                         sendmp->m_pkthdr.len += mp->m_len;
1864                 } else {
1865                         /*
1866                          * Optimize.  This might be a small packet,
1867                          * maybe just a TCP ACK.  Do a fast copy that
1868                          * is cache aligned into a new mbuf, and
1869                          * leave the old mbuf+cluster for re-use.
1870                          */
1871                         if (eop && len <= IXGBE_RX_COPY_LEN) {
1872                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1873                                 if (sendmp != NULL) {
1874                                         sendmp->m_data +=
1875                                             IXGBE_RX_COPY_ALIGN;
1876                                         ixgbe_bcopy(mp->m_data,
1877                                             sendmp->m_data, len);
1878                                         sendmp->m_len = len;
1879                                         rxr->rx_copies++;
1880                                         rbuf->flags |= IXGBE_RX_COPY;
1881                                 }
1882                         }
1883                         if (sendmp == NULL) {
1884                                 rbuf->buf = rbuf->fmp = NULL;
1885                                 sendmp = mp;
1886                         }
1887
1888                         /* first desc of a non-ps chain */
1889                         sendmp->m_flags |= M_PKTHDR;
1890                         sendmp->m_pkthdr.len = mp->m_len;
1891                 }
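                /*
                 * Note: when the copy optimization above fires, the original
                 * cluster stays attached to rbuf and IXGBE_RX_COPY is set,
                 * so ixgbe_refresh_mbufs() can reuse the already-mapped
                 * cluster and simply re-program the descriptor from the
                 * cached rbuf->addr instead of allocating a new buffer.
                 */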
1892                 ++processed;
1893
1894                 /* Pass the head pointer on */
1895                 if (eop == 0) {
1896                         nbuf->fmp = sendmp;
1897                         sendmp = NULL;
1898                         mp->m_next = nbuf->buf;
1899                 } else { /* Sending this frame */
1900                         sendmp->m_pkthdr.rcvif = ifp;
1901                         rxr->rx_packets++;
1902                         /* capture data for AIM */
1903                         rxr->bytes += sendmp->m_pkthdr.len;
1904                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1905                         /* Process vlan info */
1906                         if ((rxr->vtag_strip) &&
1907                             (staterr & IXGBE_RXD_STAT_VP))
1908                                 vtag = le16toh(cur->wb.upper.vlan);
1909                         if (vtag) {
1910                                 sendmp->m_pkthdr.ether_vtag = vtag;
1911                                 sendmp->m_flags |= M_VLANTAG;
1912                         }
1913                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1914                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
1915 #if __FreeBSD_version >= 800000
1916 #ifdef RSS
1917                         sendmp->m_pkthdr.flowid =
1918                             le32toh(cur->wb.lower.hi_dword.rss);
1919 #if __FreeBSD_version < 1100054
1920                         sendmp->m_flags |= M_FLOWID;
1921 #endif
1922                         switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1923                         case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1924                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
1925                                 break;
1926                         case IXGBE_RXDADV_RSSTYPE_IPV4:
1927                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
1928                                 break;
1929                         case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1930                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
1931                                 break;
1932                         case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1933                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
1934                                 break;
1935                         case IXGBE_RXDADV_RSSTYPE_IPV6:
1936                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
1937                                 break;
1938                         case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1939                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
1940                                 break;
1941                         case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1942                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
1943                                 break;
1944                         case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1945                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
1946                                 break;
1947                         case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1948                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
1949                                 break;
1950                         default:
1951                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1952                         }
1953 #else /* RSS */
1954                         sendmp->m_pkthdr.flowid = que->msix;
1955 #if __FreeBSD_version >= 1100054
1956                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1957 #else
1958                         sendmp->m_flags |= M_FLOWID;
1959 #endif
1960 #endif /* RSS */
1961 #endif /* FreeBSD_version */
1962                 }
1963 next_desc:
1964                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1965                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1966
1967                 /* Advance our pointers to the next descriptor. */
1968                 if (++i == rxr->num_desc)
1969                         i = 0;
1970
1971                 /* Now send to the stack or do LRO */
1972                 if (sendmp != NULL) {
1973                         rxr->next_to_check = i;
1974                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1975                         i = rxr->next_to_check;
1976                 }
1977
1978                 /* Every 8 descriptors we go to refresh mbufs */
1979                 if (processed == 8) {
1980                         ixgbe_refresh_mbufs(rxr, i);
1981                         processed = 0;
1982                 }
1983         }
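        /*
         * Note: the batch-of-8 refresh above appears intended to keep
         * descriptors flowing back to the hardware during long bursts
         * without writing the tail register on every packet.
         */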
1984
1985         /* Refresh any remaining buf structs */
1986         if (ixgbe_rx_unrefreshed(rxr))
1987                 ixgbe_refresh_mbufs(rxr, i);
1988
1989         rxr->next_to_check = i;
1990
1991         /*
1992          * Flush any outstanding LRO work
1993          */
1994         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1995                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1996                 tcp_lro_flush(lro, queued);
1997         }
1998
1999         IXGBE_RX_UNLOCK(rxr);
2000
2001         /*
2002         ** Still have cleaning to do?
2003         */
2004         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2005                 return (TRUE);
2006         else
2007                 return (FALSE);
2008 }
2009
2010
2011 /*********************************************************************
2012  *
2013  *  Verify that the hardware indicated that the checksum is valid.
2014  *  Inform the stack about the status of checksum so that stack
2015  *  Inform the stack about the status of the checksum so that the
2016  *  stack doesn't spend time re-verifying it.
2017  *********************************************************************/
2018 static void
2019 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2020 {
2021         u16     status = (u16) staterr;
2022         u8      errors = (u8) (staterr >> 24);
2023         bool    sctp = FALSE;
2024
2025         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2026             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2027                 sctp = TRUE;
2028
2029         if (status & IXGBE_RXD_STAT_IPCS) {
2030                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2031                         /* IP Checksum Good */
2032                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
2033                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2034
2035                 } else
2036                         mp->m_pkthdr.csum_flags = 0;
2037         }
2038         if (status & IXGBE_RXD_STAT_L4CS) {
2039                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2040 #if __FreeBSD_version >= 800000
2041                 if (sctp)
2042                         type = CSUM_SCTP_VALID;
2043 #endif
2044                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2045                         mp->m_pkthdr.csum_flags |= type;
2046                         if (!sctp)
2047                                 mp->m_pkthdr.csum_data = htons(0xffff);
2048                 } 
2049         }
2050         return;
2051 }
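/*
 * Note: csum_data = 0xffff together with CSUM_DATA_VALID | CSUM_PSEUDO_HDR
 * is the stack's convention for "L4 checksum fully verified", so the
 * TCP/UDP layers skip their own verification.
 */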
2052
2053 /********************************************************************
2054  * Manage DMA'able memory.
2055  *******************************************************************/
2056 static void
2057 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2058 {
2059         if (error)
2060                 return;
2061         *(bus_addr_t *) arg = segs->ds_addr;
2062         return;
2063 }
2064
2065 int
2066 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2067                 struct ixgbe_dma_alloc *dma, int mapflags)
2068 {
2069         device_t dev = adapter->dev;
2070         int             r;
2071
2072         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2073                                DBA_ALIGN, 0,    /* alignment, bounds */
2074                                BUS_SPACE_MAXADDR,       /* lowaddr */
2075                                BUS_SPACE_MAXADDR,       /* highaddr */
2076                                NULL, NULL,      /* filter, filterarg */
2077                                size,    /* maxsize */
2078                                1,       /* nsegments */
2079                                size,    /* maxsegsize */
2080                                BUS_DMA_ALLOCNOW,        /* flags */
2081                                NULL,    /* lockfunc */
2082                                NULL,    /* lockfuncarg */
2083                                &dma->dma_tag);
2084         if (r != 0) {
2085                 device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create failed; "
2086                        "error %u\n", r);
2087                 goto fail_0;
2088         }
2089         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2090                              BUS_DMA_NOWAIT, &dma->dma_map);
2091         if (r != 0) {
2092                 device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2093                        "error %u\n", r);
2094                 goto fail_1;
2095         }
2096         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2097                             size,
2098                             ixgbe_dmamap_cb,
2099                             &dma->dma_paddr,
2100                             mapflags | BUS_DMA_NOWAIT);
2101         if (r != 0) {
2102                 device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load failed; "
2103                        "error %u\n", r);
2104                 goto fail_2;
2105         }
2106         dma->dma_size = size;
2107         return (0);
2108 fail_2:
2109         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2110 fail_1:
2111         bus_dma_tag_destroy(dma->dma_tag);
2112 fail_0:
2113         dma->dma_tag = NULL;
2114         return (r);
2115 }
2116
2117 void
2118 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2119 {
2120         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2121             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2122         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2123         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2124         bus_dma_tag_destroy(dma->dma_tag);
2125 }
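/*
 * Illustrative usage of the two helpers above (a sketch only, not part of
 * the driver flow; "ring" and its fields are hypothetical stand-ins that
 * mirror what ixgbe_allocate_queues() does below):
 *
 *      struct ixgbe_dma_alloc dma;
 *
 *      if (ixgbe_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) == 0) {
 *              ring->base = dma.dma_vaddr;     // KVA used by the driver
 *              // the hardware is given dma.dma_paddr as the ring base
 *              ...
 *              ixgbe_dma_free(adapter, &dma);  // on teardown or error
 *      }
 */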
2126
2127
2128 /*********************************************************************
2129  *
2130  *  Allocate memory for the transmit and receive rings, and then
2131  *  the descriptors associated with each; called only once at attach.
2132  *
2133  **********************************************************************/
2134 int
2135 ixgbe_allocate_queues(struct adapter *adapter)
2136 {
2137         device_t        dev = adapter->dev;
2138         struct ix_queue *que;
2139         struct tx_ring  *txr;
2140         struct rx_ring  *rxr;
2141         int rsize, tsize, error = IXGBE_SUCCESS;
2142         int txconf = 0, rxconf = 0;
2143
2144         /* First allocate the top level queue structs */
2145         if (!(adapter->queues =
2146             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2147             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2148                 device_printf(dev, "Unable to allocate queue memory\n");
2149                 error = ENOMEM;
2150                 goto fail;
2151         }
2152
2153         /* First allocate the TX ring struct memory */
2154         if (!(adapter->tx_rings =
2155             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2156             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2157                 device_printf(dev, "Unable to allocate TX ring memory\n");
2158                 error = ENOMEM;
2159                 goto tx_fail;
2160         }
2161
2162         /* Next allocate the RX */
2163         if (!(adapter->rx_rings =
2164             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2165             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2166                 device_printf(dev, "Unable to allocate RX ring memory\n");
2167                 error = ENOMEM;
2168                 goto rx_fail;
2169         }
2170
2171         /* For the ring itself */
2172         tsize = roundup2(adapter->num_tx_desc *
2173             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2174
2175         /*
2176          * Now set up the TX queues; txconf is needed to handle the
2177          * possibility that things fail midcourse and we need to
2178          * undo the allocations gracefully.
2179          */ 
2180         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2181                 /* Set up some basics */
2182                 txr = &adapter->tx_rings[i];
2183                 txr->adapter = adapter;
2184                 txr->me = i;
2185                 txr->num_desc = adapter->num_tx_desc;
2186
2187                 /* Initialize the TX side lock */
2188                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2189                     device_get_nameunit(dev), txr->me);
2190                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2191
2192                 if (ixgbe_dma_malloc(adapter, tsize,
2193                         &txr->txdma, BUS_DMA_NOWAIT)) {
2194                         device_printf(dev,
2195                             "Unable to allocate TX Descriptor memory\n");
2196                         error = ENOMEM;
2197                         goto err_tx_desc;
2198                 }
2199                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2200                 bzero((void *)txr->tx_base, tsize);
2201
2202                 /* Now allocate transmit buffers for the ring */
2203                 if (ixgbe_allocate_transmit_buffers(txr)) {
2204                         device_printf(dev,
2205                             "Critical Failure setting up transmit buffers\n");
2206                         error = ENOMEM;
2207                         goto err_tx_desc;
2208                 }
2209 #ifndef IXGBE_LEGACY_TX
2210                 /* Allocate a buf ring */
2211                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2212                     M_WAITOK, &txr->tx_mtx);
2213                 if (txr->br == NULL) {
2214                         device_printf(dev,
2215                             "Critical Failure setting up buf ring\n");
2216                         error = ENOMEM;
2217                         goto err_tx_desc;
2218                 }
2219 #endif
2220         }
2221
2222         /*
2223          * Next the RX queues...
2224          */ 
2225         rsize = roundup2(adapter->num_rx_desc *
2226             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2227         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2228                 rxr = &adapter->rx_rings[i];
2229                 /* Set up some basics */
2230                 rxr->adapter = adapter;
2231                 rxr->me = i;
2232                 rxr->num_desc = adapter->num_rx_desc;
2233
2234                 /* Initialize the RX side lock */
2235                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2236                     device_get_nameunit(dev), rxr->me);
2237                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2238
2239                 if (ixgbe_dma_malloc(adapter, rsize,
2240                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2241                         device_printf(dev,
2242                             "Unable to allocate RX Descriptor memory\n");
2243                         error = ENOMEM;
2244                         goto err_rx_desc;
2245                 }
2246                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2247                 bzero((void *)rxr->rx_base, rsize);
2248
2249                 /* Allocate receive buffers for the ring*/
2250                 if (ixgbe_allocate_receive_buffers(rxr)) {
2251                         device_printf(dev,
2252                             "Critical Failure setting up receive buffers\n");
2253                         error = ENOMEM;
2254                         goto err_rx_desc;
2255                 }
2256         }
2257
2258         /*
2259         ** Finally set up the queue holding structs
2260         */
2261         for (int i = 0; i < adapter->num_queues; i++) {
2262                 que = &adapter->queues[i];
2263                 que->adapter = adapter;
2264                 que->me = i;
2265                 que->txr = &adapter->tx_rings[i];
2266                 que->rxr = &adapter->rx_rings[i];
2267         }
2268
2269         return (0);
2270
2271 err_rx_desc:
2272         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2273                 ixgbe_dma_free(adapter, &rxr->rxdma);
2274 err_tx_desc:
2275         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2276                 ixgbe_dma_free(adapter, &txr->txdma);
2277         free(adapter->rx_rings, M_DEVBUF);
2278 rx_fail:
2279         free(adapter->tx_rings, M_DEVBUF);
2280 tx_fail:
2281         free(adapter->queues, M_DEVBUF);
2282 fail:
2283         return (error);
2284 }
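/*
 * Note: the error labels in ixgbe_allocate_queues() unwind in reverse
 * allocation order; rxconf and txconf record how many rings received
 * descriptor memory, so only those rings have their DMA areas freed, while
 * the rx_rings, tx_rings and queues arrays are released unconditionally by
 * the later labels.
 */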