1 /******************************************************************************
2
3   Copyright (c) 2001-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41
42 #include "ixgbe.h"
43
44 #ifdef  RSS
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
47 #endif
48
49 #ifdef DEV_NETMAP
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
53
54 extern int ix_crcstrip;
55 #endif
56
57 /*
58 ** HW RSC control:
59 **  this feature only works with
60 **  IPv4, and only on 82599 and later.
61 **  It also causes IP forwarding to
62 **  fail, and unlike software LRO the
63 **  stack cannot disable it on its own.
64 **  For these reasons it is left off by
65 **  default with no tunable interface;
66 **  enabling it requires setting this
67 **  TRUE and recompiling the driver.
68 */
69 static bool ixgbe_rsc_enable = FALSE;
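/*
 * Illustrative sketch only, not compiled: if a boot-time tunable were
 * wanted despite the note above, it might look like the lines below.
 * The "hw.ix.rsc_enable" name is hypothetical (no such tunable exists
 * in this driver), and the variable would have to become an int for
 * TUNABLE_INT().
 */
#if 0
static int ixgbe_rsc_enable = 0;
TUNABLE_INT("hw.ix.rsc_enable", &ixgbe_rsc_enable);
#endif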
70
71 #ifdef IXGBE_FDIR
72 /*
73 ** For Flow Director: this is the
74 ** TX packet sampling interval for
75 ** the filter pool; with the default,
76 ** every 20th packet will be probed.
77 **
78 ** This feature can be disabled by
79 ** setting this to 0.
80 */
81 static int atr_sample_rate = 20;
82 #endif
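/*
 * Note on the sampling cadence (descriptive only): ixgbe_setup_transmit_ring()
 * copies atr_sample_rate into txr->atr_sample, and ixgbe_xmit() hands a packet
 * to ixgbe_atr() only when txr->atr_count reaches that value, so with the
 * default of 20 roughly one packet in twenty per TX ring is probed.  Setting
 * the rate to 0 leaves txr->atr_sample at zero, so the (txr->atr_sample) test
 * in ixgbe_xmit() never passes and sampling is effectively off.
 */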
83
84 /* Shared PCI config read/write */
85 inline u16
86 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
87 {
88         u16 value;
89
90         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
91             reg, 2);
92
93         return (value);
94 }
95
96 inline void
97 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
98 {
99         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
100             reg, value, 2);
101
102         return;
103 }
104
105 /*********************************************************************
106  *  Local Function prototypes
107  *********************************************************************/
108 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
109 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
110 static int      ixgbe_setup_receive_ring(struct rx_ring *);
111 static void     ixgbe_free_receive_buffers(struct rx_ring *);
112
113 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
114 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
116 static int      ixgbe_tx_ctx_setup(struct tx_ring *,
117                     struct mbuf *, u32 *, u32 *);
118 static int      ixgbe_tso_setup(struct tx_ring *,
119                     struct mbuf *, u32 *, u32 *);
120 #ifdef IXGBE_FDIR
121 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
122 #endif
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125                     struct mbuf *, u32);
126
127 #ifdef IXGBE_LEGACY_TX
128 /*********************************************************************
129  *  Transmit entry point
130  *
131  *  ixgbe_start is called by the stack to initiate a transmit.
132  *  The driver will remain in this routine as long as there are
133  *  packets to transmit and transmit resources are available.
134  *  If resources are not available, the stack is notified and
135  *  the packet is requeued.
136  **********************************************************************/
137
138 void
139 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
140 {
141         struct mbuf    *m_head;
142         struct adapter *adapter = txr->adapter;
143
144         IXGBE_TX_LOCK_ASSERT(txr);
145
146         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
147                 return;
148         if (!adapter->link_active)
149                 return;
150
151         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
152                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
153                         break;
154
155                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
156                 if (m_head == NULL)
157                         break;
158
159                 if (ixgbe_xmit(txr, &m_head)) {
160                         if (m_head != NULL)
161                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
162                         break;
163                 }
164                 /* Send a copy of the frame to the BPF listener */
165                 ETHER_BPF_MTAP(ifp, m_head);
166         }
167         return;
168 }
169
170 /*
171  * Legacy TX start - called by the stack, this
172  * always uses the first tx ring, and should
173  * not be used with multiqueue tx enabled.
174  */
175 void
176 ixgbe_start(struct ifnet *ifp)
177 {
178         struct adapter *adapter = ifp->if_softc;
179         struct tx_ring  *txr = adapter->tx_rings;
180
181         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
182                 IXGBE_TX_LOCK(txr);
183                 ixgbe_start_locked(txr, ifp);
184                 IXGBE_TX_UNLOCK(txr);
185         }
186         return;
187 }
188
189 #else /* ! IXGBE_LEGACY_TX */
190
191 /*
192 ** Multiqueue Transmit driver
193 **
194 */
195 int
196 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
197 {
198         struct adapter  *adapter = ifp->if_softc;
199         struct ix_queue *que;
200         struct tx_ring  *txr;
201         int             i, err = 0;
202 #ifdef  RSS
203         uint32_t bucket_id;
204 #endif
205
206         /*
207          * When doing RSS, map it to the same outbound queue
208          * as the incoming flow would be mapped to.
209          *
210          * If everything is setup correctly, it should be the
211          * same bucket that the current CPU we're on is.
212          */
213         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
214 #ifdef  RSS
215                 if (rss_hash2bucket(m->m_pkthdr.flowid,
216                     M_HASHTYPE_GET(m), &bucket_id) == 0)
217                         /* TODO: spit out something if bucket_id > num_queues? */
218                         i = bucket_id % adapter->num_queues;
219                 else 
220 #endif
221                         i = m->m_pkthdr.flowid % adapter->num_queues;
222         } else
223                 i = curcpu % adapter->num_queues;
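                /*
                 * Illustrative example (values hypothetical): with
                 * num_queues = 8, a packet carrying flow hash 0x8c6a93e5
                 * maps to ring 0x8c6a93e5 % 8 = 5, so every packet of
                 * that flow keeps landing on the same TX ring.
                 */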
224
225         /* Check for a hung queue and pick alternative */
226         if (((1 << i) & adapter->active_queues) == 0)
227                 i = ffsl(adapter->active_queues);
228
229         txr = &adapter->tx_rings[i];
230         que = &adapter->queues[i];
231
232         err = drbr_enqueue(ifp, txr->br, m);
233         if (err)
234                 return (err);
235         if (IXGBE_TX_TRYLOCK(txr)) {
236                 ixgbe_mq_start_locked(ifp, txr);
237                 IXGBE_TX_UNLOCK(txr);
238         } else
239                 taskqueue_enqueue(que->tq, &txr->txq_task);
240
241         return (0);
242 }
243
244 int
245 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
246 {
247         struct adapter  *adapter = txr->adapter;
248         struct mbuf     *next;
249         int             enqueued = 0, err = 0;
250
251         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
252             adapter->link_active == 0)
253                 return (ENETDOWN);
254
255         /* Process the queue */
256 #if __FreeBSD_version < 901504
257         next = drbr_dequeue(ifp, txr->br);
258         while (next != NULL) {
259                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
260                         if (next != NULL)
261                                 err = drbr_enqueue(ifp, txr->br, next);
262 #else
263         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
264                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
265                         if (next == NULL) {
266                                 drbr_advance(ifp, txr->br);
267                         } else {
268                                 drbr_putback(ifp, txr->br, next);
269                         }
270 #endif
271                         break;
272                 }
273 #if __FreeBSD_version >= 901504
274                 drbr_advance(ifp, txr->br);
275 #endif
276                 enqueued++;
277 #if 0 // this is VF-only
278 #if __FreeBSD_version >= 1100036
279                 /*
280                  * Since we're looking at the tx ring, we can check
281                  * to see if we're a VF by examining our tail register
282                  * address.
283                  */
284                 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
285                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
286 #endif
287 #endif
288                 /* Send a copy of the frame to the BPF listener */
289                 ETHER_BPF_MTAP(ifp, next);
290                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
291                         break;
292 #if __FreeBSD_version < 901504
293                 next = drbr_dequeue(ifp, txr->br);
294 #endif
295         }
296
297         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
298                 ixgbe_txeof(txr);
299
300         return (err);
301 }
302
303 /*
304  * Called from a taskqueue to drain queued transmit packets.
305  */
306 void
307 ixgbe_deferred_mq_start(void *arg, int pending)
308 {
309         struct tx_ring *txr = arg;
310         struct adapter *adapter = txr->adapter;
311         struct ifnet *ifp = adapter->ifp;
312
313         IXGBE_TX_LOCK(txr);
314         if (!drbr_empty(ifp, txr->br))
315                 ixgbe_mq_start_locked(ifp, txr);
316         IXGBE_TX_UNLOCK(txr);
317 }
318
319 /*
320  * Flush all ring buffers
321  */
322 void
323 ixgbe_qflush(struct ifnet *ifp)
324 {
325         struct adapter  *adapter = ifp->if_softc;
326         struct tx_ring  *txr = adapter->tx_rings;
327         struct mbuf     *m;
328
329         for (int i = 0; i < adapter->num_queues; i++, txr++) {
330                 IXGBE_TX_LOCK(txr);
331                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
332                         m_freem(m);
333                 IXGBE_TX_UNLOCK(txr);
334         }
335         if_qflush(ifp);
336 }
337 #endif /* IXGBE_LEGACY_TX */
338
339
340 /*********************************************************************
341  *
342  *  This routine maps the mbufs to tx descriptors, allowing the
343  *  TX engine to transmit the packets. 
344  *      - return 0 on success, positive on failure
345  *
346  **********************************************************************/
347
348 static int
349 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
350 {
351         struct adapter  *adapter = txr->adapter;
352         u32             olinfo_status = 0, cmd_type_len;
353         int             i, j, error, nsegs;
354         int             first;
355         bool            remap = TRUE;
356         struct mbuf     *m_head;
357         bus_dma_segment_t segs[adapter->num_segs];
358         bus_dmamap_t    map;
359         struct ixgbe_tx_buf *txbuf;
360         union ixgbe_adv_tx_desc *txd = NULL;
361
362         m_head = *m_headp;
363
364         /* Basic descriptor defines */
365         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
366             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
367
368         if (m_head->m_flags & M_VLANTAG)
369                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
370
371         /*
372          * Important to capture the first descriptor
373          * used because it will contain the index of
374          * the one we tell the hardware to report back
375          */
376         first = txr->next_avail_desc;
377         txbuf = &txr->tx_buffers[first];
378         map = txbuf->map;
379
380         /*
381          * Map the packet for DMA.
382          */
383 retry:
384         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
385             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
386
387         if (__predict_false(error)) {
388                 struct mbuf *m;
389
390                 switch (error) {
391                 case EFBIG:
392                         /* Try it again? - one try */
393                         if (remap == TRUE) {
394                                 remap = FALSE;
395                                 /*
396                                  * XXX: m_defrag will choke on
397                                  * non-MCLBYTES-sized clusters
398                                  */
399                                 m = m_defrag(*m_headp, M_NOWAIT);
400                                 if (m == NULL) {
401                                         adapter->mbuf_defrag_failed++;
402                                         m_freem(*m_headp);
403                                         *m_headp = NULL;
404                                         return (ENOBUFS);
405                                 }
406                                 *m_headp = m;
407                                 goto retry;
408                         } else
409                                 return (error);
410                 case ENOMEM:
411                         txr->no_tx_dma_setup++;
412                         return (error);
413                 default:
414                         txr->no_tx_dma_setup++;
415                         m_freem(*m_headp);
416                         *m_headp = NULL;
417                         return (error);
418                 }
419         }
420
421         /* Make certain there are enough descriptors */
422         if (nsegs > txr->tx_avail - 2) {
423                 txr->no_desc_avail++;
424                 bus_dmamap_unload(txr->txtag, map);
425                 return (ENOBUFS);
426         }
427         m_head = *m_headp;
428
429         /*
430          * Set up the appropriate offload context
431          * this will consume the first descriptor
432          */
433         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
434         if (__predict_false(error)) {
435                 if (error == ENOBUFS)
436                         *m_headp = NULL;
437                 return (error);
438         }
439
440 #ifdef IXGBE_FDIR
441         /* Do the flow director magic */
442         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
443                 ++txr->atr_count;
444                 if (txr->atr_count >= atr_sample_rate) {
445                         ixgbe_atr(txr, m_head);
446                         txr->atr_count = 0;
447                 }
448         }
449 #endif
450
451         i = txr->next_avail_desc;
452         for (j = 0; j < nsegs; j++) {
453                 bus_size_t seglen;
454                 bus_addr_t segaddr;
455
456                 txbuf = &txr->tx_buffers[i];
457                 txd = &txr->tx_base[i];
458                 seglen = segs[j].ds_len;
459                 segaddr = htole64(segs[j].ds_addr);
460
461                 txd->read.buffer_addr = segaddr;
462                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
463                     cmd_type_len |seglen);
464                 txd->read.olinfo_status = htole32(olinfo_status);
465
466                 if (++i == txr->num_desc)
467                         i = 0;
468         }
469
470         txd->read.cmd_type_len |=
471             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
472         txr->tx_avail -= nsegs;
473         txr->next_avail_desc = i;
474
475         txbuf->m_head = m_head;
476         /*
477          * Here we swap the map so the last descriptor,
478          * which gets the completion interrupt, has the
479          * real map, and the first descriptor gets the
480          * unused map from this descriptor.
481          */
482         txr->tx_buffers[first].map = txbuf->map;
483         txbuf->map = map;
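        /*
         * (Descriptive note) The map loaded at the top of this function
         * now travels with the buffer that owns m_head (the last
         * segment), while the first buffer inherits that buffer's
         * unused map.  ixgbe_txeof() syncs and unloads buf->map on the
         * buffer whose m_head is set, so the loaded map is the one
         * that is unloaded at completion time.
         */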
484         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
485
486         /* Set the EOP descriptor that will be marked done */
487         txbuf = &txr->tx_buffers[first];
488         txbuf->eop = txd;
489
490         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
491             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
492         /*
493          * Advance the Transmit Descriptor Tail (TDT); this tells the
494          * hardware that this frame is available to transmit.
495          */
496         ++txr->total_packets;
497         IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
498
499         /* Mark queue as having work */
500         if (txr->busy == 0)
501                 txr->busy = 1;
502
503         return (0);
504 }
505
506
507 /*********************************************************************
508  *
509  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
510  *  the information needed to transmit a packet on the wire. This is
511  *  called only once at attach, setup is done every reset.
512  *
513  **********************************************************************/
514 int
515 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
516 {
517         struct adapter *adapter = txr->adapter;
518         device_t dev = adapter->dev;
519         struct ixgbe_tx_buf *txbuf;
520         int error, i;
521
522         /*
523          * Setup DMA descriptor areas.
524          */
525         if ((error = bus_dma_tag_create(
526                                bus_get_dma_tag(adapter->dev),   /* parent */
527                                1, 0,            /* alignment, bounds */
528                                BUS_SPACE_MAXADDR,       /* lowaddr */
529                                BUS_SPACE_MAXADDR,       /* highaddr */
530                                NULL, NULL,              /* filter, filterarg */
531                                IXGBE_TSO_SIZE,          /* maxsize */
532                                adapter->num_segs,       /* nsegments */
533                                PAGE_SIZE,               /* maxsegsize */
534                                0,                       /* flags */
535                                NULL,                    /* lockfunc */
536                                NULL,                    /* lockfuncarg */
537                                &txr->txtag))) {
538                 device_printf(dev,"Unable to allocate TX DMA tag\n");
539                 goto fail;
540         }
541
542         if (!(txr->tx_buffers =
543             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
544             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
545                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
546                 error = ENOMEM;
547                 goto fail;
548         }
549
550         /* Create the descriptor buffer dma maps */
551         txbuf = txr->tx_buffers;
552         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
553                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
554                 if (error != 0) {
555                         device_printf(dev, "Unable to create TX DMA map\n");
556                         goto fail;
557                 }
558         }
559
560         return 0;
561 fail:
562         /* We free all, it handles case where we are in the middle */
563         ixgbe_free_transmit_structures(adapter);
564         return (error);
565 }
566
567 /*********************************************************************
568  *
569  *  Initialize a transmit ring.
570  *
571  **********************************************************************/
572 static void
573 ixgbe_setup_transmit_ring(struct tx_ring *txr)
574 {
575         struct adapter *adapter = txr->adapter;
576         struct ixgbe_tx_buf *txbuf;
577 #ifdef DEV_NETMAP
578         struct netmap_adapter *na = NA(adapter->ifp);
579         struct netmap_slot *slot;
580 #endif /* DEV_NETMAP */
581
582         /* Clear the old ring contents */
583         IXGBE_TX_LOCK(txr);
584 #ifdef DEV_NETMAP
585         /*
586          * (under lock): if in netmap mode, do some consistency
587          * checks and set slot to entry 0 of the netmap ring.
588          */
589         slot = netmap_reset(na, NR_TX, txr->me, 0);
590 #endif /* DEV_NETMAP */
591         bzero((void *)txr->tx_base,
592               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
593         /* Reset indices */
594         txr->next_avail_desc = 0;
595         txr->next_to_clean = 0;
596
597         /* Free any existing tx buffers. */
598         txbuf = txr->tx_buffers;
599         for (int i = 0; i < txr->num_desc; i++, txbuf++) {
600                 if (txbuf->m_head != NULL) {
601                         bus_dmamap_sync(txr->txtag, txbuf->map,
602                             BUS_DMASYNC_POSTWRITE);
603                         bus_dmamap_unload(txr->txtag, txbuf->map);
604                         m_freem(txbuf->m_head);
605                         txbuf->m_head = NULL;
606                 }
607 #ifdef DEV_NETMAP
608                 /*
609                  * In netmap mode, set the map for the packet buffer.
610                  * NOTE: Some drivers (not this one) also need to set
611                  * the physical buffer address in the NIC ring.
612                  * Slots in the netmap ring (indexed by "si") are
613                  * kring->nkr_hwofs positions "ahead" wrt the
614                  * corresponding slot in the NIC ring. In some drivers
615                  * (not here) nkr_hwofs can be negative. Function
616                  * netmap_idx_n2k() handles wraparounds properly.
617                  */
618                 if (slot) {
619                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
620                         netmap_load_map(na, txr->txtag,
621                             txbuf->map, NMB(na, slot + si));
622                 }
623 #endif /* DEV_NETMAP */
624                 /* Clear the EOP descriptor pointer */
625                 txbuf->eop = NULL;
626         }
627
628 #ifdef IXGBE_FDIR
629         /* Set the rate at which we sample packets */
630         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
631                 txr->atr_sample = atr_sample_rate;
632 #endif
633
634         /* Set number of descriptors available */
635         txr->tx_avail = adapter->num_tx_desc;
636
637         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
638             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
639         IXGBE_TX_UNLOCK(txr);
640 }
641
642 /*********************************************************************
643  *
644  *  Initialize all transmit rings.
645  *
646  **********************************************************************/
647 int
648 ixgbe_setup_transmit_structures(struct adapter *adapter)
649 {
650         struct tx_ring *txr = adapter->tx_rings;
651
652         for (int i = 0; i < adapter->num_queues; i++, txr++)
653                 ixgbe_setup_transmit_ring(txr);
654
655         return (0);
656 }
657
658 /*********************************************************************
659  *
660  *  Free all transmit rings.
661  *
662  **********************************************************************/
663 void
664 ixgbe_free_transmit_structures(struct adapter *adapter)
665 {
666         struct tx_ring *txr = adapter->tx_rings;
667
668         for (int i = 0; i < adapter->num_queues; i++, txr++) {
669                 IXGBE_TX_LOCK(txr);
670                 ixgbe_free_transmit_buffers(txr);
671                 ixgbe_dma_free(adapter, &txr->txdma);
672                 IXGBE_TX_UNLOCK(txr);
673                 IXGBE_TX_LOCK_DESTROY(txr);
674         }
675         free(adapter->tx_rings, M_DEVBUF);
676 }
677
678 /*********************************************************************
679  *
680  *  Free transmit ring related data structures.
681  *
682  **********************************************************************/
683 static void
684 ixgbe_free_transmit_buffers(struct tx_ring *txr)
685 {
686         struct adapter *adapter = txr->adapter;
687         struct ixgbe_tx_buf *tx_buffer;
688         int             i;
689
690         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
691
692         if (txr->tx_buffers == NULL)
693                 return;
694
695         tx_buffer = txr->tx_buffers;
696         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
697                 if (tx_buffer->m_head != NULL) {
698                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
699                             BUS_DMASYNC_POSTWRITE);
700                         bus_dmamap_unload(txr->txtag,
701                             tx_buffer->map);
702                         m_freem(tx_buffer->m_head);
703                         tx_buffer->m_head = NULL;
704                         if (tx_buffer->map != NULL) {
705                                 bus_dmamap_destroy(txr->txtag,
706                                     tx_buffer->map);
707                                 tx_buffer->map = NULL;
708                         }
709                 } else if (tx_buffer->map != NULL) {
710                         bus_dmamap_unload(txr->txtag,
711                             tx_buffer->map);
712                         bus_dmamap_destroy(txr->txtag,
713                             tx_buffer->map);
714                         tx_buffer->map = NULL;
715                 }
716         }
717 #ifdef IXGBE_LEGACY_TX
718         if (txr->br != NULL)
719                 buf_ring_free(txr->br, M_DEVBUF);
720 #endif
721         if (txr->tx_buffers != NULL) {
722                 free(txr->tx_buffers, M_DEVBUF);
723                 txr->tx_buffers = NULL;
724         }
725         if (txr->txtag != NULL) {
726                 bus_dma_tag_destroy(txr->txtag);
727                 txr->txtag = NULL;
728         }
729         return;
730 }
731
732 /*********************************************************************
733  *
734  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
735  *
736  **********************************************************************/
737
738 static int
739 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
740     u32 *cmd_type_len, u32 *olinfo_status)
741 {
742         struct adapter *adapter = txr->adapter;
743         struct ixgbe_adv_tx_context_desc *TXD;
744         struct ether_vlan_header *eh;
745         struct ip *ip;
746         struct ip6_hdr *ip6;
747         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
748         int     ehdrlen, ip_hlen = 0;
749         u16     etype;
750         u8      ipproto = 0;
751         int     offload = TRUE;
752         int     ctxd = txr->next_avail_desc;
753         u16     vtag = 0;
754
755         /* First check if TSO is to be used */
756         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
757                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
758
759         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
760                 offload = FALSE;
761
762         /* Indicate the whole packet as payload when not doing TSO */
763         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
764
765         /* Now ready a context descriptor */
766         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
767
768         /*
769         ** In advanced descriptors the vlan tag must 
770         ** be placed into the context descriptor. Hence
771         ** we need to make one even if not doing offloads.
772         */
773         if (mp->m_flags & M_VLANTAG) {
774                 vtag = htole16(mp->m_pkthdr.ether_vtag);
775                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
776         } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
777                 return (0);
778
779         /*
780          * Determine where frame payload starts.
781          * Jump over vlan headers if already present,
782          * helpful for QinQ too.
783          */
784         eh = mtod(mp, struct ether_vlan_header *);
785         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
786                 etype = ntohs(eh->evl_proto);
787                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
788         } else {
789                 etype = ntohs(eh->evl_encap_proto);
790                 ehdrlen = ETHER_HDR_LEN;
791         }
792
793         /* Set the ether header length */
794         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
795
796         if (offload == FALSE)
797                 goto no_offloads;
798
799         switch (etype) {
800                 case ETHERTYPE_IP:
801                         ip = (struct ip *)(mp->m_data + ehdrlen);
802                         ip_hlen = ip->ip_hl << 2;
803                         ipproto = ip->ip_p;
804                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
805                         break;
806                 case ETHERTYPE_IPV6:
807                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
808                         ip_hlen = sizeof(struct ip6_hdr);
809                         /* XXX-BZ this will go badly in case of ext hdrs. */
810                         ipproto = ip6->ip6_nxt;
811                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
812                         break;
813                 default:
814                         offload = FALSE;
815                         break;
816         }
817
818         vlan_macip_lens |= ip_hlen;
819
820         switch (ipproto) {
821                 case IPPROTO_TCP:
822                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
823                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
824                         break;
825
826                 case IPPROTO_UDP:
827                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
828                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
829                         break;
830
831 #if __FreeBSD_version >= 800000
832                 case IPPROTO_SCTP:
833                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
834                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
835                         break;
836 #endif
837                 default:
838                         offload = FALSE;
839                         break;
840         }
841
842         if (offload) /* For the TX descriptor setup */
843                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
844
845 no_offloads:
846         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
847
848         /* Now copy bits into descriptor */
849         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
850         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
851         TXD->seqnum_seed = htole32(0);
852         TXD->mss_l4len_idx = htole32(0);
853
854         /* We've consumed the first desc, adjust counters */
855         if (++ctxd == txr->num_desc)
856                 ctxd = 0;
857         txr->next_avail_desc = ctxd;
858         --txr->tx_avail;
859
860         return (0);
861 }
862
863 /**********************************************************************
864  *
865  *  Setup work for hardware segmentation offload (TSO) on
866  *  adapters using advanced tx descriptors
867  *
868  **********************************************************************/
869 static int
870 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
871     u32 *cmd_type_len, u32 *olinfo_status)
872 {
873         struct ixgbe_adv_tx_context_desc *TXD;
874         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
875         u32 mss_l4len_idx = 0, paylen;
876         u16 vtag = 0, eh_type;
877         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
878         struct ether_vlan_header *eh;
879 #ifdef INET6
880         struct ip6_hdr *ip6;
881 #endif
882 #ifdef INET
883         struct ip *ip;
884 #endif
885         struct tcphdr *th;
886
887
888         /*
889          * Determine where frame payload starts.
890          * Jump over vlan headers if already present
891          */
892         eh = mtod(mp, struct ether_vlan_header *);
893         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
894                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
895                 eh_type = eh->evl_proto;
896         } else {
897                 ehdrlen = ETHER_HDR_LEN;
898                 eh_type = eh->evl_encap_proto;
899         }
900
901         switch (ntohs(eh_type)) {
902 #ifdef INET6
903         case ETHERTYPE_IPV6:
904                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
905                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
906                 if (ip6->ip6_nxt != IPPROTO_TCP)
907                         return (ENXIO);
908                 ip_hlen = sizeof(struct ip6_hdr);
909                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
910                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
911                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
912                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
913                 break;
914 #endif
915 #ifdef INET
916         case ETHERTYPE_IP:
917                 ip = (struct ip *)(mp->m_data + ehdrlen);
918                 if (ip->ip_p != IPPROTO_TCP)
919                         return (ENXIO);
920                 ip->ip_sum = 0;
921                 ip_hlen = ip->ip_hl << 2;
922                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
923                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
924                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
925                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
926                 /* Tell transmit desc to also do IPv4 checksum. */
927                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
928                 break;
929 #endif
930         default:
931                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
932                     __func__, ntohs(eh_type));
933                 break;
934         }
935
936         ctxd = txr->next_avail_desc;
937         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
938
939         tcp_hlen = th->th_off << 2;
940
941         /* This is used in the transmit desc in encap */
942         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
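        /*
         * Worked example (numbers illustrative): a 66000-byte TSO chain
         * with a 14-byte Ethernet header, a 20-byte IPv4 header and a
         * 20-byte TCP header yields paylen = 66000 - 14 - 20 - 20 =
         * 65946 bytes of TCP payload for the hardware to segment.
         */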
943
944         /* VLAN MACLEN IPLEN */
945         if (mp->m_flags & M_VLANTAG) {
946                 vtag = htole16(mp->m_pkthdr.ether_vtag);
947                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
948         }
949
950         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
951         vlan_macip_lens |= ip_hlen;
952         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
953
954         /* ADV DTYPE TUCMD */
955         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
956         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
957         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
958
959         /* MSS L4LEN IDX */
960         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
961         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
962         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
963
964         TXD->seqnum_seed = htole32(0);
965
966         if (++ctxd == txr->num_desc)
967                 ctxd = 0;
968
969         txr->tx_avail--;
970         txr->next_avail_desc = ctxd;
971         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
972         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
973         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
974         ++txr->tso_tx;
975         return (0);
976 }
977
978
979 /**********************************************************************
980  *
981  *  Examine each tx_buffer in the used queue. If the hardware is done
982  *  processing the packet then free associated resources. The
983  *  tx_buffer is put back on the free queue.
984  *
985  **********************************************************************/
986 void
987 ixgbe_txeof(struct tx_ring *txr)
988 {
989         struct adapter          *adapter = txr->adapter;
990 #ifdef DEV_NETMAP
991         struct ifnet            *ifp = adapter->ifp;
992 #endif
993         u32                     work, processed = 0;
994         u32                     limit = adapter->tx_process_limit;
995         struct ixgbe_tx_buf     *buf;
996         union ixgbe_adv_tx_desc *txd;
997
998         mtx_assert(&txr->tx_mtx, MA_OWNED);
999
1000 #ifdef DEV_NETMAP
1001         if (ifp->if_capenable & IFCAP_NETMAP) {
1002                 struct netmap_adapter *na = NA(ifp);
1003                 struct netmap_kring *kring = &na->tx_rings[txr->me];
1004                 txd = txr->tx_base;
1005                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1006                     BUS_DMASYNC_POSTREAD);
1007                 /*
1008                  * In netmap mode, all the work is done in the context
1009                  * of the client thread. Interrupt handlers only wake up
1010                  * clients, which may be sleeping on individual rings
1011                  * or on a global resource for all rings.
1012                  * To implement tx interrupt mitigation, we wake up the client
1013                  * thread roughly every half ring, even if the NIC interrupts
1014                  * more frequently. This is implemented as follows:
1015                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
1016                  *   the slot that should wake up the thread (nkr_num_slots
1017                  *   means the user thread should not be woken up);
1018                  * - the driver ignores tx interrupts unless netmap_mitigate=0
1019                  *   or the slot has the DD bit set.
1020                  */
1021                 if (!netmap_mitigate ||
1022                     (kring->nr_kflags < kring->nkr_num_slots &&
1023                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1024                         netmap_tx_irq(ifp, txr->me);
1025                 }
1026                 return;
1027         }
1028 #endif /* DEV_NETMAP */
1029
1030         if (txr->tx_avail == txr->num_desc) {
1031                 txr->busy = 0;
1032                 return;
1033         }
1034
1035         /* Get work starting point */
1036         work = txr->next_to_clean;
1037         buf = &txr->tx_buffers[work];
1038         txd = &txr->tx_base[work];
1039         work -= txr->num_desc; /* The distance to ring end */
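        /*
         * (Descriptive note) "work" is kept biased downward by num_desc
         * using unsigned wraparound, so incrementing it reaches zero
         * exactly when the scan walks off the end of the ring; the
         * !work tests below detect that wrap, and the final
         * work += num_desc turns it back into a ring index for
         * next_to_clean.
         */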
1040         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1041             BUS_DMASYNC_POSTREAD);
1042
1043         do {
1044                 union ixgbe_adv_tx_desc *eop= buf->eop;
1045                 if (eop == NULL) /* No work */
1046                         break;
1047
1048                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1049                         break;  /* I/O not complete */
1050
1051                 if (buf->m_head) {
1052                         txr->bytes +=
1053                             buf->m_head->m_pkthdr.len;
1054                         bus_dmamap_sync(txr->txtag,
1055                             buf->map,
1056                             BUS_DMASYNC_POSTWRITE);
1057                         bus_dmamap_unload(txr->txtag,
1058                             buf->map);
1059                         m_freem(buf->m_head);
1060                         buf->m_head = NULL;
1061                 }
1062                 buf->eop = NULL;
1063                 ++txr->tx_avail;
1064
1065                 /* We clean the range if multi segment */
1066                 while (txd != eop) {
1067                         ++txd;
1068                         ++buf;
1069                         ++work;
1070                         /* wrap the ring? */
1071                         if (__predict_false(!work)) {
1072                                 work -= txr->num_desc;
1073                                 buf = txr->tx_buffers;
1074                                 txd = txr->tx_base;
1075                         }
1076                         if (buf->m_head) {
1077                                 txr->bytes +=
1078                                     buf->m_head->m_pkthdr.len;
1079                                 bus_dmamap_sync(txr->txtag,
1080                                     buf->map,
1081                                     BUS_DMASYNC_POSTWRITE);
1082                                 bus_dmamap_unload(txr->txtag,
1083                                     buf->map);
1084                                 m_freem(buf->m_head);
1085                                 buf->m_head = NULL;
1086                         }
1087                         ++txr->tx_avail;
1088                         buf->eop = NULL;
1089
1090                 }
1091                 ++txr->packets;
1092                 ++processed;
1093
1094                 /* Try the next packet */
1095                 ++txd;
1096                 ++buf;
1097                 ++work;
1098                 /* reset with a wrap */
1099                 if (__predict_false(!work)) {
1100                         work -= txr->num_desc;
1101                         buf = txr->tx_buffers;
1102                         txd = txr->tx_base;
1103                 }
1104                 prefetch(txd);
1105         } while (__predict_true(--limit));
1106
1107         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1108             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1109
1110         work += txr->num_desc;
1111         txr->next_to_clean = work;
1112
1113         /*
1114         ** Queue hang detection: we know there is
1115         ** work outstanding or the early return
1116         ** above would have been taken, so increment
1117         ** busy when nothing was cleaned this pass;
1118         ** the local timer checks this counter and
1119         ** marks the queue HUNG once it exceeds a maximum.
1120         */
1121         if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1122                 ++txr->busy;
1123         /*
1124         ** If anything was cleaned we reset the state to 1;
1125         ** note this clears HUNG if it was set.
1126         */
1127         if (processed)
1128                 txr->busy = 1;
1129
1130         if (txr->tx_avail == txr->num_desc)
1131                 txr->busy = 0;
1132
1133         return;
1134 }
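/*
 * Minimal sketch, not compiled: the kind of per-queue check the
 * hang-detection comment in ixgbe_txeof() above describes.  The
 * threshold name IXGBE_MAX_BUSY and the function itself are
 * hypothetical; the real check lives in the driver's local timer
 * routine, not in this file.
 */
#if 0
static void
example_check_queue_hang(struct tx_ring *txr)
{
	if (txr->busy > IXGBE_MAX_BUSY)		/* hypothetical limit */
		txr->busy = IXGBE_QUEUE_HUNG;	/* flag for the watchdog */
}
#endif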
1135
1136
1137 #ifdef IXGBE_FDIR
1138 /*
1139 ** This routine parses packet headers so that Flow
1140 ** Director can make a hashed filter table entry 
1141 ** allowing traffic flows to be identified and kept
1142 ** on the same cpu.  Doing this for every packet
1143 ** would be a performance hit, so only one in every
1144 ** atr_sample_rate packets is examined.
1145 */
1146 static void
1147 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1148 {
1149         struct adapter                  *adapter = txr->adapter;
1150         struct ix_queue                 *que;
1151         struct ip                       *ip;
1152         struct tcphdr                   *th;
1153         struct udphdr                   *uh;
1154         struct ether_vlan_header        *eh;
1155         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
1156         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
1157         int                             ehdrlen, ip_hlen;
1158         u16                             etype;
1159
1160         eh = mtod(mp, struct ether_vlan_header *);
1161         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1162                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1163                 etype = eh->evl_proto;
1164         } else {
1165                 ehdrlen = ETHER_HDR_LEN;
1166                 etype = eh->evl_encap_proto;
1167         }
1168
1169         /* Only handling IPv4 */
1170         if (etype != htons(ETHERTYPE_IP))
1171                 return;
1172
1173         ip = (struct ip *)(mp->m_data + ehdrlen);
1174         ip_hlen = ip->ip_hl << 2;
1175
1176         /* check if we're UDP or TCP */
1177         switch (ip->ip_p) {
1178         case IPPROTO_TCP:
1179                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1180                 /* src and dst are inverted */
1181                 common.port.dst ^= th->th_sport;
1182                 common.port.src ^= th->th_dport;
1183                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1184                 break;
1185         case IPPROTO_UDP:
1186                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1187                 /* src and dst are inverted */
1188                 common.port.dst ^= uh->uh_sport;
1189                 common.port.src ^= uh->uh_dport;
1190                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1191                 break;
1192         default:
1193                 return;
1194         }
1195
1196         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1197         if (mp->m_pkthdr.ether_vtag)
1198                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1199         else
1200                 common.flex_bytes ^= etype;
1201         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1202
1203         que = &adapter->queues[txr->me];
1204         /*
1205         ** This assumes the Rx queue and Tx
1206         ** queue are bound to the same CPU
1207         */
1208         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1209             input, common, que->msix);
1210 }
1211 #endif /* IXGBE_FDIR */
1212
1213 /*
1214 ** Used to detect a descriptor that has
1215 ** been merged by Hardware RSC.
1216 */
1217 static inline u32
1218 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1219 {
1220         return (le32toh(rx->wb.lower.lo_dword.data) &
1221             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1222 }
1223
1224 /*********************************************************************
1225  *
1226  *  Initialize Hardware RSC (LRO) feature on 82599
1227  *  for an RX ring; it is toggled by the LRO capability
1228  *  even though it is transparent to the stack.
1229  *
1230  *  NOTE: since this HW feature only works with IPv4 and
1231  *        our testing has shown software LRO to be just as
1232  *        effective, it is disabled by default.
1233  *
1234  **********************************************************************/
1235 static void
1236 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1237 {
1238         struct  adapter         *adapter = rxr->adapter;
1239         struct  ixgbe_hw        *hw = &adapter->hw;
1240         u32                     rscctrl, rdrxctl;
1241
1242         /* If turning LRO/RSC off we need to disable it */
1243         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1244                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1245                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                     /* Write back the cleared enable bit so RSC is actually off */
                     IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1246                 return;
1247         }
1248
1249         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1250         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1251 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1252         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1253 #endif /* DEV_NETMAP */
1254         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1255         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1256         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1257
1258         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1259         rscctrl |= IXGBE_RSCCTL_RSCEN;
1260         /*
1261         ** Limit the total number of descriptors that
1262         ** can be combined, so it does not exceed 64K
1263         */
1264         if (rxr->mbuf_sz == MCLBYTES)
1265                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1266         else if (rxr->mbuf_sz == MJUMPAGESIZE)
1267                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1268         else if (rxr->mbuf_sz == MJUM9BYTES)
1269                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1270         else  /* Using 16K cluster */
1271                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
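        /*
         * Worked example (assumes 4KB pages): with MJUMPAGESIZE clusters
         * and MAXDESC_8 the hardware can merge at most 8 * 4KB = 32KB
         * into one frame, comfortably under the 64KB limit noted above.
         */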
1272
1273         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1274
1275         /* Enable TCP header recognition */
1276         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1277             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1278             IXGBE_PSRTYPE_TCPHDR));
1279
1280         /* Disable RSC for ACK packets */
1281         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1282             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1283
1284         rxr->hw_rsc = TRUE;
1285 }
1286 /*********************************************************************
1287  *
1288  *  Refresh mbuf buffers for RX descriptor rings
1289  *   - now keeps its own state so discards due to resource
1290  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1291  *     it just returns, keeping its placeholder, so it can
1292  *     simply be called again later to retry.
1293  *
1294  **********************************************************************/
1295 static void
1296 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1297 {
1298         struct adapter          *adapter = rxr->adapter;
1299         bus_dma_segment_t       seg[1];
1300         struct ixgbe_rx_buf     *rxbuf;
1301         struct mbuf             *mp;
1302         int                     i, j, nsegs, error;
1303         bool                    refreshed = FALSE;
1304
1305         i = j = rxr->next_to_refresh;
1306         /* Control the loop with one beyond */
1307         if (++j == rxr->num_desc)
1308                 j = 0;
1309
1310         while (j != limit) {
1311                 rxbuf = &rxr->rx_buffers[i];
1312                 if (rxbuf->buf == NULL) {
1313                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1314                             M_PKTHDR, rxr->mbuf_sz);
1315                         if (mp == NULL)
1316                                 goto update;
1317                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1318                                 m_adj(mp, ETHER_ALIGN);
1319                 } else
1320                         mp = rxbuf->buf;
1321
1322                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1323
1324                 /* If we're dealing with an mbuf that was copied rather
1325                  * than replaced, there's no need to go through busdma.
1326                  */
1327                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1328                         /* Get the memory mapping */
1329                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1330                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1331                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1332                         if (error != 0) {
1333                                 printf("Refresh mbufs: payload dmamap load"
1334                                     " failure - %d\n", error);
1335                                 m_free(mp);
1336                                 rxbuf->buf = NULL;
1337                                 goto update;
1338                         }
1339                         rxbuf->buf = mp;
1340                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1341                             BUS_DMASYNC_PREREAD);
1342                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1343                             htole64(seg[0].ds_addr);
1344                 } else {
1345                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1346                         rxbuf->flags &= ~IXGBE_RX_COPY;
1347                 }
1348
1349                 refreshed = TRUE;
1350                 /* Next is precalculated */
1351                 i = j;
1352                 rxr->next_to_refresh = i;
1353                 if (++j == rxr->num_desc)
1354                         j = 0;
1355         }
1356 update:
1357         if (refreshed) /* Update hardware tail index */
1358                 IXGBE_WRITE_REG(&adapter->hw,
1359                     rxr->tail, rxr->next_to_refresh);
1360         return;
1361 }
1362
1363 /*********************************************************************
1364  *
1365  *  Allocate memory for rx_buffer structures. Since we use one
1366  *  rx_buffer per received packet, the maximum number of rx_buffer's
1367  *  that we'll need is equal to the number of receive descriptors
1368  *  that we've allocated.
1369  *
1370  **********************************************************************/
1371 int
1372 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1373 {
1374         struct  adapter         *adapter = rxr->adapter;
1375         device_t                dev = adapter->dev;
1376         struct ixgbe_rx_buf     *rxbuf;
1377         int                     bsize, error;
1378
1379         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1380         if (!(rxr->rx_buffers =
1381             (struct ixgbe_rx_buf *) malloc(bsize,
1382             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1383                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1384                 error = ENOMEM;
1385                 goto fail;
1386         }
1387
1388         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1389                                    1, 0,        /* alignment, bounds */
1390                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1391                                    BUS_SPACE_MAXADDR,   /* highaddr */
1392                                    NULL, NULL,          /* filter, filterarg */
1393                                    MJUM16BYTES,         /* maxsize */
1394                                    1,                   /* nsegments */
1395                                    MJUM16BYTES,         /* maxsegsize */
1396                                    0,                   /* flags */
1397                                    NULL,                /* lockfunc */
1398                                    NULL,                /* lockfuncarg */
1399                                    &rxr->ptag))) {
1400                 device_printf(dev, "Unable to create RX DMA tag\n");
1401                 goto fail;
1402         }
1403
1404         for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1405                 rxbuf = &rxr->rx_buffers[i];
1406                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1407                 if (error) {
1408                         device_printf(dev, "Unable to create RX dma map\n");
1409                         goto fail;
1410                 }
1411         }
1412
1413         return (0);
1414
1415 fail:
1416         /* Frees all, but can handle partial completion */
1417         ixgbe_free_receive_structures(adapter);
1418         return (error);
1419 }
1420
1421
1422 static void     
1423 ixgbe_free_receive_ring(struct rx_ring *rxr)
1424 {
1425         struct ixgbe_rx_buf       *rxbuf;
1426
1427         for (int i = 0; i < rxr->num_desc; i++) {
1428                 rxbuf = &rxr->rx_buffers[i];
1429                 if (rxbuf->buf != NULL) {
1430                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1431                             BUS_DMASYNC_POSTREAD);
1432                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1433                         rxbuf->buf->m_flags |= M_PKTHDR;
1434                         m_freem(rxbuf->buf);
1435                         rxbuf->buf = NULL;
1436                         rxbuf->flags = 0;
1437                 }
1438         }
1439 }
1440
1441
1442 /*********************************************************************
1443  *
1444  *  Initialize a receive ring and its buffers.
1445  *
1446  **********************************************************************/
1447 static int
1448 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1449 {
1450         struct  adapter         *adapter;
1451         struct ifnet            *ifp;
1452         device_t                dev;
1453         struct ixgbe_rx_buf     *rxbuf;
1454         bus_dma_segment_t       seg[1];
1455         struct lro_ctrl         *lro = &rxr->lro;
1456         int                     rsize, nsegs, error = 0;
1457 #ifdef DEV_NETMAP
1458         struct netmap_adapter *na = NA(rxr->adapter->ifp);
1459         struct netmap_slot *slot;
1460 #endif /* DEV_NETMAP */
1461
1462         adapter = rxr->adapter;
1463         ifp = adapter->ifp;
1464         dev = adapter->dev;
1465
1466         /* Clear the ring contents */
1467         IXGBE_RX_LOCK(rxr);
1468 #ifdef DEV_NETMAP
1469         /* same as in ixgbe_setup_transmit_ring() */
1470         slot = netmap_reset(na, NR_RX, rxr->me, 0);
1471 #endif /* DEV_NETMAP */
1472         rsize = roundup2(adapter->num_rx_desc *
1473             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1474         bzero((void *)rxr->rx_base, rsize);
1475         /* Cache the size */
1476         rxr->mbuf_sz = adapter->rx_mbuf_sz;
1477
1478         /* Free current RX buffer structs and their mbufs */
1479         ixgbe_free_receive_ring(rxr);
1480
1481         /* Now replenish the mbufs */
1482         for (int j = 0; j != rxr->num_desc; ++j) {
1483                 struct mbuf     *mp;
1484
1485                 rxbuf = &rxr->rx_buffers[j];
1486 #ifdef DEV_NETMAP
1487                 /*
1488                  * In netmap mode, fill the map and set the buffer
1489                  * address in the NIC ring, considering the offset
1490                  * between the netmap and NIC rings (see comment in
1491                  * ixgbe_setup_transmit_ring() ). No need to allocate
1492                  * an mbuf, so end the block with a continue;
1493                  */
1494                 if (slot) {
1495                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1496                         uint64_t paddr;
1497                         void *addr;
1498
1499                         addr = PNMB(na, slot + sj, &paddr);
1500                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1501                         /* Update descriptor and the cached value */
1502                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1503                         rxbuf->addr = htole64(paddr);
1504                         continue;
1505                 }
1506 #endif /* DEV_NETMAP */
1507                 rxbuf->flags = 0; 
1508                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1509                     M_PKTHDR, adapter->rx_mbuf_sz);
1510                 if (rxbuf->buf == NULL) {
1511                         error = ENOBUFS;
1512                         goto fail;
1513                 }
1514                 mp = rxbuf->buf;
1515                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1516                 /* Get the memory mapping */
1517                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1518                     rxbuf->pmap, mp, seg,
1519                     &nsegs, BUS_DMA_NOWAIT);
1520                 if (error != 0)
1521                         goto fail;
1522                 bus_dmamap_sync(rxr->ptag,
1523                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
1524                 /* Update the descriptor and the cached value */
1525                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1526                 rxbuf->addr = htole64(seg[0].ds_addr);
1527         }
1528
1529
1530         /* Setup our descriptor indices */
1531         rxr->next_to_check = 0;
1532         rxr->next_to_refresh = 0;
1533         rxr->lro_enabled = FALSE;
1534         rxr->rx_copies = 0;
1535         rxr->rx_bytes = 0;
1536         rxr->vtag_strip = FALSE;
1537
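             /* Sync the initialized descriptor ring before the hardware uses it */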
1538         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1539             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1540
1541         /*
1542         ** Now set up the LRO interface:
1543         */
1544         if (ixgbe_rsc_enable)
1545                 ixgbe_setup_hw_rsc(rxr);
1546         else if (ifp->if_capenable & IFCAP_LRO) {
1547                 int err = tcp_lro_init(lro);
1548                 if (err) {
1549                         device_printf(dev, "LRO Initialization failed!\n");
1550                         goto fail;
1551                 }
1552                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1553                 rxr->lro_enabled = TRUE;
1554                 lro->ifp = adapter->ifp;
1555         }
1556
1557         IXGBE_RX_UNLOCK(rxr);
1558         return (0);
1559
1560 fail:
1561         ixgbe_free_receive_ring(rxr);
1562         IXGBE_RX_UNLOCK(rxr);
1563         return (error);
1564 }
1565
1566 /*********************************************************************
1567  *
1568  *  Initialize all receive rings.
1569  *
1570  **********************************************************************/
1571 int
1572 ixgbe_setup_receive_structures(struct adapter *adapter)
1573 {
1574         struct rx_ring *rxr = adapter->rx_rings;
1575         int j;
1576
1577         for (j = 0; j < adapter->num_queues; j++, rxr++)
1578                 if (ixgbe_setup_receive_ring(rxr))
1579                         goto fail;
1580
1581         return (0);
1582 fail:
1583         /*
1584          * Free the RX buffers allocated so far; we will only handle
1585          * the rings that completed, since the failing case will have
1586          * cleaned up after itself. 'j' failed, so it is the terminus.
1587          */
1588         for (int i = 0; i < j; ++i) {
1589                 rxr = &adapter->rx_rings[i];
1590                 ixgbe_free_receive_ring(rxr);
1591         }
1592
1593         return (ENOBUFS);
1594 }
1595
1596
1597 /*********************************************************************
1598  *
1599  *  Free all receive rings.
1600  *
1601  **********************************************************************/
1602 void
1603 ixgbe_free_receive_structures(struct adapter *adapter)
1604 {
1605         struct rx_ring *rxr = adapter->rx_rings;
1606
1607         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1608
1609         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1610                 struct lro_ctrl         *lro = &rxr->lro;
1611                 ixgbe_free_receive_buffers(rxr);
1612                 /* Free LRO memory */
1613                 tcp_lro_free(lro);
1614                 /* Free the ring memory as well */
1615                 ixgbe_dma_free(adapter, &rxr->rxdma);
1616         }
1617
1618         free(adapter->rx_rings, M_DEVBUF);
1619 }
1620
1621
1622 /*********************************************************************
1623  *
1624  *  Free receive ring data structures
1625  *
1626  **********************************************************************/
1627 void
1628 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1629 {
1630         struct adapter          *adapter = rxr->adapter;
1631         struct ixgbe_rx_buf     *rxbuf;
1632
1633         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1634
1635         /* Cleanup any existing buffers */
1636         if (rxr->rx_buffers != NULL) {
1637                 for (int i = 0; i < adapter->num_rx_desc; i++) {
1638                         rxbuf = &rxr->rx_buffers[i];
1639                         if (rxbuf->buf != NULL) {
1640                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1641                                     BUS_DMASYNC_POSTREAD);
1642                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1643                                 rxbuf->buf->m_flags |= M_PKTHDR;
1644                                 m_freem(rxbuf->buf);
1645                         }
1646                         rxbuf->buf = NULL;
1647                         if (rxbuf->pmap != NULL) {
1648                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1649                                 rxbuf->pmap = NULL;
1650                         }
1651                 }
1652                 if (rxr->rx_buffers != NULL) {
1653                         free(rxr->rx_buffers, M_DEVBUF);
1654                         rxr->rx_buffers = NULL;
1655                 }
1656         }
1657
1658         if (rxr->ptag != NULL) {
1659                 bus_dma_tag_destroy(rxr->ptag);
1660                 rxr->ptag = NULL;
1661         }
1662
1663         return;
1664 }
1665
1666 static __inline void
1667 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1668 {
1669                  
1670         /*
1671          * At the moment LRO is only for IP/TCP packets whose TCP checksum
1672          * has been computed by hardware. Also the packet should not have a
1673          * VLAN tag in the Ethernet header.  For IPv6 we do not yet support ext. hdrs.
1674          */
1675         if (rxr->lro_enabled &&
1676             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1677             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1678             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1679             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1680             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1681             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1682             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1683             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1684                 /*
1685                  * Send to the stack if:
1686                  *  - LRO not enabled, or
1687                  *  - no LRO resources, or
1688                  *  - lro enqueue fails
1689                  */
1690                 if (rxr->lro.lro_cnt != 0)
1691                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1692                                 return;
1693         }
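             /* Hand the packet to the stack; the RX lock is dropped across if_input() */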
1694         IXGBE_RX_UNLOCK(rxr);
1695         (*ifp->if_input)(ifp, m);
1696         IXGBE_RX_LOCK(rxr);
1697 }
1698
1699 static __inline void
1700 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1701 {
1702         struct ixgbe_rx_buf     *rbuf;
1703
1704         rbuf = &rxr->rx_buffers[i];
1705
1706
1707         /*
1708         ** With advanced descriptors the writeback
1709         ** clobbers the buffer addrs, so it is easier
1710         ** to just free the existing mbufs and take
1711         ** the normal refresh path to get new buffers
1712         ** and mapping.
1713         */
1714
1715         if (rbuf->fmp != NULL) {/* Partial chain ? */
1716                 rbuf->fmp->m_flags |= M_PKTHDR;
1717                 m_freem(rbuf->fmp);
1718                 rbuf->fmp = NULL;
1719                 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1720         } else if (rbuf->buf) {
1721                 m_free(rbuf->buf);
1722                 rbuf->buf = NULL;
1723         }
1724         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1725
1726         rbuf->flags = 0;
1727  
1728         return;
1729 }
1730
1731
1732 /*********************************************************************
1733  *
1734  *  This routine executes in interrupt context. It replenishes
1735  *  the mbufs in the descriptor ring and sends data which has been
1736  *  DMA'ed into host memory to the upper layer.
1737  *
1738  *  Return TRUE for more work, FALSE for all clean.
1739  *********************************************************************/
1740 bool
1741 ixgbe_rxeof(struct ix_queue *que)
1742 {
1743         struct adapter          *adapter = que->adapter;
1744         struct rx_ring          *rxr = que->rxr;
1745         struct ifnet            *ifp = adapter->ifp;
1746         struct lro_ctrl         *lro = &rxr->lro;
1747         struct lro_entry        *queued;
1748         int                     i, nextp, processed = 0;
1749         u32                     staterr = 0;
1750         u32                     count = adapter->rx_process_limit;
1751         union ixgbe_adv_rx_desc *cur;
1752         struct ixgbe_rx_buf     *rbuf, *nbuf;
1753         u16                     pkt_info;
1754
1755         IXGBE_RX_LOCK(rxr);
1756
1757 #ifdef DEV_NETMAP
1758         /* Same as the txeof routine: wakeup clients on intr. */
1759         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1760                 IXGBE_RX_UNLOCK(rxr);
1761                 return (FALSE);
1762         }
1763 #endif /* DEV_NETMAP */
1764
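             /* Process completed descriptors, up to rx_process_limit per call */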
1765         for (i = rxr->next_to_check; count != 0;) {
1766                 struct mbuf     *sendmp, *mp;
1767                 u32             rsc, ptype;
1768                 u16             len;
1769                 u16             vtag = 0;
1770                 bool            eop;
1771  
1772                 /* Sync the ring. */
1773                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1774                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1775
1776                 cur = &rxr->rx_base[i];
1777                 staterr = le32toh(cur->wb.upper.status_error);
1778                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1779
1780                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1781                         break;
1782                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1783                         break;
1784
1785                 count--;
1786                 sendmp = NULL;
1787                 nbuf = NULL;
1788                 rsc = 0;
1789                 cur->wb.upper.status_error = 0;
1790                 rbuf = &rxr->rx_buffers[i];
1791                 mp = rbuf->buf;
1792
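                     /* Pull the length, packet type and EOP flag from the writeback descriptor */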
1793                 len = le16toh(cur->wb.upper.length);
1794                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1795                     IXGBE_RXDADV_PKTTYPE_MASK;
1796                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1797
1798                 /* Make sure bad packets are discarded */
1799                 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1800 #if __FreeBSD_version >= 1100036
1801                         if (IXGBE_IS_VF(adapter))
1802                                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1803 #endif
1804                         rxr->rx_discarded++;
1805                         ixgbe_rx_discard(rxr, i);
1806                         goto next_desc;
1807                 }
1808
1809                 /*
1810                 ** On the 82599, which supports a hardware
1811                 ** LRO (called HW RSC), packets need not be
1812                 ** fragmented across sequential descriptors;
1813                 ** rather, the next descriptor is indicated
1814                 ** in bits of the descriptor. This also means
1815                 ** that we might process more than one packet
1816                 ** at a time, something that was never true
1817                 ** before and required eliminating the global
1818                 ** chain pointers in favor of what we are
1819                 ** doing here.  -jfv
1820                 */
1821                 if (!eop) {
1822                         /*
1823                         ** Figure out the next descriptor
1824                         ** of this frame.
1825                         */
1826                         if (rxr->hw_rsc == TRUE) {
1827                                 rsc = ixgbe_rsc_count(cur);
1828                                 rxr->rsc_num += (rsc - 1);
1829                         }
1830                         if (rsc) { /* Get hardware index */
1831                                 nextp = ((staterr &
1832                                     IXGBE_RXDADV_NEXTP_MASK) >>
1833                                     IXGBE_RXDADV_NEXTP_SHIFT);
1834                         } else { /* Just sequential */
1835                                 nextp = i + 1;
1836                                 if (nextp == adapter->num_rx_desc)
1837                                         nextp = 0;
1838                         }
1839                         nbuf = &rxr->rx_buffers[nextp];
1840                         prefetch(nbuf);
1841                 }
1842                 /*
1843                 ** Rather than using the fmp/lmp global pointers
1844                 ** we now keep the head of a packet chain in the
1845                 ** buffer struct and pass this along from one
1846                 ** descriptor to the next, until we get EOP.
1847                 */
1848                 mp->m_len = len;
1849                 /*
1850                 ** See if there is a stored head
1851                 ** that determines what we are
1852                 */
1853                 sendmp = rbuf->fmp;
1854                 if (sendmp != NULL) {  /* secondary frag */
1855                         rbuf->buf = rbuf->fmp = NULL;
1856                         mp->m_flags &= ~M_PKTHDR;
1857                         sendmp->m_pkthdr.len += mp->m_len;
1858                 } else {
1859                         /*
1860                          * Optimize.  This might be a small packet,
1861                          * maybe just a TCP ACK.  Do a fast copy that
1862                          * is cache aligned into a new mbuf, and
1863                          * leave the old mbuf+cluster for re-use.
1864                          */
1865                         if (eop && len <= IXGBE_RX_COPY_LEN) {
1866                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1867                                 if (sendmp != NULL) {
1868                                         sendmp->m_data +=
1869                                             IXGBE_RX_COPY_ALIGN;
1870                                         ixgbe_bcopy(mp->m_data,
1871                                             sendmp->m_data, len);
1872                                         sendmp->m_len = len;
1873                                         rxr->rx_copies++;
1874                                         rbuf->flags |= IXGBE_RX_COPY;
1875                                 }
1876                         }
1877                         if (sendmp == NULL) {
1878                                 rbuf->buf = rbuf->fmp = NULL;
1879                                 sendmp = mp;
1880                         }
1881
1882                         /* first desc of a non-ps chain */
1883                         sendmp->m_flags |= M_PKTHDR;
1884                         sendmp->m_pkthdr.len = mp->m_len;
1885                 }
1886                 ++processed;
1887
1888                 /* Pass the head pointer on */
1889                 if (eop == 0) {
1890                         nbuf->fmp = sendmp;
1891                         sendmp = NULL;
1892                         mp->m_next = nbuf->buf;
1893                 } else { /* Sending this frame */
1894                         sendmp->m_pkthdr.rcvif = ifp;
1895                         rxr->rx_packets++;
1896                         /* capture data for AIM */
1897                         rxr->bytes += sendmp->m_pkthdr.len;
1898                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1899                         /* Process vlan info */
1900                         if ((rxr->vtag_strip) &&
1901                             (staterr & IXGBE_RXD_STAT_VP))
1902                                 vtag = le16toh(cur->wb.upper.vlan);
1903                         if (vtag) {
1904                                 sendmp->m_pkthdr.ether_vtag = vtag;
1905                                 sendmp->m_flags |= M_VLANTAG;
1906                         }
1907                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1908                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
1909
1910                         /*
1911                          * In the multiqueue case, the RXCSUM.PCSD bit is set
1912                          * and never cleared. This means an RSS hash is
1913                          * available to be used.
1914                          */
1915                         if (adapter->num_queues > 1) {
1916                                 sendmp->m_pkthdr.flowid =
1917                                     le32toh(cur->wb.lower.hi_dword.rss);
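                                     /* Map the hardware RSS type to the mbuf hash type */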
1918                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {  
1919                                     case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1920                                         M_HASHTYPE_SET(sendmp,
1921                                             M_HASHTYPE_RSS_TCP_IPV4);
1922                                         break;
1923                                     case IXGBE_RXDADV_RSSTYPE_IPV4:
1924                                         M_HASHTYPE_SET(sendmp,
1925                                             M_HASHTYPE_RSS_IPV4);
1926                                         break;
1927                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1928                                         M_HASHTYPE_SET(sendmp,
1929                                             M_HASHTYPE_RSS_TCP_IPV6);
1930                                         break;
1931                                     case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1932                                         M_HASHTYPE_SET(sendmp,
1933                                             M_HASHTYPE_RSS_IPV6_EX);
1934                                         break;
1935                                     case IXGBE_RXDADV_RSSTYPE_IPV6:
1936                                         M_HASHTYPE_SET(sendmp,
1937                                             M_HASHTYPE_RSS_IPV6);
1938                                         break;
1939                                     case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1940                                         M_HASHTYPE_SET(sendmp,
1941                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
1942                                         break;
1943                                     case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1944                                         M_HASHTYPE_SET(sendmp,
1945                                             M_HASHTYPE_RSS_UDP_IPV4);
1946                                         break;
1947                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1948                                         M_HASHTYPE_SET(sendmp,
1949                                             M_HASHTYPE_RSS_UDP_IPV6);
1950                                         break;
1951                                     case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1952                                         M_HASHTYPE_SET(sendmp,
1953                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
1954                                         break;
1955                                     default:
1956                                         M_HASHTYPE_SET(sendmp,
1957                                             M_HASHTYPE_OPAQUE);
1958                                 }
1959                         } else {
1960                                 sendmp->m_pkthdr.flowid = que->msix;
1961                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1962                         }
1963                 }
1964 next_desc:
1965                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1966                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1967
1968                 /* Advance our pointers to the next descriptor. */
1969                 if (++i == rxr->num_desc)
1970                         i = 0;
1971
1972                 /* Now send to the stack or do LRO */
1973                 if (sendmp != NULL) {
1974                         rxr->next_to_check = i;
1975                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1976                         i = rxr->next_to_check;
1977                 }
1978
1979                 /* Every 8 descriptors we go to refresh mbufs */
1980                 if (processed == 8) {
1981                         ixgbe_refresh_mbufs(rxr, i);
1982                         processed = 0;
1983                 }
1984         }
1985
1986         /* Refresh any remaining buf structs */
1987         if (ixgbe_rx_unrefreshed(rxr))
1988                 ixgbe_refresh_mbufs(rxr, i);
1989
1990         rxr->next_to_check = i;
1991
1992         /*
1993          * Flush any outstanding LRO work
1994          */
1995         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1996                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1997                 tcp_lro_flush(lro, queued);
1998         }
1999
2000         IXGBE_RX_UNLOCK(rxr);
2001
2002         /*
2003         ** Still have cleaning to do?
2004         */
2005         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2006                 return (TRUE);
2007         else
2008                 return (FALSE);
2009 }
2010
2011
2012 /*********************************************************************
2013  *
2014  *  Verify that the hardware indicated that the checksum is valid.
2015  *  Inform the stack about the status of checksum so that stack
2016  *  doesn't spend time verifying the checksum.
2017  *
2018  *********************************************************************/
2019 static void
2020 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2021 {
2022         u16     status = (u16) staterr;
2023         u8      errors = (u8) (staterr >> 24);
2024         bool    sctp = FALSE;
2025
2026         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2027             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2028                 sctp = TRUE;
2029
2030         if (status & IXGBE_RXD_STAT_IPCS) {
2031                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2032                         /* IP Checksum Good */
2033                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
2034                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2035
2036                 } else
2037                         mp->m_pkthdr.csum_flags = 0;
2038         }
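             /* L4 checksum status (TCP/UDP, or SCTP when indicated) was reported by hardware */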
2039         if (status & IXGBE_RXD_STAT_L4CS) {
2040                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2041 #if __FreeBSD_version >= 800000
2042                 if (sctp)
2043                         type = CSUM_SCTP_VALID;
2044 #endif
2045                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2046                         mp->m_pkthdr.csum_flags |= type;
2047                         if (!sctp)
2048                                 mp->m_pkthdr.csum_data = htons(0xffff);
2049                 } 
2050         }
2051         return;
2052 }
2053
2054 /********************************************************************
2055  * Manage DMA'able memory.
2056  *******************************************************************/
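     /* bus_dmamap_load() callback: record the single segment's bus address */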
2057 static void
2058 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2059 {
2060         if (error)
2061                 return;
2062         *(bus_addr_t *) arg = segs->ds_addr;
2063         return;
2064 }
2065
2066 int
2067 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2068                 struct ixgbe_dma_alloc *dma, int mapflags)
2069 {
2070         device_t dev = adapter->dev;
2071         int             r;
2072
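             /* Create the DMA tag, allocate the memory, then load the map to obtain the bus address */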
2073         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2074                                DBA_ALIGN, 0,    /* alignment, bounds */
2075                                BUS_SPACE_MAXADDR,       /* lowaddr */
2076                                BUS_SPACE_MAXADDR,       /* highaddr */
2077                                NULL, NULL,      /* filter, filterarg */
2078                                size,    /* maxsize */
2079                                1,       /* nsegments */
2080                                size,    /* maxsegsize */
2081                                BUS_DMA_ALLOCNOW,        /* flags */
2082                                NULL,    /* lockfunc */
2083                                NULL,    /* lockfuncarg */
2084                                &dma->dma_tag);
2085         if (r != 0) {
2086                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2087                        "error %u\n", r);
2088                 goto fail_0;
2089         }
2090         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2091                              BUS_DMA_NOWAIT, &dma->dma_map);
2092         if (r != 0) {
2093                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2094                        "error %u\n", r);
2095                 goto fail_1;
2096         }
2097         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2098                             size,
2099                             ixgbe_dmamap_cb,
2100                             &dma->dma_paddr,
2101                             mapflags | BUS_DMA_NOWAIT);
2102         if (r != 0) {
2103                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2104                        "error %u\n", r);
2105                 goto fail_2;
2106         }
2107         dma->dma_size = size;
2108         return (0);
2109 fail_2:
2110         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2111 fail_1:
2112         bus_dma_tag_destroy(dma->dma_tag);
2113 fail_0:
2114         dma->dma_tag = NULL;
2115         return (r);
2116 }
2117
2118 void
2119 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2120 {
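             /* Tear down in the reverse order of ixgbe_dma_malloc() */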
2121         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2122             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2123         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2124         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2125         bus_dma_tag_destroy(dma->dma_tag);
2126 }
2127
2128
2129 /*********************************************************************
2130  *
2131  *  Allocate memory for the transmit and receive rings, and then
2132  *  the descriptors associated with each; called only once at attach.
2133  *
2134  **********************************************************************/
2135 int
2136 ixgbe_allocate_queues(struct adapter *adapter)
2137 {
2138         device_t        dev = adapter->dev;
2139         struct ix_queue *que;
2140         struct tx_ring  *txr;
2141         struct rx_ring  *rxr;
2142         int rsize, tsize, error = IXGBE_SUCCESS;
2143         int txconf = 0, rxconf = 0;
2144 #ifdef PCI_IOV
2145         enum ixgbe_iov_mode iov_mode;
2146 #endif
2147
2148         /* First allocate the top level queue structs */
2149         if (!(adapter->queues =
2150             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2151             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2152                 device_printf(dev, "Unable to allocate queue memory\n");
2153                 error = ENOMEM;
2154                 goto fail;
2155         }
2156
2157         /* First allocate the TX ring struct memory */
2158         if (!(adapter->tx_rings =
2159             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2160             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2161                 device_printf(dev, "Unable to allocate TX ring memory\n");
2162                 error = ENOMEM;
2163                 goto tx_fail;
2164         }
2165
2166         /* Next allocate the RX */
2167         if (!(adapter->rx_rings =
2168             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2169             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2170                 device_printf(dev, "Unable to allocate RX ring memory\n");
2171                 error = ENOMEM;
2172                 goto rx_fail;
2173         }
2174
2175         /* For the ring itself */
2176         tsize = roundup2(adapter->num_tx_desc *
2177             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2178
2179 #ifdef PCI_IOV
2180         iov_mode = ixgbe_get_iov_mode(adapter);
2181         adapter->pool = ixgbe_max_vfs(iov_mode);
2182 #else
2183         adapter->pool = 0;
2184 #endif
2185         /*
2186          * Now set up the TX queues; txconf is needed to handle the
2187          * possibility that things fail midcourse and we need to
2188          * undo the memory allocations gracefully.
2189          */
2190         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2191                 /* Set up some basics */
2192                 txr = &adapter->tx_rings[i];
2193                 txr->adapter = adapter;
2194 #ifdef PCI_IOV
2195                 txr->me = ixgbe_pf_que_index(iov_mode, i);
2196 #else
2197                 txr->me = i;
2198 #endif
2199                 txr->num_desc = adapter->num_tx_desc;
2200
2201                 /* Initialize the TX side lock */
2202                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2203                     device_get_nameunit(dev), txr->me);
2204                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2205
2206                 if (ixgbe_dma_malloc(adapter, tsize,
2207                         &txr->txdma, BUS_DMA_NOWAIT)) {
2208                         device_printf(dev,
2209                             "Unable to allocate TX Descriptor memory\n");
2210                         error = ENOMEM;
2211                         goto err_tx_desc;
2212                 }
2213                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2214                 bzero((void *)txr->tx_base, tsize);
2215
2216                 /* Now allocate transmit buffers for the ring */
2217                 if (ixgbe_allocate_transmit_buffers(txr)) {
2218                         device_printf(dev,
2219                             "Critical Failure setting up transmit buffers\n");
2220                         error = ENOMEM;
2221                         goto err_tx_desc;
2222                 }
2223 #ifndef IXGBE_LEGACY_TX
2224                 /* Allocate a buf ring */
2225                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2226                     M_WAITOK, &txr->tx_mtx);
2227                 if (txr->br == NULL) {
2228                         device_printf(dev,
2229                             "Critical Failure setting up buf ring\n");
2230                         error = ENOMEM;
2231                         goto err_tx_desc;
2232                 }
2233 #endif
2234         }
2235
2236         /*
2237          * Next the RX queues...
2238          */ 
2239         rsize = roundup2(adapter->num_rx_desc *
2240             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2241         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2242                 rxr = &adapter->rx_rings[i];
2243                 /* Set up some basics */
2244                 rxr->adapter = adapter;
2245 #ifdef PCI_IOV
2246                 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2247 #else
2248                 rxr->me = i;
2249 #endif
2250                 rxr->num_desc = adapter->num_rx_desc;
2251
2252                 /* Initialize the RX side lock */
2253                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2254                     device_get_nameunit(dev), rxr->me);
2255                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2256
2257                 if (ixgbe_dma_malloc(adapter, rsize,
2258                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2259                         device_printf(dev,
2260                             "Unable to allocate RxDescriptor memory\n");
2261                         error = ENOMEM;
2262                         goto err_rx_desc;
2263                 }
2264                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2265                 bzero((void *)rxr->rx_base, rsize);
2266
2267                 /* Allocate receive buffers for the ring*/
2268                 if (ixgbe_allocate_receive_buffers(rxr)) {
2269                         device_printf(dev,
2270                             "Critical Failure setting up receive buffers\n");
2271                         error = ENOMEM;
2272                         goto err_rx_desc;
2273                 }
2274         }
2275
2276         /*
2277         ** Finally set up the queue holding structs
2278         */
2279         for (int i = 0; i < adapter->num_queues; i++) {
2280                 que = &adapter->queues[i];
2281                 que->adapter = adapter;
2282                 que->me = i;
2283                 que->txr = &adapter->tx_rings[i];
2284                 que->rxr = &adapter->rx_rings[i];
2285         }
2286
2287         return (0);
2288
2289 err_rx_desc:
2290         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2291                 ixgbe_dma_free(adapter, &rxr->rxdma);
2292 err_tx_desc:
2293         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2294                 ixgbe_dma_free(adapter, &txr->txdma);
2295         free(adapter->rx_rings, M_DEVBUF);
2296 rx_fail:
2297         free(adapter->tx_rings, M_DEVBUF);
2298 tx_fail:
2299         free(adapter->queues, M_DEVBUF);
2300 fail:
2301         return (error);
2302 }