1 /******************************************************************************
2
3   Copyright (c) 2001-2017, Intel Corporation
4   All rights reserved.
5
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40
41 #include "ixv.h"
42
43 extern int ix_crcstrip;
44
45 /*
46  * HW RSC control:
47  *  This feature only works with
48  *  IPv4, and only on 82599 and later.
49  *  It also breaks IP forwarding, which,
50  *  unlike LRO, cannot be controlled by
51  *  the stack. For all these reasons it
52  *  is left off by default and has no
53  *  tunable interface; enabling it
54  *  requires changing the flag below
55  *  and recompiling.
56  */
57 static bool ixgbe_rsc_enable = FALSE;
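/*
 * For experimentation, HW RSC can only be turned on here at compile time
 * (e.g. "static bool ixgbe_rsc_enable = TRUE;") followed by a rebuild;
 * ixgbe_setup_hw_rsc() below only runs when this flag is set.
 */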
58
59 /************************************************************************
60  *  Local Function prototypes
61  ************************************************************************/
62 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
63 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
64 static int           ixgbe_setup_receive_ring(struct rx_ring *);
65 static void          ixgbe_free_receive_buffers(struct rx_ring *);
66 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32);
67 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
68 static int           ixgbe_xmit(struct tx_ring *, struct mbuf **);
69 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
70                                         struct mbuf *, u32 *, u32 *);
71 static int           ixgbe_tso_setup(struct tx_ring *,
72                                      struct mbuf *, u32 *, u32 *);
73 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
74 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
75                                     struct mbuf *, u32);
76 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
77                                       struct ixgbe_dma_alloc *, int);
78 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
79
80 MALLOC_DECLARE(M_IXV);
81
82 /************************************************************************
83  * ixv_legacy_start_locked - Transmit entry point
84  *
85  *   Called by the stack to initiate a transmit.
86  *   The driver will remain in this routine as long as there are
87  *   packets to transmit and transmit resources are available.
88  *   In case resources are not available, the stack is notified
89  *   and the packet is requeued.
90  ************************************************************************/
91 int
92 ixv_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
93 {
94         struct mbuf    *m_head;
95         struct adapter *adapter = txr->adapter;
96
97         IXGBE_TX_LOCK_ASSERT(txr);
98
99         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
100                 return (ENETDOWN);
101         if (!adapter->link_active)
102                 return (ENETDOWN);
103
104         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
105                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
106                         break;
107
108                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
109                 if (m_head == NULL)
110                         break;
111
112                 if (ixgbe_xmit(txr, &m_head)) {
113                         if (m_head != NULL)
114                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
115                         break;
116                 }
117                 /* Send a copy of the frame to the BPF listener */
118                 ETHER_BPF_MTAP(ifp, m_head);
119         }
120
121         return IXGBE_SUCCESS;
122 } /* ixv_legacy_start_locked */
123
124 /************************************************************************
125  * ixv_legacy_start
126  *
127  *   Called by the stack, this always uses the first tx ring,
128  *   and should not be used with multiqueue tx enabled.
129  ************************************************************************/
130 void
131 ixv_legacy_start(struct ifnet *ifp)
132 {
133         struct adapter *adapter = ifp->if_softc;
134         struct tx_ring *txr = adapter->tx_rings;
135
136         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
137                 IXGBE_TX_LOCK(txr);
138                 ixv_legacy_start_locked(ifp, txr);
139                 IXGBE_TX_UNLOCK(txr);
140         }
141 } /* ixv_legacy_start */
142
143 /************************************************************************
144  * ixv_mq_start - Multiqueue Transmit Entry Point
145  *
146  *   (if_transmit function)
147  ************************************************************************/
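/*
 * Note: in the multiqueue configuration this is typically installed as
 * the ifnet transmit method when the interface is created, together
 * with ixv_qflush below, e.g.:
 *
 *     ifp->if_transmit = ixv_mq_start;
 *     ifp->if_qflush   = ixv_qflush;
 */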
148 int
149 ixv_mq_start(struct ifnet *ifp, struct mbuf *m)
150 {
151         struct adapter  *adapter = ifp->if_softc;
152         struct ix_queue *que;
153         struct tx_ring  *txr;
154         int             i, err = 0;
155         uint32_t        bucket_id;
156
157         /*
158          * When doing RSS, map the packet to the same outbound
159          * queue that its incoming flow would be steered to.
160          *
161          * If everything is set up correctly, that should be the
162          * same bucket as the CPU we are currently running on.
163          */
164         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
165                 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
166                     (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
167                     &bucket_id) == 0)) {
168                         i = bucket_id % adapter->num_queues;
169 #ifdef IXGBE_DEBUG
170                         if (bucket_id > adapter->num_queues)
171                                 if_printf(ifp,
172                                     "bucket_id (%d) > num_queues (%d)\n",
173                                     bucket_id, adapter->num_queues);
174 #endif
175                 } else
176                         i = m->m_pkthdr.flowid % adapter->num_queues;
177         } else
178                 i = curcpu % adapter->num_queues;
179
180         /* Check for a hung queue and pick alternative */
181         if (((1 << i) & adapter->active_queues) == 0)
182                 i = ffsl(adapter->active_queues);
183
184         txr = &adapter->tx_rings[i];
185         que = &adapter->queues[i];
186
187         err = drbr_enqueue(ifp, txr->br, m);
188         if (err)
189                 return (err);
190         if (IXGBE_TX_TRYLOCK(txr)) {
191                 ixv_mq_start_locked(ifp, txr);
192                 IXGBE_TX_UNLOCK(txr);
193         } else
194                 taskqueue_enqueue(que->tq, &txr->txq_task);
195
196         return (0);
197 } /* ixv_mq_start */
198
199 /************************************************************************
200  * ixv_mq_start_locked
201  ************************************************************************/
202 int
203 ixv_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
204 {
205         struct mbuf    *next;
206         int            enqueued = 0, err = 0;
207
208         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
209                 return (ENETDOWN);
210         if (!txr->adapter->link_active)
211                 return (ENETDOWN);
212
213         /* Process the queue */
214         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
215                 err = ixgbe_xmit(txr, &next);
216                 if (err != 0) {
217                         if (next == NULL)
218                                 drbr_advance(ifp, txr->br);
219                         else
220                                 drbr_putback(ifp, txr->br, next);
221                         break;
222                 }
223                 drbr_advance(ifp, txr->br);
224                 enqueued++;
225 #if __FreeBSD_version >= 1100036
226                 /*
227                  * Since this is the VF driver there is no need to
228                  * check the tail register address the way the PF
229                  * code does; just count outgoing multicast frames.
230                  */
231                 if (next->m_flags & M_MCAST)
232                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
233 #endif
234                 /* Send a copy of the frame to the BPF listener */
235                 ETHER_BPF_MTAP(ifp, next);
236                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
237                         break;
238         }
239
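        /*
         * Opportunistically reclaim completed descriptors while the
         * TX lock is still held; ixv_txeof() requires it.
         */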
240         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
241                 ixv_txeof(txr);
242
243         return (err);
244 } /* ixv_mq_start_locked */
245
246 /************************************************************************
247  * ixv_deferred_mq_start
248  *
249  *   Called from a taskqueue to drain queued transmit packets.
250  ************************************************************************/
251 void
252 ixv_deferred_mq_start(void *arg, int pending)
253 {
254         struct tx_ring *txr = arg;
255         struct adapter *adapter = txr->adapter;
256         struct ifnet   *ifp = adapter->ifp;
257
258         IXGBE_TX_LOCK(txr);
259         if (!drbr_empty(ifp, txr->br))
260                 ixv_mq_start_locked(ifp, txr);
261         IXGBE_TX_UNLOCK(txr);
262 } /* ixv_deferred_mq_start */
263
264 /************************************************************************
265  * ixv_qflush - Flush all ring buffers
266  ************************************************************************/
267 void
268 ixv_qflush(struct ifnet *ifp)
269 {
270         struct adapter *adapter = ifp->if_softc;
271         struct tx_ring *txr = adapter->tx_rings;
272         struct mbuf    *m;
273
274         for (int i = 0; i < adapter->num_queues; i++, txr++) {
275                 IXGBE_TX_LOCK(txr);
276                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
277                         m_freem(m);
278                 IXGBE_TX_UNLOCK(txr);
279         }
280         if_qflush(ifp);
281 } /* ixv_qflush */
282
283
284 /************************************************************************
285  * ixgbe_xmit
286  *
287  *   This routine maps the mbufs to tx descriptors, allowing the
288  *   TX engine to transmit the packets.
289  *
290  *   Return 0 on success, positive on failure
291  ************************************************************************/
292 static int
293 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
294 {
295         struct adapter          *adapter = txr->adapter;
296         struct ixgbe_tx_buf     *txbuf;
297         union ixgbe_adv_tx_desc *txd = NULL;
298         struct mbuf             *m_head;
299         int                     i, j, error, nsegs;
300         int                     first;
301         u32                     olinfo_status = 0, cmd_type_len;
302         bool                    remap = TRUE;
303         bus_dma_segment_t       segs[adapter->num_segs];
304         bus_dmamap_t            map;
305
306         m_head = *m_headp;
307
308         /* Basic descriptor defines */
309         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
310             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
311
312         if (m_head->m_flags & M_VLANTAG)
313                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
314
315         /*
316          * Important to capture the first descriptor
317          * used because it will contain the index of
318          * the one we tell the hardware to report back
319          */
320         first = txr->next_avail_desc;
321         txbuf = &txr->tx_buffers[first];
322         map = txbuf->map;
323
324         /*
325          * Map the packet for DMA.
326          */
327 retry:
328         error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
329             &nsegs, BUS_DMA_NOWAIT);
330
331         if (__predict_false(error)) {
332                 struct mbuf *m;
333
334                 switch (error) {
335                 case EFBIG:
336                         /* Try it again? - one try */
337                         if (remap == TRUE) {
338                                 remap = FALSE;
339                                 /*
340                                  * XXX: m_defrag will choke on
341                                  * non-MCLBYTES-sized clusters
342                                  */
343                                 m = m_defrag(*m_headp, M_NOWAIT);
344                                 if (m == NULL) {
345                                         adapter->mbuf_defrag_failed++;
346                                         m_freem(*m_headp);
347                                         *m_headp = NULL;
348                                         return (ENOBUFS);
349                                 }
350                                 *m_headp = m;
351                                 goto retry;
352                         } else
353                                 return (error);
354                 case ENOMEM:
355                         txr->no_tx_dma_setup++;
356                         return (error);
357                 default:
358                         txr->no_tx_dma_setup++;
359                         m_freem(*m_headp);
360                         *m_headp = NULL;
361                         return (error);
362                 }
363         }
364
365         /* Make certain there are enough descriptors */
366         if (txr->tx_avail < (nsegs + 2)) {
367                 txr->no_desc_avail++;
368                 bus_dmamap_unload(txr->txtag, map);
369                 return (ENOBUFS);
370         }
371         m_head = *m_headp;
372
373         /*
374          * Set up the appropriate offload context;
375          * this will consume the first descriptor.
376          */
377         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
378         if (__predict_false(error)) {
379                 if (error == ENOBUFS)
380                         *m_headp = NULL;
381                 return (error);
382         }
383
384         olinfo_status |= IXGBE_ADVTXD_CC;
385         i = txr->next_avail_desc;
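        /*
         * Fill one advanced data descriptor per DMA segment,
         * wrapping around the end of the ring as needed.
         */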
386         for (j = 0; j < nsegs; j++) {
387                 bus_size_t seglen;
388                 bus_addr_t segaddr;
389
390                 txbuf = &txr->tx_buffers[i];
391                 txd = &txr->tx_base[i];
392                 seglen = segs[j].ds_len;
393                 segaddr = htole64(segs[j].ds_addr);
394
395                 txd->read.buffer_addr = segaddr;
396                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
397                     cmd_type_len | seglen);
398                 txd->read.olinfo_status = htole32(olinfo_status);
399
400                 if (++i == txr->num_desc)
401                         i = 0;
402         }
403
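        /*
         * The last descriptor of the frame gets EOP (end of packet) and
         * RS (report status) so the hardware writes back its completion.
         */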
404         txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
405         txr->tx_avail -= nsegs;
406         txr->next_avail_desc = i;
407
408         txbuf->m_head = m_head;
409         /*
410          * Swap the maps so that the last descriptor,
411          * which gets the completion interrupt, holds
412          * the real map, and the first descriptor gets
413          * the now-unused map from this one.
414          */
415         txr->tx_buffers[first].map = txbuf->map;
416         txbuf->map = map;
417         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
418
419         /* Set the EOP descriptor that will be marked done */
420         txbuf = &txr->tx_buffers[first];
421         txbuf->eop = txd;
422
423         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
424             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
425         /*
426          * Advance the Transmit Descriptor Tail (TDT); this tells the
427          * hardware that this frame is available to transmit.
428          */
429         ++txr->total_packets;
430         IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
431
432         /* Mark queue as having work */
433         if (txr->busy == 0)
434                 txr->busy = 1;
435
436         return (0);
437 } /* ixgbe_xmit */
438
439
440 /************************************************************************
441  * ixgbe_allocate_transmit_buffers
442  *
443  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
444  *   the information needed to transmit a packet on the wire. This is
445  *   called only once at attach; setup is done on every reset.
446  ************************************************************************/
447 static int
448 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
449 {
450         struct adapter      *adapter = txr->adapter;
451         device_t            dev = adapter->dev;
452         struct ixgbe_tx_buf *txbuf;
453         int                 error, i;
454
455         /*
456          * Setup DMA descriptor areas.
457          */
458         error = bus_dma_tag_create(
459                  /*      parent */ bus_get_dma_tag(adapter->dev),
460                  /*   alignment */ 1,
461                  /*      bounds */ 0,
462                  /*     lowaddr */ BUS_SPACE_MAXADDR,
463                  /*    highaddr */ BUS_SPACE_MAXADDR,
464                  /*      filter */ NULL,
465                  /*   filterarg */ NULL,
466                  /*     maxsize */ IXGBE_TSO_SIZE,
467                  /*   nsegments */ adapter->num_segs,
468                  /*  maxsegsize */ PAGE_SIZE,
469                  /*       flags */ 0,
470                  /*    lockfunc */ NULL,
471                  /* lockfuncarg */ NULL,
472                                    &txr->txtag);
473         if (error) {
474                 device_printf(dev, "Unable to allocate TX DMA tag\n");
475                 goto fail;
476         }
477
478         txr->tx_buffers =
479             (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
480             adapter->num_tx_desc, M_IXV, M_NOWAIT | M_ZERO);
481         if (!txr->tx_buffers) {
482                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
483                 error = ENOMEM;
484                 goto fail;
485         }
486
487         /* Create the descriptor buffer dma maps */
488         txbuf = txr->tx_buffers;
489         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
490                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
491                 if (error != 0) {
492                         device_printf(dev, "Unable to create TX DMA map\n");
493                         goto fail;
494                 }
495         }
496
497         return 0;
498 fail:
499         /* Free everything; this handles the case where we failed partway through */
500         ixv_free_transmit_structures(adapter);
501
502         return (error);
503 } /* ixgbe_allocate_transmit_buffers */
504
505 /************************************************************************
506  * ixgbe_setup_transmit_ring
507  *
508  *   Initialize a transmit ring.
509  ************************************************************************/
510 static void
511 ixgbe_setup_transmit_ring(struct tx_ring *txr)
512 {
513         struct adapter        *adapter = txr->adapter;
514         struct ixgbe_tx_buf   *txbuf;
515 #ifdef DEV_NETMAP
516         struct netmap_adapter *na = NA(adapter->ifp);
517         struct netmap_slot    *slot;
518 #endif /* DEV_NETMAP */
519
520         /* Clear the old ring contents */
521         IXGBE_TX_LOCK(txr);
522
523 #ifdef DEV_NETMAP
524         if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
525                 /*
526                  * (under lock): if in netmap mode, do some consistency
527                  * checks and set slot to entry 0 of the netmap ring.
528                  */
529                 slot = netmap_reset(na, NR_TX, txr->me, 0);
530         }
531 #endif /* DEV_NETMAP */
532
533         bzero((void *)txr->tx_base,
534             (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
535         /* Reset indices */
536         txr->next_avail_desc = 0;
537         txr->next_to_clean = 0;
538
539         /* Free any existing tx buffers. */
540         txbuf = txr->tx_buffers;
541         for (int i = 0; i < txr->num_desc; i++, txbuf++) {
542                 if (txbuf->m_head != NULL) {
543                         bus_dmamap_sync(txr->txtag, txbuf->map,
544                             BUS_DMASYNC_POSTWRITE);
545                         bus_dmamap_unload(txr->txtag, txbuf->map);
546                         m_freem(txbuf->m_head);
547                         txbuf->m_head = NULL;
548                 }
549
550 #ifdef DEV_NETMAP
551                 /*
552                  * In netmap mode, set the map for the packet buffer.
553                  * NOTE: Some drivers (not this one) also need to set
554                  * the physical buffer address in the NIC ring.
555                  * Slots in the netmap ring (indexed by "si") are
556                  * kring->nkr_hwofs positions "ahead" wrt the
557                  * corresponding slot in the NIC ring. In some drivers
558                  * (not here) nkr_hwofs can be negative. Function
559                  * netmap_idx_n2k() handles wraparounds properly.
560                  */
561                 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
562                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
563                         netmap_load_map(na, txr->txtag,
564                             txbuf->map, NMB(na, slot + si));
565                 }
566 #endif /* DEV_NETMAP */
567
568                 /* Clear the EOP descriptor pointer */
569                 txbuf->eop = NULL;
570         }
571
572         /* Set number of descriptors available */
573         txr->tx_avail = adapter->num_tx_desc;
574
575         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
576             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
577         IXGBE_TX_UNLOCK(txr);
578 } /* ixgbe_setup_transmit_ring */
579
580 /************************************************************************
581  * ixv_setup_transmit_structures - Initialize all transmit rings.
582  ************************************************************************/
583 int
584 ixv_setup_transmit_structures(struct adapter *adapter)
585 {
586         struct tx_ring *txr = adapter->tx_rings;
587
588         for (int i = 0; i < adapter->num_queues; i++, txr++)
589                 ixgbe_setup_transmit_ring(txr);
590
591         return (0);
592 } /* ixv_setup_transmit_structures */
593
594 /************************************************************************
595  * ixv_free_transmit_structures - Free all transmit rings.
596  ************************************************************************/
597 void
598 ixv_free_transmit_structures(struct adapter *adapter)
599 {
600         struct tx_ring *txr = adapter->tx_rings;
601
602         for (int i = 0; i < adapter->num_queues; i++, txr++) {
603                 IXGBE_TX_LOCK(txr);
604                 ixgbe_free_transmit_buffers(txr);
605                 ixgbe_dma_free(adapter, &txr->txdma);
606                 IXGBE_TX_UNLOCK(txr);
607                 IXGBE_TX_LOCK_DESTROY(txr);
608         }
609         free(adapter->tx_rings, M_IXV);
610 } /* ixv_free_transmit_structures */
611
612 /************************************************************************
613  * ixgbe_free_transmit_buffers
614  *
615  *   Free transmit ring related data structures.
616  ************************************************************************/
617 static void
618 ixgbe_free_transmit_buffers(struct tx_ring *txr)
619 {
620         struct adapter      *adapter = txr->adapter;
621         struct ixgbe_tx_buf *tx_buffer;
622         int                 i;
623
624         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
625
626         if (txr->tx_buffers == NULL)
627                 return;
628
629         tx_buffer = txr->tx_buffers;
630         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
631                 if (tx_buffer->m_head != NULL) {
632                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
633                             BUS_DMASYNC_POSTWRITE);
634                         bus_dmamap_unload(txr->txtag, tx_buffer->map);
635                         m_freem(tx_buffer->m_head);
636                         tx_buffer->m_head = NULL;
637                         if (tx_buffer->map != NULL) {
638                                 bus_dmamap_destroy(txr->txtag, tx_buffer->map);
639                                 tx_buffer->map = NULL;
640                         }
641                 } else if (tx_buffer->map != NULL) {
642                         bus_dmamap_unload(txr->txtag, tx_buffer->map);
643                         bus_dmamap_destroy(txr->txtag, tx_buffer->map);
644                         tx_buffer->map = NULL;
645                 }
646         }
647         if (txr->br != NULL)
648                 buf_ring_free(txr->br, M_IXV);
649         if (txr->tx_buffers != NULL) {
650                 free(txr->tx_buffers, M_IXV);
651                 txr->tx_buffers = NULL;
652         }
653         if (txr->txtag != NULL) {
654                 bus_dma_tag_destroy(txr->txtag);
655                 txr->txtag = NULL;
656         }
657 } /* ixgbe_free_transmit_buffers */
658
659 /************************************************************************
660  * ixgbe_tx_ctx_setup
661  *
662  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
663  ************************************************************************/
664 static int
665 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
666     u32 *cmd_type_len, u32 *olinfo_status)
667 {
668         struct ixgbe_adv_tx_context_desc *TXD;
669         struct ether_vlan_header         *eh;
670 #ifdef INET
671         struct ip                        *ip;
672 #endif
673 #ifdef INET6
674         struct ip6_hdr                   *ip6;
675 #endif
676         int                              ehdrlen, ip_hlen = 0;
677         int                              offload = TRUE;
678         int                              ctxd = txr->next_avail_desc;
679         u32                              vlan_macip_lens = 0;
680         u32                              type_tucmd_mlhl = 0;
681         u16                              vtag = 0;
682         u16                              etype;
683         u8                               ipproto = 0;
684         caddr_t                          l3d;
685
686
687         /* First check if TSO is to be used */
688         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
689                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
690
691         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
692                 offload = FALSE;
693
694         /* Indicate the whole packet as payload when not doing TSO */
695         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
696
697         /* Now ready a context descriptor */
698         TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
699
700         /*
701          * In advanced descriptors the vlan tag must
702          * be placed into the context descriptor. Hence
703          * we need to make one even if not doing offloads.
704          */
705         if (mp->m_flags & M_VLANTAG) {
706                 vtag = htole16(mp->m_pkthdr.ether_vtag);
707                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
708         } else if (!IXGBE_IS_X550VF(txr->adapter) && (offload == FALSE))
709                 return (0);
710
711         /*
712          * Determine where frame payload starts.
713          * Jump over vlan headers if already present,
714          * helpful for QinQ too.
715          */
716         eh = mtod(mp, struct ether_vlan_header *);
717         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
718                 etype = ntohs(eh->evl_proto);
719                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
720         } else {
721                 etype = ntohs(eh->evl_encap_proto);
722                 ehdrlen = ETHER_HDR_LEN;
723         }
724
725         /* Set the ether header length */
726         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
727
728         if (offload == FALSE)
729                 goto no_offloads;
730
731         /*
732          * If the first mbuf only includes the ethernet header,
733          * jump to the next one
734          * XXX: This assumes the stack splits mbufs containing headers
735          *      on header boundaries
736          * XXX: And assumes the entire IP header is contained in one mbuf
737          */
738         if (mp->m_len == ehdrlen && mp->m_next)
739                 l3d = mtod(mp->m_next, caddr_t);
740         else
741                 l3d = mtod(mp, caddr_t) + ehdrlen;
742
743         switch (etype) {
744 #ifdef INET
745                 case ETHERTYPE_IP:
746                         ip = (struct ip *)(l3d);
747                         ip_hlen = ip->ip_hl << 2;
748                         ipproto = ip->ip_p;
749                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
750                         /* Insert IPv4 checksum into data descriptors */
751                         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
752                                 ip->ip_sum = 0;
753                                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
754                         }
755                         break;
756 #endif
757 #ifdef INET6
758                 case ETHERTYPE_IPV6:
759                         ip6 = (struct ip6_hdr *)(l3d);
760                         ip_hlen = sizeof(struct ip6_hdr);
761                         ipproto = ip6->ip6_nxt;
762                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
763                         break;
764 #endif
765                 default:
766                         offload = FALSE;
767                         break;
768         }
769
770         vlan_macip_lens |= ip_hlen;
771
772         /* No support for offloads for non-L4 next headers */
773         switch (ipproto) {
774                 case IPPROTO_TCP:
775                         if (mp->m_pkthdr.csum_flags &
776                             (CSUM_IP_TCP | CSUM_IP6_TCP))
777                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
778                         else
779                                 offload = false;
780                         break;
781                 case IPPROTO_UDP:
782                         if (mp->m_pkthdr.csum_flags &
783                             (CSUM_IP_UDP | CSUM_IP6_UDP))
784                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
785                         else
786                                 offload = false;
787                         break;
788                 case IPPROTO_SCTP:
789                         if (mp->m_pkthdr.csum_flags &
790                             (CSUM_IP_SCTP | CSUM_IP6_SCTP))
791                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
792                         else
793                                 offload = false;
794                         break;
795                 default:
796                         offload = false;
797                         break;
798         }
799
800         if (offload) /* Insert L4 checksum into data descriptors */
801                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
802
803 no_offloads:
804         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
805
806         /* Now copy bits into descriptor */
807         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
808         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
809         TXD->seqnum_seed = htole32(0);
810         TXD->mss_l4len_idx = htole32(0);
811
812         /* We've consumed the first desc, adjust counters */
813         if (++ctxd == txr->num_desc)
814                 ctxd = 0;
815         txr->next_avail_desc = ctxd;
816         --txr->tx_avail;
817
818         return (0);
819 } /* ixgbe_tx_ctx_setup */
820
821 /************************************************************************
822  * ixgbe_tso_setup
823  *
824  *   Setup work for hardware segmentation offload (TSO) on
825  *   adapters using advanced tx descriptors
826  ************************************************************************/
827 static int
828 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
829     u32 *olinfo_status)
830 {
831         struct ixgbe_adv_tx_context_desc *TXD;
832         struct ether_vlan_header         *eh;
833 #ifdef INET6
834         struct ip6_hdr                   *ip6;
835 #endif
836 #ifdef INET
837         struct ip                        *ip;
838 #endif
839         struct tcphdr                    *th;
840         int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
841         u32                              vlan_macip_lens = 0;
842         u32                              type_tucmd_mlhl = 0;
843         u32                              mss_l4len_idx = 0, paylen;
844         u16                              vtag = 0, eh_type;
845
846         /*
847          * Determine where frame payload starts.
848          * Jump over vlan headers if already present
849          */
850         eh = mtod(mp, struct ether_vlan_header *);
851         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
852                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
853                 eh_type = eh->evl_proto;
854         } else {
855                 ehdrlen = ETHER_HDR_LEN;
856                 eh_type = eh->evl_encap_proto;
857         }
858
859         switch (ntohs(eh_type)) {
860 #ifdef INET
861         case ETHERTYPE_IP:
862                 ip = (struct ip *)(mp->m_data + ehdrlen);
863                 if (ip->ip_p != IPPROTO_TCP)
864                         return (ENXIO);
865                 ip->ip_sum = 0;
866                 ip_hlen = ip->ip_hl << 2;
867                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
868                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
869                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
870                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
871                 /* Tell transmit desc to also do IPv4 checksum. */
872                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
873                 break;
874 #endif
875 #ifdef INET6
876         case ETHERTYPE_IPV6:
877                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
878                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
879                 if (ip6->ip6_nxt != IPPROTO_TCP)
880                         return (ENXIO);
881                 ip_hlen = sizeof(struct ip6_hdr);
882                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
883                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
884                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
885                 break;
886 #endif
887         default:
888                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
889                     __func__, ntohs(eh_type));
890                 break;
891         }
892
893         ctxd = txr->next_avail_desc;
894         TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
895
896         tcp_hlen = th->th_off << 2;
897
898         /* Payload length, used later in the transmit data descriptor */
899         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
900
901         /* VLAN MACLEN IPLEN */
902         if (mp->m_flags & M_VLANTAG) {
903                 vtag = htole16(mp->m_pkthdr.ether_vtag);
904                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
905         }
906
907         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
908         vlan_macip_lens |= ip_hlen;
909         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
910
911         /* ADV DTYPE TUCMD */
912         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
913         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
914         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
915
916         /* MSS L4LEN IDX */
917         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
918         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
919         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
920
921         TXD->seqnum_seed = htole32(0);
922
923         if (++ctxd == txr->num_desc)
924                 ctxd = 0;
925
926         txr->tx_avail--;
927         txr->next_avail_desc = ctxd;
928         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
929         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
930         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
931         ++txr->tso_tx;
932
933         return (0);
934 } /* ixgbe_tso_setup */
935
936
937 /************************************************************************
938  * ixv_txeof
939  *
940  *   Examine each tx_buffer in the used queue. If the hardware is done
941  *   processing the packet then free associated resources. The
942  *   tx_buffer is put back on the free queue.
943  ************************************************************************/
944 void
945 ixv_txeof(struct tx_ring *txr)
946 {
947         struct adapter          *adapter = txr->adapter;
948         struct ixgbe_tx_buf     *buf;
949         union ixgbe_adv_tx_desc *txd;
950         u32                     work, processed = 0;
951         u32                     limit = adapter->tx_process_limit;
952
953         mtx_assert(&txr->tx_mtx, MA_OWNED);
954
955 #ifdef DEV_NETMAP
956         if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
957             (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
958                 struct netmap_adapter *na = NA(adapter->ifp);
959                 struct netmap_kring *kring = &na->tx_rings[txr->me];
960                 txd = txr->tx_base;
961                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
962                     BUS_DMASYNC_POSTREAD);
963                 /*
964                  * In netmap mode, all the work is done in the context
965                  * of the client thread. Interrupt handlers only wake up
966                  * clients, which may be sleeping on individual rings
967                  * or on a global resource for all rings.
968                  * To implement tx interrupt mitigation, we wake up the client
969                  * thread roughly every half ring, even if the NIC interrupts
970                  * more frequently. This is implemented as follows:
971                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
972                  *   the slot that should wake up the thread (nkr_num_slots
973                  *   means the user thread should not be woken up);
974                  * - the driver ignores tx interrupts unless netmap_mitigate=0
975                  *   or the slot has the DD bit set.
976                  */
977                 if (!netmap_mitigate ||
978                     (kring->nr_kflags < kring->nkr_num_slots &&
979                      txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
980                         netmap_tx_irq(adapter->ifp, txr->me);
981                 }
982                 return;
983         }
984 #endif /* DEV_NETMAP */
985
986         if (txr->tx_avail == txr->num_desc) {
987                 txr->busy = 0;
988                 return;
989         }
990
991         /* Get work starting point */
992         work = txr->next_to_clean;
993         buf = &txr->tx_buffers[work];
994         txd = &txr->tx_base[work];
995         work -= txr->num_desc; /* The distance to ring end */
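        /*
         * 'work' is now a negative offset from the end of the ring; it
         * reaches zero exactly when the index wraps, which the !work
         * tests below rely on.
         */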
996         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
997             BUS_DMASYNC_POSTREAD);
998
999         do {
1000                 union ixgbe_adv_tx_desc *eop = buf->eop;
1001                 if (eop == NULL) /* No work */
1002                         break;
1003
1004                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1005                         break;  /* I/O not complete */
1006
1007                 if (buf->m_head) {
1008                         txr->bytes += buf->m_head->m_pkthdr.len;
1009                         bus_dmamap_sync(txr->txtag, buf->map,
1010                             BUS_DMASYNC_POSTWRITE);
1011                         bus_dmamap_unload(txr->txtag, buf->map);
1012                         m_freem(buf->m_head);
1013                         buf->m_head = NULL;
1014                 }
1015                 buf->eop = NULL;
1016                 ++txr->tx_avail;
1017
1018                 /* Clean the remaining descriptors of a multi-segment packet */
1019                 while (txd != eop) {
1020                         ++txd;
1021                         ++buf;
1022                         ++work;
1023                         /* wrap the ring? */
1024                         if (__predict_false(!work)) {
1025                                 work -= txr->num_desc;
1026                                 buf = txr->tx_buffers;
1027                                 txd = txr->tx_base;
1028                         }
1029                         if (buf->m_head) {
1030                                 txr->bytes += buf->m_head->m_pkthdr.len;
1031                                 bus_dmamap_sync(txr->txtag, buf->map,
1032                                     BUS_DMASYNC_POSTWRITE);
1033                                 bus_dmamap_unload(txr->txtag, buf->map);
1034                                 m_freem(buf->m_head);
1035                                 buf->m_head = NULL;
1036                         }
1037                         ++txr->tx_avail;
1038                         buf->eop = NULL;
1039
1040                 }
1041                 ++txr->packets;
1042                 ++processed;
1043
1044                 /* Try the next packet */
1045                 ++txd;
1046                 ++buf;
1047                 ++work;
1048                 /* reset with a wrap */
1049                 if (__predict_false(!work)) {
1050                         work -= txr->num_desc;
1051                         buf = txr->tx_buffers;
1052                         txd = txr->tx_base;
1053                 }
1054                 prefetch(txd);
1055         } while (__predict_true(--limit));
1056
1057         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1058             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1059
1060         work += txr->num_desc;
1061         txr->next_to_clean = work;
1062
1063         /*
1064          * Queue hang detection: we know there is work
1065          * outstanding, or the early return above would
1066          * have been taken, so if nothing was cleaned,
1067          * increment busy; the local timer checks this
1068          * and marks the queue HUNG once it exceeds the
1069          * maximum number of attempts.
1070          */
1071         if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1072                 ++txr->busy;
1073         /*
1074          * If anything was cleaned, reset the state to 1;
1075          * note this also clears HUNG if it was set.
1076          */
1077         if (processed)
1078                 txr->busy = 1;
1079
1080         if (txr->tx_avail == txr->num_desc)
1081                 txr->busy = 0;
1082
1083         return;
1084 } /* ixv_txeof */
1085
1086 /************************************************************************
1087  * ixgbe_rsc_count
1088  *
1089  *   Used to detect a descriptor that has been merged by Hardware RSC.
1090  ************************************************************************/
1091 static inline u32
1092 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1093 {
1094         return (le32toh(rx->wb.lower.lo_dword.data) &
1095             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1096 } /* ixgbe_rsc_count */
1097
1098 /************************************************************************
1099  * ixgbe_setup_hw_rsc
1100  *
1101  *   Initialize the Hardware RSC (LRO) feature on 82599
1102  *   for an RX ring; it is toggled by the LRO capability
1103  *   even though it is transparent to the stack.
1104  *
1105  *   NOTE: Since this HW feature only works with IPv4 and
1106  *         testing has shown soft LRO to be as effective,
1107  *         this feature will be disabled by default.
1108  ************************************************************************/
1109 static void
1110 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1111 {
1112         struct adapter  *adapter = rxr->adapter;
1113         struct ixgbe_hw *hw = &adapter->hw;
1114         u32             rscctrl, rdrxctl;
1115
1116         /* If turning LRO/RSC off we need to disable it */
1117         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1118                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1119                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                     /* Write the cleared bit back so RSC is actually disabled */
                     IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1120                 return;
1121         }
1122
1123         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1124         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1125 #ifdef DEV_NETMAP
1126         /* Always strip CRC unless Netmap disabled it */
1127         if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1128             !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1129             ix_crcstrip)
1130 #endif /* DEV_NETMAP */
1131                 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1132         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1133         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1134
1135         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1136         rscctrl |= IXGBE_RSCCTL_RSCEN;
1137         /*
1138          * Limit the total number of descriptors that
1139          * can be combined, so it does not exceed 64K
1140          */
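        /*
         * For example, 16 descriptors of 2KB clusters or 8 of (typically
         * 4KB) page-sized clusters merge at most roughly 32KB per RSC.
         */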
1141         if (rxr->mbuf_sz == MCLBYTES)
1142                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1143         else if (rxr->mbuf_sz == MJUMPAGESIZE)
1144                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1145         else if (rxr->mbuf_sz == MJUM9BYTES)
1146                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1147         else  /* Using 16K cluster */
1148                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1149
1150         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1151
1152         /* Enable TCP header recognition */
1153         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1154             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1155
1156         /* Disable RSC for ACK packets */
1157         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1158             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1159
1160         rxr->hw_rsc = TRUE;
1161 } /* ixgbe_setup_hw_rsc */
1162
1163 /************************************************************************
1164  * ixgbe_refresh_mbufs
1165  *
1166  *   Refresh mbuf buffers for RX descriptor rings.
1167  *    - Keeps its own state, so discards due to resource
1168  *      exhaustion are unnecessary; if an mbuf cannot be
1169  *      obtained it just returns, keeping its placeholder,
1170  *      and can simply be called again later to retry.
1171  ************************************************************************/
1172 static void
1173 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1174 {
1175         struct adapter      *adapter = rxr->adapter;
1176         struct ixgbe_rx_buf *rxbuf;
1177         struct mbuf         *mp;
1178         bus_dma_segment_t   seg[1];
1179         int                 i, j, nsegs, error;
1180         bool                refreshed = FALSE;
1181
1182         i = j = rxr->next_to_refresh;
1183         /* Control the loop with one beyond */
1184         if (++j == rxr->num_desc)
1185                 j = 0;
1186
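        /*
         * 'i' is the slot being refreshed and 'j' always runs one ahead,
         * so the loop stops before touching the slot at 'limit'.
         */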
1187         while (j != limit) {
1188                 rxbuf = &rxr->rx_buffers[i];
1189                 if (rxbuf->buf == NULL) {
1190                         mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1191                             rxr->mbuf_sz);
1192                         if (mp == NULL)
1193                                 goto update;
1194                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1195                                 m_adj(mp, ETHER_ALIGN);
1196                 } else
1197                         mp = rxbuf->buf;
1198
1199                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1200
1201                 /* If we're dealing with an mbuf that was copied rather
1202                  * than replaced, there's no need to go through busdma.
1203                  */
1204                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1205                         /* Get the memory mapping */
1206                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1207                         error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
1208                             mp, seg, &nsegs, BUS_DMA_NOWAIT);
1209                         if (error != 0) {
1210                                 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1211                                 m_free(mp);
1212                                 rxbuf->buf = NULL;
1213                                 goto update;
1214                         }
1215                         rxbuf->buf = mp;
1216                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1217                             BUS_DMASYNC_PREREAD);
1218                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1219                             htole64(seg[0].ds_addr);
1220                 } else {
1221                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1222                         rxbuf->flags &= ~IXGBE_RX_COPY;
1223                 }
1224
1225                 refreshed = TRUE;
1226                 /* Next is precalculated */
1227                 i = j;
1228                 rxr->next_to_refresh = i;
1229                 if (++j == rxr->num_desc)
1230                         j = 0;
1231         }
1232
1233 update:
1234         if (refreshed) /* Update hardware tail index */
1235                 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1236
1237         return;
1238 } /* ixgbe_refresh_mbufs */
1239
1240 /************************************************************************
1241  * ixgbe_allocate_receive_buffers
1242  *
1243  *   Allocate memory for rx_buffer structures. Since we use one
1244  *   rx_buffer per received packet, the maximum number of rx_buffers
1245  *   that we'll need is equal to the number of receive descriptors
1246  *   that we've allocated.
1247  ************************************************************************/
1248 static int
1249 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1250 {
1251         struct adapter      *adapter = rxr->adapter;
1252         device_t            dev = adapter->dev;
1253         struct ixgbe_rx_buf *rxbuf;
1254         int                 bsize, error;
1255
1256         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1257         rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_IXV,
1258             M_NOWAIT | M_ZERO);
1259         if (!rxr->rx_buffers) {
1260                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1261                 error = ENOMEM;
1262                 goto fail;
1263         }
1264
1265         error = bus_dma_tag_create(
1266                  /*      parent */ bus_get_dma_tag(dev),
1267                  /*   alignment */ 1,
1268                  /*      bounds */ 0,
1269                  /*     lowaddr */ BUS_SPACE_MAXADDR,
1270                  /*    highaddr */ BUS_SPACE_MAXADDR,
1271                  /*      filter */ NULL,
1272                  /*   filterarg */ NULL,
1273                  /*     maxsize */ MJUM16BYTES,
1274                  /*   nsegments */ 1,
1275                  /*  maxsegsize */ MJUM16BYTES,
1276                  /*       flags */ 0,
1277                  /*    lockfunc */ NULL,
1278                  /* lockfuncarg */ NULL,
1279                                    &rxr->ptag);
1280         if (error) {
1281                 device_printf(dev, "Unable to create RX DMA tag\n");
1282                 goto fail;
1283         }
1284
1285         for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1286                 rxbuf = &rxr->rx_buffers[i];
1287                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1288                 if (error) {
1289                         device_printf(dev, "Unable to create RX dma map\n");
1290                         goto fail;
1291                 }
1292         }
1293
1294         return (0);
1295
1296 fail:
1297         /* Frees all, but can handle partial completion */
1298         ixv_free_receive_structures(adapter);
1299
1300         return (error);
1301 } /* ixgbe_allocate_receive_buffers */
1302
1303 /************************************************************************
1304  * ixgbe_free_receive_ring
1305  ************************************************************************/
1306 static void
1307 ixgbe_free_receive_ring(struct rx_ring *rxr)
1308 {
1309         struct ixgbe_rx_buf *rxbuf;
1310
1311         for (int i = 0; i < rxr->num_desc; i++) {
1312                 rxbuf = &rxr->rx_buffers[i];
1313                 if (rxbuf->buf != NULL) {
1314                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1315                             BUS_DMASYNC_POSTREAD);
1316                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1317                         rxbuf->buf->m_flags |= M_PKTHDR;
1318                         m_freem(rxbuf->buf);
1319                         rxbuf->buf = NULL;
1320                         rxbuf->flags = 0;
1321                 }
1322         }
1323 } /* ixgbe_free_receive_ring */
1324
1325 /************************************************************************
1326  * ixgbe_setup_receive_ring
1327  *
1328  *   Initialize a receive ring and its buffers.
1329  ************************************************************************/
1330 static int
1331 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1332 {
1333         struct adapter        *adapter;
1334         struct ifnet          *ifp;
1335         device_t              dev;
1336         struct ixgbe_rx_buf   *rxbuf;
1337         struct lro_ctrl       *lro = &rxr->lro;
1338 #ifdef DEV_NETMAP
1339         struct netmap_adapter *na = NA(rxr->adapter->ifp);
1340         struct netmap_slot    *slot;
1341 #endif /* DEV_NETMAP */
1342         bus_dma_segment_t     seg[1];
1343         int                   rsize, nsegs, error = 0;
1344
1345         adapter = rxr->adapter;
1346         ifp = adapter->ifp;
1347         dev = adapter->dev;
1348
1349         /* Clear the ring contents */
1350         IXGBE_RX_LOCK(rxr);
1351
1352 #ifdef DEV_NETMAP
1353         if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1354                 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1355 #endif /* DEV_NETMAP */
1356
1357         rsize = roundup2(adapter->num_rx_desc *
1358             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
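             /*
              * Each advanced RX descriptor is 16 bytes, so, for example,
              * 1024 descriptors occupy 16 KB, which is already a multiple
              * of DBA_ALIGN.
              */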
1359         bzero((void *)rxr->rx_base, rsize);
1360         /* Cache the size */
1361         rxr->mbuf_sz = adapter->rx_mbuf_sz;
1362
1363         /* Free current RX buffer structs and their mbufs */
1364         ixgbe_free_receive_ring(rxr);
1365
1366         /* Now replenish the mbufs */
1367         for (int j = 0; j != rxr->num_desc; ++j) {
1368                 struct mbuf *mp;
1369
1370                 rxbuf = &rxr->rx_buffers[j];
1371
1372 #ifdef DEV_NETMAP
1373                 /*
1374                  * In netmap mode, fill the map and set the buffer
1375                  * address in the NIC ring, considering the offset
1376                  * between the netmap and NIC rings (see comment in
1377                  * ixgbe_setup_transmit_ring() ). No need to allocate
1378                  * an mbuf, so end the block with a continue;
1379                  */
1380                 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1381                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1382                         uint64_t paddr;
1383                         void *addr;
1384
1385                         addr = PNMB(na, slot + sj, &paddr);
1386                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1387                         /* Update descriptor and the cached value */
1388                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1389                         rxbuf->addr = htole64(paddr);
1390                         continue;
1391                 }
1392 #endif /* DEV_NETMAP */
1393
1394                 rxbuf->flags = 0;
1395                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1396                     adapter->rx_mbuf_sz);
1397                 if (rxbuf->buf == NULL) {
1398                         error = ENOBUFS;
1399                         goto fail;
1400                 }
1401                 mp = rxbuf->buf;
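                     /*
                      * Size the mbuf to the full cluster so the whole
                      * buffer is mapped below.
                      */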
1402                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1403                 /* Get the memory mapping */
1404                 error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
1405                     &nsegs, BUS_DMA_NOWAIT);
1406                 if (error != 0)
1407                         goto fail;
1408                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
1409                 /* Update the descriptor and the cached value */
1410                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1411                 rxbuf->addr = htole64(seg[0].ds_addr);
1412         }
1413
1414
1415         /* Setup our descriptor indices */
1416         rxr->next_to_check = 0;
1417         rxr->next_to_refresh = 0;
1418         rxr->lro_enabled = FALSE;
1419         rxr->rx_copies = 0;
1420         rxr->rx_bytes = 0;
1421         rxr->vtag_strip = FALSE;
1422
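             /*
              * Push the descriptor writes out to memory so the hardware
              * sees the new buffer addresses.
              */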
1423         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1424             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1425
1426         /*
1427          * Now set up the LRO interface
1428          */
1429         if (ixgbe_rsc_enable)
1430                 ixgbe_setup_hw_rsc(rxr);
1431         else if (ifp->if_capenable & IFCAP_LRO) {
1432                 int err = tcp_lro_init(lro);
1433                 if (err) {
1434                         device_printf(dev, "LRO Initialization failed!\n");
1435                         goto fail;
1436                 }
1437                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1438                 rxr->lro_enabled = TRUE;
1439                 lro->ifp = adapter->ifp;
1440         }
1441
1442         IXGBE_RX_UNLOCK(rxr);
1443
1444         return (0);
1445
1446 fail:
1447         ixgbe_free_receive_ring(rxr);
1448         IXGBE_RX_UNLOCK(rxr);
1449
1450         return (error);
1451 } /* ixgbe_setup_receive_ring */
1452
1453 /************************************************************************
1454  * ixv_setup_receive_structures - Initialize all receive rings.
1455  ************************************************************************/
1456 int
1457 ixv_setup_receive_structures(struct adapter *adapter)
1458 {
1459         struct rx_ring *rxr = adapter->rx_rings;
1460         int            j;
1461
1462         for (j = 0; j < adapter->num_queues; j++, rxr++)
1463                 if (ixgbe_setup_receive_ring(rxr))
1464                         goto fail;
1465
1466         return (0);
1467 fail:
1468         /*
1469          * Free the RX buffers allocated so far; we only handle
1470          * the rings that completed, since the failing ring has
1471          * already cleaned up after itself.  Ring 'j' is the terminus.
1472          */
1473         for (int i = 0; i < j; ++i) {
1474                 rxr = &adapter->rx_rings[i];
1475                 ixgbe_free_receive_ring(rxr);
1476         }
1477
1478         return (ENOBUFS);
1479 } /* ixv_setup_receive_structures */
1480
1481
1482 /************************************************************************
1483  * ixv_free_receive_structures - Free all receive rings.
1484  ************************************************************************/
1485 void
1486 ixv_free_receive_structures(struct adapter *adapter)
1487 {
1488         struct rx_ring *rxr = adapter->rx_rings;
1489         struct lro_ctrl *lro;
1490
1491         INIT_DEBUGOUT("ixv_free_receive_structures: begin");
1492
1493         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1494                 lro = &rxr->lro;
1495                 ixgbe_free_receive_buffers(rxr);
1496                 /* Free LRO memory */
1497                 tcp_lro_free(lro);
1498                 /* Free the ring memory as well */
1499                 ixgbe_dma_free(adapter, &rxr->rxdma);
1500         }
1501
1502         free(adapter->rx_rings, M_IXV);
1503 } /* ixv_free_receive_structures */
1504
1505
1506 /************************************************************************
1507  * ixgbe_free_receive_buffers - Free receive ring data structures
1508  ************************************************************************/
1509 static void
1510 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1511 {
1512         struct adapter      *adapter = rxr->adapter;
1513         struct ixgbe_rx_buf *rxbuf;
1514
1515         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1516
1517         /* Cleanup any existing buffers */
1518         if (rxr->rx_buffers != NULL) {
1519                 for (int i = 0; i < adapter->num_rx_desc; i++) {
1520                         rxbuf = &rxr->rx_buffers[i];
1521                         if (rxbuf->buf != NULL) {
1522                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1523                                     BUS_DMASYNC_POSTREAD);
1524                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1525                                 rxbuf->buf->m_flags |= M_PKTHDR;
1526                                 m_freem(rxbuf->buf);
1527                         }
1528                         rxbuf->buf = NULL;
1529                         if (rxbuf->pmap != NULL) {
1530                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1531                                 rxbuf->pmap = NULL;
1532                         }
1533                 }
1534                 if (rxr->rx_buffers != NULL) {
1535                         free(rxr->rx_buffers, M_IXV);
1536                         rxr->rx_buffers = NULL;
1537                 }
1538         }
1539
1540         if (rxr->ptag != NULL) {
1541                 bus_dma_tag_destroy(rxr->ptag);
1542                 rxr->ptag = NULL;
1543         }
1544
1545         return;
1546 } /* ixgbe_free_receive_buffers */
1547
1548 /************************************************************************
1549  * ixgbe_rx_input
1550  ************************************************************************/
1551 static __inline void
1552 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1553     u32 ptype)
1554 {
1555         /*
1556          * At the moment LRO is only for IP/TCP packets whose TCP checksum
1557          * was verified by hardware, and the frame must not carry a VLAN tag
1558          * in its Ethernet header.  IPv6 extension headers are not yet supported.
1559          */
1560         if (rxr->lro_enabled &&
1561             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1562             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1563             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1564              (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1565              (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1566              (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1567             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1568             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1569                 /*
1570                  * Hand the packet to LRO; fall through and send it
1571                  * to the stack below if:
1572                  *  - there are no LRO resources (lro_cnt == 0), or
1573                  *  - the LRO enqueue fails
1574                  */
1575                 if (rxr->lro.lro_cnt != 0)
1576                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1577                                 return;
1578         }
1579         IXGBE_RX_UNLOCK(rxr);
1580         (*ifp->if_input)(ifp, m);
1581         IXGBE_RX_LOCK(rxr);
1582 } /* ixgbe_rx_input */
1583
1584 /************************************************************************
1585  * ixgbe_rx_discard
1586  ************************************************************************/
1587 static __inline void
1588 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1589 {
1590         struct ixgbe_rx_buf *rbuf;
1591
1592         rbuf = &rxr->rx_buffers[i];
1593
1594         /*
1595          * With advanced descriptors the writeback
1596          * clobbers the buffer addresses, so it's easier
1597          * to just free the existing mbufs and take
1598          * the normal refresh path to get new buffers
1599          * and mappings.
1600          */
1601
1602         if (rbuf->fmp != NULL) {/* Partial chain ? */
1603                 rbuf->fmp->m_flags |= M_PKTHDR;
1604                 m_freem(rbuf->fmp);
1605                 rbuf->fmp = NULL;
1606                 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1607         } else if (rbuf->buf) {
1608                 m_free(rbuf->buf);
1609                 rbuf->buf = NULL;
1610         }
1611         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1612
1613         rbuf->flags = 0;
1614
1615         return;
1616 } /* ixgbe_rx_discard */
1617
1618
1619 /************************************************************************
1620  * ixv_rxeof
1621  *
1622  *   This routine executes in interrupt context.  It replenishes
1623  *   the mbufs in the descriptor ring and passes data that has been
1624  *   DMA'ed into host memory up the stack.
1625  *
1626  *   Return TRUE for more work, FALSE for all clean.
1627  ************************************************************************/
1628 bool
1629 ixv_rxeof(struct ix_queue *que)
1630 {
1631         struct adapter          *adapter = que->adapter;
1632         struct rx_ring          *rxr = que->rxr;
1633         struct ifnet            *ifp = adapter->ifp;
1634         struct lro_ctrl         *lro = &rxr->lro;
1635 #if __FreeBSD_version < 1100105
1636         struct lro_entry        *queued;
1637 #endif
1638         union ixgbe_adv_rx_desc *cur;
1639         struct ixgbe_rx_buf     *rbuf, *nbuf;
1640         int                     i, nextp, processed = 0;
1641         u32                     staterr = 0;
1642         u32                     count = adapter->rx_process_limit;
1643         u16                     pkt_info;
1644
1645         IXGBE_RX_LOCK(rxr);
1646
1647 #ifdef DEV_NETMAP
1648         if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1649                 /* Same as the txeof routine: wakeup clients on intr. */
1650                 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1651                         IXGBE_RX_UNLOCK(rxr);
1652                         return (FALSE);
1653                 }
1654         }
1655 #endif /* DEV_NETMAP */
1656
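             /*
              * Walk the ring until we find a descriptor the hardware has
              * not written back (DD clear) or the rx_process_limit budget
              * is exhausted.
              */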
1657         for (i = rxr->next_to_check; count != 0;) {
1658                 struct mbuf *sendmp, *mp;
1659                 u32         rsc, ptype;
1660                 u16         len;
1661                 u16         vtag = 0;
1662                 bool        eop;
1663
1664                 /* Sync the ring. */
1665                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1666                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1667
1668                 cur = &rxr->rx_base[i];
1669                 staterr = le32toh(cur->wb.upper.status_error);
1670                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1671
1672                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1673                         break;
1674                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1675                         break;
1676
1677                 count--;
1678                 sendmp = NULL;
1679                 nbuf = NULL;
1680                 rsc = 0;
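                     /*
                      * Clear the stored status so a stale DD bit is not
                      * seen the next time this slot is examined.
                      */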
1681                 cur->wb.upper.status_error = 0;
1682                 rbuf = &rxr->rx_buffers[i];
1683                 mp = rbuf->buf;
1684
1685                 len = le16toh(cur->wb.upper.length);
1686                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1687                     IXGBE_RXDADV_PKTTYPE_MASK;
1688                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1689
1690                 /* Make sure bad packets are discarded */
1691                 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1692 #if __FreeBSD_version >= 1100036
1693                         if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1694 #endif
1695                         rxr->rx_discarded++;
1696                         ixgbe_rx_discard(rxr, i);
1697                         goto next_desc;
1698                 }
1699
1700                 /*
1701                  * On the 82599, which supports a hardware
1702                  * LRO (called HW RSC), packets need not be
1703                  * fragmented across sequential descriptors;
1704                  * instead, the next descriptor is indicated
1705                  * in bits of the current descriptor.  This
1706                  * also means that we might process more than
1707                  * one packet at a time, something that was
1708                  * never true before and that required
1709                  * eliminating the global chain pointers in
1710                  * favor of what we are doing here.  -jfv
1711                  */
1712                 if (!eop) {
1713                         /*
1714                          * Figure out the next descriptor
1715                          * of this frame.
1716                          */
1717                         if (rxr->hw_rsc == TRUE) {
1718                                 rsc = ixgbe_rsc_count(cur);
1719                                 rxr->rsc_num += (rsc - 1);
1720                         }
1721                         if (rsc) { /* Get hardware index */
1722                                 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1723                                     IXGBE_RXDADV_NEXTP_SHIFT);
1724                         } else { /* Just sequential */
1725                                 nextp = i + 1;
1726                                 if (nextp == adapter->num_rx_desc)
1727                                         nextp = 0;
1728                         }
1729                         nbuf = &rxr->rx_buffers[nextp];
1730                         prefetch(nbuf);
1731                 }
1732                 /*
1733                  * Rather than using the fmp/lmp global pointers
1734                  * we now keep the head of a packet chain in the
1735                  * buffer struct and pass this along from one
1736                  * descriptor to the next, until we get EOP.
1737                  */
1738                 mp->m_len = len;
1739                 /*
1740                  * See if there is a stored head for this chain;
1741                  * that determines how we handle this fragment.
1742                  */
1743                 sendmp = rbuf->fmp;
1744                 if (sendmp != NULL) {  /* secondary frag */
1745                         rbuf->buf = rbuf->fmp = NULL;
1746                         mp->m_flags &= ~M_PKTHDR;
1747                         sendmp->m_pkthdr.len += mp->m_len;
1748                 } else {
1749                         /*
1750                          * Optimize.  This might be a small packet,
1751                          * maybe just a TCP ACK.  Do a fast copy that
1752                          * is cache aligned into a new mbuf, and
1753                          * leave the old mbuf+cluster for re-use.
1754                          */
1755                         if (eop && len <= IXGBE_RX_COPY_LEN) {
1756                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1757                                 if (sendmp != NULL) {
1758                                         sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1759                                         ixv_bcopy(mp->m_data, sendmp->m_data,
1760                                             len);
1761                                         sendmp->m_len = len;
1762                                         rxr->rx_copies++;
1763                                         rbuf->flags |= IXGBE_RX_COPY;
1764                                 }
1765                         }
1766                         if (sendmp == NULL) {
1767                                 rbuf->buf = rbuf->fmp = NULL;
1768                                 sendmp = mp;
1769                         }
1770
1771                         /* first desc of a non-ps chain */
1772                         sendmp->m_flags |= M_PKTHDR;
1773                         sendmp->m_pkthdr.len = mp->m_len;
1774                 }
1775                 ++processed;
1776
1777                 /* Pass the head pointer on */
1778                 if (eop == 0) {
1779                         nbuf->fmp = sendmp;
1780                         sendmp = NULL;
1781                         mp->m_next = nbuf->buf;
1782                 } else { /* Sending this frame */
1783                         sendmp->m_pkthdr.rcvif = ifp;
1784                         rxr->rx_packets++;
1785                         /* capture data for AIM */
1786                         rxr->bytes += sendmp->m_pkthdr.len;
1787                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1788                         /* Process vlan info */
1789                         if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1790                                 vtag = le16toh(cur->wb.upper.vlan);
1791                         if (vtag) {
1792                                 sendmp->m_pkthdr.ether_vtag = vtag;
1793                                 sendmp->m_flags |= M_VLANTAG;
1794                         }
1795                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1796                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
1797
1798                         /*
1799                          * With multiple queues the RXCSUM.PCSD bit is
1800                          * set and never cleared, which means an RSS
1801                          * hash is available for us to use.
1802                          */
1803                         if (adapter->num_queues > 1) {
1804                                 sendmp->m_pkthdr.flowid =
1805                                     le32toh(cur->wb.lower.hi_dword.rss);
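                                     /*
                                      * Map the hardware RSS type to the
                                      * stack's M_HASHTYPE_* value.
                                      */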
1806                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1807                                 case IXGBE_RXDADV_RSSTYPE_IPV4:
1808                                         M_HASHTYPE_SET(sendmp,
1809                                             M_HASHTYPE_RSS_IPV4);
1810                                         break;
1811                                 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1812                                         M_HASHTYPE_SET(sendmp,
1813                                             M_HASHTYPE_RSS_TCP_IPV4);
1814                                         break;
1815                                 case IXGBE_RXDADV_RSSTYPE_IPV6:
1816                                         M_HASHTYPE_SET(sendmp,
1817                                             M_HASHTYPE_RSS_IPV6);
1818                                         break;
1819                                 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1820                                         M_HASHTYPE_SET(sendmp,
1821                                             M_HASHTYPE_RSS_TCP_IPV6);
1822                                         break;
1823                                 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1824                                         M_HASHTYPE_SET(sendmp,
1825                                             M_HASHTYPE_RSS_IPV6_EX);
1826                                         break;
1827                                 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1828                                         M_HASHTYPE_SET(sendmp,
1829                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
1830                                         break;
1831 #if __FreeBSD_version > 1100000
1832                                 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1833                                         M_HASHTYPE_SET(sendmp,
1834                                             M_HASHTYPE_RSS_UDP_IPV4);
1835                                         break;
1836                                 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1837                                         M_HASHTYPE_SET(sendmp,
1838                                             M_HASHTYPE_RSS_UDP_IPV6);
1839                                         break;
1840                                 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1841                                         M_HASHTYPE_SET(sendmp,
1842                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
1843                                         break;
1844 #endif
1845                                 default:
1846 #if __FreeBSD_version < 1100116
1847                                         M_HASHTYPE_SET(sendmp,
1848                                             M_HASHTYPE_OPAQUE);
1849 #else
1850                                         M_HASHTYPE_SET(sendmp,
1851                                             M_HASHTYPE_OPAQUE_HASH);
1852 #endif
1853                                 }
1854                         } else {
1855                                 sendmp->m_pkthdr.flowid = que->msix;
1856                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1857                         }
1858                 }
1859 next_desc:
1860                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1861                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1862
1863                 /* Advance our pointers to the next descriptor. */
1864                 if (++i == rxr->num_desc)
1865                         i = 0;
1866
1867                 /* Now send to the stack or do LRO */
1868                 if (sendmp != NULL) {
1869                         rxr->next_to_check = i;
1870                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1871                         i = rxr->next_to_check;
1872                 }
1873
1874                 /* Every 8 descriptors we go to refresh mbufs */
1875                 if (processed == 8) {
1876                         ixgbe_refresh_mbufs(rxr, i);
1877                         processed = 0;
1878                 }
1879         }
1880
1881         /* Refresh any remaining buf structs */
1882         if (ixgbe_rx_unrefreshed(rxr))
1883                 ixgbe_refresh_mbufs(rxr, i);
1884
1885         rxr->next_to_check = i;
1886
1887         /*
1888          * Flush any outstanding LRO work
1889          */
1890 #if __FreeBSD_version < 1100105
1891         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1892                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1893                 tcp_lro_flush(lro, queued);
1894         }
1895 #else
1896         tcp_lro_flush_all(lro);
1897 #endif
1898
1899         IXGBE_RX_UNLOCK(rxr);
1900
1901         /*
1902          * Still have cleaning to do?
1903          */
1904         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1905                 return (TRUE);
1906
1907         return (FALSE);
1908 } /* ixv_rxeof */
1909
1910
1911 /************************************************************************
1912  * ixgbe_rx_checksum
1913  *
1914  *   Verify that the hardware indicated that the checksum is valid.
1915  *   Inform the stack about the status of the checksum so that it
1916  *   doesn't spend time verifying it again.
1917  ************************************************************************/
1918 static void
1919 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1920 {
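             /*
              * staterr packs the status bits in the low 16 bits and the
              * error bits in bits 24-31.
              */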
1921         u16  status = (u16)staterr;
1922         u8   errors = (u8)(staterr >> 24);
1923         bool sctp = false;
1924
1925         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1926             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1927                 sctp = true;
1928
1929         /* IPv4 checksum */
1930         if (status & IXGBE_RXD_STAT_IPCS) {
1931                 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
1932                 /* IP Checksum Good */
1933                 if (!(errors & IXGBE_RXD_ERR_IPE))
1934                         mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
1935         }
1936         /* TCP/UDP/SCTP checksum */
1937         if (status & IXGBE_RXD_STAT_L4CS) {
1938                 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
1939                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1940                         mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
1941                         if (!sctp)
1942                                 mp->m_pkthdr.csum_data = htons(0xffff);
1943                 }
1944         }
1945 } /* ixgbe_rx_checksum */
1946
1947 /************************************************************************
1948  * ixgbe_dmamap_cb - bus_dmamap_load() callback to record the mapped address.
1949  ************************************************************************/
1950 static void
1951 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1952 {
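             /* Stash the single segment's bus address for ixgbe_dma_malloc(). */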
1953         if (error)
1954                 return;
1955         *(bus_addr_t *)arg = segs->ds_addr;
1956
1957         return;
1958 } /* ixgbe_dmamap_cb */
1959
1960 /************************************************************************
1961  * ixgbe_dma_malloc
1962  ************************************************************************/
1963 static int
1964 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
1965                  struct ixgbe_dma_alloc *dma, int mapflags)
1966 {
1967         device_t dev = adapter->dev;
1968         int      r;
1969
1970         r = bus_dma_tag_create(
1971              /*      parent */ bus_get_dma_tag(adapter->dev),
1972              /*   alignment */ DBA_ALIGN,
1973              /*      bounds */ 0,
1974              /*     lowaddr */ BUS_SPACE_MAXADDR,
1975              /*    highaddr */ BUS_SPACE_MAXADDR,
1976              /*      filter */ NULL,
1977              /*   filterarg */ NULL,
1978              /*     maxsize */ size,
1979              /*   nsegments */ 1,
1980              /*  maxsegsize */ size,
1981              /*       flags */ BUS_DMA_ALLOCNOW,
1982              /*    lockfunc */ NULL,
1983              /* lockfuncarg */ NULL,
1984                                &dma->dma_tag);
1985         if (r != 0) {
1986                 device_printf(dev,
1987                     "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",
1988                     r);
1989                 goto fail_0;
1990         }
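             /* Allocate the DMA-safe memory and a map for it. */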
1991         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
1992             BUS_DMA_NOWAIT, &dma->dma_map);
1993         if (r != 0) {
1994                 device_printf(dev,
1995                     "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);
1996                 goto fail_1;
1997         }
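             /*
              * Load the memory; ixgbe_dmamap_cb() stores the resulting
              * bus address in dma->dma_paddr.
              */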
1998         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
1999             ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2000         if (r != 0) {
2001                 device_printf(dev,
2002                     "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
2003                 goto fail_2;
2004         }
2005         dma->dma_size = size;
2006
2007         return (0);
2008 fail_2:
2009         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2010 fail_1:
2011         bus_dma_tag_destroy(dma->dma_tag);
2012 fail_0:
2013         dma->dma_tag = NULL;
2014
2015         return (r);
2016 } /* ixgbe_dma_malloc */
2017
2018 static void
2019 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2020 {
2021         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2022             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2023         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2024         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2025         bus_dma_tag_destroy(dma->dma_tag);
2026 } /* ixgbe_dma_free */
2027
2028
2029 /************************************************************************
2030  * ixv_allocate_queues
2031  *
2032  *   Allocate memory for the transmit and receive rings, and then
2033  *   the descriptors associated with each, called only once at attach.
2034  ************************************************************************/
2035 int
2036 ixv_allocate_queues(struct adapter *adapter)
2037 {
2038         device_t        dev = adapter->dev;
2039         struct ix_queue *que;
2040         struct tx_ring  *txr;
2041         struct rx_ring  *rxr;
2042         int             rsize, tsize, error = IXGBE_SUCCESS;
2043         int             txconf = 0, rxconf = 0;
2044
2045         /* First, allocate the top level queue structs */
2046         adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2047             adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
2048         if (!adapter->queues) {
2049                 device_printf(dev, "Unable to allocate queue memory\n");
2050                 error = ENOMEM;
2051                 goto fail;
2052         }
2053
2054         /* Second, allocate the TX ring struct memory */
2055         adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2056             adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
2057         if (!adapter->tx_rings) {
2058                 device_printf(dev, "Unable to allocate TX ring memory\n");
2059                 error = ENOMEM;
2060                 goto tx_fail;
2061         }
2062
2063         /* Third, allocate the RX ring */
2064         adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2065             adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
2066         if (!adapter->rx_rings) {
2067                 device_printf(dev, "Unable to allocate RX ring memory\n");
2068                 error = ENOMEM;
2069                 goto rx_fail;
2070         }
2071
2072         /* For the ring itself */
2073         tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2074             DBA_ALIGN);
2075
2076         /*
2077          * Now set up the TX queues; txconf is needed to handle the
2078          * possibility that things fail midcourse and we need to
2079          * undo the memory allocations gracefully.
2080          */
2081         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2082                 /* Set up some basics */
2083                 txr = &adapter->tx_rings[i];
2084                 txr->adapter = adapter;
2085                 txr->br = NULL;
2086                 txr->me = i;
2087                 txr->num_desc = adapter->num_tx_desc;
2088
2089                 /* Initialize the TX side lock */
2090                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2091                     device_get_nameunit(dev), txr->me);
2092                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2093
2094                 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2095                     BUS_DMA_NOWAIT)) {
2096                         device_printf(dev,
2097                             "Unable to allocate TX Descriptor memory\n");
2098                         error = ENOMEM;
2099                         goto err_tx_desc;
2100                 }
2101                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2102                 bzero((void *)txr->tx_base, tsize);
2103
2104                 /* Now allocate transmit buffers for the ring */
2105                 if (ixgbe_allocate_transmit_buffers(txr)) {
2106                         device_printf(dev,
2107                             "Critical Failure setting up transmit buffers\n");
2108                         error = ENOMEM;
2109                         goto err_tx_desc;
2110                 }
2111                 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2112                         /* Allocate a buf ring */
2113                         txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_IXV,
2114                             M_WAITOK, &txr->tx_mtx);
2115                         if (txr->br == NULL) {
2116                                 device_printf(dev,
2117                                     "Critical Failure setting up buf ring\n");
2118                                 error = ENOMEM;
2119                                 goto err_tx_desc;
2120                         }
2121                 }
2122         }
2123
2124         /*
2125          * Next the RX queues...
2126          */
2127         rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2128             DBA_ALIGN);
2129         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2130                 rxr = &adapter->rx_rings[i];
2131                 /* Set up some basics */
2132                 rxr->adapter = adapter;
2133                 rxr->me = i;
2134                 rxr->num_desc = adapter->num_rx_desc;
2135
2136                 /* Initialize the RX side lock */
2137                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2138                     device_get_nameunit(dev), rxr->me);
2139                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2140
2141                 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2142                     BUS_DMA_NOWAIT)) {
2143                         device_printf(dev,
2144                             "Unable to allocate RX Descriptor memory\n");
2145                         error = ENOMEM;
2146                         goto err_rx_desc;
2147                 }
2148                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2149                 bzero((void *)rxr->rx_base, rsize);
2150
2151                 /* Allocate receive buffers for the ring */
2152                 if (ixgbe_allocate_receive_buffers(rxr)) {
2153                         device_printf(dev,
2154                             "Critical Failure setting up receive buffers\n");
2155                         error = ENOMEM;
2156                         goto err_rx_desc;
2157                 }
2158         }
2159
2160         /*
2161          * Finally set up the queue holding structs
2162          */
2163         for (int i = 0; i < adapter->num_queues; i++) {
2164                 que = &adapter->queues[i];
2165                 que->adapter = adapter;
2166                 que->me = i;
2167                 que->txr = &adapter->tx_rings[i];
2168                 que->rxr = &adapter->rx_rings[i];
2169         }
2170
2171         return (0);
2172
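     /*
      * Error unwind: free only what was successfully allocated, in the
      * reverse order of allocation.
      */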
2173 err_rx_desc:
2174         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2175                 ixgbe_dma_free(adapter, &rxr->rxdma);
2176 err_tx_desc:
2177         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2178                 ixgbe_dma_free(adapter, &txr->txdma);
2179         free(adapter->rx_rings, M_IXV);
2180 rx_fail:
2181         free(adapter->tx_rings, M_IXV);
2182 tx_fail:
2183         free(adapter->queues, M_IXV);
2184 fail:
2185         return (error);
2186 } /* ixv_allocate_queues */