1 /******************************************************************************
2
3   Copyright (c) 2001-2017, Intel Corporation
4   All rights reserved.
5
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40
41 #include "ixgbe.h"
42
43 extern int ix_crcstrip;
44
45 /*
46  * HW RSC control:
47  *  This feature only works with
48  *  IPv4, and only on 82599 and later.
49  *  It also breaks IP forwarding, which
50  *  cannot be worked around by the stack
51  *  the way software LRO can. For these
52  *  reasons it is left off by default and
53  *  has no tunable; to enable it, set
54  *  ixgbe_rsc_enable to TRUE below and
55  *  recompile the driver.
56  */
57 static bool ixgbe_rsc_enable = FALSE;
58
59 /*
60  * For Flow Director: this is the
61  * TX packet sampling interval for
62  * the filter pool; at the default
63  * of 20, every 20th packet is probed.
64  *
65  * Setting this to 0 disables the
66  * feature.
67  */
68 static int atr_sample_rate = 20;
69
70 /************************************************************************
71  *  Local Function prototypes
72  ************************************************************************/
73 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
74 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
75 static int           ixgbe_setup_receive_ring(struct rx_ring *);
76 static void          ixgbe_free_receive_buffers(struct rx_ring *);
77 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32);
78 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
79 static int           ixgbe_xmit(struct tx_ring *, struct mbuf **);
80 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
81                                         struct mbuf *, u32 *, u32 *);
82 static int           ixgbe_tso_setup(struct tx_ring *,
83                                      struct mbuf *, u32 *, u32 *);
84 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
85 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
86                                     struct mbuf *, u32);
87 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
88                                       struct ixgbe_dma_alloc *, int);
89 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
90
91 MALLOC_DECLARE(M_IXGBE);
92
93 /************************************************************************
94  * ixgbe_legacy_start_locked - Transmit entry point
95  *
96  *   Called by the stack to initiate a transmit.
97  *   The driver will remain in this routine as long as there are
98  *   packets to transmit and transmit resources are available.
99  *   In case resources are not available, the stack is notified
100  *   and the packet is requeued.
101  ************************************************************************/
102 int
103 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
104 {
105         struct mbuf    *m_head;
106         struct adapter *adapter = txr->adapter;
107
108         IXGBE_TX_LOCK_ASSERT(txr);
109
110         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
111                 return (ENETDOWN);
112         if (!adapter->link_active)
113                 return (ENETDOWN);
114
115         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
116                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
117                         break;
118
119                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
120                 if (m_head == NULL)
121                         break;
122
123                 if (ixgbe_xmit(txr, &m_head)) {
124                         if (m_head != NULL)
125                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
126                         break;
127                 }
128                 /* Send a copy of the frame to the BPF listener */
129                 ETHER_BPF_MTAP(ifp, m_head);
130         }
131
132         return IXGBE_SUCCESS;
133 } /* ixgbe_legacy_start_locked */
134
135 /************************************************************************
136  * ixgbe_legacy_start
137  *
138  *   Called by the stack, this always uses the first tx ring,
139  *   and should not be used with multiqueue tx enabled.
140  ************************************************************************/
141 void
142 ixgbe_legacy_start(struct ifnet *ifp)
143 {
144         struct adapter *adapter = ifp->if_softc;
145         struct tx_ring *txr = adapter->tx_rings;
146
147         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
148                 IXGBE_TX_LOCK(txr);
149                 ixgbe_legacy_start_locked(ifp, txr);
150                 IXGBE_TX_UNLOCK(txr);
151         }
152 } /* ixgbe_legacy_start */
153
154 /************************************************************************
155  * ixgbe_mq_start - Multiqueue Transmit Entry Point
156  *
157  *   (if_transmit function)
158  ************************************************************************/
159 int
160 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
161 {
162         struct adapter  *adapter = ifp->if_softc;
163         struct ix_queue *que;
164         struct tx_ring  *txr;
165         int             i, err = 0;
166         uint32_t        bucket_id;
167
168         /*
169          * When doing RSS, map the packet to the same outbound
170          * queue that its incoming flow would be steered to.
171          *
172          * If everything is set up correctly, that is the same
173          * RSS bucket the current CPU belongs to.
174          */
175         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
176                 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
177                     (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
178                     &bucket_id) == 0)) {
179                         i = bucket_id % adapter->num_queues;
180 #ifdef IXGBE_DEBUG
181                         if (bucket_id > adapter->num_queues)
182                                 if_printf(ifp,
183                                     "bucket_id (%d) > num_queues (%d)\n",
184                                     bucket_id, adapter->num_queues);
185 #endif
186                 } else
187                         i = m->m_pkthdr.flowid % adapter->num_queues;
188         } else
189                 i = curcpu % adapter->num_queues;
190
191         /* Check for a hung queue and pick an alternative */
192         if (((1 << i) & adapter->active_queues) == 0)
193                 i = ffsl(adapter->active_queues);
194
195         txr = &adapter->tx_rings[i];
196         que = &adapter->queues[i];
197
198         err = drbr_enqueue(ifp, txr->br, m);
199         if (err)
200                 return (err);
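        /*
         * The mbuf has been queued above; drain the ring now if the TX
         * lock is uncontended, otherwise hand off to the queue's
         * taskqueue, which drains it via ixgbe_deferred_mq_start().
         */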
201         if (IXGBE_TX_TRYLOCK(txr)) {
202                 ixgbe_mq_start_locked(ifp, txr);
203                 IXGBE_TX_UNLOCK(txr);
204         } else
205                 taskqueue_enqueue(que->tq, &txr->txq_task);
206
207         return (0);
208 } /* ixgbe_mq_start */
209
210 /************************************************************************
211  * ixgbe_mq_start_locked
212  ************************************************************************/
213 int
214 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
215 {
216         struct mbuf    *next;
217         int            enqueued = 0, err = 0;
218
219         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
220                 return (ENETDOWN);
221         if (!txr->adapter->link_active)
222                 return (ENETDOWN);
223
224         /* Process the queue */
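        /*
         * With drbr_peek() (FreeBSD >= 901504) the mbuf stays on the
         * buf_ring until transmitted: on success we advance past it,
         * and on failure we either advance (if ixgbe_xmit() consumed
         * it) or put the possibly-modified mbuf back.  The older path
         * dequeues up front and re-enqueues on failure.
         */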
225 #if __FreeBSD_version < 901504
226         next = drbr_dequeue(ifp, txr->br);
227         while (next != NULL) {
228                 err = ixgbe_xmit(txr, &next);
229                 if (err != 0) {
230                         if (next != NULL)
231                                 err = drbr_enqueue(ifp, txr->br, next);
232 #else
233         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
234                 err = ixgbe_xmit(txr, &next);
235                 if (err != 0) {
236                         if (next == NULL)
237                                 drbr_advance(ifp, txr->br);
238                         else
239                                 drbr_putback(ifp, txr->br, next);
240 #endif
241                         break;
242                 }
243 #if __FreeBSD_version >= 901504
244                 drbr_advance(ifp, txr->br);
245 #endif
246                 enqueued++;
247                 /* Send a copy of the frame to the BPF listener */
248                 ETHER_BPF_MTAP(ifp, next);
249                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
250                         break;
251 #if __FreeBSD_version < 901504
252                 next = drbr_dequeue(ifp, txr->br);
253 #endif
254         }
255
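        /* Opportunistically reclaim completed descriptors when running low */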
256         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
257                 ixgbe_txeof(txr);
258
259         return (err);
260 } /* ixgbe_mq_start_locked */
261
262 /************************************************************************
263  * ixgbe_deferred_mq_start
264  *
265  *   Called from a taskqueue to drain queued transmit packets.
266  ************************************************************************/
267 void
268 ixgbe_deferred_mq_start(void *arg, int pending)
269 {
270         struct tx_ring *txr = arg;
271         struct adapter *adapter = txr->adapter;
272         struct ifnet   *ifp = adapter->ifp;
273
274         IXGBE_TX_LOCK(txr);
275         if (!drbr_empty(ifp, txr->br))
276                 ixgbe_mq_start_locked(ifp, txr);
277         IXGBE_TX_UNLOCK(txr);
278 } /* ixgbe_deferred_mq_start */
279
280 /************************************************************************
281  * ixgbe_qflush - Flush all ring buffers
282  ************************************************************************/
283 void
284 ixgbe_qflush(struct ifnet *ifp)
285 {
286         struct adapter *adapter = ifp->if_softc;
287         struct tx_ring *txr = adapter->tx_rings;
288         struct mbuf    *m;
289
290         for (int i = 0; i < adapter->num_queues; i++, txr++) {
291                 IXGBE_TX_LOCK(txr);
292                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
293                         m_freem(m);
294                 IXGBE_TX_UNLOCK(txr);
295         }
296         if_qflush(ifp);
297 } /* ixgbe_qflush */
298
299
300 /************************************************************************
301  * ixgbe_xmit
302  *
303  *   This routine maps the mbufs to tx descriptors, allowing the
304  *   TX engine to transmit the packets.
305  *
306  *   Return 0 on success, positive on failure
307  ************************************************************************/
308 static int
309 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
310 {
311         struct adapter          *adapter = txr->adapter;
312         struct ixgbe_tx_buf     *txbuf;
313         union ixgbe_adv_tx_desc *txd = NULL;
314         struct mbuf             *m_head;
315         int                     i, j, error, nsegs;
316         int                     first;
317         u32                     olinfo_status = 0, cmd_type_len;
318         bool                    remap = TRUE;
319         bus_dma_segment_t       segs[adapter->num_segs];
320         bus_dmamap_t            map;
321
322         m_head = *m_headp;
323
324         /* Basic descriptor defines */
325         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
326             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
327
328         if (m_head->m_flags & M_VLANTAG)
329                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
330
331         /*
332          * It is important to capture the first descriptor
333          * used, because that tx_buffer will hold the pointer
334          * to the EOP descriptor the hardware reports back on.
335          */
336         first = txr->next_avail_desc;
337         txbuf = &txr->tx_buffers[first];
338         map = txbuf->map;
339
340         /*
341          * Map the packet for DMA.
342          */
343 retry:
344         error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
345             &nsegs, BUS_DMA_NOWAIT);
346
347         if (__predict_false(error)) {
348                 struct mbuf *m;
349
350                 switch (error) {
351                 case EFBIG:
352                         /* Try it again? - one try */
353                         if (remap == TRUE) {
354                                 remap = FALSE;
355                                 /*
356                                  * XXX: m_defrag will choke on
357                                  * non-MCLBYTES-sized clusters
358                                  */
359                                 m = m_defrag(*m_headp, M_NOWAIT);
360                                 if (m == NULL) {
361                                         adapter->mbuf_defrag_failed++;
362                                         m_freem(*m_headp);
363                                         *m_headp = NULL;
364                                         return (ENOBUFS);
365                                 }
366                                 *m_headp = m;
367                                 goto retry;
368                         } else
369                                 return (error);
370                 case ENOMEM:
371                         txr->no_tx_dma_setup++;
372                         return (error);
373                 default:
374                         txr->no_tx_dma_setup++;
375                         m_freem(*m_headp);
376                         *m_headp = NULL;
377                         return (error);
378                 }
379         }
380
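        /*
         * One data descriptor is needed per DMA segment, plus the
         * context descriptor set up by ixgbe_tx_ctx_setup() below;
         * the extra slot of headroom keeps the ring from being run
         * completely dry.
         */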
381         /* Make certain there are enough descriptors */
382         if (txr->tx_avail < (nsegs + 2)) {
383                 txr->no_desc_avail++;
384                 bus_dmamap_unload(txr->txtag, map);
385                 return (ENOBUFS);
386         }
387         m_head = *m_headp;
388
389         /*
390          * Set up the appropriate offload context
391          * this will consume the first descriptor
392          */
393         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
394         if (__predict_false(error)) {
395                 if (error == ENOBUFS)
396                         *m_headp = NULL;
397                 return (error);
398         }
399
400         /* Flow Director: sample this packet for the filter table */
401         if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
402             (txr->atr_sample) && (!adapter->fdir_reinit)) {
403                 ++txr->atr_count;
404                 if (txr->atr_count >= atr_sample_rate) {
405                         ixgbe_atr(txr, m_head);
406                         txr->atr_count = 0;
407                 }
408         }
409
410         olinfo_status |= IXGBE_ADVTXD_CC;
411         i = txr->next_avail_desc;
412         for (j = 0; j < nsegs; j++) {
413                 bus_size_t seglen;
414                 bus_addr_t segaddr;
415
416                 txbuf = &txr->tx_buffers[i];
417                 txd = &txr->tx_base[i];
418                 seglen = segs[j].ds_len;
419                 segaddr = htole64(segs[j].ds_addr);
420
421                 txd->read.buffer_addr = segaddr;
422                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
423                     cmd_type_len | seglen);
424                 txd->read.olinfo_status = htole32(olinfo_status);
425
426                 if (++i == txr->num_desc)
427                         i = 0;
428         }
429
430         txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
431         txr->tx_avail -= nsegs;
432         txr->next_avail_desc = i;
433
434         txbuf->m_head = m_head;
435         /*
436          * Here we swap the map so the last descriptor,
437          * which gets the completion interrupt has the
438          * real map, and the first descriptor gets the
439          * unused map from this descriptor.
440          */
441         txr->tx_buffers[first].map = txbuf->map;
442         txbuf->map = map;
443         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
444
445         /* Set the EOP descriptor that will be marked done */
446         txbuf = &txr->tx_buffers[first];
447         txbuf->eop = txd;
448
449         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
450             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
451         /*
452          * Advance the Transmit Descriptor Tail (TDT); this tells the
453          * hardware that this frame is available to transmit.
454          */
455         ++txr->total_packets;
456         IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
457
458         /* Mark queue as having work */
459         if (txr->busy == 0)
460                 txr->busy = 1;
461
462         return (0);
463 } /* ixgbe_xmit */
464
465
466 /************************************************************************
467  * ixgbe_allocate_transmit_buffers
468  *
469  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
470  *   the information needed to transmit a packet on the wire. This is
471  *   called only once at attach, setup is done every reset.
472  ************************************************************************/
473 static int
474 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
475 {
476         struct adapter      *adapter = txr->adapter;
477         device_t            dev = adapter->dev;
478         struct ixgbe_tx_buf *txbuf;
479         int                 error, i;
480
481         /*
482          * Setup DMA descriptor areas.
483          */
484         error = bus_dma_tag_create(
485                  /*      parent */ bus_get_dma_tag(adapter->dev),
486                  /*   alignment */ 1,
487                  /*      bounds */ 0,
488                  /*     lowaddr */ BUS_SPACE_MAXADDR,
489                  /*    highaddr */ BUS_SPACE_MAXADDR,
490                  /*      filter */ NULL,
491                  /*   filterarg */ NULL,
492                  /*     maxsize */ IXGBE_TSO_SIZE,
493                  /*   nsegments */ adapter->num_segs,
494                  /*  maxsegsize */ PAGE_SIZE,
495                  /*       flags */ 0,
496                  /*    lockfunc */ NULL,
497                  /* lockfuncarg */ NULL,
498                                    &txr->txtag);
499         if (error) {
500                 device_printf(dev, "Unable to allocate TX DMA tag\n");
501                 goto fail;
502         }
503
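        /*
         * Each tx_buffer allocated below gets a DMA map from this tag,
         * so one frame may span up to num_segs segments of at most
         * PAGE_SIZE each, bounded by IXGBE_TSO_SIZE in total.
         */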
504         txr->tx_buffers =
505             (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
506             adapter->num_tx_desc, M_IXGBE, M_NOWAIT | M_ZERO);
507         if (!txr->tx_buffers) {
508                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
509                 error = ENOMEM;
510                 goto fail;
511         }
512
513         /* Create the descriptor buffer dma maps */
514         txbuf = txr->tx_buffers;
515         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
516                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
517                 if (error != 0) {
518                         device_printf(dev, "Unable to create TX DMA map\n");
519                         goto fail;
520                 }
521         }
522
523         return 0;
524 fail:
525         /* Free everything; this handles the case where we failed partway */
526         ixgbe_free_transmit_structures(adapter);
527
528         return (error);
529 } /* ixgbe_allocate_transmit_buffers */
530
531 /************************************************************************
532  *
533  *  Initialize a transmit ring.
534  *
535  ************************************************************************/
536 static void
537 ixgbe_setup_transmit_ring(struct tx_ring *txr)
538 {
539         struct adapter        *adapter = txr->adapter;
540         struct ixgbe_tx_buf   *txbuf;
541 #ifdef DEV_NETMAP
542         struct netmap_adapter *na = NA(adapter->ifp);
543         struct netmap_slot    *slot;
544 #endif /* DEV_NETMAP */
545
546         /* Clear the old ring contents */
547         IXGBE_TX_LOCK(txr);
548
549 #ifdef DEV_NETMAP
550         if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
551                 /*
552                  * (under lock): if in netmap mode, do some consistency
553                  * checks and set slot to entry 0 of the netmap ring.
554                  */
555                 slot = netmap_reset(na, NR_TX, txr->me, 0);
556         }
557 #endif /* DEV_NETMAP */
558
559         bzero((void *)txr->tx_base,
560             (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
561         /* Reset indices */
562         txr->next_avail_desc = 0;
563         txr->next_to_clean = 0;
564
565         /* Free any existing tx buffers. */
566         txbuf = txr->tx_buffers;
567         for (int i = 0; i < txr->num_desc; i++, txbuf++) {
568                 if (txbuf->m_head != NULL) {
569                         bus_dmamap_sync(txr->txtag, txbuf->map,
570                             BUS_DMASYNC_POSTWRITE);
571                         bus_dmamap_unload(txr->txtag, txbuf->map);
572                         m_freem(txbuf->m_head);
573                         txbuf->m_head = NULL;
574                 }
575
576 #ifdef DEV_NETMAP
577                 /*
578                  * In netmap mode, set the map for the packet buffer.
579                  * NOTE: Some drivers (not this one) also need to set
580                  * the physical buffer address in the NIC ring.
581                  * Slots in the netmap ring (indexed by "si") are
582                  * kring->nkr_hwofs positions "ahead" wrt the
583                  * corresponding slot in the NIC ring. In some drivers
584                  * (not here) nkr_hwofs can be negative. Function
585                  * netmap_idx_n2k() handles wraparounds properly.
586                  */
587                 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
588                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
589                         netmap_load_map(na, txr->txtag,
590                             txbuf->map, NMB(na, slot + si));
591                 }
592 #endif /* DEV_NETMAP */
593
594                 /* Clear the EOP descriptor pointer */
595                 txbuf->eop = NULL;
596         }
597
598         /* Set the rate at which we sample packets */
599         if (adapter->feat_en & IXGBE_FEATURE_FDIR)
600                 txr->atr_sample = atr_sample_rate;
601
602         /* Set number of descriptors available */
603         txr->tx_avail = adapter->num_tx_desc;
604
605         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
606             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
607         IXGBE_TX_UNLOCK(txr);
608 } /* ixgbe_setup_transmit_ring */
609
610 /************************************************************************
611  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
612  ************************************************************************/
613 int
614 ixgbe_setup_transmit_structures(struct adapter *adapter)
615 {
616         struct tx_ring *txr = adapter->tx_rings;
617
618         for (int i = 0; i < adapter->num_queues; i++, txr++)
619                 ixgbe_setup_transmit_ring(txr);
620
621         return (0);
622 } /* ixgbe_setup_transmit_structures */
623
624 /************************************************************************
625  * ixgbe_free_transmit_structures - Free all transmit rings.
626  ************************************************************************/
627 void
628 ixgbe_free_transmit_structures(struct adapter *adapter)
629 {
630         struct tx_ring *txr = adapter->tx_rings;
631
632         for (int i = 0; i < adapter->num_queues; i++, txr++) {
633                 IXGBE_TX_LOCK(txr);
634                 ixgbe_free_transmit_buffers(txr);
635                 ixgbe_dma_free(adapter, &txr->txdma);
636                 IXGBE_TX_UNLOCK(txr);
637                 IXGBE_TX_LOCK_DESTROY(txr);
638         }
639         free(adapter->tx_rings, M_IXGBE);
640 } /* ixgbe_free_transmit_structures */
641
642 /************************************************************************
643  * ixgbe_free_transmit_buffers
644  *
645  *   Free transmit ring related data structures.
646  ************************************************************************/
647 static void
648 ixgbe_free_transmit_buffers(struct tx_ring *txr)
649 {
650         struct adapter      *adapter = txr->adapter;
651         struct ixgbe_tx_buf *tx_buffer;
652         int                 i;
653
654         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
655
656         if (txr->tx_buffers == NULL)
657                 return;
658
659         tx_buffer = txr->tx_buffers;
660         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
661                 if (tx_buffer->m_head != NULL) {
662                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
663                             BUS_DMASYNC_POSTWRITE);
664                         bus_dmamap_unload(txr->txtag, tx_buffer->map);
665                         m_freem(tx_buffer->m_head);
666                         tx_buffer->m_head = NULL;
667                         if (tx_buffer->map != NULL) {
668                                 bus_dmamap_destroy(txr->txtag, tx_buffer->map);
669                                 tx_buffer->map = NULL;
670                         }
671                 } else if (tx_buffer->map != NULL) {
672                         bus_dmamap_unload(txr->txtag, tx_buffer->map);
673                         bus_dmamap_destroy(txr->txtag, tx_buffer->map);
674                         tx_buffer->map = NULL;
675                 }
676         }
677         if (txr->br != NULL)
678                 buf_ring_free(txr->br, M_IXGBE);
679         if (txr->tx_buffers != NULL) {
680                 free(txr->tx_buffers, M_IXGBE);
681                 txr->tx_buffers = NULL;
682         }
683         if (txr->txtag != NULL) {
684                 bus_dma_tag_destroy(txr->txtag);
685                 txr->txtag = NULL;
686         }
687 } /* ixgbe_free_transmit_buffers */
688
689 /************************************************************************
690  * ixgbe_tx_ctx_setup
691  *
692  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
693  ************************************************************************/
694 static int
695 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
696     u32 *cmd_type_len, u32 *olinfo_status)
697 {
698         struct ixgbe_adv_tx_context_desc *TXD;
699         struct ether_vlan_header         *eh;
700 #ifdef INET
701         struct ip                        *ip;
702 #endif
703 #ifdef INET6
704         struct ip6_hdr                   *ip6;
705 #endif
706         int                              ehdrlen, ip_hlen = 0;
707         int                              offload = TRUE;
708         int                              ctxd = txr->next_avail_desc;
709         u32                              vlan_macip_lens = 0;
710         u32                              type_tucmd_mlhl = 0;
711         u16                              vtag = 0;
712         u16                              etype;
713         u8                               ipproto = 0;
714         caddr_t                          l3d;
715
716
717         /* First check if TSO is to be used */
718         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
719                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
720
721         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
722                 offload = FALSE;
723
724         /* Indicate the whole packet as payload when not doing TSO */
725         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
726
727         /* Now ready a context descriptor */
728         TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
729
730         /*
731          * In advanced descriptors the vlan tag must
732          * be placed into the context descriptor. Hence
733          * we need to make one even if not doing offloads.
734          */
735         if (mp->m_flags & M_VLANTAG) {
736                 vtag = htole16(mp->m_pkthdr.ether_vtag);
737                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
738         } else if (!IXGBE_IS_X550VF(txr->adapter) && (offload == FALSE))
739                 return (0);
740
741         /*
742          * Determine where frame payload starts.
743          * Jump over vlan headers if already present,
744          * helpful for QinQ too.
745          */
746         eh = mtod(mp, struct ether_vlan_header *);
747         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
748                 etype = ntohs(eh->evl_proto);
749                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
750         } else {
751                 etype = ntohs(eh->evl_encap_proto);
752                 ehdrlen = ETHER_HDR_LEN;
753         }
754
755         /* Set the ether header length */
756         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
757
758         if (offload == FALSE)
759                 goto no_offloads;
760
761         /*
762          * If the first mbuf only includes the ethernet header,
763          * jump to the next one
764          * XXX: This assumes the stack splits mbufs containing headers
765          *      on header boundaries
766          * XXX: And assumes the entire IP header is contained in one mbuf
767          */
768         if (mp->m_len == ehdrlen && mp->m_next)
769                 l3d = mtod(mp->m_next, caddr_t);
770         else
771                 l3d = mtod(mp, caddr_t) + ehdrlen;
772
773         switch (etype) {
774 #ifdef INET
775                 case ETHERTYPE_IP:
776                         ip = (struct ip *)(l3d);
777                         ip_hlen = ip->ip_hl << 2;
778                         ipproto = ip->ip_p;
779                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
780                         /* Insert IPv4 checksum into data descriptors */
781                         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
782                                 ip->ip_sum = 0;
783                                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
784                         }
785                         break;
786 #endif
787 #ifdef INET6
788                 case ETHERTYPE_IPV6:
789                         ip6 = (struct ip6_hdr *)(l3d);
790                         ip_hlen = sizeof(struct ip6_hdr);
791                         ipproto = ip6->ip6_nxt;
792                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
793                         break;
794 #endif
795                 default:
796                         offload = FALSE;
797                         break;
798         }
799
800         vlan_macip_lens |= ip_hlen;
801
802         /* No support for offloads for non-L4 next headers */
803         switch (ipproto) {
804                 case IPPROTO_TCP:
805                         if (mp->m_pkthdr.csum_flags &
806                             (CSUM_IP_TCP | CSUM_IP6_TCP))
807                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
808                         else
809                                 offload = false;
810                         break;
811                 case IPPROTO_UDP:
812                         if (mp->m_pkthdr.csum_flags &
813                             (CSUM_IP_UDP | CSUM_IP6_UDP))
814                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
815                         else
816                                 offload = false;
817                         break;
818                 case IPPROTO_SCTP:
819                         if (mp->m_pkthdr.csum_flags &
820                             (CSUM_IP_SCTP | CSUM_IP6_SCTP))
821                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
822                         else
823                                 offload = false;
824                         break;
825                 default:
826                         offload = false;
827                         break;
828         }
829
830         if (offload) /* Insert L4 checksum into data descriptors */
831                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
832
833 no_offloads:
834         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
835
836         /* Now copy bits into descriptor */
837         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
838         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
839         TXD->seqnum_seed = htole32(0);
840         TXD->mss_l4len_idx = htole32(0);
841
842         /* We've consumed the first desc, adjust counters */
843         if (++ctxd == txr->num_desc)
844                 ctxd = 0;
845         txr->next_avail_desc = ctxd;
846         --txr->tx_avail;
847
848         return (0);
849 } /* ixgbe_tx_ctx_setup */
850
851 /************************************************************************
852  * ixgbe_tso_setup
853  *
854  *   Setup work for hardware segmentation offload (TSO) on
855  *   adapters using advanced tx descriptors
856  ************************************************************************/
857 static int
858 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
859     u32 *olinfo_status)
860 {
861         struct ixgbe_adv_tx_context_desc *TXD;
862         struct ether_vlan_header         *eh;
863 #ifdef INET6
864         struct ip6_hdr                   *ip6;
865 #endif
866 #ifdef INET
867         struct ip                        *ip;
868 #endif
869         struct tcphdr                    *th;
870         int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
871         u32                              vlan_macip_lens = 0;
872         u32                              type_tucmd_mlhl = 0;
873         u32                              mss_l4len_idx = 0, paylen;
874         u16                              vtag = 0, eh_type;
875
876         /*
877          * Determine where frame payload starts.
878          * Jump over vlan headers if already present
879          */
880         eh = mtod(mp, struct ether_vlan_header *);
881         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
882                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
883                 eh_type = eh->evl_proto;
884         } else {
885                 ehdrlen = ETHER_HDR_LEN;
886                 eh_type = eh->evl_encap_proto;
887         }
888
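        /*
         * For TSO the IP checksum field is zeroed and the TCP checksum
         * is seeded with the pseudo-header sum (addresses and protocol
         * only, no length); the hardware then fills in the real
         * checksums for each segment.
         */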
889         switch (ntohs(eh_type)) {
890 #ifdef INET
891         case ETHERTYPE_IP:
892                 ip = (struct ip *)(mp->m_data + ehdrlen);
893                 if (ip->ip_p != IPPROTO_TCP)
894                         return (ENXIO);
895                 ip->ip_sum = 0;
896                 ip_hlen = ip->ip_hl << 2;
897                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
898                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
899                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
900                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
901                 /* Tell transmit desc to also do IPv4 checksum. */
902                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
903                 break;
904 #endif
905 #ifdef INET6
906         case ETHERTYPE_IPV6:
907                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
908                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
909                 if (ip6->ip6_nxt != IPPROTO_TCP)
910                         return (ENXIO);
911                 ip_hlen = sizeof(struct ip6_hdr);
912                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
913                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
914                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
915                 break;
916 #endif
917         default:
918                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
919                     __func__, ntohs(eh_type));
920                 break;
921         }
922
923         ctxd = txr->next_avail_desc;
924         TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
925
926         tcp_hlen = th->th_off << 2;
927
928         /* TCP payload length, excluding all headers; used in olinfo below */
929         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
930
931         /* VLAN MACLEN IPLEN */
932         if (mp->m_flags & M_VLANTAG) {
933                 vtag = htole16(mp->m_pkthdr.ether_vtag);
934                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
935         }
936
937         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
938         vlan_macip_lens |= ip_hlen;
939         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
940
941         /* ADV DTYPE TUCMD */
942         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
943         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
944         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
945
946         /* MSS L4LEN IDX */
947         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
948         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
949         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
950
951         TXD->seqnum_seed = htole32(0);
952
953         if (++ctxd == txr->num_desc)
954                 ctxd = 0;
955
956         txr->tx_avail--;
957         txr->next_avail_desc = ctxd;
958         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
959         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
960         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
961         ++txr->tso_tx;
962
963         return (0);
964 } /* ixgbe_tso_setup */
965
966
967 /************************************************************************
968  * ixgbe_txeof
969  *
970  *   Examine each tx_buffer in the used queue. If the hardware is done
971  *   processing the packet then free associated resources. The
972  *   tx_buffer is put back on the free queue.
973  ************************************************************************/
974 void
975 ixgbe_txeof(struct tx_ring *txr)
976 {
977         struct adapter          *adapter = txr->adapter;
978         struct ixgbe_tx_buf     *buf;
979         union ixgbe_adv_tx_desc *txd;
980         u32                     work, processed = 0;
981         u32                     limit = adapter->tx_process_limit;
982
983         mtx_assert(&txr->tx_mtx, MA_OWNED);
984
985 #ifdef DEV_NETMAP
986         if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
987             (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
988                 struct netmap_adapter *na = NA(adapter->ifp);
989                 struct netmap_kring *kring = &na->tx_rings[txr->me];
990                 txd = txr->tx_base;
991                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
992                     BUS_DMASYNC_POSTREAD);
993                 /*
994                  * In netmap mode, all the work is done in the context
995                  * of the client thread. Interrupt handlers only wake up
996                  * clients, which may be sleeping on individual rings
997                  * or on a global resource for all rings.
998                  * To implement tx interrupt mitigation, we wake up the client
999                  * thread roughly every half ring, even if the NIC interrupts
1000                  * more frequently. This is implemented as follows:
1001                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
1002                  *   the slot that should wake up the thread (nkr_num_slots
1003                  *   means the user thread should not be woken up);
1004                  * - the driver ignores tx interrupts unless netmap_mitigate=0
1005                  *   or the slot has the DD bit set.
1006                  */
1007                 if (!netmap_mitigate ||
1008                     (kring->nr_kflags < kring->nkr_num_slots &&
1009                      txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1010                         netmap_tx_irq(adapter->ifp, txr->me);
1011                 }
1012                 return;
1013         }
1014 #endif /* DEV_NETMAP */
1015
1016         if (txr->tx_avail == txr->num_desc) {
1017                 txr->busy = 0;
1018                 return;
1019         }
1020
1021         /* Get work starting point */
1022         work = txr->next_to_clean;
1023         buf = &txr->tx_buffers[work];
1024         txd = &txr->tx_base[work];
1025         work -= txr->num_desc; /* The distance to ring end */
1026         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1027             BUS_DMASYNC_POSTREAD);
1028
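        /*
         * "work" was biased by -num_desc above, so it counts up from a
         * negative offset; when an increment brings it to zero we have
         * wrapped and buf/txd are reset to the start of the ring.
         */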
1029         do {
1030                 union ixgbe_adv_tx_desc *eop = buf->eop;
1031                 if (eop == NULL) /* No work */
1032                         break;
1033
1034                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1035                         break;  /* I/O not complete */
1036
1037                 if (buf->m_head) {
1038                         txr->bytes += buf->m_head->m_pkthdr.len;
1039                         bus_dmamap_sync(txr->txtag, buf->map,
1040                             BUS_DMASYNC_POSTWRITE);
1041                         bus_dmamap_unload(txr->txtag, buf->map);
1042                         m_freem(buf->m_head);
1043                         buf->m_head = NULL;
1044                 }
1045                 buf->eop = NULL;
1046                 ++txr->tx_avail;
1047
1048                 /* If the packet spans multiple descriptors, clean them all */
1049                 while (txd != eop) {
1050                         ++txd;
1051                         ++buf;
1052                         ++work;
1053                         /* wrap the ring? */
1054                         if (__predict_false(!work)) {
1055                                 work -= txr->num_desc;
1056                                 buf = txr->tx_buffers;
1057                                 txd = txr->tx_base;
1058                         }
1059                         if (buf->m_head) {
1060                                 txr->bytes += buf->m_head->m_pkthdr.len;
1061                                 bus_dmamap_sync(txr->txtag, buf->map,
1062                                     BUS_DMASYNC_POSTWRITE);
1063                                 bus_dmamap_unload(txr->txtag, buf->map);
1064                                 m_freem(buf->m_head);
1065                                 buf->m_head = NULL;
1066                         }
1067                         ++txr->tx_avail;
1068                         buf->eop = NULL;
1069
1070                 }
1071                 ++txr->packets;
1072                 ++processed;
1073
1074                 /* Try the next packet */
1075                 ++txd;
1076                 ++buf;
1077                 ++work;
1078                 /* reset with a wrap */
1079                 if (__predict_false(!work)) {
1080                         work -= txr->num_desc;
1081                         buf = txr->tx_buffers;
1082                         txd = txr->tx_base;
1083                 }
1084                 prefetch(txd);
1085         } while (__predict_true(--limit));
1086
1087         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1088             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1089
1090         work += txr->num_desc;
1091         txr->next_to_clean = work;
1092
1093         /*
1094          * Queue hang detection: we know there is work
1095          * outstanding, or the early return above would have
1096          * been taken, so increment busy if nothing was
1097          * cleaned on this pass; the local timer checks the
1098          * counter and marks the queue HUNG once it exceeds
1099          * the maximum number of attempts.
1100          */
1101         if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1102                 ++txr->busy;
1103         /*
1104          * If anything was cleaned, reset the state to 1;
1105          * note this also clears HUNG if it was set.
1106          */
1107         if (processed)
1108                 txr->busy = 1;
1109
1110         if (txr->tx_avail == txr->num_desc)
1111                 txr->busy = 0;
1112
1113         return;
1114 } /* ixgbe_txeof */
1115
1116 /************************************************************************
1117  * ixgbe_rsc_count
1118  *
1119  *   Used to detect a descriptor that has been merged by Hardware RSC.
1120  ************************************************************************/
1121 static inline u32
1122 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1123 {
1124         return (le32toh(rx->wb.lower.lo_dword.data) &
1125             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1126 } /* ixgbe_rsc_count */
1127
1128 /************************************************************************
1129  * ixgbe_setup_hw_rsc
1130  *
1131  *   Initialize the Hardware RSC (LRO) feature on 82599
1132  *   for an RX ring; it is toggled by the LRO capability
1133  *   even though it is transparent to the stack.
1134  *
1135  *   NOTE: Since this HW feature only works with IPv4 and
1136  *         testing has shown soft LRO to be as effective,
1137  *         this feature will be disabled by default.
1138  ************************************************************************/
1139 static void
1140 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1141 {
1142         struct adapter  *adapter = rxr->adapter;
1143         struct ixgbe_hw *hw = &adapter->hw;
1144         u32             rscctrl, rdrxctl;
1145
1146         /* If turning LRO/RSC off we need to disable it */
1147         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1148                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1149                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl & ~IXGBE_RSCCTL_RSCEN);
1150                 return;
1151         }
1152
1153         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1154         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1155 #ifdef DEV_NETMAP
1156         /* Always strip CRC unless Netmap disabled it */
1157         if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1158             !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1159             ix_crcstrip)
1160 #endif /* DEV_NETMAP */
1161                 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1162         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1163         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1164
1165         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1166         rscctrl |= IXGBE_RSCCTL_RSCEN;
1167         /*
1168          * Limit the total number of descriptors that
1169          * can be combined, so it does not exceed 64K
1170          */
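        /*
         * For example: 16 x 2KB clusters = 32KB, 8 x page-size (4KB)
         * clusters = 32KB, 4 x 9KB clusters = 36KB, and a single 16KB
         * cluster, all safely under the 64KB RSC limit.
         */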
1171         if (rxr->mbuf_sz == MCLBYTES)
1172                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1173         else if (rxr->mbuf_sz == MJUMPAGESIZE)
1174                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1175         else if (rxr->mbuf_sz == MJUM9BYTES)
1176                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1177         else  /* Using 16K cluster */
1178                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1179
1180         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1181
1182         /* Enable TCP header recognition */
1183         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1184             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1185
1186         /* Disable RSC for ACK packets */
1187         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1188             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1189
1190         rxr->hw_rsc = TRUE;
1191 } /* ixgbe_setup_hw_rsc */
1192
1193 /************************************************************************
1194  * ixgbe_refresh_mbufs
1195  *
1196  *   Refresh mbuf buffers for RX descriptor rings
1197  *    - now keeps its own state, so discards due to resource
1198  *      exhaustion are unnecessary. If an mbuf cannot be obtained
1199  *      it simply returns, keeping its placeholder, and can be
1200  *      called again later to retry.
1201  ************************************************************************/
1202 static void
1203 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1204 {
1205         struct adapter      *adapter = rxr->adapter;
1206         struct ixgbe_rx_buf *rxbuf;
1207         struct mbuf         *mp;
1208         bus_dma_segment_t   seg[1];
1209         int                 i, j, nsegs, error;
1210         bool                refreshed = FALSE;
1211
1212         i = j = rxr->next_to_refresh;
1213         /* j leads i by one; the loop stops when j reaches 'limit' */
1214         if (++j == rxr->num_desc)
1215                 j = 0;
1216
1217         while (j != limit) {
1218                 rxbuf = &rxr->rx_buffers[i];
1219                 if (rxbuf->buf == NULL) {
1220                         mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1221                             rxr->mbuf_sz);
1222                         if (mp == NULL)
1223                                 goto update;
1224                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1225                                 m_adj(mp, ETHER_ALIGN);
1226                 } else
1227                         mp = rxbuf->buf;
1228
1229                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1230
1231                 /* If we're dealing with an mbuf that was copied rather
1232                  * than replaced, there's no need to go through busdma.
1233                  */
1234                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1235                         /* Get the memory mapping */
1236                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1237                         error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
1238                             mp, seg, &nsegs, BUS_DMA_NOWAIT);
1239                         if (error != 0) {
1240                                 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1241                                 m_free(mp);
1242                                 rxbuf->buf = NULL;
1243                                 goto update;
1244                         }
1245                         rxbuf->buf = mp;
1246                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1247                             BUS_DMASYNC_PREREAD);
1248                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1249                             htole64(seg[0].ds_addr);
1250                 } else {
1251                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1252                         rxbuf->flags &= ~IXGBE_RX_COPY;
1253                 }
1254
1255                 refreshed = TRUE;
1256                 /* Next is precalculated */
1257                 i = j;
1258                 rxr->next_to_refresh = i;
1259                 if (++j == rxr->num_desc)
1260                         j = 0;
1261         }
1262
1263 update:
1264         if (refreshed) /* Update hardware tail index */
1265                 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1266
1267         return;
1268 } /* ixgbe_refresh_mbufs */
1269
1270 /************************************************************************
1271  * ixgbe_allocate_receive_buffers
1272  *
1273  *   Allocate memory for rx_buffer structures. Since we use one
1274  *   rx_buffer per received packet, the maximum number of rx_buffer's
1275  *   that we'll need is equal to the number of receive descriptors
1276  *   that we've allocated.
1277  ************************************************************************/
1278 static int
1279 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1280 {
1281         struct adapter      *adapter = rxr->adapter;
1282         device_t            dev = adapter->dev;
1283         struct ixgbe_rx_buf *rxbuf;
1284         int                 bsize, error;
1285
1286         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1287         rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_IXGBE,
1288             M_NOWAIT | M_ZERO);
1289         if (!rxr->rx_buffers) {
1290                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1291                 error = ENOMEM;
1292                 goto fail;
1293         }
1294
1295         error = bus_dma_tag_create(
1296                  /*      parent */ bus_get_dma_tag(dev),
1297                  /*   alignment */ 1,
1298                  /*      bounds */ 0,
1299                  /*     lowaddr */ BUS_SPACE_MAXADDR,
1300                  /*    highaddr */ BUS_SPACE_MAXADDR,
1301                  /*      filter */ NULL,
1302                  /*   filterarg */ NULL,
1303                  /*     maxsize */ MJUM16BYTES,
1304                  /*   nsegments */ 1,
1305                  /*  maxsegsize */ MJUM16BYTES,
1306                  /*       flags */ 0,
1307                  /*    lockfunc */ NULL,
1308                  /* lockfuncarg */ NULL,
1309                                    &rxr->ptag);
1310         if (error) {
1311                 device_printf(dev, "Unable to create RX DMA tag\n");
1312                 goto fail;
1313         }
1314
1315         for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1316                 rxbuf = &rxr->rx_buffers[i];
1317                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1318                 if (error) {
1319                         device_printf(dev, "Unable to create RX dma map\n");
1320                         goto fail;
1321                 }
1322         }
1323
1324         return (0);
1325
1326 fail:
1327         /* Frees all, but can handle partial completion */
1328         ixgbe_free_receive_structures(adapter);
1329
1330         return (error);
1331 } /* ixgbe_allocate_receive_buffers */
1332
1333 /************************************************************************
1334  * ixgbe_free_receive_ring
1335  ************************************************************************/
1336 static void
1337 ixgbe_free_receive_ring(struct rx_ring *rxr)
1338 {
1339         struct ixgbe_rx_buf *rxbuf;
1340
1341         for (int i = 0; i < rxr->num_desc; i++) {
1342                 rxbuf = &rxr->rx_buffers[i];
1343                 if (rxbuf->buf != NULL) {
1344                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1345                             BUS_DMASYNC_POSTREAD);
1346                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1347                         rxbuf->buf->m_flags |= M_PKTHDR;
1348                         m_freem(rxbuf->buf);
1349                         rxbuf->buf = NULL;
1350                         rxbuf->flags = 0;
1351                 }
1352         }
1353 } /* ixgbe_free_receive_ring */
1354
1355 /************************************************************************
1356  * ixgbe_setup_receive_ring
1357  *
1358  *   Initialize a receive ring and its buffers.
1359  ************************************************************************/
1360 static int
1361 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1362 {
1363         struct adapter        *adapter;
1364         struct ifnet          *ifp;
1365         device_t              dev;
1366         struct ixgbe_rx_buf   *rxbuf;
1367         struct lro_ctrl       *lro = &rxr->lro;
1368 #ifdef DEV_NETMAP
1369         struct netmap_adapter *na = NA(rxr->adapter->ifp);
1370         struct netmap_slot    *slot;
1371 #endif /* DEV_NETMAP */
1372         bus_dma_segment_t     seg[1];
1373         int                   rsize, nsegs, error = 0;
1374
1375         adapter = rxr->adapter;
1376         ifp = adapter->ifp;
1377         dev = adapter->dev;
1378
1379         /* Clear the ring contents */
1380         IXGBE_RX_LOCK(rxr);
1381
1382 #ifdef DEV_NETMAP
1383         if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1384                 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1385 #endif /* DEV_NETMAP */
1386
1387         rsize = roundup2(adapter->num_rx_desc *
1388             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1389         bzero((void *)rxr->rx_base, rsize);
1390         /* Cache the size */
1391         rxr->mbuf_sz = adapter->rx_mbuf_sz;
1392
1393         /* Free current RX buffer structs and their mbufs */
1394         ixgbe_free_receive_ring(rxr);
1395
1396         /* Now replenish the mbufs */
1397         for (int j = 0; j != rxr->num_desc; ++j) {
1398                 struct mbuf *mp;
1399
1400                 rxbuf = &rxr->rx_buffers[j];
1401
1402 #ifdef DEV_NETMAP
1403                 /*
1404                  * In netmap mode, fill the map and set the buffer
1405                  * address in the NIC ring, considering the offset
1406                  * between the netmap and NIC rings (see comment in
1407                  * ixgbe_setup_transmit_ring() ). No need to allocate
1408                  * an mbuf, so end the block with a continue;
1409                  */
1410                 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1411                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1412                         uint64_t paddr;
1413                         void *addr;
1414
1415                         addr = PNMB(na, slot + sj, &paddr);
1416                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1417                         /* Update descriptor and the cached value */
1418                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1419                         rxbuf->addr = htole64(paddr);
1420                         continue;
1421                 }
1422 #endif /* DEV_NETMAP */
1423
1424                 rxbuf->flags = 0;
1425                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1426                     adapter->rx_mbuf_sz);
1427                 if (rxbuf->buf == NULL) {
1428                         error = ENOBUFS;
1429                         goto fail;
1430                 }
1431                 mp = rxbuf->buf;
1432                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1433                 /* Get the memory mapping */
1434                 error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
1435                     &nsegs, BUS_DMA_NOWAIT);
1436                 if (error != 0)
1437                         goto fail;
1438                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
1439                 /* Update the descriptor and the cached value */
1440                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1441                 rxbuf->addr = htole64(seg[0].ds_addr);
1442         }
1443
1444
1445         /* Setup our descriptor indices */
1446         rxr->next_to_check = 0;
1447         rxr->next_to_refresh = 0;
1448         rxr->lro_enabled = FALSE;
1449         rxr->rx_copies = 0;
1450         rxr->rx_bytes = 0;
1451         rxr->vtag_strip = FALSE;
1452
1453         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1454             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1455
1456         /*
1457          * Now set up the LRO interface
1458          */
1459         if (ixgbe_rsc_enable)
1460                 ixgbe_setup_hw_rsc(rxr);
1461         else if (ifp->if_capenable & IFCAP_LRO) {
1462                 int err = tcp_lro_init(lro);
1463                 if (err) {
1464                         device_printf(dev, "LRO Initialization failed!\n");
1465                         goto fail;
1466                 }
1467                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1468                 rxr->lro_enabled = TRUE;
1469                 lro->ifp = adapter->ifp;
1470         }
1471
1472         IXGBE_RX_UNLOCK(rxr);
1473
1474         return (0);
1475
1476 fail:
1477         ixgbe_free_receive_ring(rxr);
1478         IXGBE_RX_UNLOCK(rxr);
1479
1480         return (error);
1481 } /* ixgbe_setup_receive_ring */
1482
1483 /************************************************************************
1484  * ixgbe_setup_receive_structures - Initialize all receive rings.
1485  ************************************************************************/
1486 int
1487 ixgbe_setup_receive_structures(struct adapter *adapter)
1488 {
1489         struct rx_ring *rxr = adapter->rx_rings;
1490         int            j;
1491
1492         for (j = 0; j < adapter->num_queues; j++, rxr++)
1493                 if (ixgbe_setup_receive_ring(rxr))
1494                         goto fail;
1495
1496         return (0);
1497 fail:
1498         /*
1499          * Free the RX buffers allocated so far.  We only need to handle
1500          * the rings that completed; the failing ring ('j') has already
1501          * cleaned up after itself, so it is the terminus.
1502          */
1503         for (int i = 0; i < j; ++i) {
1504                 rxr = &adapter->rx_rings[i];
1505                 ixgbe_free_receive_ring(rxr);
1506         }
1507
1508         return (ENOBUFS);
1509 } /* ixgbe_setup_receive_structures */
1510
1511
1512 /************************************************************************
1513  * ixgbe_free_receive_structures - Free all receive rings.
1514  ************************************************************************/
1515 void
1516 ixgbe_free_receive_structures(struct adapter *adapter)
1517 {
1518         struct rx_ring *rxr = adapter->rx_rings;
1519         struct lro_ctrl *lro;
1520
1521         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1522
1523         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1524                 lro = &rxr->lro;
1525                 ixgbe_free_receive_buffers(rxr);
1526                 /* Free LRO memory */
1527                 tcp_lro_free(lro);
1528                 /* Free the ring memory as well */
1529                 ixgbe_dma_free(adapter, &rxr->rxdma);
1530         }
1531
1532         free(adapter->rx_rings, M_IXGBE);
1533 } /* ixgbe_free_receive_structures */
1534
1535
1536 /************************************************************************
1537  * ixgbe_free_receive_buffers - Free receive ring data structures
1538  ************************************************************************/
1539 void
1540 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1541 {
1542         struct adapter      *adapter = rxr->adapter;
1543         struct ixgbe_rx_buf *rxbuf;
1544
1545         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1546
1547         /* Cleanup any existing buffers */
1548         if (rxr->rx_buffers != NULL) {
1549                 for (int i = 0; i < adapter->num_rx_desc; i++) {
1550                         rxbuf = &rxr->rx_buffers[i];
1551                         if (rxbuf->buf != NULL) {
1552                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1553                                     BUS_DMASYNC_POSTREAD);
1554                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1555                                 rxbuf->buf->m_flags |= M_PKTHDR;
1556                                 m_freem(rxbuf->buf);
1557                         }
1558                         rxbuf->buf = NULL;
1559                         if (rxbuf->pmap != NULL) {
1560                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1561                                 rxbuf->pmap = NULL;
1562                         }
1563                 }
1564                 free(rxr->rx_buffers, M_IXGBE);
1565                 rxr->rx_buffers = NULL;
1568         }
1569
1570         if (rxr->ptag != NULL) {
1571                 bus_dma_tag_destroy(rxr->ptag);
1572                 rxr->ptag = NULL;
1573         }
1574
1575         return;
1576 } /* ixgbe_free_receive_buffers */
1577
1578 /************************************************************************
1579  * ixgbe_rx_input
1580  ************************************************************************/
1581 static __inline void
1582 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1583     u32 ptype)
1584 {
1585         /*
1586          * At the moment LRO is only for IPv4/IPv6 TCP packets whose TCP
1587          * checksum was computed by hardware and that carry no VLAN tag in
1588          * the Ethernet header.  For IPv6 we do not yet support ext. headers.
1589          */
1590         if (rxr->lro_enabled &&
1591             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1592             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1593             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1594              (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1595              (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1596              (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1597             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1598             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1599                 /*
1600                  * Fall through and hand the packet to the
1601                  * stack if:
1602                  *  - there are no LRO resources available, or
1603                  *  - the LRO enqueue fails
1604                  */
1605                 if (rxr->lro.lro_cnt != 0)
1606                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1607                                 return;
1608         }
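        /*
         * Drop the RX lock around if_input(); the stack may re-enter
         * the driver (for example to transmit a response), and holding
         * the ring lock across that call could recurse or invert lock
         * ordering.
         */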
1609         IXGBE_RX_UNLOCK(rxr);
1610         (*ifp->if_input)(ifp, m);
1611         IXGBE_RX_LOCK(rxr);
1612 } /* ixgbe_rx_input */
1613
1614 /************************************************************************
1615  * ixgbe_rx_discard
1616  ************************************************************************/
1617 static __inline void
1618 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1619 {
1620         struct ixgbe_rx_buf *rbuf;
1621
1622         rbuf = &rxr->rx_buffers[i];
1623
1624         /*
1625          * With advanced descriptors the writeback
1626          * clobbers the buffer addresses, so it's
1627          * easier to just free the existing mbufs and
1628          * take the normal refresh path to get new
1629          * buffers and mappings.
1630          */
1631
1632         if (rbuf->fmp != NULL) { /* Partial chain? */
1633                 rbuf->fmp->m_flags |= M_PKTHDR;
1634                 m_freem(rbuf->fmp);
1635                 rbuf->fmp = NULL;
1636                 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1637         } else if (rbuf->buf) {
1638                 m_free(rbuf->buf);
1639                 rbuf->buf = NULL;
1640         }
1641         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1642
1643         rbuf->flags = 0;
1644
1645         return;
1646 } /* ixgbe_rx_discard */
1647
1648
1649 /************************************************************************
1650  * ixgbe_rxeof
1651  *
1652  *   This routine executes in interrupt context. It replenishes
1653  *   the mbufs in the descriptor and sends data which has been
1654  *   dma'ed into host memory to upper layer.
1655  *
1656  *   Return TRUE for more work, FALSE for all clean.
1657  ************************************************************************/
1658 bool
1659 ixgbe_rxeof(struct ix_queue *que)
1660 {
1661         struct adapter          *adapter = que->adapter;
1662         struct rx_ring          *rxr = que->rxr;
1663         struct ifnet            *ifp = adapter->ifp;
1664         struct lro_ctrl         *lro = &rxr->lro;
1665 #if __FreeBSD_version < 1100105
1666         struct lro_entry        *queued;
1667 #endif
1668         union ixgbe_adv_rx_desc *cur;
1669         struct ixgbe_rx_buf     *rbuf, *nbuf;
1670         int                     i, nextp, processed = 0;
1671         u32                     staterr = 0;
1672         u32                     count = adapter->rx_process_limit;
1673         u16                     pkt_info;
1674
1675         IXGBE_RX_LOCK(rxr);
1676
1677 #ifdef DEV_NETMAP
1678         if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1679                 /* Same as the txeof routine: wakeup clients on intr. */
1680                 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1681                         IXGBE_RX_UNLOCK(rxr);
1682                         return (FALSE);
1683                 }
1684         }
1685 #endif /* DEV_NETMAP */
1686
1687         for (i = rxr->next_to_check; count != 0;) {
1688                 struct mbuf *sendmp, *mp;
1689                 u32         rsc, ptype;
1690                 u16         len;
1691                 u16         vtag = 0;
1692                 bool        eop;
1693
1694                 /* Sync the ring. */
1695                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1696                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1697
1698                 cur = &rxr->rx_base[i];
1699                 staterr = le32toh(cur->wb.upper.status_error);
1700                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1701
1702                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1703                         break;
1704                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1705                         break;
1706
1707                 count--;
1708                 sendmp = NULL;
1709                 nbuf = NULL;
1710                 rsc = 0;
1711                 cur->wb.upper.status_error = 0;
1712                 rbuf = &rxr->rx_buffers[i];
1713                 mp = rbuf->buf;
1714
1715                 len = le16toh(cur->wb.upper.length);
1716                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1717                     IXGBE_RXDADV_PKTTYPE_MASK;
1718                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1719
1720                 /* Make sure bad packets are discarded */
1721                 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1722                         rxr->rx_discarded++;
1723                         ixgbe_rx_discard(rxr, i);
1724                         goto next_desc;
1725                 }
1726
1727                 /*
1728                  * On the 82599, which supports a hardware
1729                  * LRO (called HW RSC), packets need not be
1730                  * fragmented across sequential descriptors;
1731                  * instead, the next descriptor is indicated
1732                  * in bits of this descriptor.  This also
1733                  * means we may process more than one packet
1734                  * at a time, something that was never true
1735                  * before; it required eliminating the global
1736                  * chain pointers in favor of what is done
1737                  * here.  -jfv
1738                  */
1739                 if (!eop) {
1740                         /*
1741                          * Figure out the next descriptor
1742                          * of this frame.
1743                          */
1744                         if (rxr->hw_rsc == TRUE) {
1745                                 rsc = ixgbe_rsc_count(cur);
1746                                 rxr->rsc_num += (rsc - 1);
1747                         }
1748                         if (rsc) { /* Get hardware index */
1749                                 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1750                                     IXGBE_RXDADV_NEXTP_SHIFT);
1751                         } else { /* Just sequential */
1752                                 nextp = i + 1;
1753                                 if (nextp == adapter->num_rx_desc)
1754                                         nextp = 0;
1755                         }
1756                         nbuf = &rxr->rx_buffers[nextp];
1757                         prefetch(nbuf);
1758                 }
1759                 /*
1760                  * Rather than using the fmp/lmp global pointers
1761                  * we now keep the head of a packet chain in the
1762                  * buffer struct and pass this along from one
1763                  * descriptor to the next, until we get EOP.
1764                  */
1765                 mp->m_len = len;
1766                 /*
1767                  * See if there is a stored chain head; if so,
1768                  * this buffer continues that packet.
1769                  */
1770                 sendmp = rbuf->fmp;
1771                 if (sendmp != NULL) {  /* secondary frag */
1772                         rbuf->buf = rbuf->fmp = NULL;
1773                         mp->m_flags &= ~M_PKTHDR;
1774                         sendmp->m_pkthdr.len += mp->m_len;
1775                 } else {
1776                         /*
1777                          * Optimize.  This might be a small packet,
1778                          * maybe just a TCP ACK.  Do a fast copy that
1779                          * is cache aligned into a new mbuf, and
1780                          * leave the old mbuf+cluster for re-use.
1781                          */
1782                         if (eop && len <= IXGBE_RX_COPY_LEN) {
1783                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1784                                 if (sendmp != NULL) {
1785                                         sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1786                                         ixgbe_bcopy(mp->m_data, sendmp->m_data,
1787                                             len);
1788                                         sendmp->m_len = len;
1789                                         rxr->rx_copies++;
1790                                         rbuf->flags |= IXGBE_RX_COPY;
1791                                 }
1792                         }
1793                         if (sendmp == NULL) {
1794                                 rbuf->buf = rbuf->fmp = NULL;
1795                                 sendmp = mp;
1796                         }
1797
1798                         /* first desc of a non-ps chain */
1799                         sendmp->m_flags |= M_PKTHDR;
1800                         sendmp->m_pkthdr.len = mp->m_len;
1801                 }
1802                 ++processed;
1803
1804                 /* Pass the head pointer on */
1805                 if (eop == 0) {
1806                         nbuf->fmp = sendmp;
1807                         sendmp = NULL;
1808                         mp->m_next = nbuf->buf;
1809                 } else { /* Sending this frame */
1810                         sendmp->m_pkthdr.rcvif = ifp;
1811                         rxr->rx_packets++;
1812                         /* capture data for AIM */
1813                         rxr->bytes += sendmp->m_pkthdr.len;
1814                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1815                         /* Process vlan info */
1816                         if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1817                                 vtag = le16toh(cur->wb.upper.vlan);
1818                         if (vtag) {
1819                                 sendmp->m_pkthdr.ether_vtag = vtag;
1820                                 sendmp->m_flags |= M_VLANTAG;
1821                         }
1822                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1823                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
1824
1825                         /*
1826                          * In multiqueue mode the RXCSUM.PCSD bit is set
1827                          * and never cleared, which means the RSS hash is
1828                          * available for use.
1829                          */
1830                         if (adapter->num_queues > 1) {
1831                                 sendmp->m_pkthdr.flowid =
1832                                     le32toh(cur->wb.lower.hi_dword.rss);
1833                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1834                                 case IXGBE_RXDADV_RSSTYPE_IPV4:
1835                                         M_HASHTYPE_SET(sendmp,
1836                                             M_HASHTYPE_RSS_IPV4);
1837                                         break;
1838                                 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1839                                         M_HASHTYPE_SET(sendmp,
1840                                             M_HASHTYPE_RSS_TCP_IPV4);
1841                                         break;
1842                                 case IXGBE_RXDADV_RSSTYPE_IPV6:
1843                                         M_HASHTYPE_SET(sendmp,
1844                                             M_HASHTYPE_RSS_IPV6);
1845                                         break;
1846                                 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1847                                         M_HASHTYPE_SET(sendmp,
1848                                             M_HASHTYPE_RSS_TCP_IPV6);
1849                                         break;
1850                                 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1851                                         M_HASHTYPE_SET(sendmp,
1852                                             M_HASHTYPE_RSS_IPV6_EX);
1853                                         break;
1854                                 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1855                                         M_HASHTYPE_SET(sendmp,
1856                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
1857                                         break;
1858 #if __FreeBSD_version > 1100000
1859                                 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1860                                         M_HASHTYPE_SET(sendmp,
1861                                             M_HASHTYPE_RSS_UDP_IPV4);
1862                                         break;
1863                                 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1864                                         M_HASHTYPE_SET(sendmp,
1865                                             M_HASHTYPE_RSS_UDP_IPV6);
1866                                         break;
1867                                 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1868                                         M_HASHTYPE_SET(sendmp,
1869                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
1870                                         break;
1871 #endif
1872                                 default:
1873 #if __FreeBSD_version < 1100116
1874                                         M_HASHTYPE_SET(sendmp,
1875                                             M_HASHTYPE_OPAQUE);
1876 #else
1877                                         M_HASHTYPE_SET(sendmp,
1878                                             M_HASHTYPE_OPAQUE_HASH);
1879 #endif
1880                                 }
1881                         } else {
1882                                 sendmp->m_pkthdr.flowid = que->msix;
1883                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1884                         }
1885                 }
1886 next_desc:
1887                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1888                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1889
1890                 /* Advance our pointers to the next descriptor. */
1891                 if (++i == rxr->num_desc)
1892                         i = 0;
1893
1894                 /* Now send to the stack or do LRO */
1895                 if (sendmp != NULL) {
1896                         rxr->next_to_check = i;
1897                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1898                         i = rxr->next_to_check;
1899                 }
1900
1901                 /* Refresh mbufs every eight processed descriptors */
1902                 if (processed == 8) {
1903                         ixgbe_refresh_mbufs(rxr, i);
1904                         processed = 0;
1905                 }
1906         }
1907
1908         /* Refresh any remaining buf structs */
1909         if (ixgbe_rx_unrefreshed(rxr))
1910                 ixgbe_refresh_mbufs(rxr, i);
1911
1912         rxr->next_to_check = i;
1913
1914         /*
1915          * Flush any outstanding LRO work
1916          */
1917 #if __FreeBSD_version < 1100105
1918         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1919                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1920                 tcp_lro_flush(lro, queued);
1921         }
1922 #else
1923         tcp_lro_flush_all(lro);
1924 #endif
1925
1926         IXGBE_RX_UNLOCK(rxr);
1927
1928         /*
1929          * Still have cleaning to do?
1930          */
1931         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1932                 return (TRUE);
1933
1934         return (FALSE);
1935 } /* ixgbe_rxeof */
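/*
 * Illustrative sketch, not part of the driver: one way a queue service
 * routine might consume ixgbe_rxeof()'s TRUE/FALSE return value.  The
 * 'reschedule' callback is hypothetical; the real MSI-X handler lives in
 * ixgbe.c and re-queues its task or re-enables the queue interrupt itself.
 */
static void
example_que_service(struct ix_queue *que, void (*reschedule)(struct ix_queue *))
{
        bool more;

        more = ixgbe_rxeof(que);        /* TRUE = descriptors still pending */
        if (more)
                reschedule(que);        /* keep cleaning before re-arming */
}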
1936
1937
1938 /************************************************************************
1939  * ixgbe_rx_checksum
1940  *
1941  *   Verify that the hardware indicated that the checksum is valid.
1942  *   Inform the stack of the checksum status so that it does not
1943  *   spend time re-verifying it.
1944  ************************************************************************/
1945 static void
1946 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1947 {
1948         u16  status = (u16)staterr;
1949         u8   errors = (u8)(staterr >> 24);
1950         bool sctp = false;
1951
1952         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1953             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1954                 sctp = true;
1955
1956         /* IPv4 checksum */
1957         if (status & IXGBE_RXD_STAT_IPCS) {
1958                 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
1959                 /* IP Checksum Good */
1960                 if (!(errors & IXGBE_RXD_ERR_IPE))
1961                         mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
1962         }
1963         /* TCP/UDP/SCTP checksum */
1964         if (status & IXGBE_RXD_STAT_L4CS) {
1965                 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
1966                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1967                         mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
1968                         if (!sctp)
1969                                 mp->m_pkthdr.csum_data = htons(0xffff);
1970                 }
1971         }
1972 } /* ixgbe_rx_checksum */
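/*
 * Illustrative sketch, not part of the driver: how a consumer of the mbuf
 * might test the flags set by ixgbe_rx_checksum() before deciding to skip
 * a software L4 checksum computation.  The helper name is hypothetical.
 */
static inline bool
example_l4_csum_offloaded(struct mbuf *mp)
{
        return ((mp->m_pkthdr.csum_flags & (CSUM_L4_CALC | CSUM_L4_VALID)) ==
            (CSUM_L4_CALC | CSUM_L4_VALID));
}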
1973
1974 /************************************************************************
1975  * ixgbe_dmamap_cb - Record the bus address of a loaded DMA mapping.
1976  ************************************************************************/
1977 static void
1978 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1979 {
1980         if (error)
1981                 return;
1982         *(bus_addr_t *)arg = segs->ds_addr;
1983
1984         return;
1985 } /* ixgbe_dmamap_cb */
1986
1987 /************************************************************************
1988  * ixgbe_dma_malloc
1989  ************************************************************************/
1990 static int
1991 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
1992                  struct ixgbe_dma_alloc *dma, int mapflags)
1993 {
1994         device_t dev = adapter->dev;
1995         int      r;
1996
1997         r = bus_dma_tag_create(
1998              /*      parent */ bus_get_dma_tag(adapter->dev),
1999              /*   alignment */ DBA_ALIGN,
2000              /*      bounds */ 0,
2001              /*     lowaddr */ BUS_SPACE_MAXADDR,
2002              /*    highaddr */ BUS_SPACE_MAXADDR,
2003              /*      filter */ NULL,
2004              /*   filterarg */ NULL,
2005              /*     maxsize */ size,
2006              /*   nsegments */ 1,
2007              /*  maxsegsize */ size,
2008              /*       flags */ BUS_DMA_ALLOCNOW,
2009              /*    lockfunc */ NULL,
2010              /* lockfuncarg */ NULL,
2011                                &dma->dma_tag);
2012         if (r != 0) {
2013                 device_printf(dev,
2014                     "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",
2015                     r);
2016                 goto fail_0;
2017         }
2018         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2019             BUS_DMA_NOWAIT, &dma->dma_map);
2020         if (r != 0) {
2021                 device_printf(dev,
2022                     "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);
2023                 goto fail_1;
2024         }
2025         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
2026             ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2027         if (r != 0) {
2028                 device_printf(dev,
2029                     "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
2030                 goto fail_2;
2031         }
2032         dma->dma_size = size;
2033
2034         return (0);
2035 fail_2:
2036         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2037 fail_1:
2038         bus_dma_tag_destroy(dma->dma_tag);
2039 fail_0:
2040         dma->dma_tag = NULL;
2041
2042         return (r);
2043 } /* ixgbe_dma_malloc */
2044
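/************************************************************************
 * ixgbe_dma_free
 ************************************************************************/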
2045 static void
2046 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2047 {
2048         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2049             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2050         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2051         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2052         bus_dma_tag_destroy(dma->dma_tag);
2053 } /* ixgbe_dma_free */
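/*
 * Illustrative sketch, not part of the driver: the typical pairing of
 * ixgbe_dma_malloc() and ixgbe_dma_free() for a descriptor ring, as done
 * for real in ixgbe_allocate_queues() below.  The ring size used here is
 * hypothetical.
 */
static int
example_ring_dma(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
        bus_size_t size;
        int        error;

        size = roundup2(256 * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
        error = ixgbe_dma_malloc(adapter, size, dma, BUS_DMA_NOWAIT);
        if (error != 0)
                return (error);
        /* ... descriptors live at dma->dma_vaddr / dma->dma_paddr ... */
        ixgbe_dma_free(adapter, dma);

        return (0);
}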
2054
2055
2056 /************************************************************************
2057  * ixgbe_allocate_queues
2058  *
2059  *   Allocate memory for the transmit and receive rings, and then
2060  *   the descriptors associated with each.  Called only once at attach.
2061  ************************************************************************/
2062 int
2063 ixgbe_allocate_queues(struct adapter *adapter)
2064 {
2065         device_t        dev = adapter->dev;
2066         struct ix_queue *que;
2067         struct tx_ring  *txr;
2068         struct rx_ring  *rxr;
2069         int             rsize, tsize, error = IXGBE_SUCCESS;
2070         int             txconf = 0, rxconf = 0;
2071
2072         /* First, allocate the top level queue structs */
2073         adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2074             adapter->num_queues, M_IXGBE, M_NOWAIT | M_ZERO);
2075         if (!adapter->queues) {
2076                 device_printf(dev, "Unable to allocate queue memory\n");
2077                 error = ENOMEM;
2078                 goto fail;
2079         }
2080
2081         /* Second, allocate the TX ring struct memory */
2082         adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2083             adapter->num_queues, M_IXGBE, M_NOWAIT | M_ZERO);
2084         if (!adapter->tx_rings) {
2085                 device_printf(dev, "Unable to allocate TX ring memory\n");
2086                 error = ENOMEM;
2087                 goto tx_fail;
2088         }
2089
2090         /* Third, allocate the RX ring */
2091         adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2092             adapter->num_queues, M_IXGBE, M_NOWAIT | M_ZERO);
2093         if (!adapter->rx_rings) {
2094                 device_printf(dev, "Unable to allocate RX ring memory\n");
2095                 error = ENOMEM;
2096                 goto rx_fail;
2097         }
2098
2099         /* For the ring itself */
2100         tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2101             DBA_ALIGN);
2102
2103         /*
2104          * Now set up the TX queues.  txconf is needed to handle the
2105          * possibility that things fail midcourse and we need to
2106          * unwind the memory gracefully.
2107          */
2108         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2109                 /* Set up some basics */
2110                 txr = &adapter->tx_rings[i];
2111                 txr->adapter = adapter;
2112                 txr->br = NULL;
2113                 /* In case SR-IOV is enabled, align the index properly */
2114                 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2115                     i);
2116                 txr->num_desc = adapter->num_tx_desc;
2117
2118                 /* Initialize the TX side lock */
2119                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2120                     device_get_nameunit(dev), txr->me);
2121                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2122
2123                 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2124                     BUS_DMA_NOWAIT)) {
2125                         device_printf(dev,
2126                             "Unable to allocate TX Descriptor memory\n");
2127                         error = ENOMEM;
2128                         goto err_tx_desc;
2129                 }
2130                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2131                 bzero((void *)txr->tx_base, tsize);
2132
2133                 /* Now allocate transmit buffers for the ring */
2134                 if (ixgbe_allocate_transmit_buffers(txr)) {
2135                         device_printf(dev,
2136                             "Critical Failure setting up transmit buffers\n");
2137                         error = ENOMEM;
2138                         goto err_tx_desc;
2139                 }
2140                 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2141                         /* Allocate a buf ring */
2142                         txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_IXGBE,
2143                             M_WAITOK, &txr->tx_mtx);
2144                         if (txr->br == NULL) {
2145                                 device_printf(dev,
2146                                     "Critical Failure setting up buf ring\n");
2147                                 error = ENOMEM;
2148                                 goto err_tx_desc;
2149                         }
2150                 }
2151         }
2152
2153         /*
2154          * Next the RX queues...
2155          */
2156         rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2157             DBA_ALIGN);
2158         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2159                 rxr = &adapter->rx_rings[i];
2160                 /* Set up some basics */
2161                 rxr->adapter = adapter;
2162                 /* In case SR-IOV is enabled, align the index properly */
2163                 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2164                     i);
2165                 rxr->num_desc = adapter->num_rx_desc;
2166
2167                 /* Initialize the RX side lock */
2168                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2169                     device_get_nameunit(dev), rxr->me);
2170                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2171
2172                 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2173                     BUS_DMA_NOWAIT)) {
2174                         device_printf(dev,
2175                             "Unable to allocate RX Descriptor memory\n");
2176                         error = ENOMEM;
2177                         goto err_rx_desc;
2178                 }
2179                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2180                 bzero((void *)rxr->rx_base, rsize);
2181
2182                 /* Allocate receive buffers for the ring */
2183                 if (ixgbe_allocate_receive_buffers(rxr)) {
2184                         device_printf(dev,
2185                             "Critical Failure setting up receive buffers\n");
2186                         error = ENOMEM;
2187                         goto err_rx_desc;
2188                 }
2189         }
2190
2191         /*
2192          * Finally set up the queue holding structs
2193          */
2194         for (int i = 0; i < adapter->num_queues; i++) {
2195                 que = &adapter->queues[i];
2196                 que->adapter = adapter;
2197                 que->me = i;
2198                 que->txr = &adapter->tx_rings[i];
2199                 que->rxr = &adapter->rx_rings[i];
2200         }
2201
2202         return (0);
2203
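/*
 * Error unwind: free only the descriptor areas that were successfully
 * set up (tracked by rxconf/txconf), then the ring and queue arrays.
 */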
2204 err_rx_desc:
2205         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2206                 ixgbe_dma_free(adapter, &rxr->rxdma);
2207 err_tx_desc:
2208         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2209                 ixgbe_dma_free(adapter, &txr->txdma);
2210         free(adapter->rx_rings, M_IXGBE);
2211 rx_fail:
2212         free(adapter->tx_rings, M_IXGBE);
2213 tx_fail:
2214         free(adapter->queues, M_IXGBE);
2215 fail:
2216         return (error);
2217 } /* ixgbe_allocate_queues */