1 /******************************************************************************
2
3   Copyright (c) 2001-2017, Intel Corporation
4   All rights reserved.
5
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41
42 #include "ixgbe.h"
43
44 /*
45  * HW RSC control:
46  *  This feature only works with IPv4,
47  *  and only on 82599 and later adapters.
48  *  It also breaks IP forwarding, and unlike
49  *  software LRO the stack has no way to
50  *  control it. For these reasons it is left
51  *  off by default and no tunable interface
52  *  is provided; enabling it requires
53  *  changing this value and recompiling
54  *  the driver.
55  */
56 static bool ixgbe_rsc_enable = FALSE;
57
58 /*
59  * For Flow Director: this is the
60  * sampling rate for TX packets fed
61  * to the filter pool; with the default
62  * of 20, every 20th packet is probed.
63  *
64  * Setting this to 0 disables the
65  * feature.
66  */
67 static int atr_sample_rate = 20;
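/*
 * With the default of 20, ixgbe_xmit() hands every 20th packet on a
 * ring to ixgbe_atr(); see the atr_count handling there.
 */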
68
69 /************************************************************************
70  *  Local Function prototypes
71  ************************************************************************/
72 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
73 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
74 static int           ixgbe_setup_receive_ring(struct rx_ring *);
75 static void          ixgbe_free_receive_buffers(struct rx_ring *);
76 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32);
77 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
78 static int           ixgbe_xmit(struct tx_ring *, struct mbuf **);
79 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
80                                         struct mbuf *, u32 *, u32 *);
81 static int           ixgbe_tso_setup(struct tx_ring *,
82                                      struct mbuf *, u32 *, u32 *);
83 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
84 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
85                                     struct mbuf *, u32);
86 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
87                                       struct ixgbe_dma_alloc *, int);
88 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
89
90 /************************************************************************
91  * ixgbe_legacy_start_locked - Transmit entry point
92  *
93  *   Called by the stack to initiate a transmit.
94  *   The driver will remain in this routine as long as there are
95  *   packets to transmit and transmit resources are available.
96  *   In case resources are not available, the stack is notified
97  *   and the packet is requeued.
98  ************************************************************************/
99 int
100 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
101 {
102         struct mbuf    *m_head;
103         struct adapter *adapter = txr->adapter;
104
105         IXGBE_TX_LOCK_ASSERT(txr);
106
107         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
108                 return (ENETDOWN);
109         if (!adapter->link_active)
110                 return (ENETDOWN);
111
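        /*
         * Drain the interface send queue while transmit descriptors
         * remain; if ixgbe_xmit() fails, the mbuf is put back at the
         * head of the queue so it can be retried later.
         */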
112         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
113                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
114                         break;
115
116                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
117                 if (m_head == NULL)
118                         break;
119
120                 if (ixgbe_xmit(txr, &m_head)) {
121                         if (m_head != NULL)
122                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
123                         break;
124                 }
125                 /* Send a copy of the frame to the BPF listener */
126                 ETHER_BPF_MTAP(ifp, m_head);
127         }
128
129         return IXGBE_SUCCESS;
130 } /* ixgbe_legacy_start_locked */
131
132 /************************************************************************
133  * ixgbe_legacy_start
134  *
135  *   Called by the stack, this always uses the first tx ring,
136  *   and should not be used with multiqueue tx enabled.
137  ************************************************************************/
138 void
139 ixgbe_legacy_start(struct ifnet *ifp)
140 {
141         struct adapter *adapter = ifp->if_softc;
142         struct tx_ring *txr = adapter->tx_rings;
143
144         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
145                 IXGBE_TX_LOCK(txr);
146                 ixgbe_legacy_start_locked(ifp, txr);
147                 IXGBE_TX_UNLOCK(txr);
148         }
149 } /* ixgbe_legacy_start */
150
151 /************************************************************************
152  * ixgbe_mq_start - Multiqueue Transmit Entry Point
153  *
154  *   (if_transmit function)
155  ************************************************************************/
156 int
157 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
158 {
159         struct adapter  *adapter = ifp->if_softc;
160         struct ix_queue *que;
161         struct tx_ring  *txr;
162         int             i, err = 0;
163         uint32_t        bucket_id;
164
165         /*
166          * When doing RSS, map the packet to the same outbound
167          * queue that the incoming flow would have been mapped to.
168          *
169          * If everything is set up correctly, that should be the
170          * same bucket the current CPU belongs to.
171          */
172         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
173                 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
174                     (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
175                     &bucket_id) == 0)) {
176                         i = bucket_id % adapter->num_queues;
177 #ifdef IXGBE_DEBUG
178                         if (bucket_id > adapter->num_queues)
179                                 if_printf(ifp,
180                                     "bucket_id (%d) > num_queues (%d)\n",
181                                     bucket_id, adapter->num_queues);
182 #endif
183                 } else
184                         i = m->m_pkthdr.flowid % adapter->num_queues;
185         } else
186                 i = curcpu % adapter->num_queues;
187
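        /*
         * At this point i is the candidate TX queue: the RSS bucket when
         * one is available, otherwise the flowid, otherwise the current
         * CPU, in every case reduced modulo num_queues.
         */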
188         /* Check for a hung queue and pick alternative */
189         if (((1 << i) & adapter->active_queues) == 0)
190                 i = ffsl(adapter->active_queues);
191
192         txr = &adapter->tx_rings[i];
193         que = &adapter->queues[i];
194
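        /*
         * Always enqueue onto the ring's buf_ring first; then either
         * drain it immediately if the TX lock is free, or defer the
         * drain to the queue's taskqueue.
         */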
195         err = drbr_enqueue(ifp, txr->br, m);
196         if (err)
197                 return (err);
198         if (IXGBE_TX_TRYLOCK(txr)) {
199                 ixgbe_mq_start_locked(ifp, txr);
200                 IXGBE_TX_UNLOCK(txr);
201         } else
202                 taskqueue_enqueue(que->tq, &txr->txq_task);
203
204         return (0);
205 } /* ixgbe_mq_start */
206
207 /************************************************************************
208  * ixgbe_mq_start_locked
209  ************************************************************************/
210 int
211 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
212 {
213         struct mbuf    *next;
214         int            enqueued = 0, err = 0;
215
216         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
217                 return (ENETDOWN);
218         if (txr->adapter->link_active == 0)
219                 return (ENETDOWN);
220
221         /* Process the queue */
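        /*
         * On newer kernels drbr_peek() returns the head mbuf without
         * removing it: drbr_advance() consumes it after a successful
         * transmit, while drbr_putback() restores a possibly modified
         * mbuf after a failure. Older kernels only have drbr_dequeue(),
         * so a failed mbuf must be re-enqueued instead.
         */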
222 #if __FreeBSD_version < 901504
223         next = drbr_dequeue(ifp, txr->br);
224         while (next != NULL) {
225                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
226                         if (next != NULL)
227                                 err = drbr_enqueue(ifp, txr->br, next);
228 #else
229         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
230                 err = ixgbe_xmit(txr, &next);
231                 if (err != 0) {
232                         if (next == NULL)
233                                 drbr_advance(ifp, txr->br);
234                         else
235                                 drbr_putback(ifp, txr->br, next);
236 #endif
237                         break;
238                 }
239 #if __FreeBSD_version >= 901504
240                 drbr_advance(ifp, txr->br);
241 #endif
242                 enqueued++;
243 #if __FreeBSD_version >= 1100036
244                 /*
245                  * Since we're looking at the tx ring, we can
246                  * check whether this is a VF by examining the
247                  * adapter's VF feature flag.
248                  */
249                 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
250                     (next->m_flags & M_MCAST))
251                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
252 #endif
253                 /* Send a copy of the frame to the BPF listener */
254                 ETHER_BPF_MTAP(ifp, next);
255                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
256                         break;
257 #if __FreeBSD_version < 901504
258                 next = drbr_dequeue(ifp, txr->br);
259 #endif
260         }
261
262         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
263                 ixgbe_txeof(txr);
264
265         return (err);
266 } /* ixgbe_mq_start_locked */
267
268 /************************************************************************
269  * ixgbe_deferred_mq_start
270  *
271  *   Called from a taskqueue to drain queued transmit packets.
272  ************************************************************************/
273 void
274 ixgbe_deferred_mq_start(void *arg, int pending)
275 {
276         struct tx_ring *txr = arg;
277         struct adapter *adapter = txr->adapter;
278         struct ifnet   *ifp = adapter->ifp;
279
280         IXGBE_TX_LOCK(txr);
281         if (!drbr_empty(ifp, txr->br))
282                 ixgbe_mq_start_locked(ifp, txr);
283         IXGBE_TX_UNLOCK(txr);
284 } /* ixgbe_deferred_mq_start */
285
286 /************************************************************************
287  * ixgbe_qflush - Flush all ring buffers
288  ************************************************************************/
289 void
290 ixgbe_qflush(struct ifnet *ifp)
291 {
292         struct adapter *adapter = ifp->if_softc;
293         struct tx_ring *txr = adapter->tx_rings;
294         struct mbuf    *m;
295
296         for (int i = 0; i < adapter->num_queues; i++, txr++) {
297                 IXGBE_TX_LOCK(txr);
298                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
299                         m_freem(m);
300                 IXGBE_TX_UNLOCK(txr);
301         }
302         if_qflush(ifp);
303 } /* ixgbe_qflush */
304
305
306 /************************************************************************
307  * ixgbe_xmit
308  *
309  *   Maps the mbufs to tx descriptors, allowing the
310  *   TX engine to transmit the packets.
311  *
312  *   Return 0 on success, positive on failure
313  ************************************************************************/
314 static int
315 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
316 {
317         struct adapter          *adapter = txr->adapter;
318         struct ixgbe_tx_buf     *txbuf;
319         union ixgbe_adv_tx_desc *txd = NULL;
320         struct mbuf             *m_head;
321         int                     i, j, error, nsegs;
322         int                     first;
323         u32                     olinfo_status = 0, cmd_type_len;
324         bool                    remap = TRUE;
325         bus_dma_segment_t       segs[adapter->num_segs];
326         bus_dmamap_t            map;
327
328         m_head = *m_headp;
329
330         /* Basic descriptor defines */
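        /*
         * DTYP_DATA marks this as an advanced data descriptor, IFCS asks
         * the hardware to insert the Ethernet FCS, and DEXT selects the
         * extended (advanced) descriptor format.
         */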
331         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
332             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
333
334         if (m_head->m_flags & M_VLANTAG)
335                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
336
337         /*
338          * Capture the index of the first descriptor used: its
339          * tx_buffer is where we later record the EOP descriptor
340          * that the hardware reports completion on.
341          */
342         first = txr->next_avail_desc;
343         txbuf = &txr->tx_buffers[first];
344         map = txbuf->map;
345
346         /*
347          * Map the packet for DMA.
348          */
349 retry:
350         error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
351             &nsegs, BUS_DMA_NOWAIT);
352
353         if (__predict_false(error)) {
354                 struct mbuf *m;
355
356                 switch (error) {
357                 case EFBIG:
358                         /* Defragment the chain and retry the mapping once */
359                         if (remap == TRUE) {
360                                 remap = FALSE;
361                                 /*
362                                  * XXX: m_defrag will choke on
363                                  * non-MCLBYTES-sized clusters
364                                  */
365                                 m = m_defrag(*m_headp, M_NOWAIT);
366                                 if (m == NULL) {
367                                         adapter->mbuf_defrag_failed++;
368                                         m_freem(*m_headp);
369                                         *m_headp = NULL;
370                                         return (ENOBUFS);
371                                 }
372                                 *m_headp = m;
373                                 goto retry;
374                         } else
375                                 return (error);
376                 case ENOMEM:
377                         txr->no_tx_dma_setup++;
378                         return (error);
379                 default:
380                         txr->no_tx_dma_setup++;
381                         m_freem(*m_headp);
382                         *m_headp = NULL;
383                         return (error);
384                 }
385         }
386
387         /* Make certain there are enough descriptors: nsegs + context + 1 spare */
388         if (txr->tx_avail < (nsegs + 2)) {
389                 txr->no_desc_avail++;
390                 bus_dmamap_unload(txr->txtag, map);
391                 return (ENOBUFS);
392         }
393         m_head = *m_headp;
394
395         /*
396          * Set up the appropriate offload context;
397          * this will consume the first descriptor.
398          */
399         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
400         if (__predict_false(error)) {
401                 if (error == ENOBUFS)
402                         *m_headp = NULL;
403                 return (error);
404         }
405
406         /* Do the flow director magic */
407         if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
408             (txr->atr_sample) && (!adapter->fdir_reinit)) {
409                 ++txr->atr_count;
410                 if (txr->atr_count >= atr_sample_rate) {
411                         ixgbe_atr(txr, m_head);
412                         txr->atr_count = 0;
413                 }
414         }
415
416         olinfo_status |= IXGBE_ADVTXD_CC;
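        /*
         * Build one advanced data descriptor per DMA segment; each
         * carries the shared command flags plus its own buffer address
         * and length, and the EOP/RS bits are OR'd into the last one
         * just below the loop.
         */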
417         i = txr->next_avail_desc;
418         for (j = 0; j < nsegs; j++) {
419                 bus_size_t seglen;
420                 bus_addr_t segaddr;
421
422                 txbuf = &txr->tx_buffers[i];
423                 txd = &txr->tx_base[i];
424                 seglen = segs[j].ds_len;
425                 segaddr = htole64(segs[j].ds_addr);
426
427                 txd->read.buffer_addr = segaddr;
428                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
429                     cmd_type_len | seglen);
430                 txd->read.olinfo_status = htole32(olinfo_status);
431
432                 if (++i == txr->num_desc)
433                         i = 0;
434         }
435
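        /*
         * Mark the last descriptor as end-of-packet and request a status
         * writeback (RS) so ixgbe_txeof() can see the DD bit once the
         * hardware has finished with this frame.
         */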
436         txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
437         txr->tx_avail -= nsegs;
438         txr->next_avail_desc = i;
439
440         txbuf->m_head = m_head;
441         /*
442          * Swap the maps so that the last descriptor, which
443          * gets the completion interrupt, holds the real
444          * (loaded) map, and the first descriptor takes over
445          * the unused map from this buffer.
446          */
447         txr->tx_buffers[first].map = txbuf->map;
448         txbuf->map = map;
449         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
450
451         /* Set the EOP descriptor that will be marked done */
452         txbuf = &txr->tx_buffers[first];
453         txbuf->eop = txd;
454
455         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
456             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
457         /*
458          * Advance the Transmit Descriptor Tail (TDT); this tells
459          * the hardware that this frame is available to transmit.
460          */
461         ++txr->total_packets;
462         IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
463
464         /* Mark queue as having work */
465         if (txr->busy == 0)
466                 txr->busy = 1;
467
468         return (0);
469 } /* ixgbe_xmit */
470
471
472 /************************************************************************
473  * ixgbe_allocate_transmit_buffers
474  *
475  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
476  *   the information needed to transmit a packet on the wire. This is
477  *   called only once at attach; setup is done on every reset.
478  ************************************************************************/
479 static int
480 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
481 {
482         struct adapter      *adapter = txr->adapter;
483         device_t            dev = adapter->dev;
484         struct ixgbe_tx_buf *txbuf;
485         int                 error, i;
486
487         /*
488          * Setup DMA descriptor areas.
489          */
490         error = bus_dma_tag_create(
491                  /*      parent */ bus_get_dma_tag(adapter->dev),
492                  /*   alignment */ 1,
493                  /*      bounds */ 0,
494                  /*     lowaddr */ BUS_SPACE_MAXADDR,
495                  /*    highaddr */ BUS_SPACE_MAXADDR,
496                  /*      filter */ NULL,
497                  /*   filterarg */ NULL,
498                  /*     maxsize */ IXGBE_TSO_SIZE,
499                  /*   nsegments */ adapter->num_segs,
500                  /*  maxsegsize */ PAGE_SIZE,
501                  /*       flags */ 0,
502                  /*    lockfunc */ NULL,
503                  /* lockfuncarg */ NULL,
504                                    &txr->txtag);
505         if (error != 0) {
506                 device_printf(dev, "Unable to allocate TX DMA tag\n");
507                 goto fail;
508         }
509
510         txr->tx_buffers =
511             (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
512             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
513         if (txr->tx_buffers == NULL) {
514                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
515                 error = ENOMEM;
516                 goto fail;
517         }
518
519         /* Create the descriptor buffer dma maps */
520         txbuf = txr->tx_buffers;
521         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
522                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
523                 if (error != 0) {
524                         device_printf(dev, "Unable to create TX DMA map\n");
525                         goto fail;
526                 }
527         }
528
529         return 0;
530 fail:
531         /* Free everything; this handles the case where we failed partway through */
532         ixgbe_free_transmit_structures(adapter);
533
534         return (error);
535 } /* ixgbe_allocate_transmit_buffers */
536
537 /************************************************************************
538  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
539  ************************************************************************/
540 static void
541 ixgbe_setup_transmit_ring(struct tx_ring *txr)
542 {
543         struct adapter        *adapter = txr->adapter;
544         struct ixgbe_tx_buf   *txbuf;
545 #ifdef DEV_NETMAP
546         struct netmap_adapter *na = NA(adapter->ifp);
547         struct netmap_slot    *slot;
548 #endif /* DEV_NETMAP */
549
550         /* Clear the old ring contents */
551         IXGBE_TX_LOCK(txr);
552
553 #ifdef DEV_NETMAP
554         if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
555                 /*
556                  * (under lock): if in netmap mode, do some consistency
557                  * checks and set slot to entry 0 of the netmap ring.
558                  */
559                 slot = netmap_reset(na, NR_TX, txr->me, 0);
560         }
561 #endif /* DEV_NETMAP */
562
563         bzero((void *)txr->tx_base,
564             (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
565         /* Reset indices */
566         txr->next_avail_desc = 0;
567         txr->next_to_clean = 0;
568
569         /* Free any existing tx buffers. */
570         txbuf = txr->tx_buffers;
571         for (int i = 0; i < txr->num_desc; i++, txbuf++) {
572                 if (txbuf->m_head != NULL) {
573                         bus_dmamap_sync(txr->txtag, txbuf->map,
574                             BUS_DMASYNC_POSTWRITE);
575                         bus_dmamap_unload(txr->txtag, txbuf->map);
576                         m_freem(txbuf->m_head);
577                         txbuf->m_head = NULL;
578                 }
579
580 #ifdef DEV_NETMAP
581                 /*
582                  * In netmap mode, set the map for the packet buffer.
583                  * NOTE: Some drivers (not this one) also need to set
584                  * the physical buffer address in the NIC ring.
585                  * Slots in the netmap ring (indexed by "si") are
586                  * kring->nkr_hwofs positions "ahead" wrt the
587                  * corresponding slot in the NIC ring. In some drivers
588                  * (not here) nkr_hwofs can be negative. Function
589                  * netmap_idx_n2k() handles wraparounds properly.
590                  */
591                 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
592                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
593                         netmap_load_map(na, txr->txtag,
594                             txbuf->map, NMB(na, slot + si));
595                 }
596 #endif /* DEV_NETMAP */
597
598                 /* Clear the EOP descriptor pointer */
599                 txbuf->eop = NULL;
600         }
601
602         /* Set the rate at which we sample packets */
603         if (adapter->feat_en & IXGBE_FEATURE_FDIR)
604                 txr->atr_sample = atr_sample_rate;
605
606         /* Set number of descriptors available */
607         txr->tx_avail = adapter->num_tx_desc;
608
609         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
610             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
611         IXGBE_TX_UNLOCK(txr);
612 } /* ixgbe_setup_transmit_ring */
613
614 /************************************************************************
615  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
616  ************************************************************************/
617 int
618 ixgbe_setup_transmit_structures(struct adapter *adapter)
619 {
620         struct tx_ring *txr = adapter->tx_rings;
621
622         for (int i = 0; i < adapter->num_queues; i++, txr++)
623                 ixgbe_setup_transmit_ring(txr);
624
625         return (0);
626 } /* ixgbe_setup_transmit_structures */
627
628 /************************************************************************
629  * ixgbe_free_transmit_structures - Free all transmit rings.
630  ************************************************************************/
631 void
632 ixgbe_free_transmit_structures(struct adapter *adapter)
633 {
634         struct tx_ring *txr = adapter->tx_rings;
635
636         for (int i = 0; i < adapter->num_queues; i++, txr++) {
637                 IXGBE_TX_LOCK(txr);
638                 ixgbe_free_transmit_buffers(txr);
639                 ixgbe_dma_free(adapter, &txr->txdma);
640                 IXGBE_TX_UNLOCK(txr);
641                 IXGBE_TX_LOCK_DESTROY(txr);
642         }
643         free(adapter->tx_rings, M_DEVBUF);
644 } /* ixgbe_free_transmit_structures */
645
646 /************************************************************************
647  * ixgbe_free_transmit_buffers
648  *
649  *   Free transmit ring related data structures.
650  ************************************************************************/
651 static void
652 ixgbe_free_transmit_buffers(struct tx_ring *txr)
653 {
654         struct adapter      *adapter = txr->adapter;
655         struct ixgbe_tx_buf *tx_buffer;
656         int                 i;
657
658         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
659
660         if (txr->tx_buffers == NULL)
661                 return;
662
663         tx_buffer = txr->tx_buffers;
664         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
665                 if (tx_buffer->m_head != NULL) {
666                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
667                             BUS_DMASYNC_POSTWRITE);
668                         bus_dmamap_unload(txr->txtag, tx_buffer->map);
669                         m_freem(tx_buffer->m_head);
670                         tx_buffer->m_head = NULL;
671                         if (tx_buffer->map != NULL) {
672                                 bus_dmamap_destroy(txr->txtag, tx_buffer->map);
673                                 tx_buffer->map = NULL;
674                         }
675                 } else if (tx_buffer->map != NULL) {
676                         bus_dmamap_unload(txr->txtag, tx_buffer->map);
677                         bus_dmamap_destroy(txr->txtag, tx_buffer->map);
678                         tx_buffer->map = NULL;
679                 }
680         }
681         if (txr->br != NULL)
682                 buf_ring_free(txr->br, M_DEVBUF);
683         if (txr->tx_buffers != NULL) {
684                 free(txr->tx_buffers, M_DEVBUF);
685                 txr->tx_buffers = NULL;
686         }
687         if (txr->txtag != NULL) {
688                 bus_dma_tag_destroy(txr->txtag);
689                 txr->txtag = NULL;
690         }
691 } /* ixgbe_free_transmit_buffers */
692
693 /************************************************************************
694  * ixgbe_tx_ctx_setup
695  *
696  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
697  ************************************************************************/
698 static int
699 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
700     u32 *cmd_type_len, u32 *olinfo_status)
701 {
702         struct ixgbe_adv_tx_context_desc *TXD;
703         struct ether_vlan_header         *eh;
704 #ifdef INET
705         struct ip                        *ip;
706 #endif
707 #ifdef INET6
708         struct ip6_hdr                   *ip6;
709 #endif
710         int                              ehdrlen, ip_hlen = 0;
711         int                              offload = TRUE;
712         int                              ctxd = txr->next_avail_desc;
713         u32                              vlan_macip_lens = 0;
714         u32                              type_tucmd_mlhl = 0;
715         u16                              vtag = 0;
716         u16                              etype;
717         u8                               ipproto = 0;
718         caddr_t                          l3d;
719
720
721         /* First check if TSO is to be used */
722         if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
723                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
724
725         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
726                 offload = FALSE;
727
728         /* Indicate the whole packet as payload when not doing TSO */
729         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
730
731         /* Now ready a context descriptor */
732         TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
733
734         /*
735          * In advanced descriptors the vlan tag must
736          * be placed into the context descriptor. Hence
737          * we need to make one even if not doing offloads.
738          */
739         if (mp->m_flags & M_VLANTAG) {
740                 vtag = htole16(mp->m_pkthdr.ether_vtag);
741                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
742         } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
743                    (offload == FALSE))
744                 return (0);
745
746         /*
747          * Determine where frame payload starts.
748          * Jump over vlan headers if already present,
749          * helpful for QinQ too.
750          */
751         eh = mtod(mp, struct ether_vlan_header *);
752         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
753                 etype = ntohs(eh->evl_proto);
754                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
755         } else {
756                 etype = ntohs(eh->evl_encap_proto);
757                 ehdrlen = ETHER_HDR_LEN;
758         }
759
760         /* Set the ether header length */
761         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
762
763         if (offload == FALSE)
764                 goto no_offloads;
765
766         /*
767          * If the first mbuf only includes the ethernet header,
768          * jump to the next one
769          * XXX: This assumes the stack splits mbufs containing headers
770          *      on header boundaries
771          * XXX: And assumes the entire IP header is contained in one mbuf
772          */
773         if (mp->m_len == ehdrlen && mp->m_next)
774                 l3d = mtod(mp->m_next, caddr_t);
775         else
776                 l3d = mtod(mp, caddr_t) + ehdrlen;
777
778         switch (etype) {
779 #ifdef INET
780                 case ETHERTYPE_IP:
781                         ip = (struct ip *)(l3d);
782                         ip_hlen = ip->ip_hl << 2;
783                         ipproto = ip->ip_p;
784                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
785                         /* Insert IPv4 checksum into data descriptors */
786                         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
787                                 ip->ip_sum = 0;
788                                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
789                         }
790                         break;
791 #endif
792 #ifdef INET6
793                 case ETHERTYPE_IPV6:
794                         ip6 = (struct ip6_hdr *)(l3d);
795                         ip_hlen = sizeof(struct ip6_hdr);
796                         ipproto = ip6->ip6_nxt;
797                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
798                         break;
799 #endif
800                 default:
801                         offload = FALSE;
802                         break;
803         }
804
805         vlan_macip_lens |= ip_hlen;
806
807         /* No support for offloads for non-L4 next headers */
808         switch (ipproto) {
809                 case IPPROTO_TCP:
810                         if (mp->m_pkthdr.csum_flags &
811                             (CSUM_IP_TCP | CSUM_IP6_TCP))
812                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
813                         else
814                                 offload = false;
815                         break;
816                 case IPPROTO_UDP:
817                         if (mp->m_pkthdr.csum_flags &
818                             (CSUM_IP_UDP | CSUM_IP6_UDP))
819                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
820                         else
821                                 offload = false;
822                         break;
823                 case IPPROTO_SCTP:
824                         if (mp->m_pkthdr.csum_flags &
825                             (CSUM_IP_SCTP | CSUM_IP6_SCTP))
826                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
827                         else
828                                 offload = false;
829                         break;
830                 default:
831                         offload = false;
832                         break;
833         }
834
835         if (offload) /* Insert L4 checksum into data descriptors */
836                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
837
838 no_offloads:
839         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
840
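        /*
         * vlan_macip_lens packs the IP header length in the low bits,
         * the MAC header length above it (MACLEN_SHIFT), and the VLAN
         * tag in the upper 16 bits (VLAN_SHIFT).
         */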
841         /* Now copy bits into descriptor */
842         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
843         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
844         TXD->seqnum_seed = htole32(0);
845         TXD->mss_l4len_idx = htole32(0);
846
847         /* We've consumed the first desc, adjust counters */
848         if (++ctxd == txr->num_desc)
849                 ctxd = 0;
850         txr->next_avail_desc = ctxd;
851         --txr->tx_avail;
852
853         return (0);
854 } /* ixgbe_tx_ctx_setup */
855
856 /************************************************************************
857  * ixgbe_tso_setup
858  *
859  *   Setup work for hardware segmentation offload (TSO) on
860  *   adapters using advanced tx descriptors
861  ************************************************************************/
862 static int
863 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
864     u32 *olinfo_status)
865 {
866         struct ixgbe_adv_tx_context_desc *TXD;
867         struct ether_vlan_header         *eh;
868 #ifdef INET6
869         struct ip6_hdr                   *ip6;
870 #endif
871 #ifdef INET
872         struct ip                        *ip;
873 #endif
874         struct tcphdr                    *th;
875         int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
876         u32                              vlan_macip_lens = 0;
877         u32                              type_tucmd_mlhl = 0;
878         u32                              mss_l4len_idx = 0, paylen;
879         u16                              vtag = 0, eh_type;
880
881         /*
882          * Determine where frame payload starts.
883          * Jump over vlan headers if already present
884          */
885         eh = mtod(mp, struct ether_vlan_header *);
886         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
887                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
888                 eh_type = eh->evl_proto;
889         } else {
890                 ehdrlen = ETHER_HDR_LEN;
891                 eh_type = eh->evl_encap_proto;
892         }
893
894         switch (ntohs(eh_type)) {
895 #ifdef INET
896         case ETHERTYPE_IP:
897                 ip = (struct ip *)(mp->m_data + ehdrlen);
898                 if (ip->ip_p != IPPROTO_TCP)
899                         return (ENXIO);
900                 ip->ip_sum = 0;
901                 ip_hlen = ip->ip_hl << 2;
902                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
903                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
904                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
905                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
906                 /* Tell transmit desc to also do IPv4 checksum. */
907                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
908                 break;
909 #endif
910 #ifdef INET6
911         case ETHERTYPE_IPV6:
912                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
913                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
914                 if (ip6->ip6_nxt != IPPROTO_TCP)
915                         return (ENXIO);
916                 ip_hlen = sizeof(struct ip6_hdr);
917                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
918                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
919                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
920                 break;
921 #endif
922         default:
923                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
924                     __func__, ntohs(eh_type));
925                 break;
926         }
927
928         ctxd = txr->next_avail_desc;
929         TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
930
931         tcp_hlen = th->th_off << 2;
932
933         /* Payload length (minus headers), reported via olinfo_status below */
934         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
935
936         /* VLAN MACLEN IPLEN */
937         if (mp->m_flags & M_VLANTAG) {
938                 vtag = htole16(mp->m_pkthdr.ether_vtag);
939                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
940         }
941
942         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
943         vlan_macip_lens |= ip_hlen;
944         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
945
946         /* ADV DTYPE TUCMD */
947         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
948         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
949         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
950
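        /*
         * For TSO the hardware splits the payload into MSS-sized
         * segments and replicates the MAC/IP/TCP headers for each one;
         * tcp_hlen tells it how much L4 header to copy.
         */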
951         /* MSS L4LEN IDX */
952         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
953         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
954         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
955
956         TXD->seqnum_seed = htole32(0);
957
958         if (++ctxd == txr->num_desc)
959                 ctxd = 0;
960
961         txr->tx_avail--;
962         txr->next_avail_desc = ctxd;
963         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
964         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
965         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
966         ++txr->tso_tx;
967
968         return (0);
969 } /* ixgbe_tso_setup */
970
971
972 /************************************************************************
973  * ixgbe_txeof
974  *
975  *   Examine each tx_buffer in the used queue. If the hardware is done
976  *   processing the packet then free associated resources. The
977  *   tx_buffer is put back on the free queue.
978  ************************************************************************/
979 void
980 ixgbe_txeof(struct tx_ring *txr)
981 {
982         struct adapter          *adapter = txr->adapter;
983         struct ixgbe_tx_buf     *buf;
984         union ixgbe_adv_tx_desc *txd;
985         u32                     work, processed = 0;
986         u32                     limit = adapter->tx_process_limit;
987
988         mtx_assert(&txr->tx_mtx, MA_OWNED);
989
990 #ifdef DEV_NETMAP
991         if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
992             (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
993                 struct netmap_adapter *na = NA(adapter->ifp);
994                 struct netmap_kring *kring = &na->tx_rings[txr->me];
995                 txd = txr->tx_base;
996                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
997                     BUS_DMASYNC_POSTREAD);
998                 /*
999                  * In netmap mode, all the work is done in the context
1000                  * of the client thread. Interrupt handlers only wake up
1001                  * clients, which may be sleeping on individual rings
1002                  * or on a global resource for all rings.
1003                  * To implement tx interrupt mitigation, we wake up the client
1004                  * thread roughly every half ring, even if the NIC interrupts
1005                  * more frequently. This is implemented as follows:
1006                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
1007                  *   the slot that should wake up the thread (nkr_num_slots
1008                  *   means the user thread should not be woken up);
1009                  * - the driver ignores tx interrupts unless netmap_mitigate=0
1010                  *   or the slot has the DD bit set.
1011                  */
1012                 if (!netmap_mitigate ||
1013                     (kring->nr_kflags < kring->nkr_num_slots &&
1014                      txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1015                         netmap_tx_irq(adapter->ifp, txr->me);
1016                 }
1017                 return;
1018         }
1019 #endif /* DEV_NETMAP */
1020
1021         if (txr->tx_avail == txr->num_desc) {
1022                 txr->busy = 0;
1023                 return;
1024         }
1025
1026         /* Get work starting point */
1027         work = txr->next_to_clean;
1028         buf = &txr->tx_buffers[work];
1029         txd = &txr->tx_base[work];
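        /*
         * Bias 'work' negative: it counts up toward zero and reaches
         * zero exactly when the index wraps past the end of the ring.
         */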
1030         work -= txr->num_desc; /* The distance to ring end */
1031         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1032             BUS_DMASYNC_POSTREAD);
1033
1034         do {
1035                 union ixgbe_adv_tx_desc *eop = buf->eop;
1036                 if (eop == NULL) /* No work */
1037                         break;
1038
1039                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1040                         break;  /* I/O not complete */
1041
1042                 if (buf->m_head) {
1043                         txr->bytes += buf->m_head->m_pkthdr.len;
1044                         bus_dmamap_sync(txr->txtag, buf->map,
1045                             BUS_DMASYNC_POSTWRITE);
1046                         bus_dmamap_unload(txr->txtag, buf->map);
1047                         m_freem(buf->m_head);
1048                         buf->m_head = NULL;
1049                 }
1050                 buf->eop = NULL;
1051                 ++txr->tx_avail;
1052
1053                 /* For a multi-segment packet, clean every descriptor up to the EOP */
1054                 while (txd != eop) {
1055                         ++txd;
1056                         ++buf;
1057                         ++work;
1058                         /* wrap the ring? */
1059                         if (__predict_false(!work)) {
1060                                 work -= txr->num_desc;
1061                                 buf = txr->tx_buffers;
1062                                 txd = txr->tx_base;
1063                         }
1064                         if (buf->m_head) {
1065                                 txr->bytes += buf->m_head->m_pkthdr.len;
1066                                 bus_dmamap_sync(txr->txtag, buf->map,
1067                                     BUS_DMASYNC_POSTWRITE);
1068                                 bus_dmamap_unload(txr->txtag, buf->map);
1069                                 m_freem(buf->m_head);
1070                                 buf->m_head = NULL;
1071                         }
1072                         ++txr->tx_avail;
1073                         buf->eop = NULL;
1074
1075                 }
1076                 ++txr->packets;
1077                 ++processed;
1078
1079                 /* Try the next packet */
1080                 ++txd;
1081                 ++buf;
1082                 ++work;
1083                 /* reset with a wrap */
1084                 if (__predict_false(!work)) {
1085                         work -= txr->num_desc;
1086                         buf = txr->tx_buffers;
1087                         txd = txr->tx_base;
1088                 }
1089                 prefetch(txd);
1090         } while (__predict_true(--limit));
1091
1092         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1093             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1094
1095         work += txr->num_desc;
1096         txr->next_to_clean = work;
1097
1098         /*
1099          * Queue hang detection: we know work is still
1100          * outstanding (otherwise the early return above
1101          * would have been taken), so increment busy when
1102          * nothing was cleaned. The local timer checks this
1103          * count and marks the queue HUNG once it exceeds
1104          * the maximum number of attempts.
1105          */
1106         if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1107                 ++txr->busy;
1108         /*
1109          * If anything was cleaned, reset the state to 1;
1110          * note this clears HUNG if it was set.
1111          */
1112         if (processed)
1113                 txr->busy = 1;
1114
1115         if (txr->tx_avail == txr->num_desc)
1116                 txr->busy = 0;
1117
1118         return;
1119 } /* ixgbe_txeof */
1120
1121 /************************************************************************
1122  * ixgbe_rsc_count
1123  *
1124  *   Used to detect a descriptor that has been merged by Hardware RSC.
1125  ************************************************************************/
1126 static inline u32
1127 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1128 {
1129         return (le32toh(rx->wb.lower.lo_dword.data) &
1130             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1131 } /* ixgbe_rsc_count */
1132
1133 /************************************************************************
1134  * ixgbe_setup_hw_rsc
1135  *
1136  *   Initialize the Hardware RSC (LRO) feature on 82599
1137  *   for an RX ring. It is toggled by the LRO capability
1138  *   even though it is transparent to the stack.
1139  *
1140  *   NOTE: Since this HW feature only works with IPv4 and
1141  *         testing has shown soft LRO to be as effective,
1142  *         this feature will be disabled by default.
1143  ************************************************************************/
1144 static void
1145 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1146 {
1147         struct adapter  *adapter = rxr->adapter;
1148         struct ixgbe_hw *hw = &adapter->hw;
1149         u32             rscctrl, rdrxctl;
1150
1151         /* If turning LRO/RSC off we need to disable it */
1152         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1153                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1154                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                     /* Write the cleared enable bit back so RSC is actually disabled */
                     IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1155                 return;
1156         }
1157
1158         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1159         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1160 #ifdef DEV_NETMAP
1161         /* Always strip CRC unless Netmap disabled it */
1162         if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1163             !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1164             ix_crcstrip)
1165 #endif /* DEV_NETMAP */
1166                 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1167         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1168         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1169
1170         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1171         rscctrl |= IXGBE_RSCCTL_RSCEN;
1172         /*
1173          * Limit the total number of descriptors that
1174          * can be combined, so it does not exceed 64K
1175          */
1176         if (rxr->mbuf_sz == MCLBYTES)
1177                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1178         else if (rxr->mbuf_sz == MJUMPAGESIZE)
1179                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1180         else if (rxr->mbuf_sz == MJUM9BYTES)
1181                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1182         else  /* Using 16K cluster */
1183                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1184
1185         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1186
1187         /* Enable TCP header recognition */
1188         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1189             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1190
1191         /* Disable RSC for ACK packets */
1192         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1193             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1194
1195         rxr->hw_rsc = TRUE;
1196 } /* ixgbe_setup_hw_rsc */
1197
1198 /************************************************************************
1199  * ixgbe_refresh_mbufs
1200  *
1201  *    - keeps its own state, so discards due to resource
1202  *      exhaustion are unnecessary; if an mbuf cannot be obtained
1203  *      it simply returns, keeping its placeholder, and can be
1204  *      called again later to retry.
1205  *      be recalled to try again.
1206  ************************************************************************/
1207 static void
1208 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1209 {
1210         struct adapter      *adapter = rxr->adapter;
1211         struct ixgbe_rx_buf *rxbuf;
1212         struct mbuf         *mp;
1213         bus_dma_segment_t   seg[1];
1214         int                 i, j, nsegs, error;
1215         bool                refreshed = FALSE;
1216
1217         i = j = rxr->next_to_refresh;
1218         /* j runs one slot ahead of i and ends the loop when it reaches 'limit' */
1219         if (++j == rxr->num_desc)
1220                 j = 0;
1221
1222         while (j != limit) {
1223                 rxbuf = &rxr->rx_buffers[i];
1224                 if (rxbuf->buf == NULL) {
1225                         mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1226                             rxr->mbuf_sz);
1227                         if (mp == NULL)
1228                                 goto update;
1229                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1230                                 m_adj(mp, ETHER_ALIGN);
1231                 } else
1232                         mp = rxbuf->buf;
1233
1234                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1235
1236                 /* If we're dealing with an mbuf that was copied rather
1237                  * than replaced, there's no need to go through busdma.
1238                  */
1239                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1240                         /* Get the memory mapping */
1241                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1242                         error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
1243                             mp, seg, &nsegs, BUS_DMA_NOWAIT);
1244                         if (error != 0) {
1245                                 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1246                                 m_free(mp);
1247                                 rxbuf->buf = NULL;
1248                                 goto update;
1249                         }
1250                         rxbuf->buf = mp;
1251                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1252                             BUS_DMASYNC_PREREAD);
1253                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1254                             htole64(seg[0].ds_addr);
1255                 } else {
1256                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1257                         rxbuf->flags &= ~IXGBE_RX_COPY;
1258                 }
1259
1260                 refreshed = TRUE;
1261                 /* Next is precalculated */
1262                 i = j;
1263                 rxr->next_to_refresh = i;
1264                 if (++j == rxr->num_desc)
1265                         j = 0;
1266         }
1267
1268 update:
1269         if (refreshed) /* Update hardware tail index */
1270                 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1271
1272         return;
1273 } /* ixgbe_refresh_mbufs */
1274
1275 /************************************************************************
1276  * ixgbe_allocate_receive_buffers
1277  *
1278  *   Allocate memory for rx_buffer structures. Since we use one
1279  *   rx_buffer per received packet, the maximum number of rx_buffers
1280  *   that we'll need is equal to the number of receive descriptors
1281  *   that we've allocated.
1282  ************************************************************************/
1283 static int
1284 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1285 {
1286         struct adapter      *adapter = rxr->adapter;
1287         device_t            dev = adapter->dev;
1288         struct ixgbe_rx_buf *rxbuf;
1289         int                 bsize, error;
1290
1291         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1292         rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1293             M_NOWAIT | M_ZERO);
1294         if (rxr->rx_buffers == NULL) {
1295                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1296                 error = ENOMEM;
1297                 goto fail;
1298         }
1299
1300         error = bus_dma_tag_create(
1301                  /*      parent */ bus_get_dma_tag(dev),
1302                  /*   alignment */ 1,
1303                  /*      bounds */ 0,
1304                  /*     lowaddr */ BUS_SPACE_MAXADDR,
1305                  /*    highaddr */ BUS_SPACE_MAXADDR,
1306                  /*      filter */ NULL,
1307                  /*   filterarg */ NULL,
1308                  /*     maxsize */ MJUM16BYTES,
1309                  /*   nsegments */ 1,
1310                  /*  maxsegsize */ MJUM16BYTES,
1311                  /*       flags */ 0,
1312                  /*    lockfunc */ NULL,
1313                  /* lockfuncarg */ NULL,
1314                                    &rxr->ptag);
1315         if (error != 0) {
1316                 device_printf(dev, "Unable to create RX DMA tag\n");
1317                 goto fail;
1318         }
1319
1320         for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1321                 rxbuf = &rxr->rx_buffers[i];
1322                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1323                 if (error) {
1324                         device_printf(dev, "Unable to create RX dma map\n");
1325                         goto fail;
1326                 }
1327         }
1328
1329         return (0);
1330
1331 fail:
1332         /* Frees all, but can handle partial completion */
1333         ixgbe_free_receive_structures(adapter);
1334
1335         return (error);
1336 } /* ixgbe_allocate_receive_buffers */
1337
1338 /************************************************************************
1339  * ixgbe_free_receive_ring
1340  ************************************************************************/
1341 static void
1342 ixgbe_free_receive_ring(struct rx_ring *rxr)
1343 {
1344         for (int i = 0; i < rxr->num_desc; i++) {
1345                 ixgbe_rx_discard(rxr, i);
1346         }
1347 } /* ixgbe_free_receive_ring */
1348
1349 /************************************************************************
1350  * ixgbe_setup_receive_ring
1351  *
1352  *   Initialize a receive ring and its buffers.
1353  ************************************************************************/
1354 static int
1355 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1356 {
1357         struct adapter        *adapter;
1358         struct ifnet          *ifp;
1359         device_t              dev;
1360         struct ixgbe_rx_buf   *rxbuf;
1361         struct lro_ctrl       *lro = &rxr->lro;
1362 #ifdef DEV_NETMAP
1363         struct netmap_adapter *na = NA(rxr->adapter->ifp);
1364         struct netmap_slot    *slot;
1365 #endif /* DEV_NETMAP */
1366         bus_dma_segment_t     seg[1];
1367         int                   rsize, nsegs, error = 0;
1368
1369         adapter = rxr->adapter;
1370         ifp = adapter->ifp;
1371         dev = adapter->dev;
1372
1373         /* Clear the ring contents */
1374         IXGBE_RX_LOCK(rxr);
1375
1376 #ifdef DEV_NETMAP
1377         if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1378                 slot = netmap_reset(na, NR_RX, rxr->me, 0);
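        /*
         * Note that 'slot' is only assigned here when the NETMAP feature
         * is enabled; every later use is guarded by the same check.
         */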
1379 #endif /* DEV_NETMAP */
1380
1381         rsize = roundup2(adapter->num_rx_desc *
1382             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1383         bzero((void *)rxr->rx_base, rsize);
1384         /* Cache the size */
1385         rxr->mbuf_sz = adapter->rx_mbuf_sz;
1386
1387         /* Free current RX buffer structs and their mbufs */
1388         ixgbe_free_receive_ring(rxr);
1389
1390         /* Now replenish the mbufs */
1391         for (int j = 0; j != rxr->num_desc; ++j) {
1392                 struct mbuf *mp;
1393
1394                 rxbuf = &rxr->rx_buffers[j];
1395
1396 #ifdef DEV_NETMAP
1397                 /*
1398                  * In netmap mode, fill the map and set the buffer
1399                  * address in the NIC ring, considering the offset
1400                  * between the netmap and NIC rings (see comment in
1401                  * ixgbe_setup_transmit_ring() ). No need to allocate
1402                  * an mbuf, so end the block with a continue;
1403                  */
1404                 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1405                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1406                         uint64_t paddr;
1407                         void *addr;
1408
1409                         addr = PNMB(na, slot + sj, &paddr);
1410                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1411                         /* Update descriptor and the cached value */
1412                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1413                         rxbuf->addr = htole64(paddr);
1414                         continue;
1415                 }
1416 #endif /* DEV_NETMAP */
1417
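                /*
                 * Normal (non-netmap) path: attach a fresh cluster of the
                 * configured receive buffer size, load it for DMA and write
                 * its bus address into the descriptor.
                 */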
1418                 rxbuf->flags = 0;
1419                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1420                     adapter->rx_mbuf_sz);
1421                 if (rxbuf->buf == NULL) {
1422                         error = ENOBUFS;
1423                         goto fail;
1424                 }
1425                 mp = rxbuf->buf;
1426                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1427                 /* Get the memory mapping */
1428                 error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
1429                     &nsegs, BUS_DMA_NOWAIT);
1430                 if (error != 0)
1431                         goto fail;
1432                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
1433                 /* Update the descriptor and the cached value */
1434                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1435                 rxbuf->addr = htole64(seg[0].ds_addr);
1436         }
1437
1438
1439         /* Setup our descriptor indices */
1440         rxr->next_to_check = 0;
1441         rxr->next_to_refresh = 0;
1442         rxr->lro_enabled = FALSE;
1443         rxr->rx_copies = 0;
1444         rxr->rx_bytes = 0;
1445         rxr->vtag_strip = FALSE;
1446
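        /* Sync the ring so the initialized descriptors are visible to the hardware */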
1447         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1448             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1449
1450         /*
1451          * Now set up the LRO interface
1452          */
1453         if (ixgbe_rsc_enable)
1454                 ixgbe_setup_hw_rsc(rxr);
1455         else if (ifp->if_capenable & IFCAP_LRO) {
1456                 int err = tcp_lro_init(lro);
1457                 if (err) {
1458                         device_printf(dev, "LRO Initialization failed!\n");
1459                         goto fail;
1460                 }
1461                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1462                 rxr->lro_enabled = TRUE;
1463                 lro->ifp = adapter->ifp;
1464         }
1465
1466         IXGBE_RX_UNLOCK(rxr);
1467
1468         return (0);
1469
1470 fail:
1471         ixgbe_free_receive_ring(rxr);
1472         IXGBE_RX_UNLOCK(rxr);
1473
1474         return (error);
1475 } /* ixgbe_setup_receive_ring */
1476
1477 /************************************************************************
1478  * ixgbe_setup_receive_structures - Initialize all receive rings.
1479  ************************************************************************/
1480 int
1481 ixgbe_setup_receive_structures(struct adapter *adapter)
1482 {
1483         struct rx_ring *rxr = adapter->rx_rings;
1484         int            j;
1485
1486         for (j = 0; j < adapter->num_queues; j++, rxr++)
1487                 if (ixgbe_setup_receive_ring(rxr))
1488                         goto fail;
1489
1490         return (0);
1491 fail:
1492         /*
1493          * Free the RX buffers allocated so far; we only need to handle
1494          * the rings that completed.  The failing ring will have cleaned
1495          * up after itself, so 'j' is the terminus.
1496          */
1497         for (int i = 0; i < j; ++i) {
1498                 rxr = &adapter->rx_rings[i];
1499                 IXGBE_RX_LOCK(rxr);
1500                 ixgbe_free_receive_ring(rxr);
1501                 IXGBE_RX_UNLOCK(rxr);
1502         }
1503
1504         return (ENOBUFS);
1505 } /* ixgbe_setup_receive_structures */
1506
1507
1508 /************************************************************************
1509  * ixgbe_free_receive_structures - Free all receive rings.
1510  ************************************************************************/
1511 void
1512 ixgbe_free_receive_structures(struct adapter *adapter)
1513 {
1514         struct rx_ring *rxr = adapter->rx_rings;
1515
1516         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1517
1518         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1519                 ixgbe_free_receive_buffers(rxr);
1520                 /* Free LRO memory */
1521                 tcp_lro_free(&rxr->lro);
1522                 /* Free the ring memory as well */
1523                 ixgbe_dma_free(adapter, &rxr->rxdma);
1524         }
1525
1526         free(adapter->rx_rings, M_DEVBUF);
1527 } /* ixgbe_free_receive_structures */
1528
1529
1530 /************************************************************************
1531  * ixgbe_free_receive_buffers - Free receive ring data structures
1532  ************************************************************************/
1533 static void
1534 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1535 {
1536         struct adapter      *adapter = rxr->adapter;
1537         struct ixgbe_rx_buf *rxbuf;
1538
1539         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1540
1541         /* Cleanup any existing buffers */
1542         if (rxr->rx_buffers != NULL) {
1543                 for (int i = 0; i < adapter->num_rx_desc; i++) {
1544                         rxbuf = &rxr->rx_buffers[i];
1545                         ixgbe_rx_discard(rxr, i);
1546                         if (rxbuf->pmap != NULL) {
1547                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1548                                 rxbuf->pmap = NULL;
1549                         }
1550                 }
1551                 free(rxr->rx_buffers, M_DEVBUF);
1552                 rxr->rx_buffers = NULL;
1555         }
1556
1557         if (rxr->ptag != NULL) {
1558                 bus_dma_tag_destroy(rxr->ptag);
1559                 rxr->ptag = NULL;
1560         }
1561
1562         return;
1563 } /* ixgbe_free_receive_buffers */
1564
1565 /************************************************************************
1566  * ixgbe_rx_input
1567  ************************************************************************/
1568 static __inline void
1569 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1570     u32 ptype)
1571 {
1572         /*
1573          * At the moment LRO is only for IP/TCP packets whose TCP checksum
1574          * has been verified by hardware and which carry no VLAN tag in the
1575          * ethernet header.  For IPv6 we do not yet support extension headers.
1576          */
1577         if (rxr->lro_enabled &&
1578             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1579             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1580             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1581              (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1582              (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1583              (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1584             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1585             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1586                 /*
1587                  * The packet is sent to the stack below instead if:
1588                  *  - LRO is not enabled, or
1589                  *  - there are no LRO resources, or
1590                  *  - the LRO enqueue fails
1591                  */
1592                 if (rxr->lro.lro_cnt != 0)
1593                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1594                                 return;
1595         }
1596         IXGBE_RX_UNLOCK(rxr);
1597         (*ifp->if_input)(ifp, m);
1598         IXGBE_RX_LOCK(rxr);
1599 } /* ixgbe_rx_input */
1600
1601 /************************************************************************
1602  * ixgbe_rx_discard
1603  ************************************************************************/
1604 static __inline void
1605 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1606 {
1607         struct ixgbe_rx_buf *rbuf;
1608
1609         rbuf = &rxr->rx_buffers[i];
1610
1611         /*
1612          * With advanced descriptors the writeback
1613          * clobbers the buffer addresses, so it is
1614          * easier to just free the existing mbufs and
1615          * take the normal refresh path to get new
1616          * buffers and mappings.
1617          */
1618
1619         if (rbuf->fmp != NULL) {/* Partial chain ? */
1620                 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1621                 m_freem(rbuf->fmp);
1622                 rbuf->fmp = NULL;
1623                 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1624         } else if (rbuf->buf) {
1625                 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1626                 m_free(rbuf->buf);
1627                 rbuf->buf = NULL;
1628         }
1629         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1630
1631         rbuf->flags = 0;
1632
1633         return;
1634 } /* ixgbe_rx_discard */
1635
1636
1637 /************************************************************************
1638  * ixgbe_rxeof
1639  *
1640  *   Executes in interrupt context.  It replenishes the
1641  *   mbufs in the descriptor ring and passes data that has
1642  *   been DMA'ed into host memory up to the stack.
1643  *
1644  *   Return TRUE if more work remains, FALSE if the ring is clean.
1645  ************************************************************************/
1646 bool
1647 ixgbe_rxeof(struct ix_queue *que)
1648 {
1649         struct adapter          *adapter = que->adapter;
1650         struct rx_ring          *rxr = que->rxr;
1651         struct ifnet            *ifp = adapter->ifp;
1652         struct lro_ctrl         *lro = &rxr->lro;
1653         union ixgbe_adv_rx_desc *cur;
1654         struct ixgbe_rx_buf     *rbuf, *nbuf;
1655         int                     i, nextp, processed = 0;
1656         u32                     staterr = 0;
1657         u32                     count = adapter->rx_process_limit;
1658         u16                     pkt_info;
1659
1660         IXGBE_RX_LOCK(rxr);
1661
1662 #ifdef DEV_NETMAP
1663         if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1664                 /* Same as the txeof routine: wakeup clients on intr. */
1665                 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1666                         IXGBE_RX_UNLOCK(rxr);
1667                         return (FALSE);
1668                 }
1669         }
1670 #endif /* DEV_NETMAP */
1671
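        /*
         * Main receive loop: walk the ring from next_to_check, stopping
         * when a descriptor has not been written back yet (DD clear) or
         * the per-call processing limit has been exhausted.
         */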
1672         for (i = rxr->next_to_check; count != 0;) {
1673                 struct mbuf *sendmp, *mp;
1674                 u32         rsc, ptype;
1675                 u16         len;
1676                 u16         vtag = 0;
1677                 bool        eop;
1678
1679                 /* Sync the ring. */
1680                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1681                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1682
1683                 cur = &rxr->rx_base[i];
1684                 staterr = le32toh(cur->wb.upper.status_error);
1685                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1686
1687                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1688                         break;
1689                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1690                         break;
1691
1692                 count--;
1693                 sendmp = NULL;
1694                 nbuf = NULL;
1695                 rsc = 0;
1696                 cur->wb.upper.status_error = 0;
1697                 rbuf = &rxr->rx_buffers[i];
1698                 mp = rbuf->buf;
1699
1700                 len = le16toh(cur->wb.upper.length);
1701                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1702                     IXGBE_RXDADV_PKTTYPE_MASK;
1703                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1704
1705                 /* Make sure bad packets are discarded */
1706                 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1707 #if __FreeBSD_version >= 1100036
1708                         if (adapter->feat_en & IXGBE_FEATURE_VF)
1709                                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1710 #endif
1711                         rxr->rx_discarded++;
1712                         ixgbe_rx_discard(rxr, i);
1713                         goto next_desc;
1714                 }
1715
1716                 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1717
1718                 /*
1719                  * The 82599 supports a hardware LRO (called HW RSC):
1720                  * packets need not be fragmented across sequential
1721                  * descriptors; instead the next descriptor is indicated
1722                  * in bits of the current descriptor.  This also means
1723                  * we may process more than one packet at a time, which
1724                  * required eliminating the global chain pointers in
1725                  * favor of the per-buffer chaining done here.  -jfv
1726                  */
1730                 if (!eop) {
1731                         /*
1732                          * Figure out the next descriptor
1733                          * of this frame.
1734                          */
1735                         if (rxr->hw_rsc == TRUE) {
1736                                 rsc = ixgbe_rsc_count(cur);
1737                                 rxr->rsc_num += (rsc - 1);
1738                         }
1739                         if (rsc) { /* Get hardware index */
1740                                 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1741                                     IXGBE_RXDADV_NEXTP_SHIFT);
1742                         } else { /* Just sequential */
1743                                 nextp = i + 1;
1744                                 if (nextp == adapter->num_rx_desc)
1745                                         nextp = 0;
1746                         }
1747                         nbuf = &rxr->rx_buffers[nextp];
1748                         prefetch(nbuf);
1749                 }
1750                 /*
1751                  * Rather than using the fmp/lmp global pointers
1752                  * we now keep the head of a packet chain in the
1753                  * buffer struct and pass this along from one
1754                  * descriptor to the next, until we get EOP.
1755                  */
1756                 mp->m_len = len;
1757                 /*
1758                  * See if there is a stored head from a previous
1759                  * descriptor; if so, this buffer continues that chain.
1760                  */
1761                 sendmp = rbuf->fmp;
1762                 if (sendmp != NULL) {  /* secondary frag */
1763                         rbuf->buf = rbuf->fmp = NULL;
1764                         mp->m_flags &= ~M_PKTHDR;
1765                         sendmp->m_pkthdr.len += mp->m_len;
1766                 } else {
1767                         /*
1768                          * Optimize.  This might be a small packet,
1769                          * maybe just a TCP ACK.  Do a fast copy that
1770                          * is cache aligned into a new mbuf, and
1771                          * leave the old mbuf+cluster for re-use.
1772                          */
1773                         if (eop && len <= IXGBE_RX_COPY_LEN) {
1774                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1775                                 if (sendmp != NULL) {
1776                                         sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1777                                         ixgbe_bcopy(mp->m_data, sendmp->m_data,
1778                                             len);
1779                                         sendmp->m_len = len;
1780                                         rxr->rx_copies++;
1781                                         rbuf->flags |= IXGBE_RX_COPY;
1782                                 }
1783                         }
1784                         if (sendmp == NULL) {
1785                                 rbuf->buf = rbuf->fmp = NULL;
1786                                 sendmp = mp;
1787                         }
1788
1789                         /* First descriptor of a non-packet-split chain */
1790                         sendmp->m_flags |= M_PKTHDR;
1791                         sendmp->m_pkthdr.len = mp->m_len;
1792                 }
1793                 ++processed;
1794
1795                 /* Pass the head pointer on */
1796                 if (eop == 0) {
1797                         nbuf->fmp = sendmp;
1798                         sendmp = NULL;
1799                         mp->m_next = nbuf->buf;
1800                 } else { /* Sending this frame */
1801                         sendmp->m_pkthdr.rcvif = ifp;
1802                         rxr->rx_packets++;
1803                         /* Capture data for adaptive interrupt moderation (AIM) */
1804                         rxr->bytes += sendmp->m_pkthdr.len;
1805                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1806                         /* Process vlan info */
1807                         if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1808                                 vtag = le16toh(cur->wb.upper.vlan);
1809                         if (vtag) {
1810                                 sendmp->m_pkthdr.ether_vtag = vtag;
1811                                 sendmp->m_flags |= M_VLANTAG;
1812                         }
1813                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1814                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
1815
1816                         /*
1817                          * In the multiqueue case the RXCSUM.PCSD bit is
1818                          * set and never cleared, which means an RSS hash
1819                          * is available for us to use.
1820                          */
1821                         if (adapter->num_queues > 1) {
1822                                 sendmp->m_pkthdr.flowid =
1823                                     le32toh(cur->wb.lower.hi_dword.rss);
1824                                 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1825                                 case IXGBE_RXDADV_RSSTYPE_IPV4:
1826                                         M_HASHTYPE_SET(sendmp,
1827                                             M_HASHTYPE_RSS_IPV4);
1828                                         break;
1829                                 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1830                                         M_HASHTYPE_SET(sendmp,
1831                                             M_HASHTYPE_RSS_TCP_IPV4);
1832                                         break;
1833                                 case IXGBE_RXDADV_RSSTYPE_IPV6:
1834                                         M_HASHTYPE_SET(sendmp,
1835                                             M_HASHTYPE_RSS_IPV6);
1836                                         break;
1837                                 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1838                                         M_HASHTYPE_SET(sendmp,
1839                                             M_HASHTYPE_RSS_TCP_IPV6);
1840                                         break;
1841                                 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1842                                         M_HASHTYPE_SET(sendmp,
1843                                             M_HASHTYPE_RSS_IPV6_EX);
1844                                         break;
1845                                 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1846                                         M_HASHTYPE_SET(sendmp,
1847                                             M_HASHTYPE_RSS_TCP_IPV6_EX);
1848                                         break;
1849 #if __FreeBSD_version > 1100000
1850                                 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1851                                         M_HASHTYPE_SET(sendmp,
1852                                             M_HASHTYPE_RSS_UDP_IPV4);
1853                                         break;
1854                                 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1855                                         M_HASHTYPE_SET(sendmp,
1856                                             M_HASHTYPE_RSS_UDP_IPV6);
1857                                         break;
1858                                 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1859                                         M_HASHTYPE_SET(sendmp,
1860                                             M_HASHTYPE_RSS_UDP_IPV6_EX);
1861                                         break;
1862 #endif
1863                                 default:
1864                                         M_HASHTYPE_SET(sendmp,
1865                                             M_HASHTYPE_OPAQUE_HASH);
1866                                 }
1867                         } else {
1868                                 sendmp->m_pkthdr.flowid = que->msix;
1869                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1870                         }
1871                 }
1872 next_desc:
1873                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1874                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1875
1876                 /* Advance our pointers to the next descriptor. */
1877                 if (++i == rxr->num_desc)
1878                         i = 0;
1879
1880                 /* Now send to the stack or do LRO */
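                /*
                 * ixgbe_rx_input() drops the RX lock around if_input(),
                 * so publish our position in next_to_check before the
                 * call and reload it afterwards.
                 */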
1881                 if (sendmp != NULL) {
1882                         rxr->next_to_check = i;
1883                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1884                         i = rxr->next_to_check;
1885                 }
1886
1887                 /* Refresh mbufs every 8 processed descriptors */
1888                 if (processed == 8) {
1889                         ixgbe_refresh_mbufs(rxr, i);
1890                         processed = 0;
1891                 }
1892         }
1893
1894         /* Refresh any remaining buf structs */
1895         if (ixgbe_rx_unrefreshed(rxr))
1896                 ixgbe_refresh_mbufs(rxr, i);
1897
1898         rxr->next_to_check = i;
1899
1900         /*
1901          * Flush any outstanding LRO work
1902          */
1903         tcp_lro_flush_all(lro);
1904
1905         IXGBE_RX_UNLOCK(rxr);
1906
1907         /*
1908          * Still have cleaning to do?
1909          */
1910         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1911                 return (TRUE);
1912
1913         return (FALSE);
1914 } /* ixgbe_rxeof */
1915
1916
1917 /************************************************************************
1918  * ixgbe_rx_checksum
1919  *
1920  *   Verify that the hardware indicated that the checksum is valid.
1921  *   Inform the stack about the status of the checksum so that it
1922  *   doesn't spend time verifying it again.
1923  ************************************************************************/
1924 static void
1925 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1926 {
1927         u16  status = (u16)staterr;
1928         u8   errors = (u8)(staterr >> 24);
1929         bool sctp = false;
1930
1931         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1932             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1933                 sctp = true;
1934
1935         /* IPv4 checksum */
1936         if (status & IXGBE_RXD_STAT_IPCS) {
1937                 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
1938                 /* IP Checksum Good */
1939                 if (!(errors & IXGBE_RXD_ERR_IPE))
1940                         mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
1941         }
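        /*
         * Note: SCTP uses a CRC32c rather than a 16-bit checksum, so the
         * 0xffff csum_data shortcut below is only set for TCP/UDP.
         */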
1942         /* TCP/UDP/SCTP checksum */
1943         if (status & IXGBE_RXD_STAT_L4CS) {
1944                 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
1945                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1946                         mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
1947                         if (!sctp)
1948                                 mp->m_pkthdr.csum_data = htons(0xffff);
1949                 }
1950         }
1951 } /* ixgbe_rx_checksum */
1952
1953 /************************************************************************
1954  * ixgbe_dmamap_cb - Callback that records a loaded DMA segment's address.
1955  ************************************************************************/
1956 static void
1957 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1958 {
1959         if (error)
1960                 return;
1961         *(bus_addr_t *)arg = segs->ds_addr;
1962
1963         return;
1964 } /* ixgbe_dmamap_cb */
1965
1966 /************************************************************************
1967  * ixgbe_dma_malloc
1968  ************************************************************************/
1969 static int
1970 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
1971                  struct ixgbe_dma_alloc *dma, int mapflags)
1972 {
1973         device_t dev = adapter->dev;
1974         int      r;
1975
1976         r = bus_dma_tag_create(
1977              /*      parent */ bus_get_dma_tag(adapter->dev),
1978              /*   alignment */ DBA_ALIGN,
1979              /*      bounds */ 0,
1980              /*     lowaddr */ BUS_SPACE_MAXADDR,
1981              /*    highaddr */ BUS_SPACE_MAXADDR,
1982              /*      filter */ NULL,
1983              /*   filterarg */ NULL,
1984              /*     maxsize */ size,
1985              /*   nsegments */ 1,
1986              /*  maxsegsize */ size,
1987              /*       flags */ BUS_DMA_ALLOCNOW,
1988              /*    lockfunc */ NULL,
1989              /* lockfuncarg */ NULL,
1990                                &dma->dma_tag);
1991         if (r != 0) {
1992                 device_printf(dev,
1993                     "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",
1994                     r);
1995                 goto fail_0;
1996         }
1997         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
1998             BUS_DMA_NOWAIT, &dma->dma_map);
1999         if (r != 0) {
2000                 device_printf(dev,
2001                     "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);
2002                 goto fail_1;
2003         }
2004         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
2005             ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2006         if (r != 0) {
2007                 device_printf(dev,
2008                     "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
2009                 goto fail_2;
2010         }
2011         dma->dma_size = size;
2012
2013         return (0);
2014 fail_2:
2015         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2016 fail_1:
2017         bus_dma_tag_destroy(dma->dma_tag);
2018 fail_0:
2019         dma->dma_tag = NULL;
2020
2021         return (r);
2022 } /* ixgbe_dma_malloc */
2023
2024 /************************************************************************
2025  * ixgbe_dma_free
2026  ************************************************************************/
2027 static void
2028 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2029 {
2030         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2031             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2032         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2033         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2034         bus_dma_tag_destroy(dma->dma_tag);
2035 } /* ixgbe_dma_free */
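
/*
 * Typical usage of the pair above (a sketch based on the callers in this
 * file): allocate a descriptor area when a ring is created and release it
 * when the ring is torn down, e.g.
 *
 *     if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma, BUS_DMA_NOWAIT))
 *             return (ENOMEM);
 *     ...
 *     ixgbe_dma_free(adapter, &rxr->rxdma);
 */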
2036
2037
2038 /************************************************************************
2039  * ixgbe_allocate_queues
2040  *
2041  *   Allocate memory for the transmit and receive rings, and then
2042  *   the descriptors associated with each, called only once at attach.
2043  ************************************************************************/
2044 int
2045 ixgbe_allocate_queues(struct adapter *adapter)
2046 {
2047         device_t        dev = adapter->dev;
2048         struct ix_queue *que;
2049         struct tx_ring  *txr;
2050         struct rx_ring  *rxr;
2051         int             rsize, tsize, error = IXGBE_SUCCESS;
2052         int             txconf = 0, rxconf = 0;
2053
2054         /* First, allocate the top level queue structs */
2055         adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2056             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2057         if (adapter->queues == NULL) {
2058                 device_printf(dev, "Unable to allocate queue memory\n");
2059                 error = ENOMEM;
2060                 goto fail;
2061         }
2062
2063         /* Second, allocate the TX ring struct memory */
2064         adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2065             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2066         if (adapter->tx_rings == NULL) {
2067                 device_printf(dev, "Unable to allocate TX ring memory\n");
2068                 error = ENOMEM;
2069                 goto tx_fail;
2070         }
2071
2072         /* Third, allocate the RX ring */
2073         adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2074             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2075         if (adapter->rx_rings == NULL) {
2076                 device_printf(dev, "Unable to allocate RX ring memory\n");
2077                 error = ENOMEM;
2078                 goto rx_fail;
2079         }
2080
2081         /* Size of each TX descriptor ring, rounded up to DBA_ALIGN */
2082         tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2083             DBA_ALIGN);
2084
2085         /*
2086          * Now set up the TX queues.  txconf is needed to handle the
2087          * possibility that things fail midcourse and we need to
2088          * unwind the allocated memory gracefully.
2089          */
2090         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2091                 /* Set up some basics */
2092                 txr = &adapter->tx_rings[i];
2093                 txr->adapter = adapter;
2094                 txr->br = NULL;
2095                 /* In case SR-IOV is enabled, align the index properly */
2096                 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2097                     i);
2098                 txr->num_desc = adapter->num_tx_desc;
2099
2100                 /* Initialize the TX side lock */
2101                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2102                     device_get_nameunit(dev), txr->me);
2103                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2104
2105                 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2106                     BUS_DMA_NOWAIT)) {
2107                         device_printf(dev,
2108                             "Unable to allocate TX Descriptor memory\n");
2109                         error = ENOMEM;
2110                         goto err_tx_desc;
2111                 }
2112                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2113                 bzero((void *)txr->tx_base, tsize);
2114
2115                 /* Now allocate transmit buffers for the ring */
2116                 if (ixgbe_allocate_transmit_buffers(txr)) {
2117                         device_printf(dev,
2118                             "Critical Failure setting up transmit buffers\n");
2119                         error = ENOMEM;
2120                         goto err_tx_desc;
2121                 }
2122                 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2123                         /* Allocate a buf ring */
2124                         txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2125                             M_WAITOK, &txr->tx_mtx);
2126                         if (txr->br == NULL) {
2127                                 device_printf(dev,
2128                                     "Critical Failure setting up buf ring\n");
2129                                 error = ENOMEM;
2130                                 goto err_tx_desc;
2131                         }
2132                 }
2133         }
2134
2135         /*
2136          * Next the RX queues...
2137          */
2138         rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2139             DBA_ALIGN);
2140         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2141                 rxr = &adapter->rx_rings[i];
2142                 /* Set up some basics */
2143                 rxr->adapter = adapter;
2144                 /* In case SR-IOV is enabled, align the index properly */
2145                 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2146                     i);
2147                 rxr->num_desc = adapter->num_rx_desc;
2148
2149                 /* Initialize the RX side lock */
2150                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2151                     device_get_nameunit(dev), rxr->me);
2152                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2153
2154                 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2155                     BUS_DMA_NOWAIT)) {
2156                         device_printf(dev,
2157                             "Unable to allocate RX Descriptor memory\n");
2158                         error = ENOMEM;
2159                         goto err_rx_desc;
2160                 }
2161                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2162                 bzero((void *)rxr->rx_base, rsize);
2163
2164                 /* Allocate receive buffers for the ring */
2165                 if (ixgbe_allocate_receive_buffers(rxr)) {
2166                         device_printf(dev,
2167                             "Critical Failure setting up receive buffers\n");
2168                         error = ENOMEM;
2169                         goto err_rx_desc;
2170                 }
2171         }
2172
2173         /*
2174          * Finally set up the queue holding structs
2175          */
2176         for (int i = 0; i < adapter->num_queues; i++) {
2177                 que = &adapter->queues[i];
2178                 que->adapter = adapter;
2179                 que->me = i;
2180                 que->txr = &adapter->tx_rings[i];
2181                 que->rxr = &adapter->rx_rings[i];
2182         }
2183
2184         return (0);
2185
2186 err_rx_desc:
2187         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2188                 ixgbe_dma_free(adapter, &rxr->rxdma);
2189 err_tx_desc:
2190         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2191                 ixgbe_dma_free(adapter, &txr->txdma);
2192         free(adapter->rx_rings, M_DEVBUF);
2193 rx_fail:
2194         free(adapter->tx_rings, M_DEVBUF);
2195 tx_fail:
2196         free(adapter->queues, M_DEVBUF);
2197 fail:
2198         return (error);
2199 } /* ixgbe_allocate_queues */