1 /******************************************************************************
2
3   Copyright (c) 2013-2017, Intel Corporation
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was separated to allow usage by
38 **          both the PF and VF drivers.
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46
47 #include "ixl.h"
48
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52
53 /* Local Prototypes */
54 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
56 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
57 static int      ixl_tx_setup_offload(struct ixl_queue *,
58                     struct mbuf *, u32 *, u32 *);
59 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60 static void     ixl_queue_sw_irq(struct ixl_vsi *, int);
61
62 static inline void ixl_rx_discard(struct rx_ring *, int);
63 static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
64                     struct mbuf *, u8);
65
66 static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
67 static inline u32 ixl_get_tx_head(struct ixl_queue *que);
68
69 #ifdef DEV_NETMAP
70 #include <dev/netmap/if_ixl_netmap.h>
71 #if __FreeBSD_version >= 1200000
72 int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
73 #endif
74 #endif /* DEV_NETMAP */
75
76 /*
77  * @key: buffer into which the driver's default RSS hash key is copied
78  */
79 void
80 ixl_get_default_rss_key(u32 *key)
81 {
82         MPASS(key != NULL);
83
84         u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
85             0x183cfd8c, 0xce880440, 0x580cbc3c,
86             0x35897377, 0x328b25e1, 0x4fa98922,
87             0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
88             0x0, 0x0, 0x0};
89
90         bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
91 }
92
93 /**
94  * i40e_vc_stat_str - convert virtchnl status err code to a string
95  * @hw: pointer to the HW structure
96  * @stat_err: the status error code to convert
97  **/
98 const char *
99 i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err)
100 {
101         switch (stat_err) {
102         case VIRTCHNL_STATUS_SUCCESS:
103                 return "OK";
104         case VIRTCHNL_ERR_PARAM:
105                 return "VIRTCHNL_ERR_PARAM";
106         case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH:
107                 return "VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH";
108         case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR:
109                 return "VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR";
110         case VIRTCHNL_STATUS_ERR_INVALID_VF_ID:
111                 return "VIRTCHNL_STATUS_ERR_INVALID_VF_ID";
112         case VIRTCHNL_STATUS_NOT_SUPPORTED:
113                 return "VIRTCHNL_STATUS_NOT_SUPPORTED";
114         }
115
116         snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
117         return hw->err_str;
118 }
119
120 /*
121  * PCI BUSMASTER needs to be set for proper operation.
122  */
123 void
124 ixl_set_busmaster(device_t dev)
125 {
126         u16 pci_cmd_word;
127
128         pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
129         pci_cmd_word |= PCIM_CMD_BUSMASTEREN;
130         pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2);
131 }
132
133 /*
134  * Rewrite the ENABLE bit in the MSIX control register
135  */
136 void
137 ixl_set_msix_enable(device_t dev)
138 {
139         int msix_ctrl, rid;
140
141         pci_find_cap(dev, PCIY_MSIX, &rid);
142         rid += PCIR_MSIX_CTRL;
143         msix_ctrl = pci_read_config(dev, rid, 2);
144         msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
145         pci_write_config(dev, rid, msix_ctrl, 2);
146 }
147
148
149 /*
150 ** Multiqueue Transmit driver
151 */
152 int
153 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
154 {
155         struct ixl_vsi          *vsi = ifp->if_softc;
156         struct ixl_queue        *que;
157         struct tx_ring          *txr;
158         int                     err, i;
159 #ifdef RSS
160         u32                     bucket_id;
161 #endif
162
163         /*
164          * Which queue to use:
165          *
166          * When doing RSS, map it to the same outbound
167          * queue as the incoming flow would be mapped to.
168          * If everything is set up correctly, it should be
169          * the same bucket the current CPU is assigned to.
170          */
171         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
172 #ifdef  RSS
173                 if (rss_hash2bucket(m->m_pkthdr.flowid,
174                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
175                         i = bucket_id % vsi->num_queues;
176                 } else
177 #endif
178                         i = m->m_pkthdr.flowid % vsi->num_queues;
179         } else
180                 i = curcpu % vsi->num_queues;
181
182         que = &vsi->queues[i];
183         txr = &que->txr;
184
185         err = drbr_enqueue(ifp, txr->br, m);
186         if (err)
187                 return (err);
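        /*
         * Try to transmit right away; if another thread already holds the
         * TX lock, hand the work off to this queue's taskqueue rather than
         * blocking here.
         */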
188         if (IXL_TX_TRYLOCK(txr)) {
189                 ixl_mq_start_locked(ifp, txr);
190                 IXL_TX_UNLOCK(txr);
191         } else
192                 taskqueue_enqueue(que->tq, &que->tx_task);
193
194         return (0);
195 }
196
197 int
198 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
199 {
200         struct ixl_queue        *que = txr->que;
201         struct ixl_vsi          *vsi = que->vsi;
202         struct mbuf             *next;
203         int                     err = 0;
204
205
206         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
207             vsi->link_active == 0)
208                 return (ENETDOWN);
209
210         /* Process the transmit queue */
211         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
212                 if ((err = ixl_xmit(que, &next)) != 0) {
213                         if (next == NULL)
214                                 drbr_advance(ifp, txr->br);
215                         else
216                                 drbr_putback(ifp, txr->br, next);
217                         break;
218                 }
219                 drbr_advance(ifp, txr->br);
220                 /* Send a copy of the frame to the BPF listener */
221                 ETHER_BPF_MTAP(ifp, next);
222                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
223                         break;
224         }
225
226         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
227                 ixl_txeof(que);
228
229         return (err);
230 }
231
232 /*
233  * Called from a taskqueue to drain queued transmit packets.
234  */
235 void
236 ixl_deferred_mq_start(void *arg, int pending)
237 {
238         struct ixl_queue        *que = arg;
239         struct tx_ring          *txr = &que->txr;
240         struct ixl_vsi          *vsi = que->vsi;
241         struct ifnet            *ifp = vsi->ifp;
242         
243         IXL_TX_LOCK(txr);
244         if (!drbr_empty(ifp, txr->br))
245                 ixl_mq_start_locked(ifp, txr);
246         IXL_TX_UNLOCK(txr);
247 }
248
249 /*
250 ** Flush all queue ring buffers
251 */
252 void
253 ixl_qflush(struct ifnet *ifp)
254 {
255         struct ixl_vsi  *vsi = ifp->if_softc;
256
257         for (int i = 0; i < vsi->num_queues; i++) {
258                 struct ixl_queue *que = &vsi->queues[i];
259                 struct tx_ring  *txr = &que->txr;
260                 struct mbuf     *m;
261                 IXL_TX_LOCK(txr);
262                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
263                         m_freem(m);
264                 IXL_TX_UNLOCK(txr);
265         }
266         if_qflush(ifp);
267 }
268
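/*
** ixl_tso_detect_sparse - check whether a TSO mbuf chain is too "sparse"
** for the hardware, i.e. whether any MSS worth of payload would be spread
** across more than IXL_SPARSE_CHAIN mbufs.  When this returns true the
** caller defragments the chain before mapping it.
*/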
269 static inline bool
270 ixl_tso_detect_sparse(struct mbuf *mp)
271 {
272         struct mbuf     *m;
273         int             num, mss;
274
275         num = 0;
276         mss = mp->m_pkthdr.tso_segsz;
277
278         /* Exclude first mbuf; assume it contains all headers */
279         for (m = mp->m_next; m != NULL; m = m->m_next) {
282                 num++;
283                 mss -= m->m_len % mp->m_pkthdr.tso_segsz;
284
285                 if (num > IXL_SPARSE_CHAIN)
286                         return (true);
287                 if (mss < 1) {
288                         num = (mss == 0) ? 0 : 1;
289                         mss += mp->m_pkthdr.tso_segsz;
290                 }
291         }
292
293         return (false);
294 }
295
296
297 /*********************************************************************
298  *
299  *  This routine maps the mbufs to tx descriptors, allowing the
300  *  TX engine to transmit the packets. 
301  *      - return 0 on success, positive on failure
302  *
303  **********************************************************************/
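/*
** Command bits set on the last data descriptor of each packet: EOP marks
** the end of the packet and RS asks the hardware to report its completion.
*/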
304 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
305
306 static int
307 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
308 {
309         struct ixl_vsi          *vsi = que->vsi;
310         struct i40e_hw          *hw = vsi->hw;
311         struct tx_ring          *txr = &que->txr;
312         struct ixl_tx_buf       *buf;
313         struct i40e_tx_desc     *txd = NULL;
314         struct mbuf             *m_head, *m;
315         int                     i, j, error, nsegs;
316         int                     first, last = 0;
317         u16                     vtag = 0;
318         u32                     cmd, off;
319         bus_dmamap_t            map;
320         bus_dma_tag_t           tag;
321         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
322
323         cmd = off = 0;
324         m_head = *m_headp;
325
326         /*
327          * Important to capture the first descriptor
328          * used because it will contain the index of
329          * the one we tell the hardware to report back
330          */
331         first = txr->next_avail;
332         buf = &txr->buffers[first];
333         map = buf->map;
334         tag = txr->tx_tag;
335
336         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
337                 /* Use larger mapping for TSO */
338                 tag = txr->tso_tag;
339                 if (ixl_tso_detect_sparse(m_head)) {
340                         m = m_defrag(m_head, M_NOWAIT);
341                         if (m == NULL) {
342                                 m_freem(*m_headp);
343                                 *m_headp = NULL;
344                                 return (ENOBUFS);
345                         }
346                         *m_headp = m;
347                 }
348         }
349
350         /*
351          * Map the packet for DMA.
352          */
353         error = bus_dmamap_load_mbuf_sg(tag, map,
354             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
355
356         if (error == EFBIG) {
357                 struct mbuf *m;
358
359                 m = m_defrag(*m_headp, M_NOWAIT);
360                 if (m == NULL) {
361                         que->mbuf_defrag_failed++;
362                         m_freem(*m_headp);
363                         *m_headp = NULL;
364                         return (ENOBUFS);
365                 }
366                 *m_headp = m;
367
368                 /* Try it again */
369                 error = bus_dmamap_load_mbuf_sg(tag, map,
370                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
371
372                 if (error != 0) {
373                         que->tx_dmamap_failed++;
374                         m_freem(*m_headp);
375                         *m_headp = NULL;
376                         return (error);
377                 }
378         } else if (error != 0) {
379                 que->tx_dmamap_failed++;
380                 m_freem(*m_headp);
381                 *m_headp = NULL;
382                 return (error);
383         }
384
385         /* Make certain there are enough descriptors (keep two in reserve) */
386         if (nsegs > txr->avail - 2) {
387                 txr->no_desc++;
388                 error = ENOBUFS;
389                 goto xmit_fail;
390         }
391         m_head = *m_headp;
392
393         /* Set up the TSO/CSUM offload */
394         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
395                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
396                 if (error)
397                         goto xmit_fail;
398         }
399
400         cmd |= I40E_TX_DESC_CMD_ICRC;
401         /* Grab the VLAN tag */
402         if (m_head->m_flags & M_VLANTAG) {
403                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
404                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
405         }
406
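        /*
         * Build one data descriptor per DMA segment; each descriptor pairs
         * the buffer address with a 64-bit word that packs the command bits,
         * offload offsets, buffer length and VLAN tag.
         */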
407         i = txr->next_avail;
408         for (j = 0; j < nsegs; j++) {
409                 bus_size_t seglen;
410
411                 buf = &txr->buffers[i];
412                 buf->tag = tag; /* Keep track of the type tag */
413                 txd = &txr->base[i];
414                 seglen = segs[j].ds_len;
415
416                 txd->buffer_addr = htole64(segs[j].ds_addr);
417                 txd->cmd_type_offset_bsz =
418                     htole64(I40E_TX_DESC_DTYPE_DATA
419                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
420                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
421                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
422                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
423
424                 last = i; /* descriptor that will get completion IRQ */
425
426                 if (++i == que->num_tx_desc)
427                         i = 0;
428
429                 buf->m_head = NULL;
430                 buf->eop_index = -1;
431         }
432         /* Set the last descriptor for report */
433         txd->cmd_type_offset_bsz |=
434             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
435         txr->avail -= nsegs;
436         txr->next_avail = i;
437
438         buf->m_head = m_head;
439         /* Swap the dma map between the first and last descriptor.
440          * The descriptor that gets checked on completion will now
441          * have the real map from the first descriptor.
442          */
443         txr->buffers[first].map = buf->map;
444         buf->map = map;
445         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
446
447         /* Set the index of the descriptor that will be marked done */
448         buf = &txr->buffers[first];
449         buf->eop_index = last;
450
451         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
452             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
453         /*
454          * Advance the Transmit Descriptor Tail (TDT); this tells the
455          * hardware that this frame is available to transmit.
456          */
457         ++txr->total_packets;
458         wr32(hw, txr->tail, i);
459
460         /* Mark outstanding work */
461         atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
462         return (0);
463
464 xmit_fail:
465         bus_dmamap_unload(tag, buf->map);
466         return (error);
467 }
468
469
470 /*********************************************************************
471  *
472  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
473  *  the information needed to transmit a packet on the wire. This is
474  *  called only once at attach; setup is done on every reset.
475  *
476  **********************************************************************/
477 int
478 ixl_allocate_tx_data(struct ixl_queue *que)
479 {
480         struct tx_ring          *txr = &que->txr;
481         struct ixl_vsi          *vsi = que->vsi;
482         device_t                dev = vsi->dev;
483         struct ixl_tx_buf       *buf;
484         int                     i, error = 0;
485
486         /*
487          * Setup DMA descriptor areas.
488          */
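        /*
         * The default TX tag maps up to IXL_MAX_TX_SEGS segments per packet;
         * the separate TSO tag created below allows the larger
         * IXL_MAX_TSO_SEGS count needed for TSO frames.
         */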
489         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),           /* parent */
490                                1, 0,                    /* alignment, bounds */
491                                BUS_SPACE_MAXADDR,       /* lowaddr */
492                                BUS_SPACE_MAXADDR,       /* highaddr */
493                                NULL, NULL,              /* filter, filterarg */
494                                IXL_TSO_SIZE,            /* maxsize */
495                                IXL_MAX_TX_SEGS,         /* nsegments */
496                                IXL_MAX_DMA_SEG_SIZE,    /* maxsegsize */
497                                0,                       /* flags */
498                                NULL,                    /* lockfunc */
499                                NULL,                    /* lockfuncarg */
500                                &txr->tx_tag))) {
501                 device_printf(dev,"Unable to allocate TX DMA tag\n");
502                 return (error);
503         }
504
505         /* Make a special tag for TSO */
506         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),           /* parent */
507                                1, 0,                    /* alignment, bounds */
508                                BUS_SPACE_MAXADDR,       /* lowaddr */
509                                BUS_SPACE_MAXADDR,       /* highaddr */
510                                NULL, NULL,              /* filter, filterarg */
511                                IXL_TSO_SIZE,            /* maxsize */
512                                IXL_MAX_TSO_SEGS,        /* nsegments */
513                                IXL_MAX_DMA_SEG_SIZE,    /* maxsegsize */
514                                0,                       /* flags */
515                                NULL,                    /* lockfunc */
516                                NULL,                    /* lockfuncarg */
517                                &txr->tso_tag))) {
518                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
519                 goto free_tx_dma;
520         }
521
522         if (!(txr->buffers =
523             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
524             que->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
525                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
526                 error = ENOMEM;
527                 goto free_tx_tso_dma;
528         }
529
530         /* Create the descriptor buffer default dma maps */
531         buf = txr->buffers;
532         for (i = 0; i < que->num_tx_desc; i++, buf++) {
533                 buf->tag = txr->tx_tag;
534                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
535                 if (error != 0) {
536                         device_printf(dev, "Unable to create TX DMA map\n");
537                         goto free_buffers;
538                 }
539         }
540
541         return 0;
542
543 free_buffers:
544         while (i--) {
545                 buf--;
546                 bus_dmamap_destroy(buf->tag, buf->map);
547         }
548
549         free(txr->buffers, M_DEVBUF);
550         txr->buffers = NULL;
551 free_tx_tso_dma:
552         bus_dma_tag_destroy(txr->tso_tag);
553         txr->tso_tag = NULL;
554 free_tx_dma:
555         bus_dma_tag_destroy(txr->tx_tag);
556         txr->tx_tag = NULL;
557
558         return (error);
559 }
560
561
562 /*********************************************************************
563  *
564  *  (Re)Initialize a queue transmit ring.
565  *      - called by init, it clears the descriptor ring,
566  *        and frees any stale mbufs 
567  *
568  **********************************************************************/
569 void
570 ixl_init_tx_ring(struct ixl_queue *que)
571 {
572 #ifdef DEV_NETMAP
573         struct netmap_adapter *na = NA(que->vsi->ifp);
574         struct netmap_slot *slot;
575 #endif /* DEV_NETMAP */
576         struct tx_ring          *txr = &que->txr;
577         struct ixl_tx_buf       *buf;
578
579         /* Clear the old ring contents */
580         IXL_TX_LOCK(txr);
581
582 #ifdef DEV_NETMAP
583         /*
584          * (under lock): if in netmap mode, do some consistency
585          * checks and set slot to entry 0 of the netmap ring.
586          */
587         slot = netmap_reset(na, NR_TX, que->me, 0);
588 #endif /* DEV_NETMAP */
589
590         bzero((void *)txr->base,
591               (sizeof(struct i40e_tx_desc)) * que->num_tx_desc);
592
593         /* Reset indices */
594         txr->next_avail = 0;
595         txr->next_to_clean = 0;
596
597         /* Reset watchdog status */
598         txr->watchdog_timer = 0;
599
600         /* Free any existing tx mbufs. */
601         buf = txr->buffers;
602         for (int i = 0; i < que->num_tx_desc; i++, buf++) {
603                 if (buf->m_head != NULL) {
604                         bus_dmamap_sync(buf->tag, buf->map,
605                             BUS_DMASYNC_POSTWRITE);
606                         bus_dmamap_unload(buf->tag, buf->map);
607                         m_freem(buf->m_head);
608                         buf->m_head = NULL;
609                 }
610 #ifdef DEV_NETMAP
611                 /*
612                  * In netmap mode, set the map for the packet buffer.
613                  * NOTE: Some drivers (not this one) also need to set
614                  * the physical buffer address in the NIC ring.
615                  * netmap_idx_n2k() maps a nic index, i, into the corresponding
616                  * netmap slot index, si
617                  */
618                 if (slot) {
619                         int si = netmap_idx_n2k(na->tx_rings[que->me], i);
620                         netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
621                 }
622 #endif /* DEV_NETMAP */
623                 /* Clear the EOP index */
624                 buf->eop_index = -1;
625         }
626
627         /* Set number of descriptors available */
628         txr->avail = que->num_tx_desc;
629
630         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
631             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
632         IXL_TX_UNLOCK(txr);
633 }
634
635
636 /*********************************************************************
637  *
638  *  Free transmit ring related data structures.
639  *
640  **********************************************************************/
641 void
642 ixl_free_que_tx(struct ixl_queue *que)
643 {
644         struct tx_ring *txr = &que->txr;
645         struct ixl_tx_buf *buf;
646
647         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
648
649         for (int i = 0; i < que->num_tx_desc; i++) {
650                 buf = &txr->buffers[i];
651                 if (buf->m_head != NULL) {
652                         bus_dmamap_sync(buf->tag, buf->map,
653                             BUS_DMASYNC_POSTWRITE);
654                         m_freem(buf->m_head);
655                         buf->m_head = NULL;
656                 }
657                 bus_dmamap_unload(buf->tag, buf->map);
658                 bus_dmamap_destroy(buf->tag, buf->map);
659         }
660         if (txr->buffers != NULL) {
661                 free(txr->buffers, M_DEVBUF);
662                 txr->buffers = NULL;
663         }
664         if (txr->tx_tag != NULL) {
665                 bus_dma_tag_destroy(txr->tx_tag);
666                 txr->tx_tag = NULL;
667         }
668         if (txr->tso_tag != NULL) {
669                 bus_dma_tag_destroy(txr->tso_tag);
670                 txr->tso_tag = NULL;
671         }
672
673         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
674         return;
675 }
676
677 /*********************************************************************
678  *
679  *  Setup descriptor for hw offloads 
680  *
681  **********************************************************************/
682
683 static int
684 ixl_tx_setup_offload(struct ixl_queue *que,
685     struct mbuf *mp, u32 *cmd, u32 *off)
686 {
687         struct ether_vlan_header        *eh;
688 #ifdef INET
689         struct ip                       *ip = NULL;
690 #endif
691         struct tcphdr                   *th = NULL;
692 #ifdef INET6
693         struct ip6_hdr                  *ip6;
694 #endif
695         int                             elen, ip_hlen = 0, tcp_hlen;
696         u16                             etype;
697         u8                              ipproto = 0;
698         bool                            tso = FALSE;
699
700         /* Set up the TSO context descriptor if required */
701         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
702                 tso = ixl_tso_setup(que, mp);
703                 if (tso)
704                         ++que->tso;
705                 else
706                         return (ENXIO);
707         }
708
709         /*
710          * Determine where frame payload starts.
711          * Jump over vlan headers if already present,
712          * helpful for QinQ too.
713          */
714         eh = mtod(mp, struct ether_vlan_header *);
715         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
716                 etype = ntohs(eh->evl_proto);
717                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
718         } else {
719                 etype = ntohs(eh->evl_encap_proto);
720                 elen = ETHER_HDR_LEN;
721         }
722
723         switch (etype) {
724 #ifdef INET
725                 case ETHERTYPE_IP:
726                         ip = (struct ip *)(mp->m_data + elen);
727                         ip_hlen = ip->ip_hl << 2;
728                         ipproto = ip->ip_p;
729                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
730                         /* The IP checksum must be recalculated with TSO */
731                         if (tso)
732                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
733                         else
734                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
735                         break;
736 #endif
737 #ifdef INET6
738                 case ETHERTYPE_IPV6:
739                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
740                         ip_hlen = sizeof(struct ip6_hdr);
741                         ipproto = ip6->ip6_nxt;
742                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
743                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
744                         break;
745 #endif
746                 default:
747                         break;
748         }
749
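        /*
         * The header-length fields use hardware units: MACLEN is in 2-byte
         * words and IPLEN/L4LEN are in 4-byte words, hence the right shifts
         * before merging the lengths into *off.
         */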
750         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
751         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
752
753         switch (ipproto) {
754                 case IPPROTO_TCP:
755                         tcp_hlen = th->th_off << 2;
756                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
757                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
758                                 *off |= (tcp_hlen >> 2) <<
759                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
760                         }
761                         break;
762                 case IPPROTO_UDP:
763                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
764                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
765                                 *off |= (sizeof(struct udphdr) >> 2) <<
766                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
767                         }
768                         break;
769                 case IPPROTO_SCTP:
770                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
771                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
772                                 *off |= (sizeof(struct sctphdr) >> 2) <<
773                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
774                         }
775                         /* Fall Thru */
776                 default:
777                         break;
778         }
779
780         return (0);
781 }
782
783
784 /**********************************************************************
785  *
786  *  Setup context for hardware segmentation offload (TSO)
787  *
788  **********************************************************************/
789 static bool
790 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
791 {
792         struct tx_ring                  *txr = &que->txr;
793         struct i40e_tx_context_desc     *TXD;
794         struct ixl_tx_buf               *buf;
795         u32                             cmd, mss, type, tsolen;
796         u16                             etype;
797         int                             idx, elen, ip_hlen, tcp_hlen;
798         struct ether_vlan_header        *eh;
799 #ifdef INET
800         struct ip                       *ip;
801 #endif
802 #ifdef INET6
803         struct ip6_hdr                  *ip6;
804 #endif
805 #if defined(INET6) || defined(INET)
806         struct tcphdr                   *th;
807 #endif
808         u64                             type_cmd_tso_mss;
809
810         /*
811          * Determine where frame payload starts.
812          * Jump over vlan headers if already present
813          */
814         eh = mtod(mp, struct ether_vlan_header *);
815         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
816                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
817                 etype = eh->evl_proto;
818         } else {
819                 elen = ETHER_HDR_LEN;
820                 etype = eh->evl_encap_proto;
821         }
822
823         switch (ntohs(etype)) {
824 #ifdef INET6
825         case ETHERTYPE_IPV6:
826                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
827                 if (ip6->ip6_nxt != IPPROTO_TCP)
828                         return (FALSE);
829                 ip_hlen = sizeof(struct ip6_hdr);
830                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
831                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
832                 tcp_hlen = th->th_off << 2;
833                 /*
834                  * The corresponding flag is set by the stack in the IPv4
835                  * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
836                  * So, set it here because the rest of the flow requires it.
837                  */
838                 mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
839                 break;
840 #endif
841 #ifdef INET
842         case ETHERTYPE_IP:
843                 ip = (struct ip *)(mp->m_data + elen);
844                 if (ip->ip_p != IPPROTO_TCP)
845                         return (FALSE);
846                 ip->ip_sum = 0;
847                 ip_hlen = ip->ip_hl << 2;
848                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
849                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
850                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
851                 tcp_hlen = th->th_off << 2;
852                 break;
853 #endif
854         default:
855                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
856                     __func__, ntohs(etype));
857                 return FALSE;
858         }
859
860         /* Ensure we have at least the IP+TCP header in the first mbuf. */
861         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
862                 return FALSE;
863
864         idx = txr->next_avail;
865         buf = &txr->buffers[idx];
866         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
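        /* The TSO length is the TCP payload: the frame minus its L2/L3/L4 headers. */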
867         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
868
869         type = I40E_TX_DESC_DTYPE_CONTEXT;
870         cmd = I40E_TX_CTX_DESC_TSO;
871         /* TSO MSS must not be less than 64 */
872         if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
873                 que->mss_too_small++;
874                 mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
875         }
876         mss = mp->m_pkthdr.tso_segsz;
877
878         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
879             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
880             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
881             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
882         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
883
884         TXD->tunneling_params = htole32(0);
885         buf->m_head = NULL;
886         buf->eop_index = -1;
887
888         if (++idx == que->num_tx_desc)
889                 idx = 0;
890
891         txr->avail--;
892         txr->next_avail = idx;
893
894         return TRUE;
895 }
896
897 /*
898  * ixl_get_tx_head - Retrieve the value from the
899  *    location the HW records its HEAD index
900  */
901 static inline u32
902 ixl_get_tx_head(struct ixl_queue *que)
903 {
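        /*
         * With head write-back enabled, the hardware stores its HEAD index
         * in the 32-bit word located just past the last descriptor in the
         * ring; read it from there.
         */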
904         struct tx_ring  *txr = &que->txr;
905         void *head = &txr->base[que->num_tx_desc];
906         return LE32_TO_CPU(*(volatile __le32 *)head);
907 }
908
909 /**********************************************************************
910  *
911  * Get index of last used descriptor/buffer from hardware, and clean
912  * the descriptors/buffers up to that index.
913  *
914  **********************************************************************/
915 static bool
916 ixl_txeof_hwb(struct ixl_queue *que)
917 {
918         struct tx_ring          *txr = &que->txr;
919         u32                     first, last, head, done;
920         struct ixl_tx_buf       *buf;
921         struct i40e_tx_desc     *tx_desc, *eop_desc;
922
923         mtx_assert(&txr->mtx, MA_OWNED);
924
925 #ifdef DEV_NETMAP
926         // XXX todo: implement moderation
927         if (netmap_tx_irq(que->vsi->ifp, que->me))
928                 return FALSE;
929 #endif /* DEV_NETMAP */
930
931         /* These are not the descriptors you seek, move along :) */
932         if (txr->avail == que->num_tx_desc) {
933                 atomic_store_rel_32(&txr->watchdog_timer, 0);
934                 return FALSE;
935         }
936
937         first = txr->next_to_clean;
938         buf = &txr->buffers[first];
939         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
940         last = buf->eop_index;
941         if (last == -1)
942                 return FALSE;
943         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
944
945         /* Sync DMA before reading head index from ring */
946         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
947             BUS_DMASYNC_POSTREAD);
948
949         /* Get the Head WB value */
950         head = ixl_get_tx_head(que);
951
952         /*
953         ** Get the index of the first descriptor
954         ** BEYOND the EOP and call that 'done'.
955         ** I do this so the comparison in the
956         ** inner while loop below can be simple
957         */
958         if (++last == que->num_tx_desc) last = 0;
959         done = last;
960
961         /*
962         ** The HEAD index of the ring is written in a 
963         ** defined location, this rather than a done bit
964         ** is what is used to keep track of what must be
965         ** 'cleaned'.
966         */
967         while (first != head) {
968                 /* We clean the range of the packet */
969                 while (first != done) {
970                         ++txr->avail;
971
972                         if (buf->m_head) {
973                                 txr->bytes += /* for ITR adjustment */
974                                     buf->m_head->m_pkthdr.len;
975                                 txr->tx_bytes += /* for TX stats */
976                                     buf->m_head->m_pkthdr.len;
977                                 bus_dmamap_sync(buf->tag,
978                                     buf->map,
979                                     BUS_DMASYNC_POSTWRITE);
980                                 bus_dmamap_unload(buf->tag,
981                                     buf->map);
982                                 m_freem(buf->m_head);
983                                 buf->m_head = NULL;
984                         }
985                         buf->eop_index = -1;
986
987                         if (++first == que->num_tx_desc)
988                                 first = 0;
989
990                         buf = &txr->buffers[first];
991                         tx_desc = &txr->base[first];
992                 }
993                 ++txr->packets;
994                 /* If a packet was successfully cleaned, reset the watchdog timer */
995                 atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
996                 /* See if there is more work now */
997                 last = buf->eop_index;
998                 if (last != -1) {
999                         eop_desc = &txr->base[last];
1000                         /* Get next done point */
1001                         if (++last == que->num_tx_desc) last = 0;
1002                         done = last;
1003                 } else
1004                         break;
1005         }
1006         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
1007             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1008
1009         txr->next_to_clean = first;
1010
1011         /*
1012          * If there are no pending descriptors, clear the timeout.
1013          */
1014         if (txr->avail == que->num_tx_desc) {
1015                 atomic_store_rel_32(&txr->watchdog_timer, 0);
1016                 return FALSE;
1017         }
1018
1019         return TRUE;
1020 }
1021
1022 /**********************************************************************
1023  *
1024  * Use index kept by driver and the flag on each descriptor to find used
1025  * descriptor/buffers and clean them up for re-use.
1026  *
1027  * This method of reclaiming descriptors is currently incompatible with
1028  * DEV_NETMAP.
1029  *
1030  * Returns TRUE if there are more descriptors to be cleaned after this
1031  * function exits.
1032  *
1033  **********************************************************************/
1034 static bool
1035 ixl_txeof_dwb(struct ixl_queue *que)
1036 {
1037         struct tx_ring          *txr = &que->txr;
1038         u32                     first, last, done;
1039         u32                     limit = 256;
1040         struct ixl_tx_buf       *buf;
1041         struct i40e_tx_desc     *tx_desc, *eop_desc;
1042
1043         mtx_assert(&txr->mtx, MA_OWNED);
1044
1045         /* There are no descriptors to clean */
1046         if (txr->avail == que->num_tx_desc) {
1047                 atomic_store_rel_32(&txr->watchdog_timer, 0);
1048                 return FALSE;
1049         }
1050
1051         /* Set starting index/descriptor/buffer */
1052         first = txr->next_to_clean;
1053         buf = &txr->buffers[first];
1054         tx_desc = &txr->base[first];
1055
1056         /*
1057          * This function operates per-packet -- identifies the start of the
1058          * packet and gets the index of the last descriptor of the packet from
1059          * it, from eop_index.
1060          *
1061          * If the last descriptor is marked "done" by the hardware, then all
1062          * of the descriptors for the packet are cleaned.
1063          */
1064         last = buf->eop_index;
1065         if (last == -1)
1066                 return FALSE;
1067         eop_desc = &txr->base[last];
1068
1069         /* Sync DMA before reading from ring */
1070         bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_POSTREAD);
1071
1072         /*
1073          * Get the index of the first descriptor beyond the EOP and call that
1074          * 'done'. Simplifies the comparison for the inner loop below.
1075          */
1076         if (++last == que->num_tx_desc)
1077                 last = 0;
1078         done = last;
1079
1080         /*
1081          * We find the last completed descriptor by examining each
1082          * descriptor's status bits to see if it's done.
1083          */
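        /*
         * At most 'limit' packets are cleaned per call (note the --limit in
         * the loop condition), which keeps the time spent holding the TX
         * lock bounded.
         */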
1084         do {
1085                 /* Break if last descriptor in packet isn't marked done */
1086                 if ((eop_desc->cmd_type_offset_bsz & I40E_TXD_QW1_DTYPE_MASK)
1087                     != I40E_TX_DESC_DTYPE_DESC_DONE)
1088                         break;
1089
1090                 /* Clean the descriptors that make up the processed packet */
1091                 while (first != done) {
1092                         /*
1093                          * If there was a buffer attached to this descriptor,
1094                          * prevent the adapter from accessing it, and add its
1095                          * length to the queue's TX stats.
1096                          */
1097                         if (buf->m_head) {
1098                                 txr->bytes += buf->m_head->m_pkthdr.len;
1099                                 txr->tx_bytes += buf->m_head->m_pkthdr.len;
1100                                 bus_dmamap_sync(buf->tag, buf->map,
1101                                     BUS_DMASYNC_POSTWRITE);
1102                                 bus_dmamap_unload(buf->tag, buf->map);
1103                                 m_freem(buf->m_head);
1104                                 buf->m_head = NULL;
1105                         }
1106                         buf->eop_index = -1;
1107                         ++txr->avail;
1108
1109                         if (++first == que->num_tx_desc)
1110                                 first = 0;
1111                         buf = &txr->buffers[first];
1112                         tx_desc = &txr->base[first];
1113                 }
1114                 ++txr->packets;
1115                 /* If a packet was successfully cleaned, reset the watchdog timer */
1116                 atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
1117
1118                 /*
1119                  * Since buf is the first buffer after the one that was just
1120                  * cleaned, check if the packet it starts is done, too.
1121                  */
1122                 last = buf->eop_index;
1123                 if (last != -1) {
1124                         eop_desc = &txr->base[last];
1125                         /* Get next done point */
1126                         if (++last == que->num_tx_desc) last = 0;
1127                         done = last;
1128                 } else
1129                         break;
1130         } while (--limit);
1131
1132         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
1133             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1134
1135         txr->next_to_clean = first;
1136
1137         /*
1138          * If there are no pending descriptors, clear the watchdog timer.
1139          */
1140         if (txr->avail == que->num_tx_desc) {
1141                 atomic_store_rel_32(&txr->watchdog_timer, 0);
1142                 return FALSE;
1143         }
1144
1145         return TRUE;
1146 }
1147
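/*
** Clean the TX ring using whichever completion mechanism this VSI is
** configured for: hardware head write-back (hwb) or the descriptor
** done bit (dwb).
*/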
1148 bool
1149 ixl_txeof(struct ixl_queue *que)
1150 {
1151         struct ixl_vsi *vsi = que->vsi;
1152
1153         return (vsi->enable_head_writeback) ? ixl_txeof_hwb(que)
1154             : ixl_txeof_dwb(que);
1155 }
1156
1157
1158 /*********************************************************************
1159  *
1160  *  Refresh mbuf buffers for RX descriptor rings
1161  *   - now keeps its own state so discards due to resource
1162  *     exhaustion are unnecessary; if an mbuf cannot be obtained
1163  *     it just returns, keeping its placeholder, so it can simply
1164  *     be recalled to try again.
1165  *
1166  **********************************************************************/
1167 static void
1168 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
1169 {
1170         struct ixl_vsi          *vsi = que->vsi;
1171         struct rx_ring          *rxr = &que->rxr;
1172         bus_dma_segment_t       hseg[1];
1173         bus_dma_segment_t       pseg[1];
1174         struct ixl_rx_buf       *buf;
1175         struct mbuf             *mh, *mp;
1176         int                     i, j, nsegs, error;
1177         bool                    refreshed = FALSE;
1178
1179         i = j = rxr->next_refresh;
1180         /* Control the loop with one beyond */
1181         if (++j == que->num_rx_desc)
1182                 j = 0;
1183
1184         while (j != limit) {
1185                 buf = &rxr->buffers[i];
1186                 if (rxr->hdr_split == FALSE)
1187                         goto no_split;
1188
1189                 if (buf->m_head == NULL) {
1190                         mh = m_gethdr(M_NOWAIT, MT_DATA);
1191                         if (mh == NULL)
1192                                 goto update;
1193                 } else
1194                         mh = buf->m_head;
1195
1196                 mh->m_pkthdr.len = mh->m_len = MHLEN;
1198                 mh->m_flags |= M_PKTHDR;
1199                 /* Get the memory mapping */
1200                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1201                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1202                 if (error != 0) {
1203                         printf("Refresh mbufs: hdr dmamap load"
1204                             " failure - %d\n", error);
1205                         m_free(mh);
1206                         buf->m_head = NULL;
1207                         goto update;
1208                 }
1209                 buf->m_head = mh;
1210                 bus_dmamap_sync(rxr->htag, buf->hmap,
1211                     BUS_DMASYNC_PREREAD);
1212                 rxr->base[i].read.hdr_addr =
1213                    htole64(hseg[0].ds_addr);
1214
1215 no_split:
1216                 if (buf->m_pack == NULL) {
1217                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1218                             M_PKTHDR, rxr->mbuf_sz);
1219                         if (mp == NULL)
1220                                 goto update;
1221                 } else
1222                         mp = buf->m_pack;
1223
1224                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1225                 /* Get the memory mapping */
1226                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1227                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1228                 if (error != 0) {
1229                         printf("Refresh mbufs: payload dmamap load"
1230                             " failure - %d\n", error);
1231                         m_free(mp);
1232                         buf->m_pack = NULL;
1233                         goto update;
1234                 }
1235                 buf->m_pack = mp;
1236                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1237                     BUS_DMASYNC_PREREAD);
1238                 rxr->base[i].read.pkt_addr =
1239                    htole64(pseg[0].ds_addr);
1240                 /* Used only when doing header split */
1241                 rxr->base[i].read.hdr_addr = 0;
1242
1243                 refreshed = TRUE;
1244                 /* Next is precalculated */
1245                 i = j;
1246                 rxr->next_refresh = i;
1247                 if (++j == que->num_rx_desc)
1248                         j = 0;
1249         }
1250 update:
1251         if (refreshed) /* Update hardware tail index */
1252                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1253         return;
1254 }
1255
1256
1257 /*********************************************************************
1258  *
1259  *  Allocate memory for rx_buffer structures. Since we use one
1260  *  rx_buffer per descriptor, the maximum number of rx_buffers
1261  *  that we'll need is equal to the number of receive descriptors
1262  *  that we've defined.
1263  *
1264  **********************************************************************/
1265 int
1266 ixl_allocate_rx_data(struct ixl_queue *que)
1267 {
1268         struct rx_ring          *rxr = &que->rxr;
1269         struct ixl_vsi          *vsi = que->vsi;
1270         device_t                dev = vsi->dev;
1271         struct ixl_rx_buf       *buf;
1272         int                     i, bsize, error;
1273
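        /*
         * Each RX ring uses two DMA tags: 'htag' maps small header mbufs
         * (at most MSIZE bytes) used when header split is enabled, and
         * 'ptag' maps payload buffers up to a 16KB jumbo cluster.
         */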
1274         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1275                                    1, 0,        /* alignment, bounds */
1276                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1277                                    BUS_SPACE_MAXADDR,   /* highaddr */
1278                                    NULL, NULL,          /* filter, filterarg */
1279                                    MSIZE,               /* maxsize */
1280                                    1,                   /* nsegments */
1281                                    MSIZE,               /* maxsegsize */
1282                                    0,                   /* flags */
1283                                    NULL,                /* lockfunc */
1284                                    NULL,                /* lockfuncarg */
1285                                    &rxr->htag))) {
1286                 device_printf(dev, "Unable to create RX DMA htag\n");
1287                 return (error);
1288         }
1289
1290         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1291                                    1, 0,        /* alignment, bounds */
1292                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1293                                    BUS_SPACE_MAXADDR,   /* highaddr */
1294                                    NULL, NULL,          /* filter, filterarg */
1295                                    MJUM16BYTES,         /* maxsize */
1296                                    1,                   /* nsegments */
1297                                    MJUM16BYTES,         /* maxsegsize */
1298                                    0,                   /* flags */
1299                                    NULL,                /* lockfunc */
1300                                    NULL,                /* lockfuncarg */
1301                                    &rxr->ptag))) {
1302                 device_printf(dev, "Unable to create RX DMA ptag\n");
1303                 goto free_rx_htag;
1304         }
1305
1306         bsize = sizeof(struct ixl_rx_buf) * que->num_rx_desc;
1307         if (!(rxr->buffers =
1308             (struct ixl_rx_buf *) malloc(bsize,
1309             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1310                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1311                 error = ENOMEM;
1312                 goto free_rx_ptag;
1313         }
1314
1315         for (i = 0; i < que->num_rx_desc; i++) {
1316                 buf = &rxr->buffers[i];
1317                 error = bus_dmamap_create(rxr->htag,
1318                     BUS_DMA_NOWAIT, &buf->hmap);
1319                 if (error) {
1320                         device_printf(dev, "Unable to create RX head map\n");
1321                         goto free_buffers;
1322                 }
1323                 error = bus_dmamap_create(rxr->ptag,
1324                     BUS_DMA_NOWAIT, &buf->pmap);
1325                 if (error) {
1326                         bus_dmamap_destroy(rxr->htag, buf->hmap);
1327                         device_printf(dev, "Unable to create RX pkt map\n");
1328                         goto free_buffers;
1329                 }
1330         }
1331
1332         return 0;
1333 free_buffers:
1334         while (i--) {
1335                 buf = &rxr->buffers[i];
1336                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1337                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1338         }
1339         free(rxr->buffers, M_DEVBUF);
1340         rxr->buffers = NULL;
1341 free_rx_ptag:
1342         bus_dma_tag_destroy(rxr->ptag);
1343         rxr->ptag = NULL;
1344 free_rx_htag:
1345         bus_dma_tag_destroy(rxr->htag);
1346         rxr->htag = NULL;
1347         return (error);
1348 }
1349
1350
1351 /*********************************************************************
1352  *
1353  *  (Re)Initialize the queue receive ring and its buffers.
1354  *
1355  **********************************************************************/
1356 int
1357 ixl_init_rx_ring(struct ixl_queue *que)
1358 {
1359         struct  rx_ring         *rxr = &que->rxr;
1360         struct ixl_vsi          *vsi = que->vsi;
1361 #if defined(INET6) || defined(INET)
1362         struct ifnet            *ifp = vsi->ifp;
1363         struct lro_ctrl         *lro = &rxr->lro;
1364 #endif
1365         struct ixl_rx_buf       *buf;
1366         bus_dma_segment_t       pseg[1], hseg[1];
1367         int                     rsize, nsegs, error = 0;
1368 #ifdef DEV_NETMAP
1369         struct netmap_adapter *na = NA(que->vsi->ifp);
1370         struct netmap_slot *slot;
1371 #endif /* DEV_NETMAP */
1372
1373         IXL_RX_LOCK(rxr);
1374 #ifdef DEV_NETMAP
1375         /* same as in ixl_init_tx_ring() */
1376         slot = netmap_reset(na, NR_RX, que->me, 0);
1377 #endif /* DEV_NETMAP */
1378         /* Clear the ring contents */
1379         rsize = roundup2(que->num_rx_desc *
1380             sizeof(union i40e_rx_desc), DBA_ALIGN);
1381         bzero((void *)rxr->base, rsize);
1382         /* Cleanup any existing buffers */
1383         for (int i = 0; i < que->num_rx_desc; i++) {
1384                 buf = &rxr->buffers[i];
1385                 if (buf->m_head != NULL) {
1386                         bus_dmamap_sync(rxr->htag, buf->hmap,
1387                             BUS_DMASYNC_POSTREAD);
1388                         bus_dmamap_unload(rxr->htag, buf->hmap);
1389                         buf->m_head->m_flags |= M_PKTHDR;
1390                         m_freem(buf->m_head);
1391                 }
1392                 if (buf->m_pack != NULL) {
1393                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1394                             BUS_DMASYNC_POSTREAD);
1395                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1396                         buf->m_pack->m_flags |= M_PKTHDR;
1397                         m_freem(buf->m_pack);
1398                 }
1399                 buf->m_head = NULL;
1400                 buf->m_pack = NULL;
1401         }
1402
1403         /* header split is off */
1404         rxr->hdr_split = FALSE;
1405
1406         /* Now replenish the mbufs */
1407         for (int j = 0; j != que->num_rx_desc; ++j) {
1408                 struct mbuf     *mh, *mp;
1409
1410                 buf = &rxr->buffers[j];
1411 #ifdef DEV_NETMAP
1412                 /*
1413                  * In netmap mode, fill the map and set the buffer
1414                  * address in the NIC ring, considering the offset
1415                  * between the netmap and NIC rings (see comment in
1416                  * ixgbe_setup_transmit_ring() ). No need to allocate
1417                  * an mbuf, so end the block with a continue;
1418                  */
1419                 if (slot) {
1420                         int sj = netmap_idx_n2k(na->rx_rings[que->me], j);
1421                         uint64_t paddr;
1422                         void *addr;
1423
1424                         addr = PNMB(na, slot + sj, &paddr);
1425                         netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1426                         /* Update descriptor and the cached value */
1427                         rxr->base[j].read.pkt_addr = htole64(paddr);
1428                         rxr->base[j].read.hdr_addr = 0;
1429                         continue;
1430                 }
1431 #endif /* DEV_NETMAP */
1432                 /*
1433                 ** Don't allocate a header mbuf when header
1434                 ** split is disabled; it's wasteful.
1435                 */
1436                 if (rxr->hdr_split == FALSE)
1437                         goto skip_head;
1438
1439                 /* First the header */
1440                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1441                 if (buf->m_head == NULL) {
1442                         error = ENOBUFS;
1443                         goto fail;
1444                 }
1445                 m_adj(buf->m_head, ETHER_ALIGN);
1446                 mh = buf->m_head;
1447                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1448                 mh->m_flags |= M_PKTHDR;
1449                 /* Get the memory mapping */
1450                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1451                     buf->hmap, buf->m_head, hseg,
1452                     &nsegs, BUS_DMA_NOWAIT);
1453                 if (error != 0) /* Nothing elegant to do here */
1454                         goto fail;
1455                 bus_dmamap_sync(rxr->htag,
1456                     buf->hmap, BUS_DMASYNC_PREREAD);
1457                 /* Update descriptor */
1458                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1459
1460 skip_head:
1461                 /* Now the payload cluster */
1462                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1463                     M_PKTHDR, rxr->mbuf_sz);
1464                 if (buf->m_pack == NULL) {
1465                         error = ENOBUFS;
1466                         goto fail;
1467                 }
1468                 mp = buf->m_pack;
1469                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1470                 /* Get the memory mapping */
1471                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1472                     buf->pmap, mp, pseg,
1473                     &nsegs, BUS_DMA_NOWAIT);
1474                 if (error != 0)
1475                         goto fail;
1476                 bus_dmamap_sync(rxr->ptag,
1477                     buf->pmap, BUS_DMASYNC_PREREAD);
1478                 /* Update descriptor */
1479                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1480                 rxr->base[j].read.hdr_addr = 0;
1481         }
1482
1483
1484         /* Setup our descriptor indices */
1485         rxr->next_check = 0;
1486         rxr->next_refresh = 0;
1487         rxr->lro_enabled = FALSE;
1488         rxr->split = 0;
1489         rxr->bytes = 0;
1490         rxr->discard = FALSE;
1491
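        /*
         * Point the RX tail at the last descriptor so the hardware owns
         * the entire, freshly replenished ring.
         */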
1492         wr32(vsi->hw, rxr->tail, que->num_rx_desc - 1);
1493         ixl_flush(vsi->hw);
1494
1495 #if defined(INET6) || defined(INET)
1496         /*
1497         ** Now set up the LRO interface:
1498         */
1499         if (ifp->if_capenable & IFCAP_LRO) {
1500                 int err = tcp_lro_init(lro);
1501                 if (err) {
1502                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1503                         goto fail;
1504                 }
1505                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1506                 rxr->lro_enabled = TRUE;
1507                 lro->ifp = vsi->ifp;
1508         }
1509 #endif
1510
1511         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1512             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1513
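        /*
         * NB: on success we fall through to the "fail" label below with
         * error still zero, so the unlock/return path is shared.
         */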
1514 fail:
1515         IXL_RX_UNLOCK(rxr);
1516         return (error);
1517 }
1518
1519
1520 /*********************************************************************
1521  *
1522  *  Free station receive ring data structures
1523  *
1524  **********************************************************************/
1525 void
1526 ixl_free_que_rx(struct ixl_queue *que)
1527 {
1528         struct rx_ring          *rxr = &que->rxr;
1529         struct ixl_rx_buf       *buf;
1530
1531         /* Cleanup any existing buffers */
1532         if (rxr->buffers != NULL) {
1533                 for (int i = 0; i < que->num_rx_desc; i++) {
1534                         buf = &rxr->buffers[i];
1535
1536                         /* Free buffers and unload dma maps */
1537                         ixl_rx_discard(rxr, i);
1538
1539                         bus_dmamap_destroy(rxr->htag, buf->hmap);
1540                         bus_dmamap_destroy(rxr->ptag, buf->pmap);
1541                 }
1542                 free(rxr->buffers, M_DEVBUF);
1543                 rxr->buffers = NULL;
1544         }
1545
1546         if (rxr->htag != NULL) {
1547                 bus_dma_tag_destroy(rxr->htag);
1548                 rxr->htag = NULL;
1549         }
1550         if (rxr->ptag != NULL) {
1551                 bus_dma_tag_destroy(rxr->ptag);
1552                 rxr->ptag = NULL;
1553         }
1554 }
1555
1556 static inline void
1557 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1558 {
1559
1560 #if defined(INET6) || defined(INET)
1561         /*
1562          * At the moment, LRO is only used for IPv4/TCP packets whose TCP
1563          * checksum has been verified by hardware and which carry no VLAN
1564          * tag in the Ethernet header (hardware tag stripping is enabled).
1565          */
1566         if (rxr->lro_enabled &&
1567             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1568             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1569             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1570                 /*
1571                  * Send to the stack if:
1572                  *  - LRO not enabled, or
1573                  *  - no LRO resources, or
1574                  *  - LRO enqueue fails
1575                  */
1576                 if (rxr->lro.lro_cnt != 0)
1577                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1578                                 return;
1579         }
1580 #endif
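        /* LRO did not consume the mbuf (or is unavailable); pass it to the stack. */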
1581         (*ifp->if_input)(ifp, m);
1582 }
1583
1584
1585 static inline void
1586 ixl_rx_discard(struct rx_ring *rxr, int i)
1587 {
1588         struct ixl_rx_buf       *rbuf;
1589
1590         KASSERT(rxr != NULL, ("Receive ring pointer cannot be null"));
1591         KASSERT(i < rxr->que->num_rx_desc, ("Descriptor index must be less than que->num_rx_desc"));
1592
1593         rbuf = &rxr->buffers[i];
1594
1595         /* Free the mbufs in the current chain for the packet */
1596         if (rbuf->fmp != NULL) {
1597                 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1598                 m_freem(rbuf->fmp);
1599                 rbuf->fmp = NULL;
1600         }
1601
1602         /*
1603          * Free the mbufs for the current descriptor and let
1604          * ixl_refresh_mbufs() assign new ones in their place.
1605          */
1606         if (rbuf->m_head) {
1607                 bus_dmamap_sync(rxr->htag, rbuf->hmap, BUS_DMASYNC_POSTREAD);
1608                 bus_dmamap_unload(rxr->htag, rbuf->hmap);
1609                 m_free(rbuf->m_head);
1610                 rbuf->m_head = NULL;
1611         }
1612  
1613         if (rbuf->m_pack) {
1614                 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1615                 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1616                 m_free(rbuf->m_pack);
1617                 rbuf->m_pack = NULL;
1618         }
1619 }
1620
1621 #ifdef RSS
1622 /*
1623 ** ixl_ptype_to_hash: parse the packet type
1624 ** to determine the appropriate RSS hash type.
1625 */
1626 static inline int
1627 ixl_ptype_to_hash(u8 ptype)
1628 {
1629         struct i40e_rx_ptype_decoded    decoded;
1630
1631         decoded = decode_rx_desc_ptype(ptype);
1632
1633         if (!decoded.known)
1634                 return M_HASHTYPE_OPAQUE_HASH;
1635
1636         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
1637                 return M_HASHTYPE_OPAQUE_HASH;
1638
1639         /* Note: anything that gets to this point is IP */
1640         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
1641                 switch (decoded.inner_prot) {
1642                 case I40E_RX_PTYPE_INNER_PROT_TCP:
1643                         return M_HASHTYPE_RSS_TCP_IPV6;
1644                 case I40E_RX_PTYPE_INNER_PROT_UDP:
1645                         return M_HASHTYPE_RSS_UDP_IPV6;
1646                 default:
1647                         return M_HASHTYPE_RSS_IPV6;
1648                 }
1649         }
1650         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
1651                 switch (decoded.inner_prot) {
1652                 case I40E_RX_PTYPE_INNER_PROT_TCP:
1653                         return M_HASHTYPE_RSS_TCP_IPV4;
1654                 case I40E_RX_PTYPE_INNER_PROT_UDP:
1655                         return M_HASHTYPE_RSS_UDP_IPV4;
1656                 default:
1657                         return M_HASHTYPE_RSS_IPV4;
1658                 }
1659         }
1660         /* We should never get here!! */
1661         return M_HASHTYPE_OPAQUE_HASH;
1662 }
1663 #endif /* RSS */
1664
1665 /*********************************************************************
1666  *
1667  *  This routine executes in interrupt context. It replenishes
1668  *  the mbufs in the descriptor ring and passes data that has been
1669  *  DMA'd into host memory up to the network stack.
1670  *
1671  *  We loop at most count times if count is > 0, or until done if
1672  *  count < 0.
1673  *
1674  *  Return TRUE for more work, FALSE for all clean.
1675  *********************************************************************/
1676 bool
1677 ixl_rxeof(struct ixl_queue *que, int count)
1678 {
1679         struct ixl_vsi          *vsi = que->vsi;
1680         struct rx_ring          *rxr = &que->rxr;
1681         struct ifnet            *ifp = vsi->ifp;
1682 #if defined(INET6) || defined(INET)
1683         struct lro_ctrl         *lro = &rxr->lro;
1684 #endif
1685         int                     i, nextp, processed = 0;
1686         union i40e_rx_desc      *cur;
1687         struct ixl_rx_buf       *rbuf, *nbuf;
1688
1689         IXL_RX_LOCK(rxr);
1690
1691 #ifdef DEV_NETMAP
1692         if (netmap_rx_irq(ifp, que->me, &count)) {
1693                 IXL_RX_UNLOCK(rxr);
1694                 return (FALSE);
1695         }
1696 #endif /* DEV_NETMAP */
1697
1698         for (i = rxr->next_check; count != 0;) {
1699                 struct mbuf     *sendmp, *mh, *mp;
1700                 u32             status, error;
1701                 u16             hlen, plen, vtag;
1702                 u64             qword;
1703                 u8              ptype;
1704                 bool            eop;
1705  
1706                 /* Sync the ring. */
1707                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1708                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1709
1710                 cur = &rxr->base[i];
1711                 qword = le64toh(cur->wb.qword1.status_error_len);
1712                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1713                     >> I40E_RXD_QW1_STATUS_SHIFT;
1714                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1715                     >> I40E_RXD_QW1_ERROR_SHIFT;
1716                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1717                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1718                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1719                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1720                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1721                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1722
1723                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1724                         ++rxr->not_done;
1725                         break;
1726                 }
1727                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1728                         break;
1729
1730                 count--;
1731                 sendmp = NULL;
1732                 nbuf = NULL;
1733                 cur->wb.qword1.status_error_len = 0;
1734                 rbuf = &rxr->buffers[i];
1735                 mh = rbuf->m_head;
1736                 mp = rbuf->m_pack;
1737                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1738                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1739                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1740                 else
1741                         vtag = 0;
1742
1743                 /* Remove device access to the rx buffers. */
1744                 if (rbuf->m_head != NULL) {
1745                         bus_dmamap_sync(rxr->htag, rbuf->hmap,
1746                             BUS_DMASYNC_POSTREAD);
1747                         bus_dmamap_unload(rxr->htag, rbuf->hmap);
1748                 }
1749                 if (rbuf->m_pack != NULL) {
1750                         bus_dmamap_sync(rxr->ptag, rbuf->pmap,
1751                             BUS_DMASYNC_POSTREAD);
1752                         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1753                 }
1754
1755                 /*
1756                 ** Make sure bad packets are discarded;
1757                 ** note that only the EOP descriptor has
1758                 ** valid error results.
1759                 */
1760                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1761                         rxr->desc_errs++;
1762                         ixl_rx_discard(rxr, i);
1763                         goto next_desc;
1764                 }
1765
1766                 /* Prefetch the next buffer */
1767                 if (!eop) {
1768                         nextp = i + 1;
1769                         if (nextp == que->num_rx_desc)
1770                                 nextp = 0;
1771                         nbuf = &rxr->buffers[nextp];
1772                         prefetch(nbuf);
1773                 }
1774
1775                 /*
1776                 ** The header mbuf is ONLY used when header
1777                 ** split is enabled; otherwise we get normal
1778                 ** behavior, i.e., both header and payload
1779                 ** are DMA'd into the payload buffer.
1780                 **
1781                 ** Rather than using the fmp/lmp global pointers,
1782                 ** we keep the head of a packet chain in the
1783                 ** buffer struct and pass it along from one
1784                 ** descriptor to the next until we reach EOP.
1785                 */
1786                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1787                         if (hlen > IXL_RX_HDR)
1788                                 hlen = IXL_RX_HDR;
1789                         mh->m_len = hlen;
1790                         mh->m_flags |= M_PKTHDR;
1791                         mh->m_next = NULL;
1792                         mh->m_pkthdr.len = mh->m_len;
1793                         /* Null buf pointer so it is refreshed */
1794                         rbuf->m_head = NULL;
1795                         /*
1796                         ** Check the payload length; this
1797                         ** could be zero if it's a small
1798                         ** packet.
1799                         */
1800                         if (plen > 0) {
1801                                 mp->m_len = plen;
1802                                 mp->m_next = NULL;
1803                                 mp->m_flags &= ~M_PKTHDR;
1804                                 mh->m_next = mp;
1805                                 mh->m_pkthdr.len += mp->m_len;
1806                                 /* Null buf pointer so it is refreshed */
1807                                 rbuf->m_pack = NULL;
1808                                 rxr->split++;
1809                         }
1810                         /*
1811                         ** Now create the forward
1812                         ** chain so that when the packet
1813                         ** completes we won't have to.
1814                         */
1815                         if (eop == 0) {
1816                                 /* stash the chain head */
1817                                 nbuf->fmp = mh;
1818                                 /* Make forward chain */
1819                                 if (plen)
1820                                         mp->m_next = nbuf->m_pack;
1821                                 else
1822                                         mh->m_next = nbuf->m_pack;
1823                         } else {
1824                                 /* Singlet, prepare to send */
1825                                 sendmp = mh;
1826                                 if (vtag) {
1827                                         sendmp->m_pkthdr.ether_vtag = vtag;
1828                                         sendmp->m_flags |= M_VLANTAG;
1829                                 }
1830                         }
1831                 } else {
1832                         /*
1833                         ** Either no header split, or a
1834                         ** secondary piece of a fragmented
1835                         ** split packet.
1836                         */
1837                         mp->m_len = plen;
1838                         /*
1839                         ** See if there is a stored chain head
1840                         ** that tells us what this piece belongs to.
1841                         */
1842                         sendmp = rbuf->fmp;
1843                         rbuf->m_pack = rbuf->fmp = NULL;
1844
1845                         if (sendmp != NULL) /* secondary frag */
1846                                 sendmp->m_pkthdr.len += mp->m_len;
1847                         else {
1848                                 /* first desc of a non-ps chain */
1849                                 sendmp = mp;
1850                                 sendmp->m_flags |= M_PKTHDR;
1851                                 sendmp->m_pkthdr.len = mp->m_len;
1852                         }
1853                         /* Pass the head pointer on */
1854                         if (eop == 0) {
1855                                 nbuf->fmp = sendmp;
1856                                 sendmp = NULL;
1857                                 mp->m_next = nbuf->m_pack;
1858                         }
1859                 }
1860                 ++processed;
1861                 /* Sending this frame? */
1862                 if (eop) {
1863                         sendmp->m_pkthdr.rcvif = ifp;
1864                         /* gather stats */
1865                         rxr->rx_packets++;
1866                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1867                         /* capture data for dynamic ITR adjustment */
1868                         rxr->packets++;
1869                         rxr->bytes += sendmp->m_pkthdr.len;
1870                         /* Set VLAN tag (field only valid in eop desc) */
1871                         if (vtag) {
1872                                 sendmp->m_pkthdr.ether_vtag = vtag;
1873                                 sendmp->m_flags |= M_VLANTAG;
1874                         }
1875                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1876                                 ixl_rx_checksum(sendmp, status, error, ptype);
1877 #ifdef RSS
1878                         sendmp->m_pkthdr.flowid =
1879                             le32toh(cur->wb.qword0.hi_dword.rss);
1880                         M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1881 #else
1882                         sendmp->m_pkthdr.flowid = que->msix;
1883                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1884 #endif
1885                 }
1886 next_desc:
1887                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1888                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1889
1890                 /* Advance our pointers to the next descriptor. */
1891                 if (++i == que->num_rx_desc)
1892                         i = 0;
1893
1894                 /* Now send to the stack or do LRO */
1895                 if (sendmp != NULL) {
1896                         rxr->next_check = i;
1897                         IXL_RX_UNLOCK(rxr);
1898                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1899                         IXL_RX_LOCK(rxr);
1900                         /*
1901                          * Update index used in loop in case another
1902                          * ixl_rxeof() call executes when lock is released
1903                          */
1904                         i = rxr->next_check;
1905                 }
1906
1907                 /* Every 8 descriptors we go to refresh mbufs */
1908                 if (processed == 8) {
1909                         ixl_refresh_mbufs(que, i);
1910                         processed = 0;
1911                 }
1912         }
1913
1914         /* Refresh any remaining buf structs */
1915         if (ixl_rx_unrefreshed(que))
1916                 ixl_refresh_mbufs(que, i);
1917
1918         rxr->next_check = i;
1919
1920         IXL_RX_UNLOCK(rxr);
1921
1922 #if defined(INET6) || defined(INET)
1923         /*
1924          * Flush any outstanding LRO work
1925          */
1926 #if __FreeBSD_version >= 1100105
1927         tcp_lro_flush_all(lro);
1928 #else
1929         struct lro_entry *queued;
1930         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1931                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1932                 tcp_lro_flush(lro, queued);
1933         }
1934 #endif
1935 #endif /* defined(INET6) || defined(INET) */
1936
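        /*
         * NB: every return path in this routine currently reports FALSE
         * (all clean); the "more work" case described in the header
         * comment is not signalled here.
         */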
1937         return (FALSE);
1938 }
1939
1940
1941 /*********************************************************************
1942  *
1943  *  Verify that the hardware indicated that the checksum is valid.
1944  *  Inform the stack about the status of checksum so that stack
1945  *  doesn't spend time verifying the checksum.
1946  *
1947  *********************************************************************/
1948 static void
1949 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1950 {
1951         struct i40e_rx_ptype_decoded decoded;
1952
1953         decoded = decode_rx_desc_ptype(ptype);
1954
1955         /* Errors? */
1956         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1957             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1958                 mp->m_pkthdr.csum_flags = 0;
1959                 return;
1960         }
1961
1962         /* IPv6 with extension headers likely have bad csum */
1963         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1964             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1965                 if (status &
1966                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1967                         mp->m_pkthdr.csum_flags = 0;
1968                         return;
1969                 }
1970
1971  
1972         /* IP Checksum Good */
1973         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1974         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1975
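        /*
         * The driver treats the L3L4P status bit as confirmation that the
         * hardware also checked the L4 checksum (failures were already caught
         * by the error bits above).  CSUM_DATA_VALID | CSUM_PSEUDO_HDR with a
         * csum_data of 0xffff is the usual mbuf convention for a fully
         * verified payload checksum.
         */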
1976         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1977                 mp->m_pkthdr.csum_flags |= 
1978                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1979                 mp->m_pkthdr.csum_data |= htons(0xffff);
1980         }
1981         return;
1982 }
1983
1984 #if __FreeBSD_version >= 1100000
1985 uint64_t
1986 ixl_get_counter(if_t ifp, ift_counter cnt)
1987 {
1988         struct ixl_vsi *vsi;
1989
1990         vsi = if_getsoftc(ifp);
1991
1992         switch (cnt) {
1993         case IFCOUNTER_IPACKETS:
1994                 return (vsi->ipackets);
1995         case IFCOUNTER_IERRORS:
1996                 return (vsi->ierrors);
1997         case IFCOUNTER_OPACKETS:
1998                 return (vsi->opackets);
1999         case IFCOUNTER_OERRORS:
2000                 return (vsi->oerrors);
2001         case IFCOUNTER_COLLISIONS:
2002                 /* Collisions are impossible in (full-duplex) 10G/40G Ethernet */
2003                 return (0);
2004         case IFCOUNTER_IBYTES:
2005                 return (vsi->ibytes);
2006         case IFCOUNTER_OBYTES:
2007                 return (vsi->obytes);
2008         case IFCOUNTER_IMCASTS:
2009                 return (vsi->imcasts);
2010         case IFCOUNTER_OMCASTS:
2011                 return (vsi->omcasts);
2012         case IFCOUNTER_IQDROPS:
2013                 return (vsi->iqdrops);
2014         case IFCOUNTER_OQDROPS:
2015                 return (vsi->oqdrops);
2016         case IFCOUNTER_NOPROTO:
2017                 return (vsi->noproto);
2018         default:
2019                 return (if_get_counter_default(ifp, cnt));
2020         }
2021 }
2022 #endif
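
/*
 * ixl_get_counter() is an if_get_counter_t callback.  A minimal sketch of
 * how an attach path would typically register it (the actual call site is
 * not in this file; the standard ifnet accessor is assumed):
 *
 *	if_setgetcounterfn(ifp, ixl_get_counter);
 */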
2023
2024 /*
2025  * Set the TX and RX ring sizes, falling back to defaults for invalid values.
2026  */
2027 void
2028 ixl_vsi_setup_rings_size(struct ixl_vsi * vsi, int tx_ring_size, int rx_ring_size)
2029 {
2030         struct device * dev = vsi->dev;
2031
2032         if (tx_ring_size < IXL_MIN_RING
2033              || tx_ring_size > IXL_MAX_RING
2034              || tx_ring_size % IXL_RING_INCREMENT != 0) {
2035                 device_printf(dev, "Invalid tx_ring_size value of %d set!\n",
2036                     tx_ring_size);
2037                 device_printf(dev, "tx_ring_size must be between %d and %d, "
2038                     "inclusive, and must be a multiple of %d\n",
2039                     IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT);
2040                 device_printf(dev, "Using default value of %d instead\n",
2041                     IXL_DEFAULT_RING);
2042                 vsi->num_tx_desc = IXL_DEFAULT_RING;
2043         } else
2044                 vsi->num_tx_desc = tx_ring_size;
2045
2046         if (rx_ring_size < IXL_MIN_RING
2047              || rx_ring_size > IXL_MAX_RING
2048              || rx_ring_size % IXL_RING_INCREMENT != 0) {
2049                 device_printf(dev, "Invalid rx_ring_size value of %d set!\n",
2050                     rx_ring_size);
2051                 device_printf(dev, "rx_ring_size must be between %d and %d, "
2052                     "inclusive, and must be a multiple of %d\n",
2053                     IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT);
2054                 device_printf(dev, "Using default value of %d instead\n",
2055                     IXL_DEFAULT_RING);
2056                 vsi->num_rx_desc = IXL_DEFAULT_RING;
2057         } else
2058                 vsi->num_rx_desc = rx_ring_size;
2059
2060         device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
2061                 vsi->num_tx_desc, vsi->num_rx_desc);
2062
2063 }
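
/*
 * A sketch of the expected call from the attach path (an assumption; the
 * caller is not in this file, and the tunable-backed variable names below
 * are illustrative only):
 *
 *	ixl_vsi_setup_rings_size(vsi, ixl_tx_ring_size, ixl_rx_ring_size);
 */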
2064
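/*
 * Trigger a software interrupt on the given queue's interrupt vector so
 * its handler runs and can recover a stalled TX ring.
 */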
2065 static void
2066 ixl_queue_sw_irq(struct ixl_vsi *vsi, int qidx)
2067 {
2068         struct i40e_hw *hw = vsi->hw;
2069         u32     reg, mask;
2070
2071         if ((vsi->flags & IXL_FLAGS_IS_VF) != 0) {
2072                 mask = (I40E_VFINT_DYN_CTLN1_INTENA_MASK |
2073                         I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
2074                         I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK);
2075
2076                 reg = I40E_VFINT_DYN_CTLN1(qidx);
2077         } else {
2078                 mask = (I40E_PFINT_DYN_CTLN_INTENA_MASK |
2079                                 I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
2080                                 I40E_PFINT_DYN_CTLN_ITR_INDX_MASK);
2081
2082                 reg = ((vsi->flags & IXL_FLAGS_USES_MSIX) != 0) ?
2083                         I40E_PFINT_DYN_CTLN(qidx) : I40E_PFINT_DYN_CTL0;
2084         }
2085
2086         wr32(hw, reg, mask);
2087 }
2088
2089 int
2090 ixl_queue_hang_check(struct ixl_vsi *vsi)
2091 {
2092         struct ixl_queue *que = vsi->queues;
2093         device_t dev = vsi->dev;
2094         struct tx_ring *txr;
2095         s32 timer, new_timer;
2096         int hung = 0;
2097
2098         for (int i = 0; i < vsi->num_queues; i++, que++) {
2099                 txr = &que->txr;
2100                 /*
2101                  * If watchdog_timer is equal to the default value set by ixl_txeof,
2102                  * just subtract hz and move on - the queue is most probably
2103                  * running. Otherwise check the value.
2104                  */
2105                 if (atomic_cmpset_rel_32(&txr->watchdog_timer,
2106                                         IXL_WATCHDOG, (IXL_WATCHDOG) - hz) == 0) {
2107                         timer = atomic_load_acq_32(&txr->watchdog_timer);
2108                         /*
2109                          * Again - if the timer was reset to the default value
2110                          * then the queue is running. Otherwise check whether
2111                          * the watchdog expired and act accordingly.
2112                          */
2113
2114                         if (timer > 0 && timer != IXL_WATCHDOG) {
2115                                 new_timer = timer - hz;
2116                                 if (new_timer <= 0) {
2117                                         atomic_store_rel_32(&txr->watchdog_timer, -1);
2118                                         device_printf(dev, "WARNING: queue %d "
2119                                                         "appears to be hung!\n", que->me);
2120                                         ++hung;
2121                                         /* Try to unblock the queue with SW IRQ */
2122                                         ixl_queue_sw_irq(vsi, i);
2123                                 } else {
2124                                         /*
2125                                          * If this fails, something in the TX path has
2126                                          * updated the watchdog, which means the TX path
2127                                          * is still working and the watchdog doesn't
2128                                          * need to count down.
2129                                          */
2130                                         atomic_cmpset_rel_32(&txr->watchdog_timer,
2131                                                         timer, new_timer);
2132                                 }
2133                         }
2134                 }
2135         }
2136
2137         return (hung);
2138 }
2139