1 /******************************************************************************
2
3   Copyright (c) 2013-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was seperated to allow usage by
38 **          both the PF and VF drivers.
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46
47 #include "ixl.h"
48
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52
53 /* Local Prototypes */
54 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
56 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
57 static int      ixl_tx_setup_offload(struct ixl_queue *,
58                     struct mbuf *, u32 *, u32 *);
59 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60
61 static inline void ixl_rx_discard(struct rx_ring *, int);
62 static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
63                     struct mbuf *, u8);
64
65 static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
68 static inline u32 ixl_get_tx_head(struct ixl_queue *que);
69
70 #ifdef DEV_NETMAP
71 #include <dev/netmap/if_ixl_netmap.h>
72 int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
73 #endif /* DEV_NETMAP */
74
75 /*
76  * Write the driver's default RSS key into the buffer pointed to by @key.
77  */
78 void
79 ixl_get_default_rss_key(u32 *key)
80 {
81         MPASS(key != NULL);
82
83         u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
84             0x183cfd8c, 0xce880440, 0x580cbc3c,
85             0x35897377, 0x328b25e1, 0x4fa98922,
86             0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
87             0x0, 0x0, 0x0};
88
89         bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
90 }
91
92 /*
93 ** Multiqueue Transmit driver
94 */
95 int
96 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
97 {
98         struct ixl_vsi          *vsi = ifp->if_softc;
99         struct ixl_queue        *que;
100         struct tx_ring          *txr;
101         int                     err, i;
102 #ifdef RSS
103         u32                     bucket_id;
104 #endif
105
106         /*
107         ** Which queue to use:
108         **
109         ** When doing RSS, map it to the same outbound
110         ** queue as the incoming flow would be mapped to.
111 ** If everything is set up correctly, it should be
112 ** the same bucket the current CPU is assigned to.
113         */
114         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
115 #ifdef  RSS
116                 if (rss_hash2bucket(m->m_pkthdr.flowid,
117                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
118                         i = bucket_id % vsi->num_queues;
119                 } else
120 #endif
121                         i = m->m_pkthdr.flowid % vsi->num_queues;
122         } else
123                 i = curcpu % vsi->num_queues;
124
125         que = &vsi->queues[i];
126         txr = &que->txr;
127
128         err = drbr_enqueue(ifp, txr->br, m);
129         if (err)
130                 return (err);
131         if (IXL_TX_TRYLOCK(txr)) {
132                 ixl_mq_start_locked(ifp, txr);
133                 IXL_TX_UNLOCK(txr);
134         } else
135                 taskqueue_enqueue(que->tq, &que->tx_task);
136
137         return (0);
138 }
139
140 int
141 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
142 {
143         struct ixl_queue        *que = txr->que;
144         struct ixl_vsi          *vsi = que->vsi;
145         struct mbuf             *next;
146         int                     err = 0;
147
148
149         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
150             vsi->link_active == 0)
151                 return (ENETDOWN);
152
153         /* Process the transmit queue */
154         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
155                 if ((err = ixl_xmit(que, &next)) != 0) {
156                         if (next == NULL)
157                                 drbr_advance(ifp, txr->br);
158                         else
159                                 drbr_putback(ifp, txr->br, next);
160                         break;
161                 }
162                 drbr_advance(ifp, txr->br);
163                 /* Send a copy of the frame to the BPF listener */
164                 ETHER_BPF_MTAP(ifp, next);
165                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
166                         break;
167         }
168
169         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
170                 ixl_txeof(que);
171
172         return (err);
173 }
174
175 /*
176  * Called from a taskqueue to drain queued transmit packets.
177  */
178 void
179 ixl_deferred_mq_start(void *arg, int pending)
180 {
181         struct ixl_queue        *que = arg;
182         struct tx_ring          *txr = &que->txr;
183         struct ixl_vsi          *vsi = que->vsi;
184         struct ifnet            *ifp = vsi->ifp;
185         
186         IXL_TX_LOCK(txr);
187         if (!drbr_empty(ifp, txr->br))
188                 ixl_mq_start_locked(ifp, txr);
189         IXL_TX_UNLOCK(txr);
190 }
191
192 /*
193 ** Flush all queue ring buffers
194 */
195 void
196 ixl_qflush(struct ifnet *ifp)
197 {
198         struct ixl_vsi  *vsi = ifp->if_softc;
199
200         for (int i = 0; i < vsi->num_queues; i++) {
201                 struct ixl_queue *que = &vsi->queues[i];
202                 struct tx_ring  *txr = &que->txr;
203                 struct mbuf     *m;
204                 IXL_TX_LOCK(txr);
205                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
206                         m_freem(m);
207                 IXL_TX_UNLOCK(txr);
208         }
209         if_qflush(ifp);
210 }
211
212 /*
213 ** Find mbuf chains passed to the driver 
214 ** that are 'sparse', using more than 8
215 ** mbufs to deliver an mss-size chunk of data
216 */
217 static inline bool
218 ixl_tso_detect_sparse(struct mbuf *mp)
219 {
220         struct mbuf     *m;
221         int             num, mss;
222
223         num = 0;
224         mss = mp->m_pkthdr.tso_segsz;
225
226         /* Exclude first mbuf; assume it contains all headers */
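        /*
        ** Count how many mbufs it takes to cover each MSS-sized window of
        ** payload; the chain is considered sparse if any window needs more
        ** than IXL_SPARSE_CHAIN mbufs.
        */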
227         for (m = mp->m_next; m != NULL; m = m->m_next) {
230                 num++;
231                 mss -= m->m_len % mp->m_pkthdr.tso_segsz;
232
233                 if (mss < 1) {
234                         if (num > IXL_SPARSE_CHAIN)
235                                 return (true);
236                         num = (mss == 0) ? 0 : 1;
237                         mss += mp->m_pkthdr.tso_segsz;
238                 }
239         }
240
241         return (false);
242 }
243
244
245 /*********************************************************************
246  *
247  *  This routine maps the mbufs to tx descriptors, allowing the
248  *  TX engine to transmit the packets. 
249  *      - return 0 on success, positive on failure
250  *
251  **********************************************************************/
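/* Flags set on the last descriptor of each frame: EOP marks end-of-packet,
 * RS asks the hardware to report completion status. */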
252 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
253
254 static int
255 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
256 {
257         struct ixl_vsi          *vsi = que->vsi;
258         struct i40e_hw          *hw = vsi->hw;
259         struct tx_ring          *txr = &que->txr;
260         struct ixl_tx_buf       *buf;
261         struct i40e_tx_desc     *txd = NULL;
262         struct mbuf             *m_head, *m;
263         int                     i, j, error, nsegs;
264         int                     first, last = 0;
265         u16                     vtag = 0;
266         u32                     cmd, off;
267         bus_dmamap_t            map;
268         bus_dma_tag_t           tag;
269         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
270
271         cmd = off = 0;
272         m_head = *m_headp;
273
274         /*
275          * Important to capture the first descriptor
276          * used because it will contain the index of
277          * the one we tell the hardware to report back
278          */
279         first = txr->next_avail;
280         buf = &txr->buffers[first];
281         map = buf->map;
282         tag = txr->tx_tag;
283
284         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
285                 /* Use larger mapping for TSO */
286                 tag = txr->tso_tag;
287                 if (ixl_tso_detect_sparse(m_head)) {
288                         m = m_defrag(m_head, M_NOWAIT);
289                         if (m == NULL) {
290                                 m_freem(*m_headp);
291                                 *m_headp = NULL;
292                                 return (ENOBUFS);
293                         }
294                         *m_headp = m;
295                 }
296         }
297
298         /*
299          * Map the packet for DMA.
300          */
301         error = bus_dmamap_load_mbuf_sg(tag, map,
302             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
303
304         if (error == EFBIG) {
305                 struct mbuf *m;
306
307                 m = m_defrag(*m_headp, M_NOWAIT);
308                 if (m == NULL) {
309                         que->mbuf_defrag_failed++;
310                         m_freem(*m_headp);
311                         *m_headp = NULL;
312                         return (ENOBUFS);
313                 }
314                 *m_headp = m;
315
316                 /* Try it again */
317                 error = bus_dmamap_load_mbuf_sg(tag, map,
318                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
319
320                 if (error != 0) {
321                         que->tx_dmamap_failed++;
322                         m_freem(*m_headp);
323                         *m_headp = NULL;
324                         return (error);
325                 }
326         } else if (error != 0) {
327                 que->tx_dmamap_failed++;
328                 m_freem(*m_headp);
329                 *m_headp = NULL;
330                 return (error);
331         }
332
333         /* Make certain there are enough descriptors */
334         if (nsegs > txr->avail - 2) {
335                 txr->no_desc++;
336                 error = ENOBUFS;
337                 goto xmit_fail;
338         }
339         m_head = *m_headp;
340
341         /* Set up the TSO/CSUM offload */
342         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
343                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
344                 if (error)
345                         goto xmit_fail;
346         }
347
348         cmd |= I40E_TX_DESC_CMD_ICRC;
349         /* Grab the VLAN tag */
350         if (m_head->m_flags & M_VLANTAG) {
351                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
352                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
353         }
354
355         i = txr->next_avail;
356         for (j = 0; j < nsegs; j++) {
357                 bus_size_t seglen;
358
359                 buf = &txr->buffers[i];
360                 buf->tag = tag; /* Keep track of the type tag */
361                 txd = &txr->base[i];
362                 seglen = segs[j].ds_len;
363
364                 txd->buffer_addr = htole64(segs[j].ds_addr);
365                 txd->cmd_type_offset_bsz =
366                     htole64(I40E_TX_DESC_DTYPE_DATA
367                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
368                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
369                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
370                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
371
372                 last = i; /* descriptor that will get completion IRQ */
373
374                 if (++i == que->num_desc)
375                         i = 0;
376
377                 buf->m_head = NULL;
378                 buf->eop_index = -1;
379         }
380         /* Set the last descriptor for report */
381         txd->cmd_type_offset_bsz |=
382             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
383         txr->avail -= nsegs;
384         txr->next_avail = i;
385
386         buf->m_head = m_head;
387         /* Swap the dma map between the first and last descriptor */
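        /*
         * The loaded map moves to the last buffer (which also holds the
         * mbuf), and the unused map from that slot takes the first buffer's
         * place; cleanup then unloads the loaded map when the EOP
         * descriptor completes.
         */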
388         txr->buffers[first].map = buf->map;
389         buf->map = map;
390         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
391
392         /* Set the index of the descriptor that will be marked done */
393         buf = &txr->buffers[first];
394         buf->eop_index = last;
395
396         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
397             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
398         /*
399          * Advance the Transmit Descriptor Tail; this tells the
400          * hardware that this frame is available to transmit.
401          */
402         ++txr->total_packets;
403         wr32(hw, txr->tail, i);
404
405         /* Mark outstanding work */
406         atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
407         return (0);
408
409 xmit_fail:
410         bus_dmamap_unload(tag, buf->map);
411         return (error);
412 }
413
414
415 /*********************************************************************
416  *
417  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
418  *  the information needed to transmit a packet on the wire. This is
419  *  called only once at attach; setup is done on every reset.
420  *
421  **********************************************************************/
422 int
423 ixl_allocate_tx_data(struct ixl_queue *que)
424 {
425         struct tx_ring          *txr = &que->txr;
426         struct ixl_vsi          *vsi = que->vsi;
427         device_t                dev = vsi->dev;
428         struct ixl_tx_buf       *buf;
429         int                     error = 0;
430
431         /*
432          * Setup DMA descriptor areas.
433          */
434         if ((error = bus_dma_tag_create(NULL,           /* parent */
435                                1, 0,                    /* alignment, bounds */
436                                BUS_SPACE_MAXADDR,       /* lowaddr */
437                                BUS_SPACE_MAXADDR,       /* highaddr */
438                                NULL, NULL,              /* filter, filterarg */
439                                IXL_TSO_SIZE,            /* maxsize */
440                                IXL_MAX_TX_SEGS,         /* nsegments */
441                                PAGE_SIZE,               /* maxsegsize */
442                                0,                       /* flags */
443                                NULL,                    /* lockfunc */
444                                NULL,                    /* lockfuncarg */
445                                &txr->tx_tag))) {
446                 device_printf(dev,"Unable to allocate TX DMA tag\n");
447                 goto fail;
448         }
449
450         /* Make a special tag for TSO */
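        /* (identical to the TX tag above except it allows up to
         *  IXL_MAX_TSO_SEGS scatter segments instead of IXL_MAX_TX_SEGS) */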
451         if ((error = bus_dma_tag_create(NULL,           /* parent */
452                                1, 0,                    /* alignment, bounds */
453                                BUS_SPACE_MAXADDR,       /* lowaddr */
454                                BUS_SPACE_MAXADDR,       /* highaddr */
455                                NULL, NULL,              /* filter, filterarg */
456                                IXL_TSO_SIZE,            /* maxsize */
457                                IXL_MAX_TSO_SEGS,        /* nsegments */
458                                PAGE_SIZE,               /* maxsegsize */
459                                0,                       /* flags */
460                                NULL,                    /* lockfunc */
461                                NULL,                    /* lockfuncarg */
462                                &txr->tso_tag))) {
463                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
464                 goto fail;
465         }
466
467         if (!(txr->buffers =
468             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
469             que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
470                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
471                 error = ENOMEM;
472                 goto fail;
473         }
474
475         /* Create the descriptor buffer default dma maps */
476         buf = txr->buffers;
477         for (int i = 0; i < que->num_desc; i++, buf++) {
478                 buf->tag = txr->tx_tag;
479                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
480                 if (error != 0) {
481                         device_printf(dev, "Unable to create TX DMA map\n");
482                         goto fail;
483                 }
484         }
485 fail:
486         return (error);
487 }
488
489
490 /*********************************************************************
491  *
492  *  (Re)Initialize a queue transmit ring.
493  *      - called by init, it clears the descriptor ring,
494  *        and frees any stale mbufs 
495  *
496  **********************************************************************/
497 void
498 ixl_init_tx_ring(struct ixl_queue *que)
499 {
500 #ifdef DEV_NETMAP
501         struct netmap_adapter *na = NA(que->vsi->ifp);
502         struct netmap_slot *slot;
503 #endif /* DEV_NETMAP */
504         struct tx_ring          *txr = &que->txr;
505         struct ixl_tx_buf       *buf;
506
507         /* Clear the old ring contents */
508         IXL_TX_LOCK(txr);
509
510 #ifdef DEV_NETMAP
511         /*
512          * (under lock): if in netmap mode, do some consistency
513          * checks and set slot to entry 0 of the netmap ring.
514          */
515         slot = netmap_reset(na, NR_TX, que->me, 0);
516 #endif /* DEV_NETMAP */
517
518         bzero((void *)txr->base,
519               (sizeof(struct i40e_tx_desc)) * que->num_desc);
520
521         /* Reset indices */
522         txr->next_avail = 0;
523         txr->next_to_clean = 0;
524
525         /* Reset watchdog status */
526         txr->watchdog_timer = 0;
527
528 #ifdef IXL_FDIR
529         /* Initialize flow director */
530         txr->atr_rate = ixl_atr_rate;
531         txr->atr_count = 0;
532 #endif
533         /* Free any existing tx mbufs. */
534         buf = txr->buffers;
535         for (int i = 0; i < que->num_desc; i++, buf++) {
536                 if (buf->m_head != NULL) {
537                         bus_dmamap_sync(buf->tag, buf->map,
538                             BUS_DMASYNC_POSTWRITE);
539                         bus_dmamap_unload(buf->tag, buf->map);
540                         m_freem(buf->m_head);
541                         buf->m_head = NULL;
542                 }
543 #ifdef DEV_NETMAP
544                 /*
545                  * In netmap mode, set the map for the packet buffer.
546                  * NOTE: Some drivers (not this one) also need to set
547                  * the physical buffer address in the NIC ring.
548                  * netmap_idx_n2k() maps a nic index, i, into the corresponding
549                  * netmap slot index, si
550                  */
551                 if (slot) {
552                         int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
553                         netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
554                 }
555 #endif /* DEV_NETMAP */
556                 /* Clear the EOP index */
557                 buf->eop_index = -1;
558         }
559
560         /* Set number of descriptors available */
561         txr->avail = que->num_desc;
562
563         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
564             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
565         IXL_TX_UNLOCK(txr);
566 }
567
568
569 /*********************************************************************
570  *
571  *  Free transmit ring related data structures.
572  *
573  **********************************************************************/
574 void
575 ixl_free_que_tx(struct ixl_queue *que)
576 {
577         struct tx_ring *txr = &que->txr;
578         struct ixl_tx_buf *buf;
579
580         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
581
582         for (int i = 0; i < que->num_desc; i++) {
583                 buf = &txr->buffers[i];
584                 if (buf->m_head != NULL) {
585                         bus_dmamap_sync(buf->tag, buf->map,
586                             BUS_DMASYNC_POSTWRITE);
587                         bus_dmamap_unload(buf->tag,
588                             buf->map);
589                         m_freem(buf->m_head);
590                         buf->m_head = NULL;
591                         if (buf->map != NULL) {
592                                 bus_dmamap_destroy(buf->tag,
593                                     buf->map);
594                                 buf->map = NULL;
595                         }
596                 } else if (buf->map != NULL) {
597                         bus_dmamap_unload(buf->tag,
598                             buf->map);
599                         bus_dmamap_destroy(buf->tag,
600                             buf->map);
601                         buf->map = NULL;
602                 }
603         }
604         if (txr->br != NULL)
605                 buf_ring_free(txr->br, M_DEVBUF);
606         if (txr->buffers != NULL) {
607                 free(txr->buffers, M_DEVBUF);
608                 txr->buffers = NULL;
609         }
610         if (txr->tx_tag != NULL) {
611                 bus_dma_tag_destroy(txr->tx_tag);
612                 txr->tx_tag = NULL;
613         }
614         if (txr->tso_tag != NULL) {
615                 bus_dma_tag_destroy(txr->tso_tag);
616                 txr->tso_tag = NULL;
617         }
618
619         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
620         return;
621 }
622
623 /*********************************************************************
624  *
625  *  Set up the descriptor for hardware offloads
626  *
627  **********************************************************************/
628
629 static int
630 ixl_tx_setup_offload(struct ixl_queue *que,
631     struct mbuf *mp, u32 *cmd, u32 *off)
632 {
633         struct ether_vlan_header        *eh;
634 #ifdef INET
635         struct ip                       *ip = NULL;
636 #endif
637         struct tcphdr                   *th = NULL;
638 #ifdef INET6
639         struct ip6_hdr                  *ip6;
640 #endif
641         int                             elen, ip_hlen = 0, tcp_hlen;
642         u16                             etype;
643         u8                              ipproto = 0;
644         bool                            tso = FALSE;
645
646         /* Set up the TSO context descriptor if required */
647         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
648                 tso = ixl_tso_setup(que, mp);
649                 if (tso)
650                         ++que->tso;
651                 else
652                         return (ENXIO);
653         }
654
655         /*
656          * Determine where frame payload starts.
657          * Jump over vlan headers if already present,
658          * helpful for QinQ too.
659          */
660         eh = mtod(mp, struct ether_vlan_header *);
661         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
662                 etype = ntohs(eh->evl_proto);
663                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
664         } else {
665                 etype = ntohs(eh->evl_encap_proto);
666                 elen = ETHER_HDR_LEN;
667         }
668
669         switch (etype) {
670 #ifdef INET
671                 case ETHERTYPE_IP:
672                         ip = (struct ip *)(mp->m_data + elen);
673                         ip_hlen = ip->ip_hl << 2;
674                         ipproto = ip->ip_p;
675                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
676                         /* The IP checksum must be recalculated with TSO */
677                         if (tso)
678                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
679                         else
680                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
681                         break;
682 #endif
683 #ifdef INET6
684                 case ETHERTYPE_IPV6:
685                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
686                         ip_hlen = sizeof(struct ip6_hdr);
687                         ipproto = ip6->ip6_nxt;
688                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
689                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
690                         break;
691 #endif
692                 default:
693                         break;
694         }
695
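        /* The hardware takes MACLEN in 2-byte units and IPLEN (and the L4
         * length below) in 4-byte units, hence the shifts. */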
696         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
697         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
698
699         switch (ipproto) {
700                 case IPPROTO_TCP:
701                         tcp_hlen = th->th_off << 2;
702                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
703                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
704                                 *off |= (tcp_hlen >> 2) <<
705                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
706                         }
707 #ifdef IXL_FDIR
708                         ixl_atr(que, th, etype);
709 #endif
710                         break;
711                 case IPPROTO_UDP:
712                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
713                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
714                                 *off |= (sizeof(struct udphdr) >> 2) <<
715                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
716                         }
717                         break;
718
719                 case IPPROTO_SCTP:
720                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
721                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
722                                 *off |= (sizeof(struct sctphdr) >> 2) <<
723                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
724                         }
725                         /* Fall Thru */
726                 default:
727                         break;
728         }
729
730         return (0);
731 }
732
733
734 /**********************************************************************
735  *
736  *  Set up the context for hardware segmentation offload (TSO)
737  *
738  **********************************************************************/
739 static bool
740 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
741 {
742         struct tx_ring                  *txr = &que->txr;
743         struct i40e_tx_context_desc     *TXD;
744         struct ixl_tx_buf               *buf;
745         u32                             cmd, mss, type, tsolen;
746         u16                             etype;
747         int                             idx, elen, ip_hlen, tcp_hlen;
748         struct ether_vlan_header        *eh;
749 #ifdef INET
750         struct ip                       *ip;
751 #endif
752 #ifdef INET6
753         struct ip6_hdr                  *ip6;
754 #endif
755 #if defined(INET6) || defined(INET)
756         struct tcphdr                   *th;
757 #endif
758         u64                             type_cmd_tso_mss;
759
760         /*
761          * Determine where frame payload starts.
762          * Jump over vlan headers if already present
763          */
764         eh = mtod(mp, struct ether_vlan_header *);
765         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
766                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
767                 etype = eh->evl_proto;
768         } else {
769                 elen = ETHER_HDR_LEN;
770                 etype = eh->evl_encap_proto;
771         }
772
773         switch (ntohs(etype)) {
774 #ifdef INET6
775         case ETHERTYPE_IPV6:
776                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
777                 if (ip6->ip6_nxt != IPPROTO_TCP)
778                         return (FALSE);
779                 ip_hlen = sizeof(struct ip6_hdr);
780                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
781                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
782                 tcp_hlen = th->th_off << 2;
783                 /*
784                  * The corresponding flag is set by the stack in the IPv4
785                  * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
786                  * So, set it here because the rest of the flow requires it.
787                  */
788                 mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
789                 break;
790 #endif
791 #ifdef INET
792         case ETHERTYPE_IP:
793                 ip = (struct ip *)(mp->m_data + elen);
794                 if (ip->ip_p != IPPROTO_TCP)
795                         return (FALSE);
796                 ip->ip_sum = 0;
797                 ip_hlen = ip->ip_hl << 2;
798                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
799                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
800                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
801                 tcp_hlen = th->th_off << 2;
802                 break;
803 #endif
804         default:
805                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
806                     __func__, ntohs(etype));
807                 return FALSE;
808         }
809
810         /* Ensure we have at least the IP+TCP header in the first mbuf. */
811         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
812                 return FALSE;
813
814         idx = txr->next_avail;
815         buf = &txr->buffers[idx];
816         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
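        /* TSO length is payload only: total frame length minus the
         * L2, L3 and L4 headers. */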
817         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
818
819         type = I40E_TX_DESC_DTYPE_CONTEXT;
820         cmd = I40E_TX_CTX_DESC_TSO;
821         /* TSO MSS must not be less than 64 */
822         if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
823                 que->mss_too_small++;
824                 mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
825         }
826         mss = mp->m_pkthdr.tso_segsz;
827
828         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
829             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
830             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
831             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
832         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
833
834         TXD->tunneling_params = htole32(0);
835         buf->m_head = NULL;
836         buf->eop_index = -1;
837
838         if (++idx == que->num_desc)
839                 idx = 0;
840
841         txr->avail--;
842         txr->next_avail = idx;
843
844         return TRUE;
845 }
846
847 /*             
848 ** ixl_get_tx_head - Retrieve the value from the
849 **    location where the HW records its HEAD index
850 */
851 static inline u32
852 ixl_get_tx_head(struct ixl_queue *que)
853 {
854         struct tx_ring  *txr = &que->txr;
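        /* The head write-back slot lives just past the last descriptor. */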
855         void *head = &txr->base[que->num_desc];
856         return LE32_TO_CPU(*(volatile __le32 *)head);
857 }
858
859 /**********************************************************************
860  *
861  *  Examine each tx_buffer in the used queue. If the hardware is done
862  *  processing the packet then free associated resources. The
863  *  tx_buffer is put back on the free queue.
864  *
865  **********************************************************************/
866 bool
867 ixl_txeof(struct ixl_queue *que)
868 {
869         struct tx_ring          *txr = &que->txr;
870         u32                     first, last, head, done, processed;
871         struct ixl_tx_buf       *buf;
872         struct i40e_tx_desc     *tx_desc, *eop_desc;
873
874
875         mtx_assert(&txr->mtx, MA_OWNED);
876
877 #ifdef DEV_NETMAP
878         // XXX todo: implement moderation
879         if (netmap_tx_irq(que->vsi->ifp, que->me))
880                 return FALSE;
881 #endif /* DEV_NETMAP */
882
883         /* These are not the descriptors you seek, move along :) */
884         if (txr->avail == que->num_desc) {
885                 atomic_store_rel_32(&txr->watchdog_timer, 0);
886                 return FALSE;
887         }
888
889         processed = 0;
890         first = txr->next_to_clean;
891         buf = &txr->buffers[first];
892         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
893         last = buf->eop_index;
894         if (last == -1)
895                 return FALSE;
896         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
897
898         /* Get the Head WB value */
899         head = ixl_get_tx_head(que);
900
901         /*
902         ** Get the index of the first descriptor
903         ** BEYOND the EOP and call that 'done'.
904         ** I do this so the comparison in the
905         ** inner while loop below can be simple
906         */
907         if (++last == que->num_desc) last = 0;
908         done = last;
909
910         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
911             BUS_DMASYNC_POSTREAD);
912         /*
913         ** The HEAD index of the ring is written to a
914         ** defined location; this, rather than a done bit,
915         ** is what is used to keep track of what must be
916         ** 'cleaned'.
917         */
918         while (first != head) {
919                 /* We clean the range of the packet */
920                 while (first != done) {
921                         ++txr->avail;
922                         ++processed;
923
924                         if (buf->m_head) {
925                                 txr->bytes += /* for ITR adjustment */
926                                     buf->m_head->m_pkthdr.len;
927                                 txr->tx_bytes += /* for TX stats */
928                                     buf->m_head->m_pkthdr.len;
929                                 bus_dmamap_sync(buf->tag,
930                                     buf->map,
931                                     BUS_DMASYNC_POSTWRITE);
932                                 bus_dmamap_unload(buf->tag,
933                                     buf->map);
934                                 m_freem(buf->m_head);
935                                 buf->m_head = NULL;
936                                 buf->map = NULL;
937                         }
938                         buf->eop_index = -1;
939
940                         if (++first == que->num_desc)
941                                 first = 0;
942
943                         buf = &txr->buffers[first];
944                         tx_desc = &txr->base[first];
945                 }
946                 ++txr->packets;
947                 /* See if there is more work now */
948                 last = buf->eop_index;
949                 if (last != -1) {
950                         eop_desc = &txr->base[last];
951                         /* Get next done point */
952                         if (++last == que->num_desc) last = 0;
953                         done = last;
954                 } else
955                         break;
956         }
957         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
958             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
959
960         txr->next_to_clean = first;
961
962
963         /*
964          * If there are no pending descriptors, clear the timeout.
965          */
966         if (txr->avail == que->num_desc) {
967                 atomic_store_rel_32(&txr->watchdog_timer, 0);
968                 return FALSE;
969         }
970
971         return TRUE;
972 }
973
974 /*********************************************************************
975  *
976  *  Refresh mbuf buffers for RX descriptor rings
977  *   - now keeps its own state so discards due to resource
978  *     exhaustion are unnecessary; if an mbuf cannot be obtained
979  *     the routine just returns, keeping its placeholder, so it can
980  *     simply be recalled to try again.
981  *
982  **********************************************************************/
983 static void
984 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
985 {
986         struct ixl_vsi          *vsi = que->vsi;
987         struct rx_ring          *rxr = &que->rxr;
988         bus_dma_segment_t       hseg[1];
989         bus_dma_segment_t       pseg[1];
990         struct ixl_rx_buf       *buf;
991         struct mbuf             *mh, *mp;
992         int                     i, j, nsegs, error;
993         bool                    refreshed = FALSE;
994
995         i = j = rxr->next_refresh;
996         /* Control the loop with one beyond */
997         if (++j == que->num_desc)
998                 j = 0;
999
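        /* j runs one slot ahead of i, so refreshing stops one descriptor
         * short of 'limit'. */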
1000         while (j != limit) {
1001                 buf = &rxr->buffers[i];
1002                 if (rxr->hdr_split == FALSE)
1003                         goto no_split;
1004
1005                 if (buf->m_head == NULL) {
1006                         mh = m_gethdr(M_NOWAIT, MT_DATA);
1007                         if (mh == NULL)
1008                                 goto update;
1009                 } else
1010                         mh = buf->m_head;
1011
1012                 mh->m_pkthdr.len = mh->m_len = MHLEN;
1014                 mh->m_flags |= M_PKTHDR;
1015                 /* Get the memory mapping */
1016                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1017                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1018                 if (error != 0) {
1019                         printf("Refresh mbufs: hdr dmamap load"
1020                             " failure - %d\n", error);
1021                         m_free(mh);
1022                         buf->m_head = NULL;
1023                         goto update;
1024                 }
1025                 buf->m_head = mh;
1026                 bus_dmamap_sync(rxr->htag, buf->hmap,
1027                     BUS_DMASYNC_PREREAD);
1028                 rxr->base[i].read.hdr_addr =
1029                    htole64(hseg[0].ds_addr);
1030
1031 no_split:
1032                 if (buf->m_pack == NULL) {
1033                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1034                             M_PKTHDR, rxr->mbuf_sz);
1035                         if (mp == NULL)
1036                                 goto update;
1037                 } else
1038                         mp = buf->m_pack;
1039
1040                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1041                 /* Get the memory mapping */
1042                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1043                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1044                 if (error != 0) {
1045                         printf("Refresh mbufs: payload dmamap load"
1046                             " failure - %d\n", error);
1047                         m_free(mp);
1048                         buf->m_pack = NULL;
1049                         goto update;
1050                 }
1051                 buf->m_pack = mp;
1052                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1053                     BUS_DMASYNC_PREREAD);
1054                 rxr->base[i].read.pkt_addr =
1055                    htole64(pseg[0].ds_addr);
1056                 /* Used only when doing header split */
1057                 rxr->base[i].read.hdr_addr = 0;
1058
1059                 refreshed = TRUE;
1060                 /* Next is precalculated */
1061                 i = j;
1062                 rxr->next_refresh = i;
1063                 if (++j == que->num_desc)
1064                         j = 0;
1065         }
1066 update:
1067         if (refreshed) /* Update hardware tail index */
1068                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1069         return;
1070 }
1071
1072
1073 /*********************************************************************
1074  *
1075  *  Allocate memory for rx_buffer structures. Since we use one
1076  *  rx_buffer per descriptor, the maximum number of rx_buffers
1077  *  that we'll need is equal to the number of receive descriptors
1078  *  that we've defined.
1079  *
1080  **********************************************************************/
1081 int
1082 ixl_allocate_rx_data(struct ixl_queue *que)
1083 {
1084         struct rx_ring          *rxr = &que->rxr;
1085         struct ixl_vsi          *vsi = que->vsi;
1086         device_t                dev = vsi->dev;
1087         struct ixl_rx_buf       *buf;
1088         int                     i, bsize, error;
1089
1090         bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1091         if (!(rxr->buffers =
1092             (struct ixl_rx_buf *) malloc(bsize,
1093             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1094                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1095                 error = ENOMEM;
1096                 return (error);
1097         }
1098
1099         if ((error = bus_dma_tag_create(NULL,   /* parent */
1100                                    1, 0,        /* alignment, bounds */
1101                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1102                                    BUS_SPACE_MAXADDR,   /* highaddr */
1103                                    NULL, NULL,          /* filter, filterarg */
1104                                    MSIZE,               /* maxsize */
1105                                    1,                   /* nsegments */
1106                                    MSIZE,               /* maxsegsize */
1107                                    0,                   /* flags */
1108                                    NULL,                /* lockfunc */
1109                                    NULL,                /* lockfuncarg */
1110                                    &rxr->htag))) {
1111                 device_printf(dev, "Unable to create RX DMA htag\n");
1112                 return (error);
1113         }
1114
1115         if ((error = bus_dma_tag_create(NULL,   /* parent */
1116                                    1, 0,        /* alignment, bounds */
1117                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1118                                    BUS_SPACE_MAXADDR,   /* highaddr */
1119                                    NULL, NULL,          /* filter, filterarg */
1120                                    MJUM16BYTES,         /* maxsize */
1121                                    1,                   /* nsegments */
1122                                    MJUM16BYTES,         /* maxsegsize */
1123                                    0,                   /* flags */
1124                                    NULL,                /* lockfunc */
1125                                    NULL,                /* lockfuncarg */
1126                                    &rxr->ptag))) {
1127                 device_printf(dev, "Unable to create RX DMA ptag\n");
1128                 return (error);
1129         }
1130
1131         for (i = 0; i < que->num_desc; i++) {
1132                 buf = &rxr->buffers[i];
1133                 error = bus_dmamap_create(rxr->htag,
1134                     BUS_DMA_NOWAIT, &buf->hmap);
1135                 if (error) {
1136                         device_printf(dev, "Unable to create RX head map\n");
1137                         break;
1138                 }
1139                 error = bus_dmamap_create(rxr->ptag,
1140                     BUS_DMA_NOWAIT, &buf->pmap);
1141                 if (error) {
1142                         device_printf(dev, "Unable to create RX pkt map\n");
1143                         break;
1144                 }
1145         }
1146
1147         return (error);
1148 }
1149
1150
1151 /*********************************************************************
1152  *
1153  *  (Re)Initialize the queue receive ring and its buffers.
1154  *
1155  **********************************************************************/
1156 int
1157 ixl_init_rx_ring(struct ixl_queue *que)
1158 {
1159         struct  rx_ring         *rxr = &que->rxr;
1160         struct ixl_vsi          *vsi = que->vsi;
1161 #if defined(INET6) || defined(INET)
1162         struct ifnet            *ifp = vsi->ifp;
1163         struct lro_ctrl         *lro = &rxr->lro;
1164 #endif
1165         struct ixl_rx_buf       *buf;
1166         bus_dma_segment_t       pseg[1], hseg[1];
1167         int                     rsize, nsegs, error = 0;
1168 #ifdef DEV_NETMAP
1169         struct netmap_adapter *na = NA(que->vsi->ifp);
1170         struct netmap_slot *slot;
1171 #endif /* DEV_NETMAP */
1172
1173         IXL_RX_LOCK(rxr);
1174 #ifdef DEV_NETMAP
1175         /* same as in ixl_init_tx_ring() */
1176         slot = netmap_reset(na, NR_RX, que->me, 0);
1177 #endif /* DEV_NETMAP */
1178         /* Clear the ring contents */
1179         rsize = roundup2(que->num_desc *
1180             sizeof(union i40e_rx_desc), DBA_ALIGN);
1181         bzero((void *)rxr->base, rsize);
1182         /* Cleanup any existing buffers */
1183         for (int i = 0; i < que->num_desc; i++) {
1184                 buf = &rxr->buffers[i];
1185                 if (buf->m_head != NULL) {
1186                         bus_dmamap_sync(rxr->htag, buf->hmap,
1187                             BUS_DMASYNC_POSTREAD);
1188                         bus_dmamap_unload(rxr->htag, buf->hmap);
1189                         buf->m_head->m_flags |= M_PKTHDR;
1190                         m_freem(buf->m_head);
1191                 }
1192                 if (buf->m_pack != NULL) {
1193                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1194                             BUS_DMASYNC_POSTREAD);
1195                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1196                         buf->m_pack->m_flags |= M_PKTHDR;
1197                         m_freem(buf->m_pack);
1198                 }
1199                 buf->m_head = NULL;
1200                 buf->m_pack = NULL;
1201         }
1202
1203         /* header split is off */
1204         rxr->hdr_split = FALSE;
1205
1206         /* Now replenish the mbufs */
1207         for (int j = 0; j != que->num_desc; ++j) {
1208                 struct mbuf     *mh, *mp;
1209
1210                 buf = &rxr->buffers[j];
1211 #ifdef DEV_NETMAP
1212                 /*
1213                  * In netmap mode, fill the map and set the buffer
1214                  * address in the NIC ring, considering the offset
1215                  * between the netmap and NIC rings (see comment in
1216                  * ixgbe_setup_transmit_ring() ). No need to allocate
1217                  * an mbuf, so end the block with a continue;
1218                  */
1219                 if (slot) {
1220                         int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
1221                         uint64_t paddr;
1222                         void *addr;
1223
1224                         addr = PNMB(na, slot + sj, &paddr);
1225                         netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1226                         /* Update descriptor and the cached value */
1227                         rxr->base[j].read.pkt_addr = htole64(paddr);
1228                         rxr->base[j].read.hdr_addr = 0;
1229                         continue;
1230                 }
1231 #endif /* DEV_NETMAP */
1232                 /*
1233                 ** Don't allocate mbufs if not
1234                 ** doing header split; it's wasteful
1235                 */ 
1236                 if (rxr->hdr_split == FALSE)
1237                         goto skip_head;
1238
1239                 /* First the header */
1240                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1241                 if (buf->m_head == NULL) {
1242                         error = ENOBUFS;
1243                         goto fail;
1244                 }
1245                 m_adj(buf->m_head, ETHER_ALIGN);
1246                 mh = buf->m_head;
1247                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1248                 mh->m_flags |= M_PKTHDR;
1249                 /* Get the memory mapping */
1250                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1251                     buf->hmap, buf->m_head, hseg,
1252                     &nsegs, BUS_DMA_NOWAIT);
1253                 if (error != 0) /* Nothing elegant to do here */
1254                         goto fail;
1255                 bus_dmamap_sync(rxr->htag,
1256                     buf->hmap, BUS_DMASYNC_PREREAD);
1257                 /* Update descriptor */
1258                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1259
1260 skip_head:
1261                 /* Now the payload cluster */
1262                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1263                     M_PKTHDR, rxr->mbuf_sz);
1264                 if (buf->m_pack == NULL) {
1265                         error = ENOBUFS;
1266                         goto fail;
1267                 }
1268                 mp = buf->m_pack;
1269                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1270                 /* Get the memory mapping */
1271                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1272                     buf->pmap, mp, pseg,
1273                     &nsegs, BUS_DMA_NOWAIT);
1274                 if (error != 0)
1275                         goto fail;
1276                 bus_dmamap_sync(rxr->ptag,
1277                     buf->pmap, BUS_DMASYNC_PREREAD);
1278                 /* Update descriptor */
1279                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1280                 rxr->base[j].read.hdr_addr = 0;
1281         }
1282
1283
1284         /* Setup our descriptor indices */
1285         rxr->next_check = 0;
1286         rxr->next_refresh = 0;
1287         rxr->lro_enabled = FALSE;
1288         rxr->split = 0;
1289         rxr->bytes = 0;
1290         rxr->discard = FALSE;
1291
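        /* Program the hardware RX tail register with the last descriptor
         * index. */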
1292         wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1293         ixl_flush(vsi->hw);
1294
1295 #if defined(INET6) || defined(INET)
1296         /*
1297         ** Now set up the LRO interface:
1298         */
1299         if (ifp->if_capenable & IFCAP_LRO) {
1300                 int err = tcp_lro_init(lro);
1301                 if (err) {
1302                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1303                         goto fail;
1304                 }
1305                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1306                 rxr->lro_enabled = TRUE;
1307                 lro->ifp = vsi->ifp;
1308         }
1309 #endif
1310
1311         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1312             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1313
1314 fail:
1315         IXL_RX_UNLOCK(rxr);
1316         return (error);
1317 }
1318
1319
1320 /*********************************************************************
1321  *
1322  *  Free station receive ring data structures
1323  *
1324  **********************************************************************/
1325 void
1326 ixl_free_que_rx(struct ixl_queue *que)
1327 {
1328         struct rx_ring          *rxr = &que->rxr;
1329         struct ixl_rx_buf       *buf;
1330
1331         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1332
1333         /* Cleanup any existing buffers */
1334         if (rxr->buffers != NULL) {
1335                 for (int i = 0; i < que->num_desc; i++) {
1336                         buf = &rxr->buffers[i];
1337                         if (buf->m_head != NULL) {
1338                                 bus_dmamap_sync(rxr->htag, buf->hmap,
1339                                     BUS_DMASYNC_POSTREAD);
1340                                 bus_dmamap_unload(rxr->htag, buf->hmap);
1341                                 buf->m_head->m_flags |= M_PKTHDR;
1342                                 m_freem(buf->m_head);
1343                         }
1344                         if (buf->m_pack != NULL) {
1345                                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1346                                     BUS_DMASYNC_POSTREAD);
1347                                 bus_dmamap_unload(rxr->ptag, buf->pmap);
1348                                 buf->m_pack->m_flags |= M_PKTHDR;
1349                                 m_freem(buf->m_pack);
1350                         }
1351                         buf->m_head = NULL;
1352                         buf->m_pack = NULL;
1353                         if (buf->hmap != NULL) {
1354                                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1355                                 buf->hmap = NULL;
1356                         }
1357                         if (buf->pmap != NULL) {
1358                                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1359                                 buf->pmap = NULL;
1360                         }
1361                 }
1362                 if (rxr->buffers != NULL) {
1363                         free(rxr->buffers, M_DEVBUF);
1364                         rxr->buffers = NULL;
1365                 }
1366         }
1367
1368         if (rxr->htag != NULL) {
1369                 bus_dma_tag_destroy(rxr->htag);
1370                 rxr->htag = NULL;
1371         }
1372         if (rxr->ptag != NULL) {
1373                 bus_dma_tag_destroy(rxr->ptag);
1374                 rxr->ptag = NULL;
1375         }
1376
1377         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1378         return;
1379 }
1380
1381 static inline void
1382 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1383 {
1384
1385 #if defined(INET6) || defined(INET)
1386         /*
1387          * At the moment LRO is only done for IPv4/TCP packets whose TCP
1388          * checksum has been validated by the hardware and which carry no
1389          * VLAN tag in the Ethernet header.
1390          */
1391         if (rxr->lro_enabled &&
1392             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1393             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1394             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1395                 /*
1396                  * Send to the stack if:
1397                  *  - LRO not enabled, or
1398                  *  - no LRO resources, or
1399                  *  - lro enqueue fails
1400                  */
1401                 if (rxr->lro.lro_cnt != 0)
1402                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1403                                 return;
1404         }
1405 #endif
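        /* Drop the RX lock while the packet is handed up to the stack. */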
1406         IXL_RX_UNLOCK(rxr);
1407         (*ifp->if_input)(ifp, m);
1408         IXL_RX_LOCK(rxr);
1409 }
1410
1411
1412 static inline void
1413 ixl_rx_discard(struct rx_ring *rxr, int i)
1414 {
1415         struct ixl_rx_buf       *rbuf;
1416
1417         rbuf = &rxr->buffers[i];
1418
1419         if (rbuf->fmp != NULL) {        /* Partial chain? */
1420                 rbuf->fmp->m_flags |= M_PKTHDR;
1421                 m_freem(rbuf->fmp);
1422                 rbuf->fmp = NULL;
1423         }
1424
1425         /*
1426         ** With advanced descriptors the writeback
1427         ** clobbers the buffer addresses, so it's easier
1428         ** to just free the existing mbufs and take
1429         ** the normal refresh path to get new buffers
1430         ** and mappings.
1431         */
1432         if (rbuf->m_head) {
1433                 m_free(rbuf->m_head);
1434                 rbuf->m_head = NULL;
1435         }
1436  
1437         if (rbuf->m_pack) {
1438                 m_free(rbuf->m_pack);
1439                 rbuf->m_pack = NULL;
1440         }
1441
1442         return;
1443 }
1444
1445 #ifdef RSS
1446 /*
1447 ** ixl_ptype_to_hash: parse the packet type
1448 ** to determine the appropriate hash.
1449 */
1450 static inline int
1451 ixl_ptype_to_hash(u8 ptype)
1452 {
1453         struct i40e_rx_ptype_decoded    decoded;
1454         u8                              ex = 0;
1455
1456         decoded = decode_rx_desc_ptype(ptype);
1457         ex = decoded.outer_frag;
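             /* An outer fragment selects the extended (_EX) hash types below. */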
1458
1459         if (!decoded.known)
1460                 return M_HASHTYPE_OPAQUE_HASH;
1461
1462         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
1463                 return M_HASHTYPE_OPAQUE_HASH;
1464
1465         /* Note: anything that gets to this point is IP */
1466         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
1467                 switch (decoded.inner_prot) {
1468                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1469                                 if (ex)
1470                                         return M_HASHTYPE_RSS_TCP_IPV6_EX;
1471                                 else
1472                                         return M_HASHTYPE_RSS_TCP_IPV6;
1473                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1474                                 if (ex)
1475                                         return M_HASHTYPE_RSS_UDP_IPV6_EX;
1476                                 else
1477                                         return M_HASHTYPE_RSS_UDP_IPV6;
1478                         default:
1479                                 if (ex)
1480                                         return M_HASHTYPE_RSS_IPV6_EX;
1481                                 else
1482                                         return M_HASHTYPE_RSS_IPV6;
1483                 }
1484         }
1485         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
1486                 switch (decoded.inner_prot) {
1487                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1488                                         return M_HASHTYPE_RSS_TCP_IPV4;
1489                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1490                                 if (ex)
1491                                         return M_HASHTYPE_RSS_UDP_IPV4_EX;
1492                                 else
1493                                         return M_HASHTYPE_RSS_UDP_IPV4;
1494                         default:
1495                                         return M_HASHTYPE_RSS_IPV4;
1496                 }
1497         }
1498         /* We should never get here!! */
1499         return M_HASHTYPE_OPAQUE_HASH;
1500 }
1501 #endif /* RSS */
1502
1503 /*********************************************************************
1504  *
1505  *  This routine executes in interrupt context. It replenishes
1506  *  the mbufs in the descriptor ring and passes data which has
1507  *  been DMA'd into host memory up to the upper layer.
1508  *
1509  *  We loop at most count times if count is > 0, or until done if
1510  *  count < 0.
1511  *
1512  *  Return TRUE for more work, FALSE for all clean.
1513  *********************************************************************/
1514 bool
1515 ixl_rxeof(struct ixl_queue *que, int count)
1516 {
1517         struct ixl_vsi          *vsi = que->vsi;
1518         struct rx_ring          *rxr = &que->rxr;
1519         struct ifnet            *ifp = vsi->ifp;
1520 #if defined(INET6) || defined(INET)
1521         struct lro_ctrl         *lro = &rxr->lro;
1522 #endif
1523         int                     i, nextp, processed = 0;
1524         union i40e_rx_desc      *cur;
1525         struct ixl_rx_buf       *rbuf, *nbuf;
1526
1527
1528         IXL_RX_LOCK(rxr);
1529
1530 #ifdef DEV_NETMAP
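             /* If the ring is in netmap mode, netmap handles the interrupt. */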
1531         if (netmap_rx_irq(ifp, que->me, &count)) {
1532                 IXL_RX_UNLOCK(rxr);
1533                 return (FALSE);
1534         }
1535 #endif /* DEV_NETMAP */
1536
1537         for (i = rxr->next_check; count != 0;) {
1538                 struct mbuf     *sendmp, *mh, *mp;
1539                 u32             status, error;
1540                 u16             hlen, plen, vtag;
1541                 u64             qword;
1542                 u8              ptype;
1543                 bool            eop;
1544  
1545                 /* Sync the ring. */
1546                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1547                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1548
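                     /*
                     ** qword1 of the write-back descriptor packs the status
                     ** bits, error bits, buffer lengths and packet type;
                     ** the shifts below extract each field.
                     */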
1549                 cur = &rxr->base[i];
1550                 qword = le64toh(cur->wb.qword1.status_error_len);
1551                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1552                     >> I40E_RXD_QW1_STATUS_SHIFT;
1553                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1554                     >> I40E_RXD_QW1_ERROR_SHIFT;
1555                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1556                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1557                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1558                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1559                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1560                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1561
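                     /* DD (descriptor done) not set: nothing more to process. */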
1562                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1563                         ++rxr->not_done;
1564                         break;
1565                 }
1566                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1567                         break;
1568
1569                 count--;
1570                 sendmp = NULL;
1571                 nbuf = NULL;
1572                 cur->wb.qword1.status_error_len = 0;
1573                 rbuf = &rxr->buffers[i];
1574                 mh = rbuf->m_head;
1575                 mp = rbuf->m_pack;
1576                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
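                     /* L2TAG1P means the hardware placed a stripped VLAN tag in l2tag1. */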
1577                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1578                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1579                 else
1580                         vtag = 0;
1581
1582                 /*
1583                 ** Make sure bad packets are discarded,
1584                 ** note that only the EOP descriptor has
1585                 ** valid error results.
1586                 */
1587                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1588                         rxr->desc_errs++;
1589                         ixl_rx_discard(rxr, i);
1590                         goto next_desc;
1591                 }
1592
1593                 /* Prefetch the next buffer */
1594                 if (!eop) {
1595                         nextp = i + 1;
1596                         if (nextp == que->num_desc)
1597                                 nextp = 0;
1598                         nbuf = &rxr->buffers[nextp];
1599                         prefetch(nbuf);
1600                 }
1601
1602                 /*
1603                 ** The header mbuf is ONLY used when header
1604                 ** split is enabled; otherwise we get normal
1605                 ** behavior, i.e., both header and payload
1606                 ** are DMA'd into the payload buffer.
1607                 **
1608                 ** Rather than using the fmp/lmp global pointers
1609                 ** we now keep the head of a packet chain in the
1610                 ** buffer struct and pass this along from one
1611                 ** descriptor to the next, until we get EOP.
1612                 */
1613                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1614                         if (hlen > IXL_RX_HDR)
1615                                 hlen = IXL_RX_HDR;
1616                         mh->m_len = hlen;
1617                         mh->m_flags |= M_PKTHDR;
1618                         mh->m_next = NULL;
1619                         mh->m_pkthdr.len = mh->m_len;
1620                         /* Null buf pointer so it is refreshed */
1621                         rbuf->m_head = NULL;
1622                         /*
1623                         ** Check the payload length, this
1624                         ** could be zero if it's a small
1625                         ** packet.
1626                         */
1627                         if (plen > 0) {
1628                                 mp->m_len = plen;
1629                                 mp->m_next = NULL;
1630                                 mp->m_flags &= ~M_PKTHDR;
1631                                 mh->m_next = mp;
1632                                 mh->m_pkthdr.len += mp->m_len;
1633                                 /* Null buf pointer so it is refreshed */
1634                                 rbuf->m_pack = NULL;
1635                                 rxr->split++;
1636                         }
1637                         /*
1638                         ** Now create the forward
1639                         ** chain so when complete
1640                         ** we won't have to.
1641                         */
1642                         if (eop == 0) {
1643                                 /* stash the chain head */
1644                                 nbuf->fmp = mh;
1645                                 /* Make forward chain */
1646                                 if (plen)
1647                                         mp->m_next = nbuf->m_pack;
1648                                 else
1649                                         mh->m_next = nbuf->m_pack;
1650                         } else {
1651                                 /* Singlet, prepare to send */
1652                                 sendmp = mh;
1653                                 if (vtag) {
1654                                         sendmp->m_pkthdr.ether_vtag = vtag;
1655                                         sendmp->m_flags |= M_VLANTAG;
1656                                 }
1657                         }
1658                 } else {
1659                         /*
1660                         ** Either no header split, or a
1661                         ** secondary piece of a fragmented
1662                         ** split packet.
1663                         */
1664                         mp->m_len = plen;
1665                         /*
1666                         ** See if there is a stored chain head
1667                         ** that determines what we are handling.
1668                         */
1669                         sendmp = rbuf->fmp;
1670                         rbuf->m_pack = rbuf->fmp = NULL;
1671
1672                         if (sendmp != NULL) /* secondary frag */
1673                                 sendmp->m_pkthdr.len += mp->m_len;
1674                         else {
1675                                 /* first desc of a non-ps chain */
1676                                 sendmp = mp;
1677                                 sendmp->m_flags |= M_PKTHDR;
1678                                 sendmp->m_pkthdr.len = mp->m_len;
1679                         }
1680                         /* Pass the head pointer on */
1681                         if (eop == 0) {
1682                                 nbuf->fmp = sendmp;
1683                                 sendmp = NULL;
1684                                 mp->m_next = nbuf->m_pack;
1685                         }
1686                 }
1687                 ++processed;
1688                 /* Sending this frame? */
1689                 if (eop) {
1690                         sendmp->m_pkthdr.rcvif = ifp;
1691                         /* gather stats */
1692                         rxr->rx_packets++;
1693                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1694                         /* capture data for dynamic ITR adjustment */
1695                         rxr->packets++;
1696                         rxr->bytes += sendmp->m_pkthdr.len;
1697                         /* Set VLAN tag (field only valid in eop desc) */
1698                         if (vtag) {
1699                                 sendmp->m_pkthdr.ether_vtag = vtag;
1700                                 sendmp->m_flags |= M_VLANTAG;
1701                         }
1702                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1703                                 ixl_rx_checksum(sendmp, status, error, ptype);
1704 #ifdef RSS
1705                         sendmp->m_pkthdr.flowid =
1706                             le32toh(cur->wb.qword0.hi_dword.rss);
1707                         M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1708 #else
1709                         sendmp->m_pkthdr.flowid = que->msix;
1710                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1711 #endif
1712                 }
1713 next_desc:
1714                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1715                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1716
1717                 /* Advance our pointers to the next descriptor. */
1718                 if (++i == que->num_desc)
1719                         i = 0;
1720
1721                 /* Now send to the stack or do LRO */
1722                 if (sendmp != NULL) {
1723                         rxr->next_check = i;
1724                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1725                         i = rxr->next_check;
1726                 }
1727
1728                 /* Every 8 descriptors we refresh the mbufs. */
1729                 if (processed == 8) {
1730                         ixl_refresh_mbufs(que, i);
1731                         processed = 0;
1732                 }
1733         }
1734
1735         /* Refresh any remaining buf structs */
1736         if (ixl_rx_unrefreshed(que))
1737                 ixl_refresh_mbufs(que, i);
1738
1739         rxr->next_check = i;
1740
1741 #if defined(INET6) || defined(INET)
1742         /*
1743          * Flush any outstanding LRO work
1744          */
1745 #if __FreeBSD_version >= 1100105
1746         tcp_lro_flush_all(lro);
1747 #else
1748         struct lro_entry *queued;
1749         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1750                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1751                 tcp_lro_flush(lro, queued);
1752         }
1753 #endif
1754 #endif /* defined(INET6) || defined(INET) */
1755
1756         IXL_RX_UNLOCK(rxr);
1757         return (FALSE);
1758 }
1759
1760
1761 /*********************************************************************
1762  *
1763  *  Verify that the hardware indicated that the checksum is valid.
1764  *  Inform the stack about the status of the checksum so that the
1765  *  stack doesn't spend time verifying the checksum again.
1766  *
1767  *********************************************************************/
1768 static void
1769 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1770 {
1771         struct i40e_rx_ptype_decoded decoded;
1772
1773         decoded = decode_rx_desc_ptype(ptype);
1774
1775         /* Errors? */
1776         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1777             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1778                 mp->m_pkthdr.csum_flags = 0;
1779                 return;
1780         }
1781
1782         /* IPv6 packets with extension headers likely have a bad csum */
1783         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1784             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1785                 if (status &
1786                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1787                         mp->m_pkthdr.csum_flags = 0;
1788                         return;
1789                 }
1790
1791  
1792         /* IP Checksum Good */
1793         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1794         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1795
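             /*
             ** L3L4P indicates the L4 checksum was also processed; since no
             ** L4 error was flagged above, report it as verified using the
             ** standard CSUM_DATA_VALID/csum_data = 0xffff convention.
             */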
1796         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1797                 mp->m_pkthdr.csum_flags |= 
1798                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1799                 mp->m_pkthdr.csum_data |= htons(0xffff);
1800         }
1801         return;
1802 }
1803
1804 #if __FreeBSD_version >= 1100000
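     /*
     ** ixl_get_counter: report the software-maintained VSI statistics
     ** through the ifnet if_get_counter() method.
     */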
1805 uint64_t
1806 ixl_get_counter(if_t ifp, ift_counter cnt)
1807 {
1808         struct ixl_vsi *vsi;
1809
1810         vsi = if_getsoftc(ifp);
1811
1812         switch (cnt) {
1813         case IFCOUNTER_IPACKETS:
1814                 return (vsi->ipackets);
1815         case IFCOUNTER_IERRORS:
1816                 return (vsi->ierrors);
1817         case IFCOUNTER_OPACKETS:
1818                 return (vsi->opackets);
1819         case IFCOUNTER_OERRORS:
1820                 return (vsi->oerrors);
1821         case IFCOUNTER_COLLISIONS:
1822                 /* Collisions are impossible by design in 10G/40G Ethernet */
1823                 return (0);
1824         case IFCOUNTER_IBYTES:
1825                 return (vsi->ibytes);
1826         case IFCOUNTER_OBYTES:
1827                 return (vsi->obytes);
1828         case IFCOUNTER_IMCASTS:
1829                 return (vsi->imcasts);
1830         case IFCOUNTER_OMCASTS:
1831                 return (vsi->omcasts);
1832         case IFCOUNTER_IQDROPS:
1833                 return (vsi->iqdrops);
1834         case IFCOUNTER_OQDROPS:
1835                 return (vsi->oqdrops);
1836         case IFCOUNTER_NOPROTO:
1837                 return (vsi->noproto);
1838         default:
1839                 return (if_get_counter_default(ifp, cnt));
1840         }
1841 }
1842 #endif
1843