1 /******************************************************************************
2
3   Copyright (c) 2013-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was separated to allow usage by
38 **          both the PF and VF drivers.
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46
47 #include "ixl.h"
48
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52
53 /* Local Prototypes */
54 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
56 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
57 static int      ixl_tx_setup_offload(struct ixl_queue *,
58                     struct mbuf *, u32 *, u32 *);
59 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60
61 static inline void ixl_rx_discard(struct rx_ring *, int);
62 static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
63                     struct mbuf *, u8);
64
65 static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
66 static inline u32 ixl_get_tx_head(struct ixl_queue *que);
67
68 #ifdef DEV_NETMAP
69 #include <dev/netmap/if_ixl_netmap.h>
70 int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
71 #endif /* DEV_NETMAP */
72
73 /*
74  * @key  pointer to a buffer that receives the default RSS key
75  */
76 void
77 ixl_get_default_rss_key(u32 *key)
78 {
79         MPASS(key != NULL);
80
81         u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
82             0x183cfd8c, 0xce880440, 0x580cbc3c,
83             0x35897377, 0x328b25e1, 0x4fa98922,
84             0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
85             0x0, 0x0, 0x0};
86
87         bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
88 }
89
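
/*
 * Illustrative sketch (not part of the driver): how a caller might program
 * the key returned above into the device.  The register macro and the use
 * of wr32() here are assumptions based on the shared i40e register
 * definitions; the real PF/VF setup code lives elsewhere.
 */
#if 0
static void
example_set_rss_key(struct i40e_hw *hw)
{
	u32 rss_key[IXL_RSS_KEY_SIZE_REG];

	ixl_get_default_rss_key(rss_key);
	/* Write each 32-bit word of the key into the hash key registers */
	for (int i = 0; i < IXL_RSS_KEY_SIZE_REG; i++)
		wr32(hw, I40E_PFQF_HKEY(i), rss_key[i]);
}
#endif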
90 /*
91 ** Multiqueue Transmit driver
92 */
93 int
94 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
95 {
96         struct ixl_vsi          *vsi = ifp->if_softc;
97         struct ixl_queue        *que;
98         struct tx_ring          *txr;
99         int                     err, i;
100 #ifdef RSS
101         u32                     bucket_id;
102 #endif
103
104         /*
105         ** Which queue to use:
106         **
107         ** When doing RSS, map it to the same outbound
108         ** queue as the incoming flow would be mapped to.
109         ** If everything is set up correctly, it should be
110         ** the same bucket as the one assigned to the current CPU.
111         */
112         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
113 #ifdef  RSS
114                 if (rss_hash2bucket(m->m_pkthdr.flowid,
115                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
116                         i = bucket_id % vsi->num_queues;
117                 } else
118 #endif
119                         i = m->m_pkthdr.flowid % vsi->num_queues;
120         } else
121                 i = curcpu % vsi->num_queues;
122
123         que = &vsi->queues[i];
124         txr = &que->txr;
125
126         err = drbr_enqueue(ifp, txr->br, m);
127         if (err)
128                 return (err);
129         if (IXL_TX_TRYLOCK(txr)) {
130                 ixl_mq_start_locked(ifp, txr);
131                 IXL_TX_UNLOCK(txr);
132         } else
133                 taskqueue_enqueue(que->tq, &que->tx_task);
134
135         return (0);
136 }
137
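
/*
 * Sketch of how these entry points are typically wired up (assumption; the
 * actual assignments live in the PF/VF attach code, not in this file):
 *
 *	ifp->if_transmit = ixl_mq_start;
 *	ifp->if_qflush = ixl_qflush;
 */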
138 int
139 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
140 {
141         struct ixl_queue        *que = txr->que;
142         struct ixl_vsi          *vsi = que->vsi;
143         struct mbuf             *next;
144         int                     err = 0;
145
146
147         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
148             vsi->link_active == 0)
149                 return (ENETDOWN);
150
151         /* Process the transmit queue */
152         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
153                 if ((err = ixl_xmit(que, &next)) != 0) {
154                         if (next == NULL)
155                                 drbr_advance(ifp, txr->br);
156                         else
157                                 drbr_putback(ifp, txr->br, next);
158                         break;
159                 }
160                 drbr_advance(ifp, txr->br);
161                 /* Send a copy of the frame to the BPF listener */
162                 ETHER_BPF_MTAP(ifp, next);
163                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
164                         break;
165         }
166
167         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
168                 ixl_txeof(que);
169
170         return (err);
171 }
172
173 /*
174  * Called from a taskqueue to drain queued transmit packets.
175  */
176 void
177 ixl_deferred_mq_start(void *arg, int pending)
178 {
179         struct ixl_queue        *que = arg;
180         struct tx_ring          *txr = &que->txr;
181         struct ixl_vsi          *vsi = que->vsi;
182         struct ifnet            *ifp = vsi->ifp;
183         
184         IXL_TX_LOCK(txr);
185         if (!drbr_empty(ifp, txr->br))
186                 ixl_mq_start_locked(ifp, txr);
187         IXL_TX_UNLOCK(txr);
188 }
189
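
/*
 * Sketch (assumption, matching the usual taskqueue pattern in the PF/VF
 * setup code): the deferred-start task is bound to this handler with
 *
 *	TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que);
 *
 * so that ixl_mq_start() can fall back to taskqueue_enqueue() when the
 * TX lock is contended.
 */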
190 /*
191 ** Flush all queue ring buffers
192 */
193 void
194 ixl_qflush(struct ifnet *ifp)
195 {
196         struct ixl_vsi  *vsi = ifp->if_softc;
197
198         for (int i = 0; i < vsi->num_queues; i++) {
199                 struct ixl_queue *que = &vsi->queues[i];
200                 struct tx_ring  *txr = &que->txr;
201                 struct mbuf     *m;
202                 IXL_TX_LOCK(txr);
203                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
204                         m_freem(m);
205                 IXL_TX_UNLOCK(txr);
206         }
207         if_qflush(ifp);
208 }
209
210 /*
211 ** Find mbuf chains passed to the driver 
212 ** that are 'sparse', using more than 8
213 ** mbufs to deliver an MSS-sized chunk of data
214 */
215 static inline bool
216 ixl_tso_detect_sparse(struct mbuf *mp)
217 {
218         struct mbuf     *m;
219         int             num, mss;
220
221         num = 0;
222         mss = mp->m_pkthdr.tso_segsz;
223
224         /* Exclude first mbuf; assume it contains all headers */
225         for (m = mp->m_next; m != NULL; m = m->m_next) {
226                 if (m == NULL)
227                         break;
228                 num++;
229                 mss -= m->m_len % mp->m_pkthdr.tso_segsz;
230
231                 if (mss < 1) {
232                         if (num > IXL_SPARSE_CHAIN)
233                                 return (true);
234                         num = (mss == 0) ? 0 : 1;
235                         mss += mp->m_pkthdr.tso_segsz;
236                 }
237         }
238
239         return (false);
240 }
241
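
/*
 * Worked example (assuming IXL_SPARSE_CHAIN == 8, per the comment above):
 * with tso_segsz = 1000 and nine 120-byte mbufs following the header mbuf,
 * mss first drops below 1 on the ninth mbuf with num == 9 > 8, so the chain
 * is reported sparse and ixl_xmit() will m_defrag() it before DMA mapping.
 */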
242
243 /*********************************************************************
244  *
245  *  This routine maps the mbufs to tx descriptors, allowing the
246  *  TX engine to transmit the packets. 
247  *      - return 0 on success, positive on failure
248  *
249  **********************************************************************/
250 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
251
252 static int
253 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
254 {
255         struct ixl_vsi          *vsi = que->vsi;
256         struct i40e_hw          *hw = vsi->hw;
257         struct tx_ring          *txr = &que->txr;
258         struct ixl_tx_buf       *buf;
259         struct i40e_tx_desc     *txd = NULL;
260         struct mbuf             *m_head, *m;
261         int                     i, j, error, nsegs;
262         int                     first, last = 0;
263         u16                     vtag = 0;
264         u32                     cmd, off;
265         bus_dmamap_t            map;
266         bus_dma_tag_t           tag;
267         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
268
269         cmd = off = 0;
270         m_head = *m_headp;
271
272         /*
273          * Important to capture the first descriptor
274          * used because it will contain the index of
275          * the one we tell the hardware to report back
276          */
277         first = txr->next_avail;
278         buf = &txr->buffers[first];
279         map = buf->map;
280         tag = txr->tx_tag;
281
282         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
283                 /* Use larger mapping for TSO */
284                 tag = txr->tso_tag;
285                 if (ixl_tso_detect_sparse(m_head)) {
286                         m = m_defrag(m_head, M_NOWAIT);
287                         if (m == NULL) {
288                                 m_freem(*m_headp);
289                                 *m_headp = NULL;
290                                 return (ENOBUFS);
291                         }
292                         *m_headp = m;
293                 }
294         }
295
296         /*
297          * Map the packet for DMA.
298          */
299         error = bus_dmamap_load_mbuf_sg(tag, map,
300             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
301
302         if (error == EFBIG) {
303                 struct mbuf *m;
304
305                 m = m_defrag(*m_headp, M_NOWAIT);
306                 if (m == NULL) {
307                         que->mbuf_defrag_failed++;
308                         m_freem(*m_headp);
309                         *m_headp = NULL;
310                         return (ENOBUFS);
311                 }
312                 *m_headp = m;
313
314                 /* Try it again */
315                 error = bus_dmamap_load_mbuf_sg(tag, map,
316                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
317
318                 if (error != 0) {
319                         que->tx_dmamap_failed++;
320                         m_freem(*m_headp);
321                         *m_headp = NULL;
322                         return (error);
323                 }
324         } else if (error != 0) {
325                 que->tx_dmamap_failed++;
326                 m_freem(*m_headp);
327                 *m_headp = NULL;
328                 return (error);
329         }
330
331         /* Make certain there are enough descriptors */
332         if (nsegs > txr->avail - 2) {
333                 txr->no_desc++;
334                 error = ENOBUFS;
335                 goto xmit_fail;
336         }
337         m_head = *m_headp;
338
339         /* Set up the TSO/CSUM offload */
340         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
341                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
342                 if (error)
343                         goto xmit_fail;
344         }
345
346         cmd |= I40E_TX_DESC_CMD_ICRC;
347         /* Grab the VLAN tag */
348         if (m_head->m_flags & M_VLANTAG) {
349                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
350                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
351         }
352
353         i = txr->next_avail;
354         for (j = 0; j < nsegs; j++) {
355                 bus_size_t seglen;
356
357                 buf = &txr->buffers[i];
358                 buf->tag = tag; /* Keep track of the type tag */
359                 txd = &txr->base[i];
360                 seglen = segs[j].ds_len;
361
362                 txd->buffer_addr = htole64(segs[j].ds_addr);
363                 txd->cmd_type_offset_bsz =
364                     htole64(I40E_TX_DESC_DTYPE_DATA
365                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
366                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
367                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
368                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
369
370                 last = i; /* descriptor that will get completion IRQ */
371
372                 if (++i == que->num_desc)
373                         i = 0;
374
375                 buf->m_head = NULL;
376                 buf->eop_index = -1;
377         }
378         /* Set the last descriptor for report */
379         txd->cmd_type_offset_bsz |=
380             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
381         txr->avail -= nsegs;
382         txr->next_avail = i;
383
384         buf->m_head = m_head;
385         /* Swap the dma map between the first and last descriptor */
386         txr->buffers[first].map = buf->map;
387         buf->map = map;
388         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
389
390         /* Set the index of the descriptor that will be marked done */
391         buf = &txr->buffers[first];
392         buf->eop_index = last;
393
394         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
395             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
396         /*
397          * Advance the Transmit Descriptor Tail (TDT); this tells the
398          * hardware that this frame is available to transmit.
399          */
400         ++txr->total_packets;
401         wr32(hw, txr->tail, i);
402
403         /* Mark outstanding work */
404         atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
405         return (0);
406
407 xmit_fail:
408         bus_dmamap_unload(tag, buf->map);
409         return (error);
410 }
411
412
413 /*********************************************************************
414  *
415  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
416  *  the information needed to transmit a packet on the wire. This is
417  *  called only once at attach, setup is done every reset.
418  *
419  **********************************************************************/
420 int
421 ixl_allocate_tx_data(struct ixl_queue *que)
422 {
423         struct tx_ring          *txr = &que->txr;
424         struct ixl_vsi          *vsi = que->vsi;
425         device_t                dev = vsi->dev;
426         struct ixl_tx_buf       *buf;
427         int                     error = 0;
428
429         /*
430          * Setup DMA descriptor areas.
431          */
432         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),           /* parent */
433                                1, 0,                    /* alignment, bounds */
434                                BUS_SPACE_MAXADDR,       /* lowaddr */
435                                BUS_SPACE_MAXADDR,       /* highaddr */
436                                NULL, NULL,              /* filter, filterarg */
437                                IXL_TSO_SIZE,            /* maxsize */
438                                IXL_MAX_TX_SEGS,         /* nsegments */
439                                PAGE_SIZE,               /* maxsegsize */
440                                0,                       /* flags */
441                                NULL,                    /* lockfunc */
442                                NULL,                    /* lockfuncarg */
443                                &txr->tx_tag))) {
444                 device_printf(dev,"Unable to allocate TX DMA tag\n");
445                 goto fail;
446         }
447
448         /* Make a special tag for TSO */
449         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),           /* parent */
450                                1, 0,                    /* alignment, bounds */
451                                BUS_SPACE_MAXADDR,       /* lowaddr */
452                                BUS_SPACE_MAXADDR,       /* highaddr */
453                                NULL, NULL,              /* filter, filterarg */
454                                IXL_TSO_SIZE,            /* maxsize */
455                                IXL_MAX_TSO_SEGS,        /* nsegments */
456                                PAGE_SIZE,               /* maxsegsize */
457                                0,                       /* flags */
458                                NULL,                    /* lockfunc */
459                                NULL,                    /* lockfuncarg */
460                                &txr->tso_tag))) {
461                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
462                 goto fail;
463         }
464
465         if (!(txr->buffers =
466             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
467             que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
468                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
469                 error = ENOMEM;
470                 goto fail;
471         }
472
473         /* Create the descriptor buffer default dma maps */
474         buf = txr->buffers;
475         for (int i = 0; i < que->num_desc; i++, buf++) {
476                 buf->tag = txr->tx_tag;
477                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
478                 if (error != 0) {
479                         device_printf(dev, "Unable to create TX DMA map\n");
480                         goto fail;
481                 }
482         }
483 fail:
484         return (error);
485 }
486
487
488 /*********************************************************************
489  *
490  *  (Re)Initialize a queue transmit ring.
491  *      - called by init, it clears the descriptor ring,
492  *        and frees any stale mbufs 
493  *
494  **********************************************************************/
495 void
496 ixl_init_tx_ring(struct ixl_queue *que)
497 {
498 #ifdef DEV_NETMAP
499         struct netmap_adapter *na = NA(que->vsi->ifp);
500         struct netmap_slot *slot;
501 #endif /* DEV_NETMAP */
502         struct tx_ring          *txr = &que->txr;
503         struct ixl_tx_buf       *buf;
504
505         /* Clear the old ring contents */
506         IXL_TX_LOCK(txr);
507
508 #ifdef DEV_NETMAP
509         /*
510          * (under lock): if in netmap mode, do some consistency
511          * checks and set slot to entry 0 of the netmap ring.
512          */
513         slot = netmap_reset(na, NR_TX, que->me, 0);
514 #endif /* DEV_NETMAP */
515
516         bzero((void *)txr->base,
517               (sizeof(struct i40e_tx_desc)) * que->num_desc);
518
519         /* Reset indices */
520         txr->next_avail = 0;
521         txr->next_to_clean = 0;
522
523         /* Reset watchdog status */
524         txr->watchdog_timer = 0;
525
526 #ifdef IXL_FDIR
527         /* Initialize flow director */
528         txr->atr_rate = ixl_atr_rate;
529         txr->atr_count = 0;
530 #endif
531         /* Free any existing tx mbufs. */
532         buf = txr->buffers;
533         for (int i = 0; i < que->num_desc; i++, buf++) {
534                 if (buf->m_head != NULL) {
535                         bus_dmamap_sync(buf->tag, buf->map,
536                             BUS_DMASYNC_POSTWRITE);
537                         bus_dmamap_unload(buf->tag, buf->map);
538                         m_freem(buf->m_head);
539                         buf->m_head = NULL;
540                 }
541 #ifdef DEV_NETMAP
542                 /*
543                  * In netmap mode, set the map for the packet buffer.
544                  * NOTE: Some drivers (not this one) also need to set
545                  * the physical buffer address in the NIC ring.
546                  * netmap_idx_n2k() maps a nic index, i, into the corresponding
547                  * netmap slot index, si
548                  */
549                 if (slot) {
550                         int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
551                         netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
552                 }
553 #endif /* DEV_NETMAP */
554                 /* Clear the EOP index */
555                 buf->eop_index = -1;
556         }
557
558         /* Set number of descriptors available */
559         txr->avail = que->num_desc;
560
561         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
562             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
563         IXL_TX_UNLOCK(txr);
564 }
565
566
567 /*********************************************************************
568  *
569  *  Free transmit ring related data structures.
570  *
571  **********************************************************************/
572 void
573 ixl_free_que_tx(struct ixl_queue *que)
574 {
575         struct tx_ring *txr = &que->txr;
576         struct ixl_tx_buf *buf;
577
578         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
579
580         for (int i = 0; i < que->num_desc; i++) {
581                 buf = &txr->buffers[i];
582                 if (buf->m_head != NULL) {
583                         bus_dmamap_sync(buf->tag, buf->map,
584                             BUS_DMASYNC_POSTWRITE);
585                         bus_dmamap_unload(buf->tag,
586                             buf->map);
587                         m_freem(buf->m_head);
588                         buf->m_head = NULL;
589                         if (buf->map != NULL) {
590                                 bus_dmamap_destroy(buf->tag,
591                                     buf->map);
592                                 buf->map = NULL;
593                         }
594                 } else if (buf->map != NULL) {
595                         bus_dmamap_unload(buf->tag,
596                             buf->map);
597                         bus_dmamap_destroy(buf->tag,
598                             buf->map);
599                         buf->map = NULL;
600                 }
601         }
602         if (txr->br != NULL)
603                 buf_ring_free(txr->br, M_DEVBUF);
604         if (txr->buffers != NULL) {
605                 free(txr->buffers, M_DEVBUF);
606                 txr->buffers = NULL;
607         }
608         if (txr->tx_tag != NULL) {
609                 bus_dma_tag_destroy(txr->tx_tag);
610                 txr->tx_tag = NULL;
611         }
612         if (txr->tso_tag != NULL) {
613                 bus_dma_tag_destroy(txr->tso_tag);
614                 txr->tso_tag = NULL;
615         }
616
617         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
618         return;
619 }
620
621 /*********************************************************************
622  *
623  *  Setup descriptor for hw offloads 
624  *
625  **********************************************************************/
626
627 static int
628 ixl_tx_setup_offload(struct ixl_queue *que,
629     struct mbuf *mp, u32 *cmd, u32 *off)
630 {
631         struct ether_vlan_header        *eh;
632 #ifdef INET
633         struct ip                       *ip = NULL;
634 #endif
635         struct tcphdr                   *th = NULL;
636 #ifdef INET6
637         struct ip6_hdr                  *ip6;
638 #endif
639         int                             elen, ip_hlen = 0, tcp_hlen;
640         u16                             etype;
641         u8                              ipproto = 0;
642         bool                            tso = FALSE;
643
644         /* Set up the TSO context descriptor if required */
645         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
646                 tso = ixl_tso_setup(que, mp);
647                 if (tso)
648                         ++que->tso;
649                 else
650                         return (ENXIO);
651         }
652
653         /*
654          * Determine where frame payload starts.
655          * Jump over vlan headers if already present,
656          * helpful for QinQ too.
657          */
658         eh = mtod(mp, struct ether_vlan_header *);
659         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
660                 etype = ntohs(eh->evl_proto);
661                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
662         } else {
663                 etype = ntohs(eh->evl_encap_proto);
664                 elen = ETHER_HDR_LEN;
665         }
666
667         switch (etype) {
668 #ifdef INET
669                 case ETHERTYPE_IP:
670                         ip = (struct ip *)(mp->m_data + elen);
671                         ip_hlen = ip->ip_hl << 2;
672                         ipproto = ip->ip_p;
673                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
674                         /* The IP checksum must be recalculated with TSO */
675                         if (tso)
676                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
677                         else
678                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
679                         break;
680 #endif
681 #ifdef INET6
682                 case ETHERTYPE_IPV6:
683                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
684                         ip_hlen = sizeof(struct ip6_hdr);
685                         ipproto = ip6->ip6_nxt;
686                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
687                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
688                         break;
689 #endif
690                 default:
691                         break;
692         }
693
694         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
695         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
696
697         switch (ipproto) {
698                 case IPPROTO_TCP:
699                         tcp_hlen = th->th_off << 2;
700                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
701                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
702                                 *off |= (tcp_hlen >> 2) <<
703                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
704                         }
705 #ifdef IXL_FDIR
706                         ixl_atr(que, th, etype);
707 #endif
708                         break;
709                 case IPPROTO_UDP:
710                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
711                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
712                                 *off |= (sizeof(struct udphdr) >> 2) <<
713                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
714                         }
715                         break;
716
717                 case IPPROTO_SCTP:
718                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
719                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
720                                 *off |= (sizeof(struct sctphdr) >> 2) <<
721                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
722                         }
723                         /* Fall Thru */
724                 default:
725                         break;
726         }
727
728         return (0);
729 }
730
731
732 /**********************************************************************
733  *
734  *  Setup context for hardware segmentation offload (TSO)
735  *
736  **********************************************************************/
737 static bool
738 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
739 {
740         struct tx_ring                  *txr = &que->txr;
741         struct i40e_tx_context_desc     *TXD;
742         struct ixl_tx_buf               *buf;
743         u32                             cmd, mss, type, tsolen;
744         u16                             etype;
745         int                             idx, elen, ip_hlen, tcp_hlen;
746         struct ether_vlan_header        *eh;
747 #ifdef INET
748         struct ip                       *ip;
749 #endif
750 #ifdef INET6
751         struct ip6_hdr                  *ip6;
752 #endif
753 #if defined(INET6) || defined(INET)
754         struct tcphdr                   *th;
755 #endif
756         u64                             type_cmd_tso_mss;
757
758         /*
759          * Determine where frame payload starts.
760          * Jump over vlan headers if already present
761          */
762         eh = mtod(mp, struct ether_vlan_header *);
763         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
764                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
765                 etype = eh->evl_proto;
766         } else {
767                 elen = ETHER_HDR_LEN;
768                 etype = eh->evl_encap_proto;
769         }
770
771         switch (ntohs(etype)) {
772 #ifdef INET6
773         case ETHERTYPE_IPV6:
774                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
775                 if (ip6->ip6_nxt != IPPROTO_TCP)
776                         return (FALSE);
777                 ip_hlen = sizeof(struct ip6_hdr);
778                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
779                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
780                 tcp_hlen = th->th_off << 2;
781                 /*
782                  * The corresponding flag is set by the stack in the IPv4
783                  * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
784                  * So, set it here because the rest of the flow requires it.
785                  */
786                 mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
787                 break;
788 #endif
789 #ifdef INET
790         case ETHERTYPE_IP:
791                 ip = (struct ip *)(mp->m_data + elen);
792                 if (ip->ip_p != IPPROTO_TCP)
793                         return (FALSE);
794                 ip->ip_sum = 0;
795                 ip_hlen = ip->ip_hl << 2;
796                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
797                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
798                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
799                 tcp_hlen = th->th_off << 2;
800                 break;
801 #endif
802         default:
803                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
804                     __func__, ntohs(etype));
805                 return FALSE;
806         }
807
808         /* Ensure we have at least the IP+TCP header in the first mbuf. */
809         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
810                 return FALSE;
811
812         idx = txr->next_avail;
813         buf = &txr->buffers[idx];
814         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
815         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
816
817         type = I40E_TX_DESC_DTYPE_CONTEXT;
818         cmd = I40E_TX_CTX_DESC_TSO;
819         /* TSO MSS must not be less than 64 */
820         if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
821                 que->mss_too_small++;
822                 mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
823         }
824         mss = mp->m_pkthdr.tso_segsz;
825
826         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
827             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
828             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
829             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
830         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
831
832         TXD->tunneling_params = htole32(0);
833         buf->m_head = NULL;
834         buf->eop_index = -1;
835
836         if (++idx == que->num_desc)
837                 idx = 0;
838
839         txr->avail--;
840         txr->next_avail = idx;
841
842         return TRUE;
843 }
844
845 /*             
846 ** ixl_get_tx_head - Retrieve the value from the 
847 **    location where the HW records its HEAD index
848 */
849 static inline u32
850 ixl_get_tx_head(struct ixl_queue *que)
851 {
852         struct tx_ring  *txr = &que->txr;
853         void *head = &txr->base[que->num_desc];
854         return LE32_TO_CPU(*(volatile __le32 *)head);
855 }
856
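
/*
 * Note (assumption about the allocation code, which is outside this file):
 * with head write-back the hardware DMA-writes its consumer index into the
 * 32 bits immediately past the last real descriptor, i.e. &txr->base[num_desc],
 * so the TX ring DMA area is expected to be sized with room for that extra
 * word.
 */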
857 /**********************************************************************
858  *
859  *  Examine each tx_buffer in the used queue. If the hardware is done
860  *  processing the packet then free associated resources. The
861  *  tx_buffer is put back on the free queue.
862  *
863  **********************************************************************/
864 bool
865 ixl_txeof(struct ixl_queue *que)
866 {
867         struct tx_ring          *txr = &que->txr;
868         u32                     first, last, head, done, processed;
869         struct ixl_tx_buf       *buf;
870         struct i40e_tx_desc     *tx_desc, *eop_desc;
871
872
873         mtx_assert(&txr->mtx, MA_OWNED);
874
875 #ifdef DEV_NETMAP
876         // XXX todo: implement moderation
877         if (netmap_tx_irq(que->vsi->ifp, que->me))
878                 return FALSE;
879 #endif /* DEV_NETMAP */
880
881         /* These are not the descriptors you seek, move along :) */
882         if (txr->avail == que->num_desc) {
883                 atomic_store_rel_32(&txr->watchdog_timer, 0);
884                 return FALSE;
885         }
886
887         processed = 0;
888         first = txr->next_to_clean;
889         buf = &txr->buffers[first];
890         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
891         last = buf->eop_index;
892         if (last == -1)
893                 return FALSE;
894         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
895
896         /* Get the Head WB value */
897         head = ixl_get_tx_head(que);
898
899         /*
900         ** Get the index of the first descriptor
901         ** BEYOND the EOP and call that 'done'.
902         ** I do this so the comparison in the
903         ** inner while loop below can be simple
904         */
905         if (++last == que->num_desc) last = 0;
906         done = last;
907
908         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
909             BUS_DMASYNC_POSTREAD);
910         /*
911         ** The HEAD index of the ring is written to a
912         ** defined location; this, rather than a done bit,
913         ** is what is used to keep track of which descriptors
914         ** must be 'cleaned'.
915         */
916         while (first != head) {
917                 /* We clean the range of the packet */
918                 while (first != done) {
919                         ++txr->avail;
920                         ++processed;
921
922                         if (buf->m_head) {
923                                 txr->bytes += /* for ITR adjustment */
924                                     buf->m_head->m_pkthdr.len;
925                                 txr->tx_bytes += /* for TX stats */
926                                     buf->m_head->m_pkthdr.len;
927                                 bus_dmamap_sync(buf->tag,
928                                     buf->map,
929                                     BUS_DMASYNC_POSTWRITE);
930                                 bus_dmamap_unload(buf->tag,
931                                     buf->map);
932                                 m_freem(buf->m_head);
933                                 buf->m_head = NULL;
934                         }
935                         buf->eop_index = -1;
936
937                         if (++first == que->num_desc)
938                                 first = 0;
939
940                         buf = &txr->buffers[first];
941                         tx_desc = &txr->base[first];
942                 }
943                 ++txr->packets;
944                 /* See if there is more work now */
945                 last = buf->eop_index;
946                 if (last != -1) {
947                         eop_desc = &txr->base[last];
948                         /* Get next done point */
949                         if (++last == que->num_desc) last = 0;
950                         done = last;
951                 } else
952                         break;
953         }
954         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
955             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
956
957         txr->next_to_clean = first;
958
959
960         /*
961          * If there are no pending descriptors, clear the timeout.
962          */
963         if (txr->avail == que->num_desc) {
964                 atomic_store_rel_32(&txr->watchdog_timer, 0);
965                 return FALSE;
966         }
967
968         return TRUE;
969 }
970
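
/*
 * Sketch of intended use (assumption; the MSI-X queue handler and the
 * interrupt re-enable helper named here are outside this excerpt):
 *
 *	IXL_TX_LOCK(txr);
 *	more_tx = ixl_txeof(que);
 *	IXL_TX_UNLOCK(txr);
 *	if (more_tx)
 *		taskqueue_enqueue(que->tq, &que->task);
 *	else
 *		ixl_enable_queue(hw, que->me);
 */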
971 /*********************************************************************
972  *
973  *  Refresh mbuf buffers for RX descriptor rings
974  *   - now keeps its own state so discards due to resource
975  *     exhaustion are unnecessary; if an mbuf cannot be obtained
976  *     it just returns, keeping its placeholder, so it can simply
977  *     be called again later to retry.
978  *
979  **********************************************************************/
980 static void
981 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
982 {
983         struct ixl_vsi          *vsi = que->vsi;
984         struct rx_ring          *rxr = &que->rxr;
985         bus_dma_segment_t       hseg[1];
986         bus_dma_segment_t       pseg[1];
987         struct ixl_rx_buf       *buf;
988         struct mbuf             *mh, *mp;
989         int                     i, j, nsegs, error;
990         bool                    refreshed = FALSE;
991
992         i = j = rxr->next_refresh;
993         /* Control the loop with one beyond */
994         if (++j == que->num_desc)
995                 j = 0;
996
997         while (j != limit) {
998                 buf = &rxr->buffers[i];
999                 if (rxr->hdr_split == FALSE)
1000                         goto no_split;
1001
1002                 if (buf->m_head == NULL) {
1003                         mh = m_gethdr(M_NOWAIT, MT_DATA);
1004                         if (mh == NULL)
1005                                 goto update;
1006                 } else
1007                         mh = buf->m_head;
1008
1009                 mh->m_pkthdr.len = mh->m_len = MHLEN;
1010                 mh->m_len = MHLEN;
1011                 mh->m_flags |= M_PKTHDR;
1012                 /* Get the memory mapping */
1013                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1014                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1015                 if (error != 0) {
1016                         printf("Refresh mbufs: hdr dmamap load"
1017                             " failure - %d\n", error);
1018                         m_free(mh);
1019                         buf->m_head = NULL;
1020                         goto update;
1021                 }
1022                 buf->m_head = mh;
1023                 bus_dmamap_sync(rxr->htag, buf->hmap,
1024                     BUS_DMASYNC_PREREAD);
1025                 rxr->base[i].read.hdr_addr =
1026                    htole64(hseg[0].ds_addr);
1027
1028 no_split:
1029                 if (buf->m_pack == NULL) {
1030                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1031                             M_PKTHDR, rxr->mbuf_sz);
1032                         if (mp == NULL)
1033                                 goto update;
1034                 } else
1035                         mp = buf->m_pack;
1036
1037                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1038                 /* Get the memory mapping */
1039                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1040                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1041                 if (error != 0) {
1042                         printf("Refresh mbufs: payload dmamap load"
1043                             " failure - %d\n", error);
1044                         m_free(mp);
1045                         buf->m_pack = NULL;
1046                         goto update;
1047                 }
1048                 buf->m_pack = mp;
1049                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1050                     BUS_DMASYNC_PREREAD);
1051                 rxr->base[i].read.pkt_addr =
1052                    htole64(pseg[0].ds_addr);
1053                 /* Used only when doing header split */
1054                 rxr->base[i].read.hdr_addr = 0;
1055
1056                 refreshed = TRUE;
1057                 /* Next is precalculated */
1058                 i = j;
1059                 rxr->next_refresh = i;
1060                 if (++j == que->num_desc)
1061                         j = 0;
1062         }
1063 update:
1064         if (refreshed) /* Update hardware tail index */
1065                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1066         return;
1067 }
1068
1069
1070 /*********************************************************************
1071  *
1072  *  Allocate memory for rx_buffer structures. Since we use one
1073  *  rx_buffer per descriptor, the maximum number of rx_buffer's
1074  *  that we'll need is equal to the number of receive descriptors
1075  *  that we've defined.
1076  *
1077  **********************************************************************/
1078 int
1079 ixl_allocate_rx_data(struct ixl_queue *que)
1080 {
1081         struct rx_ring          *rxr = &que->rxr;
1082         struct ixl_vsi          *vsi = que->vsi;
1083         device_t                dev = vsi->dev;
1084         struct ixl_rx_buf       *buf;
1085         int                     i, bsize, error;
1086
1087         bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1088         if (!(rxr->buffers =
1089             (struct ixl_rx_buf *) malloc(bsize,
1090             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1091                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1092                 error = ENOMEM;
1093                 return (error);
1094         }
1095
1096         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1097                                    1, 0,        /* alignment, bounds */
1098                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1099                                    BUS_SPACE_MAXADDR,   /* highaddr */
1100                                    NULL, NULL,          /* filter, filterarg */
1101                                    MSIZE,               /* maxsize */
1102                                    1,                   /* nsegments */
1103                                    MSIZE,               /* maxsegsize */
1104                                    0,                   /* flags */
1105                                    NULL,                /* lockfunc */
1106                                    NULL,                /* lockfuncarg */
1107                                    &rxr->htag))) {
1108                 device_printf(dev, "Unable to create RX DMA htag\n");
1109                 return (error);
1110         }
1111
1112         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1113                                    1, 0,        /* alignment, bounds */
1114                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1115                                    BUS_SPACE_MAXADDR,   /* highaddr */
1116                                    NULL, NULL,          /* filter, filterarg */
1117                                    MJUM16BYTES,         /* maxsize */
1118                                    1,                   /* nsegments */
1119                                    MJUM16BYTES,         /* maxsegsize */
1120                                    0,                   /* flags */
1121                                    NULL,                /* lockfunc */
1122                                    NULL,                /* lockfuncarg */
1123                                    &rxr->ptag))) {
1124                 device_printf(dev, "Unable to create RX DMA ptag\n");
1125                 return (error);
1126         }
1127
1128         for (i = 0; i < que->num_desc; i++) {
1129                 buf = &rxr->buffers[i];
1130                 error = bus_dmamap_create(rxr->htag,
1131                     BUS_DMA_NOWAIT, &buf->hmap);
1132                 if (error) {
1133                         device_printf(dev, "Unable to create RX head map\n");
1134                         break;
1135                 }
1136                 error = bus_dmamap_create(rxr->ptag,
1137                     BUS_DMA_NOWAIT, &buf->pmap);
1138                 if (error) {
1139                         device_printf(dev, "Unable to create RX pkt map\n");
1140                         break;
1141                 }
1142         }
1143
1144         return (error);
1145 }
1146
1147
1148 /*********************************************************************
1149  *
1150  *  (Re)Initialize the queue receive ring and its buffers.
1151  *
1152  **********************************************************************/
1153 int
1154 ixl_init_rx_ring(struct ixl_queue *que)
1155 {
1156         struct  rx_ring         *rxr = &que->rxr;
1157         struct ixl_vsi          *vsi = que->vsi;
1158 #if defined(INET6) || defined(INET)
1159         struct ifnet            *ifp = vsi->ifp;
1160         struct lro_ctrl         *lro = &rxr->lro;
1161 #endif
1162         struct ixl_rx_buf       *buf;
1163         bus_dma_segment_t       pseg[1], hseg[1];
1164         int                     rsize, nsegs, error = 0;
1165 #ifdef DEV_NETMAP
1166         struct netmap_adapter *na = NA(que->vsi->ifp);
1167         struct netmap_slot *slot;
1168 #endif /* DEV_NETMAP */
1169
1170         IXL_RX_LOCK(rxr);
1171 #ifdef DEV_NETMAP
1172         /* same as in ixl_init_tx_ring() */
1173         slot = netmap_reset(na, NR_RX, que->me, 0);
1174 #endif /* DEV_NETMAP */
1175         /* Clear the ring contents */
1176         rsize = roundup2(que->num_desc *
1177             sizeof(union i40e_rx_desc), DBA_ALIGN);
1178         bzero((void *)rxr->base, rsize);
1179         /* Cleanup any existing buffers */
1180         for (int i = 0; i < que->num_desc; i++) {
1181                 buf = &rxr->buffers[i];
1182                 if (buf->m_head != NULL) {
1183                         bus_dmamap_sync(rxr->htag, buf->hmap,
1184                             BUS_DMASYNC_POSTREAD);
1185                         bus_dmamap_unload(rxr->htag, buf->hmap);
1186                         buf->m_head->m_flags |= M_PKTHDR;
1187                         m_freem(buf->m_head);
1188                 }
1189                 if (buf->m_pack != NULL) {
1190                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1191                             BUS_DMASYNC_POSTREAD);
1192                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1193                         buf->m_pack->m_flags |= M_PKTHDR;
1194                         m_freem(buf->m_pack);
1195                 }
1196                 buf->m_head = NULL;
1197                 buf->m_pack = NULL;
1198         }
1199
1200         /* header split is off */
1201         rxr->hdr_split = FALSE;
1202
1203         /* Now replenish the mbufs */
1204         for (int j = 0; j != que->num_desc; ++j) {
1205                 struct mbuf     *mh, *mp;
1206
1207                 buf = &rxr->buffers[j];
1208 #ifdef DEV_NETMAP
1209                 /*
1210                  * In netmap mode, fill the map and set the buffer
1211                  * address in the NIC ring, considering the offset
1212                  * between the netmap and NIC rings (see comment in
1213                  * ixgbe_setup_transmit_ring() ). No need to allocate
1214                  * an mbuf, so end the block with a continue;
1215                  */
1216                 if (slot) {
1217                         int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
1218                         uint64_t paddr;
1219                         void *addr;
1220
1221                         addr = PNMB(na, slot + sj, &paddr);
1222                         netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1223                         /* Update descriptor and the cached value */
1224                         rxr->base[j].read.pkt_addr = htole64(paddr);
1225                         rxr->base[j].read.hdr_addr = 0;
1226                         continue;
1227                 }
1228 #endif /* DEV_NETMAP */
1229                 /*
1230                 ** Don't allocate mbufs if not
1231                 ** doing header split; it's wasteful
1232                 */ 
1233                 if (rxr->hdr_split == FALSE)
1234                         goto skip_head;
1235
1236                 /* First the header */
1237                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1238                 if (buf->m_head == NULL) {
1239                         error = ENOBUFS;
1240                         goto fail;
1241                 }
1242                 m_adj(buf->m_head, ETHER_ALIGN);
1243                 mh = buf->m_head;
1244                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1245                 mh->m_flags |= M_PKTHDR;
1246                 /* Get the memory mapping */
1247                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1248                     buf->hmap, buf->m_head, hseg,
1249                     &nsegs, BUS_DMA_NOWAIT);
1250                 if (error != 0) /* Nothing elegant to do here */
1251                         goto fail;
1252                 bus_dmamap_sync(rxr->htag,
1253                     buf->hmap, BUS_DMASYNC_PREREAD);
1254                 /* Update descriptor */
1255                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1256
1257 skip_head:
1258                 /* Now the payload cluster */
1259                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1260                     M_PKTHDR, rxr->mbuf_sz);
1261                 if (buf->m_pack == NULL) {
1262                         error = ENOBUFS;
1263                         goto fail;
1264                 }
1265                 mp = buf->m_pack;
1266                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1267                 /* Get the memory mapping */
1268                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1269                     buf->pmap, mp, pseg,
1270                     &nsegs, BUS_DMA_NOWAIT);
1271                 if (error != 0)
1272                         goto fail;
1273                 bus_dmamap_sync(rxr->ptag,
1274                     buf->pmap, BUS_DMASYNC_PREREAD);
1275                 /* Update descriptor */
1276                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1277                 rxr->base[j].read.hdr_addr = 0;
1278         }
1279
1280
1281         /* Setup our descriptor indices */
1282         rxr->next_check = 0;
1283         rxr->next_refresh = 0;
1284         rxr->lro_enabled = FALSE;
1285         rxr->split = 0;
1286         rxr->bytes = 0;
1287         rxr->discard = FALSE;
1288
1289         wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1290         ixl_flush(vsi->hw);
1291
1292 #if defined(INET6) || defined(INET)
1293         /*
1294         ** Now set up the LRO interface:
1295         */
1296         if (ifp->if_capenable & IFCAP_LRO) {
1297                 int err = tcp_lro_init(lro);
1298                 if (err) {
1299                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1300                         goto fail;
1301                 }
1302                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1303                 rxr->lro_enabled = TRUE;
1304                 lro->ifp = vsi->ifp;
1305         }
1306 #endif
1307
1308         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1309             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1310
1311 fail:
1312         IXL_RX_UNLOCK(rxr);
1313         return (error);
1314 }
1315
1316
1317 /*********************************************************************
1318  *
1319  *  Free station receive ring data structures
1320  *
1321  **********************************************************************/
1322 void
1323 ixl_free_que_rx(struct ixl_queue *que)
1324 {
1325         struct rx_ring          *rxr = &que->rxr;
1326         struct ixl_rx_buf       *buf;
1327
1328         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1329
1330         /* Cleanup any existing buffers */
1331         if (rxr->buffers != NULL) {
1332                 for (int i = 0; i < que->num_desc; i++) {
1333                         buf = &rxr->buffers[i];
1334                         if (buf->m_head != NULL) {
1335                                 bus_dmamap_sync(rxr->htag, buf->hmap,
1336                                     BUS_DMASYNC_POSTREAD);
1337                                 bus_dmamap_unload(rxr->htag, buf->hmap);
1338                                 buf->m_head->m_flags |= M_PKTHDR;
1339                                 m_freem(buf->m_head);
1340                         }
1341                         if (buf->m_pack != NULL) {
1342                                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1343                                     BUS_DMASYNC_POSTREAD);
1344                                 bus_dmamap_unload(rxr->ptag, buf->pmap);
1345                                 buf->m_pack->m_flags |= M_PKTHDR;
1346                                 m_freem(buf->m_pack);
1347                         }
1348                         buf->m_head = NULL;
1349                         buf->m_pack = NULL;
1350                         if (buf->hmap != NULL) {
1351                                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1352                                 buf->hmap = NULL;
1353                         }
1354                         if (buf->pmap != NULL) {
1355                                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1356                                 buf->pmap = NULL;
1357                         }
1358                 }
1359                 if (rxr->buffers != NULL) {
1360                         free(rxr->buffers, M_DEVBUF);
1361                         rxr->buffers = NULL;
1362                 }
1363         }
1364
1365         if (rxr->htag != NULL) {
1366                 bus_dma_tag_destroy(rxr->htag);
1367                 rxr->htag = NULL;
1368         }
1369         if (rxr->ptag != NULL) {
1370                 bus_dma_tag_destroy(rxr->ptag);
1371                 rxr->ptag = NULL;
1372         }
1373
1374         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1375         return;
1376 }
1377
1378 static inline void
1379 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1380 {
1381
1382 #if defined(INET6) || defined(INET)
1383         /*
1384          * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
1385          * has been computed by hardware, and which carry no VLAN tag in the
1386          * ethernet header.
1387          */
1388         if (rxr->lro_enabled &&
1389             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1390             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1391             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1392                 /*
1393                  * Send to the stack if:
1394                  **  - LRO not enabled, or
1395                  **  - no LRO resources, or
1396                  **  - lro enqueue fails
1397                  */
1398                 if (rxr->lro.lro_cnt != 0)
1399                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1400                                 return;
1401         }
1402 #endif
1403         (*ifp->if_input)(ifp, m);
1404 }
1405
1406
1407 static inline void
1408 ixl_rx_discard(struct rx_ring *rxr, int i)
1409 {
1410         struct ixl_rx_buf       *rbuf;
1411
1412         rbuf = &rxr->buffers[i];
1413
1414         if (rbuf->fmp != NULL) {        /* Partial chain? */
1415                 rbuf->fmp->m_flags |= M_PKTHDR;
1416                 m_freem(rbuf->fmp);
1417                 rbuf->fmp = NULL;
1418         }
1419
1420         /*
1421         ** With advanced descriptors the writeback
1422         ** clobbers the buffer addresses, so it's easier
1423         ** to just free the existing mbufs and take
1424         ** the normal refresh path to get new buffers
1425         ** and mappings.
1426         */
1427         if (rbuf->m_head) {
1428                 m_free(rbuf->m_head);
1429                 rbuf->m_head = NULL;
1430         }
1431  
1432         if (rbuf->m_pack) {
1433                 m_free(rbuf->m_pack);
1434                 rbuf->m_pack = NULL;
1435         }
1436
1437         return;
1438 }
1439
1440 #ifdef RSS
1441 /*
1442 ** ixl_ptype_to_hash: parse the packet type
1443 ** to determine the appropriate mbuf hash type.
1444 */
1445 static inline int
1446 ixl_ptype_to_hash(u8 ptype)
1447 {
1448         struct i40e_rx_ptype_decoded    decoded;
1449         u8                              ex = 0;
1450
1451         decoded = decode_rx_desc_ptype(ptype);
1452         ex = decoded.outer_frag;
1453
1454         if (!decoded.known)
1455                 return M_HASHTYPE_OPAQUE_HASH;
1456
1457         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
1458                 return M_HASHTYPE_OPAQUE_HASH;
1459
1460         /* Note: anything that gets to this point is IP */
1461         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
1462                 switch (decoded.inner_prot) {
1463                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1464                                 if (ex)
1465                                         return M_HASHTYPE_RSS_TCP_IPV6_EX;
1466                                 else
1467                                         return M_HASHTYPE_RSS_TCP_IPV6;
1468                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1469                                 if (ex)
1470                                         return M_HASHTYPE_RSS_UDP_IPV6_EX;
1471                                 else
1472                                         return M_HASHTYPE_RSS_UDP_IPV6;
1473                         default:
1474                                 if (ex)
1475                                         return M_HASHTYPE_RSS_IPV6_EX;
1476                                 else
1477                                         return M_HASHTYPE_RSS_IPV6;
1478                 }
1479         }
1480         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
1481                 switch (decoded.inner_prot) {
1482                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1483                                         return M_HASHTYPE_RSS_TCP_IPV4;
1484                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1485                                 if (ex)
1486                                         return M_HASHTYPE_RSS_UDP_IPV4_EX;
1487                                 else
1488                                         return M_HASHTYPE_RSS_UDP_IPV4;
1489                         default:
1490                                         return M_HASHTYPE_RSS_IPV4;
1491                 }
1492         }
1493         /* We should never get here!! */
1494         return M_HASHTYPE_OPAQUE_HASH;
1495 }
1496 #endif /* RSS */
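/*
 * Illustrative sketch (not compiled): how a consumer can read back the flow
 * identification set on the mbuf above.  M_HASHTYPE_GET() and
 * m_pkthdr.flowid are standard mbuf(9) fields; rss_hash2bucket() is the
 * RSS-case helper from net/rss_config.h that maps a hash value and hash
 * type to an RSS bucket.  The function name is hypothetical.
 */
#if 0
static void
example_flow_lookup(struct mbuf *m)
{
	uint32_t bucket;

	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE &&
	    rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
	    &bucket) == 0) {
		/* 'bucket' selects the CPU/queue this flow is pinned to. */
	}
}
#endif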
1497
1498 /*********************************************************************
1499  *
1500  *  This routine executes in interrupt context. It replenishes
1501  *  the mbufs in the descriptor ring and passes frames that have
1502  *  been DMA'd into host memory up to the stack.
1503  *
1504  *  We loop at most count times if count is > 0, or until the ring
1505  *  is clean if count < 0.
1506  *
1507  *  Return TRUE for more work, FALSE for all clean.
1508  *********************************************************************/
1509 bool
1510 ixl_rxeof(struct ixl_queue *que, int count)
1511 {
1512         struct ixl_vsi          *vsi = que->vsi;
1513         struct rx_ring          *rxr = &que->rxr;
1514         struct ifnet            *ifp = vsi->ifp;
1515 #if defined(INET6) || defined(INET)
1516         struct lro_ctrl         *lro = &rxr->lro;
1517 #endif
1518         int                     i, nextp, processed = 0;
1519         union i40e_rx_desc      *cur;
1520         struct ixl_rx_buf       *rbuf, *nbuf;
1521
1522
1523         IXL_RX_LOCK(rxr);
1524
1525 #ifdef DEV_NETMAP
1526         if (netmap_rx_irq(ifp, que->me, &count)) {
1527                 IXL_RX_UNLOCK(rxr);
1528                 return (FALSE);
1529         }
1530 #endif /* DEV_NETMAP */
1531
1532         for (i = rxr->next_check; count != 0;) {
1533                 struct mbuf     *sendmp, *mh, *mp;
1534                 u32             status, error;
1535                 u16             hlen, plen, vtag;
1536                 u64             qword;
1537                 u8              ptype;
1538                 bool            eop;
1539  
1540                 /* Sync the ring. */
1541                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1542                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1543
1544                 cur = &rxr->base[i];
1545                 qword = le64toh(cur->wb.qword1.status_error_len);
1546                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1547                     >> I40E_RXD_QW1_STATUS_SHIFT;
1548                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1549                     >> I40E_RXD_QW1_ERROR_SHIFT;
1550                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1551                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1552                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1553                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1554                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1555                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1556
1557                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1558                         ++rxr->not_done;
1559                         break;
1560                 }
1561                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1562                         break;
1563
1564                 count--;
1565                 sendmp = NULL;
1566                 nbuf = NULL;
1567                 cur->wb.qword1.status_error_len = 0;
1568                 rbuf = &rxr->buffers[i];
1569                 mh = rbuf->m_head;
1570                 mp = rbuf->m_pack;
1571                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1572                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1573                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1574                 else
1575                         vtag = 0;
1576
1577                 /* Remove device access to the rx buffers. */
1578                 if (rbuf->m_head != NULL) {
1579                         bus_dmamap_sync(rxr->htag, rbuf->hmap,
1580                             BUS_DMASYNC_POSTREAD);
1581                         bus_dmamap_unload(rxr->htag, rbuf->hmap);
1582                 }
1583                 if (rbuf->m_pack != NULL) {
1584                         bus_dmamap_sync(rxr->ptag, rbuf->pmap,
1585                             BUS_DMASYNC_POSTREAD);
1586                         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1587                 }
1588
1589                 /*
1590                 ** Make sure bad packets are discarded;
1591                 ** note that only the EOP descriptor carries
1592                 ** valid error results.
1593                 */
1594                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1595                         rxr->desc_errs++;
1596                         ixl_rx_discard(rxr, i);
1597                         goto next_desc;
1598                 }
1599
1600                 /* Prefetch the next buffer */
1601                 if (!eop) {
1602                         nextp = i + 1;
1603                         if (nextp == que->num_desc)
1604                                 nextp = 0;
1605                         nbuf = &rxr->buffers[nextp];
1606                         prefetch(nbuf);
1607                 }
1608
1609                 /*
1610                 ** The header mbuf is ONLY used when header
1611                 ** split is enabled; otherwise we get the normal
1612                 ** behavior, i.e., both header and payload
1613                 ** are DMA'd into the payload buffer.
1614                 **
1615                 ** Rather than using the fmp/lmp global pointers
1616                 ** we now keep the head of a packet chain in the
1617                 ** buffer struct and pass this along from one
1618                 ** descriptor to the next, until we reach EOP.
1619                 */
1620                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1621                         if (hlen > IXL_RX_HDR)
1622                                 hlen = IXL_RX_HDR;
1623                         mh->m_len = hlen;
1624                         mh->m_flags |= M_PKTHDR;
1625                         mh->m_next = NULL;
1626                         mh->m_pkthdr.len = mh->m_len;
1627                         /* Null buf pointer so it is refreshed */
1628                         rbuf->m_head = NULL;
1629                         /*
1630                         ** Check the payload length; this
1631                         ** could be zero if it's a small
1632                         ** packet.
1633                         */
1634                         if (plen > 0) {
1635                                 mp->m_len = plen;
1636                                 mp->m_next = NULL;
1637                                 mp->m_flags &= ~M_PKTHDR;
1638                                 mh->m_next = mp;
1639                                 mh->m_pkthdr.len += mp->m_len;
1640                                 /* Null buf pointer so it is refreshed */
1641                                 rbuf->m_pack = NULL;
1642                                 rxr->split++;
1643                         }
1644                         /*
1645                         ** Now create the forward
1646                         ** chain so that when complete
1647                         ** we won't have to.
1648                         */
1649                         if (eop == 0) {
1650                                 /* stash the chain head */
1651                                 nbuf->fmp = mh;
1652                                 /* Make forward chain */
1653                                 if (plen)
1654                                         mp->m_next = nbuf->m_pack;
1655                                 else
1656                                         mh->m_next = nbuf->m_pack;
1657                         } else {
1658                                 /* Singlet, prepare to send */
1659                                 sendmp = mh;
1660                                 if (vtag) {
1661                                         sendmp->m_pkthdr.ether_vtag = vtag;
1662                                         sendmp->m_flags |= M_VLANTAG;
1663                                 }
1664                         }
1665                 } else {
1666                         /*
1667                         ** Either no header split, or a
1668                         ** secondary piece of a fragmented
1669                         ** split packet.
1670                         */
1671                         mp->m_len = plen;
1672                         /*
1673                         ** See if there is a stored chain head that
1674                         ** tells us whether this continues a packet.
1675                         */
1676                         sendmp = rbuf->fmp;
1677                         rbuf->m_pack = rbuf->fmp = NULL;
1678
1679                         if (sendmp != NULL) /* secondary frag */
1680                                 sendmp->m_pkthdr.len += mp->m_len;
1681                         else {
1682                                 /* first desc of a non-ps chain */
1683                                 sendmp = mp;
1684                                 sendmp->m_flags |= M_PKTHDR;
1685                                 sendmp->m_pkthdr.len = mp->m_len;
1686                         }
1687                         /* Pass the head pointer on */
1688                         if (eop == 0) {
1689                                 nbuf->fmp = sendmp;
1690                                 sendmp = NULL;
1691                                 mp->m_next = nbuf->m_pack;
1692                         }
1693                 }
1694                 ++processed;
1695                 /* Sending this frame? */
1696                 if (eop) {
1697                         sendmp->m_pkthdr.rcvif = ifp;
1698                         /* gather stats */
1699                         rxr->rx_packets++;
1700                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1701                         /* capture data for dynamic ITR adjustment */
1702                         rxr->packets++;
1703                         rxr->bytes += sendmp->m_pkthdr.len;
1704                         /* Set VLAN tag (field only valid in eop desc) */
1705                         if (vtag) {
1706                                 sendmp->m_pkthdr.ether_vtag = vtag;
1707                                 sendmp->m_flags |= M_VLANTAG;
1708                         }
1709                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1710                                 ixl_rx_checksum(sendmp, status, error, ptype);
1711 #ifdef RSS
1712                         sendmp->m_pkthdr.flowid =
1713                             le32toh(cur->wb.qword0.hi_dword.rss);
1714                         M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1715 #else
1716                         sendmp->m_pkthdr.flowid = que->msix;
1717                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1718 #endif
1719                 }
1720 next_desc:
1721                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1722                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1723
1724                 /* Advance our pointers to the next descriptor. */
1725                 if (++i == que->num_desc)
1726                         i = 0;
1727
1728                 /* Now send to the stack or do LRO */
1729                 if (sendmp != NULL) {
1730                         rxr->next_check = i;
1731                         IXL_RX_UNLOCK(rxr);
1732                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1733                         IXL_RX_LOCK(rxr);
1734                         i = rxr->next_check;
1735                 }
1736
1737                 /* Refresh mbufs every 8 processed descriptors */
1738                 if (processed == 8) {
1739                         ixl_refresh_mbufs(que, i);
1740                         processed = 0;
1741                 }
1742         }
1743
1744         /* Refresh any remaining buf structs */
1745         if (ixl_rx_unrefreshed(que))
1746                 ixl_refresh_mbufs(que, i);
1747
1748         rxr->next_check = i;
1749
1750         IXL_RX_UNLOCK(rxr);
1751
1752 #if defined(INET6) || defined(INET)
1753         /*
1754          * Flush any outstanding LRO work
1755          */
1756 #if __FreeBSD_version >= 1100105
1757         tcp_lro_flush_all(lro);
1758 #else
1759         struct lro_entry *queued;
1760         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1761                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1762                 tcp_lro_flush(lro, queued);
1763         }
1764 #endif
1765 #endif /* defined(INET6) || defined(INET) */
1766
1767         return (FALSE);
1768 }
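/*
 * Illustrative sketch (not compiled): ixl_rxeof() is normally driven from a
 * per-queue interrupt or deferred-task context with a bounded descriptor
 * budget; a negative count means "clean until done".  EXAMPLE_RX_BUDGET and
 * example_handle_que() are hypothetical names, not part of this driver.
 */
#if 0
#define EXAMPLE_RX_BUDGET	256

static void
example_handle_que(void *arg)
{
	struct ixl_queue *que = arg;
	bool more;

	/* Clean at most EXAMPLE_RX_BUDGET RX descriptors this pass. */
	more = ixl_rxeof(que, EXAMPLE_RX_BUDGET);
	if (more) {
		/* Reschedule (e.g. via a taskqueue) rather than spinning. */
	}
}
#endif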
1769
1770
1771 /*********************************************************************
1772  *
1773  *  Verify that the hardware indicated that the checksum is valid.
1774  *  Verify that the hardware indicated that the checksum is valid.
1775  *  Inform the stack of the checksum status so that it does not
1776  *  spend time re-verifying the checksum.
1777  *********************************************************************/
1778 static void
1779 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1780 {
1781         struct i40e_rx_ptype_decoded decoded;
1782
1783         decoded = decode_rx_desc_ptype(ptype);
1784
1785         /* Errors? */
1786         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1787             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1788                 mp->m_pkthdr.csum_flags = 0;
1789                 return;
1790         }
1791
1792         /* IPv6 packets with extension headers likely have a bad csum */
1793         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1794             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1795                 if (status &
1796                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1797                         mp->m_pkthdr.csum_flags = 0;
1798                         return;
1799                 }
1800
1802         /* IP Checksum Good */
1803         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1804         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1805
1806         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1807                 mp->m_pkthdr.csum_flags |= 
1808                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1809                 mp->m_pkthdr.csum_data |= htons(0xffff);
1810         }
1811         return;
1812 }
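/*
 * Illustrative sketch (not compiled): what the flags set above mean to the
 * rest of the stack.  When CSUM_DATA_VALID | CSUM_PSEUDO_HDR are set and
 * csum_data is 0xffff, the transport layer can accept the L4 checksum
 * without recomputing it.  The helper name below is hypothetical.
 */
#if 0
static bool
example_l4_csum_ok(const struct mbuf *m)
{
	return ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
	    m->m_pkthdr.csum_data == 0xffff);
}
#endif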
1813
1814 #if __FreeBSD_version >= 1100000
1815 uint64_t
1816 ixl_get_counter(if_t ifp, ift_counter cnt)
1817 {
1818         struct ixl_vsi *vsi;
1819
1820         vsi = if_getsoftc(ifp);
1821
1822         switch (cnt) {
1823         case IFCOUNTER_IPACKETS:
1824                 return (vsi->ipackets);
1825         case IFCOUNTER_IERRORS:
1826                 return (vsi->ierrors);
1827         case IFCOUNTER_OPACKETS:
1828                 return (vsi->opackets);
1829         case IFCOUNTER_OERRORS:
1830                 return (vsi->oerrors);
1831         case IFCOUNTER_COLLISIONS:
1832                 /* Collisions cannot occur in full-duplex 10G/40G Ethernet */
1833                 return (0);
1834         case IFCOUNTER_IBYTES:
1835                 return (vsi->ibytes);
1836         case IFCOUNTER_OBYTES:
1837                 return (vsi->obytes);
1838         case IFCOUNTER_IMCASTS:
1839                 return (vsi->imcasts);
1840         case IFCOUNTER_OMCASTS:
1841                 return (vsi->omcasts);
1842         case IFCOUNTER_IQDROPS:
1843                 return (vsi->iqdrops);
1844         case IFCOUNTER_OQDROPS:
1845                 return (vsi->oqdrops);
1846         case IFCOUNTER_NOPROTO:
1847                 return (vsi->noproto);
1848         default:
1849                 return (if_get_counter_default(ifp, cnt));
1850         }
1851 }
1852 #endif
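/*
 * Illustrative sketch (not compiled): ixl_get_counter() only takes effect
 * once it is registered as the ifnet's counter callback, which the attach
 * path does roughly as below.  The function name is hypothetical; the exact
 * registration point in the driver may differ.
 */
#if 0
static void
example_register_counter_fn(struct ixl_vsi *vsi)
{
	/* if_getsoftc() in ixl_get_counter() relies on the softc binding. */
	if_setsoftc(vsi->ifp, vsi);
	if_setgetcounterfn(vsi->ifp, ixl_get_counter);
}
#endif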
1853