MFC r279232: Add native netmap support to ixl
1 /******************************************************************************
2
3   Copyright (c) 2013-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was separated to allow usage by
38 **          both the BASE and the VF drivers.
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #endif
45
46 #include "ixl.h"
47
48 #ifdef RSS
49 #include <net/rss_config.h>
50 #endif
51
52 /* Local Prototypes */
53 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
54 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
55 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
56 static int      ixl_tx_setup_offload(struct ixl_queue *,
57                     struct mbuf *, u32 *, u32 *);
58 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
59
60 static __inline void ixl_rx_discard(struct rx_ring *, int);
61 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
62                     struct mbuf *, u8);
63
64 #ifdef DEV_NETMAP
65 #include <dev/netmap/if_ixl_netmap.h>
66 #endif /* DEV_NETMAP */
67
68 /*
69 ** Multiqueue Transmit driver
70 **
71 */
72 int
73 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
74 {
75         struct ixl_vsi          *vsi = ifp->if_softc;
76         struct ixl_queue        *que;
77         struct tx_ring          *txr;
78         int                     err, i;
79 #ifdef RSS
80         u32                     bucket_id;
81 #endif
82
83         /*
84         ** Which queue to use:
85         **
86         ** When doing RSS, map it to the same outbound
87         ** queue as the incoming flow would be mapped to.
88         ** If everything is set up correctly, it should be
89         ** the same bucket as the one the current CPU is in.
90         */
91         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
92 #ifdef  RSS
93                 if (rss_hash2bucket(m->m_pkthdr.flowid,
94                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
95                         i = bucket_id % vsi->num_queues;
96                 } else
97 #endif
98                         i = m->m_pkthdr.flowid % vsi->num_queues;
99         } else
100                 i = curcpu % vsi->num_queues;
101         /*
102         ** This may not be perfect, but until something
103         ** better comes along it will keep us from scheduling
104         ** on stalled queues.
105         */
106         if (((1 << i) & vsi->active_queues) == 0)
107                 i = ffsl(vsi->active_queues) - 1;       /* ffsl() is 1-based */
108
109         que = &vsi->queues[i];
110         txr = &que->txr;
111
112         err = drbr_enqueue(ifp, txr->br, m);
113         if (err)
114                 return(err);
115         if (IXL_TX_TRYLOCK(txr)) {
116                 ixl_mq_start_locked(ifp, txr);
117                 IXL_TX_UNLOCK(txr);
118         } else
119                 taskqueue_enqueue(que->tq, &que->tx_task);
120
121         return (0);
122 }
123
124 int
125 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
126 {
127         struct ixl_queue        *que = txr->que;
128         struct ixl_vsi          *vsi = que->vsi;
129         struct mbuf             *next;
130         int                     err = 0;
131
132
133         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
134             vsi->link_active == 0)
135                 return (ENETDOWN);
136
137         /* Process the transmit queue */
138         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
139                 if ((err = ixl_xmit(que, &next)) != 0) {
140                         if (next == NULL)
141                                 drbr_advance(ifp, txr->br);
142                         else
143                                 drbr_putback(ifp, txr->br, next);
144                         break;
145                 }
146                 drbr_advance(ifp, txr->br);
147                 /* Send a copy of the frame to the BPF listener */
148                 ETHER_BPF_MTAP(ifp, next);
149                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
150                         break;
151         }
152
153         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
154                 ixl_txeof(que);
155
156         return (err);
157 }
158
159 /*
160  * Called from a taskqueue to drain queued transmit packets.
161  */
162 void
163 ixl_deferred_mq_start(void *arg, int pending)
164 {
165         struct ixl_queue        *que = arg;
166         struct tx_ring          *txr = &que->txr;
167         struct ixl_vsi          *vsi = que->vsi;
168         struct ifnet            *ifp = vsi->ifp;
169         
170         IXL_TX_LOCK(txr);
171         if (!drbr_empty(ifp, txr->br))
172                 ixl_mq_start_locked(ifp, txr);
173         IXL_TX_UNLOCK(txr);
174 }
175
176 /*
177 ** Flush all queue ring buffers
178 */
179 void
180 ixl_qflush(struct ifnet *ifp)
181 {
182         struct ixl_vsi  *vsi = ifp->if_softc;
183
184         for (int i = 0; i < vsi->num_queues; i++) {
185                 struct ixl_queue *que = &vsi->queues[i];
186                 struct tx_ring  *txr = &que->txr;
187                 struct mbuf     *m;
188                 IXL_TX_LOCK(txr);
189                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
190                         m_freem(m);
191                 IXL_TX_UNLOCK(txr);
192         }
193         if_qflush(ifp);
194 }
195
196 /*
197 ** Find mbuf chains passed to the driver 
198 ** that are 'sparse', using more than 8
199 ** mbufs to deliver an mss-size chunk of data
200 */
201 static inline bool
202 ixl_tso_detect_sparse(struct mbuf *mp)
203 {
204         struct mbuf     *m;
205         int             num = 0, mss;
206         bool            ret = FALSE;
207
208         mss = mp->m_pkthdr.tso_segsz;
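        /*
        ** Walk the chain, counting how many mbufs (after the first,
        ** which carries the headers) it takes to supply one MSS
        ** worth of payload; too many marks the chain as sparse.
        */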
209         for (m = mp->m_next; m != NULL; m = m->m_next) {
210                 num++;
211                 mss -= m->m_len;
212                 if (mss < 1)
213                         break;
214                 if (m->m_next == NULL)
215                         break;
216         }
217         if (num > IXL_SPARSE_CHAIN)
218                 ret = TRUE;
219
220         return (ret);
221 }
222
223
224 /*********************************************************************
225  *
226  *  This routine maps the mbufs to tx descriptors, allowing the
227  *  TX engine to transmit the packets. 
228  *      - return 0 on success, positive on failure
229  *
230  **********************************************************************/
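/* EOP marks the final descriptor of a frame; RS asks the hardware to report its completion */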
231 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
232
233 static int
234 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
235 {
236         struct ixl_vsi          *vsi = que->vsi;
237         struct i40e_hw          *hw = vsi->hw;
238         struct tx_ring          *txr = &que->txr;
239         struct ixl_tx_buf       *buf;
240         struct i40e_tx_desc     *txd = NULL;
241         struct mbuf             *m_head, *m;
242         int                     i, j, error, nsegs, maxsegs;
243         int                     first, last = 0;
244         u16                     vtag = 0;
245         u32                     cmd, off;
246         bus_dmamap_t            map;
247         bus_dma_tag_t           tag;
248         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
249
250
251         cmd = off = 0;
252         m_head = *m_headp;
253
254         /*
255          * Important to capture the first descriptor
256          * used because it will contain the index of
257          * the one we tell the hardware to report back
258          */
259         first = txr->next_avail;
260         buf = &txr->buffers[first];
261         map = buf->map;
262         tag = txr->tx_tag;
263         maxsegs = IXL_MAX_TX_SEGS;
264
265         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
266                 /* Use larger mapping for TSO */
267                 tag = txr->tso_tag;
268                 maxsegs = IXL_MAX_TSO_SEGS;
269                 if (ixl_tso_detect_sparse(m_head)) {
270                         m = m_defrag(m_head, M_NOWAIT);
271                         if (m == NULL) {
272                                 m_freem(*m_headp);
273                                 *m_headp = NULL;
274                                 return (ENOBUFS);
275                         }
276                         *m_headp = m;
277                 }
278         }
279
280         /*
281          * Map the packet for DMA.
282          */
283         error = bus_dmamap_load_mbuf_sg(tag, map,
284             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
285
286         if (error == EFBIG) {
287                 struct mbuf *m;
288
289                 m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
290                 if (m == NULL) {
291                         que->mbuf_defrag_failed++;
292                         m_freem(*m_headp);
293                         *m_headp = NULL;
294                         return (ENOBUFS);
295                 }
296                 *m_headp = m;
297
298                 /* Try it again */
299                 error = bus_dmamap_load_mbuf_sg(tag, map,
300                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
301
302                 if (error == ENOMEM) {
303                         que->tx_dma_setup++;
304                         return (error);
305                 } else if (error != 0) {
306                         que->tx_dma_setup++;
307                         m_freem(*m_headp);
308                         *m_headp = NULL;
309                         return (error);
310                 }
311         } else if (error == ENOMEM) {
312                 que->tx_dma_setup++;
313                 return (error);
314         } else if (error != 0) {
315                 que->tx_dma_setup++;
316                 m_freem(*m_headp);
317                 *m_headp = NULL;
318                 return (error);
319         }
320
321         /* Make certain there are enough descriptors */
322         if (nsegs > txr->avail - 2) {
323                 txr->no_desc++;
324                 error = ENOBUFS;
325                 goto xmit_fail;
326         }
327         m_head = *m_headp;
328
329         /* Set up the TSO/CSUM offload */
330         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
331                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
332                 if (error)
333                         goto xmit_fail;
334         }
335
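        /* Always have the hardware insert the Ethernet CRC */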
336         cmd |= I40E_TX_DESC_CMD_ICRC;
337         /* Grab the VLAN tag */
338         if (m_head->m_flags & M_VLANTAG) {
339                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
340                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
341         }
342
343         i = txr->next_avail;
344         for (j = 0; j < nsegs; j++) {
345                 bus_size_t seglen;
346
347                 buf = &txr->buffers[i];
348                 buf->tag = tag; /* Keep track of the type tag */
349                 txd = &txr->base[i];
350                 seglen = segs[j].ds_len;
351
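                /*
                ** Each data descriptor carries the DMA address of one
                ** segment plus a single 64-bit word packing the command
                ** bits, offload header offsets, segment length and VLAN tag.
                */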
352                 txd->buffer_addr = htole64(segs[j].ds_addr);
353                 txd->cmd_type_offset_bsz =
354                     htole64(I40E_TX_DESC_DTYPE_DATA
355                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
356                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
357                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
358                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
359
360                 last = i; /* descriptor that will get completion IRQ */
361
362                 if (++i == que->num_desc)
363                         i = 0;
364
365                 buf->m_head = NULL;
366                 buf->eop_index = -1;
367         }
368         /* Set the last descriptor for report */
369         txd->cmd_type_offset_bsz |=
370             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
371         txr->avail -= nsegs;
372         txr->next_avail = i;
373
374         buf->m_head = m_head;
375         /* Swap the dma map between the first and last descriptor: the loaded map must stay with the mbuf, which is tracked on the last buffer */
376         txr->buffers[first].map = buf->map;
377         buf->map = map;
378         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
379
380         /* Set the index of the descriptor that will be marked done */
381         buf = &txr->buffers[first];
382         buf->eop_index = last;
383
384         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
385             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
386         /*
387          * Advance the Transmit Descriptor Tail (TDT); this tells the
388          * hardware that this frame is available to transmit.
389          */
390         ++txr->total_packets;
391         wr32(hw, txr->tail, i);
392
393         ixl_flush(hw);
394         /* Mark outstanding work */
395         if (que->busy == 0)
396                 que->busy = 1;
397         return (0);
398
399 xmit_fail:
400         bus_dmamap_unload(tag, buf->map);
401         return (error);
402 }
403
404
405 /*********************************************************************
406  *
407  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
408  *  the information needed to transmit a packet on the wire. This is
409  *  called only once at attach; setup is done on every reset.
410  *
411  **********************************************************************/
412 int
413 ixl_allocate_tx_data(struct ixl_queue *que)
414 {
415         struct tx_ring          *txr = &que->txr;
416         struct ixl_vsi          *vsi = que->vsi;
417         device_t                dev = vsi->dev;
418         struct ixl_tx_buf       *buf;
419         int                     error = 0;
420
421         /*
422          * Setup DMA descriptor areas.
423          */
424         if ((error = bus_dma_tag_create(NULL,           /* parent */
425                                1, 0,                    /* alignment, bounds */
426                                BUS_SPACE_MAXADDR,       /* lowaddr */
427                                BUS_SPACE_MAXADDR,       /* highaddr */
428                                NULL, NULL,              /* filter, filterarg */
429                                IXL_TSO_SIZE,            /* maxsize */
430                                IXL_MAX_TX_SEGS,         /* nsegments */
431                                PAGE_SIZE,               /* maxsegsize */
432                                0,                       /* flags */
433                                NULL,                    /* lockfunc */
434                                NULL,                    /* lockfuncarg */
435                                &txr->tx_tag))) {
436                 device_printf(dev,"Unable to allocate TX DMA tag\n");
437                 goto fail;
438         }
439
440         /* Make a special tag for TSO */
441         if ((error = bus_dma_tag_create(NULL,           /* parent */
442                                1, 0,                    /* alignment, bounds */
443                                BUS_SPACE_MAXADDR,       /* lowaddr */
444                                BUS_SPACE_MAXADDR,       /* highaddr */
445                                NULL, NULL,              /* filter, filterarg */
446                                IXL_TSO_SIZE,            /* maxsize */
447                                IXL_MAX_TSO_SEGS,        /* nsegments */
448                                PAGE_SIZE,               /* maxsegsize */
449                                0,                       /* flags */
450                                NULL,                    /* lockfunc */
451                                NULL,                    /* lockfuncarg */
452                                &txr->tso_tag))) {
453                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
454                 goto fail;
455         }
456
457         if (!(txr->buffers =
458             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
459             que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
460                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
461                 error = ENOMEM;
462                 goto fail;
463         }
464
465         /* Create the descriptor buffer default dma maps */
466         buf = txr->buffers;
467         for (int i = 0; i < que->num_desc; i++, buf++) {
468                 buf->tag = txr->tx_tag;
469                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
470                 if (error != 0) {
471                         device_printf(dev, "Unable to create TX DMA map\n");
472                         goto fail;
473                 }
474         }
475 fail:
476         return (error);
477 }
478
479
480 /*********************************************************************
481  *
482  *  (Re)Initialize a queue transmit ring.
483  *      - called by init, it clears the descriptor ring,
484  *        and frees any stale mbufs 
485  *
486  **********************************************************************/
487 void
488 ixl_init_tx_ring(struct ixl_queue *que)
489 {
490         struct tx_ring *txr = &que->txr;
491         struct ixl_tx_buf *buf;
492 #ifdef DEV_NETMAP
493         struct netmap_adapter *na = NA(que->vsi->ifp);
494         struct netmap_slot *slot;
495 #endif /* DEV_NETMAP */
496
497         /* Clear the old ring contents */
498         IXL_TX_LOCK(txr);
499 #ifdef DEV_NETMAP
500         /*
501          * (under lock): if in netmap mode, do some consistency
502          * checks and set slot to entry 0 of the netmap ring.
503          */
504         slot = netmap_reset(na, NR_TX, que->me, 0);
505 #endif /* DEV_NETMAP */
506
507         bzero((void *)txr->base,
508               (sizeof(struct i40e_tx_desc)) * que->num_desc);
509
510         /* Reset indices */
511         txr->next_avail = 0;
512         txr->next_to_clean = 0;
513
514 #ifdef IXL_FDIR
515         /* Initialize flow director */
516         txr->atr_rate = ixl_atr_rate;
517         txr->atr_count = 0;
518 #endif
519
520         /* Free any existing tx mbufs. */
521         buf = txr->buffers;
522         for (int i = 0; i < que->num_desc; i++, buf++) {
523                 if (buf->m_head != NULL) {
524                         bus_dmamap_sync(buf->tag, buf->map,
525                             BUS_DMASYNC_POSTWRITE);
526                         bus_dmamap_unload(buf->tag, buf->map);
527                         m_freem(buf->m_head);
528                         buf->m_head = NULL;
529                 }
530 #ifdef DEV_NETMAP
531                 /*
532                  * In netmap mode, set the map for the packet buffer.
533                  * NOTE: Some drivers (not this one) also need to set
534                  * the physical buffer address in the NIC ring.
535                  * netmap_idx_n2k() maps a nic index, i, into the corresponding
536                  * netmap slot index, si
537                  */
538                 if (slot) {
539                         int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
540                         netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
541                 }
542 #endif /* DEV_NETMAP */
543                 /* Clear the EOP index */
544                 buf->eop_index = -1;
545         }
546
547         /* Set number of descriptors available */
548         txr->avail = que->num_desc;
549
550         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
551             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
552         IXL_TX_UNLOCK(txr);
553 }
554
555
556 /*********************************************************************
557  *
558  *  Free transmit ring related data structures.
559  *
560  **********************************************************************/
561 void
562 ixl_free_que_tx(struct ixl_queue *que)
563 {
564         struct tx_ring *txr = &que->txr;
565         struct ixl_tx_buf *buf;
566
567         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
568
569         for (int i = 0; i < que->num_desc; i++) {
570                 buf = &txr->buffers[i];
571                 if (buf->m_head != NULL) {
572                         bus_dmamap_sync(buf->tag, buf->map,
573                             BUS_DMASYNC_POSTWRITE);
574                         bus_dmamap_unload(buf->tag,
575                             buf->map);
576                         m_freem(buf->m_head);
577                         buf->m_head = NULL;
578                         if (buf->map != NULL) {
579                                 bus_dmamap_destroy(buf->tag,
580                                     buf->map);
581                                 buf->map = NULL;
582                         }
583                 } else if (buf->map != NULL) {
584                         bus_dmamap_unload(buf->tag,
585                             buf->map);
586                         bus_dmamap_destroy(buf->tag,
587                             buf->map);
588                         buf->map = NULL;
589                 }
590         }
591         if (txr->br != NULL)
592                 buf_ring_free(txr->br, M_DEVBUF);
593         if (txr->buffers != NULL) {
594                 free(txr->buffers, M_DEVBUF);
595                 txr->buffers = NULL;
596         }
597         if (txr->tx_tag != NULL) {
598                 bus_dma_tag_destroy(txr->tx_tag);
599                 txr->tx_tag = NULL;
600         }
601         if (txr->tso_tag != NULL) {
602                 bus_dma_tag_destroy(txr->tso_tag);
603                 txr->tso_tag = NULL;
604         }
605
606         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
607         return;
608 }
609
610 /*********************************************************************
611  *
612  *  Setup descriptor for hw offloads 
613  *
614  **********************************************************************/
615
616 static int
617 ixl_tx_setup_offload(struct ixl_queue *que,
618     struct mbuf *mp, u32 *cmd, u32 *off)
619 {
620         struct ether_vlan_header        *eh;
621 #ifdef INET
622         struct ip                       *ip = NULL;
623 #endif
624         struct tcphdr                   *th = NULL;
625 #ifdef INET6
626         struct ip6_hdr                  *ip6;
627 #endif
628         int                             elen, ip_hlen = 0, tcp_hlen;
629         u16                             etype;
630         u8                              ipproto = 0;
631         bool                            tso = FALSE;
632
633
634         /* Set up the TSO context descriptor if required */
635         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
636                 tso = ixl_tso_setup(que, mp);
637                 if (tso)
638                         ++que->tso;
639                 else
640                         return (ENXIO);
641         }
642
643         /*
644          * Determine where frame payload starts.
645          * Jump over vlan headers if already present,
646          * helpful for QinQ too.
647          */
648         eh = mtod(mp, struct ether_vlan_header *);
649         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
650                 etype = ntohs(eh->evl_proto);
651                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
652         } else {
653                 etype = ntohs(eh->evl_encap_proto);
654                 elen = ETHER_HDR_LEN;
655         }
656
657         switch (etype) {
658 #ifdef INET
659                 case ETHERTYPE_IP:
660                         ip = (struct ip *)(mp->m_data + elen);
661                         ip_hlen = ip->ip_hl << 2;
662                         ipproto = ip->ip_p;
663                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
664                         /* The IP checksum must be recalculated with TSO */
665                         if (tso)
666                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
667                         else
668                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
669                         break;
670 #endif
671 #ifdef INET6
672                 case ETHERTYPE_IPV6:
673                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
674                         ip_hlen = sizeof(struct ip6_hdr);
675                         ipproto = ip6->ip6_nxt;
676                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
677                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
678                         break;
679 #endif
680                 default:
681                         break;
682         }
683
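        /* MACLEN is programmed in 2-byte units and IPLEN in 4-byte units, hence the shifts */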
684         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
685         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
686
687         switch (ipproto) {
688                 case IPPROTO_TCP:
689                         tcp_hlen = th->th_off << 2;
690                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
691                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
692                                 *off |= (tcp_hlen >> 2) <<
693                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
694                         }
695 #ifdef IXL_FDIR
696                         ixl_atr(que, th, etype);
697 #endif
698                         break;
699                 case IPPROTO_UDP:
700                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
701                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
702                                 *off |= (sizeof(struct udphdr) >> 2) <<
703                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
704                         }
705                         break;
706
707                 case IPPROTO_SCTP:
708                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
709                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
710                                 *off |= (sizeof(struct sctphdr) >> 2) <<
711                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
712                         }
713                         /* Fall Thru */
714                 default:
715                         break;
716         }
717
718         return (0);
719 }
720
721
722 /**********************************************************************
723  *
724  *  Setup context for hardware segmentation offload (TSO)
725  *
726  **********************************************************************/
727 static bool
728 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
729 {
730         struct tx_ring                  *txr = &que->txr;
731         struct i40e_tx_context_desc     *TXD;
732         struct ixl_tx_buf               *buf;
733         u32                             cmd, mss, type, tsolen;
734         u16                             etype;
735         int                             idx, elen, ip_hlen, tcp_hlen;
736         struct ether_vlan_header        *eh;
737 #ifdef INET
738         struct ip                       *ip;
739 #endif
740 #ifdef INET6
741         struct ip6_hdr                  *ip6;
742 #endif
743 #if defined(INET6) || defined(INET)
744         struct tcphdr                   *th;
745 #endif
746         u64                             type_cmd_tso_mss;
747
748         /*
749          * Determine where frame payload starts.
750          * Jump over vlan headers if already present
751          */
752         eh = mtod(mp, struct ether_vlan_header *);
753         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
754                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
755                 etype = eh->evl_proto;
756         } else {
757                 elen = ETHER_HDR_LEN;
758                 etype = eh->evl_encap_proto;
759         }
760
761         switch (ntohs(etype)) {
762 #ifdef INET6
763         case ETHERTYPE_IPV6:
764                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
765                 if (ip6->ip6_nxt != IPPROTO_TCP)
766                         return (FALSE);
767                 ip_hlen = sizeof(struct ip6_hdr);
768                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
769                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
770                 tcp_hlen = th->th_off << 2;
771                 break;
772 #endif
773 #ifdef INET
774         case ETHERTYPE_IP:
775                 ip = (struct ip *)(mp->m_data + elen);
776                 if (ip->ip_p != IPPROTO_TCP)
777                         return (FALSE);
778                 ip->ip_sum = 0;
779                 ip_hlen = ip->ip_hl << 2;
780                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
781                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
782                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
783                 tcp_hlen = th->th_off << 2;
784                 break;
785 #endif
786         default:
787                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
788                     __func__, ntohs(etype));
789                 return FALSE;
790         }
791
792         /* Ensure we have at least the IP+TCP header in the first mbuf. */
793         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
794                 return FALSE;
795
796         idx = txr->next_avail;
797         buf = &txr->buffers[idx];
798         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
799         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
800
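        /*
        ** Build the TSO context descriptor: it consumes one ring entry
        ** and hands the hardware the total TSO payload length and MSS.
        */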
801         type = I40E_TX_DESC_DTYPE_CONTEXT;
802         cmd = I40E_TX_CTX_DESC_TSO;
803         mss = mp->m_pkthdr.tso_segsz;
804
805         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
806             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
807             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
808             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
809         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
810
811         TXD->tunneling_params = htole32(0);
812         buf->m_head = NULL;
813         buf->eop_index = -1;
814
815         if (++idx == que->num_desc)
816                 idx = 0;
817
818         txr->avail--;
819         txr->next_avail = idx;
820
821         return TRUE;
822 }
823
824 /*             
825 ** ixl_get_tx_head - Retrieve the value from the 
826 **    location where the HW records its HEAD index
827 */
828 static inline u32
829 ixl_get_tx_head(struct ixl_queue *que)
830 {
831         struct tx_ring  *txr = &que->txr;
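        /* The head write-back area sits one entry past the end of the descriptor ring */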
832         void *head = &txr->base[que->num_desc];
833         return LE32_TO_CPU(*(volatile __le32 *)head);
834 }
835
836 /**********************************************************************
837  *
838  *  Examine each tx_buffer in the used queue. If the hardware is done
839  *  processing the packet then free associated resources. The
840  *  tx_buffer is put back on the free queue.
841  *
842  **********************************************************************/
843 bool
844 ixl_txeof(struct ixl_queue *que)
845 {
846         struct tx_ring          *txr = &que->txr;
847         u32                     first, last, head, done, processed;
848         struct ixl_tx_buf       *buf;
849         struct i40e_tx_desc     *tx_desc, *eop_desc;
850
851
852         mtx_assert(&txr->mtx, MA_OWNED);
853
854 #ifdef DEV_NETMAP
855         // XXX todo: implement moderation
856         if (netmap_tx_irq(que->vsi->ifp, que->me))
857                 return FALSE;
858 #endif /* DEV_NETMAP */
859
860         /* These are not the descriptors you seek, move along :) */
861         if (txr->avail == que->num_desc) {
862                 que->busy = 0;
863                 return FALSE;
864         }
865
866         processed = 0;
867         first = txr->next_to_clean;
868         buf = &txr->buffers[first];
869         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
870         last = buf->eop_index;
871         if (last == -1)
872                 return FALSE;
873         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
874
875         /* Get the Head WB value */
876         head = ixl_get_tx_head(que);
877
878         /*
879         ** Get the index of the first descriptor
880         ** BEYOND the EOP and call that 'done'.
881         ** I do this so the comparison in the
882         ** inner while loop below can be simple
883         */
884         if (++last == que->num_desc) last = 0;
885         done = last;
886
887         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
888             BUS_DMASYNC_POSTREAD);
889         /*
890         ** The HEAD index of the ring is written to a
891         ** defined location; this, rather than a done bit,
892         ** is what is used to keep track of what must be
893         ** 'cleaned'.
894         */
895         while (first != head) {
896                 /* We clean the range of the packet */
897                 while (first != done) {
898                         ++txr->avail;
899                         ++processed;
900
901                         if (buf->m_head) {
902                                 txr->bytes += /* for ITR adjustment */
903                                     buf->m_head->m_pkthdr.len;
904                                 txr->tx_bytes += /* for TX stats */
905                                     buf->m_head->m_pkthdr.len;
906                                 bus_dmamap_sync(buf->tag,
907                                     buf->map,
908                                     BUS_DMASYNC_POSTWRITE);
909                                 bus_dmamap_unload(buf->tag,
910                                     buf->map);
911                                 m_freem(buf->m_head);
912                                 buf->m_head = NULL;
913                                 buf->map = NULL;
914                         }
915                         buf->eop_index = -1;
916
917                         if (++first == que->num_desc)
918                                 first = 0;
919
920                         buf = &txr->buffers[first];
921                         tx_desc = &txr->base[first];
922                 }
923                 ++txr->packets;
924                 /* See if there is more work now */
925                 last = buf->eop_index;
926                 if (last != -1) {
927                         eop_desc = &txr->base[last];
928                         /* Get next done point */
929                         if (++last == que->num_desc) last = 0;
930                         done = last;
931                 } else
932                         break;
933         }
934         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
935             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
936
937         txr->next_to_clean = first;
938
939
940         /*
941         ** Hang detection: we know there's work
942         ** outstanding or the first return above
943         ** would have been taken, so indicate an
944         ** unsuccessful pass. In the local timer,
945         ** if this value grows too large the queue
946         ** is considered hung. If anything has been
947         ** cleaned then reset the state.
948         */
949         if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
950                 ++que->busy;
951
952         if (processed)
953                 que->busy = 1; /* Note this turns off HUNG */
954
955         /*
956          * If there are no pending descriptors, clear the timeout.
957          */
958         if (txr->avail == que->num_desc) {
959                 que->busy = 0;
960                 return FALSE;
961         }
962
963         return TRUE;
964 }
965
966 /*********************************************************************
967  *
968  *  Refresh mbuf buffers for RX descriptor rings
969  *   - now keeps its own state so discards due to resource
970  *     exhaustion are unnecessary; if an mbuf cannot be obtained
971  *     it just returns, keeping its placeholder, so it can simply
972  *     be called again later to retry.
973  *
974  **********************************************************************/
975 static void
976 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
977 {
978         struct ixl_vsi          *vsi = que->vsi;
979         struct rx_ring          *rxr = &que->rxr;
980         bus_dma_segment_t       hseg[1];
981         bus_dma_segment_t       pseg[1];
982         struct ixl_rx_buf       *buf;
983         struct mbuf             *mh, *mp;
984         int                     i, j, nsegs, error;
985         bool                    refreshed = FALSE;
986
987         i = j = rxr->next_refresh;
988         /* Control the loop with one beyond */
989         if (++j == que->num_desc)
990                 j = 0;
991
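        /* i is the slot being refreshed; j runs one ahead so the loop stops at 'limit' */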
992         while (j != limit) {
993                 buf = &rxr->buffers[i];
994                 if (rxr->hdr_split == FALSE)
995                         goto no_split;
996
997                 if (buf->m_head == NULL) {
998                         mh = m_gethdr(M_NOWAIT, MT_DATA);
999                         if (mh == NULL)
1000                                 goto update;
1001                 } else
1002                         mh = buf->m_head;
1003
1004                 mh->m_pkthdr.len = mh->m_len = MHLEN;
1006                 mh->m_flags |= M_PKTHDR;
1007                 /* Get the memory mapping */
1008                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1009                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1010                 if (error != 0) {
1011                         printf("Refresh mbufs: hdr dmamap load"
1012                             " failure - %d\n", error);
1013                         m_free(mh);
1014                         buf->m_head = NULL;
1015                         goto update;
1016                 }
1017                 buf->m_head = mh;
1018                 bus_dmamap_sync(rxr->htag, buf->hmap,
1019                     BUS_DMASYNC_PREREAD);
1020                 rxr->base[i].read.hdr_addr =
1021                    htole64(hseg[0].ds_addr);
1022
1023 no_split:
1024                 if (buf->m_pack == NULL) {
1025                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1026                             M_PKTHDR, rxr->mbuf_sz);
1027                         if (mp == NULL)
1028                                 goto update;
1029                 } else
1030                         mp = buf->m_pack;
1031
1032                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1033                 /* Get the memory mapping */
1034                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1035                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1036                 if (error != 0) {
1037                         printf("Refresh mbufs: payload dmamap load"
1038                             " failure - %d\n", error);
1039                         m_free(mp);
1040                         buf->m_pack = NULL;
1041                         goto update;
1042                 }
1043                 buf->m_pack = mp;
1044                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1045                     BUS_DMASYNC_PREREAD);
1046                 rxr->base[i].read.pkt_addr =
1047                    htole64(pseg[0].ds_addr);
1048                 /* Used only when doing header split */
1049                 rxr->base[i].read.hdr_addr = 0;
1050
1051                 refreshed = TRUE;
1052                 /* Next is precalculated */
1053                 i = j;
1054                 rxr->next_refresh = i;
1055                 if (++j == que->num_desc)
1056                         j = 0;
1057         }
1058 update:
1059         if (refreshed) /* Update hardware tail index */
1060                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1061         return;
1062 }
1063
1064
1065 /*********************************************************************
1066  *
1067  *  Allocate memory for rx_buffer structures. Since we use one
1068  *  rx_buffer per descriptor, the maximum number of rx_buffers
1069  *  that we'll need is equal to the number of receive descriptors
1070  *  that we've defined.
1071  *
1072  **********************************************************************/
1073 int
1074 ixl_allocate_rx_data(struct ixl_queue *que)
1075 {
1076         struct rx_ring          *rxr = &que->rxr;
1077         struct ixl_vsi          *vsi = que->vsi;
1078         device_t                dev = vsi->dev;
1079         struct ixl_rx_buf       *buf;
1080         int                     i, bsize, error;
1081
1082         bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1083         if (!(rxr->buffers =
1084             (struct ixl_rx_buf *) malloc(bsize,
1085             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1086                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1087                 error = ENOMEM;
1088                 return (error);
1089         }
1090
1091         if ((error = bus_dma_tag_create(NULL,   /* parent */
1092                                    1, 0,        /* alignment, bounds */
1093                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1094                                    BUS_SPACE_MAXADDR,   /* highaddr */
1095                                    NULL, NULL,          /* filter, filterarg */
1096                                    MSIZE,               /* maxsize */
1097                                    1,                   /* nsegments */
1098                                    MSIZE,               /* maxsegsize */
1099                                    0,                   /* flags */
1100                                    NULL,                /* lockfunc */
1101                                    NULL,                /* lockfuncarg */
1102                                    &rxr->htag))) {
1103                 device_printf(dev, "Unable to create RX DMA htag\n");
1104                 return (error);
1105         }
1106
1107         if ((error = bus_dma_tag_create(NULL,   /* parent */
1108                                    1, 0,        /* alignment, bounds */
1109                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1110                                    BUS_SPACE_MAXADDR,   /* highaddr */
1111                                    NULL, NULL,          /* filter, filterarg */
1112                                    MJUM16BYTES,         /* maxsize */
1113                                    1,                   /* nsegments */
1114                                    MJUM16BYTES,         /* maxsegsize */
1115                                    0,                   /* flags */
1116                                    NULL,                /* lockfunc */
1117                                    NULL,                /* lockfuncarg */
1118                                    &rxr->ptag))) {
1119                 device_printf(dev, "Unable to create RX DMA ptag\n");
1120                 return (error);
1121         }
1122
1123         for (i = 0; i < que->num_desc; i++) {
1124                 buf = &rxr->buffers[i];
1125                 error = bus_dmamap_create(rxr->htag,
1126                     BUS_DMA_NOWAIT, &buf->hmap);
1127                 if (error) {
1128                         device_printf(dev, "Unable to create RX head map\n");
1129                         break;
1130                 }
1131                 error = bus_dmamap_create(rxr->ptag,
1132                     BUS_DMA_NOWAIT, &buf->pmap);
1133                 if (error) {
1134                         device_printf(dev, "Unable to create RX pkt map\n");
1135                         break;
1136                 }
1137         }
1138
1139         return (error);
1140 }
1141
1142
1143 /*********************************************************************
1144  *
1145  *  (Re)Initialize the queue receive ring and its buffers.
1146  *
1147  **********************************************************************/
1148 int
1149 ixl_init_rx_ring(struct ixl_queue *que)
1150 {
1151         struct  rx_ring         *rxr = &que->rxr;
1152         struct ixl_vsi          *vsi = que->vsi;
1153 #if defined(INET6) || defined(INET)
1154         struct ifnet            *ifp = vsi->ifp;
1155         struct lro_ctrl         *lro = &rxr->lro;
1156 #endif
1157         struct ixl_rx_buf       *buf;
1158         bus_dma_segment_t       pseg[1], hseg[1];
1159         int                     rsize, nsegs, error = 0;
1160 #ifdef DEV_NETMAP 
1161         struct netmap_adapter *na = NA(que->vsi->ifp);
1162         struct netmap_slot *slot;
1163 #endif /* DEV_NETMAP */
1164
1165         IXL_RX_LOCK(rxr);
1166 #ifdef DEV_NETMAP
1167         /* same as in ixl_init_tx_ring() */
1168         slot = netmap_reset(na, NR_RX, que->me, 0);
1169 #endif /* DEV_NETMAP */
1170         /* Clear the ring contents */
1171         rsize = roundup2(que->num_desc *
1172             sizeof(union i40e_rx_desc), DBA_ALIGN);
1173         bzero((void *)rxr->base, rsize);
1174         /* Cleanup any existing buffers */
1175         for (int i = 0; i < que->num_desc; i++) {
1176                 buf = &rxr->buffers[i];
1177                 if (buf->m_head != NULL) {
1178                         bus_dmamap_sync(rxr->htag, buf->hmap,
1179                             BUS_DMASYNC_POSTREAD);
1180                         bus_dmamap_unload(rxr->htag, buf->hmap);
1181                         buf->m_head->m_flags |= M_PKTHDR;
1182                         m_freem(buf->m_head);
1183                 }
1184                 if (buf->m_pack != NULL) {
1185                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1186                             BUS_DMASYNC_POSTREAD);
1187                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1188                         buf->m_pack->m_flags |= M_PKTHDR;
1189                         m_freem(buf->m_pack);
1190                 }
1191                 buf->m_head = NULL;
1192                 buf->m_pack = NULL;
1193         }
1194
1195         /* header split is off */
1196         rxr->hdr_split = FALSE;
1197
1198         /* Now replenish the mbufs */
1199         for (int j = 0; j != que->num_desc; ++j) {
1200                 struct mbuf     *mh, *mp;
1201
1202                 buf = &rxr->buffers[j];
1203 #ifdef DEV_NETMAP
1204                 /*
1205                  * In netmap mode, fill the map and set the buffer
1206                  * address in the NIC ring, considering the offset
1207                  * between the netmap and NIC rings (see comment in
1208                  * ixl_init_tx_ring() ). No need to allocate
1209                  * an mbuf, so end the block with a continue;
1210                  */
1211                 if (slot) {
1212                         int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
1213                         uint64_t paddr;
1214                         void *addr;
1215
1216                         addr = PNMB(na, slot + sj, &paddr);
1217                         netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1218                         /* Update descriptor and the cached value */
1219                         rxr->base[j].read.pkt_addr = htole64(paddr);
1220                         rxr->base[j].read.hdr_addr = 0;
1221                         continue;
1222                 }
1223 #endif /* DEV_NETMAP */
1224
1225                 /*
1226                 ** Don't allocate mbufs if not
1227                 ** doing header split, it's wasteful
1228                 */ 
1229                 if (rxr->hdr_split == FALSE)
1230                         goto skip_head;
1231
1232                 /* First the header */
1233                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1234                 if (buf->m_head == NULL) {
1235                         error = ENOBUFS;
1236                         goto fail;
1237                 }
1238                 m_adj(buf->m_head, ETHER_ALIGN);
1239                 mh = buf->m_head;
1240                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1241                 mh->m_flags |= M_PKTHDR;
1242                 /* Get the memory mapping */
1243                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1244                     buf->hmap, buf->m_head, hseg,
1245                     &nsegs, BUS_DMA_NOWAIT);
1246                 if (error != 0) /* Nothing elegant to do here */
1247                         goto fail;
1248                 bus_dmamap_sync(rxr->htag,
1249                     buf->hmap, BUS_DMASYNC_PREREAD);
1250                 /* Update descriptor */
1251                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1252
1253 skip_head:
1254                 /* Now the payload cluster */
1255                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1256                     M_PKTHDR, rxr->mbuf_sz);
1257                 if (buf->m_pack == NULL) {
1258                         error = ENOBUFS;
1259                         goto fail;
1260                 }
1261                 mp = buf->m_pack;
1262                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1263                 /* Get the memory mapping */
1264                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1265                     buf->pmap, mp, pseg,
1266                     &nsegs, BUS_DMA_NOWAIT);
1267                 if (error != 0)
1268                         goto fail;
1269                 bus_dmamap_sync(rxr->ptag,
1270                     buf->pmap, BUS_DMASYNC_PREREAD);
1271                 /* Update descriptor */
1272                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1273                 rxr->base[j].read.hdr_addr = 0;
1274         }
1275
1276
1277         /* Setup our descriptor indices */
1278         rxr->next_check = 0;
1279         rxr->next_refresh = 0;
1280         rxr->lro_enabled = FALSE;
1281         rxr->split = 0;
1282         rxr->bytes = 0;
1283         rxr->discard = FALSE;
1284
1285         wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1286         ixl_flush(vsi->hw);
1287
1288 #if defined(INET6) || defined(INET)
1289         /*
1290         ** Now set up the LRO interface:
1291         */
1292         if (ifp->if_capenable & IFCAP_LRO) {
1293                 int err = tcp_lro_init(lro);
1294                 if (err) {
1295                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1296                         goto fail;
1297                 }
1298                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1299                 rxr->lro_enabled = TRUE;
1300                 lro->ifp = vsi->ifp;
1301         }
1302 #endif
1303
1304         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1305             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1306
1307 fail:
1308         IXL_RX_UNLOCK(rxr);
1309         return (error);
1310 }
1311
1312
1313 /*********************************************************************
1314  *
1315  *  Free station receive ring data structures
1316  *
1317  **********************************************************************/
1318 void
1319 ixl_free_que_rx(struct ixl_queue *que)
1320 {
1321         struct rx_ring          *rxr = &que->rxr;
1322         struct ixl_rx_buf       *buf;
1323
1324         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1325
1326         /* Cleanup any existing buffers */
1327         if (rxr->buffers != NULL) {
1328                 for (int i = 0; i < que->num_desc; i++) {
1329                         buf = &rxr->buffers[i];
1330                         if (buf->m_head != NULL) {
1331                                 bus_dmamap_sync(rxr->htag, buf->hmap,
1332                                     BUS_DMASYNC_POSTREAD);
1333                                 bus_dmamap_unload(rxr->htag, buf->hmap);
1334                                 buf->m_head->m_flags |= M_PKTHDR;
1335                                 m_freem(buf->m_head);
1336                         }
1337                         if (buf->m_pack != NULL) {
1338                                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1339                                     BUS_DMASYNC_POSTREAD);
1340                                 bus_dmamap_unload(rxr->ptag, buf->pmap);
1341                                 buf->m_pack->m_flags |= M_PKTHDR;
1342                                 m_freem(buf->m_pack);
1343                         }
1344                         buf->m_head = NULL;
1345                         buf->m_pack = NULL;
1346                         if (buf->hmap != NULL) {
1347                                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1348                                 buf->hmap = NULL;
1349                         }
1350                         if (buf->pmap != NULL) {
1351                                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1352                                 buf->pmap = NULL;
1353                         }
1354                 }
1355                 if (rxr->buffers != NULL) {
1356                         free(rxr->buffers, M_DEVBUF);
1357                         rxr->buffers = NULL;
1358                 }
1359         }
1360
1361         if (rxr->htag != NULL) {
1362                 bus_dma_tag_destroy(rxr->htag);
1363                 rxr->htag = NULL;
1364         }
1365         if (rxr->ptag != NULL) {
1366                 bus_dma_tag_destroy(rxr->ptag);
1367                 rxr->ptag = NULL;
1368         }
1369
1370         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1371         return;
1372 }
1373
1374 static __inline void
1375 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1376 {
1377
1378 #if defined(INET6) || defined(INET)
1379         /*
1380          * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
1381          * has been computed by hardware and which carry no VLAN tag in the
1382          * Ethernet header.
1383          */
1384         if (rxr->lro_enabled &&
1385             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1386             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1387             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1388                 /*
1389                  * Send to the stack if:
1390                  **  - LRO not enabled, or
1391                  **  - no LRO resources, or
1392                  **  - lro enqueue fails
1393                  */
1394                 if (rxr->lro.lro_cnt != 0)
1395                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1396                                 return;
1397         }
1398 #endif
1399         IXL_RX_UNLOCK(rxr);
1400         (*ifp->if_input)(ifp, m);
1401         IXL_RX_LOCK(rxr);
1402 }
1403
1404
1405 static __inline void
1406 ixl_rx_discard(struct rx_ring *rxr, int i)
1407 {
1408         struct ixl_rx_buf       *rbuf;
1409
1410         rbuf = &rxr->buffers[i];
1411
1412         if (rbuf->fmp != NULL) {/* Partial chain ? */
1413                 rbuf->fmp->m_flags |= M_PKTHDR;
1414                 m_freem(rbuf->fmp);
1415                 rbuf->fmp = NULL;
1416         }
1417
1418         /*
1419         ** With advanced descriptors the writeback
1420         ** clobbers the buffer addresses, so it's easier
1421         ** to just free the existing mbufs and take
1422         ** the normal refresh path to get new buffers
1423         ** and mappings.
1424         */
1425         if (rbuf->m_head) {
1426                 m_free(rbuf->m_head);
1427                 rbuf->m_head = NULL;
1428         }
1429  
1430         if (rbuf->m_pack) {
1431                 m_free(rbuf->m_pack);
1432                 rbuf->m_pack = NULL;
1433         }
1434
1435         return;
1436 }
1437
1438 #ifdef RSS
1439 /*
1440 ** ixl_ptype_to_hash: parse the packet type
1441 ** to determine the appropriate RSS hash type.
1442 */
1443 static inline int
1444 ixl_ptype_to_hash(u8 ptype)
1445 {
1446         struct i40e_rx_ptype_decoded    decoded;
1447         u8                              ex = 0;
1448
1449         decoded = decode_rx_desc_ptype(ptype);
1450         ex = decoded.outer_frag;
1451
1452         if (!decoded.known)
1453                 return M_HASHTYPE_OPAQUE;
1454
1455         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
1456                 return M_HASHTYPE_OPAQUE;
1457
1458         /* Note: anything that gets to this point is IP */
1459         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
1460                 switch (decoded.inner_prot) {
1461                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1462                                 if (ex)
1463                                         return M_HASHTYPE_RSS_TCP_IPV6_EX;
1464                                 else
1465                                         return M_HASHTYPE_RSS_TCP_IPV6;
1466                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1467                                 if (ex)
1468                                         return M_HASHTYPE_RSS_UDP_IPV6_EX;
1469                                 else
1470                                         return M_HASHTYPE_RSS_UDP_IPV6;
1471                         default:
1472                                 if (ex)
1473                                         return M_HASHTYPE_RSS_IPV6_EX;
1474                                 else
1475                                         return M_HASHTYPE_RSS_IPV6;
1476                 }
1477         }
1478         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
1479                 switch (decoded.inner_prot) {
1480                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1481                                         return M_HASHTYPE_RSS_TCP_IPV4;
1482                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1483                                 if (ex)
1484                                         return M_HASHTYPE_RSS_UDP_IPV4_EX;
1485                                 else
1486                                         return M_HASHTYPE_RSS_UDP_IPV4;
1487                         default:
1488                                         return M_HASHTYPE_RSS_IPV4;
1489                 }
1490         }
1491         /* We should never get here!! */
1492         return M_HASHTYPE_OPAQUE;
1493 }
1494 #endif /* RSS */
1495
1496 /*********************************************************************
1497  *
1498  *  This routine executes in interrupt context. It replenishes
1499  *  the mbufs in the descriptor ring and passes data which has been
1500  *  DMA'd into host memory up to the network stack.
1501  *
1502  *  We loop at most count times if count is > 0, or until all
1503  *  completed descriptors are processed if count < 0.
1504  *
1505  *  Return TRUE for more work, FALSE for all clean.
1506  *********************************************************************/
1507 bool
1508 ixl_rxeof(struct ixl_queue *que, int count)
1509 {
1510         struct ixl_vsi          *vsi = que->vsi;
1511         struct rx_ring          *rxr = &que->rxr;
1512         struct ifnet            *ifp = vsi->ifp;
1513 #if defined(INET6) || defined(INET)
1514         struct lro_ctrl         *lro = &rxr->lro;
1515         struct lro_entry        *queued;
1516 #endif
1517         int                     i, nextp, processed = 0;
1518         union i40e_rx_desc      *cur;
1519         struct ixl_rx_buf       *rbuf, *nbuf;
1520
1521
1522         IXL_RX_LOCK(rxr);
1523
1524 #ifdef DEV_NETMAP
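        /*
        ** If this ring is in netmap mode, the interrupt is handled by
        ** netmap_rx_irq() and there is nothing more to do here.
        */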
1525         if (netmap_rx_irq(ifp, que->me, &count)) {
1526                 IXL_RX_UNLOCK(rxr);
1527                 return (FALSE);
1528         }
1529 #endif /* DEV_NETMAP */
1530
1531         for (i = rxr->next_check; count != 0;) {
1532                 struct mbuf     *sendmp, *mh, *mp;
1533                 u32             rsc, status, error;
1534                 u16             hlen, plen, vtag;
1535                 u64             qword;
1536                 u8              ptype;
1537                 bool            eop;
1538  
1539                 /* Sync the ring. */
1540                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1541                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1542
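                /*
                ** qword1 of the writeback descriptor packs the status,
                ** error, header/payload length and packet type fields;
                ** extract each one with its mask and shift.
                */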
1543                 cur = &rxr->base[i];
1544                 qword = le64toh(cur->wb.qword1.status_error_len);
1545                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1546                     >> I40E_RXD_QW1_STATUS_SHIFT;
1547                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1548                     >> I40E_RXD_QW1_ERROR_SHIFT;
1549                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1550                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1551                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1552                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1553                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1554                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1555
1556                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1557                         ++rxr->not_done;
1558                         break;
1559                 }
1560                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1561                         break;
1562
1563                 count--;
1564                 sendmp = NULL;
1565                 nbuf = NULL;
1566                 rsc = 0;
1567                 cur->wb.qword1.status_error_len = 0;
1568                 rbuf = &rxr->buffers[i];
1569                 mh = rbuf->m_head;
1570                 mp = rbuf->m_pack;
1571                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1572                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1573                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1574                 else
1575                         vtag = 0;
1576
1577                 /*
1578                 ** Make sure bad packets are discarded;
1579                 ** note that only the EOP descriptor has
1580                 ** valid error results.
1581                 */
1582                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1583                         rxr->discarded++;
1584                         ixl_rx_discard(rxr, i);
1585                         goto next_desc;
1586                 }
1587
1588                 /* Prefetch the next buffer */
1589                 if (!eop) {
1590                         nextp = i + 1;
1591                         if (nextp == que->num_desc)
1592                                 nextp = 0;
1593                         nbuf = &rxr->buffers[nextp];
1594                         prefetch(nbuf);
1595                 }
1596
1597                 /*
1598                 ** The header mbuf is ONLY used when header 
1599                 ** split is enabled, otherwise we get normal 
1600                 ** behavior, i.e., both header and payload
1601                 ** are DMA'd into the payload buffer.
1602                 **
1603                 ** Rather than using the fmp/lmp global pointers
1604                 ** we now keep the head of a packet chain in the
1605                 ** buffer struct and pass this along from one
1606                 ** descriptor to the next, until we get EOP.
1607                 */
1608                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1609                         if (hlen > IXL_RX_HDR)
1610                                 hlen = IXL_RX_HDR;
1611                         mh->m_len = hlen;
1612                         mh->m_flags |= M_PKTHDR;
1613                         mh->m_next = NULL;
1614                         mh->m_pkthdr.len = mh->m_len;
1615                         /* Null buf pointer so it is refreshed */
1616                         rbuf->m_head = NULL;
1617                         /*
1618                         ** Check the payload length; this
1619                         ** could be zero if it's a small
1620                         ** packet.
1621                         */
1622                         if (plen > 0) {
1623                                 mp->m_len = plen;
1624                                 mp->m_next = NULL;
1625                                 mp->m_flags &= ~M_PKTHDR;
1626                                 mh->m_next = mp;
1627                                 mh->m_pkthdr.len += mp->m_len;
1628                                 /* Null buf pointer so it is refreshed */
1629                                 rbuf->m_pack = NULL;
1630                                 rxr->split++;
1631                         }
1632                         /*
1633                         ** Now create the forward
1634                         ** chain so that when the packet
1635                         ** completes we won't have to.
1636                         */
1637                         if (eop == 0) {
1638                                 /* stash the chain head */
1639                                 nbuf->fmp = mh;
1640                                 /* Make forward chain */
1641                                 if (plen)
1642                                         mp->m_next = nbuf->m_pack;
1643                                 else
1644                                         mh->m_next = nbuf->m_pack;
1645                         } else {
1646                                 /* Singlet, prepare to send */
1647                                 sendmp = mh;
1648                                 if (vtag) {
1649                                         sendmp->m_pkthdr.ether_vtag = vtag;
1650                                         sendmp->m_flags |= M_VLANTAG;
1651                                 }
1652                         }
1653                 } else {
1654                         /*
1655                         ** Either no header split, or a
1656                         ** secondary piece of a fragmented
1657                         ** split packet.
1658                         */
1659                         mp->m_len = plen;
1660                         /*
1661                         ** See if there is a stored head that
1662                         ** tells us how to treat this buffer.
1663                         */
1664                         sendmp = rbuf->fmp;
1665                         rbuf->m_pack = rbuf->fmp = NULL;
1666
1667                         if (sendmp != NULL) /* secondary frag */
1668                                 sendmp->m_pkthdr.len += mp->m_len;
1669                         else {
1670                                 /* first desc of a non-header-split chain */
1671                                 sendmp = mp;
1672                                 sendmp->m_flags |= M_PKTHDR;
1673                                 sendmp->m_pkthdr.len = mp->m_len;
1674                                 if (vtag) {
1675                                         sendmp->m_pkthdr.ether_vtag = vtag;
1676                                         sendmp->m_flags |= M_VLANTAG;
1677                                 }
1678                         }
1679                         /* Pass the head pointer on */
1680                         if (eop == 0) {
1681                                 nbuf->fmp = sendmp;
1682                                 sendmp = NULL;
1683                                 mp->m_next = nbuf->m_pack;
1684                         }
1685                 }
1686                 ++processed;
1687                 /* Sending this frame? */
1688                 if (eop) {
1689                         sendmp->m_pkthdr.rcvif = ifp;
1690                         /* gather stats */
1691                         rxr->rx_packets++;
1692                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1693                         /* capture data for dynamic ITR adjustment */
1694                         rxr->packets++;
1695                         rxr->bytes += sendmp->m_pkthdr.len;
1696                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1697                                 ixl_rx_checksum(sendmp, status, error, ptype);
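                        /*
                        ** Set the flowid the stack will see: with RSS it is
                        ** the hardware RSS hash from the descriptor and the
                        ** decoded ptype selects the hash type; without RSS
                        ** the MSIX vector id is reported as an opaque flowid.
                        */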
1698 #ifdef RSS
1699                         sendmp->m_pkthdr.flowid =
1700                             le32toh(cur->wb.qword0.hi_dword.rss);
1701                         M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1702 #else
1703                         sendmp->m_pkthdr.flowid = que->msix;
1704                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1705 #endif
1706                 }
1707 next_desc:
1708                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1709                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1710
1711                 /* Advance our pointers to the next descriptor. */
1712                 if (++i == que->num_desc)
1713                         i = 0;
1714
1715                 /* Now send to the stack or do LRO */
1716                 if (sendmp != NULL) {
1717                         rxr->next_check = i;
1718                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1719                         i = rxr->next_check;
1720                 }
1721
1722                 /* Every eight descriptors, refresh the mbufs */
1723                 if (processed == 8) {
1724                         ixl_refresh_mbufs(que, i);
1725                         processed = 0;
1726                 }
1727         }
1728
1729         /* Refresh any remaining buf structs */
1730         if (ixl_rx_unrefreshed(que))
1731                 ixl_refresh_mbufs(que, i);
1732
1733         rxr->next_check = i;
1734
1735 #if defined(INET6) || defined(INET)
1736         /*
1737          * Flush any outstanding LRO work
1738          */
1739         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1740                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1741                 tcp_lro_flush(lro, queued);
1742         }
1743 #endif
1744
1745         IXL_RX_UNLOCK(rxr);
1746         return (FALSE);
1747 }
1748
1749
1750 /*********************************************************************
1751  *
1752  *  Verify that the hardware indicated that the checksum is valid.
1753  *  Inform the stack about the status of the checksum so that the
1754  *  stack doesn't spend time verifying it again.
1755  *
1756  *********************************************************************/
1757 static void
1758 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1759 {
1760         struct i40e_rx_ptype_decoded decoded;
1761
1762         decoded = decode_rx_desc_ptype(ptype);
1763
1764         /* Errors? */
1765         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1766             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1767                 mp->m_pkthdr.csum_flags = 0;
1768                 return;
1769         }
1770
1771         /* IPv6 packets with extension headers likely have a bad csum */
1772         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1773             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1774                 if (status &
1775                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1776                         mp->m_pkthdr.csum_flags = 0;
1777                         return;
1778                 }
1779
1780  
1781         /* IP Checksum Good */
1782         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1783         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1784
1785         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1786                 mp->m_pkthdr.csum_flags |= 
1787                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1788                 mp->m_pkthdr.csum_data |= htons(0xffff);
1789         }
1790         return;
1791 }
1792
1793 #if __FreeBSD_version >= 1100000
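/*
** ixl_get_counter: the driver's if_get_counter method, used on
** FreeBSD 11 and later; it returns the software-maintained per-VSI
** statistics for the requested interface counter.
*/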
1794 uint64_t
1795 ixl_get_counter(if_t ifp, ift_counter cnt)
1796 {
1797         struct ixl_vsi *vsi;
1798
1799         vsi = if_getsoftc(ifp);
1800
1801         switch (cnt) {
1802         case IFCOUNTER_IPACKETS:
1803                 return (vsi->ipackets);
1804         case IFCOUNTER_IERRORS:
1805                 return (vsi->ierrors);
1806         case IFCOUNTER_OPACKETS:
1807                 return (vsi->opackets);
1808         case IFCOUNTER_OERRORS:
1809                 return (vsi->oerrors);
1810         case IFCOUNTER_COLLISIONS:
1811                 /* Collisions cannot occur on 10G/40G Ethernet, which is full-duplex only */
1812                 return (0);
1813         case IFCOUNTER_IBYTES:
1814                 return (vsi->ibytes);
1815         case IFCOUNTER_OBYTES:
1816                 return (vsi->obytes);
1817         case IFCOUNTER_IMCASTS:
1818                 return (vsi->imcasts);
1819         case IFCOUNTER_OMCASTS:
1820                 return (vsi->omcasts);
1821         case IFCOUNTER_IQDROPS:
1822                 return (vsi->iqdrops);
1823         case IFCOUNTER_OQDROPS:
1824                 return (vsi->oqdrops);
1825         case IFCOUNTER_NOPROTO:
1826                 return (vsi->noproto);
1827         default:
1828                 return (if_get_counter_default(ifp, cnt));
1829         }
1830 }
1831 #endif
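/*
** Usage sketch (assumed call site): the base driver registers this
** function as the interface's counter method during attach, roughly:
**
**	#if __FreeBSD_version >= 1100000
**		if_setgetcounterfn(ifp, ixl_get_counter);
**	#endif
**
** The exact location of this call in the attach path of if_ixl.c is
** an assumption here, not something established by this file.
*/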
1832