1 /******************************************************************************
2
3   Copyright (c) 2013-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was seperated to allow usage by
38 **          both the BASE and the VF drivers.
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #endif
45
46 #include "ixl.h"
47
48 #ifdef RSS
49 #include <net/rss_config.h>
50 #endif
51
52 /* Local Prototypes */
53 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
54 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
55 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
56 static int      ixl_tx_setup_offload(struct ixl_queue *,
57                     struct mbuf *, u32 *, u32 *);
58 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
59
60 static __inline void ixl_rx_discard(struct rx_ring *, int);
61 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
62                     struct mbuf *, u8);
63
64 #ifdef DEV_NETMAP
65 #include <dev/netmap/if_ixl_netmap.h>
66 #endif /* DEV_NETMAP */
67
68 /*
69 ** Multiqueue Transmit driver
70 */
71 int
72 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
73 {
74         struct ixl_vsi          *vsi = ifp->if_softc;
75         struct ixl_queue        *que;
76         struct tx_ring          *txr;
77         int                     err, i;
78 #ifdef RSS
79         u32                     bucket_id;
80 #endif
81
82         /*
83         ** Which queue to use:
84         **
85         ** When doing RSS, map it to the same outbound
86         ** queue as the incoming flow would be mapped to.
87         ** If everything is set up correctly, it should be
88         ** the same bucket as the CPU we are currently on.
89         */
90         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
91 #ifdef  RSS
92                 if (rss_hash2bucket(m->m_pkthdr.flowid,
93                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
94                         i = bucket_id % vsi->num_queues;
95                 } else
96 #endif
97                         i = m->m_pkthdr.flowid % vsi->num_queues;
98         } else
99                 i = curcpu % vsi->num_queues;
100         /*
101         ** This may not be perfect, but until something
102         ** better comes along it will keep us from scheduling
103         ** on stalled queues.
104         */
105         if (((1 << i) & vsi->active_queues) == 0)
106                 i = ffsl(vsi->active_queues);
107
108         que = &vsi->queues[i];
109         txr = &que->txr;
110
111         err = drbr_enqueue(ifp, txr->br, m);
112         if (err)
113                 return (err);
114         if (IXL_TX_TRYLOCK(txr)) {
115                 ixl_mq_start_locked(ifp, txr);
116                 IXL_TX_UNLOCK(txr);
117         } else
118                 taskqueue_enqueue(que->tq, &que->tx_task);
119
120         return (0);
121 }
122
123 int
124 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
125 {
126         struct ixl_queue        *que = txr->que;
127         struct ixl_vsi          *vsi = que->vsi;
128         struct mbuf             *next;
129         int                     err = 0;
130
131
132         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
133             vsi->link_active == 0)
134                 return (ENETDOWN);
135
136         /* Process the transmit queue */
137         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
138                 if ((err = ixl_xmit(que, &next)) != 0) {
139                         if (next == NULL)
140                                 drbr_advance(ifp, txr->br);
141                         else
142                                 drbr_putback(ifp, txr->br, next);
143                         break;
144                 }
145                 drbr_advance(ifp, txr->br);
146                 /* Send a copy of the frame to the BPF listener */
147                 ETHER_BPF_MTAP(ifp, next);
148                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
149                         break;
150         }
151
152         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
153                 ixl_txeof(que);
154
155         return (err);
156 }
157
158 /*
159  * Called from a taskqueue to drain queued transmit packets.
160  */
161 void
162 ixl_deferred_mq_start(void *arg, int pending)
163 {
164         struct ixl_queue        *que = arg;
165         struct tx_ring          *txr = &que->txr;
166         struct ixl_vsi          *vsi = que->vsi;
167         struct ifnet            *ifp = vsi->ifp;
168         
169         IXL_TX_LOCK(txr);
170         if (!drbr_empty(ifp, txr->br))
171                 ixl_mq_start_locked(ifp, txr);
172         IXL_TX_UNLOCK(txr);
173 }
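/*
** Note: the tx_task serviced here is expected to be wired up elsewhere in
** the driver (not in this file); a minimal sketch, assuming the per-queue
** taskqueue que->tq already exists, would look roughly like:
**
**      TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que);
**      taskqueue_enqueue(que->tq, &que->tx_task);
**
** so that ixl_mq_start() can defer work here when the TX lock is busy.
*/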
174
175 /*
176 ** Flush all queue ring buffers
177 */
178 void
179 ixl_qflush(struct ifnet *ifp)
180 {
181         struct ixl_vsi  *vsi = ifp->if_softc;
182
183         for (int i = 0; i < vsi->num_queues; i++) {
184                 struct ixl_queue *que = &vsi->queues[i];
185                 struct tx_ring  *txr = &que->txr;
186                 struct mbuf     *m;
187                 IXL_TX_LOCK(txr);
188                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
189                         m_freem(m);
190                 IXL_TX_UNLOCK(txr);
191         }
192         if_qflush(ifp);
193 }
194
195 /*
196 ** Find mbuf chains passed to the driver 
197 ** that are 'sparse', using more than 8
198 ** mbufs to deliver an MSS-sized chunk of data.
199 */
200 static inline bool
201 ixl_tso_detect_sparse(struct mbuf *mp)
202 {
203         struct mbuf     *m;
204         int             num = 0, mss;
205         bool            ret = FALSE;
206
207         mss = mp->m_pkthdr.tso_segsz;
208         for (m = mp->m_next; m != NULL; m = m->m_next) {
209                 num++;
210                 mss -= m->m_len;
211                 if (mss < 1)
212                         break;
213                 if (m->m_next == NULL)
214                         break;
215         }
216         if (num > IXL_SPARSE_CHAIN)
217                 ret = TRUE;
218
219         return (ret);
220 }
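/*
** Worked example (illustrative only): with tso_segsz = 1448 and a chain
** whose non-header mbufs each hold only ~128 bytes, a dozen mbufs are
** needed to cover a single MSS, which exceeds IXL_SPARSE_CHAIN (8), so
** the caller defragments the chain (m_defrag) before attempting TSO.
*/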
221
222
223 /*********************************************************************
224  *
225  *  This routine maps the mbufs to tx descriptors, allowing the
226  *  TX engine to transmit the packets. 
227  *      - return 0 on success, positive on failure
228  *
229  **********************************************************************/
230 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
231
232 static int
233 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
234 {
235         struct ixl_vsi          *vsi = que->vsi;
236         struct i40e_hw          *hw = vsi->hw;
237         struct tx_ring          *txr = &que->txr;
238         struct ixl_tx_buf       *buf;
239         struct i40e_tx_desc     *txd = NULL;
240         struct mbuf             *m_head, *m;
241         int                     i, j, error, nsegs, maxsegs;
242         int                     first, last = 0;
243         u16                     vtag = 0;
244         u32                     cmd, off;
245         bus_dmamap_t            map;
246         bus_dma_tag_t           tag;
247         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
248
249
250         cmd = off = 0;
251         m_head = *m_headp;
252
253         /*
254          * Important to capture the first descriptor
255          * used because it will contain the index of
256          * the one we tell the hardware to report back
257          */
258         first = txr->next_avail;
259         buf = &txr->buffers[first];
260         map = buf->map;
261         tag = txr->tx_tag;
262         maxsegs = IXL_MAX_TX_SEGS;
263
264         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
265                 /* Use larger mapping for TSO */
266                 tag = txr->tso_tag;
267                 maxsegs = IXL_MAX_TSO_SEGS;
268                 if (ixl_tso_detect_sparse(m_head)) {
269                         m = m_defrag(m_head, M_NOWAIT);
270                         if (m == NULL) {
271                                 m_freem(*m_headp);
272                                 *m_headp = NULL;
273                                 return (ENOBUFS);
274                         }
275                         *m_headp = m;
276                 }
277         }
278
279         /*
280          * Map the packet for DMA.
281          */
282         error = bus_dmamap_load_mbuf_sg(tag, map,
283             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
284
285         if (error == EFBIG) {
286                 struct mbuf *m;
287
288                 m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
289                 if (m == NULL) {
290                         que->mbuf_defrag_failed++;
291                         m_freem(*m_headp);
292                         *m_headp = NULL;
293                         return (ENOBUFS);
294                 }
295                 *m_headp = m;
296
297                 /* Try it again */
298                 error = bus_dmamap_load_mbuf_sg(tag, map,
299                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
300
301                 if (error == ENOMEM) {
302                         que->tx_dma_setup++;
303                         return (error);
304                 } else if (error != 0) {
305                         que->tx_dma_setup++;
306                         m_freem(*m_headp);
307                         *m_headp = NULL;
308                         return (error);
309                 }
310         } else if (error == ENOMEM) {
311                 que->tx_dma_setup++;
312                 return (error);
313         } else if (error != 0) {
314                 que->tx_dma_setup++;
315                 m_freem(*m_headp);
316                 *m_headp = NULL;
317                 return (error);
318         }
319
320         /* Make certain there are enough descriptors */
321         if (nsegs > txr->avail - 2) {
322                 txr->no_desc++;
323                 error = ENOBUFS;
324                 goto xmit_fail;
325         }
326         m_head = *m_headp;
327
328         /* Set up the TSO/CSUM offload */
329         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
330                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
331                 if (error)
332                         goto xmit_fail;
333         }
334
335         cmd |= I40E_TX_DESC_CMD_ICRC;
336         /* Grab the VLAN tag */
337         if (m_head->m_flags & M_VLANTAG) {
338                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
339                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
340         }
341
342         i = txr->next_avail;
343         for (j = 0; j < nsegs; j++) {
344                 bus_size_t seglen;
345
346                 buf = &txr->buffers[i];
347                 buf->tag = tag; /* Keep track of the type tag */
348                 txd = &txr->base[i];
349                 seglen = segs[j].ds_len;
350
351                 txd->buffer_addr = htole64(segs[j].ds_addr);
352                 txd->cmd_type_offset_bsz =
353                     htole64(I40E_TX_DESC_DTYPE_DATA
354                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
355                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
356                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
357                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
358
359                 last = i; /* descriptor that will get completion IRQ */
360
361                 if (++i == que->num_desc)
362                         i = 0;
363
364                 buf->m_head = NULL;
365                 buf->eop_index = -1;
366         }
367         /* Set the last descriptor for report */
368         txd->cmd_type_offset_bsz |=
369             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
370         txr->avail -= nsegs;
371         txr->next_avail = i;
372
373         buf->m_head = m_head;
374         /* Swap the dma map between the first and last descriptor */
375         txr->buffers[first].map = buf->map;
376         buf->map = map;
377         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
378
379         /* Set the index of the descriptor that will be marked done */
380         buf = &txr->buffers[first];
381         buf->eop_index = last;
382
383         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
384             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
385         /*
386          * Advance the Transmit Descriptor Tail (TDT); this tells the
387          * hardware that this frame is available to transmit.
388          */
389         ++txr->total_packets;
390         wr32(hw, txr->tail, i);
391
392         ixl_flush(hw);
393         /* Mark outstanding work */
394         if (que->busy == 0)
395                 que->busy = 1;
396         return (0);
397
398 xmit_fail:
399         bus_dmamap_unload(tag, buf->map);
400         return (error);
401 }
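/*
** For reference (a sketch of the packing done above, not additional driver
** logic): each data descriptor's cmd_type_offset_bsz quadword is built as
**
**      DTYPE_DATA | (cmd << CMD_SHIFT) | (off << OFFSET_SHIFT) |
**      (seglen << TX_BUF_SZ_SHIFT) | (vtag << L2TAG1_SHIFT)
**
** using the I40E_TXD_QW1_* shifts, and only the last descriptor of the
** frame additionally gets IXL_TXD_CMD (EOP | RS) OR'ed into its cmd field.
*/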
402
403
404 /*********************************************************************
405  *
406  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
407  *  the information needed to transmit a packet on the wire. This is
408  *  called only once at attach, setup is done every reset.
409  *
410  **********************************************************************/
411 int
412 ixl_allocate_tx_data(struct ixl_queue *que)
413 {
414         struct tx_ring          *txr = &que->txr;
415         struct ixl_vsi          *vsi = que->vsi;
416         device_t                dev = vsi->dev;
417         struct ixl_tx_buf       *buf;
418         int                     error = 0;
419
420         /*
421          * Setup DMA descriptor areas.
422          */
423         if ((error = bus_dma_tag_create(NULL,           /* parent */
424                                1, 0,                    /* alignment, bounds */
425                                BUS_SPACE_MAXADDR,       /* lowaddr */
426                                BUS_SPACE_MAXADDR,       /* highaddr */
427                                NULL, NULL,              /* filter, filterarg */
428                                IXL_TSO_SIZE,            /* maxsize */
429                                IXL_MAX_TX_SEGS,         /* nsegments */
430                                PAGE_SIZE,               /* maxsegsize */
431                                0,                       /* flags */
432                                NULL,                    /* lockfunc */
433                                NULL,                    /* lockfuncarg */
434                                &txr->tx_tag))) {
435                 device_printf(dev,"Unable to allocate TX DMA tag\n");
436                 goto fail;
437         }
438
439         /* Make a special tag for TSO */
440         if ((error = bus_dma_tag_create(NULL,           /* parent */
441                                1, 0,                    /* alignment, bounds */
442                                BUS_SPACE_MAXADDR,       /* lowaddr */
443                                BUS_SPACE_MAXADDR,       /* highaddr */
444                                NULL, NULL,              /* filter, filterarg */
445                                IXL_TSO_SIZE,            /* maxsize */
446                                IXL_MAX_TSO_SEGS,        /* nsegments */
447                                PAGE_SIZE,               /* maxsegsize */
448                                0,                       /* flags */
449                                NULL,                    /* lockfunc */
450                                NULL,                    /* lockfuncarg */
451                                &txr->tso_tag))) {
452                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
453                 goto fail;
454         }
455
456         if (!(txr->buffers =
457             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
458             que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
459                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
460                 error = ENOMEM;
461                 goto fail;
462         }
463
464         /* Create the descriptor buffer default dma maps */
465         buf = txr->buffers;
466         for (int i = 0; i < que->num_desc; i++, buf++) {
467                 buf->tag = txr->tx_tag;
468                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
469                 if (error != 0) {
470                         device_printf(dev, "Unable to create TX DMA map\n");
471                         goto fail;
472                 }
473         }
474 fail:
475         return (error);
476 }
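/*
** Note: two tags are created above so the common (non-TSO) case is mapped
** with the smaller IXL_MAX_TX_SEGS segment limit, while TSO frames use
** tso_tag with IXL_MAX_TSO_SEGS; ixl_xmit() above selects between them
** based on CSUM_TSO.
*/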
477
478
479 /*********************************************************************
480  *
481  *  (Re)Initialize a queue transmit ring.
482  *      - called by init, it clears the descriptor ring,
483  *        and frees any stale mbufs 
484  *
485  **********************************************************************/
486 void
487 ixl_init_tx_ring(struct ixl_queue *que)
488 {
489 #ifdef DEV_NETMAP
490         struct netmap_adapter *na = NA(que->vsi->ifp);
491         struct netmap_slot *slot;
492 #endif /* DEV_NETMAP */
493         struct tx_ring          *txr = &que->txr;
494         struct ixl_tx_buf       *buf;
495
496         /* Clear the old ring contents */
497         IXL_TX_LOCK(txr);
498
499 #ifdef DEV_NETMAP
500         /*
501          * (under lock): if in netmap mode, do some consistency
502          * checks and set slot to entry 0 of the netmap ring.
503          */
504         slot = netmap_reset(na, NR_TX, que->me, 0);
505 #endif /* DEV_NETMAP */
506
507         bzero((void *)txr->base,
508               (sizeof(struct i40e_tx_desc)) * que->num_desc);
509
510         /* Reset indices */
511         txr->next_avail = 0;
512         txr->next_to_clean = 0;
513
514 #ifdef IXL_FDIR
515         /* Initialize flow director */
516         txr->atr_rate = ixl_atr_rate;
517         txr->atr_count = 0;
518 #endif
519
520         /* Free any existing tx mbufs. */
521         buf = txr->buffers;
522         for (int i = 0; i < que->num_desc; i++, buf++) {
523                 if (buf->m_head != NULL) {
524                         bus_dmamap_sync(buf->tag, buf->map,
525                             BUS_DMASYNC_POSTWRITE);
526                         bus_dmamap_unload(buf->tag, buf->map);
527                         m_freem(buf->m_head);
528                         buf->m_head = NULL;
529                 }
530 #ifdef DEV_NETMAP
531                 /*
532                  * In netmap mode, set the map for the packet buffer.
533                  * NOTE: Some drivers (not this one) also need to set
534                  * the physical buffer address in the NIC ring.
535                  * netmap_idx_n2k() maps a nic index, i, into the corresponding
536                  * netmap slot index, si
537                  */
538                 if (slot) {
539                         int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
540                         netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
541                 }
542 #endif /* DEV_NETMAP */
543                 /* Clear the EOP index */
544                 buf->eop_index = -1;
545         }
546
547         /* Set number of descriptors available */
548         txr->avail = que->num_desc;
549
550         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
551             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
552         IXL_TX_UNLOCK(txr);
553 }
554
555
556 /*********************************************************************
557  *
558  *  Free transmit ring related data structures.
559  *
560  **********************************************************************/
561 void
562 ixl_free_que_tx(struct ixl_queue *que)
563 {
564         struct tx_ring *txr = &que->txr;
565         struct ixl_tx_buf *buf;
566
567         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
568
569         for (int i = 0; i < que->num_desc; i++) {
570                 buf = &txr->buffers[i];
571                 if (buf->m_head != NULL) {
572                         bus_dmamap_sync(buf->tag, buf->map,
573                             BUS_DMASYNC_POSTWRITE);
574                         bus_dmamap_unload(buf->tag,
575                             buf->map);
576                         m_freem(buf->m_head);
577                         buf->m_head = NULL;
578                         if (buf->map != NULL) {
579                                 bus_dmamap_destroy(buf->tag,
580                                     buf->map);
581                                 buf->map = NULL;
582                         }
583                 } else if (buf->map != NULL) {
584                         bus_dmamap_unload(buf->tag,
585                             buf->map);
586                         bus_dmamap_destroy(buf->tag,
587                             buf->map);
588                         buf->map = NULL;
589                 }
590         }
591         if (txr->br != NULL)
592                 buf_ring_free(txr->br, M_DEVBUF);
593         if (txr->buffers != NULL) {
594                 free(txr->buffers, M_DEVBUF);
595                 txr->buffers = NULL;
596         }
597         if (txr->tx_tag != NULL) {
598                 bus_dma_tag_destroy(txr->tx_tag);
599                 txr->tx_tag = NULL;
600         }
601         if (txr->tso_tag != NULL) {
602                 bus_dma_tag_destroy(txr->tso_tag);
603                 txr->tso_tag = NULL;
604         }
605
606         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
607         return;
608 }
609
610 /*********************************************************************
611  *
612  *  Setup descriptor for hw offloads 
613  *
614  **********************************************************************/
615
616 static int
617 ixl_tx_setup_offload(struct ixl_queue *que,
618     struct mbuf *mp, u32 *cmd, u32 *off)
619 {
620         struct ether_vlan_header        *eh;
621 #ifdef INET
622         struct ip                       *ip = NULL;
623 #endif
624         struct tcphdr                   *th = NULL;
625 #ifdef INET6
626         struct ip6_hdr                  *ip6;
627 #endif
628         int                             elen, ip_hlen = 0, tcp_hlen;
629         u16                             etype;
630         u8                              ipproto = 0;
631         bool                            tso = FALSE;
632
633
634         /* Set up the TSO context descriptor if required */
635         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
636                 tso = ixl_tso_setup(que, mp);
637                 if (tso)
638                         ++que->tso;
639                 else
640                         return (ENXIO);
641         }
642
643         /*
644          * Determine where frame payload starts.
645          * Jump over vlan headers if already present,
646          * helpful for QinQ too.
647          */
648         eh = mtod(mp, struct ether_vlan_header *);
649         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
650                 etype = ntohs(eh->evl_proto);
651                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
652         } else {
653                 etype = ntohs(eh->evl_encap_proto);
654                 elen = ETHER_HDR_LEN;
655         }
656
657         switch (etype) {
658 #ifdef INET
659                 case ETHERTYPE_IP:
660                         ip = (struct ip *)(mp->m_data + elen);
661                         ip_hlen = ip->ip_hl << 2;
662                         ipproto = ip->ip_p;
663                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
664                         /* The IP checksum must be recalculated with TSO */
665                         if (tso)
666                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
667                         else
668                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
669                         break;
670 #endif
671 #ifdef INET6
672                 case ETHERTYPE_IPV6:
673                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
674                         ip_hlen = sizeof(struct ip6_hdr);
675                         ipproto = ip6->ip6_nxt;
676                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
677                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
678                         break;
679 #endif
680                 default:
681                         break;
682         }
683
684         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
685         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
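        /*
         * Note on units (inferred from the shifts above): MACLEN is
         * expressed in 2-byte words (hence elen >> 1) and IPLEN in 4-byte
         * dwords (hence ip_hlen >> 2).  For a plain IPv4/TCP frame with no
         * VLAN tag: elen = 14 -> MACLEN = 7, ip_hlen = 20 -> IPLEN = 5.
         */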
686
687         switch (ipproto) {
688                 case IPPROTO_TCP:
689                         tcp_hlen = th->th_off << 2;
690                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
691                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
692                                 *off |= (tcp_hlen >> 2) <<
693                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
694                         }
695 #ifdef IXL_FDIR
696                         ixl_atr(que, th, etype);
697 #endif
698                         break;
699                 case IPPROTO_UDP:
700                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
701                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
702                                 *off |= (sizeof(struct udphdr) >> 2) <<
703                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
704                         }
705                         break;
706
707                 case IPPROTO_SCTP:
708                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
709                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
710                                 *off |= (sizeof(struct sctphdr) >> 2) <<
711                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
712                         }
713                         /* Fall Thru */
714                 default:
715                         break;
716         }
717
718         return (0);
719 }
720
721
722 /**********************************************************************
723  *
724  *  Setup context for hardware segmentation offload (TSO)
725  *
726  **********************************************************************/
727 static bool
728 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
729 {
730         struct tx_ring                  *txr = &que->txr;
731         struct i40e_tx_context_desc     *TXD;
732         struct ixl_tx_buf               *buf;
733         u32                             cmd, mss, type, tsolen;
734         u16                             etype;
735         int                             idx, elen, ip_hlen, tcp_hlen;
736         struct ether_vlan_header        *eh;
737 #ifdef INET
738         struct ip                       *ip;
739 #endif
740 #ifdef INET6
741         struct ip6_hdr                  *ip6;
742 #endif
743 #if defined(INET6) || defined(INET)
744         struct tcphdr                   *th;
745 #endif
746         u64                             type_cmd_tso_mss;
747
748         /*
749          * Determine where frame payload starts.
750          * Jump over vlan headers if already present
751          */
752         eh = mtod(mp, struct ether_vlan_header *);
753         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
754                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
755                 etype = eh->evl_proto;
756         } else {
757                 elen = ETHER_HDR_LEN;
758                 etype = eh->evl_encap_proto;
759         }
760
761         switch (ntohs(etype)) {
762 #ifdef INET6
763         case ETHERTYPE_IPV6:
764                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
765                 if (ip6->ip6_nxt != IPPROTO_TCP)
766                         return (FALSE);
767                 ip_hlen = sizeof(struct ip6_hdr);
768                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
769                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
770                 tcp_hlen = th->th_off << 2;
771                 break;
772 #endif
773 #ifdef INET
774         case ETHERTYPE_IP:
775                 ip = (struct ip *)(mp->m_data + elen);
776                 if (ip->ip_p != IPPROTO_TCP)
777                         return (FALSE);
778                 ip->ip_sum = 0;
779                 ip_hlen = ip->ip_hl << 2;
780                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
781                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
782                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
783                 tcp_hlen = th->th_off << 2;
784                 break;
785 #endif
786         default:
787                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
788                     __func__, ntohs(etype));
789                 return FALSE;
790         }
791
792         /* Ensure we have at least the IP+TCP header in the first mbuf. */
793         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
794                 return FALSE;
795
796         idx = txr->next_avail;
797         buf = &txr->buffers[idx];
798         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
799         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
800
801         type = I40E_TX_DESC_DTYPE_CONTEXT;
802         cmd = I40E_TX_CTX_DESC_TSO;
803         mss = mp->m_pkthdr.tso_segsz;
804
805         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
806             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
807             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
808             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
809         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
810
811         TXD->tunneling_params = htole32(0);
812         buf->m_head = NULL;
813         buf->eop_index = -1;
814
815         if (++idx == que->num_desc)
816                 idx = 0;
817
818         txr->avail--;
819         txr->next_avail = idx;
820
821         return TRUE;
822 }
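/*
** Worked example (illustrative only): with elen = 14, ip_hlen = 20 and
** tcp_hlen = 20, a 29014-byte TSO packet yields tsolen = 29014 - 54 =
** 28960 payload bytes; with tso_segsz (MSS) = 1448 the hardware would cut
** this into 28960 / 1448 = 20 wire segments, replicating the headers in
** each.
*/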
823
824 /*
825 ** ixl_get_tx_head - Retrieve the value from the
826 **    location where the HW records its HEAD index
827 */
828 static inline u32
829 ixl_get_tx_head(struct ixl_queue *que)
830 {
831         struct tx_ring  *txr = &que->txr;
832         void *head = &txr->base[que->num_desc];
833         return LE32_TO_CPU(*(volatile __le32 *)head);
834 }
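/*
** Note: the head write-back word lives in the memory immediately past the
** last descriptor (txr->base[que->num_desc]), so the descriptor ring DMA
** area is assumed to be allocated with room for this extra slot elsewhere
** in the driver.
*/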
835
836 /**********************************************************************
837  *
838  *  Examine each tx_buffer in the used queue. If the hardware is done
839  *  processing the packet then free associated resources. The
840  *  tx_buffer is put back on the free queue.
841  *
842  **********************************************************************/
843 bool
844 ixl_txeof(struct ixl_queue *que)
845 {
846         struct tx_ring          *txr = &que->txr;
847         u32                     first, last, head, done, processed;
848         struct ixl_tx_buf       *buf;
849         struct i40e_tx_desc     *tx_desc, *eop_desc;
850
851
852         mtx_assert(&txr->mtx, MA_OWNED);
853
854 #ifdef DEV_NETMAP
855         // XXX todo: implement moderation
856         if (netmap_tx_irq(que->vsi->ifp, que->me))
857                 return FALSE;
858 #endif /* DEV_NETMAP */
859
860         /* These are not the descriptors you seek, move along :) */
861         if (txr->avail == que->num_desc) {
862                 que->busy = 0;
863                 return FALSE;
864         }
865
866         processed = 0;
867         first = txr->next_to_clean;
868         buf = &txr->buffers[first];
869         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
870         last = buf->eop_index;
871         if (last == -1)
872                 return FALSE;
873         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
874
875         /* Get the Head WB value */
876         head = ixl_get_tx_head(que);
877
878         /*
879         ** Get the index of the first descriptor
880         ** BEYOND the EOP and call that 'done'.
881         ** I do this so the comparison in the
882         ** inner while loop below can be simple
883         */
884         if (++last == que->num_desc) last = 0;
885         done = last;
886
887         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
888             BUS_DMASYNC_POSTREAD);
889         /*
890         ** The HEAD index of the ring is written to a
891         ** defined location; this, rather than a done bit,
892         ** is what is used to keep track of what must be
893         ** 'cleaned'.
894         */
895         while (first != head) {
896                 /* We clean the range of the packet */
897                 while (first != done) {
898                         ++txr->avail;
899                         ++processed;
900
901                         if (buf->m_head) {
902                                 txr->bytes += /* for ITR adjustment */
903                                     buf->m_head->m_pkthdr.len;
904                                 txr->tx_bytes += /* for TX stats */
905                                     buf->m_head->m_pkthdr.len;
906                                 bus_dmamap_sync(buf->tag,
907                                     buf->map,
908                                     BUS_DMASYNC_POSTWRITE);
909                                 bus_dmamap_unload(buf->tag,
910                                     buf->map);
911                                 m_freem(buf->m_head);
912                                 buf->m_head = NULL;
913                                 buf->map = NULL;
914                         }
915                         buf->eop_index = -1;
916
917                         if (++first == que->num_desc)
918                                 first = 0;
919
920                         buf = &txr->buffers[first];
921                         tx_desc = &txr->base[first];
922                 }
923                 ++txr->packets;
924                 /* See if there is more work now */
925                 last = buf->eop_index;
926                 if (last != -1) {
927                         eop_desc = &txr->base[last];
928                         /* Get next done point */
929                         if (++last == que->num_desc) last = 0;
930                         done = last;
931                 } else
932                         break;
933         }
934         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
935             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
936
937         txr->next_to_clean = first;
938
939
940         /*
941         ** Hang detection: we know there is work
942         ** outstanding or the first return above would
943         ** have been taken, so indicate an unsuccessful
944         ** pass. In the local timer, if this value grows
945         ** too large the queue will be considered hung.
946         ** If anything has been cleaned, reset the state.
948         */
949         if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
950                 ++que->busy;
951
952         if (processed)
953                 que->busy = 1; /* Note this turns off HUNG */
954
955         /*
956          * If there are no pending descriptors, clear the timeout.
957          */
958         if (txr->avail == que->num_desc) {
959                 que->busy = 0;
960                 return FALSE;
961         }
962
963         return TRUE;
964 }
965
966 /*********************************************************************
967  *
968  *  Refresh mbuf buffers for RX descriptor rings
969  *   - now keeps its own state, so discards due to resource
970  *     exhaustion are unnecessary; if an mbuf cannot be obtained
971  *     it just returns, keeping its placeholder, so it can simply
972  *     be called again later to retry.
973  *
974  **********************************************************************/
975 static void
976 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
977 {
978         struct ixl_vsi          *vsi = que->vsi;
979         struct rx_ring          *rxr = &que->rxr;
980         bus_dma_segment_t       hseg[1];
981         bus_dma_segment_t       pseg[1];
982         struct ixl_rx_buf       *buf;
983         struct mbuf             *mh, *mp;
984         int                     i, j, nsegs, error;
985         bool                    refreshed = FALSE;
986
987         i = j = rxr->next_refresh;
988         /* Control the loop with one beyond */
989         if (++j == que->num_desc)
990                 j = 0;
991
992         while (j != limit) {
993                 buf = &rxr->buffers[i];
994                 if (rxr->hdr_split == FALSE)
995                         goto no_split;
996
997                 if (buf->m_head == NULL) {
998                         mh = m_gethdr(M_NOWAIT, MT_DATA);
999                         if (mh == NULL)
1000                                 goto update;
1001                 } else
1002                         mh = buf->m_head;
1003
1004                 mh->m_pkthdr.len = mh->m_len = MHLEN;
1005                 mh->m_len = MHLEN;
1006                 mh->m_flags |= M_PKTHDR;
1007                 /* Get the memory mapping */
1008                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1009                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1010                 if (error != 0) {
1011                         printf("Refresh mbufs: hdr dmamap load"
1012                             " failure - %d\n", error);
1013                         m_free(mh);
1014                         buf->m_head = NULL;
1015                         goto update;
1016                 }
1017                 buf->m_head = mh;
1018                 bus_dmamap_sync(rxr->htag, buf->hmap,
1019                     BUS_DMASYNC_PREREAD);
1020                 rxr->base[i].read.hdr_addr =
1021                    htole64(hseg[0].ds_addr);
1022
1023 no_split:
1024                 if (buf->m_pack == NULL) {
1025                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1026                             M_PKTHDR, rxr->mbuf_sz);
1027                         if (mp == NULL)
1028                                 goto update;
1029                 } else
1030                         mp = buf->m_pack;
1031
1032                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1033                 /* Get the memory mapping */
1034                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1035                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1036                 if (error != 0) {
1037                         printf("Refresh mbufs: payload dmamap load"
1038                             " failure - %d\n", error);
1039                         m_free(mp);
1040                         buf->m_pack = NULL;
1041                         goto update;
1042                 }
1043                 buf->m_pack = mp;
1044                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1045                     BUS_DMASYNC_PREREAD);
1046                 rxr->base[i].read.pkt_addr =
1047                    htole64(pseg[0].ds_addr);
1048                 /* Used only when doing header split */
1049                 rxr->base[i].read.hdr_addr = 0;
1050
1051                 refreshed = TRUE;
1052                 /* Next is precalculated */
1053                 i = j;
1054                 rxr->next_refresh = i;
1055                 if (++j == que->num_desc)
1056                         j = 0;
1057         }
1058 update:
1059         if (refreshed) /* Update hardware tail index */
1060                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1061         return;
1062 }
1063
1064
1065 /*********************************************************************
1066  *
1067  *  Allocate memory for rx_buffer structures. Since we use one
1068  *  rx_buffer per descriptor, the maximum number of rx_buffer's
1069  *  that we'll need is equal to the number of receive descriptors
1070  *  that we've defined.
1071  *
1072  **********************************************************************/
1073 int
1074 ixl_allocate_rx_data(struct ixl_queue *que)
1075 {
1076         struct rx_ring          *rxr = &que->rxr;
1077         struct ixl_vsi          *vsi = que->vsi;
1078         device_t                dev = vsi->dev;
1079         struct ixl_rx_buf       *buf;
1080         int                     i, bsize, error;
1081
1082         bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1083         if (!(rxr->buffers =
1084             (struct ixl_rx_buf *) malloc(bsize,
1085             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1086                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1087                 error = ENOMEM;
1088                 return (error);
1089         }
1090
1091         if ((error = bus_dma_tag_create(NULL,   /* parent */
1092                                    1, 0,        /* alignment, bounds */
1093                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1094                                    BUS_SPACE_MAXADDR,   /* highaddr */
1095                                    NULL, NULL,          /* filter, filterarg */
1096                                    MSIZE,               /* maxsize */
1097                                    1,                   /* nsegments */
1098                                    MSIZE,               /* maxsegsize */
1099                                    0,                   /* flags */
1100                                    NULL,                /* lockfunc */
1101                                    NULL,                /* lockfuncarg */
1102                                    &rxr->htag))) {
1103                 device_printf(dev, "Unable to create RX DMA htag\n");
1104                 return (error);
1105         }
1106
1107         if ((error = bus_dma_tag_create(NULL,   /* parent */
1108                                    1, 0,        /* alignment, bounds */
1109                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1110                                    BUS_SPACE_MAXADDR,   /* highaddr */
1111                                    NULL, NULL,          /* filter, filterarg */
1112                                    MJUM16BYTES,         /* maxsize */
1113                                    1,                   /* nsegments */
1114                                    MJUM16BYTES,         /* maxsegsize */
1115                                    0,                   /* flags */
1116                                    NULL,                /* lockfunc */
1117                                    NULL,                /* lockfuncarg */
1118                                    &rxr->ptag))) {
1119                 device_printf(dev, "Unable to create RX DMA ptag\n");
1120                 return (error);
1121         }
1122
1123         for (i = 0; i < que->num_desc; i++) {
1124                 buf = &rxr->buffers[i];
1125                 error = bus_dmamap_create(rxr->htag,
1126                     BUS_DMA_NOWAIT, &buf->hmap);
1127                 if (error) {
1128                         device_printf(dev, "Unable to create RX head map\n");
1129                         break;
1130                 }
1131                 error = bus_dmamap_create(rxr->ptag,
1132                     BUS_DMA_NOWAIT, &buf->pmap);
1133                 if (error) {
1134                         device_printf(dev, "Unable to create RX pkt map\n");
1135                         break;
1136                 }
1137         }
1138
1139         return (error);
1140 }
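/*
** Note: the two RX tags created above reflect the (optional) header-split
** scheme: htag maps small header mbufs (MSIZE) via buf->hmap, while ptag
** maps the payload clusters (up to MJUM16BYTES) via buf->pmap; with header
** split disabled only the payload side is actually used.
*/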
1141
1142
1143 /*********************************************************************
1144  *
1145  *  (Re)Initialize the queue receive ring and its buffers.
1146  *
1147  **********************************************************************/
1148 int
1149 ixl_init_rx_ring(struct ixl_queue *que)
1150 {
1151         struct  rx_ring         *rxr = &que->rxr;
1152         struct ixl_vsi          *vsi = que->vsi;
1153 #if defined(INET6) || defined(INET)
1154         struct ifnet            *ifp = vsi->ifp;
1155         struct lro_ctrl         *lro = &rxr->lro;
1156 #endif
1157         struct ixl_rx_buf       *buf;
1158         bus_dma_segment_t       pseg[1], hseg[1];
1159         int                     rsize, nsegs, error = 0;
1160 #ifdef DEV_NETMAP
1161         struct netmap_adapter *na = NA(que->vsi->ifp);
1162         struct netmap_slot *slot;
1163 #endif /* DEV_NETMAP */
1164
1165         IXL_RX_LOCK(rxr);
1166 #ifdef DEV_NETMAP
1167         /* same as in ixl_init_tx_ring() */
1168         slot = netmap_reset(na, NR_RX, que->me, 0);
1169 #endif /* DEV_NETMAP */
1170         /* Clear the ring contents */
1171         rsize = roundup2(que->num_desc *
1172             sizeof(union i40e_rx_desc), DBA_ALIGN);
1173         bzero((void *)rxr->base, rsize);
1174         /* Cleanup any existing buffers */
1175         for (int i = 0; i < que->num_desc; i++) {
1176                 buf = &rxr->buffers[i];
1177                 if (buf->m_head != NULL) {
1178                         bus_dmamap_sync(rxr->htag, buf->hmap,
1179                             BUS_DMASYNC_POSTREAD);
1180                         bus_dmamap_unload(rxr->htag, buf->hmap);
1181                         buf->m_head->m_flags |= M_PKTHDR;
1182                         m_freem(buf->m_head);
1183                 }
1184                 if (buf->m_pack != NULL) {
1185                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1186                             BUS_DMASYNC_POSTREAD);
1187                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1188                         buf->m_pack->m_flags |= M_PKTHDR;
1189                         m_freem(buf->m_pack);
1190                 }
1191                 buf->m_head = NULL;
1192                 buf->m_pack = NULL;
1193         }
1194
1195         /* header split is off */
1196         rxr->hdr_split = FALSE;
1197
1198         /* Now replenish the mbufs */
1199         for (int j = 0; j != que->num_desc; ++j) {
1200                 struct mbuf     *mh, *mp;
1201
1202                 buf = &rxr->buffers[j];
1203 #ifdef DEV_NETMAP
1204                 /*
1205                  * In netmap mode, fill the map and set the buffer
1206                  * address in the NIC ring, considering the offset
1207                  * between the netmap and NIC rings (see comment in
1208                  * ixgbe_setup_transmit_ring() ). No need to allocate
1209                  * an mbuf, so end the block with a continue;
1210                  */
1211                 if (slot) {
1212                         int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
1213                         uint64_t paddr;
1214                         void *addr;
1215
1216                         addr = PNMB(na, slot + sj, &paddr);
1217                         netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1218                         /* Update descriptor and the cached value */
1219                         rxr->base[j].read.pkt_addr = htole64(paddr);
1220                         rxr->base[j].read.hdr_addr = 0;
1221                         continue;
1222                 }
1223 #endif /* DEV_NETMAP */
1224                 /*
1225                 ** Don't allocate mbufs if not
1226                 ** doing header split, it's wasteful
1227                 */ 
1228                 if (rxr->hdr_split == FALSE)
1229                         goto skip_head;
1230
1231                 /* First the header */
1232                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1233                 if (buf->m_head == NULL) {
1234                         error = ENOBUFS;
1235                         goto fail;
1236                 }
1237                 m_adj(buf->m_head, ETHER_ALIGN);
1238                 mh = buf->m_head;
1239                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1240                 mh->m_flags |= M_PKTHDR;
1241                 /* Get the memory mapping */
1242                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1243                     buf->hmap, buf->m_head, hseg,
1244                     &nsegs, BUS_DMA_NOWAIT);
1245                 if (error != 0) /* Nothing elegant to do here */
1246                         goto fail;
1247                 bus_dmamap_sync(rxr->htag,
1248                     buf->hmap, BUS_DMASYNC_PREREAD);
1249                 /* Update descriptor */
1250                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1251
1252 skip_head:
1253                 /* Now the payload cluster */
1254                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1255                     M_PKTHDR, rxr->mbuf_sz);
1256                 if (buf->m_pack == NULL) {
1257                         error = ENOBUFS;
1258                         goto fail;
1259                 }
1260                 mp = buf->m_pack;
1261                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1262                 /* Get the memory mapping */
1263                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1264                     buf->pmap, mp, pseg,
1265                     &nsegs, BUS_DMA_NOWAIT);
1266                 if (error != 0)
1267                         goto fail;
1268                 bus_dmamap_sync(rxr->ptag,
1269                     buf->pmap, BUS_DMASYNC_PREREAD);
1270                 /* Update descriptor */
1271                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1272                 rxr->base[j].read.hdr_addr = 0;
1273         }
1274
1275
1276         /* Setup our descriptor indices */
1277         rxr->next_check = 0;
1278         rxr->next_refresh = 0;
1279         rxr->lro_enabled = FALSE;
1280         rxr->split = 0;
1281         rxr->bytes = 0;
1282         rxr->discard = FALSE;
1283
1284         wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1285         ixl_flush(vsi->hw);
1286
1287 #if defined(INET6) || defined(INET)
1288         /*
1289         ** Now set up the LRO interface:
1290         */
1291         if (ifp->if_capenable & IFCAP_LRO) {
1292                 int err = tcp_lro_init(lro);
1293                 if (err) {
1294                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1295                         goto fail;
1296                 }
1297                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1298                 rxr->lro_enabled = TRUE;
1299                 lro->ifp = vsi->ifp;
1300         }
1301 #endif
1302
1303         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1304             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1305
1306 fail:
1307         IXL_RX_UNLOCK(rxr);
1308         return (error);
1309 }
1310
1311
1312 /*********************************************************************
1313  *
1314  *  Free station receive ring data structures
1315  *
1316  **********************************************************************/
1317 void
1318 ixl_free_que_rx(struct ixl_queue *que)
1319 {
1320         struct rx_ring          *rxr = &que->rxr;
1321         struct ixl_rx_buf       *buf;
1322
1323         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1324
1325         /* Cleanup any existing buffers */
1326         if (rxr->buffers != NULL) {
1327                 for (int i = 0; i < que->num_desc; i++) {
1328                         buf = &rxr->buffers[i];
1329                         if (buf->m_head != NULL) {
1330                                 bus_dmamap_sync(rxr->htag, buf->hmap,
1331                                     BUS_DMASYNC_POSTREAD);
1332                                 bus_dmamap_unload(rxr->htag, buf->hmap);
1333                                 buf->m_head->m_flags |= M_PKTHDR;
1334                                 m_freem(buf->m_head);
1335                         }
1336                         if (buf->m_pack != NULL) {
1337                                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1338                                     BUS_DMASYNC_POSTREAD);
1339                                 bus_dmamap_unload(rxr->ptag, buf->pmap);
1340                                 buf->m_pack->m_flags |= M_PKTHDR;
1341                                 m_freem(buf->m_pack);
1342                         }
1343                         buf->m_head = NULL;
1344                         buf->m_pack = NULL;
1345                         if (buf->hmap != NULL) {
1346                                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1347                                 buf->hmap = NULL;
1348                         }
1349                         if (buf->pmap != NULL) {
1350                                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1351                                 buf->pmap = NULL;
1352                         }
1353                 }
1354                 if (rxr->buffers != NULL) {
1355                         free(rxr->buffers, M_DEVBUF);
1356                         rxr->buffers = NULL;
1357                 }
1358         }
1359
1360         if (rxr->htag != NULL) {
1361                 bus_dma_tag_destroy(rxr->htag);
1362                 rxr->htag = NULL;
1363         }
1364         if (rxr->ptag != NULL) {
1365                 bus_dma_tag_destroy(rxr->ptag);
1366                 rxr->ptag = NULL;
1367         }
1368
1369         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1370         return;
1371 }
1372
1373 static __inline void
1374 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1375 {
1376
1377 #if defined(INET6) || defined(INET)
1378         /*
1379          * At the moment LRO is only used for IPv4/TCP packets whose TCP
1380          * checksum has been computed by hardware, and which carry no VLAN
1381          * tag in the Ethernet header.
1382          */
1383         if (rxr->lro_enabled &&
1384             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1385             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1386             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1387                 /*
1388                  * Send to the stack if:
1389                  **  - LRO not enabled, or
1390                  **  - no LRO resources, or
1391                  **  - lro enqueue fails
1392                  */
1393                 if (rxr->lro.lro_cnt != 0)
1394                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1395                                 return;
1396         }
1397 #endif
1398         IXL_RX_UNLOCK(rxr);
1399         (*ifp->if_input)(ifp, m);
1400         IXL_RX_LOCK(rxr);
1401 }
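/*
** Note: the RX lock is intentionally dropped around if_input() above so
** that the packet can climb the network stack without holding a driver
** lock; the caller is assumed to hold IXL_RX_LOCK for the rest of the
** receive loop.
*/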
1402
1403
1404 static __inline void
1405 ixl_rx_discard(struct rx_ring *rxr, int i)
1406 {
1407         struct ixl_rx_buf       *rbuf;
1408
1409         rbuf = &rxr->buffers[i];
1410
1411         if (rbuf->fmp != NULL) {/* Partial chain ? */
1412                 rbuf->fmp->m_flags |= M_PKTHDR;
1413                 m_freem(rbuf->fmp);
1414                 rbuf->fmp = NULL;
1415         }
1416
1417         /*
1418         ** With advanced descriptors the writeback
1419         ** clobbers the buffer addrs, so it's easier
1420         ** to just free the existing mbufs and take
1421         ** the normal refresh path to get new buffers
1422         ** and mapping.
1423         */
1424         if (rbuf->m_head) {
1425                 m_free(rbuf->m_head);
1426                 rbuf->m_head = NULL;
1427         }
1428  
1429         if (rbuf->m_pack) {
1430                 m_free(rbuf->m_pack);
1431                 rbuf->m_pack = NULL;
1432         }
1433
1434         return;
1435 }
1436
1437 #ifdef RSS
1438 /*
1439 ** ixl_ptype_to_hash: parse the hardware packet type
1440 ** to determine the appropriate RSS hash type.
1441 */
1442 static inline int
1443 ixl_ptype_to_hash(u8 ptype)
1444 {
1445         struct i40e_rx_ptype_decoded    decoded;
1446         u8                              ex = 0;
1447
1448         decoded = decode_rx_desc_ptype(ptype);
1449         ex = decoded.outer_frag;
1450
1451         if (!decoded.known)
1452                 return M_HASHTYPE_OPAQUE;
1453
1454         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
1455                 return M_HASHTYPE_OPAQUE;
1456
1457         /* Note: anything that gets to this point is IP */
1458         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
1459                 switch (decoded.inner_prot) {
1460                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1461                                 if (ex)
1462                                         return M_HASHTYPE_RSS_TCP_IPV6_EX;
1463                                 else
1464                                         return M_HASHTYPE_RSS_TCP_IPV6;
1465                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1466                                 if (ex)
1467                                         return M_HASHTYPE_RSS_UDP_IPV6_EX;
1468                                 else
1469                                         return M_HASHTYPE_RSS_UDP_IPV6;
1470                         default:
1471                                 if (ex)
1472                                         return M_HASHTYPE_RSS_IPV6_EX;
1473                                 else
1474                                         return M_HASHTYPE_RSS_IPV6;
1475                 }
1476         }
1477         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
1478                 switch (decoded.inner_prot) {
1479                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1480                                         return M_HASHTYPE_RSS_TCP_IPV4;
1481                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1482                                 if (ex)
1483                                         return M_HASHTYPE_RSS_UDP_IPV4_EX;
1484                                 else
1485                                         return M_HASHTYPE_RSS_UDP_IPV4;
1486                         default:
1487                                         return M_HASHTYPE_RSS_IPV4;
1488                 }
1489         }
1490         /* We should never get here!! */
1491         return M_HASHTYPE_OPAQUE;
1492 }
1493 #endif /* RSS */
1494
1495 /*********************************************************************
1496  *
1497  *  This routine executes in interrupt context. It replenishes
1498  *  the mbufs in the descriptor ring and passes frames that have
1499  *  been DMA'd into host memory up to the network stack.
1500  *
1501  *  We loop at most count times if count is > 0, or until done if
1502  *  count < 0.
1503  *
1504  *  Return TRUE for more work, FALSE for all clean.
1505  *********************************************************************/
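/*
** Note: callers (the MSI-X queue interrupt handler and its deferred
** task) normally pass a bounded count so a single pass over the ring
** cannot monopolize the CPU.
*/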
1506 bool
1507 ixl_rxeof(struct ixl_queue *que, int count)
1508 {
1509         struct ixl_vsi          *vsi = que->vsi;
1510         struct rx_ring          *rxr = &que->rxr;
1511         struct ifnet            *ifp = vsi->ifp;
1512 #if defined(INET6) || defined(INET)
1513         struct lro_ctrl         *lro = &rxr->lro;
1514         struct lro_entry        *queued;
1515 #endif
1516         int                     i, nextp, processed = 0;
1517         union i40e_rx_desc      *cur;
1518         struct ixl_rx_buf       *rbuf, *nbuf;
1519
1520
1521         IXL_RX_LOCK(rxr);
1522
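        /*
        ** When the ring is in netmap mode, netmap_rx_irq() consumes the
        ** interrupt and the normal receive path below is skipped.
        */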
1523 #ifdef DEV_NETMAP
1524         if (netmap_rx_irq(ifp, que->me, &count)) {
1525                 IXL_RX_UNLOCK(rxr);
1526                 return (FALSE);
1527         }
1528 #endif /* DEV_NETMAP */
1529
1530         for (i = rxr->next_check; count != 0;) {
1531                 struct mbuf     *sendmp, *mh, *mp;
1532                 u32             rsc, status, error;
1533                 u16             hlen, plen, vtag;
1534                 u64             qword;
1535                 u8              ptype;
1536                 bool            eop;
1537  
1538                 /* Sync the ring. */
1539                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1540                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1541
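                /*
                ** The writeback descriptor packs status, error, buffer
                ** lengths and packet type into qword1; extract each field
                ** with its mask and shift.
                */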
1542                 cur = &rxr->base[i];
1543                 qword = le64toh(cur->wb.qword1.status_error_len);
1544                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1545                     >> I40E_RXD_QW1_STATUS_SHIFT;
1546                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1547                     >> I40E_RXD_QW1_ERROR_SHIFT;
1548                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1549                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1550                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1551                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1552                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1553                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1554
1555                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1556                         ++rxr->not_done;
1557                         break;
1558                 }
1559                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1560                         break;
1561
1562                 count--;
1563                 sendmp = NULL;
1564                 nbuf = NULL;
1565                 rsc = 0;
1566                 cur->wb.qword1.status_error_len = 0;
1567                 rbuf = &rxr->buffers[i];
1568                 mh = rbuf->m_head;
1569                 mp = rbuf->m_pack;
1570                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1571                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1572                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1573                 else
1574                         vtag = 0;
1575
1576                 /*
1577                 ** Make sure bad packets are discarded; note
1578                 ** that only the EOP descriptor carries valid
1579                 ** error results.
1580                 */
1581                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1582                         rxr->discarded++;
1583                         ixl_rx_discard(rxr, i);
1584                         goto next_desc;
1585                 }
1586
1587                 /* Prefetch the next buffer */
1588                 if (!eop) {
1589                         nextp = i + 1;
1590                         if (nextp == que->num_desc)
1591                                 nextp = 0;
1592                         nbuf = &rxr->buffers[nextp];
1593                         prefetch(nbuf);
1594                 }
1595
1596                 /*
1597                 ** The header mbuf is ONLY used when header 
1598                 ** split is enabled, otherwise we get normal 
1599                 ** behavior, i.e., both header and payload
1600                 ** are DMA'd into the payload buffer.
1601                 **
1602                 ** Rather than using the fmp/lmp global pointers
1603                 ** we now keep the head of a packet chain in the
1604                 ** buffer struct and pass this along from one
1605                 ** descriptor to the next, until we get EOP.
1606                 */
1607                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1608                         if (hlen > IXL_RX_HDR)
1609                                 hlen = IXL_RX_HDR;
1610                         mh->m_len = hlen;
1611                         mh->m_flags |= M_PKTHDR;
1612                         mh->m_next = NULL;
1613                         mh->m_pkthdr.len = mh->m_len;
1614                         /* Null buf pointer so it is refreshed */
1615                         rbuf->m_head = NULL;
1616                         /*
1617                         ** Check the payload length, this
1618                         ** could be zero if it is a small
1619                         ** packet.
1620                         */
1621                         if (plen > 0) {
1622                                 mp->m_len = plen;
1623                                 mp->m_next = NULL;
1624                                 mp->m_flags &= ~M_PKTHDR;
1625                                 mh->m_next = mp;
1626                                 mh->m_pkthdr.len += mp->m_len;
1627                                 /* Null buf pointer so it is refreshed */
1628                                 rbuf->m_pack = NULL;
1629                                 rxr->split++;
1630                         }
1631                         /*
1632                         ** Now create the forward
1633                         ** chain so when complete 
1634                         ** we won't have to.
1635                         */
1636                         if (eop == 0) {
1637                                 /* stash the chain head */
1638                                 nbuf->fmp = mh;
1639                                 /* Make forward chain */
1640                                 if (plen)
1641                                         mp->m_next = nbuf->m_pack;
1642                                 else
1643                                         mh->m_next = nbuf->m_pack;
1644                         } else {
1645                                 /* Singlet, prepare to send */
1646                                 sendmp = mh;
1647                                 if (vtag) {
1648                                         sendmp->m_pkthdr.ether_vtag = vtag;
1649                                         sendmp->m_flags |= M_VLANTAG;
1650                                 }
1651                         }
1652                 } else {
1653                         /*
1654                         ** Either no header split, or a
1655                         ** secondary piece of a fragmented
1656                         ** split packet.
1657                         */
1658                         mp->m_len = plen;
1659                         /*
1660                         ** See if there is a stored chain head that
1661                         ** tells us whether this continues an earlier packet.
1662                         */
1663                         sendmp = rbuf->fmp;
1664                         rbuf->m_pack = rbuf->fmp = NULL;
1665
1666                         if (sendmp != NULL) /* secondary frag */
1667                                 sendmp->m_pkthdr.len += mp->m_len;
1668                         else {
1669                                 /* first desc of a non-ps chain */
1670                                 sendmp = mp;
1671                                 sendmp->m_flags |= M_PKTHDR;
1672                                 sendmp->m_pkthdr.len = mp->m_len;
1673                                 if (vtag) {
1674                                         sendmp->m_pkthdr.ether_vtag = vtag;
1675                                         sendmp->m_flags |= M_VLANTAG;
1676                                 }
1677                         }
1678                         /* Pass the head pointer on */
1679                         if (eop == 0) {
1680                                 nbuf->fmp = sendmp;
1681                                 sendmp = NULL;
1682                                 mp->m_next = nbuf->m_pack;
1683                         }
1684                 }
1685                 ++processed;
1686                 /* Sending this frame? */
1687                 if (eop) {
1688                         sendmp->m_pkthdr.rcvif = ifp;
1689                         /* gather stats */
1690                         rxr->rx_packets++;
1691                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1692                         /* capture data for dynamic ITR adjustment */
1693                         rxr->packets++;
1694                         rxr->bytes += sendmp->m_pkthdr.len;
1695                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1696                                 ixl_rx_checksum(sendmp, status, error, ptype);
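                        /*
                        ** Record the receive hash so the stack can reuse it:
                        ** with RSS the hardware hash and decoded hash type
                        ** are reported, otherwise the queue's MSI-X index
                        ** serves as an opaque flow identifier.
                        */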
1697 #ifdef RSS
1698                         sendmp->m_pkthdr.flowid =
1699                             le32toh(cur->wb.qword0.hi_dword.rss);
1700                         M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1701 #else
1702                         sendmp->m_pkthdr.flowid = que->msix;
1703                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1704 #endif
1705                 }
1706 next_desc:
1707                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1708                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1709
1710                 /* Advance our pointers to the next descriptor. */
1711                 if (++i == que->num_desc)
1712                         i = 0;
1713
1714                 /* Now send to the stack or do LRO */
1715                 if (sendmp != NULL) {
1716                         rxr->next_check = i;
1717                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1718                         i = rxr->next_check;
1719                 }
1720
1721                 /* Every 8 descriptors we go to refresh mbufs */
1722                 if (processed == 8) {
1723                         ixl_refresh_mbufs(que, i);
1724                         processed = 0;
1725                 }
1726         }
1727
1728         /* Refresh any remaining buf structs */
1729         if (ixl_rx_unrefreshed(que))
1730                 ixl_refresh_mbufs(que, i);
1731
1732         rxr->next_check = i;
1733
1734 #if defined(INET6) || defined(INET)
1735         /*
1736          * Flush any outstanding LRO work
1737          */
1738         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1739                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1740                 tcp_lro_flush(lro, queued);
1741         }
1742 #endif
1743
1744         IXL_RX_UNLOCK(rxr);
1745         return (FALSE);
1746 }
1747
1748
1749 /*********************************************************************
1750  *
1751  *  Verify that the hardware indicated that the checksum is valid.
1752  *  Inform the stack about the status of the checksum so that the
1753  *  stack does not spend time verifying it again.
1754  *
1755  *********************************************************************/
1756 static void
1757 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1758 {
1759         struct i40e_rx_ptype_decoded decoded;
1760
1761         decoded = decode_rx_desc_ptype(ptype);
1762
1763         /* Errors? */
1764         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1765             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1766                 mp->m_pkthdr.csum_flags = 0;
1767                 return;
1768         }
1769
1770         /* IPv6 packets with extension headers likely have a bad csum */
1771         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1772             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1773                 if (status &
1774                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1775                         mp->m_pkthdr.csum_flags = 0;
1776                         return;
1777                 }
1778
1779  
1780         /* IP Checksum Good */
1781         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1782         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1783
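        /*
        ** L3L4P indicates that the hardware also validated the L4
        ** checksum; report it as fully verified with csum_data set to
        ** 0xffff, the value the stack expects for a valid pseudo-header
        ** checksum.
        */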
1784         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1785                 mp->m_pkthdr.csum_flags |= 
1786                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1787                 mp->m_pkthdr.csum_data |= htons(0xffff);
1788         }
1789         return;
1790 }
1791
1792 #if __FreeBSD_version >= 1100000
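/*
** ixl_get_counter: if_get_counter method returning the VSI's software
** statistics; on FreeBSD 11 and later it is typically registered at
** attach time via if_setgetcounterfn().
*/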
1793 uint64_t
1794 ixl_get_counter(if_t ifp, ift_counter cnt)
1795 {
1796         struct ixl_vsi *vsi;
1797
1798         vsi = if_getsoftc(ifp);
1799
1800         switch (cnt) {
1801         case IFCOUNTER_IPACKETS:
1802                 return (vsi->ipackets);
1803         case IFCOUNTER_IERRORS:
1804                 return (vsi->ierrors);
1805         case IFCOUNTER_OPACKETS:
1806                 return (vsi->opackets);
1807         case IFCOUNTER_OERRORS:
1808                 return (vsi->oerrors);
1809         case IFCOUNTER_COLLISIONS:
1810                 /* Collisions are impossible in full-duplex 10G/40G Ethernet */
1811                 return (0);
1812         case IFCOUNTER_IBYTES:
1813                 return (vsi->ibytes);
1814         case IFCOUNTER_OBYTES:
1815                 return (vsi->obytes);
1816         case IFCOUNTER_IMCASTS:
1817                 return (vsi->imcasts);
1818         case IFCOUNTER_OMCASTS:
1819                 return (vsi->omcasts);
1820         case IFCOUNTER_IQDROPS:
1821                 return (vsi->iqdrops);
1822         case IFCOUNTER_OQDROPS:
1823                 return (vsi->oqdrops);
1824         case IFCOUNTER_NOPROTO:
1825                 return (vsi->noproto);
1826         default:
1827                 return (if_get_counter_default(ifp, cnt));
1828         }
1829 }
1830 #endif
1831