1 /******************************************************************************
2
3   Copyright (c) 2013-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was separated to allow usage by
38 **          both the BASE and the VF drivers.
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #endif
45
46 #include "ixl.h"
47
48 #ifdef RSS
49 #include <net/rss_config.h>
50 #endif
51
52 /* Local Prototypes */
53 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
54 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
55 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
56 static int      ixl_tx_setup_offload(struct ixl_queue *,
57                     struct mbuf *, u32 *, u32 *);
58 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
59
60 static __inline void ixl_rx_discard(struct rx_ring *, int);
61 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
62                     struct mbuf *, u8);
63
64 /*
65 ** Multiqueue Transmit driver
66 **
67 */
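/*
** ixl_mq_start is the multiqueue transmit handler: it picks a TX queue,
** enqueues the mbuf on that queue's buf_ring, and then either drains the
** ring immediately (if the TX lock is free) or defers the work to the
** queue's taskqueue.  The attach path is expected to wire this up roughly
** as follows (sketch, not verbatim from this file):
**
**	ifp->if_transmit = ixl_mq_start;
**	ifp->if_qflush = ixl_qflush;
*/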
68 int
69 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
70 {
71         struct ixl_vsi          *vsi = ifp->if_softc;
72         struct ixl_queue        *que;
73         struct tx_ring          *txr;
74         int                     err, i;
75 #ifdef RSS
76         u32                     bucket_id;
77 #endif
78
79         /*
80         ** Which queue to use:
81         **
82         ** When doing RSS, map it to the same outbound
83         ** queue as the incoming flow would be mapped to.
84         ** If everything is set up correctly, it should be
85         ** the same bucket as the CPU we are currently on.
86         */
87         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
88 #ifdef  RSS
89                 if (rss_hash2bucket(m->m_pkthdr.flowid,
90                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
91                         i = bucket_id % vsi->num_queues;
92                 } else
93 #endif
94                         i = m->m_pkthdr.flowid % vsi->num_queues;
95         } else
96                 i = curcpu % vsi->num_queues;
97         /*
98         ** This may not be perfect, but until something
99         ** better comes along it will keep us from scheduling
100         ** on stalled queues.
101         */
102         if (((1 << i) & vsi->active_queues) == 0)
103                 i = ffsl(vsi->active_queues);
104
105         que = &vsi->queues[i];
106         txr = &que->txr;
107
108         err = drbr_enqueue(ifp, txr->br, m);
109         if (err)
110                 return(err);
111         if (IXL_TX_TRYLOCK(txr)) {
112                 ixl_mq_start_locked(ifp, txr);
113                 IXL_TX_UNLOCK(txr);
114         } else
115                 taskqueue_enqueue(que->tq, &que->tx_task);
116
117         return (0);
118 }
119
120 int
121 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
122 {
123         struct ixl_queue        *que = txr->que;
124         struct ixl_vsi          *vsi = que->vsi;
125         struct mbuf             *next;
126         int                     err = 0;
127
128
129         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
130             vsi->link_active == 0)
131                 return (ENETDOWN);
132
133         /* Process the transmit queue */
134         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
135                 if ((err = ixl_xmit(que, &next)) != 0) {
136                         if (next == NULL)
137                                 drbr_advance(ifp, txr->br);
138                         else
139                                 drbr_putback(ifp, txr->br, next);
140                         break;
141                 }
142                 drbr_advance(ifp, txr->br);
143                 /* Send a copy of the frame to the BPF listener */
144                 ETHER_BPF_MTAP(ifp, next);
145                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
146                         break;
147         }
148
149         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
150                 ixl_txeof(que);
151
152         return (err);
153 }
154
155 /*
156  * Called from a taskqueue to drain queued transmit packets.
157  */
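/*
** This runs as que->tx_task; ixl_mq_start() enqueues it when it cannot
** take the TX lock, so the backlog in the buf_ring is drained later
** from taskqueue context.
*/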
158 void
159 ixl_deferred_mq_start(void *arg, int pending)
160 {
161         struct ixl_queue        *que = arg;
162         struct tx_ring          *txr = &que->txr;
163         struct ixl_vsi          *vsi = que->vsi;
164         struct ifnet            *ifp = vsi->ifp;
165         
166         IXL_TX_LOCK(txr);
167         if (!drbr_empty(ifp, txr->br))
168                 ixl_mq_start_locked(ifp, txr);
169         IXL_TX_UNLOCK(txr);
170 }
171
172 /*
173 ** Flush all queue ring buffers
174 */
175 void
176 ixl_qflush(struct ifnet *ifp)
177 {
178         struct ixl_vsi  *vsi = ifp->if_softc;
179
180         for (int i = 0; i < vsi->num_queues; i++) {
181                 struct ixl_queue *que = &vsi->queues[i];
182                 struct tx_ring  *txr = &que->txr;
183                 struct mbuf     *m;
184                 IXL_TX_LOCK(txr);
185                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
186                         m_freem(m);
187                 IXL_TX_UNLOCK(txr);
188         }
189         if_qflush(ifp);
190 }
191
192 /*
193 ** Find mbuf chains passed to the driver 
194 ** that are 'sparse', using more than 8
195 ** mbufs to deliver an mss-size chunk of data
196 */
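/*
** Example (illustrative): with an MSS of 1448 bytes, a chain in which
** each mbuf carries only a couple hundred bytes needs many mbufs to
** cover a single segment; once the mbuf count within one MSS exceeds
** IXL_SPARSE_CHAIN the packet is defragmented in ixl_xmit() before
** being handed to the hardware.
*/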
197 static inline bool
198 ixl_tso_detect_sparse(struct mbuf *mp)
199 {
200         struct mbuf     *m;
201         int             num = 0, mss;
202         bool            ret = FALSE;
203
204         mss = mp->m_pkthdr.tso_segsz;
205         for (m = mp->m_next; m != NULL; m = m->m_next) {
206                 num++;
207                 mss -= m->m_len;
208                 if (mss < 1)
209                         break;
210                 if (m->m_next == NULL)
211                         break;
212         }
213         if (num > IXL_SPARSE_CHAIN)
214                 ret = TRUE;
215
216         return (ret);
217 }
218
219
220 /*********************************************************************
221  *
222  *  This routine maps the mbufs to tx descriptors, allowing the
223  *  TX engine to transmit the packets. 
224  *      - return 0 on success, positive on failure
225  *
226  **********************************************************************/
227 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
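/*
** IXL_TXD_CMD is OR'd into the last data descriptor of each frame:
** EOP marks the end of the packet and RS requests a completion report
** from the hardware; ixl_txeof() tracks completions via the head
** write-back value read by ixl_get_tx_head().
*/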
228
229 static int
230 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
231 {
232         struct ixl_vsi          *vsi = que->vsi;
233         struct i40e_hw          *hw = vsi->hw;
234         struct tx_ring          *txr = &que->txr;
235         struct ixl_tx_buf       *buf;
236         struct i40e_tx_desc     *txd = NULL;
237         struct mbuf             *m_head, *m;
238         int                     i, j, error, nsegs, maxsegs;
239         int                     first, last = 0;
240         u16                     vtag = 0;
241         u32                     cmd, off;
242         bus_dmamap_t            map;
243         bus_dma_tag_t           tag;
244         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
245
246
247         cmd = off = 0;
248         m_head = *m_headp;
249
250         /*
251          * Capture the first descriptor used: its buffer
252          * entry will later record the index of the EOP
253          * descriptor we ask the hardware to report back on.
254          */
255         first = txr->next_avail;
256         buf = &txr->buffers[first];
257         map = buf->map;
258         tag = txr->tx_tag;
259         maxsegs = IXL_MAX_TX_SEGS;
260
261         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
262                 /* Use larger mapping for TSO */
263                 tag = txr->tso_tag;
264                 maxsegs = IXL_MAX_TSO_SEGS;
265                 if (ixl_tso_detect_sparse(m_head)) {
266                         m = m_defrag(m_head, M_NOWAIT);
267                         if (m == NULL) {
268                                 m_freem(*m_headp);
269                                 *m_headp = NULL;
270                                 return (ENOBUFS);
271                         }
272                         *m_headp = m;
273                 }
274         }
275
276         /*
277          * Map the packet for DMA.
278          */
279         error = bus_dmamap_load_mbuf_sg(tag, map,
280             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
281
282         if (error == EFBIG) {
283                 struct mbuf *m;
284
285                 m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
286                 if (m == NULL) {
287                         que->mbuf_defrag_failed++;
288                         m_freem(*m_headp);
289                         *m_headp = NULL;
290                         return (ENOBUFS);
291                 }
292                 *m_headp = m;
293
294                 /* Try it again */
295                 error = bus_dmamap_load_mbuf_sg(tag, map,
296                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
297
298                 if (error == ENOMEM) {
299                         que->tx_dma_setup++;
300                         return (error);
301                 } else if (error != 0) {
302                         que->tx_dma_setup++;
303                         m_freem(*m_headp);
304                         *m_headp = NULL;
305                         return (error);
306                 }
307         } else if (error == ENOMEM) {
308                 que->tx_dma_setup++;
309                 return (error);
310         } else if (error != 0) {
311                 que->tx_dma_setup++;
312                 m_freem(*m_headp);
313                 *m_headp = NULL;
314                 return (error);
315         }
316
317         /* Make certain there are enough descriptors */
318         if (nsegs > txr->avail - 2) {
319                 txr->no_desc++;
320                 error = ENOBUFS;
321                 goto xmit_fail;
322         }
323         m_head = *m_headp;
324
325         /* Set up the TSO/CSUM offload */
326         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
327                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
328                 if (error)
329                         goto xmit_fail;
330         }
331
332         cmd |= I40E_TX_DESC_CMD_ICRC;
333         /* Grab the VLAN tag */
334         if (m_head->m_flags & M_VLANTAG) {
335                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
336                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
337         }
338
339         i = txr->next_avail;
340         for (j = 0; j < nsegs; j++) {
341                 bus_size_t seglen;
342
343                 buf = &txr->buffers[i];
344                 buf->tag = tag; /* Keep track of the type tag */
345                 txd = &txr->base[i];
346                 seglen = segs[j].ds_len;
347
348                 txd->buffer_addr = htole64(segs[j].ds_addr);
349                 txd->cmd_type_offset_bsz =
350                     htole64(I40E_TX_DESC_DTYPE_DATA
351                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
352                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
353                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
354                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
355
356                 last = i; /* descriptor that will get completion IRQ */
357
358                 if (++i == que->num_desc)
359                         i = 0;
360
361                 buf->m_head = NULL;
362                 buf->eop_index = -1;
363         }
364         /* Set the last descriptor for report */
365         txd->cmd_type_offset_bsz |=
366             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
367         txr->avail -= nsegs;
368         txr->next_avail = i;
369
370         buf->m_head = m_head;
371         /* Swap the dma map between the first and last descriptor */
372         txr->buffers[first].map = buf->map;
373         buf->map = map;
374         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
375
376         /* Set the index of the descriptor that will be marked done */
377         buf = &txr->buffers[first];
378         buf->eop_index = last;
379
380         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
381             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
382         /*
383          * Advance the Transmit Descriptor Tail (TDT); this tells the
384          * hardware that this frame is available to transmit.
385          */
386         ++txr->total_packets;
387         wr32(hw, txr->tail, i);
388
389         ixl_flush(hw);
390         /* Mark outstanding work */
391         if (que->busy == 0)
392                 que->busy = 1;
393         return (0);
394
395 xmit_fail:
396         bus_dmamap_unload(tag, buf->map);
397         return (error);
398 }
399
400
401 /*********************************************************************
402  *
403  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
404  *  the information needed to transmit a packet on the wire. This is
405  *  called only once at attach; setup is done on every reset.
406  *
407  **********************************************************************/
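/*
** Two DMA tags are created below: tx_tag for ordinary frames (up to
** IXL_MAX_TX_SEGS segments) and tso_tag for TSO frames (up to
** IXL_MAX_TSO_SEGS segments).  Each tx_buffer gets one map, created
** against tx_tag and re-tagged per packet in ixl_xmit().
*/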
408 int
409 ixl_allocate_tx_data(struct ixl_queue *que)
410 {
411         struct tx_ring          *txr = &que->txr;
412         struct ixl_vsi          *vsi = que->vsi;
413         device_t                dev = vsi->dev;
414         struct ixl_tx_buf       *buf;
415         int                     error = 0;
416
417         /*
418          * Setup DMA descriptor areas.
419          */
420         if ((error = bus_dma_tag_create(NULL,           /* parent */
421                                1, 0,                    /* alignment, bounds */
422                                BUS_SPACE_MAXADDR,       /* lowaddr */
423                                BUS_SPACE_MAXADDR,       /* highaddr */
424                                NULL, NULL,              /* filter, filterarg */
425                                IXL_TSO_SIZE,            /* maxsize */
426                                IXL_MAX_TX_SEGS,         /* nsegments */
427                                PAGE_SIZE,               /* maxsegsize */
428                                0,                       /* flags */
429                                NULL,                    /* lockfunc */
430                                NULL,                    /* lockfuncarg */
431                                &txr->tx_tag))) {
432                 device_printf(dev,"Unable to allocate TX DMA tag\n");
433                 goto fail;
434         }
435
436         /* Make a special tag for TSO */
437         if ((error = bus_dma_tag_create(NULL,           /* parent */
438                                1, 0,                    /* alignment, bounds */
439                                BUS_SPACE_MAXADDR,       /* lowaddr */
440                                BUS_SPACE_MAXADDR,       /* highaddr */
441                                NULL, NULL,              /* filter, filterarg */
442                                IXL_TSO_SIZE,            /* maxsize */
443                                IXL_MAX_TSO_SEGS,        /* nsegments */
444                                PAGE_SIZE,               /* maxsegsize */
445                                0,                       /* flags */
446                                NULL,                    /* lockfunc */
447                                NULL,                    /* lockfuncarg */
448                                &txr->tso_tag))) {
449                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
450                 goto fail;
451         }
452
453         if (!(txr->buffers =
454             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
455             que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
456                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
457                 error = ENOMEM;
458                 goto fail;
459         }
460
461         /* Create the descriptor buffer default dma maps */
462         buf = txr->buffers;
463         for (int i = 0; i < que->num_desc; i++, buf++) {
464                 buf->tag = txr->tx_tag;
465                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
466                 if (error != 0) {
467                         device_printf(dev, "Unable to create TX DMA map\n");
468                         goto fail;
469                 }
470         }
471 fail:
472         return (error);
473 }
474
475
476 /*********************************************************************
477  *
478  *  (Re)Initialize a queue transmit ring.
479  *      - called by init; it clears the descriptor ring
480  *        and frees any stale mbufs
481  *
482  **********************************************************************/
483 void
484 ixl_init_tx_ring(struct ixl_queue *que)
485 {
486         struct tx_ring *txr = &que->txr;
487         struct ixl_tx_buf *buf;
488
489         /* Clear the old ring contents */
490         IXL_TX_LOCK(txr);
491         bzero((void *)txr->base,
492               (sizeof(struct i40e_tx_desc)) * que->num_desc);
493
494         /* Reset indices */
495         txr->next_avail = 0;
496         txr->next_to_clean = 0;
497
498 #ifdef IXL_FDIR
499         /* Initialize flow director */
500         txr->atr_rate = ixl_atr_rate;
501         txr->atr_count = 0;
502 #endif
503
504         /* Free any existing tx mbufs. */
505         buf = txr->buffers;
506         for (int i = 0; i < que->num_desc; i++, buf++) {
507                 if (buf->m_head != NULL) {
508                         bus_dmamap_sync(buf->tag, buf->map,
509                             BUS_DMASYNC_POSTWRITE);
510                         bus_dmamap_unload(buf->tag, buf->map);
511                         m_freem(buf->m_head);
512                         buf->m_head = NULL;
513                 }
514                 /* Clear the EOP index */
515                 buf->eop_index = -1;
516         }
517
518         /* Set number of descriptors available */
519         txr->avail = que->num_desc;
520
521         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
522             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
523         IXL_TX_UNLOCK(txr);
524 }
525
526
527 /*********************************************************************
528  *
529  *  Free transmit ring related data structures.
530  *
531  **********************************************************************/
532 void
533 ixl_free_que_tx(struct ixl_queue *que)
534 {
535         struct tx_ring *txr = &que->txr;
536         struct ixl_tx_buf *buf;
537
538         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
539
540         for (int i = 0; i < que->num_desc; i++) {
541                 buf = &txr->buffers[i];
542                 if (buf->m_head != NULL) {
543                         bus_dmamap_sync(buf->tag, buf->map,
544                             BUS_DMASYNC_POSTWRITE);
545                         bus_dmamap_unload(buf->tag,
546                             buf->map);
547                         m_freem(buf->m_head);
548                         buf->m_head = NULL;
549                         if (buf->map != NULL) {
550                                 bus_dmamap_destroy(buf->tag,
551                                     buf->map);
552                                 buf->map = NULL;
553                         }
554                 } else if (buf->map != NULL) {
555                         bus_dmamap_unload(buf->tag,
556                             buf->map);
557                         bus_dmamap_destroy(buf->tag,
558                             buf->map);
559                         buf->map = NULL;
560                 }
561         }
562         if (txr->br != NULL)
563                 buf_ring_free(txr->br, M_DEVBUF);
564         if (txr->buffers != NULL) {
565                 free(txr->buffers, M_DEVBUF);
566                 txr->buffers = NULL;
567         }
568         if (txr->tx_tag != NULL) {
569                 bus_dma_tag_destroy(txr->tx_tag);
570                 txr->tx_tag = NULL;
571         }
572         if (txr->tso_tag != NULL) {
573                 bus_dma_tag_destroy(txr->tso_tag);
574                 txr->tso_tag = NULL;
575         }
576
577         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
578         return;
579 }
580
581 /*********************************************************************
582  *
583  *  Setup descriptor for hw offloads 
584  *
585  **********************************************************************/
586
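/*
** The cmd and off arguments accumulate the CMD and OFFSET fields of
** the data descriptor's qword1.  Note the units implied by the shifts
** below: MACLEN is in 2-byte words (elen >> 1), while IPLEN and the
** L4 length are in 4-byte dwords (>> 2).
*/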
587 static int
588 ixl_tx_setup_offload(struct ixl_queue *que,
589     struct mbuf *mp, u32 *cmd, u32 *off)
590 {
591         struct ether_vlan_header        *eh;
592 #ifdef INET
593         struct ip                       *ip = NULL;
594 #endif
595         struct tcphdr                   *th = NULL;
596 #ifdef INET6
597         struct ip6_hdr                  *ip6;
598 #endif
599         int                             elen, ip_hlen = 0, tcp_hlen;
600         u16                             etype;
601         u8                              ipproto = 0;
602         bool                            tso = FALSE;
603
604
605         /* Set up the TSO context descriptor if required */
606         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
607                 tso = ixl_tso_setup(que, mp);
608                 if (tso)
609                         ++que->tso;
610                 else
611                         return (ENXIO);
612         }
613
614         /*
615          * Determine where frame payload starts.
616          * Jump over vlan headers if already present,
617          * helpful for QinQ too.
618          */
619         eh = mtod(mp, struct ether_vlan_header *);
620         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
621                 etype = ntohs(eh->evl_proto);
622                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
623         } else {
624                 etype = ntohs(eh->evl_encap_proto);
625                 elen = ETHER_HDR_LEN;
626         }
627
628         switch (etype) {
629 #ifdef INET
630                 case ETHERTYPE_IP:
631                         ip = (struct ip *)(mp->m_data + elen);
632                         ip_hlen = ip->ip_hl << 2;
633                         ipproto = ip->ip_p;
634                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
635                         /* The IP checksum must be recalculated with TSO */
636                         if (tso)
637                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
638                         else
639                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
640                         break;
641 #endif
642 #ifdef INET6
643                 case ETHERTYPE_IPV6:
644                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
645                         ip_hlen = sizeof(struct ip6_hdr);
646                         ipproto = ip6->ip6_nxt;
647                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
648                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
649                         break;
650 #endif
651                 default:
652                         break;
653         }
654
655         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
656         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
657
658         switch (ipproto) {
659                 case IPPROTO_TCP:
660                         tcp_hlen = th->th_off << 2;
661                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
662                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
663                                 *off |= (tcp_hlen >> 2) <<
664                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
665                         }
666 #ifdef IXL_FDIR
667                         ixl_atr(que, th, etype);
668 #endif
669                         break;
670                 case IPPROTO_UDP:
671                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
672                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
673                                 *off |= (sizeof(struct udphdr) >> 2) <<
674                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
675                         }
676                         break;
677
678                 case IPPROTO_SCTP:
679                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
680                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
681                                 *off |= (sizeof(struct sctphdr) >> 2) <<
682                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
683                         }
684                         /* Fall Thru */
685                 default:
686                         break;
687         }
688
689         return (0);
690 }
691
692
693 /**********************************************************************
694  *
695  *  Setup context for hardware segmentation offload (TSO)
696  *
697  **********************************************************************/
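/*
** This builds an i40e context descriptor in the next ring slot, ahead
** of the packet's data descriptors: it carries the TSO payload length
** (packet length minus the L2/L3/L4 headers) and the MSS, and the TCP
** pseudo-header checksum is primed in the mbuf for the hardware.
*/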
698 static bool
699 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
700 {
701         struct tx_ring                  *txr = &que->txr;
702         struct i40e_tx_context_desc     *TXD;
703         struct ixl_tx_buf               *buf;
704         u32                             cmd, mss, type, tsolen;
705         u16                             etype;
706         int                             idx, elen, ip_hlen, tcp_hlen;
707         struct ether_vlan_header        *eh;
708 #ifdef INET
709         struct ip                       *ip;
710 #endif
711 #ifdef INET6
712         struct ip6_hdr                  *ip6;
713 #endif
714 #if defined(INET6) || defined(INET)
715         struct tcphdr                   *th;
716 #endif
717         u64                             type_cmd_tso_mss;
718
719         /*
720          * Determine where frame payload starts.
721          * Jump over vlan headers if already present
722          */
723         eh = mtod(mp, struct ether_vlan_header *);
724         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
725                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
726                 etype = eh->evl_proto;
727         } else {
728                 elen = ETHER_HDR_LEN;
729                 etype = eh->evl_encap_proto;
730         }
731
732         switch (ntohs(etype)) {
733 #ifdef INET6
734         case ETHERTYPE_IPV6:
735                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
736                 if (ip6->ip6_nxt != IPPROTO_TCP)
737                         return (FALSE);
738                 ip_hlen = sizeof(struct ip6_hdr);
739                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
740                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
741                 tcp_hlen = th->th_off << 2;
742                 break;
743 #endif
744 #ifdef INET
745         case ETHERTYPE_IP:
746                 ip = (struct ip *)(mp->m_data + elen);
747                 if (ip->ip_p != IPPROTO_TCP)
748                         return (FALSE);
749                 ip->ip_sum = 0;
750                 ip_hlen = ip->ip_hl << 2;
751                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
752                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
753                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
754                 tcp_hlen = th->th_off << 2;
755                 break;
756 #endif
757         default:
758                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
759                     __func__, ntohs(etype));
760                 return FALSE;
761         }
762
763         /* Ensure we have at least the IP+TCP header in the first mbuf. */
764         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
765                 return FALSE;
766
767         idx = txr->next_avail;
768         buf = &txr->buffers[idx];
769         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
770         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
771
772         type = I40E_TX_DESC_DTYPE_CONTEXT;
773         cmd = I40E_TX_CTX_DESC_TSO;
774         mss = mp->m_pkthdr.tso_segsz;
775
776         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
777             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
778             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
779             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
780         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
781
782         TXD->tunneling_params = htole32(0);
783         buf->m_head = NULL;
784         buf->eop_index = -1;
785
786         if (++idx == que->num_desc)
787                 idx = 0;
788
789         txr->avail--;
790         txr->next_avail = idx;
791
792         return TRUE;
793 }
794
795 /*
796 ** ixl_get_tx_head - Retrieve the value from the
797 **    location where the HW records its HEAD index
798 */
799 static inline u32
800 ixl_get_tx_head(struct ixl_queue *que)
801 {
802         struct tx_ring  *txr = &que->txr;
803         void *head = &txr->base[que->num_desc];
804         return LE32_TO_CPU(*(volatile __le32 *)head);
805 }
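/*
** The head write-back word lives in the slot just past the last TX
** descriptor (txr->base[que->num_desc]); ixl_txeof() compares it with
** its cleanup index rather than polling a done bit in each descriptor.
*/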
806
807 /**********************************************************************
808  *
809  *  Examine each tx_buffer in the used queue. If the hardware is done
810  *  processing the packet then free associated resources. The
811  *  tx_buffer is put back on the free queue.
812  *
813  **********************************************************************/
814 bool
815 ixl_txeof(struct ixl_queue *que)
816 {
817         struct tx_ring          *txr = &que->txr;
818         u32                     first, last, head, done, processed;
819         struct ixl_tx_buf       *buf;
820         struct i40e_tx_desc     *tx_desc, *eop_desc;
821
822
823         mtx_assert(&txr->mtx, MA_OWNED);
824
825
826         /* These are not the descriptors you seek, move along :) */
827         if (txr->avail == que->num_desc) {
828                 que->busy = 0;
829                 return FALSE;
830         }
831
832         processed = 0;
833         first = txr->next_to_clean;
834         buf = &txr->buffers[first];
835         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
836         last = buf->eop_index;
837         if (last == -1)
838                 return FALSE;
839         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
840
841         /* Get the Head WB value */
842         head = ixl_get_tx_head(que);
843
844         /*
845         ** Get the index of the first descriptor
846         ** BEYOND the EOP and call that 'done'.
847         ** I do this so the comparison in the
848         ** inner while loop below can be simple
849         */
850         if (++last == que->num_desc) last = 0;
851         done = last;
852
853         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
854             BUS_DMASYNC_POSTREAD);
855         /*
856         ** The HEAD index of the ring is written in a 
857         ** defined location, this rather than a done bit
858         ** is what is used to keep track of what must be
859         ** 'cleaned'.
860         */
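        /*
        ** Illustrative example: with num_desc = 8, next_to_clean = 6
        ** and eop_index = 1, 'done' becomes 2 and descriptors
        ** 6, 7, 0, 1 are reclaimed once 'head' has advanced past them.
        */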
861         while (first != head) {
862                 /* We clean the range of the packet */
863                 while (first != done) {
864                         ++txr->avail;
865                         ++processed;
866
867                         if (buf->m_head) {
868                                 txr->bytes += /* for ITR adjustment */
869                                     buf->m_head->m_pkthdr.len;
870                                 txr->tx_bytes += /* for TX stats */
871                                     buf->m_head->m_pkthdr.len;
872                                 bus_dmamap_sync(buf->tag,
873                                     buf->map,
874                                     BUS_DMASYNC_POSTWRITE);
875                                 bus_dmamap_unload(buf->tag,
876                                     buf->map);
877                                 m_freem(buf->m_head);
878                                 buf->m_head = NULL;
879                                 buf->map = NULL;
880                         }
881                         buf->eop_index = -1;
882
883                         if (++first == que->num_desc)
884                                 first = 0;
885
886                         buf = &txr->buffers[first];
887                         tx_desc = &txr->base[first];
888                 }
889                 ++txr->packets;
890                 /* See if there is more work now */
891                 last = buf->eop_index;
892                 if (last != -1) {
893                         eop_desc = &txr->base[last];
894                         /* Get next done point */
895                         if (++last == que->num_desc) last = 0;
896                         done = last;
897                 } else
898                         break;
899         }
900         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
901             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
902
903         txr->next_to_clean = first;
904
905
906         /*
907         ** Hang detection: we know there is work
908         ** outstanding or the first return above
909         ** would have been taken, so indicate an
910         ** unsuccessful pass. In the local timer,
911         ** if this value grows too large the queue
912         ** will be considered hung. If anything was
913         ** cleaned then reset the state.
914         */
915         if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
916                 ++que->busy;
917
918         if (processed)
919                 que->busy = 1; /* Note this turns off HUNG */
920
921         /*
922          * If there are no pending descriptors, clear the timeout.
923          */
924         if (txr->avail == que->num_desc) {
925                 que->busy = 0;
926                 return FALSE;
927         }
928
929         return TRUE;
930 }
931
932 /*********************************************************************
933  *
934  *  Refresh mbuf buffers for RX descriptor rings
935  *   - now keeps its own state, so discards due to resource
936  *     exhaustion are unnecessary; if an mbuf cannot be obtained
937  *     it just returns, keeping its placeholder, and can simply
938  *     be called again to try later.
939  *
940  **********************************************************************/
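/*
** Two indices walk the ring below: 'i' is the slot being refilled and
** 'j' runs one slot ahead; the loop stops when 'j' reaches 'limit',
** and the hardware tail is only written if at least one descriptor
** was actually refreshed.
*/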
941 static void
942 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
943 {
944         struct ixl_vsi          *vsi = que->vsi;
945         struct rx_ring          *rxr = &que->rxr;
946         bus_dma_segment_t       hseg[1];
947         bus_dma_segment_t       pseg[1];
948         struct ixl_rx_buf       *buf;
949         struct mbuf             *mh, *mp;
950         int                     i, j, nsegs, error;
951         bool                    refreshed = FALSE;
952
953         i = j = rxr->next_refresh;
954         /* Control the loop with one beyond */
955         if (++j == que->num_desc)
956                 j = 0;
957
958         while (j != limit) {
959                 buf = &rxr->buffers[i];
960                 if (rxr->hdr_split == FALSE)
961                         goto no_split;
962
963                 if (buf->m_head == NULL) {
964                         mh = m_gethdr(M_NOWAIT, MT_DATA);
965                         if (mh == NULL)
966                                 goto update;
967                 } else
968                         mh = buf->m_head;
969
970                 mh->m_pkthdr.len = mh->m_len = MHLEN;
971                 mh->m_len = MHLEN;
972                 mh->m_flags |= M_PKTHDR;
973                 /* Get the memory mapping */
974                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
975                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
976                 if (error != 0) {
977                         printf("Refresh mbufs: hdr dmamap load"
978                             " failure - %d\n", error);
979                         m_free(mh);
980                         buf->m_head = NULL;
981                         goto update;
982                 }
983                 buf->m_head = mh;
984                 bus_dmamap_sync(rxr->htag, buf->hmap,
985                     BUS_DMASYNC_PREREAD);
986                 rxr->base[i].read.hdr_addr =
987                    htole64(hseg[0].ds_addr);
988
989 no_split:
990                 if (buf->m_pack == NULL) {
991                         mp = m_getjcl(M_NOWAIT, MT_DATA,
992                             M_PKTHDR, rxr->mbuf_sz);
993                         if (mp == NULL)
994                                 goto update;
995                 } else
996                         mp = buf->m_pack;
997
998                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
999                 /* Get the memory mapping */
1000                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1001                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1002                 if (error != 0) {
1003                         printf("Refresh mbufs: payload dmamap load"
1004                             " failure - %d\n", error);
1005                         m_free(mp);
1006                         buf->m_pack = NULL;
1007                         goto update;
1008                 }
1009                 buf->m_pack = mp;
1010                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1011                     BUS_DMASYNC_PREREAD);
1012                 rxr->base[i].read.pkt_addr =
1013                    htole64(pseg[0].ds_addr);
1014                 /* Used only when doing header split */
1015                 rxr->base[i].read.hdr_addr = 0;
1016
1017                 refreshed = TRUE;
1018                 /* Next is precalculated */
1019                 i = j;
1020                 rxr->next_refresh = i;
1021                 if (++j == que->num_desc)
1022                         j = 0;
1023         }
1024 update:
1025         if (refreshed) /* Update hardware tail index */
1026                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1027         return;
1028 }
1029
1030
1031 /*********************************************************************
1032  *
1033  *  Allocate memory for rx_buffer structures. Since we use one
1034  *  rx_buffer per descriptor, the maximum number of rx_buffer's
1035  *  rx_buffer per descriptor, the maximum number of rx_buffers
1036  *  that we've defined.
1037  *
1038  **********************************************************************/
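/*
** Two RX DMA tags are created: htag for small header buffers (MSIZE)
** used only when header split is enabled, and ptag for payload
** clusters of up to MJUM16BYTES; every descriptor gets one map from
** each tag.
*/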
1039 int
1040 ixl_allocate_rx_data(struct ixl_queue *que)
1041 {
1042         struct rx_ring          *rxr = &que->rxr;
1043         struct ixl_vsi          *vsi = que->vsi;
1044         device_t                dev = vsi->dev;
1045         struct ixl_rx_buf       *buf;
1046         int                     i, bsize, error;
1047
1048         bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1049         if (!(rxr->buffers =
1050             (struct ixl_rx_buf *) malloc(bsize,
1051             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1052                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1053                 error = ENOMEM;
1054                 return (error);
1055         }
1056
1057         if ((error = bus_dma_tag_create(NULL,   /* parent */
1058                                    1, 0,        /* alignment, bounds */
1059                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1060                                    BUS_SPACE_MAXADDR,   /* highaddr */
1061                                    NULL, NULL,          /* filter, filterarg */
1062                                    MSIZE,               /* maxsize */
1063                                    1,                   /* nsegments */
1064                                    MSIZE,               /* maxsegsize */
1065                                    0,                   /* flags */
1066                                    NULL,                /* lockfunc */
1067                                    NULL,                /* lockfuncarg */
1068                                    &rxr->htag))) {
1069                 device_printf(dev, "Unable to create RX DMA htag\n");
1070                 return (error);
1071         }
1072
1073         if ((error = bus_dma_tag_create(NULL,   /* parent */
1074                                    1, 0,        /* alignment, bounds */
1075                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1076                                    BUS_SPACE_MAXADDR,   /* highaddr */
1077                                    NULL, NULL,          /* filter, filterarg */
1078                                    MJUM16BYTES,         /* maxsize */
1079                                    1,                   /* nsegments */
1080                                    MJUM16BYTES,         /* maxsegsize */
1081                                    0,                   /* flags */
1082                                    NULL,                /* lockfunc */
1083                                    NULL,                /* lockfuncarg */
1084                                    &rxr->ptag))) {
1085                 device_printf(dev, "Unable to create RX DMA ptag\n");
1086                 return (error);
1087         }
1088
1089         for (i = 0; i < que->num_desc; i++) {
1090                 buf = &rxr->buffers[i];
1091                 error = bus_dmamap_create(rxr->htag,
1092                     BUS_DMA_NOWAIT, &buf->hmap);
1093                 if (error) {
1094                         device_printf(dev, "Unable to create RX head map\n");
1095                         break;
1096                 }
1097                 error = bus_dmamap_create(rxr->ptag,
1098                     BUS_DMA_NOWAIT, &buf->pmap);
1099                 if (error) {
1100                         device_printf(dev, "Unable to create RX pkt map\n");
1101                         break;
1102                 }
1103         }
1104
1105         return (error);
1106 }
1107
1108
1109 /*********************************************************************
1110  *
1111  *  (Re)Initialize the queue receive ring and its buffers.
1112  *
1113  **********************************************************************/
1114 int
1115 ixl_init_rx_ring(struct ixl_queue *que)
1116 {
1117         struct  rx_ring         *rxr = &que->rxr;
1118         struct ixl_vsi          *vsi = que->vsi;
1119 #if defined(INET6) || defined(INET)
1120         struct ifnet            *ifp = vsi->ifp;
1121         struct lro_ctrl         *lro = &rxr->lro;
1122 #endif
1123         struct ixl_rx_buf       *buf;
1124         bus_dma_segment_t       pseg[1], hseg[1];
1125         int                     rsize, nsegs, error = 0;
1126
1127         IXL_RX_LOCK(rxr);
1128         /* Clear the ring contents */
1129         rsize = roundup2(que->num_desc *
1130             sizeof(union i40e_rx_desc), DBA_ALIGN);
1131         bzero((void *)rxr->base, rsize);
1132         /* Cleanup any existing buffers */
1133         for (int i = 0; i < que->num_desc; i++) {
1134                 buf = &rxr->buffers[i];
1135                 if (buf->m_head != NULL) {
1136                         bus_dmamap_sync(rxr->htag, buf->hmap,
1137                             BUS_DMASYNC_POSTREAD);
1138                         bus_dmamap_unload(rxr->htag, buf->hmap);
1139                         buf->m_head->m_flags |= M_PKTHDR;
1140                         m_freem(buf->m_head);
1141                 }
1142                 if (buf->m_pack != NULL) {
1143                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1144                             BUS_DMASYNC_POSTREAD);
1145                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1146                         buf->m_pack->m_flags |= M_PKTHDR;
1147                         m_freem(buf->m_pack);
1148                 }
1149                 buf->m_head = NULL;
1150                 buf->m_pack = NULL;
1151         }
1152
1153         /* header split is off */
1154         rxr->hdr_split = FALSE;
1155
1156         /* Now replenish the mbufs */
1157         for (int j = 0; j != que->num_desc; ++j) {
1158                 struct mbuf     *mh, *mp;
1159
1160                 buf = &rxr->buffers[j];
1161                 /*
1162                 ** Don't allocate mbufs if not
1163                 ** doing header split, it's wasteful
1164                 */
1165                 if (rxr->hdr_split == FALSE)
1166                         goto skip_head;
1167
1168                 /* First the header */
1169                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1170                 if (buf->m_head == NULL) {
1171                         error = ENOBUFS;
1172                         goto fail;
1173                 }
1174                 m_adj(buf->m_head, ETHER_ALIGN);
1175                 mh = buf->m_head;
1176                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1177                 mh->m_flags |= M_PKTHDR;
1178                 /* Get the memory mapping */
1179                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1180                     buf->hmap, buf->m_head, hseg,
1181                     &nsegs, BUS_DMA_NOWAIT);
1182                 if (error != 0) /* Nothing elegant to do here */
1183                         goto fail;
1184                 bus_dmamap_sync(rxr->htag,
1185                     buf->hmap, BUS_DMASYNC_PREREAD);
1186                 /* Update descriptor */
1187                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1188
1189 skip_head:
1190                 /* Now the payload cluster */
1191                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1192                     M_PKTHDR, rxr->mbuf_sz);
1193                 if (buf->m_pack == NULL) {
1194                         error = ENOBUFS;
1195                         goto fail;
1196                 }
1197                 mp = buf->m_pack;
1198                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1199                 /* Get the memory mapping */
1200                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1201                     buf->pmap, mp, pseg,
1202                     &nsegs, BUS_DMA_NOWAIT);
1203                 if (error != 0)
1204                         goto fail;
1205                 bus_dmamap_sync(rxr->ptag,
1206                     buf->pmap, BUS_DMASYNC_PREREAD);
1207                 /* Update descriptor */
1208                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1209                 rxr->base[j].read.hdr_addr = 0;
1210         }
1211
1212
1213         /* Setup our descriptor indices */
1214         rxr->next_check = 0;
1215         rxr->next_refresh = 0;
1216         rxr->lro_enabled = FALSE;
1217         rxr->split = 0;
1218         rxr->bytes = 0;
1219         rxr->discard = FALSE;
1220
1221         wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1222         ixl_flush(vsi->hw);
1223
1224 #if defined(INET6) || defined(INET)
1225         /*
1226         ** Now set up the LRO interface:
1227         */
1228         if (ifp->if_capenable & IFCAP_LRO) {
1229                 int err = tcp_lro_init(lro);
1230                 if (err) {
1231                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1232                         goto fail;
1233                 }
1234                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1235                 rxr->lro_enabled = TRUE;
1236                 lro->ifp = vsi->ifp;
1237         }
1238 #endif
1239
1240         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1241             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1242
1243 fail:
1244         IXL_RX_UNLOCK(rxr);
1245         return (error);
1246 }
1247
1248
1249 /*********************************************************************
1250  *
1251  *  Free station receive ring data structures
1252  *
1253  **********************************************************************/
1254 void
1255 ixl_free_que_rx(struct ixl_queue *que)
1256 {
1257         struct rx_ring          *rxr = &que->rxr;
1258         struct ixl_rx_buf       *buf;
1259
1260         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1261
1262         /* Cleanup any existing buffers */
1263         if (rxr->buffers != NULL) {
1264                 for (int i = 0; i < que->num_desc; i++) {
1265                         buf = &rxr->buffers[i];
1266                         if (buf->m_head != NULL) {
1267                                 bus_dmamap_sync(rxr->htag, buf->hmap,
1268                                     BUS_DMASYNC_POSTREAD);
1269                                 bus_dmamap_unload(rxr->htag, buf->hmap);
1270                                 buf->m_head->m_flags |= M_PKTHDR;
1271                                 m_freem(buf->m_head);
1272                         }
1273                         if (buf->m_pack != NULL) {
1274                                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1275                                     BUS_DMASYNC_POSTREAD);
1276                                 bus_dmamap_unload(rxr->ptag, buf->pmap);
1277                                 buf->m_pack->m_flags |= M_PKTHDR;
1278                                 m_freem(buf->m_pack);
1279                         }
1280                         buf->m_head = NULL;
1281                         buf->m_pack = NULL;
1282                         if (buf->hmap != NULL) {
1283                                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1284                                 buf->hmap = NULL;
1285                         }
1286                         if (buf->pmap != NULL) {
1287                                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1288                                 buf->pmap = NULL;
1289                         }
1290                 }
1291                 if (rxr->buffers != NULL) {
1292                         free(rxr->buffers, M_DEVBUF);
1293                         rxr->buffers = NULL;
1294                 }
1295         }
1296
1297         if (rxr->htag != NULL) {
1298                 bus_dma_tag_destroy(rxr->htag);
1299                 rxr->htag = NULL;
1300         }
1301         if (rxr->ptag != NULL) {
1302                 bus_dma_tag_destroy(rxr->ptag);
1303                 rxr->ptag = NULL;
1304         }
1305
1306         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1307         return;
1308 }
1309
1310 static __inline void
1311 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1312 {
1313
1314 #if defined(INET6) || defined(INET)
1315         /*
1316          * At the moment, LRO is only for IPv4/TCP packets whose TCP checksum
1317          * has been verified by hardware, and which carry no VLAN tag in the
1318          * Ethernet header.
1319          */
1320         if (rxr->lro_enabled &&
1321             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1322             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1323             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1324                 /*
1325                  * Send to the stack if:
1326                  *  - LRO not enabled, or
1327                  *  - no LRO resources, or
1328                  *  - lro enqueue fails
1329                  */
1330                 if (rxr->lro.lro_cnt != 0)
1331                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1332                                 return;
1333         }
1334 #endif
1335         IXL_RX_UNLOCK(rxr);
1336         (*ifp->if_input)(ifp, m);
1337         IXL_RX_LOCK(rxr);
1338 }
1339
1340
1341 static __inline void
1342 ixl_rx_discard(struct rx_ring *rxr, int i)
1343 {
1344         struct ixl_rx_buf       *rbuf;
1345
1346         rbuf = &rxr->buffers[i];
1347
1348         if (rbuf->fmp != NULL) {/* Partial chain ? */
1349                 rbuf->fmp->m_flags |= M_PKTHDR;
1350                 m_freem(rbuf->fmp);
1351                 rbuf->fmp = NULL;
1352         }
1353
1354         /*
1355         ** With advanced descriptors the writeback
1356         ** clobbers the buffer addrs, so it's easier
1357         ** to just free the existing mbufs and take
1358         ** the normal refresh path to get new buffers
1359         ** and mapping.
1360         */
1361         if (rbuf->m_head) {
1362                 m_free(rbuf->m_head);
1363                 rbuf->m_head = NULL;
1364         }
1365  
1366         if (rbuf->m_pack) {
1367                 m_free(rbuf->m_pack);
1368                 rbuf->m_pack = NULL;
1369         }
1370
1371         return;
1372 }
1373
1374 #ifdef RSS
1375 /*
1376 ** i40e_ptype_to_hash: parse the packet type
1377 ** to determine the appropriate hash.
1378 */
1379 static inline int
1380 ixl_ptype_to_hash(u8 ptype)
1381 {
1382         struct i40e_rx_ptype_decoded    decoded;
1383         u8                              ex = 0;
1384
1385         decoded = decode_rx_desc_ptype(ptype);
1386         ex = decoded.outer_frag;
1387
1388         if (!decoded.known)
1389                 return M_HASHTYPE_OPAQUE;
1390
1391         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
1392                 return M_HASHTYPE_OPAQUE;
1393
1394         /* Note: anything that gets to this point is IP */
1395         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
1396                 switch (decoded.inner_prot) {
1397                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1398                                 if (ex)
1399                                         return M_HASHTYPE_RSS_TCP_IPV6_EX;
1400                                 else
1401                                         return M_HASHTYPE_RSS_TCP_IPV6;
1402                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1403                                 if (ex)
1404                                         return M_HASHTYPE_RSS_UDP_IPV6_EX;
1405                                 else
1406                                         return M_HASHTYPE_RSS_UDP_IPV6;
1407                         default:
1408                                 if (ex)
1409                                         return M_HASHTYPE_RSS_IPV6_EX;
1410                                 else
1411                                         return M_HASHTYPE_RSS_IPV6;
1412                 }
1413         }
1414         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
1415                 switch (decoded.inner_prot) {
1416                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1417                                         return M_HASHTYPE_RSS_TCP_IPV4;
1418                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1419                                 if (ex)
1420                                         return M_HASHTYPE_RSS_UDP_IPV4_EX;
1421                                 else
1422                                         return M_HASHTYPE_RSS_UDP_IPV4;
1423                         default:
1424                                 return M_HASHTYPE_RSS_IPV4;
1425                 }
1426         }
1427         /* We should never get here!! */
1428         return M_HASHTYPE_OPAQUE;
1429 }
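
/*
** Illustrative note (derived from the switch above, no new logic): a
** non-fragmented IPv4/TCP ptype maps to M_HASHTYPE_RSS_TCP_IPV4, an
** outer-fragmented IPv6/UDP ptype maps to M_HASHTYPE_RSS_UDP_IPV6_EX,
** and anything unknown or L2-only falls back to M_HASHTYPE_OPAQUE so
** the stack will not trust the reported hash for flow classification.
*/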
1430 #endif /* RSS */
1431
1432 /*********************************************************************
1433  *
1434  *  This routine executes in interrupt context. It replenishes
1435  *  the mbufs in the descriptor ring and passes data that has been
1436  *  DMA'd into host memory up to the upper layer.
1437  *
1438  *  We loop at most count times if count > 0, or until done if
1439  *  count < 0.
1440  *
1441  *  Return TRUE if there is more work to do, FALSE when all clean.
1442  *********************************************************************/
1443 bool
1444 ixl_rxeof(struct ixl_queue *que, int count)
1445 {
1446         struct ixl_vsi          *vsi = que->vsi;
1447         struct rx_ring          *rxr = &que->rxr;
1448         struct ifnet            *ifp = vsi->ifp;
1449 #if defined(INET6) || defined(INET)
1450         struct lro_ctrl         *lro = &rxr->lro;
1451         struct lro_entry        *queued;
1452 #endif
1453         int                     i, nextp, processed = 0;
1454         union i40e_rx_desc      *cur;
1455         struct ixl_rx_buf       *rbuf, *nbuf;
1456
1457
1458         IXL_RX_LOCK(rxr);
1459
1460
1461         for (i = rxr->next_check; count != 0;) {
1462                 struct mbuf     *sendmp, *mh, *mp;
1463                 u32             rsc, status, error;
1464                 u16             hlen, plen, vtag;
1465                 u64             qword;
1466                 u8              ptype;
1467                 bool            eop;
1468  
1469                 /* Sync the ring. */
1470                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1471                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1472
1473                 cur = &rxr->base[i];
1474                 qword = le64toh(cur->wb.qword1.status_error_len);
1475                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1476                     >> I40E_RXD_QW1_STATUS_SHIFT;
1477                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1478                     >> I40E_RXD_QW1_ERROR_SHIFT;
1479                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1480                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1481                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1482                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1483                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1484                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1485
1486                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1487                         ++rxr->not_done;
1488                         break;
1489                 }
1490                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1491                         break;
1492
1493                 count--;
1494                 sendmp = NULL;
1495                 nbuf = NULL;
1496                 rsc = 0;
1497                 cur->wb.qword1.status_error_len = 0;
1498                 rbuf = &rxr->buffers[i];
1499                 mh = rbuf->m_head;
1500                 mp = rbuf->m_pack;
1501                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1502                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1503                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1504                 else
1505                         vtag = 0;
1506
1507                 /*
1508                 ** Make sure bad packets are discarded; note
1509                 ** that only the EOP descriptor has valid
1510                 ** error results.
1511                 */
1512                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1513                         rxr->discarded++;
1514                         ixl_rx_discard(rxr, i);
1515                         goto next_desc;
1516                 }
1517
1518                 /* Prefetch the next buffer */
1519                 if (!eop) {
1520                         nextp = i + 1;
1521                         if (nextp == que->num_desc)
1522                                 nextp = 0;
1523                         nbuf = &rxr->buffers[nextp];
1524                         prefetch(nbuf);
1525                 }
1526
1527                 /*
1528                 ** The header mbuf is ONLY used when header
1529                 ** split is enabled; otherwise we get normal
1530                 ** behavior, i.e., both header and payload
1531                 ** are DMA'd into the payload buffer.
1532                 **
1533                 ** Rather than using the fmp/lmp global pointers
1534                 ** we now keep the head of a packet chain in the
1535                 ** buffer struct and pass this along from one
1536                 ** descriptor to the next, until we get EOP.
1537                 */
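                /*
                ** Sketch (informational only): for a header-split packet that
                ** spans descriptors, the chain built below looks roughly like
                **
                **    mh (header buf) -> mp (payload buf) -> nbuf->m_pack -> ...
                **
                ** with nbuf->fmp carrying the head (mh) forward so that the
                ** EOP descriptor can hand the completed chain to the stack.
                */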
1538                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1539                         if (hlen > IXL_RX_HDR)
1540                                 hlen = IXL_RX_HDR;
1541                         mh->m_len = hlen;
1542                         mh->m_flags |= M_PKTHDR;
1543                         mh->m_next = NULL;
1544                         mh->m_pkthdr.len = mh->m_len;
1545                         /* Null buf pointer so it is refreshed */
1546                         rbuf->m_head = NULL;
1547                         /*
1548                         ** Check the payload length; this
1549                         ** could be zero if it's a small
1550                         ** packet.
1551                         */
1552                         if (plen > 0) {
1553                                 mp->m_len = plen;
1554                                 mp->m_next = NULL;
1555                                 mp->m_flags &= ~M_PKTHDR;
1556                                 mh->m_next = mp;
1557                                 mh->m_pkthdr.len += mp->m_len;
1558                                 /* Null buf pointer so it is refreshed */
1559                                 rbuf->m_pack = NULL;
1560                                 rxr->split++;
1561                         }
1562                         /*
1563                         ** Now create the forward
1564                         ** chain so that when the packet
1565                         ** completes we won't have to.
1566                         */
1567                         if (eop == 0) {
1568                                 /* stash the chain head */
1569                                 nbuf->fmp = mh;
1570                                 /* Make forward chain */
1571                                 if (plen)
1572                                         mp->m_next = nbuf->m_pack;
1573                                 else
1574                                         mh->m_next = nbuf->m_pack;
1575                         } else {
1576                                 /* Singlet, prepare to send */
1577                                 sendmp = mh;
1578                                 if (vtag) {
1579                                         sendmp->m_pkthdr.ether_vtag = vtag;
1580                                         sendmp->m_flags |= M_VLANTAG;
1581                                 }
1582                         }
1583                 } else {
1584                         /*
1585                         ** Either no header split, or a
1586                         ** secondary piece of a fragmented
1587                         ** split packet.
1588                         */
1589                         mp->m_len = plen;
1590                         /*
1591                         ** See if there is a stored head from a prior
1592                         ** descriptor; it tells us whether this is a continuation.
1593                         */
1594                         sendmp = rbuf->fmp;
1595                         rbuf->m_pack = rbuf->fmp = NULL;
1596
1597                         if (sendmp != NULL) /* secondary frag */
1598                                 sendmp->m_pkthdr.len += mp->m_len;
1599                         else {
1600                                 /* first desc of a non-ps chain */
1601                                 sendmp = mp;
1602                                 sendmp->m_flags |= M_PKTHDR;
1603                                 sendmp->m_pkthdr.len = mp->m_len;
1604                                 if (vtag) {
1605                                         sendmp->m_pkthdr.ether_vtag = vtag;
1606                                         sendmp->m_flags |= M_VLANTAG;
1607                                 }
1608                         }
1609                         /* Pass the head pointer on */
1610                         if (eop == 0) {
1611                                 nbuf->fmp = sendmp;
1612                                 sendmp = NULL;
1613                                 mp->m_next = nbuf->m_pack;
1614                         }
1615                 }
1616                 ++processed;
1617                 /* Sending this frame? */
1618                 if (eop) {
1619                         sendmp->m_pkthdr.rcvif = ifp;
1620                         /* gather stats */
1621                         rxr->rx_packets++;
1622                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1623                         /* capture data for dynamic ITR adjustment */
1624                         rxr->packets++;
1625                         rxr->bytes += sendmp->m_pkthdr.len;
1626                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1627                                 ixl_rx_checksum(sendmp, status, error, ptype);
1628 #ifdef RSS
1629                         sendmp->m_pkthdr.flowid =
1630                             le32toh(cur->wb.qword0.hi_dword.rss);
1631                         M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1632 #else
1633                         sendmp->m_pkthdr.flowid = que->msix;
1634                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1635 #endif
1636                 }
1637 next_desc:
1638                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1639                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1640
1641                 /* Advance our pointers to the next descriptor. */
1642                 if (++i == que->num_desc)
1643                         i = 0;
1644
1645                 /* Now send to the stack or do LRO */
1646                 if (sendmp != NULL) {
1647                         rxr->next_check = i;
1648                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1649                         i = rxr->next_check;
1650                 }
1651
1652                 /* Refresh the mbufs every 8 processed descriptors */
1653                 if (processed == 8) {
1654                         ixl_refresh_mbufs(que, i);
1655                         processed = 0;
1656                 }
1657         }
1658
1659         /* Refresh any remaining buf structs */
1660         if (ixl_rx_unrefreshed(que))
1661                 ixl_refresh_mbufs(que, i);
1662
1663         rxr->next_check = i;
1664
1665 #if defined(INET6) || defined(INET)
1666         /*
1667          * Flush any outstanding LRO work
1668          */
1669         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1670                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1671                 tcp_lro_flush(lro, queued);
1672         }
1673 #endif
1674
1675         IXL_RX_UNLOCK(rxr);
1676         return (FALSE);
1677 }
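
/*
** Usage sketch (illustration only; the names here are assumptions, not
** taken from this file): a queue interrupt or task handler would
** typically bound the per-pass work and reschedule itself while more
** RX work remains, roughly:
**
**      more = ixl_rxeof(que, rx_limit);    -- rx_limit is a hypothetical budget
**      if (more)
**              taskqueue_enqueue(que->tq, &que->task);
**
** See the BASE and VF drivers for how this routine is actually invoked.
*/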
1678
1679
1680 /*********************************************************************
1681  *
1682  *  Verify that the hardware indicated that the checksum is valid.
1683  *  Inform the stack about the status of the checksum so that the
1684  *  stack doesn't spend time verifying it again.
1685  *
1686  *********************************************************************/
1687 static void
1688 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1689 {
1690         struct i40e_rx_ptype_decoded decoded;
1691
1692         decoded = decode_rx_desc_ptype(ptype);
1693
1694         /* Errors? */
1695         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1696             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1697                 mp->m_pkthdr.csum_flags = 0;
1698                 return;
1699         }
1700
1701         /* IPv6 packets with extension headers likely have a bad csum */
1702         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1703             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1704                 if (status &
1705                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1706                         mp->m_pkthdr.csum_flags = 0;
1707                         return;
1708                 }
1709
1710  
1711         /* IP Checksum Good */
1712         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1713         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1714
1715         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1716                 mp->m_pkthdr.csum_flags |= 
1717                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1718                 mp->m_pkthdr.csum_data |= htons(0xffff);
1719         }
1720         return;
1721 }
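
/*
** Informational note: with CSUM_DATA_VALID | CSUM_PSEUDO_HDR set and
** csum_data filled in as 0xffff, the TCP/UDP input paths treat the
** payload checksum as already verified and skip recomputing it, while
** clearing csum_flags above forces the stack to fall back to a full
** software verification.
*/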
1722
1723 #if __FreeBSD_version >= 1100000
1724 uint64_t
1725 ixl_get_counter(if_t ifp, ift_counter cnt)
1726 {
1727         struct ixl_vsi *vsi;
1728
1729         vsi = if_getsoftc(ifp);
1730
1731         switch (cnt) {
1732         case IFCOUNTER_IPACKETS:
1733                 return (vsi->ipackets);
1734         case IFCOUNTER_IERRORS:
1735                 return (vsi->ierrors);
1736         case IFCOUNTER_OPACKETS:
1737                 return (vsi->opackets);
1738         case IFCOUNTER_OERRORS:
1739                 return (vsi->oerrors);
1740         case IFCOUNTER_COLLISIONS:
1741                 /* Collisions are impossible in 10G/40G full-duplex Ethernet */
1742                 return (0);
1743         case IFCOUNTER_IBYTES:
1744                 return (vsi->ibytes);
1745         case IFCOUNTER_OBYTES:
1746                 return (vsi->obytes);
1747         case IFCOUNTER_IMCASTS:
1748                 return (vsi->imcasts);
1749         case IFCOUNTER_OMCASTS:
1750                 return (vsi->omcasts);
1751         case IFCOUNTER_IQDROPS:
1752                 return (vsi->iqdrops);
1753         case IFCOUNTER_OQDROPS:
1754                 return (vsi->oqdrops);
1755         case IFCOUNTER_NOPROTO:
1756                 return (vsi->noproto);
1757         default:
1758                 return (if_get_counter_default(ifp, cnt));
1759         }
1760 }
1761 #endif
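
/*
** Wiring sketch (illustration only, not code from this file): on
** FreeBSD 11 and later the counter callback above is expected to be
** registered from the attach path, roughly:
**
**      if_setsoftc(ifp, vsi);
**      if_setgetcounterfn(ifp, ixl_get_counter);
**
** The actual registration lives in the BASE/VF attach code.
*/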
1762