1 /******************************************************************************
2
3   Copyright (c) 2013-2014, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was separated to allow usage by
38 **          both the BASE and the VF drivers.
39 */
40
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 #include "ixl.h"
44
45 /* Local Prototypes */
46 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
47 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
48 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
49 static int      ixl_tx_setup_offload(struct ixl_queue *,
50                     struct mbuf *, u32 *, u32 *);
51 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
52
53 static __inline void ixl_rx_discard(struct rx_ring *, int);
54 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
55                     struct mbuf *, u8);
56
57 /*
58 ** Multiqueue Transmit driver
59 **
60 */
61 int
62 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
63 {
64         struct ixl_vsi          *vsi = ifp->if_softc;
65         struct ixl_queue        *que;
66         struct tx_ring          *txr;
67         int                     err, i;
68
69         /* Which queue to use */
70         if ((m->m_flags & M_FLOWID) != 0)
71                 i = m->m_pkthdr.flowid % vsi->num_queues;
72         else
73                 i = curcpu % vsi->num_queues;
74
75         /* Check for a hung queue and pick alternative */
76         if (((1 << i) & vsi->active_queues) == 0)
77                 i = ffsl(vsi->active_queues);
78
79         que = &vsi->queues[i];
80         txr = &que->txr;
81
82         err = drbr_enqueue(ifp, txr->br, m);
83         if (err)
84                 return(err);
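           /* Drain the ring now if we can take the TX lock, else defer to the queue task */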
85         if (IXL_TX_TRYLOCK(txr)) {
86                 ixl_mq_start_locked(ifp, txr);
87                 IXL_TX_UNLOCK(txr);
88         } else
89                 taskqueue_enqueue(que->tq, &que->tx_task);
90
91         return (0);
92 }
93
94 int
95 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
96 {
97         struct ixl_queue        *que = txr->que;
98         struct ixl_vsi          *vsi = que->vsi;
99         struct mbuf             *next;
100         int                     err = 0;
101
102
103         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
104             vsi->link_active == 0)
105                 return (ENETDOWN);
106
107         /* Process the transmit queue */
108         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
109                 if ((err = ixl_xmit(que, &next)) != 0) {
110                         if (next == NULL)
111                                 drbr_advance(ifp, txr->br);
112                         else
113                                 drbr_putback(ifp, txr->br, next);
114                         break;
115                 }
116                 drbr_advance(ifp, txr->br);
117                 /* Send a copy of the frame to the BPF listener */
118                 ETHER_BPF_MTAP(ifp, next);
119                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
120                         break;
121         }
122
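            /* Reclaim completed descriptors when free slots run low */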
123         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
124                 ixl_txeof(que);
125
126         return (err);
127 }
128
129 /*
130  * Called from a taskqueue to drain queued transmit packets.
131  */
132 void
133 ixl_deferred_mq_start(void *arg, int pending)
134 {
135         struct ixl_queue        *que = arg;
136         struct tx_ring          *txr = &que->txr;
137         struct ixl_vsi          *vsi = que->vsi;
138         struct ifnet            *ifp = vsi->ifp;
139         
140         IXL_TX_LOCK(txr);
141         if (!drbr_empty(ifp, txr->br))
142                 ixl_mq_start_locked(ifp, txr);
143         IXL_TX_UNLOCK(txr);
144 }
145
146 /*
147 ** Flush all queue ring buffers
148 */
149 void
150 ixl_qflush(struct ifnet *ifp)
151 {
152         struct ixl_vsi  *vsi = ifp->if_softc;
153
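            /* Free any packets still queued in each software buf_ring */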
154         for (int i = 0; i < vsi->num_queues; i++) {
155                 struct ixl_queue *que = &vsi->queues[i];
156                 struct tx_ring  *txr = &que->txr;
157                 struct mbuf     *m;
158                 IXL_TX_LOCK(txr);
159                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
160                         m_freem(m);
161                 IXL_TX_UNLOCK(txr);
162         }
163         if_qflush(ifp);
164 }
165
166 /*
167 ** Find mbuf chains passed to the driver 
168 ** that are 'sparse', using more than 8
169 ** mbufs to deliver an MSS-sized chunk of data
170 */
171 static inline bool
172 ixl_tso_detect_sparse(struct mbuf *mp)
173 {
174         struct mbuf     *m;
175         int             num = 0, mss;
176         bool            ret = FALSE;
177
178         mss = mp->m_pkthdr.tso_segsz;
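            /* Count the mbufs needed to cover one MSS worth of payload */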
179         for (m = mp->m_next; m != NULL; m = m->m_next) {
180                 num++;
181                 mss -= m->m_len;
182                 if (mss < 1)
183                         break;
184                 if (m->m_next == NULL)
185                         break;
186         }
187         if (num > IXL_SPARSE_CHAIN)
188                 ret = TRUE;
189
190         return (ret);
191 }
192
193
194 /*********************************************************************
195  *
196  *  This routine maps the mbufs to tx descriptors, allowing the
197  *  TX engine to transmit the packets. 
198  *      - return 0 on success, positive on failure
199  *
200  **********************************************************************/
201 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
202
203 static int
204 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
205 {
206         struct ixl_vsi          *vsi = que->vsi;
207         struct i40e_hw          *hw = vsi->hw;
208         struct tx_ring          *txr = &que->txr;
209         struct ixl_tx_buf       *buf;
210         struct i40e_tx_desc     *txd = NULL;
211         struct mbuf             *m_head, *m;
212         int                     i, j, error, nsegs, maxsegs;
213         int                     first, last = 0;
214         u16                     vtag = 0;
215         u32                     cmd, off;
216         bus_dmamap_t            map;
217         bus_dma_tag_t           tag;
218         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
219
220
221         cmd = off = 0;
222         m_head = *m_headp;
223
224         /*
225          * Important to capture the first descriptor
226          * used because it will contain the index of
227          * the one we tell the hardware to report back
228          */
229         first = txr->next_avail;
230         buf = &txr->buffers[first];
231         map = buf->map;
232         tag = txr->tx_tag;
233         maxsegs = IXL_MAX_TX_SEGS;
234
235         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
236                 /* Use larger mapping for TSO */
237                 tag = txr->tso_tag;
238                 maxsegs = IXL_MAX_TSO_SEGS;
239                 if (ixl_tso_detect_sparse(m_head)) {
240                         m = m_defrag(m_head, M_NOWAIT);
241                         if (m == NULL) {
242                                 m_freem(*m_headp);
243                                 *m_headp = NULL;
244                                 return (ENOBUFS);
245                         }
246                         *m_headp = m;
247                 }
248         }
249
250         /*
251          * Map the packet for DMA.
252          */
253         error = bus_dmamap_load_mbuf_sg(tag, map,
254             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
255
256         if (error == EFBIG) {
257                 struct mbuf *m;
258
259                 m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
260                 if (m == NULL) {
261                         que->mbuf_defrag_failed++;
262                         m_freem(*m_headp);
263                         *m_headp = NULL;
264                         return (ENOBUFS);
265                 }
266                 *m_headp = m;
267
268                 /* Try it again */
269                 error = bus_dmamap_load_mbuf_sg(tag, map,
270                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
271
272                 if (error == ENOMEM) {
273                         que->tx_dma_setup++;
274                         return (error);
275                 } else if (error != 0) {
276                         que->tx_dma_setup++;
277                         m_freem(*m_headp);
278                         *m_headp = NULL;
279                         return (error);
280                 }
281         } else if (error == ENOMEM) {
282                 que->tx_dma_setup++;
283                 return (error);
284         } else if (error != 0) {
285                 que->tx_dma_setup++;
286                 m_freem(*m_headp);
287                 *m_headp = NULL;
288                 return (error);
289         }
290
291         /* Make certain there are enough descriptors */
292         if (nsegs > txr->avail - 2) {
293                 txr->no_desc++;
294                 error = ENOBUFS;
295                 goto xmit_fail;
296         }
297         m_head = *m_headp;
298
299         /* Set up the TSO/CSUM offload */
300         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
301                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
302                 if (error)
303                         goto xmit_fail;
304         }
305
306         cmd |= I40E_TX_DESC_CMD_ICRC;
307         /* Grab the VLAN tag */
308         if (m_head->m_flags & M_VLANTAG) {
309                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
310                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
311         }
312
313         i = txr->next_avail;
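            /* Fill one data descriptor for each DMA segment */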
314         for (j = 0; j < nsegs; j++) {
315                 bus_size_t seglen;
316
317                 buf = &txr->buffers[i];
318                 buf->tag = tag; /* Keep track of the type tag */
319                 txd = &txr->base[i];
320                 seglen = segs[j].ds_len;
321
322                 txd->buffer_addr = htole64(segs[j].ds_addr);
323                 txd->cmd_type_offset_bsz =
324                     htole64(I40E_TX_DESC_DTYPE_DATA
325                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
326                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
327                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
328                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
329
330                 last = i; /* descriptor that will get completion IRQ */
331
332                 if (++i == que->num_desc)
333                         i = 0;
334
335                 buf->m_head = NULL;
336                 buf->eop_index = -1;
337         }
338         /* Set the last descriptor for report */
339         txd->cmd_type_offset_bsz |=
340             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
341         txr->avail -= nsegs;
342         txr->next_avail = i;
343
344         buf->m_head = m_head;
345         /* Swap the dma map between the first and last descriptor */
346         txr->buffers[first].map = buf->map;
347         buf->map = map;
348         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
349
350         /* Set the index of the descriptor that will be marked done */
351         buf = &txr->buffers[first];
352         buf->eop_index = last;
353
354         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
355             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
356         /*
357          * Advance the Transmit Descriptor Tail (TDT); this tells the
358          * hardware that this frame is available to transmit.
359          */
360         ++txr->total_packets;
361         wr32(hw, txr->tail, i);
362
363         ixl_flush(hw);
364         /* Mark outstanding work */
365         if (que->busy == 0)
366                 que->busy = 1;
367         return (0);
368
369 xmit_fail:
370         bus_dmamap_unload(tag, buf->map);
371         return (error);
372 }
373
374
375 /*********************************************************************
376  *
377  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
378  *  the information needed to transmit a packet on the wire. This is
379  *  called only once at attach; setup is done every reset.
380  *
381  **********************************************************************/
382 int
383 ixl_allocate_tx_data(struct ixl_queue *que)
384 {
385         struct tx_ring          *txr = &que->txr;
386         struct ixl_vsi          *vsi = que->vsi;
387         device_t                dev = vsi->dev;
388         struct ixl_tx_buf       *buf;
389         int                     error = 0;
390
391         /*
392          * Setup DMA descriptor areas.
393          */
394         if ((error = bus_dma_tag_create(NULL,           /* parent */
395                                1, 0,                    /* alignment, bounds */
396                                BUS_SPACE_MAXADDR,       /* lowaddr */
397                                BUS_SPACE_MAXADDR,       /* highaddr */
398                                NULL, NULL,              /* filter, filterarg */
399                                IXL_TSO_SIZE,            /* maxsize */
400                                IXL_MAX_TX_SEGS,         /* nsegments */
401                                PAGE_SIZE,               /* maxsegsize */
402                                0,                       /* flags */
403                                NULL,                    /* lockfunc */
404                                NULL,                    /* lockfuncarg */
405                                &txr->tx_tag))) {
406                 device_printf(dev,"Unable to allocate TX DMA tag\n");
407                 goto fail;
408         }
409
410         /* Make a special tag for TSO */
411         if ((error = bus_dma_tag_create(NULL,           /* parent */
412                                1, 0,                    /* alignment, bounds */
413                                BUS_SPACE_MAXADDR,       /* lowaddr */
414                                BUS_SPACE_MAXADDR,       /* highaddr */
415                                NULL, NULL,              /* filter, filterarg */
416                                IXL_TSO_SIZE,            /* maxsize */
417                                IXL_MAX_TSO_SEGS,        /* nsegments */
418                                PAGE_SIZE,               /* maxsegsize */
419                                0,                       /* flags */
420                                NULL,                    /* lockfunc */
421                                NULL,                    /* lockfuncarg */
422                                &txr->tso_tag))) {
423                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
424                 goto fail;
425         }
426
427         if (!(txr->buffers =
428             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
429             que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
430                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
431                 error = ENOMEM;
432                 goto fail;
433         }
434
435         /* Create the descriptor buffer default dma maps */
436         buf = txr->buffers;
437         for (int i = 0; i < que->num_desc; i++, buf++) {
438                 buf->tag = txr->tx_tag;
439                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
440                 if (error != 0) {
441                         device_printf(dev, "Unable to create TX DMA map\n");
442                         goto fail;
443                 }
444         }
445 fail:
446         return (error);
447 }
448
449
450 /*********************************************************************
451  *
452  *  (Re)Initialize a queue transmit ring.
453  *      - called by init, it clears the descriptor ring,
454  *        and frees any stale mbufs 
455  *
456  **********************************************************************/
457 void
458 ixl_init_tx_ring(struct ixl_queue *que)
459 {
460         struct tx_ring *txr = &que->txr;
461         struct ixl_tx_buf *buf;
462
463         /* Clear the old ring contents */
464         IXL_TX_LOCK(txr);
465         bzero((void *)txr->base,
466               (sizeof(struct i40e_tx_desc)) * que->num_desc);
467
468         /* Reset indices */
469         txr->next_avail = 0;
470         txr->next_to_clean = 0;
471
472 #ifdef IXL_FDIR
473         /* Initialize flow director */
474         txr->atr_rate = ixl_atr_rate;
475         txr->atr_count = 0;
476 #endif
477
478         /* Free any existing tx mbufs. */
479         buf = txr->buffers;
480         for (int i = 0; i < que->num_desc; i++, buf++) {
481                 if (buf->m_head != NULL) {
482                         bus_dmamap_sync(buf->tag, buf->map,
483                             BUS_DMASYNC_POSTWRITE);
484                         bus_dmamap_unload(buf->tag, buf->map);
485                         m_freem(buf->m_head);
486                         buf->m_head = NULL;
487                 }
488                 /* Clear the EOP index */
489                 buf->eop_index = -1;
490         }
491
492         /* Set number of descriptors available */
493         txr->avail = que->num_desc;
494
495         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
496             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
497         IXL_TX_UNLOCK(txr);
498 }
499
500
501 /*********************************************************************
502  *
503  *  Free transmit ring related data structures.
504  *
505  **********************************************************************/
506 void
507 ixl_free_que_tx(struct ixl_queue *que)
508 {
509         struct tx_ring *txr = &que->txr;
510         struct ixl_tx_buf *buf;
511
512         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
513
514         for (int i = 0; i < que->num_desc; i++) {
515                 buf = &txr->buffers[i];
516                 if (buf->m_head != NULL) {
517                         bus_dmamap_sync(buf->tag, buf->map,
518                             BUS_DMASYNC_POSTWRITE);
519                         bus_dmamap_unload(buf->tag,
520                             buf->map);
521                         m_freem(buf->m_head);
522                         buf->m_head = NULL;
523                         if (buf->map != NULL) {
524                                 bus_dmamap_destroy(buf->tag,
525                                     buf->map);
526                                 buf->map = NULL;
527                         }
528                 } else if (buf->map != NULL) {
529                         bus_dmamap_unload(buf->tag,
530                             buf->map);
531                         bus_dmamap_destroy(buf->tag,
532                             buf->map);
533                         buf->map = NULL;
534                 }
535         }
536         if (txr->br != NULL)
537                 buf_ring_free(txr->br, M_DEVBUF);
538         if (txr->buffers != NULL) {
539                 free(txr->buffers, M_DEVBUF);
540                 txr->buffers = NULL;
541         }
542         if (txr->tx_tag != NULL) {
543                 bus_dma_tag_destroy(txr->tx_tag);
544                 txr->tx_tag = NULL;
545         }
546         if (txr->tso_tag != NULL) {
547                 bus_dma_tag_destroy(txr->tso_tag);
548                 txr->tso_tag = NULL;
549         }
550
551         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
552         return;
553 }
554
555 /*********************************************************************
556  *
557  *  Setup descriptor for hw offloads 
558  *
559  **********************************************************************/
560
561 static int
562 ixl_tx_setup_offload(struct ixl_queue *que,
563     struct mbuf *mp, u32 *cmd, u32 *off)
564 {
565         struct ether_vlan_header        *eh;
566 #ifdef INET
567         struct ip                       *ip = NULL;
568 #endif
569         struct tcphdr                   *th = NULL;
570 #ifdef INET6
571         struct ip6_hdr                  *ip6;
572 #endif
573         int                             elen, ip_hlen = 0, tcp_hlen;
574         u16                             etype;
575         u8                              ipproto = 0;
576         bool                            tso = FALSE;
577
578
579         /* Set up the TSO context descriptor if required */
580         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
581                 tso = ixl_tso_setup(que, mp);
582                 if (tso)
583                         ++que->tso;
584                 else
585                         return (ENXIO);
586         }
587
588         /*
589          * Determine where frame payload starts.
590          * Jump over vlan headers if already present,
591          * helpful for QinQ too.
592          */
593         eh = mtod(mp, struct ether_vlan_header *);
594         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
595                 etype = ntohs(eh->evl_proto);
596                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
597         } else {
598                 etype = ntohs(eh->evl_encap_proto);
599                 elen = ETHER_HDR_LEN;
600         }
601
602         switch (etype) {
603 #ifdef INET
604                 case ETHERTYPE_IP:
605                         ip = (struct ip *)(mp->m_data + elen);
606                         ip_hlen = ip->ip_hl << 2;
607                         ipproto = ip->ip_p;
608                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
609                         /* The IP checksum must be recalculated with TSO */
610                         if (tso)
611                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
612                         else
613                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
614                         break;
615 #endif
616 #ifdef INET6
617                 case ETHERTYPE_IPV6:
618                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
619                         ip_hlen = sizeof(struct ip6_hdr);
620                         ipproto = ip6->ip6_nxt;
621                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
622                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
623                         break;
624 #endif
625                 default:
626                         break;
627         }
628
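            /* MACLEN is in 2-byte units and IPLEN in 4-byte units, hence the shifts */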
629         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
630         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
631
632         switch (ipproto) {
633                 case IPPROTO_TCP:
634                         tcp_hlen = th->th_off << 2;
635                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
636                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
637                                 *off |= (tcp_hlen >> 2) <<
638                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
639                         }
640 #ifdef IXL_FDIR
641                         ixl_atr(que, th, etype);
642 #endif
643                         break;
644                 case IPPROTO_UDP:
645                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
646                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
647                                 *off |= (sizeof(struct udphdr) >> 2) <<
648                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
649                         }
650                         break;
651
652                 case IPPROTO_SCTP:
653                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
654                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
655                                 *off |= (sizeof(struct sctphdr) >> 2) <<
656                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
657                         }
658                         /* Fall Thru */
659                 default:
660                         break;
661         }
662
663         return (0);
664 }
665
666
667 /**********************************************************************
668  *
669  *  Setup context for hardware segmentation offload (TSO)
670  *
671  **********************************************************************/
672 static bool
673 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
674 {
675         struct tx_ring                  *txr = &que->txr;
676         struct i40e_tx_context_desc     *TXD;
677         struct ixl_tx_buf               *buf;
678         u32                             cmd, mss, type, tsolen;
679         u16                             etype;
680         int                             idx, elen, ip_hlen, tcp_hlen;
681         struct ether_vlan_header        *eh;
682 #ifdef INET
683         struct ip                       *ip;
684 #endif
685 #ifdef INET6
686         struct ip6_hdr                  *ip6;
687 #endif
688 #if defined(INET6) || defined(INET)
689         struct tcphdr                   *th;
690 #endif
691         u64                             type_cmd_tso_mss;
692
693         /*
694          * Determine where frame payload starts.
695          * Jump over vlan headers if already present
696          */
697         eh = mtod(mp, struct ether_vlan_header *);
698         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
699                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
700                 etype = eh->evl_proto;
701         } else {
702                 elen = ETHER_HDR_LEN;
703                 etype = eh->evl_encap_proto;
704         }
705
706         switch (ntohs(etype)) {
707 #ifdef INET6
708         case ETHERTYPE_IPV6:
709                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
710                 if (ip6->ip6_nxt != IPPROTO_TCP)
711                         return (FALSE);
712                 ip_hlen = sizeof(struct ip6_hdr);
713                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
714                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
715                 tcp_hlen = th->th_off << 2;
716                 break;
717 #endif
718 #ifdef INET
719         case ETHERTYPE_IP:
720                 ip = (struct ip *)(mp->m_data + elen);
721                 if (ip->ip_p != IPPROTO_TCP)
722                         return (FALSE);
723                 ip->ip_sum = 0;
724                 ip_hlen = ip->ip_hl << 2;
725                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
726                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
727                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
728                 tcp_hlen = th->th_off << 2;
729                 break;
730 #endif
731         default:
732                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
733                     __func__, ntohs(etype));
734                 return FALSE;
735         }
736
737         /* Ensure we have at least the IP+TCP header in the first mbuf. */
738         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
739                 return FALSE;
740
741         idx = txr->next_avail;
742         buf = &txr->buffers[idx];
743         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
744         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
745
746         type = I40E_TX_DESC_DTYPE_CONTEXT;
747         cmd = I40E_TX_CTX_DESC_TSO;
748         mss = mp->m_pkthdr.tso_segsz;
749
750         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
751             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
752             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
753             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
754         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
755
756         TXD->tunneling_params = htole32(0);
757         buf->m_head = NULL;
758         buf->eop_index = -1;
759
760         if (++idx == que->num_desc)
761                 idx = 0;
762
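            /* The TSO context descriptor consumes one ring slot */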
763         txr->avail--;
764         txr->next_avail = idx;
765
766         return TRUE;
767 }
768
769 /*             
770 ** ixl_get_tx_head - Retrieve the value from the
771 **    location where the HW records its HEAD index
772 */
773 static inline u32
774 ixl_get_tx_head(struct ixl_queue *que)
775 {
776         struct tx_ring  *txr = &que->txr;
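            /* The head write-back area sits just past the last descriptor */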
777         void *head = &txr->base[que->num_desc];
778         return LE32_TO_CPU(*(volatile __le32 *)head);
779 }
780
781 /**********************************************************************
782  *
783  *  Examine each tx_buffer in the used queue. If the hardware is done
784  *  processing the packet then free associated resources. The
785  *  tx_buffer is put back on the free queue.
786  *
787  **********************************************************************/
788 bool
789 ixl_txeof(struct ixl_queue *que)
790 {
791         struct tx_ring          *txr = &que->txr;
792         u32                     first, last, head, done, processed;
793         struct ixl_tx_buf       *buf;
794         struct i40e_tx_desc     *tx_desc, *eop_desc;
795
796
797         mtx_assert(&txr->mtx, MA_OWNED);
798
799
800         /* These are not the descriptors you seek, move along :) */
801         if (txr->avail == que->num_desc) {
802                 que->busy = 0;
803                 return FALSE;
804         }
805
806         processed = 0;
807         first = txr->next_to_clean;
808         buf = &txr->buffers[first];
809         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
810         last = buf->eop_index;
811         if (last == -1)
812                 return FALSE;
813         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
814
815         /* Get the Head WB value */
816         head = ixl_get_tx_head(que);
817
818         /*
819         ** Get the index of the first descriptor
820         ** BEYOND the EOP and call that 'done'.
821         ** I do this so the comparison in the
822         ** inner while loop below can be simple
823         */
824         if (++last == que->num_desc) last = 0;
825         done = last;
826
827         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
828             BUS_DMASYNC_POSTREAD);
829         /*
830         ** The HEAD index of the ring is written to a
831         ** defined location; this, rather than a done bit,
832         ** is what is used to keep track of what must be
833         ** 'cleaned'.
834         */
835         while (first != head) {
836                 /* We clean the range of the packet */
837                 while (first != done) {
838                         ++txr->avail;
839                         ++processed;
840
841                         if (buf->m_head) {
842                                 txr->bytes += /* for ITR adjustment */
843                                     buf->m_head->m_pkthdr.len;
844                                 txr->tx_bytes += /* for TX stats */
845                                     buf->m_head->m_pkthdr.len;
846                                 bus_dmamap_sync(buf->tag,
847                                     buf->map,
848                                     BUS_DMASYNC_POSTWRITE);
849                                 bus_dmamap_unload(buf->tag,
850                                     buf->map);
851                                 m_freem(buf->m_head);
852                                 buf->m_head = NULL;
853                                 buf->map = NULL;
854                         }
855                         buf->eop_index = -1;
856
857                         if (++first == que->num_desc)
858                                 first = 0;
859
860                         buf = &txr->buffers[first];
861                         tx_desc = &txr->base[first];
862                 }
863                 ++txr->packets;
864                 /* See if there is more work now */
865                 last = buf->eop_index;
866                 if (last != -1) {
867                         eop_desc = &txr->base[last];
868                         /* Get next done point */
869                         if (++last == que->num_desc) last = 0;
870                         done = last;
871                 } else
872                         break;
873         }
874         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
875             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
876
877         txr->next_to_clean = first;
878
879
880         /*
881         ** Hang detection: we know there's work
882         ** outstanding, or the first return above
883         ** would have been taken, so indicate an
884         ** unsuccessful pass. In local_timer, if
885         ** the value grows too large the queue is
886         ** considered hung. If anything has been
887         ** cleaned then reset the state.
888         */
889         if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
890                 ++que->busy;
891
892         if (processed)
893                 que->busy = 1; /* Note this turns off HUNG */
894
895         /*
896          * If there are no pending descriptors, clear the timeout.
897          */
898         if (txr->avail == que->num_desc) {
899                 que->busy = 0;
900                 return FALSE;
901         }
902
903         return TRUE;
904 }
905
906 /*********************************************************************
907  *
908  *  Refresh mbuf buffers for RX descriptor rings
909  *   - now keeps its own state, so discards due to resource
910  *     exhaustion are unnecessary; if an mbuf cannot be obtained
911  *     it just returns, keeping its placeholder, so it can simply
912  *     be called again to retry.
913  *
914  **********************************************************************/
915 static void
916 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
917 {
918         struct ixl_vsi          *vsi = que->vsi;
919         struct rx_ring          *rxr = &que->rxr;
920         bus_dma_segment_t       hseg[1];
921         bus_dma_segment_t       pseg[1];
922         struct ixl_rx_buf       *buf;
923         struct mbuf             *mh, *mp;
924         int                     i, j, nsegs, error;
925         bool                    refreshed = FALSE;
926
927         i = j = rxr->next_refresh;
928         /* Control the loop with one beyond */
929         if (++j == que->num_desc)
930                 j = 0;
931
932         while (j != limit) {
933                 buf = &rxr->buffers[i];
934                 if (rxr->hdr_split == FALSE)
935                         goto no_split;
936
937                 if (buf->m_head == NULL) {
938                         mh = m_gethdr(M_NOWAIT, MT_DATA);
939                         if (mh == NULL)
940                                 goto update;
941                 } else
942                         mh = buf->m_head;
943
944                 mh->m_pkthdr.len = mh->m_len = MHLEN;
945                 mh->m_len = MHLEN;
946                 mh->m_flags |= M_PKTHDR;
947                 /* Get the memory mapping */
948                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
949                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
950                 if (error != 0) {
951                         printf("Refresh mbufs: hdr dmamap load"
952                             " failure - %d\n", error);
953                         m_free(mh);
954                         buf->m_head = NULL;
955                         goto update;
956                 }
957                 buf->m_head = mh;
958                 bus_dmamap_sync(rxr->htag, buf->hmap,
959                     BUS_DMASYNC_PREREAD);
960                 rxr->base[i].read.hdr_addr =
961                    htole64(hseg[0].ds_addr);
962
963 no_split:
964                 if (buf->m_pack == NULL) {
965                         mp = m_getjcl(M_NOWAIT, MT_DATA,
966                             M_PKTHDR, rxr->mbuf_sz);
967                         if (mp == NULL)
968                                 goto update;
969                 } else
970                         mp = buf->m_pack;
971
972                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
973                 /* Get the memory mapping */
974                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
975                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
976                 if (error != 0) {
977                         printf("Refresh mbufs: payload dmamap load"
978                             " failure - %d\n", error);
979                         m_free(mp);
980                         buf->m_pack = NULL;
981                         goto update;
982                 }
983                 buf->m_pack = mp;
984                 bus_dmamap_sync(rxr->ptag, buf->pmap,
985                     BUS_DMASYNC_PREREAD);
986                 rxr->base[i].read.pkt_addr =
987                    htole64(pseg[0].ds_addr);
988                 /* Used only when doing header split */
989                 rxr->base[i].read.hdr_addr = 0;
990
991                 refreshed = TRUE;
992                 /* Next is precalculated */
993                 i = j;
994                 rxr->next_refresh = i;
995                 if (++j == que->num_desc)
996                         j = 0;
997         }
998 update:
999         if (refreshed) /* Update hardware tail index */
1000                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1001         return;
1002 }
1003
1004
1005 /*********************************************************************
1006  *
1007  *  Allocate memory for rx_buffer structures. Since we use one
1008  *  rx_buffer per descriptor, the maximum number of rx_buffers
1009  *  that we'll need is equal to the number of receive descriptors
1010  *  that we've defined.
1011  *
1012  **********************************************************************/
1013 int
1014 ixl_allocate_rx_data(struct ixl_queue *que)
1015 {
1016         struct rx_ring          *rxr = &que->rxr;
1017         struct ixl_vsi          *vsi = que->vsi;
1018         device_t                dev = vsi->dev;
1019         struct ixl_rx_buf       *buf;
1020         int                     i, bsize, error;
1021
1022         bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1023         if (!(rxr->buffers =
1024             (struct ixl_rx_buf *) malloc(bsize,
1025             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1026                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1027                 error = ENOMEM;
1028                 return (error);
1029         }
1030
1031         if ((error = bus_dma_tag_create(NULL,   /* parent */
1032                                    1, 0,        /* alignment, bounds */
1033                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1034                                    BUS_SPACE_MAXADDR,   /* highaddr */
1035                                    NULL, NULL,          /* filter, filterarg */
1036                                    MSIZE,               /* maxsize */
1037                                    1,                   /* nsegments */
1038                                    MSIZE,               /* maxsegsize */
1039                                    0,                   /* flags */
1040                                    NULL,                /* lockfunc */
1041                                    NULL,                /* lockfuncarg */
1042                                    &rxr->htag))) {
1043                 device_printf(dev, "Unable to create RX DMA htag\n");
1044                 return (error);
1045         }
1046
1047         if ((error = bus_dma_tag_create(NULL,   /* parent */
1048                                    1, 0,        /* alignment, bounds */
1049                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1050                                    BUS_SPACE_MAXADDR,   /* highaddr */
1051                                    NULL, NULL,          /* filter, filterarg */
1052                                    MJUM16BYTES,         /* maxsize */
1053                                    1,                   /* nsegments */
1054                                    MJUM16BYTES,         /* maxsegsize */
1055                                    0,                   /* flags */
1056                                    NULL,                /* lockfunc */
1057                                    NULL,                /* lockfuncarg */
1058                                    &rxr->ptag))) {
1059                 device_printf(dev, "Unable to create RX DMA ptag\n");
1060                 return (error);
1061         }
1062
1063         for (i = 0; i < que->num_desc; i++) {
1064                 buf = &rxr->buffers[i];
1065                 error = bus_dmamap_create(rxr->htag,
1066                     BUS_DMA_NOWAIT, &buf->hmap);
1067                 if (error) {
1068                         device_printf(dev, "Unable to create RX head map\n");
1069                         break;
1070                 }
1071                 error = bus_dmamap_create(rxr->ptag,
1072                     BUS_DMA_NOWAIT, &buf->pmap);
1073                 if (error) {
1074                         device_printf(dev, "Unable to create RX pkt map\n");
1075                         break;
1076                 }
1077         }
1078
1079         return (error);
1080 }
1081
1082
1083 /*********************************************************************
1084  *
1085  *  (Re)Initialize the queue receive ring and its buffers.
1086  *
1087  **********************************************************************/
1088 int
1089 ixl_init_rx_ring(struct ixl_queue *que)
1090 {
1091         struct  rx_ring         *rxr = &que->rxr;
1092 #if defined(INET6) || defined(INET)
1093         struct ixl_vsi          *vsi = que->vsi;
1094         struct ifnet            *ifp = vsi->ifp;
1095         struct lro_ctrl         *lro = &rxr->lro;
1096 #endif
1097         struct ixl_rx_buf       *buf;
1098         bus_dma_segment_t       pseg[1], hseg[1];
1099         int                     rsize, nsegs, error = 0;
1100
1101         IXL_RX_LOCK(rxr);
1102         /* Clear the ring contents */
1103         rsize = roundup2(que->num_desc *
1104             sizeof(union i40e_rx_desc), DBA_ALIGN);
1105         bzero((void *)rxr->base, rsize);
1106         /* Cleanup any existing buffers */
1107         for (int i = 0; i < que->num_desc; i++) {
1108                 buf = &rxr->buffers[i];
1109                 if (buf->m_head != NULL) {
1110                         bus_dmamap_sync(rxr->htag, buf->hmap,
1111                             BUS_DMASYNC_POSTREAD);
1112                         bus_dmamap_unload(rxr->htag, buf->hmap);
1113                         buf->m_head->m_flags |= M_PKTHDR;
1114                         m_freem(buf->m_head);
1115                 }
1116                 if (buf->m_pack != NULL) {
1117                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1118                             BUS_DMASYNC_POSTREAD);
1119                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1120                         buf->m_pack->m_flags |= M_PKTHDR;
1121                         m_freem(buf->m_pack);
1122                 }
1123                 buf->m_head = NULL;
1124                 buf->m_pack = NULL;
1125         }
1126
1127         /* header split is off */
1128         rxr->hdr_split = FALSE;
1129
1130         /* Now replenish the mbufs */
1131         for (int j = 0; j != que->num_desc; ++j) {
1132                 struct mbuf     *mh, *mp;
1133
1134                 buf = &rxr->buffers[j];
1135                 /*
1136                 ** Don't allocate mbufs if not
1137                 ** doing header split, it's wasteful
1138                 */ 
1139                 if (rxr->hdr_split == FALSE)
1140                         goto skip_head;
1141
1142                 /* First the header */
1143                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1144                 if (buf->m_head == NULL) {
1145                         error = ENOBUFS;
1146                         goto fail;
1147                 }
1148                 m_adj(buf->m_head, ETHER_ALIGN);
1149                 mh = buf->m_head;
1150                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1151                 mh->m_flags |= M_PKTHDR;
1152                 /* Get the memory mapping */
1153                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1154                     buf->hmap, buf->m_head, hseg,
1155                     &nsegs, BUS_DMA_NOWAIT);
1156                 if (error != 0) /* Nothing elegant to do here */
1157                         goto fail;
1158                 bus_dmamap_sync(rxr->htag,
1159                     buf->hmap, BUS_DMASYNC_PREREAD);
1160                 /* Update descriptor */
1161                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1162
1163 skip_head:
1164                 /* Now the payload cluster */
1165                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1166                     M_PKTHDR, rxr->mbuf_sz);
1167                 if (buf->m_pack == NULL) {
1168                         error = ENOBUFS;
1169                         goto fail;
1170                 }
1171                 mp = buf->m_pack;
1172                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1173                 /* Get the memory mapping */
1174                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1175                     buf->pmap, mp, pseg,
1176                     &nsegs, BUS_DMA_NOWAIT);
1177                 if (error != 0)
1178                         goto fail;
1179                 bus_dmamap_sync(rxr->ptag,
1180                     buf->pmap, BUS_DMASYNC_PREREAD);
1181                 /* Update descriptor */
1182                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1183                 rxr->base[j].read.hdr_addr = 0;
1184         }
1185
1186
1187         /* Setup our descriptor indices */
1188         rxr->next_check = 0;
1189         rxr->next_refresh = 0;
1190         rxr->lro_enabled = FALSE;
1191         rxr->split = 0;
1192         rxr->bytes = 0;
1193         rxr->discard = FALSE;
1194
1195         wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1196         ixl_flush(vsi->hw);
1197
1198 #if defined(INET6) || defined(INET)
1199         /*
1200         ** Now set up the LRO interface:
1201         */
1202         if (ifp->if_capenable & IFCAP_LRO) {
1203                 int err = tcp_lro_init(lro);
1204                 if (err) {
1205                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1206                         goto fail;
1207                 }
1208                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1209                 rxr->lro_enabled = TRUE;
1210                 lro->ifp = vsi->ifp;
1211         }
1212 #endif
1213
1214         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1215             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1216
1217 fail:
1218         IXL_RX_UNLOCK(rxr);
1219         return (error);
1220 }
1221
1222
1223 /*********************************************************************
1224  *
1225  *  Free station receive ring data structures
1226  *
1227  **********************************************************************/
1228 void
1229 ixl_free_que_rx(struct ixl_queue *que)
1230 {
1231         struct rx_ring          *rxr = &que->rxr;
1232         struct ixl_rx_buf       *buf;
1233
1234         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1235
1236         /* Cleanup any existing buffers */
1237         if (rxr->buffers != NULL) {
1238                 for (int i = 0; i < que->num_desc; i++) {
1239                         buf = &rxr->buffers[i];
1240                         if (buf->m_head != NULL) {
1241                                 bus_dmamap_sync(rxr->htag, buf->hmap,
1242                                     BUS_DMASYNC_POSTREAD);
1243                                 bus_dmamap_unload(rxr->htag, buf->hmap);
1244                                 buf->m_head->m_flags |= M_PKTHDR;
1245                                 m_freem(buf->m_head);
1246                         }
1247                         if (buf->m_pack != NULL) {
1248                                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1249                                     BUS_DMASYNC_POSTREAD);
1250                                 bus_dmamap_unload(rxr->ptag, buf->pmap);
1251                                 buf->m_pack->m_flags |= M_PKTHDR;
1252                                 m_freem(buf->m_pack);
1253                         }
1254                         buf->m_head = NULL;
1255                         buf->m_pack = NULL;
1256                         if (buf->hmap != NULL) {
1257                                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1258                                 buf->hmap = NULL;
1259                         }
1260                         if (buf->pmap != NULL) {
1261                                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1262                                 buf->pmap = NULL;
1263                         }
1264                 }
1265                 if (rxr->buffers != NULL) {
1266                         free(rxr->buffers, M_DEVBUF);
1267                         rxr->buffers = NULL;
1268                 }
1269         }
1270
1271         if (rxr->htag != NULL) {
1272                 bus_dma_tag_destroy(rxr->htag);
1273                 rxr->htag = NULL;
1274         }
1275         if (rxr->ptag != NULL) {
1276                 bus_dma_tag_destroy(rxr->ptag);
1277                 rxr->ptag = NULL;
1278         }
1279
1280         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1281         return;
1282 }
1283
1284 static __inline void
1285 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1286 {
1287
1288 #if defined(INET6) || defined(INET)
1289         /*
1290          * At the moment LRO is only for IPv4/TCP packets and the TCP checksum
1291          * of the packet should be computed by hardware. Also it should not have
1292          * a VLAN tag in the ethernet header.
1293          */
1294         if (rxr->lro_enabled &&
1295             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1296             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1297             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1298                 /*
1299                  * Send to the stack if:
1300                  **  - LRO not enabled, or
1301                  **  - no LRO resources, or
1302                  **  - lro enqueue fails
1303                  */
1304                 if (rxr->lro.lro_cnt != 0)
1305                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1306                                 return;
1307         }
1308 #endif
1309         IXL_RX_UNLOCK(rxr);
1310         (*ifp->if_input)(ifp, m);
1311         IXL_RX_LOCK(rxr);
1312 }
1313
1314
1315 static __inline void
1316 ixl_rx_discard(struct rx_ring *rxr, int i)
1317 {
1318         struct ixl_rx_buf       *rbuf;
1319
1320         rbuf = &rxr->buffers[i];
1321
1322         if (rbuf->fmp != NULL) {/* Partial chain ? */
1323                 rbuf->fmp->m_flags |= M_PKTHDR;
1324                 m_freem(rbuf->fmp);
1325                 rbuf->fmp = NULL;
1326         }
1327
1328         /*
1329         ** With advanced descriptors the writeback
1330         ** clobbers the buffer addrs, so it's easier
1331         ** to just free the existing mbufs and take
1332         ** the normal refresh path to get new buffers
1333         ** and mapping.
1334         */
1335         if (rbuf->m_head) {
1336                 m_free(rbuf->m_head);
1337                 rbuf->m_head = NULL;
1338         }
1339  
1340         if (rbuf->m_pack) {
1341                 m_free(rbuf->m_pack);
1342                 rbuf->m_pack = NULL;
1343         }
1344
1345         return;
1346 }
1347
1348
1349 /*********************************************************************
1350  *
1351  *  This routine executes in interrupt context. It replenishes
1352  *  the mbufs in the descriptor ring and sends data which has been
1353  *  DMA'd into host memory to the upper layer.
1354  *
1355  *  We loop at most count times if count is > 0, or until done if
1356  *  count < 0.
1357  *
1358  *  Return TRUE for more work, FALSE for all clean.
1359  *********************************************************************/
1360 bool
1361 ixl_rxeof(struct ixl_queue *que, int count)
1362 {
1363         struct ixl_vsi          *vsi = que->vsi;
1364         struct rx_ring          *rxr = &que->rxr;
1365         struct ifnet            *ifp = vsi->ifp;
1366 #if defined(INET6) || defined(INET)
1367         struct lro_ctrl         *lro = &rxr->lro;
1368         struct lro_entry        *queued;
1369 #endif
1370         int                     i, nextp, processed = 0;
1371         union i40e_rx_desc      *cur;
1372         struct ixl_rx_buf       *rbuf, *nbuf;
1373
1374
1375         IXL_RX_LOCK(rxr);
1376
1377
1378         for (i = rxr->next_check; count != 0;) {
1379                 struct mbuf     *sendmp, *mh, *mp;
1380                 u32             rsc, status, error;
1381                 u16             hlen, plen, vtag;
1382                 u64             qword;
1383                 u8              ptype;
1384                 bool            eop;
1385  
1386                 /* Sync the ring. */
1387                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1388                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1389
1390                 cur = &rxr->base[i];
1391                 qword = le64toh(cur->wb.qword1.status_error_len);
1392                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1393                     >> I40E_RXD_QW1_STATUS_SHIFT;
1394                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1395                     >> I40E_RXD_QW1_ERROR_SHIFT;
1396                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1397                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1398                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1399                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1400                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1401                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1402
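                     /* Descriptor not written back yet (DD clear): nothing more to clean */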
1403                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1404                         ++rxr->not_done;
1405                         break;
1406                 }
1407                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1408                         break;
1409
1410                 count--;
1411                 sendmp = NULL;
1412                 nbuf = NULL;
1413                 rsc = 0;
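                     /* Clear the writeback status so a stale descriptor is not seen as done again */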
1414                 cur->wb.qword1.status_error_len = 0;
1415                 rbuf = &rxr->buffers[i];
1416                 mh = rbuf->m_head;
1417                 mp = rbuf->m_pack;
1418                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1419                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1420                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1421                 else
1422                         vtag = 0;
1423
1424                 /*
1425                 ** Make sure bad packets are discarded,
1426                 ** note that only the EOP descriptor has valid
1427                 ** error results.
1428                 */
1429                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1430                         rxr->discarded++;
1431                         ixl_rx_discard(rxr, i);
1432                         goto next_desc;
1433                 }
1434
1435                 /* Prefetch the next buffer */
1436                 if (!eop) {
1437                         nextp = i + 1;
1438                         if (nextp == que->num_desc)
1439                                 nextp = 0;
1440                         nbuf = &rxr->buffers[nextp];
1441                         prefetch(nbuf);
1442                 }
1443
1444                 /*
1445                 ** The header mbuf is ONLY used when header 
1446                 ** split is enabled; otherwise we get normal
1447                 ** behavior, i.e., both header and payload
1448                 ** are DMA'd into the payload buffer.
1449                 **
1450                 ** Rather than using the fmp/lmp global pointers
1451                 ** we now keep the head of a packet chain in the
1452                 ** buffer struct and pass this along from one
1453                 ** descriptor to the next, until we get EOP.
1454                 */
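                /*
                ** Illustrative note (added, not in the original source):
                ** for a header-split packet spanning three descriptors
                ** the chain is built roughly as follows:
                **
                **   desc[n]:   rbuf->fmp == NULL, start a new chain with
                **              mh (header) -> mp (payload) and stash the
                **              head in buffers[n+1].fmp
                **   desc[n+1]: continuation, append its m_pack to the
                **              chain and pass the head to buffers[n+2].fmp
                **   desc[n+2]: EOP set, sendmp = the stored head and the
                **              complete chain is handed to the stack
                */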
1455                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1456                         if (hlen > IXL_RX_HDR)
1457                                 hlen = IXL_RX_HDR;
1458                         mh->m_len = hlen;
1459                         mh->m_flags |= M_PKTHDR;
1460                         mh->m_next = NULL;
1461                         mh->m_pkthdr.len = mh->m_len;
1462                         /* Null buf pointer so it is refreshed */
1463                         rbuf->m_head = NULL;
1464                         /*
1465                         ** Check the payload length, this
1466                         ** could be zero if it's a small
1467                         ** packet.
1468                         */
1469                         if (plen > 0) {
1470                                 mp->m_len = plen;
1471                                 mp->m_next = NULL;
1472                                 mp->m_flags &= ~M_PKTHDR;
1473                                 mh->m_next = mp;
1474                                 mh->m_pkthdr.len += mp->m_len;
1475                                 /* Null buf pointer so it is refreshed */
1476                                 rbuf->m_pack = NULL;
1477                                 rxr->split++;
1478                         }
1479                         /*
1480                         ** Now create the forward
1481                         ** chain so when complete 
1482                         ** we won't have to.
1483                         */
1484                         if (eop == 0) {
1485                                 /* stash the chain head */
1486                                 nbuf->fmp = mh;
1487                                 /* Make forward chain */
1488                                 if (plen)
1489                                         mp->m_next = nbuf->m_pack;
1490                                 else
1491                                         mh->m_next = nbuf->m_pack;
1492                         } else {
1493                                 /* Singlet, prepare to send */
1494                                 sendmp = mh;
1495                                 if (vtag) {
1496                                         sendmp->m_pkthdr.ether_vtag = vtag;
1497                                         sendmp->m_flags |= M_VLANTAG;
1498                                 }
1499                         }
1500                 } else {
1501                         /*
1502                         ** Either no header split, or a
1503                         ** secondary piece of a fragmented
1504                         ** split packet.
1505                         */
1506                         mp->m_len = plen;
1507                         /*
1508                         ** See if there is a stored head from a prior
1509                         ** descriptor; it determines how we proceed.
1510                         */
1511                         sendmp = rbuf->fmp;
1512                         rbuf->m_pack = rbuf->fmp = NULL;
1513
1514                         if (sendmp != NULL) /* secondary frag */
1515                                 sendmp->m_pkthdr.len += mp->m_len;
1516                         else {
1517                                 /* first desc of a non-ps chain */
1518                                 sendmp = mp;
1519                                 sendmp->m_flags |= M_PKTHDR;
1520                                 sendmp->m_pkthdr.len = mp->m_len;
1521                                 if (vtag) {
1522                                         sendmp->m_pkthdr.ether_vtag = vtag;
1523                                         sendmp->m_flags |= M_VLANTAG;
1524                                 }
1525                         }
1526                         /* Pass the head pointer on */
1527                         if (eop == 0) {
1528                                 nbuf->fmp = sendmp;
1529                                 sendmp = NULL;
1530                                 mp->m_next = nbuf->m_pack;
1531                         }
1532                 }
1533                 ++processed;
1534                 /* Sending this frame? */
1535                 if (eop) {
1536                         sendmp->m_pkthdr.rcvif = ifp;
1537                         /* gather stats */
1538                         rxr->rx_packets++;
1539                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1540                         /* capture data for dynamic ITR adjustment */
1541                         rxr->packets++;
1542                         rxr->bytes += sendmp->m_pkthdr.len;
1543                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1544                                 ixl_rx_checksum(sendmp, status, error, ptype);
1545                         sendmp->m_pkthdr.flowid = que->msix;
1546                         sendmp->m_flags |= M_FLOWID;
1547                 }
1548 next_desc:
1549                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1550                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1551
1552                 /* Advance our pointers to the next descriptor. */
1553                 if (++i == que->num_desc)
1554                         i = 0;
1555
1556                 /* Now send to the stack or do LRO */
1557                 if (sendmp != NULL) {
1558                         rxr->next_check = i;
1559                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1560                         i = rxr->next_check;
1561                 }
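                /*
                ** Note (added for clarity): next_check is saved before and
                ** re-read after the call above because ixl_rx_input() may
                ** drop and re-take the RX lock around if_input(), so the
                ** ring index could have moved while the lock was released.
                */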
1562
1563                 /* Refresh mbufs every 8 descriptors to amortize the tail update */
1564                 if (processed == 8) {
1565                         ixl_refresh_mbufs(que, i);
1566                         processed = 0;
1567                 }
1568         }
1569
1570         /* Refresh any remaining buf structs */
1571         if (ixl_rx_unrefreshed(que))
1572                 ixl_refresh_mbufs(que, i);
1573
1574         rxr->next_check = i;
1575
1576 #if defined(INET6) || defined(INET)
1577         /*
1578          * Flush any outstanding LRO work
1579          */
1580         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1581                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1582                 tcp_lro_flush(lro, queued);
1583         }
1584 #endif
1585
1586         IXL_RX_UNLOCK(rxr);
1587         return (FALSE);
1588 }
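/*
** Illustrative sketch (added, not part of the driver): a per-queue
** interrupt task would typically drive ixl_rxeof() with a budget and
** reschedule itself while more work remains.  The names used below
** (example_handle_que, the 256 budget, que->tq/que->task and the
** omitted interrupt re-enable) are assumptions for illustration only.
*/
#if 0   /* example only, not compiled */
static void
example_handle_que(void *context, int pending)
{
        struct ixl_queue        *que = context;
        bool                    more;

        /* Clean up to 256 RX descriptors; TRUE means work remains */
        more = ixl_rxeof(que, 256);
        if (more) {
                /* Not done yet, reschedule this task */
                taskqueue_enqueue(que->tq, &que->task);
                return;
        }
        /* All clean: the real handler would re-enable the queue IRQ here */
}
#endif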
1589
1590
1591 /*********************************************************************
1592  *
1593  *  Verify that the hardware indicated that the checksum is valid.
1594  *  Inform the stack of the checksum status so that the stack
1595  *  doesn't spend time verifying it in software.
1596  *
1597  *********************************************************************/
1598 static void
1599 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1600 {
1601         struct i40e_rx_ptype_decoded decoded;
1602
1603         decoded = decode_rx_desc_ptype(ptype);
1604
1605         /* Errors? */
1606         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1607             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1608                 mp->m_pkthdr.csum_flags = 0;
1609                 return;
1610         }
1611
1612         /* IPv6 packets with extension headers likely have a bad csum */
1613         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1614             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1615                 if (status &
1616                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1617                         mp->m_pkthdr.csum_flags = 0;
1618                         return;
1619                 }
1620
1621  
1622         /* IP Checksum Good */
1623         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1624         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1625
1626         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1627                 mp->m_pkthdr.csum_flags |= 
1628                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1629                 mp->m_pkthdr.csum_data |= htons(0xffff);
1630         }
1631         return;
1632 }
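/*
** Note (added for clarity, not in the original source): after this
** routine, a packet whose IP and L4 checksums were both verified by
** the hardware carries
**
**      csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
**                   CSUM_DATA_VALID | CSUM_PSEUDO_HDR
**      csum_data  = 0xffff
**
** which tells the upper layers to skip both checksum calculations.
*/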
1633
1634 #if __FreeBSD_version >= 1100000
1635 uint64_t
1636 ixl_get_counter(if_t ifp, ift_counter cnt)
1637 {
1638         struct ixl_vsi *vsi;
1639
1640         vsi = if_getsoftc(ifp);
1641
1642         switch (cnt) {
1643         case IFCOUNTER_IPACKETS:
1644                 return (vsi->ipackets);
1645         case IFCOUNTER_IERRORS:
1646                 return (vsi->ierrors);
1647         case IFCOUNTER_OPACKETS:
1648                 return (vsi->opackets);
1649         case IFCOUNTER_OERRORS:
1650                 return (vsi->oerrors);
1651         case IFCOUNTER_COLLISIONS:
1652                 /* Collisions are by standard impossible in 40G/10G Ethernet */
1653                 return (0);
1654         case IFCOUNTER_IBYTES:
1655                 return (vsi->ibytes);
1656         case IFCOUNTER_OBYTES:
1657                 return (vsi->obytes);
1658         case IFCOUNTER_IMCASTS:
1659                 return (vsi->imcasts);
1660         case IFCOUNTER_OMCASTS:
1661                 return (vsi->omcasts);
1662         case IFCOUNTER_IQDROPS:
1663                 return (vsi->iqdrops);
1664         case IFCOUNTER_OQDROPS:
1665                 return (vsi->oqdrops);
1666         case IFCOUNTER_NOPROTO:
1667                 return (vsi->noproto);
1668         default:
1669                 return (if_get_counter_default(ifp, cnt));
1670         }
1671 }
1672 #endif
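/*
** Illustrative sketch (added, not part of this file): on FreeBSD 11 and
** later the base driver's attach path is expected to register the
** routine above as the ifnet counter callback, roughly as shown below;
** the exact call site is an assumption here.
*/
#if 0   /* example only, not compiled */
        if_setgetcounterfn(ifp, ixl_get_counter);
#endif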
1673