1 /******************************************************************************
2
3   Copyright (c) 2013-2014, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was separated to allow usage by
38 **          both the BASE and the VF drivers.
39 */
40
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 #include "ixl.h"
44
45 #ifdef RSS 
46 #include <net/rss_config.h>
47 #endif
48
49 /* Local Prototypes */
50 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
51 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
52 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
53 static int      ixl_tx_setup_offload(struct ixl_queue *,
54                     struct mbuf *, u32 *, u32 *);
55 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
56
57 static __inline void ixl_rx_discard(struct rx_ring *, int);
58 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
59                     struct mbuf *, u8);
60
61 /*
62 ** Multiqueue Transmit driver
63 **
64 */
65 int
66 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
67 {
68         struct ixl_vsi          *vsi = ifp->if_softc;
69         struct ixl_queue        *que;
70         struct tx_ring          *txr;
71         int                     err, i;
72 #ifdef RSS
73         u32                     bucket_id;
74 #endif
75
76         /*
77         ** Which queue to use:
78         **
79         ** When doing RSS, map it to the same outbound
80         ** queue as the incoming flow would be mapped to.
81         ** If everything is set up correctly, it should be
82         ** the same bucket as the one the current CPU is on.
83         */
84         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
85 #ifdef  RSS
86                 if (rss_hash2bucket(m->m_pkthdr.flowid,
87                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
88                         i = bucket_id % vsi->num_queues;
89                 } else
90 #endif
91                         i = m->m_pkthdr.flowid % vsi->num_queues;
92         } else
93                 i = curcpu % vsi->num_queues;
94         /*
95         ** This may not be perfect, but until something
96         ** better comes along it will keep us from scheduling
97         ** on stalled queues.
98         */
99         if (((1 << i) & vsi->active_queues) == 0)
100                 i = ffsl(vsi->active_queues);
101
102         que = &vsi->queues[i];
103         txr = &que->txr;
104
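            /*
            ** Always enqueue to the selected ring's buf_ring first; if the
            ** TX lock cannot be taken right away, the per-queue taskqueue
            ** drains the ring later via ixl_deferred_mq_start().
            */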
105         err = drbr_enqueue(ifp, txr->br, m);
106         if (err)
107                 return(err);
108         if (IXL_TX_TRYLOCK(txr)) {
109                 ixl_mq_start_locked(ifp, txr);
110                 IXL_TX_UNLOCK(txr);
111         } else
112                 taskqueue_enqueue(que->tq, &que->tx_task);
113
114         return (0);
115 }
116
117 int
118 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
119 {
120         struct ixl_queue        *que = txr->que;
121         struct ixl_vsi          *vsi = que->vsi;
122         struct mbuf             *next;
123         int                     err = 0;
124
125
126         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
127             vsi->link_active == 0)
128                 return (ENETDOWN);
129
130         /* Process the transmit queue */
131         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
132                 if ((err = ixl_xmit(que, &next)) != 0) {
133                         if (next == NULL)
134                                 drbr_advance(ifp, txr->br);
135                         else
136                                 drbr_putback(ifp, txr->br, next);
137                         break;
138                 }
139                 drbr_advance(ifp, txr->br);
140                 /* Send a copy of the frame to the BPF listener */
141                 ETHER_BPF_MTAP(ifp, next);
142                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
143                         break;
144         }
145
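            /* Opportunistically reclaim completed descriptors when the ring runs low */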
146         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
147                 ixl_txeof(que);
148
149         return (err);
150 }
151
152 /*
153  * Called from a taskqueue to drain queued transmit packets.
154  */
155 void
156 ixl_deferred_mq_start(void *arg, int pending)
157 {
158         struct ixl_queue        *que = arg;
159         struct tx_ring          *txr = &que->txr;
160         struct ixl_vsi          *vsi = que->vsi;
161         struct ifnet            *ifp = vsi->ifp;
162         
163         IXL_TX_LOCK(txr);
164         if (!drbr_empty(ifp, txr->br))
165                 ixl_mq_start_locked(ifp, txr);
166         IXL_TX_UNLOCK(txr);
167 }
168
169 /*
170 ** Flush all queue ring buffers
171 */
172 void
173 ixl_qflush(struct ifnet *ifp)
174 {
175         struct ixl_vsi  *vsi = ifp->if_softc;
176
177         for (int i = 0; i < vsi->num_queues; i++) {
178                 struct ixl_queue *que = &vsi->queues[i];
179                 struct tx_ring  *txr = &que->txr;
180                 struct mbuf     *m;
181                 IXL_TX_LOCK(txr);
182                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
183                         m_freem(m);
184                 IXL_TX_UNLOCK(txr);
185         }
186         if_qflush(ifp);
187 }
188
189 /*
190 ** Find mbuf chains passed to the driver 
191 ** that are 'sparse', using more than IXL_SPARSE_CHAIN
192 ** mbufs to deliver an mss-sized chunk of data
193 */
194 static inline bool
195 ixl_tso_detect_sparse(struct mbuf *mp)
196 {
197         struct mbuf     *m;
198         int             num = 0, mss;
199         bool            ret = FALSE;
200
201         mss = mp->m_pkthdr.tso_segsz;
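            /* Count how many mbufs it takes to cover one MSS worth of payload */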
202         for (m = mp->m_next; m != NULL; m = m->m_next) {
203                 num++;
204                 mss -= m->m_len;
205                 if (mss < 1)
206                         break;
207                 if (m->m_next == NULL)
208                         break;
209         }
210         if (num > IXL_SPARSE_CHAIN)
211                 ret = TRUE;
212
213         return (ret);
214 }
215
216
217 /*********************************************************************
218  *
219  *  This routine maps the mbufs to tx descriptors, allowing the
220  *  TX engine to transmit the packets. 
221  *      - return 0 on success, positive on failure
222  *
223  **********************************************************************/
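/* EOP marks the last descriptor of a frame; RS requests a completion report */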
224 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
225
226 static int
227 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
228 {
229         struct ixl_vsi          *vsi = que->vsi;
230         struct i40e_hw          *hw = vsi->hw;
231         struct tx_ring          *txr = &que->txr;
232         struct ixl_tx_buf       *buf;
233         struct i40e_tx_desc     *txd = NULL;
234         struct mbuf             *m_head, *m;
235         int                     i, j, error, nsegs, maxsegs;
236         int                     first, last = 0;
237         u16                     vtag = 0;
238         u32                     cmd, off;
239         bus_dmamap_t            map;
240         bus_dma_tag_t           tag;
241         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
242
243
244         cmd = off = 0;
245         m_head = *m_headp;
246
247         /*
248          * Important to capture the first descriptor
249          * used because it will contain the index of
250          * the one we tell the hardware to report back
251          */
252         first = txr->next_avail;
253         buf = &txr->buffers[first];
254         map = buf->map;
255         tag = txr->tx_tag;
256         maxsegs = IXL_MAX_TX_SEGS;
257
258         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
259                 /* Use larger mapping for TSO */
260                 tag = txr->tso_tag;
261                 maxsegs = IXL_MAX_TSO_SEGS;
262                 if (ixl_tso_detect_sparse(m_head)) {
263                         m = m_defrag(m_head, M_NOWAIT);
264                         if (m == NULL) {
265                                 m_freem(*m_headp);
266                                 *m_headp = NULL;
267                                 return (ENOBUFS);
268                         }
269                         *m_headp = m;
270                 }
271         }
272
273         /*
274          * Map the packet for DMA.
275          */
276         error = bus_dmamap_load_mbuf_sg(tag, map,
277             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
278
279         if (error == EFBIG) {
280                 struct mbuf *m;
281
282                 m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
283                 if (m == NULL) {
284                         que->mbuf_defrag_failed++;
285                         m_freem(*m_headp);
286                         *m_headp = NULL;
287                         return (ENOBUFS);
288                 }
289                 *m_headp = m;
290
291                 /* Try it again */
292                 error = bus_dmamap_load_mbuf_sg(tag, map,
293                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
294
295                 if (error == ENOMEM) {
296                         que->tx_dma_setup++;
297                         return (error);
298                 } else if (error != 0) {
299                         que->tx_dma_setup++;
300                         m_freem(*m_headp);
301                         *m_headp = NULL;
302                         return (error);
303                 }
304         } else if (error == ENOMEM) {
305                 que->tx_dma_setup++;
306                 return (error);
307         } else if (error != 0) {
308                 que->tx_dma_setup++;
309                 m_freem(*m_headp);
310                 *m_headp = NULL;
311                 return (error);
312         }
313
314         /* Make certain there are enough descriptors, with some headroom, before committing */
315         if (nsegs > txr->avail - 2) {
316                 txr->no_desc++;
317                 error = ENOBUFS;
318                 goto xmit_fail;
319         }
320         m_head = *m_headp;
321
322         /* Set up the TSO/CSUM offload */
323         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
324                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
325                 if (error)
326                         goto xmit_fail;
327         }
328
329         cmd |= I40E_TX_DESC_CMD_ICRC;
330         /* Grab the VLAN tag */
331         if (m_head->m_flags & M_VLANTAG) {
332                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
333                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
334         }
335
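            /* Build one data descriptor per DMA segment; cmd, offsets and vtag are common to all of them */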
336         i = txr->next_avail;
337         for (j = 0; j < nsegs; j++) {
338                 bus_size_t seglen;
339
340                 buf = &txr->buffers[i];
341                 buf->tag = tag; /* Keep track of the type tag */
342                 txd = &txr->base[i];
343                 seglen = segs[j].ds_len;
344
345                 txd->buffer_addr = htole64(segs[j].ds_addr);
346                 txd->cmd_type_offset_bsz =
347                     htole64(I40E_TX_DESC_DTYPE_DATA
348                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
349                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
350                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
351                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
352
353                 last = i; /* descriptor that will get completion IRQ */
354
355                 if (++i == que->num_desc)
356                         i = 0;
357
358                 buf->m_head = NULL;
359                 buf->eop_index = -1;
360         }
361         /* Set the last descriptor for report */
362         txd->cmd_type_offset_bsz |=
363             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
364         txr->avail -= nsegs;
365         txr->next_avail = i;
366
367         buf->m_head = m_head;
368         /* Swap the dma maps so the loaded map stays with the buffer that holds the mbuf */
369         txr->buffers[first].map = buf->map;
370         buf->map = map;
371         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
372
373         /* Set the index of the descriptor that will be marked done */
374         buf = &txr->buffers[first];
375         buf->eop_index = last;
376
377         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
378             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
379         /*
380          * Advance the Transmit Descriptor Tail (TDT); this tells the
381          * hardware that this frame is available to transmit.
382          */
383         ++txr->total_packets;
384         wr32(hw, txr->tail, i);
385
386         ixl_flush(hw);
387         /* Mark outstanding work */
388         if (que->busy == 0)
389                 que->busy = 1;
390         return (0);
391
392 xmit_fail:
393         bus_dmamap_unload(tag, buf->map);
394         return (error);
395 }
396
397
398 /*********************************************************************
399  *
400  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
401  *  the information needed to transmit a packet on the wire. This is
402  *  called only once at attach; setup is done on every reset.
403  *
404  **********************************************************************/
405 int
406 ixl_allocate_tx_data(struct ixl_queue *que)
407 {
408         struct tx_ring          *txr = &que->txr;
409         struct ixl_vsi          *vsi = que->vsi;
410         device_t                dev = vsi->dev;
411         struct ixl_tx_buf       *buf;
412         int                     error = 0;
413
414         /*
415          * Setup DMA descriptor areas.
416          */
417         if ((error = bus_dma_tag_create(NULL,           /* parent */
418                                1, 0,                    /* alignment, bounds */
419                                BUS_SPACE_MAXADDR,       /* lowaddr */
420                                BUS_SPACE_MAXADDR,       /* highaddr */
421                                NULL, NULL,              /* filter, filterarg */
422                                IXL_TSO_SIZE,            /* maxsize */
423                                IXL_MAX_TX_SEGS,         /* nsegments */
424                                PAGE_SIZE,               /* maxsegsize */
425                                0,                       /* flags */
426                                NULL,                    /* lockfunc */
427                                NULL,                    /* lockfuncarg */
428                                &txr->tx_tag))) {
429                 device_printf(dev,"Unable to allocate TX DMA tag\n");
430                 goto fail;
431         }
432
433         /* Make a special tag for TSO */
434         if ((error = bus_dma_tag_create(NULL,           /* parent */
435                                1, 0,                    /* alignment, bounds */
436                                BUS_SPACE_MAXADDR,       /* lowaddr */
437                                BUS_SPACE_MAXADDR,       /* highaddr */
438                                NULL, NULL,              /* filter, filterarg */
439                                IXL_TSO_SIZE,            /* maxsize */
440                                IXL_MAX_TSO_SEGS,        /* nsegments */
441                                PAGE_SIZE,               /* maxsegsize */
442                                0,                       /* flags */
443                                NULL,                    /* lockfunc */
444                                NULL,                    /* lockfuncarg */
445                                &txr->tso_tag))) {
446                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
447                 goto fail;
448         }
449
450         if (!(txr->buffers =
451             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
452             que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
453                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
454                 error = ENOMEM;
455                 goto fail;
456         }
457
458         /* Create the descriptor buffer default dma maps */
459         buf = txr->buffers;
460         for (int i = 0; i < que->num_desc; i++, buf++) {
461                 buf->tag = txr->tx_tag;
462                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
463                 if (error != 0) {
464                         device_printf(dev, "Unable to create TX DMA map\n");
465                         goto fail;
466                 }
467         }
468 fail:
469         return (error);
470 }
471
472
473 /*********************************************************************
474  *
475  *  (Re)Initialize a queue transmit ring.
476  *      - called by init, it clears the descriptor ring,
477  *        and frees any stale mbufs 
478  *
479  **********************************************************************/
480 void
481 ixl_init_tx_ring(struct ixl_queue *que)
482 {
483         struct tx_ring *txr = &que->txr;
484         struct ixl_tx_buf *buf;
485
486         /* Clear the old ring contents */
487         IXL_TX_LOCK(txr);
488         bzero((void *)txr->base,
489               (sizeof(struct i40e_tx_desc)) * que->num_desc);
490
491         /* Reset indices */
492         txr->next_avail = 0;
493         txr->next_to_clean = 0;
494
495 #ifdef IXL_FDIR
496         /* Initialize flow director */
497         txr->atr_rate = ixl_atr_rate;
498         txr->atr_count = 0;
499 #endif
500
501         /* Free any existing tx mbufs. */
502         buf = txr->buffers;
503         for (int i = 0; i < que->num_desc; i++, buf++) {
504                 if (buf->m_head != NULL) {
505                         bus_dmamap_sync(buf->tag, buf->map,
506                             BUS_DMASYNC_POSTWRITE);
507                         bus_dmamap_unload(buf->tag, buf->map);
508                         m_freem(buf->m_head);
509                         buf->m_head = NULL;
510                 }
511                 /* Clear the EOP index */
512                 buf->eop_index = -1;
513         }
514
515         /* Set number of descriptors available */
516         txr->avail = que->num_desc;
517
518         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
519             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
520         IXL_TX_UNLOCK(txr);
521 }
522
523
524 /*********************************************************************
525  *
526  *  Free transmit ring related data structures.
527  *
528  **********************************************************************/
529 void
530 ixl_free_que_tx(struct ixl_queue *que)
531 {
532         struct tx_ring *txr = &que->txr;
533         struct ixl_tx_buf *buf;
534
535         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
536
537         for (int i = 0; i < que->num_desc; i++) {
538                 buf = &txr->buffers[i];
539                 if (buf->m_head != NULL) {
540                         bus_dmamap_sync(buf->tag, buf->map,
541                             BUS_DMASYNC_POSTWRITE);
542                         bus_dmamap_unload(buf->tag,
543                             buf->map);
544                         m_freem(buf->m_head);
545                         buf->m_head = NULL;
546                         if (buf->map != NULL) {
547                                 bus_dmamap_destroy(buf->tag,
548                                     buf->map);
549                                 buf->map = NULL;
550                         }
551                 } else if (buf->map != NULL) {
552                         bus_dmamap_unload(buf->tag,
553                             buf->map);
554                         bus_dmamap_destroy(buf->tag,
555                             buf->map);
556                         buf->map = NULL;
557                 }
558         }
559         if (txr->br != NULL)
560                 buf_ring_free(txr->br, M_DEVBUF);
561         if (txr->buffers != NULL) {
562                 free(txr->buffers, M_DEVBUF);
563                 txr->buffers = NULL;
564         }
565         if (txr->tx_tag != NULL) {
566                 bus_dma_tag_destroy(txr->tx_tag);
567                 txr->tx_tag = NULL;
568         }
569         if (txr->tso_tag != NULL) {
570                 bus_dma_tag_destroy(txr->tso_tag);
571                 txr->tso_tag = NULL;
572         }
573
574         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
575         return;
576 }
577
578 /*********************************************************************
579  *
580  *  Setup descriptor for hw offloads 
581  *
582  **********************************************************************/
583
584 static int
585 ixl_tx_setup_offload(struct ixl_queue *que,
586     struct mbuf *mp, u32 *cmd, u32 *off)
587 {
588         struct ether_vlan_header        *eh;
589 #ifdef INET
590         struct ip                       *ip = NULL;
591 #endif
592         struct tcphdr                   *th = NULL;
593 #ifdef INET6
594         struct ip6_hdr                  *ip6;
595 #endif
596         int                             elen, ip_hlen = 0, tcp_hlen;
597         u16                             etype;
598         u8                              ipproto = 0;
599         bool                            tso = FALSE;
600
601
602         /* Set up the TSO context descriptor if required */
603         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
604                 tso = ixl_tso_setup(que, mp);
605                 if (tso)
606                         ++que->tso;
607                 else
608                         return (ENXIO);
609         }
610
611         /*
612          * Determine where frame payload starts.
613          * Jump over vlan headers if already present,
614          * helpful for QinQ too.
615          */
616         eh = mtod(mp, struct ether_vlan_header *);
617         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
618                 etype = ntohs(eh->evl_proto);
619                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
620         } else {
621                 etype = ntohs(eh->evl_encap_proto);
622                 elen = ETHER_HDR_LEN;
623         }
624
625         switch (etype) {
626 #ifdef INET
627                 case ETHERTYPE_IP:
628                         ip = (struct ip *)(mp->m_data + elen);
629                         ip_hlen = ip->ip_hl << 2;
630                         ipproto = ip->ip_p;
631                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
632                         /* The IP checksum must be recalculated with TSO */
633                         if (tso)
634                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
635                         else
636                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
637                         break;
638 #endif
639 #ifdef INET6
640                 case ETHERTYPE_IPV6:
641                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
642                         ip_hlen = sizeof(struct ip6_hdr);
643                         ipproto = ip6->ip6_nxt;
644                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
645                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
646                         break;
647 #endif
648                 default:
649                         break;
650         }
651
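            /* The HW takes MACLEN in 2-byte units and IPLEN/L4LEN in 4-byte units, hence the shifts */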
652         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
653         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
654
655         switch (ipproto) {
656                 case IPPROTO_TCP:
657                         tcp_hlen = th->th_off << 2;
658                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
659                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
660                                 *off |= (tcp_hlen >> 2) <<
661                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
662                         }
663 #ifdef IXL_FDIR
664                         ixl_atr(que, th, etype);
665 #endif
666                         break;
667                 case IPPROTO_UDP:
668                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
669                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
670                                 *off |= (sizeof(struct udphdr) >> 2) <<
671                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
672                         }
673                         break;
674
675                 case IPPROTO_SCTP:
676                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
677                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
678                                 *off |= (sizeof(struct sctphdr) >> 2) <<
679                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
680                         }
681                         /* Fall Thru */
682                 default:
683                         break;
684         }
685
686         return (0);
687 }
688
689
690 /**********************************************************************
691  *
692  *  Setup context for hardware segmentation offload (TSO)
693  *
694  **********************************************************************/
695 static bool
696 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
697 {
698         struct tx_ring                  *txr = &que->txr;
699         struct i40e_tx_context_desc     *TXD;
700         struct ixl_tx_buf               *buf;
701         u32                             cmd, mss, type, tsolen;
702         u16                             etype;
703         int                             idx, elen, ip_hlen, tcp_hlen;
704         struct ether_vlan_header        *eh;
705 #ifdef INET
706         struct ip                       *ip;
707 #endif
708 #ifdef INET6
709         struct ip6_hdr                  *ip6;
710 #endif
711 #if defined(INET6) || defined(INET)
712         struct tcphdr                   *th;
713 #endif
714         u64                             type_cmd_tso_mss;
715
716         /*
717          * Determine where frame payload starts.
718          * Jump over vlan headers if already present
719          */
720         eh = mtod(mp, struct ether_vlan_header *);
721         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
722                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
723                 etype = eh->evl_proto;
724         } else {
725                 elen = ETHER_HDR_LEN;
726                 etype = eh->evl_encap_proto;
727         }
728
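            /*
            ** For TSO the IPv4 header checksum is cleared and the TCP checksum
            ** is seeded with the pseudo-header sum; the hardware completes
            ** both for every segment it generates.
            */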
729         switch (ntohs(etype)) {
730 #ifdef INET6
731         case ETHERTYPE_IPV6:
732                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
733                 if (ip6->ip6_nxt != IPPROTO_TCP)
734                         return (FALSE);  /* not TCP; cannot TSO */
735                 ip_hlen = sizeof(struct ip6_hdr);
736                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
737                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
738                 tcp_hlen = th->th_off << 2;
739                 break;
740 #endif
741 #ifdef INET
742         case ETHERTYPE_IP:
743                 ip = (struct ip *)(mp->m_data + elen);
744                 if (ip->ip_p != IPPROTO_TCP)
745                         return (FALSE);  /* not TCP; cannot TSO */
746                 ip->ip_sum = 0;
747                 ip_hlen = ip->ip_hl << 2;
748                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
749                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
750                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
751                 tcp_hlen = th->th_off << 2;
752                 break;
753 #endif
754         default:
755                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
756                     __func__, ntohs(etype));
757                 return FALSE;
758         }
759
760         /* Ensure we have at least the IP+TCP header in the first mbuf. */
761         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
762                 return FALSE;
763
764         idx = txr->next_avail;
765         buf = &txr->buffers[idx];
766         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
767         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
768
769         type = I40E_TX_DESC_DTYPE_CONTEXT;
770         cmd = I40E_TX_CTX_DESC_TSO;
771         mss = mp->m_pkthdr.tso_segsz;
772
773         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
774             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
775             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
776             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
777         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
778
779         TXD->tunneling_params = htole32(0);
780         buf->m_head = NULL;
781         buf->eop_index = -1;
782
783         if (++idx == que->num_desc)
784                 idx = 0;
785
786         txr->avail--;
787         txr->next_avail = idx;
788
789         return TRUE;
790 }
791
792 /*             
793 ** ixl_get_tx_head - Retrieve the value from the 
794 **    location where the HW records its HEAD index
795 */
796 static inline u32
797 ixl_get_tx_head(struct ixl_queue *que)
798 {
799         struct tx_ring  *txr = &que->txr;
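            /* The HW writes its consumed HEAD index into the slot just past the last descriptor */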
800         void *head = &txr->base[que->num_desc];
801         return LE32_TO_CPU(*(volatile __le32 *)head);
802 }
803
804 /**********************************************************************
805  *
806  *  Examine each tx_buffer in the used queue. If the hardware is done
807  *  processing the packet then free associated resources. The
808  *  tx_buffer is put back on the free queue.
809  *
810  **********************************************************************/
811 bool
812 ixl_txeof(struct ixl_queue *que)
813 {
814         struct tx_ring          *txr = &que->txr;
815         u32                     first, last, head, done, processed;
816         struct ixl_tx_buf       *buf;
817         struct i40e_tx_desc     *tx_desc, *eop_desc;
818
819
820         mtx_assert(&txr->mtx, MA_OWNED);
821
822
823         /* These are not the descriptors you seek, move along :) */
824         if (txr->avail == que->num_desc) {
825                 que->busy = 0;
826                 return FALSE;
827         }
828
829         processed = 0;
830         first = txr->next_to_clean;
831         buf = &txr->buffers[first];
832         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
833         last = buf->eop_index;
834         if (last == -1)
835                 return FALSE;
836         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
837
838         /* Get the Head WB value */
839         head = ixl_get_tx_head(que);
840
841         /*
842         ** Get the index of the first descriptor
843         ** BEYOND the EOP and call that 'done'.
844         ** I do this so the comparison in the
845         ** inner while loop below can be simple
846         */
847         if (++last == que->num_desc) last = 0;
848         done = last;
849
850         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
851             BUS_DMASYNC_POSTREAD);
852         /*
853         ** The HEAD index of the ring is written to a
854         ** defined location; this, rather than a done bit,
855         ** is what is used to keep track of what must be
856         ** 'cleaned'.
857         */
858         while (first != head) {
859                 /* We clean the range of the packet */
860                 while (first != done) {
861                         ++txr->avail;
862                         ++processed;
863
864                         if (buf->m_head) {
865                                 txr->bytes += /* for ITR adjustment */
866                                     buf->m_head->m_pkthdr.len;
867                                 txr->tx_bytes += /* for TX stats */
868                                     buf->m_head->m_pkthdr.len;
869                                 bus_dmamap_sync(buf->tag,
870                                     buf->map,
871                                     BUS_DMASYNC_POSTWRITE);
872                                 bus_dmamap_unload(buf->tag,
873                                     buf->map);
874                                 m_freem(buf->m_head);
875                                 buf->m_head = NULL;
876                                 buf->map = NULL;
877                         }
878                         buf->eop_index = -1;
879
880                         if (++first == que->num_desc)
881                                 first = 0;
882
883                         buf = &txr->buffers[first];
884                         tx_desc = &txr->base[first];
885                 }
886                 ++txr->packets;
887                 /* See if there is more work now */
888                 last = buf->eop_index;
889                 if (last != -1) {
890                         eop_desc = &txr->base[last];
891                         /* Get next done point */
892                         if (++last == que->num_desc) last = 0;
893                         done = last;
894                 } else
895                         break;
896         }
897         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
898             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
899
900         txr->next_to_clean = first;
901
902
903         /*
904         ** Hang detection: we know there is work
905         ** outstanding or the first return above would
906         ** have been taken, so record an unsuccessful
907         ** pass; if the local timer sees this count
908         ** grow too large, the queue is considered
909         ** hung. If anything was cleaned, reset
910         ** the state.
911         */
912         if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
913                 ++que->busy;
914
915         if (processed)
916                 que->busy = 1; /* Note this turns off HUNG */
917
918         /*
919          * If there are no pending descriptors, clear the timeout.
920          */
921         if (txr->avail == que->num_desc) {
922                 que->busy = 0;
923                 return FALSE;
924         }
925
926         return TRUE;
927 }
928
929 /*********************************************************************
930  *
931  *  Refresh mbuf buffers for RX descriptor rings
932  *   - keeps its own state so discards due to resource
933  *     exhaustion are unnecessary; if an mbuf cannot be obtained
934  *     it simply returns, keeping its placeholder, and can be
935  *     called again later to retry.
936  *
937  **********************************************************************/
938 static void
939 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
940 {
941         struct ixl_vsi          *vsi = que->vsi;
942         struct rx_ring          *rxr = &que->rxr;
943         bus_dma_segment_t       hseg[1];
944         bus_dma_segment_t       pseg[1];
945         struct ixl_rx_buf       *buf;
946         struct mbuf             *mh, *mp;
947         int                     i, j, nsegs, error;
948         bool                    refreshed = FALSE;
949
950         i = j = rxr->next_refresh;
951         /* Control the loop with one beyond */
952         if (++j == que->num_desc)
953                 j = 0;
954
955         while (j != limit) {
956                 buf = &rxr->buffers[i];
957                 if (rxr->hdr_split == FALSE)
958                         goto no_split;
959
960                 if (buf->m_head == NULL) {
961                         mh = m_gethdr(M_NOWAIT, MT_DATA);
962                         if (mh == NULL)
963                                 goto update;
964                 } else
965                         mh = buf->m_head;
966
967                 mh->m_pkthdr.len = mh->m_len = MHLEN;
969                 mh->m_flags |= M_PKTHDR;
970                 /* Get the memory mapping */
971                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
972                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
973                 if (error != 0) {
974                         printf("Refresh mbufs: hdr dmamap load"
975                             " failure - %d\n", error);
976                         m_free(mh);
977                         buf->m_head = NULL;
978                         goto update;
979                 }
980                 buf->m_head = mh;
981                 bus_dmamap_sync(rxr->htag, buf->hmap,
982                     BUS_DMASYNC_PREREAD);
983                 rxr->base[i].read.hdr_addr =
984                    htole64(hseg[0].ds_addr);
985
986 no_split:
987                 if (buf->m_pack == NULL) {
988                         mp = m_getjcl(M_NOWAIT, MT_DATA,
989                             M_PKTHDR, rxr->mbuf_sz);
990                         if (mp == NULL)
991                                 goto update;
992                 } else
993                         mp = buf->m_pack;
994
995                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
996                 /* Get the memory mapping */
997                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
998                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
999                 if (error != 0) {
1000                         printf("Refresh mbufs: payload dmamap load"
1001                             " failure - %d\n", error);
1002                         m_free(mp);
1003                         buf->m_pack = NULL;
1004                         goto update;
1005                 }
1006                 buf->m_pack = mp;
1007                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1008                     BUS_DMASYNC_PREREAD);
1009                 rxr->base[i].read.pkt_addr =
1010                    htole64(pseg[0].ds_addr);
1011                 /* Used only when doing header split */
1012                 rxr->base[i].read.hdr_addr = 0;
1013
1014                 refreshed = TRUE;
1015                 /* Next is precalculated */
1016                 i = j;
1017                 rxr->next_refresh = i;
1018                 if (++j == que->num_desc)
1019                         j = 0;
1020         }
1021 update:
1022         if (refreshed) /* Update hardware tail index */
1023                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1024         return;
1025 }
1026
1027
1028 /*********************************************************************
1029  *
1030  *  Allocate memory for rx_buffer structures. Since we use one
1031  *  rx_buffer per descriptor, the maximum number of rx_buffers
1032  *  that we'll need is equal to the number of receive descriptors
1033  *  that we've defined.
1034  *
1035  **********************************************************************/
1036 int
1037 ixl_allocate_rx_data(struct ixl_queue *que)
1038 {
1039         struct rx_ring          *rxr = &que->rxr;
1040         struct ixl_vsi          *vsi = que->vsi;
1041         device_t                dev = vsi->dev;
1042         struct ixl_rx_buf       *buf;
1043         int                     i, bsize, error;
1044
1045         bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1046         if (!(rxr->buffers =
1047             (struct ixl_rx_buf *) malloc(bsize,
1048             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1049                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1050                 error = ENOMEM;
1051                 return (error);
1052         }
1053
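            /* Header buffers come from plain mbufs (MSIZE); payload buffers from clusters of up to MJUM16BYTES */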
1054         if ((error = bus_dma_tag_create(NULL,   /* parent */
1055                                    1, 0,        /* alignment, bounds */
1056                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1057                                    BUS_SPACE_MAXADDR,   /* highaddr */
1058                                    NULL, NULL,          /* filter, filterarg */
1059                                    MSIZE,               /* maxsize */
1060                                    1,                   /* nsegments */
1061                                    MSIZE,               /* maxsegsize */
1062                                    0,                   /* flags */
1063                                    NULL,                /* lockfunc */
1064                                    NULL,                /* lockfuncarg */
1065                                    &rxr->htag))) {
1066                 device_printf(dev, "Unable to create RX DMA htag\n");
1067                 return (error);
1068         }
1069
1070         if ((error = bus_dma_tag_create(NULL,   /* parent */
1071                                    1, 0,        /* alignment, bounds */
1072                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1073                                    BUS_SPACE_MAXADDR,   /* highaddr */
1074                                    NULL, NULL,          /* filter, filterarg */
1075                                    MJUM16BYTES,         /* maxsize */
1076                                    1,                   /* nsegments */
1077                                    MJUM16BYTES,         /* maxsegsize */
1078                                    0,                   /* flags */
1079                                    NULL,                /* lockfunc */
1080                                    NULL,                /* lockfuncarg */
1081                                    &rxr->ptag))) {
1082                 device_printf(dev, "Unable to create RX DMA ptag\n");
1083                 return (error);
1084         }
1085
1086         for (i = 0; i < que->num_desc; i++) {
1087                 buf = &rxr->buffers[i];
1088                 error = bus_dmamap_create(rxr->htag,
1089                     BUS_DMA_NOWAIT, &buf->hmap);
1090                 if (error) {
1091                         device_printf(dev, "Unable to create RX head map\n");
1092                         break;
1093                 }
1094                 error = bus_dmamap_create(rxr->ptag,
1095                     BUS_DMA_NOWAIT, &buf->pmap);
1096                 if (error) {
1097                         device_printf(dev, "Unable to create RX pkt map\n");
1098                         break;
1099                 }
1100         }
1101
1102         return (error);
1103 }
1104
1105
1106 /*********************************************************************
1107  *
1108  *  (Re)Initialize the queue receive ring and its buffers.
1109  *
1110  **********************************************************************/
1111 int
1112 ixl_init_rx_ring(struct ixl_queue *que)
1113 {
1114         struct  rx_ring         *rxr = &que->rxr;
1115         struct ixl_vsi          *vsi = que->vsi;
1116 #if defined(INET6) || defined(INET)
1117         struct ifnet            *ifp = vsi->ifp;
1118         struct lro_ctrl         *lro = &rxr->lro;
1119 #endif
1120         struct ixl_rx_buf       *buf;
1121         bus_dma_segment_t       pseg[1], hseg[1];
1122         int                     rsize, nsegs, error = 0;
1123
1124         IXL_RX_LOCK(rxr);
1125         /* Clear the ring contents */
1126         rsize = roundup2(que->num_desc *
1127             sizeof(union i40e_rx_desc), DBA_ALIGN);
1128         bzero((void *)rxr->base, rsize);
1129         /* Cleanup any existing buffers */
1130         for (int i = 0; i < que->num_desc; i++) {
1131                 buf = &rxr->buffers[i];
1132                 if (buf->m_head != NULL) {
1133                         bus_dmamap_sync(rxr->htag, buf->hmap,
1134                             BUS_DMASYNC_POSTREAD);
1135                         bus_dmamap_unload(rxr->htag, buf->hmap);
1136                         buf->m_head->m_flags |= M_PKTHDR;
1137                         m_freem(buf->m_head);
1138                 }
1139                 if (buf->m_pack != NULL) {
1140                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1141                             BUS_DMASYNC_POSTREAD);
1142                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1143                         buf->m_pack->m_flags |= M_PKTHDR;
1144                         m_freem(buf->m_pack);
1145                 }
1146                 buf->m_head = NULL;
1147                 buf->m_pack = NULL;
1148         }
1149
1150         /* header split is off */
1151         rxr->hdr_split = FALSE;
1152
1153         /* Now replenish the mbufs */
1154         for (int j = 0; j != que->num_desc; ++j) {
1155                 struct mbuf     *mh, *mp;
1156
1157                 buf = &rxr->buffers[j];
1158                 /*
1159                 ** Don't allocate mbufs if not
1160                 ** doing header split; it's wasteful
1161                 */ 
1162                 if (rxr->hdr_split == FALSE)
1163                         goto skip_head;
1164
1165                 /* First the header */
1166                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1167                 if (buf->m_head == NULL) {
1168                         error = ENOBUFS;
1169                         goto fail;
1170                 }
1171                 m_adj(buf->m_head, ETHER_ALIGN);
1172                 mh = buf->m_head;
1173                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1174                 mh->m_flags |= M_PKTHDR;
1175                 /* Get the memory mapping */
1176                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1177                     buf->hmap, buf->m_head, hseg,
1178                     &nsegs, BUS_DMA_NOWAIT);
1179                 if (error != 0) /* Nothing elegant to do here */
1180                         goto fail;
1181                 bus_dmamap_sync(rxr->htag,
1182                     buf->hmap, BUS_DMASYNC_PREREAD);
1183                 /* Update descriptor */
1184                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1185
1186 skip_head:
1187                 /* Now the payload cluster */
1188                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1189                     M_PKTHDR, rxr->mbuf_sz);
1190                 if (buf->m_pack == NULL) {
1191                         error = ENOBUFS;
1192                         goto fail;
1193                 }
1194                 mp = buf->m_pack;
1195                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1196                 /* Get the memory mapping */
1197                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1198                     buf->pmap, mp, pseg,
1199                     &nsegs, BUS_DMA_NOWAIT);
1200                 if (error != 0)
1201                         goto fail;
1202                 bus_dmamap_sync(rxr->ptag,
1203                     buf->pmap, BUS_DMASYNC_PREREAD);
1204                 /* Update descriptor */
1205                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1206                 rxr->base[j].read.hdr_addr = 0;
1207         }
1208
1209
1210         /* Setup our descriptor indices */
1211         rxr->next_check = 0;
1212         rxr->next_refresh = 0;
1213         rxr->lro_enabled = FALSE;
1214         rxr->split = 0;
1215         rxr->bytes = 0;
1216         rxr->discard = FALSE;
1217
1218         wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1219         ixl_flush(vsi->hw);
1220
1221 #if defined(INET6) || defined(INET)
1222         /*
1223         ** Now set up the LRO interface:
1224         */
1225         if (ifp->if_capenable & IFCAP_LRO) {
1226                 int err = tcp_lro_init(lro);
1227                 if (err) {
1228                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1229                         goto fail;
1230                 }
1231                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1232                 rxr->lro_enabled = TRUE;
1233                 lro->ifp = vsi->ifp;
1234         }
1235 #endif
1236
1237         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1238             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1239
1240 fail:
1241         IXL_RX_UNLOCK(rxr);
1242         return (error);
1243 }
1244
1245
1246 /*********************************************************************
1247  *
1248  *  Free station receive ring data structures
1249  *
1250  **********************************************************************/
1251 void
1252 ixl_free_que_rx(struct ixl_queue *que)
1253 {
1254         struct rx_ring          *rxr = &que->rxr;
1255         struct ixl_rx_buf       *buf;
1256
1257         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1258
1259         /* Cleanup any existing buffers */
1260         if (rxr->buffers != NULL) {
1261                 for (int i = 0; i < que->num_desc; i++) {
1262                         buf = &rxr->buffers[i];
1263                         if (buf->m_head != NULL) {
1264                                 bus_dmamap_sync(rxr->htag, buf->hmap,
1265                                     BUS_DMASYNC_POSTREAD);
1266                                 bus_dmamap_unload(rxr->htag, buf->hmap);
1267                                 buf->m_head->m_flags |= M_PKTHDR;
1268                                 m_freem(buf->m_head);
1269                         }
1270                         if (buf->m_pack != NULL) {
1271                                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1272                                     BUS_DMASYNC_POSTREAD);
1273                                 bus_dmamap_unload(rxr->ptag, buf->pmap);
1274                                 buf->m_pack->m_flags |= M_PKTHDR;
1275                                 m_freem(buf->m_pack);
1276                         }
1277                         buf->m_head = NULL;
1278                         buf->m_pack = NULL;
1279                         if (buf->hmap != NULL) {
1280                                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1281                                 buf->hmap = NULL;
1282                         }
1283                         if (buf->pmap != NULL) {
1284                                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1285                                 buf->pmap = NULL;
1286                         }
1287                 }
1288                 if (rxr->buffers != NULL) {
1289                         free(rxr->buffers, M_DEVBUF);
1290                         rxr->buffers = NULL;
1291                 }
1292         }
1293
1294         if (rxr->htag != NULL) {
1295                 bus_dma_tag_destroy(rxr->htag);
1296                 rxr->htag = NULL;
1297         }
1298         if (rxr->ptag != NULL) {
1299                 bus_dma_tag_destroy(rxr->ptag);
1300                 rxr->ptag = NULL;
1301         }
1302
1303         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1304         return;
1305 }
1306
1307 static __inline void
1308 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1309 {
1310
1311 #if defined(INET6) || defined(INET)
1312         /*
1313          * At the moment LRO is only done for IPv4/TCP packets whose TCP
1314          * checksum has been verified by hardware, and which carry no VLAN
1315          * tag in the ethernet header.
1316          */
1317         if (rxr->lro_enabled &&
1318             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1319             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1320             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1321                 /*
1322                  * Send to the stack if:
1323                  **  - LRO not enabled, or
1324                  **  - no LRO resources, or
1325                  **  - lro enqueue fails
1326                  */
1327                 if (rxr->lro.lro_cnt != 0)
1328                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1329                                 return;
1330         }
1331 #endif
1332         IXL_RX_UNLOCK(rxr);
1333         (*ifp->if_input)(ifp, m);
1334         IXL_RX_LOCK(rxr);
1335 }
1336
1337
1338 static __inline void
1339 ixl_rx_discard(struct rx_ring *rxr, int i)
1340 {
1341         struct ixl_rx_buf       *rbuf;
1342
1343         rbuf = &rxr->buffers[i];
1344
1345         if (rbuf->fmp != NULL) {/* Partial chain ? */
1346                 rbuf->fmp->m_flags |= M_PKTHDR;
1347                 m_freem(rbuf->fmp);
1348                 rbuf->fmp = NULL;
1349         }
1350
1351         /*
1352         ** With advanced descriptors the writeback
1353         ** clobbers the buffer addrs, so it's easier
1354         ** to just free the existing mbufs and take
1355         ** the normal refresh path to get new buffers
1356         ** and mapping.
1357         */
1358         if (rbuf->m_head) {
1359                 m_free(rbuf->m_head);
1360                 rbuf->m_head = NULL;
1361         }
1362  
1363         if (rbuf->m_pack) {
1364                 m_free(rbuf->m_pack);
1365                 rbuf->m_pack = NULL;
1366         }
1367
1368         return;
1369 }
1370
1371 #ifdef RSS
1372 /*
1373 ** ixl_ptype_to_hash: parse the packet type
1374 ** to determine the appropriate hash.
1375 */
1376 static inline int
1377 ixl_ptype_to_hash(u8 ptype)
1378 {
1379         struct i40e_rx_ptype_decoded    decoded;
1380         u8                              ex = 0;
1381
1382         decoded = decode_rx_desc_ptype(ptype);
1383         ex = decoded.outer_frag;
1384
1385         if (!decoded.known)
1386                 return M_HASHTYPE_OPAQUE;
1387
1388         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
1389                 return M_HASHTYPE_OPAQUE;
1390
1391         /* Note: anything that gets to this point is IP */
1392         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
1393                 switch (decoded.inner_prot) {
1394                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1395                                 if (ex)
1396                                         return M_HASHTYPE_RSS_TCP_IPV6_EX;
1397                                 else
1398                                         return M_HASHTYPE_RSS_TCP_IPV6;
1399                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1400                                 if (ex)
1401                                         return M_HASHTYPE_RSS_UDP_IPV6_EX;
1402                                 else
1403                                         return M_HASHTYPE_RSS_UDP_IPV6;
1404                         default:
1405                                 if (ex)
1406                                         return M_HASHTYPE_RSS_IPV6_EX;
1407                                 else
1408                                         return M_HASHTYPE_RSS_IPV6;
1409                 }
1410         }
1411         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
1412                 switch (decoded.inner_prot) {
1413                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1414                                         return M_HASHTYPE_RSS_TCP_IPV4;
1415                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1416                                 if (ex)
1417                                         return M_HASHTYPE_RSS_UDP_IPV4_EX;
1418                                 else
1419                                         return M_HASHTYPE_RSS_UDP_IPV4;
1420                         default:
1421                                         return M_HASHTYPE_RSS_IPV4;
1422                 }
1423         }
1424         /* We should never get here!! */
1425         return M_HASHTYPE_OPAQUE;
1426 }
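
/*
** Illustrative sketch (not compiled into the driver): once ixl_rxeof()
** below stamps the mbuf with the hardware RSS hash and the hash type
** derived by ixl_ptype_to_hash(), a consumer can map the flow back to
** an RSS bucket.  The helper name is an example only; the transmit
** path performs the equivalent lookup when selecting a queue.
*/
#if 0
static int
example_flow_to_bucket(struct mbuf *m, u32 *bucket_id)
{
	/* Only hardware-hashed mbufs carry a usable flowid/type pair. */
	if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE)
		return (-1);

	/* rss_hash2bucket() fails for hash types RSS was not configured with. */
	return (rss_hash2bucket(m->m_pkthdr.flowid,
	    M_HASHTYPE_GET(m), bucket_id));
}
#endif
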
1427 #endif /* RSS */
1428
1429 /*********************************************************************
1430  *
1431  *  This routine executes in interrupt context. It replenishes
1432  *  the mbufs in the descriptor ring and passes data that has been
1433  *  DMA'd into host memory up to the upper layer (an illustrative
1434  *  caller sketch follows the function body).
1435  *
1436  *  We loop at most count times if count > 0, or until done if count < 0.
1437  *
1438  *  Return TRUE for more work, FALSE for all clean.
1439  *********************************************************************/
1440 bool
1441 ixl_rxeof(struct ixl_queue *que, int count)
1442 {
1443         struct ixl_vsi          *vsi = que->vsi;
1444         struct rx_ring          *rxr = &que->rxr;
1445         struct ifnet            *ifp = vsi->ifp;
1446 #if defined(INET6) || defined(INET)
1447         struct lro_ctrl         *lro = &rxr->lro;
1448         struct lro_entry        *queued;
1449 #endif
1450         int                     i, nextp, processed = 0;
1451         union i40e_rx_desc      *cur;
1452         struct ixl_rx_buf       *rbuf, *nbuf;
1453
1454
1455         IXL_RX_LOCK(rxr);
1456
1457
1458         for (i = rxr->next_check; count != 0;) {
1459                 struct mbuf     *sendmp, *mh, *mp;
1460                 u32             rsc, status, error;
1461                 u16             hlen, plen, vtag;
1462                 u64             qword;
1463                 u8              ptype;
1464                 bool            eop;
1465  
1466                 /* Sync the ring. */
1467                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1468                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1469
1470                 cur = &rxr->base[i];
1471                 qword = le64toh(cur->wb.qword1.status_error_len);
1472                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1473                     >> I40E_RXD_QW1_STATUS_SHIFT;
1474                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1475                     >> I40E_RXD_QW1_ERROR_SHIFT;
1476                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1477                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1478                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1479                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1480                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1481                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1482
1483                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1484                         ++rxr->not_done;
1485                         break;
1486                 }
1487                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1488                         break;
1489
1490                 count--;
1491                 sendmp = NULL;
1492                 nbuf = NULL;
1493                 rsc = 0;
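                /*
                ** Clear the writeback status/error qword so this
                ** completion's DD bit cannot later be mistaken for
                ** a new one.
                */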
1494                 cur->wb.qword1.status_error_len = 0;
1495                 rbuf = &rxr->buffers[i];
1496                 mh = rbuf->m_head;
1497                 mp = rbuf->m_pack;
1498                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1499                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1500                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1501                 else
1502                         vtag = 0;
1503
1504                 /*
1505                 ** Make sure bad packets are discarded;
1506                 ** note that only the EOP descriptor
1507                 ** has valid error results.
1508                 */
1509                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1510                         rxr->discarded++;
1511                         ixl_rx_discard(rxr, i);
1512                         goto next_desc;
1513                 }
1514
1515                 /* Prefetch the next buffer */
1516                 if (!eop) {
1517                         nextp = i + 1;
1518                         if (nextp == que->num_desc)
1519                                 nextp = 0;
1520                         nbuf = &rxr->buffers[nextp];
1521                         prefetch(nbuf);
1522                 }
1523
1524                 /*
1525                 ** The header mbuf is ONLY used when header 
1526                 ** split is enabled, otherwise we get normal 
1527                 ** behavior, i.e., both header and payload
1528                 ** are DMA'd into the payload buffer.
1529                 **
1530                 ** Rather than using the fmp/lmp global pointers
1531                 ** we now keep the head of a packet chain in the
1532                 ** buffer struct and pass this along from one
1533                 ** descriptor to the next, until we get EOP.
1534                 */
1535                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1536                         if (hlen > IXL_RX_HDR)
1537                                 hlen = IXL_RX_HDR;
1538                         mh->m_len = hlen;
1539                         mh->m_flags |= M_PKTHDR;
1540                         mh->m_next = NULL;
1541                         mh->m_pkthdr.len = mh->m_len;
1542                         /* Null buf pointer so it is refreshed */
1543                         rbuf->m_head = NULL;
1544                         /*
1545                         ** Check the payload length; it
1546                         ** could be zero if it's a small
1547                         ** packet.
1548                         */
1549                         if (plen > 0) {
1550                                 mp->m_len = plen;
1551                                 mp->m_next = NULL;
1552                                 mp->m_flags &= ~M_PKTHDR;
1553                                 mh->m_next = mp;
1554                                 mh->m_pkthdr.len += mp->m_len;
1555                                 /* Null buf pointer so it is refreshed */
1556                                 rbuf->m_pack = NULL;
1557                                 rxr->split++;
1558                         }
1559                         /*
1560                         ** Now create the forward
1561                         ** chain so when complete
1562                         ** we won't have to.
1563                         */
1564                         if (eop == 0) {
1565                                 /* stash the chain head */
1566                                 nbuf->fmp = mh;
1567                                 /* Make forward chain */
1568                                 if (plen)
1569                                         mp->m_next = nbuf->m_pack;
1570                                 else
1571                                         mh->m_next = nbuf->m_pack;
1572                         } else {
1573                                 /* Singlet, prepare to send */
1574                                 sendmp = mh;
1575                                 if (vtag) {
1576                                         sendmp->m_pkthdr.ether_vtag = vtag;
1577                                         sendmp->m_flags |= M_VLANTAG;
1578                                 }
1579                         }
1580                 } else {
1581                         /*
1582                         ** Either no header split, or a
1583                         ** secondary piece of a fragmented
1584                         ** split packet.
1585                         */
1586                         mp->m_len = plen;
1587                         /*
1588                         ** See if there is a stored chain head
1589                         ** that determines how to treat this buffer.
1590                         */
1591                         sendmp = rbuf->fmp;
1592                         rbuf->m_pack = rbuf->fmp = NULL;
1593
1594                         if (sendmp != NULL) /* secondary frag */
1595                                 sendmp->m_pkthdr.len += mp->m_len;
1596                         else {
1597                                 /* first desc of a non-ps chain */
1598                                 sendmp = mp;
1599                                 sendmp->m_flags |= M_PKTHDR;
1600                                 sendmp->m_pkthdr.len = mp->m_len;
1601                                 if (vtag) {
1602                                         sendmp->m_pkthdr.ether_vtag = vtag;
1603                                         sendmp->m_flags |= M_VLANTAG;
1604                                 }
1605                         }
1606                         /* Pass the head pointer on */
1607                         if (eop == 0) {
1608                                 nbuf->fmp = sendmp;
1609                                 sendmp = NULL;
1610                                 mp->m_next = nbuf->m_pack;
1611                         }
1612                 }
1613                 ++processed;
1614                 /* Sending this frame? */
1615                 if (eop) {
1616                         sendmp->m_pkthdr.rcvif = ifp;
1617                         /* gather stats */
1618                         rxr->rx_packets++;
1619                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1620                         /* capture data for dynamic ITR adjustment */
1621                         rxr->packets++;
1622                         rxr->bytes += sendmp->m_pkthdr.len;
1623                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1624                                 ixl_rx_checksum(sendmp, status, error, ptype);
1625 #ifdef RSS
1626                         sendmp->m_pkthdr.flowid =
1627                             le32toh(cur->wb.qword0.hi_dword.rss);
1628                         M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1629 #else
1630                         sendmp->m_pkthdr.flowid = que->msix;
1631                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1632 #endif
1633                 }
1634 next_desc:
1635                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1636                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1637
1638                 /* Advance our pointers to the next descriptor. */
1639                 if (++i == que->num_desc)
1640                         i = 0;
1641
1642                 /* Now send to the stack or do LRO */
1643                 if (sendmp != NULL) {
1644                         rxr->next_check = i;
1645                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1646                         i = rxr->next_check;
1647                 }
1648
1649                 /* Every 8 descriptors, go refresh the mbufs */
1650                 if (processed == 8) {
1651                         ixl_refresh_mbufs(que, i);
1652                         processed = 0;
1653                 }
1654         }
1655
1656         /* Refresh any remaining buf structs */
1657         if (ixl_rx_unrefreshed(que))
1658                 ixl_refresh_mbufs(que, i);
1659
1660         rxr->next_check = i;
1661
1662 #if defined(INET6) || defined(INET)
1663         /*
1664          * Flush any outstanding LRO work
1665          */
1666         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1667                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1668                 tcp_lro_flush(lro, queued);
1669         }
1670 #endif
1671
1672         IXL_RX_UNLOCK(rxr);
1673         return (FALSE);
1674 }
1675
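
/*
** Illustrative sketch (not compiled into the driver): one way a
** deferred task could drive ixl_rxeof() with a per-pass budget,
** rescheduling itself while the routine reports more work.  The
** function name and budget value are examples only, not the driver's
** actual interrupt/task handlers; que->tq and que->task are assumed
** to be the queue's taskqueue and task as set up at attach time.
*/
#if 0
static void
example_rx_task(void *arg, int pending)
{
	struct ixl_queue *que = arg;
	const int budget = 256;		/* example descriptor limit per pass */

	/* TRUE means descriptors remain: requeue ourselves to keep cleaning. */
	if (ixl_rxeof(que, budget))
		taskqueue_enqueue(que->tq, &que->task);
}
#endif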
1676
1677 /*********************************************************************
1678  *
1679  *  Verify that the hardware indicated that the checksum is valid.
1680  *  Inform the stack about the status of the checksum so that the
1681  *  stack doesn't spend time verifying it.
1682  *
1683  *********************************************************************/
1684 static void
1685 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1686 {
1687         struct i40e_rx_ptype_decoded decoded;
1688
1689         decoded = decode_rx_desc_ptype(ptype);
1690
1691         /* Errors? */
1692         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1693             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1694                 mp->m_pkthdr.csum_flags = 0;
1695                 return;
1696         }
1697
1698         /* IPv6 packets with extension headers likely have a bad csum */
1699         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1700             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1701                 if (status &
1702                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1703                         mp->m_pkthdr.csum_flags = 0;
1704                         return;
1705                 }
1706
1707  
1708         /* IP Checksum Good */
1709         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1710         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1711
1712         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1713                 mp->m_pkthdr.csum_flags |= 
1714                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1715                 mp->m_pkthdr.csum_data = htons(0xffff);
1716         }
1717         return;
1718 }
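
/*
** Illustrative sketch (not compiled into the driver): a simplified
** paraphrase of how a protocol input path can honor the flags set by
** ixl_rx_checksum() instead of recomputing the L4 checksum.  The
** helper name is an example only.
*/
#if 0
static bool
example_l4_csum_ok(struct mbuf *m)
{
	/*
	** CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with a csum_data of
	** 0xffff is the driver's way of saying the hardware verified the
	** full checksum, pseudo-header included.
	*/
	if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) !=
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR))
		return (FALSE);
	return (m->m_pkthdr.csum_data == 0xffff);
}
#endif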
1719
1720 #if __FreeBSD_version >= 1100000
1721 uint64_t
1722 ixl_get_counter(if_t ifp, ift_counter cnt)
1723 {
1724         struct ixl_vsi *vsi;
1725
1726         vsi = if_getsoftc(ifp);
1727
1728         switch (cnt) {
1729         case IFCOUNTER_IPACKETS:
1730                 return (vsi->ipackets);
1731         case IFCOUNTER_IERRORS:
1732                 return (vsi->ierrors);
1733         case IFCOUNTER_OPACKETS:
1734                 return (vsi->opackets);
1735         case IFCOUNTER_OERRORS:
1736                 return (vsi->oerrors);
1737         case IFCOUNTER_COLLISIONS:
1738                 /* Collisions are by standard impossible in 40G/10G Ethernet */
1739                 return (0);
1740         case IFCOUNTER_IBYTES:
1741                 return (vsi->ibytes);
1742         case IFCOUNTER_OBYTES:
1743                 return (vsi->obytes);
1744         case IFCOUNTER_IMCASTS:
1745                 return (vsi->imcasts);
1746         case IFCOUNTER_OMCASTS:
1747                 return (vsi->omcasts);
1748         case IFCOUNTER_IQDROPS:
1749                 return (vsi->iqdrops);
1750         case IFCOUNTER_OQDROPS:
1751                 return (vsi->oqdrops);
1752         case IFCOUNTER_NOPROTO:
1753                 return (vsi->noproto);
1754         default:
1755                 return (if_get_counter_default(ifp, cnt));
1756         }
1757 }
1758 #endif
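
/*
** Illustrative sketch (not compiled into the driver): on FreeBSD 11
** and later the counter handler above is registered on the ifnet so
** that per-interface counter queries reach it, typically with the
** stock if_setgetcounterfn() KPI.  The function name below is an
** example only, not the driver's attach code.
*/
#if 0
static void
example_register_counter_handler(struct ixl_vsi *vsi)
{
	if_t ifp = vsi->ifp;

	/* Route per-interface counter queries to ixl_get_counter(). */
	if_setgetcounterfn(ifp, ixl_get_counter);
}
#endif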
1759