1 /******************************************************************************
2
3   Copyright (c) 2013-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was seperated to allow usage by
38 **          This was separated to allow usage by
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46
47 #include "ixl.h"
48
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52
53 /* Local Prototypes */
54 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
56 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
57 static int      ixl_tx_setup_offload(struct ixl_queue *,
58                     struct mbuf *, u32 *, u32 *);
59 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60
61 static __inline void ixl_rx_discard(struct rx_ring *, int);
62 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
63                     struct mbuf *, u8);
64
65 #ifdef DEV_NETMAP
66 #include <dev/netmap/if_ixl_netmap.h>
67 #endif /* DEV_NETMAP */
68
69 /*
70 ** Multiqueue Transmit driver
71 **
72 */
73 int
74 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
75 {
76         struct ixl_vsi          *vsi = ifp->if_softc;
77         struct ixl_queue        *que;
78         struct tx_ring          *txr;
79         int                     err, i;
80 #ifdef RSS
81         u32                     bucket_id;
82 #endif
83
84         /*
85         ** Which queue to use:
86         **
87         ** When doing RSS, map it to the same outbound
88         ** queue as the incoming flow would be mapped to.
89         ** If everything is set up correctly, it should be
90         ** the same bucket the current CPU is assigned to.
91         */
92         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
93 #ifdef  RSS
94                 if (rss_hash2bucket(m->m_pkthdr.flowid,
95                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
96                         i = bucket_id % vsi->num_queues;
97                 } else
98 #endif
99                         i = m->m_pkthdr.flowid % vsi->num_queues;
100         } else
101                 i = curcpu % vsi->num_queues;
102         /*
103         ** This may not be perfect, but until something
104         ** better comes along it will keep us from scheduling
105         ** on stalled queues.
106         */
107         if ((((u64)1 << i) & vsi->active_queues) == 0 && vsi->active_queues != 0)
108                 i = ffsl(vsi->active_queues) - 1;       /* ffsl() is 1-based */
109
110         que = &vsi->queues[i];
111         txr = &que->txr;
112
113         err = drbr_enqueue(ifp, txr->br, m);
114         if (err)
115                 return(err);
116         if (IXL_TX_TRYLOCK(txr)) {
117                 ixl_mq_start_locked(ifp, txr);
118                 IXL_TX_UNLOCK(txr);
119         } else
120                 taskqueue_enqueue(que->tq, &que->tx_task);
121
122         return (0);
123 }
124
125 int
126 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
127 {
128         struct ixl_queue        *que = txr->que;
129         struct ixl_vsi          *vsi = que->vsi;
130         struct mbuf             *next;
131         int                     err = 0;
132
133
134         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
135             vsi->link_active == 0)
136                 return (ENETDOWN);
137
138         /* Process the transmit queue */
139         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
140                 if ((err = ixl_xmit(que, &next)) != 0) {
141                         if (next == NULL)
142                                 drbr_advance(ifp, txr->br);
143                         else
144                                 drbr_putback(ifp, txr->br, next);
145                         break;
146                 }
147                 drbr_advance(ifp, txr->br);
148                 /* Send a copy of the frame to the BPF listener */
149                 ETHER_BPF_MTAP(ifp, next);
150                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
151                         break;
152         }
153
154         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
155                 ixl_txeof(que);
156
157         return (err);
158 }
159
160 /*
161  * Called from a taskqueue to drain queued transmit packets.
162  */
163 void
164 ixl_deferred_mq_start(void *arg, int pending)
165 {
166         struct ixl_queue        *que = arg;
167         struct tx_ring          *txr = &que->txr;
168         struct ixl_vsi          *vsi = que->vsi;
169         struct ifnet            *ifp = vsi->ifp;
170         
171         IXL_TX_LOCK(txr);
172         if (!drbr_empty(ifp, txr->br))
173                 ixl_mq_start_locked(ifp, txr);
174         IXL_TX_UNLOCK(txr);
175 }
176
177 /*
178 ** Flush all queue ring buffers
179 */
180 void
181 ixl_qflush(struct ifnet *ifp)
182 {
183         struct ixl_vsi  *vsi = ifp->if_softc;
184
185         for (int i = 0; i < vsi->num_queues; i++) {
186                 struct ixl_queue *que = &vsi->queues[i];
187                 struct tx_ring  *txr = &que->txr;
188                 struct mbuf     *m;
189                 IXL_TX_LOCK(txr);
190                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
191                         m_freem(m);
192                 IXL_TX_UNLOCK(txr);
193         }
194         if_qflush(ifp);
195 }
196
197 /*
198 ** Find mbuf chains passed to the driver 
199 ** that are 'sparse', using more than 8
200 ** mbufs to deliver an MSS-sized chunk of data
201 */
202 static inline bool
203 ixl_tso_detect_sparse(struct mbuf *mp)
204 {
205         struct mbuf     *m;
206         int             num = 0, mss;
207         bool            ret = FALSE;
208
209         mss = mp->m_pkthdr.tso_segsz;
210         for (m = mp->m_next; m != NULL; m = m->m_next) {
211                 num++;
212                 mss -= m->m_len;
213                 if (mss < 1)
214                         break;
215                 if (m->m_next == NULL)
216                         break;
217         }
218         if (num > IXL_SPARSE_CHAIN)
219                 ret = TRUE;
220
221         return (ret);
222 }
223
224
225 /*********************************************************************
226  *
227  *  This routine maps the mbufs to tx descriptors, allowing the
228  *  TX engine to transmit the packets. 
229  *      - return 0 on success, positive on failure
230  *
231  **********************************************************************/
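/*
 * Default command bits for the last data descriptor of a frame:
 * EOP marks the end of the packet and RS asks the hardware to
 * report completion status for that descriptor.
 */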
232 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
233
234 static int
235 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
236 {
237         struct ixl_vsi          *vsi = que->vsi;
238         struct i40e_hw          *hw = vsi->hw;
239         struct tx_ring          *txr = &que->txr;
240         struct ixl_tx_buf       *buf;
241         struct i40e_tx_desc     *txd = NULL;
242         struct mbuf             *m_head, *m;
243         int                     i, j, error, nsegs, maxsegs;
244         int                     first, last = 0;
245         u16                     vtag = 0;
246         u32                     cmd, off;
247         bus_dmamap_t            map;
248         bus_dma_tag_t           tag;
249         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
250
251
252         cmd = off = 0;
253         m_head = *m_headp;
254
255         /*
256          * Important to capture the first descriptor
257          * used because it will contain the index of
258          * the one we tell the hardware to report back
259          */
260         first = txr->next_avail;
261         buf = &txr->buffers[first];
262         map = buf->map;
263         tag = txr->tx_tag;
264         maxsegs = IXL_MAX_TX_SEGS;
265
266         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
267                 /* Use larger mapping for TSO */
268                 tag = txr->tso_tag;
269                 maxsegs = IXL_MAX_TSO_SEGS;
270                 if (ixl_tso_detect_sparse(m_head)) {
271                         m = m_defrag(m_head, M_NOWAIT);
272                         if (m == NULL) {
273                                 m_freem(*m_headp);
274                                 *m_headp = NULL;
275                                 return (ENOBUFS);
276                         }
277                         *m_headp = m;
278                 }
279         }
280
281         /*
282          * Map the packet for DMA.
283          */
284         error = bus_dmamap_load_mbuf_sg(tag, map,
285             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
286
287         if (error == EFBIG) {
288                 struct mbuf *m;
289
290                 m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
291                 if (m == NULL) {
292                         que->mbuf_defrag_failed++;
293                         m_freem(*m_headp);
294                         *m_headp = NULL;
295                         return (ENOBUFS);
296                 }
297                 *m_headp = m;
298
299                 /* Try it again */
300                 error = bus_dmamap_load_mbuf_sg(tag, map,
301                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
302
303                 if (error == ENOMEM) {
304                         que->tx_dma_setup++;
305                         return (error);
306                 } else if (error != 0) {
307                         que->tx_dma_setup++;
308                         m_freem(*m_headp);
309                         *m_headp = NULL;
310                         return (error);
311                 }
312         } else if (error == ENOMEM) {
313                 que->tx_dma_setup++;
314                 return (error);
315         } else if (error != 0) {
316                 que->tx_dma_setup++;
317                 m_freem(*m_headp);
318                 *m_headp = NULL;
319                 return (error);
320         }
321
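        /*
         * Note: the check below keeps a little slack in the ring; if
         * this is a TSO frame, the offload setup further down will
         * consume one more ring slot for the context descriptor.
         */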
322         /* Make certain there are enough descriptors */
323         if (nsegs > txr->avail - 2) {
324                 txr->no_desc++;
325                 error = ENOBUFS;
326                 goto xmit_fail;
327         }
328         m_head = *m_headp;
329
330         /* Set up the TSO/CSUM offload */
331         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
332                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
333                 if (error)
334                         goto xmit_fail;
335         }
336
337         cmd |= I40E_TX_DESC_CMD_ICRC;
338         /* Grab the VLAN tag */
339         if (m_head->m_flags & M_VLANTAG) {
340                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
341                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
342         }
343
344         i = txr->next_avail;
345         for (j = 0; j < nsegs; j++) {
346                 bus_size_t seglen;
347
348                 buf = &txr->buffers[i];
349                 buf->tag = tag; /* Keep track of the type tag */
350                 txd = &txr->base[i];
351                 seglen = segs[j].ds_len;
352
353                 txd->buffer_addr = htole64(segs[j].ds_addr);
354                 txd->cmd_type_offset_bsz =
355                     htole64(I40E_TX_DESC_DTYPE_DATA
356                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
357                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
358                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
359                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
360
361                 last = i; /* descriptor that will get completion IRQ */
362
363                 if (++i == que->num_desc)
364                         i = 0;
365
366                 buf->m_head = NULL;
367                 buf->eop_index = -1;
368         }
369         /* Set the last descriptor for report */
370         txd->cmd_type_offset_bsz |=
371             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
372         txr->avail -= nsegs;
373         txr->next_avail = i;
374
375         buf->m_head = m_head;
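        /*
         * The mbuf is recorded on the last buffer used, but the DMA map
         * that was loaded above came from the first buffer, so exchange
         * the two maps; the cleanup in ixl_txeof() then unloads the map
         * that actually covers this mbuf.
         */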
376         /* Swap the dma map between the first and last descriptor */
377         txr->buffers[first].map = buf->map;
378         buf->map = map;
379         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
380
381         /* Set the index of the descriptor that will be marked done */
382         buf = &txr->buffers[first];
383         buf->eop_index = last;
384
385         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
386             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
387         /*
388          * Advance the Transmit Descriptor Tail (TDT); this tells the
389          * hardware that this frame is available to transmit.
390          */
391         ++txr->total_packets;
392         wr32(hw, txr->tail, i);
393
394         ixl_flush(hw);
395         /* Mark outstanding work */
396         if (que->busy == 0)
397                 que->busy = 1;
398         return (0);
399
400 xmit_fail:
401         bus_dmamap_unload(tag, buf->map);
402         return (error);
403 }
404
405
406 /*********************************************************************
407  *
408  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
409  *  the information needed to transmit a packet on the wire. This is
410  *  called only once at attach; setup is done on every reset.
411  *
412  **********************************************************************/
413 int
414 ixl_allocate_tx_data(struct ixl_queue *que)
415 {
416         struct tx_ring          *txr = &que->txr;
417         struct ixl_vsi          *vsi = que->vsi;
418         device_t                dev = vsi->dev;
419         struct ixl_tx_buf       *buf;
420         int                     error = 0;
421
422         /*
423          * Setup DMA descriptor areas.
424          */
425         if ((error = bus_dma_tag_create(NULL,           /* parent */
426                                1, 0,                    /* alignment, bounds */
427                                BUS_SPACE_MAXADDR,       /* lowaddr */
428                                BUS_SPACE_MAXADDR,       /* highaddr */
429                                NULL, NULL,              /* filter, filterarg */
430                                IXL_TSO_SIZE,            /* maxsize */
431                                IXL_MAX_TX_SEGS,         /* nsegments */
432                                PAGE_SIZE,               /* maxsegsize */
433                                0,                       /* flags */
434                                NULL,                    /* lockfunc */
435                                NULL,                    /* lockfuncarg */
436                                &txr->tx_tag))) {
437                 device_printf(dev,"Unable to allocate TX DMA tag\n");
438                 goto fail;
439         }
440
441         /* Make a special tag for TSO */
442         if ((error = bus_dma_tag_create(NULL,           /* parent */
443                                1, 0,                    /* alignment, bounds */
444                                BUS_SPACE_MAXADDR,       /* lowaddr */
445                                BUS_SPACE_MAXADDR,       /* highaddr */
446                                NULL, NULL,              /* filter, filterarg */
447                                IXL_TSO_SIZE,            /* maxsize */
448                                IXL_MAX_TSO_SEGS,        /* nsegments */
449                                PAGE_SIZE,               /* maxsegsize */
450                                0,                       /* flags */
451                                NULL,                    /* lockfunc */
452                                NULL,                    /* lockfuncarg */
453                                &txr->tso_tag))) {
454                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
455                 goto fail;
456         }
457
458         if (!(txr->buffers =
459             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
460             que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
461                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
462                 error = ENOMEM;
463                 goto fail;
464         }
465
466         /* Create the descriptor buffer default dma maps */
467         buf = txr->buffers;
468         for (int i = 0; i < que->num_desc; i++, buf++) {
469                 buf->tag = txr->tx_tag;
470                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
471                 if (error != 0) {
472                         device_printf(dev, "Unable to create TX DMA map\n");
473                         goto fail;
474                 }
475         }
476 fail:
477         return (error);
478 }
479
480
481 /*********************************************************************
482  *
483  *  (Re)Initialize a queue transmit ring.
484  *      - called by init, it clears the descriptor ring,
485  *        and frees any stale mbufs 
486  *
487  **********************************************************************/
488 void
489 ixl_init_tx_ring(struct ixl_queue *que)
490 {
491         struct tx_ring *txr = &que->txr;
492         struct ixl_tx_buf *buf;
493 #ifdef DEV_NETMAP
494         struct netmap_adapter *na = NA(que->vsi->ifp);
495         struct netmap_slot *slot;
496 #endif /* DEV_NETMAP */
497
498         /* Clear the old ring contents */
499         IXL_TX_LOCK(txr);
500 #ifdef DEV_NETMAP
501         /*
502          * (under lock): if in netmap mode, do some consistency
503          * checks and set slot to entry 0 of the netmap ring.
504          */
505         slot = netmap_reset(na, NR_TX, que->me, 0);
506 #endif /* DEV_NETMAP */
507
508         bzero((void *)txr->base,
509               (sizeof(struct i40e_tx_desc)) * que->num_desc);
510
511         /* Reset indices */
512         txr->next_avail = 0;
513         txr->next_to_clean = 0;
514
515 #ifdef IXL_FDIR
516         /* Initialize flow director */
517         txr->atr_rate = ixl_atr_rate;
518         txr->atr_count = 0;
519 #endif
520
521         /* Free any existing tx mbufs. */
522         buf = txr->buffers;
523         for (int i = 0; i < que->num_desc; i++, buf++) {
524                 if (buf->m_head != NULL) {
525                         bus_dmamap_sync(buf->tag, buf->map,
526                             BUS_DMASYNC_POSTWRITE);
527                         bus_dmamap_unload(buf->tag, buf->map);
528                         m_freem(buf->m_head);
529                         buf->m_head = NULL;
530                 }
531 #ifdef DEV_NETMAP
532                 /*
533                  * In netmap mode, set the map for the packet buffer.
534                  * NOTE: Some drivers (not this one) also need to set
535                  * the physical buffer address in the NIC ring.
536                  * netmap_idx_n2k() maps a nic index, i, into the corresponding
537                  * netmap slot index, si
538                  */
539                 if (slot) {
540                         int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
541                         netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
542                 }
543 #endif /* DEV_NETMAP */
544                 /* Clear the EOP index */
545                 buf->eop_index = -1;
546         }
547
548         /* Set number of descriptors available */
549         txr->avail = que->num_desc;
550
551         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
552             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
553         IXL_TX_UNLOCK(txr);
554 }
555
556
557 /*********************************************************************
558  *
559  *  Free transmit ring related data structures.
560  *
561  **********************************************************************/
562 void
563 ixl_free_que_tx(struct ixl_queue *que)
564 {
565         struct tx_ring *txr = &que->txr;
566         struct ixl_tx_buf *buf;
567
568         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
569
570         for (int i = 0; i < que->num_desc; i++) {
571                 buf = &txr->buffers[i];
572                 if (buf->m_head != NULL) {
573                         bus_dmamap_sync(buf->tag, buf->map,
574                             BUS_DMASYNC_POSTWRITE);
575                         bus_dmamap_unload(buf->tag,
576                             buf->map);
577                         m_freem(buf->m_head);
578                         buf->m_head = NULL;
579                         if (buf->map != NULL) {
580                                 bus_dmamap_destroy(buf->tag,
581                                     buf->map);
582                                 buf->map = NULL;
583                         }
584                 } else if (buf->map != NULL) {
585                         bus_dmamap_unload(buf->tag,
586                             buf->map);
587                         bus_dmamap_destroy(buf->tag,
588                             buf->map);
589                         buf->map = NULL;
590                 }
591         }
592         if (txr->br != NULL)
593                 buf_ring_free(txr->br, M_DEVBUF);
594         if (txr->buffers != NULL) {
595                 free(txr->buffers, M_DEVBUF);
596                 txr->buffers = NULL;
597         }
598         if (txr->tx_tag != NULL) {
599                 bus_dma_tag_destroy(txr->tx_tag);
600                 txr->tx_tag = NULL;
601         }
602         if (txr->tso_tag != NULL) {
603                 bus_dma_tag_destroy(txr->tso_tag);
604                 txr->tso_tag = NULL;
605         }
606
607         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
608         return;
609 }
610
611 /*********************************************************************
612  *
613  *  Setup descriptor for hw offloads 
614  *
615  **********************************************************************/
616
617 static int
618 ixl_tx_setup_offload(struct ixl_queue *que,
619     struct mbuf *mp, u32 *cmd, u32 *off)
620 {
621         struct ether_vlan_header        *eh;
622 #ifdef INET
623         struct ip                       *ip = NULL;
624 #endif
625         struct tcphdr                   *th = NULL;
626 #ifdef INET6
627         struct ip6_hdr                  *ip6;
628 #endif
629         int                             elen, ip_hlen = 0, tcp_hlen;
630         u16                             etype;
631         u8                              ipproto = 0;
632         bool                            tso = FALSE;
633
634
635         /* Set up the TSO context descriptor if required */
636         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
637                 tso = ixl_tso_setup(que, mp);
638                 if (tso)
639                         ++que->tso;
640                 else
641                         return (ENXIO);
642         }
643
644         /*
645          * Determine where frame payload starts.
646          * Jump over vlan headers if already present,
647          * helpful for QinQ too.
648          */
649         eh = mtod(mp, struct ether_vlan_header *);
650         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
651                 etype = ntohs(eh->evl_proto);
652                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
653         } else {
654                 etype = ntohs(eh->evl_encap_proto);
655                 elen = ETHER_HDR_LEN;
656         }
657
658         switch (etype) {
659 #ifdef INET
660                 case ETHERTYPE_IP:
661                         ip = (struct ip *)(mp->m_data + elen);
662                         ip_hlen = ip->ip_hl << 2;
663                         ipproto = ip->ip_p;
664                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
665                         /* The IP checksum must be recalculated with TSO */
666                         if (tso)
667                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
668                         else
669                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
670                         break;
671 #endif
672 #ifdef INET6
673                 case ETHERTYPE_IPV6:
674                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
675                         ip_hlen = sizeof(struct ip6_hdr);
676                         ipproto = ip6->ip6_nxt;
677                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
678                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
679                         break;
680 #endif
681                 default:
682                         break;
683         }
684
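        /*
         * The descriptor encodes MACLEN in 2-byte units and IPLEN (and
         * the L4 lengths below) in 4-byte units, hence the shifts.
         */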
685         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
686         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
687
688         switch (ipproto) {
689                 case IPPROTO_TCP:
690                         tcp_hlen = th->th_off << 2;
691                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
692                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
693                                 *off |= (tcp_hlen >> 2) <<
694                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
695                         }
696 #ifdef IXL_FDIR
697                         ixl_atr(que, th, etype);
698 #endif
699                         break;
700                 case IPPROTO_UDP:
701                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
702                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
703                                 *off |= (sizeof(struct udphdr) >> 2) <<
704                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
705                         }
706                         break;
707
708                 case IPPROTO_SCTP:
709                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
710                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
711                                 *off |= (sizeof(struct sctphdr) >> 2) <<
712                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
713                         }
714                         /* Fall Thru */
715                 default:
716                         break;
717         }
718
719         return (0);
720 }
721
722
723 /**********************************************************************
724  *
725  *  Setup context for hardware segmentation offload (TSO)
726  *
727  **********************************************************************/
728 static bool
729 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
730 {
731         struct tx_ring                  *txr = &que->txr;
732         struct i40e_tx_context_desc     *TXD;
733         struct ixl_tx_buf               *buf;
734         u32                             cmd, mss, type, tsolen;
735         u16                             etype;
736         int                             idx, elen, ip_hlen, tcp_hlen;
737         struct ether_vlan_header        *eh;
738 #ifdef INET
739         struct ip                       *ip;
740 #endif
741 #ifdef INET6
742         struct ip6_hdr                  *ip6;
743 #endif
744 #if defined(INET6) || defined(INET)
745         struct tcphdr                   *th;
746 #endif
747         u64                             type_cmd_tso_mss;
748
749         /*
750          * Determine where frame payload starts.
751          * Jump over vlan headers if already present
752          */
753         eh = mtod(mp, struct ether_vlan_header *);
754         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
755                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
756                 etype = eh->evl_proto;
757         } else {
758                 elen = ETHER_HDR_LEN;
759                 etype = eh->evl_encap_proto;
760         }
761
762         switch (ntohs(etype)) {
763 #ifdef INET6
764         case ETHERTYPE_IPV6:
765                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
766                 if (ip6->ip6_nxt != IPPROTO_TCP)
767                         return (FALSE);
768                 ip_hlen = sizeof(struct ip6_hdr);
769                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
770                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
771                 tcp_hlen = th->th_off << 2;
772                 break;
773 #endif
774 #ifdef INET
775         case ETHERTYPE_IP:
776                 ip = (struct ip *)(mp->m_data + elen);
777                 if (ip->ip_p != IPPROTO_TCP)
778                         return (FALSE);
779                 ip->ip_sum = 0;
780                 ip_hlen = ip->ip_hl << 2;
781                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
782                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
783                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
784                 tcp_hlen = th->th_off << 2;
785                 break;
786 #endif
787         default:
788                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
789                     __func__, ntohs(etype));
790                 return FALSE;
791         }
792
793         /* Ensure we have at least the IP+TCP header in the first mbuf. */
794         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
795                 return FALSE;
796
797         idx = txr->next_avail;
798         buf = &txr->buffers[idx];
799         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
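        /* TSO payload length: total frame length minus the L2, L3 and L4 headers. */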
800         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
801
802         type = I40E_TX_DESC_DTYPE_CONTEXT;
803         cmd = I40E_TX_CTX_DESC_TSO;
804         mss = mp->m_pkthdr.tso_segsz;
805
806         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
807             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
808             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
809             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
810         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
811
812         TXD->tunneling_params = htole32(0);
813         buf->m_head = NULL;
814         buf->eop_index = -1;
815
816         if (++idx == que->num_desc)
817                 idx = 0;
818
819         txr->avail--;
820         txr->next_avail = idx;
821
822         return TRUE;
823 }
824
825 /*             
826 ** ixl_get_tx_head - Retrieve the head-writeback value; the HW
827 **    writes its HEAD index just past the last descriptor in the ring
828 */
829 static inline u32
830 ixl_get_tx_head(struct ixl_queue *que)
831 {
832         struct tx_ring  *txr = &que->txr;
833         void *head = &txr->base[que->num_desc];
834         return LE32_TO_CPU(*(volatile __le32 *)head);
835 }
836
837 /**********************************************************************
838  *
839  *  Examine each tx_buffer in the used queue. If the hardware is done
840  *  processing the packet then free associated resources. The
841  *  tx_buffer is put back on the free queue.
842  *
843  **********************************************************************/
844 bool
845 ixl_txeof(struct ixl_queue *que)
846 {
847         struct tx_ring          *txr = &que->txr;
848         u32                     first, last, head, done, processed;
849         struct ixl_tx_buf       *buf;
850         struct i40e_tx_desc     *tx_desc, *eop_desc;
851
852
853         mtx_assert(&txr->mtx, MA_OWNED);
854
855 #ifdef DEV_NETMAP
856         // XXX todo: implement moderation
857         if (netmap_tx_irq(que->vsi->ifp, que->me))
858                 return FALSE;
859 #endif /* DEV_NETMAP */
860
861         /* These are not the descriptors you seek, move along :) */
862         if (txr->avail == que->num_desc) {
863                 que->busy = 0;
864                 return FALSE;
865         }
866
867         processed = 0;
868         first = txr->next_to_clean;
869         buf = &txr->buffers[first];
870         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
871         last = buf->eop_index;
872         if (last == -1)
873                 return FALSE;
874         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
875
876         /* Get the Head WB value */
877         head = ixl_get_tx_head(que);
878
879         /*
880         ** Get the index of the first descriptor
881         ** BEYOND the EOP and call that 'done'.
882         ** I do this so the comparison in the
883         ** inner while loop below can be simple
884         */
885         if (++last == que->num_desc) last = 0;
886         done = last;
887
888         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
889             BUS_DMASYNC_POSTREAD);
890         /*
891         ** The HEAD index of the ring is written to a
892         ** defined location; this, rather than a done bit,
893         ** is what is used to keep track of what must be
894         ** 'cleaned'.
895         */
896         while (first != head) {
897                 /* We clean the range of the packet */
898                 while (first != done) {
899                         ++txr->avail;
900                         ++processed;
901
902                         if (buf->m_head) {
903                                 txr->bytes += /* for ITR adjustment */
904                                     buf->m_head->m_pkthdr.len;
905                                 txr->tx_bytes += /* for TX stats */
906                                     buf->m_head->m_pkthdr.len;
907                                 bus_dmamap_sync(buf->tag,
908                                     buf->map,
909                                     BUS_DMASYNC_POSTWRITE);
910                                 bus_dmamap_unload(buf->tag,
911                                     buf->map);
912                                 m_freem(buf->m_head);
913                                 buf->m_head = NULL;
914                                 buf->map = NULL;
915                         }
916                         buf->eop_index = -1;
917
918                         if (++first == que->num_desc)
919                                 first = 0;
920
921                         buf = &txr->buffers[first];
922                         tx_desc = &txr->base[first];
923                 }
924                 ++txr->packets;
925                 /* See if there is more work now */
926                 last = buf->eop_index;
927                 if (last != -1) {
928                         eop_desc = &txr->base[last];
929                         /* Get next done point */
930                         if (++last == que->num_desc) last = 0;
931                         done = last;
932                 } else
933                         break;
934         }
935         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
936             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
937
938         txr->next_to_clean = first;
939
940
941         /*
942         ** Hang detection: we know there's work
943         ** outstanding, or the first return above
944         ** would have been taken, so indicate an
945         ** unsuccessful pass. In local_timer, if
946         ** the value grows too large the queue will
947         ** be considered hung. If anything has been
948         ** cleaned then reset the state.
949         */
950         if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
951                 ++que->busy;
952
953         if (processed)
954                 que->busy = 1; /* Note this turns off HUNG */
955
956         /*
957          * If there are no pending descriptors, clear the timeout.
958          */
959         if (txr->avail == que->num_desc) {
960                 que->busy = 0;
961                 return FALSE;
962         }
963
964         return TRUE;
965 }
966
967 /*********************************************************************
968  *
969  *  Refresh mbuf buffers for RX descriptor rings
970  *   - now keeps its own state, so discards due to resource
971  *     exhaustion are unnecessary; if an mbuf cannot be obtained
972  *     it just returns, keeping its placeholder, and can simply
973  *     be called again later to retry.
974  *
975  **********************************************************************/
976 static void
977 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
978 {
979         struct ixl_vsi          *vsi = que->vsi;
980         struct rx_ring          *rxr = &que->rxr;
981         bus_dma_segment_t       hseg[1];
982         bus_dma_segment_t       pseg[1];
983         struct ixl_rx_buf       *buf;
984         struct mbuf             *mh, *mp;
985         int                     i, j, nsegs, error;
986         bool                    refreshed = FALSE;
987
988         i = j = rxr->next_refresh;
989         /* Control the loop with one beyond */
990         if (++j == que->num_desc)
991                 j = 0;
992
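        /*
         * Refresh descriptors until the look-ahead index 'j' reaches
         * 'limit'; the slot just before 'limit' is deliberately left
         * un-refreshed so the refresh (tail) index never advances all
         * the way to the caller's limit.
         */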
993         while (j != limit) {
994                 buf = &rxr->buffers[i];
995                 if (rxr->hdr_split == FALSE)
996                         goto no_split;
997
998                 if (buf->m_head == NULL) {
999                         mh = m_gethdr(M_NOWAIT, MT_DATA);
1000                         if (mh == NULL)
1001                                 goto update;
1002                 } else
1003                         mh = buf->m_head;
1004
1005                 mh->m_pkthdr.len = mh->m_len = MHLEN;
1007                 mh->m_flags |= M_PKTHDR;
1008                 /* Get the memory mapping */
1009                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1010                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1011                 if (error != 0) {
1012                         printf("Refresh mbufs: hdr dmamap load"
1013                             " failure - %d\n", error);
1014                         m_free(mh);
1015                         buf->m_head = NULL;
1016                         goto update;
1017                 }
1018                 buf->m_head = mh;
1019                 bus_dmamap_sync(rxr->htag, buf->hmap,
1020                     BUS_DMASYNC_PREREAD);
1021                 rxr->base[i].read.hdr_addr =
1022                    htole64(hseg[0].ds_addr);
1023
1024 no_split:
1025                 if (buf->m_pack == NULL) {
1026                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1027                             M_PKTHDR, rxr->mbuf_sz);
1028                         if (mp == NULL)
1029                                 goto update;
1030                 } else
1031                         mp = buf->m_pack;
1032
1033                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1034                 /* Get the memory mapping */
1035                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1036                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1037                 if (error != 0) {
1038                         printf("Refresh mbufs: payload dmamap load"
1039                             " failure - %d\n", error);
1040                         m_free(mp);
1041                         buf->m_pack = NULL;
1042                         goto update;
1043                 }
1044                 buf->m_pack = mp;
1045                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1046                     BUS_DMASYNC_PREREAD);
1047                 rxr->base[i].read.pkt_addr =
1048                    htole64(pseg[0].ds_addr);
1049                 /* Used only when doing header split */
1050                 rxr->base[i].read.hdr_addr = 0;
1051
1052                 refreshed = TRUE;
1053                 /* Next is precalculated */
1054                 i = j;
1055                 rxr->next_refresh = i;
1056                 if (++j == que->num_desc)
1057                         j = 0;
1058         }
1059 update:
1060         if (refreshed) /* Update hardware tail index */
1061                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1062         return;
1063 }
1064
1065
1066 /*********************************************************************
1067  *
1068  *  Allocate memory for rx_buffer structures. Since we use one
1069  *  rx_buffer per descriptor, the maximum number of rx_buffer's
1070  *  that we'll need is equal to the number of receive descriptors
1071  *  that we've defined.
1072  *
1073  **********************************************************************/
1074 int
1075 ixl_allocate_rx_data(struct ixl_queue *que)
1076 {
1077         struct rx_ring          *rxr = &que->rxr;
1078         struct ixl_vsi          *vsi = que->vsi;
1079         device_t                dev = vsi->dev;
1080         struct ixl_rx_buf       *buf;
1081         int                     i, bsize, error;
1082
1083         bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1084         if (!(rxr->buffers =
1085             (struct ixl_rx_buf *) malloc(bsize,
1086             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1087                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1088                 error = ENOMEM;
1089                 return (error);
1090         }
1091
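        /* DMA tag for the small header mbufs used by the (optional) header-split path */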
1092         if ((error = bus_dma_tag_create(NULL,   /* parent */
1093                                    1, 0,        /* alignment, bounds */
1094                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1095                                    BUS_SPACE_MAXADDR,   /* highaddr */
1096                                    NULL, NULL,          /* filter, filterarg */
1097                                    MSIZE,               /* maxsize */
1098                                    1,                   /* nsegments */
1099                                    MSIZE,               /* maxsegsize */
1100                                    0,                   /* flags */
1101                                    NULL,                /* lockfunc */
1102                                    NULL,                /* lockfuncarg */
1103                                    &rxr->htag))) {
1104                 device_printf(dev, "Unable to create RX DMA htag\n");
1105                 return (error);
1106         }
1107
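        /* DMA tag for the receive payload clusters (up to a 16KB jumbo cluster) */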
1108         if ((error = bus_dma_tag_create(NULL,   /* parent */
1109                                    1, 0,        /* alignment, bounds */
1110                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1111                                    BUS_SPACE_MAXADDR,   /* highaddr */
1112                                    NULL, NULL,          /* filter, filterarg */
1113                                    MJUM16BYTES,         /* maxsize */
1114                                    1,                   /* nsegments */
1115                                    MJUM16BYTES,         /* maxsegsize */
1116                                    0,                   /* flags */
1117                                    NULL,                /* lockfunc */
1118                                    NULL,                /* lockfuncarg */
1119                                    &rxr->ptag))) {
1120                 device_printf(dev, "Unable to create RX DMA ptag\n");
1121                 return (error);
1122         }
1123
1124         for (i = 0; i < que->num_desc; i++) {
1125                 buf = &rxr->buffers[i];
1126                 error = bus_dmamap_create(rxr->htag,
1127                     BUS_DMA_NOWAIT, &buf->hmap);
1128                 if (error) {
1129                         device_printf(dev, "Unable to create RX head map\n");
1130                         break;
1131                 }
1132                 error = bus_dmamap_create(rxr->ptag,
1133                     BUS_DMA_NOWAIT, &buf->pmap);
1134                 if (error) {
1135                         device_printf(dev, "Unable to create RX pkt map\n");
1136                         break;
1137                 }
1138         }
1139
1140         return (error);
1141 }
1142
1143
1144 /*********************************************************************
1145  *
1146  *  (Re)Initialize the queue receive ring and its buffers.
1147  *
1148  **********************************************************************/
1149 int
1150 ixl_init_rx_ring(struct ixl_queue *que)
1151 {
1152         struct  rx_ring         *rxr = &que->rxr;
1153         struct ixl_vsi          *vsi = que->vsi;
1154 #if defined(INET6) || defined(INET)
1155         struct ifnet            *ifp = vsi->ifp;
1156         struct lro_ctrl         *lro = &rxr->lro;
1157 #endif
1158         struct ixl_rx_buf       *buf;
1159         bus_dma_segment_t       pseg[1], hseg[1];
1160         int                     rsize, nsegs, error = 0;
1161 #ifdef DEV_NETMAP 
1162         struct netmap_adapter *na = NA(que->vsi->ifp);
1163         struct netmap_slot *slot;
1164 #endif /* DEV_NETMAP */
1165
1166         IXL_RX_LOCK(rxr);
1167 #ifdef DEV_NETMAP
1168         /* same as in ixl_init_tx_ring() */
1169         slot = netmap_reset(na, NR_RX, que->me, 0);
1170 #endif /* DEV_NETMAP */
1171         /* Clear the ring contents */
1172         rsize = roundup2(que->num_desc *
1173             sizeof(union i40e_rx_desc), DBA_ALIGN);
1174         bzero((void *)rxr->base, rsize);
1175         /* Cleanup any existing buffers */
1176         for (int i = 0; i < que->num_desc; i++) {
1177                 buf = &rxr->buffers[i];
1178                 if (buf->m_head != NULL) {
1179                         bus_dmamap_sync(rxr->htag, buf->hmap,
1180                             BUS_DMASYNC_POSTREAD);
1181                         bus_dmamap_unload(rxr->htag, buf->hmap);
1182                         buf->m_head->m_flags |= M_PKTHDR;
1183                         m_freem(buf->m_head);
1184                 }
1185                 if (buf->m_pack != NULL) {
1186                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1187                             BUS_DMASYNC_POSTREAD);
1188                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1189                         buf->m_pack->m_flags |= M_PKTHDR;
1190                         m_freem(buf->m_pack);
1191                 }
1192                 buf->m_head = NULL;
1193                 buf->m_pack = NULL;
1194         }
1195
1196         /* header split is off */
1197         rxr->hdr_split = FALSE;
1198
1199         /* Now replenish the mbufs */
1200         for (int j = 0; j != que->num_desc; ++j) {
1201                 struct mbuf     *mh, *mp;
1202
1203                 buf = &rxr->buffers[j];
1204 #ifdef DEV_NETMAP
1205                 /*
1206                  * In netmap mode, fill the map and set the buffer
1207                  * address in the NIC ring, considering the offset
1208                  * between the netmap and NIC rings (see comment in
1209                  * ixgbe_setup_transmit_ring() ). No need to allocate
1210                  * an mbuf, so end the block with a continue;
1211                  */
1212                 if (slot) {
1213                         int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
1214                         uint64_t paddr;
1215                         void *addr;
1216
1217                         addr = PNMB(na, slot + sj, &paddr);
1218                         netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1219                         /* Update descriptor and the cached value */
1220                         rxr->base[j].read.pkt_addr = htole64(paddr);
1221                         rxr->base[j].read.hdr_addr = 0;
1222                         continue;
1223                 }
1224 #endif /* DEV_NETMAP */
1225
1226                 /*
1227                 ** Don't allocate header mbufs if we're not
1228                 ** doing header split; it's wasteful
1229                 */ 
1230                 if (rxr->hdr_split == FALSE)
1231                         goto skip_head;
1232
1233                 /* First the header */
1234                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1235                 if (buf->m_head == NULL) {
1236                         error = ENOBUFS;
1237                         goto fail;
1238                 }
1239                 m_adj(buf->m_head, ETHER_ALIGN);
1240                 mh = buf->m_head;
1241                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1242                 mh->m_flags |= M_PKTHDR;
1243                 /* Get the memory mapping */
1244                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1245                     buf->hmap, buf->m_head, hseg,
1246                     &nsegs, BUS_DMA_NOWAIT);
1247                 if (error != 0) /* Nothing elegant to do here */
1248                         goto fail;
1249                 bus_dmamap_sync(rxr->htag,
1250                     buf->hmap, BUS_DMASYNC_PREREAD);
1251                 /* Update descriptor */
1252                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1253
1254 skip_head:
1255                 /* Now the payload cluster */
1256                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1257                     M_PKTHDR, rxr->mbuf_sz);
1258                 if (buf->m_pack == NULL) {
1259                         error = ENOBUFS;
1260                         goto fail;
1261                 }
1262                 mp = buf->m_pack;
1263                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1264                 /* Get the memory mapping */
1265                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1266                     buf->pmap, mp, pseg,
1267                     &nsegs, BUS_DMA_NOWAIT);
1268                 if (error != 0)
1269                         goto fail;
1270                 bus_dmamap_sync(rxr->ptag,
1271                     buf->pmap, BUS_DMASYNC_PREREAD);
1272                 /* Update descriptor */
1273                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1274                 rxr->base[j].read.hdr_addr = 0;
1275         }
1276
1277
1278         /* Setup our descriptor indices */
1279         rxr->next_check = 0;
1280         rxr->next_refresh = 0;
1281         rxr->lro_enabled = FALSE;
1282         rxr->split = 0;
1283         rxr->bytes = 0;
1284         rxr->discard = FALSE;
1285
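        /* Make the whole ring available to the hardware: tail = last descriptor. */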
1286         wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1287         ixl_flush(vsi->hw);
1288
1289 #if defined(INET6) || defined(INET)
1290         /*
1291         ** Now set up the LRO interface:
1292         */
1293         if (ifp->if_capenable & IFCAP_LRO) {
1294                 int err = tcp_lro_init(lro);
1295                 if (err) {
1296                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1297                         goto fail;
1298                 }
1299                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1300                 rxr->lro_enabled = TRUE;
1301                 lro->ifp = vsi->ifp;
1302         }
1303 #endif
1304
1305         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1306             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1307
1308 fail:
1309         IXL_RX_UNLOCK(rxr);
1310         return (error);
1311 }
1312
1313
1314 /*********************************************************************
1315  *
1316  *  Free queue receive ring data structures
1317  *
1318  **********************************************************************/
1319 void
1320 ixl_free_que_rx(struct ixl_queue *que)
1321 {
1322         struct rx_ring          *rxr = &que->rxr;
1323         struct ixl_rx_buf       *buf;
1324
1325         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1326
1327         /* Cleanup any existing buffers */
1328         if (rxr->buffers != NULL) {
1329                 for (int i = 0; i < que->num_desc; i++) {
1330                         buf = &rxr->buffers[i];
1331                         if (buf->m_head != NULL) {
1332                                 bus_dmamap_sync(rxr->htag, buf->hmap,
1333                                     BUS_DMASYNC_POSTREAD);
1334                                 bus_dmamap_unload(rxr->htag, buf->hmap);
1335                                 buf->m_head->m_flags |= M_PKTHDR;
1336                                 m_freem(buf->m_head);
1337                         }
1338                         if (buf->m_pack != NULL) {
1339                                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1340                                     BUS_DMASYNC_POSTREAD);
1341                                 bus_dmamap_unload(rxr->ptag, buf->pmap);
1342                                 buf->m_pack->m_flags |= M_PKTHDR;
1343                                 m_freem(buf->m_pack);
1344                         }
1345                         buf->m_head = NULL;
1346                         buf->m_pack = NULL;
1347                         if (buf->hmap != NULL) {
1348                                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1349                                 buf->hmap = NULL;
1350                         }
1351                         if (buf->pmap != NULL) {
1352                                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1353                                 buf->pmap = NULL;
1354                         }
1355                 }
1356                 if (rxr->buffers != NULL) {
1357                         free(rxr->buffers, M_DEVBUF);
1358                         rxr->buffers = NULL;
1359                 }
1360         }
1361
1362         if (rxr->htag != NULL) {
1363                 bus_dma_tag_destroy(rxr->htag);
1364                 rxr->htag = NULL;
1365         }
1366         if (rxr->ptag != NULL) {
1367                 bus_dma_tag_destroy(rxr->ptag);
1368                 rxr->ptag = NULL;
1369         }
1370
1371         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1372         return;
1373 }
1374
1375 static __inline void
1376 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1377 {
1378
1379 #if defined(INET6) || defined(INET)
1380         /*
1381          * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
1382          * has been verified by hardware. Also, the packet should not have a
1383          * VLAN tag in its Ethernet header.
1384          */
1385         if (rxr->lro_enabled &&
1386             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1387             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1388             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1389                 /*
1390                  * Send to the stack if:
1391                  **  - LRO not enabled, or
1392                  **  - no LRO resources, or
1393                  **  - lro enqueue fails
1394                  */
1395                 if (rxr->lro.lro_cnt != 0)
1396                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1397                                 return;
1398         }
1399 #endif
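        /*
         * Drop the RX lock while the packet is handed up the stack;
         * holding the ring lock across if_input() could cause lock
         * ordering problems or recursion if the stack re-enters the
         * driver.
         */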
1400         IXL_RX_UNLOCK(rxr);
1401         (*ifp->if_input)(ifp, m);
1402         IXL_RX_LOCK(rxr);
1403 }
1404
1405
1406 static __inline void
1407 ixl_rx_discard(struct rx_ring *rxr, int i)
1408 {
1409         struct ixl_rx_buf       *rbuf;
1410
1411         rbuf = &rxr->buffers[i];
1412
1413         if (rbuf->fmp != NULL) {/* Partial chain ? */
1414                 rbuf->fmp->m_flags |= M_PKTHDR;
1415                 m_freem(rbuf->fmp);
1416                 rbuf->fmp = NULL;
1417         }
1418
1419         /*
1420         ** With advanced descriptors the writeback
1421         ** clobbers the buffer addresses, so it is easier
1422         ** to just free the existing mbufs and take
1423         ** the normal refresh path to get new buffers
1424         ** and mappings.
1425         */
1426         if (rbuf->m_head) {
1427                 m_free(rbuf->m_head);
1428                 rbuf->m_head = NULL;
1429         }
1430  
1431         if (rbuf->m_pack) {
1432                 m_free(rbuf->m_pack);
1433                 rbuf->m_pack = NULL;
1434         }
1435
1436         return;
1437 }
1438
1439 #ifdef RSS
1440 /*
1441 ** ixl_ptype_to_hash: parse the packet type
1442 ** to determine the appropriate mbuf hash type.
1443 */
1444 static inline int
1445 ixl_ptype_to_hash(u8 ptype)
1446 {
1447         struct i40e_rx_ptype_decoded    decoded;
1448         u8                              ex = 0;
1449
1450         decoded = decode_rx_desc_ptype(ptype);
1451         ex = decoded.outer_frag;
1452
1453         if (!decoded.known)
1454                 return M_HASHTYPE_OPAQUE;
1455
1456         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
1457                 return M_HASHTYPE_OPAQUE;
1458
1459         /* Note: anything that gets to this point is IP */
1460         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
1461                 switch (decoded.inner_prot) {
1462                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1463                                 if (ex)
1464                                         return M_HASHTYPE_RSS_TCP_IPV6_EX;
1465                                 else
1466                                         return M_HASHTYPE_RSS_TCP_IPV6;
1467                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1468                                 if (ex)
1469                                         return M_HASHTYPE_RSS_UDP_IPV6_EX;
1470                                 else
1471                                         return M_HASHTYPE_RSS_UDP_IPV6;
1472                         default:
1473                                 if (ex)
1474                                         return M_HASHTYPE_RSS_IPV6_EX;
1475                                 else
1476                                         return M_HASHTYPE_RSS_IPV6;
1477                 }
1478         }
1479         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
1480                 switch (decoded.inner_prot) {
1481                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1482                                 return M_HASHTYPE_RSS_TCP_IPV4;
1483                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1484                                 if (ex)
1485                                         return M_HASHTYPE_RSS_UDP_IPV4_EX;
1486                                 else
1487                                         return M_HASHTYPE_RSS_UDP_IPV4;
1488                         default:
1489                                 return M_HASHTYPE_RSS_IPV4;
1490                 }
1491         }
1492         /* We should never get here!! */
1493         return M_HASHTYPE_OPAQUE;
1494 }
1495 #endif /* RSS */
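
/*
** Usage sketch (example only, not compiled): in an RSS-enabled build
** ixl_ptype_to_hash() pairs with the RSS hash value taken from the
** descriptor, exactly as the receive loop below does, so the stack can
** classify the flow consistently.
*/
#if 0
        sendmp->m_pkthdr.flowid = le32toh(cur->wb.qword0.hi_dword.rss);
        M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
#endif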
1496
1497 /*********************************************************************
1498  *
1499  *  This routine executes in interrupt context. It replenishes
1500  *  the mbufs in the descriptor ring and sends data which has been
1501  *  DMA'd into host memory up to the upper layer.
1502  *
1503  *  We loop at most count times if count is > 0, or until done if
1504  *  count < 0.
1505  *
1506  *  Return TRUE for more work, FALSE for all clean (currently always FALSE).
1507  *********************************************************************/
1508 bool
1509 ixl_rxeof(struct ixl_queue *que, int count)
1510 {
1511         struct ixl_vsi          *vsi = que->vsi;
1512         struct rx_ring          *rxr = &que->rxr;
1513         struct ifnet            *ifp = vsi->ifp;
1514 #if defined(INET6) || defined(INET)
1515         struct lro_ctrl         *lro = &rxr->lro;
1516         struct lro_entry        *queued;
1517 #endif
1518         int                     i, nextp, processed = 0;
1519         union i40e_rx_desc      *cur;
1520         struct ixl_rx_buf       *rbuf, *nbuf;
1521
1522
1523         IXL_RX_LOCK(rxr);
1524
1525 #ifdef DEV_NETMAP
1526         if (netmap_rx_irq(ifp, que->me, &count)) {
1527                 IXL_RX_UNLOCK(rxr);
1528                 return (FALSE);
1529         }
1530 #endif /* DEV_NETMAP */
1531
1532         for (i = rxr->next_check; count != 0;) {
1533                 struct mbuf     *sendmp, *mh, *mp;
1534                 u32             rsc, status, error;
1535                 u16             hlen, plen, vtag;
1536                 u64             qword;
1537                 u8              ptype;
1538                 bool            eop;
1539  
1540                 /* Sync the ring. */
1541                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1542                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1543
1544                 cur = &rxr->base[i];
1545                 qword = le64toh(cur->wb.qword1.status_error_len);
1546                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1547                     >> I40E_RXD_QW1_STATUS_SHIFT;
1548                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1549                     >> I40E_RXD_QW1_ERROR_SHIFT;
1550                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1551                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1552                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1553                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1554                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1555                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1556
1557                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1558                         ++rxr->not_done;
1559                         break;
1560                 }
1561                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1562                         break;
1563
1564                 count--;
1565                 sendmp = NULL;
1566                 nbuf = NULL;
1567                 rsc = 0;
1568                 cur->wb.qword1.status_error_len = 0;
1569                 rbuf = &rxr->buffers[i];
1570                 mh = rbuf->m_head;
1571                 mp = rbuf->m_pack;
1572                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1573                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1574                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1575                 else
1576                         vtag = 0;
1577
1578                 /*
1579                 ** Make sure bad packets are discarded;
1580                 ** note that only the EOP descriptor has
1581                 ** valid error results.
1582                 */
1583                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1584                         rxr->discarded++;
1585                         ixl_rx_discard(rxr, i);
1586                         goto next_desc;
1587                 }
1588
1589                 /* Prefetch the next buffer */
1590                 if (!eop) {
1591                         nextp = i + 1;
1592                         if (nextp == que->num_desc)
1593                                 nextp = 0;
1594                         nbuf = &rxr->buffers[nextp];
1595                         prefetch(nbuf);
1596                 }
1597
1598                 /*
1599                 ** The header mbuf is ONLY used when header
1600                 ** split is enabled; otherwise we get the normal
1601                 ** behavior, i.e., both header and payload
1602                 ** are DMA'd into the payload buffer.
1603                 **
1604                 ** Rather than using the fmp/lmp global pointers
1605                 ** we now keep the head of a packet chain in the
1606                 ** buffer struct and pass this along from one
1607                 ** descriptor to the next, until we get EOP.
1608                 */
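                /*
                ** For example (two descriptors, header split on, roughly):
                **
                **      desc 0:  mh -> mp -> nbuf->m_pack   (data chain)
                **               nbuf->fmp = mh             (head stashed)
                **      desc 1:  EOP; sendmp = rbuf->fmp    (hand up chain)
                */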
1609                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1610                         if (hlen > IXL_RX_HDR)
1611                                 hlen = IXL_RX_HDR;
1612                         mh->m_len = hlen;
1613                         mh->m_flags |= M_PKTHDR;
1614                         mh->m_next = NULL;
1615                         mh->m_pkthdr.len = mh->m_len;
1616                         /* Null buf pointer so it is refreshed */
1617                         rbuf->m_head = NULL;
1618                         /*
1619                         ** Check the payload length; this
1620                         ** could be zero if it is a small
1621                         ** packet.
1622                         */
1623                         if (plen > 0) {
1624                                 mp->m_len = plen;
1625                                 mp->m_next = NULL;
1626                                 mp->m_flags &= ~M_PKTHDR;
1627                                 mh->m_next = mp;
1628                                 mh->m_pkthdr.len += mp->m_len;
1629                                 /* Null buf pointer so it is refreshed */
1630                                 rbuf->m_pack = NULL;
1631                                 rxr->split++;
1632                         }
1633                         /*
1634                         ** Now create the forward
1635                         ** chain so when complete
1636                         ** we won't have to.
1637                         */
1638                         if (eop == 0) {
1639                                 /* stash the chain head */
1640                                 nbuf->fmp = mh;
1641                                 /* Make forward chain */
1642                                 if (plen)
1643                                         mp->m_next = nbuf->m_pack;
1644                                 else
1645                                         mh->m_next = nbuf->m_pack;
1646                         } else {
1647                                 /* Singlet, prepare to send */
1648                                 sendmp = mh;
1649                                 if (vtag) {
1650                                         sendmp->m_pkthdr.ether_vtag = vtag;
1651                                         sendmp->m_flags |= M_VLANTAG;
1652                                 }
1653                         }
1654                 } else {
1655                         /*
1656                         ** Either no header split, or a
1657                         ** secondary piece of a fragmented
1658                         ** split packet.
1659                         */
1660                         mp->m_len = plen;
1661                         /*
1662                         ** See if there is a stored chain head
1663                         ** that tells us this continues a packet.
1664                         */
1665                         sendmp = rbuf->fmp;
1666                         rbuf->m_pack = rbuf->fmp = NULL;
1667
1668                         if (sendmp != NULL) /* secondary frag */
1669                                 sendmp->m_pkthdr.len += mp->m_len;
1670                         else {
1671                                 /* first desc of a non-ps chain */
1672                                 sendmp = mp;
1673                                 sendmp->m_flags |= M_PKTHDR;
1674                                 sendmp->m_pkthdr.len = mp->m_len;
1675                                 if (vtag) {
1676                                         sendmp->m_pkthdr.ether_vtag = vtag;
1677                                         sendmp->m_flags |= M_VLANTAG;
1678                                 }
1679                         }
1680                         /* Pass the head pointer on */
1681                         if (eop == 0) {
1682                                 nbuf->fmp = sendmp;
1683                                 sendmp = NULL;
1684                                 mp->m_next = nbuf->m_pack;
1685                         }
1686                 }
1687                 ++processed;
1688                 /* Sending this frame? */
1689                 if (eop) {
1690                         sendmp->m_pkthdr.rcvif = ifp;
1691                         /* gather stats */
1692                         rxr->rx_packets++;
1693                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1694                         /* capture data for dynamic ITR adjustment */
1695                         rxr->packets++;
1696                         rxr->bytes += sendmp->m_pkthdr.len;
1697                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1698                                 ixl_rx_checksum(sendmp, status, error, ptype);
1699 #ifdef RSS
1700                         sendmp->m_pkthdr.flowid =
1701                             le32toh(cur->wb.qword0.hi_dword.rss);
1702                         M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1703 #else
1704                         sendmp->m_pkthdr.flowid = que->msix;
1705                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1706 #endif
1707                 }
1708 next_desc:
1709                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1710                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1711
1712                 /* Advance our pointers to the next descriptor. */
1713                 if (++i == que->num_desc)
1714                         i = 0;
1715
1716                 /* Now send to the stack or do LRO */
1717                 if (sendmp != NULL) {
1718                         rxr->next_check = i;
1719                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1720                         i = rxr->next_check;
1721                 }
1722
1723                 /* Refresh mbufs after every 8 processed descriptors */
1724                 if (processed == 8) {
1725                         ixl_refresh_mbufs(que, i);
1726                         processed = 0;
1727                 }
1728         }
1729
1730         /* Refresh any remaining buf structs */
1731         if (ixl_rx_unrefreshed(que))
1732                 ixl_refresh_mbufs(que, i);
1733
1734         rxr->next_check = i;
1735
1736 #if defined(INET6) || defined(INET)
1737         /*
1738          * Flush any outstanding LRO work
1739          */
1740         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1741                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1742                 tcp_lro_flush(lro, queued);
1743         }
1744 #endif
1745
1746         IXL_RX_UNLOCK(rxr);
1747         return (FALSE);
1748 }
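
/*
** Caller sketch (example only, not compiled): the interrupt and task
** handlers live in the BASE/VF driver files, but a receive pass is
** typically driven like this; the function name and the budget of 256
** descriptors are hypothetical.
*/
#if 0
static void
example_rx_task(void *arg, int pending __unused)
{
        struct ixl_queue *que = arg;

        /* Keep draining while ixl_rxeof() reports more work. */
        while (ixl_rxeof(que, 256))
                continue;

        /* The real handlers also clean TX and re-enable the interrupt. */
}
#endif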
1749
1750
1751 /*********************************************************************
1752  *
1753  *  Verify that the hardware indicated that the checksum is valid.
1754  *  Inform the stack about the status of the checksum so that the
1755  *  stack doesn't spend time re-verifying it.
1756  *
1757  *********************************************************************/
1758 static void
1759 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1760 {
1761         struct i40e_rx_ptype_decoded decoded;
1762
1763         decoded = decode_rx_desc_ptype(ptype);
1764
1765         /* Errors? */
1766         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1767             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1768                 mp->m_pkthdr.csum_flags = 0;
1769                 return;
1770         }
1771
1772         /* IPv6 with extension headers is likely to have a bad csum */
1773         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1774             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1775                 if (status &
1776                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1777                         mp->m_pkthdr.csum_flags = 0;
1778                         return;
1779                 }
1780
1781  
1782         /* IP Checksum Good */
1783         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1784         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1785
1786         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1787                 mp->m_pkthdr.csum_flags |= 
1788                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1789                 mp->m_pkthdr.csum_data = htons(0xffff);
1790         }
1791         return;
1792 }
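
/*
** Consumer sketch (example only, not compiled): once CSUM_DATA_VALID and
** CSUM_PSEUDO_HDR are set with csum_data of 0xffff, a transport protocol
** can treat the checksum as verified without touching the payload,
** roughly along these lines:
*/
#if 0
        uint16_t sum;

        if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR))
                sum = m->m_pkthdr.csum_data ^ 0xffff;   /* 0 == good */
#endif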
1793
1794 #if __FreeBSD_version >= 1100000
1795 uint64_t
1796 ixl_get_counter(if_t ifp, ift_counter cnt)
1797 {
1798         struct ixl_vsi *vsi;
1799
1800         vsi = if_getsoftc(ifp);
1801
1802         switch (cnt) {
1803         case IFCOUNTER_IPACKETS:
1804                 return (vsi->ipackets);
1805         case IFCOUNTER_IERRORS:
1806                 return (vsi->ierrors);
1807         case IFCOUNTER_OPACKETS:
1808                 return (vsi->opackets);
1809         case IFCOUNTER_OERRORS:
1810                 return (vsi->oerrors);
1811         case IFCOUNTER_COLLISIONS:
1812                 /* Collisions are impossible in 10G/40G Ethernet (full-duplex only) */
1813                 return (0);
1814         case IFCOUNTER_IBYTES:
1815                 return (vsi->ibytes);
1816         case IFCOUNTER_OBYTES:
1817                 return (vsi->obytes);
1818         case IFCOUNTER_IMCASTS:
1819                 return (vsi->imcasts);
1820         case IFCOUNTER_OMCASTS:
1821                 return (vsi->omcasts);
1822         case IFCOUNTER_IQDROPS:
1823                 return (vsi->iqdrops);
1824         case IFCOUNTER_OQDROPS:
1825                 return (vsi->oqdrops);
1826         case IFCOUNTER_NOPROTO:
1827                 return (vsi->noproto);
1828         default:
1829                 return (if_get_counter_default(ifp, cnt));
1830         }
1831 }
1832 #endif
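
/*
** Registration sketch (example only, not compiled): on FreeBSD 1100000+
** the attach code in the BASE/VF drivers would typically hook the
** routine above into the ifnet with if_setgetcounterfn().
*/
#if 0
        if_setgetcounterfn(ifp, ixl_get_counter);
#endif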
1833