1 /******************************************************************************
2
3   Copyright (c) 2013-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was separated to allow usage by
38 **          both the BASE and the VF drivers.
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46
47 #include "ixl.h"
48
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52
53 /* Local Prototypes */
54 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
56 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
57 static int      ixl_tx_setup_offload(struct ixl_queue *,
58                     struct mbuf *, u32 *, u32 *);
59 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60
61 static __inline void ixl_rx_discard(struct rx_ring *, int);
62 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
63                     struct mbuf *, u8);
64
65 #ifdef DEV_NETMAP
66 #include <dev/netmap/if_ixl_netmap.h>
67 #endif /* DEV_NETMAP */
68
69 /*
70 ** Multiqueue Transmit driver
71 */
72 int
73 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
74 {
75         struct ixl_vsi          *vsi = ifp->if_softc;
76         struct ixl_queue        *que;
77         struct tx_ring          *txr;
78         int                     err, i;
79 #ifdef RSS
80         u32                     bucket_id;
81 #endif
82
83         /*
84         ** Which queue to use:
85         **
86         ** When doing RSS, map the packet to the same outbound
87         ** queue as the incoming flow would be mapped to.
88         ** If everything is set up correctly, that should also be
89         ** the bucket of the CPU we are currently running on.
90         */
91         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
92 #ifdef  RSS
93                 if (rss_hash2bucket(m->m_pkthdr.flowid,
94                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
95                         i = bucket_id % vsi->num_queues;
96                 } else
97 #endif
98                         i = m->m_pkthdr.flowid % vsi->num_queues;
99         } else
100                 i = curcpu % vsi->num_queues;
101         /*
102         ** This may not be perfect, but until something
103         ** better comes along it will keep us from scheduling
104         ** on stalled queues.
105         */
106         if (((1 << i) & vsi->active_queues) == 0)
107                 i = ffsl(vsi->active_queues);
108
109         que = &vsi->queues[i];
110         txr = &que->txr;
111
112         err = drbr_enqueue(ifp, txr->br, m);
113         if (err)
114                 return (err);
115         if (IXL_TX_TRYLOCK(txr)) {
116                 ixl_mq_start_locked(ifp, txr);
117                 IXL_TX_UNLOCK(txr);
118         } else
119                 taskqueue_enqueue(que->tq, &que->tx_task);
120
121         return (0);
122 }
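/*
** Illustrative sketch (not part of the driver, hypothetical helper): the
** queue selection above reduces either the RSS bucket or the raw flowid
** modulo the queue count, falling back to the current CPU id when the
** mbuf carries no valid hash.  The RSS bucket path via rss_hash2bucket()
** is omitted here for brevity.
*/
#if 0
static int
ixl_example_pick_queue(struct ixl_vsi *vsi, struct mbuf *m)
{
	int i;

	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % vsi->num_queues; /* e.g. 0xdeadbeef % 4 == 3 */
	else
		i = curcpu % vsi->num_queues;		  /* no hash: use the CPU id */
	return (i);
}
#endif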
123
124 int
125 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
126 {
127         struct ixl_queue        *que = txr->que;
128         struct ixl_vsi          *vsi = que->vsi;
129         struct mbuf             *next;
130         int                     err = 0;
131
132
133         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
134             vsi->link_active == 0)
135                 return (ENETDOWN);
136
137         /* Process the transmit queue */
138         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
139                 if ((err = ixl_xmit(que, &next)) != 0) {
140                         if (next == NULL)
141                                 drbr_advance(ifp, txr->br);
142                         else
143                                 drbr_putback(ifp, txr->br, next);
144                         break;
145                 }
146                 drbr_advance(ifp, txr->br);
147                 /* Send a copy of the frame to the BPF listener */
148                 ETHER_BPF_MTAP(ifp, next);
149                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
150                         break;
151         }
152
153         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
154                 ixl_txeof(que);
155
156         return (err);
157 }
158
159 /*
160  * Called from a taskqueue to drain queued transmit packets.
161  */
162 void
163 ixl_deferred_mq_start(void *arg, int pending)
164 {
165         struct ixl_queue        *que = arg;
166         struct tx_ring          *txr = &que->txr;
167         struct ixl_vsi          *vsi = que->vsi;
168         struct ifnet            *ifp = vsi->ifp;
169         
170         IXL_TX_LOCK(txr);
171         if (!drbr_empty(ifp, txr->br))
172                 ixl_mq_start_locked(ifp, txr);
173         IXL_TX_UNLOCK(txr);
174 }
175
176 /*
177 ** Flush all queue ring buffers
178 */
179 void
180 ixl_qflush(struct ifnet *ifp)
181 {
182         struct ixl_vsi  *vsi = ifp->if_softc;
183
184         for (int i = 0; i < vsi->num_queues; i++) {
185                 struct ixl_queue *que = &vsi->queues[i];
186                 struct tx_ring  *txr = &que->txr;
187                 struct mbuf     *m;
188                 IXL_TX_LOCK(txr);
189                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
190                         m_freem(m);
191                 IXL_TX_UNLOCK(txr);
192         }
193         if_qflush(ifp);
194 }
195
196 /*
197 ** Find mbuf chains passed to the driver 
198 ** that are 'sparse', using more than 8
199 ** mbufs to deliver an MSS-sized chunk of data
200 */
201 static inline bool
202 ixl_tso_detect_sparse(struct mbuf *mp)
203 {
204         struct mbuf     *m;
205         int             num = 0, mss;
206         bool            ret = FALSE;
207
208         mss = mp->m_pkthdr.tso_segsz;
209         for (m = mp->m_next; m != NULL; m = m->m_next) {
210                 num++;
211                 mss -= m->m_len;
212                 if (mss < 1)
213                         break;
214                 if (m->m_next == NULL)
215                         break;
216         }
217         if (num > IXL_SPARSE_CHAIN)
218                 ret = TRUE;
219
220         return (ret);
221 }
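/*
** Worked example (assumed numbers): with tso_segsz == 1448 and a chain of
** many small mbufs (say 100 bytes each), more than IXL_SPARSE_CHAIN mbufs
** are walked before one MSS worth of payload is covered, so the function
** above returns TRUE and ixl_xmit() responds by defragmenting the chain
** before attempting the DMA mapping, roughly:
*/
#if 0
	if (ixl_tso_detect_sparse(m_head)) {
		m = m_defrag(m_head, M_NOWAIT);	/* coalesce into fewer mbufs */
		if (m != NULL)
			m_head = m;
	}
#endif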
222
223
224 /*********************************************************************
225  *
226  *  This routine maps the mbufs to tx descriptors, allowing the
227  *  TX engine to transmit the packets. 
228  *      - return 0 on success, positive on failure
229  *
230  **********************************************************************/
231 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
232
233 static int
234 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
235 {
236         struct ixl_vsi          *vsi = que->vsi;
237         struct i40e_hw          *hw = vsi->hw;
238         struct tx_ring          *txr = &que->txr;
239         struct ixl_tx_buf       *buf;
240         struct i40e_tx_desc     *txd = NULL;
241         struct mbuf             *m_head, *m;
242         int                     i, j, error, nsegs, maxsegs;
243         int                     first, last = 0;
244         u16                     vtag = 0;
245         u32                     cmd, off;
246         bus_dmamap_t            map;
247         bus_dma_tag_t           tag;
248         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
249
250
251         cmd = off = 0;
252         m_head = *m_headp;
253
254         /*
255          * Important to capture the first descriptor
256          * used because it will contain the index of
257          * the one we tell the hardware to report back
258          */
259         first = txr->next_avail;
260         buf = &txr->buffers[first];
261         map = buf->map;
262         tag = txr->tx_tag;
263         maxsegs = IXL_MAX_TX_SEGS;
264
265         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
266                 /* Use larger mapping for TSO */
267                 tag = txr->tso_tag;
268                 maxsegs = IXL_MAX_TSO_SEGS;
269                 if (ixl_tso_detect_sparse(m_head)) {
270                         m = m_defrag(m_head, M_NOWAIT);
271                         if (m == NULL) {
272                                 m_freem(*m_headp);
273                                 *m_headp = NULL;
274                                 return (ENOBUFS);
275                         }
276                         *m_headp = m;
277                 }
278         }
279
280         /*
281          * Map the packet for DMA.
282          */
283         error = bus_dmamap_load_mbuf_sg(tag, map,
284             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
285
286         if (error == EFBIG) {
287                 struct mbuf *m;
288
289                 m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
290                 if (m == NULL) {
291                         que->mbuf_defrag_failed++;
292                         m_freem(*m_headp);
293                         *m_headp = NULL;
294                         return (ENOBUFS);
295                 }
296                 *m_headp = m;
297
298                 /* Try it again */
299                 error = bus_dmamap_load_mbuf_sg(tag, map,
300                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
301
302                 if (error == ENOMEM) {
303                         que->tx_dma_setup++;
304                         return (error);
305                 } else if (error != 0) {
306                         que->tx_dma_setup++;
307                         m_freem(*m_headp);
308                         *m_headp = NULL;
309                         return (error);
310                 }
311         } else if (error == ENOMEM) {
312                 que->tx_dma_setup++;
313                 return (error);
314         } else if (error != 0) {
315                 que->tx_dma_setup++;
316                 m_freem(*m_headp);
317                 *m_headp = NULL;
318                 return (error);
319         }
320
321         /* Make certain there are enough descriptors */
322         if (nsegs > txr->avail - 2) {
323                 txr->no_desc++;
324                 error = ENOBUFS;
325                 goto xmit_fail;
326         }
327         m_head = *m_headp;
328
329         /* Set up the TSO/CSUM offload */
330         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
331                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
332                 if (error)
333                         goto xmit_fail;
334         }
335
336         cmd |= I40E_TX_DESC_CMD_ICRC;
337         /* Grab the VLAN tag */
338         if (m_head->m_flags & M_VLANTAG) {
339                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
340                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
341         }
342
343         i = txr->next_avail;
344         for (j = 0; j < nsegs; j++) {
345                 bus_size_t seglen;
346
347                 buf = &txr->buffers[i];
348                 buf->tag = tag; /* Keep track of the type tag */
349                 txd = &txr->base[i];
350                 seglen = segs[j].ds_len;
351
352                 txd->buffer_addr = htole64(segs[j].ds_addr);
353                 txd->cmd_type_offset_bsz =
354                     htole64(I40E_TX_DESC_DTYPE_DATA
355                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
356                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
357                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
358                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
359
360                 last = i; /* descriptor that will get completion IRQ */
361
362                 if (++i == que->num_desc)
363                         i = 0;
364
365                 buf->m_head = NULL;
366                 buf->eop_index = -1;
367         }
368         /* Set the last descriptor for report */
369         txd->cmd_type_offset_bsz |=
370             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
371         txr->avail -= nsegs;
372         txr->next_avail = i;
373
374         buf->m_head = m_head;
375         /* Swap the dma map between the first and last descriptor */
376         txr->buffers[first].map = buf->map;
377         buf->map = map;
378         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
379
380         /* Set the index of the descriptor that will be marked done */
381         buf = &txr->buffers[first];
382         buf->eop_index = last;
383
384         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
385             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
386         /*
387          * Advance the Transmit Descriptor Tail (TDT); this tells the
388          * hardware that this frame is available to transmit.
389          */
390         ++txr->total_packets;
391         wr32(hw, txr->tail, i);
392
393         ixl_flush(hw);
394         /* Mark outstanding work */
395         if (que->busy == 0)
396                 que->busy = 1;
397         return (0);
398
399 xmit_fail:
400         bus_dmamap_unload(tag, buf->map);
401         return (error);
402 }
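/*
** Illustrative sketch (hypothetical values, not compiled): how the single
** 64-bit cmd_type_offset_bsz word of a data descriptor is assembled, using
** the same macros as ixl_xmit() above.
*/
#if 0
	u32 cmd = I40E_TX_DESC_CMD_ICRC | IXL_TXD_CMD;	/* insert CRC, EOP, RS */
	u32 off = 0;					/* no checksum offload */
	u16 vtag = 0;					/* no VLAN tag */
	u64 qw1;

	qw1 = I40E_TX_DESC_DTYPE_DATA
	    | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
	    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
	    | ((u64)1514 << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)	/* buffer length in bytes */
	    | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT);
	/* ixl_xmit() stores this with htole64() into txd->cmd_type_offset_bsz */
#endif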
403
404
405 /*********************************************************************
406  *
407  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
408  *  the information needed to transmit a packet on the wire. This is
409  *  called only once at attach, setup is done every reset.
410  *
411  **********************************************************************/
412 int
413 ixl_allocate_tx_data(struct ixl_queue *que)
414 {
415         struct tx_ring          *txr = &que->txr;
416         struct ixl_vsi          *vsi = que->vsi;
417         device_t                dev = vsi->dev;
418         struct ixl_tx_buf       *buf;
419         int                     error = 0;
420
421         /*
422          * Setup DMA descriptor areas.
423          */
424         if ((error = bus_dma_tag_create(NULL,           /* parent */
425                                1, 0,                    /* alignment, bounds */
426                                BUS_SPACE_MAXADDR,       /* lowaddr */
427                                BUS_SPACE_MAXADDR,       /* highaddr */
428                                NULL, NULL,              /* filter, filterarg */
429                                IXL_TSO_SIZE,            /* maxsize */
430                                IXL_MAX_TX_SEGS,         /* nsegments */
431                                PAGE_SIZE,               /* maxsegsize */
432                                0,                       /* flags */
433                                NULL,                    /* lockfunc */
434                                NULL,                    /* lockfuncarg */
435                                &txr->tx_tag))) {
436                 device_printf(dev,"Unable to allocate TX DMA tag\n");
437                 goto fail;
438         }
439
440         /* Make a special tag for TSO */
441         if ((error = bus_dma_tag_create(NULL,           /* parent */
442                                1, 0,                    /* alignment, bounds */
443                                BUS_SPACE_MAXADDR,       /* lowaddr */
444                                BUS_SPACE_MAXADDR,       /* highaddr */
445                                NULL, NULL,              /* filter, filterarg */
446                                IXL_TSO_SIZE,            /* maxsize */
447                                IXL_MAX_TSO_SEGS,        /* nsegments */
448                                PAGE_SIZE,               /* maxsegsize */
449                                0,                       /* flags */
450                                NULL,                    /* lockfunc */
451                                NULL,                    /* lockfuncarg */
452                                &txr->tso_tag))) {
453                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
454                 goto fail;
455         }
456
457         if (!(txr->buffers =
458             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
459             que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
460                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
461                 error = ENOMEM;
462                 goto fail;
463         }
464
465         /* Create the descriptor buffer default dma maps */
466         buf = txr->buffers;
467         for (int i = 0; i < que->num_desc; i++, buf++) {
468                 buf->tag = txr->tx_tag;
469                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
470                 if (error != 0) {
471                         device_printf(dev, "Unable to create TX DMA map\n");
472                         goto fail;
473                 }
474         }
475 fail:
476         return (error);
477 }
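/*
** Note (a hedged reading of the tag parameters above): both tags allow a
** mapping of up to IXL_TSO_SIZE bytes split into PAGE_SIZE segments, but
** the regular TX tag caps a mapping at IXL_MAX_TX_SEGS scatter/gather
** entries while the TSO tag allows IXL_MAX_TSO_SEGS.  ixl_xmit() selects
** the tag per packet along these lines:
*/
#if 0
	bus_dma_tag_t tag;

	tag = (m_head->m_pkthdr.csum_flags & CSUM_TSO) ?
	    txr->tso_tag : txr->tx_tag;
#endif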
478
479
480 /*********************************************************************
481  *
482  *  (Re)Initialize a queue transmit ring.
483  *      - called by init, it clears the descriptor ring,
484  *        and frees any stale mbufs 
485  *
486  **********************************************************************/
487 void
488 ixl_init_tx_ring(struct ixl_queue *que)
489 {
490 #ifdef DEV_NETMAP
491         struct netmap_adapter *na = NA(que->vsi->ifp);
492         struct netmap_slot *slot;
493 #endif /* DEV_NETMAP */
494         struct tx_ring          *txr = &que->txr;
495         struct ixl_tx_buf       *buf;
496
497         /* Clear the old ring contents */
498         IXL_TX_LOCK(txr);
499
500 #ifdef DEV_NETMAP
501         /*
502          * (under lock): if in netmap mode, do some consistency
503          * checks and set slot to entry 0 of the netmap ring.
504          */
505         slot = netmap_reset(na, NR_TX, que->me, 0);
506 #endif /* DEV_NETMAP */
507
508         bzero((void *)txr->base,
509               (sizeof(struct i40e_tx_desc)) * que->num_desc);
510
511         /* Reset indices */
512         txr->next_avail = 0;
513         txr->next_to_clean = 0;
514
515 #ifdef IXL_FDIR
516         /* Initialize flow director */
517         txr->atr_rate = ixl_atr_rate;
518         txr->atr_count = 0;
519 #endif
520
521         /* Free any existing tx mbufs. */
522         buf = txr->buffers;
523         for (int i = 0; i < que->num_desc; i++, buf++) {
524                 if (buf->m_head != NULL) {
525                         bus_dmamap_sync(buf->tag, buf->map,
526                             BUS_DMASYNC_POSTWRITE);
527                         bus_dmamap_unload(buf->tag, buf->map);
528                         m_freem(buf->m_head);
529                         buf->m_head = NULL;
530                 }
531 #ifdef DEV_NETMAP
532                 /*
533                  * In netmap mode, set the map for the packet buffer.
534                  * NOTE: Some drivers (not this one) also need to set
535                  * the physical buffer address in the NIC ring.
536                  * netmap_idx_n2k() maps a nic index, i, into the corresponding
537                  * netmap slot index, si
538                  */
539                 if (slot) {
540                         int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
541                         netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
542                 }
543 #endif /* DEV_NETMAP */
544                 /* Clear the EOP index */
545                 buf->eop_index = -1;
546         }
547
548         /* Set number of descriptors available */
549         txr->avail = que->num_desc;
550
551         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
552             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
553         IXL_TX_UNLOCK(txr);
554 }
555
556
557 /*********************************************************************
558  *
559  *  Free transmit ring related data structures.
560  *
561  **********************************************************************/
562 void
563 ixl_free_que_tx(struct ixl_queue *que)
564 {
565         struct tx_ring *txr = &que->txr;
566         struct ixl_tx_buf *buf;
567
568         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
569
570         for (int i = 0; i < que->num_desc; i++) {
571                 buf = &txr->buffers[i];
572                 if (buf->m_head != NULL) {
573                         bus_dmamap_sync(buf->tag, buf->map,
574                             BUS_DMASYNC_POSTWRITE);
575                         bus_dmamap_unload(buf->tag,
576                             buf->map);
577                         m_freem(buf->m_head);
578                         buf->m_head = NULL;
579                         if (buf->map != NULL) {
580                                 bus_dmamap_destroy(buf->tag,
581                                     buf->map);
582                                 buf->map = NULL;
583                         }
584                 } else if (buf->map != NULL) {
585                         bus_dmamap_unload(buf->tag,
586                             buf->map);
587                         bus_dmamap_destroy(buf->tag,
588                             buf->map);
589                         buf->map = NULL;
590                 }
591         }
592         if (txr->br != NULL)
593                 buf_ring_free(txr->br, M_DEVBUF);
594         if (txr->buffers != NULL) {
595                 free(txr->buffers, M_DEVBUF);
596                 txr->buffers = NULL;
597         }
598         if (txr->tx_tag != NULL) {
599                 bus_dma_tag_destroy(txr->tx_tag);
600                 txr->tx_tag = NULL;
601         }
602         if (txr->tso_tag != NULL) {
603                 bus_dma_tag_destroy(txr->tso_tag);
604                 txr->tso_tag = NULL;
605         }
606
607         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
608         return;
609 }
610
611 /*********************************************************************
612  *
613  *  Setup descriptor for hw offloads 
614  *
615  **********************************************************************/
616
617 static int
618 ixl_tx_setup_offload(struct ixl_queue *que,
619     struct mbuf *mp, u32 *cmd, u32 *off)
620 {
621         struct ether_vlan_header        *eh;
622 #ifdef INET
623         struct ip                       *ip = NULL;
624 #endif
625         struct tcphdr                   *th = NULL;
626 #ifdef INET6
627         struct ip6_hdr                  *ip6;
628 #endif
629         int                             elen, ip_hlen = 0, tcp_hlen;
630         u16                             etype;
631         u8                              ipproto = 0;
632         bool                            tso = FALSE;
633
634
635         /* Set up the TSO context descriptor if required */
636         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
637                 tso = ixl_tso_setup(que, mp);
638                 if (tso)
639                         ++que->tso;
640                 else
641                         return (ENXIO);
642         }
643
644         /*
645          * Determine where frame payload starts.
646          * Jump over vlan headers if already present,
647          * helpful for QinQ too.
648          */
649         eh = mtod(mp, struct ether_vlan_header *);
650         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
651                 etype = ntohs(eh->evl_proto);
652                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
653         } else {
654                 etype = ntohs(eh->evl_encap_proto);
655                 elen = ETHER_HDR_LEN;
656         }
657
658         switch (etype) {
659 #ifdef INET
660                 case ETHERTYPE_IP:
661                         ip = (struct ip *)(mp->m_data + elen);
662                         ip_hlen = ip->ip_hl << 2;
663                         ipproto = ip->ip_p;
664                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
665                         /* The IP checksum must be recalculated with TSO */
666                         if (tso)
667                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
668                         else
669                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
670                         break;
671 #endif
672 #ifdef INET6
673                 case ETHERTYPE_IPV6:
674                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
675                         ip_hlen = sizeof(struct ip6_hdr);
676                         ipproto = ip6->ip6_nxt;
677                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
678                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
679                         break;
680 #endif
681                 default:
682                         break;
683         }
684
685         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
686         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
687
688         switch (ipproto) {
689                 case IPPROTO_TCP:
690                         tcp_hlen = th->th_off << 2;
691                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
692                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
693                                 *off |= (tcp_hlen >> 2) <<
694                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
695                         }
696 #ifdef IXL_FDIR
697                         ixl_atr(que, th, etype);
698 #endif
699                         break;
700                 case IPPROTO_UDP:
701                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
702                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
703                                 *off |= (sizeof(struct udphdr) >> 2) <<
704                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
705                         }
706                         break;
707
708                 case IPPROTO_SCTP:
709                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
710                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
711                                 *off |= (sizeof(struct sctphdr) >> 2) <<
712                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
713                         }
714                         /* Fall Thru */
715                 default:
716                         break;
717         }
718
719         return (0);
720 }
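/*
** Note on the offset fields above (a hedged reading of the shifts used):
** MACLEN appears to be expressed in 2-byte words (hence "elen >> 1") while
** IPLEN and L4LEN are expressed in 4-byte dwords (hence the ">> 2").  A
** plain untagged IPv4/TCP frame with no options would encode as:
*/
#if 0
	u32 ex_off = 0;

	ex_off |= (ETHER_HDR_LEN >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;	/* 14 bytes -> 7 words */
	ex_off |= (20 >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;			/* 20-byte IP header -> 5 dwords */
	ex_off |= (20 >> 2) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;		/* 20-byte TCP header -> 5 dwords */
#endif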
721
722
723 /**********************************************************************
724  *
725  *  Setup context for hardware segmentation offload (TSO)
726  *
727  **********************************************************************/
728 static bool
729 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
730 {
731         struct tx_ring                  *txr = &que->txr;
732         struct i40e_tx_context_desc     *TXD;
733         struct ixl_tx_buf               *buf;
734         u32                             cmd, mss, type, tsolen;
735         u16                             etype;
736         int                             idx, elen, ip_hlen, tcp_hlen;
737         struct ether_vlan_header        *eh;
738 #ifdef INET
739         struct ip                       *ip;
740 #endif
741 #ifdef INET6
742         struct ip6_hdr                  *ip6;
743 #endif
744 #if defined(INET6) || defined(INET)
745         struct tcphdr                   *th;
746 #endif
747         u64                             type_cmd_tso_mss;
748
749         /*
750          * Determine where frame payload starts.
751          * Jump over vlan headers if already present
752          */
753         eh = mtod(mp, struct ether_vlan_header *);
754         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
755                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
756                 etype = eh->evl_proto;
757         } else {
758                 elen = ETHER_HDR_LEN;
759                 etype = eh->evl_encap_proto;
760         }
761
762         switch (ntohs(etype)) {
763 #ifdef INET6
764         case ETHERTYPE_IPV6:
765                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
766                 if (ip6->ip6_nxt != IPPROTO_TCP)
767                         return (FALSE);
768                 ip_hlen = sizeof(struct ip6_hdr);
769                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
770                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
771                 tcp_hlen = th->th_off << 2;
772                 break;
773 #endif
774 #ifdef INET
775         case ETHERTYPE_IP:
776                 ip = (struct ip *)(mp->m_data + elen);
777                 if (ip->ip_p != IPPROTO_TCP)
778                         return (FALSE);
779                 ip->ip_sum = 0;
780                 ip_hlen = ip->ip_hl << 2;
781                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
782                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
783                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
784                 tcp_hlen = th->th_off << 2;
785                 break;
786 #endif
787         default:
788                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
789                     __func__, ntohs(etype));
790                 return FALSE;
791         }
792
793         /* Ensure we have at least the IP+TCP header in the first mbuf. */
794         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
795                 return FALSE;
796
797         idx = txr->next_avail;
798         buf = &txr->buffers[idx];
799         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
800         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
801
802         type = I40E_TX_DESC_DTYPE_CONTEXT;
803         cmd = I40E_TX_CTX_DESC_TSO;
804         mss = mp->m_pkthdr.tso_segsz;
805
806         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
807             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
808             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
809             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
810         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
811
812         TXD->tunneling_params = htole32(0);
813         buf->m_head = NULL;
814         buf->eop_index = -1;
815
816         if (++idx == que->num_desc)
817                 idx = 0;
818
819         txr->avail--;
820         txr->next_avail = idx;
821
822         return TRUE;
823 }
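/*
** Illustrative sketch (hypothetical numbers, not compiled): the context
** descriptor built above packs the descriptor type, the TSO command, the
** total L4 payload length and the MSS into one 64-bit word with the
** I40E_TXD_CTX_QW1_* shifts, e.g. for 1448-byte segments:
*/
#if 0
	u64 qw = ((u64)I40E_TX_DESC_DTYPE_CONTEXT << I40E_TXD_CTX_QW1_DTYPE_SHIFT)
	    | ((u64)I40E_TX_CTX_DESC_TSO << I40E_TXD_CTX_QW1_CMD_SHIFT)
	    | ((u64)64000 << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT)	/* payload bytes after the headers */
	    | ((u64)1448 << I40E_TXD_CTX_QW1_MSS_SHIFT);
	/* stored with htole64() into TXD->type_cmd_tso_mss as above */
#endif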
824
825 /*
826 ** ixl_get_tx_head - Retrieve the value from the
827 **    location where the HW records its HEAD index
828 */
829 static inline u32
830 ixl_get_tx_head(struct ixl_queue *que)
831 {
832         struct tx_ring  *txr = &que->txr;
833         void *head = &txr->base[que->num_desc];
834         return LE32_TO_CPU(*(volatile __le32 *)head);
835 }
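/*
** Head write-back note (a hedged reading of the code above): the TX
** descriptor area is allocated with extra space just past the last
** descriptor, and the hardware writes the index of the last descriptor it
** has finished into that location.  A caller such as ixl_txeof() reads it
** back like so:
*/
#if 0
	u32 head = ixl_get_tx_head(que);
	if (txr->next_to_clean == head)
		return;		/* nothing new has completed */
#endif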
836
837 /**********************************************************************
838  *
839  *  Examine each tx_buffer in the used queue. If the hardware is done
840  *  processing the packet then free associated resources. The
841  *  tx_buffer is put back on the free queue.
842  *
843  **********************************************************************/
844 bool
845 ixl_txeof(struct ixl_queue *que)
846 {
847         struct tx_ring          *txr = &que->txr;
848         u32                     first, last, head, done, processed;
849         struct ixl_tx_buf       *buf;
850         struct i40e_tx_desc     *tx_desc, *eop_desc;
851
852
853         mtx_assert(&txr->mtx, MA_OWNED);
854
855 #ifdef DEV_NETMAP
856         // XXX todo: implement moderation
857         if (netmap_tx_irq(que->vsi->ifp, que->me))
858                 return FALSE;
859 #endif /* DEV_NETMAP */
860
861         /* These are not the descriptors you seek, move along :) */
862         if (txr->avail == que->num_desc) {
863                 que->busy = 0;
864                 return FALSE;
865         }
866
867         processed = 0;
868         first = txr->next_to_clean;
869         buf = &txr->buffers[first];
870         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
871         last = buf->eop_index;
872         if (last == -1)
873                 return FALSE;
874         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
875
876         /* Get the Head WB value */
877         head = ixl_get_tx_head(que);
878
879         /*
880         ** Get the index of the first descriptor
881         ** BEYOND the EOP and call that 'done'.
882         ** This is done so the comparison in the
883         ** inner while loop below can be simple.
884         */
885         if (++last == que->num_desc) last = 0;
886         done = last;
887
888         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
889             BUS_DMASYNC_POSTREAD);
890         /*
891         ** The HEAD index of the ring is written to a
892         ** defined location; this, rather than a done bit,
893         ** is what is used to keep track of what must be
894         ** 'cleaned'.
895         */
896         while (first != head) {
897                 /* We clean the range of the packet */
898                 while (first != done) {
899                         ++txr->avail;
900                         ++processed;
901
902                         if (buf->m_head) {
903                                 txr->bytes += /* for ITR adjustment */
904                                     buf->m_head->m_pkthdr.len;
905                                 txr->tx_bytes += /* for TX stats */
906                                     buf->m_head->m_pkthdr.len;
907                                 bus_dmamap_sync(buf->tag,
908                                     buf->map,
909                                     BUS_DMASYNC_POSTWRITE);
910                                 bus_dmamap_unload(buf->tag,
911                                     buf->map);
912                                 m_freem(buf->m_head);
913                                 buf->m_head = NULL;
914                                 buf->map = NULL;
915                         }
916                         buf->eop_index = -1;
917
918                         if (++first == que->num_desc)
919                                 first = 0;
920
921                         buf = &txr->buffers[first];
922                         tx_desc = &txr->base[first];
923                 }
924                 ++txr->packets;
925                 /* See if there is more work now */
926                 last = buf->eop_index;
927                 if (last != -1) {
928                         eop_desc = &txr->base[last];
929                         /* Get next done point */
930                         if (++last == que->num_desc) last = 0;
931                         done = last;
932                 } else
933                         break;
934         }
935         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
936             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
937
938         txr->next_to_clean = first;
939
940
941         /*
942         ** Hang detection: we know there is work
943         ** outstanding or the early return above
944         ** would have been taken, so indicate an
945         ** unsuccessful pass. In the local timer, if
946         ** the value grows too large the queue will
947         ** be considered hung. If anything has been
948         ** cleaned then reset the state.
949         */
950         if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
951                 ++que->busy;
952
953         if (processed)
954                 que->busy = 1; /* Note this turns off HUNG */
955
956         /*
957          * If there are no pending descriptors, clear the timeout.
958          */
959         if (txr->avail == que->num_desc) {
960                 que->busy = 0;
961                 return FALSE;
962         }
963
964         return TRUE;
965 }
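/*
** Worked example for the cleanup above (hypothetical indices on a ring of
** 1024 descriptors): if next_to_clean is 10, the first packet's eop_index
** is 12 and the head write-back reports 20, then 'done' becomes 13, the
** inner loop releases descriptors 10-12, and the outer loop moves on to
** the next packet until 'first' catches up with 'head' at 20.
*/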
966
967 /*********************************************************************
968  *
969  *  Refresh mbuf buffers for RX descriptor rings
970  *   - now keeps its own state so discards due to resource
971  *     exhaustion are unnecessary, if an mbuf cannot be obtained
972  *     it just returns, keeping its placeholder, thus it can simply
973  *     be recalled to try again.
974  *
975  **********************************************************************/
976 static void
977 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
978 {
979         struct ixl_vsi          *vsi = que->vsi;
980         struct rx_ring          *rxr = &que->rxr;
981         bus_dma_segment_t       hseg[1];
982         bus_dma_segment_t       pseg[1];
983         struct ixl_rx_buf       *buf;
984         struct mbuf             *mh, *mp;
985         int                     i, j, nsegs, error;
986         bool                    refreshed = FALSE;
987
988         i = j = rxr->next_refresh;
989         /* Control the loop with one beyond */
990         if (++j == que->num_desc)
991                 j = 0;
992
993         while (j != limit) {
994                 buf = &rxr->buffers[i];
995                 if (rxr->hdr_split == FALSE)
996                         goto no_split;
997
998                 if (buf->m_head == NULL) {
999                         mh = m_gethdr(M_NOWAIT, MT_DATA);
1000                         if (mh == NULL)
1001                                 goto update;
1002                 } else
1003                         mh = buf->m_head;
1004
1005                 mh->m_pkthdr.len = mh->m_len = MHLEN;
1006                 mh->m_len = MHLEN;
1007                 mh->m_flags |= M_PKTHDR;
1008                 /* Get the memory mapping */
1009                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1010                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1011                 if (error != 0) {
1012                         printf("Refresh mbufs: hdr dmamap load"
1013                             " failure - %d\n", error);
1014                         m_free(mh);
1015                         buf->m_head = NULL;
1016                         goto update;
1017                 }
1018                 buf->m_head = mh;
1019                 bus_dmamap_sync(rxr->htag, buf->hmap,
1020                     BUS_DMASYNC_PREREAD);
1021                 rxr->base[i].read.hdr_addr =
1022                    htole64(hseg[0].ds_addr);
1023
1024 no_split:
1025                 if (buf->m_pack == NULL) {
1026                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1027                             M_PKTHDR, rxr->mbuf_sz);
1028                         if (mp == NULL)
1029                                 goto update;
1030                 } else
1031                         mp = buf->m_pack;
1032
1033                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1034                 /* Get the memory mapping */
1035                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1036                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1037                 if (error != 0) {
1038                         printf("Refresh mbufs: payload dmamap load"
1039                             " failure - %d\n", error);
1040                         m_free(mp);
1041                         buf->m_pack = NULL;
1042                         goto update;
1043                 }
1044                 buf->m_pack = mp;
1045                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1046                     BUS_DMASYNC_PREREAD);
1047                 rxr->base[i].read.pkt_addr =
1048                    htole64(pseg[0].ds_addr);
1049                 /* Used only when doing header split */
1050                 rxr->base[i].read.hdr_addr = 0;
1051
1052                 refreshed = TRUE;
1053                 /* Next is precalculated */
1054                 i = j;
1055                 rxr->next_refresh = i;
1056                 if (++j == que->num_desc)
1057                         j = 0;
1058         }
1059 update:
1060         if (refreshed) /* Update hardware tail index */
1061                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1062         return;
1063 }
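/*
** Usage note (an assumption about the RX completion path, which is not
** shown in this excerpt): the caller passes the descriptor index it is
** currently working on as 'limit', so the one-ahead index 'j' stops the
** refresh loop before it touches a slot the hardware may still own,
** roughly:
*/
#if 0
	ixl_refresh_mbufs(que, rxr->next_check);
#endif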
1064
1065
1066 /*********************************************************************
1067  *
1068  *  Allocate memory for rx_buffer structures. Since we use one
1069  *  rx_buffer per descriptor, the maximum number of rx_buffer's
1070  *  that we'll need is equal to the number of receive descriptors
1071  *  that we've defined.
1072  *
1073  **********************************************************************/
1074 int
1075 ixl_allocate_rx_data(struct ixl_queue *que)
1076 {
1077         struct rx_ring          *rxr = &que->rxr;
1078         struct ixl_vsi          *vsi = que->vsi;
1079         device_t                dev = vsi->dev;
1080         struct ixl_rx_buf       *buf;
1081         int                     i, bsize, error;
1082
1083         bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1084         if (!(rxr->buffers =
1085             (struct ixl_rx_buf *) malloc(bsize,
1086             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1087                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1088                 error = ENOMEM;
1089                 return (error);
1090         }
1091
1092         if ((error = bus_dma_tag_create(NULL,   /* parent */
1093                                    1, 0,        /* alignment, bounds */
1094                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1095                                    BUS_SPACE_MAXADDR,   /* highaddr */
1096                                    NULL, NULL,          /* filter, filterarg */
1097                                    MSIZE,               /* maxsize */
1098                                    1,                   /* nsegments */
1099                                    MSIZE,               /* maxsegsize */
1100                                    0,                   /* flags */
1101                                    NULL,                /* lockfunc */
1102                                    NULL,                /* lockfuncarg */
1103                                    &rxr->htag))) {
1104                 device_printf(dev, "Unable to create RX DMA htag\n");
1105                 return (error);
1106         }
1107
1108         if ((error = bus_dma_tag_create(NULL,   /* parent */
1109                                    1, 0,        /* alignment, bounds */
1110                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1111                                    BUS_SPACE_MAXADDR,   /* highaddr */
1112                                    NULL, NULL,          /* filter, filterarg */
1113                                    MJUM16BYTES,         /* maxsize */
1114                                    1,                   /* nsegments */
1115                                    MJUM16BYTES,         /* maxsegsize */
1116                                    0,                   /* flags */
1117                                    NULL,                /* lockfunc */
1118                                    NULL,                /* lockfuncarg */
1119                                    &rxr->ptag))) {
1120                 device_printf(dev, "Unable to create RX DMA ptag\n");
1121                 return (error);
1122         }
1123
1124         for (i = 0; i < que->num_desc; i++) {
1125                 buf = &rxr->buffers[i];
1126                 error = bus_dmamap_create(rxr->htag,
1127                     BUS_DMA_NOWAIT, &buf->hmap);
1128                 if (error) {
1129                         device_printf(dev, "Unable to create RX head map\n");
1130                         break;
1131                 }
1132                 error = bus_dmamap_create(rxr->ptag,
1133                     BUS_DMA_NOWAIT, &buf->pmap);
1134                 if (error) {
1135                         device_printf(dev, "Unable to create RX pkt map\n");
1136                         break;
1137                 }
1138         }
1139
1140         return (error);
1141 }
1142
1143
1144 /*********************************************************************
1145  *
1146  *  (Re)Initialize the queue receive ring and its buffers.
1147  *
1148  **********************************************************************/
1149 int
1150 ixl_init_rx_ring(struct ixl_queue *que)
1151 {
1152         struct  rx_ring         *rxr = &que->rxr;
1153         struct ixl_vsi          *vsi = que->vsi;
1154 #if defined(INET6) || defined(INET)
1155         struct ifnet            *ifp = vsi->ifp;
1156         struct lro_ctrl         *lro = &rxr->lro;
1157 #endif
1158         struct ixl_rx_buf       *buf;
1159         bus_dma_segment_t       pseg[1], hseg[1];
1160         int                     rsize, nsegs, error = 0;
1161 #ifdef DEV_NETMAP
1162         struct netmap_adapter *na = NA(que->vsi->ifp);
1163         struct netmap_slot *slot;
1164 #endif /* DEV_NETMAP */
1165
1166         IXL_RX_LOCK(rxr);
1167 #ifdef DEV_NETMAP
1168         /* same as in ixl_init_tx_ring() */
1169         slot = netmap_reset(na, NR_RX, que->me, 0);
1170 #endif /* DEV_NETMAP */
1171         /* Clear the ring contents */
1172         rsize = roundup2(que->num_desc *
1173             sizeof(union i40e_rx_desc), DBA_ALIGN);
1174         bzero((void *)rxr->base, rsize);
1175         /* Cleanup any existing buffers */
1176         for (int i = 0; i < que->num_desc; i++) {
1177                 buf = &rxr->buffers[i];
1178                 if (buf->m_head != NULL) {
1179                         bus_dmamap_sync(rxr->htag, buf->hmap,
1180                             BUS_DMASYNC_POSTREAD);
1181                         bus_dmamap_unload(rxr->htag, buf->hmap);
1182                         buf->m_head->m_flags |= M_PKTHDR;
1183                         m_freem(buf->m_head);
1184                 }
1185                 if (buf->m_pack != NULL) {
1186                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1187                             BUS_DMASYNC_POSTREAD);
1188                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1189                         buf->m_pack->m_flags |= M_PKTHDR;
1190                         m_freem(buf->m_pack);
1191                 }
1192                 buf->m_head = NULL;
1193                 buf->m_pack = NULL;
1194         }
1195
1196         /* header split is off */
1197         rxr->hdr_split = FALSE;
1198
1199         /* Now replenish the mbufs */
1200         for (int j = 0; j != que->num_desc; ++j) {
1201                 struct mbuf     *mh, *mp;
1202
1203                 buf = &rxr->buffers[j];
1204 #ifdef DEV_NETMAP
1205                 /*
1206                  * In netmap mode, fill the map and set the buffer
1207                  * address in the NIC ring, considering the offset
1208                  * between the netmap and NIC rings (see comment in
1209                  * ixgbe_setup_transmit_ring() ). No need to allocate
1210                  * an mbuf, so end the block with a continue;
1211                  */
1212                 if (slot) {
1213                         int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
1214                         uint64_t paddr;
1215                         void *addr;
1216
1217                         addr = PNMB(na, slot + sj, &paddr);
1218                         netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1219                         /* Update descriptor and the cached value */
1220                         rxr->base[j].read.pkt_addr = htole64(paddr);
1221                         rxr->base[j].read.hdr_addr = 0;
1222                         continue;
1223                 }
1224 #endif /* DEV_NETMAP */
1225                 /*
1226                 ** Don't allocate mbufs if not
1227                 ** doing header split; it's wasteful
1228                 */ 
1229                 if (rxr->hdr_split == FALSE)
1230                         goto skip_head;
1231
1232                 /* First the header */
1233                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1234                 if (buf->m_head == NULL) {
1235                         error = ENOBUFS;
1236                         goto fail;
1237                 }
1238                 m_adj(buf->m_head, ETHER_ALIGN);
1239                 mh = buf->m_head;
1240                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1241                 mh->m_flags |= M_PKTHDR;
1242                 /* Get the memory mapping */
1243                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1244                     buf->hmap, buf->m_head, hseg,
1245                     &nsegs, BUS_DMA_NOWAIT);
1246                 if (error != 0) /* Nothing elegant to do here */
1247                         goto fail;
1248                 bus_dmamap_sync(rxr->htag,
1249                     buf->hmap, BUS_DMASYNC_PREREAD);
1250                 /* Update descriptor */
1251                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1252
1253 skip_head:
1254                 /* Now the payload cluster */
1255                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1256                     M_PKTHDR, rxr->mbuf_sz);
1257                 if (buf->m_pack == NULL) {
1258                         error = ENOBUFS;
1259                         goto fail;
1260                 }
1261                 mp = buf->m_pack;
1262                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1263                 /* Get the memory mapping */
1264                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1265                     buf->pmap, mp, pseg,
1266                     &nsegs, BUS_DMA_NOWAIT);
1267                 if (error != 0)
1268                         goto fail;
1269                 bus_dmamap_sync(rxr->ptag,
1270                     buf->pmap, BUS_DMASYNC_PREREAD);
1271                 /* Update descriptor */
1272                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1273                 rxr->base[j].read.hdr_addr = 0;
1274         }
1275
1276
1277         /* Setup our descriptor indices */
1278         rxr->next_check = 0;
1279         rxr->next_refresh = 0;
1280         rxr->lro_enabled = FALSE;
1281         rxr->split = 0;
1282         rxr->bytes = 0;
1283         rxr->discard = FALSE;
1284
1285         wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1286         ixl_flush(vsi->hw);
1287
1288 #if defined(INET6) || defined(INET)
1289         /*
1290         ** Now set up the LRO interface:
1291         */
1292         if (ifp->if_capenable & IFCAP_LRO) {
1293                 int err = tcp_lro_init(lro);
1294                 if (err) {
1295                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1296                         goto fail;
1297                 }
1298                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1299                 rxr->lro_enabled = TRUE;
1300                 lro->ifp = vsi->ifp;
1301         }
1302 #endif
1303
1304         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1305             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1306
1307 fail:
1308         IXL_RX_UNLOCK(rxr);
1309         return (error);
1310 }
1311
1312
1313 /*********************************************************************
1314  *
1315  *  Free station receive ring data structures
1316  *
1317  **********************************************************************/
1318 void
1319 ixl_free_que_rx(struct ixl_queue *que)
1320 {
1321         struct rx_ring          *rxr = &que->rxr;
1322         struct ixl_rx_buf       *buf;
1323
1324         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1325
1326         /* Cleanup any existing buffers */
1327         if (rxr->buffers != NULL) {
1328                 for (int i = 0; i < que->num_desc; i++) {
1329                         buf = &rxr->buffers[i];
1330                         if (buf->m_head != NULL) {
1331                                 bus_dmamap_sync(rxr->htag, buf->hmap,
1332                                     BUS_DMASYNC_POSTREAD);
1333                                 bus_dmamap_unload(rxr->htag, buf->hmap);
1334                                 buf->m_head->m_flags |= M_PKTHDR;
1335                                 m_freem(buf->m_head);
1336                         }
1337                         if (buf->m_pack != NULL) {
1338                                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1339                                     BUS_DMASYNC_POSTREAD);
1340                                 bus_dmamap_unload(rxr->ptag, buf->pmap);
1341                                 buf->m_pack->m_flags |= M_PKTHDR;
1342                                 m_freem(buf->m_pack);
1343                         }
1344                         buf->m_head = NULL;
1345                         buf->m_pack = NULL;
1346                         if (buf->hmap != NULL) {
1347                                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1348                                 buf->hmap = NULL;
1349                         }
1350                         if (buf->pmap != NULL) {
1351                                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1352                                 buf->pmap = NULL;
1353                         }
1354                 }
1355                 if (rxr->buffers != NULL) {
1356                         free(rxr->buffers, M_DEVBUF);
1357                         rxr->buffers = NULL;
1358                 }
1359         }
1360
1361         if (rxr->htag != NULL) {
1362                 bus_dma_tag_destroy(rxr->htag);
1363                 rxr->htag = NULL;
1364         }
1365         if (rxr->ptag != NULL) {
1366                 bus_dma_tag_destroy(rxr->ptag);
1367                 rxr->ptag = NULL;
1368         }
1369
1370         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1371         return;
1372 }
1373
1374 static __inline void
1375 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1376 {
1377
1378 #if defined(INET6) || defined(INET)
1379         /*
1380          * At the moment LRO is only for IPv4/TCP packets and the TCP checksum
1381          * of the packet should be computed by hardware. Also, the packet should
1382          * not have a VLAN tag in its Ethernet header.
1383          */
1384         if (rxr->lro_enabled &&
1385             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1386             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1387             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1388                 /*
1389                  * Send to the stack if:
1390                  *  - LRO not enabled, or
1391                  *  - no LRO resources, or
1392                  *  - LRO enqueue fails
1393                  */
1394                 if (rxr->lro.lro_cnt != 0)
1395                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1396                                 return;
1397         }
1398 #endif
1399         IXL_RX_UNLOCK(rxr);
1400         (*ifp->if_input)(ifp, m);
1401         IXL_RX_LOCK(rxr);
1402 }
1403
1404
1405 static __inline void
1406 ixl_rx_discard(struct rx_ring *rxr, int i)
1407 {
1408         struct ixl_rx_buf       *rbuf;
1409
1410         rbuf = &rxr->buffers[i];
1411
1412         if (rbuf->fmp != NULL) {/* Partial chain ? */
1413                 rbuf->fmp->m_flags |= M_PKTHDR;
1414                 m_freem(rbuf->fmp);
1415                 rbuf->fmp = NULL;
1416         }
1417
1418         /*
1419         ** With advanced descriptors the writeback
1420         ** clobbers the buffer addresses, so it's easier
1421         ** to just free the existing mbufs and take
1422         ** the normal refresh path to get new buffers
1423         ** and mappings.
1424         */
1425         if (rbuf->m_head) {
1426                 m_free(rbuf->m_head);
1427                 rbuf->m_head = NULL;
1428         }
1429  
1430         if (rbuf->m_pack) {
1431                 m_free(rbuf->m_pack);
1432                 rbuf->m_pack = NULL;
1433         }
1434
1435         return;
1436 }
1437
1438 #ifdef RSS
1439 /*
1440 ** ixl_ptype_to_hash: parse the packet type
1441 ** to determine the appropriate hash.
1442 */
1443 static inline int
1444 ixl_ptype_to_hash(u8 ptype)
1445 {
1446         struct i40e_rx_ptype_decoded    decoded;
1447         u8                              ex = 0;
1448
1449         decoded = decode_rx_desc_ptype(ptype);
1450         ex = decoded.outer_frag;
1451
1452         if (!decoded.known)
1453                 return M_HASHTYPE_OPAQUE;
1454
1455         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
1456                 return M_HASHTYPE_OPAQUE;
1457
1458         /* Note: anything that gets to this point is IP */
1459         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
1460                 switch (decoded.inner_prot) {
1461                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1462                                 if (ex)
1463                                         return M_HASHTYPE_RSS_TCP_IPV6_EX;
1464                                 else
1465                                         return M_HASHTYPE_RSS_TCP_IPV6;
1466                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1467                                 if (ex)
1468                                         return M_HASHTYPE_RSS_UDP_IPV6_EX;
1469                                 else
1470                                         return M_HASHTYPE_RSS_UDP_IPV6;
1471                         default:
1472                                 if (ex)
1473                                         return M_HASHTYPE_RSS_IPV6_EX;
1474                                 else
1475                                         return M_HASHTYPE_RSS_IPV6;
1476                 }
1477         }
1478         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
1479                 switch (decoded.inner_prot) {
1480                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1481                                         return M_HASHTYPE_RSS_TCP_IPV4;
1482                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1483                                 if (ex)
1484                                         return M_HASHTYPE_RSS_UDP_IPV4_EX;
1485                                 else
1486                                         return M_HASHTYPE_RSS_UDP_IPV4;
1487                         default:
1488                                         return M_HASHTYPE_RSS_IPV4;
1489                 }
1490         }
1491         /* We should never get here!! */
1492         return M_HASHTYPE_OPAQUE;
1493 }
1494 #endif /* RSS */
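
/*
** Usage note (illustrative): under RSS, ixl_rxeof() below tags each
** completed frame with the hardware-supplied hash value and the hash
** type derived here, roughly:
**
**      m->m_pkthdr.flowid = le32toh(cur->wb.qword0.hi_dword.rss);
**      M_HASHTYPE_SET(m, ixl_ptype_to_hash(ptype));
*/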
1495
1496 /*********************************************************************
1497  *
1498  *  This routine executes in interrupt context. It replenishes
1499  *  the mbufs in the descriptor ring and passes data that has been
1500  *  DMA'd into host memory up to the upper layers.
1501  *
1502  *  We loop at most count times if count is > 0, or until done if
1503  *  count < 0.
1504  *
1505  *  Return TRUE for more work, FALSE for all clean.
1506  *********************************************************************/
1507 bool
1508 ixl_rxeof(struct ixl_queue *que, int count)
1509 {
1510         struct ixl_vsi          *vsi = que->vsi;
1511         struct rx_ring          *rxr = &que->rxr;
1512         struct ifnet            *ifp = vsi->ifp;
1513 #if defined(INET6) || defined(INET)
1514         struct lro_ctrl         *lro = &rxr->lro;
1515         struct lro_entry        *queued;
1516 #endif
1517         int                     i, nextp, processed = 0;
1518         union i40e_rx_desc      *cur;
1519         struct ixl_rx_buf       *rbuf, *nbuf;
1520
1521
1522         IXL_RX_LOCK(rxr);
1523
1524 #ifdef DEV_NETMAP
1525         if (netmap_rx_irq(ifp, que->me, &count)) {
1526                 IXL_RX_UNLOCK(rxr);
1527                 return (FALSE);
1528         }
1529 #endif /* DEV_NETMAP */
1530
1531         for (i = rxr->next_check; count != 0;) {
1532                 struct mbuf     *sendmp, *mh, *mp;
1533                 u32             rsc, status, error;
1534                 u16             hlen, plen, vtag;
1535                 u64             qword;
1536                 u8              ptype;
1537                 bool            eop;
1538  
1539                 /* Sync the ring. */
1540                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1541                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1542
1543                 cur = &rxr->base[i];
1544                 qword = le64toh(cur->wb.qword1.status_error_len);
1545                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1546                     >> I40E_RXD_QW1_STATUS_SHIFT;
1547                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1548                     >> I40E_RXD_QW1_ERROR_SHIFT;
1549                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1550                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1551                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1552                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1553                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1554                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1555
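                /* Descriptor Done (DD) not set: no more completed descriptors. */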
1556                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1557                         ++rxr->not_done;
1558                         break;
1559                 }
1560                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1561                         break;
1562
1563                 count--;
1564                 sendmp = NULL;
1565                 nbuf = NULL;
1566                 rsc = 0;
1567                 cur->wb.qword1.status_error_len = 0;
1568                 rbuf = &rxr->buffers[i];
1569                 mh = rbuf->m_head;
1570                 mp = rbuf->m_pack;
1571                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1572                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1573                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1574                 else
1575                         vtag = 0;
1576
1577                 /*
1578                 ** Make sure bad packets are discarded;
1579                 ** note that only the EOP descriptor has
1580                 ** valid error results.
1581                 */
1582                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1583                         rxr->discarded++;
1584                         ixl_rx_discard(rxr, i);
1585                         goto next_desc;
1586                 }
1587
1588                 /* Prefetch the next buffer */
1589                 if (!eop) {
1590                         nextp = i + 1;
1591                         if (nextp == que->num_desc)
1592                                 nextp = 0;
1593                         nbuf = &rxr->buffers[nextp];
1594                         prefetch(nbuf);
1595                 }
1596
1597                 /*
1598                 ** The header mbuf is ONLY used when header
1599                 ** split is enabled; otherwise we get normal
1600                 ** behavior, i.e., both header and payload
1601                 ** are DMA'd into the payload buffer.
1602                 **
1603                 ** Rather than using the fmp/lmp global pointers
1604                 ** we now keep the head of a packet chain in the
1605                 ** buffer struct and pass this along from one
1606                 ** descriptor to the next, until we get EOP.
1607                 */
1608                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1609                         if (hlen > IXL_RX_HDR)
1610                                 hlen = IXL_RX_HDR;
1611                         mh->m_len = hlen;
1612                         mh->m_flags |= M_PKTHDR;
1613                         mh->m_next = NULL;
1614                         mh->m_pkthdr.len = mh->m_len;
1615                         /* Null buf pointer so it is refreshed */
1616                         rbuf->m_head = NULL;
1617                         /*
1618                         ** Check the payload length; this
1619                         ** could be zero if it's a small
1620                         ** packet.
1621                         */
1622                         if (plen > 0) {
1623                                 mp->m_len = plen;
1624                                 mp->m_next = NULL;
1625                                 mp->m_flags &= ~M_PKTHDR;
1626                                 mh->m_next = mp;
1627                                 mh->m_pkthdr.len += mp->m_len;
1628                                 /* Null buf pointer so it is refreshed */
1629                                 rbuf->m_pack = NULL;
1630                                 rxr->split++;
1631                         }
1632                         /*
1633                         ** Now create the forward
1634                         ** chain so that when the packet
1635                         ** completes we won't have to.
1636                         */
1637                         if (eop == 0) {
1638                                 /* stash the chain head */
1639                                 nbuf->fmp = mh;
1640                                 /* Make forward chain */
1641                                 if (plen)
1642                                         mp->m_next = nbuf->m_pack;
1643                                 else
1644                                         mh->m_next = nbuf->m_pack;
1645                         } else {
1646                                 /* Singlet, prepare to send */
1647                                 sendmp = mh;
1648                                 if (vtag) {
1649                                         sendmp->m_pkthdr.ether_vtag = vtag;
1650                                         sendmp->m_flags |= M_VLANTAG;
1651                                 }
1652                         }
1653                 } else {
1654                         /*
1655                         ** Either no header split, or a
1656                         ** secondary piece of a fragmented
1657                         ** split packet.
1658                         */
1659                         mp->m_len = plen;
1660                         /*
1661                         ** See if there is a stored chain head,
1662                         ** which determines what we are handling.
1663                         */
1664                         sendmp = rbuf->fmp;
1665                         rbuf->m_pack = rbuf->fmp = NULL;
1666
1667                         if (sendmp != NULL) /* secondary frag */
1668                                 sendmp->m_pkthdr.len += mp->m_len;
1669                         else {
1670                                 /* first desc of a non-ps chain */
1671                                 sendmp = mp;
1672                                 sendmp->m_flags |= M_PKTHDR;
1673                                 sendmp->m_pkthdr.len = mp->m_len;
1674                                 if (vtag) {
1675                                         sendmp->m_pkthdr.ether_vtag = vtag;
1676                                         sendmp->m_flags |= M_VLANTAG;
1677                                 }
1678                         }
1679                         /* Pass the head pointer on */
1680                         if (eop == 0) {
1681                                 nbuf->fmp = sendmp;
1682                                 sendmp = NULL;
1683                                 mp->m_next = nbuf->m_pack;
1684                         }
1685                 }
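
                /*
                ** Illustrative result (assuming header split and a packet
                ** spanning several descriptors): the block above builds a
                ** chain of the form
                **
                **      mh(header) -> mp(payload) -> next m_pack -> ...
                **
                ** with the chain head carried forward in the next buffer's
                ** fmp field until EOP, when it is handed up as sendmp.
                */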
1686                 ++processed;
1687                 /* Sending this frame? */
1688                 if (eop) {
1689                         sendmp->m_pkthdr.rcvif = ifp;
1690                         /* gather stats */
1691                         rxr->rx_packets++;
1692                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1693                         /* capture data for dynamic ITR adjustment */
1694                         rxr->packets++;
1695                         rxr->bytes += sendmp->m_pkthdr.len;
1696                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1697                                 ixl_rx_checksum(sendmp, status, error, ptype);
1698 #ifdef RSS
1699                         sendmp->m_pkthdr.flowid =
1700                             le32toh(cur->wb.qword0.hi_dword.rss);
1701                         M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1702 #else
1703                         sendmp->m_pkthdr.flowid = que->msix;
1704                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1705 #endif
1706                 }
1707 next_desc:
1708                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1709                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1710
1711                 /* Advance our pointers to the next descriptor. */
1712                 if (++i == que->num_desc)
1713                         i = 0;
1714
1715                 /* Now send to the stack or do LRO */
1716                 if (sendmp != NULL) {
1717                         rxr->next_check = i;
1718                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1719                         i = rxr->next_check;
1720                 }
1721
1722                 /* Every 8 descriptors we refresh the mbufs */
1723                 if (processed == 8) {
1724                         ixl_refresh_mbufs(que, i);
1725                         processed = 0;
1726                 }
1727         }
1728
1729         /* Refresh any remaining buf structs */
1730         if (ixl_rx_unrefreshed(que))
1731                 ixl_refresh_mbufs(que, i);
1732
1733         rxr->next_check = i;
1734
1735 #if defined(INET6) || defined(INET)
1736         /*
1737          * Flush any outstanding LRO work
1738          */
1739         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1740                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1741                 tcp_lro_flush(lro, queued);
1742         }
1743 #endif
1744
1745         IXL_RX_UNLOCK(rxr);
1746         return (FALSE);
1747 }
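
/*
** Caller sketch (illustrative; exact names depend on the rest of the
** driver): a queue interrupt handler typically calls this with a
** per-pass budget and re-schedules itself while work remains, e.g.
**
**      more = ixl_rxeof(que, budget);
**      if (more)
**              taskqueue_enqueue(que->tq, &que->task);
*/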
1748
1749
1750 /*********************************************************************
1751  *
1752  *  Verify that the hardware indicated that the checksum is valid.
1753  *  Inform the stack about the status of the checksum so that the
1754  *  stack doesn't spend time verifying it again.
1755  *
1756  *********************************************************************/
1757 static void
1758 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1759 {
1760         struct i40e_rx_ptype_decoded decoded;
1761
1762         decoded = decode_rx_desc_ptype(ptype);
1763
1764         /* Errors? */
1765         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1766             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1767                 mp->m_pkthdr.csum_flags = 0;
1768                 return;
1769         }
1770
1771         /* IPv6 packets with extension headers likely have a bad csum */
1772         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1773             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1774                 if (status &
1775                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1776                         mp->m_pkthdr.csum_flags = 0;
1777                         return;
1778                 }
1779
1780  
1781         /* IP Checksum Good */
1782         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1783         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1784
1785         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1786                 mp->m_pkthdr.csum_flags |= 
1787                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1788                 mp->m_pkthdr.csum_data |= htons(0xffff);
1789         }
1790         return;
1791 }
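
/*
** Note: CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with a csum_data of
** 0xffff tells the TCP/UDP input paths that the L4 checksum, including
** the pseudo-header, has already been verified, so the stack skips its
** own check.
*/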
1792
1793 #if __FreeBSD_version >= 1100000
1794 uint64_t
1795 ixl_get_counter(if_t ifp, ift_counter cnt)
1796 {
1797         struct ixl_vsi *vsi;
1798
1799         vsi = if_getsoftc(ifp);
1800
1801         switch (cnt) {
1802         case IFCOUNTER_IPACKETS:
1803                 return (vsi->ipackets);
1804         case IFCOUNTER_IERRORS:
1805                 return (vsi->ierrors);
1806         case IFCOUNTER_OPACKETS:
1807                 return (vsi->opackets);
1808         case IFCOUNTER_OERRORS:
1809                 return (vsi->oerrors);
1810         case IFCOUNTER_COLLISIONS:
1811                 /* Collisions are impossible in full-duplex 10G/40G Ethernet */
1812                 return (0);
1813         case IFCOUNTER_IBYTES:
1814                 return (vsi->ibytes);
1815         case IFCOUNTER_OBYTES:
1816                 return (vsi->obytes);
1817         case IFCOUNTER_IMCASTS:
1818                 return (vsi->imcasts);
1819         case IFCOUNTER_OMCASTS:
1820                 return (vsi->omcasts);
1821         case IFCOUNTER_IQDROPS:
1822                 return (vsi->iqdrops);
1823         case IFCOUNTER_OQDROPS:
1824                 return (vsi->oqdrops);
1825         case IFCOUNTER_NOPROTO:
1826                 return (vsi->noproto);
1827         default:
1828                 return (if_get_counter_default(ifp, cnt));
1829         }
1830 }
1831 #endif
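
/*
** Usage sketch (illustrative): on FreeBSD 11 and later the interface
** setup path is expected to register this handler on the ifnet, e.g.
**
**      if_setgetcounterfn(ifp, ixl_get_counter);
**
** after which if_get_counter(ifp, cnt) reports the per-VSI soft
** counters returned above.
*/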
1832