1 /******************************************************************************
2
3   Copyright (c) 2013-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was separated to allow usage by
38 **          both the PF and VF drivers.
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46
47 #include "ixl.h"
48
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52
53 /* Local Prototypes */
54 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
56 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
57 static int      ixl_tx_setup_offload(struct ixl_queue *,
58                     struct mbuf *, u32 *, u32 *);
59 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60
61 static inline void ixl_rx_discard(struct rx_ring *, int);
62 static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
63                     struct mbuf *, u8);
64
65 static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
66 static inline u32 ixl_get_tx_head(struct ixl_queue *que);
67
68 #ifdef DEV_NETMAP
69 #include <dev/netmap/if_ixl_netmap.h>
70 int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
71 #endif /* DEV_NETMAP */
72
73 /*
74  * Copy the driver's default RSS hash key into @key.
75  */
76 void
77 ixl_get_default_rss_key(u32 *key)
78 {
79         MPASS(key != NULL);
80
81         u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
82             0x183cfd8c, 0xce880440, 0x580cbc3c,
83             0x35897377, 0x328b25e1, 0x4fa98922,
84             0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
85             0x0, 0x0, 0x0};
86
87         bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
88 }
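/*
 * Illustrative sketch only, not compiled into the driver: one way a caller
 * might fetch the default key into a local buffer before programming the
 * hardware.  IXL_RSS_KEY_SIZE_REG comes from ixl.h; the helper name and the
 * register-programming step are hypothetical.
 */
#if 0
static void
example_fetch_default_rss_key(void)
{
	u32 key[IXL_RSS_KEY_SIZE_REG];

	ixl_get_default_rss_key(key);
	/* ... write each key[i] into the corresponding RSS key register ... */
}
#endif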
89
90 /*
91 ** Multiqueue Transmit driver
92 */
93 int
94 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
95 {
96         struct ixl_vsi          *vsi = ifp->if_softc;
97         struct ixl_queue        *que;
98         struct tx_ring          *txr;
99         int                     err, i;
100 #ifdef RSS
101         u32                     bucket_id;
102 #endif
103
104         /*
105         ** Which queue to use:
106         **
107         ** When doing RSS, map it to the same outbound
108         ** queue as the incoming flow would be mapped to.
109         ** If everything is set up correctly, it should map to
110         ** the same bucket that the current CPU is assigned to.
111         */
112         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
113 #ifdef  RSS
114                 if (rss_hash2bucket(m->m_pkthdr.flowid,
115                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
116                         i = bucket_id % vsi->num_queues;
117                 } else
118 #endif
119                         i = m->m_pkthdr.flowid % vsi->num_queues;
120         } else
121                 i = curcpu % vsi->num_queues;
122
123         que = &vsi->queues[i];
124         txr = &que->txr;
125
126         err = drbr_enqueue(ifp, txr->br, m);
127         if (err)
128                 return (err);
129         if (IXL_TX_TRYLOCK(txr)) {
130                 ixl_mq_start_locked(ifp, txr);
131                 IXL_TX_UNLOCK(txr);
132         } else
133                 taskqueue_enqueue(que->tq, &que->tx_task);
134
135         return (0);
136 }
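/*
 * Illustrative sketch only, not compiled into the driver: with RSS compiled
 * out, the queue selection in ixl_mq_start() above reduces to the modulo
 * mapping below.  The function and parameter names here are hypothetical;
 * "u32" is the driver's fixed-width typedef.
 */
#if 0
static inline int
example_select_tx_queue(u32 flowid, bool has_flowid, int cpu_id, int num_queues)
{
	/* Hash-tagged mbufs follow their flowid; others follow the CPU. */
	if (has_flowid)
		return (flowid % num_queues);
	return (cpu_id % num_queues);
}
#endif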
137
138 int
139 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
140 {
141         struct ixl_queue        *que = txr->que;
142         struct ixl_vsi          *vsi = que->vsi;
143         struct mbuf             *next;
144         int                     err = 0;
145
146
147         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
148             vsi->link_active == 0)
149                 return (ENETDOWN);
150
151         /* Process the transmit queue */
152         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
153                 if ((err = ixl_xmit(que, &next)) != 0) {
154                         if (next == NULL)
155                                 drbr_advance(ifp, txr->br);
156                         else
157                                 drbr_putback(ifp, txr->br, next);
158                         break;
159                 }
160                 drbr_advance(ifp, txr->br);
161                 /* Send a copy of the frame to the BPF listener */
162                 ETHER_BPF_MTAP(ifp, next);
163                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
164                         break;
165         }
166
167         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
168                 ixl_txeof(que);
169
170         return (err);
171 }
172
173 /*
174  * Called from a taskqueue to drain queued transmit packets.
175  */
176 void
177 ixl_deferred_mq_start(void *arg, int pending)
178 {
179         struct ixl_queue        *que = arg;
180         struct tx_ring          *txr = &que->txr;
181         struct ixl_vsi          *vsi = que->vsi;
182         struct ifnet            *ifp = vsi->ifp;
183         
184         IXL_TX_LOCK(txr);
185         if (!drbr_empty(ifp, txr->br))
186                 ixl_mq_start_locked(ifp, txr);
187         IXL_TX_UNLOCK(txr);
188 }
189
190 /*
191 ** Flush all queue ring buffers
192 */
193 void
194 ixl_qflush(struct ifnet *ifp)
195 {
196         struct ixl_vsi  *vsi = ifp->if_softc;
197
198         for (int i = 0; i < vsi->num_queues; i++) {
199                 struct ixl_queue *que = &vsi->queues[i];
200                 struct tx_ring  *txr = &que->txr;
201                 struct mbuf     *m;
202                 IXL_TX_LOCK(txr);
203                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
204                         m_freem(m);
205                 IXL_TX_UNLOCK(txr);
206         }
207         if_qflush(ifp);
208 }
209
210 /*
211 ** Find mbuf chains passed to the driver
212 ** that are 'sparse', i.e. that use more than IXL_SPARSE_CHAIN
213 ** mbufs to deliver an mss-sized chunk of data
214 */
215 static inline bool
216 ixl_tso_detect_sparse(struct mbuf *mp)
217 {
218         struct mbuf     *m;
219         int             num, mss;
220
221         num = 0;
222         mss = mp->m_pkthdr.tso_segsz;
223
224         /* Exclude first mbuf; assume it contains all headers */
225         for (m = mp->m_next; m != NULL; m = m->m_next) {
226                 if (m == NULL)
227                         break;
228                 num++;
229                 mss -= m->m_len % mp->m_pkthdr.tso_segsz;
230
231                 if (mss < 1) {
232                         if (num > IXL_SPARSE_CHAIN)
233                                 return (true);
234                         num = (mss == 0) ? 0 : 1;
235                         mss += mp->m_pkthdr.tso_segsz;
236                 }
237         }
238
239         return (false);
240 }
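/*
 * Worked example of the accounting above (illustrative only, not compiled
 * into the driver): with tso_segsz = 1448 and non-header mbufs of 200 bytes
 * each, "mss" drops by 200 per mbuf and goes below 1 after the 8th such
 * mbuf, at which point "num" is 8; if that exceeds IXL_SPARSE_CHAIN the
 * chain is defragmented before mapping.  The numbers and the standalone
 * helper below (which takes an array of segment lengths and an explicit
 * threshold instead of a real mbuf chain) are hypothetical.
 */
#if 0
static bool
example_detect_sparse(const int *seg_len, int nsegs, int tso_segsz,
    int sparse_threshold)
{
	int num = 0, mss = tso_segsz;

	for (int i = 0; i < nsegs; i++) {
		num++;
		mss -= seg_len[i] % tso_segsz;
		if (mss < 1) {
			if (num > sparse_threshold)
				return (true);
			num = (mss == 0) ? 0 : 1;
			mss += tso_segsz;
		}
	}
	return (false);
}
#endif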
241
242
243 /*********************************************************************
244  *
245  *  This routine maps the mbufs to tx descriptors, allowing the
246  *  TX engine to transmit the packets. 
247  *      - return 0 on success, positive on failure
248  *
249  **********************************************************************/
250 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
251
252 static int
253 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
254 {
255         struct ixl_vsi          *vsi = que->vsi;
256         struct i40e_hw          *hw = vsi->hw;
257         struct tx_ring          *txr = &que->txr;
258         struct ixl_tx_buf       *buf;
259         struct i40e_tx_desc     *txd = NULL;
260         struct mbuf             *m_head, *m;
261         int                     i, j, error, nsegs;
262         int                     first, last = 0;
263         u16                     vtag = 0;
264         u32                     cmd, off;
265         bus_dmamap_t            map;
266         bus_dma_tag_t           tag;
267         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
268
269         cmd = off = 0;
270         m_head = *m_headp;
271
272         /*
273          * Important to capture the first descriptor
274          * used because it will contain the index of
275          * the one we tell the hardware to report back
276          */
277         first = txr->next_avail;
278         buf = &txr->buffers[first];
279         map = buf->map;
280         tag = txr->tx_tag;
281
282         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
283                 /* Use larger mapping for TSO */
284                 tag = txr->tso_tag;
285                 if (ixl_tso_detect_sparse(m_head)) {
286                         m = m_defrag(m_head, M_NOWAIT);
287                         if (m == NULL) {
288                                 m_freem(*m_headp);
289                                 *m_headp = NULL;
290                                 return (ENOBUFS);
291                         }
292                         *m_headp = m;
293                 }
294         }
295
296         /*
297          * Map the packet for DMA.
298          */
299         error = bus_dmamap_load_mbuf_sg(tag, map,
300             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
301
302         if (error == EFBIG) {
303                 struct mbuf *m;
304
305                 m = m_defrag(*m_headp, M_NOWAIT);
306                 if (m == NULL) {
307                         que->mbuf_defrag_failed++;
308                         m_freem(*m_headp);
309                         *m_headp = NULL;
310                         return (ENOBUFS);
311                 }
312                 *m_headp = m;
313
314                 /* Try it again */
315                 error = bus_dmamap_load_mbuf_sg(tag, map,
316                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
317
318                 if (error != 0) {
319                         que->tx_dmamap_failed++;
320                         m_freem(*m_headp);
321                         *m_headp = NULL;
322                         return (error);
323                 }
324         } else if (error != 0) {
325                 que->tx_dmamap_failed++;
326                 m_freem(*m_headp);
327                 *m_headp = NULL;
328                 return (error);
329         }
330
331         /* Make certain there are enough descriptors */
332         if (nsegs > txr->avail - 2) {
333                 txr->no_desc++;
334                 error = ENOBUFS;
335                 goto xmit_fail;
336         }
337         m_head = *m_headp;
338
339         /* Set up the TSO/CSUM offload */
340         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
341                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
342                 if (error)
343                         goto xmit_fail;
344         }
345
346         cmd |= I40E_TX_DESC_CMD_ICRC;
347         /* Grab the VLAN tag */
348         if (m_head->m_flags & M_VLANTAG) {
349                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
350                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
351         }
352
353         i = txr->next_avail;
354         for (j = 0; j < nsegs; j++) {
355                 bus_size_t seglen;
356
357                 buf = &txr->buffers[i];
358                 buf->tag = tag; /* Keep track of the type tag */
359                 txd = &txr->base[i];
360                 seglen = segs[j].ds_len;
361
362                 txd->buffer_addr = htole64(segs[j].ds_addr);
363                 txd->cmd_type_offset_bsz =
364                     htole64(I40E_TX_DESC_DTYPE_DATA
365                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
366                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
367                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
368                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
369
370                 last = i; /* descriptor that will get completion IRQ */
371
372                 if (++i == que->num_desc)
373                         i = 0;
374
375                 buf->m_head = NULL;
376                 buf->eop_index = -1;
377         }
378         /* Set the last descriptor for report */
379         txd->cmd_type_offset_bsz |=
380             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
381         txr->avail -= nsegs;
382         txr->next_avail = i;
383
384         buf->m_head = m_head;
385         /* Swap the dma map between the first and last descriptor */
386         txr->buffers[first].map = buf->map;
387         buf->map = map;
388         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
389
390         /* Set the index of the descriptor that will be marked done */
391         buf = &txr->buffers[first];
392         buf->eop_index = last;
393
394         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
395             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
396         /*
397          * Advance the Transmit Descriptor Tail (TDT); this tells the
398          * hardware that this frame is available to transmit.
399          */
400         ++txr->total_packets;
401         wr32(hw, txr->tail, i);
402
403         /* Mark outstanding work */
404         atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
405         return (0);
406
407 xmit_fail:
408         bus_dmamap_unload(tag, buf->map);
409         return (error);
410 }
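/*
 * Illustrative sketch only, not compiled into the driver: the
 * cmd_type_offset_bsz quadword built in ixl_xmit() above is simply each
 * field shifted into place.  The real shift amounts come from the
 * I40E_TXD_QW1_* definitions in the shared code; they are passed as
 * parameters here so the sketch stands alone, and the helper name is
 * hypothetical.
 */
#if 0
static inline u64
example_build_tx_qw1(u64 dtype, u64 cmd, u64 off, u64 buf_sz, u64 l2tag1,
    int cmd_shift, int off_shift, int bsz_shift, int l2tag1_shift)
{
	return (dtype
	    | (cmd << cmd_shift)
	    | (off << off_shift)
	    | (buf_sz << bsz_shift)
	    | (l2tag1 << l2tag1_shift));
}
#endif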
411
412
413 /*********************************************************************
414  *
415  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
416  *  the information needed to transmit a packet on the wire. This is
417  *  called only once at attach; setup is done on every reset.
418  *
419  **********************************************************************/
420 int
421 ixl_allocate_tx_data(struct ixl_queue *que)
422 {
423         struct tx_ring          *txr = &que->txr;
424         struct ixl_vsi          *vsi = que->vsi;
425         device_t                dev = vsi->dev;
426         struct ixl_tx_buf       *buf;
427         int                     error = 0;
428
429         /*
430          * Setup DMA descriptor areas.
431          */
432         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),           /* parent */
433                                1, 0,                    /* alignment, bounds */
434                                BUS_SPACE_MAXADDR,       /* lowaddr */
435                                BUS_SPACE_MAXADDR,       /* highaddr */
436                                NULL, NULL,              /* filter, filterarg */
437                                IXL_TSO_SIZE,            /* maxsize */
438                                IXL_MAX_TX_SEGS,         /* nsegments */
439                                PAGE_SIZE,               /* maxsegsize */
440                                0,                       /* flags */
441                                NULL,                    /* lockfunc */
442                                NULL,                    /* lockfuncarg */
443                                &txr->tx_tag))) {
444                 device_printf(dev,"Unable to allocate TX DMA tag\n");
445                 goto fail;
446         }
447
448         /* Make a special tag for TSO */
449         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),           /* parent */
450                                1, 0,                    /* alignment, bounds */
451                                BUS_SPACE_MAXADDR,       /* lowaddr */
452                                BUS_SPACE_MAXADDR,       /* highaddr */
453                                NULL, NULL,              /* filter, filterarg */
454                                IXL_TSO_SIZE,            /* maxsize */
455                                IXL_MAX_TSO_SEGS,        /* nsegments */
456                                PAGE_SIZE,               /* maxsegsize */
457                                0,                       /* flags */
458                                NULL,                    /* lockfunc */
459                                NULL,                    /* lockfuncarg */
460                                &txr->tso_tag))) {
461                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
462                 goto fail;
463         }
464
465         if (!(txr->buffers =
466             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
467             que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
468                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
469                 error = ENOMEM;
470                 goto fail;
471         }
472
473         /* Create the descriptor buffer default dma maps */
474         buf = txr->buffers;
475         for (int i = 0; i < que->num_desc; i++, buf++) {
476                 buf->tag = txr->tx_tag;
477                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
478                 if (error != 0) {
479                         device_printf(dev, "Unable to create TX DMA map\n");
480                         goto fail;
481                 }
482         }
483 fail:
484         return (error);
485 }
486
487
488 /*********************************************************************
489  *
490  *  (Re)Initialize a queue transmit ring.
491  *      - called by init, it clears the descriptor ring,
492  *        and frees any stale mbufs 
493  *
494  **********************************************************************/
495 void
496 ixl_init_tx_ring(struct ixl_queue *que)
497 {
498 #ifdef DEV_NETMAP
499         struct netmap_adapter *na = NA(que->vsi->ifp);
500         struct netmap_slot *slot;
501 #endif /* DEV_NETMAP */
502         struct tx_ring          *txr = &que->txr;
503         struct ixl_tx_buf       *buf;
504
505         /* Clear the old ring contents */
506         IXL_TX_LOCK(txr);
507
508 #ifdef DEV_NETMAP
509         /*
510          * (under lock): if in netmap mode, do some consistency
511          * checks and set slot to entry 0 of the netmap ring.
512          */
513         slot = netmap_reset(na, NR_TX, que->me, 0);
514 #endif /* DEV_NETMAP */
515
516         bzero((void *)txr->base,
517               (sizeof(struct i40e_tx_desc)) * que->num_desc);
518
519         /* Reset indices */
520         txr->next_avail = 0;
521         txr->next_to_clean = 0;
522
523         /* Reset watchdog status */
524         txr->watchdog_timer = 0;
525
526 #ifdef IXL_FDIR
527         /* Initialize flow director */
528         txr->atr_rate = ixl_atr_rate;
529         txr->atr_count = 0;
530 #endif
531         /* Free any existing tx mbufs. */
532         buf = txr->buffers;
533         for (int i = 0; i < que->num_desc; i++, buf++) {
534                 if (buf->m_head != NULL) {
535                         bus_dmamap_sync(buf->tag, buf->map,
536                             BUS_DMASYNC_POSTWRITE);
537                         bus_dmamap_unload(buf->tag, buf->map);
538                         m_freem(buf->m_head);
539                         buf->m_head = NULL;
540                 }
541 #ifdef DEV_NETMAP
542                 /*
543                  * In netmap mode, set the map for the packet buffer.
544                  * NOTE: Some drivers (not this one) also need to set
545                  * the physical buffer address in the NIC ring.
546                  * netmap_idx_n2k() maps a nic index, i, into the corresponding
547                  * netmap slot index, si
548                  */
549                 if (slot) {
550                         int si = netmap_idx_n2k(na->tx_rings[que->me], i);
551                         netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
552                 }
553 #endif /* DEV_NETMAP */
554                 /* Clear the EOP index */
555                 buf->eop_index = -1;
556         }
557
558         /* Set number of descriptors available */
559         txr->avail = que->num_desc;
560
561         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
562             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
563         IXL_TX_UNLOCK(txr);
564 }
565
566
567 /*********************************************************************
568  *
569  *  Free transmit ring related data structures.
570  *
571  **********************************************************************/
572 void
573 ixl_free_que_tx(struct ixl_queue *que)
574 {
575         struct tx_ring *txr = &que->txr;
576         struct ixl_tx_buf *buf;
577
578         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
579
580         for (int i = 0; i < que->num_desc; i++) {
581                 buf = &txr->buffers[i];
582                 if (buf->m_head != NULL) {
583                         bus_dmamap_sync(buf->tag, buf->map,
584                             BUS_DMASYNC_POSTWRITE);
585                         bus_dmamap_unload(buf->tag,
586                             buf->map);
587                         m_freem(buf->m_head);
588                         buf->m_head = NULL;
589                         if (buf->map != NULL) {
590                                 bus_dmamap_destroy(buf->tag,
591                                     buf->map);
592                                 buf->map = NULL;
593                         }
594                 } else if (buf->map != NULL) {
595                         bus_dmamap_unload(buf->tag,
596                             buf->map);
597                         bus_dmamap_destroy(buf->tag,
598                             buf->map);
599                         buf->map = NULL;
600                 }
601         }
602         if (txr->br != NULL)
603                 buf_ring_free(txr->br, M_DEVBUF);
604         if (txr->buffers != NULL) {
605                 free(txr->buffers, M_DEVBUF);
606                 txr->buffers = NULL;
607         }
608         if (txr->tx_tag != NULL) {
609                 bus_dma_tag_destroy(txr->tx_tag);
610                 txr->tx_tag = NULL;
611         }
612         if (txr->tso_tag != NULL) {
613                 bus_dma_tag_destroy(txr->tso_tag);
614                 txr->tso_tag = NULL;
615         }
616
617         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
618         return;
619 }
620
621 /*********************************************************************
622  *
623  *  Setup descriptor for hw offloads 
624  *
625  **********************************************************************/
626
627 static int
628 ixl_tx_setup_offload(struct ixl_queue *que,
629     struct mbuf *mp, u32 *cmd, u32 *off)
630 {
631         struct ether_vlan_header        *eh;
632 #ifdef INET
633         struct ip                       *ip = NULL;
634 #endif
635         struct tcphdr                   *th = NULL;
636 #ifdef INET6
637         struct ip6_hdr                  *ip6;
638 #endif
639         int                             elen, ip_hlen = 0, tcp_hlen;
640         u16                             etype;
641         u8                              ipproto = 0;
642         bool                            tso = FALSE;
643
644         /* Set up the TSO context descriptor if required */
645         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
646                 tso = ixl_tso_setup(que, mp);
647                 if (tso)
648                         ++que->tso;
649                 else
650                         return (ENXIO);
651         }
652
653         /*
654          * Determine where frame payload starts.
655          * Jump over vlan headers if already present,
656          * helpful for QinQ too.
657          */
658         eh = mtod(mp, struct ether_vlan_header *);
659         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
660                 etype = ntohs(eh->evl_proto);
661                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
662         } else {
663                 etype = ntohs(eh->evl_encap_proto);
664                 elen = ETHER_HDR_LEN;
665         }
666
667         switch (etype) {
668 #ifdef INET
669                 case ETHERTYPE_IP:
670                         ip = (struct ip *)(mp->m_data + elen);
671                         ip_hlen = ip->ip_hl << 2;
672                         ipproto = ip->ip_p;
673                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
674                         /* The IP checksum must be recalculated with TSO */
675                         if (tso)
676                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
677                         else
678                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
679                         break;
680 #endif
681 #ifdef INET6
682                 case ETHERTYPE_IPV6:
683                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
684                         ip_hlen = sizeof(struct ip6_hdr);
685                         ipproto = ip6->ip6_nxt;
686                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
687                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
688                         break;
689 #endif
690                 default:
691                         break;
692         }
693
694         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
695         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
696
697         switch (ipproto) {
698                 case IPPROTO_TCP:
699                         tcp_hlen = th->th_off << 2;
700                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
701                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
702                                 *off |= (tcp_hlen >> 2) <<
703                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
704                         }
705 #ifdef IXL_FDIR
706                         ixl_atr(que, th, etype);
707 #endif
708                         break;
709                 case IPPROTO_UDP:
710                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
711                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
712                                 *off |= (sizeof(struct udphdr) >> 2) <<
713                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
714                         }
715                         break;
716
717                 case IPPROTO_SCTP:
718                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
719                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
720                                 *off |= (sizeof(struct sctphdr) >> 2) <<
721                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
722                         }
723                         /* Fall Thru */
724                 default:
725                         break;
726         }
727
728         return (0);
729 }
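/*
 * Illustrative sketch only, not compiled into the driver: the offsets
 * programmed above are not byte counts.  The MAC header length is encoded
 * in 2-byte words (hence "elen >> 1") and the IP/L4 header lengths in
 * 4-byte words (hence ">> 2").  The shift positions are passed in here so
 * the sketch stands alone; the helper name is hypothetical.
 */
#if 0
static inline u32
example_build_offload_offsets(int eth_hlen, int ip_hlen, int l4_hlen,
    int maclen_shift, int iplen_shift, int l4len_shift)
{
	u32 off = 0;

	off |= (u32)(eth_hlen >> 1) << maclen_shift;	/* 2-byte words */
	off |= (u32)(ip_hlen >> 2) << iplen_shift;	/* 4-byte words */
	off |= (u32)(l4_hlen >> 2) << l4len_shift;	/* 4-byte words */
	return (off);
}
#endif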
730
731
732 /**********************************************************************
733  *
734  *  Setup context for hardware segmentation offload (TSO)
735  *
736  **********************************************************************/
737 static bool
738 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
739 {
740         struct tx_ring                  *txr = &que->txr;
741         struct i40e_tx_context_desc     *TXD;
742         struct ixl_tx_buf               *buf;
743         u32                             cmd, mss, type, tsolen;
744         u16                             etype;
745         int                             idx, elen, ip_hlen, tcp_hlen;
746         struct ether_vlan_header        *eh;
747 #ifdef INET
748         struct ip                       *ip;
749 #endif
750 #ifdef INET6
751         struct ip6_hdr                  *ip6;
752 #endif
753 #if defined(INET6) || defined(INET)
754         struct tcphdr                   *th;
755 #endif
756         u64                             type_cmd_tso_mss;
757
758         /*
759          * Determine where frame payload starts.
760          * Jump over vlan headers if already present
761          */
762         eh = mtod(mp, struct ether_vlan_header *);
763         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
764                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
765                 etype = eh->evl_proto;
766         } else {
767                 elen = ETHER_HDR_LEN;
768                 etype = eh->evl_encap_proto;
769         }
770
771         switch (ntohs(etype)) {
772 #ifdef INET6
773         case ETHERTYPE_IPV6:
774                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
775                 if (ip6->ip6_nxt != IPPROTO_TCP)
776                         return (FALSE);
777                 ip_hlen = sizeof(struct ip6_hdr);
778                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
779                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
780                 tcp_hlen = th->th_off << 2;
781                 /*
782                  * The corresponding flag is set by the stack in the IPv4
783                  * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
784                  * So, set it here because the rest of the flow requires it.
785                  */
786                 mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
787                 break;
788 #endif
789 #ifdef INET
790         case ETHERTYPE_IP:
791                 ip = (struct ip *)(mp->m_data + elen);
792                 if (ip->ip_p != IPPROTO_TCP)
793                         return (FALSE);
794                 ip->ip_sum = 0;
795                 ip_hlen = ip->ip_hl << 2;
796                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
797                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
798                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
799                 tcp_hlen = th->th_off << 2;
800                 break;
801 #endif
802         default:
803                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
804                     __func__, ntohs(etype));
805                 return FALSE;
806         }
807
808         /* Ensure we have at least the IP+TCP header in the first mbuf. */
809         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
810                 return FALSE;
811
812         idx = txr->next_avail;
813         buf = &txr->buffers[idx];
814         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
815         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
816
817         type = I40E_TX_DESC_DTYPE_CONTEXT;
818         cmd = I40E_TX_CTX_DESC_TSO;
819         /* TSO MSS must not be less than 64 */
820         if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
821                 que->mss_too_small++;
822                 mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
823         }
824         mss = mp->m_pkthdr.tso_segsz;
825
826         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
827             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
828             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
829             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
830         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
831
832         TXD->tunneling_params = htole32(0);
833         buf->m_head = NULL;
834         buf->eop_index = -1;
835
836         if (++idx == que->num_desc)
837                 idx = 0;
838
839         txr->avail--;
840         txr->next_avail = idx;
841
842         return TRUE;
843 }
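/*
 * Illustrative sketch only, not compiled into the driver: the TSO length
 * written to the context descriptor above is the TCP payload length, i.e.
 * the packet length with the Ethernet, IP and TCP headers subtracted.  The
 * helper name is hypothetical.
 */
#if 0
static inline u32
example_tso_payload_len(u32 pkt_len, int eth_hlen, int ip_hlen, int tcp_hlen)
{
	return (pkt_len - (eth_hlen + ip_hlen + tcp_hlen));
}
#endif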
844
845 /*             
846 ** ixl_get_tx_head - Retrieve the value from the
847 **    location where the HW records its HEAD index
848 */
849 static inline u32
850 ixl_get_tx_head(struct ixl_queue *que)
851 {
852         struct tx_ring  *txr = &que->txr;
853         void *head = &txr->base[que->num_desc];
854         return LE32_TO_CPU(*(volatile __le32 *)head);
855 }
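/*
 * Illustrative sketch only, not compiled into the driver: with head
 * writeback the hardware stores its HEAD index in host memory immediately
 * after the last descriptor, which is why ixl_get_tx_head() reads
 * base[num_desc].  Given that head and the software clean index, the number
 * of descriptors still owned by hardware is a modular distance; the helper
 * name below is hypothetical.
 */
#if 0
static inline u32
example_tx_descs_pending(u32 head, u32 next_to_clean, u32 num_desc)
{
	/* Ring entries from next_to_clean up to, but not including, head. */
	return ((head + num_desc - next_to_clean) % num_desc);
}
#endif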
856
857 /**********************************************************************
858  *
859  *  Examine each tx_buffer in the used queue. If the hardware is done
860  *  processing the packet then free associated resources. The
861  *  tx_buffer is put back on the free queue.
862  *
863  **********************************************************************/
864 bool
865 ixl_txeof(struct ixl_queue *que)
866 {
867         struct tx_ring          *txr = &que->txr;
868         u32                     first, last, head, done, processed;
869         struct ixl_tx_buf       *buf;
870         struct i40e_tx_desc     *tx_desc, *eop_desc;
871
872
873         mtx_assert(&txr->mtx, MA_OWNED);
874
875 #ifdef DEV_NETMAP
876         // XXX todo: implement moderation
877         if (netmap_tx_irq(que->vsi->ifp, que->me))
878                 return FALSE;
879 #endif /* DEV_NETMAP */
880
881         /* These are not the descriptors you seek, move along :) */
882         if (txr->avail == que->num_desc) {
883                 atomic_store_rel_32(&txr->watchdog_timer, 0);
884                 return FALSE;
885         }
886
887         processed = 0;
888         first = txr->next_to_clean;
889         buf = &txr->buffers[first];
890         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
891         last = buf->eop_index;
892         if (last == -1)
893                 return FALSE;
894         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
895
896         /* Get the Head WB value */
897         head = ixl_get_tx_head(que);
898
899         /*
900         ** Get the index of the first descriptor
901         ** BEYOND the EOP and call that 'done'.
902         ** I do this so the comparison in the
903         ** inner while loop below can be simple
904         */
905         if (++last == que->num_desc) last = 0;
906         done = last;
907
908         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
909             BUS_DMASYNC_POSTREAD);
910         /*
911         ** The HEAD index of the ring is written to a
912         ** defined location; this, rather than a done bit,
913         ** is what is used to keep track of what must be
914         ** 'cleaned'.
915         */
916         while (first != head) {
917                 /* We clean the range of the packet */
918                 while (first != done) {
919                         ++txr->avail;
920                         ++processed;
921
922                         if (buf->m_head) {
923                                 txr->bytes += /* for ITR adjustment */
924                                     buf->m_head->m_pkthdr.len;
925                                 txr->tx_bytes += /* for TX stats */
926                                     buf->m_head->m_pkthdr.len;
927                                 bus_dmamap_sync(buf->tag,
928                                     buf->map,
929                                     BUS_DMASYNC_POSTWRITE);
930                                 bus_dmamap_unload(buf->tag,
931                                     buf->map);
932                                 m_freem(buf->m_head);
933                                 buf->m_head = NULL;
934                         }
935                         buf->eop_index = -1;
936
937                         if (++first == que->num_desc)
938                                 first = 0;
939
940                         buf = &txr->buffers[first];
941                         tx_desc = &txr->base[first];
942                 }
943                 ++txr->packets;
944                 /* See if there is more work now */
945                 last = buf->eop_index;
946                 if (last != -1) {
947                         eop_desc = &txr->base[last];
948                         /* Get next done point */
949                         if (++last == que->num_desc) last = 0;
950                         done = last;
951                 } else
952                         break;
953         }
954         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
955             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
956
957         txr->next_to_clean = first;
958
959
960         /*
961          * If there are no pending descriptors, clear the timeout.
962          */
963         if (txr->avail == que->num_desc) {
964                 atomic_store_rel_32(&txr->watchdog_timer, 0);
965                 return FALSE;
966         }
967
968         return TRUE;
969 }
970
971 /*********************************************************************
972  *
973  *  Refresh mbuf buffers for RX descriptor rings
974  *   - now keeps its own state so discards due to resource
975  *     exhaustion are unnecessary; if an mbuf cannot be obtained
976  *     it just returns, keeping its placeholder, so it can simply
977  *     be called again to retry.
978  *
979  **********************************************************************/
980 static void
981 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
982 {
983         struct ixl_vsi          *vsi = que->vsi;
984         struct rx_ring          *rxr = &que->rxr;
985         bus_dma_segment_t       hseg[1];
986         bus_dma_segment_t       pseg[1];
987         struct ixl_rx_buf       *buf;
988         struct mbuf             *mh, *mp;
989         int                     i, j, nsegs, error;
990         bool                    refreshed = FALSE;
991
992         i = j = rxr->next_refresh;
993         /* Control the loop with one beyond */
994         if (++j == que->num_desc)
995                 j = 0;
996
997         while (j != limit) {
998                 buf = &rxr->buffers[i];
999                 if (rxr->hdr_split == FALSE)
1000                         goto no_split;
1001
1002                 if (buf->m_head == NULL) {
1003                         mh = m_gethdr(M_NOWAIT, MT_DATA);
1004                         if (mh == NULL)
1005                                 goto update;
1006                 } else
1007                         mh = buf->m_head;
1008
1009                 mh->m_pkthdr.len = mh->m_len = MHLEN;
1010                 mh->m_len = MHLEN;
1011                 mh->m_flags |= M_PKTHDR;
1012                 /* Get the memory mapping */
1013                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1014                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1015                 if (error != 0) {
1016                         printf("Refresh mbufs: hdr dmamap load"
1017                             " failure - %d\n", error);
1018                         m_free(mh);
1019                         buf->m_head = NULL;
1020                         goto update;
1021                 }
1022                 buf->m_head = mh;
1023                 bus_dmamap_sync(rxr->htag, buf->hmap,
1024                     BUS_DMASYNC_PREREAD);
1025                 rxr->base[i].read.hdr_addr =
1026                    htole64(hseg[0].ds_addr);
1027
1028 no_split:
1029                 if (buf->m_pack == NULL) {
1030                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1031                             M_PKTHDR, rxr->mbuf_sz);
1032                         if (mp == NULL)
1033                                 goto update;
1034                 } else
1035                         mp = buf->m_pack;
1036
1037                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1038                 /* Get the memory mapping */
1039                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1040                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1041                 if (error != 0) {
1042                         printf("Refresh mbufs: payload dmamap load"
1043                             " failure - %d\n", error);
1044                         m_free(mp);
1045                         buf->m_pack = NULL;
1046                         goto update;
1047                 }
1048                 buf->m_pack = mp;
1049                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1050                     BUS_DMASYNC_PREREAD);
1051                 rxr->base[i].read.pkt_addr =
1052                    htole64(pseg[0].ds_addr);
1053                 /* Used only when doing header split */
1054                 rxr->base[i].read.hdr_addr = 0;
1055
1056                 refreshed = TRUE;
1057                 /* Next is precalculated */
1058                 i = j;
1059                 rxr->next_refresh = i;
1060                 if (++j == que->num_desc)
1061                         j = 0;
1062         }
1063 update:
1064         if (refreshed) /* Update hardware tail index */
1065                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1066         return;
1067 }
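/*
 * Illustrative sketch only, not compiled into the driver: the refresh loop
 * above walks the ring with two indices, "i" (the slot being refreshed) and
 * "j" (one beyond it, modulo the ring size), stopping when "j" reaches the
 * caller-supplied limit.  The helper below shows just that index walk; its
 * name and the returned value (the next slot to refresh, which is what gets
 * written to the hardware tail) are hypothetical.
 */
#if 0
static int
example_refresh_walk(int start, int limit, int num_desc)
{
	int i = start;
	int j = (start + 1) % num_desc;

	while (j != limit) {
		/* ... refresh the buffers for slot i here ... */
		i = j;
		j = (j + 1) % num_desc;
	}
	return (i);	/* next slot to refresh / new tail value */
}
#endif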
1068
1069
1070 /*********************************************************************
1071  *
1072  *  Allocate memory for rx_buffer structures. Since we use one
1073  *  rx_buffer per descriptor, the maximum number of rx_buffer's
1074  *  that we'll need is equal to the number of receive descriptors
1075  *  that we've defined.
1076  *
1077  **********************************************************************/
1078 int
1079 ixl_allocate_rx_data(struct ixl_queue *que)
1080 {
1081         struct rx_ring          *rxr = &que->rxr;
1082         struct ixl_vsi          *vsi = que->vsi;
1083         device_t                dev = vsi->dev;
1084         struct ixl_rx_buf       *buf;
1085         int                     i, bsize, error;
1086
1087         bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1088         if (!(rxr->buffers =
1089             (struct ixl_rx_buf *) malloc(bsize,
1090             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1091                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1092                 error = ENOMEM;
1093                 return (error);
1094         }
1095
1096         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1097                                    1, 0,        /* alignment, bounds */
1098                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1099                                    BUS_SPACE_MAXADDR,   /* highaddr */
1100                                    NULL, NULL,          /* filter, filterarg */
1101                                    MSIZE,               /* maxsize */
1102                                    1,                   /* nsegments */
1103                                    MSIZE,               /* maxsegsize */
1104                                    0,                   /* flags */
1105                                    NULL,                /* lockfunc */
1106                                    NULL,                /* lockfuncarg */
1107                                    &rxr->htag))) {
1108                 device_printf(dev, "Unable to create RX DMA htag\n");
1109                 return (error);
1110         }
1111
1112         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1113                                    1, 0,        /* alignment, bounds */
1114                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1115                                    BUS_SPACE_MAXADDR,   /* highaddr */
1116                                    NULL, NULL,          /* filter, filterarg */
1117                                    MJUM16BYTES,         /* maxsize */
1118                                    1,                   /* nsegments */
1119                                    MJUM16BYTES,         /* maxsegsize */
1120                                    0,                   /* flags */
1121                                    NULL,                /* lockfunc */
1122                                    NULL,                /* lockfuncarg */
1123                                    &rxr->ptag))) {
1124                 device_printf(dev, "Unable to create RX DMA ptag\n");
1125                 return (error);
1126         }
1127
1128         for (i = 0; i < que->num_desc; i++) {
1129                 buf = &rxr->buffers[i];
1130                 error = bus_dmamap_create(rxr->htag,
1131                     BUS_DMA_NOWAIT, &buf->hmap);
1132                 if (error) {
1133                         device_printf(dev, "Unable to create RX head map\n");
1134                         break;
1135                 }
1136                 error = bus_dmamap_create(rxr->ptag,
1137                     BUS_DMA_NOWAIT, &buf->pmap);
1138                 if (error) {
1139                         device_printf(dev, "Unable to create RX pkt map\n");
1140                         break;
1141                 }
1142         }
1143
1144         return (error);
1145 }
1146
1147
1148 /*********************************************************************
1149  *
1150  *  (Re)Initialize the queue receive ring and its buffers.
1151  *
1152  **********************************************************************/
1153 int
1154 ixl_init_rx_ring(struct ixl_queue *que)
1155 {
1156         struct  rx_ring         *rxr = &que->rxr;
1157         struct ixl_vsi          *vsi = que->vsi;
1158 #if defined(INET6) || defined(INET)
1159         struct ifnet            *ifp = vsi->ifp;
1160         struct lro_ctrl         *lro = &rxr->lro;
1161 #endif
1162         struct ixl_rx_buf       *buf;
1163         bus_dma_segment_t       pseg[1], hseg[1];
1164         int                     rsize, nsegs, error = 0;
1165 #ifdef DEV_NETMAP
1166         struct netmap_adapter *na = NA(que->vsi->ifp);
1167         struct netmap_slot *slot;
1168 #endif /* DEV_NETMAP */
1169
1170         IXL_RX_LOCK(rxr);
1171 #ifdef DEV_NETMAP
1172         /* same as in ixl_init_tx_ring() */
1173         slot = netmap_reset(na, NR_RX, que->me, 0);
1174 #endif /* DEV_NETMAP */
1175         /* Clear the ring contents */
1176         rsize = roundup2(que->num_desc *
1177             sizeof(union i40e_rx_desc), DBA_ALIGN);
1178         bzero((void *)rxr->base, rsize);
1179         /* Cleanup any existing buffers */
1180         for (int i = 0; i < que->num_desc; i++) {
1181                 buf = &rxr->buffers[i];
1182                 if (buf->m_head != NULL) {
1183                         bus_dmamap_sync(rxr->htag, buf->hmap,
1184                             BUS_DMASYNC_POSTREAD);
1185                         bus_dmamap_unload(rxr->htag, buf->hmap);
1186                         buf->m_head->m_flags |= M_PKTHDR;
1187                         m_freem(buf->m_head);
1188                 }
1189                 if (buf->m_pack != NULL) {
1190                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1191                             BUS_DMASYNC_POSTREAD);
1192                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1193                         buf->m_pack->m_flags |= M_PKTHDR;
1194                         m_freem(buf->m_pack);
1195                 }
1196                 buf->m_head = NULL;
1197                 buf->m_pack = NULL;
1198         }
1199
1200         /* header split is off */
1201         rxr->hdr_split = FALSE;
1202
1203         /* Now replenish the mbufs */
1204         for (int j = 0; j != que->num_desc; ++j) {
1205                 struct mbuf     *mh, *mp;
1206
1207                 buf = &rxr->buffers[j];
1208 #ifdef DEV_NETMAP
1209                 /*
1210                  * In netmap mode, fill the map and set the buffer
1211                  * address in the NIC ring, considering the offset
1212                  * between the netmap and NIC rings (see comment in
1213                  * ixgbe_setup_transmit_ring() ). No need to allocate
1214                  * an mbuf, so end the block with a continue;
1215                  */
1216                 if (slot) {
1217                         int sj = netmap_idx_n2k(na->rx_rings[que->me], j);
1218                         uint64_t paddr;
1219                         void *addr;
1220
1221                         addr = PNMB(na, slot + sj, &paddr);
1222                         netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1223                         /* Update descriptor and the cached value */
1224                         rxr->base[j].read.pkt_addr = htole64(paddr);
1225                         rxr->base[j].read.hdr_addr = 0;
1226                         continue;
1227                 }
1228 #endif /* DEV_NETMAP */
1229                 /*
1230                 ** Don't allocate mbufs if not
1231                 ** doing header split, it's wasteful
1232                 */ 
1233                 if (rxr->hdr_split == FALSE)
1234                         goto skip_head;
1235
1236                 /* First the header */
1237                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1238                 if (buf->m_head == NULL) {
1239                         error = ENOBUFS;
1240                         goto fail;
1241                 }
1242                 m_adj(buf->m_head, ETHER_ALIGN);
1243                 mh = buf->m_head;
1244                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1245                 mh->m_flags |= M_PKTHDR;
1246                 /* Get the memory mapping */
1247                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1248                     buf->hmap, buf->m_head, hseg,
1249                     &nsegs, BUS_DMA_NOWAIT);
1250                 if (error != 0) /* Nothing elegant to do here */
1251                         goto fail;
1252                 bus_dmamap_sync(rxr->htag,
1253                     buf->hmap, BUS_DMASYNC_PREREAD);
1254                 /* Update descriptor */
1255                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1256
1257 skip_head:
1258                 /* Now the payload cluster */
1259                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1260                     M_PKTHDR, rxr->mbuf_sz);
1261                 if (buf->m_pack == NULL) {
1262                         error = ENOBUFS;
1263                         goto fail;
1264                 }
1265                 mp = buf->m_pack;
1266                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1267                 /* Get the memory mapping */
1268                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1269                     buf->pmap, mp, pseg,
1270                     &nsegs, BUS_DMA_NOWAIT);
1271                 if (error != 0)
1272                         goto fail;
1273                 bus_dmamap_sync(rxr->ptag,
1274                     buf->pmap, BUS_DMASYNC_PREREAD);
1275                 /* Update descriptor */
1276                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1277                 rxr->base[j].read.hdr_addr = 0;
1278         }
1279
1280
1281         /* Setup our descriptor indices */
1282         rxr->next_check = 0;
1283         rxr->next_refresh = 0;
1284         rxr->lro_enabled = FALSE;
1285         rxr->split = 0;
1286         rxr->bytes = 0;
1287         rxr->discard = FALSE;
1288
1289         wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1290         ixl_flush(vsi->hw);
1291
1292 #if defined(INET6) || defined(INET)
1293         /*
1294         ** Now set up the LRO interface:
1295         */
1296         if (ifp->if_capenable & IFCAP_LRO) {
1297                 int err = tcp_lro_init(lro);
1298                 if (err) {
1299                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1300                         goto fail;
1301                 }
1302                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1303                 rxr->lro_enabled = TRUE;
1304                 lro->ifp = vsi->ifp;
1305         }
1306 #endif
1307
1308         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1309             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1310
1311 fail:
1312         IXL_RX_UNLOCK(rxr);
1313         return (error);
1314 }
1315
1316
1317 /*********************************************************************
1318  *
1319  *  Free station receive ring data structures
1320  *
1321  **********************************************************************/
1322 void
1323 ixl_free_que_rx(struct ixl_queue *que)
1324 {
1325         struct rx_ring          *rxr = &que->rxr;
1326         struct ixl_rx_buf       *buf;
1327
1328         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1329
1330         /* Cleanup any existing buffers */
1331         if (rxr->buffers != NULL) {
1332                 for (int i = 0; i < que->num_desc; i++) {
1333                         buf = &rxr->buffers[i];
1334                         if (buf->m_head != NULL) {
1335                                 bus_dmamap_sync(rxr->htag, buf->hmap,
1336                                     BUS_DMASYNC_POSTREAD);
1337                                 bus_dmamap_unload(rxr->htag, buf->hmap);
1338                                 buf->m_head->m_flags |= M_PKTHDR;
1339                                 m_freem(buf->m_head);
1340                         }
1341                         if (buf->m_pack != NULL) {
1342                                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1343                                     BUS_DMASYNC_POSTREAD);
1344                                 bus_dmamap_unload(rxr->ptag, buf->pmap);
1345                                 buf->m_pack->m_flags |= M_PKTHDR;
1346                                 m_freem(buf->m_pack);
1347                         }
1348                         buf->m_head = NULL;
1349                         buf->m_pack = NULL;
1350                         if (buf->hmap != NULL) {
1351                                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1352                                 buf->hmap = NULL;
1353                         }
1354                         if (buf->pmap != NULL) {
1355                                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1356                                 buf->pmap = NULL;
1357                         }
1358                 }
1359                 if (rxr->buffers != NULL) {
1360                         free(rxr->buffers, M_DEVBUF);
1361                         rxr->buffers = NULL;
1362                 }
1363         }
1364
1365         if (rxr->htag != NULL) {
1366                 bus_dma_tag_destroy(rxr->htag);
1367                 rxr->htag = NULL;
1368         }
1369         if (rxr->ptag != NULL) {
1370                 bus_dma_tag_destroy(rxr->ptag);
1371                 rxr->ptag = NULL;
1372         }
1373
1374         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1375         return;
1376 }
1377
1378 static inline void
1379 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1380 {
1381
1382 #if defined(INET6) || defined(INET)
1383         /*
1384          * At the moment LRO is only for IPv4/TCP packets and the TCP checksum
1385          * of the packet should be computed by hardware. Also, the packet should
1386          * not have a VLAN tag in its Ethernet header.
1387          */
1388         if (rxr->lro_enabled &&
1389             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1390             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1391             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1392                 /*
1393                  * Send to the stack if:
1394                  *  - LRO not enabled, or
1395                  *  - no LRO resources, or
1396                  *  - lro enqueue fails
1397                  */
1398                 if (rxr->lro.lro_cnt != 0)
1399                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1400                                 return;
1401         }
1402 #endif
1403         (*ifp->if_input)(ifp, m);
1404 }
1405
1406
1407 static inline void
1408 ixl_rx_discard(struct rx_ring *rxr, int i)
1409 {
1410         struct ixl_rx_buf       *rbuf;
1411
1412         rbuf = &rxr->buffers[i];
1413
1414         if (rbuf->fmp != NULL) {        /* Partial chain? */
1415                 rbuf->fmp->m_flags |= M_PKTHDR;
1416                 m_freem(rbuf->fmp);
1417                 rbuf->fmp = NULL;
1418         }
1419
1420         /*
1421         ** With advanced descriptors the writeback
1422         ** clobbers the buffer addresses, so it's easier
1423         ** to just free the existing mbufs and take
1424         ** the normal refresh path to get new buffers
1425         ** and mappings.
1426         */
1427         if (rbuf->m_head) {
1428                 m_free(rbuf->m_head);
1429                 rbuf->m_head = NULL;
1430         }
1431  
1432         if (rbuf->m_pack) {
1433                 m_free(rbuf->m_pack);
1434                 rbuf->m_pack = NULL;
1435         }
1436
1437         return;
1438 }
1439
1440 #ifdef RSS
1441 /*
1442 ** ixl_ptype_to_hash: parse the hardware packet type
1443 ** to determine the appropriate mbuf hash type.
1444 */
1445 static inline int
1446 ixl_ptype_to_hash(u8 ptype)
1447 {
1448         struct i40e_rx_ptype_decoded    decoded;
1449
1450         decoded = decode_rx_desc_ptype(ptype);
1451
1452         if (!decoded.known)
1453                 return M_HASHTYPE_OPAQUE_HASH;
1454
1455         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
1456                 return M_HASHTYPE_OPAQUE_HASH;
1457
1458         /* Note: anything that gets to this point is IP */
1459         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
1460                 switch (decoded.inner_prot) {
1461                 case I40E_RX_PTYPE_INNER_PROT_TCP:
1462                         return M_HASHTYPE_RSS_TCP_IPV6;
1463                 case I40E_RX_PTYPE_INNER_PROT_UDP:
1464                         return M_HASHTYPE_RSS_UDP_IPV6;
1465                 default:
1466                         return M_HASHTYPE_RSS_IPV6;
1467                 }
1468         }
1469         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
1470                 switch (decoded.inner_prot) {
1471                 case I40E_RX_PTYPE_INNER_PROT_TCP:
1472                         return M_HASHTYPE_RSS_TCP_IPV4;
1473                 case I40E_RX_PTYPE_INNER_PROT_UDP:
1474                         return M_HASHTYPE_RSS_UDP_IPV4;
1475                 default:
1476                         return M_HASHTYPE_RSS_IPV4;
1477                 }
1478         }
1479         /* We should never get here!! */
1480         return M_HASHTYPE_OPAQUE_HASH;
1481 }
1482 #endif /* RSS */
1483
1484 /*********************************************************************
1485  *
1486  *  This routine executes in interrupt context. It replenishes
1487  *  the mbufs in the descriptor ring and passes data that has been
1488  *  DMA'd into host memory up to the network stack.
1489  *
1490  *  We loop at most count times if count is > 0, or until done if
1491  *  count < 0.
1492  *
1493  *  Returns TRUE for more work, FALSE for all clean (currently always FALSE).
1494  *********************************************************************/
1495 bool
1496 ixl_rxeof(struct ixl_queue *que, int count)
1497 {
1498         struct ixl_vsi          *vsi = que->vsi;
1499         struct rx_ring          *rxr = &que->rxr;
1500         struct ifnet            *ifp = vsi->ifp;
1501 #if defined(INET6) || defined(INET)
1502         struct lro_ctrl         *lro = &rxr->lro;
1503 #endif
1504         int                     i, nextp, processed = 0;
1505         union i40e_rx_desc      *cur;
1506         struct ixl_rx_buf       *rbuf, *nbuf;
1507
1508
1509         IXL_RX_LOCK(rxr);
1510
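             /*
             ** In netmap mode, netmap_rx_irq() consumes the interrupt and
             ** the ring is serviced by the netmap rxsync path, so the
             ** host-stack processing below is skipped.
             */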
1511 #ifdef DEV_NETMAP
1512         if (netmap_rx_irq(ifp, que->me, &count)) {
1513                 IXL_RX_UNLOCK(rxr);
1514                 return (FALSE);
1515         }
1516 #endif /* DEV_NETMAP */
1517
1518         for (i = rxr->next_check; count != 0;) {
1519                 struct mbuf     *sendmp, *mh, *mp;
1520                 u32             status, error;
1521                 u16             hlen, plen, vtag;
1522                 u64             qword;
1523                 u8              ptype;
1524                 bool            eop;
1525  
1526                 /* Sync the ring. */
1527                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1528                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1529
1530                 cur = &rxr->base[i];
1531                 qword = le64toh(cur->wb.qword1.status_error_len);
1532                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1533                     >> I40E_RXD_QW1_STATUS_SHIFT;
1534                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1535                     >> I40E_RXD_QW1_ERROR_SHIFT;
1536                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1537                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1538                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1539                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1540                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1541                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1542
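                     /*
                     ** The DD (descriptor done) bit is set by the hardware
                     ** once it has written the descriptor back; stop at the
                     ** first descriptor that is still pending.
                     */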
1543                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1544                         ++rxr->not_done;
1545                         break;
1546                 }
1547                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1548                         break;
1549
1550                 count--;
1551                 sendmp = NULL;
1552                 nbuf = NULL;
1553                 cur->wb.qword1.status_error_len = 0;
1554                 rbuf = &rxr->buffers[i];
1555                 mh = rbuf->m_head;
1556                 mp = rbuf->m_pack;
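                     /*
                     ** EOF marks the last descriptor of a packet; a stripped
                     ** VLAN tag is only reported when L2TAG1P is set.
                     */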
1557                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1558                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1559                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1560                 else
1561                         vtag = 0;
1562
1563                 /* Remove device access to the rx buffers. */
1564                 if (rbuf->m_head != NULL) {
1565                         bus_dmamap_sync(rxr->htag, rbuf->hmap,
1566                             BUS_DMASYNC_POSTREAD);
1567                         bus_dmamap_unload(rxr->htag, rbuf->hmap);
1568                 }
1569                 if (rbuf->m_pack != NULL) {
1570                         bus_dmamap_sync(rxr->ptag, rbuf->pmap,
1571                             BUS_DMASYNC_POSTREAD);
1572                         bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1573                 }
1574
1575                 /*
1576                 ** Make sure bad packets are discarded,
1577                 ** note that only EOP descriptor has valid
1578                 ** error results.
1579                 */
1580                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1581                         rxr->desc_errs++;
1582                         ixl_rx_discard(rxr, i);
1583                         goto next_desc;
1584                 }
1585
1586                 /* Prefetch the next buffer */
1587                 if (!eop) {
1588                         nextp = i + 1;
1589                         if (nextp == que->num_desc)
1590                                 nextp = 0;
1591                         nbuf = &rxr->buffers[nextp];
1592                         prefetch(nbuf);
1593                 }
1594
1595                 /*
1596                 ** The header mbuf is ONLY used when header
1597                 ** split is enabled; otherwise we get the normal
1598                 ** behavior, i.e., both header and payload
1599                 ** are DMA'd into the payload buffer.
1600                 **
1601                 ** Rather than using the fmp/lmp global pointers,
1602                 ** we now keep the head of a packet chain in the
1603                 ** buffer struct and pass this along from one
1604                 ** descriptor to the next until we hit EOP.
1605                 */
1606                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1607                         if (hlen > IXL_RX_HDR)
1608                                 hlen = IXL_RX_HDR;
1609                         mh->m_len = hlen;
1610                         mh->m_flags |= M_PKTHDR;
1611                         mh->m_next = NULL;
1612                         mh->m_pkthdr.len = mh->m_len;
1613                         /* Null buf pointer so it is refreshed */
1614                         rbuf->m_head = NULL;
1615                         /*
1616                         ** Check the payload length; this
1617                         ** could be zero if it's a small
1618                         ** packet.
1619                         */
1620                         if (plen > 0) {
1621                                 mp->m_len = plen;
1622                                 mp->m_next = NULL;
1623                                 mp->m_flags &= ~M_PKTHDR;
1624                                 mh->m_next = mp;
1625                                 mh->m_pkthdr.len += mp->m_len;
1626                                 /* Null buf pointer so it is refreshed */
1627                                 rbuf->m_pack = NULL;
1628                                 rxr->split++;
1629                         }
1630                         /*
1631                         ** Now create the forward
1632                         ** chain so that when the packet
1633                         ** completes we won't have to.
1634                         */
1635                         if (eop == 0) {
1636                                 /* stash the chain head */
1637                                 nbuf->fmp = mh;
1638                                 /* Make forward chain */
1639                                 if (plen)
1640                                         mp->m_next = nbuf->m_pack;
1641                                 else
1642                                         mh->m_next = nbuf->m_pack;
1643                         } else {
1644                                 /* Singlet, prepare to send */
1645                                 sendmp = mh;
1646                                 if (vtag) {
1647                                         sendmp->m_pkthdr.ether_vtag = vtag;
1648                                         sendmp->m_flags |= M_VLANTAG;
1649                                 }
1650                         }
1651                 } else {
1652                         /*
1653                         ** Either no header split, or a
1654                         ** secondary piece of a fragmented
1655                         ** split packet.
1656                         */
1657                         mp->m_len = plen;
1658                         /*
1659                         ** See if there is a stored chain head
1660                         ** that tells us what this buffer belongs to.
1661                         */
1662                         sendmp = rbuf->fmp;
1663                         rbuf->m_pack = rbuf->fmp = NULL;
1664
1665                         if (sendmp != NULL) /* secondary frag */
1666                                 sendmp->m_pkthdr.len += mp->m_len;
1667                         else {
1668                                 /* first desc of a non-ps chain */
1669                                 sendmp = mp;
1670                                 sendmp->m_flags |= M_PKTHDR;
1671                                 sendmp->m_pkthdr.len = mp->m_len;
1672                         }
1673                         /* Pass the head pointer on */
1674                         if (eop == 0) {
1675                                 nbuf->fmp = sendmp;
1676                                 sendmp = NULL;
1677                                 mp->m_next = nbuf->m_pack;
1678                         }
1679                 }
1680                 ++processed;
1681                 /* Sending this frame? */
1682                 if (eop) {
1683                         sendmp->m_pkthdr.rcvif = ifp;
1684                         /* gather stats */
1685                         rxr->rx_packets++;
1686                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1687                         /* capture data for dynamic ITR adjustment */
1688                         rxr->packets++;
1689                         rxr->bytes += sendmp->m_pkthdr.len;
1690                         /* Set VLAN tag (field only valid in eop desc) */
1691                         if (vtag) {
1692                                 sendmp->m_pkthdr.ether_vtag = vtag;
1693                                 sendmp->m_flags |= M_VLANTAG;
1694                         }
1695                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1696                                 ixl_rx_checksum(sendmp, status, error, ptype);
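                             /*
                             ** Record a flow identifier so the stack can keep
                             ** this flow on one CPU: the RSS hash from the
                             ** descriptor when RSS is built in, otherwise the
                             ** queue's MSI-X vector as an opaque id.
                             */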
1697 #ifdef RSS
1698                         sendmp->m_pkthdr.flowid =
1699                             le32toh(cur->wb.qword0.hi_dword.rss);
1700                         M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1701 #else
1702                         sendmp->m_pkthdr.flowid = que->msix;
1703                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1704 #endif
1705                 }
1706 next_desc:
1707                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1708                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1709
1710                 /* Advance our pointers to the next descriptor. */
1711                 if (++i == que->num_desc)
1712                         i = 0;
1713
1714                 /* Now send to the stack or do LRO */
1715                 if (sendmp != NULL) {
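                             /*
                             ** Drop the ring lock while the mbuf is handed up
                             ** the stack, then re-read next_check in case the
                             ** ring state changed while we were unlocked.
                             */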
1716                         rxr->next_check = i;
1717                         IXL_RX_UNLOCK(rxr);
1718                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1719                         IXL_RX_LOCK(rxr);
1720                         i = rxr->next_check;
1721                 }
1722
1723                 /* Refresh mbufs every 8 descriptors */
1724                 if (processed == 8) {
1725                         ixl_refresh_mbufs(que, i);
1726                         processed = 0;
1727                 }
1728         }
1729
1730         /* Refresh any remaining buf structs */
1731         if (ixl_rx_unrefreshed(que))
1732                 ixl_refresh_mbufs(que, i);
1733
1734         rxr->next_check = i;
1735
1736         IXL_RX_UNLOCK(rxr);
1737
1738 #if defined(INET6) || defined(INET)
1739         /*
1740          * Flush any outstanding LRO work
1741          */
1742 #if __FreeBSD_version >= 1100105
1743         tcp_lro_flush_all(lro);
1744 #else
1745         struct lro_entry *queued;
1746         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1747                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1748                 tcp_lro_flush(lro, queued);
1749         }
1750 #endif
1751 #endif /* defined(INET6) || defined(INET) */
1752
1753         return (FALSE);
1754 }
1755
1756
1757 /*********************************************************************
1758  *
1759  *  Verify that the hardware indicated that the checksum is valid.
1760  *  Inform the stack about the status of the checksum so that the
1761  *  stack doesn't spend time verifying it.
1762  *
1763  *********************************************************************/
1764 static void
1765 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1766 {
1767         struct i40e_rx_ptype_decoded decoded;
1768
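             /* Decode the hardware packet type to see which headers were parsed. */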
1769         decoded = decode_rx_desc_ptype(ptype);
1770
1771         /* Errors? */
1772         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1773             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1774                 mp->m_pkthdr.csum_flags = 0;
1775                 return;
1776         }
1777
1778         /* IPv6 with extension headers likely have bad csum */
1779         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1780             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1781                 if (status &
1782                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1783                         mp->m_pkthdr.csum_flags = 0;
1784                         return;
1785                 }
1786
1787  
1788         /* IP Checksum Good */
1789         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1790         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1791
1792         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1793                 mp->m_pkthdr.csum_flags |= 
1794                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
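                     /* A csum_data of 0xffff tells the stack the L4 checksum verified clean. */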
1795                 mp->m_pkthdr.csum_data |= htons(0xffff);
1796         }
1797         return;
1798 }
1799
1800 #if __FreeBSD_version >= 1100000
1801 uint64_t
1802 ixl_get_counter(if_t ifp, ift_counter cnt)
1803 {
1804         struct ixl_vsi *vsi;
1805
1806         vsi = if_getsoftc(ifp);
1807
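             /*
             ** Report the software-maintained VSI statistics; anything not
             ** tracked here falls back to the generic interface counters.
             */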
1808         switch (cnt) {
1809         case IFCOUNTER_IPACKETS:
1810                 return (vsi->ipackets);
1811         case IFCOUNTER_IERRORS:
1812                 return (vsi->ierrors);
1813         case IFCOUNTER_OPACKETS:
1814                 return (vsi->opackets);
1815         case IFCOUNTER_OERRORS:
1816                 return (vsi->oerrors);
1817         case IFCOUNTER_COLLISIONS:
1818                 /* Collisions are impossible on full-duplex 10G/40G Ethernet */
1819                 return (0);
1820         case IFCOUNTER_IBYTES:
1821                 return (vsi->ibytes);
1822         case IFCOUNTER_OBYTES:
1823                 return (vsi->obytes);
1824         case IFCOUNTER_IMCASTS:
1825                 return (vsi->imcasts);
1826         case IFCOUNTER_OMCASTS:
1827                 return (vsi->omcasts);
1828         case IFCOUNTER_IQDROPS:
1829                 return (vsi->iqdrops);
1830         case IFCOUNTER_OQDROPS:
1831                 return (vsi->oqdrops);
1832         case IFCOUNTER_NOPROTO:
1833                 return (vsi->noproto);
1834         default:
1835                 return (if_get_counter_default(ifp, cnt));
1836         }
1837 }
1838 #endif
1839