1 /******************************************************************************
2
3   Copyright (c) 2013-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was separated to allow usage by
38 **          both the PF and VF drivers.
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46
47 #include "ixl.h"
48
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52
53 /* Local Prototypes */
54 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
56 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
57 static int      ixl_tx_setup_offload(struct ixl_queue *,
58                     struct mbuf *, u32 *, u32 *);
59 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60
61 static inline void ixl_rx_discard(struct rx_ring *, int);
62 static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
63                     struct mbuf *, u8);
64
65 static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
68 static inline u32 ixl_get_tx_head(struct ixl_queue *que);
69
70 #ifdef DEV_NETMAP
71 #include <dev/netmap/if_ixl_netmap.h>
72 int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
73 #endif /* DEV_NETMAP */
74
75 /*
76  * @key: buffer into which the default RSS key (IXL_RSS_KEY_SIZE bytes) is copied
77  */
78 void
79 ixl_get_default_rss_key(u32 *key)
80 {
81         MPASS(key != NULL);
82
83         u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
84             0x183cfd8c, 0xce880440, 0x580cbc3c,
85             0x35897377, 0x328b25e1, 0x4fa98922,
86             0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
87             0x0, 0x0, 0x0};
88
89         bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
90 }
91
92 /*
93 ** Multiqueue Transmit driver
94 */
95 int
96 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
97 {
98         struct ixl_vsi          *vsi = ifp->if_softc;
99         struct ixl_queue        *que;
100         struct tx_ring          *txr;
101         int                     err, i;
102 #ifdef RSS
103         u32                     bucket_id;
104 #endif
105
106         /*
107         ** Which queue to use:
108         **
109         ** When doing RSS, map the flow to the same outbound
110         ** queue that its incoming packets were hashed to.
111         ** If everything is set up correctly, that bucket
112         ** corresponds to the CPU we are currently running on.
113         */
114         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
115 #ifdef  RSS
116                 if (rss_hash2bucket(m->m_pkthdr.flowid,
117                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
118                         i = bucket_id % vsi->num_queues;
119                 } else
120 #endif
121                         i = m->m_pkthdr.flowid % vsi->num_queues;
122         } else
123                 i = curcpu % vsi->num_queues;
124
125         que = &vsi->queues[i];
126         txr = &que->txr;
127
128         err = drbr_enqueue(ifp, txr->br, m);
129         if (err)
130                 return (err);
131         if (IXL_TX_TRYLOCK(txr)) {
132                 ixl_mq_start_locked(ifp, txr);
133                 IXL_TX_UNLOCK(txr);
134         } else
135                 taskqueue_enqueue(que->tq, &que->tx_task);
136
137         return (0);
138 }
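/*
** Illustration of the mapping above: with vsi->num_queues == 8, a
** packet whose RSS bucket (or flowid) is 11 lands on queue
** 11 % 8 == 3; packets with no valid hash use curcpu % 8 instead.
** The frame is enqueued on that queue's buf_ring and the ring is
** drained either inline, if the TX lock can be taken, or from the
** queue's taskqueue (see ixl_deferred_mq_start() below).
*/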
139
140 int
141 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
142 {
143         struct ixl_queue        *que = txr->que;
144         struct ixl_vsi          *vsi = que->vsi;
145         struct mbuf             *next;
146         int                     err = 0;
147
148
149         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
150             vsi->link_active == 0)
151                 return (ENETDOWN);
152
153         /* Process the transmit queue */
154         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
155                 if ((err = ixl_xmit(que, &next)) != 0) {
156                         if (next == NULL)
157                                 drbr_advance(ifp, txr->br);
158                         else
159                                 drbr_putback(ifp, txr->br, next);
160                         break;
161                 }
162                 drbr_advance(ifp, txr->br);
163                 /* Send a copy of the frame to the BPF listener */
164                 ETHER_BPF_MTAP(ifp, next);
165                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
166                         break;
167         }
168
169         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
170                 ixl_txeof(que);
171
172         return (err);
173 }
174
175 /*
176  * Called from a taskqueue to drain queued transmit packets.
177  */
178 void
179 ixl_deferred_mq_start(void *arg, int pending)
180 {
181         struct ixl_queue        *que = arg;
182         struct tx_ring          *txr = &que->txr;
183         struct ixl_vsi          *vsi = que->vsi;
184         struct ifnet            *ifp = vsi->ifp;
185         
186         IXL_TX_LOCK(txr);
187         if (!drbr_empty(ifp, txr->br))
188                 ixl_mq_start_locked(ifp, txr);
189         IXL_TX_UNLOCK(txr);
190 }
191
192 /*
193 ** Flush all queue ring buffers
194 */
195 void
196 ixl_qflush(struct ifnet *ifp)
197 {
198         struct ixl_vsi  *vsi = ifp->if_softc;
199
200         for (int i = 0; i < vsi->num_queues; i++) {
201                 struct ixl_queue *que = &vsi->queues[i];
202                 struct tx_ring  *txr = &que->txr;
203                 struct mbuf     *m;
204                 IXL_TX_LOCK(txr);
205                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
206                         m_freem(m);
207                 IXL_TX_UNLOCK(txr);
208         }
209         if_qflush(ifp);
210 }
211
212 /*
213 ** Detect mbuf chains passed to the driver
214 ** that are 'sparse', i.e. that need more than
215 ** IXL_SPARSE_CHAIN mbufs to deliver an MSS-sized chunk of data
216 */
217 static inline bool
218 ixl_tso_detect_sparse(struct mbuf *mp)
219 {
220         struct mbuf     *m;
221         int             num = 0, mss;
222         bool            ret = FALSE;
223
224         mss = mp->m_pkthdr.tso_segsz;
225         for (m = mp->m_next; m != NULL; m = m->m_next) {
226                 num++;
227                 mss -= m->m_len;
228                 if (mss < 1)
229                         break;
230                 if (m->m_next == NULL)
231                         break;
232         }
233         if (num > IXL_SPARSE_CHAIN)
234                 ret = TRUE;
235
236         return (ret);
237 }
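/*
** Note: when the check above reports a sparse chain, ixl_xmit()
** below runs the mbuf chain through m_defrag() before DMA-mapping
** it for TSO.  The precise per-segment descriptor limit is a
** property of the hardware; this routine simply flags any chain
** needing more than IXL_SPARSE_CHAIN mbufs per MSS as too
** fragmented to hand to the device as-is.
*/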
238
239
240 /*********************************************************************
241  *
242  *  This routine maps the mbufs to tx descriptors, allowing the
243  *  TX engine to transmit the packets. 
244  *      - return 0 on success, positive on failure
245  *
246  **********************************************************************/
247 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
248
249 static int
250 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
251 {
252         struct ixl_vsi          *vsi = que->vsi;
253         struct i40e_hw          *hw = vsi->hw;
254         struct tx_ring          *txr = &que->txr;
255         struct ixl_tx_buf       *buf;
256         struct i40e_tx_desc     *txd = NULL;
257         struct mbuf             *m_head, *m;
258         int                     i, j, error, nsegs;
259         int                     first, last = 0;
260         u16                     vtag = 0;
261         u32                     cmd, off;
262         bus_dmamap_t            map;
263         bus_dma_tag_t           tag;
264         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
265
266         cmd = off = 0;
267         m_head = *m_headp;
268
269         /*
270          * Capture the index of the first descriptor used;
271          * its buffer entry will record the index of the last
272          * descriptor, the one the hardware reports back on.
273          */
274         first = txr->next_avail;
275         buf = &txr->buffers[first];
276         map = buf->map;
277         tag = txr->tx_tag;
278
279         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
280                 /* Use larger mapping for TSO */
281                 tag = txr->tso_tag;
282                 if (ixl_tso_detect_sparse(m_head)) {
283                         m = m_defrag(m_head, M_NOWAIT);
284                         if (m == NULL) {
285                                 m_freem(*m_headp);
286                                 *m_headp = NULL;
287                                 return (ENOBUFS);
288                         }
289                         *m_headp = m;
290                 }
291         }
292
293         /*
294          * Map the packet for DMA.
295          */
296         error = bus_dmamap_load_mbuf_sg(tag, map,
297             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
298
299         if (error == EFBIG) {
300                 struct mbuf *m;
301
302                 m = m_defrag(*m_headp, M_NOWAIT);
303                 if (m == NULL) {
304                         que->mbuf_defrag_failed++;
305                         m_freem(*m_headp);
306                         *m_headp = NULL;
307                         return (ENOBUFS);
308                 }
309                 *m_headp = m;
310
311                 /* Try it again */
312                 error = bus_dmamap_load_mbuf_sg(tag, map,
313                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
314
315                 if (error == ENOMEM) {
316                         que->tx_dmamap_failed++;
317                         return (error);
318                 } else if (error != 0) {
319                         que->tx_dmamap_failed++;
320                         m_freem(*m_headp);
321                         *m_headp = NULL;
322                         return (error);
323                 }
324         } else if (error == ENOMEM) {
325                 que->tx_dmamap_failed++;
326                 return (error);
327         } else if (error != 0) {
328                 que->tx_dmamap_failed++;
329                 m_freem(*m_headp);
330                 *m_headp = NULL;
331                 return (error);
332         }
333
334         /* Make certain there are enough descriptors */
335         if (nsegs > txr->avail - 2) {
336                 txr->no_desc++;
337                 error = ENOBUFS;
338                 goto xmit_fail;
339         }
340         m_head = *m_headp;
341
342         /* Set up the TSO/CSUM offload */
343         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
344                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
345                 if (error)
346                         goto xmit_fail;
347         }
348
349         cmd |= I40E_TX_DESC_CMD_ICRC;
350         /* Grab the VLAN tag */
351         if (m_head->m_flags & M_VLANTAG) {
352                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
353                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
354         }
355
356         i = txr->next_avail;
357         for (j = 0; j < nsegs; j++) {
358                 bus_size_t seglen;
359
360                 buf = &txr->buffers[i];
361                 buf->tag = tag; /* Keep track of the type tag */
362                 txd = &txr->base[i];
363                 seglen = segs[j].ds_len;
364
365                 txd->buffer_addr = htole64(segs[j].ds_addr);
366                 txd->cmd_type_offset_bsz =
367                     htole64(I40E_TX_DESC_DTYPE_DATA
368                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
369                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
370                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
371                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
372
373                 last = i; /* descriptor that will get completion IRQ */
374
375                 if (++i == que->num_desc)
376                         i = 0;
377
378                 buf->m_head = NULL;
379                 buf->eop_index = -1;
380         }
381         /* Set the last descriptor for report */
382         txd->cmd_type_offset_bsz |=
383             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
384         txr->avail -= nsegs;
385         txr->next_avail = i;
386
387         buf->m_head = m_head;
388         /* Swap the dma map between the first and last descriptor */
389         txr->buffers[first].map = buf->map;
390         buf->map = map;
391         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
392
393         /* Set the index of the descriptor that will be marked done */
394         buf = &txr->buffers[first];
395         buf->eop_index = last;
396
397         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
398             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
399         /*
400          * Advance the Transmit Descriptor Tail (TDT); this tells the
401          * hardware that this frame is available to transmit.
402          */
403         ++txr->total_packets;
404         wr32(hw, txr->tail, i);
405
406         /* Mark outstanding work */
407         if (que->busy == 0)
408                 que->busy = 1;
409         return (0);
410
411 xmit_fail:
412         bus_dmamap_unload(tag, buf->map);
413         return (error);
414 }
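/*
** Layout note for the descriptor writes above: cmd_type_offset_bsz
** is a single 64-bit qword into which the descriptor type, command
** bits, the header-offset fields, buffer size and L2 tag are packed
** with the I40E_TXD_QW1_*_SHIFT constants; the EOP/RS bits
** (IXL_TXD_CMD) are OR'd into the last descriptor of the chain only.
*/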
415
416
417 /*********************************************************************
418  *
419  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
420  *  the information needed to transmit a packet on the wire. This is
421  *  called only once at attach; setup is done on every reset.
422  *
423  **********************************************************************/
424 int
425 ixl_allocate_tx_data(struct ixl_queue *que)
426 {
427         struct tx_ring          *txr = &que->txr;
428         struct ixl_vsi          *vsi = que->vsi;
429         device_t                dev = vsi->dev;
430         struct ixl_tx_buf       *buf;
431         int                     error = 0;
432
433         /*
434          * Setup DMA descriptor areas.
435          */
436         if ((error = bus_dma_tag_create(NULL,           /* parent */
437                                1, 0,                    /* alignment, bounds */
438                                BUS_SPACE_MAXADDR,       /* lowaddr */
439                                BUS_SPACE_MAXADDR,       /* highaddr */
440                                NULL, NULL,              /* filter, filterarg */
441                                IXL_TSO_SIZE,            /* maxsize */
442                                IXL_MAX_TX_SEGS,         /* nsegments */
443                                PAGE_SIZE,               /* maxsegsize */
444                                0,                       /* flags */
445                                NULL,                    /* lockfunc */
446                                NULL,                    /* lockfuncarg */
447                                &txr->tx_tag))) {
448                 device_printf(dev,"Unable to allocate TX DMA tag\n");
449                 goto fail;
450         }
451
452         /* Make a special tag for TSO */
453         if ((error = bus_dma_tag_create(NULL,           /* parent */
454                                1, 0,                    /* alignment, bounds */
455                                BUS_SPACE_MAXADDR,       /* lowaddr */
456                                BUS_SPACE_MAXADDR,       /* highaddr */
457                                NULL, NULL,              /* filter, filterarg */
458                                IXL_TSO_SIZE,            /* maxsize */
459                                IXL_MAX_TSO_SEGS,        /* nsegments */
460                                PAGE_SIZE,               /* maxsegsize */
461                                0,                       /* flags */
462                                NULL,                    /* lockfunc */
463                                NULL,                    /* lockfuncarg */
464                                &txr->tso_tag))) {
465                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
466                 goto fail;
467         }
468
469         if (!(txr->buffers =
470             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
471             que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
472                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
473                 error = ENOMEM;
474                 goto fail;
475         }
476
477         /* Create the descriptor buffer default dma maps */
478         buf = txr->buffers;
479         for (int i = 0; i < que->num_desc; i++, buf++) {
480                 buf->tag = txr->tx_tag;
481                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
482                 if (error != 0) {
483                         device_printf(dev, "Unable to create TX DMA map\n");
484                         goto fail;
485                 }
486         }
487 fail:
488         return (error);
489 }
490
491
492 /*********************************************************************
493  *
494  *  (Re)Initialize a queue transmit ring.
495  *      - called by init, it clears the descriptor ring,
496  *        and frees any stale mbufs 
497  *
498  **********************************************************************/
499 void
500 ixl_init_tx_ring(struct ixl_queue *que)
501 {
502 #ifdef DEV_NETMAP
503         struct netmap_adapter *na = NA(que->vsi->ifp);
504         struct netmap_slot *slot;
505 #endif /* DEV_NETMAP */
506         struct tx_ring          *txr = &que->txr;
507         struct ixl_tx_buf       *buf;
508
509         /* Clear the old ring contents */
510         IXL_TX_LOCK(txr);
511
512 #ifdef DEV_NETMAP
513         /*
514          * (under lock): if in netmap mode, do some consistency
515          * checks and set slot to entry 0 of the netmap ring.
516          */
517         slot = netmap_reset(na, NR_TX, que->me, 0);
518 #endif /* DEV_NETMAP */
519
520         bzero((void *)txr->base,
521               (sizeof(struct i40e_tx_desc)) * que->num_desc);
522
523         /* Reset indices */
524         txr->next_avail = 0;
525         txr->next_to_clean = 0;
526
527 #ifdef IXL_FDIR
528         /* Initialize flow director */
529         txr->atr_rate = ixl_atr_rate;
530         txr->atr_count = 0;
531 #endif
532
533         /* Free any existing tx mbufs. */
534         buf = txr->buffers;
535         for (int i = 0; i < que->num_desc; i++, buf++) {
536                 if (buf->m_head != NULL) {
537                         bus_dmamap_sync(buf->tag, buf->map,
538                             BUS_DMASYNC_POSTWRITE);
539                         bus_dmamap_unload(buf->tag, buf->map);
540                         m_freem(buf->m_head);
541                         buf->m_head = NULL;
542                 }
543 #ifdef DEV_NETMAP
544                 /*
545                  * In netmap mode, set the map for the packet buffer.
546                  * NOTE: Some drivers (not this one) also need to set
547                  * the physical buffer address in the NIC ring.
548                  * netmap_idx_n2k() maps a nic index, i, into the corresponding
549                  * netmap slot index, si
550                  */
551                 if (slot) {
552                         int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
553                         netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
554                 }
555 #endif /* DEV_NETMAP */
556                 /* Clear the EOP index */
557                 buf->eop_index = -1;
558         }
559
560         /* Set number of descriptors available */
561         txr->avail = que->num_desc;
562
563         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
564             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
565         IXL_TX_UNLOCK(txr);
566 }
567
568
569 /*********************************************************************
570  *
571  *  Free transmit ring related data structures.
572  *
573  **********************************************************************/
574 void
575 ixl_free_que_tx(struct ixl_queue *que)
576 {
577         struct tx_ring *txr = &que->txr;
578         struct ixl_tx_buf *buf;
579
580         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
581
582         for (int i = 0; i < que->num_desc; i++) {
583                 buf = &txr->buffers[i];
584                 if (buf->m_head != NULL) {
585                         bus_dmamap_sync(buf->tag, buf->map,
586                             BUS_DMASYNC_POSTWRITE);
587                         bus_dmamap_unload(buf->tag,
588                             buf->map);
589                         m_freem(buf->m_head);
590                         buf->m_head = NULL;
591                         if (buf->map != NULL) {
592                                 bus_dmamap_destroy(buf->tag,
593                                     buf->map);
594                                 buf->map = NULL;
595                         }
596                 } else if (buf->map != NULL) {
597                         bus_dmamap_unload(buf->tag,
598                             buf->map);
599                         bus_dmamap_destroy(buf->tag,
600                             buf->map);
601                         buf->map = NULL;
602                 }
603         }
604         if (txr->br != NULL)
605                 buf_ring_free(txr->br, M_DEVBUF);
606         if (txr->buffers != NULL) {
607                 free(txr->buffers, M_DEVBUF);
608                 txr->buffers = NULL;
609         }
610         if (txr->tx_tag != NULL) {
611                 bus_dma_tag_destroy(txr->tx_tag);
612                 txr->tx_tag = NULL;
613         }
614         if (txr->tso_tag != NULL) {
615                 bus_dma_tag_destroy(txr->tso_tag);
616                 txr->tso_tag = NULL;
617         }
618
619         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
620         return;
621 }
622
623 /*********************************************************************
624  *
625  *  Setup descriptor for hw offloads 
626  *
627  **********************************************************************/
628
629 static int
630 ixl_tx_setup_offload(struct ixl_queue *que,
631     struct mbuf *mp, u32 *cmd, u32 *off)
632 {
633         struct ether_vlan_header        *eh;
634 #ifdef INET
635         struct ip                       *ip = NULL;
636 #endif
637         struct tcphdr                   *th = NULL;
638 #ifdef INET6
639         struct ip6_hdr                  *ip6;
640 #endif
641         int                             elen, ip_hlen = 0, tcp_hlen;
642         u16                             etype;
643         u8                              ipproto = 0;
644         bool                            tso = FALSE;
645
646         /* Set up the TSO context descriptor if required */
647         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
648                 tso = ixl_tso_setup(que, mp);
649                 if (tso)
650                         ++que->tso;
651                 else
652                         return (ENXIO);
653         }
654
655         /*
656          * Determine where frame payload starts.
657          * Jump over vlan headers if already present,
658          * helpful for QinQ too.
659          */
660         eh = mtod(mp, struct ether_vlan_header *);
661         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
662                 etype = ntohs(eh->evl_proto);
663                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
664         } else {
665                 etype = ntohs(eh->evl_encap_proto);
666                 elen = ETHER_HDR_LEN;
667         }
668
669         switch (etype) {
670 #ifdef INET
671                 case ETHERTYPE_IP:
672                         ip = (struct ip *)(mp->m_data + elen);
673                         ip_hlen = ip->ip_hl << 2;
674                         ipproto = ip->ip_p;
675                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
676                         /* The IP checksum must be recalculated with TSO */
677                         if (tso)
678                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
679                         else
680                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
681                         break;
682 #endif
683 #ifdef INET6
684                 case ETHERTYPE_IPV6:
685                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
686                         ip_hlen = sizeof(struct ip6_hdr);
687                         ipproto = ip6->ip6_nxt;
688                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
689                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
690                         break;
691 #endif
692                 default:
693                         break;
694         }
695
696         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
697         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
698
699         switch (ipproto) {
700                 case IPPROTO_TCP:
701                         tcp_hlen = th->th_off << 2;
702                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
703                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
704                                 *off |= (tcp_hlen >> 2) <<
705                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
706                         }
707 #ifdef IXL_FDIR
708                         ixl_atr(que, th, etype);
709 #endif
710                         break;
711                 case IPPROTO_UDP:
712                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
713                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
714                                 *off |= (sizeof(struct udphdr) >> 2) <<
715                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
716                         }
717                         break;
718
719                 case IPPROTO_SCTP:
720                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
721                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
722                                 *off |= (sizeof(struct sctphdr) >> 2) <<
723                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
724                         }
725                         /* Fall Thru */
726                 default:
727                         break;
728         }
729
730         return (0);
731 }
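/*
** The cmd/off values assembled above are not written to a descriptor
** here; ixl_xmit() folds them into the CMD and OFFSET fields of each
** data descriptor's cmd_type_offset_bsz qword.  Only the TSO context
** descriptor, when required, is written directly (by ixl_tso_setup()).
*/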
732
733
734 /**********************************************************************
735  *
736  *  Setup context for hardware segmentation offload (TSO)
737  *
738  **********************************************************************/
739 static bool
740 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
741 {
742         struct tx_ring                  *txr = &que->txr;
743         struct i40e_tx_context_desc     *TXD;
744         struct ixl_tx_buf               *buf;
745         u32                             cmd, mss, type, tsolen;
746         u16                             etype;
747         int                             idx, elen, ip_hlen, tcp_hlen;
748         struct ether_vlan_header        *eh;
749 #ifdef INET
750         struct ip                       *ip;
751 #endif
752 #ifdef INET6
753         struct ip6_hdr                  *ip6;
754 #endif
755 #if defined(INET6) || defined(INET)
756         struct tcphdr                   *th;
757 #endif
758         u64                             type_cmd_tso_mss;
759
760         /*
761          * Determine where frame payload starts.
762          * Jump over vlan headers if already present
763          */
764         eh = mtod(mp, struct ether_vlan_header *);
765         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
766                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
767                 etype = eh->evl_proto;
768         } else {
769                 elen = ETHER_HDR_LEN;
770                 etype = eh->evl_encap_proto;
771         }
772
773         switch (ntohs(etype)) {
774 #ifdef INET6
775         case ETHERTYPE_IPV6:
776                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
777                 if (ip6->ip6_nxt != IPPROTO_TCP)
778                         return (FALSE);
779                 ip_hlen = sizeof(struct ip6_hdr);
780                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
781                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
782                 tcp_hlen = th->th_off << 2;
783                 /*
784                  * The corresponding flag is set by the stack in the IPv4
785                  * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
786                  * So, set it here because the rest of the flow requires it.
787                  */
788                 mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
789                 break;
790 #endif
791 #ifdef INET
792         case ETHERTYPE_IP:
793                 ip = (struct ip *)(mp->m_data + elen);
794                 if (ip->ip_p != IPPROTO_TCP)
795                         return (FALSE);
796                 ip->ip_sum = 0;
797                 ip_hlen = ip->ip_hl << 2;
798                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
799                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
800                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
801                 tcp_hlen = th->th_off << 2;
802                 break;
803 #endif
804         default:
805                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
806                     __func__, ntohs(etype));
807                 return FALSE;
808         }
809
810         /* Ensure we have at least the IP+TCP header in the first mbuf. */
811         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
812                 return FALSE;
813
814         idx = txr->next_avail;
815         buf = &txr->buffers[idx];
816         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
817         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
818
819         type = I40E_TX_DESC_DTYPE_CONTEXT;
820         cmd = I40E_TX_CTX_DESC_TSO;
821         /* ERJ: this must not be less than 64 */
822         mss = mp->m_pkthdr.tso_segsz;
823
824         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
825             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
826             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
827             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
828         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
829
830         TXD->tunneling_params = htole32(0);
831         buf->m_head = NULL;
832         buf->eop_index = -1;
833
834         if (++idx == que->num_desc)
835                 idx = 0;
836
837         txr->avail--;
838         txr->next_avail = idx;
839
840         return TRUE;
841 }
842
843 /*             
844 ** ixl_get_tx_head - Retrieve the HW head index from the
845 **    write-back location where the HW records it
846 */
847 static inline u32
848 ixl_get_tx_head(struct ixl_queue *que)
849 {
850         struct tx_ring  *txr = &que->txr;
851         void *head = &txr->base[que->num_desc];
852         return LE32_TO_CPU(*(volatile __le32 *)head);
853 }
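/*
** Note: the write-back slot read above is the entry just past the
** last TX descriptor (index que->num_desc); the ring allocation
** elsewhere in the driver is assumed to reserve room for it.  The
** hardware advances this value as it consumes descriptors, and
** ixl_txeof() below uses it instead of per-descriptor done bits.
*/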
854
855 /**********************************************************************
856  *
857  *  Examine each tx_buffer in the used queue. If the hardware is done
858  *  processing the packet then free associated resources. The
859  *  tx_buffer is put back on the free queue.
860  *
861  **********************************************************************/
862 bool
863 ixl_txeof(struct ixl_queue *que)
864 {
865         struct tx_ring          *txr = &que->txr;
866         u32                     first, last, head, done, processed;
867         struct ixl_tx_buf       *buf;
868         struct i40e_tx_desc     *tx_desc, *eop_desc;
869
870
871         mtx_assert(&txr->mtx, MA_OWNED);
872
873 #ifdef DEV_NETMAP
874         // XXX todo: implement moderation
875         if (netmap_tx_irq(que->vsi->ifp, que->me))
876                 return FALSE;
877 #endif /* DEV_NETMAP */
878
879         /* These are not the descriptors you seek, move along :) */
880         if (txr->avail == que->num_desc) {
881                 que->busy = 0;
882                 return FALSE;
883         }
884
885         processed = 0;
886         first = txr->next_to_clean;
887         buf = &txr->buffers[first];
888         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
889         last = buf->eop_index;
890         if (last == -1)
891                 return FALSE;
892         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
893
894         /* Get the Head WB value */
895         head = ixl_get_tx_head(que);
896
897         /*
898         ** Get the index of the first descriptor
899         ** BEYOND the EOP and call that 'done'.
900         ** I do this so the comparison in the
901         ** inner while loop below can be simple
902         */
903         if (++last == que->num_desc) last = 0;
904         done = last;
905
906         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
907             BUS_DMASYNC_POSTREAD);
908         /*
909         ** The HEAD index of the ring is written to a
910         ** defined location; this, rather than a done bit,
911         ** is what is used to keep track of what must be
912         ** 'cleaned'.
913         */
914         while (first != head) {
915                 /* We clean the range of the packet */
916                 while (first != done) {
917                         ++txr->avail;
918                         ++processed;
919
920                         if (buf->m_head) {
921                                 txr->bytes += /* for ITR adjustment */
922                                     buf->m_head->m_pkthdr.len;
923                                 txr->tx_bytes += /* for TX stats */
924                                     buf->m_head->m_pkthdr.len;
925                                 bus_dmamap_sync(buf->tag,
926                                     buf->map,
927                                     BUS_DMASYNC_POSTWRITE);
928                                 bus_dmamap_unload(buf->tag,
929                                     buf->map);
930                                 m_freem(buf->m_head);
931                                 buf->m_head = NULL;
932                                 buf->map = NULL;
933                         }
934                         buf->eop_index = -1;
935
936                         if (++first == que->num_desc)
937                                 first = 0;
938
939                         buf = &txr->buffers[first];
940                         tx_desc = &txr->base[first];
941                 }
942                 ++txr->packets;
943                 /* See if there is more work now */
944                 last = buf->eop_index;
945                 if (last != -1) {
946                         eop_desc = &txr->base[last];
947                         /* Get next done point */
948                         if (++last == que->num_desc) last = 0;
949                         done = last;
950                 } else
951                         break;
952         }
953         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
954             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
955
956         txr->next_to_clean = first;
957
958
959         /*
960         ** Hang detection: we know there is work
961         ** outstanding or the early return above
962         ** would have been taken, so mark this as an
963         ** unsuccessful pass; if the counter grows too
964         ** large, the local timer will consider the
965         ** queue hung. If anything has been
966         ** cleaned then reset the state.
967         */
968         if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
969                 ++que->busy;
970
971         if (processed)
972                 que->busy = 1; /* Note this turns off HUNG */
973
974         /*
975          * If there are no pending descriptors, clear the timeout.
976          */
977         if (txr->avail == que->num_desc) {
978                 que->busy = 0;
979                 return FALSE;
980         }
981
982         return TRUE;
983 }
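/*
** Summary of the que->busy handling above: 0 means the ring is idle,
** 1 is set by ixl_xmit() and by any pass that cleaned something, and
** each unproductive pass increments the counter (saturating at
** IXL_QUEUE_HUNG); a watchdog elsewhere in the driver is expected to
** use this count to decide whether the queue is hung.
*/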
984
985 /*********************************************************************
986  *
987  *  Refresh mbuf buffers for RX descriptor rings
988  *   - now keeps its own state so discards due to resource
989  *     exhaustion are unnecessary; if an mbuf cannot be obtained
990  *     the routine just returns, keeping its placeholder, so it
991  *     can simply be called again later to retry.
992  *
993  **********************************************************************/
994 static void
995 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
996 {
997         struct ixl_vsi          *vsi = que->vsi;
998         struct rx_ring          *rxr = &que->rxr;
999         bus_dma_segment_t       hseg[1];
1000         bus_dma_segment_t       pseg[1];
1001         struct ixl_rx_buf       *buf;
1002         struct mbuf             *mh, *mp;
1003         int                     i, j, nsegs, error;
1004         bool                    refreshed = FALSE;
1005
1006         i = j = rxr->next_refresh;
1007         /* Control the loop with one beyond */
1008         if (++j == que->num_desc)
1009                 j = 0;
1010
1011         while (j != limit) {
1012                 buf = &rxr->buffers[i];
1013                 if (rxr->hdr_split == FALSE)
1014                         goto no_split;
1015
1016                 if (buf->m_head == NULL) {
1017                         mh = m_gethdr(M_NOWAIT, MT_DATA);
1018                         if (mh == NULL)
1019                                 goto update;
1020                 } else
1021                         mh = buf->m_head;
1022
1023                 mh->m_pkthdr.len = mh->m_len = MHLEN;
1025                 mh->m_flags |= M_PKTHDR;
1026                 /* Get the memory mapping */
1027                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1028                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1029                 if (error != 0) {
1030                         printf("Refresh mbufs: hdr dmamap load"
1031                             " failure - %d\n", error);
1032                         m_free(mh);
1033                         buf->m_head = NULL;
1034                         goto update;
1035                 }
1036                 buf->m_head = mh;
1037                 bus_dmamap_sync(rxr->htag, buf->hmap,
1038                     BUS_DMASYNC_PREREAD);
1039                 rxr->base[i].read.hdr_addr =
1040                    htole64(hseg[0].ds_addr);
1041
1042 no_split:
1043                 if (buf->m_pack == NULL) {
1044                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1045                             M_PKTHDR, rxr->mbuf_sz);
1046                         if (mp == NULL)
1047                                 goto update;
1048                 } else
1049                         mp = buf->m_pack;
1050
1051                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1052                 /* Get the memory mapping */
1053                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1054                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1055                 if (error != 0) {
1056                         printf("Refresh mbufs: payload dmamap load"
1057                             " failure - %d\n", error);
1058                         m_free(mp);
1059                         buf->m_pack = NULL;
1060                         goto update;
1061                 }
1062                 buf->m_pack = mp;
1063                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1064                     BUS_DMASYNC_PREREAD);
1065                 rxr->base[i].read.pkt_addr =
1066                    htole64(pseg[0].ds_addr);
1067                 /* Used only when doing header split */
1068                 rxr->base[i].read.hdr_addr = 0;
1069
1070                 refreshed = TRUE;
1071                 /* Next is precalculated */
1072                 i = j;
1073                 rxr->next_refresh = i;
1074                 if (++j == que->num_desc)
1075                         j = 0;
1076         }
1077 update:
1078         if (refreshed) /* Update hardware tail index */
1079                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1080         return;
1081 }
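/*
** Note: the tail write in the update path above hands all of the
** descriptors refreshed so far back to the hardware in one go;
** rxr->next_refresh always names the next descriptor this routine
** will try to fill.
*/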
1082
1083
1084 /*********************************************************************
1085  *
1086  *  Allocate memory for rx_buffer structures. Since we use one
1087  *  rx_buffer per descriptor, the maximum number of rx_buffer's
1088  *  that we'll need is equal to the number of receive descriptors
1089  *  that we've defined.
1090  *
1091  **********************************************************************/
1092 int
1093 ixl_allocate_rx_data(struct ixl_queue *que)
1094 {
1095         struct rx_ring          *rxr = &que->rxr;
1096         struct ixl_vsi          *vsi = que->vsi;
1097         device_t                dev = vsi->dev;
1098         struct ixl_rx_buf       *buf;
1099         int                     i, bsize, error;
1100
1101         bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1102         if (!(rxr->buffers =
1103             (struct ixl_rx_buf *) malloc(bsize,
1104             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1105                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1106                 error = ENOMEM;
1107                 return (error);
1108         }
1109
1110         if ((error = bus_dma_tag_create(NULL,   /* parent */
1111                                    1, 0,        /* alignment, bounds */
1112                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1113                                    BUS_SPACE_MAXADDR,   /* highaddr */
1114                                    NULL, NULL,          /* filter, filterarg */
1115                                    MSIZE,               /* maxsize */
1116                                    1,                   /* nsegments */
1117                                    MSIZE,               /* maxsegsize */
1118                                    0,                   /* flags */
1119                                    NULL,                /* lockfunc */
1120                                    NULL,                /* lockfuncarg */
1121                                    &rxr->htag))) {
1122                 device_printf(dev, "Unable to create RX DMA htag\n");
1123                 return (error);
1124         }
1125
1126         if ((error = bus_dma_tag_create(NULL,   /* parent */
1127                                    1, 0,        /* alignment, bounds */
1128                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1129                                    BUS_SPACE_MAXADDR,   /* highaddr */
1130                                    NULL, NULL,          /* filter, filterarg */
1131                                    MJUM16BYTES,         /* maxsize */
1132                                    1,                   /* nsegments */
1133                                    MJUM16BYTES,         /* maxsegsize */
1134                                    0,                   /* flags */
1135                                    NULL,                /* lockfunc */
1136                                    NULL,                /* lockfuncarg */
1137                                    &rxr->ptag))) {
1138                 device_printf(dev, "Unable to create RX DMA ptag\n");
1139                 return (error);
1140         }
1141
1142         for (i = 0; i < que->num_desc; i++) {
1143                 buf = &rxr->buffers[i];
1144                 error = bus_dmamap_create(rxr->htag,
1145                     BUS_DMA_NOWAIT, &buf->hmap);
1146                 if (error) {
1147                         device_printf(dev, "Unable to create RX head map\n");
1148                         break;
1149                 }
1150                 error = bus_dmamap_create(rxr->ptag,
1151                     BUS_DMA_NOWAIT, &buf->pmap);
1152                 if (error) {
1153                         device_printf(dev, "Unable to create RX pkt map\n");
1154                         break;
1155                 }
1156         }
1157
1158         return (error);
1159 }
1160
1161
1162 /*********************************************************************
1163  *
1164  *  (Re)Initialize the queue receive ring and its buffers.
1165  *
1166  **********************************************************************/
1167 int
1168 ixl_init_rx_ring(struct ixl_queue *que)
1169 {
1170         struct  rx_ring         *rxr = &que->rxr;
1171         struct ixl_vsi          *vsi = que->vsi;
1172 #if defined(INET6) || defined(INET)
1173         struct ifnet            *ifp = vsi->ifp;
1174         struct lro_ctrl         *lro = &rxr->lro;
1175 #endif
1176         struct ixl_rx_buf       *buf;
1177         bus_dma_segment_t       pseg[1], hseg[1];
1178         int                     rsize, nsegs, error = 0;
1179 #ifdef DEV_NETMAP
1180         struct netmap_adapter *na = NA(que->vsi->ifp);
1181         struct netmap_slot *slot;
1182 #endif /* DEV_NETMAP */
1183
1184         IXL_RX_LOCK(rxr);
1185 #ifdef DEV_NETMAP
1186         /* same as in ixl_init_tx_ring() */
1187         slot = netmap_reset(na, NR_RX, que->me, 0);
1188 #endif /* DEV_NETMAP */
1189         /* Clear the ring contents */
1190         rsize = roundup2(que->num_desc *
1191             sizeof(union i40e_rx_desc), DBA_ALIGN);
1192         bzero((void *)rxr->base, rsize);
1193         /* Cleanup any existing buffers */
1194         for (int i = 0; i < que->num_desc; i++) {
1195                 buf = &rxr->buffers[i];
1196                 if (buf->m_head != NULL) {
1197                         bus_dmamap_sync(rxr->htag, buf->hmap,
1198                             BUS_DMASYNC_POSTREAD);
1199                         bus_dmamap_unload(rxr->htag, buf->hmap);
1200                         buf->m_head->m_flags |= M_PKTHDR;
1201                         m_freem(buf->m_head);
1202                 }
1203                 if (buf->m_pack != NULL) {
1204                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1205                             BUS_DMASYNC_POSTREAD);
1206                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1207                         buf->m_pack->m_flags |= M_PKTHDR;
1208                         m_freem(buf->m_pack);
1209                 }
1210                 buf->m_head = NULL;
1211                 buf->m_pack = NULL;
1212         }
1213
1214         /* header split is off */
1215         rxr->hdr_split = FALSE;
1216
1217         /* Now replenish the mbufs */
1218         for (int j = 0; j != que->num_desc; ++j) {
1219                 struct mbuf     *mh, *mp;
1220
1221                 buf = &rxr->buffers[j];
1222 #ifdef DEV_NETMAP
1223                 /*
1224                  * In netmap mode, fill the map and set the buffer
1225                  * address in the NIC ring, considering the offset
1226                  * between the netmap and NIC rings (see comment in
1227                  * ixgbe_setup_transmit_ring() ). No need to allocate
1228                  * an mbuf, so end the block with a continue;
1229                  */
1230                 if (slot) {
1231                         int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
1232                         uint64_t paddr;
1233                         void *addr;
1234
1235                         addr = PNMB(na, slot + sj, &paddr);
1236                         netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1237                         /* Update descriptor and the cached value */
1238                         rxr->base[j].read.pkt_addr = htole64(paddr);
1239                         rxr->base[j].read.hdr_addr = 0;
1240                         continue;
1241                 }
1242 #endif /* DEV_NETMAP */
1243                 /*
1244                 ** Don't allocate header mbufs if we're not
1245                 ** doing header split; it's wasteful
1246                 */ 
1247                 if (rxr->hdr_split == FALSE)
1248                         goto skip_head;
1249
1250                 /* First the header */
1251                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1252                 if (buf->m_head == NULL) {
1253                         error = ENOBUFS;
1254                         goto fail;
1255                 }
1256                 m_adj(buf->m_head, ETHER_ALIGN);
1257                 mh = buf->m_head;
1258                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1259                 mh->m_flags |= M_PKTHDR;
1260                 /* Get the memory mapping */
1261                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1262                     buf->hmap, buf->m_head, hseg,
1263                     &nsegs, BUS_DMA_NOWAIT);
1264                 if (error != 0) /* Nothing elegant to do here */
1265                         goto fail;
1266                 bus_dmamap_sync(rxr->htag,
1267                     buf->hmap, BUS_DMASYNC_PREREAD);
1268                 /* Update descriptor */
1269                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1270
1271 skip_head:
1272                 /* Now the payload cluster */
1273                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1274                     M_PKTHDR, rxr->mbuf_sz);
1275                 if (buf->m_pack == NULL) {
1276                         error = ENOBUFS;
1277                         goto fail;
1278                 }
1279                 mp = buf->m_pack;
1280                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1281                 /* Get the memory mapping */
1282                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1283                     buf->pmap, mp, pseg,
1284                     &nsegs, BUS_DMA_NOWAIT);
1285                 if (error != 0)
1286                         goto fail;
1287                 bus_dmamap_sync(rxr->ptag,
1288                     buf->pmap, BUS_DMASYNC_PREREAD);
1289                 /* Update descriptor */
1290                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1291                 rxr->base[j].read.hdr_addr = 0;
1292         }
1293
1294
1295         /* Setup our descriptor indices */
1296         rxr->next_check = 0;
1297         rxr->next_refresh = 0;
1298         rxr->lro_enabled = FALSE;
1299         rxr->split = 0;
1300         rxr->bytes = 0;
1301         rxr->discard = FALSE;
1302
1303         wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1304         ixl_flush(vsi->hw);
1305
1306 #if defined(INET6) || defined(INET)
1307         /*
1308         ** Now set up the LRO interface:
1309         */
1310         if (ifp->if_capenable & IFCAP_LRO) {
1311                 int err = tcp_lro_init(lro);
1312                 if (err) {
1313                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1314                         goto fail;
1315                 }
1316                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1317                 rxr->lro_enabled = TRUE;
1318                 lro->ifp = vsi->ifp;
1319         }
1320 #endif
1321
1322         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1323             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1324
1325 fail:
1326         IXL_RX_UNLOCK(rxr);
1327         return (error);
1328 }
1329
1330
1331 /*********************************************************************
1332  *
1333  *  Free station receive ring data structures
1334  *
1335  **********************************************************************/
1336 void
1337 ixl_free_que_rx(struct ixl_queue *que)
1338 {
1339         struct rx_ring          *rxr = &que->rxr;
1340         struct ixl_rx_buf       *buf;
1341
1342         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1343
1344         /* Cleanup any existing buffers */
1345         if (rxr->buffers != NULL) {
1346                 for (int i = 0; i < que->num_desc; i++) {
1347                         buf = &rxr->buffers[i];
1348                         if (buf->m_head != NULL) {
1349                                 bus_dmamap_sync(rxr->htag, buf->hmap,
1350                                     BUS_DMASYNC_POSTREAD);
1351                                 bus_dmamap_unload(rxr->htag, buf->hmap);
1352                                 buf->m_head->m_flags |= M_PKTHDR;
1353                                 m_freem(buf->m_head);
1354                         }
1355                         if (buf->m_pack != NULL) {
1356                                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1357                                     BUS_DMASYNC_POSTREAD);
1358                                 bus_dmamap_unload(rxr->ptag, buf->pmap);
1359                                 buf->m_pack->m_flags |= M_PKTHDR;
1360                                 m_freem(buf->m_pack);
1361                         }
1362                         buf->m_head = NULL;
1363                         buf->m_pack = NULL;
1364                         if (buf->hmap != NULL) {
1365                                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1366                                 buf->hmap = NULL;
1367                         }
1368                         if (buf->pmap != NULL) {
1369                                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1370                                 buf->pmap = NULL;
1371                         }
1372                 }
1373                 if (rxr->buffers != NULL) {
1374                         free(rxr->buffers, M_DEVBUF);
1375                         rxr->buffers = NULL;
1376                 }
1377         }
1378
1379         if (rxr->htag != NULL) {
1380                 bus_dma_tag_destroy(rxr->htag);
1381                 rxr->htag = NULL;
1382         }
1383         if (rxr->ptag != NULL) {
1384                 bus_dma_tag_destroy(rxr->ptag);
1385                 rxr->ptag = NULL;
1386         }
1387
1388         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1389         return;
1390 }
1391
1392 static inline void
1393 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1394 {
1395
1396 #if defined(INET6) || defined(INET)
1397         /*
1398          * At the moment, LRO is only applied to IPv4/TCP packets whose
1399          * TCP checksum has been verified by hardware, and which carry no
1400          * VLAN tag in the Ethernet header.
1401          */
1402         if (rxr->lro_enabled &&
1403             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1404             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1405             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1406                 /*
1407                  * Hand the packet to LRO; fall through and send
1408                  * it to the stack directly if:
1409                  *  - there are no LRO resources, or
1410                  *  - the LRO enqueue fails.
1411                  */
1412                 if (rxr->lro.lro_cnt != 0)
1413                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1414                                 return;
1415         }
1416 #endif
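        /*
         * Drop the RX lock across the call into the stack so that
         * upper-layer input processing does not run while the ring
         * lock is held.
         */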
1417         IXL_RX_UNLOCK(rxr);
1418         (*ifp->if_input)(ifp, m);
1419         IXL_RX_LOCK(rxr);
1420 }
1421
1422
1423 static inline void
1424 ixl_rx_discard(struct rx_ring *rxr, int i)
1425 {
1426         struct ixl_rx_buf       *rbuf;
1427
1428         rbuf = &rxr->buffers[i];
1429
1430         if (rbuf->fmp != NULL) {/* Partial chain ? */
1431                 rbuf->fmp->m_flags |= M_PKTHDR;
1432                 m_freem(rbuf->fmp);
1433                 rbuf->fmp = NULL;
1434         }
1435
1436         /*
1437         ** With advanced descriptors the writeback
1438         ** clobbers the buffer addrs, so it's easier
1439         ** to just free the existing mbufs and take
1440         ** the normal refresh path to get new buffers
1441         ** and mapping.
1442         */
1443         if (rbuf->m_head) {
1444                 m_free(rbuf->m_head);
1445                 rbuf->m_head = NULL;
1446         }
1447  
1448         if (rbuf->m_pack) {
1449                 m_free(rbuf->m_pack);
1450                 rbuf->m_pack = NULL;
1451         }
1452
1453         return;
1454 }
1455
1456 #ifdef RSS
1457 /*
1458 ** ixl_ptype_to_hash: parse the hardware packet type
1459 ** to determine the appropriate mbuf hash type.
1460 */
1461 static inline int
1462 ixl_ptype_to_hash(u8 ptype)
1463 {
1464         struct i40e_rx_ptype_decoded    decoded;
1465         u8                              ex = 0;
1466
1467         decoded = decode_rx_desc_ptype(ptype);
1468         ex = decoded.outer_frag;
1469
1470         if (!decoded.known)
1471                 return M_HASHTYPE_OPAQUE_HASH;
1472
1473         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
1474                 return M_HASHTYPE_OPAQUE_HASH;
1475
1476         /* Note: anything that gets to this point is IP */
1477         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
1478                 switch (decoded.inner_prot) {
1479                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1480                                 if (ex)
1481                                         return M_HASHTYPE_RSS_TCP_IPV6_EX;
1482                                 else
1483                                         return M_HASHTYPE_RSS_TCP_IPV6;
1484                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1485                                 if (ex)
1486                                         return M_HASHTYPE_RSS_UDP_IPV6_EX;
1487                                 else
1488                                         return M_HASHTYPE_RSS_UDP_IPV6;
1489                         default:
1490                                 if (ex)
1491                                         return M_HASHTYPE_RSS_IPV6_EX;
1492                                 else
1493                                         return M_HASHTYPE_RSS_IPV6;
1494                 }
1495         }
1496         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
1497                 switch (decoded.inner_prot) {
1498                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1499                                         return M_HASHTYPE_RSS_TCP_IPV4;
1500                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1501                                 if (ex)
1502                                         return M_HASHTYPE_RSS_UDP_IPV4_EX;
1503                                 else
1504                                         return M_HASHTYPE_RSS_UDP_IPV4;
1505                         default:
1506                                         return M_HASHTYPE_RSS_IPV4;
1507                 }
1508         }
1509         /* We should never get here!! */
1510         return M_HASHTYPE_OPAQUE_HASH;
1511 }
1512 #endif /* RSS */
1513
1514 /*********************************************************************
1515  *
1516  *  This routine executes in interrupt context. It replenishes
1517  *  the mbufs in the descriptor ring and passes data that has been
1518  *  DMA'd into host memory up to the upper layer.
1519  *
1520  *  We loop at most count times if count is > 0, or until done if
1521  *  count < 0.
1522  *
1523  *  Return TRUE for more work, FALSE for all clean.
1524  *********************************************************************/
1525 bool
1526 ixl_rxeof(struct ixl_queue *que, int count)
1527 {
1528         struct ixl_vsi          *vsi = que->vsi;
1529         struct rx_ring          *rxr = &que->rxr;
1530         struct ifnet            *ifp = vsi->ifp;
1531 #if defined(INET6) || defined(INET)
1532         struct lro_ctrl         *lro = &rxr->lro;
1533 #endif
1534         int                     i, nextp, processed = 0;
1535         union i40e_rx_desc      *cur;
1536         struct ixl_rx_buf       *rbuf, *nbuf;
1537
1538
1539         IXL_RX_LOCK(rxr);
1540
1541 #ifdef DEV_NETMAP
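        /* If this ring is in netmap mode, the interrupt is consumed there. */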
1542         if (netmap_rx_irq(ifp, que->me, &count)) {
1543                 IXL_RX_UNLOCK(rxr);
1544                 return (FALSE);
1545         }
1546 #endif /* DEV_NETMAP */
1547
1548         for (i = rxr->next_check; count != 0;) {
1549                 struct mbuf     *sendmp, *mh, *mp;
1550                 u32             status, error;
1551                 u16             hlen, plen, vtag;
1552                 u64             qword;
1553                 u8              ptype;
1554                 bool            eop;
1555  
1556                 /* Sync the ring. */
1557                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1558                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1559
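                /*
                 * qword1 of the writeback descriptor packs the status and
                 * error bits, the packet and header buffer lengths, and
                 * the packet type; pull each field out below.
                 */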
1560                 cur = &rxr->base[i];
1561                 qword = le64toh(cur->wb.qword1.status_error_len);
1562                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1563                     >> I40E_RXD_QW1_STATUS_SHIFT;
1564                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1565                     >> I40E_RXD_QW1_ERROR_SHIFT;
1566                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1567                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1568                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1569                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1570                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1571                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1572
1573                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1574                         ++rxr->not_done;
1575                         break;
1576                 }
1577                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1578                         break;
1579
1580                 count--;
1581                 sendmp = NULL;
1582                 nbuf = NULL;
1583                 cur->wb.qword1.status_error_len = 0;
1584                 rbuf = &rxr->buffers[i];
1585                 mh = rbuf->m_head;
1586                 mp = rbuf->m_pack;
1587                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1588                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1589                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1590                 else
1591                         vtag = 0;
1592
1593                 /*
1594                 ** Make sure bad packets are discarded,
1595                 ** note that only EOP descriptor has valid
1596                 ** error results.
1597                 */
1598                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1599                         rxr->desc_errs++;
1600                         ixl_rx_discard(rxr, i);
1601                         goto next_desc;
1602                 }
1603
1604                 /* Prefetch the next buffer */
1605                 if (!eop) {
1606                         nextp = i + 1;
1607                         if (nextp == que->num_desc)
1608                                 nextp = 0;
1609                         nbuf = &rxr->buffers[nextp];
1610                         prefetch(nbuf);
1611                 }
1612
1613                 /*
1614                 ** The header mbuf is ONLY used when header 
1615                 ** split is enabled, otherwise we get normal 
1616                 ** behavior, ie, both header and payload
1617                 ** are DMA'd into the payload buffer.
1618                 **
1619                 ** Rather than using the fmp/lmp global pointers
1620                 ** we now keep the head of a packet chain in the
1621                 ** buffer struct and pass this along from one
1622                 ** descriptor to the next, until we get EOP.
1623                 */
1624                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1625                         if (hlen > IXL_RX_HDR)
1626                                 hlen = IXL_RX_HDR;
1627                         mh->m_len = hlen;
1628                         mh->m_flags |= M_PKTHDR;
1629                         mh->m_next = NULL;
1630                         mh->m_pkthdr.len = mh->m_len;
1631                         /* Null buf pointer so it is refreshed */
1632                         rbuf->m_head = NULL;
1633                         /*
1634                         ** Check the payload length, this
1635                         ** could be zero if it's a small
1636                         ** packet.
1637                         */
1638                         if (plen > 0) {
1639                                 mp->m_len = plen;
1640                                 mp->m_next = NULL;
1641                                 mp->m_flags &= ~M_PKTHDR;
1642                                 mh->m_next = mp;
1643                                 mh->m_pkthdr.len += mp->m_len;
1644                                 /* Null buf pointer so it is refreshed */
1645                                 rbuf->m_pack = NULL;
1646                                 rxr->split++;
1647                         }
1648                         /*
1649                         ** Now create the forward
1650                         ** chain so that when the packet
1651                         ** completes we won't have to.
1652                         */
1653                         if (eop == 0) {
1654                                 /* stash the chain head */
1655                                 nbuf->fmp = mh;
1656                                 /* Make forward chain */
1657                                 if (plen)
1658                                         mp->m_next = nbuf->m_pack;
1659                                 else
1660                                         mh->m_next = nbuf->m_pack;
1661                         } else {
1662                                 /* Singlet, prepare to send */
1663                                 sendmp = mh;
1664                                 if (vtag) {
1665                                         sendmp->m_pkthdr.ether_vtag = vtag;
1666                                         sendmp->m_flags |= M_VLANTAG;
1667                                 }
1668                         }
1669                 } else {
1670                         /*
1671                         ** Either no header split, or a
1672                         ** secondary piece of a fragmented
1673                         ** split packet.
1674                         */
1675                         mp->m_len = plen;
1676                         /*
1677                         ** See if there is a stored chain head
1678                         ** from a previous descriptor.
1679                         */
1680                         sendmp = rbuf->fmp;
1681                         rbuf->m_pack = rbuf->fmp = NULL;
1682
1683                         if (sendmp != NULL) /* secondary frag */
1684                                 sendmp->m_pkthdr.len += mp->m_len;
1685                         else {
1686                                 /* first desc of a non-ps chain */
1687                                 sendmp = mp;
1688                                 sendmp->m_flags |= M_PKTHDR;
1689                                 sendmp->m_pkthdr.len = mp->m_len;
1690                         }
1691                         /* Pass the head pointer on */
1692                         if (eop == 0) {
1693                                 nbuf->fmp = sendmp;
1694                                 sendmp = NULL;
1695                                 mp->m_next = nbuf->m_pack;
1696                         }
1697                 }
1698                 ++processed;
1699                 /* Sending this frame? */
1700                 if (eop) {
1701                         sendmp->m_pkthdr.rcvif = ifp;
1702                         /* gather stats */
1703                         rxr->rx_packets++;
1704                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1705                         /* capture data for dynamic ITR adjustment */
1706                         rxr->packets++;
1707                         rxr->bytes += sendmp->m_pkthdr.len;
1708                         /* Set VLAN tag (field only valid in eop desc) */
1709                         if (vtag) {
1710                                 sendmp->m_pkthdr.ether_vtag = vtag;
1711                                 sendmp->m_flags |= M_VLANTAG;
1712                         }
1713                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1714                                 ixl_rx_checksum(sendmp, status, error, ptype);
1715 #ifdef RSS
1716                         sendmp->m_pkthdr.flowid =
1717                             le32toh(cur->wb.qword0.hi_dword.rss);
1718                         M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1719 #else
1720                         sendmp->m_pkthdr.flowid = que->msix;
1721                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1722 #endif
1723                 }
1724 next_desc:
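                /* Make our clearing of the descriptor visible for DMA. */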
1725                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1726                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1727
1728                 /* Advance our pointers to the next descriptor. */
1729                 if (++i == que->num_desc)
1730                         i = 0;
1731
1732                 /* Now send to the stack or do LRO */
1733                 if (sendmp != NULL) {
1734                         rxr->next_check = i;
1735                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1736                         i = rxr->next_check;
1737                 }
1738
1739                 /* Refresh the mbufs every 8 descriptors */
1740                 if (processed == 8) {
1741                         ixl_refresh_mbufs(que, i);
1742                         processed = 0;
1743                 }
1744         }
1745
1746         /* Refresh any remaining buf structs */
1747         if (ixl_rx_unrefreshed(que))
1748                 ixl_refresh_mbufs(que, i);
1749
1750         rxr->next_check = i;
1751
1752 #if defined(INET6) || defined(INET)
1753         /*
1754          * Flush any outstanding LRO work
1755          */
1756         tcp_lro_flush_all(lro);
1757 #endif
1758
1759         IXL_RX_UNLOCK(rxr);
1760         return (FALSE);
1761 }
1762
1763
1764 /*********************************************************************
1765  *
1766  *  Verify that the hardware indicated that the checksum is valid.
1767  *  Inform the stack of the checksum status so that it does not
1768  *  spend time re-verifying the checksum.
1769  *
1770  *********************************************************************/
1771 static void
1772 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1773 {
1774         struct i40e_rx_ptype_decoded decoded;
1775
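        /* Decode the hardware packet type to learn which headers are present. */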
1776         decoded = decode_rx_desc_ptype(ptype);
1777
1778         /* Errors? */
1779         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1780             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1781                 mp->m_pkthdr.csum_flags = 0;
1782                 return;
1783         }
1784
1785         /* IPv6 packets with extension headers likely have a bad csum */
1786         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1787             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1788                 if (status &
1789                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1790                         mp->m_pkthdr.csum_flags = 0;
1791                         return;
1792                 }
1793
1794  
1795         /* IP Checksum Good */
1796         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1797         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1798
1799         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1800                 mp->m_pkthdr.csum_flags |= 
1801                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1802                 mp->m_pkthdr.csum_data |= htons(0xffff);
1803         }
1804         return;
1805 }
1806
1807 #if __FreeBSD_version >= 1100000
1808 uint64_t
1809 ixl_get_counter(if_t ifp, ift_counter cnt)
1810 {
1811         struct ixl_vsi *vsi;
1812
1813         vsi = if_getsoftc(ifp);
1814
1815         switch (cnt) {
1816         case IFCOUNTER_IPACKETS:
1817                 return (vsi->ipackets);
1818         case IFCOUNTER_IERRORS:
1819                 return (vsi->ierrors);
1820         case IFCOUNTER_OPACKETS:
1821                 return (vsi->opackets);
1822         case IFCOUNTER_OERRORS:
1823                 return (vsi->oerrors);
1824         case IFCOUNTER_COLLISIONS:
1825                 /* Collisions are not possible in full-duplex 10G/40G Ethernet */
1826                 return (0);
1827         case IFCOUNTER_IBYTES:
1828                 return (vsi->ibytes);
1829         case IFCOUNTER_OBYTES:
1830                 return (vsi->obytes);
1831         case IFCOUNTER_IMCASTS:
1832                 return (vsi->imcasts);
1833         case IFCOUNTER_OMCASTS:
1834                 return (vsi->omcasts);
1835         case IFCOUNTER_IQDROPS:
1836                 return (vsi->iqdrops);
1837         case IFCOUNTER_OQDROPS:
1838                 return (vsi->oqdrops);
1839         case IFCOUNTER_NOPROTO:
1840                 return (vsi->noproto);
1841         default:
1842                 return (if_get_counter_default(ifp, cnt));
1843         }
1844 }
1845 #endif
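
/*
 * Note: the PF/VF attach code is expected to install this routine as
 * the interface's counter callback; a minimal sketch (assumed, not
 * part of this file):
 *
 *      if_setgetcounterfn(ifp, ixl_get_counter);
 */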
1846