1 /******************************************************************************
2
3   Copyright (c) 2013-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 **      IXL driver TX/RX Routines:
37 **          This was separated to allow usage by
38 **          both the PF and VF drivers.
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46
47 #include "ixl.h"
48
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52
53 /* Local Prototypes */
54 static void     ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55 static void     ixl_refresh_mbufs(struct ixl_queue *, int);
56 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
57 static int      ixl_tx_setup_offload(struct ixl_queue *,
58                     struct mbuf *, u32 *, u32 *);
59 static bool     ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60
61 static inline void ixl_rx_discard(struct rx_ring *, int);
62 static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
63                     struct mbuf *, u8);
64
65 static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
68 static inline u32 ixl_get_tx_head(struct ixl_queue *que);
69
70 #ifdef DEV_NETMAP
71 #include <dev/netmap/if_ixl_netmap.h>
72 int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
73 #endif /* DEV_NETMAP */
74
75 /*
76  * ixl_get_default_rss_key - copy the driver's default RSS key into @key
77  */
78 void
79 ixl_get_default_rss_key(u32 *key)
80 {
81         MPASS(key != NULL);
82
83         u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
84             0x183cfd8c, 0xce880440, 0x580cbc3c,
85             0x35897377, 0x328b25e1, 0x4fa98922,
86             0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
87             0x0, 0x0, 0x0};
88
89         bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
90 }
91
92 /*
93 ** Multiqueue Transmit driver
94 */
95 int
96 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
97 {
98         struct ixl_vsi          *vsi = ifp->if_softc;
99         struct ixl_queue        *que;
100         struct tx_ring          *txr;
101         int                     err, i;
102 #ifdef RSS
103         u32                     bucket_id;
104 #endif
105
106         /*
107         ** Which queue to use:
108         **
109         ** When doing RSS, map it to the same outbound
110         ** queue as the incoming flow would be mapped to.
111         ** If everything is set up correctly, that should
112         ** be the same bucket as the CPU we are currently on.
113         */
114         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
115 #ifdef  RSS
116                 if (rss_hash2bucket(m->m_pkthdr.flowid,
117                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
118                         i = bucket_id % vsi->num_queues;
119                 } else
120 #endif
121                         i = m->m_pkthdr.flowid % vsi->num_queues;
122         } else
123                 i = curcpu % vsi->num_queues;
124
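        /*
         * Illustrative example (values not from the source): with
         * vsi->num_queues == 4 and an RSS bucket or flowid of 0x2a,
         * the frame is queued on 0x2a % 4 == 2, the same queue the
         * receive side would hash that flow to.
         */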
125         que = &vsi->queues[i];
126         txr = &que->txr;
127
128         err = drbr_enqueue(ifp, txr->br, m);
129         if (err)
130                 return (err);
131         if (IXL_TX_TRYLOCK(txr)) {
132                 ixl_mq_start_locked(ifp, txr);
133                 IXL_TX_UNLOCK(txr);
134         } else
135                 taskqueue_enqueue(que->tq, &que->tx_task);
136
137         return (0);
138 }
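
/*
** Note: when the TX trylock above fails, the frame stays queued on
** txr->br and que->tx_task is scheduled; that task runs
** ixl_deferred_mq_start() below, which drains the ring under the lock.
*/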
139
140 int
141 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
142 {
143         struct ixl_queue        *que = txr->que;
144         struct ixl_vsi          *vsi = que->vsi;
145         struct mbuf             *next;
146         int                     err = 0;
147
148
149         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
150             vsi->link_active == 0)
151                 return (ENETDOWN);
152
153         /* Process the transmit queue */
154         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
155                 if ((err = ixl_xmit(que, &next)) != 0) {
156                         if (next == NULL)
157                                 drbr_advance(ifp, txr->br);
158                         else
159                                 drbr_putback(ifp, txr->br, next);
160                         break;
161                 }
162                 drbr_advance(ifp, txr->br);
163                 /* Send a copy of the frame to the BPF listener */
164                 ETHER_BPF_MTAP(ifp, next);
165                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
166                         break;
167         }
168
169         if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
170                 ixl_txeof(que);
171
172         return (err);
173 }
174
175 /*
176  * Called from a taskqueue to drain queued transmit packets.
177  */
178 void
179 ixl_deferred_mq_start(void *arg, int pending)
180 {
181         struct ixl_queue        *que = arg;
182         struct tx_ring          *txr = &que->txr;
183         struct ixl_vsi          *vsi = que->vsi;
184         struct ifnet            *ifp = vsi->ifp;
185         
186         IXL_TX_LOCK(txr);
187         if (!drbr_empty(ifp, txr->br))
188                 ixl_mq_start_locked(ifp, txr);
189         IXL_TX_UNLOCK(txr);
190 }
191
192 /*
193 ** Flush all queue ring buffers
194 */
195 void
196 ixl_qflush(struct ifnet *ifp)
197 {
198         struct ixl_vsi  *vsi = ifp->if_softc;
199
200         for (int i = 0; i < vsi->num_queues; i++) {
201                 struct ixl_queue *que = &vsi->queues[i];
202                 struct tx_ring  *txr = &que->txr;
203                 struct mbuf     *m;
204                 IXL_TX_LOCK(txr);
205                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
206                         m_freem(m);
207                 IXL_TX_UNLOCK(txr);
208         }
209         if_qflush(ifp);
210 }
211
212 /*
213 ** Find mbuf chains passed to the driver that
214 ** are 'sparse', i.e. ones using more than
215 ** IXL_SPARSE_CHAIN mbufs per MSS-sized chunk of data.
216 */
217 static inline bool
218 ixl_tso_detect_sparse(struct mbuf *mp)
219 {
220         struct mbuf     *m;
221         int             num, mss;
222
223         num = 0;
224         mss = mp->m_pkthdr.tso_segsz;
225
226         /* Exclude first mbuf; assume it contains all headers */
227         for (m = mp->m_next; m != NULL; m = m->m_next) {
230                 num++;
231                 mss -= m->m_len % mp->m_pkthdr.tso_segsz;
232
233                 if (mss < 1) {
234                         if (num > IXL_SPARSE_CHAIN)
235                                 return (true);
236                         num = (mss == 0) ? 0 : 1;
237                         mss += mp->m_pkthdr.tso_segsz;
238                 }
239         }
240
241         return (false);
242 }
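
/*
** Illustrative example of the counting above (values not from the
** source): with an MSS of 1448 and a chain of 100-byte mbufs, 'mss'
** drops below 1 only after 15 mbufs, so 'num' reaches 15; if that
** exceeds IXL_SPARSE_CHAIN the chain is reported as sparse and
** ixl_xmit() m_defrag()s it before mapping.
*/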
243
244
245 /*********************************************************************
246  *
247  *  This routine maps the mbufs to tx descriptors, allowing the
248  *  TX engine to transmit the packets. 
249  *      - return 0 on success, positive on failure
250  *
251  **********************************************************************/
252 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
253
254 static int
255 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
256 {
257         struct ixl_vsi          *vsi = que->vsi;
258         struct i40e_hw          *hw = vsi->hw;
259         struct tx_ring          *txr = &que->txr;
260         struct ixl_tx_buf       *buf;
261         struct i40e_tx_desc     *txd = NULL;
262         struct mbuf             *m_head, *m;
263         int                     i, j, error, nsegs;
264         int                     first, last = 0;
265         u16                     vtag = 0;
266         u32                     cmd, off;
267         bus_dmamap_t            map;
268         bus_dma_tag_t           tag;
269         bus_dma_segment_t       segs[IXL_MAX_TSO_SEGS];
270
271         cmd = off = 0;
272         m_head = *m_headp;
273
274         /*
275          * Important to capture the first descriptor
276          * used because it will contain the index of
277          * the one we tell the hardware to report back
278          */
279         first = txr->next_avail;
280         buf = &txr->buffers[first];
281         map = buf->map;
282         tag = txr->tx_tag;
283
284         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
285                 /* Use larger mapping for TSO */
286                 tag = txr->tso_tag;
287                 if (ixl_tso_detect_sparse(m_head)) {
288                         m = m_defrag(m_head, M_NOWAIT);
289                         if (m == NULL) {
290                                 m_freem(*m_headp);
291                                 *m_headp = NULL;
292                                 return (ENOBUFS);
293                         }
294                         *m_headp = m;
295                 }
296         }
297
298         /*
299          * Map the packet for DMA.
300          */
301         error = bus_dmamap_load_mbuf_sg(tag, map,
302             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
303
304         if (error == EFBIG) {
305                 struct mbuf *m;
306
307                 m = m_defrag(*m_headp, M_NOWAIT);
308                 if (m == NULL) {
309                         que->mbuf_defrag_failed++;
310                         m_freem(*m_headp);
311                         *m_headp = NULL;
312                         return (ENOBUFS);
313                 }
314                 *m_headp = m;
315
316                 /* Try it again */
317                 error = bus_dmamap_load_mbuf_sg(tag, map,
318                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
319
320                 if (error != 0) {
321                         que->tx_dmamap_failed++;
322                         m_freem(*m_headp);
323                         *m_headp = NULL;
324                         return (error);
325                 }
326         } else if (error != 0) {
327                 que->tx_dmamap_failed++;
328                 m_freem(*m_headp);
329                 *m_headp = NULL;
330                 return (error);
331         }
332
333         /* Make certain there are enough descriptors */
334         if (nsegs > txr->avail - 2) {
335                 txr->no_desc++;
336                 error = ENOBUFS;
337                 goto xmit_fail;
338         }
339         m_head = *m_headp;
340
341         /* Set up the TSO/CSUM offload */
342         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
343                 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
344                 if (error)
345                         goto xmit_fail;
346         }
347
348         cmd |= I40E_TX_DESC_CMD_ICRC;
349         /* Grab the VLAN tag */
350         if (m_head->m_flags & M_VLANTAG) {
351                 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
352                 vtag = htole16(m_head->m_pkthdr.ether_vtag);
353         }
354
355         i = txr->next_avail;
356         for (j = 0; j < nsegs; j++) {
357                 bus_size_t seglen;
358
359                 buf = &txr->buffers[i];
360                 buf->tag = tag; /* Keep track of the type tag */
361                 txd = &txr->base[i];
362                 seglen = segs[j].ds_len;
363
364                 txd->buffer_addr = htole64(segs[j].ds_addr);
365                 txd->cmd_type_offset_bsz =
366                     htole64(I40E_TX_DESC_DTYPE_DATA
367                     | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
368                     | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
369                     | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
370                     | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
371
372                 last = i; /* descriptor that will get completion IRQ */
373
374                 if (++i == que->num_desc)
375                         i = 0;
376
377                 buf->m_head = NULL;
378                 buf->eop_index = -1;
379         }
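        /*
         * Each data descriptor above carries the same cmd/offset bits
         * (ICRC, any checksum-offload offsets, optional IL2TAG1) packed
         * into the single 64-bit cmd_type_offset_bsz word; only the final
         * descriptor additionally gets RS|EOP (IXL_TXD_CMD) below, so the
         * hardware reports completion once per frame.
         */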
380         /* Set the last descriptor for report */
381         txd->cmd_type_offset_bsz |=
382             htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
383         txr->avail -= nsegs;
384         txr->next_avail = i;
385
386         buf->m_head = m_head;
387         /* Swap the dma map between the first and last descriptor */
388         txr->buffers[first].map = buf->map;
389         buf->map = map;
390         bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
391
392         /* Set the index of the descriptor that will be marked done */
393         buf = &txr->buffers[first];
394         buf->eop_index = last;
395
396         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
397             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
398         /*
399          * Advance the Transmit Descriptor Tail (TDT); this tells the
400          * hardware that this frame is available to transmit.
401          */
402         ++txr->total_packets;
403         wr32(hw, txr->tail, i);
404
405         /* Mark outstanding work */
406         atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
407         return (0);
408
409 xmit_fail:
410         bus_dmamap_unload(tag, buf->map);
411         return (error);
412 }
413
414
415 /*********************************************************************
416  *
417  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
418  *  the information needed to transmit a packet on the wire. This is
419  *  called only once at attach; setup is done on every reset.
420  *
421  **********************************************************************/
422 int
423 ixl_allocate_tx_data(struct ixl_queue *que)
424 {
425         struct tx_ring          *txr = &que->txr;
426         struct ixl_vsi          *vsi = que->vsi;
427         device_t                dev = vsi->dev;
428         struct ixl_tx_buf       *buf;
429         int                     error = 0;
430
431         /*
432          * Setup DMA descriptor areas.
433          */
434         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),           /* parent */
435                                1, 0,                    /* alignment, bounds */
436                                BUS_SPACE_MAXADDR,       /* lowaddr */
437                                BUS_SPACE_MAXADDR,       /* highaddr */
438                                NULL, NULL,              /* filter, filterarg */
439                                IXL_TSO_SIZE,            /* maxsize */
440                                IXL_MAX_TX_SEGS,         /* nsegments */
441                                PAGE_SIZE,               /* maxsegsize */
442                                0,                       /* flags */
443                                NULL,                    /* lockfunc */
444                                NULL,                    /* lockfuncarg */
445                                &txr->tx_tag))) {
446                 device_printf(dev,"Unable to allocate TX DMA tag\n");
447                 goto fail;
448         }
449
450         /* Make a special tag for TSO */
451         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),           /* parent */
452                                1, 0,                    /* alignment, bounds */
453                                BUS_SPACE_MAXADDR,       /* lowaddr */
454                                BUS_SPACE_MAXADDR,       /* highaddr */
455                                NULL, NULL,              /* filter, filterarg */
456                                IXL_TSO_SIZE,            /* maxsize */
457                                IXL_MAX_TSO_SEGS,        /* nsegments */
458                                PAGE_SIZE,               /* maxsegsize */
459                                0,                       /* flags */
460                                NULL,                    /* lockfunc */
461                                NULL,                    /* lockfuncarg */
462                                &txr->tso_tag))) {
463                 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
464                 goto fail;
465         }
466
467         if (!(txr->buffers =
468             (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
469             que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
470                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
471                 error = ENOMEM;
472                 goto fail;
473         }
474
475         /* Create the descriptor buffer default dma maps */
476         buf = txr->buffers;
477         for (int i = 0; i < que->num_desc; i++, buf++) {
478                 buf->tag = txr->tx_tag;
479                 error = bus_dmamap_create(buf->tag, 0, &buf->map);
480                 if (error != 0) {
481                         device_printf(dev, "Unable to create TX DMA map\n");
482                         goto fail;
483                 }
484         }
485 fail:
486         return (error);
487 }
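
/*
** Note on the two tags above: both use IXL_TSO_SIZE as their maxsize,
** but the default tag allows only IXL_MAX_TX_SEGS segments while the
** TSO tag allows IXL_MAX_TSO_SEGS; ixl_xmit() switches to the TSO tag
** whenever CSUM_TSO is set on the packet.
*/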
488
489
490 /*********************************************************************
491  *
492  *  (Re)Initialize a queue transmit ring.
493  *      - called by init, it clears the descriptor ring,
494  *        and frees any stale mbufs 
495  *
496  **********************************************************************/
497 void
498 ixl_init_tx_ring(struct ixl_queue *que)
499 {
500 #ifdef DEV_NETMAP
501         struct netmap_adapter *na = NA(que->vsi->ifp);
502         struct netmap_slot *slot;
503 #endif /* DEV_NETMAP */
504         struct tx_ring          *txr = &que->txr;
505         struct ixl_tx_buf       *buf;
506
507         /* Clear the old ring contents */
508         IXL_TX_LOCK(txr);
509
510 #ifdef DEV_NETMAP
511         /*
512          * (under lock): if in netmap mode, do some consistency
513          * checks and set slot to entry 0 of the netmap ring.
514          */
515         slot = netmap_reset(na, NR_TX, que->me, 0);
516 #endif /* DEV_NETMAP */
517
518         bzero((void *)txr->base,
519               (sizeof(struct i40e_tx_desc)) * que->num_desc);
520
521         /* Reset indices */
522         txr->next_avail = 0;
523         txr->next_to_clean = 0;
524
525         /* Reset watchdog status */
526         txr->watchdog_timer = 0;
527
528 #ifdef IXL_FDIR
529         /* Initialize flow director */
530         txr->atr_rate = ixl_atr_rate;
531         txr->atr_count = 0;
532 #endif
533         /* Free any existing tx mbufs. */
534         buf = txr->buffers;
535         for (int i = 0; i < que->num_desc; i++, buf++) {
536                 if (buf->m_head != NULL) {
537                         bus_dmamap_sync(buf->tag, buf->map,
538                             BUS_DMASYNC_POSTWRITE);
539                         bus_dmamap_unload(buf->tag, buf->map);
540                         m_freem(buf->m_head);
541                         buf->m_head = NULL;
542                 }
543 #ifdef DEV_NETMAP
544                 /*
545                  * In netmap mode, set the map for the packet buffer.
546                  * NOTE: Some drivers (not this one) also need to set
547                  * the physical buffer address in the NIC ring.
548                  * netmap_idx_n2k() maps a nic index, i, into the corresponding
549                  * netmap slot index, si
550                  */
551                 if (slot) {
552                         int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
553                         netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
554                 }
555 #endif /* DEV_NETMAP */
556                 /* Clear the EOP index */
557                 buf->eop_index = -1;
558         }
559
560         /* Set number of descriptors available */
561         txr->avail = que->num_desc;
562
563         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
564             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
565         IXL_TX_UNLOCK(txr);
566 }
567
568
569 /*********************************************************************
570  *
571  *  Free transmit ring related data structures.
572  *
573  **********************************************************************/
574 void
575 ixl_free_que_tx(struct ixl_queue *que)
576 {
577         struct tx_ring *txr = &que->txr;
578         struct ixl_tx_buf *buf;
579
580         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
581
582         for (int i = 0; i < que->num_desc; i++) {
583                 buf = &txr->buffers[i];
584                 if (buf->m_head != NULL) {
585                         bus_dmamap_sync(buf->tag, buf->map,
586                             BUS_DMASYNC_POSTWRITE);
587                         bus_dmamap_unload(buf->tag,
588                             buf->map);
589                         m_freem(buf->m_head);
590                         buf->m_head = NULL;
591                         if (buf->map != NULL) {
592                                 bus_dmamap_destroy(buf->tag,
593                                     buf->map);
594                                 buf->map = NULL;
595                         }
596                 } else if (buf->map != NULL) {
597                         bus_dmamap_unload(buf->tag,
598                             buf->map);
599                         bus_dmamap_destroy(buf->tag,
600                             buf->map);
601                         buf->map = NULL;
602                 }
603         }
604         if (txr->br != NULL)
605                 buf_ring_free(txr->br, M_DEVBUF);
606         if (txr->buffers != NULL) {
607                 free(txr->buffers, M_DEVBUF);
608                 txr->buffers = NULL;
609         }
610         if (txr->tx_tag != NULL) {
611                 bus_dma_tag_destroy(txr->tx_tag);
612                 txr->tx_tag = NULL;
613         }
614         if (txr->tso_tag != NULL) {
615                 bus_dma_tag_destroy(txr->tso_tag);
616                 txr->tso_tag = NULL;
617         }
618
619         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
620         return;
621 }
622
623 /*********************************************************************
624  *
625  *  Setup descriptor for hw offloads 
626  *
627  **********************************************************************/
628
629 static int
630 ixl_tx_setup_offload(struct ixl_queue *que,
631     struct mbuf *mp, u32 *cmd, u32 *off)
632 {
633         struct ether_vlan_header        *eh;
634 #ifdef INET
635         struct ip                       *ip = NULL;
636 #endif
637         struct tcphdr                   *th = NULL;
638 #ifdef INET6
639         struct ip6_hdr                  *ip6;
640 #endif
641         int                             elen, ip_hlen = 0, tcp_hlen;
642         u16                             etype;
643         u8                              ipproto = 0;
644         bool                            tso = FALSE;
645
646         /* Set up the TSO context descriptor if required */
647         if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
648                 tso = ixl_tso_setup(que, mp);
649                 if (tso)
650                         ++que->tso;
651                 else
652                         return (ENXIO);
653         }
654
655         /*
656          * Determine where frame payload starts.
657          * Jump over vlan headers if already present,
658          * helpful for QinQ too.
659          */
660         eh = mtod(mp, struct ether_vlan_header *);
661         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
662                 etype = ntohs(eh->evl_proto);
663                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
664         } else {
665                 etype = ntohs(eh->evl_encap_proto);
666                 elen = ETHER_HDR_LEN;
667         }
668
669         switch (etype) {
670 #ifdef INET
671                 case ETHERTYPE_IP:
672                         ip = (struct ip *)(mp->m_data + elen);
673                         ip_hlen = ip->ip_hl << 2;
674                         ipproto = ip->ip_p;
675                         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
676                         /* The IP checksum must be recalculated with TSO */
677                         if (tso)
678                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
679                         else
680                                 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
681                         break;
682 #endif
683 #ifdef INET6
684                 case ETHERTYPE_IPV6:
685                         ip6 = (struct ip6_hdr *)(mp->m_data + elen);
686                         ip_hlen = sizeof(struct ip6_hdr);
687                         ipproto = ip6->ip6_nxt;
688                         th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
689                         *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
690                         break;
691 #endif
692                 default:
693                         break;
694         }
695
696         *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
697         *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
698
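        /*
         * The offset fields use the units the descriptor expects: MACLEN
         * in 2-byte words, IPLEN in 4-byte words. Illustrative example
         * (values not from the source): an untagged frame (elen = 14)
         * with a 20-byte IPv4 header yields MACLEN = 7 and IPLEN = 5.
         */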
699         switch (ipproto) {
700                 case IPPROTO_TCP:
701                         tcp_hlen = th->th_off << 2;
702                         if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
703                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
704                                 *off |= (tcp_hlen >> 2) <<
705                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
706                         }
707 #ifdef IXL_FDIR
708                         ixl_atr(que, th, etype);
709 #endif
710                         break;
711                 case IPPROTO_UDP:
712                         if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
713                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
714                                 *off |= (sizeof(struct udphdr) >> 2) <<
715                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
716                         }
717                         break;
718
719                 case IPPROTO_SCTP:
720                         if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
721                                 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
722                                 *off |= (sizeof(struct sctphdr) >> 2) <<
723                                     I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
724                         }
725                         /* Fall Thru */
726                 default:
727                         break;
728         }
729
730         return (0);
731 }
732
733
734 /**********************************************************************
735  *
736  *  Setup context for hardware segmentation offload (TSO)
737  *
738  **********************************************************************/
739 static bool
740 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
741 {
742         struct tx_ring                  *txr = &que->txr;
743         struct i40e_tx_context_desc     *TXD;
744         struct ixl_tx_buf               *buf;
745         u32                             cmd, mss, type, tsolen;
746         u16                             etype;
747         int                             idx, elen, ip_hlen, tcp_hlen;
748         struct ether_vlan_header        *eh;
749 #ifdef INET
750         struct ip                       *ip;
751 #endif
752 #ifdef INET6
753         struct ip6_hdr                  *ip6;
754 #endif
755 #if defined(INET6) || defined(INET)
756         struct tcphdr                   *th;
757 #endif
758         u64                             type_cmd_tso_mss;
759
760         /*
761          * Determine where frame payload starts.
762          * Jump over vlan headers if already present
763          */
764         eh = mtod(mp, struct ether_vlan_header *);
765         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
766                 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
767                 etype = eh->evl_proto;
768         } else {
769                 elen = ETHER_HDR_LEN;
770                 etype = eh->evl_encap_proto;
771         }
772
773         switch (ntohs(etype)) {
774 #ifdef INET6
775         case ETHERTYPE_IPV6:
776                 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
777                 if (ip6->ip6_nxt != IPPROTO_TCP)
778                         return (FALSE);
779                 ip_hlen = sizeof(struct ip6_hdr);
780                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
781                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
782                 tcp_hlen = th->th_off << 2;
783                 /*
784                  * The corresponding flag is set by the stack in the IPv4
785                  * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
786                  * So, set it here because the rest of the flow requires it.
787                  */
788                 mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
789                 break;
790 #endif
791 #ifdef INET
792         case ETHERTYPE_IP:
793                 ip = (struct ip *)(mp->m_data + elen);
794                 if (ip->ip_p != IPPROTO_TCP)
795                         return (FALSE);
796                 ip->ip_sum = 0;
797                 ip_hlen = ip->ip_hl << 2;
798                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
799                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
800                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
801                 tcp_hlen = th->th_off << 2;
802                 break;
803 #endif
804         default:
805                 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
806                     __func__, ntohs(etype));
807                 return FALSE;
808         }
809
810         /* Ensure we have at least the IP+TCP header in the first mbuf. */
811         if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
812                 return FALSE;
813
814         idx = txr->next_avail;
815         buf = &txr->buffers[idx];
816         TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
817         tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
818
819         type = I40E_TX_DESC_DTYPE_CONTEXT;
820         cmd = I40E_TX_CTX_DESC_TSO;
821         /* TSO MSS must not be less than 64 */
822         if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
823                 que->mss_too_small++;
824                 mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
825         }
826         mss = mp->m_pkthdr.tso_segsz;
827
828         type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
829             ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
830             ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
831             ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
832         TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
833
834         TXD->tunneling_params = htole32(0);
835         buf->m_head = NULL;
836         buf->eop_index = -1;
837
838         if (++idx == que->num_desc)
839                 idx = 0;
840
841         txr->avail--;
842         txr->next_avail = idx;
843
844         return TRUE;
845 }
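
/*
** Note: the TSO context descriptor written above consumes a ring slot of
** its own (txr->avail is decremented and next_avail advanced), so the
** data descriptors for the frame are laid down by ixl_xmit() starting at
** the slot that follows it.
*/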
846
847 /*
848 ** ixl_get_tx_head - Retrieve the value from the
849 **    location where the HW records its HEAD index
850 */
851 static inline u32
852 ixl_get_tx_head(struct ixl_queue *que)
853 {
854         struct tx_ring  *txr = &que->txr;
855         void *head = &txr->base[que->num_desc];
856         return LE32_TO_CPU(*(volatile __le32 *)head);
857 }
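
/*
** The head write-back value lives in the slot immediately past the last
** descriptor (base[que->num_desc]); ixl_txeof() below compares it against
** next_to_clean instead of polling a done bit in every descriptor.
*/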
858
859 /**********************************************************************
860  *
861  *  Examine each tx_buffer in the used queue. If the hardware is done
862  *  processing the packet then free associated resources. The
863  *  tx_buffer is put back on the free queue.
864  *
865  **********************************************************************/
866 bool
867 ixl_txeof(struct ixl_queue *que)
868 {
869         struct tx_ring          *txr = &que->txr;
870         u32                     first, last, head, done, processed;
871         struct ixl_tx_buf       *buf;
872         struct i40e_tx_desc     *tx_desc, *eop_desc;
873
874
875         mtx_assert(&txr->mtx, MA_OWNED);
876
877 #ifdef DEV_NETMAP
878         // XXX todo: implement moderation
879         if (netmap_tx_irq(que->vsi->ifp, que->me))
880                 return FALSE;
881 #endif /* DEV_NETMAP */
882
883         /* These are not the descriptors you seek, move along :) */
884         if (txr->avail == que->num_desc) {
885                 atomic_store_rel_32(&txr->watchdog_timer, 0);
886                 return FALSE;
887         }
888
889         processed = 0;
890         first = txr->next_to_clean;
891         buf = &txr->buffers[first];
892         tx_desc = (struct i40e_tx_desc *)&txr->base[first];
893         last = buf->eop_index;
894         if (last == -1)
895                 return FALSE;
896         eop_desc = (struct i40e_tx_desc *)&txr->base[last];
897
898         /* Get the Head WB value */
899         head = ixl_get_tx_head(que);
900
901         /*
902         ** Get the index of the first descriptor
903         ** BEYOND the EOP and call that 'done'.
904         ** I do this so the comparison in the
905         ** inner while loop below can be simple
906         */
907         if (++last == que->num_desc) last = 0;
908         done = last;
909
910         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
911             BUS_DMASYNC_POSTREAD);
912         /*
913         ** The HEAD index of the ring is written to a
914         ** defined location; this, rather than a done bit,
915         ** is what is used to keep track of what must be
916         ** 'cleaned'.
917         */
918         while (first != head) {
919                 /* We clean the range of the packet */
920                 while (first != done) {
921                         ++txr->avail;
922                         ++processed;
923
924                         if (buf->m_head) {
925                                 txr->bytes += /* for ITR adjustment */
926                                     buf->m_head->m_pkthdr.len;
927                                 txr->tx_bytes += /* for TX stats */
928                                     buf->m_head->m_pkthdr.len;
929                                 bus_dmamap_sync(buf->tag,
930                                     buf->map,
931                                     BUS_DMASYNC_POSTWRITE);
932                                 bus_dmamap_unload(buf->tag,
933                                     buf->map);
934                                 m_freem(buf->m_head);
935                                 buf->m_head = NULL;
936                         }
937                         buf->eop_index = -1;
938
939                         if (++first == que->num_desc)
940                                 first = 0;
941
942                         buf = &txr->buffers[first];
943                         tx_desc = &txr->base[first];
944                 }
945                 ++txr->packets;
946                 /* See if there is more work now */
947                 last = buf->eop_index;
948                 if (last != -1) {
949                         eop_desc = &txr->base[last];
950                         /* Get next done point */
951                         if (++last == que->num_desc) last = 0;
952                         done = last;
953                 } else
954                         break;
955         }
956         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
957             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
958
959         txr->next_to_clean = first;
960
961
962         /*
963          * If there are no pending descriptors, clear the timeout.
964          */
965         if (txr->avail == que->num_desc) {
966                 atomic_store_rel_32(&txr->watchdog_timer, 0);
967                 return FALSE;
968         }
969
970         return TRUE;
971 }
972
973 /*********************************************************************
974  *
975  *  Refresh mbuf buffers for RX descriptor rings
976  *   - now keeps its own state so discards due to resource
977  *     exhaustion are unnecessary; if an mbuf cannot be obtained
978  *     it just returns, keeping its placeholder, so it can simply
979  *     be called again later to retry.
980  *
981  **********************************************************************/
982 static void
983 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
984 {
985         struct ixl_vsi          *vsi = que->vsi;
986         struct rx_ring          *rxr = &que->rxr;
987         bus_dma_segment_t       hseg[1];
988         bus_dma_segment_t       pseg[1];
989         struct ixl_rx_buf       *buf;
990         struct mbuf             *mh, *mp;
991         int                     i, j, nsegs, error;
992         bool                    refreshed = FALSE;
993
994         i = j = rxr->next_refresh;
995         /* Control the loop with one beyond */
996         if (++j == que->num_desc)
997                 j = 0;
998
999         while (j != limit) {
1000                 buf = &rxr->buffers[i];
1001                 if (rxr->hdr_split == FALSE)
1002                         goto no_split;
1003
1004                 if (buf->m_head == NULL) {
1005                         mh = m_gethdr(M_NOWAIT, MT_DATA);
1006                         if (mh == NULL)
1007                                 goto update;
1008                 } else
1009                         mh = buf->m_head;
1010
1011                 mh->m_pkthdr.len = mh->m_len = MHLEN;
1013                 mh->m_flags |= M_PKTHDR;
1014                 /* Get the memory mapping */
1015                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1016                     buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1017                 if (error != 0) {
1018                         printf("Refresh mbufs: hdr dmamap load"
1019                             " failure - %d\n", error);
1020                         m_free(mh);
1021                         buf->m_head = NULL;
1022                         goto update;
1023                 }
1024                 buf->m_head = mh;
1025                 bus_dmamap_sync(rxr->htag, buf->hmap,
1026                     BUS_DMASYNC_PREREAD);
1027                 rxr->base[i].read.hdr_addr =
1028                    htole64(hseg[0].ds_addr);
1029
1030 no_split:
1031                 if (buf->m_pack == NULL) {
1032                         mp = m_getjcl(M_NOWAIT, MT_DATA,
1033                             M_PKTHDR, rxr->mbuf_sz);
1034                         if (mp == NULL)
1035                                 goto update;
1036                 } else
1037                         mp = buf->m_pack;
1038
1039                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1040                 /* Get the memory mapping */
1041                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1042                     buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1043                 if (error != 0) {
1044                         printf("Refresh mbufs: payload dmamap load"
1045                             " failure - %d\n", error);
1046                         m_free(mp);
1047                         buf->m_pack = NULL;
1048                         goto update;
1049                 }
1050                 buf->m_pack = mp;
1051                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1052                     BUS_DMASYNC_PREREAD);
1053                 rxr->base[i].read.pkt_addr =
1054                    htole64(pseg[0].ds_addr);
1055                 /* Used only when doing header split */
1056                 rxr->base[i].read.hdr_addr = 0;
1057
1058                 refreshed = TRUE;
1059                 /* Next is precalculated */
1060                 i = j;
1061                 rxr->next_refresh = i;
1062                 if (++j == que->num_desc)
1063                         j = 0;
1064         }
1065 update:
1066         if (refreshed) /* Update hardware tail index */
1067                 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1068         return;
1069 }
1070
1071
1072 /*********************************************************************
1073  *
1074  *  Allocate memory for rx_buffer structures. Since we use one
1075  *  rx_buffer per descriptor, the maximum number of rx_buffer's
1076  *  that we'll need is equal to the number of receive descriptors
1077  *  that we've defined.
1078  *
1079  **********************************************************************/
1080 int
1081 ixl_allocate_rx_data(struct ixl_queue *que)
1082 {
1083         struct rx_ring          *rxr = &que->rxr;
1084         struct ixl_vsi          *vsi = que->vsi;
1085         device_t                dev = vsi->dev;
1086         struct ixl_rx_buf       *buf;
1087         int                     i, bsize, error;
1088
1089         bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1090         if (!(rxr->buffers =
1091             (struct ixl_rx_buf *) malloc(bsize,
1092             M_DEVBUF, M_NOWAIT | M_ZERO))) {
1093                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1094                 error = ENOMEM;
1095                 return (error);
1096         }
1097
1098         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1099                                    1, 0,        /* alignment, bounds */
1100                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1101                                    BUS_SPACE_MAXADDR,   /* highaddr */
1102                                    NULL, NULL,          /* filter, filterarg */
1103                                    MSIZE,               /* maxsize */
1104                                    1,                   /* nsegments */
1105                                    MSIZE,               /* maxsegsize */
1106                                    0,                   /* flags */
1107                                    NULL,                /* lockfunc */
1108                                    NULL,                /* lockfuncarg */
1109                                    &rxr->htag))) {
1110                 device_printf(dev, "Unable to create RX DMA htag\n");
1111                 return (error);
1112         }
1113
1114         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
1115                                    1, 0,        /* alignment, bounds */
1116                                    BUS_SPACE_MAXADDR,   /* lowaddr */
1117                                    BUS_SPACE_MAXADDR,   /* highaddr */
1118                                    NULL, NULL,          /* filter, filterarg */
1119                                    MJUM16BYTES,         /* maxsize */
1120                                    1,                   /* nsegments */
1121                                    MJUM16BYTES,         /* maxsegsize */
1122                                    0,                   /* flags */
1123                                    NULL,                /* lockfunc */
1124                                    NULL,                /* lockfuncarg */
1125                                    &rxr->ptag))) {
1126                 device_printf(dev, "Unable to create RX DMA ptag\n");
1127                 return (error);
1128         }
1129
1130         for (i = 0; i < que->num_desc; i++) {
1131                 buf = &rxr->buffers[i];
1132                 error = bus_dmamap_create(rxr->htag,
1133                     BUS_DMA_NOWAIT, &buf->hmap);
1134                 if (error) {
1135                         device_printf(dev, "Unable to create RX head map\n");
1136                         break;
1137                 }
1138                 error = bus_dmamap_create(rxr->ptag,
1139                     BUS_DMA_NOWAIT, &buf->pmap);
1140                 if (error) {
1141                         device_printf(dev, "Unable to create RX pkt map\n");
1142                         break;
1143                 }
1144         }
1145
1146         return (error);
1147 }
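
/*
** Note: rxr->htag is sized for header mbufs (maxsize MSIZE) and, in this
** file, is only exercised by the header-split path, while rxr->ptag
** covers payload clusters up to MJUM16BYTES; both map a single segment
** per buffer.
*/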
1148
1149
1150 /*********************************************************************
1151  *
1152  *  (Re)Initialize the queue receive ring and its buffers.
1153  *
1154  **********************************************************************/
1155 int
1156 ixl_init_rx_ring(struct ixl_queue *que)
1157 {
1158         struct  rx_ring         *rxr = &que->rxr;
1159         struct ixl_vsi          *vsi = que->vsi;
1160 #if defined(INET6) || defined(INET)
1161         struct ifnet            *ifp = vsi->ifp;
1162         struct lro_ctrl         *lro = &rxr->lro;
1163 #endif
1164         struct ixl_rx_buf       *buf;
1165         bus_dma_segment_t       pseg[1], hseg[1];
1166         int                     rsize, nsegs, error = 0;
1167 #ifdef DEV_NETMAP
1168         struct netmap_adapter *na = NA(que->vsi->ifp);
1169         struct netmap_slot *slot;
1170 #endif /* DEV_NETMAP */
1171
1172         IXL_RX_LOCK(rxr);
1173 #ifdef DEV_NETMAP
1174         /* same as in ixl_init_tx_ring() */
1175         slot = netmap_reset(na, NR_RX, que->me, 0);
1176 #endif /* DEV_NETMAP */
1177         /* Clear the ring contents */
1178         rsize = roundup2(que->num_desc *
1179             sizeof(union i40e_rx_desc), DBA_ALIGN);
1180         bzero((void *)rxr->base, rsize);
1181         /* Cleanup any existing buffers */
1182         for (int i = 0; i < que->num_desc; i++) {
1183                 buf = &rxr->buffers[i];
1184                 if (buf->m_head != NULL) {
1185                         bus_dmamap_sync(rxr->htag, buf->hmap,
1186                             BUS_DMASYNC_POSTREAD);
1187                         bus_dmamap_unload(rxr->htag, buf->hmap);
1188                         buf->m_head->m_flags |= M_PKTHDR;
1189                         m_freem(buf->m_head);
1190                 }
1191                 if (buf->m_pack != NULL) {
1192                         bus_dmamap_sync(rxr->ptag, buf->pmap,
1193                             BUS_DMASYNC_POSTREAD);
1194                         bus_dmamap_unload(rxr->ptag, buf->pmap);
1195                         buf->m_pack->m_flags |= M_PKTHDR;
1196                         m_freem(buf->m_pack);
1197                 }
1198                 buf->m_head = NULL;
1199                 buf->m_pack = NULL;
1200         }
1201
1202         /* header split is off */
1203         rxr->hdr_split = FALSE;
1204
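        /*
         * With header split disabled, only the payload clusters (m_pack)
         * are allocated below; read.hdr_addr is left at zero and the
         * header-mbuf path here and in ixl_refresh_mbufs() is skipped.
         */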
1205         /* Now replenish the mbufs */
1206         for (int j = 0; j != que->num_desc; ++j) {
1207                 struct mbuf     *mh, *mp;
1208
1209                 buf = &rxr->buffers[j];
1210 #ifdef DEV_NETMAP
1211                 /*
1212                  * In netmap mode, fill the map and set the buffer
1213                  * address in the NIC ring, considering the offset
1214                  * between the netmap and NIC rings (see comment in
1215                  * ixgbe_setup_transmit_ring() ). No need to allocate
1216                  * an mbuf, so end the block with a continue;
1217                  */
1218                 if (slot) {
1219                         int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
1220                         uint64_t paddr;
1221                         void *addr;
1222
1223                         addr = PNMB(na, slot + sj, &paddr);
1224                         netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1225                         /* Update descriptor and the cached value */
1226                         rxr->base[j].read.pkt_addr = htole64(paddr);
1227                         rxr->base[j].read.hdr_addr = 0;
1228                         continue;
1229                 }
1230 #endif /* DEV_NETMAP */
1231                 /*
1232                 ** Don't allocate header mbufs if not
1233                 ** doing header split; it's wasteful.
1234                 */
1235                 if (rxr->hdr_split == FALSE)
1236                         goto skip_head;
1237
1238                 /* First the header */
1239                 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1240                 if (buf->m_head == NULL) {
1241                         error = ENOBUFS;
1242                         goto fail;
1243                 }
1244                 m_adj(buf->m_head, ETHER_ALIGN);
1245                 mh = buf->m_head;
1246                 mh->m_len = mh->m_pkthdr.len = MHLEN;
1247                 mh->m_flags |= M_PKTHDR;
1248                 /* Get the memory mapping */
1249                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1250                     buf->hmap, buf->m_head, hseg,
1251                     &nsegs, BUS_DMA_NOWAIT);
1252                 if (error != 0) /* Nothing elegant to do here */
1253                         goto fail;
1254                 bus_dmamap_sync(rxr->htag,
1255                     buf->hmap, BUS_DMASYNC_PREREAD);
1256                 /* Update descriptor */
1257                 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1258
1259 skip_head:
1260                 /* Now the payload cluster */
1261                 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1262                     M_PKTHDR, rxr->mbuf_sz);
1263                 if (buf->m_pack == NULL) {
1264                         error = ENOBUFS;
1265                         goto fail;
1266                 }
1267                 mp = buf->m_pack;
1268                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1269                 /* Get the memory mapping */
1270                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1271                     buf->pmap, mp, pseg,
1272                     &nsegs, BUS_DMA_NOWAIT);
1273                 if (error != 0)
1274                         goto fail;
1275                 bus_dmamap_sync(rxr->ptag,
1276                     buf->pmap, BUS_DMASYNC_PREREAD);
1277                 /* Update descriptor */
1278                 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1279                 rxr->base[j].read.hdr_addr = 0;
1280         }
1281
1282
1283         /* Setup our descriptor indices */
1284         rxr->next_check = 0;
1285         rxr->next_refresh = 0;
1286         rxr->lro_enabled = FALSE;
1287         rxr->split = 0;
1288         rxr->bytes = 0;
1289         rxr->discard = FALSE;
1290
1291         wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1292         ixl_flush(vsi->hw);
1293
1294 #if defined(INET6) || defined(INET)
1295         /*
1296         ** Now set up the LRO interface:
1297         */
1298         if (ifp->if_capenable & IFCAP_LRO) {
1299                 int err = tcp_lro_init(lro);
1300                 if (err) {
1301                         if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1302                         goto fail;
1303                 }
1304                 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1305                 rxr->lro_enabled = TRUE;
1306                 lro->ifp = vsi->ifp;
1307         }
1308 #endif
1309
1310         bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1311             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1312
1313 fail:
1314         IXL_RX_UNLOCK(rxr);
1315         return (error);
1316 }
1317
1318
1319 /*********************************************************************
1320  *
1321  *  Free station receive ring data structures
1322  *
1323  **********************************************************************/
1324 void
1325 ixl_free_que_rx(struct ixl_queue *que)
1326 {
1327         struct rx_ring          *rxr = &que->rxr;
1328         struct ixl_rx_buf       *buf;
1329
1330         INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1331
1332         /* Cleanup any existing buffers */
1333         if (rxr->buffers != NULL) {
1334                 for (int i = 0; i < que->num_desc; i++) {
1335                         buf = &rxr->buffers[i];
1336                         if (buf->m_head != NULL) {
1337                                 bus_dmamap_sync(rxr->htag, buf->hmap,
1338                                     BUS_DMASYNC_POSTREAD);
1339                                 bus_dmamap_unload(rxr->htag, buf->hmap);
1340                                 buf->m_head->m_flags |= M_PKTHDR;
1341                                 m_freem(buf->m_head);
1342                         }
1343                         if (buf->m_pack != NULL) {
1344                                 bus_dmamap_sync(rxr->ptag, buf->pmap,
1345                                     BUS_DMASYNC_POSTREAD);
1346                                 bus_dmamap_unload(rxr->ptag, buf->pmap);
1347                                 buf->m_pack->m_flags |= M_PKTHDR;
1348                                 m_freem(buf->m_pack);
1349                         }
1350                         buf->m_head = NULL;
1351                         buf->m_pack = NULL;
1352                         if (buf->hmap != NULL) {
1353                                 bus_dmamap_destroy(rxr->htag, buf->hmap);
1354                                 buf->hmap = NULL;
1355                         }
1356                         if (buf->pmap != NULL) {
1357                                 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1358                                 buf->pmap = NULL;
1359                         }
1360                 }
1361                 if (rxr->buffers != NULL) {
1362                         free(rxr->buffers, M_DEVBUF);
1363                         rxr->buffers = NULL;
1364                 }
1365         }
1366
1367         if (rxr->htag != NULL) {
1368                 bus_dma_tag_destroy(rxr->htag);
1369                 rxr->htag = NULL;
1370         }
1371         if (rxr->ptag != NULL) {
1372                 bus_dma_tag_destroy(rxr->ptag);
1373                 rxr->ptag = NULL;
1374         }
1375
1376         INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1377         return;
1378 }
1379
1380 static inline void
1381 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1382 {
1383
1384 #if defined(INET6) || defined(INET)
1385         /*
1386          * At the moment LRO is only done for IPv4/TCP packets whose TCP
1387          * checksum has been computed by hardware and which carry no VLAN
1388          * tag in the ethernet header.
1389          */
1390         if (rxr->lro_enabled &&
1391             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1392             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1393             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1394                 /*
1395                  * Send to the stack if:
1396                  **  - LRO not enabled, or
1397                  **  - no LRO resources, or
1398                  **  - lro enqueue fails
1399                  */
1400                 if (rxr->lro.lro_cnt != 0)
1401                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1402                                 return;
1403         }
1404 #endif
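              /*
               * Drop the RX lock while the frame is handed up the
               * stack, so if_input() does not run with the driver's
               * RX lock held.
               */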
1405         IXL_RX_UNLOCK(rxr);
1406         (*ifp->if_input)(ifp, m);
1407         IXL_RX_LOCK(rxr);
1408 }
1409
1410
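     /*
     ** ixl_rx_discard: free the mbufs associated with a bad
     ** descriptor, including any partially assembled chain; the
     ** normal refresh path will allocate and map replacements.
     */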
1411 static inline void
1412 ixl_rx_discard(struct rx_ring *rxr, int i)
1413 {
1414         struct ixl_rx_buf       *rbuf;
1415
1416         rbuf = &rxr->buffers[i];
1417
1418         if (rbuf->fmp != NULL) {        /* Partial chain? */
1419                 rbuf->fmp->m_flags |= M_PKTHDR;
1420                 m_freem(rbuf->fmp);
1421                 rbuf->fmp = NULL;
1422         }
1423
1424         /*
1425         ** With advanced descriptors the writeback
1426         ** clobbers the buffer addresses, so it's easier
1427         ** to just free the existing mbufs and take
1428         ** the normal refresh path to get new buffers
1429         ** and mapping.
1430         */
1431         if (rbuf->m_head) {
1432                 m_free(rbuf->m_head);
1433                 rbuf->m_head = NULL;
1434         }
1435
1436         if (rbuf->m_pack) {
1437                 m_free(rbuf->m_pack);
1438                 rbuf->m_pack = NULL;
1439         }
1440
1441         return;
1442 }
1443
1444 #ifdef RSS
1445 /*
1446 ** ixl_ptype_to_hash: parse the packet type
1447 ** to determine the appropriate hash.
1448 */
1449 static inline int
1450 ixl_ptype_to_hash(u8 ptype)
1451 {
1452         struct i40e_rx_ptype_decoded    decoded;
1453         u8                              ex = 0;
1454
1455         decoded = decode_rx_desc_ptype(ptype);
1456         ex = decoded.outer_frag;
1457
1458         if (!decoded.known)
1459                 return M_HASHTYPE_OPAQUE_HASH;
1460
1461         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
1462                 return M_HASHTYPE_OPAQUE_HASH;
1463
1464         /* Note: anything that gets to this point is IP */
1465         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
1466                 switch (decoded.inner_prot) {
1467                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1468                                 if (ex)
1469                                         return M_HASHTYPE_RSS_TCP_IPV6_EX;
1470                                 else
1471                                         return M_HASHTYPE_RSS_TCP_IPV6;
1472                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1473                                 if (ex)
1474                                         return M_HASHTYPE_RSS_UDP_IPV6_EX;
1475                                 else
1476                                         return M_HASHTYPE_RSS_UDP_IPV6;
1477                         default:
1478                                 if (ex)
1479                                         return M_HASHTYPE_RSS_IPV6_EX;
1480                                 else
1481                                         return M_HASHTYPE_RSS_IPV6;
1482                 }
1483         }
1484         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
1485                 switch (decoded.inner_prot) {
1486                         case I40E_RX_PTYPE_INNER_PROT_TCP:
1487                                         return M_HASHTYPE_RSS_TCP_IPV4;
1488                         case I40E_RX_PTYPE_INNER_PROT_UDP:
1489                                 if (ex)
1490                                         return M_HASHTYPE_RSS_UDP_IPV4_EX;
1491                                 else
1492                                         return M_HASHTYPE_RSS_UDP_IPV4;
1493                         default:
1494                                         return M_HASHTYPE_RSS_IPV4;
1495                 }
1496         }
1497         /* We should never get here!! */
1498         return M_HASHTYPE_OPAQUE_HASH;
1499 }
1500 #endif /* RSS */
1501
1502 /*********************************************************************
1503  *
1504  *  This routine executes in interrupt context. It replenishes
1505  *  the mbufs in the descriptor ring and passes data which has
1506  *  been DMA'd into host memory up to the network stack.
1507  *
1508  *  We loop at most count times if count is > 0, or until done if
1509  *  count < 0.
1510  *
1511  *  Note: despite the bool return type, this always returns FALSE.
1512  *********************************************************************/
1513 bool
1514 ixl_rxeof(struct ixl_queue *que, int count)
1515 {
1516         struct ixl_vsi          *vsi = que->vsi;
1517         struct rx_ring          *rxr = &que->rxr;
1518         struct ifnet            *ifp = vsi->ifp;
1519 #if defined(INET6) || defined(INET)
1520         struct lro_ctrl         *lro = &rxr->lro;
1521 #endif
1522         int                     i, nextp, processed = 0;
1523         union i40e_rx_desc      *cur;
1524         struct ixl_rx_buf       *rbuf, *nbuf;
1525
1526
1527         IXL_RX_LOCK(rxr);
1528
1529 #ifdef DEV_NETMAP
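              /*
               * If netmap has taken over this ring, netmap_rx_irq()
               * consumes the interrupt and the normal RX path below
               * is skipped.
               */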
1530         if (netmap_rx_irq(ifp, que->me, &count)) {
1531                 IXL_RX_UNLOCK(rxr);
1532                 return (FALSE);
1533         }
1534 #endif /* DEV_NETMAP */
1535
1536         for (i = rxr->next_check; count != 0;) {
1537                 struct mbuf     *sendmp, *mh, *mp;
1538                 u32             status, error;
1539                 u16             hlen, plen, vtag;
1540                 u64             qword;
1541                 u8              ptype;
1542                 bool            eop;
1543
1544                 /* Sync the ring. */
1545                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1546                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1547
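                      /*
                       * The writeback descriptor packs status, error bits,
                       * buffer lengths and the packet type into qword1;
                       * extract each field with its mask and shift.
                       */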
1548                 cur = &rxr->base[i];
1549                 qword = le64toh(cur->wb.qword1.status_error_len);
1550                 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1551                     >> I40E_RXD_QW1_STATUS_SHIFT;
1552                 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1553                     >> I40E_RXD_QW1_ERROR_SHIFT;
1554                 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1555                     >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1556                 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1557                     >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1558                 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1559                     >> I40E_RXD_QW1_PTYPE_SHIFT;
1560
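                      /*
                       * Stop at the first descriptor the hardware has not
                       * yet written back (Descriptor Done bit still clear).
                       */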
1561                 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1562                         ++rxr->not_done;
1563                         break;
1564                 }
1565                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1566                         break;
1567
1568                 count--;
1569                 sendmp = NULL;
1570                 nbuf = NULL;
1571                 cur->wb.qword1.status_error_len = 0;
1572                 rbuf = &rxr->buffers[i];
1573                 mh = rbuf->m_head;
1574                 mp = rbuf->m_pack;
1575                 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1576                 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1577                         vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1578                 else
1579                         vtag = 0;
1580
1581                 /*
1582                 ** Make sure bad packets are discarded;
1583                 ** note that only the EOP descriptor has
1584                 ** valid error results.
1585                 */
1586                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1587                         rxr->desc_errs++;
1588                         ixl_rx_discard(rxr, i);
1589                         goto next_desc;
1590                 }
1591
1592                 /* Prefetch the next buffer */
1593                 if (!eop) {
1594                         nextp = i + 1;
1595                         if (nextp == que->num_desc)
1596                                 nextp = 0;
1597                         nbuf = &rxr->buffers[nextp];
1598                         prefetch(nbuf);
1599                 }
1600
1601                 /*
1602                 ** The header mbuf is ONLY used when header
1603                 ** split is enabled, otherwise we get normal
1604                 ** behavior, i.e., both header and payload
1605                 ** are DMA'd into the payload buffer.
1606                 **
1607                 ** Rather than using the fmp/lmp global pointers
1608                 ** we now keep the head of a packet chain in the
1609                 ** buffer struct and pass this along from one
1610                 ** descriptor to the next, until we get EOP.
1611                 */
1612                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1613                         if (hlen > IXL_RX_HDR)
1614                                 hlen = IXL_RX_HDR;
1615                         mh->m_len = hlen;
1616                         mh->m_flags |= M_PKTHDR;
1617                         mh->m_next = NULL;
1618                         mh->m_pkthdr.len = mh->m_len;
1619                         /* Null buf pointer so it is refreshed */
1620                         rbuf->m_head = NULL;
1621                         /*
1622                         ** Check the payload length, which
1623                         ** could be zero if it's a small
1624                         ** packet.
1625                         */
1626                         if (plen > 0) {
1627                                 mp->m_len = plen;
1628                                 mp->m_next = NULL;
1629                                 mp->m_flags &= ~M_PKTHDR;
1630                                 mh->m_next = mp;
1631                                 mh->m_pkthdr.len += mp->m_len;
1632                                 /* Null buf pointer so it is refreshed */
1633                                 rbuf->m_pack = NULL;
1634                                 rxr->split++;
1635                         }
1636                         /*
1637                         ** Now create the forward
1638                         ** chain so that when the packet
1639                         ** completes we won't have to.
1640                         */
1641                         if (eop == 0) {
1642                                 /* stash the chain head */
1643                                 nbuf->fmp = mh;
1644                                 /* Make forward chain */
1645                                 if (plen)
1646                                         mp->m_next = nbuf->m_pack;
1647                                 else
1648                                         mh->m_next = nbuf->m_pack;
1649                         } else {
1650                                 /* Singlet, prepare to send */
1651                                 sendmp = mh;
1652                                 if (vtag) {
1653                                         sendmp->m_pkthdr.ether_vtag = vtag;
1654                                         sendmp->m_flags |= M_VLANTAG;
1655                                 }
1656                         }
1657                 } else {
1658                         /*
1659                         ** Either no header split, or a
1660                         ** secondary piece of a fragmented
1661                         ** split packet.
1662                         */
1663                         mp->m_len = plen;
1664                         /*
1665                         ** See if a stored head from a previous
1666                         ** descriptor marks this as a continuation.
1667                         */
1668                         sendmp = rbuf->fmp;
1669                         rbuf->m_pack = rbuf->fmp = NULL;
1670
1671                         if (sendmp != NULL) /* secondary frag */
1672                                 sendmp->m_pkthdr.len += mp->m_len;
1673                         else {
1674                                 /* first desc of a non-ps chain */
1675                                 sendmp = mp;
1676                                 sendmp->m_flags |= M_PKTHDR;
1677                                 sendmp->m_pkthdr.len = mp->m_len;
1678                         }
1679                         /* Pass the head pointer on */
1680                         if (eop == 0) {
1681                                 nbuf->fmp = sendmp;
1682                                 sendmp = NULL;
1683                                 mp->m_next = nbuf->m_pack;
1684                         }
1685                 }
1686                 ++processed;
1687                 /* Sending this frame? */
1688                 if (eop) {
1689                         sendmp->m_pkthdr.rcvif = ifp;
1690                         /* gather stats */
1691                         rxr->rx_packets++;
1692                         rxr->rx_bytes += sendmp->m_pkthdr.len;
1693                         /* capture data for dynamic ITR adjustment */
1694                         rxr->packets++;
1695                         rxr->bytes += sendmp->m_pkthdr.len;
1696                         /* Set VLAN tag (field only valid in eop desc) */
1697                         if (vtag) {
1698                                 sendmp->m_pkthdr.ether_vtag = vtag;
1699                                 sendmp->m_flags |= M_VLANTAG;
1700                         }
1701                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1702                                 ixl_rx_checksum(sendmp, status, error, ptype);
1703 #ifdef RSS
1704                         sendmp->m_pkthdr.flowid =
1705                             le32toh(cur->wb.qword0.hi_dword.rss);
1706                         M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1707 #else
1708                         sendmp->m_pkthdr.flowid = que->msix;
1709                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1710 #endif
1711                 }
1712 next_desc:
1713                 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1714                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1715
1716                 /* Advance our pointers to the next descriptor. */
1717                 if (++i == que->num_desc)
1718                         i = 0;
1719
1720                 /* Now send to the stack or do LRO */
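                      /*
                       * ixl_rx_input() drops the RX lock, so publish our
                       * position in next_check first and reload it after
                       * the call.
                       */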
1721                 if (sendmp != NULL) {
1722                         rxr->next_check = i;
1723                         ixl_rx_input(rxr, ifp, sendmp, ptype);
1724                         i = rxr->next_check;
1725                 }
1726
1727                 /* Refresh the mbufs every 8 processed descriptors */
1728                 if (processed == 8) {
1729                         ixl_refresh_mbufs(que, i);
1730                         processed = 0;
1731                 }
1732         }
1733
1734         /* Refresh any remaining buf structs */
1735         if (ixl_rx_unrefreshed(que))
1736                 ixl_refresh_mbufs(que, i);
1737
1738         rxr->next_check = i;
1739
1740 #if defined(INET6) || defined(INET)
1741         /*
1742          * Flush any outstanding LRO work
1743          */
1744 #if __FreeBSD_version >= 1100105
1745         tcp_lro_flush_all(lro);
1746 #else
1747         struct lro_entry *queued;
1748         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1749                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1750                 tcp_lro_flush(lro, queued);
1751         }
1752 #endif
1753 #endif /* defined(INET6) || defined(INET) */
1754
1755         IXL_RX_UNLOCK(rxr);
1756         return (FALSE);
1757 }
1758
1759
1760 /*********************************************************************
1761  *
1762  *  Verify that the hardware indicated that the checksum is valid.
1763  *  Inform the stack about the status of the checksum so that the
1764  *  stack doesn't spend time verifying it.
1765  *
1766  *********************************************************************/
1767 static void
1768 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1769 {
1770         struct i40e_rx_ptype_decoded decoded;
1771
1772         decoded = decode_rx_desc_ptype(ptype);
1773
1774         /* Errors? */
1775         if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1776             (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1777                 mp->m_pkthdr.csum_flags = 0;
1778                 return;
1779         }
1780
1781         /* IPv6 packets with extension headers likely have a bad csum */
1782         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1783             decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1784                 if (status &
1785                     (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1786                         mp->m_pkthdr.csum_flags = 0;
1787                         return;
1788                 }
1789
1790
1791         /* IP Checksum Good */
1792         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1793         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1794
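              /*
               * L3L4P indicates the hardware performed the L3/L4
               * integrity checks; with no error bits set, report a
               * valid pseudo-header checksum so the stack can skip
               * its own verification.
               */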
1795         if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1796                 mp->m_pkthdr.csum_flags |= 
1797                     (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1798                 mp->m_pkthdr.csum_data |= htons(0xffff);
1799         }
1800         return;
1801 }
1802
1803 #if __FreeBSD_version >= 1100000
1804 uint64_t
1805 ixl_get_counter(if_t ifp, ift_counter cnt)
1806 {
1807         struct ixl_vsi *vsi;
1808
1809         vsi = if_getsoftc(ifp);
1810
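              /*
               * Return the VSI's software-maintained statistics for
               * the counters tracked here; anything else falls back
               * to the generic ifnet accounting.
               */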
1811         switch (cnt) {
1812         case IFCOUNTER_IPACKETS:
1813                 return (vsi->ipackets);
1814         case IFCOUNTER_IERRORS:
1815                 return (vsi->ierrors);
1816         case IFCOUNTER_OPACKETS:
1817                 return (vsi->opackets);
1818         case IFCOUNTER_OERRORS:
1819                 return (vsi->oerrors);
1820         case IFCOUNTER_COLLISIONS:
1821                 /* Collisions are impossible in full-duplex 10G/40G Ethernet */
1822                 return (0);
1823         case IFCOUNTER_IBYTES:
1824                 return (vsi->ibytes);
1825         case IFCOUNTER_OBYTES:
1826                 return (vsi->obytes);
1827         case IFCOUNTER_IMCASTS:
1828                 return (vsi->imcasts);
1829         case IFCOUNTER_OMCASTS:
1830                 return (vsi->omcasts);
1831         case IFCOUNTER_IQDROPS:
1832                 return (vsi->iqdrops);
1833         case IFCOUNTER_OQDROPS:
1834                 return (vsi->oqdrops);
1835         case IFCOUNTER_NOPROTO:
1836                 return (vsi->noproto);
1837         default:
1838                 return (if_get_counter_default(ifp, cnt));
1839         }
1840 }
1841 #endif
1842