1 /*
2  * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  *
32  */
33 #include "opt_inet.h"
34 #include <dev/mlx4/cq.h>
35 #include <linux/slab.h>
36 #include <dev/mlx4/qp.h>
37 #include <linux/if_ether.h>
38 #include <linux/if_vlan.h>
39 #include <linux/vmalloc.h>
40 #include <dev/mlx4/driver.h>
41 #ifdef CONFIG_NET_RX_BUSY_POLL
42 #include <net/busy_poll.h>
43 #endif
44
45 #include "en.h"
46
47 #if (MLX4_EN_MAX_RX_SEGS == 1)
48 static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
49                                  struct mlx4_en_rx_ring *ring,
50                                  int index)
51 {
52         struct mlx4_en_rx_desc *rx_desc =
53             ((struct mlx4_en_rx_desc *)ring->buf) + index;
54         int i;
55
56         /* Set size and memtype fields */
57         rx_desc->data[0].byte_count = cpu_to_be32(priv->rx_mb_size - MLX4_NET_IP_ALIGN);
58         rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key);
59
60         /*
61          * If the number of used fragments does not fill up the ring
62          * stride, remaining (unused) fragments must be padded with
63          * null address/size and a special memory key:
64          */
65         for (i = 1; i < MLX4_EN_MAX_RX_SEGS; i++) {
66                 rx_desc->data[i].byte_count = 0;
67                 rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
68                 rx_desc->data[i].addr = 0;
69         }
70 }
71 #endif
72
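/*
 * Allocate the mbuf(s) backing one receive descriptor.  With a single RX
 * segment this is one jumbo cluster of ring->rx_mb_size bytes.  With
 * multiple segments a chain of MLX4_EN_MAX_RX_BYTES clusters is grown
 * until the packet header length covers ring->rx_mb_size; if any link of
 * the chain cannot be allocated the whole chain is freed and NULL is
 * returned.
 */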
73 static inline struct mbuf *
74 mlx4_en_alloc_mbuf(struct mlx4_en_rx_ring *ring)
75 {
76         struct mbuf *mb;
77
78 #if (MLX4_EN_MAX_RX_SEGS == 1)
79         mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
80         if (likely(mb != NULL))
81                 mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size;
82 #else
83         mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MLX4_EN_MAX_RX_BYTES);
84         if (likely(mb != NULL)) {
85                 struct mbuf *mb_head = mb;
86                 int i;
87
88                 mb->m_len = MLX4_EN_MAX_RX_BYTES;
89                 mb->m_pkthdr.len = MLX4_EN_MAX_RX_BYTES;
90
91                 for (i = 1; i != MLX4_EN_MAX_RX_SEGS; i++) {
92                         if (mb_head->m_pkthdr.len >= ring->rx_mb_size)
93                                 break;
94                         mb = (mb->m_next = m_getjcl(M_NOWAIT, MT_DATA, 0, MLX4_EN_MAX_RX_BYTES));
95                         if (unlikely(mb == NULL)) {
96                                 m_freem(mb_head);
97                                 return (NULL);
98                         }
99                         mb->m_len = MLX4_EN_MAX_RX_BYTES;
100                         mb_head->m_pkthdr.len += MLX4_EN_MAX_RX_BYTES;
101                 }
102                 /* rewind to first mbuf in chain */
103                 mb = mb_head;
104         }
105 #endif
106         return (mb);
107 }
108
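/*
 * (Re)fill one RX descriptor.  The ring keeps a pre-loaded "spare" mbuf:
 * when allocating or DMA-loading a fresh mbuf fails, the descriptor is
 * pointed at the spare by swapping DMA maps, so the completed buffer can
 * still be handed to the stack without leaving an empty slot in the ring.
 */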
109 static int
110 mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc *rx_desc,
111     struct mlx4_en_rx_mbuf *mb_list)
112 {
113         bus_dma_segment_t segs[MLX4_EN_MAX_RX_SEGS];
114         bus_dmamap_t map;
115         struct mbuf *mb;
116         int nsegs;
117         int err;
118 #if (MLX4_EN_MAX_RX_SEGS != 1)
119         int i;
120 #endif
121
122         /* try to allocate a new spare mbuf */
123         if (unlikely(ring->spare.mbuf == NULL)) {
124                 mb = mlx4_en_alloc_mbuf(ring);
125                 if (unlikely(mb == NULL))
126                         return (-ENOMEM);
127
128                 /* make sure IP header gets aligned */
129                 m_adj(mb, MLX4_NET_IP_ALIGN);
130
131                 /* load spare mbuf into BUSDMA */
132                 err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, ring->spare.dma_map,
133                     mb, ring->spare.segs, &nsegs, BUS_DMA_NOWAIT);
134                 if (unlikely(err != 0)) {
135                         m_freem(mb);
136                         return (err);
137                 }
138
139                 /* store spare info */
140                 ring->spare.mbuf = mb;
141
142 #if (MLX4_EN_MAX_RX_SEGS != 1)
143                 /* zero remaining segs */
144                 for (i = nsegs; i != MLX4_EN_MAX_RX_SEGS; i++) {
145                         ring->spare.segs[i].ds_addr = 0;
146                         ring->spare.segs[i].ds_len = 0;
147                 }
148 #endif
149                 bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map,
150                     BUS_DMASYNC_PREREAD);
151         }
152
153         /* synchronize and unload the current mbuf, if any */
154         if (likely(mb_list->mbuf != NULL)) {
155                 bus_dmamap_sync(ring->dma_tag, mb_list->dma_map,
156                     BUS_DMASYNC_POSTREAD);
157                 bus_dmamap_unload(ring->dma_tag, mb_list->dma_map);
158         }
159
160         mb = mlx4_en_alloc_mbuf(ring);
161         if (unlikely(mb == NULL))
162                 goto use_spare;
163
164         /* make sure IP header gets aligned */
165         m_adj(mb, MLX4_NET_IP_ALIGN);
166
167         err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, mb_list->dma_map,
168             mb, segs, &nsegs, BUS_DMA_NOWAIT);
169         if (unlikely(err != 0)) {
170                 m_freem(mb);
171                 goto use_spare;
172         }
173
174 #if (MLX4_EN_MAX_RX_SEGS == 1)
175         rx_desc->data[0].addr = cpu_to_be64(segs[0].ds_addr);
176 #else
177         for (i = 0; i != nsegs; i++) {
178                 rx_desc->data[i].byte_count = cpu_to_be32(segs[i].ds_len);
179                 rx_desc->data[i].lkey = ring->rx_mr_key_be;
180                 rx_desc->data[i].addr = cpu_to_be64(segs[i].ds_addr);
181         }
182         for (; i != MLX4_EN_MAX_RX_SEGS; i++) {
183                 rx_desc->data[i].byte_count = 0;
184                 rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
185                 rx_desc->data[i].addr = 0;
186         }
187 #endif
188         mb_list->mbuf = mb;
189
190         bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, BUS_DMASYNC_PREREAD);
191         return (0);
192
193 use_spare:
194         /* swap DMA maps */
195         map = mb_list->dma_map;
196         mb_list->dma_map = ring->spare.dma_map;
197         ring->spare.dma_map = map;
198
199         /* swap MBUFs */
200         mb_list->mbuf = ring->spare.mbuf;
201         ring->spare.mbuf = NULL;
202
203         /* store physical address */
204 #if (MLX4_EN_MAX_RX_SEGS == 1)
205         rx_desc->data[0].addr = cpu_to_be64(ring->spare.segs[0].ds_addr);
206 #else
207         for (i = 0; i != MLX4_EN_MAX_RX_SEGS; i++) {
208                 if (ring->spare.segs[i].ds_len != 0) {
209                         rx_desc->data[i].byte_count = cpu_to_be32(ring->spare.segs[i].ds_len);
210                         rx_desc->data[i].lkey = ring->rx_mr_key_be;
211                         rx_desc->data[i].addr = cpu_to_be64(ring->spare.segs[i].ds_addr);
212                 } else {
213                         rx_desc->data[i].byte_count = 0;
214                         rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
215                         rx_desc->data[i].addr = 0;
216                 }
217         }
218 #endif
219         return (0);
220 }
221
222 static void
223 mlx4_en_free_buf(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_mbuf *mb_list)
224 {
225         bus_dmamap_t map = mb_list->dma_map;
226         bus_dmamap_sync(ring->dma_tag, map, BUS_DMASYNC_POSTREAD);
227         bus_dmamap_unload(ring->dma_tag, map);
228         m_freem(mb_list->mbuf);
229         mb_list->mbuf = NULL;   /* safety clearing */
230 }
231
232 static int
233 mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
234     struct mlx4_en_rx_ring *ring, int index)
235 {
236         struct mlx4_en_rx_desc *rx_desc =
237             ((struct mlx4_en_rx_desc *)ring->buf) + index;
238         struct mlx4_en_rx_mbuf *mb_list = ring->mbuf + index;
239
240         mb_list->mbuf = NULL;
241
242         if (mlx4_en_alloc_buf(ring, rx_desc, mb_list)) {
243                 priv->port_stats.rx_alloc_failed++;
244                 return (-ENOMEM);
245         }
246         return (0);
247 }
248
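/*
 * Publish the ring's producer index to the hardware by writing its low
 * 16 bits, in big-endian form, into the RX work queue doorbell record.
 */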
249 static inline void
250 mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
251 {
252         *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
253 }
254
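/*
 * Post the initial receive buffers on all rings, round-robin, one buffer
 * per ring per pass.  Allocation failures are tolerated as long as every
 * ring received at least one buffer; in that case all rings are shrunk to
 * the largest power of two not exceeding what the failing ring managed to
 * fill.
 */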
255 static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
256 {
257         struct mlx4_en_rx_ring *ring;
258         int ring_ind;
259         int buf_ind;
260         int new_size;
261         int err;
262
263         for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
264                 for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
265                         ring = priv->rx_ring[ring_ind];
266
267                         err = mlx4_en_prepare_rx_desc(priv, ring,
268                                                       ring->actual_size);
269                         if (err) {
270                                 if (ring->actual_size == 0) {
271                                         en_err(priv, "Failed to allocate "
272                                                      "enough rx buffers\n");
273                                         return -ENOMEM;
274                                 } else {
275                                         new_size =
276                                                 rounddown_pow_of_two(ring->actual_size);
277                                         en_warn(priv, "Only %d buffers allocated, "
278                                                       "reducing ring size to %d\n",
279                                                 ring->actual_size, new_size);
280                                         goto reduce_rings;
281                                 }
282                         }
283                         ring->actual_size++;
284                         ring->prod++;
285                 }
286         }
287         return 0;
288
289 reduce_rings:
290         for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
291                 ring = priv->rx_ring[ring_ind];
292                 while (ring->actual_size > new_size) {
293                         ring->actual_size--;
294                         ring->prod--;
295                         mlx4_en_free_buf(ring,
296                             ring->mbuf + ring->actual_size);
297                 }
298         }
299
300         return 0;
301 }
302
303 static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
304                                 struct mlx4_en_rx_ring *ring)
305 {
306         int index;
307
308         en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
309                ring->cons, ring->prod);
310
311         /* Unmap and free Rx buffers */
312         BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size);
313         while (ring->cons != ring->prod) {
314                 index = ring->cons & ring->size_mask;
315                 en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
316                 mlx4_en_free_buf(ring, ring->mbuf + index);
317                 ++ring->cons;
318         }
319 }
320
321 void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev)
322 {
323         int i;
324         int num_of_eqs;
325         int num_rx_rings;
326         struct mlx4_dev *dev = mdev->dev;
327
328         mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
329                 num_of_eqs = max_t(int, MIN_RX_RINGS,
330                                    min_t(int,
331                                          mlx4_get_eqs_per_port(mdev->dev, i),
332                                          DEF_RX_RINGS));
333
334                 num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS :
335                                                            num_of_eqs;
336                 mdev->profile.prof[i].rx_ring_num =
337                         rounddown_pow_of_two(num_rx_rings);
338         }
339 }
340
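/*
 * Select the receive cluster size for the interface MTU.  The effective
 * size adds the Ethernet header, a VLAN tag, the FCS and the IP alignment
 * pad, then rounds up to the next mbuf cluster size.  For example, and
 * assuming MLX4_NET_IP_ALIGN is 2 and 4KB pages: a 1500-byte MTU yields
 * 1524 bytes and selects MCLBYTES clusters, while a 9000-byte MTU yields
 * 9024 bytes and selects MJUM9BYTES clusters.
 */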
341 void mlx4_en_calc_rx_buf(struct ifnet *dev)
342 {
343         struct mlx4_en_priv *priv = netdev_priv(dev);
344         int eff_mtu = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN +
345             MLX4_NET_IP_ALIGN;
346
347         if (eff_mtu > MJUM16BYTES) {
348                 en_err(priv, "MTU(%u) is too big\n", (unsigned)dev->if_mtu);
349                 eff_mtu = MJUM16BYTES;
350         } else if (eff_mtu > MJUM9BYTES) {
351                 eff_mtu = MJUM16BYTES;
352         } else if (eff_mtu > MJUMPAGESIZE) {
353                 eff_mtu = MJUM9BYTES;
354         } else if (eff_mtu > MCLBYTES) {
355                 eff_mtu = MJUMPAGESIZE;
356         } else {
357                 eff_mtu = MCLBYTES;
358         }
359
360         priv->rx_mb_size = eff_mtu;
361
362         en_dbg(DRV, priv, "Effective RX MTU: %d bytes\n", eff_mtu);
363 }
364
365 int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
366                            struct mlx4_en_rx_ring **pring,
367                            u32 size, int node)
368 {
369         struct mlx4_en_dev *mdev = priv->mdev;
370         struct mlx4_en_rx_ring *ring;
371         int err;
372         int tmp;
373         uint32_t x;
374
375         ring = kzalloc(sizeof(struct mlx4_en_rx_ring), GFP_KERNEL);
376         if (!ring) {
377                 en_err(priv, "Failed to allocate RX ring structure\n");
378                 return -ENOMEM;
379         }
380
381         /* Create DMA descriptor TAG */
382         if ((err = -bus_dma_tag_create(
383             bus_get_dma_tag(mdev->pdev->dev.bsddev),
384             1,                          /* any alignment */
385             0,                          /* no boundary */
386             BUS_SPACE_MAXADDR,          /* lowaddr */
387             BUS_SPACE_MAXADDR,          /* highaddr */
388             NULL, NULL,                 /* filter, filterarg */
389             MJUM16BYTES,                /* maxsize */
390             MLX4_EN_MAX_RX_SEGS,        /* nsegments */
391             MJUM16BYTES,                /* maxsegsize */
392             0,                          /* flags */
393             NULL, NULL,                 /* lockfunc, lockfuncarg */
394             &ring->dma_tag))) {
395                 en_err(priv, "Failed to create DMA tag\n");
396                 goto err_ring;
397         }
398
399         ring->prod = 0;
400         ring->cons = 0;
401         ring->size = size;
402         ring->size_mask = size - 1;
403
404         ring->log_stride = ilog2(sizeof(struct mlx4_en_rx_desc));
405         ring->buf_size = (ring->size * sizeof(struct mlx4_en_rx_desc)) + TXBB_SIZE;
406
407         tmp = size * sizeof(struct mlx4_en_rx_mbuf);
408
409         ring->mbuf = kzalloc(tmp, GFP_KERNEL);
410         if (ring->mbuf == NULL) {
411                 err = -ENOMEM;
412                 goto err_dma_tag;
413         }
414
415         err = -bus_dmamap_create(ring->dma_tag, 0, &ring->spare.dma_map);
416         if (err != 0)
417                 goto err_info;
418
419         for (x = 0; x != size; x++) {
420                 err = -bus_dmamap_create(ring->dma_tag, 0,
421                     &ring->mbuf[x].dma_map);
422                 if (err != 0) {
423                         while (x--)
424                                 bus_dmamap_destroy(ring->dma_tag,
425                                     ring->mbuf[x].dma_map);
426                         goto err_info;
427                 }
428         }
429         en_dbg(DRV, priv, "Allocated MBUF ring at addr:%p size:%d\n",
430                  ring->mbuf, tmp);
431
432         err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
433                                  ring->buf_size, 2 * PAGE_SIZE);
434         if (err)
435                 goto err_dma_map;
436
437         err = mlx4_en_map_buffer(&ring->wqres.buf);
438         if (err) {
439                 en_err(priv, "Failed to map RX buffer\n");
440                 goto err_hwq;
441         }
442         ring->buf = ring->wqres.buf.direct.buf;
443         *pring = ring;
444         return 0;
445
446 err_hwq:
447         mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
448 err_dma_map:
449         for (x = 0; x != size; x++) {
450                 bus_dmamap_destroy(ring->dma_tag,
451                     ring->mbuf[x].dma_map);
452         }
453         bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map);
454 err_info:
455         vfree(ring->mbuf);
456 err_dma_tag:
457         bus_dma_tag_destroy(ring->dma_tag);
458 err_ring:
459         kfree(ring);
460         return (err);
461 }
462
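/*
 * Bring the RX rings into operation: reset the ring indices, stamp and
 * skip a leading TXBB when an RX descriptor fits within one, prepare the
 * descriptors, optionally set up LRO, post the initial buffers and
 * publish each ring's producer index to the hardware.
 */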
463 int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
464 {
465         struct mlx4_en_rx_ring *ring;
466 #if (MLX4_EN_MAX_RX_SEGS == 1)
467         int i;
468 #endif
469         int ring_ind;
470         int err;
471
472         for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
473                 ring = priv->rx_ring[ring_ind];
474
475                 ring->prod = 0;
476                 ring->cons = 0;
477                 ring->actual_size = 0;
478                 ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn;
479                 ring->rx_mb_size = priv->rx_mb_size;
480
481                 if (sizeof(struct mlx4_en_rx_desc) <= TXBB_SIZE) {
482                         /* Stamp first unused send wqe */
483                         __be32 *ptr = (__be32 *)ring->buf;
484                         __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT);
485                         *ptr = stamp;
486                         /* Move pointer to start of rx section */
487                         ring->buf += TXBB_SIZE;
488                 }
489
490                 ring->log_stride = ilog2(sizeof(struct mlx4_en_rx_desc));
491                 ring->buf_size = ring->size * sizeof(struct mlx4_en_rx_desc);
492
493                 memset(ring->buf, 0, ring->buf_size);
494                 mlx4_en_update_rx_prod_db(ring);
495
496 #if (MLX4_EN_MAX_RX_SEGS == 1)
497                 /* Initialize all descriptors */
498                 for (i = 0; i < ring->size; i++)
499                         mlx4_en_init_rx_desc(priv, ring, i);
500 #endif
501                 ring->rx_mr_key_be = cpu_to_be32(priv->mdev->mr.key);
502
503 #ifdef INET
504                 /* Configure the LRO manager */
505                 if (priv->dev->if_capenable & IFCAP_LRO) {
506                         if (tcp_lro_init(&ring->lro))
507                                 priv->dev->if_capenable &= ~IFCAP_LRO;
508                         else
509                                 ring->lro.ifp = priv->dev;
510                 }
511 #endif
512         }
513
514
515         err = mlx4_en_fill_rx_buffers(priv);
516         if (err)
517                 goto err_buffers;
518
519         for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
520                 ring = priv->rx_ring[ring_ind];
521
522                 ring->size_mask = ring->actual_size - 1;
523                 mlx4_en_update_rx_prod_db(ring);
524         }
525
526         return 0;
527
528 err_buffers:
529         for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
530                 mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]);
531
532         ring_ind = priv->rx_ring_num - 1;
533
534         while (ring_ind >= 0) {
535                 ring = priv->rx_ring[ring_ind];
536                 if (sizeof(struct mlx4_en_rx_desc) <= TXBB_SIZE)
537                         ring->buf -= TXBB_SIZE;
538                 ring_ind--;
539         }
540
541         return err;
542 }
543
544
545 void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
546                              struct mlx4_en_rx_ring **pring,
547                              u32 size)
548 {
549         struct mlx4_en_dev *mdev = priv->mdev;
550         struct mlx4_en_rx_ring *ring = *pring;
551         uint32_t x;
552
553         mlx4_en_unmap_buffer(&ring->wqres.buf);
554         mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * sizeof(struct mlx4_en_rx_desc) + TXBB_SIZE);
555         for (x = 0; x != size; x++)
556                 bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map);
557         /* free spare mbuf, if any */
558         if (ring->spare.mbuf != NULL) {
559                 bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map,
560                     BUS_DMASYNC_POSTREAD);
561                 bus_dmamap_unload(ring->dma_tag, ring->spare.dma_map);
562                 m_freem(ring->spare.mbuf);
563         }
564         bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map);
565         vfree(ring->mbuf);
566         bus_dma_tag_destroy(ring->dma_tag);
567 #ifdef CONFIG_RFS_ACCEL
568         mlx4_en_cleanup_filters(priv, ring);
569 #endif
570         kfree(ring);
571         *pring = NULL;
572 }
573
574 void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
575                                 struct mlx4_en_rx_ring *ring)
576 {
577 #ifdef INET
578         tcp_lro_free(&ring->lro);
579 #endif
580         mlx4_en_free_rx_buf(priv, ring);
581         if (sizeof(struct mlx4_en_rx_desc) <= TXBB_SIZE)
582                 ring->buf -= TXBB_SIZE;
583 }
584
585
586 static void validate_loopback(struct mlx4_en_priv *priv, struct mbuf *mb)
587 {
588         int i;
589         int offset = ETHER_HDR_LEN;
590
591         for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) {
592                 if (*(mb->m_data + offset) != (unsigned char) (i & 0xff))
593                         goto out_loopback;
594         }
595         /* Loopback found */
596         priv->loopback_ok = 1;
597
598 out_loopback:
599         m_freem(mb);
600 }
601
602
603 static inline int invalid_cqe(struct mlx4_en_priv *priv,
604                               struct mlx4_cqe *cqe)
605 {
606         /* Drop packet on bad receive or bad checksum */
607         if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
608                      MLX4_CQE_OPCODE_ERROR)) {
609                 en_err(priv, "CQE completed in error - vendor syndrome:%d syndrome:%d\n",
610                        ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome,
611                        ((struct mlx4_err_cqe *)cqe)->syndrome);
612                 return 1;
613         }
614         if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
615                 en_dbg(RX_ERR, priv, "Dropping frame with bad FCS\n");
616                 return 1;
617         }
618
619         return 0;
620 }
621
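/*
 * Convert a completed RX descriptor into an mbuf for the stack.  Small
 * frames that fit in a packet header mbuf are copied so the DMA-mapped
 * cluster stays in the ring; larger frames hand the cluster (chain) up
 * and atomically replace it via mlx4_en_alloc_buf(), then the chain is
 * trimmed to the hardware-reported length.
 */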
622 static struct mbuf *
623 mlx4_en_rx_mb(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring,
624     struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_mbuf *mb_list,
625     int length)
626 {
627 #if (MLX4_EN_MAX_RX_SEGS != 1)
628         struct mbuf *mb_head;
629 #endif
630         struct mbuf *mb;
631
632         /* optimise reception of small packets */
633         if (length <= (MHLEN - MLX4_NET_IP_ALIGN) &&
634             (mb = m_gethdr(M_NOWAIT, MT_DATA)) != NULL) {
635
636                 /* set packet length */
637                 mb->m_pkthdr.len = mb->m_len = length;
638
639                 /* make sure IP header gets aligned */
640                 mb->m_data += MLX4_NET_IP_ALIGN;
641
642                 bus_dmamap_sync(ring->dma_tag, mb_list->dma_map,
643                     BUS_DMASYNC_POSTREAD);
644
645                 bcopy(mtod(mb_list->mbuf, caddr_t), mtod(mb, caddr_t), length);
646
647                 return (mb);
648         }
649
650         /* get mbuf */
651         mb = mb_list->mbuf;
652
653         /* collect used fragment while atomically replacing it */
654         if (mlx4_en_alloc_buf(ring, rx_desc, mb_list))
655                 return (NULL);
656
657         /* range check hardware computed value */
658         if (unlikely(length > mb->m_pkthdr.len))
659                 length = mb->m_pkthdr.len;
660
661 #if (MLX4_EN_MAX_RX_SEGS == 1)
662         /* update total packet length in packet header */
663         mb->m_len = mb->m_pkthdr.len = length;
664 #else
665         mb->m_pkthdr.len = length;
666         for (mb_head = mb; mb != NULL; mb = mb->m_next) {
667                 if (mb->m_len > length)
668                         mb->m_len = length;
669                 length -= mb->m_len;
670                 if (likely(length == 0)) {
671                         if (likely(mb->m_next != NULL)) {
672                                 /* trim off empty mbufs */
673                                 m_freem(mb->m_next);
674                                 mb->m_next = NULL;
675                         }
676                         break;
677                 }
678         }
679         /* rewind to first mbuf in chain */
680         mb = mb_head;
681 #endif
682         return (mb);
683 }
684
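/*
 * Map the RSS-related status bits of a CQE to an M_HASHTYPE_* value, e.g.
 * IPv4+TCP yields M_HASHTYPE_RSS_TCP_IPV4.  UDP hash types are reported
 * only when the device profile enables UDP RSS; otherwise the plain
 * IPv4/IPv6 hash type is returned.
 */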
685 static __inline int
686 mlx4_en_rss_hash(__be16 status, int udp_rss)
687 {
688         enum {
689                 status_all = cpu_to_be16(
690                         MLX4_CQE_STATUS_IPV4    |
691                         MLX4_CQE_STATUS_IPV4F   |
692                         MLX4_CQE_STATUS_IPV6    |
693                         MLX4_CQE_STATUS_TCP     |
694                         MLX4_CQE_STATUS_UDP),
695                 status_ipv4_tcp = cpu_to_be16(
696                         MLX4_CQE_STATUS_IPV4    |
697                         MLX4_CQE_STATUS_TCP),
698                 status_ipv6_tcp = cpu_to_be16(
699                         MLX4_CQE_STATUS_IPV6    |
700                         MLX4_CQE_STATUS_TCP),
701                 status_ipv4_udp = cpu_to_be16(
702                         MLX4_CQE_STATUS_IPV4    |
703                         MLX4_CQE_STATUS_UDP),
704                 status_ipv6_udp = cpu_to_be16(
705                         MLX4_CQE_STATUS_IPV6    |
706                         MLX4_CQE_STATUS_UDP),
707                 status_ipv4 = cpu_to_be16(MLX4_CQE_STATUS_IPV4),
708                 status_ipv6 = cpu_to_be16(MLX4_CQE_STATUS_IPV6)
709         };
710
711         status &= status_all;
712         switch (status) {
713         case status_ipv4_tcp:
714                 return (M_HASHTYPE_RSS_TCP_IPV4);
715         case status_ipv6_tcp:
716                 return (M_HASHTYPE_RSS_TCP_IPV6);
717         case status_ipv4_udp:
718                 return (udp_rss ? M_HASHTYPE_RSS_UDP_IPV4
719                     : M_HASHTYPE_RSS_IPV4);
720         case status_ipv6_udp:
721                 return (udp_rss ? M_HASHTYPE_RSS_UDP_IPV6
722                     : M_HASHTYPE_RSS_IPV6);
723         default:
724                 if (status & status_ipv4)
725                         return (M_HASHTYPE_RSS_IPV4);
726                 if (status & status_ipv6)
727                         return (M_HASHTYPE_RSS_IPV6);
728                 return (M_HASHTYPE_OPAQUE_HASH);
729         }
730 }
731
732 /* For CPU architectures with a 64-byte cache line, performance is better when
733  * the CQE size is 64B. To enlarge the CQE size from 32B to 64B, 32B of padding
734  * (0xcc bytes) is placed at the beginning of each CQE; the real data occupies
735  * the remaining 32B. The following calculation ensures that when factor == 1
736  * the index is advanced past the padding and we read the real CQE data. */
737 #define CQE_FACTOR_INDEX(index, factor) (((index) << (factor)) + (factor))
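/*
 * Example: with 64B CQEs (factor == 1), index 5 maps to buf[11], i.e. the
 * valid trailing 32B half of the sixth 64B slot; with 32B CQEs
 * (factor == 0) the macro reduces to the index itself.
 */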
738 int mlx4_en_process_rx_cq(struct ifnet *dev, struct mlx4_en_cq *cq, int budget)
739 {
740         struct mlx4_en_priv *priv = netdev_priv(dev);
741         struct mlx4_cqe *cqe;
742         struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
743         struct mlx4_en_rx_mbuf *mb_list;
744         struct mlx4_en_rx_desc *rx_desc;
745         struct mbuf *mb;
746         struct mlx4_cq *mcq = &cq->mcq;
747         struct mlx4_cqe *buf = cq->buf;
748         int index;
749         unsigned int length;
750         int polled = 0;
751         u32 cons_index = mcq->cons_index;
752         u32 size_mask = ring->size_mask;
753         int size = cq->size;
754         int factor = priv->cqe_factor;
755         const int udp_rss = priv->mdev->profile.udp_rss;
756
757         if (!priv->port_up)
758                 return 0;
759
760         /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
761          * descriptor offset can be deduced from the CQE index instead of
762          * reading 'cqe->index' */
763         index = cons_index & size_mask;
764         cqe = &buf[CQE_FACTOR_INDEX(index, factor)];
765
766         /* Process all completed CQEs */
767         while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
768                     cons_index & size)) {
769                 mb_list = ring->mbuf + index;
770                 rx_desc = ((struct mlx4_en_rx_desc *)ring->buf) + index;
771
772                 /*
773                  * make sure we read the CQE after we read the ownership bit
774                  */
775                 rmb();
776
777                 if (invalid_cqe(priv, cqe)) {
778                         goto next;
779                 }
780                 /*
781                  * Packet is OK - process it.
782                  */
783                 length = be32_to_cpu(cqe->byte_cnt);
784                 length -= ring->fcs_del;
785
786                 mb = mlx4_en_rx_mb(priv, ring, rx_desc, mb_list, length);
787                 if (unlikely(!mb)) {
788                         ring->errors++;
789                         goto next;
790                 }
791
792                 ring->bytes += length;
793                 ring->packets++;
794
795                 if (unlikely(priv->validate_loopback)) {
796                         validate_loopback(priv, mb);
797                         goto next;
798                 }
799
800                 /* forward Toeplitz compatible hash value */
801                 mb->m_pkthdr.flowid = be32_to_cpu(cqe->immed_rss_invalid);
802                 M_HASHTYPE_SET(mb, mlx4_en_rss_hash(cqe->status, udp_rss));
803                 mb->m_pkthdr.rcvif = dev;
804                 if (be32_to_cpu(cqe->vlan_my_qpn) &
805                     MLX4_CQE_CVLAN_PRESENT_MASK) {
806                         mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->sl_vid);
807                         mb->m_flags |= M_VLANTAG;
808                 }
809                 if (likely(dev->if_capenable &
810                     (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) &&
811                     (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
812                     (cqe->checksum == cpu_to_be16(0xffff))) {
813                         priv->port_stats.rx_chksum_good++;
814                         mb->m_pkthdr.csum_flags =
815                             CSUM_IP_CHECKED | CSUM_IP_VALID |
816                             CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
817                         mb->m_pkthdr.csum_data = htons(0xffff);
818                         /* This packet is eligible for LRO if it is:
819                          * - DIX Ethernet (type interpretation)
820                          * - TCP/IP (v4)
821                          * - without IP options
822                          * - not an IP fragment
823                          */
824 #ifdef INET
825                         if (mlx4_en_can_lro(cqe->status) &&
826                                         (dev->if_capenable & IFCAP_LRO)) {
827                                 if (ring->lro.lro_cnt != 0 &&
828                                                 tcp_lro_rx(&ring->lro, mb, 0) == 0)
829                                         goto next;
830                         }
831
832 #endif
833                         /* LRO not possible, complete processing here */
834                         INC_PERF_COUNTER(priv->pstats.lro_misses);
835                 } else {
836                         mb->m_pkthdr.csum_flags = 0;
837                         priv->port_stats.rx_chksum_none++;
838                 }
839
840                 /* Push it up the stack */
841                 dev->if_input(dev, mb);
842
843 next:
844                 ++cons_index;
845                 index = cons_index & size_mask;
846                 cqe = &buf[CQE_FACTOR_INDEX(index, factor)];
847                 if (++polled == budget)
848                         goto out;
849         }
850         /* Flush all pending LRO sessions */
851 out:
852 #ifdef INET
853         tcp_lro_flush_all(&ring->lro);
854 #endif
855         AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
856         mcq->cons_index = cons_index;
857         mlx4_cq_set_ci(mcq);
858         wmb(); /* ensure HW sees CQ consumer before we post new buffers */
859         ring->cons = mcq->cons_index;
860         ring->prod += polled; /* Polled descriptors were reallocated in place */
861         mlx4_en_update_rx_prod_db(ring);
862         return polled;
863
864 }
865
866 /* Rx CQ polling - called from the RX interrupt handler and its taskqueue */
867 static int mlx4_en_poll_rx_cq(struct mlx4_en_cq *cq, int budget)
868 {
869         struct ifnet *dev = cq->dev;
870         struct epoch_tracker et;
871         int done;
872
873         NET_EPOCH_ENTER(et);
874         done = mlx4_en_process_rx_cq(dev, cq, budget);
875         NET_EPOCH_EXIT(et);
876         cq->tot_rx += done;
877
878         return done;
879 }
880 void mlx4_en_rx_irq(struct mlx4_cq *mcq)
881 {
882         struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
883         struct mlx4_en_priv *priv = netdev_priv(cq->dev);
884         int done;
885
886         /* Poll one budget of completions within the IRQ context,
887          * because there is no NAPI on FreeBSD. */
888         done = mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET);
889         if (priv->port_up && (done == MLX4_EN_RX_BUDGET)) {
890                 cq->curr_poll_rx_cpu_id = curcpu;
891                 taskqueue_enqueue(cq->tq, &cq->cq_task);
892         } else {
894                 mlx4_en_arm_cq(priv, cq);
895         }
896 }
897
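/*
 * Taskqueue continuation of the RX interrupt: bind to the CPU that took
 * the interrupt, keep polling full budgets until the CQ drains below the
 * budget, then re-arm the CQ.
 */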
898 void mlx4_en_rx_que(void *context, int pending)
899 {
900         struct epoch_tracker et;
901         struct mlx4_en_cq *cq;
902         struct thread *td;
903
904         cq = context;
905         td = curthread;
906
907         thread_lock(td);
908         sched_bind(td, cq->curr_poll_rx_cpu_id);
909         thread_unlock(td);
910
911         NET_EPOCH_ENTER(et);
912         while (mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET)
913                         == MLX4_EN_RX_BUDGET);
914         NET_EPOCH_EXIT(et);
915         mlx4_en_arm_cq(cq->dev->if_softc, cq);
916 }
917
918
919 /* RSS related functions */
920
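/*
 * Create one RX QP, attach it to its ring's CQ and bring it to the ready
 * state.  When the firmware can keep the FCS, hardware FCS stripping is
 * cancelled and ring->fcs_del records the trailing bytes to subtract from
 * every completion length.
 */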
921 static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
922                                  struct mlx4_en_rx_ring *ring,
923                                  enum mlx4_qp_state *state,
924                                  struct mlx4_qp *qp)
925 {
926         struct mlx4_en_dev *mdev = priv->mdev;
927         struct mlx4_qp_context *context;
928         int err = 0;
929
930         context = kmalloc(sizeof *context, GFP_KERNEL);
931         if (!context) {
932                 en_err(priv, "Failed to allocate qp context\n");
933                 return -ENOMEM;
934         }
935
936         err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL);
937         if (err) {
938                 en_err(priv, "Failed to allocate qp #%x\n", qpn);
939                 goto out;
940         }
941         qp->event = mlx4_en_sqp_event;
942
943         memset(context, 0, sizeof *context);
944         mlx4_en_fill_qp_context(priv, ring->actual_size, sizeof(struct mlx4_en_rx_desc), 0, 0,
945                                 qpn, ring->cqn, -1, context);
946         context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);
947
948         /* Cancel FCS removal if FW allows */
949         if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) {
950                 context->param3 |= cpu_to_be32(1 << 29);
951                 ring->fcs_del = ETH_FCS_LEN;
952         } else
953                 ring->fcs_del = 0;
954
955         err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state);
956         if (err) {
957                 mlx4_qp_remove(mdev->dev, qp);
958                 mlx4_qp_free(mdev->dev, qp);
959         }
960         mlx4_en_update_rx_prod_db(ring);
961 out:
962         kfree(context);
963         return err;
964 }
965
966 int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
967 {
968         int err;
969         u32 qpn;
970
971         err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, 0);
972         if (err) {
973                 en_err(priv, "Failed reserving drop qpn\n");
974                 return err;
975         }
976         err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL);
977         if (err) {
978                 en_err(priv, "Failed allocating drop qp\n");
979                 mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
980                 return err;
981         }
982
983         return 0;
984 }
985
986 void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv)
987 {
988         u32 qpn;
989
990         qpn = priv->drop_qp.qpn;
991         mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp);
992         mlx4_qp_free(priv->mdev->dev, &priv->drop_qp);
993         mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
994 }
995
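/*
 * Return the fixed 40-byte (ten 32-bit words) Toeplitz hash key used for
 * RSS; the key length is reported through *keylen when it is non-NULL.
 */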
996 const u32 *
997 mlx4_en_get_rss_key(struct mlx4_en_priv *priv __unused,
998     u16 *keylen)
999 {
1000         static const u32 rsskey[10] = {
1001                 cpu_to_be32(0xD181C62C),
1002                 cpu_to_be32(0xF7F4DB5B),
1003                 cpu_to_be32(0x1983A2FC),
1004                 cpu_to_be32(0x943E1ADB),
1005                 cpu_to_be32(0xD9389E6B),
1006                 cpu_to_be32(0xD1039C2C),
1007                 cpu_to_be32(0xA74499AD),
1008                 cpu_to_be32(0x593D56D9),
1009                 cpu_to_be32(0xF3253C06),
1010                 cpu_to_be32(0x2ADC1FFC)
1011         };
1012
1013         if (keylen != NULL)
1014                 *keylen = sizeof(rsskey);
1015         return (rsskey);
1016 }
1017
1018 u8 mlx4_en_get_rss_mask(struct mlx4_en_priv *priv)
1019 {
1020         u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
1021                         MLX4_RSS_TCP_IPV6);
1022
1023         if (priv->mdev->profile.udp_rss)
1024                 rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6;
1025         return (rss_mask);
1026 }
1027
1028 /* Allocate the RX QPs and configure them according to the RSS map */
1029 int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
1030 {
1031         struct mlx4_en_dev *mdev = priv->mdev;
1032         struct mlx4_en_rss_map *rss_map = &priv->rss_map;
1033         struct mlx4_qp_context context;
1034         struct mlx4_rss_context *rss_context;
1035         const u32 *key;
1036         int rss_rings;
1037         void *ptr;
1038         int i;
1039         int err = 0;
1040         int good_qps = 0;
1041
1042         en_dbg(DRV, priv, "Configuring rss steering\n");
1043         err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
1044                                     priv->rx_ring_num,
1045                                     &rss_map->base_qpn, 0);
1046         if (err) {
1047                 en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
1048                 return err;
1049         }
1050
1051         for (i = 0; i < priv->rx_ring_num; i++) {
1052                 priv->rx_ring[i]->qpn = rss_map->base_qpn + i;
1053                 err = mlx4_en_config_rss_qp(priv, priv->rx_ring[i]->qpn,
1054                                             priv->rx_ring[i],
1055                                             &rss_map->state[i],
1056                                             &rss_map->qps[i]);
1057                 if (err)
1058                         goto rss_err;
1059
1060                 ++good_qps;
1061         }
1062
1063         /* Configure RSS indirection qp */
1064         err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL);
1065         if (err) {
1066                 en_err(priv, "Failed to allocate RSS indirection QP\n");
1067                 goto rss_err;
1068         }
1069         rss_map->indir_qp.event = mlx4_en_sqp_event;
1070         mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
1071                                 priv->rx_ring[0]->cqn, -1, &context);
1072
1073         if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num)
1074                 rss_rings = priv->rx_ring_num;
1075         else
1076                 rss_rings = priv->prof->rss_rings;
1077
1078         ptr = ((u8 *)&context) + offsetof(struct mlx4_qp_context, pri_path) +
1079             MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
1080         rss_context = ptr;
1081         rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 |
1082                                             (rss_map->base_qpn));
1083         rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
1084         if (priv->mdev->profile.udp_rss)
1085                 rss_context->base_qpn_udp = rss_context->default_qpn;
1086         rss_context->flags = mlx4_en_get_rss_mask(priv);
1087         rss_context->hash_fn = MLX4_RSS_HASH_TOP;
1088         key = mlx4_en_get_rss_key(priv, NULL);
1089         for (i = 0; i < 10; i++)
1090                 rss_context->rss_key[i] = key[i];
1091
1092         err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
1093                                &rss_map->indir_qp, &rss_map->indir_state);
1094         if (err)
1095                 goto indir_err;
1096
1097         return 0;
1098
1099 indir_err:
1100         mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
1101                        MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
1102         mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
1103         mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
1104 rss_err:
1105         for (i = 0; i < good_qps; i++) {
1106                 mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
1107                                MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
1108                 mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
1109                 mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
1110         }
1111         mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
1112         return err;
1113 }
1114
1115 void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
1116 {
1117         struct mlx4_en_dev *mdev = priv->mdev;
1118         struct mlx4_en_rss_map *rss_map = &priv->rss_map;
1119         int i;
1120
1121         mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
1122                        MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
1123         mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
1124         mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
1125
1126         for (i = 0; i < priv->rx_ring_num; i++) {
1127                 mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
1128                                MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
1129                 mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
1130                 mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
1131         }
1132         mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
1133 }
1134