/*-
 * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "en.h"

#include <sys/sockio.h>
#include <machine/atomic.h>

#ifndef ETH_DRIVER_VERSION
#define ETH_DRIVER_VERSION      "3.5.0"
#endif
#define DRIVER_RELDATE  "November 2018"

static const char mlx5e_version[] = "mlx5en: Mellanox Ethernet driver "
        ETH_DRIVER_VERSION " (" DRIVER_RELDATE ")\n";

static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs);

struct mlx5e_channel_param {
        struct mlx5e_rq_param rq;
        struct mlx5e_sq_param sq;
        struct mlx5e_cq_param rx_cq;
        struct mlx5e_cq_param tx_cq;
};

static const struct {
        u32     subtype;
        u64     baudrate;
}       mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {

        [MLX5E_1000BASE_CX_SGMII] = {
                .subtype = IFM_1000_CX_SGMII,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_1000BASE_KX] = {
                .subtype = IFM_1000_KX,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_10GBASE_CX4] = {
                .subtype = IFM_10G_CX4,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_KX4] = {
                .subtype = IFM_10G_KX4,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_KR] = {
                .subtype = IFM_10G_KR,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_20GBASE_KR2] = {
                .subtype = IFM_20G_KR2,
                .baudrate = IF_Gbps(20ULL),
        },
        [MLX5E_40GBASE_CR4] = {
                .subtype = IFM_40G_CR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_40GBASE_KR4] = {
                .subtype = IFM_40G_KR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_56GBASE_R4] = {
                .subtype = IFM_56G_R4,
                .baudrate = IF_Gbps(56ULL),
        },
        [MLX5E_10GBASE_CR] = {
                .subtype = IFM_10G_CR1,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_SR] = {
                .subtype = IFM_10G_SR,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_ER] = {
                .subtype = IFM_10G_ER,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_40GBASE_SR4] = {
                .subtype = IFM_40G_SR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_40GBASE_LR4] = {
                .subtype = IFM_40G_LR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_100GBASE_CR4] = {
                .subtype = IFM_100G_CR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_SR4] = {
                .subtype = IFM_100G_SR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_KR4] = {
                .subtype = IFM_100G_KR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_LR4] = {
                .subtype = IFM_100G_LR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100BASE_TX] = {
                .subtype = IFM_100_TX,
                .baudrate = IF_Mbps(100ULL),
        },
        [MLX5E_1000BASE_T] = {
                .subtype = IFM_1000_T,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_10GBASE_T] = {
                .subtype = IFM_10G_T,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_25GBASE_CR] = {
                .subtype = IFM_25G_CR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_25GBASE_KR] = {
                .subtype = IFM_25G_KR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_25GBASE_SR] = {
                .subtype = IFM_25G_SR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_50GBASE_CR2] = {
                .subtype = IFM_50G_CR2,
                .baudrate = IF_Gbps(50ULL),
        },
        [MLX5E_50GBASE_KR2] = {
                .subtype = IFM_50G_KR2,
                .baudrate = IF_Gbps(50ULL),
        },
};

MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");

static void
mlx5e_update_carrier(struct mlx5e_priv *priv)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        u32 out[MLX5_ST_SZ_DW(ptys_reg)];
        u32 eth_proto_oper;
        int error;
        u8 port_state;
        u8 is_er_type;
        u8 i;

        port_state = mlx5_query_vport_state(mdev,
            MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);

        if (port_state == VPORT_STATE_UP) {
                priv->media_status_last |= IFM_ACTIVE;
        } else {
                priv->media_status_last &= ~IFM_ACTIVE;
                priv->media_active_last = IFM_ETHER;
                if_link_state_change(priv->ifp, LINK_STATE_DOWN);
                return;
        }

        error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
        if (error) {
                priv->media_active_last = IFM_ETHER;
                priv->ifp->if_baudrate = 1;
                if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
                    __func__, error);
                return;
        }
        eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);

        for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
                if (mlx5e_mode_table[i].baudrate == 0)
                        continue;
                if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
                        u32 subtype = mlx5e_mode_table[i].subtype;

                        priv->ifp->if_baudrate =
                            mlx5e_mode_table[i].baudrate;

                        switch (subtype) {
                        case IFM_10G_ER:
                                error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
                                if (error != 0) {
                                        if_printf(priv->ifp, "%s: query port pddr failed: %d\n",
                                            __func__, error);
                                }
                                if (error != 0 || is_er_type == 0)
                                        subtype = IFM_10G_LR;
                                break;
                        case IFM_40G_LR4:
                                error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
                                if (error != 0) {
                                        if_printf(priv->ifp, "%s: query port pddr failed: %d\n",
                                            __func__, error);
                                }
                                if (error == 0 && is_er_type != 0)
                                        subtype = IFM_40G_ER4;
                                break;
                        }
                        priv->media_active_last = subtype | IFM_ETHER | IFM_FDX;
                        break;
                }
        }
        if_link_state_change(priv->ifp, LINK_STATE_UP);
}

static void
mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
{
        struct mlx5e_priv *priv = dev->if_softc;

        ifmr->ifm_status = priv->media_status_last;
        ifmr->ifm_active = priv->media_active_last |
            (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
            (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
}

static u32
mlx5e_find_link_mode(u32 subtype)
{
        u32 i;
        u32 link_mode = 0;

        switch (subtype) {
        case IFM_10G_LR:
                subtype = IFM_10G_ER;
                break;
        case IFM_40G_ER4:
                subtype = IFM_40G_LR4;
                break;
        }

        for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
                if (mlx5e_mode_table[i].baudrate == 0)
                        continue;
                if (mlx5e_mode_table[i].subtype == subtype)
                        link_mode |= MLX5E_PROT_MASK(i);
        }

        return (link_mode);
}
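
/*
 * Illustrative note (not part of the original file): mlx5e_find_link_mode()
 * is the reverse lookup of mlx5e_mode_table[] above. The PTYS register
 * describes link modes as a bitmask in which bit "i" corresponds to table
 * index "i", so the reverse mapping from an ifmedia subtype is, in sketch
 * form:
 *
 *      link_mode = 0;
 *      for (i = 0; i < MLX5E_LINK_MODES_NUMBER; i++)
 *              if (mlx5e_mode_table[i].subtype == subtype)
 *                      link_mode |= MLX5E_PROT_MASK(i);
 *
 * IFM_10G_LR and IFM_40G_ER4 are first mapped back to IFM_10G_ER and
 * IFM_40G_LR4, mirroring the aliasing done in mlx5e_update_carrier().
 */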

static int
mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
{
        return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
            priv->params.rx_pauseframe_control,
            priv->params.tx_pauseframe_control,
            priv->params.rx_priority_flow_control,
            priv->params.tx_priority_flow_control));
}

static int
mlx5e_set_port_pfc(struct mlx5e_priv *priv)
{
        int error;

        if (priv->gone != 0) {
                error = -ENXIO;
        } else if (priv->params.rx_pauseframe_control ||
            priv->params.tx_pauseframe_control) {
                if_printf(priv->ifp,
                    "Global pauseframes must be disabled before enabling PFC.\n");
                error = -EINVAL;
        } else {
                error = mlx5e_set_port_pause_and_pfc(priv);
        }
        return (error);
}

static int
mlx5e_media_change(struct ifnet *dev)
{
        struct mlx5e_priv *priv = dev->if_softc;
        struct mlx5_core_dev *mdev = priv->mdev;
        u32 eth_proto_cap;
        u32 link_mode;
        int was_opened;
        int locked;
        int error;

        locked = PRIV_LOCKED(priv);
        if (!locked)
                PRIV_LOCK(priv);

        if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
                error = EINVAL;
                goto done;
        }
        link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));

        /* query supported capabilities */
        error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
        if (error != 0) {
                if_printf(dev, "Query port media capability failed\n");
                goto done;
        }
        /* check for autoselect */
        if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
                link_mode = eth_proto_cap;
                if (link_mode == 0) {
                        if_printf(dev, "Port media capability is zero\n");
                        error = EINVAL;
                        goto done;
                }
        } else {
                link_mode = link_mode & eth_proto_cap;
                if (link_mode == 0) {
                        if_printf(dev, "Not supported link mode requested\n");
                        error = EINVAL;
                        goto done;
                }
        }
        if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
                /* check if PFC is enabled */
                if (priv->params.rx_priority_flow_control ||
                    priv->params.tx_priority_flow_control) {
                        if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n");
                        error = EINVAL;
                        goto done;
                }
        }
        /* update pauseframe control bits */
        priv->params.rx_pauseframe_control =
            (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
        priv->params.tx_pauseframe_control =
            (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;

        /* check if device is opened */
        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);

        /* reconfigure the hardware */
        mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
        mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
        error = -mlx5e_set_port_pause_and_pfc(priv);
        if (was_opened)
                mlx5_set_port_status(mdev, MLX5_PORT_UP);

done:
        if (!locked)
                PRIV_UNLOCK(priv);
        return (error);
}

static void
mlx5e_update_carrier_work(struct work_struct *work)
{
        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
            update_carrier_work);

        PRIV_LOCK(priv);
        if (test_bit(MLX5E_STATE_OPENED, &priv->state))
                mlx5e_update_carrier(priv);
        PRIV_UNLOCK(priv);
}

/*
 * This function reads the physical port counters from the firmware
 * using a pre-defined layout defined by various MLX5E_PPORT_XXX()
 * macros. The output is converted from big-endian 64-bit values into
 * host endian ones and stored in the "priv->stats.pport" structure.
 */
static void
mlx5e_update_pport_counters(struct mlx5e_priv *priv)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_pport_stats *s = &priv->stats.pport;
        struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
        u32 *in;
        u32 *out;
        const u64 *ptr;
        unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
        unsigned x;
        unsigned y;
        unsigned z;

        /* allocate firmware request structures */
        in = mlx5_vzalloc(sz);
        out = mlx5_vzalloc(sz);
        if (in == NULL || out == NULL)
                goto free_out;

        /*
         * Get pointer to the 64-bit counter set which is located at a
         * fixed offset in the output firmware request structure:
         */
        ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);

        MLX5_SET(ppcnt_reg, in, local_port, 1);

        /* read IEEE802_3 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
             x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
                s->arg[y] = be64toh(ptr[x]);

        /* read RFC2819 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
                s->arg[y] = be64toh(ptr[x]);
        for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
            MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read RFC2863 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read physical layer stats counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read Extended Ethernet counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read per-priority counters */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);

        /* iterate all the priorities */
        for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
                MLX5_SET(ppcnt_reg, in, prio_tc, z);
                mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);

                /* read per priority stats counter group using predefined counter layout */
                for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
                    MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
                        s->arg[y] = be64toh(ptr[x]);
        }

free_out:
        /* free firmware request structures */
        kvfree(in);
        kvfree(out);
}
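
/*
 * Illustrative note (an assumption, not part of the original file): the
 * MLX5E_PPORT_XXX_STATS_NUM constants describe how "s->arg[]" and
 * "s_debug->arg[]" overlay the counter structures as flat arrays of u64,
 * so each firmware counter group can be copied with one loop. The
 * firmware returns big-endian 64-bit values, hence the be64toh()
 * conversion:
 *
 *      for (x = 0; x != GROUP_NUM; x++, y++)
 *              s->arg[y] = be64toh(ptr[x]);
 *
 * The running index "y" assigns every group its own contiguous slice of
 * the destination array.
 */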

/*
 * This function is called regularly to collect all statistics
 * counters from the firmware. The values can be viewed through the
 * sysctl interface. Execution is serialized using the priv's global
 * configuration lock.
 */
static void
mlx5e_update_stats_work(struct work_struct *work)
{
        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
            update_stats_work);
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_vport_stats *s = &priv->stats.vport;
        struct mlx5e_sq_stats *sq_stats;
        struct buf_ring *sq_br;
#if (__FreeBSD_version < 1100000)
        struct ifnet *ifp = priv->ifp;
#endif

        u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
        u32 *out;
        int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
        u64 tso_packets = 0;
        u64 tso_bytes = 0;
        u64 tx_queue_dropped = 0;
        u64 tx_defragged = 0;
        u64 tx_offload_none = 0;
        u64 lro_packets = 0;
        u64 lro_bytes = 0;
        u64 sw_lro_queued = 0;
        u64 sw_lro_flushed = 0;
        u64 rx_csum_none = 0;
        u64 rx_wqe_err = 0;
        u32 rx_out_of_buffer = 0;
        int i;
        int j;

        PRIV_LOCK(priv);
        out = mlx5_vzalloc(outlen);
        if (out == NULL)
                goto free_out;
        if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
                goto free_out;

        /* Collect the SW counters first and then the HW counters, for consistency */
        for (i = 0; i < priv->params.num_channels; i++) {
                struct mlx5e_channel *pch = priv->channel + i;
                struct mlx5e_rq *rq = &pch->rq;
                struct mlx5e_rq_stats *rq_stats = &pch->rq.stats;

                /* collect stats from LRO */
                rq_stats->sw_lro_queued = rq->lro.lro_queued;
                rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
                sw_lro_queued += rq_stats->sw_lro_queued;
                sw_lro_flushed += rq_stats->sw_lro_flushed;
                lro_packets += rq_stats->lro_packets;
                lro_bytes += rq_stats->lro_bytes;
                rx_csum_none += rq_stats->csum_none;
                rx_wqe_err += rq_stats->wqe_err;

                for (j = 0; j < priv->num_tc; j++) {
                        sq_stats = &pch->sq[j].stats;
                        sq_br = pch->sq[j].br;

                        tso_packets += sq_stats->tso_packets;
                        tso_bytes += sq_stats->tso_bytes;
                        tx_queue_dropped += sq_stats->dropped;
                        if (sq_br != NULL)
                                tx_queue_dropped += sq_br->br_drops;
                        tx_defragged += sq_stats->defragged;
                        tx_offload_none += sq_stats->csum_offload_none;
                }
        }

        /* update counters */
        s->tso_packets = tso_packets;
        s->tso_bytes = tso_bytes;
        s->tx_queue_dropped = tx_queue_dropped;
        s->tx_defragged = tx_defragged;
        s->lro_packets = lro_packets;
        s->lro_bytes = lro_bytes;
        s->sw_lro_queued = sw_lro_queued;
        s->sw_lro_flushed = sw_lro_flushed;
        s->rx_csum_none = rx_csum_none;
        s->rx_wqe_err = rx_wqe_err;

        /* HW counters */
        memset(in, 0, sizeof(in));

        MLX5_SET(query_vport_counter_in, in, opcode,
            MLX5_CMD_OP_QUERY_VPORT_COUNTER);
        MLX5_SET(query_vport_counter_in, in, op_mod, 0);
        MLX5_SET(query_vport_counter_in, in, other_vport, 0);

        memset(out, 0, outlen);

        /* get number of out-of-buffer drops first */
        if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
            &rx_out_of_buffer))
                goto free_out;

        /* accumulate difference into a 64-bit counter */
        s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
        s->rx_out_of_buffer_prev = rx_out_of_buffer;
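        /*
         * Note (added comment): the firmware out-of-buffer counter is
         * only 32 bits wide. Computing the delta against the previous
         * snapshot in 32-bit arithmetic stays correct across counter
         * wraparound, and the delta is then accumulated into the 64-bit
         * software counter above.
         */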

        /* get port statistics */
        if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
                goto free_out;

#define MLX5_GET_CTR(out, x) \
        MLX5_GET64(query_vport_counter_out, out, x)

        s->rx_error_packets =
            MLX5_GET_CTR(out, received_errors.packets);
        s->rx_error_bytes =
            MLX5_GET_CTR(out, received_errors.octets);
        s->tx_error_packets =
            MLX5_GET_CTR(out, transmit_errors.packets);
        s->tx_error_bytes =
            MLX5_GET_CTR(out, transmit_errors.octets);

        s->rx_unicast_packets =
            MLX5_GET_CTR(out, received_eth_unicast.packets);
        s->rx_unicast_bytes =
            MLX5_GET_CTR(out, received_eth_unicast.octets);
        s->tx_unicast_packets =
            MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
        s->tx_unicast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_unicast.octets);

        s->rx_multicast_packets =
            MLX5_GET_CTR(out, received_eth_multicast.packets);
        s->rx_multicast_bytes =
            MLX5_GET_CTR(out, received_eth_multicast.octets);
        s->tx_multicast_packets =
            MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
        s->tx_multicast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_multicast.octets);

        s->rx_broadcast_packets =
            MLX5_GET_CTR(out, received_eth_broadcast.packets);
        s->rx_broadcast_bytes =
            MLX5_GET_CTR(out, received_eth_broadcast.octets);
        s->tx_broadcast_packets =
            MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
        s->tx_broadcast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);

        s->rx_packets =
            s->rx_unicast_packets +
            s->rx_multicast_packets +
            s->rx_broadcast_packets -
            s->rx_out_of_buffer;
        s->rx_bytes =
            s->rx_unicast_bytes +
            s->rx_multicast_bytes +
            s->rx_broadcast_bytes;
        s->tx_packets =
            s->tx_unicast_packets +
            s->tx_multicast_packets +
            s->tx_broadcast_packets;
        s->tx_bytes =
            s->tx_unicast_bytes +
            s->tx_multicast_bytes +
            s->tx_broadcast_bytes;

        /* Update calculated offload counters */
        s->tx_csum_offload = s->tx_packets - tx_offload_none;
        s->rx_csum_good = s->rx_packets - s->rx_csum_none;

        /* Get physical port counters */
        mlx5e_update_pport_counters(priv);

        s->tx_jumbo_packets =
            priv->stats.port_stats_debug.tx_stat_p1519to2047octets +
            priv->stats.port_stats_debug.tx_stat_p2048to4095octets +
            priv->stats.port_stats_debug.tx_stat_p4096to8191octets +
            priv->stats.port_stats_debug.tx_stat_p8192to10239octets;

#if (__FreeBSD_version < 1100000)
        /* no get_counters interface in FreeBSD 10 */
        ifp->if_ipackets = s->rx_packets;
        ifp->if_ierrors = s->rx_error_packets +
            priv->stats.pport.alignment_err +
            priv->stats.pport.check_seq_err +
            priv->stats.pport.crc_align_errors +
            priv->stats.pport.in_range_len_errors +
            priv->stats.pport.jabbers +
            priv->stats.pport.out_of_range_len +
            priv->stats.pport.oversize_pkts +
            priv->stats.pport.symbol_err +
            priv->stats.pport.too_long_errors +
            priv->stats.pport.undersize_pkts +
            priv->stats.pport.unsupported_op_rx;
        ifp->if_iqdrops = s->rx_out_of_buffer +
            priv->stats.pport.drop_events;
        ifp->if_opackets = s->tx_packets;
        ifp->if_oerrors = s->tx_error_packets;
        ifp->if_snd.ifq_drops = s->tx_queue_dropped;
        ifp->if_ibytes = s->rx_bytes;
        ifp->if_obytes = s->tx_bytes;
        ifp->if_collisions =
            priv->stats.pport.collisions;
#endif

free_out:
        kvfree(out);

        /* Update diagnostics, if any */
        if (priv->params_ethtool.diag_pci_enable ||
            priv->params_ethtool.diag_general_enable) {
                int error = mlx5_core_get_diagnostics_full(mdev,
                    priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
                    priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
                if (error != 0)
                        if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
        }
        PRIV_UNLOCK(priv);
}

static void
mlx5e_update_stats(void *arg)
{
        struct mlx5e_priv *priv = arg;

        queue_work(priv->wq, &priv->update_stats_work);

        callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
}
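
/*
 * Illustrative note (not part of the original file): statistics
 * collection uses the common callout + workqueue split. The callout
 * above fires in softclock context once per second ("hz" ticks), where
 * sleeping is not allowed, so it only queues the work item; the firmware
 * queries, which may sleep, run from "priv->wq":
 *
 *      callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
 *          -> mlx5e_update_stats()        (softclock, must not sleep)
 *          -> queue_work(priv->wq, ...)   (defer to process context)
 *          -> mlx5e_update_stats_work()   (may sleep, takes PRIV_LOCK())
 */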

static void
mlx5e_async_event_sub(struct mlx5e_priv *priv,
    enum mlx5_dev_event event)
{
        switch (event) {
        case MLX5_DEV_EVENT_PORT_UP:
        case MLX5_DEV_EVENT_PORT_DOWN:
                queue_work(priv->wq, &priv->update_carrier_work);
                break;

        default:
                break;
        }
}

static void
mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
    enum mlx5_dev_event event, unsigned long param)
{
        struct mlx5e_priv *priv = vpriv;

        mtx_lock(&priv->async_events_mtx);
        if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
                mlx5e_async_event_sub(priv, event);
        mtx_unlock(&priv->async_events_mtx);
}

static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
        set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}

static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
        mtx_lock(&priv->async_events_mtx);
        clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
        mtx_unlock(&priv->async_events_mtx);
}

static const char *mlx5e_rq_stats_desc[] = {
        MLX5E_RQ_STATS(MLX5E_STATS_DESC)
};

static int
mlx5e_create_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        char buffer[16];
        void *rqc = param->rqc;
        void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
        int wq_sz;
        int err;
        int i;
        u32 nsegs, wqe_sz;

        err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
        if (err != 0)
                goto done;

        /* Create DMA descriptor TAG */
        if ((err = -bus_dma_tag_create(
            bus_get_dma_tag(mdev->pdev->dev.bsddev),
            1,                          /* any alignment */
            0,                          /* no boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            nsegs * MLX5E_MAX_RX_BYTES, /* maxsize */
            nsegs,                      /* nsegments */
            nsegs * MLX5E_MAX_RX_BYTES, /* maxsegsize */
            0,                          /* flags */
            NULL, NULL,                 /* lockfunc, lockfuncarg */
            &rq->dma_tag)))
                goto done;

        err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
            &rq->wq_ctrl);
        if (err)
                goto err_free_dma_tag;

        rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];

        err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs);
        if (err != 0)
                goto err_rq_wq_destroy;

        wq_sz = mlx5_wq_ll_get_size(&rq->wq);

        err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
        if (err)
                goto err_rq_wq_destroy;

        rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
        for (i = 0; i != wq_sz; i++) {
                struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
#if (MLX5E_MAX_RX_SEGS == 1)
                uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
#else
                int j;
#endif

                err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
                if (err != 0) {
                        while (i--)
                                bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
                        goto err_rq_mbuf_free;
                }

                /* set value for constant fields */
#if (MLX5E_MAX_RX_SEGS == 1)
                wqe->data[0].lkey = c->mkey_be;
                wqe->data[0].byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
#else
                for (j = 0; j < rq->nsegs; j++)
                        wqe->data[j].lkey = c->mkey_be;
#endif
        }

        INIT_WORK(&rq->dim.work, mlx5e_dim_work);
        if (priv->params.rx_cq_moderation_mode < 2) {
                rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
        } else {
                void *cqc = container_of(param,
                    struct mlx5e_channel_param, rq)->rx_cq.cqc;

                switch (MLX5_GET(cqc, cqc, cq_period_mode)) {
                case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
                        rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
                        break;
                case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
                        rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
                        break;
                default:
                        rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
                        break;
                }
        }

        rq->ifp = c->ifp;
        rq->channel = c;
        rq->ix = c->ix;

        snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
        mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
            buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
            rq->stats.arg);
        return (0);

err_rq_mbuf_free:
        free(rq->mbuf, M_MLX5EN);
        tcp_lro_free(&rq->lro);
err_rq_wq_destroy:
        mlx5_wq_destroy(&rq->wq_ctrl);
err_free_dma_tag:
        bus_dma_tag_destroy(rq->dma_tag);
done:
        return (err);
}
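
/*
 * Note on the error convention (an observation, not part of the original
 * file): FreeBSD KPIs such as bus_dma_tag_create() and
 * tcp_lro_init_args() return positive errno values, while the mlx5 core
 * functions return Linux-style negative errnos. The leading "-" in
 * expressions like "err = -bus_dma_tag_create(...)" converts the former
 * into the latter, so all error paths in this function use one
 * convention.
 */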

static void
mlx5e_destroy_rq(struct mlx5e_rq *rq)
{
        int wq_sz;
        int i;

        /* destroy all sysctl nodes */
        sysctl_ctx_free(&rq->stats.ctx);

        /* free leftover LRO packets, if any */
        tcp_lro_free(&rq->lro);

        wq_sz = mlx5_wq_ll_get_size(&rq->wq);
        for (i = 0; i != wq_sz; i++) {
                if (rq->mbuf[i].mbuf != NULL) {
                        bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
                        m_freem(rq->mbuf[i].mbuf);
                }
                bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
        }
        free(rq->mbuf, M_MLX5EN);
        mlx5_wq_destroy(&rq->wq_ctrl);
}

static int
mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;

        void *in;
        void *rqc;
        void *wq;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
            sizeof(u64) * rq->wq_ctrl.buf.npages;
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
        wq = MLX5_ADDR_OF(rqc, rqc, wq);

        memcpy(rqc, param->rqc, sizeof(param->rqc));

        MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
        MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
        MLX5_SET(rqc, rqc, flush_in_error_en, 1);
        if (priv->counter_set_id >= 0)
                MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
        MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
            PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);

        mlx5_fill_page_array(&rq->wq_ctrl.buf,
            (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

        err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);

        kvfree(in);

        return (err);
}

static int
mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;

        void *in;
        void *rqc;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);

        MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
        MLX5_SET(modify_rq_in, in, rq_state, curr_state);
        MLX5_SET(rqc, rqc, state, next_state);

        err = mlx5_core_modify_rq(mdev, in, inlen);

        kvfree(in);

        return (err);
}

static void
mlx5e_disable_rq(struct mlx5e_rq *rq)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;

        mlx5_core_destroy_rq(mdev, rq->rqn);
}

static int
mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_wq_ll *wq = &rq->wq;
        int i;

        for (i = 0; i < 1000; i++) {
                if (wq->cur_sz >= priv->params.min_rx_wqes)
                        return (0);

                msleep(4);
        }
        return (-ETIMEDOUT);
}

static int
mlx5e_open_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
        int err;

        err = mlx5e_create_rq(c, param, rq);
        if (err)
                return (err);

        err = mlx5e_enable_rq(rq, param);
        if (err)
                goto err_destroy_rq;

        err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
        if (err)
                goto err_disable_rq;

        c->rq.enabled = 1;

        return (0);

err_disable_rq:
        mlx5e_disable_rq(rq);
err_destroy_rq:
        mlx5e_destroy_rq(rq);

        return (err);
}

static void
mlx5e_close_rq(struct mlx5e_rq *rq)
{
        mtx_lock(&rq->mtx);
        rq->enabled = 0;
        callout_stop(&rq->watchdog);
        mtx_unlock(&rq->mtx);

        callout_drain(&rq->watchdog);

        mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
}

static void
mlx5e_close_rq_wait(struct mlx5e_rq *rq)
{
        struct mlx5_core_dev *mdev = rq->channel->priv->mdev;

        /* wait till RQ is empty */
        while (!mlx5_wq_ll_is_empty(&rq->wq) &&
               (mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
                msleep(4);
                rq->cq.mcq.comp(&rq->cq.mcq);
        }

        cancel_work_sync(&rq->dim.work);
        mlx5e_disable_rq(rq);
        mlx5e_destroy_rq(rq);
}

void
mlx5e_free_sq_db(struct mlx5e_sq *sq)
{
        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
        int x;

        for (x = 0; x != wq_sz; x++)
                bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
        free(sq->mbuf, M_MLX5EN);
}

int
mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
{
        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
        int err;
        int x;

        sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);

        /* Create DMA descriptor MAPs */
        for (x = 0; x != wq_sz; x++) {
                err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
                if (err != 0) {
                        while (x--)
                                bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
                        free(sq->mbuf, M_MLX5EN);
                        return (err);
                }
        }
        return (0);
}

static const char *mlx5e_sq_stats_desc[] = {
        MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};

void
mlx5e_update_sq_inline(struct mlx5e_sq *sq)
{
        sq->max_inline = sq->priv->params.tx_max_inline;
        sq->min_inline_mode = sq->priv->params.tx_min_inline_mode;

        /*
         * Check if the trust state is not PCP, for example DSCP, or if
         * the inline mode is NONE, which indicates CX-5 or newer
         * hardware.
         */
        if (sq->priv->params_ethtool.trust_state != MLX5_QPTS_TRUST_PCP ||
            sq->min_inline_mode == MLX5_INLINE_MODE_NONE) {
                if (MLX5_CAP_ETH(sq->priv->mdev, wqe_vlan_insert))
                        sq->min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN;
                else
                        sq->min_insert_caps = MLX5E_INSERT_NON_VLAN;
        } else {
                sq->min_insert_caps = 0;
        }
}
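
/*
 * Illustrative summary (not part of the original file) of the resulting
 * insert capabilities, which the transmit path consults when building
 * WQEs:
 *
 *      trust state != PCP, or inline mode == NONE (CX-5 and newer):
 *              wqe_vlan_insert ? MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN
 *                              : MLX5E_INSERT_NON_VLAN
 *      otherwise:
 *              0, meaning the headers must be inlined by software
 */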

static void
mlx5e_refresh_sq_inline_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
{
        int i;

        for (i = 0; i != c->num_tc; i++) {
                mtx_lock(&c->sq[i].lock);
                mlx5e_update_sq_inline(&c->sq[i]);
                mtx_unlock(&c->sq[i].lock);
        }
}

void
mlx5e_refresh_sq_inline(struct mlx5e_priv *priv)
{
        int i;

        /* check if channels are closed */
        if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
                return;

        for (i = 0; i < priv->params.num_channels; i++)
                mlx5e_refresh_sq_inline_sub(priv, &priv->channel[i]);
}

static int
mlx5e_create_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        char buffer[16];
        void *sqc = param->sqc;
        void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
        int err;

        /* Create DMA descriptor TAG */
        if ((err = -bus_dma_tag_create(
            bus_get_dma_tag(mdev->pdev->dev.bsddev),
            1,                          /* any alignment */
            0,                          /* no boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            MLX5E_MAX_TX_PAYLOAD_SIZE,  /* maxsize */
            MLX5E_MAX_TX_MBUF_FRAGS,    /* nsegments */
            MLX5E_MAX_TX_MBUF_SIZE,     /* maxsegsize */
            0,                          /* flags */
            NULL, NULL,                 /* lockfunc, lockfuncarg */
            &sq->dma_tag)))
                goto done;

        err = mlx5_alloc_map_uar(mdev, &sq->uar);
        if (err)
                goto err_free_dma_tag;

        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
            &sq->wq_ctrl);
        if (err)
                goto err_unmap_free_uar;

        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
        sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;

        err = mlx5e_alloc_sq_db(sq);
        if (err)
                goto err_sq_wq_destroy;

        sq->mkey_be = c->mkey_be;
        sq->ifp = priv->ifp;
        sq->priv = priv;
        sq->tc = tc;

        mlx5e_update_sq_inline(sq);

        snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
        mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
            buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
            sq->stats.arg);

        return (0);

err_sq_wq_destroy:
        mlx5_wq_destroy(&sq->wq_ctrl);

err_unmap_free_uar:
        mlx5_unmap_free_uar(mdev, &sq->uar);

err_free_dma_tag:
        bus_dma_tag_destroy(sq->dma_tag);
done:
        return (err);
}

static void
mlx5e_destroy_sq(struct mlx5e_sq *sq)
{
        /* destroy all sysctl nodes */
        sysctl_ctx_free(&sq->stats.ctx);

        mlx5e_free_sq_db(sq);
        mlx5_wq_destroy(&sq->wq_ctrl);
        mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
}

int
mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
    int tis_num)
{
        void *in;
        void *sqc;
        void *wq;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
            sizeof(u64) * sq->wq_ctrl.buf.npages;
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
        wq = MLX5_ADDR_OF(sqc, sqc, wq);

        memcpy(sqc, param->sqc, sizeof(param->sqc));

        MLX5_SET(sqc, sqc, tis_num_0, tis_num);
        MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
        MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
        MLX5_SET(sqc, sqc, tis_lst_sz, 1);
        MLX5_SET(sqc, sqc, flush_in_error_en, 1);

        MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
        MLX5_SET(wq, wq, uar_page, sq->uar.index);
        MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
            PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);

        mlx5_fill_page_array(&sq->wq_ctrl.buf,
            (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

        err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);

        kvfree(in);

        return (err);
}

int
mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
{
        void *in;
        void *sqc;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

        MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
        MLX5_SET(modify_sq_in, in, sq_state, curr_state);
        MLX5_SET(sqc, sqc, state, next_state);

        err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);

        kvfree(in);

        return (err);
}

void
mlx5e_disable_sq(struct mlx5e_sq *sq)
{

        mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
}

static int
mlx5e_open_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
        int err;

        err = mlx5e_create_sq(c, tc, param, sq);
        if (err)
                return (err);

        err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
        if (err)
                goto err_destroy_sq;

        err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
        if (err)
                goto err_disable_sq;

        WRITE_ONCE(sq->running, 1);

        return (0);

err_disable_sq:
        mlx5e_disable_sq(sq);
err_destroy_sq:
        mlx5e_destroy_sq(sq);

        return (err);
}

static void
mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
{
        /* fill up remainder with NOPs */
        while (sq->cev_counter != 0) {
                while (!mlx5e_sq_has_room_for(sq, 1)) {
                        if (can_sleep != 0) {
                                mtx_unlock(&sq->lock);
                                msleep(4);
                                mtx_lock(&sq->lock);
                        } else {
                                goto done;
                        }
                }
                /* send a single NOP */
                mlx5e_send_nop(sq, 1);
                atomic_thread_fence_rel();
        }
done:
        /* Check if we need to write the doorbell */
        if (likely(sq->doorbell.d64 != 0)) {
                mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
                sq->doorbell.d64 = 0;
        }
}

void
mlx5e_sq_cev_timeout(void *arg)
{
        struct mlx5e_sq *sq = arg;

        mtx_assert(&sq->lock, MA_OWNED);

        /* check next state */
        switch (sq->cev_next_state) {
        case MLX5E_CEV_STATE_SEND_NOPS:
                /* fill TX ring with NOPs, if any */
                mlx5e_sq_send_nops_locked(sq, 0);

                /* check if completed */
                if (sq->cev_counter == 0) {
                        sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
                        return;
                }
                break;
        default:
                /* send NOPs on next timeout */
                sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
                break;
        }

        /* restart timer */
        callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
}
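
/*
 * Illustrative note (a sketch, not part of the original file): the
 * completion event ("cev") machinery requests one completion per
 * "cev_factor" transmitted WQEs to reduce interrupt load. The timer
 * above acts as a fallback for idle queues: on one timeout it arms the
 * MLX5E_CEV_STATE_SEND_NOPS state, and on the next it pads the ring
 * with NOP WQEs via mlx5e_sq_send_nops_locked(), so that outstanding
 * unsignalled WQEs still generate a completion without new traffic.
 */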
1377
1378 void
1379 mlx5e_drain_sq(struct mlx5e_sq *sq)
1380 {
1381         int error;
1382         struct mlx5_core_dev *mdev= sq->priv->mdev;
1383
1384         /*
1385          * Check if already stopped.
1386          *
1387          * NOTE: Serialization of this function is managed by the
1388          * caller ensuring the priv's state lock is locked or in case
1389          * of rate limit support, a single thread manages drain and
1390          * resume of SQs. The "running" variable can therefore safely
1391          * be read without any locks.
1392          */
1393         if (READ_ONCE(sq->running) == 0)
1394                 return;
1395
1396         /* don't put more packets into the SQ */
1397         WRITE_ONCE(sq->running, 0);
1398
1399         /* serialize access to DMA rings */
1400         mtx_lock(&sq->lock);
1401
1402         /* teardown event factor timer, if any */
1403         sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1404         callout_stop(&sq->cev_callout);
1405
1406         /* send dummy NOPs in order to flush the transmit ring */
1407         mlx5e_sq_send_nops_locked(sq, 1);
1408         mtx_unlock(&sq->lock);
1409
1410         /* make sure it is safe to free the callout */
1411         callout_drain(&sq->cev_callout);
1412
1413         /* wait till SQ is empty or link is down */
1414         mtx_lock(&sq->lock);
1415         while (sq->cc != sq->pc &&
1416             (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
1417             mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1418                 mtx_unlock(&sq->lock);
1419                 msleep(1);
1420                 sq->cq.mcq.comp(&sq->cq.mcq);
1421                 mtx_lock(&sq->lock);
1422         }
1423         mtx_unlock(&sq->lock);
1424
1425         /* error out remaining requests */
1426         error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
1427         if (error != 0) {
1428                 if_printf(sq->ifp,
1429                     "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
1430         }
1431
1432         /* wait till SQ is empty */
1433         mtx_lock(&sq->lock);
1434         while (sq->cc != sq->pc &&
1435                mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1436                 mtx_unlock(&sq->lock);
1437                 msleep(1);
1438                 sq->cq.mcq.comp(&sq->cq.mcq);
1439                 mtx_lock(&sq->lock);
1440         }
1441         mtx_unlock(&sq->lock);
1442 }
1443
1444 static void
1445 mlx5e_close_sq_wait(struct mlx5e_sq *sq)
1446 {
1447
1448         mlx5e_drain_sq(sq);
1449         mlx5e_disable_sq(sq);
1450         mlx5e_destroy_sq(sq);
1451 }
1452
1453 static int
1454 mlx5e_create_cq(struct mlx5e_priv *priv,
1455     struct mlx5e_cq_param *param,
1456     struct mlx5e_cq *cq,
1457     mlx5e_cq_comp_t *comp,
1458     int eq_ix)
1459 {
1460         struct mlx5_core_dev *mdev = priv->mdev;
1461         struct mlx5_core_cq *mcq = &cq->mcq;
1462         int eqn_not_used;
1463         int irqn;
1464         int err;
1465         u32 i;
1466
1467         param->wq.buf_numa_node = 0;
1468         param->wq.db_numa_node = 0;
1469
1470         err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
1471             &cq->wq_ctrl);
1472         if (err)
1473                 return (err);
1474
1475         mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);
1476
1477         mcq->cqe_sz = 64;
1478         mcq->set_ci_db = cq->wq_ctrl.db.db;
1479         mcq->arm_db = cq->wq_ctrl.db.db + 1;
1480         *mcq->set_ci_db = 0;
1481         *mcq->arm_db = 0;
1482         mcq->vector = eq_ix;
1483         mcq->comp = comp;
1484         mcq->event = mlx5e_cq_error_event;
1485         mcq->irqn = irqn;
1486         mcq->uar = &priv->cq_uar;
1487
1488         for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1489                 struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1490
1491                 cqe->op_own = 0xf1;
1492         }
1493
1494         cq->priv = priv;
1495
1496         return (0);
1497 }
1498
1499 static void
1500 mlx5e_destroy_cq(struct mlx5e_cq *cq)
1501 {
1502         mlx5_wq_destroy(&cq->wq_ctrl);
1503 }
1504
1505 static int
1506 mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
1507 {
1508         struct mlx5_core_cq *mcq = &cq->mcq;
1509         void *in;
1510         void *cqc;
1511         int inlen;
1512         int irqn_not_used;
1513         int eqn;
1514         int err;
1515
1516         inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1517             sizeof(u64) * cq->wq_ctrl.buf.npages;
1518         in = mlx5_vzalloc(inlen);
1519         if (in == NULL)
1520                 return (-ENOMEM);
1521
1522         cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1523
1524         memcpy(cqc, param->cqc, sizeof(param->cqc));
1525
1526         mlx5_fill_page_array(&cq->wq_ctrl.buf,
1527             (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
1528
1529         mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
1530
1531         MLX5_SET(cqc, cqc, c_eqn, eqn);
1532         MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
1533         MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1534             PAGE_SHIFT);
1535         MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1536
1537         err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);
1538
1539         kvfree(in);
1540
1541         if (err)
1542                 return (err);
1543
1544         mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));
1545
1546         return (0);
1547 }
1548
1549 static void
1550 mlx5e_disable_cq(struct mlx5e_cq *cq)
1551 {
1552
1553         mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
1554 }
1555
1556 int
1557 mlx5e_open_cq(struct mlx5e_priv *priv,
1558     struct mlx5e_cq_param *param,
1559     struct mlx5e_cq *cq,
1560     mlx5e_cq_comp_t *comp,
1561     int eq_ix)
1562 {
1563         int err;
1564
1565         err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
1566         if (err)
1567                 return (err);
1568
1569         err = mlx5e_enable_cq(cq, param, eq_ix);
1570         if (err)
1571                 goto err_destroy_cq;
1572
1573         return (0);
1574
1575 err_destroy_cq:
1576         mlx5e_destroy_cq(cq);
1577
1578         return (err);
1579 }
1580
1581 void
1582 mlx5e_close_cq(struct mlx5e_cq *cq)
1583 {
1584         mlx5e_disable_cq(cq);
1585         mlx5e_destroy_cq(cq);
1586 }
1587
1588 static int
1589 mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1590     struct mlx5e_channel_param *cparam)
1591 {
1592         int err;
1593         int tc;
1594
1595         for (tc = 0; tc < c->num_tc; tc++) {
1596                 /* open completion queue */
1597                 err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
1598                     &mlx5e_tx_cq_comp, c->ix);
1599                 if (err)
1600                         goto err_close_tx_cqs;
1601         }
1602         return (0);
1603
1604 err_close_tx_cqs:
1605         for (tc--; tc >= 0; tc--)
1606                 mlx5e_close_cq(&c->sq[tc].cq);
1607
1608         return (err);
1609 }
1610
1611 static void
1612 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1613 {
1614         int tc;
1615
1616         for (tc = 0; tc < c->num_tc; tc++)
1617                 mlx5e_close_cq(&c->sq[tc].cq);
1618 }
1619
1620 static int
1621 mlx5e_open_sqs(struct mlx5e_channel *c,
1622     struct mlx5e_channel_param *cparam)
1623 {
1624         int err;
1625         int tc;
1626
1627         for (tc = 0; tc < c->num_tc; tc++) {
1628                 err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
1629                 if (err)
1630                         goto err_close_sqs;
1631         }
1632
1633         return (0);
1634
1635 err_close_sqs:
1636         for (tc--; tc >= 0; tc--)
1637                 mlx5e_close_sq_wait(&c->sq[tc]);
1638
1639         return (err);
1640 }
1641
1642 static void
1643 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1644 {
1645         int tc;
1646
1647         for (tc = 0; tc < c->num_tc; tc++)
1648                 mlx5e_close_sq_wait(&c->sq[tc]);
1649 }
1650
1651 static void
1652 mlx5e_chan_mtx_init(struct mlx5e_channel *c)
1653 {
1654         int tc;
1655
1656         mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
1657
1658         callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);
1659
1660         for (tc = 0; tc < c->num_tc; tc++) {
1661                 struct mlx5e_sq *sq = c->sq + tc;
1662
1663                 mtx_init(&sq->lock, "mlx5tx",
1664                     MTX_NETWORK_LOCK " TX", MTX_DEF);
1665                 mtx_init(&sq->comp_lock, "mlx5comp",
1666                     MTX_NETWORK_LOCK " TX", MTX_DEF);
1667
1668                 callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
1669
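		/*
		 * The completion event factor is taken from the
		 * tx_completion_fact ethtool tunable; it is assumed to
		 * control how many transmitted WQEs share one TX
		 * completion event.
		 */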
1670                 sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
1671
1672                 /* ensure the TX completion event factor is not zero */
1673                 if (sq->cev_factor == 0)
1674                         sq->cev_factor = 1;
1675         }
1676 }
1677
1678 static void
1679 mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1680 {
1681         int tc;
1682
1683         mtx_destroy(&c->rq.mtx);
1684
1685         for (tc = 0; tc < c->num_tc; tc++) {
1686                 mtx_destroy(&c->sq[tc].lock);
1687                 mtx_destroy(&c->sq[tc].comp_lock);
1688         }
1689 }
1690
1691 static int
1692 mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1693     struct mlx5e_channel_param *cparam,
1694     struct mlx5e_channel *c)
1695 {
1696         int err;
1697
1698         memset(c, 0, sizeof(*c));
1699
1700         c->priv = priv;
1701         c->ix = ix;
1702         c->ifp = priv->ifp;
1703         c->mkey_be = cpu_to_be32(priv->mr.key);
1704         c->num_tc = priv->num_tc;
1705
1706         /* init mutexes */
1707         mlx5e_chan_mtx_init(c);
1708
1709         /* open transmit completion queue */
1710         err = mlx5e_open_tx_cqs(c, cparam);
1711         if (err)
1712                 goto err_free;
1713
1714         /* open receive completion queue */
1715         err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
1716             &mlx5e_rx_cq_comp, c->ix);
1717         if (err)
1718                 goto err_close_tx_cqs;
1719
1720         err = mlx5e_open_sqs(c, cparam);
1721         if (err)
1722                 goto err_close_rx_cq;
1723
1724         err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
1725         if (err)
1726                 goto err_close_sqs;
1727
1728         /* poll receive queue initially */
1729         c->rq.cq.mcq.comp(&c->rq.cq.mcq);
1730
1731         return (0);
1732
1733 err_close_sqs:
1734         mlx5e_close_sqs_wait(c);
1735
1736 err_close_rx_cq:
1737         mlx5e_close_cq(&c->rq.cq);
1738
1739 err_close_tx_cqs:
1740         mlx5e_close_tx_cqs(c);
1741
1742 err_free:
1743         /* destroy mutexes */
1744         mlx5e_chan_mtx_destroy(c);
1745         return (err);
1746 }
1747
1748 static void
1749 mlx5e_close_channel(struct mlx5e_channel *c)
1750 {
1751         mlx5e_close_rq(&c->rq);
1752 }
1753
1754 static void
1755 mlx5e_close_channel_wait(struct mlx5e_channel *c)
1756 {
1757         mlx5e_close_rq_wait(&c->rq);
1758         mlx5e_close_sqs_wait(c);
1759         mlx5e_close_cq(&c->rq.cq);
1760         mlx5e_close_tx_cqs(c);
1761         /* destroy mutexes */
1762         mlx5e_chan_mtx_destroy(c);
1763 }
1764
1765 static int
1766 mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs)
1767 {
1768         u32 r, n;
1769
1770         r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz :
1771             MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
1772         if (r > MJUM16BYTES)
1773                 return (-ENOMEM);
1774
1775         if (r > MJUM9BYTES)
1776                 r = MJUM16BYTES;
1777         else if (r > MJUMPAGESIZE)
1778                 r = MJUM9BYTES;
1779         else if (r > MCLBYTES)
1780                 r = MJUMPAGESIZE;
1781         else
1782                 r = MCLBYTES;
1783
1784         /*
1785          * n + 1 must be a power of two, because the stride size must be
1786          * a power of two. The stride size is 16 * (n + 1) bytes, as the
1787          * first segment is the control segment.
1788          */
1789         for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++)
1790                 ;
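	/*
	 * Illustrative example with hypothetical values: if howmany(r,
	 * MLX5E_MAX_RX_BYTES) yields 5, the loop stops at n = 7, the
	 * first value for which n + 1 (here 8) is a power of two; the
	 * resulting stride size would be 16 * 8 = 128 bytes.
	 */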
1791
1792         *wqe_sz = r;
1793         *nsegs = n;
1794         return (0);
1795 }
1796
1797 static void
1798 mlx5e_build_rq_param(struct mlx5e_priv *priv,
1799     struct mlx5e_rq_param *param)
1800 {
1801         void *rqc = param->rqc;
1802         void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
1803         u32 wqe_sz, nsegs;
1804
1805         mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
1806         MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
1807         MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1808         MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) +
1809             nsegs * sizeof(struct mlx5_wqe_data_seg)));
1810         MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
1811         MLX5_SET(wq, wq, pd, priv->pdn);
1812
1813         param->wq.buf_numa_node = 0;
1814         param->wq.db_numa_node = 0;
1815         param->wq.linear = 1;
1816 }
1817
1818 static void
1819 mlx5e_build_sq_param(struct mlx5e_priv *priv,
1820     struct mlx5e_sq_param *param)
1821 {
1822         void *sqc = param->sqc;
1823         void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
1824
1825         MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
1826         MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1827         MLX5_SET(wq, wq, pd, priv->pdn);
1828
1829         param->wq.buf_numa_node = 0;
1830         param->wq.db_numa_node = 0;
1831         param->wq.linear = 1;
1832 }
1833
1834 static void
1835 mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
1836     struct mlx5e_cq_param *param)
1837 {
1838         void *cqc = param->cqc;
1839
1840         MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
1841 }
1842
1843 static void
1844 mlx5e_get_default_profile(struct mlx5e_priv *priv, int mode, struct net_dim_cq_moder *ptr)
1845 {
1846
1847         *ptr = net_dim_get_profile(mode, MLX5E_DIM_DEFAULT_PROFILE);
1848
1849         /* apply LRO restrictions */
1850         if (priv->params.hw_lro_en &&
1851             ptr->pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) {
1852                 ptr->pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO;
1853         }
1854 }
1855
1856 static void
1857 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1858     struct mlx5e_cq_param *param)
1859 {
1860         struct net_dim_cq_moder curr;
1861         void *cqc = param->cqc;
1862
1864         /*
1865          * TODO: The sysctl to control on/off is a bool value for now, which means
1866          * we only support CSUM. Once HASH is implemented we'll need to address that.
1867          */
1868         if (priv->params.cqe_zipping_en) {
1869                 MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1870                 MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1871         }
1872
1873         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1874
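	/*
	 * Summary of the receive moderation modes handled below:
	 *   0 - static moderation, period counted from the EQE
	 *   1 - static moderation, period counted from the CQE when the
	 *       hardware supports it, otherwise from the EQE
	 *   2 - dynamic (net_dim) moderation started from the EQE
	 *   3 - dynamic (net_dim) moderation started from the CQE when
	 *       supported, otherwise from the EQE
	 */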
1875         switch (priv->params.rx_cq_moderation_mode) {
1876         case 0:
1877                 MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1878                 MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1879                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1880                 break;
1881         case 1:
1882                 MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1883                 MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1884                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1885                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1886                 else
1887                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1888                 break;
1889         case 2:
1890                 mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE, &curr);
1891                 MLX5_SET(cqc, cqc, cq_period, curr.usec);
1892                 MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
1893                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1894                 break;
1895         case 3:
1896                 mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE, &curr);
1897                 MLX5_SET(cqc, cqc, cq_period, curr.usec);
1898                 MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
1899                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1900                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1901                 else
1902                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1903                 break;
1904         default:
1905                 break;
1906         }
1907
1908         mlx5e_dim_build_cq_param(priv, param);
1909
1910         mlx5e_build_common_cq_param(priv, param);
1911 }
1912
1913 static void
1914 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1915     struct mlx5e_cq_param *param)
1916 {
1917         void *cqc = param->cqc;
1918
1919         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1920         MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1921         MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1922
1923         switch (priv->params.tx_cq_moderation_mode) {
1924         case 0:
1925                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1926                 break;
1927         default:
1928                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1929                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1930                 else
1931                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1932                 break;
1933         }
1934
1935         mlx5e_build_common_cq_param(priv, param);
1936 }
1937
1938 static void
1939 mlx5e_build_channel_param(struct mlx5e_priv *priv,
1940     struct mlx5e_channel_param *cparam)
1941 {
1942         memset(cparam, 0, sizeof(*cparam));
1943
1944         mlx5e_build_rq_param(priv, &cparam->rq);
1945         mlx5e_build_sq_param(priv, &cparam->sq);
1946         mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
1947         mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
1948 }
1949
1950 static int
1951 mlx5e_open_channels(struct mlx5e_priv *priv)
1952 {
1953         struct mlx5e_channel_param cparam;
1954         int err;
1955         int i;
1956         int j;
1957
1958         mlx5e_build_channel_param(priv, &cparam);
1959         for (i = 0; i < priv->params.num_channels; i++) {
1960                 err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1961                 if (err)
1962                         goto err_close_channels;
1963         }
1964
1965         for (j = 0; j < priv->params.num_channels; j++) {
1966                 err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j].rq);
1967                 if (err)
1968                         goto err_close_channels;
1969         }
1970
1971         return (0);
1972
1973 err_close_channels:
1974         while (i--) {
1975                 mlx5e_close_channel(&priv->channel[i]);
1976                 mlx5e_close_channel_wait(&priv->channel[i]);
1977         }
1978         return (err);
1979 }
1980
1981 static void
1982 mlx5e_close_channels(struct mlx5e_priv *priv)
1983 {
1984         int i;
1985
1986         for (i = 0; i < priv->params.num_channels; i++)
1987                 mlx5e_close_channel(&priv->channel[i]);
1988         for (i = 0; i < priv->params.num_channels; i++)
1989                 mlx5e_close_channel_wait(&priv->channel[i]);
1990 }
1991
1992 static int
1993 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
1994 {
1995
1996         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1997                 uint8_t cq_mode;
1998
1999                 switch (priv->params.tx_cq_moderation_mode) {
2000                 case 0:
2001                 case 2:
2002                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2003                         break;
2004                 default:
2005                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2006                         break;
2007                 }
2008
2009                 return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
2010                     priv->params.tx_cq_moderation_usec,
2011                     priv->params.tx_cq_moderation_pkts,
2012                     cq_mode));
2013         }
2014
2015         return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
2016             priv->params.tx_cq_moderation_usec,
2017             priv->params.tx_cq_moderation_pkts));
2018 }
2019
2020 static int
2021 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
2022 {
2023
2024         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
2025                 uint8_t cq_mode;
2026                 uint8_t dim_mode;
2027                 int retval;
2028
2029                 switch (priv->params.rx_cq_moderation_mode) {
2030                 case 0:
2031                 case 2:
2032                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2033                         dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
2034                         break;
2035                 default:
2036                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2037                         dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
2038                         break;
2039                 }
2040
2041                 /* tear down dynamic interrupt moderation */
2042                 mtx_lock(&rq->mtx);
2043                 rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
2044                 mtx_unlock(&rq->mtx);
2045
2046                 /* wait for dynamic interrupt moderation work task, if any */
2047                 cancel_work_sync(&rq->dim.work);
2048
2049                 if (priv->params.rx_cq_moderation_mode >= 2) {
2050                         struct net_dim_cq_moder curr;
2051
2052                         mlx5e_get_default_profile(priv, dim_mode, &curr);
2053
2054                         retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
2055                             curr.usec, curr.pkts, cq_mode);
2056
2057                         /* set dynamic interrupt moderation mode and zero defaults */
2058                         mtx_lock(&rq->mtx);
2059                         rq->dim.mode = dim_mode;
2060                         rq->dim.state = 0;
2061                         rq->dim.profile_ix = MLX5E_DIM_DEFAULT_PROFILE;
2062                         mtx_unlock(&rq->mtx);
2063                 } else {
2064                         retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
2065                             priv->params.rx_cq_moderation_usec,
2066                             priv->params.rx_cq_moderation_pkts,
2067                             cq_mode);
2068                 }
2069                 return (retval);
2070         }
2071
2072         return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
2073             priv->params.rx_cq_moderation_usec,
2074             priv->params.rx_cq_moderation_pkts));
2075 }
2076
2077 static int
2078 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
2079 {
2080         int err;
2081         int i;
2082
2083         err = mlx5e_refresh_rq_params(priv, &c->rq);
2084         if (err)
2085                 goto done;
2086
2087         for (i = 0; i != c->num_tc; i++) {
2088                 err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
2089                 if (err)
2090                         goto done;
2091         }
2092 done:
2093         return (err);
2094 }
2095
2096 int
2097 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
2098 {
2099         int i;
2100
2101         /* check if channels are closed */
2102         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2103                 return (EINVAL);
2104
2105         for (i = 0; i < priv->params.num_channels; i++) {
2106                 int err;
2107
2108                 err = mlx5e_refresh_channel_params_sub(priv, &priv->channel[i]);
2109                 if (err)
2110                         return (err);
2111         }
2112         return (0);
2113 }
2114
2115 static int
2116 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
2117 {
2118         struct mlx5_core_dev *mdev = priv->mdev;
2119         u32 in[MLX5_ST_SZ_DW(create_tis_in)];
2120         void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
2121
2122         memset(in, 0, sizeof(in));
2123
2124         MLX5_SET(tisc, tisc, prio, tc);
2125         MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
2126
2127         return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
2128 }
2129
2130 static void
2131 mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
2132 {
2133         mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
2134 }
2135
2136 static int
2137 mlx5e_open_tises(struct mlx5e_priv *priv)
2138 {
2139         int num_tc = priv->num_tc;
2140         int err;
2141         int tc;
2142
2143         for (tc = 0; tc < num_tc; tc++) {
2144                 err = mlx5e_open_tis(priv, tc);
2145                 if (err)
2146                         goto err_close_tises;
2147         }
2148
2149         return (0);
2150
2151 err_close_tises:
2152         for (tc--; tc >= 0; tc--)
2153                 mlx5e_close_tis(priv, tc);
2154
2155         return (err);
2156 }
2157
2158 static void
2159 mlx5e_close_tises(struct mlx5e_priv *priv)
2160 {
2161         int num_tc = priv->num_tc;
2162         int tc;
2163
2164         for (tc = 0; tc < num_tc; tc++)
2165                 mlx5e_close_tis(priv, tc);
2166 }
2167
2168 static int
2169 mlx5e_open_rqt(struct mlx5e_priv *priv)
2170 {
2171         struct mlx5_core_dev *mdev = priv->mdev;
2172         u32 *in;
2173         u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
2174         void *rqtc;
2175         int inlen;
2176         int err;
2177         int sz;
2178         int i;
2179
2180         sz = 1 << priv->params.rx_hash_log_tbl_sz;
2181
2182         inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
2183         in = mlx5_vzalloc(inlen);
2184         if (in == NULL)
2185                 return (-ENOMEM);
2186         rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
2187
2188         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
2189         MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
2190
2191         for (i = 0; i < sz; i++) {
2192                 int ix = i;
2193 #ifdef RSS
2194                 ix = rss_get_indirection_to_bucket(ix);
2195 #endif
2196                 /* ensure we don't overflow */
2197                 ix %= priv->params.num_channels;
2198
2199                 /* apply receive side scaling stride, if any */
2200                 ix -= ix % (int)priv->params.channels_rsss;
2201
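		/*
		 * Worked example with hypothetical values: for
		 * num_channels = 4 and channels_rsss = 2, indirection
		 * entries 0, 1, 2 and 3 map to channels 0, 0, 2 and 2,
		 * spreading pairs of table slots over every second channel.
		 */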
2202                 MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix].rq.rqn);
2203         }
2204
2205         MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
2206
2207         err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
2208         if (!err)
2209                 priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
2210
2211         kvfree(in);
2212
2213         return (err);
2214 }
2215
2216 static void
2217 mlx5e_close_rqt(struct mlx5e_priv *priv)
2218 {
2219         u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
2220         u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
2221
2222         MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
2223         MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
2224
2225         mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
2226 }
2227
2228 static void
2229 mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
2230 {
2231         void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
2232         __be32 *hkey;
2233
2234         MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
2235
2236 #define ROUGH_MAX_L2_L3_HDR_SZ 256
2237
2238 #define MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2239                           MLX5_HASH_FIELD_SEL_DST_IP)
2240
2241 #define MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2242                           MLX5_HASH_FIELD_SEL_DST_IP   |\
2243                           MLX5_HASH_FIELD_SEL_L4_SPORT |\
2244                           MLX5_HASH_FIELD_SEL_L4_DPORT)
2245
2246 #define MLX5_HASH_IP_IPSEC_SPI  (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2247                                  MLX5_HASH_FIELD_SEL_DST_IP   |\
2248                                  MLX5_HASH_FIELD_SEL_IPSEC_SPI)
2249
2250         if (priv->params.hw_lro_en) {
2251                 MLX5_SET(tirc, tirc, lro_enable_mask,
2252                     MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
2253                     MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
2254                 MLX5_SET(tirc, tirc, lro_max_msg_sz,
2255                     (priv->params.lro_wqe_sz -
2256                     ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
2257                 /* TODO: add the option to choose timer value dynamically */
2258                 MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
2259                     MLX5_CAP_ETH(priv->mdev,
2260                     lro_timer_supported_periods[2]));
2261         }
2262
2263         /* setup parameters for hashing TIR type, if any */
2264         switch (tt) {
2265         case MLX5E_TT_ANY:
2266                 MLX5_SET(tirc, tirc, disp_type,
2267                     MLX5_TIRC_DISP_TYPE_DIRECT);
2268                 MLX5_SET(tirc, tirc, inline_rqn,
2269                     priv->channel[0].rq.rqn);
2270                 break;
2271         default:
2272                 MLX5_SET(tirc, tirc, disp_type,
2273                     MLX5_TIRC_DISP_TYPE_INDIRECT);
2274                 MLX5_SET(tirc, tirc, indirect_table,
2275                     priv->rqtn);
2276                 MLX5_SET(tirc, tirc, rx_hash_fn,
2277                     MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
2278                 hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
2279 #ifdef RSS
2280                 /*
2281                  * The FreeBSD RSS implementation currently does not
2282                  * support symmetric Toeplitz hashes:
2283                  */
2284                 MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
2285                 rss_getkey((uint8_t *)hkey);
2286 #else
2287                 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
2288                 hkey[0] = cpu_to_be32(0xD181C62C);
2289                 hkey[1] = cpu_to_be32(0xF7F4DB5B);
2290                 hkey[2] = cpu_to_be32(0x1983A2FC);
2291                 hkey[3] = cpu_to_be32(0x943E1ADB);
2292                 hkey[4] = cpu_to_be32(0xD9389E6B);
2293                 hkey[5] = cpu_to_be32(0xD1039C2C);
2294                 hkey[6] = cpu_to_be32(0xA74499AD);
2295                 hkey[7] = cpu_to_be32(0x593D56D9);
2296                 hkey[8] = cpu_to_be32(0xF3253C06);
2297                 hkey[9] = cpu_to_be32(0x2ADC1FFC);
2298 #endif
2299                 break;
2300         }
2301
2302         switch (tt) {
2303         case MLX5E_TT_IPV4_TCP:
2304                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2305                     MLX5_L3_PROT_TYPE_IPV4);
2306                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2307                     MLX5_L4_PROT_TYPE_TCP);
2308 #ifdef RSS
2309                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
2310                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2311                             MLX5_HASH_IP);
2312                 } else
2313 #endif
2314                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2315                     MLX5_HASH_ALL);
2316                 break;
2317
2318         case MLX5E_TT_IPV6_TCP:
2319                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2320                     MLX5_L3_PROT_TYPE_IPV6);
2321                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2322                     MLX5_L4_PROT_TYPE_TCP);
2323 #ifdef RSS
2324                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
2325                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2326                             MLX5_HASH_IP);
2327                 } else
2328 #endif
2329                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2330                     MLX5_HASH_ALL);
2331                 break;
2332
2333         case MLX5E_TT_IPV4_UDP:
2334                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2335                     MLX5_L3_PROT_TYPE_IPV4);
2336                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2337                     MLX5_L4_PROT_TYPE_UDP);
2338 #ifdef RSS
2339                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
2340                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2341                             MLX5_HASH_IP);
2342                 } else
2343 #endif
2344                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2345                     MLX5_HASH_ALL);
2346                 break;
2347
2348         case MLX5E_TT_IPV6_UDP:
2349                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2350                     MLX5_L3_PROT_TYPE_IPV6);
2351                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2352                     MLX5_L4_PROT_TYPE_UDP);
2353 #ifdef RSS
2354                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2355                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2356                             MLX5_HASH_IP);
2357                 } else
2358 #endif
2359                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2360                     MLX5_HASH_ALL);
2361                 break;
2362
2363         case MLX5E_TT_IPV4_IPSEC_AH:
2364                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2365                     MLX5_L3_PROT_TYPE_IPV4);
2366                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2367                     MLX5_HASH_IP_IPSEC_SPI);
2368                 break;
2369
2370         case MLX5E_TT_IPV6_IPSEC_AH:
2371                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2372                     MLX5_L3_PROT_TYPE_IPV6);
2373                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2374                     MLX5_HASH_IP_IPSEC_SPI);
2375                 break;
2376
2377         case MLX5E_TT_IPV4_IPSEC_ESP:
2378                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2379                     MLX5_L3_PROT_TYPE_IPV4);
2380                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2381                     MLX5_HASH_IP_IPSEC_SPI);
2382                 break;
2383
2384         case MLX5E_TT_IPV6_IPSEC_ESP:
2385                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2386                     MLX5_L3_PROT_TYPE_IPV6);
2387                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2388                     MLX5_HASH_IP_IPSEC_SPI);
2389                 break;
2390
2391         case MLX5E_TT_IPV4:
2392                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2393                     MLX5_L3_PROT_TYPE_IPV4);
2394                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2395                     MLX5_HASH_IP);
2396                 break;
2397
2398         case MLX5E_TT_IPV6:
2399                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2400                     MLX5_L3_PROT_TYPE_IPV6);
2401                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2402                     MLX5_HASH_IP);
2403                 break;
2404
2405         default:
2406                 break;
2407         }
2408 }
2409
2410 static int
2411 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2412 {
2413         struct mlx5_core_dev *mdev = priv->mdev;
2414         u32 *in;
2415         void *tirc;
2416         int inlen;
2417         int err;
2418
2419         inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2420         in = mlx5_vzalloc(inlen);
2421         if (in == NULL)
2422                 return (-ENOMEM);
2423         tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2424
2425         mlx5e_build_tir_ctx(priv, tirc, tt);
2426
2427         err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2428
2429         kvfree(in);
2430
2431         return (err);
2432 }
2433
2434 static void
2435 mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2436 {
2437         mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2438 }
2439
2440 static int
2441 mlx5e_open_tirs(struct mlx5e_priv *priv)
2442 {
2443         int err;
2444         int i;
2445
2446         for (i = 0; i < MLX5E_NUM_TT; i++) {
2447                 err = mlx5e_open_tir(priv, i);
2448                 if (err)
2449                         goto err_close_tirs;
2450         }
2451
2452         return (0);
2453
2454 err_close_tirs:
2455         for (i--; i >= 0; i--)
2456                 mlx5e_close_tir(priv, i);
2457
2458         return (err);
2459 }
2460
2461 static void
2462 mlx5e_close_tirs(struct mlx5e_priv *priv)
2463 {
2464         int i;
2465
2466         for (i = 0; i < MLX5E_NUM_TT; i++)
2467                 mlx5e_close_tir(priv, i);
2468 }
2469
2470 /*
2471  * SW MTU does not include headers, while
2472  * HW MTU includes all headers and checksums.
2473  */
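/*
 * For example, a 1500 byte SW (interface) MTU maps to a larger HW MTU
 * that also covers the Ethernet headers and checksum; the exact
 * accounting is defined by the MLX5E_SW2HW_MTU() and MLX5E_HW2SW_MTU()
 * macros used below.
 */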
2474 static int
2475 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2476 {
2477         struct mlx5e_priv *priv = ifp->if_softc;
2478         struct mlx5_core_dev *mdev = priv->mdev;
2479         int hw_mtu;
2480         int err;
2481
2482         hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);
2483
2484         err = mlx5_set_port_mtu(mdev, hw_mtu);
2485         if (err) {
2486                 if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2487                     __func__, sw_mtu, err);
2488                 return (err);
2489         }
2490
2491         /* Update vport context MTU */
2492         err = mlx5_set_vport_mtu(mdev, hw_mtu);
2493         if (err) {
2494                 if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n",
2495                     __func__, err);
2496         }
2497
2498         ifp->if_mtu = sw_mtu;
2499
2500         err = mlx5_query_vport_mtu(mdev, &hw_mtu);
2501         if (err || !hw_mtu) {
2502                 /* fallback to port oper mtu */
2503                 err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2504         }
2505         if (err) {
2506                 if_printf(ifp, "Querying port MTU after setting the new "
2507                     "MTU value failed\n");
2508                 return (err);
2509         } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
2510                 err = -E2BIG;
2511                 if_printf(ifp, "Port MTU %d is smaller than "
2512                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2513         } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
2514                 err = -EINVAL;
2515                 if_printf(ifp, "Port MTU %d is bigger than "
2516                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2517         }
2518         priv->params_ethtool.hw_mtu = hw_mtu;
2519
2520         return (err);
2521 }
2522
2523 int
2524 mlx5e_open_locked(struct ifnet *ifp)
2525 {
2526         struct mlx5e_priv *priv = ifp->if_softc;
2527         int err;
2528         u16 set_id;
2529
2530         /* check if already opened */
2531         if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2532                 return (0);
2533
2534 #ifdef RSS
2535         if (rss_getnumbuckets() > priv->params.num_channels) {
2536                 if_printf(ifp, "NOTE: There are more RSS buckets (%u) than "
2537                     "channels (%u) available\n", rss_getnumbuckets(),
2538                     priv->params.num_channels);
2539         }
2540 #endif
2541         err = mlx5e_open_tises(priv);
2542         if (err) {
2543                 if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2544                     __func__, err);
2545                 return (err);
2546         }
2547         err = mlx5_vport_alloc_q_counter(priv->mdev,
2548             MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
2549         if (err) {
2550                 if_printf(priv->ifp,
2551                     "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2552                     __func__, err);
2553                 goto err_close_tises;
2554         }
2555         /* store counter set ID */
2556         priv->counter_set_id = set_id;
2557
2558         err = mlx5e_open_channels(priv);
2559         if (err) {
2560                 if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2561                     __func__, err);
2562                 goto err_dealloc_q_counter;
2563         }
2564         err = mlx5e_open_rqt(priv);
2565         if (err) {
2566                 if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2567                     __func__, err);
2568                 goto err_close_channels;
2569         }
2570         err = mlx5e_open_tirs(priv);
2571         if (err) {
2572                 if_printf(ifp, "%s: mlx5e_open_tirs failed, %d\n",
2573                     __func__, err);
2574                 goto err_close_rqt;
2575         }
2576         err = mlx5e_open_flow_table(priv);
2577         if (err) {
2578                 if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2579                     __func__, err);
2580                 goto err_close_tirs;
2581         }
2582         err = mlx5e_add_all_vlan_rules(priv);
2583         if (err) {
2584                 if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2585                     __func__, err);
2586                 goto err_close_flow_table;
2587         }
2588         set_bit(MLX5E_STATE_OPENED, &priv->state);
2589
2590         mlx5e_update_carrier(priv);
2591         mlx5e_set_rx_mode_core(priv);
2592
2593         return (0);
2594
2595 err_close_flow_table:
2596         mlx5e_close_flow_table(priv);
2597
2598 err_close_tirs:
2599         mlx5e_close_tirs(priv);
2600
2601 err_close_rqt:
2602         mlx5e_close_rqt(priv);
2603
2604 err_close_channels:
2605         mlx5e_close_channels(priv);
2606
2607 err_dealloc_q_counter:
2608         mlx5_vport_dealloc_q_counter(priv->mdev,
2609             MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2610
2611 err_close_tises:
2612         mlx5e_close_tises(priv);
2613
2614         return (err);
2615 }
2616
2617 static void
2618 mlx5e_open(void *arg)
2619 {
2620         struct mlx5e_priv *priv = arg;
2621
2622         PRIV_LOCK(priv);
2623         if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2624                 if_printf(priv->ifp,
2625                     "%s: Setting port status to up failed\n",
2626                     __func__);
2627
2628         mlx5e_open_locked(priv->ifp);
2629         priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2630         PRIV_UNLOCK(priv);
2631 }
2632
2633 int
2634 mlx5e_close_locked(struct ifnet *ifp)
2635 {
2636         struct mlx5e_priv *priv = ifp->if_softc;
2637
2638         /* check if already closed */
2639         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2640                 return (0);
2641
2642         clear_bit(MLX5E_STATE_OPENED, &priv->state);
2643
2644         mlx5e_set_rx_mode_core(priv);
2645         mlx5e_del_all_vlan_rules(priv);
2646         if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2647         mlx5e_close_flow_table(priv);
2648         mlx5e_close_tirs(priv);
2649         mlx5e_close_rqt(priv);
2650         mlx5e_close_channels(priv);
2651         mlx5_vport_dealloc_q_counter(priv->mdev,
2652             MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2653         mlx5e_close_tises(priv);
2654
2655         return (0);
2656 }
2657
2658 #if (__FreeBSD_version >= 1100000)
2659 static uint64_t
2660 mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
2661 {
2662         struct mlx5e_priv *priv = ifp->if_softc;
2663         u64 retval;
2664
2665         /* PRIV_LOCK(priv); XXX not allowed */
2666         switch (cnt) {
2667         case IFCOUNTER_IPACKETS:
2668                 retval = priv->stats.vport.rx_packets;
2669                 break;
2670         case IFCOUNTER_IERRORS:
2671                 retval = priv->stats.vport.rx_error_packets +
2672                     priv->stats.pport.alignment_err +
2673                     priv->stats.pport.check_seq_err +
2674                     priv->stats.pport.crc_align_errors +
2675                     priv->stats.pport.in_range_len_errors +
2676                     priv->stats.pport.jabbers +
2677                     priv->stats.pport.out_of_range_len +
2678                     priv->stats.pport.oversize_pkts +
2679                     priv->stats.pport.symbol_err +
2680                     priv->stats.pport.too_long_errors +
2681                     priv->stats.pport.undersize_pkts +
2682                     priv->stats.pport.unsupported_op_rx;
2683                 break;
2684         case IFCOUNTER_IQDROPS:
2685                 retval = priv->stats.vport.rx_out_of_buffer +
2686                     priv->stats.pport.drop_events;
2687                 break;
2688         case IFCOUNTER_OPACKETS:
2689                 retval = priv->stats.vport.tx_packets;
2690                 break;
2691         case IFCOUNTER_OERRORS:
2692                 retval = priv->stats.vport.tx_error_packets;
2693                 break;
2694         case IFCOUNTER_IBYTES:
2695                 retval = priv->stats.vport.rx_bytes;
2696                 break;
2697         case IFCOUNTER_OBYTES:
2698                 retval = priv->stats.vport.tx_bytes;
2699                 break;
2700         case IFCOUNTER_IMCASTS:
2701                 retval = priv->stats.vport.rx_multicast_packets;
2702                 break;
2703         case IFCOUNTER_OMCASTS:
2704                 retval = priv->stats.vport.tx_multicast_packets;
2705                 break;
2706         case IFCOUNTER_OQDROPS:
2707                 retval = priv->stats.vport.tx_queue_dropped;
2708                 break;
2709         case IFCOUNTER_COLLISIONS:
2710                 retval = priv->stats.pport.collisions;
2711                 break;
2712         default:
2713                 retval = if_get_counter_default(ifp, cnt);
2714                 break;
2715         }
2716         /* PRIV_UNLOCK(priv); XXX not allowed */
2717         return (retval);
2718 }
2719 #endif
2720
2721 static void
2722 mlx5e_set_rx_mode(struct ifnet *ifp)
2723 {
2724         struct mlx5e_priv *priv = ifp->if_softc;
2725
2726         queue_work(priv->wq, &priv->set_rx_mode_work);
2727 }
2728
2729 static int
2730 mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2731 {
2732         struct mlx5e_priv *priv;
2733         struct ifreq *ifr;
2734         struct ifi2creq i2c;
2735         int error = 0;
2736         int mask = 0;
2737         int size_read = 0;
2738         int module_status;
2739         int module_num;
2740         int max_mtu;
2741         uint8_t read_addr;
2742
2743         priv = ifp->if_softc;
2744
2745         /* check if detaching */
2746         if (priv == NULL || priv->gone != 0)
2747                 return (ENXIO);
2748
2749         switch (command) {
2750         case SIOCSIFMTU:
2751                 ifr = (struct ifreq *)data;
2752
2753                 PRIV_LOCK(priv);
2754                 mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2755
2756                 if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2757                     ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2758                         int was_opened;
2759
2760                         was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2761                         if (was_opened)
2762                                 mlx5e_close_locked(ifp);
2763
2764                         /* set new MTU */
2765                         mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2766
2767                         if (was_opened)
2768                                 mlx5e_open_locked(ifp);
2769                 } else {
2770                         error = EINVAL;
2771                         if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2772                             MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2773                 }
2774                 PRIV_UNLOCK(priv);
2775                 break;
2776         case SIOCSIFFLAGS:
2777                 if ((ifp->if_flags & IFF_UP) &&
2778                     (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2779                         mlx5e_set_rx_mode(ifp);
2780                         break;
2781                 }
2782                 PRIV_LOCK(priv);
2783                 if (ifp->if_flags & IFF_UP) {
2784                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2785                                 if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2786                                         mlx5e_open_locked(ifp);
2787                                 ifp->if_drv_flags |= IFF_DRV_RUNNING;
2788                                 mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2789                         }
2790                 } else {
2791                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2792                                 mlx5_set_port_status(priv->mdev,
2793                                     MLX5_PORT_DOWN);
2794                                 if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2795                                         mlx5e_close_locked(ifp);
2796                                 mlx5e_update_carrier(priv);
2797                                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2798                         }
2799                 }
2800                 PRIV_UNLOCK(priv);
2801                 break;
2802         case SIOCADDMULTI:
2803         case SIOCDELMULTI:
2804                 mlx5e_set_rx_mode(ifp);
2805                 break;
2806         case SIOCSIFMEDIA:
2807         case SIOCGIFMEDIA:
2808         case SIOCGIFXMEDIA:
2809                 ifr = (struct ifreq *)data;
2810                 error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2811                 break;
2812         case SIOCSIFCAP:
2813                 ifr = (struct ifreq *)data;
2814                 PRIV_LOCK(priv);
2815                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2816
2817                 if (mask & IFCAP_TXCSUM) {
2818                         ifp->if_capenable ^= IFCAP_TXCSUM;
2819                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2820
2821                         if (IFCAP_TSO4 & ifp->if_capenable &&
2822                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2823                                 ifp->if_capenable &= ~IFCAP_TSO4;
2824                                 ifp->if_hwassist &= ~CSUM_IP_TSO;
2825                                 if_printf(ifp,
2826                                     "tso4 disabled due to -txcsum.\n");
2827                         }
2828                 }
2829                 if (mask & IFCAP_TXCSUM_IPV6) {
2830                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2831                         ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2832
2833                         if (IFCAP_TSO6 & ifp->if_capenable &&
2834                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2835                                 ifp->if_capenable &= ~IFCAP_TSO6;
2836                                 ifp->if_hwassist &= ~CSUM_IP6_TSO;
2837                                 if_printf(ifp,
2838                                     "tso6 disabled due to -txcsum6.\n");
2839                         }
2840                 }
2841                 if (mask & IFCAP_RXCSUM)
2842                         ifp->if_capenable ^= IFCAP_RXCSUM;
2843                 if (mask & IFCAP_RXCSUM_IPV6)
2844                         ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2845                 if (mask & IFCAP_TSO4) {
2846                         if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2847                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2848                                 if_printf(ifp, "enable txcsum first.\n");
2849                                 error = EAGAIN;
2850                                 goto out;
2851                         }
2852                         ifp->if_capenable ^= IFCAP_TSO4;
2853                         ifp->if_hwassist ^= CSUM_IP_TSO;
2854                 }
2855                 if (mask & IFCAP_TSO6) {
2856                         if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2857                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2858                                 if_printf(ifp, "enable txcsum6 first.\n");
2859                                 error = EAGAIN;
2860                                 goto out;
2861                         }
2862                         ifp->if_capenable ^= IFCAP_TSO6;
2863                         ifp->if_hwassist ^= CSUM_IP6_TSO;
2864                 }
2865                 if (mask & IFCAP_VLAN_HWFILTER) {
2866                         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2867                                 mlx5e_disable_vlan_filter(priv);
2868                         else
2869                                 mlx5e_enable_vlan_filter(priv);
2870
2871                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2872                 }
2873                 if (mask & IFCAP_VLAN_HWTAGGING)
2874                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2875                 if (mask & IFCAP_WOL_MAGIC)
2876                         ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2877
2878                 VLAN_CAPABILITIES(ifp);
2879                 /* turning off LRO also means turning off HW LRO, if it's on */
2880                 if (mask & IFCAP_LRO) {
2881                         int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2882                         bool need_restart = false;
2883
2884                         ifp->if_capenable ^= IFCAP_LRO;
2885
2886                         /* figure out if updating HW LRO is needed */
2887                         if (!(ifp->if_capenable & IFCAP_LRO)) {
2888                                 if (priv->params.hw_lro_en) {
2889                                         priv->params.hw_lro_en = false;
2890                                         need_restart = true;
2891                                 }
2892                         } else {
2893                                 if (priv->params.hw_lro_en == false &&
2894                                     priv->params_ethtool.hw_lro != 0) {
2895                                         priv->params.hw_lro_en = true;
2896                                         need_restart = true;
2897                                 }
2898                         }
2899                         if (was_opened && need_restart) {
2900                                 mlx5e_close_locked(ifp);
2901                                 mlx5e_open_locked(ifp);
2902                         }
2903                 }
2904 out:
2905                 PRIV_UNLOCK(priv);
2906                 break;
2907
2908         case SIOCGI2C:
2909                 ifr = (struct ifreq *)data;
2910
2911                 /*
2912                  * Copy from the user-space address ifr_data to the
2913                  * kernel-space address i2c
2914                  */
2915                 error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
2916                 if (error)
2917                         break;
2918
2919                 if (i2c.len > sizeof(i2c.data)) {
2920                         error = EINVAL;
2921                         break;
2922                 }
2923
2924                 PRIV_LOCK(priv);
2925                 /* Get module_num which is required for the query_eeprom */
2926                 error = mlx5_query_module_num(priv->mdev, &module_num);
2927                 if (error) {
2928                         if_printf(ifp, "Query module num failed, eeprom "
2929                             "reading is not supported\n");
2930                         error = EINVAL;
2931                         goto err_i2c;
2932                 }
2933                 /* Check if module is present before doing an access */
2934                 module_status = mlx5_query_module_status(priv->mdev, module_num);
2935                 if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
2936                     module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
2937                         error = EINVAL;
2938                         goto err_i2c;
2939                 }
2940                 /*
2941                  * Currently 0xA0 and 0xA2 are the only addresses permitted.
2942                  * The internal conversion is as follows:
2943                  */
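		/*
		 * Illustrative mapping, mirroring the checks below and the
		 * SFF-8472 convention: 0xA0 is the module identification
		 * page (MLX5E_I2C_ADDR_LOW) and 0xA2 the diagnostics page
		 * (MLX5E_I2C_ADDR_HIGH).
		 */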
2944                 if (i2c.dev_addr == 0xA0)
2945                         read_addr = MLX5E_I2C_ADDR_LOW;
2946                 else if (i2c.dev_addr == 0xA2)
2947                         read_addr = MLX5E_I2C_ADDR_HIGH;
2948                 else {
2949                         if_printf(ifp, "Query eeprom failed, "
2950                             "Invalid Address: %X\n", i2c.dev_addr);
2951                         error = EINVAL;
2952                         goto err_i2c;
2953                 }
2954                 error = mlx5_query_eeprom(priv->mdev,
2955                     read_addr, MLX5E_EEPROM_LOW_PAGE,
2956                     (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2957                     (uint32_t *)i2c.data, &size_read);
2958                 if (error) {
2959                         if_printf(ifp, "Query eeprom failed, eeprom "
2960                             "reading is not supported\n");
2961                         error = EINVAL;
2962                         goto err_i2c;
2963                 }
2964
2965                 if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2966                         error = mlx5_query_eeprom(priv->mdev,
2967                             read_addr, MLX5E_EEPROM_LOW_PAGE,
2968                             (uint32_t)(i2c.offset + size_read),
2969                             (uint32_t)(i2c.len - size_read), module_num,
2970                             (uint32_t *)(i2c.data + size_read), &size_read);
2971                 }
2972                 if (error) {
2973                         if_printf(ifp, "Query eeprom failed, eeprom "
2974                             "reading is not supported\n");
2975                         error = EINVAL;
2976                         goto err_i2c;
2977                 }
2978
2979                 error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
2980 err_i2c:
2981                 PRIV_UNLOCK(priv);
2982                 break;
2983
2984         default:
2985                 error = ether_ioctl(ifp, command, data);
2986                 break;
2987         }
2988         return (error);
2989 }
2990
2991 static int
2992 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2993 {
2994         /*
2995          * TODO: uncomment once FW really sets all these bits:
2996          * if (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
2997          *     !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
2998          *     !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD))
2999          *         return (-ENOTSUPP);
3000          */
3001
3002         /* TODO: add more must-have features */
3003
3004         if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
3005                 return (-ENODEV);
3006
3007         return (0);
3008 }
3009
3010 static u16
3011 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
3012 {
3013         uint32_t bf_buf_size = (1U << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2U;
3014
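	/*
	 * The divide by two above is assumed to reflect that only half
	 * of the blue flame register is usable per doorbell write; the
	 * subtraction below reserves room for the TX WQE control
	 * information ahead of the inline data.
	 */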
3015         bf_buf_size -= sizeof(struct mlx5e_tx_wqe) - 2;
3016
3017         /* verify against driver hardware limit */
3018         if (bf_buf_size > MLX5E_MAX_TX_INLINE)
3019                 bf_buf_size = MLX5E_MAX_TX_INLINE;
3020
3021         return (bf_buf_size);
3022 }
3023
3024 static int
3025 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
3026     struct mlx5e_priv *priv,
3027     int num_comp_vectors)
3028 {
3029         int err;
3030
3031         /*
3032          * TODO: Consider link speed for setting "log_sq_size",
3033          * "log_rq_size" and "cq_moderation_xxx":
3034          */
3035         priv->params.log_sq_size =
3036             MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
3037         priv->params.log_rq_size =
3038             MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
3039         priv->params.rx_cq_moderation_usec =
3040             MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
3041             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
3042             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
3043         priv->params.rx_cq_moderation_mode =
3044             MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
3045         priv->params.rx_cq_moderation_pkts =
3046             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
3047         priv->params.tx_cq_moderation_usec =
3048             MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
3049         priv->params.tx_cq_moderation_pkts =
3050             MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
3051         priv->params.min_rx_wqes =
3052             MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
3053         priv->params.rx_hash_log_tbl_sz =
3054             (order_base_2(num_comp_vectors) >
3055             MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
3056             order_base_2(num_comp_vectors) :
3057             MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
3058         priv->params.num_tc = 1;
3059         priv->params.default_vlan_prio = 0;
3060         priv->counter_set_id = -1;
3061         priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
3062
3063         err = mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
3064         if (err)
3065                 return (err);
3066
3067         /*
3068          * HW LRO is currently off by default. Once that changes, the HW
3069          * capability "!!MLX5_CAP_ETH(mdev, lro_cap)" should be considered.
3070          */
3071         priv->params.hw_lro_en = false;
3072         priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
3073
3074         priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
3075
3076         priv->mdev = mdev;
3077         priv->params.num_channels = num_comp_vectors;
3078         priv->params.channels_rsss = 1;
3079         priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
3080         priv->queue_mapping_channel_mask =
3081             roundup_pow_of_two(num_comp_vectors) - 1;
3082         priv->num_tc = priv->params.num_tc;
3083         priv->default_vlan_prio = priv->params.default_vlan_prio;
3084
3085         INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
3086         INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
3087         INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
3088
3089         return (0);
3090 }
3091
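/*
 * Create a physical-address (PA) memory key covering the whole
 * address space (length64) with local read/write access, not bound
 * to a particular QP (QPN 0xffffff), so that RX/TX buffers can be
 * posted without per-buffer registration.
 */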
3092 static int
3093 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
3094                   struct mlx5_core_mr *mkey)
3095 {
3096         struct ifnet *ifp = priv->ifp;
3097         struct mlx5_core_dev *mdev = priv->mdev;
3098         int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
3099         void *mkc;
3100         u32 *in;
3101         int err;
3102
3103         in = mlx5_vzalloc(inlen);
3104         if (in == NULL) {
3105                 if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
3106                 return (-ENOMEM);
3107         }
3108
3109         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
3110         MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
3111         MLX5_SET(mkc, mkc, lw, 1);
3112         MLX5_SET(mkc, mkc, lr, 1);
3113
3114         MLX5_SET(mkc, mkc, pd, pdn);
3115         MLX5_SET(mkc, mkc, length64, 1);
3116         MLX5_SET(mkc, mkc, qpn, 0xffffff);
3117
3118         err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
3119         if (err)
3120                 if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
3121                     __func__, err);
3122
3123         kvfree(in);
3124         return (err);
3125 }
3126
3127 static const char *mlx5e_vport_stats_desc[] = {
3128         MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
3129 };
3130
3131 static const char *mlx5e_pport_stats_desc[] = {
3132         MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
3133 };
3134
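/*
 * Initialize the per-device locks: the async events mutex, which also
 * backs the watchdog callout, the state sx lock serializing open,
 * close and configuration changes, and the doorbell lock.
 */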
3135 static void
3136 mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
3137 {
3138         mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
3139         sx_init(&priv->state_lock, "mlx5state");
3140         callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
3141         MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
3142 }
3143
3144 static void
3145 mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
3146 {
3147         mtx_destroy(&priv->async_events_mtx);
3148         sx_destroy(&priv->state_lock);
3149 }
3150
3151 static int
3152 sysctl_firmware(SYSCTL_HANDLER_ARGS)
3153 {
3154         /*
3155          * The string format is "%d.%d.%d".
3156          * fw_rev_{maj,min,sub} each return a u16; 2^16 = 65536, so each
3157          * number needs at most 5 characters.
3158          * Together with the two "."s and the terminating NUL this means
3159          * we need at most 18 (5 * 3 + 3) characters.
3160          */
3161         char fw[18];
3162         struct mlx5e_priv *priv = arg1;
3163         int error;
3164
3165         snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev),
3166             fw_rev_min(priv->mdev), fw_rev_sub(priv->mdev));
3167         error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
3168         return (error);
3169 }
3170
3171 static void
3172 mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
3173 {
3174         int i;
3175
3176         for (i = 0; i < ch->num_tc; i++)
3177                 mlx5e_drain_sq(&ch->sq[i]);
3178 }
3179
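/*
 * Post a NOP through the doorbell record so that the hardware's view
 * of the producer index is brought back in line with the zeroed
 * software counters before the SQ is moved from RST to RDY.
 */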
3180 static void
3181 mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
3182 {
3183
3184         sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
3185         sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
3186         mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
3187         sq->doorbell.d64 = 0;
3188 }
3189
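/*
 * Bring a stopped send queue back into service by walking it through
 * the ERR -> RST -> RDY state transitions, resetting the producer and
 * consumer counters and the doorbell record along the way.
 */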
3190 void
3191 mlx5e_resume_sq(struct mlx5e_sq *sq)
3192 {
3193         int err;
3194
3195         /* check if already enabled */
3196         if (READ_ONCE(sq->running) != 0)
3197                 return;
3198
3199         err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
3200             MLX5_SQC_STATE_RST);
3201         if (err != 0) {
3202                 if_printf(sq->ifp,
3203                     "mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
3204         }
3205
3206         sq->cc = 0;
3207         sq->pc = 0;
3208
3209         /* reset doorbell prior to moving from RST to RDY */
3210         mlx5e_reset_sq_doorbell_record(sq);
3211
3212         err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
3213             MLX5_SQC_STATE_RDY);
3214         if (err != 0) {
3215                 if_printf(sq->ifp,
3216                     "mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
3217         }
3218
3219         sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
3220         WRITE_ONCE(sq->running, 1);
3221 }
3222
3223 static void
3224 mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
3225 {
3226         int i;
3227
3228         for (i = 0; i < ch->num_tc; i++)
3229                 mlx5e_resume_sq(&ch->sq[i]);
3230 }
3231
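/*
 * Stop receive DMA on a channel: mark the RQ disabled, stop its
 * watchdog, move the RQ from RDY to ERR so the hardware completes all
 * outstanding WQEs, poll the completion queue until the work queue is
 * empty, and finally park the RQ in the RST state.
 */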
3232 static void
3233 mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
3234 {
3235         struct mlx5e_rq *rq = &ch->rq;
3236         int err;
3237
3238         mtx_lock(&rq->mtx);
3239         rq->enabled = 0;
3240         callout_stop(&rq->watchdog);
3241         mtx_unlock(&rq->mtx);
3242
3243         callout_drain(&rq->watchdog);
3244
3245         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
3246         if (err != 0) {
3247                 if_printf(rq->ifp,
3248                     "mlx5e_modify_rq() from RDY to RST failed: %d\n", err);
3249         }
3250
3251         while (!mlx5_wq_ll_is_empty(&rq->wq)) {
3252                 msleep(1);
3253                 rq->cq.mcq.comp(&rq->cq.mcq);
3254         }
3255
3256         /*
3257          * Transitioning into the RST state allows the FW to track fewer
3258          * ERR-state queues, thus reducing the receive queue flush time.
3259          */
3260         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
3261         if (err != 0) {
3262                 if_printf(rq->ifp,
3263                     "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
3264         }
3265 }
3266
3267 static void
3268 mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
3269 {
3270         struct mlx5e_rq *rq = &ch->rq;
3271         int err;
3272
3273         rq->wq.wqe_ctr = 0;
3274         mlx5_wq_ll_update_db_record(&rq->wq);
3275         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
3276         if (err != 0) {
3277                 if_printf(rq->ifp,
3278                     "mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
3279         }
3280
3281         rq->enabled = 1;
3282
3283         rq->cq.mcq.comp(&rq->cq.mcq);
3284 }
3285
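/*
 * Disable (value != 0) or re-enable (value == 0) transmit DMA on all
 * channels.  Does nothing unless the interface has been opened.
 */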
3286 void
3287 mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
3288 {
3289         int i;
3290
3291         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
3292                 return;
3293
3294         for (i = 0; i < priv->params.num_channels; i++) {
3295                 if (value)
3296                         mlx5e_disable_tx_dma(&priv->channel[i]);
3297                 else
3298                         mlx5e_enable_tx_dma(&priv->channel[i]);
3299         }
3300 }
3301
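/*
 * Disable (value != 0) or re-enable (value == 0) receive DMA on all
 * channels.  Does nothing unless the interface has been opened.
 */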
3302 void
3303 mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
3304 {
3305         int i;
3306
3307         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
3308                 return;
3309
3310         for (i = 0; i < priv->params.num_channels; i++) {
3311                 if (value)
3312                         mlx5e_disable_rx_dma(&priv->channel[i]);
3313                 else
3314                         mlx5e_enable_rx_dma(&priv->channel[i]);
3315         }
3316 }
3317
3318 static void
3319 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
3320 {
3321         SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3322             OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
3323             sysctl_firmware, "A", "HCA firmware version");
3324
3325         SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3326             OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
3327             "Board ID");
3328 }
3329
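/*
 * Sysctl handler for TX priority flow control.  The value is exported
 * and imported as one byte per priority, each 0 or 1.  For example
 * (illustrative only; the unit number and sysctl(8) array syntax may
 * vary):
 *
 *	sysctl dev.mce.0.tx_priority_flow_control=0,0,0,1,0,0,0,0
 *
 * would request PFC on priority 3 only.  On any error the previous
 * setting is restored.
 */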
3330 static int
3331 mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3332 {
3333         struct mlx5e_priv *priv = arg1;
3334         uint8_t temp[MLX5E_MAX_PRIORITY];
3335         uint32_t tx_pfc;
3336         int err;
3337         int i;
3338
3339         PRIV_LOCK(priv);
3340
3341         tx_pfc = priv->params.tx_priority_flow_control;
3342
3343         for (i = 0; i != MLX5E_MAX_PRIORITY; i++)
3344                 temp[i] = (tx_pfc >> i) & 1;
3345
3346         err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY);
3347         if (err || !req->newptr)
3348                 goto done;
3349         err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY);
3350         if (err)
3351                 goto done;
3352
3353         priv->params.tx_priority_flow_control = 0;
3354
3355         /* range check input value */
3356         for (i = 0; i != MLX5E_MAX_PRIORITY; i++) {
3357                 if (temp[i] > 1) {
3358                         err = ERANGE;
3359                         goto done;
3360                 }
3361                 priv->params.tx_priority_flow_control |= (temp[i] << i);
3362         }
3363
3364         /* check if update is required */
3365         if (tx_pfc != priv->params.tx_priority_flow_control)
3366                 err = -mlx5e_set_port_pfc(priv);
3367 done:
3368         if (err != 0)
3369                 priv->params.tx_priority_flow_control = tx_pfc;
3370         PRIV_UNLOCK(priv);
3371
3372         return (err);
3373 }
3374
3375 static int
3376 mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3377 {
3378         struct mlx5e_priv *priv = arg1;
3379         uint8_t temp[MLX5E_MAX_PRIORITY];
3380         uint32_t rx_pfc;
3381         int err;
3382         int i;
3383
3384         PRIV_LOCK(priv);
3385
3386         rx_pfc = priv->params.rx_priority_flow_control;
3387
3388         for (i = 0; i != MLX5E_MAX_PRIORITY; i++)
3389                 temp[i] = (rx_pfc >> i) & 1;
3390
3391         err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY);
3392         if (err || !req->newptr)
3393                 goto done;
3394         err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY);
3395         if (err)
3396                 goto done;
3397
3398         priv->params.rx_priority_flow_control = 0;
3399
3400         /* range check input value */
3401         for (i = 0; i != MLX5E_MAX_PRIORITY; i++) {
3402                 if (temp[i] > 1) {
3403                         err = ERANGE;
3404                         goto done;
3405                 }
3406                 priv->params.rx_priority_flow_control |= (temp[i] << i);
3407         }
3408
3409         /* check if update is required */
3410         if (rx_pfc != priv->params.rx_priority_flow_control)
3411                 err = -mlx5e_set_port_pfc(priv);
3412 done:
3413         if (err != 0)
3414                 priv->params.rx_priority_flow_control = rx_pfc;
3415         PRIV_UNLOCK(priv);
3416
3417         return (err);
3418 }
3419
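/*
 * Set up the default pause frame and PFC configuration, register the
 * corresponding sysctls, and push the result to the firmware.  Global
 * pause frames and PFC are mutually exclusive, so PFC is backed out
 * if the firmware rejects the combination.
 */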
3420 static void
3421 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
3422 {
3423 #if (__FreeBSD_version < 1100000)
3424         char path[96];
3425 #endif
3426         int error;
3427
3428         /* enable pauseframes by default */
3429         priv->params.tx_pauseframe_control = 1;
3430         priv->params.rx_pauseframe_control = 1;
3431
3432         /* disable priority flow control (PFC) by default */
3433         priv->params.tx_priority_flow_control = 0;
3434         priv->params.rx_priority_flow_control = 0;
3435
3436 #if (__FreeBSD_version < 1100000)
3437         /* compute path for sysctl */
3438         snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
3439             device_get_unit(priv->mdev->pdev->dev.bsddev));
3440
3441         /* try to fetch tunable, if any */
3442         TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
3443
3444         /* compute path for sysctl */
3445         snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
3446             device_get_unit(priv->mdev->pdev->dev.bsddev));
3447
3448         /* try to fetch tunable, if any */
3449         TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
3450 #endif
3451
3452         /* register pauseframe SYSCTLs */
3453         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3454             OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
3455             &priv->params.tx_pauseframe_control, 0,
3456             "Set to enable TX pause frames. Clear to disable.");
3457
3458         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3459             OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
3460             &priv->params.rx_pauseframe_control, 0,
3461             "Set to enable RX pause frames. Clear to disable.");
3462
3463         /* register priority flow control (PFC) SYSCTLs */
3464         SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3465             OID_AUTO, "tx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN |
3466             CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_tx_priority_flow_control, "CU",
3467             "Set to enable TX priority flow control frames for priorities 0..7. Clear to disable.");
3468
3469         SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3470             OID_AUTO, "rx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN |
3471             CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_rx_priority_flow_control, "CU",
3472             "Set to enable RX priority flow control frames for priorities 0..7. Clear to disable.");
3473
3474         PRIV_LOCK(priv);
3475
3476         /* range check */
3477         priv->params.tx_pauseframe_control =
3478             priv->params.tx_pauseframe_control ? 1 : 0;
3479         priv->params.rx_pauseframe_control =
3480             priv->params.rx_pauseframe_control ? 1 : 0;
3481
3482         /* update firmware */
3483         error = mlx5e_set_port_pause_and_pfc(priv);
3484         if (error == -EINVAL) {
3485                 if_printf(priv->ifp,
3486                     "Global pauseframes must be disabled before enabling PFC.\n");
3487                 priv->params.rx_priority_flow_control = 0;
3488                 priv->params.tx_priority_flow_control = 0;
3489
3490                 /* update firmware */
3491                 (void) mlx5e_set_port_pause_and_pfc(priv);
3492         }
3493         PRIV_UNLOCK(priv);
3494 }
3495
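/*
 * Attach path: allocate the per-interface softc with room for the
 * worst-case number of channels, create and configure the ifnet,
 * build the sysctl trees, allocate the hardware resources (UAR, PD,
 * transport domain and mkey), populate the supported media list and
 * register for VLAN events before attaching to the network stack.
 */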
3496 static void *
3497 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
3498 {
3499         struct ifnet *ifp;
3500         struct mlx5e_priv *priv;
3501         u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
3502         struct sysctl_oid_list *child;
3503         int ncv = mdev->priv.eq_table.num_comp_vectors;
3504         char unit[16];
3505         int err;
3506         int i;
3507         u32 eth_proto_cap;
3508
3509         if (mlx5e_check_required_hca_cap(mdev)) {
3510                 mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
3511                 return (NULL);
3512         }
3513         /*
3514          * Try to allocate the priv and make room for worst-case
3515          * number of channel structures:
3516          */
3517         priv = malloc(sizeof(*priv) +
3518             (sizeof(priv->channel[0]) * mdev->priv.eq_table.num_comp_vectors),
3519             M_MLX5EN, M_WAITOK | M_ZERO);
3520         mlx5e_priv_mtx_init(priv);
3521
3522         ifp = priv->ifp = if_alloc(IFT_ETHER);
3523         if (ifp == NULL) {
3524                 mlx5_core_err(mdev, "if_alloc() failed\n");
3525                 goto err_free_priv;
3526         }
3527         ifp->if_softc = priv;
3528         if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
3529         ifp->if_mtu = ETHERMTU;
3530         ifp->if_init = mlx5e_open;
3531         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3532         ifp->if_ioctl = mlx5e_ioctl;
3533         ifp->if_transmit = mlx5e_xmit;
3534         ifp->if_qflush = if_qflush;
3535 #if (__FreeBSD_version >= 1100000)
3536         ifp->if_get_counter = mlx5e_get_counter;
3537 #endif
3538         ifp->if_snd.ifq_maxlen = ifqmaxlen;
3539         /*
3540          * Set driver features
3541          */
3542         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
3543         ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
3544         ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
3545         ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
3546         ifp->if_capabilities |= IFCAP_LRO;
3547         ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
3548         ifp->if_capabilities |= IFCAP_HWSTATS;
3549
3550         /* set TSO limits so that we don't have to drop TX packets */
3551         ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3552         ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
3553         ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
3554
3555         ifp->if_capenable = ifp->if_capabilities;
3556         ifp->if_hwassist = 0;
3557         if (ifp->if_capenable & IFCAP_TSO)
3558                 ifp->if_hwassist |= CSUM_TSO;
3559         if (ifp->if_capenable & IFCAP_TXCSUM)
3560                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
3561         if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3562                 ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
3563
3564         /* ifnet sysctl tree */
3565         sysctl_ctx_init(&priv->sysctl_ctx);
3566         priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
3567             OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
3568         if (priv->sysctl_ifnet == NULL) {
3569                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3570                 goto err_free_sysctl;
3571         }
3572         snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
3573         priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3574             OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
3575         if (priv->sysctl_ifnet == NULL) {
3576                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3577                 goto err_free_sysctl;
3578         }
3579
3580         /* HW sysctl tree */
3581         child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
3582         priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
3583             OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
3584         if (priv->sysctl_hw == NULL) {
3585                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3586                 goto err_free_sysctl;
3587         }
3588
3589         err = mlx5e_build_ifp_priv(mdev, priv, ncv);
3590         if (err) {
3591                 mlx5_core_err(mdev, "mlx5e_build_ifp_priv() failed (%d)\n", err);
3592                 goto err_free_sysctl;
3593         }
3594
3595         snprintf(unit, sizeof(unit), "mce%u_wq",
3596             device_get_unit(mdev->pdev->dev.bsddev));
3597         priv->wq = alloc_workqueue(unit, 0, 1);
3598         if (priv->wq == NULL) {
3599                 if_printf(ifp, "%s: alloc_workqueue failed\n", __func__);
3600                 goto err_free_sysctl;
3601         }
3602
3603         err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
3604         if (err) {
3605                 if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
3606                     __func__, err);
3607                 goto err_free_wq;
3608         }
3609         err = mlx5_core_alloc_pd(mdev, &priv->pdn);
3610         if (err) {
3611                 if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
3612                     __func__, err);
3613                 goto err_unmap_free_uar;
3614         }
3615         err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
3616         if (err) {
3617                 if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
3618                     __func__, err);
3619                 goto err_dealloc_pd;
3620         }
3621         err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
3622         if (err) {
3623                 if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
3624                     __func__, err);
3625                 goto err_dealloc_transport_domain;
3626         }
3627         mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
3628
3629         /* check if we should generate a random MAC address */
3630         if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
3631             is_zero_ether_addr(dev_addr)) {
3632                 random_ether_addr(dev_addr);
3633                 if_printf(ifp, "Assigned random MAC address\n");
3634         }
3635
3636         /* set default MTU */
3637         mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
3638
3639         /* Set default media status */
3640         priv->media_status_last = IFM_AVALID;
3641         priv->media_active_last = IFM_ETHER | IFM_AUTO |
3642             IFM_ETH_RXPAUSE | IFM_FDX;
3643
3644         /* setup default pauseframes configuration */
3645         mlx5e_setup_pauseframes(priv);
3646
3647         err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
3648         if (err) {
3649                 eth_proto_cap = 0;
3650                 if_printf(ifp, "%s: Query port media capability failed, %d\n",
3651                     __func__, err);
3652         }
3653
3654         /* Set up the supported media */
3655         ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
3656             mlx5e_media_change, mlx5e_media_status);
3657
3658         for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
3659                 if (mlx5e_mode_table[i].baudrate == 0)
3660                         continue;
3661                 if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
3662                         ifmedia_add(&priv->media,
3663                             mlx5e_mode_table[i].subtype |
3664                             IFM_ETHER, 0, NULL);
3665                         ifmedia_add(&priv->media,
3666                             mlx5e_mode_table[i].subtype |
3667                             IFM_ETHER | IFM_FDX |
3668                             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3669                 }
3670         }
3671
3672         /* Additional supported media */
3673         ifmedia_add(&priv->media, IFM_10G_LR | IFM_ETHER, 0, NULL);
3674         ifmedia_add(&priv->media, IFM_10G_LR |
3675             IFM_ETHER | IFM_FDX |
3676             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3677
3678         ifmedia_add(&priv->media, IFM_40G_ER4 | IFM_ETHER, 0, NULL);
3679         ifmedia_add(&priv->media, IFM_40G_ER4 |
3680             IFM_ETHER | IFM_FDX |
3681             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3682
3683         ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3684         ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3685             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3686
3687         /* Set autoselect by default */
3688         ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3689             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
3690         ether_ifattach(ifp, dev_addr);
3691
3692         /* Register for VLAN events */
3693         priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
3694             mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3695         priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3696             mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3697
3698         /* Link is down by default */
3699         if_link_state_change(ifp, LINK_STATE_DOWN);
3700
3701         mlx5e_enable_async_events(priv);
3702
3703         mlx5e_add_hw_stats(priv);
3704
3705         mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3706             "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3707             priv->stats.vport.arg);
3708
3709         mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3710             "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3711             priv->stats.pport.arg);
3712
3713         mlx5e_create_ethtool(priv);
3714
3715         mtx_lock(&priv->async_events_mtx);
3716         mlx5e_update_stats(priv);
3717         mtx_unlock(&priv->async_events_mtx);
3718
3719         return (priv);
3720
3721 err_dealloc_transport_domain:
3722         mlx5_dealloc_transport_domain(mdev, priv->tdn);
3723
3724 err_dealloc_pd:
3725         mlx5_core_dealloc_pd(mdev, priv->pdn);
3726
3727 err_unmap_free_uar:
3728         mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3729
3730 err_free_wq:
3731         destroy_workqueue(priv->wq);
3732
3733 err_free_sysctl:
3734         sysctl_ctx_free(&priv->sysctl_ctx);
3735         if (priv->sysctl_debug)
3736                 sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3737         if_free(ifp);
3738
3739 err_free_priv:
3740         mlx5e_priv_mtx_destroy(priv);
3741         free(priv, M_MLX5EN);
3742         return (NULL);
3743 }
3744
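/*
 * Detach path: block new ioctls, drain the watchdog and deregister
 * the VLAN event handlers, close the device, detach from the network
 * stack and release all resources in the reverse order of
 * mlx5e_create_ifp().
 */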
3745 static void
3746 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3747 {
3748         struct mlx5e_priv *priv = vpriv;
3749         struct ifnet *ifp = priv->ifp;
3750
3751         /* don't allow more IOCTLs */
3752         priv->gone = 1;
3753
3754         /* XXX wait a bit to allow IOCTL handlers to complete */
3755         pause("W", hz);
3756
3757         /* stop watchdog timer */
3758         callout_drain(&priv->watchdog);
3759
3760         if (priv->vlan_attach != NULL)
3761                 EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3762         if (priv->vlan_detach != NULL)
3763                 EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3764
3765         /* make sure device gets closed */
3766         PRIV_LOCK(priv);
3767         mlx5e_close_locked(ifp);
3768         PRIV_UNLOCK(priv);
3769
3770         /* unregister device */
3771         ifmedia_removeall(&priv->media);
3772         ether_ifdetach(ifp);
3773         if_free(ifp);
3774
3775         /* destroy all remaining sysctl nodes */
3776         sysctl_ctx_free(&priv->stats.vport.ctx);
3777         sysctl_ctx_free(&priv->stats.pport.ctx);
3778         if (priv->sysctl_debug)
3779                 sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3780         sysctl_ctx_free(&priv->sysctl_ctx);
3781
3782         mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3783         mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3784         mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3785         mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3786         mlx5e_disable_async_events(priv);
3787         destroy_workqueue(priv->wq);
3788         mlx5e_priv_mtx_destroy(priv);
3789         free(priv, M_MLX5EN);
3790 }
3791
3792 static void *
3793 mlx5e_get_ifp(void *vpriv)
3794 {
3795         struct mlx5e_priv *priv = vpriv;
3796
3797         return (priv->ifp);
3798 }
3799
3800 static struct mlx5_interface mlx5e_interface = {
3801         .add = mlx5e_create_ifp,
3802         .remove = mlx5e_destroy_ifp,
3803         .event = mlx5e_async_event,
3804         .protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3805         .get_dev = mlx5e_get_ifp,
3806 };
3807
3808 void
3809 mlx5e_init(void)
3810 {
3811         mlx5_register_interface(&mlx5e_interface);
3812 }
3813
3814 void
3815 mlx5e_cleanup(void)
3816 {
3817         mlx5_unregister_interface(&mlx5e_interface);
3818 }
3819
3820 static void
3821 mlx5e_show_version(void __unused *arg)
3822 {
3823
3824         printf("%s", mlx5e_version);
3825 }
3826 SYSINIT(mlx5e_show_version, SI_SUB_DRIVERS, SI_ORDER_ANY, mlx5e_show_version, NULL);
3827
3828 module_init_order(mlx5e_init, SI_ORDER_THIRD);
3829 module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3830
3831 #if (__FreeBSD_version >= 1100000)
3832 MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3833 #endif
3834 MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3835 MODULE_VERSION(mlx5en, 1);