/*
 * FreeBSD: sys/dev/mlx5/mlx5_en/mlx5_en_main.c (MFC r341582)
 */
1 /*-
2  * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27
28 #include "en.h"
29
30 #include <sys/sockio.h>
31 #include <machine/atomic.h>
32
#ifndef ETH_DRIVER_VERSION
/* Fallback version string when the build system does not provide one. */
#define ETH_DRIVER_VERSION      "3.4.2"
#endif

/* Banner printed when the driver module attaches. */
static const char mlx5e_version[] = "mlx5en: Mellanox Ethernet driver "
        ETH_DRIVER_VERSION " (" DRIVER_RELDATE ")\n";

/* Forward declaration; computes the RX WQE size and segment count. */
static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs);
41
/*
 * Aggregates the creation parameters for one channel: its receive
 * queue, send queue and the two associated completion queues.
 */
struct mlx5e_channel_param {
        struct mlx5e_rq_param rq;       /* receive queue parameters */
        struct mlx5e_sq_param sq;       /* send queue parameters */
        struct mlx5e_cq_param rx_cq;    /* RX completion queue parameters */
        struct mlx5e_cq_param tx_cq;    /* TX completion queue parameters */
};
48
/*
 * Maps each mlx5 link-mode index (the bit position used in the PTYS
 * eth_proto_* fields) to the corresponding ifmedia subtype and link
 * baudrate.  Entries not listed here are zero-initialized; a zero
 * baudrate marks an unsupported/unpopulated mode and is skipped by
 * the lookup loops below.
 */
static const struct {
        u32     subtype;
        u64     baudrate;
}       mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {

        [MLX5E_1000BASE_CX_SGMII] = {
                .subtype = IFM_1000_CX_SGMII,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_1000BASE_KX] = {
                .subtype = IFM_1000_KX,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_10GBASE_CX4] = {
                .subtype = IFM_10G_CX4,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_KX4] = {
                .subtype = IFM_10G_KX4,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_KR] = {
                .subtype = IFM_10G_KR,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_20GBASE_KR2] = {
                .subtype = IFM_20G_KR2,
                .baudrate = IF_Gbps(20ULL),
        },
        [MLX5E_40GBASE_CR4] = {
                .subtype = IFM_40G_CR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_40GBASE_KR4] = {
                .subtype = IFM_40G_KR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_56GBASE_R4] = {
                .subtype = IFM_56G_R4,
                .baudrate = IF_Gbps(56ULL),
        },
        [MLX5E_10GBASE_CR] = {
                .subtype = IFM_10G_CR1,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_SR] = {
                .subtype = IFM_10G_SR,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_ER] = {
                .subtype = IFM_10G_ER,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_40GBASE_SR4] = {
                .subtype = IFM_40G_SR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_40GBASE_LR4] = {
                .subtype = IFM_40G_LR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_100GBASE_CR4] = {
                .subtype = IFM_100G_CR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_SR4] = {
                .subtype = IFM_100G_SR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_KR4] = {
                .subtype = IFM_100G_KR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_LR4] = {
                .subtype = IFM_100G_LR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100BASE_TX] = {
                .subtype = IFM_100_TX,
                .baudrate = IF_Mbps(100ULL),
        },
        [MLX5E_1000BASE_T] = {
                .subtype = IFM_1000_T,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_10GBASE_T] = {
                .subtype = IFM_10G_T,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_25GBASE_CR] = {
                .subtype = IFM_25G_CR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_25GBASE_KR] = {
                .subtype = IFM_25G_KR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_25GBASE_SR] = {
                .subtype = IFM_25G_SR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_50GBASE_CR2] = {
                .subtype = IFM_50G_CR2,
                .baudrate = IF_Gbps(50ULL),
        },
        [MLX5E_50GBASE_KR2] = {
                .subtype = IFM_50G_KR2,
                .baudrate = IF_Gbps(50ULL),
        },
};

MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
161
/*
 * Refresh the cached link state and active media for the port.
 *
 * The vport state is queried first: on link down the cached media is
 * reset and LINK_STATE_DOWN is announced.  On link up, the PTYS
 * register supplies the operational protocol mask; the first bit that
 * matches a populated entry in mlx5e_mode_table determines the
 * reported ifmedia subtype and baudrate, after which LINK_STATE_UP is
 * announced.
 */
static void
mlx5e_update_carrier(struct mlx5e_priv *priv)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        u32 out[MLX5_ST_SZ_DW(ptys_reg)];
        u32 eth_proto_oper;
        int error;
        u8 port_state;
        u8 is_er_type;
        u8 i;

        port_state = mlx5_query_vport_state(mdev,
            MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);

        if (port_state == VPORT_STATE_UP) {
                priv->media_status_last |= IFM_ACTIVE;
        } else {
                priv->media_status_last &= ~IFM_ACTIVE;
                priv->media_active_last = IFM_ETHER;
                if_link_state_change(priv->ifp, LINK_STATE_DOWN);
                return;
        }

        error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
        if (error) {
                /* keep link up, but report unknown media and minimal speed */
                priv->media_active_last = IFM_ETHER;
                priv->ifp->if_baudrate = 1;
                if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
                    __func__, error);
                return;
        }
        eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);

        for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
                /* skip unpopulated table entries */
                if (mlx5e_mode_table[i].baudrate == 0)
                        continue;
                if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
                        u32 subtype = mlx5e_mode_table[i].subtype;

                        priv->ifp->if_baudrate =
                            mlx5e_mode_table[i].baudrate;

                        /*
                         * ER and LR variants share the same PTYS bit;
                         * use the PDDR cable range info to distinguish
                         * them.  On query failure, fall back to the
                         * LR variant.
                         */
                        switch (subtype) {
                        case IFM_10G_ER:
                                error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
                                if (error != 0) {
                                        if_printf(priv->ifp, "%s: query port pddr failed: %d\n",
                                            __func__, error);
                                }
                                if (error != 0 || is_er_type == 0)
                                        subtype = IFM_10G_LR;
                                break;
                        case IFM_40G_LR4:
                                error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
                                if (error != 0) {
                                        if_printf(priv->ifp, "%s: query port pddr failed: %d\n",
                                            __func__, error);
                                }
                                if (error == 0 && is_er_type != 0)
                                        subtype = IFM_40G_ER4;
                                break;
                        }
                        priv->media_active_last = subtype | IFM_ETHER | IFM_FDX;
                        break;
                }
        }
        if_link_state_change(priv->ifp, LINK_STATE_UP);
}
230
231 static void
232 mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
233 {
234         struct mlx5e_priv *priv = dev->if_softc;
235
236         ifmr->ifm_status = priv->media_status_last;
237         ifmr->ifm_active = priv->media_active_last |
238             (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
239             (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
240
241 }
242
243 static u32
244 mlx5e_find_link_mode(u32 subtype)
245 {
246         u32 i;
247         u32 link_mode = 0;
248
249         switch (subtype) {
250         case IFM_10G_LR:
251                 subtype = IFM_10G_ER;
252                 break;
253         case IFM_40G_ER4:
254                 subtype = IFM_40G_LR4;
255                 break;
256         }
257
258         for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
259                 if (mlx5e_mode_table[i].baudrate == 0)
260                         continue;
261                 if (mlx5e_mode_table[i].subtype == subtype)
262                         link_mode |= MLX5E_PROT_MASK(i);
263         }
264
265         return (link_mode);
266 }
267
/*
 * Push the current global pauseframe and per-priority flow control
 * (PFC) settings to the firmware for local port 1.  Returns the
 * firmware status (0 on success).
 */
static int
mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
{
        return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
            priv->params.rx_pauseframe_control,
            priv->params.tx_pauseframe_control,
            priv->params.rx_priority_flow_control,
            priv->params.tx_priority_flow_control));
}
277
278 static int
279 mlx5e_set_port_pfc(struct mlx5e_priv *priv)
280 {
281         int error;
282
283         if (priv->params.rx_pauseframe_control ||
284             priv->params.tx_pauseframe_control) {
285                 if_printf(priv->ifp,
286                     "Global pauseframes must be disabled before enabling PFC.\n");
287                 error = -EINVAL;
288         } else {
289                 error = mlx5e_set_port_pause_and_pfc(priv);
290         }
291         return (error);
292 }
293
/*
 * ifmedia change callback: program the requested link mode and
 * pauseframe settings into the hardware.
 *
 * Validates the requested media against the port's capabilities
 * (IFM_AUTO selects all supported modes), rejects enabling global
 * pauseframes while PFC is active, then cycles the port down,
 * programs the protocol mask and pause settings, and brings the port
 * back up if the interface was open.  May be called with the priv
 * lock already held; takes it otherwise.  Returns 0 on success or an
 * errno value.
 */
static int
mlx5e_media_change(struct ifnet *dev)
{
        struct mlx5e_priv *priv = dev->if_softc;
        struct mlx5_core_dev *mdev = priv->mdev;
        u32 eth_proto_cap;
        u32 link_mode;
        int was_opened;
        int locked;
        int error;

        locked = PRIV_LOCKED(priv);
        if (!locked)
                PRIV_LOCK(priv);

        if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
                error = EINVAL;
                goto done;
        }
        link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));

        /* query supported capabilities */
        error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
        if (error != 0) {
                if_printf(dev, "Query port media capability failed\n");
                goto done;
        }
        /* check for autoselect */
        if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
                /* autoselect advertises every supported mode */
                link_mode = eth_proto_cap;
                if (link_mode == 0) {
                        if_printf(dev, "Port media capability is zero\n");
                        error = EINVAL;
                        goto done;
                }
        } else {
                /* restrict the requested mode to what the port supports */
                link_mode = link_mode & eth_proto_cap;
                if (link_mode == 0) {
                        if_printf(dev, "Not supported link mode requested\n");
                        error = EINVAL;
                        goto done;
                }
        }
        if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
                /* check if PFC is enabled */
                if (priv->params.rx_priority_flow_control ||
                    priv->params.tx_priority_flow_control) {
                        if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n");
                        error = EINVAL;
                        goto done;
                }
        }
        /* update pauseframe control bits */
        priv->params.rx_pauseframe_control =
            (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
        priv->params.tx_pauseframe_control =
            (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;

        /* check if device is opened */
        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);

        /*
         * reconfigure the hardware: the port is taken down before
         * reprogramming and only brought back up if it was open
         */
        mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
        mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
        error = -mlx5e_set_port_pause_and_pfc(priv);
        if (was_opened)
                mlx5_set_port_status(mdev, MLX5_PORT_UP);

done:
        if (!locked)
                PRIV_UNLOCK(priv);
        return (error);
}
367
368 static void
369 mlx5e_update_carrier_work(struct work_struct *work)
370 {
371         struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
372             update_carrier_work);
373
374         PRIV_LOCK(priv);
375         if (test_bit(MLX5E_STATE_OPENED, &priv->state))
376                 mlx5e_update_carrier(priv);
377         PRIV_UNLOCK(priv);
378 }
379
/*
 * This function reads the physical port counters from the firmware
 * using a pre-defined layout defined by various MLX5E_PPORT_XXX()
 * macros. The output is converted from big-endian 64-bit values into
 * host endian ones and stored in the "priv->stats.pport" structure.
 *
 * NOTE: the index "y" deliberately carries over between some of the
 * loops below, because consecutive counter groups are stored at
 * consecutive offsets inside the "arg" arrays.
 */
static void
mlx5e_update_pport_counters(struct mlx5e_priv *priv)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_pport_stats *s = &priv->stats.pport;
        struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
        u32 *in;
        u32 *out;
        const u64 *ptr;
        unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
        unsigned x;
        unsigned y;
        unsigned z;

        /* allocate firmware request structures */
        in = mlx5_vzalloc(sz);
        out = mlx5_vzalloc(sz);
        if (in == NULL || out == NULL)
                goto free_out;

        /*
         * Get pointer to the 64-bit counter set which is located at a
         * fixed offset in the output firmware request structure:
         */
        ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);

        MLX5_SET(ppcnt_reg, in, local_port, 1);

        /*
         * read IEEE802_3 counter group using predefined counter layout;
         * it is stored after the per-priority counters in s->arg[]
         */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
             x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
                s->arg[y] = be64toh(ptr[x]);

        /* read RFC2819 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        /* the first part continues "y" in s->arg[] ... */
        for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
                s->arg[y] = be64toh(ptr[x]);
        /* ... the remainder starts the debug array at y = 0 */
        for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
            MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read RFC2863 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read physical layer stats counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read per-priority counters */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);

        /* iterate all the priorities */
        for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
                MLX5_SET(ppcnt_reg, in, prio_tc, z);
                mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);

                /* read per priority stats counter group using predefined counter layout */
                for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
                    MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
                        s->arg[y] = be64toh(ptr[x]);
        }
free_out:
        /* free firmware request structures */
        kvfree(in);
        kvfree(out);
}
460
/*
 * This function is called regularly to collect all statistics
 * counters from the firmware. The values can be viewed through the
 * sysctl interface. Execution is serialized using the priv's global
 * configuration lock.
 */
static void
mlx5e_update_stats_work(struct work_struct *work)
{
        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
            update_stats_work);
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_vport_stats *s = &priv->stats.vport;
        struct mlx5e_rq_stats *rq_stats;
        struct mlx5e_sq_stats *sq_stats;
        struct buf_ring *sq_br;
#if (__FreeBSD_version < 1100000)
        struct ifnet *ifp = priv->ifp;
#endif

        u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
        u32 *out;
        int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
        u64 tso_packets = 0;
        u64 tso_bytes = 0;
        u64 tx_queue_dropped = 0;
        u64 tx_defragged = 0;
        u64 tx_offload_none = 0;
        u64 lro_packets = 0;
        u64 lro_bytes = 0;
        u64 sw_lro_queued = 0;
        u64 sw_lro_flushed = 0;
        u64 rx_csum_none = 0;
        u64 rx_wqe_err = 0;
        u32 rx_out_of_buffer = 0;
        int i;
        int j;

        PRIV_LOCK(priv);
        out = mlx5_vzalloc(outlen);
        if (out == NULL)
                goto free_out;
        if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
                goto free_out;

        /* Collect first the SW counters and then HW for consistency */
        for (i = 0; i < priv->params.num_channels; i++) {
                struct mlx5e_rq *rq = &priv->channel[i]->rq;

                rq_stats = &priv->channel[i]->rq.stats;

                /* collect stats from LRO */
                rq_stats->sw_lro_queued = rq->lro.lro_queued;
                rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
                sw_lro_queued += rq_stats->sw_lro_queued;
                sw_lro_flushed += rq_stats->sw_lro_flushed;
                lro_packets += rq_stats->lro_packets;
                lro_bytes += rq_stats->lro_bytes;
                rx_csum_none += rq_stats->csum_none;
                rx_wqe_err += rq_stats->wqe_err;

                /* accumulate the per-TC send queue statistics */
                for (j = 0; j < priv->num_tc; j++) {
                        sq_stats = &priv->channel[i]->sq[j].stats;
                        sq_br = priv->channel[i]->sq[j].br;

                        tso_packets += sq_stats->tso_packets;
                        tso_bytes += sq_stats->tso_bytes;
                        tx_queue_dropped += sq_stats->dropped;
                        if (sq_br != NULL)
                                tx_queue_dropped += sq_br->br_drops;
                        tx_defragged += sq_stats->defragged;
                        tx_offload_none += sq_stats->csum_offload_none;
                }
        }

        /* jumbo packets are derived from the port histogram counters */
        s->tx_jumbo_packets =
            priv->stats.port_stats_debug.p1519to2047octets +
            priv->stats.port_stats_debug.p2048to4095octets +
            priv->stats.port_stats_debug.p4096to8191octets +
            priv->stats.port_stats_debug.p8192to10239octets;

        /* update counters */
        s->tso_packets = tso_packets;
        s->tso_bytes = tso_bytes;
        s->tx_queue_dropped = tx_queue_dropped;
        s->tx_defragged = tx_defragged;
        s->lro_packets = lro_packets;
        s->lro_bytes = lro_bytes;
        s->sw_lro_queued = sw_lro_queued;
        s->sw_lro_flushed = sw_lro_flushed;
        s->rx_csum_none = rx_csum_none;
        s->rx_wqe_err = rx_wqe_err;

        /* HW counters */
        memset(in, 0, sizeof(in));

        MLX5_SET(query_vport_counter_in, in, opcode,
            MLX5_CMD_OP_QUERY_VPORT_COUNTER);
        MLX5_SET(query_vport_counter_in, in, op_mod, 0);
        MLX5_SET(query_vport_counter_in, in, other_vport, 0);

        memset(out, 0, outlen);

        /* get number of out-of-buffer drops first */
        if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
            &rx_out_of_buffer))
                goto free_out;

        /*
         * accumulate difference into a 64-bit counter; the firmware
         * counter is only 32 bits wide and may wrap
         */
        s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
        s->rx_out_of_buffer_prev = rx_out_of_buffer;

        /* get port statistics */
        if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
                goto free_out;

/* convenience accessor for 64-bit fields of the vport counter reply */
#define MLX5_GET_CTR(out, x) \
        MLX5_GET64(query_vport_counter_out, out, x)

        s->rx_error_packets =
            MLX5_GET_CTR(out, received_errors.packets);
        s->rx_error_bytes =
            MLX5_GET_CTR(out, received_errors.octets);
        s->tx_error_packets =
            MLX5_GET_CTR(out, transmit_errors.packets);
        s->tx_error_bytes =
            MLX5_GET_CTR(out, transmit_errors.octets);

        s->rx_unicast_packets =
            MLX5_GET_CTR(out, received_eth_unicast.packets);
        s->rx_unicast_bytes =
            MLX5_GET_CTR(out, received_eth_unicast.octets);
        s->tx_unicast_packets =
            MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
        s->tx_unicast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_unicast.octets);

        s->rx_multicast_packets =
            MLX5_GET_CTR(out, received_eth_multicast.packets);
        s->rx_multicast_bytes =
            MLX5_GET_CTR(out, received_eth_multicast.octets);
        s->tx_multicast_packets =
            MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
        s->tx_multicast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_multicast.octets);

        s->rx_broadcast_packets =
            MLX5_GET_CTR(out, received_eth_broadcast.packets);
        s->rx_broadcast_bytes =
            MLX5_GET_CTR(out, received_eth_broadcast.octets);
        s->tx_broadcast_packets =
            MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
        s->tx_broadcast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);

        /* derive aggregate packet/byte counters */
        s->rx_packets =
            s->rx_unicast_packets +
            s->rx_multicast_packets +
            s->rx_broadcast_packets -
            s->rx_out_of_buffer;
        s->rx_bytes =
            s->rx_unicast_bytes +
            s->rx_multicast_bytes +
            s->rx_broadcast_bytes;
        s->tx_packets =
            s->tx_unicast_packets +
            s->tx_multicast_packets +
            s->tx_broadcast_packets;
        s->tx_bytes =
            s->tx_unicast_bytes +
            s->tx_multicast_bytes +
            s->tx_broadcast_bytes;

        /* Update calculated offload counters */
        s->tx_csum_offload = s->tx_packets - tx_offload_none;
        s->rx_csum_good = s->rx_packets - s->rx_csum_none;

        /* Get physical port counters */
        mlx5e_update_pport_counters(priv);

#if (__FreeBSD_version < 1100000)
        /* no get_counters interface in fbsd 10 */
        ifp->if_ipackets = s->rx_packets;
        ifp->if_ierrors = s->rx_error_packets +
            priv->stats.pport.alignment_err +
            priv->stats.pport.check_seq_err +
            priv->stats.pport.crc_align_errors +
            priv->stats.pport.in_range_len_errors +
            priv->stats.pport.jabbers +
            priv->stats.pport.out_of_range_len +
            priv->stats.pport.oversize_pkts +
            priv->stats.pport.symbol_err +
            priv->stats.pport.too_long_errors +
            priv->stats.pport.undersize_pkts +
            priv->stats.pport.unsupported_op_rx;
        ifp->if_iqdrops = s->rx_out_of_buffer +
            priv->stats.pport.drop_events;
        ifp->if_opackets = s->tx_packets;
        ifp->if_oerrors = s->tx_error_packets;
        ifp->if_snd.ifq_drops = s->tx_queue_dropped;
        ifp->if_ibytes = s->rx_bytes;
        ifp->if_obytes = s->tx_bytes;
        ifp->if_collisions =
            priv->stats.pport.collisions;
#endif

free_out:
        kvfree(out);

        /* Update diagnostics, if any */
        if (priv->params_ethtool.diag_pci_enable ||
            priv->params_ethtool.diag_general_enable) {
                int error = mlx5_core_get_diagnostics_full(mdev,
                    priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
                    priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
                if (error != 0)
                        if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
        }
        PRIV_UNLOCK(priv);
}
681
682 static void
683 mlx5e_update_stats(void *arg)
684 {
685         struct mlx5e_priv *priv = arg;
686
687         queue_work(priv->wq, &priv->update_stats_work);
688
689         callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
690 }
691
692 static void
693 mlx5e_async_event_sub(struct mlx5e_priv *priv,
694     enum mlx5_dev_event event)
695 {
696         switch (event) {
697         case MLX5_DEV_EVENT_PORT_UP:
698         case MLX5_DEV_EVENT_PORT_DOWN:
699                 queue_work(priv->wq, &priv->update_carrier_work);
700                 break;
701
702         default:
703                 break;
704         }
705 }
706
707 static void
708 mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
709     enum mlx5_dev_event event, unsigned long param)
710 {
711         struct mlx5e_priv *priv = vpriv;
712
713         mtx_lock(&priv->async_events_mtx);
714         if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
715                 mlx5e_async_event_sub(priv, event);
716         mtx_unlock(&priv->async_events_mtx);
717 }
718
/*
 * Allow mlx5e_async_event() to start dispatching firmware events.
 * Setting the bit is atomic, so the mutex is not needed here.
 */
static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
        set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}
724
/*
 * Stop dispatching firmware events.  The mutex guarantees that no
 * event handler is still running once this function returns.
 */
static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
        mtx_lock(&priv->async_events_mtx);
        clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
        mtx_unlock(&priv->async_events_mtx);
}
732
733 static void mlx5e_calibration_callout(void *arg);
734 static int mlx5e_calibration_duration = 20;
735 static int mlx5e_fast_calibration = 1;
736 static int mlx5e_normal_calibration = 30;
737
738 static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW, 0,
739     "MLX5 timestamp calibration parameteres");
740
741 SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
742     &mlx5e_calibration_duration, 0,
743     "Duration of initial calibration");
744 SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
745     &mlx5e_fast_calibration, 0,
746     "Recalibration interval during initial calibration");
747 SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
748     &mlx5e_normal_calibration, 0,
749     "Recalibration interval during normal operations");
750
751 /*
752  * Ignites the calibration process.
753  */
754 static void
755 mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
756 {
757
758         if (priv->clbr_done == 0)
759                 mlx5e_calibration_callout(priv);
760         else
761                 callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
762                     mlx5e_calibration_duration ? mlx5e_fast_calibration :
763                     mlx5e_normal_calibration) * hz, mlx5e_calibration_callout,
764                     priv);
765 }
766
/*
 * Convert a timespec into a 64-bit scalar clock value.  NOTE: despite
 * the "usec" in the name, the result is in nanoseconds (tv_sec is
 * scaled by 1e9 and tv_nsec is added).
 */
static uint64_t
mlx5e_timespec2usec(const struct timespec *ts)
{
        uint64_t res;

        res = (uint64_t)ts->tv_sec * 1000000000ULL;
        res += (uint64_t)ts->tv_nsec;
        return (res);
}
773
/*
 * Read the 64-bit hardware internal timer from the device's
 * initialization segment.  The high word is read before and after the
 * low word, and the sequence is retried until both high-word reads
 * agree, so a carry from low into high cannot produce a torn value.
 */
static uint64_t
mlx5e_hw_clock(struct mlx5e_priv *priv)
{
        struct mlx5_init_seg *iseg;
        uint32_t hw_h, hw_h1, hw_l;

        iseg = priv->mdev->iseg;
        do {
                hw_h = ioread32be(&iseg->internal_timer_h);
                hw_l = ioread32be(&iseg->internal_timer_l);
                hw_h1 = ioread32be(&iseg->internal_timer_h);
        } while (hw_h1 != hw_h);
        return (((uint64_t)hw_h << 32) | hw_l);
}
788
/*
 * The calibration callout, it runs either in the context of the
 * thread which enables calibration, or in callout.  It takes the
 * snapshot of system and adapter clocks, then advances the pointers to
 * the calibration point to allow rx path to read the consistent data
 * lockless.
 */
static void
mlx5e_calibration_callout(void *arg)
{
        struct mlx5e_priv *priv;
        struct mlx5e_clbr_point *next, *curr;
        struct timespec ts;
        int clbr_curr_next;

        priv = arg;
        curr = &priv->clbr_points[priv->clbr_curr];
        /* pick the next slot in the circular array of points */
        clbr_curr_next = priv->clbr_curr + 1;
        if (clbr_curr_next >= nitems(priv->clbr_points))
                clbr_curr_next = 0;
        next = &priv->clbr_points[clbr_curr_next];

        /* the current point's snapshot becomes the new interval start */
        next->base_prev = curr->base_curr;
        next->clbr_hw_prev = curr->clbr_hw_curr;

        next->clbr_hw_curr = mlx5e_hw_clock(priv);
        if (((next->clbr_hw_curr - curr->clbr_hw_prev) >> MLX5E_TSTMP_PREC) ==
            0) {
                /* hardware timer did not advance; disable calibration */
                if_printf(priv->ifp, "HW failed tstmp frozen %#jx %#jx,"
                    "disabling\n", next->clbr_hw_curr, curr->clbr_hw_prev);
                priv->clbr_done = 0;
                return;
        }

        nanouptime(&ts);
        next->base_curr = mlx5e_timespec2usec(&ts);

        /*
         * Publish the new point for lockless readers: invalidate the
         * old point's generation first, fence, then switch the current
         * index and store the new generation with release semantics.
         */
        curr->clbr_gen = 0;
        atomic_thread_fence_rel();
        priv->clbr_curr = clbr_curr_next;
        atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen));

        if (priv->clbr_done < mlx5e_calibration_duration)
                priv->clbr_done++;
        mlx5e_reset_calibration_callout(priv);
}
835
/* sysctl name/description strings for the per-RQ statistics counters */
static const char *mlx5e_rq_stats_desc[] = {
        MLX5E_RQ_STATS(MLX5E_STATS_DESC)
};
839
/*
 * Allocate and initialize the software state of one receive queue:
 * DMA tag, linked-list work queue, LRO state, per-WQE mbuf array and
 * DMA maps, and the per-RQ statistics sysctl tree.  The firmware RQ
 * object is created separately by mlx5e_enable_rq().  Returns 0 on
 * success or a negative errno; on failure all partially acquired
 * resources are released via the goto unwind ladder.
 */
static int
mlx5e_create_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        char buffer[16];
        void *rqc = param->rqc;
        void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
        int wq_sz;
        int err;
        int i;
        u32 nsegs, wqe_sz;

        err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
        if (err != 0)
                goto done;

        /* Create DMA descriptor TAG */
        if ((err = -bus_dma_tag_create(
            bus_get_dma_tag(mdev->pdev->dev.bsddev),
            1,                          /* any alignment */
            0,                          /* no boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            nsegs * MLX5E_MAX_RX_BYTES, /* maxsize */
            nsegs,                      /* nsegments */
            nsegs * MLX5E_MAX_RX_BYTES, /* maxsegsize */
            0,                          /* flags */
            NULL, NULL,                 /* lockfunc, lockfuncarg */
            &rq->dma_tag)))
                goto done;

        err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
            &rq->wq_ctrl);
        if (err)
                goto err_free_dma_tag;

        /* point the doorbell at the receive doorbell record */
        rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];

        err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs);
        if (err != 0)
                goto err_rq_wq_destroy;

        wq_sz = mlx5_wq_ll_get_size(&rq->wq);

        err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
        if (err)
                goto err_rq_wq_destroy;

        /* M_WAITOK: this allocation cannot fail */
        rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
        for (i = 0; i != wq_sz; i++) {
                struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
#if (MLX5E_MAX_RX_SEGS == 1)
                uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
#else
                int j;
#endif

                err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
                if (err != 0) {
                        /* destroy the maps created so far, then unwind */
                        while (i--)
                                bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
                        goto err_rq_mbuf_free;
                }

                /* set value for constant fields */
#if (MLX5E_MAX_RX_SEGS == 1)
                wqe->data[0].lkey = c->mkey_be;
                wqe->data[0].byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
#else
                for (j = 0; j < rq->nsegs; j++)
                        wqe->data[j].lkey = c->mkey_be;
#endif
        }

        rq->ifp = c->ifp;
        rq->channel = c;
        rq->ix = c->ix;

        /* attach the per-RQ statistics under e.g. "rxstat0" */
        snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
        mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
            buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
            rq->stats.arg);
        return (0);

err_rq_mbuf_free:
        free(rq->mbuf, M_MLX5EN);
        tcp_lro_free(&rq->lro);
err_rq_wq_destroy:
        mlx5_wq_destroy(&rq->wq_ctrl);
err_free_dma_tag:
        bus_dma_tag_destroy(rq->dma_tag);
done:
        return (err);
}
938
939 static void
940 mlx5e_destroy_rq(struct mlx5e_rq *rq)
941 {
942         int wq_sz;
943         int i;
944
945         /* destroy all sysctl nodes */
946         sysctl_ctx_free(&rq->stats.ctx);
947
948         /* free leftover LRO packets, if any */
949         tcp_lro_free(&rq->lro);
950
951         wq_sz = mlx5_wq_ll_get_size(&rq->wq);
952         for (i = 0; i != wq_sz; i++) {
953                 if (rq->mbuf[i].mbuf != NULL) {
954                         bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
955                         m_freem(rq->mbuf[i].mbuf);
956                 }
957                 bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
958         }
959         free(rq->mbuf, M_MLX5EN);
960         mlx5_wq_destroy(&rq->wq_ctrl);
961 }
962
/*
 * Create the firmware RQ object for an already-initialized software
 * RQ: build the CREATE_RQ command from the cached parameters, fill
 * in the CQ number, initial RST state, page list and doorbell
 * address, and execute it.  Returns 0 or a (negative) error code.
 */
static int
mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;

        void *in;
        void *rqc;
        void *wq;
        int inlen;
        int err;

        /* command length includes one 64-bit PAS entry per buffer page */
        inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
            sizeof(u64) * rq->wq_ctrl.buf.npages;
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
        wq = MLX5_ADDR_OF(rqc, rqc, wq);

        memcpy(rqc, param->rqc, sizeof(param->rqc));

        MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
        /* the RQ starts in reset state; it is moved to RDY later */
        MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
        MLX5_SET(rqc, rqc, flush_in_error_en, 1);
        if (priv->counter_set_id >= 0)
                MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
        MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
            PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);

        mlx5_fill_page_array(&rq->wq_ctrl.buf,
            (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

        err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);

        kvfree(in);

        return (err);
}
1005
/*
 * Transition the firmware RQ object from curr_state to next_state
 * (e.g. RST->RDY when opening, RDY->ERR when closing) via the
 * MODIFY_RQ command.  Returns 0 or a (negative) error code.
 */
static int
mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;

        void *in;
        void *rqc;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);

        MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
        MLX5_SET(modify_rq_in, in, rq_state, curr_state);
        MLX5_SET(rqc, rqc, state, next_state);

        err = mlx5_core_modify_rq(mdev, in, inlen);

        kvfree(in);

        return (err);
}
1035
1036 static void
1037 mlx5e_disable_rq(struct mlx5e_rq *rq)
1038 {
1039         struct mlx5e_channel *c = rq->channel;
1040         struct mlx5e_priv *priv = c->priv;
1041         struct mlx5_core_dev *mdev = priv->mdev;
1042
1043         mlx5_core_destroy_rq(mdev, rq->rqn);
1044 }
1045
1046 static int
1047 mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
1048 {
1049         struct mlx5e_channel *c = rq->channel;
1050         struct mlx5e_priv *priv = c->priv;
1051         struct mlx5_wq_ll *wq = &rq->wq;
1052         int i;
1053
1054         for (i = 0; i < 1000; i++) {
1055                 if (wq->cur_sz >= priv->params.min_rx_wqes)
1056                         return (0);
1057
1058                 msleep(4);
1059         }
1060         return (-ETIMEDOUT);
1061 }
1062
/*
 * Bring a receive queue fully online: create the software state,
 * create the firmware object, and move it to the RDY state.  Each
 * failure undoes exactly the steps that already succeeded.
 */
static int
mlx5e_open_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
        int err;

        err = mlx5e_create_rq(c, param, rq);
        if (err)
                return (err);

        err = mlx5e_enable_rq(rq, param);
        if (err)
                goto err_destroy_rq;

        err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
        if (err)
                goto err_disable_rq;

        /* mark the RQ as accepting completions */
        c->rq.enabled = 1;

        return (0);

err_disable_rq:
        mlx5e_disable_rq(rq);
err_destroy_rq:
        mlx5e_destroy_rq(rq);

        return (err);
}
1093
/*
 * Begin shutting down a receive queue: mark it disabled and stop the
 * watchdog under the RQ mutex, drain any in-flight watchdog callback
 * outside the lock, then move the firmware RQ to the error state so
 * outstanding WQEs are flushed back.  The final teardown happens in
 * mlx5e_close_rq_wait().
 */
static void
mlx5e_close_rq(struct mlx5e_rq *rq)
{
        mtx_lock(&rq->mtx);
        rq->enabled = 0;
        callout_stop(&rq->watchdog);
        mtx_unlock(&rq->mtx);

        /* wait for a possibly running watchdog callback to finish */
        callout_drain(&rq->watchdog);

        mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
}
1106
/*
 * Finish shutting down a receive queue: poll completions until the
 * RQ is empty (or the device has entered internal error), then
 * destroy the firmware object and free the software state.
 */
static void
mlx5e_close_rq_wait(struct mlx5e_rq *rq)
{
        struct mlx5_core_dev *mdev = rq->channel->priv->mdev;

        /* wait till RQ is empty */
        while (!mlx5_wq_ll_is_empty(&rq->wq) &&
               (mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
                msleep(4);
                /* poll the completion queue by hand to make progress */
                rq->cq.mcq.comp(&rq->cq.mcq);
        }

        mlx5e_disable_rq(rq);
        mlx5e_destroy_rq(rq);
}
1122
1123 void
1124 mlx5e_free_sq_db(struct mlx5e_sq *sq)
1125 {
1126         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1127         int x;
1128
1129         for (x = 0; x != wq_sz; x++)
1130                 bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
1131         free(sq->mbuf, M_MLX5EN);
1132 }
1133
/*
 * Allocate the per-WQE mbuf bookkeeping array for a send queue and
 * create one DMA map per entry.  On failure, the maps created so far
 * and the array are freed; returns 0 or a negative errno.
 */
int
mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
{
        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
        int err;
        int x;

        /* M_WAITOK: this allocation cannot fail */
        sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);

        /* Create DMA descriptor MAPs */
        for (x = 0; x != wq_sz; x++) {
                err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
                if (err != 0) {
                        /* unwind the maps created so far */
                        while (x--)
                                bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
                        free(sq->mbuf, M_MLX5EN);
                        return (err);
                }
        }
        return (0);
}
1155
/* sysctl name/description strings for the per-SQ statistics counters */
static const char *mlx5e_sq_stats_desc[] = {
        MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};
1159
1160 void
1161 mlx5e_update_sq_inline(struct mlx5e_sq *sq)
1162 {
1163         sq->max_inline = sq->priv->params.tx_max_inline;
1164         sq->min_inline_mode = sq->priv->params.tx_min_inline_mode;
1165
1166         /*
1167          * Check if trust state is DSCP or if inline mode is NONE which
1168          * indicates CX-5 or newer hardware.
1169          */
1170         if (sq->priv->params_ethtool.trust_state != MLX5_QPTS_TRUST_PCP ||
1171             sq->min_inline_mode == MLX5_INLINE_MODE_NONE) {
1172                 if (MLX5_CAP_ETH(sq->priv->mdev, wqe_vlan_insert))
1173                         sq->min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN;
1174                 else
1175                         sq->min_insert_caps = MLX5E_INSERT_NON_VLAN;
1176         } else {
1177                 sq->min_insert_caps = 0;
1178         }
1179 }
1180
1181 static void
1182 mlx5e_refresh_sq_inline_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
1183 {
1184         int i;
1185
1186         for (i = 0; i != c->num_tc; i++) {
1187                 mtx_lock(&c->sq[i].lock);
1188                 mlx5e_update_sq_inline(&c->sq[i]);
1189                 mtx_unlock(&c->sq[i].lock);
1190         }
1191 }
1192
1193 void
1194 mlx5e_refresh_sq_inline(struct mlx5e_priv *priv)
1195 {
1196         int i;
1197
1198         /* check if channels are closed */
1199         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
1200                 return;
1201
1202         for (i = 0; i < priv->params.num_channels; i++)
1203                 mlx5e_refresh_sq_inline_sub(priv, priv->channel[i]);
1204 }
1205
/*
 * Allocate and initialize the software state of one send queue for
 * traffic class "tc": DMA tag, UAR (doorbell page) mapping, cyclic
 * work queue, per-WQE DMA maps, inline parameters, and the per-SQ
 * statistics sysctl tree.  The firmware SQ object is created by
 * mlx5e_enable_sq().  Returns 0 or a negative errno; failures are
 * unwound via the goto ladder.
 */
static int
mlx5e_create_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        char buffer[16];
        void *sqc = param->sqc;
        void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
        int err;

        /* Create DMA descriptor TAG */
        if ((err = -bus_dma_tag_create(
            bus_get_dma_tag(mdev->pdev->dev.bsddev),
            1,                          /* any alignment */
            0,                          /* no boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            MLX5E_MAX_TX_PAYLOAD_SIZE,  /* maxsize */
            MLX5E_MAX_TX_MBUF_FRAGS,    /* nsegments */
            MLX5E_MAX_TX_MBUF_SIZE,     /* maxsegsize */
            0,                          /* flags */
            NULL, NULL,                 /* lockfunc, lockfuncarg */
            &sq->dma_tag)))
                goto done;

        err = mlx5_alloc_map_uar(mdev, &sq->uar);
        if (err)
                goto err_free_dma_tag;

        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
            &sq->wq_ctrl);
        if (err)
                goto err_unmap_free_uar;

        /* point the doorbell at the send doorbell record */
        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
        /* half of the blue-flame register is usable per doorbell */
        sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;

        err = mlx5e_alloc_sq_db(sq);
        if (err)
                goto err_sq_wq_destroy;

        sq->mkey_be = c->mkey_be;
        sq->ifp = priv->ifp;
        sq->priv = priv;
        sq->tc = tc;

        mlx5e_update_sq_inline(sq);

        /* attach the per-SQ statistics under e.g. "txstat0tc0" */
        snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
        mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
            buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
            sq->stats.arg);

        return (0);

err_sq_wq_destroy:
        mlx5_wq_destroy(&sq->wq_ctrl);

err_unmap_free_uar:
        mlx5_unmap_free_uar(mdev, &sq->uar);

err_free_dma_tag:
        bus_dma_tag_destroy(sq->dma_tag);
done:
        return (err);
}
1276
/*
 * Free the software state of a send queue in reverse order of
 * mlx5e_create_sq(): sysctl tree, DMA maps, work queue, UAR mapping.
 * NOTE: the DMA tag itself is not destroyed here — presumably freed
 * elsewhere or intentionally kept; confirm against the full driver.
 */
static void
mlx5e_destroy_sq(struct mlx5e_sq *sq)
{
        /* destroy all sysctl nodes */
        sysctl_ctx_free(&sq->stats.ctx);

        mlx5e_free_sq_db(sq);
        mlx5_wq_destroy(&sq->wq_ctrl);
        mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
}
1287
/*
 * Create the firmware SQ object bound to the given TIS: build the
 * CREATE_SQ command with the cached parameters, CQ number, initial
 * RST state, UAR page, page list and doorbell address, and execute
 * it.  Returns 0 or a (negative) error code.
 */
int
mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
    int tis_num)
{
        void *in;
        void *sqc;
        void *wq;
        int inlen;
        int err;

        /* command length includes one 64-bit PAS entry per buffer page */
        inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
            sizeof(u64) * sq->wq_ctrl.buf.npages;
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
        wq = MLX5_ADDR_OF(sqc, sqc, wq);

        memcpy(sqc, param->sqc, sizeof(param->sqc));

        MLX5_SET(sqc, sqc, tis_num_0, tis_num);
        MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
        /* the SQ starts in reset state; it is moved to RDY later */
        MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
        MLX5_SET(sqc, sqc, tis_lst_sz, 1);
        MLX5_SET(sqc, sqc, flush_in_error_en, 1);

        MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
        MLX5_SET(wq, wq, uar_page, sq->uar.index);
        MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
            PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);

        mlx5_fill_page_array(&sq->wq_ctrl.buf,
            (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

        err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);

        kvfree(in);

        return (err);
}
1330
/*
 * Transition the firmware SQ object from curr_state to next_state
 * (RST->RDY on open, RDY->ERR on drain) via the MODIFY_SQ command.
 * Returns 0 or a (negative) error code.
 */
int
mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
{
        void *in;
        void *sqc;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

        MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
        MLX5_SET(modify_sq_in, in, sq_state, curr_state);
        MLX5_SET(sqc, sqc, state, next_state);

        err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);

        kvfree(in);

        return (err);
}
1356
/* Destroy the firmware SQ object; software state is freed separately. */
void
mlx5e_disable_sq(struct mlx5e_sq *sq)
{

        mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
}
1363
/*
 * Bring a send queue fully online: create the software state, create
 * the firmware object bound to the traffic class's TIS, and move it
 * to the RDY state.  Each failure undoes the steps that succeeded.
 */
static int
mlx5e_open_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
        int err;

        err = mlx5e_create_sq(c, tc, param, sq);
        if (err)
                return (err);

        err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
        if (err)
                goto err_destroy_sq;

        err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
        if (err)
                goto err_disable_sq;

        return (0);

err_disable_sq:
        mlx5e_disable_sq(sq);
err_destroy_sq:
        mlx5e_destroy_sq(sq);

        return (err);
}
1393
/*
 * Pad the send queue with NOP WQEs until the completion event
 * counter reaches zero, so that a completion is eventually raised.
 * Called with the SQ lock held.  When "can_sleep" is set and the
 * ring is full, the SQ lock is temporarily DROPPED while sleeping;
 * otherwise the function bails out and rings the doorbell for
 * whatever was queued so far.
 */
static void
mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
{
        /* fill up remainder with NOPs */
        while (sq->cev_counter != 0) {
                while (!mlx5e_sq_has_room_for(sq, 1)) {
                        if (can_sleep != 0) {
                                /* wait for TX completions to free a slot */
                                mtx_unlock(&sq->lock);
                                msleep(4);
                                mtx_lock(&sq->lock);
                        } else {
                                goto done;
                        }
                }
                /* send a single NOP */
                mlx5e_send_nop(sq, 1);
                atomic_thread_fence_rel();
        }
done:
        /* Check if we need to write the doorbell */
        if (likely(sq->doorbell.d64 != 0)) {
                mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
                sq->doorbell.d64 = 0;
        }
}
1419
1420 void
1421 mlx5e_sq_cev_timeout(void *arg)
1422 {
1423         struct mlx5e_sq *sq = arg;
1424
1425         mtx_assert(&sq->lock, MA_OWNED);
1426
1427         /* check next state */
1428         switch (sq->cev_next_state) {
1429         case MLX5E_CEV_STATE_SEND_NOPS:
1430                 /* fill TX ring with NOPs, if any */
1431                 mlx5e_sq_send_nops_locked(sq, 0);
1432
1433                 /* check if completed */
1434                 if (sq->cev_counter == 0) {
1435                         sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
1436                         return;
1437                 }
1438                 break;
1439         default:
1440                 /* send NOPs on next timeout */
1441                 sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
1442                 break;
1443         }
1444
1445         /* restart timer */
1446         callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
1447 }
1448
/*
 * Drain a send queue: stop new transmissions, flush the ring with
 * NOPs, wait for outstanding WQEs to complete (or the link to go
 * down / device to error out), move the SQ to the error state so the
 * hardware returns the remainder, and wait until the ring is empty.
 */
void
mlx5e_drain_sq(struct mlx5e_sq *sq)
{
        int error;
        struct mlx5_core_dev *mdev= sq->priv->mdev;

        /*
         * Check if already stopped.
         *
         * NOTE: The "stopped" variable is only written when both the
         * priv's configuration lock and the SQ's lock is locked. It
         * can therefore safely be read when only one of the two locks
         * is locked. This function is always called when the priv's
         * configuration lock is locked.
         */
        if (sq->stopped != 0)
                return;

        mtx_lock(&sq->lock);

        /* don't put more packets into the SQ */
        sq->stopped = 1;

        /* teardown event factor timer, if any */
        sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
        callout_stop(&sq->cev_callout);

        /* send dummy NOPs in order to flush the transmit ring */
        mlx5e_sq_send_nops_locked(sq, 1);
        mtx_unlock(&sq->lock);

        /* make sure it is safe to free the callout */
        callout_drain(&sq->cev_callout);

        /* wait till SQ is empty or link is down */
        mtx_lock(&sq->lock);
        while (sq->cc != sq->pc &&
            (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
            mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
                /* poll completions by hand, dropping the lock meanwhile */
                mtx_unlock(&sq->lock);
                msleep(1);
                sq->cq.mcq.comp(&sq->cq.mcq);
                mtx_lock(&sq->lock);
        }
        mtx_unlock(&sq->lock);

        /* error out remaining requests */
        error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
        if (error != 0) {
                if_printf(sq->ifp,
                    "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
        }

        /* wait till SQ is empty */
        mtx_lock(&sq->lock);
        while (sq->cc != sq->pc &&
               mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
                mtx_unlock(&sq->lock);
                msleep(1);
                sq->cq.mcq.comp(&sq->cq.mcq);
                mtx_lock(&sq->lock);
        }
        mtx_unlock(&sq->lock);
}
1513
/* Fully close a send queue: drain, destroy firmware object, free state. */
static void
mlx5e_close_sq_wait(struct mlx5e_sq *sq)
{

        mlx5e_drain_sq(sq);
        mlx5e_disable_sq(sq);
        mlx5e_destroy_sq(sq);
}
1522
/*
 * Allocate and initialize the software state of a completion queue:
 * create the CQ work queue, resolve the EQ/IRQ for vector "eq_ix",
 * set up the doorbell records and callbacks, and initialize all CQEs
 * so none appears valid to the poller.  The firmware CQ object is
 * created by mlx5e_enable_cq().
 */
static int
mlx5e_create_cq(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    int eq_ix)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5_core_cq *mcq = &cq->mcq;
        int eqn_not_used;
        int irqn;
        int err;
        u32 i;

        param->wq.buf_numa_node = 0;
        param->wq.db_numa_node = 0;

        err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
            &cq->wq_ctrl);
        if (err)
                return (err);

        /* only the IRQ number is needed here; the EQN is fetched later */
        mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);

        mcq->cqe_sz = 64;
        /* the doorbell record holds both the CI and the arm counters */
        mcq->set_ci_db = cq->wq_ctrl.db.db;
        mcq->arm_db = cq->wq_ctrl.db.db + 1;
        *mcq->set_ci_db = 0;
        *mcq->arm_db = 0;
        mcq->vector = eq_ix;
        mcq->comp = comp;
        mcq->event = mlx5e_cq_error_event;
        mcq->irqn = irqn;
        mcq->uar = &priv->cq_uar;

        for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
                struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);

                /* 0xf1 presumably marks the CQE invalid/HW-owned — confirm */
                cqe->op_own = 0xf1;
        }

        cq->priv = priv;

        return (0);
}
1568
/* Free the software work queue backing a completion queue. */
static void
mlx5e_destroy_cq(struct mlx5e_cq *cq)
{
        mlx5_wq_destroy(&cq->wq_ctrl);
}
1574
/*
 * Create the firmware CQ object: build the CREATE_CQ command with
 * the cached parameters, bind it to the EQ of vector "eq_ix", point
 * it at the buffer pages and doorbell record, execute the command,
 * and arm the CQ for its first interrupt.
 */
static int
mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
{
        struct mlx5_core_cq *mcq = &cq->mcq;
        void *in;
        void *cqc;
        int inlen;
        int irqn_not_used;
        int eqn;
        int err;

        /* command length includes one 64-bit PAS entry per buffer page */
        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
            sizeof(u64) * cq->wq_ctrl.buf.npages;
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);

        memcpy(cqc, param->cqc, sizeof(param->cqc));

        mlx5_fill_page_array(&cq->wq_ctrl.buf,
            (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));

        /* only the EQN is needed here; the IRQ was resolved at create time */
        mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);

        MLX5_SET(cqc, cqc, c_eqn, eqn);
        MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
        MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
            PAGE_SHIFT);
        MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

        err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);

        kvfree(in);

        if (err)
                return (err);

        /* request the first completion interrupt */
        mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));

        return (0);
}
1618
/* Destroy the firmware CQ object; software state is freed separately. */
static void
mlx5e_disable_cq(struct mlx5e_cq *cq)
{

        mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
}
1625
1626 int
1627 mlx5e_open_cq(struct mlx5e_priv *priv,
1628     struct mlx5e_cq_param *param,
1629     struct mlx5e_cq *cq,
1630     mlx5e_cq_comp_t *comp,
1631     int eq_ix)
1632 {
1633         int err;
1634
1635         err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
1636         if (err)
1637                 return (err);
1638
1639         err = mlx5e_enable_cq(cq, param, eq_ix);
1640         if (err)
1641                 goto err_destroy_cq;
1642
1643         return (0);
1644
1645 err_destroy_cq:
1646         mlx5e_destroy_cq(cq);
1647
1648         return (err);
1649 }
1650
/* Tear down a completion queue: firmware object first, then software. */
void
mlx5e_close_cq(struct mlx5e_cq *cq)
{
        mlx5e_disable_cq(cq);
        mlx5e_destroy_cq(cq);
}
1657
/*
 * Open one transmit completion queue per traffic class of a channel.
 * On failure, the CQs opened so far are closed in reverse order.
 */
static int
mlx5e_open_tx_cqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
        int err;
        int tc;

        for (tc = 0; tc < c->num_tc; tc++) {
                /* open completion queue */
                err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
                    &mlx5e_tx_cq_comp, c->ix);
                if (err)
                        goto err_close_tx_cqs;
        }
        return (0);

err_close_tx_cqs:
        /* unwind: close the CQs that were opened successfully */
        for (tc--; tc >= 0; tc--)
                mlx5e_close_cq(&c->sq[tc].cq);

        return (err);
}
1680
1681 static void
1682 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1683 {
1684         int tc;
1685
1686         for (tc = 0; tc < c->num_tc; tc++)
1687                 mlx5e_close_cq(&c->sq[tc].cq);
1688 }
1689
/*
 * Open one send queue per traffic class of a channel.  On failure,
 * the SQs opened so far are closed (and drained) in reverse order.
 */
static int
mlx5e_open_sqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
        int err;
        int tc;

        for (tc = 0; tc < c->num_tc; tc++) {
                err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
                if (err)
                        goto err_close_sqs;
        }

        return (0);

err_close_sqs:
        /* unwind: close the SQs that were opened successfully */
        for (tc--; tc >= 0; tc--)
                mlx5e_close_sq_wait(&c->sq[tc]);

        return (err);
}
1711
1712 static void
1713 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1714 {
1715         int tc;
1716
1717         for (tc = 0; tc < c->num_tc; tc++)
1718                 mlx5e_close_sq_wait(&c->sq[tc]);
1719 }
1720
/*
 * Initialize the mutexes and callouts of a channel: the RQ mutex and
 * watchdog, and per-TC SQ lock, completion lock and completion-event
 * callout.  Also seeds the TX completion event factor (never zero).
 */
static void
mlx5e_chan_mtx_init(struct mlx5e_channel *c)
{
        int tc;

        mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);

        /* the watchdog callout runs with the RQ mutex held */
        callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);

        for (tc = 0; tc < c->num_tc; tc++) {
                struct mlx5e_sq *sq = c->sq + tc;

                mtx_init(&sq->lock, "mlx5tx",
                    MTX_NETWORK_LOCK " TX", MTX_DEF);
                mtx_init(&sq->comp_lock, "mlx5comp",
                    MTX_NETWORK_LOCK " TX", MTX_DEF);

                /* the CEV callout runs with the SQ lock held */
                callout_init_mtx(&sq->cev_callout, &sq->lock, 0);

                sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;

                /* ensure the TX completion event factor is not zero */
                if (sq->cev_factor == 0)
                        sq->cev_factor = 1;
        }
}
1747
1748 static void
1749 mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1750 {
1751         int tc;
1752
1753         mtx_destroy(&c->rq.mtx);
1754
1755         for (tc = 0; tc < c->num_tc; tc++) {
1756                 mtx_destroy(&c->sq[tc].lock);
1757                 mtx_destroy(&c->sq[tc].comp_lock);
1758         }
1759 }
1760
/*
 * Allocate and open one channel (one RX queue plus one TX queue per
 * traffic class, with their completion queues): allocate the channel
 * structure, initialize its locks, open TX CQs, the RX CQ, the SQs
 * and the RQ, publish the channel pointer, and poll the RX CQ once.
 * On failure everything opened so far is torn down in reverse order.
 */
static int
mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
    struct mlx5e_channel_param *cparam,
    struct mlx5e_channel *volatile *cp)
{
        struct mlx5e_channel *c;
        int err;

        /* M_WAITOK: this allocation cannot fail */
        c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
        c->priv = priv;
        c->ix = ix;
        c->cpu = 0;
        c->ifp = priv->ifp;
        c->mkey_be = cpu_to_be32(priv->mr.key);
        c->num_tc = priv->num_tc;

        /* init mutexes */
        mlx5e_chan_mtx_init(c);

        /* open transmit completion queue */
        err = mlx5e_open_tx_cqs(c, cparam);
        if (err)
                goto err_free;

        /* open receive completion queue */
        err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
            &mlx5e_rx_cq_comp, c->ix);
        if (err)
                goto err_close_tx_cqs;

        err = mlx5e_open_sqs(c, cparam);
        if (err)
                goto err_close_rx_cq;

        err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
        if (err)
                goto err_close_sqs;

        /* store channel pointer */
        *cp = c;

        /* poll receive queue initially */
        c->rq.cq.mcq.comp(&c->rq.cq.mcq);

        return (0);

err_close_sqs:
        mlx5e_close_sqs_wait(c);

err_close_rx_cq:
        mlx5e_close_cq(&c->rq.cq);

err_close_tx_cqs:
        mlx5e_close_tx_cqs(c);

err_free:
        /* destroy mutexes */
        mlx5e_chan_mtx_destroy(c);
        free(c, M_MLX5EN);
        return (err);
}
1822
1823 static void
1824 mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
1825 {
1826         struct mlx5e_channel *c = *pp;
1827
1828         /* check if channel is already closed */
1829         if (c == NULL)
1830                 return;
1831         mlx5e_close_rq(&c->rq);
1832 }
1833
/*
 * Wait for a channel to become idle and free all of its resources.
 * Tears down in reverse order of mlx5e_open_channel() and clears
 * "*pp" so the channel pointer cannot be used after the free.
 */
static void
mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	/* ensure channel pointer is no longer used */
	*pp = NULL;

	mlx5e_close_rq_wait(&c->rq);
	mlx5e_close_sqs_wait(c);
	mlx5e_close_cq(&c->rq.cq);
	mlx5e_close_tx_cqs(c);
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
}
1853
1854 static int
1855 mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs)
1856 {
1857         u32 r, n;
1858
1859         r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz :
1860             MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
1861         if (r > MJUM16BYTES)
1862                 return (-ENOMEM);
1863
1864         if (r > MJUM9BYTES)
1865                 r = MJUM16BYTES;
1866         else if (r > MJUMPAGESIZE)
1867                 r = MJUM9BYTES;
1868         else if (r > MCLBYTES)
1869                 r = MJUMPAGESIZE;
1870         else
1871                 r = MCLBYTES;
1872
1873         /*
1874          * n + 1 must be a power of two, because stride size must be.
1875          * Stride size is 16 * (n + 1), as the first segment is
1876          * control.
1877          */
1878         for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++)
1879                 ;
1880
1881         *wqe_sz = r;
1882         *nsegs = n;
1883         return (0);
1884 }
1885
1886 static void
1887 mlx5e_build_rq_param(struct mlx5e_priv *priv,
1888     struct mlx5e_rq_param *param)
1889 {
1890         void *rqc = param->rqc;
1891         void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
1892         u32 wqe_sz, nsegs;
1893
1894         mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
1895         MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
1896         MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1897         MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) +
1898             nsegs * sizeof(struct mlx5_wqe_data_seg)));
1899         MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
1900         MLX5_SET(wq, wq, pd, priv->pdn);
1901
1902         param->wq.buf_numa_node = 0;
1903         param->wq.db_numa_node = 0;
1904         param->wq.linear = 1;
1905 }
1906
1907 static void
1908 mlx5e_build_sq_param(struct mlx5e_priv *priv,
1909     struct mlx5e_sq_param *param)
1910 {
1911         void *sqc = param->sqc;
1912         void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
1913
1914         MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
1915         MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1916         MLX5_SET(wq, wq, pd, priv->pdn);
1917
1918         param->wq.buf_numa_node = 0;
1919         param->wq.db_numa_node = 0;
1920         param->wq.linear = 1;
1921 }
1922
1923 static void
1924 mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
1925     struct mlx5e_cq_param *param)
1926 {
1927         void *cqc = param->cqc;
1928
1929         MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
1930 }
1931
1932 static void
1933 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1934     struct mlx5e_cq_param *param)
1935 {
1936         void *cqc = param->cqc;
1937
1938
1939         /*
1940          * TODO The sysctl to control on/off is a bool value for now, which means
1941          * we only support CSUM, once HASH is implemnted we'll need to address that.
1942          */
1943         if (priv->params.cqe_zipping_en) {
1944                 MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1945                 MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1946         }
1947
1948         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1949         MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1950         MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1951
1952         switch (priv->params.rx_cq_moderation_mode) {
1953         case 0:
1954                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1955                 break;
1956         default:
1957                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1958                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1959                 else
1960                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1961                 break;
1962         }
1963
1964         mlx5e_build_common_cq_param(priv, param);
1965 }
1966
1967 static void
1968 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1969     struct mlx5e_cq_param *param)
1970 {
1971         void *cqc = param->cqc;
1972
1973         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1974         MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1975         MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1976
1977         switch (priv->params.tx_cq_moderation_mode) {
1978         case 0:
1979                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1980                 break;
1981         default:
1982                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1983                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1984                 else
1985                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1986                 break;
1987         }
1988
1989         mlx5e_build_common_cq_param(priv, param);
1990 }
1991
1992 static void
1993 mlx5e_build_channel_param(struct mlx5e_priv *priv,
1994     struct mlx5e_channel_param *cparam)
1995 {
1996         memset(cparam, 0, sizeof(*cparam));
1997
1998         mlx5e_build_rq_param(priv, &cparam->rq);
1999         mlx5e_build_sq_param(priv, &cparam->sq);
2000         mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
2001         mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
2002 }
2003
/*
 * Open all transmit/receive channels and wait until each receive
 * queue has been filled with a minimum number of receive WQEs.
 * On failure all channels opened so far are closed again and the
 * channel array is freed.  Returns zero or an error code.
 */
static int
mlx5e_open_channels(struct mlx5e_priv *priv)
{
	struct mlx5e_channel_param cparam;
	void *ptr;
	int err;
	int i;
	int j;

	priv->channel = malloc(priv->params.num_channels *
	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);

	mlx5e_build_channel_param(priv, &cparam);
	for (i = 0; i < priv->params.num_channels; i++) {
		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
		if (err)
			goto err_close_channels;
	}

	for (j = 0; j < priv->params.num_channels; j++) {
		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
		if (err)
			goto err_close_channels;
	}

	return (0);

err_close_channels:
	/*
	 * If the wait loop failed, "i" equals num_channels and all
	 * channels are closed; if the open loop failed, only the
	 * channels before index "i" were created.
	 */
	for (i--; i >= 0; i--) {
		mlx5e_close_channel(&priv->channel[i]);
		mlx5e_close_channel_wait(&priv->channel[i]);
	}

	/* remove "volatile" attribute from "channel" pointer */
	ptr = __DECONST(void *, priv->channel);
	priv->channel = NULL;

	free(ptr, M_MLX5EN);

	return (err);
}
2045
2046 static void
2047 mlx5e_close_channels(struct mlx5e_priv *priv)
2048 {
2049         void *ptr;
2050         int i;
2051
2052         if (priv->channel == NULL)
2053                 return;
2054
2055         for (i = 0; i < priv->params.num_channels; i++)
2056                 mlx5e_close_channel(&priv->channel[i]);
2057         for (i = 0; i < priv->params.num_channels; i++)
2058                 mlx5e_close_channel_wait(&priv->channel[i]);
2059
2060         /* remove "volatile" attribute from "channel" pointer */
2061         ptr = __DECONST(void *, priv->channel);
2062         priv->channel = NULL;
2063
2064         free(ptr, M_MLX5EN);
2065 }
2066
2067 static int
2068 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
2069 {
2070
2071         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
2072                 uint8_t cq_mode;
2073
2074                 switch (priv->params.tx_cq_moderation_mode) {
2075                 case 0:
2076                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2077                         break;
2078                 default:
2079                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2080                         break;
2081                 }
2082
2083                 return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
2084                     priv->params.tx_cq_moderation_usec,
2085                     priv->params.tx_cq_moderation_pkts,
2086                     cq_mode));
2087         }
2088
2089         return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
2090             priv->params.tx_cq_moderation_usec,
2091             priv->params.tx_cq_moderation_pkts));
2092 }
2093
2094 static int
2095 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
2096 {
2097
2098         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
2099                 uint8_t cq_mode;
2100                 int retval;
2101
2102                 switch (priv->params.rx_cq_moderation_mode) {
2103                 case 0:
2104                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2105                         break;
2106                 default:
2107                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2108                         break;
2109                 }
2110
2111                 retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
2112                     priv->params.rx_cq_moderation_usec,
2113                     priv->params.rx_cq_moderation_pkts,
2114                     cq_mode);
2115
2116                 return (retval);
2117         }
2118
2119         return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
2120             priv->params.rx_cq_moderation_usec,
2121             priv->params.rx_cq_moderation_pkts));
2122 }
2123
2124 static int
2125 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
2126 {
2127         int err;
2128         int i;
2129
2130         if (c == NULL)
2131                 return (EINVAL);
2132
2133         err = mlx5e_refresh_rq_params(priv, &c->rq);
2134         if (err)
2135                 goto done;
2136
2137         for (i = 0; i != c->num_tc; i++) {
2138                 err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
2139                 if (err)
2140                         goto done;
2141         }
2142 done:
2143         return (err);
2144 }
2145
2146 int
2147 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
2148 {
2149         int i;
2150
2151         if (priv->channel == NULL)
2152                 return (EINVAL);
2153
2154         for (i = 0; i < priv->params.num_channels; i++) {
2155                 int err;
2156
2157                 err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
2158                 if (err)
2159                         return (err);
2160         }
2161         return (0);
2162 }
2163
2164 static int
2165 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
2166 {
2167         struct mlx5_core_dev *mdev = priv->mdev;
2168         u32 in[MLX5_ST_SZ_DW(create_tis_in)];
2169         void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
2170
2171         memset(in, 0, sizeof(in));
2172
2173         MLX5_SET(tisc, tisc, prio, tc);
2174         MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
2175
2176         return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
2177 }
2178
/* Destroy the TIS object created for traffic class "tc". */
static void
mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
{
	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
}
2184
2185 static int
2186 mlx5e_open_tises(struct mlx5e_priv *priv)
2187 {
2188         int num_tc = priv->num_tc;
2189         int err;
2190         int tc;
2191
2192         for (tc = 0; tc < num_tc; tc++) {
2193                 err = mlx5e_open_tis(priv, tc);
2194                 if (err)
2195                         goto err_close_tises;
2196         }
2197
2198         return (0);
2199
2200 err_close_tises:
2201         for (tc--; tc >= 0; tc--)
2202                 mlx5e_close_tis(priv, tc);
2203
2204         return (err);
2205 }
2206
2207 static void
2208 mlx5e_close_tises(struct mlx5e_priv *priv)
2209 {
2210         int num_tc = priv->num_tc;
2211         int tc;
2212
2213         for (tc = 0; tc < num_tc; tc++)
2214                 mlx5e_close_tis(priv, tc);
2215 }
2216
/*
 * Create the receive queue table (RQT) used for RSS indirection.
 * Each of the 2^rx_hash_log_tbl_sz entries points at the receive
 * queue of one of the open channels.  On success the new RQT number
 * is stored in priv->rqtn.  Returns zero or a negative errno value.
 */
static int
mlx5e_open_rqt(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 *in;
	u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
	void *rqtc;
	int inlen;
	int err;
	int sz;
	int i;

	sz = 1 << priv->params.rx_hash_log_tbl_sz;

	/* the command layout ends with a variable sized array of RQ numbers */
	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

	for (i = 0; i < sz; i++) {
		int ix = i;
#ifdef RSS
		ix = rss_get_indirection_to_bucket(ix);
#endif
		/* ensure we don't overflow */
		ix %= priv->params.num_channels;

		/* apply receive side scaling stride, if any */
		ix -= ix % (int)priv->params.channels_rsss;

		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
	}

	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);

	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	if (!err)
		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);

	kvfree(in);

	return (err);
}
2264
/*
 * Destroy the RQT created by mlx5e_open_rqt().  The command status
 * is ignored; there is no sensible recovery during teardown.
 */
static void
mlx5e_close_rqt(struct mlx5e_priv *priv)
{
	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};

	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);

	mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
}
2276
/*
 * Fill in the TIR context "tirc" for traffic type "tt".
 *
 * MLX5E_TT_ANY dispatches directly to the first channel's receive
 * queue; every other traffic type dispatches indirectly through the
 * RQT using a Toeplitz RSS hash, where the hashed header fields are
 * selected per traffic type below.  When hardware LRO is enabled the
 * LRO parameters are programmed as well.
 */
static void
mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
{
	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	__be32 *hkey;

	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);

/* rough upper bound for the L2 + L3 header size, used for LRO sizing */
#define ROUGH_MAX_L2_L3_HDR_SZ 256

#define MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
                          MLX5_HASH_FIELD_SEL_DST_IP)

#define MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
                          MLX5_HASH_FIELD_SEL_DST_IP   |\
                          MLX5_HASH_FIELD_SEL_L4_SPORT |\
                          MLX5_HASH_FIELD_SEL_L4_DPORT)

#define MLX5_HASH_IP_IPSEC_SPI  (MLX5_HASH_FIELD_SEL_SRC_IP   |\
                                 MLX5_HASH_FIELD_SEL_DST_IP   |\
                                 MLX5_HASH_FIELD_SEL_IPSEC_SPI)

	if (priv->params.hw_lro_en) {
		MLX5_SET(tirc, tirc, lro_enable_mask,
		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
		MLX5_SET(tirc, tirc, lro_max_msg_sz,
		    (priv->params.lro_wqe_sz -
		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
		/* TODO: add the option to choose timer value dynamically */
		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
		    MLX5_CAP_ETH(priv->mdev,
		    lro_timer_supported_periods[2]));
	}

	/* setup parameters for hashing TIR type, if any */
	switch (tt) {
	case MLX5E_TT_ANY:
		MLX5_SET(tirc, tirc, disp_type,
		    MLX5_TIRC_DISP_TYPE_DIRECT);
		MLX5_SET(tirc, tirc, inline_rqn,
		    priv->channel[0]->rq.rqn);
		break;
	default:
		MLX5_SET(tirc, tirc, disp_type,
		    MLX5_TIRC_DISP_TYPE_INDIRECT);
		MLX5_SET(tirc, tirc, indirect_table,
		    priv->rqtn);
		MLX5_SET(tirc, tirc, rx_hash_fn,
		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
#ifdef RSS
		/*
		 * The FreeBSD RSS implementation does currently not
		 * support symmetric Toeplitz hashes:
		 */
		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
		rss_getkey((uint8_t *)hkey);
#else
		/* static default RSS key used without the kernel RSS option */
		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
		hkey[0] = cpu_to_be32(0xD181C62C);
		hkey[1] = cpu_to_be32(0xF7F4DB5B);
		hkey[2] = cpu_to_be32(0x1983A2FC);
		hkey[3] = cpu_to_be32(0x943E1ADB);
		hkey[4] = cpu_to_be32(0xD9389E6B);
		hkey[5] = cpu_to_be32(0xD1039C2C);
		hkey[6] = cpu_to_be32(0xA74499AD);
		hkey[7] = cpu_to_be32(0x593D56D9);
		hkey[8] = cpu_to_be32(0xF3253C06);
		hkey[9] = cpu_to_be32(0x2ADC1FFC);
#endif
		break;
	}

	/* select which header fields feed the RSS hash, per traffic type */
	switch (tt) {
	case MLX5E_TT_IPV4_TCP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_TCP);
#ifdef RSS
		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
			    MLX5_HASH_IP);
		} else
#endif
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV6_TCP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_TCP);
#ifdef RSS
		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
			    MLX5_HASH_IP);
		} else
#endif
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV4_UDP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_UDP);
#ifdef RSS
		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
			    MLX5_HASH_IP);
		} else
#endif
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV6_UDP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_UDP);
#ifdef RSS
		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
			    MLX5_HASH_IP);
		} else
#endif
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV4_IPSEC_AH:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV6_IPSEC_AH:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV4_IPSEC_ESP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV6_IPSEC_ESP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV4:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP);
		break;

	case MLX5E_TT_IPV6:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP);
		break;

	default:
		break;
	}
}
2458
2459 static int
2460 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2461 {
2462         struct mlx5_core_dev *mdev = priv->mdev;
2463         u32 *in;
2464         void *tirc;
2465         int inlen;
2466         int err;
2467
2468         inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2469         in = mlx5_vzalloc(inlen);
2470         if (in == NULL)
2471                 return (-ENOMEM);
2472         tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2473
2474         mlx5e_build_tir_ctx(priv, tirc, tt);
2475
2476         err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2477
2478         kvfree(in);
2479
2480         return (err);
2481 }
2482
/* Destroy the TIR created for traffic type "tt". */
static void
mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
{
	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
}
2488
2489 static int
2490 mlx5e_open_tirs(struct mlx5e_priv *priv)
2491 {
2492         int err;
2493         int i;
2494
2495         for (i = 0; i < MLX5E_NUM_TT; i++) {
2496                 err = mlx5e_open_tir(priv, i);
2497                 if (err)
2498                         goto err_close_tirs;
2499         }
2500
2501         return (0);
2502
2503 err_close_tirs:
2504         for (i--; i >= 0; i--)
2505                 mlx5e_close_tir(priv, i);
2506
2507         return (err);
2508 }
2509
2510 static void
2511 mlx5e_close_tirs(struct mlx5e_priv *priv)
2512 {
2513         int i;
2514
2515         for (i = 0; i < MLX5E_NUM_TT; i++)
2516                 mlx5e_close_tir(priv, i);
2517 }
2518
2519 /*
2520  * SW MTU does not include headers,
2521  * HW MTU includes all headers and checksums.
2522  */
2523 static int
2524 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2525 {
2526         struct mlx5e_priv *priv = ifp->if_softc;
2527         struct mlx5_core_dev *mdev = priv->mdev;
2528         int hw_mtu;
2529         int err;
2530
2531         hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);
2532
2533         err = mlx5_set_port_mtu(mdev, hw_mtu);
2534         if (err) {
2535                 if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2536                     __func__, sw_mtu, err);
2537                 return (err);
2538         }
2539
2540         /* Update vport context MTU */
2541         err = mlx5_set_vport_mtu(mdev, hw_mtu);
2542         if (err) {
2543                 if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n",
2544                     __func__, err);
2545         }
2546
2547         ifp->if_mtu = sw_mtu;
2548
2549         err = mlx5_query_vport_mtu(mdev, &hw_mtu);
2550         if (err || !hw_mtu) {
2551                 /* fallback to port oper mtu */
2552                 err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2553         }
2554         if (err) {
2555                 if_printf(ifp, "Query port MTU, after setting new "
2556                     "MTU value, failed\n");
2557                 return (err);
2558         } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
2559                 err = -E2BIG,
2560                 if_printf(ifp, "Port MTU %d is smaller than "
2561                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2562         } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
2563                 err = -EINVAL;
2564                 if_printf(ifp, "Port MTU %d is bigger than "
2565                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2566         }
2567         priv->params_ethtool.hw_mtu = hw_mtu;
2568
2569         return (err);
2570 }
2571
/*
 * Open the network interface: create the TISes, allocate a vport
 * queue counter, open all channels, and create the RQT, TIRs and
 * flow table before installing the VLAN rules.  On any failure the
 * steps completed so far are undone in reverse order.  Returns zero
 * when the interface is (or already was) open.
 */
int
mlx5e_open_locked(struct ifnet *ifp)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	int err;
	u16 set_id;

	/* check if already opened */
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
		return (0);

#ifdef RSS
	if (rss_getnumbuckets() > priv->params.num_channels) {
		if_printf(ifp, "NOTE: There are more RSS buckets(%u) than "
		    "channels(%u) available\n", rss_getnumbuckets(),
		    priv->params.num_channels);
	}
#endif
	err = mlx5e_open_tises(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
		    __func__, err);
		return (err);
	}
	err = mlx5_vport_alloc_q_counter(priv->mdev,
	    MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
	if (err) {
		if_printf(priv->ifp,
		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
		    __func__, err);
		goto err_close_tises;
	}
	/* store counter set ID */
	priv->counter_set_id = set_id;

	err = mlx5e_open_channels(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
		    __func__, err);
		goto err_dalloc_q_counter;
	}
	err = mlx5e_open_rqt(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
		    __func__, err);
		goto err_close_channels;
	}
	err = mlx5e_open_tirs(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
		    __func__, err);
		goto err_close_rqls;
	}
	err = mlx5e_open_flow_table(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
		    __func__, err);
		goto err_close_tirs;
	}
	err = mlx5e_add_all_vlan_rules(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
		    __func__, err);
		goto err_close_flow_table;
	}
	set_bit(MLX5E_STATE_OPENED, &priv->state);

	mlx5e_update_carrier(priv);
	mlx5e_set_rx_mode_core(priv);

	return (0);

err_close_flow_table:
	mlx5e_close_flow_table(priv);

err_close_tirs:
	mlx5e_close_tirs(priv);

err_close_rqls:
	mlx5e_close_rqt(priv);

err_close_channels:
	mlx5e_close_channels(priv);

err_dalloc_q_counter:
	mlx5_vport_dealloc_q_counter(priv->mdev,
	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);

err_close_tises:
	mlx5e_close_tises(priv);

	return (err);
}
2665
2666 static void
2667 mlx5e_open(void *arg)
2668 {
2669         struct mlx5e_priv *priv = arg;
2670
2671         PRIV_LOCK(priv);
2672         if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2673                 if_printf(priv->ifp,
2674                     "%s: Setting port status to up failed\n",
2675                     __func__);
2676
2677         mlx5e_open_locked(priv->ifp);
2678         priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2679         PRIV_UNLOCK(priv);
2680 }
2681
/*
 * Close the network interface, undoing the steps of
 * mlx5e_open_locked() in reverse order.  Presumably called with the
 * private lock held (see mlx5e_open()).  Returns zero, also when the
 * interface was already closed.
 */
int
mlx5e_close_locked(struct ifnet *ifp)
{
	struct mlx5e_priv *priv = ifp->if_softc;

	/* check if already closed */
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
		return (0);

	clear_bit(MLX5E_STATE_OPENED, &priv->state);

	mlx5e_set_rx_mode_core(priv);
	mlx5e_del_all_vlan_rules(priv);
	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
	mlx5e_close_flow_table(priv);
	mlx5e_close_tirs(priv);
	mlx5e_close_rqt(priv);
	mlx5e_close_channels(priv);
	mlx5_vport_dealloc_q_counter(priv->mdev,
	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
	mlx5e_close_tises(priv);

	return (0);
}
2706
#if (__FreeBSD_version >= 1100000)
/*
 * if_get_counter method: return one interface statistic.
 *
 * Reads the driver's cached vport/pport counter copies without taking
 * PRIV_LOCK (the existing XXX notes indicate locking is not allowed in
 * this context), so the returned values may be slightly stale relative
 * to the periodic statistics update.
 */
static uint64_t
mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
{
        struct mlx5e_priv *priv = ifp->if_softc;
        u64 retval;

        /* PRIV_LOCK(priv); XXX not allowed */
        switch (cnt) {
        case IFCOUNTER_IPACKETS:
                retval = priv->stats.vport.rx_packets;
                break;
        case IFCOUNTER_IERRORS:
                /* aggregate of all physical-port RX error counters */
                retval = priv->stats.vport.rx_error_packets +
                    priv->stats.pport.alignment_err +
                    priv->stats.pport.check_seq_err +
                    priv->stats.pport.crc_align_errors +
                    priv->stats.pport.in_range_len_errors +
                    priv->stats.pport.jabbers +
                    priv->stats.pport.out_of_range_len +
                    priv->stats.pport.oversize_pkts +
                    priv->stats.pport.symbol_err +
                    priv->stats.pport.too_long_errors +
                    priv->stats.pport.undersize_pkts +
                    priv->stats.pport.unsupported_op_rx;
                break;
        case IFCOUNTER_IQDROPS:
                retval = priv->stats.vport.rx_out_of_buffer +
                    priv->stats.pport.drop_events;
                break;
        case IFCOUNTER_OPACKETS:
                retval = priv->stats.vport.tx_packets;
                break;
        case IFCOUNTER_OERRORS:
                retval = priv->stats.vport.tx_error_packets;
                break;
        case IFCOUNTER_IBYTES:
                retval = priv->stats.vport.rx_bytes;
                break;
        case IFCOUNTER_OBYTES:
                retval = priv->stats.vport.tx_bytes;
                break;
        case IFCOUNTER_IMCASTS:
                retval = priv->stats.vport.rx_multicast_packets;
                break;
        case IFCOUNTER_OMCASTS:
                retval = priv->stats.vport.tx_multicast_packets;
                break;
        case IFCOUNTER_OQDROPS:
                retval = priv->stats.vport.tx_queue_dropped;
                break;
        case IFCOUNTER_COLLISIONS:
                retval = priv->stats.pport.collisions;
                break;
        default:
                /* fall back to the generic ifnet counters */
                retval = if_get_counter_default(ifp, cnt);
                break;
        }
        /* PRIV_UNLOCK(priv); XXX not allowed */
        return (retval);
}
#endif
2769
2770 static void
2771 mlx5e_set_rx_mode(struct ifnet *ifp)
2772 {
2773         struct mlx5e_priv *priv = ifp->if_softc;
2774
2775         queue_work(priv->wq, &priv->set_rx_mode_work);
2776 }
2777
/*
 * ifnet ioctl handler.
 *
 * Handles MTU changes (SIOCSIFMTU), interface up/down (SIOCSIFFLAGS),
 * multicast filter updates, media queries, capability toggles
 * (SIOCSIFCAP) and SFP/QSFP module EEPROM reads (SIOCGI2C).
 * Returns 0 on success or an errno value.
 */
static int
mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct mlx5e_priv *priv;
        struct ifreq *ifr;
        struct ifi2creq i2c;
        int error = 0;
        int mask = 0;
        int size_read = 0;
        int module_status;
        int module_num;
        int max_mtu;
        uint8_t read_addr;

        priv = ifp->if_softc;

        /* check if detaching */
        if (priv == NULL || priv->gone != 0)
                return (ENXIO);

        switch (command) {
        case SIOCSIFMTU:
                ifr = (struct ifreq *)data;

                PRIV_LOCK(priv);
                mlx5_query_port_max_mtu(priv->mdev, &max_mtu);

                if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
                    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
                        int was_opened;

                        /* the interface must be restarted around an MTU change */
                        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
                        if (was_opened)
                                mlx5e_close_locked(ifp);

                        /* set new MTU */
                        mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);

                        if (was_opened)
                                mlx5e_open_locked(ifp);
                } else {
                        error = EINVAL;
                        if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
                            MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
                }
                PRIV_UNLOCK(priv);
                break;
        case SIOCSIFFLAGS:
                /* fast path: already up and running, just refresh RX filters */
                if ((ifp->if_flags & IFF_UP) &&
                    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                        mlx5e_set_rx_mode(ifp);
                        break;
                }
                PRIV_LOCK(priv);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
                                if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
                                        mlx5e_open_locked(ifp);
                                ifp->if_drv_flags |= IFF_DRV_RUNNING;
                                mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
                        }
                } else {
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                                /* bring the port down before tearing down SW state */
                                mlx5_set_port_status(priv->mdev,
                                    MLX5_PORT_DOWN);
                                if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
                                        mlx5e_close_locked(ifp);
                                mlx5e_update_carrier(priv);
                                ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
                        }
                }
                PRIV_UNLOCK(priv);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                mlx5e_set_rx_mode(ifp);
                break;
        case SIOCSIFMEDIA:
        case SIOCGIFMEDIA:
        case SIOCGIFXMEDIA:
                ifr = (struct ifreq *)data;
                error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
                break;
        case SIOCSIFCAP:
                ifr = (struct ifreq *)data;
                PRIV_LOCK(priv);
                /* "mask" holds the capability bits being toggled */
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;

                if (mask & IFCAP_TXCSUM) {
                        ifp->if_capenable ^= IFCAP_TXCSUM;
                        ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);

                        /* TSO4 requires TX checksum offload */
                        if (IFCAP_TSO4 & ifp->if_capenable &&
                            !(IFCAP_TXCSUM & ifp->if_capenable)) {
                                ifp->if_capenable &= ~IFCAP_TSO4;
                                ifp->if_hwassist &= ~CSUM_IP_TSO;
                                if_printf(ifp,
                                    "tso4 disabled due to -txcsum.\n");
                        }
                }
                if (mask & IFCAP_TXCSUM_IPV6) {
                        ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
                        ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);

                        /* TSO6 requires IPv6 TX checksum offload */
                        if (IFCAP_TSO6 & ifp->if_capenable &&
                            !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
                                ifp->if_capenable &= ~IFCAP_TSO6;
                                ifp->if_hwassist &= ~CSUM_IP6_TSO;
                                if_printf(ifp,
                                    "tso6 disabled due to -txcsum6.\n");
                        }
                }
                if (mask & IFCAP_RXCSUM)
                        ifp->if_capenable ^= IFCAP_RXCSUM;
                if (mask & IFCAP_RXCSUM_IPV6)
                        ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
                if (mask & IFCAP_TSO4) {
                        if (!(IFCAP_TSO4 & ifp->if_capenable) &&
                            !(IFCAP_TXCSUM & ifp->if_capenable)) {
                                if_printf(ifp, "enable txcsum first.\n");
                                error = EAGAIN;
                                goto out;
                        }
                        ifp->if_capenable ^= IFCAP_TSO4;
                        ifp->if_hwassist ^= CSUM_IP_TSO;
                }
                if (mask & IFCAP_TSO6) {
                        if (!(IFCAP_TSO6 & ifp->if_capenable) &&
                            !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
                                if_printf(ifp, "enable txcsum6 first.\n");
                                error = EAGAIN;
                                goto out;
                        }
                        ifp->if_capenable ^= IFCAP_TSO6;
                        ifp->if_hwassist ^= CSUM_IP6_TSO;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                                mlx5e_disable_vlan_filter(priv);
                        else
                                mlx5e_enable_vlan_filter(priv);

                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                }
                if (mask & IFCAP_VLAN_HWTAGGING)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                if (mask & IFCAP_WOL_MAGIC)
                        ifp->if_capenable ^= IFCAP_WOL_MAGIC;

                VLAN_CAPABILITIES(ifp);
                /* turn off LRO means also turn of HW LRO - if it's on */
                if (mask & IFCAP_LRO) {
                        int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
                        bool need_restart = false;

                        ifp->if_capenable ^= IFCAP_LRO;
                        if (!(ifp->if_capenable & IFCAP_LRO)) {
                                if (priv->params.hw_lro_en) {
                                        priv->params.hw_lro_en = false;
                                        need_restart = true;
                                        /* Not sure this is the correct way */
                                        priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
                                }
                        }
                        /* restart the interface to apply the HW LRO change */
                        if (was_opened && need_restart) {
                                mlx5e_close_locked(ifp);
                                mlx5e_open_locked(ifp);
                        }
                }
                if (mask & IFCAP_HWRXTSTMP) {
                        ifp->if_capenable ^= IFCAP_HWRXTSTMP;
                        if (ifp->if_capenable & IFCAP_HWRXTSTMP) {
                                /* start the RX timestamp calibration callout */
                                if (priv->clbr_done == 0)
                                        mlx5e_reset_calibration_callout(priv);
                        } else {
                                callout_drain(&priv->tstmp_clbr);
                                priv->clbr_done = 0;
                        }
                }
out:
                PRIV_UNLOCK(priv);
                break;

        case SIOCGI2C:
                ifr = (struct ifreq *)data;

                /*
                 * Copy from the user-space address ifr_data to the
                 * kernel-space address i2c
                 */
                error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
                if (error)
                        break;

                /* validate requested length against the fixed-size buffer */
                if (i2c.len > sizeof(i2c.data)) {
                        error = EINVAL;
                        break;
                }

                PRIV_LOCK(priv);
                /* Get module_num which is required for the query_eeprom */
                error = mlx5_query_module_num(priv->mdev, &module_num);
                if (error) {
                        if_printf(ifp, "Query module num failed, eeprom "
                            "reading is not supported\n");
                        error = EINVAL;
                        goto err_i2c;
                }
                /* Check if module is present before doing an access */
                module_status = mlx5_query_module_status(priv->mdev, module_num);
                if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
                    module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
                        error = EINVAL;
                        goto err_i2c;
                }
                /*
                 * Currently 0XA0 and 0xA2 are the only addresses permitted.
                 * The internal conversion is as follows:
                 */
                if (i2c.dev_addr == 0xA0)
                        read_addr = MLX5E_I2C_ADDR_LOW;
                else if (i2c.dev_addr == 0xA2)
                        read_addr = MLX5E_I2C_ADDR_HIGH;
                else {
                        if_printf(ifp, "Query eeprom failed, "
                            "Invalid Address: %X\n", i2c.dev_addr);
                        error = EINVAL;
                        goto err_i2c;
                }
                error = mlx5_query_eeprom(priv->mdev,
                    read_addr, MLX5E_EEPROM_LOW_PAGE,
                    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
                    (uint32_t *)i2c.data, &size_read);
                if (error) {
                        if_printf(ifp, "Query eeprom failed, eeprom "
                            "reading is not supported\n");
                        error = EINVAL;
                        goto err_i2c;
                }

                /* a request larger than one transfer needs a second read */
                if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
                        error = mlx5_query_eeprom(priv->mdev,
                            read_addr, MLX5E_EEPROM_LOW_PAGE,
                            (uint32_t)(i2c.offset + size_read),
                            (uint32_t)(i2c.len - size_read), module_num,
                            (uint32_t *)(i2c.data + size_read), &size_read);
                }
                if (error) {
                        if_printf(ifp, "Query eeprom failed, eeprom "
                            "reading is not supported\n");
                        error = EINVAL;
                        goto err_i2c;
                }

                /* return the EEPROM data to user-space */
                error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
err_i2c:
                PRIV_UNLOCK(priv);
                break;

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }
        return (error);
}
3043
3044 static int
3045 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
3046 {
3047         /*
3048          * TODO: uncoment once FW really sets all these bits if
3049          * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
3050          * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
3051          * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
3052          * -ENOTSUPP;
3053          */
3054
3055         /* TODO: add more must-to-have features */
3056
3057         if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
3058                 return (-ENODEV);
3059
3060         return (0);
3061 }
3062
3063 static u16
3064 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
3065 {
3066         uint32_t bf_buf_size = (1U << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2U;
3067
3068         bf_buf_size -= sizeof(struct mlx5e_tx_wqe) - 2;
3069
3070         /* verify against driver hardware limit */
3071         if (bf_buf_size > MLX5E_MAX_TX_INLINE)
3072                 bf_buf_size = MLX5E_MAX_TX_INLINE;
3073
3074         return (bf_buf_size);
3075 }
3076
/*
 * Initialize the default tunable parameters of a freshly allocated
 * per-interface private structure.
 *
 * "num_comp_vectors" is the number of completion vectors available and
 * also becomes the default channel count.  Returns 0 on success or the
 * error from mlx5_query_min_inline().
 */
static int
mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
    struct mlx5e_priv *priv,
    int num_comp_vectors)
{
        int err;

        /*
         * TODO: Consider link speed for setting "log_sq_size",
         * "log_rq_size" and "cq_moderation_xxx":
         */
        priv->params.log_sq_size =
            MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
        priv->params.log_rq_size =
            MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
        /* CQE-based moderation gets its own default when supported */
        priv->params.rx_cq_moderation_usec =
            MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
            MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
            MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
        priv->params.rx_cq_moderation_mode =
            MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
        priv->params.rx_cq_moderation_pkts =
            MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
        priv->params.tx_cq_moderation_usec =
            MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
        priv->params.tx_cq_moderation_pkts =
            MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
        priv->params.min_rx_wqes =
            MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
        /* RX hash table must be large enough to cover all channels */
        priv->params.rx_hash_log_tbl_sz =
            (order_base_2(num_comp_vectors) >
            MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
            order_base_2(num_comp_vectors) :
            MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
        priv->params.num_tc = 1;
        priv->params.default_vlan_prio = 0;
        /* -1 marks the queue counter as not yet allocated */
        priv->counter_set_id = -1;
        priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);

        err = mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
        if (err)
                return (err);

        /*
         * hw lro is currently defaulted to off. when it won't anymore we
         * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
         */
        priv->params.hw_lro_en = false;
        priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;

        priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);

        priv->mdev = mdev;
        priv->params.num_channels = num_comp_vectors;
        priv->params.channels_rsss = 1;
        priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
        /* mask used to map TX queues onto channels */
        priv->queue_mapping_channel_mask =
            roundup_pow_of_two(num_comp_vectors) - 1;
        priv->num_tc = priv->params.num_tc;
        priv->default_vlan_prio = priv->params.default_vlan_prio;

        INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
        INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
        INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);

        return (0);
}
3144
/*
 * Create a memory key (mkey) covering the whole physical address
 * space for the given protection domain "pdn".
 *
 * The mkey is created with local read/write access in physical
 * addressing mode (MLX5_ACCESS_MODE_PA) and "length64" set, i.e. it
 * spans the full 64-bit address range.  Returns 0 on success or a
 * negative errno value.
 */
static int
mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
                  struct mlx5_core_mr *mkey)
{
        struct ifnet *ifp = priv->ifp;
        struct mlx5_core_dev *mdev = priv->mdev;
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        void *mkc;
        u32 *in;
        int err;

        in = mlx5_vzalloc(inlen);
        if (in == NULL) {
                if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
                return (-ENOMEM);
        }

        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
        MLX5_SET(mkc, mkc, lw, 1);      /* local write */
        MLX5_SET(mkc, mkc, lr, 1);      /* local read */

        MLX5_SET(mkc, mkc, pd, pdn);
        MLX5_SET(mkc, mkc, length64, 1);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);

        err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
        if (err)
                if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
                    __func__, err);

        kvfree(in);
        return (err);
}
3179
/* sysctl description strings for the per-vport (software) statistics */
static const char *mlx5e_vport_stats_desc[] = {
        MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
};
3183
/* sysctl description strings for the physical-port statistics */
static const char *mlx5e_pport_stats_desc[] = {
        MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
};
3187
/*
 * Initialize the locks and the watchdog callout of a private
 * structure.  The callout is tied to the async-events mutex, so the
 * mutex must be (and is) initialized first.
 */
static void
mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
{
        mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
        sx_init(&priv->state_lock, "mlx5state");
        callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
        MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
}
3196
/* Destroy the locks created by mlx5e_priv_mtx_init(). */
static void
mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
{
        mtx_destroy(&priv->async_events_mtx);
        sx_destroy(&priv->state_lock);
}
3203
/*
 * Read-only sysctl handler exporting the HCA firmware revision as a
 * "major.minor.sub" string.
 */
static int
sysctl_firmware(SYSCTL_HANDLER_ARGS)
{
        /*
         * %d.%d.%d is the string format.
         * fw_rev_{maj,min,sub} return u16, 2^16 = 65536.
         * We need at most 5 chars to store that.
         * It also has: two "." and NULL at the end, which means we need 18
         * (5*3 + 3) chars at most.
         */
        char fw[18];
        struct mlx5e_priv *priv = arg1;
        int error;

        snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
            fw_rev_sub(priv->mdev));
        error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
        return (error);
}
3223
3224 static void
3225 mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
3226 {
3227         int i;
3228
3229         for (i = 0; i < ch->num_tc; i++)
3230                 mlx5e_drain_sq(&ch->sq[i]);
3231 }
3232
/*
 * Write a NOP into the SQ doorbell record, ring the hardware doorbell
 * and then clear the record.  Used while resetting an SQ, before it
 * is moved back to the ready state (see mlx5e_resume_sq()).
 */
static void
mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
{

        sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
        sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
        mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
        sq->doorbell.d64 = 0;
}
3242
/*
 * Bring a stopped send queue back into operation.
 *
 * Transitions the SQ through ERR -> RST -> RDY, resetting the
 * producer/consumer counters and the doorbell record in between.
 * Modify failures are logged but do not abort the resume.  No-op when
 * the SQ is not stopped.
 */
void
mlx5e_resume_sq(struct mlx5e_sq *sq)
{
        int err;

        /* check if already enabled */
        if (sq->stopped == 0)
                return;

        err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
            MLX5_SQC_STATE_RST);
        if (err != 0) {
                if_printf(sq->ifp,
                    "mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
        }

        /* reset producer/consumer counters */
        sq->cc = 0;
        sq->pc = 0;

        /* reset doorbell prior to moving from RST to RDY */
        mlx5e_reset_sq_doorbell_record(sq);

        err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
            MLX5_SQC_STATE_RDY);
        if (err != 0) {
                if_printf(sq->ifp,
                    "mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
        }

        /* mark the SQ running under its lock */
        mtx_lock(&sq->lock);
        sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
        sq->stopped = 0;
        mtx_unlock(&sq->lock);

}
3278
3279 static void
3280 mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
3281 {
3282         int i;
3283
3284         for (i = 0; i < ch->num_tc; i++)
3285                 mlx5e_resume_sq(&ch->sq[i]);
3286 }
3287
/*
 * Stop the receive queue of a channel and flush it.
 *
 * Disables the RQ, stops and drains its watchdog callout, moves the
 * queue to the ERR state and then polls the completion queue until
 * the work queue is empty before finally parking the RQ in RST.
 */
static void
mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
{
        struct mlx5e_rq *rq = &ch->rq;
        int err;

        mtx_lock(&rq->mtx);
        rq->enabled = 0;
        callout_stop(&rq->watchdog);
        mtx_unlock(&rq->mtx);

        /* wait for any in-flight watchdog callback to finish */
        callout_drain(&rq->watchdog);

        err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
        if (err != 0) {
                if_printf(rq->ifp,
                    "mlx5e_modify_rq() from RDY to RST failed: %d\n", err);
        }

        /* poll completions until the receive work queue is drained */
        while (!mlx5_wq_ll_is_empty(&rq->wq)) {
                msleep(1);
                rq->cq.mcq.comp(&rq->cq.mcq);
        }

        /*
         * Transitioning into RST state will allow the FW to track less ERR state queues,
         * thus reducing the recv queue flushing time
         */
        err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
        if (err != 0) {
                if_printf(rq->ifp,
                    "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
        }
}
3322
/*
 * Restart the receive queue of a channel.
 *
 * Resets the work-queue counter and doorbell record, moves the RQ
 * from RST to RDY, re-enables it, and kicks the completion handler
 * once to refill/process the queue.
 */
static void
mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
{
        struct mlx5e_rq *rq = &ch->rq;
        int err;

        rq->wq.wqe_ctr = 0;
        mlx5_wq_ll_update_db_record(&rq->wq);
        err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
        if (err != 0) {
                if_printf(rq->ifp,
                    "mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
        }

        rq->enabled = 1;

        /* kick the completion handler once to get things going again */
        rq->cq.mcq.comp(&rq->cq.mcq);
}
3341
3342 void
3343 mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
3344 {
3345         int i;
3346
3347         if (priv->channel == NULL)
3348                 return;
3349
3350         for (i = 0; i < priv->params.num_channels; i++) {
3351
3352                 if (!priv->channel[i])
3353                         continue;
3354
3355                 if (value)
3356                         mlx5e_disable_tx_dma(priv->channel[i]);
3357                 else
3358                         mlx5e_enable_tx_dma(priv->channel[i]);
3359         }
3360 }
3361
3362 void
3363 mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
3364 {
3365         int i;
3366
3367         if (priv->channel == NULL)
3368                 return;
3369
3370         for (i = 0; i < priv->params.num_channels; i++) {
3371
3372                 if (!priv->channel[i])
3373                         continue;
3374
3375                 if (value)
3376                         mlx5e_disable_rx_dma(priv->channel[i]);
3377                 else
3378                         mlx5e_enable_rx_dma(priv->channel[i]);
3379         }
3380 }
3381
/*
 * Register the read-only "hw" sysctl nodes: firmware version and
 * board ID.
 */
static void
mlx5e_add_hw_stats(struct mlx5e_priv *priv)
{
        SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
            OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
            sysctl_firmware, "A", "HCA firmware version");

        SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
            OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
            "Board ID");
}
3393
/*
 * Sysctl handler for one TX priority-flow-control bit; "arg2" selects
 * the priority (bit index).  On a write, updates the bit, pushes the
 * new configuration to the port and restores the previous value if
 * the hardware update fails.
 */
static int
mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
{
        struct mlx5e_priv *priv = arg1;
        uint32_t tx_pfc;
        uint32_t value;
        int error;

        PRIV_LOCK(priv);

        /* remember previous configuration for rollback */
        tx_pfc = priv->params.tx_priority_flow_control;

        /* get current value */
        value = (tx_pfc >> arg2) & 1;

        error = sysctl_handle_32(oidp, &value, 0, req);

        /* update the selected bit; any non-zero value means enable */
        if (value != 0)
                priv->params.tx_priority_flow_control |= (1 << arg2);
        else
                priv->params.tx_priority_flow_control &= ~(1 << arg2);

        /* check if update is required */
        if (error == 0 && priv->gone == 0 &&
            tx_pfc != priv->params.tx_priority_flow_control) {
                error = -mlx5e_set_port_pfc(priv);
                /* restore previous value */
                if (error != 0)
                        priv->params.tx_priority_flow_control= tx_pfc;
        }
        PRIV_UNLOCK(priv);

        return (error);
}
3429
/*
 * Sysctl handler for one RX priority-flow-control bit; "arg2" selects
 * the priority (bit index).  Mirrors
 * mlx5e_sysctl_tx_priority_flow_control() for the RX direction.
 */
static int
mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
{
        struct mlx5e_priv *priv = arg1;
        uint32_t rx_pfc;
        uint32_t value;
        int error;

        PRIV_LOCK(priv);

        /* remember previous configuration for rollback */
        rx_pfc = priv->params.rx_priority_flow_control;

        /* get current value */
        value = (rx_pfc >> arg2) & 1;

        error = sysctl_handle_32(oidp, &value, 0, req);

        /* update the selected bit; any non-zero value means enable */
        if (value != 0)
                priv->params.rx_priority_flow_control |= (1 << arg2);
        else
                priv->params.rx_priority_flow_control &= ~(1 << arg2);

        /* check if update is required */
        if (error == 0 && priv->gone == 0 &&
            rx_pfc != priv->params.rx_priority_flow_control) {
                error = -mlx5e_set_port_pfc(priv);
                /* restore previous value */
                if (error != 0)
                        priv->params.rx_priority_flow_control= rx_pfc;
        }
        PRIV_UNLOCK(priv);

        return (error);
}
3465
3466 static void
3467 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
3468 {
3469         unsigned int x;
3470         char path[96];
3471         int error;
3472
3473         /* enable pauseframes by default */
3474         priv->params.tx_pauseframe_control = 1;
3475         priv->params.rx_pauseframe_control = 1;
3476
3477         /* disable ports flow control, PFC, by default */
3478         priv->params.tx_priority_flow_control = 0;
3479         priv->params.rx_priority_flow_control = 0;
3480
3481 #if (__FreeBSD_version < 1100000)
3482         /* compute path for sysctl */
3483         snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
3484             device_get_unit(priv->mdev->pdev->dev.bsddev));
3485
3486         /* try to fetch tunable, if any */
3487         TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
3488
3489         /* compute path for sysctl */
3490         snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
3491             device_get_unit(priv->mdev->pdev->dev.bsddev));
3492
3493         /* try to fetch tunable, if any */
3494         TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
3495
3496         for (x = 0; x != 8; x++) {
3497
3498                 /* compute path for sysctl */
3499                 snprintf(path, sizeof(path), "dev.mce.%d.tx_priority_flow_control_%u",
3500                     device_get_unit(priv->mdev->pdev->dev.bsddev), x);
3501
3502                 /* try to fetch tunable, if any */
3503                 if (TUNABLE_INT_FETCH(path, &value) == 0 && value != 0)
3504                         priv->params.tx_priority_flow_control |= 1 << x;
3505
3506                 /* compute path for sysctl */
3507                 snprintf(path, sizeof(path), "dev.mce.%d.rx_priority_flow_control_%u",
3508                     device_get_unit(priv->mdev->pdev->dev.bsddev), x);
3509
3510                 /* try to fetch tunable, if any */
3511                 if (TUNABLE_INT_FETCH(path, &value) == 0 && value != 0)
3512                         priv->params.rx_priority_flow_control |= 1 << x;
3513         }
3514 #endif
3515
3516         /* register pauseframe SYSCTLs */
3517         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3518             OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
3519             &priv->params.tx_pauseframe_control, 0,
3520             "Set to enable TX pause frames. Clear to disable.");
3521
3522         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3523             OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
3524             &priv->params.rx_pauseframe_control, 0,
3525             "Set to enable RX pause frames. Clear to disable.");
3526
3527         /* register priority_flow control, PFC, SYSCTLs */
3528         for (x = 0; x != 8; x++) {
3529                 snprintf(path, sizeof(path), "tx_priority_flow_control_%u", x);
3530
3531                 SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3532                     OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3533                     CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_tx_priority_flow_control, "IU",
3534                     "Set to enable TX ports flow control frames for given priority. Clear to disable.");
3535
3536                 snprintf(path, sizeof(path), "rx_priority_flow_control_%u", x);
3537
3538                 SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3539                     OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3540                     CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_rx_priority_flow_control, "IU",
3541                     "Set to enable RX ports flow control frames for given priority. Clear to disable.");
3542         }
3543
3544         PRIV_LOCK(priv);
3545
3546         /* range check */
3547         priv->params.tx_pauseframe_control =
3548             priv->params.tx_pauseframe_control ? 1 : 0;
3549         priv->params.rx_pauseframe_control =
3550             priv->params.rx_pauseframe_control ? 1 : 0;
3551
3552         /* update firmware */
3553         error = mlx5e_set_port_pause_and_pfc(priv);
3554         if (error == -EINVAL) {
3555                 if_printf(priv->ifp,
3556                     "Global pauseframes must be disabled before enabling PFC.\n");
3557                 priv->params.rx_priority_flow_control = 0;
3558                 priv->params.tx_priority_flow_control = 0;
3559
3560                 /* update firmware */
3561                 (void) mlx5e_set_port_pause_and_pfc(priv);
3562         }
3563         PRIV_UNLOCK(priv);
3564 }
3565
/*
 * mlx5 core "add" callback: attach one Ethernet port.
 *
 * Allocates the per-port private state (struct mlx5e_priv), creates and
 * configures the ifnet, registers the sysctl trees, allocates the
 * firmware-side resources (UAR, protection domain, transport domain,
 * memory key), sets up supported media, VLAN event handlers and
 * statistics, and finally attaches the interface to the network stack.
 *
 * Returns the new mlx5e_priv pointer on success or NULL on failure;
 * on failure all partially acquired resources are released through the
 * goto-based unwind labels at the bottom (in reverse acquisition order).
 */
static void *
mlx5e_create_ifp(struct mlx5_core_dev *mdev)
{
	struct ifnet *ifp;
	struct mlx5e_priv *priv;
	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
	struct sysctl_oid_list *child;
	int ncv = mdev->priv.eq_table.num_comp_vectors;
	char unit[16];
	int err;
	int i;
	u32 eth_proto_cap;

	/* refuse to attach on hardware lacking required capabilities */
	if (mlx5e_check_required_hca_cap(mdev)) {
		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
		return (NULL);
	}
	/* M_WAITOK means this allocation cannot fail */
	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
	mlx5e_priv_mtx_init(priv);

	ifp = priv->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		mlx5_core_err(mdev, "if_alloc() failed\n");
		goto err_free_priv;
	}
	/* wire up the ifnet methods; interfaces are named mce<unit> */
	ifp->if_softc = priv;
	if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
	ifp->if_mtu = ETHERMTU;
	ifp->if_init = mlx5e_open;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mlx5e_ioctl;
	ifp->if_transmit = mlx5e_xmit;
	ifp->if_qflush = if_qflush;
#if (__FreeBSD_version >= 1100000)
	ifp->if_get_counter = mlx5e_get_counter;
#endif
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	/*
	 * Set driver features
	 */
	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
	ifp->if_capabilities |= IFCAP_LRO;
	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
	ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
#ifdef RATELIMIT
	/* hardware TX rate limiting via send tags */
	ifp->if_capabilities |= IFCAP_TXRTLMT;
	ifp->if_snd_tag_alloc = mlx5e_rl_snd_tag_alloc;
	ifp->if_snd_tag_free = mlx5e_rl_snd_tag_free;
	ifp->if_snd_tag_modify = mlx5e_rl_snd_tag_modify;
	ifp->if_snd_tag_query = mlx5e_rl_snd_tag_query;
#endif

	/* set TSO limits so that we don't have to drop TX packets */
	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;

	/* enable all capabilities by default and derive hwassist flags */
	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TSO)
		ifp->if_hwassist |= CSUM_TSO;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);

	/* ifnet sysctl tree */
	sysctl_ctx_init(&priv->sysctl_ctx);
	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
	if (priv->sysctl_ifnet == NULL) {
		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
		goto err_free_sysctl;
	}
	/* add per-unit child node, i.e. dev.mce.<unit> */
	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
	if (priv->sysctl_ifnet == NULL) {
		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
		goto err_free_sysctl;
	}

	/* HW sysctl tree */
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
	if (priv->sysctl_hw == NULL) {
		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
		goto err_free_sysctl;
	}

	/* build channel/queue parameters, sized by completion vectors */
	err = mlx5e_build_ifp_priv(mdev, priv, ncv);
	if (err) {
		mlx5_core_err(mdev, "mlx5e_build_ifp_priv() failed (%d)\n", err);
		goto err_free_sysctl;
	}

	/* single-threaded workqueue for deferred per-port work */
	snprintf(unit, sizeof(unit), "mce%u_wq",
	    device_get_unit(mdev->pdev->dev.bsddev));
	priv->wq = alloc_workqueue(unit, 0, 1);
	if (priv->wq == NULL) {
		if_printf(ifp, "%s: alloc_workqueue failed\n", __func__);
		goto err_free_sysctl;
	}

	/* firmware resources: UAR, PD, transport domain, then mkey */
	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
	if (err) {
		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
		    __func__, err);
		goto err_free_wq;
	}
	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
	if (err) {
		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
		    __func__, err);
		goto err_unmap_free_uar;
	}
	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
	if (err) {
		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
		    __func__, err);
		goto err_dealloc_pd;
	}
	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
	if (err) {
		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
		    __func__, err);
		goto err_dealloc_transport_domain;
	}
	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);

	/* check if we should generate a random MAC address */
	if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
	    is_zero_ether_addr(dev_addr)) {
		random_ether_addr(dev_addr);
		if_printf(ifp, "Assigned random MAC address\n");
	}
#ifdef RATELIMIT
	err = mlx5e_rl_init(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_rl_init failed, %d\n",
		    __func__, err);
		goto err_create_mkey;
	}
#endif

	/* set default MTU */
	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);

	/* Set default media status */
	priv->media_status_last = IFM_AVALID;
	priv->media_active_last = IFM_ETHER | IFM_AUTO |
	    IFM_ETH_RXPAUSE | IFM_FDX;

	/* setup default pauseframes configuration */
	mlx5e_setup_pauseframes(priv);

	/* query the port's advertised protocol capabilities */
	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
	if (err) {
		/* non-fatal: fall back to an empty capability mask */
		eth_proto_cap = 0;
		if_printf(ifp, "%s: Query port media capability failed, %d\n",
		    __func__, err);
	}

	/* Setup supported medias */
	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
	    mlx5e_media_change, mlx5e_media_status);

	/* advertise one media entry per supported link mode */
	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
		if (mlx5e_mode_table[i].baudrate == 0)
			continue;
		if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
			ifmedia_add(&priv->media,
			    mlx5e_mode_table[i].subtype |
			    IFM_ETHER, 0, NULL);
			ifmedia_add(&priv->media,
			    mlx5e_mode_table[i].subtype |
			    IFM_ETHER | IFM_FDX |
			    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
		}
	}

	/* Additional supported medias */
	ifmedia_add(&priv->media, IFM_10G_LR | IFM_ETHER, 0, NULL);
	ifmedia_add(&priv->media, IFM_10G_LR |
	    IFM_ETHER | IFM_FDX |
	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);

	ifmedia_add(&priv->media, IFM_40G_ER4 | IFM_ETHER, 0, NULL);
	ifmedia_add(&priv->media, IFM_40G_ER4 |
	    IFM_ETHER | IFM_FDX |
	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);

	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);

	/* Set autoselect by default */
	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
	/* attach to the network stack; interface becomes visible now */
	ether_ifattach(ifp, dev_addr);

	/* Register for VLAN events */
	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);

	/* Link is down by default */
	if_link_state_change(ifp, LINK_STATE_DOWN);

	mlx5e_enable_async_events(priv);

	mlx5e_add_hw_stats(priv);

	/* register vport and physical port statistics sysctls */
	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
	    priv->stats.vport.arg);

	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
	    priv->stats.pport.arg);

	mlx5e_create_ethtool(priv);

	/* prime the statistics under the async-events mutex */
	mtx_lock(&priv->async_events_mtx);
	mlx5e_update_stats(priv);
	mtx_unlock(&priv->async_events_mtx);

	/* expose RX timestamp calibration state and start its callout */
	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    OID_AUTO, "rx_clbr_done", CTLFLAG_RD,
	    &priv->clbr_done, 0,
	    "RX timestamps calibration state");
	callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT);
	mlx5e_reset_calibration_callout(priv);

	return (priv);

	/* error unwind: release resources in reverse acquisition order */
#ifdef RATELIMIT
err_create_mkey:
	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
#endif
err_dealloc_transport_domain:
	mlx5_dealloc_transport_domain(mdev, priv->tdn);

err_dealloc_pd:
	mlx5_core_dealloc_pd(mdev, priv->pdn);

err_unmap_free_uar:
	mlx5_unmap_free_uar(mdev, &priv->cq_uar);

err_free_wq:
	destroy_workqueue(priv->wq);

err_free_sysctl:
	sysctl_ctx_free(&priv->sysctl_ctx);
	/* NOTE(review): sysctl_debug appears to be set elsewhere (debug
	 * stats sysctl handler); freed here if already enabled */
	if (priv->sysctl_debug)
		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
	if_free(ifp);

err_free_priv:
	mlx5e_priv_mtx_destroy(priv);
	free(priv, M_MLX5EN);
	return (NULL);
}
3834
/*
 * mlx5 core "remove" callback: detach one Ethernet port.
 *
 * Tears down everything created by mlx5e_create_ifp() in roughly
 * reverse order: first block new IOCTLs and wait out in-flight ones,
 * then drain timers, deregister event handlers, close and detach the
 * interface, and finally release sysctl and firmware resources.
 */
static void
mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
{
	struct mlx5e_priv *priv = vpriv;
	struct ifnet *ifp = priv->ifp;

	/* don't allow more IOCTLs */
	priv->gone = 1;

	/* XXX wait a bit to allow IOCTL handlers to complete */
	pause("W", hz);

#ifdef RATELIMIT
	/*
	 * The kernel can have reference(s) via the m_snd_tag's into
	 * the ratelimit channels, and these must go away before
	 * detaching:
	 */
	while (READ_ONCE(priv->rl.stats.tx_active_connections) != 0) {
		if_printf(priv->ifp, "Waiting for all ratelimit connections "
		    "to terminate\n");
		pause("W", hz);
	}
#endif
	/* stop watchdog timer */
	callout_drain(&priv->watchdog);

	/* stop RX timestamp calibration timer */
	callout_drain(&priv->tstmp_clbr);

	/* deregister VLAN event handlers, if they were registered */
	if (priv->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
	if (priv->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);

	/* make sure device gets closed */
	PRIV_LOCK(priv);
	mlx5e_close_locked(ifp);
	PRIV_UNLOCK(priv);

	/* unregister device */
	ifmedia_removeall(&priv->media);
	ether_ifdetach(ifp);
	if_free(ifp);

#ifdef RATELIMIT
	mlx5e_rl_cleanup(priv);
#endif
	/* destroy all remaining sysctl nodes */
	sysctl_ctx_free(&priv->stats.vport.ctx);
	sysctl_ctx_free(&priv->stats.pport.ctx);
	sysctl_ctx_free(&priv->sysctl_ctx);
	if (priv->sysctl_debug)
		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);

	/* release firmware resources allocated at attach time */
	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
	mlx5e_disable_async_events(priv);
	destroy_workqueue(priv->wq);
	mlx5e_priv_mtx_destroy(priv);
	free(priv, M_MLX5EN);
}
3898
3899 static void *
3900 mlx5e_get_ifp(void *vpriv)
3901 {
3902         struct mlx5e_priv *priv = vpriv;
3903
3904         return (priv->ifp);
3905 }
3906
/*
 * Registration descriptor connecting this Ethernet driver to the mlx5
 * core: .add/.remove are invoked as core devices appear and disappear,
 * .event delivers asynchronous device events, and .get_dev maps the
 * per-port private data back to its ifnet.
 */
static struct mlx5_interface mlx5e_interface = {
	.add = mlx5e_create_ifp,
	.remove = mlx5e_destroy_ifp,
	.event = mlx5e_async_event,
	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
	.get_dev = mlx5e_get_ifp,
};
3914
/*
 * Module load hook: register the Ethernet protocol interface with the
 * mlx5 core so mlx5e_create_ifp() is called for each mlx5 device.
 */
void
mlx5e_init(void)
{
	mlx5_register_interface(&mlx5e_interface);
}
3920
/*
 * Module unload hook: unregister from the mlx5 core, which detaches
 * all ports via mlx5e_destroy_ifp().
 */
void
mlx5e_cleanup(void)
{
	mlx5_unregister_interface(&mlx5e_interface);
}
3926
/*
 * Print the driver name/version banner once during boot; scheduled by
 * the SYSINIT below at driver-initialization time.
 */
static void
mlx5e_show_version(void __unused *arg)
{

	printf("%s", mlx5e_version);
}
SYSINIT(mlx5e_show_version, SI_SUB_DRIVERS, SI_ORDER_ANY, mlx5e_show_version, NULL);
3934
/* Module entry/exit points, dependencies and version. */
module_init_order(mlx5e_init, SI_ORDER_THIRD);
module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);

#if (__FreeBSD_version >= 1100000)
/* the linuxkpi dependency is only needed on FreeBSD 11 and later */
MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
#endif
MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
MODULE_VERSION(mlx5en, 1);