/*-
 * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "en.h"

#include <sys/sockio.h>
#include <machine/atomic.h>

#ifndef ETH_DRIVER_VERSION
#define ETH_DRIVER_VERSION      "3.4.2"
#endif

static const char mlx5e_version[] = "mlx5en: Mellanox Ethernet driver "
        ETH_DRIVER_VERSION " (" DRIVER_RELDATE ")\n";

static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs);

struct mlx5e_channel_param {
        struct mlx5e_rq_param rq;
        struct mlx5e_sq_param sq;
        struct mlx5e_cq_param rx_cq;
        struct mlx5e_cq_param tx_cq;
};

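/*
 * Table mapping firmware Ethernet link-mode bit numbers to ifmedia
 * subtypes and baudrates.  Link modes the driver does not recognize
 * are left zero-initialized and are skipped by the lookup loops below,
 * which test for a zero baudrate.
 */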
static const struct {
        u32     subtype;
        u64     baudrate;
}       mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
        [MLX5E_1000BASE_CX_SGMII] = {
                .subtype = IFM_1000_CX_SGMII,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_1000BASE_KX] = {
                .subtype = IFM_1000_KX,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_10GBASE_CX4] = {
                .subtype = IFM_10G_CX4,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_KX4] = {
                .subtype = IFM_10G_KX4,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_KR] = {
                .subtype = IFM_10G_KR,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_20GBASE_KR2] = {
                .subtype = IFM_20G_KR2,
                .baudrate = IF_Gbps(20ULL),
        },
        [MLX5E_40GBASE_CR4] = {
                .subtype = IFM_40G_CR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_40GBASE_KR4] = {
                .subtype = IFM_40G_KR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_56GBASE_R4] = {
                .subtype = IFM_56G_R4,
                .baudrate = IF_Gbps(56ULL),
        },
        [MLX5E_10GBASE_CR] = {
                .subtype = IFM_10G_CR1,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_SR] = {
                .subtype = IFM_10G_SR,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_ER] = {
                .subtype = IFM_10G_ER,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_40GBASE_SR4] = {
                .subtype = IFM_40G_SR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_40GBASE_LR4] = {
                .subtype = IFM_40G_LR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_100GBASE_CR4] = {
                .subtype = IFM_100G_CR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_SR4] = {
                .subtype = IFM_100G_SR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_KR4] = {
                .subtype = IFM_100G_KR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_LR4] = {
                .subtype = IFM_100G_LR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100BASE_TX] = {
                .subtype = IFM_100_TX,
                .baudrate = IF_Mbps(100ULL),
        },
        [MLX5E_1000BASE_T] = {
                .subtype = IFM_1000_T,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_10GBASE_T] = {
                .subtype = IFM_10G_T,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_25GBASE_CR] = {
                .subtype = IFM_25G_CR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_25GBASE_KR] = {
                .subtype = IFM_25G_KR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_25GBASE_SR] = {
                .subtype = IFM_25G_SR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_50GBASE_CR2] = {
                .subtype = IFM_50G_CR2,
                .baudrate = IF_Gbps(50ULL),
        },
        [MLX5E_50GBASE_KR2] = {
                .subtype = IFM_50G_KR2,
                .baudrate = IF_Gbps(50ULL),
        },
};

MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");

static void
mlx5e_update_carrier(struct mlx5e_priv *priv)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        u32 out[MLX5_ST_SZ_DW(ptys_reg)];
        u32 eth_proto_oper;
        int error;
        u8 port_state;
        u8 i;

        port_state = mlx5_query_vport_state(mdev,
            MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);

        if (port_state == VPORT_STATE_UP) {
                priv->media_status_last |= IFM_ACTIVE;
        } else {
                priv->media_status_last &= ~IFM_ACTIVE;
                priv->media_active_last = IFM_ETHER;
                if_link_state_change(priv->ifp, LINK_STATE_DOWN);
                return;
        }

        error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
        if (error) {
                priv->media_active_last = IFM_ETHER;
                priv->ifp->if_baudrate = 1;
                if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
                    __func__, error);
                return;
        }
        eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);

        for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
                if (mlx5e_mode_table[i].baudrate == 0)
                        continue;
                if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
                        priv->ifp->if_baudrate =
                            mlx5e_mode_table[i].baudrate;
                        priv->media_active_last =
                            mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
                }
        }
        if_link_state_change(priv->ifp, LINK_STATE_UP);
}

static void
mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
{
        struct mlx5e_priv *priv = dev->if_softc;

        ifmr->ifm_status = priv->media_status_last;
        ifmr->ifm_active = priv->media_active_last |
            (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
            (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
}

static u32
mlx5e_find_link_mode(u32 subtype)
{
        u32 i;
        u32 link_mode = 0;

        for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
                if (mlx5e_mode_table[i].baudrate == 0)
                        continue;
                if (mlx5e_mode_table[i].subtype == subtype)
                        link_mode |= MLX5E_PROT_MASK(i);
        }

        return (link_mode);
}

static int
mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
{
        return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
            priv->params.rx_pauseframe_control,
            priv->params.tx_pauseframe_control,
            priv->params.rx_priority_flow_control,
            priv->params.tx_priority_flow_control));
}

static int
mlx5e_set_port_pfc(struct mlx5e_priv *priv)
{
        int error;

        if (priv->params.rx_pauseframe_control ||
            priv->params.tx_pauseframe_control) {
                if_printf(priv->ifp,
                    "Global pauseframes must be disabled before enabling PFC.\n");
                error = -EINVAL;
        } else {
                error = mlx5e_set_port_pause_and_pfc(priv);
        }
        return (error);
}

static int
mlx5e_media_change(struct ifnet *dev)
{
        struct mlx5e_priv *priv = dev->if_softc;
        struct mlx5_core_dev *mdev = priv->mdev;
        u32 eth_proto_cap;
        u32 link_mode;
        int was_opened;
        int locked;
        int error;

        locked = PRIV_LOCKED(priv);
        if (!locked)
                PRIV_LOCK(priv);

        if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
                error = EINVAL;
                goto done;
        }
        link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));

        /* query supported capabilities */
        error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
        if (error != 0) {
                if_printf(dev, "Query port media capability failed\n");
                goto done;
        }
        /* check for autoselect */
        if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
                link_mode = eth_proto_cap;
                if (link_mode == 0) {
                        if_printf(dev, "Port media capability is zero\n");
                        error = EINVAL;
                        goto done;
                }
        } else {
                link_mode = link_mode & eth_proto_cap;
                if (link_mode == 0) {
                        if_printf(dev, "Unsupported link mode requested\n");
                        error = EINVAL;
                        goto done;
                }
        }
        if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
                /* check if PFC is enabled */
                if (priv->params.rx_priority_flow_control ||
                    priv->params.tx_priority_flow_control) {
                        if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n");
                        error = EINVAL;
                        goto done;
                }
        }
        /* update pauseframe control bits */
        priv->params.rx_pauseframe_control =
            (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
        priv->params.tx_pauseframe_control =
            (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;

        /* check if device is opened */
        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);

        /* reconfigure the hardware */
        mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
        mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
        error = -mlx5e_set_port_pause_and_pfc(priv);
        if (was_opened)
                mlx5_set_port_status(mdev, MLX5_PORT_UP);

done:
        if (!locked)
                PRIV_UNLOCK(priv);
        return (error);
}

static void
mlx5e_update_carrier_work(struct work_struct *work)
{
        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
            update_carrier_work);

        PRIV_LOCK(priv);
        if (test_bit(MLX5E_STATE_OPENED, &priv->state))
                mlx5e_update_carrier(priv);
        PRIV_UNLOCK(priv);
}

/*
 * This function reads the physical port counters from the firmware
 * using a pre-defined layout defined by various MLX5E_PPORT_XXX()
 * macros. The output is converted from big-endian 64-bit values into
 * host endian ones and stored in the "priv->stats.pport" structure.
 */
static void
mlx5e_update_pport_counters(struct mlx5e_priv *priv)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_pport_stats *s = &priv->stats.pport;
        struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
        u32 *in;
        u32 *out;
        const u64 *ptr;
        unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
        unsigned x;
        unsigned y;
        unsigned z;

        /* allocate firmware request structures */
        in = mlx5_vzalloc(sz);
        out = mlx5_vzalloc(sz);
        if (in == NULL || out == NULL)
                goto free_out;

        /*
         * Get pointer to the 64-bit counter set which is located at a
         * fixed offset in the output firmware request structure:
         */
        ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);

        MLX5_SET(ppcnt_reg, in, local_port, 1);

        /* read IEEE802_3 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
             x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
                s->arg[y] = be64toh(ptr[x]);

        /* read RFC2819 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
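        /*
         * NOTE: the destination index "y" deliberately carries over
         * from the IEEE802_3 loop above; the counter groups are stored
         * back to back in the "s->arg[]" and "s_debug->arg[]" arrays.
         */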
        for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
                s->arg[y] = be64toh(ptr[x]);
        for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
            MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read RFC2863 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read physical layer stats counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read per-priority counters */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);

        /* iterate all the priorities */
        for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
                MLX5_SET(ppcnt_reg, in, prio_tc, z);
                mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);

                /* read per priority stats counter group using predefined counter layout */
                for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
                    MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
                        s->arg[y] = be64toh(ptr[x]);
        }
free_out:
        /* free firmware request structures */
        kvfree(in);
        kvfree(out);
}

/*
 * This function is called regularly to collect all statistics
 * counters from the firmware. The values can be viewed through the
 * sysctl interface. Execution is serialized using the priv's global
 * configuration lock.
 */
static void
mlx5e_update_stats_work(struct work_struct *work)
{
        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
            update_stats_work);
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_vport_stats *s = &priv->stats.vport;
        struct mlx5e_rq_stats *rq_stats;
        struct mlx5e_sq_stats *sq_stats;
        struct buf_ring *sq_br;
#if (__FreeBSD_version < 1100000)
        struct ifnet *ifp = priv->ifp;
#endif

        u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
        u32 *out;
        int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
        u64 tso_packets = 0;
        u64 tso_bytes = 0;
        u64 tx_queue_dropped = 0;
        u64 tx_defragged = 0;
        u64 tx_offload_none = 0;
        u64 lro_packets = 0;
        u64 lro_bytes = 0;
        u64 sw_lro_queued = 0;
        u64 sw_lro_flushed = 0;
        u64 rx_csum_none = 0;
        u64 rx_wqe_err = 0;
        u32 rx_out_of_buffer = 0;
        int i;
        int j;

        PRIV_LOCK(priv);
        out = mlx5_vzalloc(outlen);
        if (out == NULL)
                goto free_out;
        if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
                goto free_out;

        /* Collect the SW counters first and then the HW counters, for consistency */
        for (i = 0; i < priv->params.num_channels; i++) {
                struct mlx5e_rq *rq = &priv->channel[i]->rq;

                rq_stats = &priv->channel[i]->rq.stats;

                /* collect stats from LRO */
                rq_stats->sw_lro_queued = rq->lro.lro_queued;
                rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
                sw_lro_queued += rq_stats->sw_lro_queued;
                sw_lro_flushed += rq_stats->sw_lro_flushed;
                lro_packets += rq_stats->lro_packets;
                lro_bytes += rq_stats->lro_bytes;
                rx_csum_none += rq_stats->csum_none;
                rx_wqe_err += rq_stats->wqe_err;

                for (j = 0; j < priv->num_tc; j++) {
                        sq_stats = &priv->channel[i]->sq[j].stats;
                        sq_br = priv->channel[i]->sq[j].br;

                        tso_packets += sq_stats->tso_packets;
                        tso_bytes += sq_stats->tso_bytes;
                        tx_queue_dropped += sq_stats->dropped;
                        if (sq_br != NULL)
                                tx_queue_dropped += sq_br->br_drops;
                        tx_defragged += sq_stats->defragged;
                        tx_offload_none += sq_stats->csum_offload_none;
                }
        }

        s->tx_jumbo_packets =
            priv->stats.port_stats_debug.p1519to2047octets +
            priv->stats.port_stats_debug.p2048to4095octets +
            priv->stats.port_stats_debug.p4096to8191octets +
            priv->stats.port_stats_debug.p8192to10239octets;

        /* update counters */
        s->tso_packets = tso_packets;
        s->tso_bytes = tso_bytes;
        s->tx_queue_dropped = tx_queue_dropped;
        s->tx_defragged = tx_defragged;
        s->lro_packets = lro_packets;
        s->lro_bytes = lro_bytes;
        s->sw_lro_queued = sw_lro_queued;
        s->sw_lro_flushed = sw_lro_flushed;
        s->rx_csum_none = rx_csum_none;
        s->rx_wqe_err = rx_wqe_err;

        /* HW counters */
        memset(in, 0, sizeof(in));

        MLX5_SET(query_vport_counter_in, in, opcode,
            MLX5_CMD_OP_QUERY_VPORT_COUNTER);
        MLX5_SET(query_vport_counter_in, in, op_mod, 0);
        MLX5_SET(query_vport_counter_in, in, other_vport, 0);

        memset(out, 0, outlen);

        /* get number of out-of-buffer drops first */
        if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
            &rx_out_of_buffer))
                goto free_out;

        /* accumulate difference into a 64-bit counter */
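        /*
         * The unsigned 32-bit subtraction below is wraparound-safe:
         * even if the firmware's 32-bit counter overflows between two
         * polls, the modular difference still equals the number of new
         * drops, which is then accumulated into the 64-bit counter.
         */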
        s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
        s->rx_out_of_buffer_prev = rx_out_of_buffer;

        /* get port statistics */
        if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
                goto free_out;

#define MLX5_GET_CTR(out, x) \
        MLX5_GET64(query_vport_counter_out, out, x)

        s->rx_error_packets =
            MLX5_GET_CTR(out, received_errors.packets);
        s->rx_error_bytes =
            MLX5_GET_CTR(out, received_errors.octets);
        s->tx_error_packets =
            MLX5_GET_CTR(out, transmit_errors.packets);
        s->tx_error_bytes =
            MLX5_GET_CTR(out, transmit_errors.octets);

        s->rx_unicast_packets =
            MLX5_GET_CTR(out, received_eth_unicast.packets);
        s->rx_unicast_bytes =
            MLX5_GET_CTR(out, received_eth_unicast.octets);
        s->tx_unicast_packets =
            MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
        s->tx_unicast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_unicast.octets);

        s->rx_multicast_packets =
            MLX5_GET_CTR(out, received_eth_multicast.packets);
        s->rx_multicast_bytes =
            MLX5_GET_CTR(out, received_eth_multicast.octets);
        s->tx_multicast_packets =
            MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
        s->tx_multicast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_multicast.octets);

        s->rx_broadcast_packets =
            MLX5_GET_CTR(out, received_eth_broadcast.packets);
        s->rx_broadcast_bytes =
            MLX5_GET_CTR(out, received_eth_broadcast.octets);
        s->tx_broadcast_packets =
            MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
        s->tx_broadcast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);

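        /*
         * Out-of-buffer drops are included in the hardware receive
         * totals above, so subtract them to report the number of
         * packets actually delivered to the network stack.
         */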
        s->rx_packets =
            s->rx_unicast_packets +
            s->rx_multicast_packets +
            s->rx_broadcast_packets -
            s->rx_out_of_buffer;
        s->rx_bytes =
            s->rx_unicast_bytes +
            s->rx_multicast_bytes +
            s->rx_broadcast_bytes;
        s->tx_packets =
            s->tx_unicast_packets +
            s->tx_multicast_packets +
            s->tx_broadcast_packets;
        s->tx_bytes =
            s->tx_unicast_bytes +
            s->tx_multicast_bytes +
            s->tx_broadcast_bytes;

        /* Update calculated offload counters */
        s->tx_csum_offload = s->tx_packets - tx_offload_none;
        s->rx_csum_good = s->rx_packets - s->rx_csum_none;

        /* Get physical port counters */
        mlx5e_update_pport_counters(priv);

#if (__FreeBSD_version < 1100000)
        /* no get_counters interface in fbsd 10 */
        ifp->if_ipackets = s->rx_packets;
        ifp->if_ierrors = s->rx_error_packets +
            priv->stats.pport.alignment_err +
            priv->stats.pport.check_seq_err +
            priv->stats.pport.crc_align_errors +
            priv->stats.pport.in_range_len_errors +
            priv->stats.pport.jabbers +
            priv->stats.pport.out_of_range_len +
            priv->stats.pport.oversize_pkts +
            priv->stats.pport.symbol_err +
            priv->stats.pport.too_long_errors +
            priv->stats.pport.undersize_pkts +
            priv->stats.pport.unsupported_op_rx;
        ifp->if_iqdrops = s->rx_out_of_buffer +
            priv->stats.pport.drop_events;
        ifp->if_opackets = s->tx_packets;
        ifp->if_oerrors = s->tx_error_packets;
        ifp->if_snd.ifq_drops = s->tx_queue_dropped;
        ifp->if_ibytes = s->rx_bytes;
        ifp->if_obytes = s->tx_bytes;
        ifp->if_collisions =
            priv->stats.pport.collisions;
#endif

free_out:
        kvfree(out);

        /* Update diagnostics, if any */
        if (priv->params_ethtool.diag_pci_enable ||
            priv->params_ethtool.diag_general_enable) {
                int error = mlx5_core_get_diagnostics_full(mdev,
                    priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
                    priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
                if (error != 0)
                        if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
        }
        PRIV_UNLOCK(priv);
}

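/*
 * Watchdog callout: defer the actual statistics collection to the
 * process-context work queue and re-arm for one second from now.
 */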
static void
mlx5e_update_stats(void *arg)
{
        struct mlx5e_priv *priv = arg;

        queue_work(priv->wq, &priv->update_stats_work);

        callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
}

static void
mlx5e_async_event_sub(struct mlx5e_priv *priv,
    enum mlx5_dev_event event)
{
        switch (event) {
        case MLX5_DEV_EVENT_PORT_UP:
        case MLX5_DEV_EVENT_PORT_DOWN:
                queue_work(priv->wq, &priv->update_carrier_work);
                break;

        default:
                break;
        }
}

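/*
 * The async events mutex serializes event delivery against
 * mlx5e_disable_async_events(), so that no new work is queued after
 * events have been disabled.
 */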
static void
mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
    enum mlx5_dev_event event, unsigned long param)
{
        struct mlx5e_priv *priv = vpriv;

        mtx_lock(&priv->async_events_mtx);
        if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
                mlx5e_async_event_sub(priv, event);
        mtx_unlock(&priv->async_events_mtx);
}

static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
        set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}

static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
        mtx_lock(&priv->async_events_mtx);
        clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
        mtx_unlock(&priv->async_events_mtx);
}

static void mlx5e_calibration_callout(void *arg);
static int mlx5e_calibration_duration = 20;
static int mlx5e_fast_calibration = 1;
static int mlx5e_normal_calibration = 30;

static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW, 0,
    "MLX5 timestamp calibration parameters");

SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
    &mlx5e_calibration_duration, 0,
    "Duration of initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
    &mlx5e_fast_calibration, 0,
    "Recalibration interval during initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
    &mlx5e_normal_calibration, 0,
    "Recalibration interval during normal operations");

/*
 * Starts the calibration process, or re-arms the calibration callout.
 */
static void
mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
{

        if (priv->clbr_done == 0)
                mlx5e_calibration_callout(priv);
        else
                callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
                    mlx5e_calibration_duration ? mlx5e_fast_calibration :
                    mlx5e_normal_calibration) * hz, mlx5e_calibration_callout,
                    priv);
}

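/*
 * NOTE: despite its name, this helper returns the time in
 * nanoseconds, not microseconds.  The calibration math only requires
 * that all timestamps use the same unit.
 */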
static uint64_t
mlx5e_timespec2usec(const struct timespec *ts)
{

        return ((uint64_t)ts->tv_sec * 1000000000 + ts->tv_nsec);
}

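/*
 * Read the 64-bit free-running hardware clock from its two 32-bit
 * halves.  The high word is read twice; a mismatch means the low word
 * wrapped between the reads, in which case the sequence is retried.
 */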
static uint64_t
mlx5e_hw_clock(struct mlx5e_priv *priv)
{
        struct mlx5_init_seg *iseg;
        uint32_t hw_h, hw_h1, hw_l;

        iseg = priv->mdev->iseg;
        do {
                hw_h = ioread32be(&iseg->internal_timer_h);
                hw_l = ioread32be(&iseg->internal_timer_l);
                hw_h1 = ioread32be(&iseg->internal_timer_h);
        } while (hw_h1 != hw_h);
        return (((uint64_t)hw_h << 32) | hw_l);
}

/*
 * The calibration callout runs either in the context of the thread
 * which enables calibration, or in callout context.  It takes a
 * snapshot of the system and adapter clocks, then advances the
 * pointer to the calibration point so that the RX path can read
 * consistent data locklessly.
 */
static void
mlx5e_calibration_callout(void *arg)
{
        struct mlx5e_priv *priv;
        struct mlx5e_clbr_point *next, *curr;
        struct timespec ts;
        int clbr_curr_next;

        priv = arg;
        curr = &priv->clbr_points[priv->clbr_curr];
        clbr_curr_next = priv->clbr_curr + 1;
        if (clbr_curr_next >= nitems(priv->clbr_points))
                clbr_curr_next = 0;
        next = &priv->clbr_points[clbr_curr_next];

        next->base_prev = curr->base_curr;
        next->clbr_hw_prev = curr->clbr_hw_curr;

        next->clbr_hw_curr = mlx5e_hw_clock(priv);
        if (((next->clbr_hw_curr - curr->clbr_hw_prev) >> MLX5E_TSTMP_PREC) ==
            0) {
                if_printf(priv->ifp, "HW timestamp frozen (%#jx, %#jx), "
                    "disabling calibration\n", next->clbr_hw_curr,
                    curr->clbr_hw_prev);
                priv->clbr_done = 0;
                return;
        }

        nanouptime(&ts);
        next->base_curr = mlx5e_timespec2usec(&ts);

        curr->clbr_gen = 0;
        atomic_thread_fence_rel();
        priv->clbr_curr = clbr_curr_next;
        atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen));

        if (priv->clbr_done < mlx5e_calibration_duration)
                priv->clbr_done++;
        mlx5e_reset_calibration_callout(priv);
}

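/*
 * Illustrative sketch (not part of the driver): a lockless reader of
 * the calibration points pairs the generation counter with acquire
 * ordering, seqlock-style.  A zero generation marks a point that is
 * being rewritten; a generation change across the reads means the
 * snapshot was torn and must be retried.  The names below mirror the
 * fields used above; the real consumer is the RX-path timestamp
 * conversion code.
 *
 *	for (;;) {
 *		clbr = &priv->clbr_points[priv->clbr_curr];
 *		gen = atomic_load_acq_int(&clbr->clbr_gen);
 *		if (gen == 0)
 *			continue;
 *		(compute the timestamp from clbr->base_curr,
 *		 clbr->clbr_hw_curr and the previous values)
 *		if (atomic_load_acq_int(&clbr->clbr_gen) == gen)
 *			break;
 *	}
 */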

static const char *mlx5e_rq_stats_desc[] = {
        MLX5E_RQ_STATS(MLX5E_STATS_DESC)
};

static int
mlx5e_create_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        char buffer[16];
        void *rqc = param->rqc;
        void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
        int wq_sz;
        int err;
        int i;
        u32 nsegs, wqe_sz;

        err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
        if (err != 0)
                goto done;

        /* Create DMA descriptor TAG */
        if ((err = -bus_dma_tag_create(
            bus_get_dma_tag(mdev->pdev->dev.bsddev),
            1,                          /* any alignment */
            0,                          /* no boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            nsegs * MLX5E_MAX_RX_BYTES, /* maxsize */
            nsegs,                      /* nsegments */
            nsegs * MLX5E_MAX_RX_BYTES, /* maxsegsize */
            0,                          /* flags */
            NULL, NULL,                 /* lockfunc, lockfuncarg */
            &rq->dma_tag)))
                goto done;

        err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
            &rq->wq_ctrl);
        if (err)
                goto err_free_dma_tag;

        rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];

        err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs);
        if (err != 0)
                goto err_rq_wq_destroy;

        wq_sz = mlx5_wq_ll_get_size(&rq->wq);

        err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
        if (err)
                goto err_rq_wq_destroy;

        rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
        for (i = 0; i != wq_sz; i++) {
                struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
#if (MLX5E_MAX_RX_SEGS == 1)
                uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
#else
                int j;
#endif

                err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
                if (err != 0) {
                        while (i--)
                                bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
                        goto err_rq_mbuf_free;
                }

                /* set value for constant fields */
#if (MLX5E_MAX_RX_SEGS == 1)
                wqe->data[0].lkey = c->mkey_be;
                wqe->data[0].byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
#else
                for (j = 0; j < rq->nsegs; j++)
                        wqe->data[j].lkey = c->mkey_be;
#endif
        }

        rq->ifp = c->ifp;
        rq->channel = c;
        rq->ix = c->ix;

        snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
        mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
            buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
            rq->stats.arg);
        return (0);

err_rq_mbuf_free:
        free(rq->mbuf, M_MLX5EN);
        tcp_lro_free(&rq->lro);
err_rq_wq_destroy:
        mlx5_wq_destroy(&rq->wq_ctrl);
err_free_dma_tag:
        bus_dma_tag_destroy(rq->dma_tag);
done:
        return (err);
}

static void
mlx5e_destroy_rq(struct mlx5e_rq *rq)
{
        int wq_sz;
        int i;

        /* destroy all sysctl nodes */
        sysctl_ctx_free(&rq->stats.ctx);

        /* free leftover LRO packets, if any */
        tcp_lro_free(&rq->lro);

        wq_sz = mlx5_wq_ll_get_size(&rq->wq);
        for (i = 0; i != wq_sz; i++) {
                if (rq->mbuf[i].mbuf != NULL) {
                        bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
                        m_freem(rq->mbuf[i].mbuf);
                }
                bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
        }
        free(rq->mbuf, M_MLX5EN);
        mlx5_wq_destroy(&rq->wq_ctrl);
}

static int
mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;

        void *in;
        void *rqc;
        void *wq;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
            sizeof(u64) * rq->wq_ctrl.buf.npages;
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
        wq = MLX5_ADDR_OF(rqc, rqc, wq);

        memcpy(rqc, param->rqc, sizeof(param->rqc));

        MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
        MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
        MLX5_SET(rqc, rqc, flush_in_error_en, 1);
        if (priv->counter_set_id >= 0)
                MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
        MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
            PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);

        mlx5_fill_page_array(&rq->wq_ctrl.buf,
            (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

        err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);

        kvfree(in);

        return (err);
}

static int
mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;

        void *in;
        void *rqc;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);

        MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
        MLX5_SET(modify_rq_in, in, rq_state, curr_state);
        MLX5_SET(rqc, rqc, state, next_state);

        err = mlx5_core_modify_rq(mdev, in, inlen);

        kvfree(in);

        return (err);
}

static void
mlx5e_disable_rq(struct mlx5e_rq *rq)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;

        mlx5_core_destroy_rq(mdev, rq->rqn);
}

static int
mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_wq_ll *wq = &rq->wq;
        int i;

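        /* Poll up to roughly four seconds (1000 iterations of 4 ms). */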
        for (i = 0; i < 1000; i++) {
                if (wq->cur_sz >= priv->params.min_rx_wqes)
                        return (0);

                msleep(4);
        }
        return (-ETIMEDOUT);
}

static int
mlx5e_open_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
        int err;

        err = mlx5e_create_rq(c, param, rq);
        if (err)
                return (err);

        err = mlx5e_enable_rq(rq, param);
        if (err)
                goto err_destroy_rq;

        err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
        if (err)
                goto err_disable_rq;

        c->rq.enabled = 1;

        return (0);

err_disable_rq:
        mlx5e_disable_rq(rq);
err_destroy_rq:
        mlx5e_destroy_rq(rq);

        return (err);
}

static void
mlx5e_close_rq(struct mlx5e_rq *rq)
{
        mtx_lock(&rq->mtx);
        rq->enabled = 0;
        callout_stop(&rq->watchdog);
        mtx_unlock(&rq->mtx);

        callout_drain(&rq->watchdog);

        mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
}

static void
mlx5e_close_rq_wait(struct mlx5e_rq *rq)
{
        struct mlx5_core_dev *mdev = rq->channel->priv->mdev;

        /* wait till RQ is empty */
        while (!mlx5_wq_ll_is_empty(&rq->wq) &&
               (mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
                msleep(4);
                rq->cq.mcq.comp(&rq->cq.mcq);
        }

        mlx5e_disable_rq(rq);
        mlx5e_destroy_rq(rq);
}

void
mlx5e_free_sq_db(struct mlx5e_sq *sq)
{
        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
        int x;

        for (x = 0; x != wq_sz; x++)
                bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
        free(sq->mbuf, M_MLX5EN);
}

int
mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
{
        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
        int err;
        int x;

        sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);

        /* Create DMA descriptor MAPs */
        for (x = 0; x != wq_sz; x++) {
                err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
                if (err != 0) {
                        while (x--)
                                bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
                        free(sq->mbuf, M_MLX5EN);
                        return (err);
                }
        }
        return (0);
}

static const char *mlx5e_sq_stats_desc[] = {
        MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};

static int
mlx5e_create_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        char buffer[16];
        void *sqc = param->sqc;
        void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
        int err;

        /* Create DMA descriptor TAG */
        if ((err = -bus_dma_tag_create(
            bus_get_dma_tag(mdev->pdev->dev.bsddev),
            1,                          /* any alignment */
            0,                          /* no boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            MLX5E_MAX_TX_PAYLOAD_SIZE,  /* maxsize */
            MLX5E_MAX_TX_MBUF_FRAGS,    /* nsegments */
            MLX5E_MAX_TX_MBUF_SIZE,     /* maxsegsize */
            0,                          /* flags */
            NULL, NULL,                 /* lockfunc, lockfuncarg */
            &sq->dma_tag)))
                goto done;

        err = mlx5_alloc_map_uar(mdev, &sq->uar);
        if (err)
                goto err_free_dma_tag;

        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
            &sq->wq_ctrl);
        if (err)
                goto err_unmap_free_uar;

        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
        sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;

        err = mlx5e_alloc_sq_db(sq);
        if (err)
                goto err_sq_wq_destroy;

        sq->mkey_be = c->mkey_be;
        sq->ifp = priv->ifp;
        sq->priv = priv;
        sq->tc = tc;
        sq->max_inline = priv->params.tx_max_inline;
        sq->min_inline_mode = priv->params.tx_min_inline_mode;
        sq->vlan_inline_cap = MLX5_CAP_ETH(mdev, wqe_vlan_insert);

        snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
        mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
            buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
            sq->stats.arg);

        return (0);

err_sq_wq_destroy:
        mlx5_wq_destroy(&sq->wq_ctrl);

err_unmap_free_uar:
        mlx5_unmap_free_uar(mdev, &sq->uar);

err_free_dma_tag:
        bus_dma_tag_destroy(sq->dma_tag);
done:
        return (err);
}

static void
mlx5e_destroy_sq(struct mlx5e_sq *sq)
{
        /* destroy all sysctl nodes */
        sysctl_ctx_free(&sq->stats.ctx);

        mlx5e_free_sq_db(sq);
        mlx5_wq_destroy(&sq->wq_ctrl);
        mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
}

int
mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
    int tis_num)
{
        void *in;
        void *sqc;
        void *wq;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
            sizeof(u64) * sq->wq_ctrl.buf.npages;
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
        wq = MLX5_ADDR_OF(sqc, sqc, wq);

        memcpy(sqc, param->sqc, sizeof(param->sqc));

        MLX5_SET(sqc, sqc, tis_num_0, tis_num);
        MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
        MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
        MLX5_SET(sqc, sqc, tis_lst_sz, 1);
        MLX5_SET(sqc, sqc, flush_in_error_en, 1);

        MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
        MLX5_SET(wq, wq, uar_page, sq->uar.index);
        MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
            PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);

        mlx5_fill_page_array(&sq->wq_ctrl.buf,
            (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

        err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);

        kvfree(in);

        return (err);
}

int
mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
{
        void *in;
        void *sqc;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

        MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
        MLX5_SET(modify_sq_in, in, sq_state, curr_state);
        MLX5_SET(sqc, sqc, state, next_state);

        err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);

        kvfree(in);

        return (err);
}

void
mlx5e_disable_sq(struct mlx5e_sq *sq)
{

        mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
}

static int
mlx5e_open_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
        int err;

        err = mlx5e_create_sq(c, tc, param, sq);
        if (err)
                return (err);

        err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
        if (err)
                goto err_destroy_sq;

        err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
        if (err)
                goto err_disable_sq;

        return (0);

err_disable_sq:
        mlx5e_disable_sq(sq);
err_destroy_sq:
        mlx5e_destroy_sq(sq);

        return (err);
}

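/*
 * Pad the transmit ring with NOP WQEs until the completion event
 * counter reaches zero.  This forces the hardware to generate a
 * completion event so that any mbufs held by outstanding WQEs are
 * eventually completed and freed.
 */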
static void
mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
{
        /* fill up remainder with NOPs */
        while (sq->cev_counter != 0) {
                while (!mlx5e_sq_has_room_for(sq, 1)) {
                        if (can_sleep != 0) {
                                mtx_unlock(&sq->lock);
                                msleep(4);
                                mtx_lock(&sq->lock);
                        } else {
                                goto done;
                        }
                }
                /* send a single NOP */
                mlx5e_send_nop(sq, 1);
                atomic_thread_fence_rel();
        }
done:
        /* Check if we need to write the doorbell */
        if (likely(sq->doorbell.d64 != 0)) {
                mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
                sq->doorbell.d64 = 0;
        }
}

void
mlx5e_sq_cev_timeout(void *arg)
{
        struct mlx5e_sq *sq = arg;

        mtx_assert(&sq->lock, MA_OWNED);

        /* check next state */
        switch (sq->cev_next_state) {
        case MLX5E_CEV_STATE_SEND_NOPS:
                /* fill TX ring with NOPs, if any */
                mlx5e_sq_send_nops_locked(sq, 0);

                /* check if completed */
                if (sq->cev_counter == 0) {
                        sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
                        return;
                }
                break;
        default:
                /* send NOPs on next timeout */
                sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
                break;
        }

        /* restart timer */
        callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
}

void
mlx5e_drain_sq(struct mlx5e_sq *sq)
{
        int error;
        struct mlx5_core_dev *mdev = sq->priv->mdev;

        /*
         * Check if already stopped.
         *
         * NOTE: The "stopped" variable is only written while both the
         * priv's configuration lock and the SQ's lock are held. It
         * can therefore safely be read while only one of the two locks
         * is held. This function is always called with the priv's
         * configuration lock held.
         */
        if (sq->stopped != 0)
                return;

        mtx_lock(&sq->lock);

        /* don't put more packets into the SQ */
        sq->stopped = 1;

        /* teardown event factor timer, if any */
        sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
        callout_stop(&sq->cev_callout);

        /* send dummy NOPs in order to flush the transmit ring */
        mlx5e_sq_send_nops_locked(sq, 1);
        mtx_unlock(&sq->lock);

        /* make sure it is safe to free the callout */
        callout_drain(&sq->cev_callout);

        /* wait till SQ is empty or link is down */
        mtx_lock(&sq->lock);
        while (sq->cc != sq->pc &&
            (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
            mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
                mtx_unlock(&sq->lock);
                msleep(1);
                sq->cq.mcq.comp(&sq->cq.mcq);
                mtx_lock(&sq->lock);
        }
        mtx_unlock(&sq->lock);

        /* error out remaining requests */
        error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
        if (error != 0) {
                if_printf(sq->ifp,
                    "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
        }

        /* wait till SQ is empty */
        mtx_lock(&sq->lock);
        while (sq->cc != sq->pc &&
               mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
                mtx_unlock(&sq->lock);
                msleep(1);
                sq->cq.mcq.comp(&sq->cq.mcq);
                mtx_lock(&sq->lock);
        }
        mtx_unlock(&sq->lock);
}

static void
mlx5e_close_sq_wait(struct mlx5e_sq *sq)
{

        mlx5e_drain_sq(sq);
        mlx5e_disable_sq(sq);
        mlx5e_destroy_sq(sq);
}

static int
mlx5e_create_cq(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    int eq_ix)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5_core_cq *mcq = &cq->mcq;
        int eqn_not_used;
        int irqn;
        int err;
        u32 i;

        param->wq.buf_numa_node = 0;
        param->wq.db_numa_node = 0;

        err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
            &cq->wq_ctrl);
        if (err)
                return (err);

        mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);

        mcq->cqe_sz = 64;
        mcq->set_ci_db = cq->wq_ctrl.db.db;
        mcq->arm_db = cq->wq_ctrl.db.db + 1;
        *mcq->set_ci_db = 0;
        *mcq->arm_db = 0;
        mcq->vector = eq_ix;
        mcq->comp = comp;
        mcq->event = mlx5e_cq_error_event;
        mcq->irqn = irqn;
        mcq->uar = &priv->cq_uar;

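        /*
         * Initialize all CQEs to an invalid opcode with the ownership
         * bit set, so that entries which have not yet been written by
         * the hardware are never mistaken for valid completions.
         */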
1480         for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1481                 struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1482
1483                 cqe->op_own = 0xf1;
1484         }
1485
1486         cq->priv = priv;
1487
1488         return (0);
1489 }
1490
1491 static void
1492 mlx5e_destroy_cq(struct mlx5e_cq *cq)
1493 {
1494         mlx5_wq_destroy(&cq->wq_ctrl);
1495 }
1496
1497 static int
1498 mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
1499 {
1500         struct mlx5_core_cq *mcq = &cq->mcq;
1501         void *in;
1502         void *cqc;
1503         int inlen;
1504         int irqn_not_used;
1505         int eqn;
1506         int err;
1507
1508         inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1509             sizeof(u64) * cq->wq_ctrl.buf.npages;
1510         in = mlx5_vzalloc(inlen);
1511         if (in == NULL)
1512                 return (-ENOMEM);
1513
1514         cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1515
1516         memcpy(cqc, param->cqc, sizeof(param->cqc));
1517
1518         mlx5_fill_page_array(&cq->wq_ctrl.buf,
1519             (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
1520
1521         mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
1522
1523         MLX5_SET(cqc, cqc, c_eqn, eqn);
1524         MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
1525         MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1526             PAGE_SHIFT);
1527         MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1528
1529         err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);
1530
1531         kvfree(in);
1532
1533         if (err)
1534                 return (err);
1535
1536         mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));
1537
1538         return (0);
1539 }
1540
1541 static void
1542 mlx5e_disable_cq(struct mlx5e_cq *cq)
1543 {
1544
1545         mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
1546 }
1547
1548 int
1549 mlx5e_open_cq(struct mlx5e_priv *priv,
1550     struct mlx5e_cq_param *param,
1551     struct mlx5e_cq *cq,
1552     mlx5e_cq_comp_t *comp,
1553     int eq_ix)
1554 {
1555         int err;
1556
1557         err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
1558         if (err)
1559                 return (err);
1560
1561         err = mlx5e_enable_cq(cq, param, eq_ix);
1562         if (err)
1563                 goto err_destroy_cq;
1564
1565         return (0);
1566
1567 err_destroy_cq:
1568         mlx5e_destroy_cq(cq);
1569
1570         return (err);
1571 }
1572
1573 void
1574 mlx5e_close_cq(struct mlx5e_cq *cq)
1575 {
1576         mlx5e_disable_cq(cq);
1577         mlx5e_destroy_cq(cq);
1578 }
1579
1580 static int
1581 mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1582     struct mlx5e_channel_param *cparam)
1583 {
1584         int err;
1585         int tc;
1586
1587         for (tc = 0; tc < c->num_tc; tc++) {
1588                 /* open completion queue */
1589                 err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
1590                     &mlx5e_tx_cq_comp, c->ix);
1591                 if (err)
1592                         goto err_close_tx_cqs;
1593         }
1594         return (0);
1595
1596 err_close_tx_cqs:
1597         for (tc--; tc >= 0; tc--)
1598                 mlx5e_close_cq(&c->sq[tc].cq);
1599
1600         return (err);
1601 }
1602
1603 static void
1604 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1605 {
1606         int tc;
1607
1608         for (tc = 0; tc < c->num_tc; tc++)
1609                 mlx5e_close_cq(&c->sq[tc].cq);
1610 }
1611
1612 static int
1613 mlx5e_open_sqs(struct mlx5e_channel *c,
1614     struct mlx5e_channel_param *cparam)
1615 {
1616         int err;
1617         int tc;
1618
1619         for (tc = 0; tc < c->num_tc; tc++) {
1620                 err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
1621                 if (err)
1622                         goto err_close_sqs;
1623         }
1624
1625         return (0);
1626
1627 err_close_sqs:
1628         for (tc--; tc >= 0; tc--)
1629                 mlx5e_close_sq_wait(&c->sq[tc]);
1630
1631         return (err);
1632 }
1633
1634 static void
1635 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1636 {
1637         int tc;
1638
1639         for (tc = 0; tc < c->num_tc; tc++)
1640                 mlx5e_close_sq_wait(&c->sq[tc]);
1641 }
1642
1643 static void
1644 mlx5e_chan_mtx_init(struct mlx5e_channel *c)
1645 {
1646         int tc;
1647
1648         mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
1649
1650         callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);
1651
1652         for (tc = 0; tc < c->num_tc; tc++) {
1653                 struct mlx5e_sq *sq = c->sq + tc;
1654
1655                 mtx_init(&sq->lock, "mlx5tx",
1656                     MTX_NETWORK_LOCK " TX", MTX_DEF);
1657                 mtx_init(&sq->comp_lock, "mlx5comp",
1658                     MTX_NETWORK_LOCK " TX", MTX_DEF);
1659
1660                 callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
1661
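                /*
                 * The completion event factor controls how many send
                 * WQEs are posted before a completion event is
                 * requested; for example, a factor of 4 would request
                 * roughly one completion per four WQEs, trading
                 * completion latency for fewer events.
                 */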
1662                 sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
1663
1664                 /* ensure the TX completion event factor is not zero */
1665                 if (sq->cev_factor == 0)
1666                         sq->cev_factor = 1;
1667         }
1668 }
1669
1670 static void
1671 mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1672 {
1673         int tc;
1674
1675         mtx_destroy(&c->rq.mtx);
1676
1677         for (tc = 0; tc < c->num_tc; tc++) {
1678                 mtx_destroy(&c->sq[tc].lock);
1679                 mtx_destroy(&c->sq[tc].comp_lock);
1680         }
1681 }
1682
1683 static int
1684 mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1685     struct mlx5e_channel_param *cparam,
1686     struct mlx5e_channel *volatile *cp)
1687 {
1688         struct mlx5e_channel *c;
1689         int err;
1690
1691         c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
1692         c->priv = priv;
1693         c->ix = ix;
1694         c->cpu = 0;
1695         c->ifp = priv->ifp;
1696         c->mkey_be = cpu_to_be32(priv->mr.key);
1697         c->num_tc = priv->num_tc;
1698
1699         /* init mutexes */
1700         mlx5e_chan_mtx_init(c);
1701
1702         /* open transmit completion queue */
1703         err = mlx5e_open_tx_cqs(c, cparam);
1704         if (err)
1705                 goto err_free;
1706
1707         /* open receive completion queue */
1708         err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
1709             &mlx5e_rx_cq_comp, c->ix);
1710         if (err)
1711                 goto err_close_tx_cqs;
1712
1713         err = mlx5e_open_sqs(c, cparam);
1714         if (err)
1715                 goto err_close_rx_cq;
1716
1717         err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
1718         if (err)
1719                 goto err_close_sqs;
1720
1721         /* store channel pointer */
1722         *cp = c;
1723
1724         /* poll receive queue initially */
1725         c->rq.cq.mcq.comp(&c->rq.cq.mcq);
1726
1727         return (0);
1728
1729 err_close_sqs:
1730         mlx5e_close_sqs_wait(c);
1731
1732 err_close_rx_cq:
1733         mlx5e_close_cq(&c->rq.cq);
1734
1735 err_close_tx_cqs:
1736         mlx5e_close_tx_cqs(c);
1737
1738 err_free:
1739         /* destroy mutexes */
1740         mlx5e_chan_mtx_destroy(c);
1741         free(c, M_MLX5EN);
1742         return (err);
1743 }
1744
1745 static void
1746 mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
1747 {
1748         struct mlx5e_channel *c = *pp;
1749
1750         /* check if channel is already closed */
1751         if (c == NULL)
1752                 return;
1753         mlx5e_close_rq(&c->rq);
1754 }
1755
1756 static void
1757 mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
1758 {
1759         struct mlx5e_channel *c = *pp;
1760
1761         /* check if channel is already closed */
1762         if (c == NULL)
1763                 return;
1764         /* ensure channel pointer is no longer used */
1765         *pp = NULL;
1766
1767         mlx5e_close_rq_wait(&c->rq);
1768         mlx5e_close_sqs_wait(c);
1769         mlx5e_close_cq(&c->rq.cq);
1770         mlx5e_close_tx_cqs(c);
1771         /* destroy mutexes */
1772         mlx5e_chan_mtx_destroy(c);
1773         free(c, M_MLX5EN);
1774 }
1775
1776 static int
1777 mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs)
1778 {
1779         u32 r, n;
1780
1781         r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz :
1782             MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
1783         if (r > MJUM16BYTES)
1784                 return (-ENOMEM);
1785
1786         if (r > MJUM9BYTES)
1787                 r = MJUM16BYTES;
1788         else if (r > MJUMPAGESIZE)
1789                 r = MJUM9BYTES;
1790         else if (r > MCLBYTES)
1791                 r = MJUMPAGESIZE;
1792         else
1793                 r = MCLBYTES;
1794
1795         /*
1796          * n + 1 must be a power of two, because stride size must be.
1797          * Stride size is 16 * (n + 1), as the first segment is the
1798          * control segment.
1799          */
1800         for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++)
1801                 ;
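        /*
         * Worked example, assuming MLX5E_MAX_RX_BYTES is 4096: for
         * r = MJUM9BYTES (9KB), howmany() gives n = 3, and n + 1 = 4
         * is already a power of two, yielding a 64-byte stride
         * (16 * 4) with one control and three data segments.
         */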
1802
1803         *wqe_sz = r;
1804         *nsegs = n;
1805         return (0);
1806 }
1807
1808 static void
1809 mlx5e_build_rq_param(struct mlx5e_priv *priv,
1810     struct mlx5e_rq_param *param)
1811 {
1812         void *rqc = param->rqc;
1813         void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
1814         u32 wqe_sz, nsegs;
1815
1816         mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
1817         MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
1818         MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1819         MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) +
1820             nsegs * sizeof(struct mlx5_wqe_data_seg)));
1821         MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
1822         MLX5_SET(wq, wq, pd, priv->pdn);
1823
1824         param->wq.buf_numa_node = 0;
1825         param->wq.db_numa_node = 0;
1826         param->wq.linear = 1;
1827 }
1828
1829 static void
1830 mlx5e_build_sq_param(struct mlx5e_priv *priv,
1831     struct mlx5e_sq_param *param)
1832 {
1833         void *sqc = param->sqc;
1834         void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
1835
1836         MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
1837         MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1838         MLX5_SET(wq, wq, pd, priv->pdn);
1839
1840         param->wq.buf_numa_node = 0;
1841         param->wq.db_numa_node = 0;
1842         param->wq.linear = 1;
1843 }
1844
1845 static void
1846 mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
1847     struct mlx5e_cq_param *param)
1848 {
1849         void *cqc = param->cqc;
1850
1851         MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
1852 }
1853
1854 static void
1855 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1856     struct mlx5e_cq_param *param)
1857 {
1858         void *cqc = param->cqc;
1859
1860         /*
1861          * TODO: The sysctl controlling this on/off is a boolean for now; we
1862          * only support CSUM. Once HASH is implemented, we'll need to address
1863          * that.
1864          */
1865         if (priv->params.cqe_zipping_en) {
1866                 MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1867                 MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1868         }
1869
1870         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1871         MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1872         MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1873
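        /*
         * cq_period_mode selects when the moderation timer restarts:
         * EQE-based mode restarts it on the last event, while
         * CQE-based mode restarts it on every completion, which can
         * reduce latency for bursty traffic when the hardware
         * supports it.
         */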
1874         switch (priv->params.rx_cq_moderation_mode) {
1875         case 0:
1876                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1877                 break;
1878         default:
1879                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1880                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1881                 else
1882                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1883                 break;
1884         }
1885
1886         mlx5e_build_common_cq_param(priv, param);
1887 }
1888
1889 static void
1890 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1891     struct mlx5e_cq_param *param)
1892 {
1893         void *cqc = param->cqc;
1894
1895         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1896         MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1897         MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1898
1899         switch (priv->params.tx_cq_moderation_mode) {
1900         case 0:
1901                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1902                 break;
1903         default:
1904                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1905                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1906                 else
1907                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1908                 break;
1909         }
1910
1911         mlx5e_build_common_cq_param(priv, param);
1912 }
1913
1914 static void
1915 mlx5e_build_channel_param(struct mlx5e_priv *priv,
1916     struct mlx5e_channel_param *cparam)
1917 {
1918         memset(cparam, 0, sizeof(*cparam));
1919
1920         mlx5e_build_rq_param(priv, &cparam->rq);
1921         mlx5e_build_sq_param(priv, &cparam->sq);
1922         mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
1923         mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
1924 }
1925
1926 static int
1927 mlx5e_open_channels(struct mlx5e_priv *priv)
1928 {
1929         struct mlx5e_channel_param cparam;
1930         void *ptr;
1931         int err;
1932         int i;
1933         int j;
1934
1935         priv->channel = malloc(priv->params.num_channels *
1936             sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
1937
1938         mlx5e_build_channel_param(priv, &cparam);
1939         for (i = 0; i < priv->params.num_channels; i++) {
1940                 err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1941                 if (err)
1942                         goto err_close_channels;
1943         }
1944
1945         for (j = 0; j < priv->params.num_channels; j++) {
1946                 err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
1947                 if (err)
1948                         goto err_close_channels;
1949         }
1950
1951         return (0);
1952
1953 err_close_channels:
1954         for (i--; i >= 0; i--) {
1955                 mlx5e_close_channel(&priv->channel[i]);
1956                 mlx5e_close_channel_wait(&priv->channel[i]);
1957         }
1958
1959         /* remove "volatile" attribute from "channel" pointer */
1960         ptr = __DECONST(void *, priv->channel);
1961         priv->channel = NULL;
1962
1963         free(ptr, M_MLX5EN);
1964
1965         return (err);
1966 }
1967
1968 static void
1969 mlx5e_close_channels(struct mlx5e_priv *priv)
1970 {
1971         void *ptr;
1972         int i;
1973
1974         if (priv->channel == NULL)
1975                 return;
1976
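        /*
         * Closing is done in two passes so that all channels shut
         * down in parallel: first stop every receive queue, then wait
         * for each channel to drain and free its resources.
         */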
1977         for (i = 0; i < priv->params.num_channels; i++)
1978                 mlx5e_close_channel(&priv->channel[i]);
1979         for (i = 0; i < priv->params.num_channels; i++)
1980                 mlx5e_close_channel_wait(&priv->channel[i]);
1981
1982         /* remove "volatile" attribute from "channel" pointer */
1983         ptr = __DECONST(void *, priv->channel);
1984         priv->channel = NULL;
1985
1986         free(ptr, M_MLX5EN);
1987 }
1988
1989 static int
1990 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
1991 {
1992
1993         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1994                 uint8_t cq_mode;
1995
1996                 switch (priv->params.tx_cq_moderation_mode) {
1997                 case 0:
1998                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1999                         break;
2000                 default:
2001                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2002                         break;
2003                 }
2004
2005                 return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
2006                     priv->params.tx_cq_moderation_usec,
2007                     priv->params.tx_cq_moderation_pkts,
2008                     cq_mode));
2009         }
2010
2011         return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
2012             priv->params.tx_cq_moderation_usec,
2013             priv->params.tx_cq_moderation_pkts));
2014 }
2015
2016 static int
2017 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
2018 {
2019
2020         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
2021                 uint8_t cq_mode;
2022                 int retval;
2023
2024                 switch (priv->params.rx_cq_moderation_mode) {
2025                 case 0:
2026                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2027                         break;
2028                 default:
2029                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2030                         break;
2031                 }
2032
2033                 retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
2034                     priv->params.rx_cq_moderation_usec,
2035                     priv->params.rx_cq_moderation_pkts,
2036                     cq_mode);
2037
2038                 return (retval);
2039         }
2040
2041         return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
2042             priv->params.rx_cq_moderation_usec,
2043             priv->params.rx_cq_moderation_pkts));
2044 }
2045
2046 static int
2047 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
2048 {
2049         int err;
2050         int i;
2051
2052         if (c == NULL)
2053                 return (EINVAL);
2054
2055         err = mlx5e_refresh_rq_params(priv, &c->rq);
2056         if (err)
2057                 goto done;
2058
2059         for (i = 0; i != c->num_tc; i++) {
2060                 err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
2061                 if (err)
2062                         goto done;
2063         }
2064 done:
2065         return (err);
2066 }
2067
2068 int
2069 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
2070 {
2071         int i;
2072
2073         if (priv->channel == NULL)
2074                 return (EINVAL);
2075
2076         for (i = 0; i < priv->params.num_channels; i++) {
2077                 int err;
2078
2079                 err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
2080                 if (err)
2081                         return (err);
2082         }
2083         return (0);
2084 }
2085
2086 static int
2087 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
2088 {
2089         struct mlx5_core_dev *mdev = priv->mdev;
2090         u32 in[MLX5_ST_SZ_DW(create_tis_in)];
2091         void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
2092
2093         memset(in, 0, sizeof(in));
2094
2095         MLX5_SET(tisc, tisc, prio, tc);
2096         MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
2097
2098         return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
2099 }
2100
2101 static void
2102 mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
2103 {
2104         mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
2105 }
2106
2107 static int
2108 mlx5e_open_tises(struct mlx5e_priv *priv)
2109 {
2110         int num_tc = priv->num_tc;
2111         int err;
2112         int tc;
2113
2114         for (tc = 0; tc < num_tc; tc++) {
2115                 err = mlx5e_open_tis(priv, tc);
2116                 if (err)
2117                         goto err_close_tises;
2118         }
2119
2120         return (0);
2121
2122 err_close_tises:
2123         for (tc--; tc >= 0; tc--)
2124                 mlx5e_close_tis(priv, tc);
2125
2126         return (err);
2127 }
2128
2129 static void
2130 mlx5e_close_tises(struct mlx5e_priv *priv)
2131 {
2132         int num_tc = priv->num_tc;
2133         int tc;
2134
2135         for (tc = 0; tc < num_tc; tc++)
2136                 mlx5e_close_tis(priv, tc);
2137 }
2138
2139 static int
2140 mlx5e_open_rqt(struct mlx5e_priv *priv)
2141 {
2142         struct mlx5_core_dev *mdev = priv->mdev;
2143         u32 *in;
2144         u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
2145         void *rqtc;
2146         int inlen;
2147         int err;
2148         int sz;
2149         int i;
2150
2151         sz = 1 << priv->params.rx_hash_log_tbl_sz;
2152
2153         inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
2154         in = mlx5_vzalloc(inlen);
2155         if (in == NULL)
2156                 return (-ENOMEM);
2157         rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
2158
2159         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
2160         MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
2161
2162         for (i = 0; i < sz; i++) {
2163                 int ix = i;
2164 #ifdef RSS
2165                 ix = rss_get_indirection_to_bucket(ix);
2166 #endif
2167                 /* ensure we don't overflow */
2168                 ix %= priv->params.num_channels;
2169
2170                 /* apply receive side scaling stride, if any */
2171                 ix -= ix % (int)priv->params.channels_rsss;
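                /*
                 * Example of the stride arithmetic: with
                 * channels_rsss = 2, indices 0,1,2,3 map to channels
                 * 0,0,2,2, i.e. each index is rounded down to a
                 * multiple of the RSS stride.
                 */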
2172
2173                 MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
2174         }
2175
2176         MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
2177
2178         err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
2179         if (!err)
2180                 priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
2181
2182         kvfree(in);
2183
2184         return (err);
2185 }
2186
2187 static void
2188 mlx5e_close_rqt(struct mlx5e_priv *priv)
2189 {
2190         u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
2191         u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
2192
2193         MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
2194         MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
2195
2196         mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
2197 }
2198
2199 static void
2200 mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
2201 {
2202         void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
2203         __be32 *hkey;
2204
2205         MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
2206
2207 #define ROUGH_MAX_L2_L3_HDR_SZ 256
2208
2209 #define MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2210                           MLX5_HASH_FIELD_SEL_DST_IP)
2211
2212 #define MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2213                           MLX5_HASH_FIELD_SEL_DST_IP   |\
2214                           MLX5_HASH_FIELD_SEL_L4_SPORT |\
2215                           MLX5_HASH_FIELD_SEL_L4_DPORT)
2216
2217 #define MLX5_HASH_IP_IPSEC_SPI  (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2218                                  MLX5_HASH_FIELD_SEL_DST_IP   |\
2219                                  MLX5_HASH_FIELD_SEL_IPSEC_SPI)
2220
2221         if (priv->params.hw_lro_en) {
2222                 MLX5_SET(tirc, tirc, lro_enable_mask,
2223                     MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
2224                     MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
2225                 MLX5_SET(tirc, tirc, lro_max_msg_sz,
2226                     (priv->params.lro_wqe_sz -
2227                     ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
2228                 /* TODO: add the option to choose timer value dynamically */
2229                 MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
2230                     MLX5_CAP_ETH(priv->mdev,
2231                     lro_timer_supported_periods[2]));
2232         }
2233
2234         /* setup parameters for hashing TIR type, if any */
2235         switch (tt) {
2236         case MLX5E_TT_ANY:
2237                 MLX5_SET(tirc, tirc, disp_type,
2238                     MLX5_TIRC_DISP_TYPE_DIRECT);
2239                 MLX5_SET(tirc, tirc, inline_rqn,
2240                     priv->channel[0]->rq.rqn);
2241                 break;
2242         default:
2243                 MLX5_SET(tirc, tirc, disp_type,
2244                     MLX5_TIRC_DISP_TYPE_INDIRECT);
2245                 MLX5_SET(tirc, tirc, indirect_table,
2246                     priv->rqtn);
2247                 MLX5_SET(tirc, tirc, rx_hash_fn,
2248                     MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
2249                 hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
2250 #ifdef RSS
2251                 /*
2252                  * The FreeBSD RSS implementation does not currently
2253                  * support symmetric Toeplitz hashes:
2254                  */
2255                 MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
2256                 rss_getkey((uint8_t *)hkey);
2257 #else
2258                 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
2259                 hkey[0] = cpu_to_be32(0xD181C62C);
2260                 hkey[1] = cpu_to_be32(0xF7F4DB5B);
2261                 hkey[2] = cpu_to_be32(0x1983A2FC);
2262                 hkey[3] = cpu_to_be32(0x943E1ADB);
2263                 hkey[4] = cpu_to_be32(0xD9389E6B);
2264                 hkey[5] = cpu_to_be32(0xD1039C2C);
2265                 hkey[6] = cpu_to_be32(0xA74499AD);
2266                 hkey[7] = cpu_to_be32(0x593D56D9);
2267                 hkey[8] = cpu_to_be32(0xF3253C06);
2268                 hkey[9] = cpu_to_be32(0x2ADC1FFC);
2269 #endif
2270                 break;
2271         }
2272
2273         switch (tt) {
2274         case MLX5E_TT_IPV4_TCP:
2275                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2276                     MLX5_L3_PROT_TYPE_IPV4);
2277                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2278                     MLX5_L4_PROT_TYPE_TCP);
2279 #ifdef RSS
2280                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
2281                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2282                             MLX5_HASH_IP);
2283                 } else
2284 #endif
2285                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2286                     MLX5_HASH_ALL);
2287                 break;
2288
2289         case MLX5E_TT_IPV6_TCP:
2290                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2291                     MLX5_L3_PROT_TYPE_IPV6);
2292                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2293                     MLX5_L4_PROT_TYPE_TCP);
2294 #ifdef RSS
2295                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
2296                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2297                             MLX5_HASH_IP);
2298                 } else
2299 #endif
2300                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2301                     MLX5_HASH_ALL);
2302                 break;
2303
2304         case MLX5E_TT_IPV4_UDP:
2305                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2306                     MLX5_L3_PROT_TYPE_IPV4);
2307                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2308                     MLX5_L4_PROT_TYPE_UDP);
2309 #ifdef RSS
2310                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
2311                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2312                             MLX5_HASH_IP);
2313                 } else
2314 #endif
2315                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2316                     MLX5_HASH_ALL);
2317                 break;
2318
2319         case MLX5E_TT_IPV6_UDP:
2320                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2321                     MLX5_L3_PROT_TYPE_IPV6);
2322                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2323                     MLX5_L4_PROT_TYPE_UDP);
2324 #ifdef RSS
2325                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2326                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2327                             MLX5_HASH_IP);
2328                 } else
2329 #endif
2330                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2331                     MLX5_HASH_ALL);
2332                 break;
2333
2334         case MLX5E_TT_IPV4_IPSEC_AH:
2335                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2336                     MLX5_L3_PROT_TYPE_IPV4);
2337                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2338                     MLX5_HASH_IP_IPSEC_SPI);
2339                 break;
2340
2341         case MLX5E_TT_IPV6_IPSEC_AH:
2342                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2343                     MLX5_L3_PROT_TYPE_IPV6);
2344                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2345                     MLX5_HASH_IP_IPSEC_SPI);
2346                 break;
2347
2348         case MLX5E_TT_IPV4_IPSEC_ESP:
2349                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2350                     MLX5_L3_PROT_TYPE_IPV4);
2351                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2352                     MLX5_HASH_IP_IPSEC_SPI);
2353                 break;
2354
2355         case MLX5E_TT_IPV6_IPSEC_ESP:
2356                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2357                     MLX5_L3_PROT_TYPE_IPV6);
2358                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2359                     MLX5_HASH_IP_IPSEC_SPI);
2360                 break;
2361
2362         case MLX5E_TT_IPV4:
2363                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2364                     MLX5_L3_PROT_TYPE_IPV4);
2365                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2366                     MLX5_HASH_IP);
2367                 break;
2368
2369         case MLX5E_TT_IPV6:
2370                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2371                     MLX5_L3_PROT_TYPE_IPV6);
2372                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2373                     MLX5_HASH_IP);
2374                 break;
2375
2376         default:
2377                 break;
2378         }
2379 }
2380
2381 static int
2382 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2383 {
2384         struct mlx5_core_dev *mdev = priv->mdev;
2385         u32 *in;
2386         void *tirc;
2387         int inlen;
2388         int err;
2389
2390         inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2391         in = mlx5_vzalloc(inlen);
2392         if (in == NULL)
2393                 return (-ENOMEM);
2394         tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2395
2396         mlx5e_build_tir_ctx(priv, tirc, tt);
2397
2398         err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2399
2400         kvfree(in);
2401
2402         return (err);
2403 }
2404
2405 static void
2406 mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2407 {
2408         mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2409 }
2410
2411 static int
2412 mlx5e_open_tirs(struct mlx5e_priv *priv)
2413 {
2414         int err;
2415         int i;
2416
2417         for (i = 0; i < MLX5E_NUM_TT; i++) {
2418                 err = mlx5e_open_tir(priv, i);
2419                 if (err)
2420                         goto err_close_tirs;
2421         }
2422
2423         return (0);
2424
2425 err_close_tirs:
2426         for (i--; i >= 0; i--)
2427                 mlx5e_close_tir(priv, i);
2428
2429         return (err);
2430 }
2431
2432 static void
2433 mlx5e_close_tirs(struct mlx5e_priv *priv)
2434 {
2435         int i;
2436
2437         for (i = 0; i < MLX5E_NUM_TT; i++)
2438                 mlx5e_close_tir(priv, i);
2439 }
2440
2441 /*
2442  * SW MTU does not include headers,
2443  * HW MTU includes all headers and checksums.
2444  */
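/*
 * For example, assuming MLX5E_SW2HW_MTU() adds the Ethernet header, an
 * optional VLAN tag and the frame checksum, a SW MTU of 1500 would map
 * to a HW MTU of 1500 + 14 + 4 + 4 = 1522 bytes.
 */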
2445 static int
2446 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2447 {
2448         struct mlx5e_priv *priv = ifp->if_softc;
2449         struct mlx5_core_dev *mdev = priv->mdev;
2450         int hw_mtu;
2451         int err;
2452
2453         hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);
2454
2455         err = mlx5_set_port_mtu(mdev, hw_mtu);
2456         if (err) {
2457                 if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2458                     __func__, sw_mtu, err);
2459                 return (err);
2460         }
2461
2462         /* Update vport context MTU */
2463         err = mlx5_set_vport_mtu(mdev, hw_mtu);
2464         if (err) {
2465                 if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n",
2466                     __func__, err);
2467         }
2468
2469         ifp->if_mtu = sw_mtu;
2470
2471         err = mlx5_query_vport_mtu(mdev, &hw_mtu);
2472         if (err || !hw_mtu) {
2473                 /* fallback to port oper mtu */
2474                 err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2475         }
2476         if (err) {
2477                 if_printf(ifp, "Querying port MTU after setting the "
2478                     "new MTU value failed\n");
2479                 return (err);
2480         } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
2481                 err = -E2BIG;
2482                 if_printf(ifp, "Port MTU %d is smaller than "
2483                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2484         } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
2485                 err = -EINVAL;
2486                 if_printf(ifp, "Port MTU %d is bigger than "
2487                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2488         }
2489         priv->params_ethtool.hw_mtu = hw_mtu;
2490
2491         return (err);
2492 }
2493
2494 int
2495 mlx5e_open_locked(struct ifnet *ifp)
2496 {
2497         struct mlx5e_priv *priv = ifp->if_softc;
2498         int err;
2499         u16 set_id;
2500
2501         /* check if already opened */
2502         if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2503                 return (0);
2504
2505 #ifdef RSS
2506         if (rss_getnumbuckets() > priv->params.num_channels) {
2507                 if_printf(ifp, "NOTE: There are more RSS buckets (%u) than "
2508                     "channels (%u) available\n", rss_getnumbuckets(),
2509                     priv->params.num_channels);
2510         }
2511 #endif
2512         err = mlx5e_open_tises(priv);
2513         if (err) {
2514                 if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2515                     __func__, err);
2516                 return (err);
2517         }
2518         err = mlx5_vport_alloc_q_counter(priv->mdev,
2519             MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
2520         if (err) {
2521                 if_printf(priv->ifp,
2522                     "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2523                     __func__, err);
2524                 goto err_close_tises;
2525         }
2526         /* store counter set ID */
2527         priv->counter_set_id = set_id;
2528
2529         err = mlx5e_open_channels(priv);
2530         if (err) {
2531                 if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2532                     __func__, err);
2533                 goto err_dalloc_q_counter;
2534         }
2535         err = mlx5e_open_rqt(priv);
2536         if (err) {
2537                 if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2538                     __func__, err);
2539                 goto err_close_channels;
2540         }
2541         err = mlx5e_open_tirs(priv);
2542         if (err) {
2543                 if_printf(ifp, "%s: mlx5e_open_tirs failed, %d\n",
2544                     __func__, err);
2545                 goto err_close_rqt;
2546         }
2547         err = mlx5e_open_flow_table(priv);
2548         if (err) {
2549                 if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2550                     __func__, err);
2551                 goto err_close_tirs;
2552         }
2553         err = mlx5e_add_all_vlan_rules(priv);
2554         if (err) {
2555                 if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2556                     __func__, err);
2557                 goto err_close_flow_table;
2558         }
2559         set_bit(MLX5E_STATE_OPENED, &priv->state);
2560
2561         mlx5e_update_carrier(priv);
2562         mlx5e_set_rx_mode_core(priv);
2563
2564         return (0);
2565
2566 err_close_flow_table:
2567         mlx5e_close_flow_table(priv);
2568
2569 err_close_tirs:
2570         mlx5e_close_tirs(priv);
2571
2572 err_close_rqt:
2573         mlx5e_close_rqt(priv);
2574
2575 err_close_channels:
2576         mlx5e_close_channels(priv);
2577
2578 err_dalloc_q_counter:
2579         mlx5_vport_dealloc_q_counter(priv->mdev,
2580             MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2581
2582 err_close_tises:
2583         mlx5e_close_tises(priv);
2584
2585         return (err);
2586 }
2587
2588 static void
2589 mlx5e_open(void *arg)
2590 {
2591         struct mlx5e_priv *priv = arg;
2592
2593         PRIV_LOCK(priv);
2594         if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2595                 if_printf(priv->ifp,
2596                     "%s: Setting port status to up failed\n",
2597                     __func__);
2598
2599         mlx5e_open_locked(priv->ifp);
2600         priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2601         PRIV_UNLOCK(priv);
2602 }
2603
2604 int
2605 mlx5e_close_locked(struct ifnet *ifp)
2606 {
2607         struct mlx5e_priv *priv = ifp->if_softc;
2608
2609         /* check if already closed */
2610         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2611                 return (0);
2612
2613         clear_bit(MLX5E_STATE_OPENED, &priv->state);
2614
2615         mlx5e_set_rx_mode_core(priv);
2616         mlx5e_del_all_vlan_rules(priv);
2617         if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2618         mlx5e_close_flow_table(priv);
2619         mlx5e_close_tirs(priv);
2620         mlx5e_close_rqt(priv);
2621         mlx5e_close_channels(priv);
2622         mlx5_vport_dealloc_q_counter(priv->mdev,
2623             MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2624         mlx5e_close_tises(priv);
2625
2626         return (0);
2627 }
2628
2629 #if (__FreeBSD_version >= 1100000)
2630 static uint64_t
2631 mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
2632 {
2633         struct mlx5e_priv *priv = ifp->if_softc;
2634         u64 retval;
2635
2636         /* PRIV_LOCK(priv); XXX not allowed */
2637         switch (cnt) {
2638         case IFCOUNTER_IPACKETS:
2639                 retval = priv->stats.vport.rx_packets;
2640                 break;
2641         case IFCOUNTER_IERRORS:
2642                 retval = priv->stats.vport.rx_error_packets +
2643                     priv->stats.pport.alignment_err +
2644                     priv->stats.pport.check_seq_err +
2645                     priv->stats.pport.crc_align_errors +
2646                     priv->stats.pport.in_range_len_errors +
2647                     priv->stats.pport.jabbers +
2648                     priv->stats.pport.out_of_range_len +
2649                     priv->stats.pport.oversize_pkts +
2650                     priv->stats.pport.symbol_err +
2651                     priv->stats.pport.too_long_errors +
2652                     priv->stats.pport.undersize_pkts +
2653                     priv->stats.pport.unsupported_op_rx;
2654                 break;
2655         case IFCOUNTER_IQDROPS:
2656                 retval = priv->stats.vport.rx_out_of_buffer +
2657                     priv->stats.pport.drop_events;
2658                 break;
2659         case IFCOUNTER_OPACKETS:
2660                 retval = priv->stats.vport.tx_packets;
2661                 break;
2662         case IFCOUNTER_OERRORS:
2663                 retval = priv->stats.vport.tx_error_packets;
2664                 break;
2665         case IFCOUNTER_IBYTES:
2666                 retval = priv->stats.vport.rx_bytes;
2667                 break;
2668         case IFCOUNTER_OBYTES:
2669                 retval = priv->stats.vport.tx_bytes;
2670                 break;
2671         case IFCOUNTER_IMCASTS:
2672                 retval = priv->stats.vport.rx_multicast_packets;
2673                 break;
2674         case IFCOUNTER_OMCASTS:
2675                 retval = priv->stats.vport.tx_multicast_packets;
2676                 break;
2677         case IFCOUNTER_OQDROPS:
2678                 retval = priv->stats.vport.tx_queue_dropped;
2679                 break;
2680         case IFCOUNTER_COLLISIONS:
2681                 retval = priv->stats.pport.collisions;
2682                 break;
2683         default:
2684                 retval = if_get_counter_default(ifp, cnt);
2685                 break;
2686         }
2687         /* PRIV_UNLOCK(priv); XXX not allowed */
2688         return (retval);
2689 }
2690 #endif
2691
2692 static void
2693 mlx5e_set_rx_mode(struct ifnet *ifp)
2694 {
2695         struct mlx5e_priv *priv = ifp->if_softc;
2696
2697         queue_work(priv->wq, &priv->set_rx_mode_work);
2698 }
2699
2700 static int
2701 mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2702 {
2703         struct mlx5e_priv *priv;
2704         struct ifreq *ifr;
2705         struct ifi2creq i2c;
2706         int error = 0;
2707         int mask = 0;
2708         int size_read = 0;
2709         int module_status;
2710         int module_num;
2711         int max_mtu;
2712         uint8_t read_addr;
2713
2714         priv = ifp->if_softc;
2715
2716         /* check if detaching */
2717         if (priv == NULL || priv->gone != 0)
2718                 return (ENXIO);
2719
2720         switch (command) {
2721         case SIOCSIFMTU:
2722                 ifr = (struct ifreq *)data;
2723
2724                 PRIV_LOCK(priv);
2725                 mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2726
2727                 if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2728                     ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2729                         int was_opened;
2730
2731                         was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2732                         if (was_opened)
2733                                 mlx5e_close_locked(ifp);
2734
2735                         /* set new MTU */
2736                         mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2737
2738                         if (was_opened)
2739                                 mlx5e_open_locked(ifp);
2740                 } else {
2741                         error = EINVAL;
2742                         if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2743                             MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2744                 }
2745                 PRIV_UNLOCK(priv);
2746                 break;
2747         case SIOCSIFFLAGS:
2748                 if ((ifp->if_flags & IFF_UP) &&
2749                     (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2750                         mlx5e_set_rx_mode(ifp);
2751                         break;
2752                 }
2753                 PRIV_LOCK(priv);
2754                 if (ifp->if_flags & IFF_UP) {
2755                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2756                                 if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2757                                         mlx5e_open_locked(ifp);
2758                                 ifp->if_drv_flags |= IFF_DRV_RUNNING;
2759                                 mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2760                         }
2761                 } else {
2762                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2763                                 mlx5_set_port_status(priv->mdev,
2764                                     MLX5_PORT_DOWN);
2765                                 if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2766                                         mlx5e_close_locked(ifp);
2767                                 mlx5e_update_carrier(priv);
2768                                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2769                         }
2770                 }
2771                 PRIV_UNLOCK(priv);
2772                 break;
2773         case SIOCADDMULTI:
2774         case SIOCDELMULTI:
2775                 mlx5e_set_rx_mode(ifp);
2776                 break;
2777         case SIOCSIFMEDIA:
2778         case SIOCGIFMEDIA:
2779         case SIOCGIFXMEDIA:
2780                 ifr = (struct ifreq *)data;
2781                 error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2782                 break;
2783         case SIOCSIFCAP:
2784                 ifr = (struct ifreq *)data;
2785                 PRIV_LOCK(priv);
2786                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2787
2788                 if (mask & IFCAP_TXCSUM) {
2789                         ifp->if_capenable ^= IFCAP_TXCSUM;
2790                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2791
2792                         if (IFCAP_TSO4 & ifp->if_capenable &&
2793                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2794                                 ifp->if_capenable &= ~IFCAP_TSO4;
2795                                 ifp->if_hwassist &= ~CSUM_IP_TSO;
2796                                 if_printf(ifp,
2797                                     "tso4 disabled due to -txcsum.\n");
2798                         }
2799                 }
2800                 if (mask & IFCAP_TXCSUM_IPV6) {
2801                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2802                         ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2803
2804                         if (IFCAP_TSO6 & ifp->if_capenable &&
2805                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2806                                 ifp->if_capenable &= ~IFCAP_TSO6;
2807                                 ifp->if_hwassist &= ~CSUM_IP6_TSO;
2808                                 if_printf(ifp,
2809                                     "tso6 disabled due to -txcsum6.\n");
2810                         }
2811                 }
2812                 if (mask & IFCAP_RXCSUM)
2813                         ifp->if_capenable ^= IFCAP_RXCSUM;
2814                 if (mask & IFCAP_RXCSUM_IPV6)
2815                         ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2816                 if (mask & IFCAP_TSO4) {
2817                         if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2818                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2819                                 if_printf(ifp, "enable txcsum first.\n");
2820                                 error = EAGAIN;
2821                                 goto out;
2822                         }
2823                         ifp->if_capenable ^= IFCAP_TSO4;
2824                         ifp->if_hwassist ^= CSUM_IP_TSO;
2825                 }
2826                 if (mask & IFCAP_TSO6) {
2827                         if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2828                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2829                                 if_printf(ifp, "enable txcsum6 first.\n");
2830                                 error = EAGAIN;
2831                                 goto out;
2832                         }
2833                         ifp->if_capenable ^= IFCAP_TSO6;
2834                         ifp->if_hwassist ^= CSUM_IP6_TSO;
2835                 }
2836                 if (mask & IFCAP_VLAN_HWFILTER) {
2837                         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2838                                 mlx5e_disable_vlan_filter(priv);
2839                         else
2840                                 mlx5e_enable_vlan_filter(priv);
2841
2842                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2843                 }
2844                 if (mask & IFCAP_VLAN_HWTAGGING)
2845                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2846                 if (mask & IFCAP_WOL_MAGIC)
2847                         ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2848
2849                 VLAN_CAPABILITIES(ifp);
2850                 /* turning off LRO also means turning off HW LRO, if it's on */
2851                 if (mask & IFCAP_LRO) {
2852                         int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2853                         bool need_restart = false;
2854
2855                         ifp->if_capenable ^= IFCAP_LRO;
2856                         if (!(ifp->if_capenable & IFCAP_LRO)) {
2857                                 if (priv->params.hw_lro_en) {
2858                                         priv->params.hw_lro_en = false;
2859                                         need_restart = true;
2860                                         /* Not sure this is the correct way */
2861                                         priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
2862                                 }
2863                         }
2864                         if (was_opened && need_restart) {
2865                                 mlx5e_close_locked(ifp);
2866                                 mlx5e_open_locked(ifp);
2867                         }
2868                 }
2869                 if (mask & IFCAP_HWRXTSTMP) {
2870                         ifp->if_capenable ^= IFCAP_HWRXTSTMP;
2871                         if (ifp->if_capenable & IFCAP_HWRXTSTMP) {
2872                                 if (priv->clbr_done == 0)
2873                                         mlx5e_reset_calibration_callout(priv);
2874                         } else {
2875                                 callout_drain(&priv->tstmp_clbr);
2876                                 priv->clbr_done = 0;
2877                         }
2878                 }
2879 out:
2880                 PRIV_UNLOCK(priv);
2881                 break;
2882
2883         case SIOCGI2C:
2884                 ifr = (struct ifreq *)data;
2885
2886                 /*
2887                  * Copy from the user-space address ifr_data to the
2888                  * kernel-space address i2c
2889                  */
2890                 error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
2891                 if (error)
2892                         break;
2893
2894                 if (i2c.len > sizeof(i2c.data)) {
2895                         error = EINVAL;
2896                         break;
2897                 }
2898
2899                 PRIV_LOCK(priv);
2900                 /* Get module_num which is required for the query_eeprom */
2901                 error = mlx5_query_module_num(priv->mdev, &module_num);
2902                 if (error) {
2903                         if_printf(ifp, "Query module num failed, eeprom "
2904                             "reading is not supported\n");
2905                         error = EINVAL;
2906                         goto err_i2c;
2907                 }
2908                 /* Check if module is present before doing an access */
2909                 module_status = mlx5_query_module_status(priv->mdev, module_num);
2910                 if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
2911                     module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
2912                         error = EINVAL;
2913                         goto err_i2c;
2914                 }
2915                 /*
2916                  * Currently 0xA0 and 0xA2 are the only addresses permitted.
2917                  * The internal conversion is as follows:
2918                  */
2919                 if (i2c.dev_addr == 0xA0)
2920                         read_addr = MLX5E_I2C_ADDR_LOW;
2921                 else if (i2c.dev_addr == 0xA2)
2922                         read_addr = MLX5E_I2C_ADDR_HIGH;
2923                 else {
2924                         if_printf(ifp, "Query eeprom failed, "
2925                             "Invalid Address: %X\n", i2c.dev_addr);
2926                         error = EINVAL;
2927                         goto err_i2c;
2928                 }
2929                 error = mlx5_query_eeprom(priv->mdev,
2930                     read_addr, MLX5E_EEPROM_LOW_PAGE,
2931                     (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2932                     (uint32_t *)i2c.data, &size_read);
2933                 if (error) {
2934                         if_printf(ifp, "Query eeprom failed, eeprom "
2935                             "reading is not supported\n");
2936                         error = EINVAL;
2937                         goto err_i2c;
2938                 }
2939
2940                 if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2941                         error = mlx5_query_eeprom(priv->mdev,
2942                             read_addr, MLX5E_EEPROM_LOW_PAGE,
2943                             (uint32_t)(i2c.offset + size_read),
2944                             (uint32_t)(i2c.len - size_read), module_num,
2945                             (uint32_t *)(i2c.data + size_read), &size_read);
2946                 }
2947                 if (error) {
2948                         if_printf(ifp, "Query eeprom failed, eeprom "
2949                             "reading is not supported\n");
2950                         error = EINVAL;
2951                         goto err_i2c;
2952                 }
2953
2954                 error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
2955 err_i2c:
2956                 PRIV_UNLOCK(priv);
2957                 break;
2958
2959         default:
2960                 error = ether_ioctl(ifp, command, data);
2961                 break;
2962         }
2963         return (error);
2964 }
2965
2966 static int
2967 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2968 {
2969         /*
2970          * TODO: uncomment once FW really sets all these bits: if
2971          * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
2972          * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
2973          * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
2974          * -ENOTSUPP;
2975          */
2976
2977         /* TODO: add more must-have features */
2978
2979         if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
2980                 return (-ENODEV);
2981
2982         return (0);
2983 }
2984
2985 static u16
2986 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
2987 {
2988         int bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
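        /*
         * Example: a log_bf_reg_size of 9 gives a 512-byte blue flame
         * register, half of which (256 bytes) is usable per doorbell,
         * leaving 256 bytes minus the WQE control overhead for inline
         * data.
         */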
2989
2990         return (bf_buf_size - sizeof(struct mlx5e_tx_wqe) +
2991             2 /* sizeof(mlx5e_tx_wqe.inline_hdr_start) */);
2993 }
2994
2995 static void
2996 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
2997     struct mlx5e_priv *priv,
2998     int num_comp_vectors)
2999 {
3000         /*
3001          * TODO: Consider link speed for setting "log_sq_size",
3002          * "log_rq_size" and "cq_moderation_xxx":
3003          */
3004         priv->params.log_sq_size =
3005             MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
3006         priv->params.log_rq_size =
3007             MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
3008         priv->params.rx_cq_moderation_usec =
3009             MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
3010             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
3011             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
3012         priv->params.rx_cq_moderation_mode =
3013             MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
3014         priv->params.rx_cq_moderation_pkts =
3015             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
3016         priv->params.tx_cq_moderation_usec =
3017             MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
3018         priv->params.tx_cq_moderation_pkts =
3019             MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
3020         priv->params.min_rx_wqes =
3021             MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
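        /*
         * The RSS indirection table gets at least one entry per
         * completion vector; for example, 12 vectors round up to
         * order_base_2(12) = 4, i.e. a 16-entry table, unless the
         * default table size is already larger.
         */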
3022         priv->params.rx_hash_log_tbl_sz =
3023             (order_base_2(num_comp_vectors) >
3024             MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
3025             order_base_2(num_comp_vectors) :
3026             MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
3027         priv->params.num_tc = 1;
3028         priv->params.default_vlan_prio = 0;
3029         priv->counter_set_id = -1;
3030         priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
3031         mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
3032
3033         /*
3034          * HW LRO currently defaults to off. When it no longer does, we
3035          * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)".
3036          */
3037         priv->params.hw_lro_en = false;
3038         priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
3039
3040         priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
3041
3042         priv->mdev = mdev;
3043         priv->params.num_channels = num_comp_vectors;
3044         priv->params.channels_rsss = 1;
3045         priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
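        /*
         * Example: 12 completion vectors round up to 16, giving a
         * mask of 0xf, which presumably lets a flow hash be mapped to
         * a channel with a single bitwise AND.
         */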
3046         priv->queue_mapping_channel_mask =
3047             roundup_pow_of_two(num_comp_vectors) - 1;
3048         priv->num_tc = priv->params.num_tc;
3049         priv->default_vlan_prio = priv->params.default_vlan_prio;
3050
3051         INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
3052         INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
3053         INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
3054 }
3055
3056 static int
3057 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
3058                   struct mlx5_core_mr *mkey)
3059 {
3060         struct ifnet *ifp = priv->ifp;
3061         struct mlx5_core_dev *mdev = priv->mdev;
3062         int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
3063         void *mkc;
3064         u32 *in;
3065         int err;
3066
3067         in = mlx5_vzalloc(inlen);
3068         if (in == NULL) {
3069                 if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
3070                 return (-ENOMEM);
3071         }
3072
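        /*
         * Build a physical-address mode memory key with local read
         * and write access. Setting length64 presumably makes the key
         * span the whole address space, and the 0xffffff QPN leaves
         * it unbound to any particular queue pair.
         */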
3073         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
3074         MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
3075         MLX5_SET(mkc, mkc, lw, 1);
3076         MLX5_SET(mkc, mkc, lr, 1);
3077
3078         MLX5_SET(mkc, mkc, pd, pdn);
3079         MLX5_SET(mkc, mkc, length64, 1);
3080         MLX5_SET(mkc, mkc, qpn, 0xffffff);
3081
3082         err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
3083         if (err)
3084                 if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
3085                     __func__, err);
3086
3087         kvfree(in);
3088         return (err);
3089 }
3090
3091 static const char *mlx5e_vport_stats_desc[] = {
3092         MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
3093 };
3094
3095 static const char *mlx5e_pport_stats_desc[] = {
3096         MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
3097 };
3098
3099 static void
3100 mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
3101 {
3102         mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
3103         sx_init(&priv->state_lock, "mlx5state");
3104         callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
3105         MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
3106 }
3107
3108 static void
3109 mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
3110 {
3111         mtx_destroy(&priv->async_events_mtx);
3112         sx_destroy(&priv->state_lock);
3113 }
3114
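/*
 * Sysctl handler returning the HCA firmware revision as a
 * "major.minor.sub" string, e.g. "12.20.1010" (hypothetical value).
 */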
3115 static int
3116 sysctl_firmware(SYSCTL_HANDLER_ARGS)
3117 {
3118         /*
3119          * The format string is "%d.%d.%d".
3120          * fw_rev_{maj,min,sub} each return a u16, and 2^16 = 65536,
3121          * so at most 5 chars are needed to store each field.
3122          * Adding the two "."s and the terminating NUL, we need at
3123          * most 18 (5*3 + 3) chars in total.
3124          */
3125         char fw[18];
3126         struct mlx5e_priv *priv = arg1;
3127         int error;
3128
3129         snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev),
3130             fw_rev_min(priv->mdev), fw_rev_sub(priv->mdev));
3131         error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
3132         return (error);
3133 }
3134
3135 static void
3136 mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
3137 {
3138         int i;
3139
3140         for (i = 0; i < ch->num_tc; i++)
3141                 mlx5e_drain_sq(&ch->sq[i]);
3142 }
3143
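/*
 * Ring the doorbell with a dummy NOP and clear the doorbell record;
 * this resets the doorbell state before the SQ is moved from RST
 * back to RDY (see mlx5e_resume_sq() below).
 */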
3144 static void
3145 mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
3146 {
3147
3148         sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
3149         sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
3150         mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
3151         sq->doorbell.d64 = 0;
3152 }
3153
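/*
 * Bring a stopped send queue back into service: ERR -> RST, reset
 * the producer/consumer counters and the doorbell record, then
 * RST -> RDY.
 */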
3154 void
3155 mlx5e_resume_sq(struct mlx5e_sq *sq)
3156 {
3157         int err;
3158
3159         /* check if already enabled */
3160         if (sq->stopped == 0)
3161                 return;
3162
3163         err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
3164             MLX5_SQC_STATE_RST);
3165         if (err != 0) {
3166                 if_printf(sq->ifp,
3167                     "mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
3168         }
3169
3170         sq->cc = 0;
3171         sq->pc = 0;
3172
3173         /* reset doorbell prior to moving from RST to RDY */
3174         mlx5e_reset_sq_doorbell_record(sq);
3175
3176         err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
3177             MLX5_SQC_STATE_RDY);
3178         if (err != 0) {
3179                 if_printf(sq->ifp,
3180                     "mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
3181         }
3182
3183         mtx_lock(&sq->lock);
3184         sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
3185         sq->stopped = 0;
3186         mtx_unlock(&sq->lock);
3188 }
3189
3190 static void
3191 mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
3192 {
3193         int i;
3194
3195         for (i = 0; i < ch->num_tc; i++)
3196                 mlx5e_resume_sq(&ch->sq[i]);
3197 }
3198
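/*
 * Quiesce a receive queue: stop the watchdog, move the RQ from RDY
 * to ERR, then poll the completion queue until the work queue is
 * empty before finally dropping to RST.
 */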
3199 static void
3200 mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
3201 {
3202         struct mlx5e_rq *rq = &ch->rq;
3203         int err;
3204
3205         mtx_lock(&rq->mtx);
3206         rq->enabled = 0;
3207         callout_stop(&rq->watchdog);
3208         mtx_unlock(&rq->mtx);
3209
3210         callout_drain(&rq->watchdog);
3211
3212         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
3213         if (err != 0) {
3214                 if_printf(rq->ifp,
3215                     "mlx5e_modify_rq() from RDY to ERR failed: %d\n", err);
3216         }
3217
3218         while (!mlx5_wq_ll_is_empty(&rq->wq)) {
3219                 msleep(1);
3220                 rq->cq.mcq.comp(&rq->cq.mcq);
3221         }
3222
3223         /*
3224          * Transitioning into the RST state allows the FW to track fewer
3225          * ERR-state queues, thus reducing the receive queue flushing time.
3226          */
3227         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
3228         if (err != 0) {
3229                 if_printf(rq->ifp,
3230                     "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
3231         }
3232 }
3233
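/*
 * Re-arm a receive queue: rewind the WQE counter and doorbell
 * record, move the RQ from RST to RDY, and invoke the completion
 * handler once to resume processing.
 */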
3234 static void
3235 mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
3236 {
3237         struct mlx5e_rq *rq = &ch->rq;
3238         int err;
3239
3240         rq->wq.wqe_ctr = 0;
3241         mlx5_wq_ll_update_db_record(&rq->wq);
3242         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
3243         if (err != 0) {
3244                 if_printf(rq->ifp,
3245                     "mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
3246         }
3247
3248         rq->enabled = 1;
3249
3250         rq->cq.mcq.comp(&rq->cq.mcq);
3251 }
3252
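/*
 * Stop (non-zero "value") or resume (zero "value") TX DMA on all
 * channels; mlx5e_modify_rx_dma() below follows the same convention
 * for the receive side.
 */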
3253 void
3254 mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
3255 {
3256         int i;
3257
3258         if (priv->channel == NULL)
3259                 return;
3260
3261         for (i = 0; i < priv->params.num_channels; i++) {
3262
3263                 if (!priv->channel[i])
3264                         continue;
3265
3266                 if (value)
3267                         mlx5e_disable_tx_dma(priv->channel[i]);
3268                 else
3269                         mlx5e_enable_tx_dma(priv->channel[i]);
3270         }
3271 }
3272
3273 void
3274 mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
3275 {
3276         int i;
3277
3278         if (priv->channel == NULL)
3279                 return;
3280
3281         for (i = 0; i < priv->params.num_channels; i++) {
3282
3283                 if (!priv->channel[i])
3284                         continue;
3285
3286                 if (value)
3287                         mlx5e_disable_rx_dma(priv->channel[i]);
3288                 else
3289                         mlx5e_enable_rx_dma(priv->channel[i]);
3290         }
3291 }
3292
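/*
 * Compute the TX minimum inline mode. If the HCA reports that no
 * inlining is required but cannot insert VLAN tags into the WQE,
 * fall back to L2 inlining so VLAN tagging keeps working.
 */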
3293 u8
3294 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev)
3295 {
3296         u8 min_inline_mode;
3297
3298         min_inline_mode = MLX5_INLINE_MODE_L2;
3299         mlx5_query_min_inline(mdev, &min_inline_mode);
3300         if (min_inline_mode == MLX5_INLINE_MODE_NONE &&
3301             !MLX5_CAP_ETH(mdev, wqe_vlan_insert))
3302                 min_inline_mode = MLX5_INLINE_MODE_L2;
3303
3304         return (min_inline_mode);
3305 }
3306
3307 static void
3308 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
3309 {
3310         SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3311             OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
3312             sysctl_firmware, "A", "HCA firmware version");
3313
3314         SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3315             OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
3316             "Board ID");
3317 }
3318
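/*
 * Sysctl handlers for per-priority flow control, PFC. "arg2" selects
 * the priority (0..7) whose bit is read or written in the 8-bit PFC
 * bitmask; changes are pushed to the firmware and rolled back if
 * that fails.
 */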
3319 static int
3320 mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3321 {
3322         struct mlx5e_priv *priv = arg1;
3323         uint32_t tx_pfc;
3324         uint32_t value;
3325         int error;
3326
3327         PRIV_LOCK(priv);
3328
3329         tx_pfc = priv->params.tx_priority_flow_control;
3330
3331         /* get current value */
3332         value = (tx_pfc >> arg2) & 1;
3333
3334         error = sysctl_handle_32(oidp, &value, 0, req);
3335
3336         /* update the PFC bit for the given priority */
3337         if (value != 0)
3338                 priv->params.tx_priority_flow_control |= (1 << arg2);
3339         else
3340                 priv->params.tx_priority_flow_control &= ~(1 << arg2);
3341
3342         /* check if update is required */
3343         if (error == 0 && priv->gone == 0 &&
3344             tx_pfc != priv->params.tx_priority_flow_control) {
3345                 error = -mlx5e_set_port_pfc(priv);
3346                 /* restore previous value */
3347                 if (error != 0)
3348                         priv->params.tx_priority_flow_control = tx_pfc;
3349         }
3350         PRIV_UNLOCK(priv);
3351
3352         return (error);
3353 }
3354
3355 static int
3356 mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3357 {
3358         struct mlx5e_priv *priv = arg1;
3359         uint32_t rx_pfc;
3360         uint32_t value;
3361         int error;
3362
3363         PRIV_LOCK(priv);
3364
3365         rx_pfc = priv->params.rx_priority_flow_control;
3366
3367         /* get current value */
3368         value = (rx_pfc >> arg2) & 1;
3369
3370         error = sysctl_handle_32(oidp, &value, 0, req);
3371
3372         /* update the PFC bit for the given priority */
3373         if (value != 0)
3374                 priv->params.rx_priority_flow_control |= (1 << arg2);
3375         else
3376                 priv->params.rx_priority_flow_control &= ~(1 << arg2);
3377
3378         /* check if update is required */
3379         if (error == 0 && priv->gone == 0 &&
3380             rx_pfc != priv->params.rx_priority_flow_control) {
3381                 error = -mlx5e_set_port_pfc(priv);
3382                 /* restore previous value */
3383                 if (error != 0)
3384                         priv->params.rx_priority_flow_control = rx_pfc;
3385         }
3386         PRIV_UNLOCK(priv);
3387
3388         return (error);
3389 }
3390
3391 static void
3392 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
3393 {
3394         unsigned int x;
3395         char path[96];
3396         int error;
#if (__FreeBSD_version < 1100000)
        int value;              /* used by the tunable fetch loop below */
#endif
3397
3398         /* enable pauseframes by default */
3399         priv->params.tx_pauseframe_control = 1;
3400         priv->params.rx_pauseframe_control = 1;
3401
3402         /* disable priority flow control, PFC, by default */
3403         priv->params.tx_priority_flow_control = 0;
3404         priv->params.rx_priority_flow_control = 0;
3405
3406 #if (__FreeBSD_version < 1100000)
3407         /* compute path for sysctl */
3408         snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
3409             device_get_unit(priv->mdev->pdev->dev.bsddev));
3410
3411         /* try to fetch tunable, if any */
3412         TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
3413
3414         /* compute path for sysctl */
3415         snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
3416             device_get_unit(priv->mdev->pdev->dev.bsddev));
3417
3418         /* try to fetch tunable, if any */
3419         TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
3420
3421         for (x = 0; x != 8; x++) {
3422
3423                 /* compute path for sysctl */
3424                 snprintf(path, sizeof(path), "dev.mce.%d.tx_priority_flow_control_%u",
3425                     device_get_unit(priv->mdev->pdev->dev.bsddev), x);
3426
3427                 /* try to fetch tunable, if any */
3428                 if (TUNABLE_INT_FETCH(path, &value) != 0 && value != 0)
3429                         priv->params.tx_priority_flow_control |= 1 << x;
3430
3431                 /* compute path for sysctl */
3432                 snprintf(path, sizeof(path), "dev.mce.%d.rx_priority_flow_control_%u",
3433                     device_get_unit(priv->mdev->pdev->dev.bsddev), x);
3434
3435                 /* try to fetch tunable, if any */
3436                 if (TUNABLE_INT_FETCH(path, &value) != 0 && value != 0)
3437                         priv->params.rx_priority_flow_control |= 1 << x;
3438         }
3439 #endif
3440
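        /*
         * The per-priority PFC knobs registered below can be toggled
         * from userland, e.g. (hypothetical unit and priority):
         *
         *      sysctl dev.mce.0.tx_priority_flow_control_3=1
         */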
3441         /* register pauseframe SYSCTLs */
3442         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3443             OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
3444             &priv->params.tx_pauseframe_control, 0,
3445             "Set to enable TX pause frames. Clear to disable.");
3446
3447         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3448             OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
3449             &priv->params.rx_pauseframe_control, 0,
3450             "Set to enable RX pause frames. Clear to disable.");
3451
3452         /* register priority flow control, PFC, SYSCTLs */
3453         for (x = 0; x != 8; x++) {
3454                 snprintf(path, sizeof(path), "tx_priority_flow_control_%u", x);
3455
3456                 SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3457                     OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3458                     CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_tx_priority_flow_control, "IU",
3459                     "Set to enable TX port flow control frames for the given priority. Clear to disable.");
3460
3461                 snprintf(path, sizeof(path), "rx_priority_flow_control_%u", x);
3462
3463                 SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3464                     OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3465                     CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_rx_priority_flow_control, "IU",
3466                     "Set to enable RX port flow control frames for the given priority. Clear to disable.");
3467         }
3468
3469         PRIV_LOCK(priv);
3470
3471         /* range check */
3472         priv->params.tx_pauseframe_control =
3473             priv->params.tx_pauseframe_control ? 1 : 0;
3474         priv->params.rx_pauseframe_control =
3475             priv->params.rx_pauseframe_control ? 1 : 0;
3476
3477         /* update firmware */
3478         error = mlx5e_set_port_pause_and_pfc(priv);
3479         if (error == -EINVAL) {
3480                 if_printf(priv->ifp,
3481                     "Global pauseframes must be disabled before enabling PFC.\n");
3482                 priv->params.rx_priority_flow_control = 0;
3483                 priv->params.tx_priority_flow_control = 0;
3484
3485                 /* update firmware */
3486                 (void) mlx5e_set_port_pause_and_pfc(priv);
3487         }
3488         PRIV_UNLOCK(priv);
3489 }
3490
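/*
 * Attach handler called by the mlx5 core for each Ethernet-capable
 * port: allocate the softc and ifnet, create the HW resources (UAR,
 * PD, transport domain, MKEY), register media types and sysctl
 * nodes, and attach the interface. Returns the new softc, or NULL
 * on failure.
 */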
3491 static void *
3492 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
3493 {
3494         struct ifnet *ifp;
3495         struct mlx5e_priv *priv;
3496         u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
3497         struct sysctl_oid_list *child;
3498         int ncv = mdev->priv.eq_table.num_comp_vectors;
3499         char unit[16];
3500         int err;
3501         int i;
3502         u32 eth_proto_cap;
3503
3504         if (mlx5e_check_required_hca_cap(mdev)) {
3505                 mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
3506                 return (NULL);
3507         }
3508         priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
3509         mlx5e_priv_mtx_init(priv);
3510
3511         ifp = priv->ifp = if_alloc(IFT_ETHER);
3512         if (ifp == NULL) {
3513                 mlx5_core_err(mdev, "if_alloc() failed\n");
3514                 goto err_free_priv;
3515         }
3516         ifp->if_softc = priv;
3517         if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
3518         ifp->if_mtu = ETHERMTU;
3519         ifp->if_init = mlx5e_open;
3520         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3521         ifp->if_ioctl = mlx5e_ioctl;
3522         ifp->if_transmit = mlx5e_xmit;
3523         ifp->if_qflush = if_qflush;
3524 #if (__FreeBSD_version >= 1100000)
3525         ifp->if_get_counter = mlx5e_get_counter;
3526 #endif
3527         ifp->if_snd.ifq_maxlen = ifqmaxlen;
3528         /*
3529          * Set driver features
3530          */
3531         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
3532         ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
3533         ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
3534         ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
3535         ifp->if_capabilities |= IFCAP_LRO;
3536         ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
3537         ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
3538 #ifdef RATELIMIT
3539         ifp->if_capabilities |= IFCAP_TXRTLMT;
3540         ifp->if_snd_tag_alloc = mlx5e_rl_snd_tag_alloc;
3541         ifp->if_snd_tag_free = mlx5e_rl_snd_tag_free;
3542         ifp->if_snd_tag_modify = mlx5e_rl_snd_tag_modify;
3543         ifp->if_snd_tag_query = mlx5e_rl_snd_tag_query;
3544 #endif
3545
3546         /* set TSO limits so that we don't have to drop TX packets */
3547         ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3548         ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
3549         ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
3550
3551         ifp->if_capenable = ifp->if_capabilities;
3552         ifp->if_hwassist = 0;
3553         if (ifp->if_capenable & IFCAP_TSO)
3554                 ifp->if_hwassist |= CSUM_TSO;
3555         if (ifp->if_capenable & IFCAP_TXCSUM)
3556                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
3557         if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3558                 ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
3559
3560         sysctl_ctx_init(&priv->sysctl_ctx_channel_debug);
3561
3562         /* ifnet sysctl tree */
3563         sysctl_ctx_init(&priv->sysctl_ctx);
3564         priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
3565             OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
3566         if (priv->sysctl_ifnet == NULL) {
3567                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3568                 goto err_free_sysctl;
3569         }
3570         snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
3571         priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3572             OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
3573         if (priv->sysctl_ifnet == NULL) {
3574                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3575                 goto err_free_sysctl;
3576         }
3577
3578         /* HW sysctl tree */
3579         child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
3580         priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
3581             OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
3582         if (priv->sysctl_hw == NULL) {
3583                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3584                 goto err_free_sysctl;
3585         }
3586         mlx5e_build_ifp_priv(mdev, priv, ncv);
3587
3588         snprintf(unit, sizeof(unit), "mce%u_wq",
3589             device_get_unit(mdev->pdev->dev.bsddev));
3590         priv->wq = alloc_workqueue(unit, 0, 1);
3591         if (priv->wq == NULL) {
3592                 if_printf(ifp, "%s: alloc_workqueue failed\n", __func__);
3593                 goto err_free_sysctl;
3594         }
3595
3596         err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
3597         if (err) {
3598                 if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
3599                     __func__, err);
3600                 goto err_free_wq;
3601         }
3602         err = mlx5_core_alloc_pd(mdev, &priv->pdn);
3603         if (err) {
3604                 if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
3605                     __func__, err);
3606                 goto err_unmap_free_uar;
3607         }
3608         err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
3609         if (err) {
3610                 if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
3611                     __func__, err);
3612                 goto err_dealloc_pd;
3613         }
3614         err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
3615         if (err) {
3616                 if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
3617                     __func__, err);
3618                 goto err_dealloc_transport_domain;
3619         }
3620         mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
3621
3622         /* check if we should generate a random MAC address */
3623         if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
3624             is_zero_ether_addr(dev_addr)) {
3625                 random_ether_addr(dev_addr);
3626                 if_printf(ifp, "Assigned random MAC address\n");
3627         }
3628 #ifdef RATELIMIT
3629         err = mlx5e_rl_init(priv);
3630         if (err) {
3631                 if_printf(ifp, "%s: mlx5e_rl_init failed, %d\n",
3632                     __func__, err);
3633                 goto err_create_mkey;
3634         }
3635 #endif
3636
3637         /* set default MTU */
3638         mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
3639
3640         /* Set default media status */
3641         priv->media_status_last = IFM_AVALID;
3642         priv->media_active_last = IFM_ETHER | IFM_AUTO |
3643             IFM_ETH_RXPAUSE | IFM_FDX;
3644
3645         /* setup default pauseframes configuration */
3646         mlx5e_setup_pauseframes(priv);
3647
3648         err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
3649         if (err) {
3650                 eth_proto_cap = 0;
3651                 if_printf(ifp, "%s: Query port media capability failed, %d\n",
3652                     __func__, err);
3653         }
3654
3655         /* Setup supported media */
3656         ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
3657             mlx5e_media_change, mlx5e_media_status);
3658
3659         for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
3660                 if (mlx5e_mode_table[i].baudrate == 0)
3661                         continue;
3662                 if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
3663                         ifmedia_add(&priv->media,
3664                             mlx5e_mode_table[i].subtype |
3665                             IFM_ETHER, 0, NULL);
3666                         ifmedia_add(&priv->media,
3667                             mlx5e_mode_table[i].subtype |
3668                             IFM_ETHER | IFM_FDX |
3669                             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3670                 }
3671         }
3672
3673         ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3674         ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3675             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3676
3677         /* Set autoselect by default */
3678         ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3679             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
3680         ether_ifattach(ifp, dev_addr);
3681
3682         /* Register for VLAN events */
3683         priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
3684             mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3685         priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3686             mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3687
3688         /* Link is down by default */
3689         if_link_state_change(ifp, LINK_STATE_DOWN);
3690
3691         mlx5e_enable_async_events(priv);
3692
3693         mlx5e_add_hw_stats(priv);
3694
3695         mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3696             "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3697             priv->stats.vport.arg);
3698
3699         mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3700             "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3701             priv->stats.pport.arg);
3702
3703         mlx5e_create_ethtool(priv);
3704
3705         mtx_lock(&priv->async_events_mtx);
3706         mlx5e_update_stats(priv);
3707         mtx_unlock(&priv->async_events_mtx);
3708
3709         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3710             OID_AUTO, "rx_clbr_done", CTLFLAG_RD,
3711             &priv->clbr_done, 0,
3712             "RX timestamps calibration state");
3713         callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT);
3714         mlx5e_reset_calibration_callout(priv);
3715
3716         return (priv);
3717
3718 #ifdef RATELIMIT
3719 err_create_mkey:
3720         mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3721 #endif
3722 err_dealloc_transport_domain:
3723         mlx5_dealloc_transport_domain(mdev, priv->tdn);
3724
3725 err_dealloc_pd:
3726         mlx5_core_dealloc_pd(mdev, priv->pdn);
3727
3728 err_unmap_free_uar:
3729         mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3730
3731 err_free_wq:
3732         destroy_workqueue(priv->wq);
3733
3734 err_free_sysctl:
3735         sysctl_ctx_free(&priv->sysctl_ctx);
3736         sysctl_ctx_free(&priv->sysctl_ctx_channel_debug);
3737
3738         if_free(ifp);
3739
3740 err_free_priv:
3741         mlx5e_priv_mtx_destroy(priv);
3742         free(priv, M_MLX5EN);
3743         return (NULL);
3744 }
3745
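/*
 * Detach handler: mark the interface gone, wait for in-flight ioctls
 * and ratelimit send tags to drain, close the device, and release
 * resources in roughly the reverse order of mlx5e_create_ifp().
 */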
3746 static void
3747 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3748 {
3749         struct mlx5e_priv *priv = vpriv;
3750         struct ifnet *ifp = priv->ifp;
3751
3752         /* don't allow more IOCTLs */
3753         priv->gone = 1;
3754
3755         /* XXX wait a bit to allow IOCTL handlers to complete */
3756         pause("W", hz);
3757
3758 #ifdef RATELIMIT
3759         /*
3760          * The kernel can have reference(s) via the m_snd_tag's into
3761          * the ratelimit channels, and these must go away before
3762          * detaching:
3763          */
3764         while (READ_ONCE(priv->rl.stats.tx_active_connections) != 0) {
3765                 if_printf(priv->ifp, "Waiting for all ratelimit connections "
3766                     "to terminate\n");
3767                 pause("W", hz);
3768         }
3769 #endif
3770         /* stop watchdog timer */
3771         callout_drain(&priv->watchdog);
3772
3773         callout_drain(&priv->tstmp_clbr);
3774
3775         if (priv->vlan_attach != NULL)
3776                 EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3777         if (priv->vlan_detach != NULL)
3778                 EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3779
3780         /* make sure device gets closed */
3781         PRIV_LOCK(priv);
3782         mlx5e_close_locked(ifp);
3783         PRIV_UNLOCK(priv);
3784
3785         /* unregister device */
3786         ifmedia_removeall(&priv->media);
3787         ether_ifdetach(ifp);
3788         if_free(ifp);
3789
3790 #ifdef RATELIMIT
3791         mlx5e_rl_cleanup(priv);
3792 #endif
3793         /* destroy all remaining sysctl nodes */
3794         if (priv->sysctl_debug) {
3795                 sysctl_ctx_free(&priv->sysctl_ctx_channel_debug);
3796                 sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3797         }
3798         sysctl_ctx_free(&priv->stats.vport.ctx);
3799         sysctl_ctx_free(&priv->stats.pport.ctx);
3800         sysctl_ctx_free(&priv->sysctl_ctx);
3801
3802         mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3803         mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3804         mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3805         mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3806         mlx5e_disable_async_events(priv);
3807         destroy_workqueue(priv->wq);
3808         mlx5e_priv_mtx_destroy(priv);
3809         free(priv, M_MLX5EN);
3810 }
3811
3812 static void *
3813 mlx5e_get_ifp(void *vpriv)
3814 {
3815         struct mlx5e_priv *priv = vpriv;
3816
3817         return (priv->ifp);
3818 }
3819
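/*
 * Registration record connecting this Ethernet driver to the mlx5
 * core; the core invokes .add/.remove as devices supporting the ETH
 * protocol come and go.
 */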
3820 static struct mlx5_interface mlx5e_interface = {
3821         .add = mlx5e_create_ifp,
3822         .remove = mlx5e_destroy_ifp,
3823         .event = mlx5e_async_event,
3824         .protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3825         .get_dev = mlx5e_get_ifp,
3826 };
3827
3828 void
3829 mlx5e_init(void)
3830 {
3831         mlx5_register_interface(&mlx5e_interface);
3832 }
3833
3834 void
3835 mlx5e_cleanup(void)
3836 {
3837         mlx5_unregister_interface(&mlx5e_interface);
3838 }
3839
3840 static void
3841 mlx5e_show_version(void __unused *arg)
3842 {
3843
3844         printf("%s", mlx5e_version);
3845 }
3846 SYSINIT(mlx5e_show_version, SI_SUB_DRIVERS, SI_ORDER_ANY, mlx5e_show_version, NULL);
3847
3848 module_init_order(mlx5e_init, SI_ORDER_THIRD);
3849 module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3850
3851 #if (__FreeBSD_version >= 1100000)
3852 MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3853 #endif
3854 MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3855 MODULE_VERSION(mlx5en, 1);