]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/mlx5/mlx5_en/mlx5_en_main.c
MFV: r333378
[FreeBSD/FreeBSD.git] / sys / dev / mlx5 / mlx5_en / mlx5_en_main.c
1 /*-
2  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27
28 #include "en.h"
29
30 #include <sys/sockio.h>
31 #include <machine/atomic.h>
32
#ifndef ETH_DRIVER_VERSION
#define ETH_DRIVER_VERSION      "3.4.1"
#endif
/* Version banner string; printed when the driver attaches. */
char mlx5e_version[] = "Mellanox Ethernet driver"
    " (" ETH_DRIVER_VERSION ")";
38
/*
 * Bundled creation parameters for one channel: the receive queue, the
 * send queue, and their respective completion queues.
 */
struct mlx5e_channel_param {
        struct mlx5e_rq_param rq;
        struct mlx5e_sq_param sq;
        struct mlx5e_cq_param rx_cq;
        struct mlx5e_cq_param tx_cq;
};
45
/*
 * Map from the firmware link-mode index (MLX5E_* enum value) to the
 * corresponding ifmedia subtype and nominal baudrate.  Unpopulated
 * entries have a zero baudrate and are skipped by the lookup loops
 * (see mlx5e_update_carrier() and mlx5e_find_link_mode()).
 */
static const struct {
        u32     subtype;
        u64     baudrate;
}       mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {

        [MLX5E_1000BASE_CX_SGMII] = {
                .subtype = IFM_1000_CX_SGMII,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_1000BASE_KX] = {
                .subtype = IFM_1000_KX,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_10GBASE_CX4] = {
                .subtype = IFM_10G_CX4,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_KX4] = {
                .subtype = IFM_10G_KX4,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_KR] = {
                .subtype = IFM_10G_KR,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_20GBASE_KR2] = {
                .subtype = IFM_20G_KR2,
                .baudrate = IF_Gbps(20ULL),
        },
        [MLX5E_40GBASE_CR4] = {
                .subtype = IFM_40G_CR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_40GBASE_KR4] = {
                .subtype = IFM_40G_KR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_56GBASE_R4] = {
                .subtype = IFM_56G_R4,
                .baudrate = IF_Gbps(56ULL),
        },
        [MLX5E_10GBASE_CR] = {
                .subtype = IFM_10G_CR1,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_SR] = {
                .subtype = IFM_10G_SR,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_ER] = {
                .subtype = IFM_10G_ER,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_40GBASE_SR4] = {
                .subtype = IFM_40G_SR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_40GBASE_LR4] = {
                .subtype = IFM_40G_LR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_100GBASE_CR4] = {
                .subtype = IFM_100G_CR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_SR4] = {
                .subtype = IFM_100G_SR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_KR4] = {
                .subtype = IFM_100G_KR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_LR4] = {
                .subtype = IFM_100G_LR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100BASE_TX] = {
                .subtype = IFM_100_TX,
                .baudrate = IF_Mbps(100ULL),
        },
        [MLX5E_1000BASE_T] = {
                .subtype = IFM_1000_T,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_10GBASE_T] = {
                .subtype = IFM_10G_T,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_25GBASE_CR] = {
                .subtype = IFM_25G_CR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_25GBASE_KR] = {
                .subtype = IFM_25G_KR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_25GBASE_SR] = {
                .subtype = IFM_25G_SR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_50GBASE_CR2] = {
                .subtype = IFM_50G_CR2,
                .baudrate = IF_Gbps(50ULL),
        },
        [MLX5E_50GBASE_KR2] = {
                .subtype = IFM_50G_KR2,
                .baudrate = IF_Gbps(50ULL),
        },
};
156
/* Malloc type used for this driver's kernel memory allocations. */
MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");

/* Root node of the hw.mlx5 sysctl tree. */
static SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW, 0, "MLX5 driver parameters");
160
/*
 * Refresh the cached link state, active media word and baudrate from
 * the firmware, and propagate the resulting link state to the network
 * stack via if_link_state_change().
 */
static void
mlx5e_update_carrier(struct mlx5e_priv *priv)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        u32 out[MLX5_ST_SZ_DW(ptys_reg)];
        u32 eth_proto_oper;
        int error;
        u8 port_state;
        u8 i;

        port_state = mlx5_query_vport_state(mdev,
            MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);

        if (port_state == VPORT_STATE_UP) {
                priv->media_status_last |= IFM_ACTIVE;
        } else {
                /* Link is down: report a neutral media word and stop here. */
                priv->media_status_last &= ~IFM_ACTIVE;
                priv->media_active_last = IFM_ETHER;
                if_link_state_change(priv->ifp, LINK_STATE_DOWN);
                return;
        }

        error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
        if (error) {
                /* Query failed: fall back to a minimal, non-zero baudrate. */
                priv->media_active_last = IFM_ETHER;
                priv->ifp->if_baudrate = 1;
                if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
                    __func__, error);
                return;
        }
        eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);

        /*
         * Translate the operational protocol bitmask into an ifmedia
         * subtype and baudrate.  If several bits are set, the last
         * matching table entry wins.
         */
        for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
                if (mlx5e_mode_table[i].baudrate == 0)
                        continue;
                if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
                        priv->ifp->if_baudrate =
                            mlx5e_mode_table[i].baudrate;
                        priv->media_active_last =
                            mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
                }
        }
        if_link_state_change(priv->ifp, LINK_STATE_UP);
}
205
206 static void
207 mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
208 {
209         struct mlx5e_priv *priv = dev->if_softc;
210
211         ifmr->ifm_status = priv->media_status_last;
212         ifmr->ifm_active = priv->media_active_last |
213             (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
214             (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
215
216 }
217
218 static u32
219 mlx5e_find_link_mode(u32 subtype)
220 {
221         u32 i;
222         u32 link_mode = 0;
223
224         for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
225                 if (mlx5e_mode_table[i].baudrate == 0)
226                         continue;
227                 if (mlx5e_mode_table[i].subtype == subtype)
228                         link_mode |= MLX5E_PROT_MASK(i);
229         }
230
231         return (link_mode);
232 }
233
/*
 * Push the currently configured global pauseframe and per-priority
 * flow-control settings from "params" to port 1 of the device.
 * Returns the core's result (presumably a negative errno value on
 * failure — callers negate it; see mlx5e_media_change()).
 */
static int
mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
{
        return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
            priv->params.rx_pauseframe_control,
            priv->params.tx_pauseframe_control,
            priv->params.rx_priority_flow_control,
            priv->params.tx_priority_flow_control));
}
243
244 static int
245 mlx5e_set_port_pfc(struct mlx5e_priv *priv)
246 {
247         int error;
248
249         if (priv->params.rx_pauseframe_control ||
250             priv->params.tx_pauseframe_control) {
251                 if_printf(priv->ifp,
252                     "Global pauseframes must be disabled before enabling PFC.\n");
253                 error = -EINVAL;
254         } else {
255                 error = mlx5e_set_port_pause_and_pfc(priv);
256         }
257         return (error);
258 }
259
/*
 * ifmedia change callback: validate the requested media against the
 * port's capabilities and reprogram the link.  Takes the private lock
 * only if the caller does not already hold it.  Returns 0 or a
 * positive errno value (the pause/PFC core result is negated to match).
 */
static int
mlx5e_media_change(struct ifnet *dev)
{
        struct mlx5e_priv *priv = dev->if_softc;
        struct mlx5_core_dev *mdev = priv->mdev;
        u32 eth_proto_cap;
        u32 link_mode;
        int was_opened;
        int locked;
        int error;

        locked = PRIV_LOCKED(priv);
        if (!locked)
                PRIV_LOCK(priv);

        if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
                error = EINVAL;
                goto done;
        }
        link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));

        /* query supported capabilities */
        error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
        if (error != 0) {
                if_printf(dev, "Query port media capability failed\n");
                goto done;
        }
        /* check for autoselect */
        if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
                /* Autoselect advertises everything the port supports. */
                link_mode = eth_proto_cap;
                if (link_mode == 0) {
                        if_printf(dev, "Port media capability is zero\n");
                        error = EINVAL;
                        goto done;
                }
        } else {
                /* Restrict the request to what the port supports. */
                link_mode = link_mode & eth_proto_cap;
                if (link_mode == 0) {
                        if_printf(dev, "Not supported link mode requested\n");
                        error = EINVAL;
                        goto done;
                }
        }
        if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
                /* check if PFC is enabled */
                if (priv->params.rx_priority_flow_control ||
                    priv->params.tx_priority_flow_control) {
                        if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n");
                        error = EINVAL;
                        goto done;
                }
        }
        /* update pauseframe control bits */
        priv->params.rx_pauseframe_control =
            (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
        priv->params.tx_pauseframe_control =
            (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;

        /* check if device is opened */
        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);

        /*
         * Reconfigure the hardware: the port is bounced down/up around
         * the protocol change, but only brought back up if it was open.
         */
        mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
        mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
        error = -mlx5e_set_port_pause_and_pfc(priv);
        if (was_opened)
                mlx5_set_port_status(mdev, MLX5_PORT_UP);

done:
        if (!locked)
                PRIV_UNLOCK(priv);
        return (error);
}
333
/*
 * Workqueue wrapper around mlx5e_update_carrier(); runs with the
 * private lock held and only acts while the interface is opened.
 */
static void
mlx5e_update_carrier_work(struct work_struct *work)
{
        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
            update_carrier_work);

        PRIV_LOCK(priv);
        if (test_bit(MLX5E_STATE_OPENED, &priv->state))
                mlx5e_update_carrier(priv);
        PRIV_UNLOCK(priv);
}
345
/*
 * This function reads the physical port counters from the firmware
 * using a pre-defined layout defined by various MLX5E_PPORT_XXX()
 * macros. The output is converted from big-endian 64-bit values into
 * host endian ones and stored in the "priv->stats.pport" structure.
 *
 * NOTE: the "y" destination index is deliberately carried across the
 * counter-group loops below — the groups are packed back-to-back into
 * s->arg[] / s_debug->arg[], so the loop order must match the layout
 * the MLX5E_PPORT_*_STATS_NUM macros describe.
 */
static void
mlx5e_update_pport_counters(struct mlx5e_priv *priv)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_pport_stats *s = &priv->stats.pport;
        struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
        u32 *in;
        u32 *out;
        const u64 *ptr;
        unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
        unsigned x;
        unsigned y;
        unsigned z;

        /* allocate firmware request structures */
        in = mlx5_vzalloc(sz);
        out = mlx5_vzalloc(sz);
        if (in == NULL || out == NULL)
                goto free_out;

        /*
         * Get pointer to the 64-bit counter set which is located at a
         * fixed offset in the output firmware request structure:
         */
        ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);

        MLX5_SET(ppcnt_reg, in, local_port, 1);

        /* read IEEE802_3 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
             x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
                s->arg[y] = be64toh(ptr[x]);

        /* read RFC2819 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
                s->arg[y] = be64toh(ptr[x]);
        /* the remaining RFC2819 counters go to the debug stats, from index 0 */
        for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
            MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read RFC2863 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read physical layer stats counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read per-priority counters */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);

        /* iterate all the priorities */
        for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
                MLX5_SET(ppcnt_reg, in, prio_tc, z);
                mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);

                /* read per priority stats counter group using predefined counter layout */
                for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
                    MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
                        s->arg[y] = be64toh(ptr[x]);
        }
free_out:
        /* free firmware request structures */
        kvfree(in);
        kvfree(out);
}
426
/*
 * This function is called regularly to collect all statistics
 * counters from the firmware. The values can be viewed through the
 * sysctl interface. Execution is serialized using the priv's global
 * configuration lock.
 */
static void
mlx5e_update_stats_work(struct work_struct *work)
{
        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
            update_stats_work);
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_vport_stats *s = &priv->stats.vport;
        struct mlx5e_rq_stats *rq_stats;
        struct mlx5e_sq_stats *sq_stats;
        struct buf_ring *sq_br;
#if (__FreeBSD_version < 1100000)
        struct ifnet *ifp = priv->ifp;
#endif

        u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
        u32 *out;
        int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
        u64 tso_packets = 0;
        u64 tso_bytes = 0;
        u64 tx_queue_dropped = 0;
        u64 tx_defragged = 0;
        u64 tx_offload_none = 0;
        u64 lro_packets = 0;
        u64 lro_bytes = 0;
        u64 sw_lro_queued = 0;
        u64 sw_lro_flushed = 0;
        u64 rx_csum_none = 0;
        u64 rx_wqe_err = 0;
        u32 rx_out_of_buffer = 0;
        int i;
        int j;

        PRIV_LOCK(priv);
        out = mlx5_vzalloc(outlen);
        if (out == NULL)
                goto free_out;
        if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
                goto free_out;

        /* Collect the SW counters first and then HW for consistency */
        for (i = 0; i < priv->params.num_channels; i++) {
                struct mlx5e_rq *rq = &priv->channel[i]->rq;

                rq_stats = &priv->channel[i]->rq.stats;

                /* collect stats from LRO */
                rq_stats->sw_lro_queued = rq->lro.lro_queued;
                rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
                sw_lro_queued += rq_stats->sw_lro_queued;
                sw_lro_flushed += rq_stats->sw_lro_flushed;
                lro_packets += rq_stats->lro_packets;
                lro_bytes += rq_stats->lro_bytes;
                rx_csum_none += rq_stats->csum_none;
                rx_wqe_err += rq_stats->wqe_err;

                /* sum the per-TC send-queue counters of this channel */
                for (j = 0; j < priv->num_tc; j++) {
                        sq_stats = &priv->channel[i]->sq[j].stats;
                        sq_br = priv->channel[i]->sq[j].br;

                        tso_packets += sq_stats->tso_packets;
                        tso_bytes += sq_stats->tso_bytes;
                        tx_queue_dropped += sq_stats->dropped;
                        if (sq_br != NULL)
                                tx_queue_dropped += sq_br->br_drops;
                        tx_defragged += sq_stats->defragged;
                        tx_offload_none += sq_stats->csum_offload_none;
                }
        }

        /* update counters */
        s->tso_packets = tso_packets;
        s->tso_bytes = tso_bytes;
        s->tx_queue_dropped = tx_queue_dropped;
        s->tx_defragged = tx_defragged;
        s->lro_packets = lro_packets;
        s->lro_bytes = lro_bytes;
        s->sw_lro_queued = sw_lro_queued;
        s->sw_lro_flushed = sw_lro_flushed;
        s->rx_csum_none = rx_csum_none;
        s->rx_wqe_err = rx_wqe_err;

        /* HW counters */
        memset(in, 0, sizeof(in));

        MLX5_SET(query_vport_counter_in, in, opcode,
            MLX5_CMD_OP_QUERY_VPORT_COUNTER);
        MLX5_SET(query_vport_counter_in, in, op_mod, 0);
        MLX5_SET(query_vport_counter_in, in, other_vport, 0);

        memset(out, 0, outlen);

        /* get number of out-of-buffer drops first */
        if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
            &rx_out_of_buffer))
                goto free_out;

        /*
         * Accumulate difference into a 64-bit counter.  The hardware
         * value is only 32 bits wide; the (u32) cast makes the
         * subtraction wrap correctly across hardware counter rollover.
         */
        s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
        s->rx_out_of_buffer_prev = rx_out_of_buffer;

        /* get port statistics */
        if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
                goto free_out;

/* Shorthand for reading one 64-bit field out of the vport counter reply. */
#define MLX5_GET_CTR(out, x) \
        MLX5_GET64(query_vport_counter_out, out, x)

        s->rx_error_packets =
            MLX5_GET_CTR(out, received_errors.packets);
        s->rx_error_bytes =
            MLX5_GET_CTR(out, received_errors.octets);
        s->tx_error_packets =
            MLX5_GET_CTR(out, transmit_errors.packets);
        s->tx_error_bytes =
            MLX5_GET_CTR(out, transmit_errors.octets);

        s->rx_unicast_packets =
            MLX5_GET_CTR(out, received_eth_unicast.packets);
        s->rx_unicast_bytes =
            MLX5_GET_CTR(out, received_eth_unicast.octets);
        s->tx_unicast_packets =
            MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
        s->tx_unicast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_unicast.octets);

        s->rx_multicast_packets =
            MLX5_GET_CTR(out, received_eth_multicast.packets);
        s->rx_multicast_bytes =
            MLX5_GET_CTR(out, received_eth_multicast.octets);
        s->tx_multicast_packets =
            MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
        s->tx_multicast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_multicast.octets);

        s->rx_broadcast_packets =
            MLX5_GET_CTR(out, received_eth_broadcast.packets);
        s->rx_broadcast_bytes =
            MLX5_GET_CTR(out, received_eth_broadcast.octets);
        s->tx_broadcast_packets =
            MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
        s->tx_broadcast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);

        /* Derive the aggregate rx/tx totals from the per-type counters. */
        s->rx_packets =
            s->rx_unicast_packets +
            s->rx_multicast_packets +
            s->rx_broadcast_packets -
            s->rx_out_of_buffer;
        s->rx_bytes =
            s->rx_unicast_bytes +
            s->rx_multicast_bytes +
            s->rx_broadcast_bytes;
        s->tx_packets =
            s->tx_unicast_packets +
            s->tx_multicast_packets +
            s->tx_broadcast_packets;
        s->tx_bytes =
            s->tx_unicast_bytes +
            s->tx_multicast_bytes +
            s->tx_broadcast_bytes;

        /* Update calculated offload counters */
        s->tx_csum_offload = s->tx_packets - tx_offload_none;
        s->rx_csum_good = s->rx_packets - s->rx_csum_none;

        /* Get physical port counters */
        mlx5e_update_pport_counters(priv);

#if (__FreeBSD_version < 1100000)
        /* no get_counters interface in fbsd 10 */
        ifp->if_ipackets = s->rx_packets;
        ifp->if_ierrors = s->rx_error_packets +
            priv->stats.pport.alignment_err +
            priv->stats.pport.check_seq_err +
            priv->stats.pport.crc_align_errors +
            priv->stats.pport.in_range_len_errors +
            priv->stats.pport.jabbers +
            priv->stats.pport.out_of_range_len +
            priv->stats.pport.oversize_pkts +
            priv->stats.pport.symbol_err +
            priv->stats.pport.too_long_errors +
            priv->stats.pport.undersize_pkts +
            priv->stats.pport.unsupported_op_rx;
        ifp->if_iqdrops = s->rx_out_of_buffer +
            priv->stats.pport.drop_events;
        ifp->if_opackets = s->tx_packets;
        ifp->if_oerrors = s->tx_error_packets;
        ifp->if_snd.ifq_drops = s->tx_queue_dropped;
        ifp->if_ibytes = s->rx_bytes;
        ifp->if_obytes = s->tx_bytes;
        ifp->if_collisions =
            priv->stats.pport.collisions;
#endif

free_out:
        kvfree(out);

        /* Update diagnostics, if any */
        if (priv->params_ethtool.diag_pci_enable ||
            priv->params_ethtool.diag_general_enable) {
                int error = mlx5_core_get_diagnostics_full(mdev,
                    priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
                    priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
                if (error != 0)
                        if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
        }
        PRIV_UNLOCK(priv);
}
641
/*
 * Watchdog callout: queue the statistics-update work and re-arm
 * itself to fire again one second (hz ticks) later.
 */
static void
mlx5e_update_stats(void *arg)
{
        struct mlx5e_priv *priv = arg;

        queue_work(priv->wq, &priv->update_stats_work);

        callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
}
651
652 static void
653 mlx5e_async_event_sub(struct mlx5e_priv *priv,
654     enum mlx5_dev_event event)
655 {
656         switch (event) {
657         case MLX5_DEV_EVENT_PORT_UP:
658         case MLX5_DEV_EVENT_PORT_DOWN:
659                 queue_work(priv->wq, &priv->update_carrier_work);
660                 break;
661
662         default:
663                 break;
664         }
665 }
666
/*
 * Async event callback registered with the mlx5 core.  Forwards the
 * event only while async events are enabled; the check and dispatch
 * are serialized by async_events_mtx (see mlx5e_disable_async_events()).
 */
static void
mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
    enum mlx5_dev_event event, unsigned long param)
{
        struct mlx5e_priv *priv = vpriv;

        mtx_lock(&priv->async_events_mtx);
        if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
                mlx5e_async_event_sub(priv, event);
        mtx_unlock(&priv->async_events_mtx);
}
678
/*
 * Allow async device events to be dispatched.  No locking is needed
 * here: setting the bit can safely race with a concurrent event.
 */
static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
        set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}
684
/*
 * Disable async event dispatch.  The mutex round-trip guarantees that
 * any event handler running in mlx5e_async_event() has finished before
 * this function returns.
 */
static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
        mtx_lock(&priv->async_events_mtx);
        clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
        mtx_unlock(&priv->async_events_mtx);
}
692
static void mlx5e_calibration_callout(void *arg);
/* Number of calibration rounds before switching to the normal interval. */
static int mlx5e_calibration_duration = 20;
/* Re-calibration interval, in seconds, during the initial phase. */
static int mlx5e_fast_calibration = 1;
/* Re-calibration interval, in seconds, during normal operation. */
static int mlx5e_normal_calibration = 30;

static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW, 0,
    "MLX5 timestamp calibration parameteres");

SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
    &mlx5e_calibration_duration, 0,
    "Duration of initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
    &mlx5e_fast_calibration, 0,
    "Recalibration interval during initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
    &mlx5e_normal_calibration, 0,
    "Recalibration interval during normal operations");
710
711 /*
712  * Ignites the calibration process.
713  */
714 static void
715 mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
716 {
717
718         if (priv->clbr_done == 0)
719                 mlx5e_calibration_callout(priv);
720         else
721                 callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
722                     mlx5e_calibration_duration ? mlx5e_fast_calibration :
723                     mlx5e_normal_calibration) * hz, mlx5e_calibration_callout,
724                     priv);
725 }
726
/*
 * Convert a timespec to a 64-bit scalar timestamp.  NOTE(review):
 * despite the "usec" in its name, the returned value is in
 * NANOseconds (tv_sec is scaled by 10^9); callers depend on that unit.
 */
static uint64_t
mlx5e_timespec2usec(const struct timespec *ts)
{
        uint64_t ns;

        ns = (uint64_t)ts->tv_sec * 1000000000;
        return (ns + ts->tv_nsec);
}
733
/*
 * Read the 64-bit hardware timer from the device's init segment.  The
 * high word is sampled before and after the low word, and the read is
 * retried until both high-word samples agree, so the combined value is
 * consistent even if the low 32 bits wrap between the reads.
 */
static uint64_t
mlx5e_hw_clock(struct mlx5e_priv *priv)
{
        struct mlx5_init_seg *iseg;
        uint32_t hw_h, hw_h1, hw_l;

        iseg = priv->mdev->iseg;
        do {
                hw_h = ioread32be(&iseg->internal_timer_h);
                hw_l = ioread32be(&iseg->internal_timer_l);
                hw_h1 = ioread32be(&iseg->internal_timer_h);
        } while (hw_h1 != hw_h);
        return (((uint64_t)hw_h << 32) | hw_l);
}
748
/*
 * The calibration callout, it runs either in the context of the
 * thread which enables calibration, or in callout.  It takes the
 * snapshot of system and adapter clocks, then advances the pointers to
 * the calibration point to allow rx path to read the consistent data
 * lockless.
 */
static void
mlx5e_calibration_callout(void *arg)
{
        struct mlx5e_priv *priv;
        struct mlx5e_clbr_point *next, *curr;
        struct timespec ts;
        int clbr_curr_next;

        priv = arg;
        curr = &priv->clbr_points[priv->clbr_curr];
        /* Advance to the next slot in the ring of calibration points. */
        clbr_curr_next = priv->clbr_curr + 1;
        if (clbr_curr_next >= nitems(priv->clbr_points))
                clbr_curr_next = 0;
        next = &priv->clbr_points[clbr_curr_next];

        /* Carry the previous snapshot over from the current point. */
        next->base_prev = curr->base_curr;
        next->clbr_hw_prev = curr->clbr_hw_curr;

        next->clbr_hw_curr = mlx5e_hw_clock(priv);
        /*
         * If the hardware clock did not advance by at least
         * 2^MLX5E_TSTMP_PREC ticks since the previous snapshot, treat
         * the timer as frozen and disable timestamp calibration.
         */
        if (((next->clbr_hw_curr - curr->clbr_hw_prev) >> MLX5E_TSTMP_PREC) ==
            0) {
                if_printf(priv->ifp, "HW failed tstmp frozen %#jx %#jx,"
                    "disabling\n", next->clbr_hw_curr, curr->clbr_hw_prev);
                priv->clbr_done = 0;
                return;
        }

        nanouptime(&ts);
        next->base_curr = mlx5e_timespec2usec(&ts);

        /*
         * Publish the new point locklessly: invalidate the old point's
         * generation, fence so readers cannot observe the new pointer
         * before the data, then store the new generation with release
         * semantics so lockless rx-path readers can detect torn reads.
         */
        curr->clbr_gen = 0;
        atomic_thread_fence_rel();
        priv->clbr_curr = clbr_curr_next;
        atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen));

        if (priv->clbr_done < mlx5e_calibration_duration)
                priv->clbr_done++;
        mlx5e_reset_calibration_callout(priv);
}
795
/* Description strings for the per-RQ statistics, expanded from the table macro. */
static const char *mlx5e_rq_stats_desc[] = {
        MLX5E_RQ_STATS(MLX5E_STATS_DESC)
};
799
/*
 * Allocate the software state for one receive queue: the DMA tag used
 * to map receive mbufs, the linked-list work queue, the per-WQE mbuf
 * and DMA-map array, LRO state and the per-RQ sysctl statistics nodes.
 * On failure every resource acquired so far is released in reverse
 * order via the labelled unwind path.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int
mlx5e_create_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	char buffer[16];
	void *rqc = param->rqc;
	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
	int wq_sz;
	int err;
	int i;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MJUM16BYTES,		/* maxsize */
	    1,				/* nsegments */
	    MJUM16BYTES,		/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &rq->dma_tag)))
		goto done;

	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
	    &rq->wq_ctrl);
	if (err)
		goto err_free_dma_tag;

	/* receive doorbell record lives after the send one */
	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];

	/*
	 * Pick the receive buffer size: the configured LRO WQE size when
	 * hardware LRO is enabled, otherwise large enough for the MTU.
	 */
	if (priv->params.hw_lro_en) {
		rq->wqe_sz = priv->params.lro_wqe_sz;
	} else {
		rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
	}
	/* round the size up to the next supported mbuf cluster size */
	if (rq->wqe_sz > MJUM16BYTES) {
		err = -ENOMEM;
		goto err_rq_wq_destroy;
	} else if (rq->wqe_sz > MJUM9BYTES) {
		rq->wqe_sz = MJUM16BYTES;
	} else if (rq->wqe_sz > MJUMPAGESIZE) {
		rq->wqe_sz = MJUM9BYTES;
	} else if (rq->wqe_sz > MCLBYTES) {
		rq->wqe_sz = MJUMPAGESIZE;
	} else {
		rq->wqe_sz = MCLBYTES;
	}

	wq_sz = mlx5_wq_ll_get_size(&rq->wq);

	err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
	if (err)
		goto err_rq_wq_destroy;

	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
	for (i = 0; i != wq_sz; i++) {
		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;

		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
		if (err != 0) {
			/* unwind the DMA maps created so far */
			while (i--)
				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
			goto err_rq_mbuf_free;
		}
		wqe->data.lkey = c->mkey_be;
		wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
	}

	rq->ifp = c->ifp;
	rq->channel = c;
	rq->ix = c->ix;

	/* attach per-RQ statistics under the interface sysctl tree */
	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
	    rq->stats.arg);
	return (0);

err_rq_mbuf_free:
	free(rq->mbuf, M_MLX5EN);
	tcp_lro_free(&rq->lro);
err_rq_wq_destroy:
	mlx5_wq_destroy(&rq->wq_ctrl);
err_free_dma_tag:
	bus_dma_tag_destroy(rq->dma_tag);
done:
	return (err);
}
896
897 static void
898 mlx5e_destroy_rq(struct mlx5e_rq *rq)
899 {
900         int wq_sz;
901         int i;
902
903         /* destroy all sysctl nodes */
904         sysctl_ctx_free(&rq->stats.ctx);
905
906         /* free leftover LRO packets, if any */
907         tcp_lro_free(&rq->lro);
908
909         wq_sz = mlx5_wq_ll_get_size(&rq->wq);
910         for (i = 0; i != wq_sz; i++) {
911                 if (rq->mbuf[i].mbuf != NULL) {
912                         bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
913                         m_freem(rq->mbuf[i].mbuf);
914                 }
915                 bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
916         }
917         free(rq->mbuf, M_MLX5EN);
918         mlx5_wq_destroy(&rq->wq_ctrl);
919 }
920
/*
 * Create the hardware RQ object through the firmware CREATE_RQ
 * command, wiring it to the channel's receive CQ and pointing it at
 * the previously allocated work queue pages.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int
mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *rqc;
	void *wq;
	int inlen;
	int err;

	/* command layout: fixed header plus one 64-bit PA per WQ page */
	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
	    sizeof(u64) * rq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
	wq = MLX5_ADDR_OF(rqc, rqc, wq);

	memcpy(rqc, param->rqc, sizeof(param->rqc));

	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
	/* created in RESET state; a later MODIFY_RQ moves it to READY */
	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
	if (priv->counter_set_id >= 0)
		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&rq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);

	kvfree(in);

	return (err);
}
963
/*
 * Transition the hardware RQ from "curr_state" to "next_state" via the
 * firmware MODIFY_RQ command (e.g. RST->RDY on open, RDY->ERR on close).
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int
mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *rqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);

	/* firmware verifies the RQ currently is in "curr_state" */
	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
	MLX5_SET(rqc, rqc, state, next_state);

	err = mlx5_core_modify_rq(mdev, in, inlen);

	kvfree(in);

	return (err);
}
993
994 static void
995 mlx5e_disable_rq(struct mlx5e_rq *rq)
996 {
997         struct mlx5e_channel *c = rq->channel;
998         struct mlx5e_priv *priv = c->priv;
999         struct mlx5_core_dev *mdev = priv->mdev;
1000
1001         mlx5_core_destroy_rq(mdev, rq->rqn);
1002 }
1003
1004 static int
1005 mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
1006 {
1007         struct mlx5e_channel *c = rq->channel;
1008         struct mlx5e_priv *priv = c->priv;
1009         struct mlx5_wq_ll *wq = &rq->wq;
1010         int i;
1011
1012         for (i = 0; i < 1000; i++) {
1013                 if (wq->cur_sz >= priv->params.min_rx_wqes)
1014                         return (0);
1015
1016                 msleep(4);
1017         }
1018         return (-ETIMEDOUT);
1019 }
1020
/*
 * Bring up one receive queue: allocate the software state, create the
 * hardware object and move it to the READY state. On failure every
 * step already taken is rolled back.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int
mlx5e_open_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
	int err;

	err = mlx5e_create_rq(c, param, rq);
	if (err)
		return (err);

	err = mlx5e_enable_rq(rq, param);
	if (err)
		goto err_destroy_rq;

	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
	if (err)
		goto err_disable_rq;

	/* the RQ may now receive packets */
	c->rq.enabled = 1;

	return (0);

err_disable_rq:
	mlx5e_disable_rq(rq);
err_destroy_rq:
	mlx5e_destroy_rq(rq);

	return (err);
}
1051
/*
 * Begin tearing down a receive queue: stop refills under the RQ lock,
 * drain the watchdog callout and move the hardware RQ to the ERROR
 * state so outstanding WQEs are flushed back with error completions.
 * mlx5e_close_rq_wait() completes the teardown.
 */
static void
mlx5e_close_rq(struct mlx5e_rq *rq)
{
	mtx_lock(&rq->mtx);
	rq->enabled = 0;
	callout_stop(&rq->watchdog);
	mtx_unlock(&rq->mtx);

	/* wait for a possibly still-running watchdog handler to finish */
	callout_drain(&rq->watchdog);

	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
}
1064
/*
 * Finish tearing down a receive queue started by mlx5e_close_rq():
 * poll completions until the hardware has returned all WQEs (unless
 * the device has hit an internal error), then destroy the hardware
 * object and free the software state.
 */
static void
mlx5e_close_rq_wait(struct mlx5e_rq *rq)
{
	struct mlx5_core_dev *mdev = rq->channel->priv->mdev;

	/* wait till RQ is empty */
	while (!mlx5_wq_ll_is_empty(&rq->wq) &&
	       (mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
		msleep(4);
		/* process completions by hand to reap flushed WQEs */
		rq->cq.mcq.comp(&rq->cq.mcq);
	}

	mlx5e_disable_rq(rq);
	mlx5e_destroy_rq(rq);
}
1080
1081 void
1082 mlx5e_free_sq_db(struct mlx5e_sq *sq)
1083 {
1084         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1085         int x;
1086
1087         for (x = 0; x != wq_sz; x++)
1088                 bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
1089         free(sq->mbuf, M_MLX5EN);
1090 }
1091
1092 int
1093 mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
1094 {
1095         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1096         int err;
1097         int x;
1098
1099         sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
1100
1101         /* Create DMA descriptor MAPs */
1102         for (x = 0; x != wq_sz; x++) {
1103                 err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
1104                 if (err != 0) {
1105                         while (x--)
1106                                 bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
1107                         free(sq->mbuf, M_MLX5EN);
1108                         return (err);
1109                 }
1110         }
1111         return (0);
1112 }
1113
/* Human-readable sysctl descriptions for the per-SQ statistics counters. */
static const char *mlx5e_sq_stats_desc[] = {
	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};
1117
/*
 * Allocate the software state for one send queue of traffic class "tc":
 * the DMA tag for transmit mbufs, a UAR for doorbells, the cyclic work
 * queue, the per-WQE descriptor database, optionally a buf_ring plus
 * dedicated transmit taskqueue, and the per-SQ sysctl statistics nodes.
 * Failures unwind everything acquired so far via the labelled path.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int
mlx5e_create_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	char buffer[16];

	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
#ifdef RSS
	cpuset_t cpu_mask;
	int cpu_id;
#endif
	int err;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &sq->dma_tag)))
		goto done;

	/* map a UAR page for ringing the SQ doorbell */
	err = mlx5_alloc_map_uar(mdev, &sq->uar);
	if (err)
		goto err_free_dma_tag;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
	    &sq->wq_ctrl);
	if (err)
		goto err_unmap_free_uar;

	/* send doorbell record */
	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
	/* half of the blue-flame register is usable per doorbell */
	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;

	err = mlx5e_alloc_sq_db(sq);
	if (err)
		goto err_sq_wq_destroy;

	sq->mkey_be = c->mkey_be;
	sq->ifp = priv->ifp;
	sq->priv = priv;
	sq->tc = tc;

	/* check if we should allocate a second packet buffer */
	if (priv->params_ethtool.tx_bufring_disable == 0) {
		sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
		    M_WAITOK, &sq->lock);
		if (sq->br == NULL) {
			if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
			    __func__);
			err = -ENOMEM;
			goto err_free_sq_db;
		}

		sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
		    taskqueue_thread_enqueue, &sq->sq_tq);
		if (sq->sq_tq == NULL) {
			if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
			    __func__);
			err = -ENOMEM;
			goto err_free_drbr;
		}

		TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
#ifdef RSS
		/* pin the taskqueue thread to the RSS bucket's CPU */
		cpu_id = rss_getcpu(c->ix % rss_getnumbuckets());
		CPU_SETOF(cpu_id, &cpu_mask);
		taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask,
		    "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id);
#else
		taskqueue_start_threads(&sq->sq_tq, 1, PI_NET,
		    "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc);
#endif
	}
	/* attach per-SQ statistics under the interface sysctl tree */
	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
	    sq->stats.arg);

	return (0);

err_free_drbr:
	buf_ring_free(sq->br, M_MLX5EN);
err_free_sq_db:
	mlx5e_free_sq_db(sq);
err_sq_wq_destroy:
	mlx5_wq_destroy(&sq->wq_ctrl);

err_unmap_free_uar:
	mlx5_unmap_free_uar(mdev, &sq->uar);

err_free_dma_tag:
	bus_dma_tag_destroy(sq->dma_tag);
done:
	return (err);
}
1226
/*
 * Release the software state of a send queue in reverse order of
 * mlx5e_create_sq(): sysctl nodes, descriptor database, work queue,
 * UAR, and — if they were allocated — the transmit taskqueue and
 * buf_ring.
 */
static void
mlx5e_destroy_sq(struct mlx5e_sq *sq)
{
	/* destroy all sysctl nodes */
	sysctl_ctx_free(&sq->stats.ctx);

	mlx5e_free_sq_db(sq);
	mlx5_wq_destroy(&sq->wq_ctrl);
	mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
	/* taskqueue and buf_ring only exist when tx_bufring_disable == 0 */
	if (sq->sq_tq != NULL) {
		taskqueue_drain(sq->sq_tq, &sq->sq_task);
		taskqueue_free(sq->sq_tq);
	}
	if (sq->br != NULL)
		buf_ring_free(sq->br, M_MLX5EN);
}
1243
/*
 * Create the hardware SQ object through the firmware CREATE_SQ
 * command, binding it to "tis_num", the SQ's completion queue, its
 * UAR page and the previously allocated work queue pages.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
int
mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
    int tis_num)
{
	void *in;
	void *sqc;
	void *wq;
	int inlen;
	int err;

	/* command layout: fixed header plus one 64-bit PA per WQ page */
	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
	    sizeof(u64) * sq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
	wq = MLX5_ADDR_OF(sqc, sqc, wq);

	memcpy(sqc, param->sqc, sizeof(param->sqc));

	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
	MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
	/* created in RESET state; a later MODIFY_SQ moves it to READY */
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
	MLX5_SET(sqc, sqc, flush_in_error_en, 1);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	MLX5_SET(wq, wq, uar_page, sq->uar.index);
	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&sq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);

	kvfree(in);

	return (err);
}
1286
/*
 * Transition the hardware SQ from "curr_state" to "next_state" via the
 * firmware MODIFY_SQ command (e.g. RST->RDY on open, RDY->ERR on drain).
 *
 * Returns 0 on success or a negative errno value on failure.
 */
int
mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
{
	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

	/* firmware verifies the SQ currently is in "curr_state" */
	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
	MLX5_SET(sqc, sqc, state, next_state);

	err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);

	kvfree(in);

	return (err);
}
1312
1313 void
1314 mlx5e_disable_sq(struct mlx5e_sq *sq)
1315 {
1316
1317         mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
1318 }
1319
/*
 * Bring up one send queue of traffic class "tc": allocate the software
 * state, create the hardware object and move it to the READY state.
 * On failure every step already taken is rolled back.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int
mlx5e_open_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
	int err;

	err = mlx5e_create_sq(c, tc, param, sq);
	if (err)
		return (err);

	err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
	if (err)
		goto err_destroy_sq;

	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
	if (err)
		goto err_disable_sq;

	/* publish readiness with release semantics for lock-free readers */
	atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY);

	return (0);

err_disable_sq:
	mlx5e_disable_sq(sq);
err_destroy_sq:
	mlx5e_destroy_sq(sq);

	return (err);
}
1351
/*
 * Post NOP WQEs until the completion-event counter reaches zero, so
 * that a completion is eventually generated and the SQ can be reaped.
 * Called with the SQ lock held. When "can_sleep" is nonzero the lock
 * is dropped while waiting for ring space; otherwise the function
 * gives up and only writes any pending doorbell.
 */
static void
mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
{
	/* fill up remainder with NOPs */
	while (sq->cev_counter != 0) {
		while (!mlx5e_sq_has_room_for(sq, 1)) {
			if (can_sleep != 0) {
				/* drop the lock while sleeping for ring space */
				mtx_unlock(&sq->lock);
				msleep(4);
				mtx_lock(&sq->lock);
			} else {
				goto done;
			}
		}
		/* send a single NOP */
		mlx5e_send_nop(sq, 1);
		atomic_thread_fence_rel();
	}
done:
	/* Check if we need to write the doorbell */
	if (likely(sq->doorbell.d64 != 0)) {
		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
		sq->doorbell.d64 = 0;
	}
}
1377
/*
 * Periodic completion-event callout for a send queue, invoked with the
 * SQ lock held. On the first tick it arms NOP sending; on the next it
 * flushes NOPs so pending completions are generated. Re-arms itself
 * every second until the NOP fill completes.
 */
void
mlx5e_sq_cev_timeout(void *arg)
{
	struct mlx5e_sq *sq = arg;

	mtx_assert(&sq->lock, MA_OWNED);

	/* check next state */
	switch (sq->cev_next_state) {
	case MLX5E_CEV_STATE_SEND_NOPS:
		/* fill TX ring with NOPs, if any */
		mlx5e_sq_send_nops_locked(sq, 0);

		/* check if completed */
		if (sq->cev_counter == 0) {
			sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
			return;
		}
		break;
	default:
		/* send NOPs on next timeout */
		sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
		break;
	}

	/* restart timer */
	callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
}
1406
/*
 * Stop a send queue and wait until the hardware has consumed or
 * errored out every posted WQE: mark the SQ stopped, tear down the
 * completion-event callout, flush the ring with NOPs, wait for it to
 * empty while the link is up, then force the SQ into the ERROR state
 * and wait for the remaining WQEs to complete with errors.
 *
 * Must be called with the priv's configuration lock held.
 */
void
mlx5e_drain_sq(struct mlx5e_sq *sq)
{
	int error;
	struct mlx5_core_dev *mdev= sq->priv->mdev;

	/*
	 * Check if already stopped.
	 *
	 * NOTE: The "stopped" variable is only written when both the
	 * priv's configuration lock and the SQ's lock is locked. It
	 * can therefore safely be read when only one of the two locks
	 * is locked. This function is always called when the priv's
	 * configuration lock is locked.
	 */
	if (sq->stopped != 0)
		return;

	mtx_lock(&sq->lock);

	/* don't put more packets into the SQ */
	sq->stopped = 1;

	/* teardown event factor timer, if any */
	sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
	callout_stop(&sq->cev_callout);

	/* send dummy NOPs in order to flush the transmit ring */
	mlx5e_sq_send_nops_locked(sq, 1);
	mtx_unlock(&sq->lock);

	/* make sure it is safe to free the callout */
	callout_drain(&sq->cev_callout);

	/* wait till SQ is empty or link is down */
	mtx_lock(&sq->lock);
	while (sq->cc != sq->pc &&
	    (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		/* drop the lock while sleeping and reaping completions */
		mtx_unlock(&sq->lock);
		msleep(1);
		sq->cq.mcq.comp(&sq->cq.mcq);
		mtx_lock(&sq->lock);
	}
	mtx_unlock(&sq->lock);

	/* error out remaining requests */
	error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
	if (error != 0) {
		if_printf(sq->ifp,
		    "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
	}

	/* wait till SQ is empty */
	mtx_lock(&sq->lock);
	while (sq->cc != sq->pc &&
	       mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		mtx_unlock(&sq->lock);
		msleep(1);
		sq->cq.mcq.comp(&sq->cq.mcq);
		mtx_lock(&sq->lock);
	}
	mtx_unlock(&sq->lock);
}
1471
/*
 * Fully close a send queue: drain all pending WQEs, destroy the
 * hardware object, then free the software state.
 */
static void
mlx5e_close_sq_wait(struct mlx5e_sq *sq)
{

	mlx5e_drain_sq(sq);
	mlx5e_disable_sq(sq);
	mlx5e_destroy_sq(sq);
}
1480
/*
 * Allocate the software state for a completion queue bound to event
 * queue "eq_ix": create the CQ work queue, initialize the doorbell
 * records and core CQ fields, and mark every CQE invalid so the
 * hardware-ownership check works from the first pass.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int
mlx5e_create_cq(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    int eq_ix)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_core_cq *mcq = &cq->mcq;
	int eqn_not_used;
	int irqn;
	int err;
	u32 i;

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;

	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
	    &cq->wq_ctrl);
	if (err)
		return (err);

	/* only the IRQ number is needed here */
	mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);

	mcq->cqe_sz = 64;
	/* doorbell area: consumer index record followed by arm record */
	mcq->set_ci_db = cq->wq_ctrl.db.db;
	mcq->arm_db = cq->wq_ctrl.db.db + 1;
	*mcq->set_ci_db = 0;
	*mcq->arm_db = 0;
	mcq->vector = eq_ix;
	mcq->comp = comp;
	mcq->event = mlx5e_cq_error_event;
	mcq->irqn = irqn;
	mcq->uar = &priv->cq_uar;

	/* poison all CQEs so they read as hardware-owned/invalid */
	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);

		cqe->op_own = 0xf1;
	}

	cq->priv = priv;

	return (0);
}
1526
/* Free the completion queue's work queue buffers. */
static void
mlx5e_destroy_cq(struct mlx5e_cq *cq)
{
	mlx5_wq_destroy(&cq->wq_ctrl);
}
1532
/*
 * Create the hardware CQ object through the firmware CREATE_CQ
 * command, bind it to the event queue selected by "eq_ix", and arm it
 * so the first completion raises an interrupt.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int
mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
{
	struct mlx5_core_cq *mcq = &cq->mcq;
	void *in;
	void *cqc;
	int inlen;
	int irqn_not_used;
	int eqn;
	int err;

	/* command layout: fixed header plus one 64-bit PA per WQ page */
	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
	    sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);

	memcpy(cqc, param->cqc, sizeof(param->cqc));

	mlx5_fill_page_array(&cq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));

	/* only the EQ number is needed here */
	mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);

	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);

	kvfree(in);

	if (err)
		return (err);

	/* request an interrupt for the next completion */
	mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));

	return (0);
}
1576
/* Destroy the hardware CQ object; the software state remains valid. */
static void
mlx5e_disable_cq(struct mlx5e_cq *cq)
{

	mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
}
1583
1584 int
1585 mlx5e_open_cq(struct mlx5e_priv *priv,
1586     struct mlx5e_cq_param *param,
1587     struct mlx5e_cq *cq,
1588     mlx5e_cq_comp_t *comp,
1589     int eq_ix)
1590 {
1591         int err;
1592
1593         err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
1594         if (err)
1595                 return (err);
1596
1597         err = mlx5e_enable_cq(cq, param, eq_ix);
1598         if (err)
1599                 goto err_destroy_cq;
1600
1601         return (0);
1602
1603 err_destroy_cq:
1604         mlx5e_destroy_cq(cq);
1605
1606         return (err);
1607 }
1608
/* Tear down a completion queue: hardware object first, then software state. */
void
mlx5e_close_cq(struct mlx5e_cq *cq)
{
	mlx5e_disable_cq(cq);
	mlx5e_destroy_cq(cq);
}
1615
/*
 * Open one transmit completion queue per traffic class of the channel.
 * On failure the CQs opened so far are closed again in reverse order.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int
mlx5e_open_tx_cqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		/* open completion queue */
		err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
		    &mlx5e_tx_cq_comp, c->ix);
		if (err)
			goto err_close_tx_cqs;
	}
	return (0);

err_close_tx_cqs:
	/* unwind the CQs opened before the failure */
	for (tc--; tc >= 0; tc--)
		mlx5e_close_cq(&c->sq[tc].cq);

	return (err);
}
1638
1639 static void
1640 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1641 {
1642         int tc;
1643
1644         for (tc = 0; tc < c->num_tc; tc++)
1645                 mlx5e_close_cq(&c->sq[tc].cq);
1646 }
1647
/*
 * Open one send queue per traffic class of the channel. On failure
 * the SQs opened so far are closed again in reverse order.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int
mlx5e_open_sqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
		if (err)
			goto err_close_sqs;
	}

	return (0);

err_close_sqs:
	/* unwind the SQs opened before the failure */
	for (tc--; tc >= 0; tc--)
		mlx5e_close_sq_wait(&c->sq[tc]);

	return (err);
}
1669
1670 static void
1671 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1672 {
1673         int tc;
1674
1675         for (tc = 0; tc < c->num_tc; tc++)
1676                 mlx5e_close_sq_wait(&c->sq[tc]);
1677 }
1678
/*
 * Initialize the channel's mutexes and callouts: the RQ mutex with its
 * watchdog callout, and per traffic class the SQ transmit/completion
 * locks plus the completion-event callout and factor.
 */
static void
mlx5e_chan_mtx_init(struct mlx5e_channel *c)
{
	int tc;

	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);

	for (tc = 0; tc < c->num_tc; tc++) {
		struct mlx5e_sq *sq = c->sq + tc;

		mtx_init(&sq->lock, "mlx5tx",
		    MTX_NETWORK_LOCK " TX", MTX_DEF);
		mtx_init(&sq->comp_lock, "mlx5comp",
		    MTX_NETWORK_LOCK " TX", MTX_DEF);

		/* the CEV callout runs under the SQ transmit lock */
		callout_init_mtx(&sq->cev_callout, &sq->lock, 0);

		sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;

		/* ensure the TX completion event factor is not zero */
		if (sq->cev_factor == 0)
			sq->cev_factor = 1;
	}
}
1705
1706 static void
1707 mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1708 {
1709         int tc;
1710
1711         mtx_destroy(&c->rq.mtx);
1712
1713         for (tc = 0; tc < c->num_tc; tc++) {
1714                 mtx_destroy(&c->sq[tc].lock);
1715                 mtx_destroy(&c->sq[tc].comp_lock);
1716         }
1717 }
1718
/*
 * Open channel "ix": allocate the channel structure, initialize its
 * locks, then bring up the TX CQs, the RX CQ, the SQs and the RQ in
 * that order. On success the channel pointer is stored in *cp and the
 * RX CQ is polled once to post initial receive buffers. On failure
 * everything opened so far is torn down in reverse order.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int
mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
    struct mlx5e_channel_param *cparam,
    struct mlx5e_channel *volatile *cp)
{
	struct mlx5e_channel *c;
	int err;

	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
	c->priv = priv;
	c->ix = ix;
	c->cpu = 0;
	c->ifp = priv->ifp;
	c->mkey_be = cpu_to_be32(priv->mr.key);
	c->num_tc = priv->num_tc;

	/* init mutexes */
	mlx5e_chan_mtx_init(c);

	/* open transmit completion queue */
	err = mlx5e_open_tx_cqs(c, cparam);
	if (err)
		goto err_free;

	/* open receive completion queue */
	err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
	    &mlx5e_rx_cq_comp, c->ix);
	if (err)
		goto err_close_tx_cqs;

	err = mlx5e_open_sqs(c, cparam);
	if (err)
		goto err_close_rx_cq;

	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
	if (err)
		goto err_close_sqs;

	/* store channel pointer */
	*cp = c;

	/* poll receive queue initially */
	c->rq.cq.mcq.comp(&c->rq.cq.mcq);

	return (0);

err_close_sqs:
	mlx5e_close_sqs_wait(c);

err_close_rx_cq:
	mlx5e_close_cq(&c->rq.cq);

err_close_tx_cqs:
	mlx5e_close_tx_cqs(c);

err_free:
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
	return (err);
}
1780
/*
 * First phase of closing a channel: stop its receive queue. The heavy
 * teardown is finished by mlx5e_close_channel_wait().
 */
static void
mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	mlx5e_close_rq(&c->rq);
}
1791
/*
 * Second phase of closing a channel: clear the published channel
 * pointer, wait for the RQ and all SQs to drain, close the CQs,
 * destroy the locks and free the channel structure.
 */
static void
mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	/* ensure channel pointer is no longer used */
	*pp = NULL;

	mlx5e_close_rq_wait(&c->rq);
	mlx5e_close_sqs_wait(c);
	mlx5e_close_cq(&c->rq.cq);
	mlx5e_close_tx_cqs(c);
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
}
1811
/*
 * Fill in the RQ creation parameters shared by all channels: a
 * linked-list work queue sized by the configured log_rq_size, aligned
 * end padding, and the protection domain.
 */
static void
mlx5e_build_rq_param(struct mlx5e_priv *priv,
    struct mlx5e_rq_param *param)
{
	void *rqc = param->rqc;
	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}
1829
/*
 * Fill in the firmware send queue (SQ) creation parameters: size from
 * the configured log_sq_size, stride of one send WQE basic block and
 * the driver's protection domain.
 */
static void
mlx5e_build_sq_param(struct mlx5e_priv *priv,
    struct mlx5e_sq_param *param)
{
        void *sqc = param->sqc;
        void *wq = MLX5_ADDR_OF(sqc, sqc, wq);

        MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
        MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
        MLX5_SET(wq, wq, pd, priv->pdn);

        /* NOTE(review): NUMA node hardcoded to 0, same as the RQ case */
        param->wq.buf_numa_node = 0;
        param->wq.db_numa_node = 0;
        param->wq.linear = 1;
}
1845
/*
 * Set the CQ context fields shared by RX and TX completion queues;
 * currently only the UAR page index.
 */
static void
mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
        void *cqc = param->cqc;

        MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
}
1854
1855 static void
1856 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1857     struct mlx5e_cq_param *param)
1858 {
1859         void *cqc = param->cqc;
1860
1861
1862         /*
1863          * TODO The sysctl to control on/off is a bool value for now, which means
1864          * we only support CSUM, once HASH is implemnted we'll need to address that.
1865          */
1866         if (priv->params.cqe_zipping_en) {
1867                 MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1868                 MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1869         }
1870
1871         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1872         MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1873         MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1874
1875         switch (priv->params.rx_cq_moderation_mode) {
1876         case 0:
1877                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1878                 break;
1879         default:
1880                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1881                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1882                 else
1883                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1884                 break;
1885         }
1886
1887         mlx5e_build_common_cq_param(priv, param);
1888 }
1889
1890 static void
1891 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1892     struct mlx5e_cq_param *param)
1893 {
1894         void *cqc = param->cqc;
1895
1896         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1897         MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1898         MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1899
1900         switch (priv->params.tx_cq_moderation_mode) {
1901         case 0:
1902                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1903                 break;
1904         default:
1905                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1906                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1907                 else
1908                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1909                 break;
1910         }
1911
1912         mlx5e_build_common_cq_param(priv, param);
1913 }
1914
/*
 * Build all per-channel creation parameters (RQ, SQ, RX CQ, TX CQ)
 * into one zeroed structure; used once per mlx5e_open_channels() call.
 */
static void
mlx5e_build_channel_param(struct mlx5e_priv *priv,
    struct mlx5e_channel_param *cparam)
{
        memset(cparam, 0, sizeof(*cparam));

        mlx5e_build_rq_param(priv, &cparam->rq);
        mlx5e_build_sq_param(priv, &cparam->sq);
        mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
        mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
}
1926
1927 static int
1928 mlx5e_open_channels(struct mlx5e_priv *priv)
1929 {
1930         struct mlx5e_channel_param cparam;
1931         void *ptr;
1932         int err;
1933         int i;
1934         int j;
1935
1936         priv->channel = malloc(priv->params.num_channels *
1937             sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
1938
1939         mlx5e_build_channel_param(priv, &cparam);
1940         for (i = 0; i < priv->params.num_channels; i++) {
1941                 err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1942                 if (err)
1943                         goto err_close_channels;
1944         }
1945
1946         for (j = 0; j < priv->params.num_channels; j++) {
1947                 err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
1948                 if (err)
1949                         goto err_close_channels;
1950         }
1951
1952         return (0);
1953
1954 err_close_channels:
1955         for (i--; i >= 0; i--) {
1956                 mlx5e_close_channel(&priv->channel[i]);
1957                 mlx5e_close_channel_wait(&priv->channel[i]);
1958         }
1959
1960         /* remove "volatile" attribute from "channel" pointer */
1961         ptr = __DECONST(void *, priv->channel);
1962         priv->channel = NULL;
1963
1964         free(ptr, M_MLX5EN);
1965
1966         return (err);
1967 }
1968
1969 static void
1970 mlx5e_close_channels(struct mlx5e_priv *priv)
1971 {
1972         void *ptr;
1973         int i;
1974
1975         if (priv->channel == NULL)
1976                 return;
1977
1978         for (i = 0; i < priv->params.num_channels; i++)
1979                 mlx5e_close_channel(&priv->channel[i]);
1980         for (i = 0; i < priv->params.num_channels; i++)
1981                 mlx5e_close_channel_wait(&priv->channel[i]);
1982
1983         /* remove "volatile" attribute from "channel" pointer */
1984         ptr = __DECONST(void *, priv->channel);
1985         priv->channel = NULL;
1986
1987         free(ptr, M_MLX5EN);
1988 }
1989
1990 static int
1991 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
1992 {
1993
1994         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1995                 uint8_t cq_mode;
1996
1997                 switch (priv->params.tx_cq_moderation_mode) {
1998                 case 0:
1999                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2000                         break;
2001                 default:
2002                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2003                         break;
2004                 }
2005
2006                 return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
2007                     priv->params.tx_cq_moderation_usec,
2008                     priv->params.tx_cq_moderation_pkts,
2009                     cq_mode));
2010         }
2011
2012         return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
2013             priv->params.tx_cq_moderation_usec,
2014             priv->params.tx_cq_moderation_pkts));
2015 }
2016
2017 static int
2018 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
2019 {
2020
2021         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
2022                 uint8_t cq_mode;
2023                 int retval;
2024
2025                 switch (priv->params.rx_cq_moderation_mode) {
2026                 case 0:
2027                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2028                         break;
2029                 default:
2030                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2031                         break;
2032                 }
2033
2034                 retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
2035                     priv->params.rx_cq_moderation_usec,
2036                     priv->params.rx_cq_moderation_pkts,
2037                     cq_mode);
2038
2039                 return (retval);
2040         }
2041
2042         return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
2043             priv->params.rx_cq_moderation_usec,
2044             priv->params.rx_cq_moderation_pkts));
2045 }
2046
2047 static int
2048 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
2049 {
2050         int err;
2051         int i;
2052
2053         if (c == NULL)
2054                 return (EINVAL);
2055
2056         err = mlx5e_refresh_rq_params(priv, &c->rq);
2057         if (err)
2058                 goto done;
2059
2060         for (i = 0; i != c->num_tc; i++) {
2061                 err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
2062                 if (err)
2063                         goto done;
2064         }
2065 done:
2066         return (err);
2067 }
2068
2069 int
2070 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
2071 {
2072         int i;
2073
2074         if (priv->channel == NULL)
2075                 return (EINVAL);
2076
2077         for (i = 0; i < priv->params.num_channels; i++) {
2078                 int err;
2079
2080                 err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
2081                 if (err)
2082                         return (err);
2083         }
2084         return (0);
2085 }
2086
2087 static int
2088 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
2089 {
2090         struct mlx5_core_dev *mdev = priv->mdev;
2091         u32 in[MLX5_ST_SZ_DW(create_tis_in)];
2092         void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
2093
2094         memset(in, 0, sizeof(in));
2095
2096         MLX5_SET(tisc, tisc, prio, tc);
2097         MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
2098
2099         return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
2100 }
2101
/* Destroy the TIS object previously created for traffic class "tc". */
static void
mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
{
        mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
}
2107
2108 static int
2109 mlx5e_open_tises(struct mlx5e_priv *priv)
2110 {
2111         int num_tc = priv->num_tc;
2112         int err;
2113         int tc;
2114
2115         for (tc = 0; tc < num_tc; tc++) {
2116                 err = mlx5e_open_tis(priv, tc);
2117                 if (err)
2118                         goto err_close_tises;
2119         }
2120
2121         return (0);
2122
2123 err_close_tises:
2124         for (tc--; tc >= 0; tc--)
2125                 mlx5e_close_tis(priv, tc);
2126
2127         return (err);
2128 }
2129
2130 static void
2131 mlx5e_close_tises(struct mlx5e_priv *priv)
2132 {
2133         int num_tc = priv->num_tc;
2134         int tc;
2135
2136         for (tc = 0; tc < num_tc; tc++)
2137                 mlx5e_close_tis(priv, tc);
2138 }
2139
/*
 * Create the RQ table (RQT) used for RSS indirection.  The table has
 * 2^rx_hash_log_tbl_sz entries; each entry maps (optionally through
 * the kernel RSS indirection table) onto one of the opened channels'
 * receive queues.  On success the RQT number is stored in priv->rqtn.
 *
 * Returns 0 on success or a negative mlx5 error code (-ENOMEM when
 * the command buffer cannot be allocated).
 */
static int
mlx5e_open_rqt(struct mlx5e_priv *priv)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        u32 *in;
        u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
        void *rqtc;
        int inlen;
        int err;
        int sz;
        int i;

        sz = 1 << priv->params.rx_hash_log_tbl_sz;

        /* the RQT entries follow the fixed-size command header */
        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);
        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

        MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
        MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

        for (i = 0; i < sz; i++) {
                int ix;
#ifdef RSS
                ix = rss_get_indirection_to_bucket(i);
#else
                ix = i;
#endif
                /* ensure we don't overflow */
                ix %= priv->params.num_channels;
                MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
        }

        MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);

        err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
        if (!err)
                priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);

        kvfree(in);

        return (err);
}
2185
/*
 * Destroy the RQ table created by mlx5e_open_rqt().  The command
 * status is intentionally ignored; this runs on the teardown path.
 */
static void
mlx5e_close_rqt(struct mlx5e_priv *priv)
{
        u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
        u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};

        MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
        MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);

        mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
}
2197
/*
 * Fill in a TIR (transport interface receive) context for traffic
 * type "tt".  Configures optional hardware LRO, then the dispatch
 * type: MLX5E_TT_ANY is dispatched directly to channel 0's RQ, all
 * other traffic types go through the RSS indirection table using a
 * Toeplitz hash.  Finally the per-traffic-type hash field selection
 * (L3 addresses, L4 ports, IPsec SPI) is programmed.
 */
static void
mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
{
        void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
        __be32 *hkey;

        MLX5_SET(tirc, tirc, transport_domain, priv->tdn);

/* worst-case L2+L3 header size subtracted from the LRO message size */
#define ROUGH_MAX_L2_L3_HDR_SZ 256

#define MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
                          MLX5_HASH_FIELD_SEL_DST_IP)

#define MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
                          MLX5_HASH_FIELD_SEL_DST_IP   |\
                          MLX5_HASH_FIELD_SEL_L4_SPORT |\
                          MLX5_HASH_FIELD_SEL_L4_DPORT)

#define MLX5_HASH_IP_IPSEC_SPI  (MLX5_HASH_FIELD_SEL_SRC_IP   |\
                                 MLX5_HASH_FIELD_SEL_DST_IP   |\
                                 MLX5_HASH_FIELD_SEL_IPSEC_SPI)

        /* hardware LRO for both IPv4 and IPv6 when enabled */
        if (priv->params.hw_lro_en) {
                MLX5_SET(tirc, tirc, lro_enable_mask,
                    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
                    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
                /* lro_max_msg_sz is in units of 256 bytes */
                MLX5_SET(tirc, tirc, lro_max_msg_sz,
                    (priv->params.lro_wqe_sz -
                    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
                /* TODO: add the option to choose timer value dynamically */
                MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
                    MLX5_CAP_ETH(priv->mdev,
                    lro_timer_supported_periods[2]));
        }

        /* setup parameters for hashing TIR type, if any */
        switch (tt) {
        case MLX5E_TT_ANY:
                /* catch-all traffic goes straight to channel 0's RQ */
                MLX5_SET(tirc, tirc, disp_type,
                    MLX5_TIRC_DISP_TYPE_DIRECT);
                MLX5_SET(tirc, tirc, inline_rqn,
                    priv->channel[0]->rq.rqn);
                break;
        default:
                /* hashed traffic types spread over the RQ table */
                MLX5_SET(tirc, tirc, disp_type,
                    MLX5_TIRC_DISP_TYPE_INDIRECT);
                MLX5_SET(tirc, tirc, indirect_table,
                    priv->rqtn);
                MLX5_SET(tirc, tirc, rx_hash_fn,
                    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
                hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
#ifdef RSS
                /*
                 * The FreeBSD RSS implementation does currently not
                 * support symmetric Toeplitz hashes:
                 */
                MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
                rss_getkey((uint8_t *)hkey);
#else
                /* fixed key, with symmetric hashing enabled */
                MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
                hkey[0] = cpu_to_be32(0xD181C62C);
                hkey[1] = cpu_to_be32(0xF7F4DB5B);
                hkey[2] = cpu_to_be32(0x1983A2FC);
                hkey[3] = cpu_to_be32(0x943E1ADB);
                hkey[4] = cpu_to_be32(0xD9389E6B);
                hkey[5] = cpu_to_be32(0xD1039C2C);
                hkey[6] = cpu_to_be32(0xA74499AD);
                hkey[7] = cpu_to_be32(0x593D56D9);
                hkey[8] = cpu_to_be32(0xF3253C06);
                hkey[9] = cpu_to_be32(0x2ADC1FFC);
#endif
                break;
        }

        /*
         * Select which packet fields feed the hash for each traffic
         * type.  For TCP/UDP the kernel RSS configuration (when built
         * with RSS) may restrict the hash to the IP addresses only.
         */
        switch (tt) {
        case MLX5E_TT_IPV4_TCP:
                MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
                    MLX5_L3_PROT_TYPE_IPV4);
                MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
                    MLX5_L4_PROT_TYPE_TCP);
#ifdef RSS
                if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
                        MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                            MLX5_HASH_IP);
                } else
#endif
                MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                    MLX5_HASH_ALL);
                break;

        case MLX5E_TT_IPV6_TCP:
                MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
                    MLX5_L3_PROT_TYPE_IPV6);
                MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
                    MLX5_L4_PROT_TYPE_TCP);
#ifdef RSS
                if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
                        MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                            MLX5_HASH_IP);
                } else
#endif
                MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                    MLX5_HASH_ALL);
                break;

        case MLX5E_TT_IPV4_UDP:
                MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
                    MLX5_L3_PROT_TYPE_IPV4);
                MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
                    MLX5_L4_PROT_TYPE_UDP);
#ifdef RSS
                if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
                        MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                            MLX5_HASH_IP);
                } else
#endif
                MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                    MLX5_HASH_ALL);
                break;

        case MLX5E_TT_IPV6_UDP:
                MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
                    MLX5_L3_PROT_TYPE_IPV6);
                MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
                    MLX5_L4_PROT_TYPE_UDP);
#ifdef RSS
                if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
                        MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                            MLX5_HASH_IP);
                } else
#endif
                MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                    MLX5_HASH_ALL);
                break;

        case MLX5E_TT_IPV4_IPSEC_AH:
                MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
                    MLX5_L3_PROT_TYPE_IPV4);
                MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                    MLX5_HASH_IP_IPSEC_SPI);
                break;

        case MLX5E_TT_IPV6_IPSEC_AH:
                MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
                    MLX5_L3_PROT_TYPE_IPV6);
                MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                    MLX5_HASH_IP_IPSEC_SPI);
                break;

        case MLX5E_TT_IPV4_IPSEC_ESP:
                MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
                    MLX5_L3_PROT_TYPE_IPV4);
                MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                    MLX5_HASH_IP_IPSEC_SPI);
                break;

        case MLX5E_TT_IPV6_IPSEC_ESP:
                MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
                    MLX5_L3_PROT_TYPE_IPV6);
                MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                    MLX5_HASH_IP_IPSEC_SPI);
                break;

        case MLX5E_TT_IPV4:
                MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
                    MLX5_L3_PROT_TYPE_IPV4);
                MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                    MLX5_HASH_IP);
                break;

        case MLX5E_TT_IPV6:
                MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
                    MLX5_L3_PROT_TYPE_IPV6);
                MLX5_SET(rx_hash_field_select, hfso, selected_fields,
                    MLX5_HASH_IP);
                break;

        default:
                break;
        }
}
2379
2380 static int
2381 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2382 {
2383         struct mlx5_core_dev *mdev = priv->mdev;
2384         u32 *in;
2385         void *tirc;
2386         int inlen;
2387         int err;
2388
2389         inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2390         in = mlx5_vzalloc(inlen);
2391         if (in == NULL)
2392                 return (-ENOMEM);
2393         tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2394
2395         mlx5e_build_tir_ctx(priv, tirc, tt);
2396
2397         err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2398
2399         kvfree(in);
2400
2401         return (err);
2402 }
2403
/* Destroy the TIR object previously created for traffic type "tt". */
static void
mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
{
        mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
}
2409
2410 static int
2411 mlx5e_open_tirs(struct mlx5e_priv *priv)
2412 {
2413         int err;
2414         int i;
2415
2416         for (i = 0; i < MLX5E_NUM_TT; i++) {
2417                 err = mlx5e_open_tir(priv, i);
2418                 if (err)
2419                         goto err_close_tirs;
2420         }
2421
2422         return (0);
2423
2424 err_close_tirs:
2425         for (i--; i >= 0; i--)
2426                 mlx5e_close_tir(priv, i);
2427
2428         return (err);
2429 }
2430
2431 static void
2432 mlx5e_close_tirs(struct mlx5e_priv *priv)
2433 {
2434         int i;
2435
2436         for (i = 0; i < MLX5E_NUM_TT; i++)
2437                 mlx5e_close_tir(priv, i);
2438 }
2439
2440 /*
2441  * SW MTU does not include headers,
2442  * HW MTU includes all headers and checksums.
2443  */
2444 static int
2445 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2446 {
2447         struct mlx5e_priv *priv = ifp->if_softc;
2448         struct mlx5_core_dev *mdev = priv->mdev;
2449         int hw_mtu;
2450         int err;
2451
2452         hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);
2453
2454         err = mlx5_set_port_mtu(mdev, hw_mtu);
2455         if (err) {
2456                 if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2457                     __func__, sw_mtu, err);
2458                 return (err);
2459         }
2460
2461         /* Update vport context MTU */
2462         err = mlx5_set_vport_mtu(mdev, hw_mtu);
2463         if (err) {
2464                 if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n",
2465                     __func__, err);
2466         }
2467
2468         ifp->if_mtu = sw_mtu;
2469
2470         err = mlx5_query_vport_mtu(mdev, &hw_mtu);
2471         if (err || !hw_mtu) {
2472                 /* fallback to port oper mtu */
2473                 err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2474         }
2475         if (err) {
2476                 if_printf(ifp, "Query port MTU, after setting new "
2477                     "MTU value, failed\n");
2478                 return (err);
2479         } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
2480                 err = -E2BIG,
2481                 if_printf(ifp, "Port MTU %d is smaller than "
2482                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2483         } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
2484                 err = -EINVAL;
2485                 if_printf(ifp, "Port MTU %d is bigger than "
2486                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2487         }
2488         priv->params_ethtool.hw_mtu = hw_mtu;
2489
2490         return (err);
2491 }
2492
/*
 * Bring the interface up while holding the private lock.  Creates the
 * hardware receive/transmit pipeline in order: TISes, queue counter,
 * channels, RQ table, TIRs, flow table and VLAN rules, then marks the
 * state opened, refreshes carrier state and programs the RX mode.
 * Each failure unwinds everything created so far via the goto chain
 * below.  A no-op if the interface is already opened.
 */
int
mlx5e_open_locked(struct ifnet *ifp)
{
        struct mlx5e_priv *priv = ifp->if_softc;
        int err;
        u16 set_id;

        /* check if already opened */
        if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
                return (0);

#ifdef RSS
        if (rss_getnumbuckets() > priv->params.num_channels) {
                if_printf(ifp, "NOTE: There are more RSS buckets(%u) than "
                    "channels(%u) available\n", rss_getnumbuckets(),
                    priv->params.num_channels);
        }
#endif
        err = mlx5e_open_tises(priv);
        if (err) {
                if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
                    __func__, err);
                return (err);
        }
        err = mlx5_vport_alloc_q_counter(priv->mdev,
            MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
        if (err) {
                if_printf(priv->ifp,
                    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
                    __func__, err);
                goto err_close_tises;
        }
        /* store counter set ID */
        priv->counter_set_id = set_id;

        err = mlx5e_open_channels(priv);
        if (err) {
                if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
                    __func__, err);
                goto err_dalloc_q_counter;
        }
        err = mlx5e_open_rqt(priv);
        if (err) {
                if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
                    __func__, err);
                goto err_close_channels;
        }
        err = mlx5e_open_tirs(priv);
        if (err) {
                if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
                    __func__, err);
                goto err_close_rqls;
        }
        err = mlx5e_open_flow_table(priv);
        if (err) {
                if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
                    __func__, err);
                goto err_close_tirs;
        }
        err = mlx5e_add_all_vlan_rules(priv);
        if (err) {
                if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
                    __func__, err);
                goto err_close_flow_table;
        }
        set_bit(MLX5E_STATE_OPENED, &priv->state);

        mlx5e_update_carrier(priv);
        mlx5e_set_rx_mode_core(priv);

        return (0);

        /* error unwind: reverse order of creation */
err_close_flow_table:
        mlx5e_close_flow_table(priv);

err_close_tirs:
        mlx5e_close_tirs(priv);

err_close_rqls:
        mlx5e_close_rqt(priv);

err_close_channels:
        mlx5e_close_channels(priv);

err_dalloc_q_counter:
        mlx5_vport_dealloc_q_counter(priv->mdev,
            MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);

err_close_tises:
        mlx5e_close_tises(priv);

        return (err);
}
2586
2587 static void
2588 mlx5e_open(void *arg)
2589 {
2590         struct mlx5e_priv *priv = arg;
2591
2592         PRIV_LOCK(priv);
2593         if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2594                 if_printf(priv->ifp,
2595                     "%s: Setting port status to up failed\n",
2596                     __func__);
2597
2598         mlx5e_open_locked(priv->ifp);
2599         priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2600         PRIV_UNLOCK(priv);
2601 }
2602
/*
 * Bring the interface down while holding the private lock.  Tears the
 * pipeline down in reverse of mlx5e_open_locked() after clearing the
 * opened bit.  A no-op if the interface is already closed.
 */
int
mlx5e_close_locked(struct ifnet *ifp)
{
        struct mlx5e_priv *priv = ifp->if_softc;

        /* check if already closed */
        if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
                return (0);

        clear_bit(MLX5E_STATE_OPENED, &priv->state);

        /* teardown in reverse order of mlx5e_open_locked() */
        mlx5e_set_rx_mode_core(priv);
        mlx5e_del_all_vlan_rules(priv);
        if_link_state_change(priv->ifp, LINK_STATE_DOWN);
        mlx5e_close_flow_table(priv);
        mlx5e_close_tirs(priv);
        mlx5e_close_rqt(priv);
        mlx5e_close_channels(priv);
        mlx5_vport_dealloc_q_counter(priv->mdev,
            MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
        mlx5e_close_tises(priv);

        return (0);
}
2627
#if (__FreeBSD_version >= 1100000)
/*
 * if_get_counter callback: report interface statistics from the
 * driver's software copies of the vport/pport counters.  Runs without
 * taking the private lock (not allowed from this context), so the
 * values read may be slightly stale.
 */
static uint64_t
mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
{
        struct mlx5e_priv *priv = ifp->if_softc;
        u64 value;

        switch (cnt) {
        case IFCOUNTER_IPACKETS:
                value = priv->stats.vport.rx_packets;
                break;
        case IFCOUNTER_IERRORS:
                /* sum of all receive-side error counters */
                value = priv->stats.vport.rx_error_packets +
                    priv->stats.pport.alignment_err +
                    priv->stats.pport.check_seq_err +
                    priv->stats.pport.crc_align_errors +
                    priv->stats.pport.in_range_len_errors +
                    priv->stats.pport.jabbers +
                    priv->stats.pport.out_of_range_len +
                    priv->stats.pport.oversize_pkts +
                    priv->stats.pport.symbol_err +
                    priv->stats.pport.too_long_errors +
                    priv->stats.pport.undersize_pkts +
                    priv->stats.pport.unsupported_op_rx;
                break;
        case IFCOUNTER_IQDROPS:
                value = priv->stats.vport.rx_out_of_buffer +
                    priv->stats.pport.drop_events;
                break;
        case IFCOUNTER_OPACKETS:
                value = priv->stats.vport.tx_packets;
                break;
        case IFCOUNTER_OERRORS:
                value = priv->stats.vport.tx_error_packets;
                break;
        case IFCOUNTER_IBYTES:
                value = priv->stats.vport.rx_bytes;
                break;
        case IFCOUNTER_OBYTES:
                value = priv->stats.vport.tx_bytes;
                break;
        case IFCOUNTER_IMCASTS:
                value = priv->stats.vport.rx_multicast_packets;
                break;
        case IFCOUNTER_OMCASTS:
                value = priv->stats.vport.tx_multicast_packets;
                break;
        case IFCOUNTER_OQDROPS:
                value = priv->stats.vport.tx_queue_dropped;
                break;
        case IFCOUNTER_COLLISIONS:
                value = priv->stats.pport.collisions;
                break;
        default:
                value = if_get_counter_default(ifp, cnt);
                break;
        }
        return (value);
}
#endif
2690
/*
 * Schedule RX filter/mode reprogramming on the driver workqueue;
 * the actual work is done by the set_rx_mode_work handler.
 */
static void
mlx5e_set_rx_mode(struct ifnet *ifp)
{
        struct mlx5e_priv *priv = ifp->if_softc;

        queue_work(priv->wq, &priv->set_rx_mode_work);
}
2698
/*
 * Ioctl entry point for the mlx5en network interface.
 *
 * Handles MTU changes, interface up/down, multicast filter updates,
 * media selection, capability toggling and SFP/QSFP module EEPROM
 * reads (SIOCGI2C); all other requests are forwarded to ether_ioctl().
 * Per-device state is protected by PRIV_LOCK() wherever it is
 * modified.  Returns zero on success or an errno value.
 */
static int
mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct mlx5e_priv *priv;
	struct ifreq *ifr;
	struct ifi2creq i2c;
	int error = 0;
	int mask = 0;
	int size_read = 0;
	int module_status;
	int module_num;
	int max_mtu;
	uint8_t read_addr;

	priv = ifp->if_softc;

	/* check if detaching */
	if (priv == NULL || priv->gone != 0)
		return (ENXIO);

	switch (command) {
	case SIOCSIFMTU:
		ifr = (struct ifreq *)data;

		PRIV_LOCK(priv);
		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);

		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
			int was_opened;

			/* the interface is torn down across an MTU change */
			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
			if (was_opened)
				mlx5e_close_locked(ifp);

			/* set new MTU */
			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);

			if (was_opened)
				mlx5e_open_locked(ifp);
		} else {
			error = EINVAL;
			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
		}
		PRIV_UNLOCK(priv);
		break;
	case SIOCSIFFLAGS:
		/* already up and running: just refresh the RX filters */
		if ((ifp->if_flags & IFF_UP) &&
		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			mlx5e_set_rx_mode(ifp);
			break;
		}
		PRIV_LOCK(priv);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
				/* open queues before raising the port */
				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
					mlx5e_open_locked(ifp);
				ifp->if_drv_flags |= IFF_DRV_RUNNING;
				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				/* lower the port before closing the queues */
				mlx5_set_port_status(priv->mdev,
				    MLX5_PORT_DOWN);
				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
					mlx5e_close_locked(ifp);
				mlx5e_update_carrier(priv);
				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			}
		}
		PRIV_UNLOCK(priv);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mlx5e_set_rx_mode(ifp);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
		ifr = (struct ifreq *)data;
		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
		break;
	case SIOCSIFCAP:
		ifr = (struct ifreq *)data;
		PRIV_LOCK(priv);
		/* "mask" holds the capability bits being toggled */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;

		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);

			/* TSO4 depends on TX checksum offload */
			if (IFCAP_TSO4 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_IP_TSO;
				if_printf(ifp,
				    "tso4 disabled due to -txcsum.\n");
			}
		}
		if (mask & IFCAP_TXCSUM_IPV6) {
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);

			/* TSO6 depends on IPv6 TX checksum offload */
			if (IFCAP_TSO6 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO6;
				ifp->if_hwassist &= ~CSUM_IP6_TSO;
				if_printf(ifp,
				    "tso6 disabled due to -txcsum6.\n");
			}
		}
		if (mask & IFCAP_RXCSUM)
			ifp->if_capenable ^= IFCAP_RXCSUM;
		if (mask & IFCAP_RXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
		if (mask & IFCAP_TSO4) {
			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum first.\n");
				error = EAGAIN;
				goto out;
			}
			ifp->if_capenable ^= IFCAP_TSO4;
			ifp->if_hwassist ^= CSUM_IP_TSO;
		}
		if (mask & IFCAP_TSO6) {
			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum6 first.\n");
				error = EAGAIN;
				goto out;
			}
			ifp->if_capenable ^= IFCAP_TSO6;
			ifp->if_hwassist ^= CSUM_IP6_TSO;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
				mlx5e_disable_vlan_filter(priv);
			else
				mlx5e_enable_vlan_filter(priv);

			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		if (mask & IFCAP_WOL_MAGIC)
			ifp->if_capenable ^= IFCAP_WOL_MAGIC;

		VLAN_CAPABILITIES(ifp);
		/* turn off LRO means also turn of HW LRO - if it's on */
		if (mask & IFCAP_LRO) {
			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
			bool need_restart = false;

			ifp->if_capenable ^= IFCAP_LRO;
			if (!(ifp->if_capenable & IFCAP_LRO)) {
				if (priv->params.hw_lro_en) {
					priv->params.hw_lro_en = false;
					need_restart = true;
					/* Not sure this is the correct way */
					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
				}
			}
			/* restart queues so the new LRO setting takes effect */
			if (was_opened && need_restart) {
				mlx5e_close_locked(ifp);
				mlx5e_open_locked(ifp);
			}
		}
		if (mask & IFCAP_HWRXTSTMP) {
			ifp->if_capenable ^= IFCAP_HWRXTSTMP;
			if (ifp->if_capenable & IFCAP_HWRXTSTMP) {
				/* start RX timestamp calibration, if idle */
				if (priv->clbr_done == 0)
					mlx5e_reset_calibration_callout(priv);
			} else {
				callout_drain(&priv->tstmp_clbr);
				priv->clbr_done = 0;
			}
		}
out:
		PRIV_UNLOCK(priv);
		break;

	case SIOCGI2C:
		ifr = (struct ifreq *)data;

		/*
		 * Copy from the user-space address ifr_data to the
		 * kernel-space address i2c
		 */
		error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
		if (error)
			break;

		if (i2c.len > sizeof(i2c.data)) {
			error = EINVAL;
			break;
		}

		PRIV_LOCK(priv);
		/* Get module_num which is required for the query_eeprom */
		error = mlx5_query_module_num(priv->mdev, &module_num);
		if (error) {
			if_printf(ifp, "Query module num failed, eeprom "
			    "reading is not supported\n");
			error = EINVAL;
			goto err_i2c;
		}
		/* Check if module is present before doing an access */
		module_status = mlx5_query_module_status(priv->mdev, module_num);
		if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
		    module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
			error = EINVAL;
			goto err_i2c;
		}
		/*
		 * Currently 0XA0 and 0xA2 are the only addresses permitted.
		 * The internal conversion is as follows:
		 */
		if (i2c.dev_addr == 0xA0)
			read_addr = MLX5E_I2C_ADDR_LOW;
		else if (i2c.dev_addr == 0xA2)
			read_addr = MLX5E_I2C_ADDR_HIGH;
		else {
			if_printf(ifp, "Query eeprom failed, "
			    "Invalid Address: %X\n", i2c.dev_addr);
			error = EINVAL;
			goto err_i2c;
		}
		error = mlx5_query_eeprom(priv->mdev,
		    read_addr, MLX5E_EEPROM_LOW_PAGE,
		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
		    (uint32_t *)i2c.data, &size_read);
		if (error) {
			if_printf(ifp, "Query eeprom failed, eeprom "
			    "reading is not supported\n");
			error = EINVAL;
			goto err_i2c;
		}

		/*
		 * A single query reads at most MLX5_EEPROM_MAX_BYTES;
		 * issue a second query for the remainder, if any.
		 */
		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
			error = mlx5_query_eeprom(priv->mdev,
			    read_addr, MLX5E_EEPROM_LOW_PAGE,
			    (uint32_t)(i2c.offset + size_read),
			    (uint32_t)(i2c.len - size_read), module_num,
			    (uint32_t *)(i2c.data + size_read), &size_read);
		}
		if (error) {
			if_printf(ifp, "Query eeprom failed, eeprom "
			    "reading is not supported\n");
			error = EINVAL;
			goto err_i2c;
		}

		error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
err_i2c:
		PRIV_UNLOCK(priv);
		break;

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}
	return (error);
}
2964
2965 static int
2966 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2967 {
2968         /*
2969          * TODO: uncoment once FW really sets all these bits if
2970          * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
2971          * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
2972          * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
2973          * -ENOTSUPP;
2974          */
2975
2976         /* TODO: add more must-to-have features */
2977
2978         if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
2979                 return (-ENODEV);
2980
2981         return (0);
2982 }
2983
/*
 * Fill in the default runtime parameters of a freshly allocated priv
 * structure based on device capabilities and the number of available
 * completion vectors, and initialize its deferred work handlers.
 */
static void
mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
    struct mlx5e_priv *priv,
    int num_comp_vectors)
{
	/*
	 * TODO: Consider link speed for setting "log_sq_size",
	 * "log_rq_size" and "cq_moderation_xxx":
	 */
	priv->params.log_sq_size =
	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
	priv->params.log_rq_size =
	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
	/* prefer CQE-based moderation when the device supports it */
	priv->params.rx_cq_moderation_usec =
	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
	priv->params.rx_cq_moderation_mode =
	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
	priv->params.rx_cq_moderation_pkts =
	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
	priv->params.tx_cq_moderation_usec =
	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
	priv->params.tx_cq_moderation_pkts =
	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
	priv->params.min_rx_wqes =
	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
	/* the RX hash table needs at least one entry per channel */
	priv->params.rx_hash_log_tbl_sz =
	    (order_base_2(num_comp_vectors) >
	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
	    order_base_2(num_comp_vectors) :
	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
	priv->params.num_tc = 1;
	priv->params.default_vlan_prio = 0;
	priv->counter_set_id = -1;

	/*
	 * hw lro is currently defaulted to off. when it won't anymore we
	 * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
	 */
	priv->params.hw_lro_en = false;
	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;

	priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);

	priv->mdev = mdev;
	/* one channel per completion vector */
	priv->params.num_channels = num_comp_vectors;
	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
	/* power-of-two mask used when mapping flow ids to channels */
	priv->queue_mapping_channel_mask =
	    roundup_pow_of_two(num_comp_vectors) - 1;
	priv->num_tc = priv->params.num_tc;
	priv->default_vlan_prio = priv->params.default_vlan_prio;

	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
}
3041
/*
 * Create a physical-address (PA) memory key in the given protection
 * domain with local read/write access over the whole address space
 * (length64).  Returns zero on success or a negative error code.
 */
static int
mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
                  struct mlx5_core_mr *mkey)
{
	struct ifnet *ifp = priv->ifp;
	struct mlx5_core_dev *mdev = priv->mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	void *mkc;
	u32 *in;
	int err;

	in = mlx5_vzalloc(inlen);
	if (in == NULL) {
		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
		return (-ENOMEM);
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
	/* grant local write and local read access */
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, pd, pdn);
	/* length64 = 1: the mkey covers the entire 64-bit address range */
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
	if (err)
		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
		    __func__, err);

	kvfree(in);
	return (err);
}
3076
/* Name/format descriptions for the vport counters, macro-generated. */
static const char *mlx5e_vport_stats_desc[] = {
	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
};

/* Name/format descriptions for the physical-port counters. */
static const char *mlx5e_pport_stats_desc[] = {
	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
};
3084
/*
 * Initialize the locks and the watchdog callout embedded in the priv
 * structure; undone by mlx5e_priv_mtx_destroy().
 */
static void
mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
{
	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
	/* state_lock is the lock behind PRIV_LOCK()/PRIV_UNLOCK() */
	sx_init(&priv->state_lock, "mlx5state");
	/* the watchdog callout runs under the async events mutex */
	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
	MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
}
3093
/* Tear down the locks created by mlx5e_priv_mtx_init(). */
static void
mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
{
	mtx_destroy(&priv->async_events_mtx);
	sx_destroy(&priv->state_lock);
}
3100
3101 static int
3102 sysctl_firmware(SYSCTL_HANDLER_ARGS)
3103 {
3104         /*
3105          * %d.%d%.d the string format.
3106          * fw_rev_{maj,min,sub} return u16, 2^16 = 65536.
3107          * We need at most 5 chars to store that.
3108          * It also has: two "." and NULL at the end, which means we need 18
3109          * (5*3 + 3) chars at most.
3110          */
3111         char fw[18];
3112         struct mlx5e_priv *priv = arg1;
3113         int error;
3114
3115         snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
3116             fw_rev_sub(priv->mdev));
3117         error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
3118         return (error);
3119 }
3120
3121 static void
3122 mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
3123 {
3124         int i;
3125
3126         for (i = 0; i < ch->num_tc; i++)
3127                 mlx5e_drain_sq(&ch->sq[i]);
3128 }
3129
/*
 * Write a NOP doorbell for the SQ and then clear the doorbell record.
 * Called while the queue is in RST state (see mlx5e_resume_sq()) so
 * the hardware sees a clean doorbell before the queue is moved to RDY.
 */
static void
mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
{

	sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
	sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
	mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
	sq->doorbell.d64 = 0;
}
3139
/*
 * Bring a stopped send queue back into service:  move it from ERR to
 * RST, reset the producer/consumer indices and the doorbell record,
 * then move it from RST to RDY and clear the stopped flag.
 */
void
mlx5e_resume_sq(struct mlx5e_sq *sq)
{
	int err;

	/* check if already enabled */
	if (sq->stopped == 0)
		return;

	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
	    MLX5_SQC_STATE_RST);
	if (err != 0) {
		if_printf(sq->ifp,
		    "mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
	}

	/* restart from a clean producer/consumer state */
	sq->cc = 0;
	sq->pc = 0;

	/* reset doorbell prior to moving from RST to RDY */
	mlx5e_reset_sq_doorbell_record(sq);

	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
	    MLX5_SQC_STATE_RDY);
	if (err != 0) {
		if_printf(sq->ifp,
		    "mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
	}

	/* mark the queue running under its lock */
	mtx_lock(&sq->lock);
	sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
	sq->stopped = 0;
	mtx_unlock(&sq->lock);

}
3175
3176 static void
3177 mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
3178 {
3179         int i;
3180
3181         for (i = 0; i < ch->num_tc; i++)
3182                 mlx5e_resume_sq(&ch->sq[i]);
3183 }
3184
3185 static void
3186 mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
3187 {
3188         struct mlx5e_rq *rq = &ch->rq;
3189         int err;
3190
3191         mtx_lock(&rq->mtx);
3192         rq->enabled = 0;
3193         callout_stop(&rq->watchdog);
3194         mtx_unlock(&rq->mtx);
3195
3196         callout_drain(&rq->watchdog);
3197
3198         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
3199         if (err != 0) {
3200                 if_printf(rq->ifp,
3201                     "mlx5e_modify_rq() from RDY to RST failed: %d\n", err);
3202         }
3203
3204         while (!mlx5_wq_ll_is_empty(&rq->wq)) {
3205                 msleep(1);
3206                 rq->cq.mcq.comp(&rq->cq.mcq);
3207         }
3208
3209         /*
3210          * Transitioning into RST state will allow the FW to track less ERR state queues,
3211          * thus reducing the recv queue flushing time
3212          */
3213         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
3214         if (err != 0) {
3215                 if_printf(rq->ifp,
3216                     "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
3217         }
3218 }
3219
/*
 * Re-enable receive DMA on the given channel:  rewind the RQ work
 * queue, move it from RST to RDY and invoke the completion handler
 * once so the queue starts being serviced again.
 */
static void
mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
{
	struct mlx5e_rq *rq = &ch->rq;
	int err;

	/* start over from an empty work queue */
	rq->wq.wqe_ctr = 0;
	mlx5_wq_ll_update_db_record(&rq->wq);
	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
	if (err != 0) {
		if_printf(rq->ifp,
		    "mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
	}

	rq->enabled = 1;

	/* kick the completion handler once to get things going */
	rq->cq.mcq.comp(&rq->cq.mcq);
}
3238
3239 void
3240 mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
3241 {
3242         int i;
3243
3244         if (priv->channel == NULL)
3245                 return;
3246
3247         for (i = 0; i < priv->params.num_channels; i++) {
3248
3249                 if (!priv->channel[i])
3250                         continue;
3251
3252                 if (value)
3253                         mlx5e_disable_tx_dma(priv->channel[i]);
3254                 else
3255                         mlx5e_enable_tx_dma(priv->channel[i]);
3256         }
3257 }
3258
3259 void
3260 mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
3261 {
3262         int i;
3263
3264         if (priv->channel == NULL)
3265                 return;
3266
3267         for (i = 0; i < priv->params.num_channels; i++) {
3268
3269                 if (!priv->channel[i])
3270                         continue;
3271
3272                 if (value)
3273                         mlx5e_disable_rx_dma(priv->channel[i]);
3274                 else
3275                         mlx5e_enable_rx_dma(priv->channel[i]);
3276         }
3277 }
3278
/*
 * Attach the static hardware information sysctls (firmware version
 * and board ID) under the device's "hw" sysctl node.
 */
static void
mlx5e_add_hw_stats(struct mlx5e_priv *priv)
{
	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
	    sysctl_firmware, "A", "HCA firmware version");

	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
	    "Board ID");
}
3290
3291 static int
3292 mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3293 {
3294         struct mlx5e_priv *priv = arg1;
3295         uint32_t tx_pfc;
3296         uint32_t value;
3297         int error;
3298
3299         PRIV_LOCK(priv);
3300
3301         tx_pfc = priv->params.tx_priority_flow_control;
3302
3303         /* get current value */
3304         value = (tx_pfc >> arg2) & 1;
3305
3306         error = sysctl_handle_32(oidp, &value, 0, req);
3307
3308         /* range check value */
3309         if (value != 0)
3310                 priv->params.tx_priority_flow_control |= (1 << arg2);
3311         else
3312                 priv->params.tx_priority_flow_control &= ~(1 << arg2);
3313
3314         /* check if update is required */
3315         if (error == 0 && priv->gone == 0 &&
3316             tx_pfc != priv->params.tx_priority_flow_control) {
3317                 error = -mlx5e_set_port_pfc(priv);
3318                 /* restore previous value */
3319                 if (error != 0)
3320                         priv->params.tx_priority_flow_control= tx_pfc;
3321         }
3322         PRIV_UNLOCK(priv);
3323
3324         return (error);
3325 }
3326
3327 static int
3328 mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3329 {
3330         struct mlx5e_priv *priv = arg1;
3331         uint32_t rx_pfc;
3332         uint32_t value;
3333         int error;
3334
3335         PRIV_LOCK(priv);
3336
3337         rx_pfc = priv->params.rx_priority_flow_control;
3338
3339         /* get current value */
3340         value = (rx_pfc >> arg2) & 1;
3341
3342         error = sysctl_handle_32(oidp, &value, 0, req);
3343
3344         /* range check value */
3345         if (value != 0)
3346                 priv->params.rx_priority_flow_control |= (1 << arg2);
3347         else
3348                 priv->params.rx_priority_flow_control &= ~(1 << arg2);
3349
3350         /* check if update is required */
3351         if (error == 0 && priv->gone == 0 &&
3352             rx_pfc != priv->params.rx_priority_flow_control) {
3353                 error = -mlx5e_set_port_pfc(priv);
3354                 /* restore previous value */
3355                 if (error != 0)
3356                         priv->params.rx_priority_flow_control= rx_pfc;
3357         }
3358         PRIV_UNLOCK(priv);
3359
3360         return (error);
3361 }
3362
3363 static void
3364 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
3365 {
3366         unsigned int x;
3367         char path[96];
3368         int error;
3369
3370         /* Only receiving pauseframes is enabled by default */
3371         priv->params.tx_pauseframe_control = 0;
3372         priv->params.rx_pauseframe_control = 1;
3373
3374         /* disable ports flow control, PFC, by default */
3375         priv->params.tx_priority_flow_control = 0;
3376         priv->params.rx_priority_flow_control = 0;
3377
3378 #if (__FreeBSD_version < 1100000)
3379         /* compute path for sysctl */
3380         snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
3381             device_get_unit(priv->mdev->pdev->dev.bsddev));
3382
3383         /* try to fetch tunable, if any */
3384         TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
3385
3386         /* compute path for sysctl */
3387         snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
3388             device_get_unit(priv->mdev->pdev->dev.bsddev));
3389
3390         /* try to fetch tunable, if any */
3391         TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
3392
3393         for (x = 0; x != 8; x++) {
3394
3395                 /* compute path for sysctl */
3396                 snprintf(path, sizeof(path), "dev.mce.%d.tx_priority_flow_control_%u",
3397                     device_get_unit(priv->mdev->pdev->dev.bsddev), x);
3398
3399                 /* try to fetch tunable, if any */
3400                 if (TUNABLE_INT_FETCH(path, &value) == 0 && value != 0)
3401                         priv->params.tx_priority_flow_control |= 1 << x;
3402
3403                 /* compute path for sysctl */
3404                 snprintf(path, sizeof(path), "dev.mce.%d.rx_priority_flow_control_%u",
3405                     device_get_unit(priv->mdev->pdev->dev.bsddev), x);
3406
3407                 /* try to fetch tunable, if any */
3408                 if (TUNABLE_INT_FETCH(path, &value) == 0 && value != 0)
3409                         priv->params.rx_priority_flow_control |= 1 << x;
3410         }
3411 #endif
3412
3413         /* register pauseframe SYSCTLs */
3414         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3415             OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
3416             &priv->params.tx_pauseframe_control, 0,
3417             "Set to enable TX pause frames. Clear to disable.");
3418
3419         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3420             OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
3421             &priv->params.rx_pauseframe_control, 0,
3422             "Set to enable RX pause frames. Clear to disable.");
3423
3424         /* register priority_flow control, PFC, SYSCTLs */
3425         for (x = 0; x != 8; x++) {
3426                 snprintf(path, sizeof(path), "tx_priority_flow_control_%u", x);
3427
3428                 SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3429                     OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3430                     CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_tx_priority_flow_control, "IU",
3431                     "Set to enable TX ports flow control frames for given priority. Clear to disable.");
3432
3433                 snprintf(path, sizeof(path), "rx_priority_flow_control_%u", x);
3434
3435                 SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3436                     OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3437                     CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_rx_priority_flow_control, "IU",
3438                     "Set to enable RX ports flow control frames for given priority. Clear to disable.");
3439         }
3440
3441         PRIV_LOCK(priv);
3442
3443         /* range check */
3444         priv->params.tx_pauseframe_control =
3445             priv->params.tx_pauseframe_control ? 1 : 0;
3446         priv->params.rx_pauseframe_control =
3447             priv->params.rx_pauseframe_control ? 1 : 0;
3448
3449         /* update firmware */
3450         error = mlx5e_set_port_pause_and_pfc(priv);
3451         if (error == -EINVAL) {
3452                 if_printf(priv->ifp,
3453                     "Global pauseframes must be disabled before enabling PFC.\n");
3454                 priv->params.rx_priority_flow_control = 0;
3455                 priv->params.tx_priority_flow_control = 0;
3456
3457                 /* update firmware */
3458                 (void) mlx5e_set_port_pause_and_pfc(priv);
3459         }
3460         PRIV_UNLOCK(priv);
3461 }
3462
/*
 * Attach callback for the mlx5 core "interface" registration
 * (mlx5e_interface.add).  Creates and fully initializes one Ethernet
 * ifnet instance on top of the given core device: allocates the
 * per-port private softc, the ifnet, the sysctl trees, the hardware
 * resources (UAR, PD, transport domain, mkey), queries/assigns the
 * MAC address, builds the supported media list, attaches to the
 * network stack and starts statistics/calibration machinery.
 *
 * Returns the private softc pointer (opaque to the caller) on
 * success, or NULL on failure.  On failure every resource acquired
 * up to that point is released via the goto cleanup chain at the
 * bottom (standard kernel unwind-in-reverse-order pattern).
 */
static void *
mlx5e_create_ifp(struct mlx5_core_dev *mdev)
{
	struct ifnet *ifp;
	struct mlx5e_priv *priv;
	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
	struct sysctl_oid_list *child;
	int ncv = mdev->priv.eq_table.num_comp_vectors;
	char unit[16];
	int err;
	int i;
	u32 eth_proto_cap;

	/* Refuse to attach if the HCA lacks mandatory capabilities. */
	if (mlx5e_check_required_hca_cap(mdev)) {
		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
		return (NULL);
	}
	/* M_WAITOK: allocation cannot fail, no NULL check needed. */
	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
	mlx5e_priv_mtx_init(priv);

	ifp = priv->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		mlx5_core_err(mdev, "if_alloc() failed\n");
		goto err_free_priv;
	}
	ifp->if_softc = priv;
	/* Interface is named "mce<unit>" after the PCI device unit. */
	if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
	ifp->if_mtu = ETHERMTU;
	ifp->if_init = mlx5e_open;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mlx5e_ioctl;
	ifp->if_transmit = mlx5e_xmit;
	ifp->if_qflush = if_qflush;
#if (__FreeBSD_version >= 1100000)
	ifp->if_get_counter = mlx5e_get_counter;
#endif
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	/*
	 * Set driver features
	 */
	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
	ifp->if_capabilities |= IFCAP_LRO;
	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
	ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;

	/* set TSO limits so that we don't have to drop TX packets */
	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;

	/* Enable all capabilities by default; derive hwassist from them. */
	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TSO)
		ifp->if_hwassist |= CSUM_TSO;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);

	/* ifnet sysctl tree */
	sysctl_ctx_init(&priv->sysctl_ctx);
	/* First level: dev.<dname> (e.g. dev.mce). */
	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
	if (priv->sysctl_ifnet == NULL) {
		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
		goto err_free_sysctl;
	}
	/* Second level: dev.<dname>.<unit> (e.g. dev.mce.0). */
	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
	if (priv->sysctl_ifnet == NULL) {
		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
		goto err_free_sysctl;
	}

	/* HW sysctl tree */
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
	if (priv->sysctl_hw == NULL) {
		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
		goto err_free_sysctl;
	}
	/* Populate default parameters; ncv bounds the channel count. */
	mlx5e_build_ifp_priv(mdev, priv, ncv);

	/* Single-threaded workqueue for deferred per-port work. */
	snprintf(unit, sizeof(unit), "mce%u_wq",
	    device_get_unit(mdev->pdev->dev.bsddev));
	priv->wq = alloc_workqueue(unit, 0, 1);
	if (priv->wq == NULL) {
		if_printf(ifp, "%s: alloc_workqueue failed\n", __func__);
		goto err_free_sysctl;
	}

	/* Hardware resources: UAR -> PD -> transport domain -> mkey. */
	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
	if (err) {
		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
		    __func__, err);
		goto err_free_wq;
	}
	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
	if (err) {
		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
		    __func__, err);
		goto err_unmap_free_uar;
	}
	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
	if (err) {
		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
		    __func__, err);
		goto err_dealloc_pd;
	}
	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
	if (err) {
		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
		    __func__, err);
		goto err_dealloc_transport_domain;
	}
	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);

	/* check if we should generate a random MAC address */
	if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
	    is_zero_ether_addr(dev_addr)) {
		random_ether_addr(dev_addr);
		if_printf(ifp, "Assigned random MAC address\n");
	}

	/* set default MTU */
	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);

	/* Set desc */
	device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);

	/* Set default media status */
	priv->media_status_last = IFM_AVALID;
	priv->media_active_last = IFM_ETHER | IFM_AUTO |
	    IFM_ETH_RXPAUSE | IFM_FDX;

	/* setup default pauseframes configuration */
	mlx5e_setup_pauseframes(priv);

	/* Non-fatal: fall back to an empty media capability mask. */
	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
	if (err) {
		eth_proto_cap = 0;
		if_printf(ifp, "%s: Query port media capability failed, %d\n",
		    __func__, err);
	}

	/* Setup supported medias */
	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
	    mlx5e_media_change, mlx5e_media_status);

	/* Add one plain and one full-duplex+pause entry per link mode. */
	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
		if (mlx5e_mode_table[i].baudrate == 0)
			continue;
		if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
			ifmedia_add(&priv->media,
			    mlx5e_mode_table[i].subtype |
			    IFM_ETHER, 0, NULL);
			ifmedia_add(&priv->media,
			    mlx5e_mode_table[i].subtype |
			    IFM_ETHER | IFM_FDX |
			    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
		}
	}

	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);

	/* Set autoselect by default */
	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
	/* Point of no return: after this the ifnet is publicly visible. */
	ether_ifattach(ifp, dev_addr);

	/* Register for VLAN events */
	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);

	/* Link is down by default */
	if_link_state_change(ifp, LINK_STATE_DOWN);

	mlx5e_enable_async_events(priv);

	mlx5e_add_hw_stats(priv);

	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
	    priv->stats.vport.arg);

	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
	    priv->stats.pport.arg);

	mlx5e_create_ethtool(priv);

	/* Prime the counters under the same lock the async updater uses. */
	mtx_lock(&priv->async_events_mtx);
	mlx5e_update_stats(priv);
	mtx_unlock(&priv->async_events_mtx);

	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    OID_AUTO, "rx_clbr_done", CTLFLAG_RD,
	    &priv->clbr_done, 0,
	    "RX timestamps calibration state");
	callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT);
	mlx5e_reset_calibration_callout(priv);

	return (priv);

	/* Error unwinding: release resources in reverse acquisition order. */
err_dealloc_transport_domain:
	mlx5_dealloc_transport_domain(mdev, priv->tdn);

err_dealloc_pd:
	mlx5_core_dealloc_pd(mdev, priv->pdn);

err_unmap_free_uar:
	mlx5_unmap_free_uar(mdev, &priv->cq_uar);

err_free_wq:
	destroy_workqueue(priv->wq);

err_free_sysctl:
	/* Frees every sysctl node registered on this context. */
	sysctl_ctx_free(&priv->sysctl_ctx);

	if_free(ifp);

err_free_priv:
	mlx5e_priv_mtx_destroy(priv);
	free(priv, M_MLX5EN);
	return (NULL);
}
3698
/*
 * Detach callback (mlx5e_interface.remove).  Tears down one Ethernet
 * interface created by mlx5e_create_ifp().  The sequence mirrors the
 * attach path in reverse: first fence off new work (ioctls, callouts,
 * event handlers), then close and detach the ifnet, then release
 * sysctl trees and hardware resources, and finally free the softc.
 */
static void
mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
{
	struct mlx5e_priv *priv = vpriv;
	struct ifnet *ifp = priv->ifp;

	/* don't allow more IOCTLs */
	priv->gone = 1;

	/*
	 * Clear the device description to avoid use after free,
	 * because the bsddev is not destroyed when this module is
	 * unloaded:
	 */
	device_set_desc(mdev->pdev->dev.bsddev, NULL);

	/* XXX wait a bit to allow IOCTL handlers to complete */
	pause("W", hz);

	/* stop watchdog timer */
	callout_drain(&priv->watchdog);

	/* stop the RX timestamp calibration callout */
	callout_drain(&priv->tstmp_clbr);

	/* Unhook VLAN event handlers registered during attach. */
	if (priv->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
	if (priv->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);

	/* make sure device gets closed */
	PRIV_LOCK(priv);
	mlx5e_close_locked(ifp);
	PRIV_UNLOCK(priv);

	/* unregister device */
	ifmedia_removeall(&priv->media);
	ether_ifdetach(ifp);
	if_free(ifp);

	/* destroy all remaining sysctl nodes */
	if (priv->sysctl_debug)
		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
	sysctl_ctx_free(&priv->stats.vport.ctx);
	sysctl_ctx_free(&priv->stats.pport.ctx);
	sysctl_ctx_free(&priv->sysctl_ctx);

	/* Release HW resources in reverse order of acquisition. */
	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
	mlx5e_disable_async_events(priv);
	/* Flushes and frees any pending deferred work. */
	destroy_workqueue(priv->wq);
	mlx5e_priv_mtx_destroy(priv);
	free(priv, M_MLX5EN);
}
3754
3755 static void *
3756 mlx5e_get_ifp(void *vpriv)
3757 {
3758         struct mlx5e_priv *priv = vpriv;
3759
3760         return (priv->ifp);
3761 }
3762
/*
 * Registration record handed to the mlx5 core: the core invokes
 * these callbacks for every core device that speaks the Ethernet
 * protocol (attach, detach, async firmware events, device lookup).
 */
static struct mlx5_interface mlx5e_interface = {
	.add = mlx5e_create_ifp,
	.remove = mlx5e_destroy_ifp,
	.event = mlx5e_async_event,
	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
	.get_dev = mlx5e_get_ifp,
};
3770
/*
 * Module load hook: register the Ethernet protocol driver with the
 * mlx5 core, which will call mlx5e_create_ifp() for each device.
 */
void
mlx5e_init(void)
{
	mlx5_register_interface(&mlx5e_interface);
}
3776
/*
 * Module unload hook: unregister from the mlx5 core; this triggers
 * mlx5e_destroy_ifp() for every attached device.
 */
void
mlx5e_cleanup(void)
{
	mlx5_unregister_interface(&mlx5e_interface);
}
3782
/* Run init/cleanup after the mlx5 core module (SI_ORDER_THIRD). */
module_init_order(mlx5e_init, SI_ORDER_THIRD);
module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);

/* Kernel linker dependencies and module version. */
#if (__FreeBSD_version >= 1100000)
MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
#endif
MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
MODULE_VERSION(mlx5en, 1);