]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/mlx5/mlx5_en/mlx5_en_main.c
Update our device tree files to a Linux 4.10
[FreeBSD/FreeBSD.git] / sys / dev / mlx5 / mlx5_en / mlx5_en_main.c
1 /*-
2  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27
28 #include "en.h"
29
30 #include <sys/sockio.h>
31 #include <machine/atomic.h>
32
/* Version string of this driver; pasted into the banner below. */
#define ETH_DRIVER_VERSION      "3.1.0-dev"
/* Driver banner: the driver name followed by the version in parentheses. */
char mlx5e_version[] = "Mellanox Ethernet driver"
    " (" ETH_DRIVER_VERSION ")";
36
/*
 * Groups the parameter sets used for one channel: one RQ, one SQ and
 * one CQ parameter set for each of the RX and TX directions.
 */
struct mlx5e_channel_param {
        struct mlx5e_rq_param rq;
        struct mlx5e_sq_param sq;
        struct mlx5e_cq_param rx_cq;
        struct mlx5e_cq_param tx_cq;
};
43
44 static const struct {
45         u32     subtype;
46         u64     baudrate;
47 }       mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
48
49         [MLX5E_1000BASE_CX_SGMII] = {
50                 .subtype = IFM_1000_CX_SGMII,
51                 .baudrate = IF_Mbps(1000ULL),
52         },
53         [MLX5E_1000BASE_KX] = {
54                 .subtype = IFM_1000_KX,
55                 .baudrate = IF_Mbps(1000ULL),
56         },
57         [MLX5E_10GBASE_CX4] = {
58                 .subtype = IFM_10G_CX4,
59                 .baudrate = IF_Gbps(10ULL),
60         },
61         [MLX5E_10GBASE_KX4] = {
62                 .subtype = IFM_10G_KX4,
63                 .baudrate = IF_Gbps(10ULL),
64         },
65         [MLX5E_10GBASE_KR] = {
66                 .subtype = IFM_10G_KR,
67                 .baudrate = IF_Gbps(10ULL),
68         },
69         [MLX5E_20GBASE_KR2] = {
70                 .subtype = IFM_20G_KR2,
71                 .baudrate = IF_Gbps(20ULL),
72         },
73         [MLX5E_40GBASE_CR4] = {
74                 .subtype = IFM_40G_CR4,
75                 .baudrate = IF_Gbps(40ULL),
76         },
77         [MLX5E_40GBASE_KR4] = {
78                 .subtype = IFM_40G_KR4,
79                 .baudrate = IF_Gbps(40ULL),
80         },
81         [MLX5E_56GBASE_R4] = {
82                 .subtype = IFM_56G_R4,
83                 .baudrate = IF_Gbps(56ULL),
84         },
85         [MLX5E_10GBASE_CR] = {
86                 .subtype = IFM_10G_CR1,
87                 .baudrate = IF_Gbps(10ULL),
88         },
89         [MLX5E_10GBASE_SR] = {
90                 .subtype = IFM_10G_SR,
91                 .baudrate = IF_Gbps(10ULL),
92         },
93         [MLX5E_10GBASE_LR] = {
94                 .subtype = IFM_10G_LR,
95                 .baudrate = IF_Gbps(10ULL),
96         },
97         [MLX5E_40GBASE_SR4] = {
98                 .subtype = IFM_40G_SR4,
99                 .baudrate = IF_Gbps(40ULL),
100         },
101         [MLX5E_40GBASE_LR4] = {
102                 .subtype = IFM_40G_LR4,
103                 .baudrate = IF_Gbps(40ULL),
104         },
105         [MLX5E_100GBASE_CR4] = {
106                 .subtype = IFM_100G_CR4,
107                 .baudrate = IF_Gbps(100ULL),
108         },
109         [MLX5E_100GBASE_SR4] = {
110                 .subtype = IFM_100G_SR4,
111                 .baudrate = IF_Gbps(100ULL),
112         },
113         [MLX5E_100GBASE_KR4] = {
114                 .subtype = IFM_100G_KR4,
115                 .baudrate = IF_Gbps(100ULL),
116         },
117         [MLX5E_100GBASE_LR4] = {
118                 .subtype = IFM_100G_LR4,
119                 .baudrate = IF_Gbps(100ULL),
120         },
121         [MLX5E_100BASE_TX] = {
122                 .subtype = IFM_100_TX,
123                 .baudrate = IF_Mbps(100ULL),
124         },
125         [MLX5E_100BASE_T] = {
126                 .subtype = IFM_100_T,
127                 .baudrate = IF_Mbps(100ULL),
128         },
129         [MLX5E_10GBASE_T] = {
130                 .subtype = IFM_10G_T,
131                 .baudrate = IF_Gbps(10ULL),
132         },
133         [MLX5E_25GBASE_CR] = {
134                 .subtype = IFM_25G_CR,
135                 .baudrate = IF_Gbps(25ULL),
136         },
137         [MLX5E_25GBASE_KR] = {
138                 .subtype = IFM_25G_KR,
139                 .baudrate = IF_Gbps(25ULL),
140         },
141         [MLX5E_25GBASE_SR] = {
142                 .subtype = IFM_25G_SR,
143                 .baudrate = IF_Gbps(25ULL),
144         },
145         [MLX5E_50GBASE_CR2] = {
146                 .subtype = IFM_50G_CR2,
147                 .baudrate = IF_Gbps(50ULL),
148         },
149         [MLX5E_50GBASE_KR2] = {
150                 .subtype = IFM_50G_KR2,
151                 .baudrate = IF_Gbps(50ULL),
152         },
153 };
154
/* Malloc type used by the malloc()/free() calls in this file. */
MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
156
157 static void
158 mlx5e_update_carrier(struct mlx5e_priv *priv)
159 {
160         struct mlx5_core_dev *mdev = priv->mdev;
161         u32 out[MLX5_ST_SZ_DW(ptys_reg)];
162         u32 eth_proto_oper;
163         int error;
164         u8 port_state;
165         u8 i;
166
167         port_state = mlx5_query_vport_state(mdev,
168             MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
169
170         if (port_state == VPORT_STATE_UP) {
171                 priv->media_status_last |= IFM_ACTIVE;
172         } else {
173                 priv->media_status_last &= ~IFM_ACTIVE;
174                 priv->media_active_last = IFM_ETHER;
175                 if_link_state_change(priv->ifp, LINK_STATE_DOWN);
176                 return;
177         }
178
179         error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN);
180         if (error) {
181                 priv->media_active_last = IFM_ETHER;
182                 priv->ifp->if_baudrate = 1;
183                 if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
184                     __func__, error);
185                 return;
186         }
187         eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
188
189         for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
190                 if (mlx5e_mode_table[i].baudrate == 0)
191                         continue;
192                 if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
193                         priv->ifp->if_baudrate =
194                             mlx5e_mode_table[i].baudrate;
195                         priv->media_active_last =
196                             mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
197                 }
198         }
199         if_link_state_change(priv->ifp, LINK_STATE_UP);
200 }
201
202 static void
203 mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
204 {
205         struct mlx5e_priv *priv = dev->if_softc;
206
207         ifmr->ifm_status = priv->media_status_last;
208         ifmr->ifm_active = priv->media_active_last |
209             (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
210             (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
211
212 }
213
214 static u32
215 mlx5e_find_link_mode(u32 subtype)
216 {
217         u32 i;
218         u32 link_mode = 0;
219
220         for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
221                 if (mlx5e_mode_table[i].baudrate == 0)
222                         continue;
223                 if (mlx5e_mode_table[i].subtype == subtype)
224                         link_mode |= MLX5E_PROT_MASK(i);
225         }
226
227         return (link_mode);
228 }
229
230 static int
231 mlx5e_media_change(struct ifnet *dev)
232 {
233         struct mlx5e_priv *priv = dev->if_softc;
234         struct mlx5_core_dev *mdev = priv->mdev;
235         u32 eth_proto_cap;
236         u32 link_mode;
237         int was_opened;
238         int locked;
239         int error;
240
241         locked = PRIV_LOCKED(priv);
242         if (!locked)
243                 PRIV_LOCK(priv);
244
245         if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
246                 error = EINVAL;
247                 goto done;
248         }
249         link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));
250
251         /* query supported capabilities */
252         error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
253         if (error != 0) {
254                 if_printf(dev, "Query port media capability failed\n");
255                 goto done;
256         }
257         /* check for autoselect */
258         if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
259                 link_mode = eth_proto_cap;
260                 if (link_mode == 0) {
261                         if_printf(dev, "Port media capability is zero\n");
262                         error = EINVAL;
263                         goto done;
264                 }
265         } else {
266                 link_mode = link_mode & eth_proto_cap;
267                 if (link_mode == 0) {
268                         if_printf(dev, "Not supported link mode requested\n");
269                         error = EINVAL;
270                         goto done;
271                 }
272         }
273         /* update pauseframe control bits */
274         priv->params.rx_pauseframe_control =
275             (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
276         priv->params.tx_pauseframe_control =
277             (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;
278
279         /* check if device is opened */
280         was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
281
282         /* reconfigure the hardware */
283         mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
284         mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
285         mlx5_set_port_pause(mdev, 1,
286             priv->params.rx_pauseframe_control,
287             priv->params.tx_pauseframe_control);
288         if (was_opened)
289                 mlx5_set_port_status(mdev, MLX5_PORT_UP);
290
291 done:
292         if (!locked)
293                 PRIV_UNLOCK(priv);
294         return (error);
295 }
296
297 static void
298 mlx5e_update_carrier_work(struct work_struct *work)
299 {
300         struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
301             update_carrier_work);
302
303         PRIV_LOCK(priv);
304         if (test_bit(MLX5E_STATE_OPENED, &priv->state))
305                 mlx5e_update_carrier(priv);
306         PRIV_UNLOCK(priv);
307 }
308
309 /*
310  * This function reads the physical port counters from the firmware
311  * using a pre-defined layout defined by various MLX5E_PPORT_XXX()
312  * macros. The output is converted from big-endian 64-bit values into
313  * host endian ones and stored in the "priv->stats.pport" structure.
314  */
315 static void
316 mlx5e_update_pport_counters(struct mlx5e_priv *priv)
317 {
318         struct mlx5_core_dev *mdev = priv->mdev;
319         struct mlx5e_pport_stats *s = &priv->stats.pport;
320         struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
321         u32 *in;
322         u32 *out;
323         const u64 *ptr;
324         unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
325         unsigned x;
326         unsigned y;
327
328         /* allocate firmware request structures */
329         in = mlx5_vzalloc(sz);
330         out = mlx5_vzalloc(sz);
331         if (in == NULL || out == NULL)
332                 goto free_out;
333
334         /*
335          * Get pointer to the 64-bit counter set which is located at a
336          * fixed offset in the output firmware request structure:
337          */
338         ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
339
340         MLX5_SET(ppcnt_reg, in, local_port, 1);
341
342         /* read IEEE802_3 counter group using predefined counter layout */
343         MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
344         mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
345         for (x = y = 0; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
346                 s->arg[y] = be64toh(ptr[x]);
347
348         /* read RFC2819 counter group using predefined counter layout */
349         MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
350         mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
351         for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
352                 s->arg[y] = be64toh(ptr[x]);
353         for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
354             MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
355                 s_debug->arg[y] = be64toh(ptr[x]);
356
357         /* read RFC2863 counter group using predefined counter layout */
358         MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
359         mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
360         for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
361                 s_debug->arg[y] = be64toh(ptr[x]);
362
363         /* read physical layer stats counter group using predefined counter layout */
364         MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
365         mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
366         for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
367                 s_debug->arg[y] = be64toh(ptr[x]);
368 free_out:
369         /* free firmware request structures */
370         kvfree(in);
371         kvfree(out);
372 }
373
374 /*
375  * This function is called regularly to collect all statistics
376  * counters from the firmware. The values can be viewed through the
377  * sysctl interface. Execution is serialized using the priv's global
378  * configuration lock.
379  */
380 static void
381 mlx5e_update_stats_work(struct work_struct *work)
382 {
383         struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
384             update_stats_work);
385         struct mlx5_core_dev *mdev = priv->mdev;
386         struct mlx5e_vport_stats *s = &priv->stats.vport;
387         struct mlx5e_rq_stats *rq_stats;
388         struct mlx5e_sq_stats *sq_stats;
389         struct buf_ring *sq_br;
390 #if (__FreeBSD_version < 1100000)
391         struct ifnet *ifp = priv->ifp;
392 #endif
393
394         u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
395         u32 *out;
396         int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
397         u64 tso_packets = 0;
398         u64 tso_bytes = 0;
399         u64 tx_queue_dropped = 0;
400         u64 tx_defragged = 0;
401         u64 tx_offload_none = 0;
402         u64 lro_packets = 0;
403         u64 lro_bytes = 0;
404         u64 sw_lro_queued = 0;
405         u64 sw_lro_flushed = 0;
406         u64 rx_csum_none = 0;
407         u64 rx_wqe_err = 0;
408         u32 rx_out_of_buffer = 0;
409         int i;
410         int j;
411
412         PRIV_LOCK(priv);
413         out = mlx5_vzalloc(outlen);
414         if (out == NULL)
415                 goto free_out;
416         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
417                 goto free_out;
418
419         /* Collect firts the SW counters and then HW for consistency */
420         for (i = 0; i < priv->params.num_channels; i++) {
421                 struct mlx5e_rq *rq = &priv->channel[i]->rq;
422
423                 rq_stats = &priv->channel[i]->rq.stats;
424
425                 /* collect stats from LRO */
426                 rq_stats->sw_lro_queued = rq->lro.lro_queued;
427                 rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
428                 sw_lro_queued += rq_stats->sw_lro_queued;
429                 sw_lro_flushed += rq_stats->sw_lro_flushed;
430                 lro_packets += rq_stats->lro_packets;
431                 lro_bytes += rq_stats->lro_bytes;
432                 rx_csum_none += rq_stats->csum_none;
433                 rx_wqe_err += rq_stats->wqe_err;
434
435                 for (j = 0; j < priv->num_tc; j++) {
436                         sq_stats = &priv->channel[i]->sq[j].stats;
437                         sq_br = priv->channel[i]->sq[j].br;
438
439                         tso_packets += sq_stats->tso_packets;
440                         tso_bytes += sq_stats->tso_bytes;
441                         tx_queue_dropped += sq_stats->dropped;
442                         if (sq_br != NULL)
443                                 tx_queue_dropped += sq_br->br_drops;
444                         tx_defragged += sq_stats->defragged;
445                         tx_offload_none += sq_stats->csum_offload_none;
446                 }
447         }
448
449         /* update counters */
450         s->tso_packets = tso_packets;
451         s->tso_bytes = tso_bytes;
452         s->tx_queue_dropped = tx_queue_dropped;
453         s->tx_defragged = tx_defragged;
454         s->lro_packets = lro_packets;
455         s->lro_bytes = lro_bytes;
456         s->sw_lro_queued = sw_lro_queued;
457         s->sw_lro_flushed = sw_lro_flushed;
458         s->rx_csum_none = rx_csum_none;
459         s->rx_wqe_err = rx_wqe_err;
460
461         /* HW counters */
462         memset(in, 0, sizeof(in));
463
464         MLX5_SET(query_vport_counter_in, in, opcode,
465             MLX5_CMD_OP_QUERY_VPORT_COUNTER);
466         MLX5_SET(query_vport_counter_in, in, op_mod, 0);
467         MLX5_SET(query_vport_counter_in, in, other_vport, 0);
468
469         memset(out, 0, outlen);
470
471         /* get number of out-of-buffer drops first */
472         if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
473             &rx_out_of_buffer))
474                 goto free_out;
475
476         /* accumulate difference into a 64-bit counter */
477         s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
478         s->rx_out_of_buffer_prev = rx_out_of_buffer;
479
480         /* get port statistics */
481         if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
482                 goto free_out;
483
484 #define MLX5_GET_CTR(out, x) \
485         MLX5_GET64(query_vport_counter_out, out, x)
486
487         s->rx_error_packets =
488             MLX5_GET_CTR(out, received_errors.packets);
489         s->rx_error_bytes =
490             MLX5_GET_CTR(out, received_errors.octets);
491         s->tx_error_packets =
492             MLX5_GET_CTR(out, transmit_errors.packets);
493         s->tx_error_bytes =
494             MLX5_GET_CTR(out, transmit_errors.octets);
495
496         s->rx_unicast_packets =
497             MLX5_GET_CTR(out, received_eth_unicast.packets);
498         s->rx_unicast_bytes =
499             MLX5_GET_CTR(out, received_eth_unicast.octets);
500         s->tx_unicast_packets =
501             MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
502         s->tx_unicast_bytes =
503             MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
504
505         s->rx_multicast_packets =
506             MLX5_GET_CTR(out, received_eth_multicast.packets);
507         s->rx_multicast_bytes =
508             MLX5_GET_CTR(out, received_eth_multicast.octets);
509         s->tx_multicast_packets =
510             MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
511         s->tx_multicast_bytes =
512             MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
513
514         s->rx_broadcast_packets =
515             MLX5_GET_CTR(out, received_eth_broadcast.packets);
516         s->rx_broadcast_bytes =
517             MLX5_GET_CTR(out, received_eth_broadcast.octets);
518         s->tx_broadcast_packets =
519             MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
520         s->tx_broadcast_bytes =
521             MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
522
523         s->rx_packets =
524             s->rx_unicast_packets +
525             s->rx_multicast_packets +
526             s->rx_broadcast_packets -
527             s->rx_out_of_buffer;
528         s->rx_bytes =
529             s->rx_unicast_bytes +
530             s->rx_multicast_bytes +
531             s->rx_broadcast_bytes;
532         s->tx_packets =
533             s->tx_unicast_packets +
534             s->tx_multicast_packets +
535             s->tx_broadcast_packets;
536         s->tx_bytes =
537             s->tx_unicast_bytes +
538             s->tx_multicast_bytes +
539             s->tx_broadcast_bytes;
540
541         /* Update calculated offload counters */
542         s->tx_csum_offload = s->tx_packets - tx_offload_none;
543         s->rx_csum_good = s->rx_packets - s->rx_csum_none;
544
545         /* Update per port counters */
546         mlx5e_update_pport_counters(priv);
547
548 #if (__FreeBSD_version < 1100000)
549         /* no get_counters interface in fbsd 10 */
550         ifp->if_ipackets = s->rx_packets;
551         ifp->if_ierrors = s->rx_error_packets;
552         ifp->if_iqdrops = s->rx_out_of_buffer;
553         ifp->if_opackets = s->tx_packets;
554         ifp->if_oerrors = s->tx_error_packets;
555         ifp->if_snd.ifq_drops = s->tx_queue_dropped;
556         ifp->if_ibytes = s->rx_bytes;
557         ifp->if_obytes = s->tx_bytes;
558 #endif
559
560 free_out:
561         kvfree(out);
562
563         /* Update diagnostics, if any */
564         if (priv->params_ethtool.diag_pci_enable ||
565             priv->params_ethtool.diag_general_enable) {
566                 int error = mlx5_core_get_diagnostics_full(mdev,
567                     priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
568                     priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
569                 if (error != 0)
570                         if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
571         }
572         PRIV_UNLOCK(priv);
573 }
574
575 static void
576 mlx5e_update_stats(void *arg)
577 {
578         struct mlx5e_priv *priv = arg;
579
580         schedule_work(&priv->update_stats_work);
581
582         callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
583 }
584
585 static void
586 mlx5e_async_event_sub(struct mlx5e_priv *priv,
587     enum mlx5_dev_event event)
588 {
589         switch (event) {
590         case MLX5_DEV_EVENT_PORT_UP:
591         case MLX5_DEV_EVENT_PORT_DOWN:
592                 schedule_work(&priv->update_carrier_work);
593                 break;
594
595         default:
596                 break;
597         }
598 }
599
600 static void
601 mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
602     enum mlx5_dev_event event, unsigned long param)
603 {
604         struct mlx5e_priv *priv = vpriv;
605
606         mtx_lock(&priv->async_events_mtx);
607         if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
608                 mlx5e_async_event_sub(priv, event);
609         mtx_unlock(&priv->async_events_mtx);
610 }
611
/*
 * Set the MLX5E_STATE_ASYNC_EVENTS_ENABLE bit, which is the condition
 * mlx5e_async_event() tests before dispatching an event.
 */
static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
        set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}
617
/*
 * Clear the MLX5E_STATE_ASYNC_EVENTS_ENABLE bit.  The bit is cleared
 * with async_events_mtx held, the same mutex mlx5e_async_event()
 * holds while it tests the bit and dispatches an event.
 */
static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
        mtx_lock(&priv->async_events_mtx);
        clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
        mtx_unlock(&priv->async_events_mtx);
}
625
/*
 * Description strings for the RQ statistics, produced by expanding
 * MLX5E_RQ_STATS() with MLX5E_STATS_DESC.  Handed to
 * mlx5e_create_stats() when a receive queue is created.
 */
static const char *mlx5e_rq_stats_desc[] = {
        MLX5E_RQ_STATS(MLX5E_STATS_DESC)
};
629
630 static int
631 mlx5e_create_rq(struct mlx5e_channel *c,
632     struct mlx5e_rq_param *param,
633     struct mlx5e_rq *rq)
634 {
635         struct mlx5e_priv *priv = c->priv;
636         struct mlx5_core_dev *mdev = priv->mdev;
637         char buffer[16];
638         void *rqc = param->rqc;
639         void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
640         int wq_sz;
641         int err;
642         int i;
643
644         /* Create DMA descriptor TAG */
645         if ((err = -bus_dma_tag_create(
646             bus_get_dma_tag(mdev->pdev->dev.bsddev),
647             1,                          /* any alignment */
648             0,                          /* no boundary */
649             BUS_SPACE_MAXADDR,          /* lowaddr */
650             BUS_SPACE_MAXADDR,          /* highaddr */
651             NULL, NULL,                 /* filter, filterarg */
652             MJUM16BYTES,                /* maxsize */
653             1,                          /* nsegments */
654             MJUM16BYTES,                /* maxsegsize */
655             0,                          /* flags */
656             NULL, NULL,                 /* lockfunc, lockfuncarg */
657             &rq->dma_tag)))
658                 goto done;
659
660         err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
661             &rq->wq_ctrl);
662         if (err)
663                 goto err_free_dma_tag;
664
665         rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
666
667         if (priv->params.hw_lro_en) {
668                 rq->wqe_sz = priv->params.lro_wqe_sz;
669         } else {
670                 rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
671         }
672         if (rq->wqe_sz > MJUM16BYTES) {
673                 err = -ENOMEM;
674                 goto err_rq_wq_destroy;
675         } else if (rq->wqe_sz > MJUM9BYTES) {
676                 rq->wqe_sz = MJUM16BYTES;
677         } else if (rq->wqe_sz > MJUMPAGESIZE) {
678                 rq->wqe_sz = MJUM9BYTES;
679         } else if (rq->wqe_sz > MCLBYTES) {
680                 rq->wqe_sz = MJUMPAGESIZE;
681         } else {
682                 rq->wqe_sz = MCLBYTES;
683         }
684
685         wq_sz = mlx5_wq_ll_get_size(&rq->wq);
686
687         err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
688         if (err)
689                 goto err_rq_wq_destroy;
690
691         rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
692         for (i = 0; i != wq_sz; i++) {
693                 struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
694                 uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
695
696                 err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
697                 if (err != 0) {
698                         while (i--)
699                                 bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
700                         goto err_rq_mbuf_free;
701                 }
702                 wqe->data.lkey = c->mkey_be;
703                 wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
704         }
705
706         rq->ifp = c->ifp;
707         rq->channel = c;
708         rq->ix = c->ix;
709
710         snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
711         mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
712             buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
713             rq->stats.arg);
714         return (0);
715
716 err_rq_mbuf_free:
717         free(rq->mbuf, M_MLX5EN);
718         tcp_lro_free(&rq->lro);
719 err_rq_wq_destroy:
720         mlx5_wq_destroy(&rq->wq_ctrl);
721 err_free_dma_tag:
722         bus_dma_tag_destroy(rq->dma_tag);
723 done:
724         return (err);
725 }
726
727 static void
728 mlx5e_destroy_rq(struct mlx5e_rq *rq)
729 {
730         int wq_sz;
731         int i;
732
733         /* destroy all sysctl nodes */
734         sysctl_ctx_free(&rq->stats.ctx);
735
736         /* free leftover LRO packets, if any */
737         tcp_lro_free(&rq->lro);
738
739         wq_sz = mlx5_wq_ll_get_size(&rq->wq);
740         for (i = 0; i != wq_sz; i++) {
741                 if (rq->mbuf[i].mbuf != NULL) {
742                         bus_dmamap_unload(rq->dma_tag,
743                             rq->mbuf[i].dma_map);
744                         m_freem(rq->mbuf[i].mbuf);
745                 }
746                 bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
747         }
748         free(rq->mbuf, M_MLX5EN);
749         mlx5_wq_destroy(&rq->wq_ctrl);
750 }
751
752 static int
753 mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
754 {
755         struct mlx5e_channel *c = rq->channel;
756         struct mlx5e_priv *priv = c->priv;
757         struct mlx5_core_dev *mdev = priv->mdev;
758
759         void *in;
760         void *rqc;
761         void *wq;
762         int inlen;
763         int err;
764
765         inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
766             sizeof(u64) * rq->wq_ctrl.buf.npages;
767         in = mlx5_vzalloc(inlen);
768         if (in == NULL)
769                 return (-ENOMEM);
770
771         rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
772         wq = MLX5_ADDR_OF(rqc, rqc, wq);
773
774         memcpy(rqc, param->rqc, sizeof(param->rqc));
775
776         MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
777         MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
778         MLX5_SET(rqc, rqc, flush_in_error_en, 1);
779         if (priv->counter_set_id >= 0)
780                 MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
781         MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
782             PAGE_SHIFT);
783         MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
784
785         mlx5_fill_page_array(&rq->wq_ctrl.buf,
786             (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
787
788         err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
789
790         kvfree(in);
791
792         return (err);
793 }
794
795 static int
796 mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
797 {
798         struct mlx5e_channel *c = rq->channel;
799         struct mlx5e_priv *priv = c->priv;
800         struct mlx5_core_dev *mdev = priv->mdev;
801
802         void *in;
803         void *rqc;
804         int inlen;
805         int err;
806
807         inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
808         in = mlx5_vzalloc(inlen);
809         if (in == NULL)
810                 return (-ENOMEM);
811
812         rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
813
814         MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
815         MLX5_SET(modify_rq_in, in, rq_state, curr_state);
816         MLX5_SET(rqc, rqc, state, next_state);
817
818         err = mlx5_core_modify_rq(mdev, in, inlen);
819
820         kvfree(in);
821
822         return (err);
823 }
824
825 static void
826 mlx5e_disable_rq(struct mlx5e_rq *rq)
827 {
828         struct mlx5e_channel *c = rq->channel;
829         struct mlx5e_priv *priv = c->priv;
830         struct mlx5_core_dev *mdev = priv->mdev;
831
832         mlx5_core_destroy_rq(mdev, rq->rqn);
833 }
834
835 static int
836 mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
837 {
838         struct mlx5e_channel *c = rq->channel;
839         struct mlx5e_priv *priv = c->priv;
840         struct mlx5_wq_ll *wq = &rq->wq;
841         int i;
842
843         for (i = 0; i < 1000; i++) {
844                 if (wq->cur_sz >= priv->params.min_rx_wqes)
845                         return (0);
846
847                 msleep(4);
848         }
849         return (-ETIMEDOUT);
850 }
851
852 static int
853 mlx5e_open_rq(struct mlx5e_channel *c,
854     struct mlx5e_rq_param *param,
855     struct mlx5e_rq *rq)
856 {
857         int err;
858
859         err = mlx5e_create_rq(c, param, rq);
860         if (err)
861                 return (err);
862
863         err = mlx5e_enable_rq(rq, param);
864         if (err)
865                 goto err_destroy_rq;
866
867         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
868         if (err)
869                 goto err_disable_rq;
870
871         c->rq.enabled = 1;
872
873         return (0);
874
875 err_disable_rq:
876         mlx5e_disable_rq(rq);
877 err_destroy_rq:
878         mlx5e_destroy_rq(rq);
879
880         return (err);
881 }
882
883 static void
884 mlx5e_close_rq(struct mlx5e_rq *rq)
885 {
886         mtx_lock(&rq->mtx);
887         rq->enabled = 0;
888         callout_stop(&rq->watchdog);
889         mtx_unlock(&rq->mtx);
890
891         callout_drain(&rq->watchdog);
892
893         mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
894 }
895
896 static void
897 mlx5e_close_rq_wait(struct mlx5e_rq *rq)
898 {
899         /* wait till RQ is empty */
900         while (!mlx5_wq_ll_is_empty(&rq->wq)) {
901                 msleep(4);
902                 rq->cq.mcq.comp(&rq->cq.mcq);
903         }
904
905         mlx5e_disable_rq(rq);
906         mlx5e_destroy_rq(rq);
907 }
908
909 void
910 mlx5e_free_sq_db(struct mlx5e_sq *sq)
911 {
912         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
913         int x;
914
915         for (x = 0; x != wq_sz; x++)
916                 bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
917         free(sq->mbuf, M_MLX5EN);
918 }
919
920 int
921 mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
922 {
923         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
924         int err;
925         int x;
926
927         sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
928
929         /* Create DMA descriptor MAPs */
930         for (x = 0; x != wq_sz; x++) {
931                 err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
932                 if (err != 0) {
933                         while (x--)
934                                 bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
935                         free(sq->mbuf, M_MLX5EN);
936                         return (err);
937                 }
938         }
939         return (0);
940 }
941
942 static const char *mlx5e_sq_stats_desc[] = {
943         MLX5E_SQ_STATS(MLX5E_STATS_DESC)
944 };
945
946 static int
947 mlx5e_create_sq(struct mlx5e_channel *c,
948     int tc,
949     struct mlx5e_sq_param *param,
950     struct mlx5e_sq *sq)
951 {
952         struct mlx5e_priv *priv = c->priv;
953         struct mlx5_core_dev *mdev = priv->mdev;
954         char buffer[16];
955
956         void *sqc = param->sqc;
957         void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
958 #ifdef RSS
959         cpuset_t cpu_mask;
960         int cpu_id;
961 #endif
962         int err;
963
964         /* Create DMA descriptor TAG */
965         if ((err = -bus_dma_tag_create(
966             bus_get_dma_tag(mdev->pdev->dev.bsddev),
967             1,                          /* any alignment */
968             0,                          /* no boundary */
969             BUS_SPACE_MAXADDR,          /* lowaddr */
970             BUS_SPACE_MAXADDR,          /* highaddr */
971             NULL, NULL,                 /* filter, filterarg */
972             MLX5E_MAX_TX_PAYLOAD_SIZE,  /* maxsize */
973             MLX5E_MAX_TX_MBUF_FRAGS,    /* nsegments */
974             MLX5E_MAX_TX_MBUF_SIZE,     /* maxsegsize */
975             0,                          /* flags */
976             NULL, NULL,                 /* lockfunc, lockfuncarg */
977             &sq->dma_tag)))
978                 goto done;
979
980         err = mlx5_alloc_map_uar(mdev, &sq->uar);
981         if (err)
982                 goto err_free_dma_tag;
983
984         err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
985             &sq->wq_ctrl);
986         if (err)
987                 goto err_unmap_free_uar;
988
989         sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
990         sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
991
992         err = mlx5e_alloc_sq_db(sq);
993         if (err)
994                 goto err_sq_wq_destroy;
995
996         sq->mkey_be = c->mkey_be;
997         sq->ifp = priv->ifp;
998         sq->priv = priv;
999         sq->tc = tc;
1000
1001         /* check if we should allocate a second packet buffer */
1002         if (priv->params_ethtool.tx_bufring_disable == 0) {
1003                 sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
1004                     M_WAITOK, &sq->lock);
1005                 if (sq->br == NULL) {
1006                         if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
1007                             __func__);
1008                         err = -ENOMEM;
1009                         goto err_free_sq_db;
1010                 }
1011
1012                 sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
1013                     taskqueue_thread_enqueue, &sq->sq_tq);
1014                 if (sq->sq_tq == NULL) {
1015                         if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
1016                             __func__);
1017                         err = -ENOMEM;
1018                         goto err_free_drbr;
1019                 }
1020
1021                 TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
1022 #ifdef RSS
1023                 cpu_id = rss_getcpu(c->ix % rss_getnumbuckets());
1024                 CPU_SETOF(cpu_id, &cpu_mask);
1025                 taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask,
1026                     "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id);
1027 #else
1028                 taskqueue_start_threads(&sq->sq_tq, 1, PI_NET,
1029                     "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc);
1030 #endif
1031         }
1032         snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
1033         mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
1034             buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
1035             sq->stats.arg);
1036
1037         return (0);
1038
1039 err_free_drbr:
1040         buf_ring_free(sq->br, M_MLX5EN);
1041 err_free_sq_db:
1042         mlx5e_free_sq_db(sq);
1043 err_sq_wq_destroy:
1044         mlx5_wq_destroy(&sq->wq_ctrl);
1045
1046 err_unmap_free_uar:
1047         mlx5_unmap_free_uar(mdev, &sq->uar);
1048
1049 err_free_dma_tag:
1050         bus_dma_tag_destroy(sq->dma_tag);
1051 done:
1052         return (err);
1053 }
1054
1055 static void
1056 mlx5e_destroy_sq(struct mlx5e_sq *sq)
1057 {
1058         /* destroy all sysctl nodes */
1059         sysctl_ctx_free(&sq->stats.ctx);
1060
1061         mlx5e_free_sq_db(sq);
1062         mlx5_wq_destroy(&sq->wq_ctrl);
1063         mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
1064         if (sq->sq_tq != NULL) {
1065                 taskqueue_drain(sq->sq_tq, &sq->sq_task);
1066                 taskqueue_free(sq->sq_tq);
1067         }
1068         if (sq->br != NULL)
1069                 buf_ring_free(sq->br, M_MLX5EN);
1070 }
1071
1072 int
1073 mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
1074     int tis_num)
1075 {
1076         void *in;
1077         void *sqc;
1078         void *wq;
1079         int inlen;
1080         int err;
1081
1082         inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1083             sizeof(u64) * sq->wq_ctrl.buf.npages;
1084         in = mlx5_vzalloc(inlen);
1085         if (in == NULL)
1086                 return (-ENOMEM);
1087
1088         sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1089         wq = MLX5_ADDR_OF(sqc, sqc, wq);
1090
1091         memcpy(sqc, param->sqc, sizeof(param->sqc));
1092
1093         MLX5_SET(sqc, sqc, tis_num_0, tis_num);
1094         MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
1095         MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
1096         MLX5_SET(sqc, sqc, tis_lst_sz, 1);
1097         MLX5_SET(sqc, sqc, flush_in_error_en, 1);
1098
1099         MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1100         MLX5_SET(wq, wq, uar_page, sq->uar.index);
1101         MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
1102             PAGE_SHIFT);
1103         MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
1104
1105         mlx5_fill_page_array(&sq->wq_ctrl.buf,
1106             (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
1107
1108         err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);
1109
1110         kvfree(in);
1111
1112         return (err);
1113 }
1114
1115 int
1116 mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
1117 {
1118         void *in;
1119         void *sqc;
1120         int inlen;
1121         int err;
1122
1123         inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
1124         in = mlx5_vzalloc(inlen);
1125         if (in == NULL)
1126                 return (-ENOMEM);
1127
1128         sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
1129
1130         MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
1131         MLX5_SET(modify_sq_in, in, sq_state, curr_state);
1132         MLX5_SET(sqc, sqc, state, next_state);
1133
1134         err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);
1135
1136         kvfree(in);
1137
1138         return (err);
1139 }
1140
1141 void
1142 mlx5e_disable_sq(struct mlx5e_sq *sq)
1143 {
1144
1145         mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
1146 }
1147
1148 static int
1149 mlx5e_open_sq(struct mlx5e_channel *c,
1150     int tc,
1151     struct mlx5e_sq_param *param,
1152     struct mlx5e_sq *sq)
1153 {
1154         int err;
1155
1156         err = mlx5e_create_sq(c, tc, param, sq);
1157         if (err)
1158                 return (err);
1159
1160         err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
1161         if (err)
1162                 goto err_destroy_sq;
1163
1164         err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
1165         if (err)
1166                 goto err_disable_sq;
1167
1168         atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY);
1169
1170         return (0);
1171
1172 err_disable_sq:
1173         mlx5e_disable_sq(sq);
1174 err_destroy_sq:
1175         mlx5e_destroy_sq(sq);
1176
1177         return (err);
1178 }
1179
1180 static void
1181 mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
1182 {
1183         /* fill up remainder with NOPs */
1184         while (sq->cev_counter != 0) {
1185                 while (!mlx5e_sq_has_room_for(sq, 1)) {
1186                         if (can_sleep != 0) {
1187                                 mtx_unlock(&sq->lock);
1188                                 msleep(4);
1189                                 mtx_lock(&sq->lock);
1190                         } else {
1191                                 goto done;
1192                         }
1193                 }
1194                 /* send a single NOP */
1195                 mlx5e_send_nop(sq, 1);
1196                 wmb();
1197         }
1198 done:
1199         /* Check if we need to write the doorbell */
1200         if (likely(sq->doorbell.d64 != 0)) {
1201                 mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
1202                 sq->doorbell.d64 = 0;
1203         }
1204 }
1205
1206 void
1207 mlx5e_sq_cev_timeout(void *arg)
1208 {
1209         struct mlx5e_sq *sq = arg;
1210
1211         mtx_assert(&sq->lock, MA_OWNED);
1212
1213         /* check next state */
1214         switch (sq->cev_next_state) {
1215         case MLX5E_CEV_STATE_SEND_NOPS:
1216                 /* fill TX ring with NOPs, if any */
1217                 mlx5e_sq_send_nops_locked(sq, 0);
1218
1219                 /* check if completed */
1220                 if (sq->cev_counter == 0) {
1221                         sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
1222                         return;
1223                 }
1224                 break;
1225         default:
1226                 /* send NOPs on next timeout */
1227                 sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
1228                 break;
1229         }
1230
1231         /* restart timer */
1232         callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
1233 }
1234
1235 void
1236 mlx5e_drain_sq(struct mlx5e_sq *sq)
1237 {
1238         int error;
1239
1240         /*
1241          * Check if already stopped.
1242          *
1243          * NOTE: The "stopped" variable is only written when both the
1244          * priv's configuration lock and the SQ's lock is locked. It
1245          * can therefore safely be read when only one of the two locks
1246          * is locked. This function is always called when the priv's
1247          * configuration lock is locked.
1248          */
1249         if (sq->stopped != 0)
1250                 return;
1251
1252         mtx_lock(&sq->lock);
1253
1254         /* don't put more packets into the SQ */
1255         sq->stopped = 1;
1256
1257         /* teardown event factor timer, if any */
1258         sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1259         callout_stop(&sq->cev_callout);
1260
1261         /* send dummy NOPs in order to flush the transmit ring */
1262         mlx5e_sq_send_nops_locked(sq, 1);
1263         mtx_unlock(&sq->lock);
1264
1265         /* make sure it is safe to free the callout */
1266         callout_drain(&sq->cev_callout);
1267
1268         /* wait till SQ is empty or link is down */
1269         mtx_lock(&sq->lock);
1270         while (sq->cc != sq->pc &&
1271             (sq->priv->media_status_last & IFM_ACTIVE) != 0) {
1272                 mtx_unlock(&sq->lock);
1273                 msleep(1);
1274                 sq->cq.mcq.comp(&sq->cq.mcq);
1275                 mtx_lock(&sq->lock);
1276         }
1277         mtx_unlock(&sq->lock);
1278
1279         /* error out remaining requests */
1280         error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
1281         if (error != 0) {
1282                 if_printf(sq->ifp,
1283                     "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
1284         }
1285
1286         /* wait till SQ is empty */
1287         mtx_lock(&sq->lock);
1288         while (sq->cc != sq->pc) {
1289                 mtx_unlock(&sq->lock);
1290                 msleep(1);
1291                 sq->cq.mcq.comp(&sq->cq.mcq);
1292                 mtx_lock(&sq->lock);
1293         }
1294         mtx_unlock(&sq->lock);
1295 }
1296
/*
 * Fully close a send queue: drain it, destroy the device object and
 * release the host side resources, in that order.
 */
static void
mlx5e_close_sq_wait(struct mlx5e_sq *sq)
{
        mlx5e_drain_sq(sq);             /* stop and wait for completion */
        mlx5e_disable_sq(sq);           /* destroy the device SQ object */
        mlx5e_destroy_sq(sq);           /* free host resources */
}
1305
1306 static int
1307 mlx5e_create_cq(struct mlx5e_priv *priv,
1308     struct mlx5e_cq_param *param,
1309     struct mlx5e_cq *cq,
1310     mlx5e_cq_comp_t *comp,
1311     int eq_ix)
1312 {
1313         struct mlx5_core_dev *mdev = priv->mdev;
1314         struct mlx5_core_cq *mcq = &cq->mcq;
1315         int eqn_not_used;
1316         int irqn;
1317         int err;
1318         u32 i;
1319
1320         param->wq.buf_numa_node = 0;
1321         param->wq.db_numa_node = 0;
1322
1323         err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
1324             &cq->wq_ctrl);
1325         if (err)
1326                 return (err);
1327
1328         mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);
1329
1330         mcq->cqe_sz = 64;
1331         mcq->set_ci_db = cq->wq_ctrl.db.db;
1332         mcq->arm_db = cq->wq_ctrl.db.db + 1;
1333         *mcq->set_ci_db = 0;
1334         *mcq->arm_db = 0;
1335         mcq->vector = eq_ix;
1336         mcq->comp = comp;
1337         mcq->event = mlx5e_cq_error_event;
1338         mcq->irqn = irqn;
1339         mcq->uar = &priv->cq_uar;
1340
1341         for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1342                 struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1343
1344                 cqe->op_own = 0xf1;
1345         }
1346
1347         cq->priv = priv;
1348
1349         return (0);
1350 }
1351
1352 static void
1353 mlx5e_destroy_cq(struct mlx5e_cq *cq)
1354 {
1355         mlx5_wq_destroy(&cq->wq_ctrl);
1356 }
1357
1358 static int
1359 mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
1360 {
1361         struct mlx5_core_cq *mcq = &cq->mcq;
1362         void *in;
1363         void *cqc;
1364         int inlen;
1365         int irqn_not_used;
1366         int eqn;
1367         int err;
1368
1369         inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1370             sizeof(u64) * cq->wq_ctrl.buf.npages;
1371         in = mlx5_vzalloc(inlen);
1372         if (in == NULL)
1373                 return (-ENOMEM);
1374
1375         cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1376
1377         memcpy(cqc, param->cqc, sizeof(param->cqc));
1378
1379         mlx5_fill_page_array(&cq->wq_ctrl.buf,
1380             (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
1381
1382         mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
1383
1384         MLX5_SET(cqc, cqc, c_eqn, eqn);
1385         MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
1386         MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1387             PAGE_SHIFT);
1388         MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1389
1390         err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);
1391
1392         kvfree(in);
1393
1394         if (err)
1395                 return (err);
1396
1397         mlx5e_cq_arm(cq);
1398
1399         return (0);
1400 }
1401
1402 static void
1403 mlx5e_disable_cq(struct mlx5e_cq *cq)
1404 {
1405
1406         mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
1407 }
1408
1409 int
1410 mlx5e_open_cq(struct mlx5e_priv *priv,
1411     struct mlx5e_cq_param *param,
1412     struct mlx5e_cq *cq,
1413     mlx5e_cq_comp_t *comp,
1414     int eq_ix)
1415 {
1416         int err;
1417
1418         err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
1419         if (err)
1420                 return (err);
1421
1422         err = mlx5e_enable_cq(cq, param, eq_ix);
1423         if (err)
1424                 goto err_destroy_cq;
1425
1426         return (0);
1427
1428 err_destroy_cq:
1429         mlx5e_destroy_cq(cq);
1430
1431         return (err);
1432 }
1433
/*
 * Close a completion queue: destroy the device object first, then
 * release the host side work queue.
 */
void
mlx5e_close_cq(struct mlx5e_cq *cq)
{

        mlx5e_disable_cq(cq);
        mlx5e_destroy_cq(cq);
}
1440
1441 static int
1442 mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1443     struct mlx5e_channel_param *cparam)
1444 {
1445         int err;
1446         int tc;
1447
1448         for (tc = 0; tc < c->num_tc; tc++) {
1449                 /* open completion queue */
1450                 err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
1451                     &mlx5e_tx_cq_comp, c->ix);
1452                 if (err)
1453                         goto err_close_tx_cqs;
1454         }
1455         return (0);
1456
1457 err_close_tx_cqs:
1458         for (tc--; tc >= 0; tc--)
1459                 mlx5e_close_cq(&c->sq[tc].cq);
1460
1461         return (err);
1462 }
1463
1464 static void
1465 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1466 {
1467         int tc;
1468
1469         for (tc = 0; tc < c->num_tc; tc++)
1470                 mlx5e_close_cq(&c->sq[tc].cq);
1471 }
1472
1473 static int
1474 mlx5e_open_sqs(struct mlx5e_channel *c,
1475     struct mlx5e_channel_param *cparam)
1476 {
1477         int err;
1478         int tc;
1479
1480         for (tc = 0; tc < c->num_tc; tc++) {
1481                 err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
1482                 if (err)
1483                         goto err_close_sqs;
1484         }
1485
1486         return (0);
1487
1488 err_close_sqs:
1489         for (tc--; tc >= 0; tc--)
1490                 mlx5e_close_sq_wait(&c->sq[tc]);
1491
1492         return (err);
1493 }
1494
1495 static void
1496 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1497 {
1498         int tc;
1499
1500         for (tc = 0; tc < c->num_tc; tc++)
1501                 mlx5e_close_sq_wait(&c->sq[tc]);
1502 }
1503
1504 static void
1505 mlx5e_chan_mtx_init(struct mlx5e_channel *c)
1506 {
1507         int tc;
1508
1509         mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
1510
1511         callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);
1512
1513         for (tc = 0; tc < c->num_tc; tc++) {
1514                 struct mlx5e_sq *sq = c->sq + tc;
1515
1516                 mtx_init(&sq->lock, "mlx5tx",
1517                     MTX_NETWORK_LOCK " TX", MTX_DEF);
1518                 mtx_init(&sq->comp_lock, "mlx5comp",
1519                     MTX_NETWORK_LOCK " TX", MTX_DEF);
1520
1521                 callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
1522
1523                 sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
1524
1525                 /* ensure the TX completion event factor is not zero */
1526                 if (sq->cev_factor == 0)
1527                         sq->cev_factor = 1;
1528         }
1529 }
1530
1531 static void
1532 mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1533 {
1534         int tc;
1535
1536         mtx_destroy(&c->rq.mtx);
1537
1538         for (tc = 0; tc < c->num_tc; tc++) {
1539                 mtx_destroy(&c->sq[tc].lock);
1540                 mtx_destroy(&c->sq[tc].comp_lock);
1541         }
1542 }
1543
1544 static int
1545 mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1546     struct mlx5e_channel_param *cparam,
1547     struct mlx5e_channel *volatile *cp)
1548 {
1549         struct mlx5e_channel *c;
1550         int err;
1551
1552         c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
1553         c->priv = priv;
1554         c->ix = ix;
1555         c->cpu = 0;
1556         c->ifp = priv->ifp;
1557         c->mkey_be = cpu_to_be32(priv->mr.key);
1558         c->num_tc = priv->num_tc;
1559
1560         /* init mutexes */
1561         mlx5e_chan_mtx_init(c);
1562
1563         /* open transmit completion queue */
1564         err = mlx5e_open_tx_cqs(c, cparam);
1565         if (err)
1566                 goto err_free;
1567
1568         /* open receive completion queue */
1569         err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
1570             &mlx5e_rx_cq_comp, c->ix);
1571         if (err)
1572                 goto err_close_tx_cqs;
1573
1574         err = mlx5e_open_sqs(c, cparam);
1575         if (err)
1576                 goto err_close_rx_cq;
1577
1578         err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
1579         if (err)
1580                 goto err_close_sqs;
1581
1582         /* store channel pointer */
1583         *cp = c;
1584
1585         /* poll receive queue initially */
1586         c->rq.cq.mcq.comp(&c->rq.cq.mcq);
1587
1588         return (0);
1589
1590 err_close_sqs:
1591         mlx5e_close_sqs_wait(c);
1592
1593 err_close_rx_cq:
1594         mlx5e_close_cq(&c->rq.cq);
1595
1596 err_close_tx_cqs:
1597         mlx5e_close_tx_cqs(c);
1598
1599 err_free:
1600         /* destroy mutexes */
1601         mlx5e_chan_mtx_destroy(c);
1602         free(c, M_MLX5EN);
1603         return (err);
1604 }
1605
1606 static void
1607 mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
1608 {
1609         struct mlx5e_channel *c = *pp;
1610
1611         /* check if channel is already closed */
1612         if (c == NULL)
1613                 return;
1614         mlx5e_close_rq(&c->rq);
1615 }
1616
1617 static void
1618 mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
1619 {
1620         struct mlx5e_channel *c = *pp;
1621
1622         /* check if channel is already closed */
1623         if (c == NULL)
1624                 return;
1625         /* ensure channel pointer is no longer used */
1626         *pp = NULL;
1627
1628         mlx5e_close_rq_wait(&c->rq);
1629         mlx5e_close_sqs_wait(c);
1630         mlx5e_close_cq(&c->rq.cq);
1631         mlx5e_close_tx_cqs(c);
1632         /* destroy mutexes */
1633         mlx5e_chan_mtx_destroy(c);
1634         free(c, M_MLX5EN);
1635 }
1636
1637 static void
1638 mlx5e_build_rq_param(struct mlx5e_priv *priv,
1639     struct mlx5e_rq_param *param)
1640 {
1641         void *rqc = param->rqc;
1642         void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
1643
1644         MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
1645         MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1646         MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
1647         MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
1648         MLX5_SET(wq, wq, pd, priv->pdn);
1649
1650         param->wq.buf_numa_node = 0;
1651         param->wq.db_numa_node = 0;
1652         param->wq.linear = 1;
1653 }
1654
1655 static void
1656 mlx5e_build_sq_param(struct mlx5e_priv *priv,
1657     struct mlx5e_sq_param *param)
1658 {
1659         void *sqc = param->sqc;
1660         void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
1661
1662         MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
1663         MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1664         MLX5_SET(wq, wq, pd, priv->pdn);
1665
1666         param->wq.buf_numa_node = 0;
1667         param->wq.db_numa_node = 0;
1668         param->wq.linear = 1;
1669 }
1670
1671 static void
1672 mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
1673     struct mlx5e_cq_param *param)
1674 {
1675         void *cqc = param->cqc;
1676
1677         MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
1678 }
1679
1680 static void
1681 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1682     struct mlx5e_cq_param *param)
1683 {
1684         void *cqc = param->cqc;
1685
1686
1687         /*
1688          * TODO The sysctl to control on/off is a bool value for now, which means
1689          * we only support CSUM, once HASH is implemnted we'll need to address that.
1690          */
1691         if (priv->params.cqe_zipping_en) {
1692                 MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1693                 MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1694         }
1695
1696         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1697         MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1698         MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1699
1700         switch (priv->params.rx_cq_moderation_mode) {
1701         case 0:
1702                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1703                 break;
1704         default:
1705                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1706                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1707                 else
1708                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1709                 break;
1710         }
1711
1712         mlx5e_build_common_cq_param(priv, param);
1713 }
1714
1715 static void
1716 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1717     struct mlx5e_cq_param *param)
1718 {
1719         void *cqc = param->cqc;
1720
1721         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1722         MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1723         MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1724
1725         switch (priv->params.tx_cq_moderation_mode) {
1726         case 0:
1727                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1728                 break;
1729         default:
1730                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1731                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1732                 else
1733                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1734                 break;
1735         }
1736
1737         mlx5e_build_common_cq_param(priv, param);
1738 }
1739
1740 static void
1741 mlx5e_build_channel_param(struct mlx5e_priv *priv,
1742     struct mlx5e_channel_param *cparam)
1743 {
1744         memset(cparam, 0, sizeof(*cparam));
1745
1746         mlx5e_build_rq_param(priv, &cparam->rq);
1747         mlx5e_build_sq_param(priv, &cparam->sq);
1748         mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
1749         mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
1750 }
1751
1752 static int
1753 mlx5e_open_channels(struct mlx5e_priv *priv)
1754 {
1755         struct mlx5e_channel_param cparam;
1756         void *ptr;
1757         int err;
1758         int i;
1759         int j;
1760
1761         priv->channel = malloc(priv->params.num_channels *
1762             sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
1763
1764         mlx5e_build_channel_param(priv, &cparam);
1765         for (i = 0; i < priv->params.num_channels; i++) {
1766                 err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1767                 if (err)
1768                         goto err_close_channels;
1769         }
1770
1771         for (j = 0; j < priv->params.num_channels; j++) {
1772                 err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
1773                 if (err)
1774                         goto err_close_channels;
1775         }
1776
1777         return (0);
1778
1779 err_close_channels:
1780         for (i--; i >= 0; i--) {
1781                 mlx5e_close_channel(&priv->channel[i]);
1782                 mlx5e_close_channel_wait(&priv->channel[i]);
1783         }
1784
1785         /* remove "volatile" attribute from "channel" pointer */
1786         ptr = __DECONST(void *, priv->channel);
1787         priv->channel = NULL;
1788
1789         free(ptr, M_MLX5EN);
1790
1791         return (err);
1792 }
1793
1794 static void
1795 mlx5e_close_channels(struct mlx5e_priv *priv)
1796 {
1797         void *ptr;
1798         int i;
1799
1800         if (priv->channel == NULL)
1801                 return;
1802
1803         for (i = 0; i < priv->params.num_channels; i++)
1804                 mlx5e_close_channel(&priv->channel[i]);
1805         for (i = 0; i < priv->params.num_channels; i++)
1806                 mlx5e_close_channel_wait(&priv->channel[i]);
1807
1808         /* remove "volatile" attribute from "channel" pointer */
1809         ptr = __DECONST(void *, priv->channel);
1810         priv->channel = NULL;
1811
1812         free(ptr, M_MLX5EN);
1813 }
1814
1815 static int
1816 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
1817 {
1818
1819         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1820                 uint8_t cq_mode;
1821
1822                 switch (priv->params.tx_cq_moderation_mode) {
1823                 case 0:
1824                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1825                         break;
1826                 default:
1827                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1828                         break;
1829                 }
1830
1831                 return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
1832                     priv->params.tx_cq_moderation_usec,
1833                     priv->params.tx_cq_moderation_pkts,
1834                     cq_mode));
1835         }
1836
1837         return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
1838             priv->params.tx_cq_moderation_usec,
1839             priv->params.tx_cq_moderation_pkts));
1840 }
1841
1842 static int
1843 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
1844 {
1845
1846         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1847                 uint8_t cq_mode;
1848                 int retval;
1849
1850                 switch (priv->params.rx_cq_moderation_mode) {
1851                 case 0:
1852                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1853                         break;
1854                 default:
1855                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1856                         break;
1857                 }
1858
1859                 retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
1860                     priv->params.rx_cq_moderation_usec,
1861                     priv->params.rx_cq_moderation_pkts,
1862                     cq_mode);
1863
1864                 return (retval);
1865         }
1866
1867         return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
1868             priv->params.rx_cq_moderation_usec,
1869             priv->params.rx_cq_moderation_pkts));
1870 }
1871
1872 static int
1873 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
1874 {
1875         int err;
1876         int i;
1877
1878         if (c == NULL)
1879                 return (EINVAL);
1880
1881         err = mlx5e_refresh_rq_params(priv, &c->rq);
1882         if (err)
1883                 goto done;
1884
1885         for (i = 0; i != c->num_tc; i++) {
1886                 err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
1887                 if (err)
1888                         goto done;
1889         }
1890 done:
1891         return (err);
1892 }
1893
1894 int
1895 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
1896 {
1897         int i;
1898
1899         if (priv->channel == NULL)
1900                 return (EINVAL);
1901
1902         for (i = 0; i < priv->params.num_channels; i++) {
1903                 int err;
1904
1905                 err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
1906                 if (err)
1907                         return (err);
1908         }
1909         return (0);
1910 }
1911
1912 static int
1913 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
1914 {
1915         struct mlx5_core_dev *mdev = priv->mdev;
1916         u32 in[MLX5_ST_SZ_DW(create_tis_in)];
1917         void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
1918
1919         memset(in, 0, sizeof(in));
1920
1921         MLX5_SET(tisc, tisc, prio, tc);
1922         MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
1923
1924         return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
1925 }
1926
1927 static void
1928 mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
1929 {
1930         mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
1931 }
1932
1933 static int
1934 mlx5e_open_tises(struct mlx5e_priv *priv)
1935 {
1936         int num_tc = priv->num_tc;
1937         int err;
1938         int tc;
1939
1940         for (tc = 0; tc < num_tc; tc++) {
1941                 err = mlx5e_open_tis(priv, tc);
1942                 if (err)
1943                         goto err_close_tises;
1944         }
1945
1946         return (0);
1947
1948 err_close_tises:
1949         for (tc--; tc >= 0; tc--)
1950                 mlx5e_close_tis(priv, tc);
1951
1952         return (err);
1953 }
1954
1955 static void
1956 mlx5e_close_tises(struct mlx5e_priv *priv)
1957 {
1958         int num_tc = priv->num_tc;
1959         int tc;
1960
1961         for (tc = 0; tc < num_tc; tc++)
1962                 mlx5e_close_tis(priv, tc);
1963 }
1964
1965 static int
1966 mlx5e_open_rqt(struct mlx5e_priv *priv)
1967 {
1968         struct mlx5_core_dev *mdev = priv->mdev;
1969         u32 *in;
1970         u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
1971         void *rqtc;
1972         int inlen;
1973         int err;
1974         int sz;
1975         int i;
1976
1977         sz = 1 << priv->params.rx_hash_log_tbl_sz;
1978
1979         inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
1980         in = mlx5_vzalloc(inlen);
1981         if (in == NULL)
1982                 return (-ENOMEM);
1983         rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1984
1985         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
1986         MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
1987
1988         for (i = 0; i < sz; i++) {
1989                 int ix;
1990 #ifdef RSS
1991                 ix = rss_get_indirection_to_bucket(i);
1992 #else
1993                 ix = i;
1994 #endif
1995                 /* ensure we don't overflow */
1996                 ix %= priv->params.num_channels;
1997                 MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
1998         }
1999
2000         MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
2001
2002         memset(out, 0, sizeof(out));
2003         err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
2004         if (!err)
2005                 priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
2006
2007         kvfree(in);
2008
2009         return (err);
2010 }
2011
2012 static void
2013 mlx5e_close_rqt(struct mlx5e_priv *priv)
2014 {
2015         u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
2016         u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];
2017
2018         memset(in, 0, sizeof(in));
2019
2020         MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
2021         MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
2022
2023         mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out,
2024             sizeof(out));
2025 }
2026
2027 static void
2028 mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
2029 {
2030         void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
2031         __be32 *hkey;
2032
2033         MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
2034
2035 #define ROUGH_MAX_L2_L3_HDR_SZ 256
2036
2037 #define MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2038                           MLX5_HASH_FIELD_SEL_DST_IP)
2039
2040 #define MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2041                           MLX5_HASH_FIELD_SEL_DST_IP   |\
2042                           MLX5_HASH_FIELD_SEL_L4_SPORT |\
2043                           MLX5_HASH_FIELD_SEL_L4_DPORT)
2044
2045 #define MLX5_HASH_IP_IPSEC_SPI  (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2046                                  MLX5_HASH_FIELD_SEL_DST_IP   |\
2047                                  MLX5_HASH_FIELD_SEL_IPSEC_SPI)
2048
2049         if (priv->params.hw_lro_en) {
2050                 MLX5_SET(tirc, tirc, lro_enable_mask,
2051                     MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
2052                     MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
2053                 MLX5_SET(tirc, tirc, lro_max_msg_sz,
2054                     (priv->params.lro_wqe_sz -
2055                     ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
2056                 /* TODO: add the option to choose timer value dynamically */
2057                 MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
2058                     MLX5_CAP_ETH(priv->mdev,
2059                     lro_timer_supported_periods[2]));
2060         }
2061
2062         /* setup parameters for hashing TIR type, if any */
2063         switch (tt) {
2064         case MLX5E_TT_ANY:
2065                 MLX5_SET(tirc, tirc, disp_type,
2066                     MLX5_TIRC_DISP_TYPE_DIRECT);
2067                 MLX5_SET(tirc, tirc, inline_rqn,
2068                     priv->channel[0]->rq.rqn);
2069                 break;
2070         default:
2071                 MLX5_SET(tirc, tirc, disp_type,
2072                     MLX5_TIRC_DISP_TYPE_INDIRECT);
2073                 MLX5_SET(tirc, tirc, indirect_table,
2074                     priv->rqtn);
2075                 MLX5_SET(tirc, tirc, rx_hash_fn,
2076                     MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
2077                 hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
2078 #ifdef RSS
2079                 /*
2080                  * The FreeBSD RSS implementation does currently not
2081                  * support symmetric Toeplitz hashes:
2082                  */
2083                 MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
2084                 rss_getkey((uint8_t *)hkey);
2085 #else
2086                 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
2087                 hkey[0] = cpu_to_be32(0xD181C62C);
2088                 hkey[1] = cpu_to_be32(0xF7F4DB5B);
2089                 hkey[2] = cpu_to_be32(0x1983A2FC);
2090                 hkey[3] = cpu_to_be32(0x943E1ADB);
2091                 hkey[4] = cpu_to_be32(0xD9389E6B);
2092                 hkey[5] = cpu_to_be32(0xD1039C2C);
2093                 hkey[6] = cpu_to_be32(0xA74499AD);
2094                 hkey[7] = cpu_to_be32(0x593D56D9);
2095                 hkey[8] = cpu_to_be32(0xF3253C06);
2096                 hkey[9] = cpu_to_be32(0x2ADC1FFC);
2097 #endif
2098                 break;
2099         }
2100
2101         switch (tt) {
2102         case MLX5E_TT_IPV4_TCP:
2103                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2104                     MLX5_L3_PROT_TYPE_IPV4);
2105                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2106                     MLX5_L4_PROT_TYPE_TCP);
2107 #ifdef RSS
2108                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
2109                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2110                             MLX5_HASH_IP);
2111                 } else
2112 #endif
2113                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2114                     MLX5_HASH_ALL);
2115                 break;
2116
2117         case MLX5E_TT_IPV6_TCP:
2118                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2119                     MLX5_L3_PROT_TYPE_IPV6);
2120                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2121                     MLX5_L4_PROT_TYPE_TCP);
2122 #ifdef RSS
2123                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
2124                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2125                             MLX5_HASH_IP);
2126                 } else
2127 #endif
2128                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2129                     MLX5_HASH_ALL);
2130                 break;
2131
2132         case MLX5E_TT_IPV4_UDP:
2133                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2134                     MLX5_L3_PROT_TYPE_IPV4);
2135                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2136                     MLX5_L4_PROT_TYPE_UDP);
2137 #ifdef RSS
2138                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
2139                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2140                             MLX5_HASH_IP);
2141                 } else
2142 #endif
2143                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2144                     MLX5_HASH_ALL);
2145                 break;
2146
2147         case MLX5E_TT_IPV6_UDP:
2148                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2149                     MLX5_L3_PROT_TYPE_IPV6);
2150                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2151                     MLX5_L4_PROT_TYPE_UDP);
2152 #ifdef RSS
2153                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2154                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2155                             MLX5_HASH_IP);
2156                 } else
2157 #endif
2158                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2159                     MLX5_HASH_ALL);
2160                 break;
2161
2162         case MLX5E_TT_IPV4_IPSEC_AH:
2163                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2164                     MLX5_L3_PROT_TYPE_IPV4);
2165                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2166                     MLX5_HASH_IP_IPSEC_SPI);
2167                 break;
2168
2169         case MLX5E_TT_IPV6_IPSEC_AH:
2170                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2171                     MLX5_L3_PROT_TYPE_IPV6);
2172                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2173                     MLX5_HASH_IP_IPSEC_SPI);
2174                 break;
2175
2176         case MLX5E_TT_IPV4_IPSEC_ESP:
2177                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2178                     MLX5_L3_PROT_TYPE_IPV4);
2179                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2180                     MLX5_HASH_IP_IPSEC_SPI);
2181                 break;
2182
2183         case MLX5E_TT_IPV6_IPSEC_ESP:
2184                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2185                     MLX5_L3_PROT_TYPE_IPV6);
2186                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2187                     MLX5_HASH_IP_IPSEC_SPI);
2188                 break;
2189
2190         case MLX5E_TT_IPV4:
2191                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2192                     MLX5_L3_PROT_TYPE_IPV4);
2193                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2194                     MLX5_HASH_IP);
2195                 break;
2196
2197         case MLX5E_TT_IPV6:
2198                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2199                     MLX5_L3_PROT_TYPE_IPV6);
2200                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2201                     MLX5_HASH_IP);
2202                 break;
2203
2204         default:
2205                 break;
2206         }
2207 }
2208
2209 static int
2210 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2211 {
2212         struct mlx5_core_dev *mdev = priv->mdev;
2213         u32 *in;
2214         void *tirc;
2215         int inlen;
2216         int err;
2217
2218         inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2219         in = mlx5_vzalloc(inlen);
2220         if (in == NULL)
2221                 return (-ENOMEM);
2222         tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2223
2224         mlx5e_build_tir_ctx(priv, tirc, tt);
2225
2226         err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2227
2228         kvfree(in);
2229
2230         return (err);
2231 }
2232
2233 static void
2234 mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2235 {
2236         mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2237 }
2238
2239 static int
2240 mlx5e_open_tirs(struct mlx5e_priv *priv)
2241 {
2242         int err;
2243         int i;
2244
2245         for (i = 0; i < MLX5E_NUM_TT; i++) {
2246                 err = mlx5e_open_tir(priv, i);
2247                 if (err)
2248                         goto err_close_tirs;
2249         }
2250
2251         return (0);
2252
2253 err_close_tirs:
2254         for (i--; i >= 0; i--)
2255                 mlx5e_close_tir(priv, i);
2256
2257         return (err);
2258 }
2259
2260 static void
2261 mlx5e_close_tirs(struct mlx5e_priv *priv)
2262 {
2263         int i;
2264
2265         for (i = 0; i < MLX5E_NUM_TT; i++)
2266                 mlx5e_close_tir(priv, i);
2267 }
2268
2269 /*
2270  * SW MTU does not include headers,
2271  * HW MTU includes all headers and checksums.
2272  */
2273 static int
2274 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2275 {
2276         struct mlx5e_priv *priv = ifp->if_softc;
2277         struct mlx5_core_dev *mdev = priv->mdev;
2278         int hw_mtu;
2279         int err;
2280
2281         err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(sw_mtu));
2282         if (err) {
2283                 if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2284                     __func__, sw_mtu, err);
2285                 return (err);
2286         }
2287         err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2288         if (err) {
2289                 if_printf(ifp, "Query port MTU, after setting new "
2290                     "MTU value, failed\n");
2291         } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
2292                 err = -E2BIG,
2293                 if_printf(ifp, "Port MTU %d is smaller than "
2294                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2295         } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
2296                 err = -EINVAL;
2297                 if_printf(ifp, "Port MTU %d is bigger than "
2298                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2299         }
2300         ifp->if_mtu = sw_mtu;
2301         return (err);
2302 }
2303
2304 int
2305 mlx5e_open_locked(struct ifnet *ifp)
2306 {
2307         struct mlx5e_priv *priv = ifp->if_softc;
2308         int err;
2309         u16 set_id;
2310
2311         /* check if already opened */
2312         if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2313                 return (0);
2314
2315 #ifdef RSS
2316         if (rss_getnumbuckets() > priv->params.num_channels) {
2317                 if_printf(ifp, "NOTE: There are more RSS buckets(%u) than "
2318                     "channels(%u) available\n", rss_getnumbuckets(),
2319                     priv->params.num_channels);
2320         }
2321 #endif
2322         err = mlx5e_open_tises(priv);
2323         if (err) {
2324                 if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2325                     __func__, err);
2326                 return (err);
2327         }
2328         err = mlx5_vport_alloc_q_counter(priv->mdev,
2329             MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
2330         if (err) {
2331                 if_printf(priv->ifp,
2332                     "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2333                     __func__, err);
2334                 goto err_close_tises;
2335         }
2336         /* store counter set ID */
2337         priv->counter_set_id = set_id;
2338
2339         err = mlx5e_open_channels(priv);
2340         if (err) {
2341                 if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2342                     __func__, err);
2343                 goto err_dalloc_q_counter;
2344         }
2345         err = mlx5e_open_rqt(priv);
2346         if (err) {
2347                 if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2348                     __func__, err);
2349                 goto err_close_channels;
2350         }
2351         err = mlx5e_open_tirs(priv);
2352         if (err) {
2353                 if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
2354                     __func__, err);
2355                 goto err_close_rqls;
2356         }
2357         err = mlx5e_open_flow_table(priv);
2358         if (err) {
2359                 if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2360                     __func__, err);
2361                 goto err_close_tirs;
2362         }
2363         err = mlx5e_add_all_vlan_rules(priv);
2364         if (err) {
2365                 if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2366                     __func__, err);
2367                 goto err_close_flow_table;
2368         }
2369         set_bit(MLX5E_STATE_OPENED, &priv->state);
2370
2371         mlx5e_update_carrier(priv);
2372         mlx5e_set_rx_mode_core(priv);
2373
2374         return (0);
2375
2376 err_close_flow_table:
2377         mlx5e_close_flow_table(priv);
2378
2379 err_close_tirs:
2380         mlx5e_close_tirs(priv);
2381
2382 err_close_rqls:
2383         mlx5e_close_rqt(priv);
2384
2385 err_close_channels:
2386         mlx5e_close_channels(priv);
2387
2388 err_dalloc_q_counter:
2389         mlx5_vport_dealloc_q_counter(priv->mdev,
2390             MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2391
2392 err_close_tises:
2393         mlx5e_close_tises(priv);
2394
2395         return (err);
2396 }
2397
2398 static void
2399 mlx5e_open(void *arg)
2400 {
2401         struct mlx5e_priv *priv = arg;
2402
2403         PRIV_LOCK(priv);
2404         if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2405                 if_printf(priv->ifp,
2406                     "%s: Setting port status to up failed\n",
2407                     __func__);
2408
2409         mlx5e_open_locked(priv->ifp);
2410         priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2411         PRIV_UNLOCK(priv);
2412 }
2413
2414 int
2415 mlx5e_close_locked(struct ifnet *ifp)
2416 {
2417         struct mlx5e_priv *priv = ifp->if_softc;
2418
2419         /* check if already closed */
2420         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2421                 return (0);
2422
2423         clear_bit(MLX5E_STATE_OPENED, &priv->state);
2424
2425         mlx5e_set_rx_mode_core(priv);
2426         mlx5e_del_all_vlan_rules(priv);
2427         if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2428         mlx5e_close_flow_table(priv);
2429         mlx5e_close_tirs(priv);
2430         mlx5e_close_rqt(priv);
2431         mlx5e_close_channels(priv);
2432         mlx5_vport_dealloc_q_counter(priv->mdev,
2433             MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2434         mlx5e_close_tises(priv);
2435
2436         return (0);
2437 }
2438
#if (__FreeBSD_version >= 1100000)
/*
 * Return the value of interface counter "cnt".  The counters listed
 * below are taken from the vport statistics kept in "priv"; all
 * others are forwarded to if_get_counter_default().
 *
 * The statistics are read without taking PRIV_LOCK, which is not
 * allowed here.
 */
static uint64_t
mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
{
	struct mlx5e_priv *priv = ifp->if_softc;

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (priv->stats.vport.rx_packets);
	case IFCOUNTER_IERRORS:
		return (priv->stats.vport.rx_error_packets);
	case IFCOUNTER_IQDROPS:
		return (priv->stats.vport.rx_out_of_buffer);
	case IFCOUNTER_OPACKETS:
		return (priv->stats.vport.tx_packets);
	case IFCOUNTER_OERRORS:
		return (priv->stats.vport.tx_error_packets);
	case IFCOUNTER_IBYTES:
		return (priv->stats.vport.rx_bytes);
	case IFCOUNTER_OBYTES:
		return (priv->stats.vport.tx_bytes);
	case IFCOUNTER_IMCASTS:
		return (priv->stats.vport.rx_multicast_packets);
	case IFCOUNTER_OMCASTS:
		return (priv->stats.vport.tx_multicast_packets);
	case IFCOUNTER_OQDROPS:
		return (priv->stats.vport.tx_queue_dropped);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif
2486
2487 static void
2488 mlx5e_set_rx_mode(struct ifnet *ifp)
2489 {
2490         struct mlx5e_priv *priv = ifp->if_softc;
2491
2492         schedule_work(&priv->set_rx_mode_work);
2493 }
2494
2495 static int
2496 mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2497 {
2498         struct mlx5e_priv *priv;
2499         struct ifreq *ifr;
2500         struct ifi2creq i2c;
2501         int error = 0;
2502         int mask = 0;
2503         int size_read = 0;
2504         int module_num;
2505         int max_mtu;
2506         uint8_t read_addr;
2507
2508         priv = ifp->if_softc;
2509
2510         /* check if detaching */
2511         if (priv == NULL || priv->gone != 0)
2512                 return (ENXIO);
2513
2514         switch (command) {
2515         case SIOCSIFMTU:
2516                 ifr = (struct ifreq *)data;
2517
2518                 PRIV_LOCK(priv);
2519                 mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2520
2521                 if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2522                     ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2523                         int was_opened;
2524
2525                         was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2526                         if (was_opened)
2527                                 mlx5e_close_locked(ifp);
2528
2529                         /* set new MTU */
2530                         mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2531
2532                         if (was_opened)
2533                                 mlx5e_open_locked(ifp);
2534                 } else {
2535                         error = EINVAL;
2536                         if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2537                             MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2538                 }
2539                 PRIV_UNLOCK(priv);
2540                 break;
2541         case SIOCSIFFLAGS:
2542                 if ((ifp->if_flags & IFF_UP) &&
2543                     (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2544                         mlx5e_set_rx_mode(ifp);
2545                         break;
2546                 }
2547                 PRIV_LOCK(priv);
2548                 if (ifp->if_flags & IFF_UP) {
2549                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2550                                 if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2551                                         mlx5e_open_locked(ifp);
2552                                 ifp->if_drv_flags |= IFF_DRV_RUNNING;
2553                                 mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2554                         }
2555                 } else {
2556                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2557                                 mlx5_set_port_status(priv->mdev,
2558                                     MLX5_PORT_DOWN);
2559                                 if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2560                                         mlx5e_close_locked(ifp);
2561                                 mlx5e_update_carrier(priv);
2562                                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2563                         }
2564                 }
2565                 PRIV_UNLOCK(priv);
2566                 break;
2567         case SIOCADDMULTI:
2568         case SIOCDELMULTI:
2569                 mlx5e_set_rx_mode(ifp);
2570                 break;
2571         case SIOCSIFMEDIA:
2572         case SIOCGIFMEDIA:
2573         case SIOCGIFXMEDIA:
2574                 ifr = (struct ifreq *)data;
2575                 error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2576                 break;
2577         case SIOCSIFCAP:
2578                 ifr = (struct ifreq *)data;
2579                 PRIV_LOCK(priv);
2580                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2581
2582                 if (mask & IFCAP_TXCSUM) {
2583                         ifp->if_capenable ^= IFCAP_TXCSUM;
2584                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2585
2586                         if (IFCAP_TSO4 & ifp->if_capenable &&
2587                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2588                                 ifp->if_capenable &= ~IFCAP_TSO4;
2589                                 ifp->if_hwassist &= ~CSUM_IP_TSO;
2590                                 if_printf(ifp,
2591                                     "tso4 disabled due to -txcsum.\n");
2592                         }
2593                 }
2594                 if (mask & IFCAP_TXCSUM_IPV6) {
2595                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2596                         ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2597
2598                         if (IFCAP_TSO6 & ifp->if_capenable &&
2599                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2600                                 ifp->if_capenable &= ~IFCAP_TSO6;
2601                                 ifp->if_hwassist &= ~CSUM_IP6_TSO;
2602                                 if_printf(ifp,
2603                                     "tso6 disabled due to -txcsum6.\n");
2604                         }
2605                 }
2606                 if (mask & IFCAP_RXCSUM)
2607                         ifp->if_capenable ^= IFCAP_RXCSUM;
2608                 if (mask & IFCAP_RXCSUM_IPV6)
2609                         ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2610                 if (mask & IFCAP_TSO4) {
2611                         if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2612                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2613                                 if_printf(ifp, "enable txcsum first.\n");
2614                                 error = EAGAIN;
2615                                 goto out;
2616                         }
2617                         ifp->if_capenable ^= IFCAP_TSO4;
2618                         ifp->if_hwassist ^= CSUM_IP_TSO;
2619                 }
2620                 if (mask & IFCAP_TSO6) {
2621                         if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2622                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2623                                 if_printf(ifp, "enable txcsum6 first.\n");
2624                                 error = EAGAIN;
2625                                 goto out;
2626                         }
2627                         ifp->if_capenable ^= IFCAP_TSO6;
2628                         ifp->if_hwassist ^= CSUM_IP6_TSO;
2629                 }
2630                 if (mask & IFCAP_VLAN_HWFILTER) {
2631                         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2632                                 mlx5e_disable_vlan_filter(priv);
2633                         else
2634                                 mlx5e_enable_vlan_filter(priv);
2635
2636                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2637                 }
2638                 if (mask & IFCAP_VLAN_HWTAGGING)
2639                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2640                 if (mask & IFCAP_WOL_MAGIC)
2641                         ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2642
2643                 VLAN_CAPABILITIES(ifp);
2644                 /* turn off LRO means also turn of HW LRO - if it's on */
2645                 if (mask & IFCAP_LRO) {
2646                         int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2647                         bool need_restart = false;
2648
2649                         ifp->if_capenable ^= IFCAP_LRO;
2650                         if (!(ifp->if_capenable & IFCAP_LRO)) {
2651                                 if (priv->params.hw_lro_en) {
2652                                         priv->params.hw_lro_en = false;
2653                                         need_restart = true;
2654                                         /* Not sure this is the correct way */
2655                                         priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
2656                                 }
2657                         }
2658                         if (was_opened && need_restart) {
2659                                 mlx5e_close_locked(ifp);
2660                                 mlx5e_open_locked(ifp);
2661                         }
2662                 }
2663 out:
2664                 PRIV_UNLOCK(priv);
2665                 break;
2666
2667         case SIOCGI2C:
2668                 ifr = (struct ifreq *)data;
2669
2670                 /*
2671                  * Copy from the user-space address ifr_data to the
2672                  * kernel-space address i2c
2673                  */
2674                 error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
2675                 if (error)
2676                         break;
2677
2678                 if (i2c.len > sizeof(i2c.data)) {
2679                         error = EINVAL;
2680                         break;
2681                 }
2682
2683                 PRIV_LOCK(priv);
2684                 /* Get module_num which is required for the query_eeprom */
2685                 error = mlx5_query_module_num(priv->mdev, &module_num);
2686                 if (error) {
2687                         if_printf(ifp, "Query module num failed, eeprom "
2688                             "reading is not supported\n");
2689                         error = EINVAL;
2690                         goto err_i2c;
2691                 }
2692                 /* Check if module is present before doing an access */
2693                 if (mlx5_query_module_status(priv->mdev, module_num) !=
2694                     MLX5_MODULE_STATUS_PLUGGED) {
2695                         error = EINVAL;
2696                         goto err_i2c;
2697                 }
2698                 /*
2699                  * Currently 0XA0 and 0xA2 are the only addresses permitted.
2700                  * The internal conversion is as follows:
2701                  */
2702                 if (i2c.dev_addr == 0xA0)
2703                         read_addr = MLX5E_I2C_ADDR_LOW;
2704                 else if (i2c.dev_addr == 0xA2)
2705                         read_addr = MLX5E_I2C_ADDR_HIGH;
2706                 else {
2707                         if_printf(ifp, "Query eeprom failed, "
2708                             "Invalid Address: %X\n", i2c.dev_addr);
2709                         error = EINVAL;
2710                         goto err_i2c;
2711                 }
2712                 error = mlx5_query_eeprom(priv->mdev,
2713                     read_addr, MLX5E_EEPROM_LOW_PAGE,
2714                     (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2715                     (uint32_t *)i2c.data, &size_read);
2716                 if (error) {
2717                         if_printf(ifp, "Query eeprom failed, eeprom "
2718                             "reading is not supported\n");
2719                         error = EINVAL;
2720                         goto err_i2c;
2721                 }
2722
2723                 if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2724                         error = mlx5_query_eeprom(priv->mdev,
2725                             read_addr, MLX5E_EEPROM_LOW_PAGE,
2726                             (uint32_t)(i2c.offset + size_read),
2727                             (uint32_t)(i2c.len - size_read), module_num,
2728                             (uint32_t *)(i2c.data + size_read), &size_read);
2729                 }
2730                 if (error) {
2731                         if_printf(ifp, "Query eeprom failed, eeprom "
2732                             "reading is not supported\n");
2733                         error = EINVAL;
2734                         goto err_i2c;
2735                 }
2736
2737                 error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
2738 err_i2c:
2739                 PRIV_UNLOCK(priv);
2740                 break;
2741
2742         default:
2743                 error = ether_ioctl(ifp, command, data);
2744                 break;
2745         }
2746         return (error);
2747 }
2748
2749 static int
2750 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2751 {
2752         /*
2753          * TODO: uncoment once FW really sets all these bits if
2754          * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
2755          * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
2756          * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
2757          * -ENOTSUPP;
2758          */
2759
2760         /* TODO: add more must-to-have features */
2761
2762         if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
2763                 return (-ENODEV);
2764
2765         return (0);
2766 }
2767
2768 static void
2769 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
2770     struct mlx5e_priv *priv,
2771     int num_comp_vectors)
2772 {
2773         /*
2774          * TODO: Consider link speed for setting "log_sq_size",
2775          * "log_rq_size" and "cq_moderation_xxx":
2776          */
2777         priv->params.log_sq_size =
2778             MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
2779         priv->params.log_rq_size =
2780             MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
2781         priv->params.rx_cq_moderation_usec =
2782             MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
2783             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
2784             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
2785         priv->params.rx_cq_moderation_mode =
2786             MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
2787         priv->params.rx_cq_moderation_pkts =
2788             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
2789         priv->params.tx_cq_moderation_usec =
2790             MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
2791         priv->params.tx_cq_moderation_pkts =
2792             MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
2793         priv->params.min_rx_wqes =
2794             MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
2795         priv->params.rx_hash_log_tbl_sz =
2796             (order_base_2(num_comp_vectors) >
2797             MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
2798             order_base_2(num_comp_vectors) :
2799             MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
2800         priv->params.num_tc = 1;
2801         priv->params.default_vlan_prio = 0;
2802         priv->counter_set_id = -1;
2803
2804         /*
2805          * hw lro is currently defaulted to off. when it won't anymore we
2806          * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
2807          */
2808         priv->params.hw_lro_en = false;
2809         priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
2810
2811         priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
2812
2813         priv->mdev = mdev;
2814         priv->params.num_channels = num_comp_vectors;
2815         priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
2816         priv->queue_mapping_channel_mask =
2817             roundup_pow_of_two(num_comp_vectors) - 1;
2818         priv->num_tc = priv->params.num_tc;
2819         priv->default_vlan_prio = priv->params.default_vlan_prio;
2820
2821         INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
2822         INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
2823         INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
2824 }
2825
2826 static int
2827 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
2828     struct mlx5_core_mr *mr)
2829 {
2830         struct ifnet *ifp = priv->ifp;
2831         struct mlx5_core_dev *mdev = priv->mdev;
2832         struct mlx5_create_mkey_mbox_in *in;
2833         int err;
2834
2835         in = mlx5_vzalloc(sizeof(*in));
2836         if (in == NULL) {
2837                 if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
2838                 return (-ENOMEM);
2839         }
2840         in->seg.flags = MLX5_PERM_LOCAL_WRITE |
2841             MLX5_PERM_LOCAL_READ |
2842             MLX5_ACCESS_MODE_PA;
2843         in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
2844         in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
2845
2846         err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
2847             NULL);
2848         if (err)
2849                 if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
2850                     __func__, err);
2851
2852         kvfree(in);
2853
2854         return (err);
2855 }
2856
2857 static const char *mlx5e_vport_stats_desc[] = {
2858         MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
2859 };
2860
2861 static const char *mlx5e_pport_stats_desc[] = {
2862         MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
2863 };
2864
2865 static void
2866 mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
2867 {
2868         mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
2869         sx_init(&priv->state_lock, "mlx5state");
2870         callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
2871         MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
2872 }
2873
2874 static void
2875 mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
2876 {
2877         mtx_destroy(&priv->async_events_mtx);
2878         sx_destroy(&priv->state_lock);
2879 }
2880
2881 static int
2882 sysctl_firmware(SYSCTL_HANDLER_ARGS)
2883 {
2884         /*
2885          * %d.%d%.d the string format.
2886          * fw_rev_{maj,min,sub} return u16, 2^16 = 65536.
2887          * We need at most 5 chars to store that.
2888          * It also has: two "." and NULL at the end, which means we need 18
2889          * (5*3 + 3) chars at most.
2890          */
2891         char fw[18];
2892         struct mlx5e_priv *priv = arg1;
2893         int error;
2894
2895         snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
2896             fw_rev_sub(priv->mdev));
2897         error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
2898         return (error);
2899 }
2900
2901 static void
2902 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
2903 {
2904         SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2905             OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
2906             sysctl_firmware, "A", "HCA firmware version");
2907
2908         SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2909             OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
2910             "Board ID");
2911 }
2912
2913 static void
2914 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
2915 {
2916 #if (__FreeBSD_version < 1100000)
2917         char path[64];
2918
2919 #endif
2920         /* Only receiving pauseframes is enabled by default */
2921         priv->params.tx_pauseframe_control = 0;
2922         priv->params.rx_pauseframe_control = 1;
2923
2924 #if (__FreeBSD_version < 1100000)
2925         /* compute path for sysctl */
2926         snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
2927             device_get_unit(priv->mdev->pdev->dev.bsddev));
2928
2929         /* try to fetch tunable, if any */
2930         TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
2931
2932         /* compute path for sysctl */
2933         snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
2934             device_get_unit(priv->mdev->pdev->dev.bsddev));
2935
2936         /* try to fetch tunable, if any */
2937         TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
2938 #endif
2939
2940         /* register pausframe SYSCTLs */
2941         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2942             OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
2943             &priv->params.tx_pauseframe_control, 0,
2944             "Set to enable TX pause frames. Clear to disable.");
2945
2946         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2947             OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
2948             &priv->params.rx_pauseframe_control, 0,
2949             "Set to enable RX pause frames. Clear to disable.");
2950
2951         /* range check */
2952         priv->params.tx_pauseframe_control =
2953             priv->params.tx_pauseframe_control ? 1 : 0;
2954         priv->params.rx_pauseframe_control =
2955             priv->params.rx_pauseframe_control ? 1 : 0;
2956
2957         /* update firmware */
2958         mlx5_set_port_pause(priv->mdev, 1,
2959             priv->params.rx_pauseframe_control,
2960             priv->params.tx_pauseframe_control);
2961 }
2962
2963 static void *
2964 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
2965 {
2966         static volatile int mlx5_en_unit;
2967         struct ifnet *ifp;
2968         struct mlx5e_priv *priv;
2969         u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
2970         struct sysctl_oid_list *child;
2971         int ncv = mdev->priv.eq_table.num_comp_vectors;
2972         char unit[16];
2973         int err;
2974         int i;
2975         u32 eth_proto_cap;
2976
2977         if (mlx5e_check_required_hca_cap(mdev)) {
2978                 mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
2979                 return (NULL);
2980         }
2981         priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
2982         mlx5e_priv_mtx_init(priv);
2983
2984         ifp = priv->ifp = if_alloc(IFT_ETHER);
2985         if (ifp == NULL) {
2986                 mlx5_core_err(mdev, "if_alloc() failed\n");
2987                 goto err_free_priv;
2988         }
2989         ifp->if_softc = priv;
2990         if_initname(ifp, "mce", atomic_fetchadd_int(&mlx5_en_unit, 1));
2991         ifp->if_mtu = ETHERMTU;
2992         ifp->if_init = mlx5e_open;
2993         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2994         ifp->if_ioctl = mlx5e_ioctl;
2995         ifp->if_transmit = mlx5e_xmit;
2996         ifp->if_qflush = if_qflush;
2997 #if (__FreeBSD_version >= 1100000)
2998         ifp->if_get_counter = mlx5e_get_counter;
2999 #endif
3000         ifp->if_snd.ifq_maxlen = ifqmaxlen;
3001         /*
3002          * Set driver features
3003          */
3004         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
3005         ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
3006         ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
3007         ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
3008         ifp->if_capabilities |= IFCAP_LRO;
3009         ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
3010         ifp->if_capabilities |= IFCAP_HWSTATS;
3011
3012         /* set TSO limits so that we don't have to drop TX packets */
3013         ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3014         ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
3015         ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
3016
3017         ifp->if_capenable = ifp->if_capabilities;
3018         ifp->if_hwassist = 0;
3019         if (ifp->if_capenable & IFCAP_TSO)
3020                 ifp->if_hwassist |= CSUM_TSO;
3021         if (ifp->if_capenable & IFCAP_TXCSUM)
3022                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
3023         if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3024                 ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
3025
3026         /* ifnet sysctl tree */
3027         sysctl_ctx_init(&priv->sysctl_ctx);
3028         priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
3029             OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
3030         if (priv->sysctl_ifnet == NULL) {
3031                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3032                 goto err_free_sysctl;
3033         }
3034         snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
3035         priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3036             OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
3037         if (priv->sysctl_ifnet == NULL) {
3038                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3039                 goto err_free_sysctl;
3040         }
3041
3042         /* HW sysctl tree */
3043         child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
3044         priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
3045             OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
3046         if (priv->sysctl_hw == NULL) {
3047                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3048                 goto err_free_sysctl;
3049         }
3050         mlx5e_build_ifp_priv(mdev, priv, ncv);
3051         err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
3052         if (err) {
3053                 if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
3054                     __func__, err);
3055                 goto err_free_sysctl;
3056         }
3057         err = mlx5_core_alloc_pd(mdev, &priv->pdn);
3058         if (err) {
3059                 if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
3060                     __func__, err);
3061                 goto err_unmap_free_uar;
3062         }
3063         err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
3064         if (err) {
3065                 if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
3066                     __func__, err);
3067                 goto err_dealloc_pd;
3068         }
3069         err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
3070         if (err) {
3071                 if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
3072                     __func__, err);
3073                 goto err_dealloc_transport_domain;
3074         }
3075         mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
3076
3077         /* check if we should generate a random MAC address */
3078         if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
3079             is_zero_ether_addr(dev_addr)) {
3080                 random_ether_addr(dev_addr);
3081                 if_printf(ifp, "Assigned random MAC address\n");
3082         }
3083
3084         /* set default MTU */
3085         mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
3086
3087         /* Set desc */
3088         device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
3089
3090         /* Set default media status */
3091         priv->media_status_last = IFM_AVALID;
3092         priv->media_active_last = IFM_ETHER | IFM_AUTO |
3093             IFM_ETH_RXPAUSE | IFM_FDX;
3094
3095         /* setup default pauseframes configuration */
3096         mlx5e_setup_pauseframes(priv);
3097
3098         err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
3099         if (err) {
3100                 eth_proto_cap = 0;
3101                 if_printf(ifp, "%s: Query port media capability failed, %d\n",
3102                     __func__, err);
3103         }
3104
3105         /* Setup supported medias */
3106         ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
3107             mlx5e_media_change, mlx5e_media_status);
3108
3109         for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
3110                 if (mlx5e_mode_table[i].baudrate == 0)
3111                         continue;
3112                 if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
3113                         ifmedia_add(&priv->media,
3114                             mlx5e_mode_table[i].subtype |
3115                             IFM_ETHER, 0, NULL);
3116                         ifmedia_add(&priv->media,
3117                             mlx5e_mode_table[i].subtype |
3118                             IFM_ETHER | IFM_FDX |
3119                             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3120                 }
3121         }
3122
3123         ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3124         ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3125             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3126
3127         /* Set autoselect by default */
3128         ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3129             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
3130         ether_ifattach(ifp, dev_addr);
3131
3132         /* Register for VLAN events */
3133         priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
3134             mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3135         priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3136             mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3137
3138         /* Link is down by default */
3139         if_link_state_change(ifp, LINK_STATE_DOWN);
3140
3141         mlx5e_enable_async_events(priv);
3142
3143         mlx5e_add_hw_stats(priv);
3144
3145         mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3146             "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3147             priv->stats.vport.arg);
3148
3149         mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3150             "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3151             priv->stats.pport.arg);
3152
3153         mlx5e_create_ethtool(priv);
3154
3155         mtx_lock(&priv->async_events_mtx);
3156         mlx5e_update_stats(priv);
3157         mtx_unlock(&priv->async_events_mtx);
3158
3159         return (priv);
3160
3161 err_dealloc_transport_domain:
3162         mlx5_dealloc_transport_domain(mdev, priv->tdn);
3163
3164 err_dealloc_pd:
3165         mlx5_core_dealloc_pd(mdev, priv->pdn);
3166
3167 err_unmap_free_uar:
3168         mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3169
3170 err_free_sysctl:
3171         sysctl_ctx_free(&priv->sysctl_ctx);
3172
3173         if_free(ifp);
3174
3175 err_free_priv:
3176         mlx5e_priv_mtx_destroy(priv);
3177         free(priv, M_MLX5EN);
3178         return (NULL);
3179 }
3180
3181 static void
3182 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3183 {
3184         struct mlx5e_priv *priv = vpriv;
3185         struct ifnet *ifp = priv->ifp;
3186
3187         /* don't allow more IOCTLs */
3188         priv->gone = 1;
3189
3190         /*
3191          * Clear the device description to avoid use after free,
3192          * because the bsddev is not destroyed when this module is
3193          * unloaded:
3194          */
3195         device_set_desc(mdev->pdev->dev.bsddev, NULL);
3196
3197         /* XXX wait a bit to allow IOCTL handlers to complete */
3198         pause("W", hz);
3199
3200         /* stop watchdog timer */
3201         callout_drain(&priv->watchdog);
3202
3203         if (priv->vlan_attach != NULL)
3204                 EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3205         if (priv->vlan_detach != NULL)
3206                 EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3207
3208         /* make sure device gets closed */
3209         PRIV_LOCK(priv);
3210         mlx5e_close_locked(ifp);
3211         PRIV_UNLOCK(priv);
3212
3213         /* unregister device */
3214         ifmedia_removeall(&priv->media);
3215         ether_ifdetach(ifp);
3216         if_free(ifp);
3217
3218         /* destroy all remaining sysctl nodes */
3219         if (priv->sysctl_debug)
3220                 sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3221         sysctl_ctx_free(&priv->stats.vport.ctx);
3222         sysctl_ctx_free(&priv->stats.pport.ctx);
3223         sysctl_ctx_free(&priv->sysctl_ctx);
3224
3225         mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3226         mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3227         mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3228         mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3229         mlx5e_disable_async_events(priv);
3230         flush_scheduled_work();
3231         mlx5e_priv_mtx_destroy(priv);
3232         free(priv, M_MLX5EN);
3233 }
3234
3235 static void *
3236 mlx5e_get_ifp(void *vpriv)
3237 {
3238         struct mlx5e_priv *priv = vpriv;
3239
3240         return (priv->ifp);
3241 }
3242
3243 static struct mlx5_interface mlx5e_interface = {
3244         .add = mlx5e_create_ifp,
3245         .remove = mlx5e_destroy_ifp,
3246         .event = mlx5e_async_event,
3247         .protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3248         .get_dev = mlx5e_get_ifp,
3249 };
3250
3251 void
3252 mlx5e_init(void)
3253 {
3254         mlx5_register_interface(&mlx5e_interface);
3255 }
3256
3257 void
3258 mlx5e_cleanup(void)
3259 {
3260         mlx5_unregister_interface(&mlx5e_interface);
3261 }
3262
3263 module_init_order(mlx5e_init, SI_ORDER_THIRD);
3264 module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3265
3266 #if (__FreeBSD_version >= 1100000)
3267 MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3268 #endif
3269 MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3270 MODULE_VERSION(mlx5en, 1);