/*-
 * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "en.h"

#include <sys/sockio.h>
#include <machine/atomic.h>

#define ETH_DRIVER_VERSION      "3.1.0-dev"
char mlx5e_version[] = "Mellanox Ethernet driver"
    " (" ETH_DRIVER_VERSION ")";

struct mlx5e_channel_param {
        struct mlx5e_rq_param rq;
        struct mlx5e_sq_param sq;
        struct mlx5e_cq_param rx_cq;
        struct mlx5e_cq_param tx_cq;
};

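/*
 * Map from firmware PTYS link-mode bit positions to ifmedia subtypes
 * and baud rates. The table is indexed by the MLX5E_* link-mode
 * enumeration; entries left with a baudrate of zero are treated as
 * unsupported and skipped by the lookup loops below. MLX5E_PROT_MASK()
 * (defined in the mlx5 headers) selects the protocol bit matching a
 * given table index.
 */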
static const struct {
        u32     subtype;
        u64     baudrate;
}       mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
        [MLX5E_1000BASE_CX_SGMII] = {
                .subtype = IFM_1000_CX_SGMII,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_1000BASE_KX] = {
                .subtype = IFM_1000_KX,
                .baudrate = IF_Mbps(1000ULL),
        },
        [MLX5E_10GBASE_CX4] = {
                .subtype = IFM_10G_CX4,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_KX4] = {
                .subtype = IFM_10G_KX4,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_KR] = {
                .subtype = IFM_10G_KR,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_20GBASE_KR2] = {
                .subtype = IFM_20G_KR2,
                .baudrate = IF_Gbps(20ULL),
        },
        [MLX5E_40GBASE_CR4] = {
                .subtype = IFM_40G_CR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_40GBASE_KR4] = {
                .subtype = IFM_40G_KR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_56GBASE_R4] = {
                .subtype = IFM_56G_R4,
                .baudrate = IF_Gbps(56ULL),
        },
        [MLX5E_10GBASE_CR] = {
                .subtype = IFM_10G_CR1,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_SR] = {
                .subtype = IFM_10G_SR,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_10GBASE_LR] = {
                .subtype = IFM_10G_LR,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_40GBASE_SR4] = {
                .subtype = IFM_40G_SR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_40GBASE_LR4] = {
                .subtype = IFM_40G_LR4,
                .baudrate = IF_Gbps(40ULL),
        },
        [MLX5E_100GBASE_CR4] = {
                .subtype = IFM_100G_CR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_SR4] = {
                .subtype = IFM_100G_SR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_KR4] = {
                .subtype = IFM_100G_KR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100GBASE_LR4] = {
                .subtype = IFM_100G_LR4,
                .baudrate = IF_Gbps(100ULL),
        },
        [MLX5E_100BASE_TX] = {
                .subtype = IFM_100_TX,
                .baudrate = IF_Mbps(100ULL),
        },
        [MLX5E_100BASE_T] = {
                .subtype = IFM_100_T,
                .baudrate = IF_Mbps(100ULL),
        },
        [MLX5E_10GBASE_T] = {
                .subtype = IFM_10G_T,
                .baudrate = IF_Gbps(10ULL),
        },
        [MLX5E_25GBASE_CR] = {
                .subtype = IFM_25G_CR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_25GBASE_KR] = {
                .subtype = IFM_25G_KR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_25GBASE_SR] = {
                .subtype = IFM_25G_SR,
                .baudrate = IF_Gbps(25ULL),
        },
        [MLX5E_50GBASE_CR2] = {
                .subtype = IFM_50G_CR2,
                .baudrate = IF_Gbps(50ULL),
        },
        [MLX5E_50GBASE_KR2] = {
                .subtype = IFM_50G_KR2,
                .baudrate = IF_Gbps(50ULL),
        },
};

MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");

static void
mlx5e_update_carrier(struct mlx5e_priv *priv)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        u32 out[MLX5_ST_SZ_DW(ptys_reg)];
        u32 eth_proto_oper;
        int error;
        u8 port_state;
        u8 i;

        port_state = mlx5_query_vport_state(mdev,
            MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);

        if (port_state == VPORT_STATE_UP) {
                priv->media_status_last |= IFM_ACTIVE;
        } else {
                priv->media_status_last &= ~IFM_ACTIVE;
                priv->media_active_last = IFM_ETHER;
                if_link_state_change(priv->ifp, LINK_STATE_DOWN);
                return;
        }

        error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN);
        if (error) {
                priv->media_active_last = IFM_ETHER;
                priv->ifp->if_baudrate = 1;
                if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
                    __func__, error);
                return;
        }
        eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);

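        /*
         * Translate the operational protocol bitmask into an ifmedia
         * word; when more than one protocol bit is set, the last
         * matching table entry wins.
         */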
        for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
                if (mlx5e_mode_table[i].baudrate == 0)
                        continue;
                if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
                        priv->ifp->if_baudrate =
                            mlx5e_mode_table[i].baudrate;
                        priv->media_active_last =
                            mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
                }
        }
        if_link_state_change(priv->ifp, LINK_STATE_UP);
}

static void
mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
{
        struct mlx5e_priv *priv = dev->if_softc;

        ifmr->ifm_status = priv->media_status_last;
        ifmr->ifm_active = priv->media_active_last |
            (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
            (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
}

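/*
 * Translate an ifmedia subtype back into a PTYS protocol bitmask. For
 * example, IFM_40G_CR4 resolves to MLX5E_PROT_MASK(MLX5E_40GBASE_CR4),
 * since the PTYS register is programmed with protocol bits rather than
 * ifmedia subtypes.
 */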
static u32
mlx5e_find_link_mode(u32 subtype)
{
        u32 i;
        u32 link_mode = 0;

        for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
                if (mlx5e_mode_table[i].baudrate == 0)
                        continue;
                if (mlx5e_mode_table[i].subtype == subtype)
                        link_mode |= MLX5E_PROT_MASK(i);
        }

        return (link_mode);
}

static int
mlx5e_media_change(struct ifnet *dev)
{
        struct mlx5e_priv *priv = dev->if_softc;
        struct mlx5_core_dev *mdev = priv->mdev;
        u32 eth_proto_cap;
        u32 link_mode;
        int was_opened;
        int locked;
        int error;

        locked = PRIV_LOCKED(priv);
        if (!locked)
                PRIV_LOCK(priv);

        if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
                error = EINVAL;
                goto done;
        }
        link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));

        /* query supported capabilities */
        error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
        if (error != 0) {
                if_printf(dev, "Query port media capability failed\n");
                goto done;
        }
        /* check for autoselect */
        if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
                link_mode = eth_proto_cap;
                if (link_mode == 0) {
                        if_printf(dev, "Port media capability is zero\n");
                        error = EINVAL;
                        goto done;
                }
        } else {
                link_mode = link_mode & eth_proto_cap;
                if (link_mode == 0) {
                        if_printf(dev, "Unsupported link mode requested\n");
                        error = EINVAL;
                        goto done;
                }
        }
        /* update pauseframe control bits */
        priv->params.rx_pauseframe_control =
            (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
        priv->params.tx_pauseframe_control =
            (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;

        /* check if device is opened */
        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);

        /* reconfigure the hardware */
        mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
        mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
        mlx5_set_port_pause(mdev, 1,
            priv->params.rx_pauseframe_control,
            priv->params.tx_pauseframe_control);
        if (was_opened)
                mlx5_set_port_status(mdev, MLX5_PORT_UP);

done:
        if (!locked)
                PRIV_UNLOCK(priv);
        return (error);
}

static void
mlx5e_update_carrier_work(struct work_struct *work)
{
        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
            update_carrier_work);

        PRIV_LOCK(priv);
        if (test_bit(MLX5E_STATE_OPENED, &priv->state))
                mlx5e_update_carrier(priv);
        PRIV_UNLOCK(priv);
}

/*
 * This function reads the physical port counters from the firmware
 * using a layout pre-defined by the various MLX5E_PPORT_XXX() macros.
 * The output is converted from big-endian 64-bit values into host
 * endian ones and stored in the "priv->stats.pport" structure.
 */
static void
mlx5e_update_pport_counters(struct mlx5e_priv *priv)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_pport_stats *s = &priv->stats.pport;
        struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
        u32 *in;
        u32 *out;
        const u64 *ptr;
        unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
        unsigned x;
        unsigned y;

        /* allocate firmware request structures */
        in = mlx5_vzalloc(sz);
        out = mlx5_vzalloc(sz);
        if (in == NULL || out == NULL)
                goto free_out;

        /*
         * Get pointer to the 64-bit counter set which is located at a
         * fixed offset in the output firmware request structure:
         */
        ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);

        MLX5_SET(ppcnt_reg, in, local_port, 1);

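        /*
         * Note on the loops below: "x" indexes the 64-bit counters in
         * the current firmware counter group, while "y" keeps running
         * across groups so that the values land in consecutive slots
         * of the destination "arg" array; it is reset only when the
         * destination switches from the regular to the debug stats.
         */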
        /* read IEEE802_3 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = y = 0; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
                s->arg[y] = be64toh(ptr[x]);

        /* read RFC2819 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
                s->arg[y] = be64toh(ptr[x]);
        for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
            MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read RFC2863 counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);

        /* read physical layer stats counter group using predefined counter layout */
        MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
        mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
        for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
                s_debug->arg[y] = be64toh(ptr[x]);
free_out:
        /* free firmware request structures */
        kvfree(in);
        kvfree(out);
}

/*
 * This function is called regularly to collect all statistics
 * counters from the firmware. The values can be viewed through the
 * sysctl interface. Execution is serialized using the priv's global
 * configuration lock.
 */
static void
mlx5e_update_stats_work(struct work_struct *work)
{
        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
            update_stats_work);
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_vport_stats *s = &priv->stats.vport;
        struct mlx5e_rq_stats *rq_stats;
        struct mlx5e_sq_stats *sq_stats;
        struct buf_ring *sq_br;
#if (__FreeBSD_version < 1100000)
        struct ifnet *ifp = priv->ifp;
#endif

        u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
        u32 *out;
        int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
        u64 tso_packets = 0;
        u64 tso_bytes = 0;
        u64 tx_queue_dropped = 0;
        u64 tx_defragged = 0;
        u64 tx_offload_none = 0;
        u64 lro_packets = 0;
        u64 lro_bytes = 0;
        u64 sw_lro_queued = 0;
        u64 sw_lro_flushed = 0;
        u64 rx_csum_none = 0;
        u64 rx_wqe_err = 0;
        u32 rx_out_of_buffer = 0;
        int i;
        int j;

        PRIV_LOCK(priv);
        out = mlx5_vzalloc(outlen);
        if (out == NULL)
                goto free_out;
        if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
                goto free_out;

        /* Collect the SW counters first and then the HW counters, for consistency */
        for (i = 0; i < priv->params.num_channels; i++) {
                struct mlx5e_rq *rq = &priv->channel[i]->rq;

                rq_stats = &priv->channel[i]->rq.stats;

                /* collect stats from LRO */
                rq_stats->sw_lro_queued = rq->lro.lro_queued;
                rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
                sw_lro_queued += rq_stats->sw_lro_queued;
                sw_lro_flushed += rq_stats->sw_lro_flushed;
                lro_packets += rq_stats->lro_packets;
                lro_bytes += rq_stats->lro_bytes;
                rx_csum_none += rq_stats->csum_none;
                rx_wqe_err += rq_stats->wqe_err;

                for (j = 0; j < priv->num_tc; j++) {
                        sq_stats = &priv->channel[i]->sq[j].stats;
                        sq_br = priv->channel[i]->sq[j].br;

                        tso_packets += sq_stats->tso_packets;
                        tso_bytes += sq_stats->tso_bytes;
                        tx_queue_dropped += sq_stats->dropped;
                        tx_queue_dropped += sq_br->br_drops;
                        tx_defragged += sq_stats->defragged;
                        tx_offload_none += sq_stats->csum_offload_none;
                }
        }

        /* update counters */
        s->tso_packets = tso_packets;
        s->tso_bytes = tso_bytes;
        s->tx_queue_dropped = tx_queue_dropped;
        s->tx_defragged = tx_defragged;
        s->lro_packets = lro_packets;
        s->lro_bytes = lro_bytes;
        s->sw_lro_queued = sw_lro_queued;
        s->sw_lro_flushed = sw_lro_flushed;
        s->rx_csum_none = rx_csum_none;
        s->rx_wqe_err = rx_wqe_err;

        /* HW counters */
        memset(in, 0, sizeof(in));

        MLX5_SET(query_vport_counter_in, in, opcode,
            MLX5_CMD_OP_QUERY_VPORT_COUNTER);
        MLX5_SET(query_vport_counter_in, in, op_mod, 0);
        MLX5_SET(query_vport_counter_in, in, other_vport, 0);

        memset(out, 0, outlen);

        /* get number of out-of-buffer drops first */
        if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
            &rx_out_of_buffer))
                goto free_out;

        /*
         * Accumulate the difference into a 64-bit counter. The
         * firmware value is only 32 bits wide, so casting the
         * difference through (u32) keeps the accumulation correct
         * across counter wraparound.
         */
        s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
        s->rx_out_of_buffer_prev = rx_out_of_buffer;

        /* get port statistics */
        if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
                goto free_out;

#define MLX5_GET_CTR(out, x) \
        MLX5_GET64(query_vport_counter_out, out, x)

        s->rx_error_packets =
            MLX5_GET_CTR(out, received_errors.packets);
        s->rx_error_bytes =
            MLX5_GET_CTR(out, received_errors.octets);
        s->tx_error_packets =
            MLX5_GET_CTR(out, transmit_errors.packets);
        s->tx_error_bytes =
            MLX5_GET_CTR(out, transmit_errors.octets);

        s->rx_unicast_packets =
            MLX5_GET_CTR(out, received_eth_unicast.packets);
        s->rx_unicast_bytes =
            MLX5_GET_CTR(out, received_eth_unicast.octets);
        s->tx_unicast_packets =
            MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
        s->tx_unicast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_unicast.octets);

        s->rx_multicast_packets =
            MLX5_GET_CTR(out, received_eth_multicast.packets);
        s->rx_multicast_bytes =
            MLX5_GET_CTR(out, received_eth_multicast.octets);
        s->tx_multicast_packets =
            MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
        s->tx_multicast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_multicast.octets);

        s->rx_broadcast_packets =
            MLX5_GET_CTR(out, received_eth_broadcast.packets);
        s->rx_broadcast_bytes =
            MLX5_GET_CTR(out, received_eth_broadcast.octets);
        s->tx_broadcast_packets =
            MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
        s->tx_broadcast_bytes =
            MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);

        s->rx_packets =
            s->rx_unicast_packets +
            s->rx_multicast_packets +
            s->rx_broadcast_packets -
            s->rx_out_of_buffer;
        s->rx_bytes =
            s->rx_unicast_bytes +
            s->rx_multicast_bytes +
            s->rx_broadcast_bytes;
        s->tx_packets =
            s->tx_unicast_packets +
            s->tx_multicast_packets +
            s->tx_broadcast_packets;
        s->tx_bytes =
            s->tx_unicast_bytes +
            s->tx_multicast_bytes +
            s->tx_broadcast_bytes;

        /* Update calculated offload counters */
        s->tx_csum_offload = s->tx_packets - tx_offload_none;
        s->rx_csum_good = s->rx_packets - s->rx_csum_none;

        /* Get physical port counters */
        mlx5e_update_pport_counters(priv);

#if (__FreeBSD_version < 1100000)
        /* no get_counters interface in FreeBSD 10 */
        ifp->if_ipackets = s->rx_packets;
        ifp->if_ierrors = s->rx_error_packets +
            priv->stats.pport.alignment_err +
            priv->stats.pport.check_seq_err +
            priv->stats.pport.crc_align_errors +
            priv->stats.pport.drop_events +
            priv->stats.pport.in_range_len_errors +
            priv->stats.pport.jabbers +
            priv->stats.pport.out_of_range_len +
            priv->stats.pport.oversize_pkts +
            priv->stats.pport.symbol_err +
            priv->stats.pport.too_long_errors +
            priv->stats.pport.undersize_pkts +
            priv->stats.pport.unsupported_op_rx;
        ifp->if_iqdrops = s->rx_out_of_buffer;
        ifp->if_opackets = s->tx_packets;
        ifp->if_oerrors = s->tx_error_packets;
        ifp->if_snd.ifq_drops = s->tx_queue_dropped;
        ifp->if_ibytes = s->rx_bytes;
        ifp->if_obytes = s->tx_bytes;
        ifp->if_collisions =
            priv->stats.pport.collisions;
#endif

free_out:
        kvfree(out);
        PRIV_UNLOCK(priv);
}

static void
mlx5e_update_stats(void *arg)
{
        struct mlx5e_priv *priv = arg;

        schedule_work(&priv->update_stats_work);

        callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
}

static void
mlx5e_async_event_sub(struct mlx5e_priv *priv,
    enum mlx5_dev_event event)
{
        switch (event) {
        case MLX5_DEV_EVENT_PORT_UP:
        case MLX5_DEV_EVENT_PORT_DOWN:
                schedule_work(&priv->update_carrier_work);
                break;

        default:
                break;
        }
}

static void
mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
    enum mlx5_dev_event event, unsigned long param)
{
        struct mlx5e_priv *priv = vpriv;

        mtx_lock(&priv->async_events_mtx);
        if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
                mlx5e_async_event_sub(priv, event);
        mtx_unlock(&priv->async_events_mtx);
}

static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
        set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}

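/*
 * Unlike enabling, disabling takes the async events mutex: holding it
 * while clearing the bit guarantees that any handler which already
 * tested the bit in mlx5e_async_event() has finished before this
 * function returns.
 */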
static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
        mtx_lock(&priv->async_events_mtx);
        clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
        mtx_unlock(&priv->async_events_mtx);
}

static const char *mlx5e_rq_stats_desc[] = {
        MLX5E_RQ_STATS(MLX5E_STATS_DESC)
};

static int
mlx5e_create_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        char buffer[16];
        void *rqc = param->rqc;
        void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
        int wq_sz;
        int err;
        int i;

        /* Create DMA descriptor TAG */
        if ((err = -bus_dma_tag_create(
            bus_get_dma_tag(mdev->pdev->dev.bsddev),
            1,                          /* any alignment */
            0,                          /* no boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            MJUM16BYTES,                /* maxsize */
            1,                          /* nsegments */
            MJUM16BYTES,                /* maxsegsize */
            0,                          /* flags */
            NULL, NULL,                 /* lockfunc, lockfuncarg */
            &rq->dma_tag)))
                goto done;

        err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
            &rq->wq_ctrl);
        if (err)
                goto err_free_dma_tag;

        rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];

        if (priv->params.hw_lro_en) {
                rq->wqe_sz = priv->params.lro_wqe_sz;
        } else {
                rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
        }
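        /*
         * Round the WQE size up to the smallest mbuf cluster size that
         * can hold it; anything beyond the largest supported cluster
         * size (MJUM16BYTES) is rejected.
         */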
        if (rq->wqe_sz > MJUM16BYTES) {
                err = -ENOMEM;
                goto err_rq_wq_destroy;
        } else if (rq->wqe_sz > MJUM9BYTES) {
                rq->wqe_sz = MJUM16BYTES;
        } else if (rq->wqe_sz > MJUMPAGESIZE) {
                rq->wqe_sz = MJUM9BYTES;
        } else if (rq->wqe_sz > MCLBYTES) {
                rq->wqe_sz = MJUMPAGESIZE;
        } else {
                rq->wqe_sz = MCLBYTES;
        }

        wq_sz = mlx5_wq_ll_get_size(&rq->wq);
        rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
        for (i = 0; i != wq_sz; i++) {
                struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
                uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;

                err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
                if (err != 0) {
                        while (i--)
                                bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
                        goto err_rq_mbuf_free;
                }
                wqe->data.lkey = c->mkey_be;
                wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
        }

        rq->ifp = c->ifp;
        rq->channel = c;
        rq->ix = c->ix;

        snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
        mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
            buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
            rq->stats.arg);

#ifdef HAVE_TURBO_LRO
        if (tcp_tlro_init(&rq->lro, c->ifp, MLX5E_BUDGET_MAX) != 0)
                rq->lro.mbuf = NULL;
#else
        if (tcp_lro_init(&rq->lro))
                rq->lro.lro_cnt = 0;
        else
                rq->lro.ifp = c->ifp;
#endif
        return (0);

err_rq_mbuf_free:
        free(rq->mbuf, M_MLX5EN);
err_rq_wq_destroy:
        mlx5_wq_destroy(&rq->wq_ctrl);
err_free_dma_tag:
        bus_dma_tag_destroy(rq->dma_tag);
done:
        return (err);
}

static void
mlx5e_destroy_rq(struct mlx5e_rq *rq)
{
        int wq_sz;
        int i;

        /* destroy all sysctl nodes */
        sysctl_ctx_free(&rq->stats.ctx);

        /* free leftover LRO packets, if any */
#ifdef HAVE_TURBO_LRO
        tcp_tlro_free(&rq->lro);
#else
        tcp_lro_free(&rq->lro);
#endif
        wq_sz = mlx5_wq_ll_get_size(&rq->wq);
        for (i = 0; i != wq_sz; i++) {
                if (rq->mbuf[i].mbuf != NULL) {
                        bus_dmamap_unload(rq->dma_tag,
                            rq->mbuf[i].dma_map);
                        m_freem(rq->mbuf[i].mbuf);
                }
                bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
        }
        free(rq->mbuf, M_MLX5EN);
        mlx5_wq_destroy(&rq->wq_ctrl);
}

static int
mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;

        void *in;
        void *rqc;
        void *wq;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
            sizeof(u64) * rq->wq_ctrl.buf.npages;
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
        wq = MLX5_ADDR_OF(rqc, rqc, wq);

        memcpy(rqc, param->rqc, sizeof(param->rqc));

        MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
        MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
        MLX5_SET(rqc, rqc, flush_in_error_en, 1);
        if (priv->counter_set_id >= 0)
                MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
        MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
            PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);

        mlx5_fill_page_array(&rq->wq_ctrl.buf,
            (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

        err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);

        kvfree(in);

        return (err);
}

static int
mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;

        void *in;
        void *rqc;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);

        MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
        MLX5_SET(modify_rq_in, in, rq_state, curr_state);
        MLX5_SET(rqc, rqc, state, next_state);

        err = mlx5_core_modify_rq(mdev, in, inlen);

        kvfree(in);

        return (err);
}

static void
mlx5e_disable_rq(struct mlx5e_rq *rq)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;

        mlx5_core_destroy_rq(mdev, rq->rqn);
}

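/*
 * Poll until the receive queue has been refilled with at least the
 * configured minimum number of WQEs. The loop gives up after roughly
 * four seconds (1000 iterations of a 4 ms sleep, assuming the
 * Linux-compat msleep() used here takes milliseconds).
 */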
static int
mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
{
        struct mlx5e_channel *c = rq->channel;
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_wq_ll *wq = &rq->wq;
        int i;

        for (i = 0; i < 1000; i++) {
                if (wq->cur_sz >= priv->params.min_rx_wqes)
                        return (0);

                msleep(4);
        }
        return (-ETIMEDOUT);
}

static int
mlx5e_open_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
        int err;

        err = mlx5e_create_rq(c, param, rq);
        if (err)
                return (err);

        err = mlx5e_enable_rq(rq, param);
        if (err)
                goto err_destroy_rq;

        err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
        if (err)
                goto err_disable_rq;

        c->rq.enabled = 1;

        return (0);

err_disable_rq:
        mlx5e_disable_rq(rq);
err_destroy_rq:
        mlx5e_destroy_rq(rq);

        return (err);
}

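/*
 * Closing an RQ is split in two phases: mlx5e_close_rq() stops the
 * watchdog and moves the queue to the error state, which makes the
 * hardware complete all outstanding WQEs, and mlx5e_close_rq_wait()
 * then polls the completion queue until the ring is empty before
 * tearing the queue down.
 */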
static void
mlx5e_close_rq(struct mlx5e_rq *rq)
{
        mtx_lock(&rq->mtx);
        rq->enabled = 0;
        callout_stop(&rq->watchdog);
        mtx_unlock(&rq->mtx);

        callout_drain(&rq->watchdog);

        mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
}

static void
mlx5e_close_rq_wait(struct mlx5e_rq *rq)
{
        /* wait till RQ is empty */
        while (!mlx5_wq_ll_is_empty(&rq->wq)) {
                msleep(4);
                rq->cq.mcq.comp(&rq->cq.mcq);
        }

        mlx5e_disable_rq(rq);
        mlx5e_destroy_rq(rq);
}

void
mlx5e_free_sq_db(struct mlx5e_sq *sq)
{
        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
        int x;

        for (x = 0; x != wq_sz; x++)
                bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
        free(sq->mbuf, M_MLX5EN);
}

int
mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
{
        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
        int err;
        int x;

        sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);

        /* Create DMA descriptor MAPs */
        for (x = 0; x != wq_sz; x++) {
                err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
                if (err != 0) {
                        while (x--)
                                bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
                        free(sq->mbuf, M_MLX5EN);
                        return (err);
                }
        }
        return (0);
}

static const char *mlx5e_sq_stats_desc[] = {
        MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};

static int
mlx5e_create_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        char buffer[16];

        void *sqc = param->sqc;
        void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
#ifdef RSS
        cpuset_t cpu_mask;
        int cpu_id;
#endif
        int err;

        /* Create DMA descriptor TAG */
        if ((err = -bus_dma_tag_create(
            bus_get_dma_tag(mdev->pdev->dev.bsddev),
            1,                          /* any alignment */
            0,                          /* no boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            MLX5E_MAX_TX_PAYLOAD_SIZE,  /* maxsize */
            MLX5E_MAX_TX_MBUF_FRAGS,    /* nsegments */
            MLX5E_MAX_TX_MBUF_SIZE,     /* maxsegsize */
            0,                          /* flags */
            NULL, NULL,                 /* lockfunc, lockfuncarg */
            &sq->dma_tag)))
                goto done;

        err = mlx5_alloc_map_uar(mdev, &sq->uar);
        if (err)
                goto err_free_dma_tag;

        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
            &sq->wq_ctrl);
        if (err)
                goto err_unmap_free_uar;

        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
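        /*
         * Only half of the BlueFlame register size is usable per
         * doorbell write; the register is split in two halves which
         * are used alternately (a reading of the hardware convention,
         * not spelled out in this file).
         */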
        sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;

        err = mlx5e_alloc_sq_db(sq);
        if (err)
                goto err_sq_wq_destroy;

        sq->mkey_be = c->mkey_be;
        sq->ifp = priv->ifp;
        sq->priv = priv;
        sq->tc = tc;

        sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
            M_WAITOK, &sq->lock);
        if (sq->br == NULL) {
                if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
                    __func__);
                err = -ENOMEM;
                goto err_free_sq_db;
        }

        sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
            taskqueue_thread_enqueue, &sq->sq_tq);
        if (sq->sq_tq == NULL) {
                if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
                    __func__);
                err = -ENOMEM;
                goto err_free_drbr;
        }

        TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
#ifdef RSS
        cpu_id = rss_getcpu(c->ix % rss_getnumbuckets());
        CPU_SETOF(cpu_id, &cpu_mask);
        taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask,
            "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id);
#else
        taskqueue_start_threads(&sq->sq_tq, 1, PI_NET,
            "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc);
#endif
        snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
        mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
            buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
            sq->stats.arg);

        return (0);

err_free_drbr:
        buf_ring_free(sq->br, M_MLX5EN);
err_free_sq_db:
        mlx5e_free_sq_db(sq);
err_sq_wq_destroy:
        mlx5_wq_destroy(&sq->wq_ctrl);
err_unmap_free_uar:
        mlx5_unmap_free_uar(mdev, &sq->uar);
err_free_dma_tag:
        bus_dma_tag_destroy(sq->dma_tag);
done:
        return (err);
}

static void
mlx5e_destroy_sq(struct mlx5e_sq *sq)
{
        /* destroy all sysctl nodes */
        sysctl_ctx_free(&sq->stats.ctx);

        mlx5e_free_sq_db(sq);
        mlx5_wq_destroy(&sq->wq_ctrl);
        mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
        taskqueue_drain(sq->sq_tq, &sq->sq_task);
        taskqueue_free(sq->sq_tq);
        buf_ring_free(sq->br, M_MLX5EN);
}

int
mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
    int tis_num)
{
        void *in;
        void *sqc;
        void *wq;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
            sizeof(u64) * sq->wq_ctrl.buf.npages;
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
        wq = MLX5_ADDR_OF(sqc, sqc, wq);

        memcpy(sqc, param->sqc, sizeof(param->sqc));

        MLX5_SET(sqc, sqc, tis_num_0, tis_num);
        MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
        MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
        MLX5_SET(sqc, sqc, tis_lst_sz, 1);
        MLX5_SET(sqc, sqc, flush_in_error_en, 1);

        MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
        MLX5_SET(wq, wq, uar_page, sq->uar.index);
        MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
            PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);

        mlx5_fill_page_array(&sq->wq_ctrl.buf,
            (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

        err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);

        kvfree(in);

        return (err);
}

int
mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
{
        void *in;
        void *sqc;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

        MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
        MLX5_SET(modify_sq_in, in, sq_state, curr_state);
        MLX5_SET(sqc, sqc, state, next_state);

        err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);

        kvfree(in);

        return (err);
}

void
mlx5e_disable_sq(struct mlx5e_sq *sq)
{

        mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
}

static int
mlx5e_open_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
        int err;

        err = mlx5e_create_sq(c, tc, param, sq);
        if (err)
                return (err);

        err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
        if (err)
                goto err_destroy_sq;

        err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
        if (err)
                goto err_disable_sq;

        atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY);

        return (0);

err_disable_sq:
        mlx5e_disable_sq(sq);
err_destroy_sq:
        mlx5e_destroy_sq(sq);

        return (err);
}

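/*
 * The transmit path arms a completion event only once per "cev_factor"
 * WQEs. When the ring needs to be flushed, the remaining budget of the
 * current event interval ("cev_counter") is filled with NOP WQEs so
 * that a completion is generated and the ring can drain.
 */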
static void
mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
{
        /* fill up remainder with NOPs */
        while (sq->cev_counter != 0) {
                while (!mlx5e_sq_has_room_for(sq, 1)) {
                        if (can_sleep != 0) {
                                mtx_unlock(&sq->lock);
                                msleep(4);
                                mtx_lock(&sq->lock);
                        } else {
                                goto done;
                        }
                }
                /* send a single NOP */
                mlx5e_send_nop(sq, 1);
                wmb();
        }
done:
        /* Check if we need to write the doorbell */
        if (likely(sq->doorbell.d64 != 0)) {
                mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
                sq->doorbell.d64 = 0;
        }
        return;
}

void
mlx5e_sq_cev_timeout(void *arg)
{
        struct mlx5e_sq *sq = arg;

        mtx_assert(&sq->lock, MA_OWNED);

        /* check next state */
        switch (sq->cev_next_state) {
        case MLX5E_CEV_STATE_SEND_NOPS:
                /* fill TX ring with NOPs, if any */
                mlx5e_sq_send_nops_locked(sq, 0);

                /* check if completed */
                if (sq->cev_counter == 0) {
                        sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
                        return;
                }
                break;
        default:
                /* send NOPs on next timeout */
                sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
                break;
        }

        /* restart timer */
        callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
}

void
mlx5e_drain_sq(struct mlx5e_sq *sq)
{
        int error;

        /*
         * Check if already stopped.
         *
         * NOTE: The "stopped" variable is only written when both the
         * priv's configuration lock and the SQ's lock are held. It
         * can therefore safely be read when only one of the two locks
         * is held. This function is always called when the priv's
         * configuration lock is held.
         */
        if (sq->stopped != 0)
                return;

        mtx_lock(&sq->lock);

        /* don't put more packets into the SQ */
        sq->stopped = 1;

        /* teardown event factor timer, if any */
        sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
        callout_stop(&sq->cev_callout);

        /* send dummy NOPs in order to flush the transmit ring */
        mlx5e_sq_send_nops_locked(sq, 1);
        mtx_unlock(&sq->lock);

        /* make sure it is safe to free the callout */
        callout_drain(&sq->cev_callout);

        /* wait till SQ is empty or link is down */
        mtx_lock(&sq->lock);
        while (sq->cc != sq->pc &&
            (sq->priv->media_status_last & IFM_ACTIVE) != 0) {
                mtx_unlock(&sq->lock);
                msleep(1);
                sq->cq.mcq.comp(&sq->cq.mcq);
                mtx_lock(&sq->lock);
        }
        mtx_unlock(&sq->lock);

        /* error out remaining requests */
        error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
        if (error != 0) {
                if_printf(sq->ifp,
                    "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
        }

        /* wait till SQ is empty */
        mtx_lock(&sq->lock);
        while (sq->cc != sq->pc) {
                mtx_unlock(&sq->lock);
                msleep(1);
                sq->cq.mcq.comp(&sq->cq.mcq);
                mtx_lock(&sq->lock);
        }
        mtx_unlock(&sq->lock);
}

static void
mlx5e_close_sq_wait(struct mlx5e_sq *sq)
{

        mlx5e_drain_sq(sq);
        mlx5e_disable_sq(sq);
        mlx5e_destroy_sq(sq);
}

static int
mlx5e_create_cq(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    int eq_ix)
{
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5_core_cq *mcq = &cq->mcq;
        int eqn_not_used;
        int irqn;
        int err;
        u32 i;

        param->wq.buf_numa_node = 0;
        param->wq.db_numa_node = 0;

        err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
            &cq->wq_ctrl);
        if (err)
                return (err);

        mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);

        mcq->cqe_sz = 64;
        mcq->set_ci_db = cq->wq_ctrl.db.db;
        mcq->arm_db = cq->wq_ctrl.db.db + 1;
        *mcq->set_ci_db = 0;
        *mcq->arm_db = 0;
        mcq->vector = eq_ix;
        mcq->comp = comp;
        mcq->event = mlx5e_cq_error_event;
        mcq->irqn = irqn;
        mcq->uar = &priv->cq_uar;

        for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
                struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);

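                /*
                 * Initialize each CQE with an invalid opcode and the
                 * hardware ownership bit set, so that stale entries
                 * are never mistaken for completions (a reading of the
                 * initialization value, not documented here).
                 */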
1349                 cqe->op_own = 0xf1;
1350         }
1351
1352         cq->priv = priv;
1353
1354         return (0);
1355 }
1356
1357 static void
1358 mlx5e_destroy_cq(struct mlx5e_cq *cq)
1359 {
1360         mlx5_wq_destroy(&cq->wq_ctrl);
1361 }
1362
1363 static int
1364 mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
1365 {
1366         struct mlx5_core_cq *mcq = &cq->mcq;
1367         void *in;
1368         void *cqc;
1369         int inlen;
1370         int irqn_not_used;
1371         int eqn;
1372         int err;
1373
1374         inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1375             sizeof(u64) * cq->wq_ctrl.buf.npages;
1376         in = mlx5_vzalloc(inlen);
1377         if (in == NULL)
1378                 return (-ENOMEM);
1379
1380         cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1381
1382         memcpy(cqc, param->cqc, sizeof(param->cqc));
1383
1384         mlx5_fill_page_array(&cq->wq_ctrl.buf,
1385             (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
1386
1387         mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
1388
1389         MLX5_SET(cqc, cqc, c_eqn, eqn);
1390         MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
1391         MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1392             PAGE_SHIFT);
1393         MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1394
1395         err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);
1396
1397         kvfree(in);
1398
1399         if (err)
1400                 return (err);
1401
1402         mlx5e_cq_arm(cq);
1403
1404         return (0);
1405 }
1406
1407 static void
1408 mlx5e_disable_cq(struct mlx5e_cq *cq)
1409 {
1410
1411         mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
1412 }
1413
1414 int
1415 mlx5e_open_cq(struct mlx5e_priv *priv,
1416     struct mlx5e_cq_param *param,
1417     struct mlx5e_cq *cq,
1418     mlx5e_cq_comp_t *comp,
1419     int eq_ix)
1420 {
1421         int err;
1422
1423         err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
1424         if (err)
1425                 return (err);
1426
1427         err = mlx5e_enable_cq(cq, param, eq_ix);
1428         if (err)
1429                 goto err_destroy_cq;
1430
1431         return (0);
1432
1433 err_destroy_cq:
1434         mlx5e_destroy_cq(cq);
1435
1436         return (err);
1437 }
1438
1439 void
1440 mlx5e_close_cq(struct mlx5e_cq *cq)
1441 {
1442         mlx5e_disable_cq(cq);
1443         mlx5e_destroy_cq(cq);
1444 }
1445
1446 static int
1447 mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1448     struct mlx5e_channel_param *cparam)
1449 {
1450         int err;
1451         int tc;
1452
1453         for (tc = 0; tc < c->num_tc; tc++) {
1454                 /* open completion queue */
1455                 err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
1456                     &mlx5e_tx_cq_comp, c->ix);
1457                 if (err)
1458                         goto err_close_tx_cqs;
1459         }
1460         return (0);
1461
1462 err_close_tx_cqs:
1463         for (tc--; tc >= 0; tc--)
1464                 mlx5e_close_cq(&c->sq[tc].cq);
1465
1466         return (err);
1467 }
1468
1469 static void
1470 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1471 {
1472         int tc;
1473
1474         for (tc = 0; tc < c->num_tc; tc++)
1475                 mlx5e_close_cq(&c->sq[tc].cq);
1476 }
1477
1478 static int
1479 mlx5e_open_sqs(struct mlx5e_channel *c,
1480     struct mlx5e_channel_param *cparam)
1481 {
1482         int err;
1483         int tc;
1484
1485         for (tc = 0; tc < c->num_tc; tc++) {
1486                 err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
1487                 if (err)
1488                         goto err_close_sqs;
1489         }
1490
1491         return (0);
1492
1493 err_close_sqs:
1494         for (tc--; tc >= 0; tc--)
1495                 mlx5e_close_sq_wait(&c->sq[tc]);
1496
1497         return (err);
1498 }
1499
1500 static void
1501 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1502 {
1503         int tc;
1504
1505         for (tc = 0; tc < c->num_tc; tc++)
1506                 mlx5e_close_sq_wait(&c->sq[tc]);
1507 }
1508
1509 static void
1510 mlx5e_chan_mtx_init(struct mlx5e_channel *c)
1511 {
1512         int tc;
1513
1514         mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
1515
1516         callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);
1517
1518         for (tc = 0; tc < c->num_tc; tc++) {
1519                 struct mlx5e_sq *sq = c->sq + tc;
1520
1521                 mtx_init(&sq->lock, "mlx5tx", MTX_NETWORK_LOCK, MTX_DEF);
1522                 mtx_init(&sq->comp_lock, "mlx5comp", MTX_NETWORK_LOCK,
1523                     MTX_DEF);
1524
1525                 callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
1526
1527                 sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
1528
1529                 /* ensure the TX completion event factor is not zero */
1530                 if (sq->cev_factor == 0)
1531                         sq->cev_factor = 1;
1532         }
1533 }
1534
1535 static void
1536 mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1537 {
1538         int tc;
1539
1540         mtx_destroy(&c->rq.mtx);
1541
1542         for (tc = 0; tc < c->num_tc; tc++) {
1543                 mtx_destroy(&c->sq[tc].lock);
1544                 mtx_destroy(&c->sq[tc].comp_lock);
1545         }
1546 }
1547
1548 static int
1549 mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1550     struct mlx5e_channel_param *cparam,
1551     struct mlx5e_channel *volatile *cp)
1552 {
1553         struct mlx5e_channel *c;
1554         int err;
1555
1556         c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
1557         c->priv = priv;
1558         c->ix = ix;
1559         c->cpu = 0;
1560         c->ifp = priv->ifp;
1561         c->mkey_be = cpu_to_be32(priv->mr.key);
1562         c->num_tc = priv->num_tc;
1563
1564         /* init mutexes */
1565         mlx5e_chan_mtx_init(c);
1566
1567         /* open transmit completion queue */
1568         err = mlx5e_open_tx_cqs(c, cparam);
1569         if (err)
1570                 goto err_free;
1571
1572         /* open receive completion queue */
1573         err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
1574             &mlx5e_rx_cq_comp, c->ix);
1575         if (err)
1576                 goto err_close_tx_cqs;
1577
1578         err = mlx5e_open_sqs(c, cparam);
1579         if (err)
1580                 goto err_close_rx_cq;
1581
1582         err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
1583         if (err)
1584                 goto err_close_sqs;
1585
1586         /* store channel pointer */
1587         *cp = c;
1588
1589         /* poll receive queue initially */
1590         c->rq.cq.mcq.comp(&c->rq.cq.mcq);
1591
1592         return (0);
1593
1594 err_close_sqs:
1595         mlx5e_close_sqs_wait(c);
1596
1597 err_close_rx_cq:
1598         mlx5e_close_cq(&c->rq.cq);
1599
1600 err_close_tx_cqs:
1601         mlx5e_close_tx_cqs(c);
1602
1603 err_free:
1604         /* destroy mutexes */
1605         mlx5e_chan_mtx_destroy(c);
1606         free(c, M_MLX5EN);
1607         return (err);
1608 }
1609
1610 static void
1611 mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
1612 {
1613         struct mlx5e_channel *c = *pp;
1614
1615         /* check if channel is already closed */
1616         if (c == NULL)
1617                 return;
1618         mlx5e_close_rq(&c->rq);
1619 }
1620
1621 static void
1622 mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
1623 {
1624         struct mlx5e_channel *c = *pp;
1625
1626         /* check if channel is already closed */
1627         if (c == NULL)
1628                 return;
1629         /* ensure channel pointer is no longer used */
1630         *pp = NULL;
1631
1632         mlx5e_close_rq_wait(&c->rq);
1633         mlx5e_close_sqs_wait(c);
1634         mlx5e_close_cq(&c->rq.cq);
1635         mlx5e_close_tx_cqs(c);
1636         /* destroy mutexes */
1637         mlx5e_chan_mtx_destroy(c);
1638         free(c, M_MLX5EN);
1639 }
1640
1641 static void
1642 mlx5e_build_rq_param(struct mlx5e_priv *priv,
1643     struct mlx5e_rq_param *param)
1644 {
1645         void *rqc = param->rqc;
1646         void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
1647
1648         MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
1649         MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1650         MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
1651         MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
1652         MLX5_SET(wq, wq, pd, priv->pdn);
1653
1654         param->wq.buf_numa_node = 0;
1655         param->wq.db_numa_node = 0;
1656         param->wq.linear = 1;
1657 }
1658
1659 static void
1660 mlx5e_build_sq_param(struct mlx5e_priv *priv,
1661     struct mlx5e_sq_param *param)
1662 {
1663         void *sqc = param->sqc;
1664         void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
1665
1666         MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
1667         MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1668         MLX5_SET(wq, wq, pd, priv->pdn);
1669
1670         param->wq.buf_numa_node = 0;
1671         param->wq.db_numa_node = 0;
1672         param->wq.linear = 1;
1673 }
1674
1675 static void
1676 mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
1677     struct mlx5e_cq_param *param)
1678 {
1679         void *cqc = param->cqc;
1680
1681         MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
1682 }
1683
1684 static void
1685 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1686     struct mlx5e_cq_param *param)
1687 {
1688         void *cqc = param->cqc;
1689
1691         /*
1692          * TODO: The on/off sysctl is a bool value for now, which means we only
1693          * support CSUM; once HASH is implemented we'll need to address that.
1694          */
1695         if (priv->params.cqe_zipping_en) {
1696                 MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1697                 MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1698         }
1699
1700         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1701         MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1702         MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1703
1704         switch (priv->params.rx_cq_moderation_mode) {
1705         case 0:
1706                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1707                 break;
1708         default:
1709                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1710                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1711                 else
1712                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1713                 break;
1714         }
1715
1716         mlx5e_build_common_cq_param(priv, param);
1717 }
1718
1719 static void
1720 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1721     struct mlx5e_cq_param *param)
1722 {
1723         void *cqc = param->cqc;
1724
1725         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1726         MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1727         MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1728
1729         switch (priv->params.tx_cq_moderation_mode) {
1730         case 0:
1731                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1732                 break;
1733         default:
1734                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1735                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1736                 else
1737                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1738                 break;
1739         }
1740
1741         mlx5e_build_common_cq_param(priv, param);
1742 }
1743
1744 static void
1745 mlx5e_build_channel_param(struct mlx5e_priv *priv,
1746     struct mlx5e_channel_param *cparam)
1747 {
1748         memset(cparam, 0, sizeof(*cparam));
1749
1750         mlx5e_build_rq_param(priv, &cparam->rq);
1751         mlx5e_build_sq_param(priv, &cparam->sq);
1752         mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
1753         mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
1754 }
1755
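/*
 * Open all configured channels, then wait until every RQ has reached
 * its minimum fill level of receive WQEs. On failure all channels
 * opened so far are closed again and the channel array is freed.
 */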
1756 static int
1757 mlx5e_open_channels(struct mlx5e_priv *priv)
1758 {
1759         struct mlx5e_channel_param cparam;
1760         void *ptr;
1761         int err;
1762         int i;
1763         int j;
1764
1765         priv->channel = malloc(priv->params.num_channels *
1766             sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
1767
1768         mlx5e_build_channel_param(priv, &cparam);
1769         for (i = 0; i < priv->params.num_channels; i++) {
1770                 err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1771                 if (err)
1772                         goto err_close_channels;
1773         }
1774
1775         for (j = 0; j < priv->params.num_channels; j++) {
1776                 err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
1777                 if (err)
1778                         goto err_close_channels;
1779         }
1780
1781         return (0);
1782
1783 err_close_channels:
1784         for (i--; i >= 0; i--) {
1785                 mlx5e_close_channel(&priv->channel[i]);
1786                 mlx5e_close_channel_wait(&priv->channel[i]);
1787         }
1788
1789         /* remove "volatile" attribute from "channel" pointer */
1790         ptr = __DECONST(void *, priv->channel);
1791         priv->channel = NULL;
1792
1793         free(ptr, M_MLX5EN);
1794
1795         return (err);
1796 }
1797
1798 static void
1799 mlx5e_close_channels(struct mlx5e_priv *priv)
1800 {
1801         void *ptr;
1802         int i;
1803
1804         if (priv->channel == NULL)
1805                 return;
1806
1807         for (i = 0; i < priv->params.num_channels; i++)
1808                 mlx5e_close_channel(&priv->channel[i]);
1809         for (i = 0; i < priv->params.num_channels; i++)
1810                 mlx5e_close_channel_wait(&priv->channel[i]);
1811
1812         /* remove "volatile" attribute from "channel" pointer */
1813         ptr = __DECONST(void *, priv->channel);
1814         priv->channel = NULL;
1815
1816         free(ptr, M_MLX5EN);
1817 }
1818
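/*
 * Push the current TX completion moderation settings to the hardware.
 * When the device supports modifying the CQ period mode at runtime,
 * the mode is updated as well; otherwise only the microsecond and
 * packet limits are changed.
 */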
1819 static int
1820 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
1821 {
1822
1823         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1824                 uint8_t cq_mode;
1825
1826                 switch (priv->params.tx_cq_moderation_mode) {
1827                 case 0:
1828                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1829                         break;
1830                 default:
1831                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1832                         break;
1833                 }
1834
1835                 return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
1836                     priv->params.tx_cq_moderation_usec,
1837                     priv->params.tx_cq_moderation_pkts,
1838                     cq_mode));
1839         }
1840
1841         return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
1842             priv->params.tx_cq_moderation_usec,
1843             priv->params.tx_cq_moderation_pkts));
1844 }
1845
1846 static int
1847 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
1848 {
1849
1850         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1851                 uint8_t cq_mode;
1852                 int retval;
1853
1854                 switch (priv->params.rx_cq_moderation_mode) {
1855                 case 0:
1856                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1857                         break;
1858                 default:
1859                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1860                         break;
1861                 }
1862
1863                 retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
1864                     priv->params.rx_cq_moderation_usec,
1865                     priv->params.rx_cq_moderation_pkts,
1866                     cq_mode);
1867
1868                 return (retval);
1869         }
1870
1871         return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
1872             priv->params.rx_cq_moderation_usec,
1873             priv->params.rx_cq_moderation_pkts));
1874 }
1875
1876 static int
1877 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
1878 {
1879         int err;
1880         int i;
1881
1882         if (c == NULL)
1883                 return (EINVAL);
1884
1885         err = mlx5e_refresh_rq_params(priv, &c->rq);
1886         if (err)
1887                 goto done;
1888
1889         for (i = 0; i != c->num_tc; i++) {
1890                 err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
1891                 if (err)
1892                         goto done;
1893         }
1894 done:
1895         return (err);
1896 }
1897
1898 int
1899 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
1900 {
1901         int i;
1902
1903         if (priv->channel == NULL)
1904                 return (EINVAL);
1905
1906         for (i = 0; i < priv->params.num_channels; i++) {
1907                 int err;
1908
1909                 err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
1910                 if (err)
1911                         return (err);
1912         }
1913         return (0);
1914 }
1915
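/*
 * Create a TIS (transport interface send) context for the given
 * traffic class, carrying its priority and the transport domain.
 */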
1916 static int
1917 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
1918 {
1919         struct mlx5_core_dev *mdev = priv->mdev;
1920         u32 in[MLX5_ST_SZ_DW(create_tis_in)];
1921         void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
1922
1923         memset(in, 0, sizeof(in));
1924
1925         MLX5_SET(tisc, tisc, prio, tc);
1926         MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
1927
1928         return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
1929 }
1930
1931 static void
1932 mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
1933 {
1934         mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
1935 }
1936
1937 static int
1938 mlx5e_open_tises(struct mlx5e_priv *priv)
1939 {
1940         int num_tc = priv->num_tc;
1941         int err;
1942         int tc;
1943
1944         for (tc = 0; tc < num_tc; tc++) {
1945                 err = mlx5e_open_tis(priv, tc);
1946                 if (err)
1947                         goto err_close_tises;
1948         }
1949
1950         return (0);
1951
1952 err_close_tises:
1953         for (tc--; tc >= 0; tc--)
1954                 mlx5e_close_tis(priv, tc);
1955
1956         return (err);
1957 }
1958
1959 static void
1960 mlx5e_close_tises(struct mlx5e_priv *priv)
1961 {
1962         int num_tc = priv->num_tc;
1963         int tc;
1964
1965         for (tc = 0; tc < num_tc; tc++)
1966                 mlx5e_close_tis(priv, tc);
1967 }
1968
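/*
 * Create the receive queue table (RQT) used for RSS indirection. Each
 * of the 2**rx_hash_log_tbl_sz entries is mapped onto a channel RQ,
 * wrapping modulo the number of channels; for example, with 8 channels
 * and a 128-entry table, entry "i" points at channel "i % 8".
 */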
1969 static int
1970 mlx5e_open_rqt(struct mlx5e_priv *priv)
1971 {
1972         struct mlx5_core_dev *mdev = priv->mdev;
1973         u32 *in;
1974         u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
1975         void *rqtc;
1976         int inlen;
1977         int err;
1978         int sz;
1979         int i;
1980
1981         sz = 1 << priv->params.rx_hash_log_tbl_sz;
1982
1983         inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
1984         in = mlx5_vzalloc(inlen);
1985         if (in == NULL)
1986                 return (-ENOMEM);
1987         rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1988
1989         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
1990         MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
1991
1992         for (i = 0; i < sz; i++) {
1993                 int ix;
1994 #ifdef RSS
1995                 ix = rss_get_indirection_to_bucket(i);
1996 #else
1997                 ix = i;
1998 #endif
1999                 /* ensure we don't overflow */
2000                 ix %= priv->params.num_channels;
2001                 MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
2002         }
2003
2004         MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
2005
2006         memset(out, 0, sizeof(out));
2007         err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
2008         if (!err)
2009                 priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
2010
2011         kvfree(in);
2012
2013         return (err);
2014 }
2015
2016 static void
2017 mlx5e_close_rqt(struct mlx5e_priv *priv)
2018 {
2019         u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
2020         u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];
2021
2022         memset(in, 0, sizeof(in));
2023
2024         MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
2025         MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
2026
2027         mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out,
2028             sizeof(out));
2029 }
2030
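/*
 * Build the context of a TIR (transport interface receive) for the
 * given traffic type. MLX5E_TT_ANY dispatches directly to the first
 * channel's RQ; all other traffic types dispatch indirectly through
 * the RQT, using a Toeplitz RX hash over the fields selected below.
 */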
2031 static void
2032 mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
2033 {
2034         void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
2035         __be32 *hkey;
2036
2037         MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
2038
2039 #define ROUGH_MAX_L2_L3_HDR_SZ 256
2040
2041 #define MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2042                           MLX5_HASH_FIELD_SEL_DST_IP)
2043
2044 #define MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2045                           MLX5_HASH_FIELD_SEL_DST_IP   |\
2046                           MLX5_HASH_FIELD_SEL_L4_SPORT |\
2047                           MLX5_HASH_FIELD_SEL_L4_DPORT)
2048
2049 #define MLX5_HASH_IP_IPSEC_SPI  (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2050                                  MLX5_HASH_FIELD_SEL_DST_IP   |\
2051                                  MLX5_HASH_FIELD_SEL_IPSEC_SPI)
2052
2053         if (priv->params.hw_lro_en) {
2054                 MLX5_SET(tirc, tirc, lro_enable_mask,
2055                     MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
2056                     MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
2057                 MLX5_SET(tirc, tirc, lro_max_msg_sz,
2058                     (priv->params.lro_wqe_sz -
2059                     ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
2060                 /* TODO: add the option to choose timer value dynamically */
2061                 MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
2062                     MLX5_CAP_ETH(priv->mdev,
2063                     lro_timer_supported_periods[2]));
2064         }
2065
2066         /* set up parameters for the hashing TIR type, if any */
2067         switch (tt) {
2068         case MLX5E_TT_ANY:
2069                 MLX5_SET(tirc, tirc, disp_type,
2070                     MLX5_TIRC_DISP_TYPE_DIRECT);
2071                 MLX5_SET(tirc, tirc, inline_rqn,
2072                     priv->channel[0]->rq.rqn);
2073                 break;
2074         default:
2075                 MLX5_SET(tirc, tirc, disp_type,
2076                     MLX5_TIRC_DISP_TYPE_INDIRECT);
2077                 MLX5_SET(tirc, tirc, indirect_table,
2078                     priv->rqtn);
2079                 MLX5_SET(tirc, tirc, rx_hash_fn,
2080                     MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
2081                 hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
2082 #ifdef RSS
2083                 /*
2084                  * The FreeBSD RSS implementation currently does not
2085                  * support symmetric Toeplitz hashes:
2086                  */
2087                 MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
2088                 rss_getkey((uint8_t *)hkey);
2089 #else
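                /*
                 * Without the kernel RSS option, fall back to the
                 * fixed, symmetric Toeplitz key hard-coded below:
                 */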
2090                 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
2091                 hkey[0] = cpu_to_be32(0xD181C62C);
2092                 hkey[1] = cpu_to_be32(0xF7F4DB5B);
2093                 hkey[2] = cpu_to_be32(0x1983A2FC);
2094                 hkey[3] = cpu_to_be32(0x943E1ADB);
2095                 hkey[4] = cpu_to_be32(0xD9389E6B);
2096                 hkey[5] = cpu_to_be32(0xD1039C2C);
2097                 hkey[6] = cpu_to_be32(0xA74499AD);
2098                 hkey[7] = cpu_to_be32(0x593D56D9);
2099                 hkey[8] = cpu_to_be32(0xF3253C06);
2100                 hkey[9] = cpu_to_be32(0x2ADC1FFC);
2101 #endif
2102                 break;
2103         }
2104
2105         switch (tt) {
2106         case MLX5E_TT_IPV4_TCP:
2107                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2108                     MLX5_L3_PROT_TYPE_IPV4);
2109                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2110                     MLX5_L4_PROT_TYPE_TCP);
2111 #ifdef RSS
2112                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
2113                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2114                             MLX5_HASH_IP);
2115                 } else
2116 #endif
2117                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2118                     MLX5_HASH_ALL);
2119                 break;
2120
2121         case MLX5E_TT_IPV6_TCP:
2122                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2123                     MLX5_L3_PROT_TYPE_IPV6);
2124                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2125                     MLX5_L4_PROT_TYPE_TCP);
2126 #ifdef RSS
2127                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
2128                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2129                             MLX5_HASH_IP);
2130                 } else
2131 #endif
2132                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2133                     MLX5_HASH_ALL);
2134                 break;
2135
2136         case MLX5E_TT_IPV4_UDP:
2137                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2138                     MLX5_L3_PROT_TYPE_IPV4);
2139                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2140                     MLX5_L4_PROT_TYPE_UDP);
2141 #ifdef RSS
2142                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
2143                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2144                             MLX5_HASH_IP);
2145                 } else
2146 #endif
2147                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2148                     MLX5_HASH_ALL);
2149                 break;
2150
2151         case MLX5E_TT_IPV6_UDP:
2152                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2153                     MLX5_L3_PROT_TYPE_IPV6);
2154                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2155                     MLX5_L4_PROT_TYPE_UDP);
2156 #ifdef RSS
2157                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2158                         MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2159                             MLX5_HASH_IP);
2160                 } else
2161 #endif
2162                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2163                     MLX5_HASH_ALL);
2164                 break;
2165
2166         case MLX5E_TT_IPV4_IPSEC_AH:
2167                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2168                     MLX5_L3_PROT_TYPE_IPV4);
2169                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2170                     MLX5_HASH_IP_IPSEC_SPI);
2171                 break;
2172
2173         case MLX5E_TT_IPV6_IPSEC_AH:
2174                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2175                     MLX5_L3_PROT_TYPE_IPV6);
2176                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2177                     MLX5_HASH_IP_IPSEC_SPI);
2178                 break;
2179
2180         case MLX5E_TT_IPV4_IPSEC_ESP:
2181                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2182                     MLX5_L3_PROT_TYPE_IPV4);
2183                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2184                     MLX5_HASH_IP_IPSEC_SPI);
2185                 break;
2186
2187         case MLX5E_TT_IPV6_IPSEC_ESP:
2188                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2189                     MLX5_L3_PROT_TYPE_IPV6);
2190                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2191                     MLX5_HASH_IP_IPSEC_SPI);
2192                 break;
2193
2194         case MLX5E_TT_IPV4:
2195                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2196                     MLX5_L3_PROT_TYPE_IPV4);
2197                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2198                     MLX5_HASH_IP);
2199                 break;
2200
2201         case MLX5E_TT_IPV6:
2202                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2203                     MLX5_L3_PROT_TYPE_IPV6);
2204                 MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2205                     MLX5_HASH_IP);
2206                 break;
2207
2208         default:
2209                 break;
2210         }
2211 }
2212
2213 static int
2214 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2215 {
2216         struct mlx5_core_dev *mdev = priv->mdev;
2217         u32 *in;
2218         void *tirc;
2219         int inlen;
2220         int err;
2221
2222         inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2223         in = mlx5_vzalloc(inlen);
2224         if (in == NULL)
2225                 return (-ENOMEM);
2226         tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2227
2228         mlx5e_build_tir_ctx(priv, tirc, tt);
2229
2230         err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2231
2232         kvfree(in);
2233
2234         return (err);
2235 }
2236
2237 static void
2238 mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2239 {
2240         mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2241 }
2242
2243 static int
2244 mlx5e_open_tirs(struct mlx5e_priv *priv)
2245 {
2246         int err;
2247         int i;
2248
2249         for (i = 0; i < MLX5E_NUM_TT; i++) {
2250                 err = mlx5e_open_tir(priv, i);
2251                 if (err)
2252                         goto err_close_tirs;
2253         }
2254
2255         return (0);
2256
2257 err_close_tirs:
2258         for (i--; i >= 0; i--)
2259                 mlx5e_close_tir(priv, i);
2260
2261         return (err);
2262 }
2263
2264 static void
2265 mlx5e_close_tirs(struct mlx5e_priv *priv)
2266 {
2267         int i;
2268
2269         for (i = 0; i < MLX5E_NUM_TT; i++)
2270                 mlx5e_close_tir(priv, i);
2271 }
2272
2273 /*
2274  * SW MTU does not include headers,
2275  * HW MTU includes all headers and checksums.
2276  */
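/*
 * For example, assuming MLX5E_SW2HW_MTU() adds the 14-byte Ethernet
 * header, a 4-byte VLAN tag and the 4-byte FCS, a SW MTU of 1500
 * corresponds to a HW MTU of 1522.
 */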
2277 static int
2278 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2279 {
2280         struct mlx5e_priv *priv = ifp->if_softc;
2281         struct mlx5_core_dev *mdev = priv->mdev;
2282         int hw_mtu;
2283         int err;
2284
2285         err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(sw_mtu));
2286         if (err) {
2287                 if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2288                     __func__, sw_mtu, err);
2289                 return (err);
2290         }
2291         err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2292         if (err) {
2293                 if_printf(ifp, "Query of port MTU failed after "
2294                     "setting the new MTU value\n");
2295         } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
2296                 err = -E2BIG;
2297                 if_printf(ifp, "Port MTU %d is smaller than "
2298                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2299         } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
2300                 err = -EINVAL;
2301                 if_printf(ifp, "Port MTU %d is bigger than "
2302                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2303         }
2304         ifp->if_mtu = sw_mtu;
2305         return (err);
2306 }
2307
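/*
 * Bring-up order: TISes, queue counter, channels, RQT, TIRs, flow
 * table and finally the VLAN rules; the error labels below unwind in
 * exactly the reverse order.
 */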
2308 int
2309 mlx5e_open_locked(struct ifnet *ifp)
2310 {
2311         struct mlx5e_priv *priv = ifp->if_softc;
2312         int err;
2313         u16 set_id;
2314
2315         /* check if already opened */
2316         if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2317                 return (0);
2318
2319 #ifdef RSS
2320         if (rss_getnumbuckets() > priv->params.num_channels) {
2321                 if_printf(ifp, "NOTE: There are more RSS buckets (%u) than "
2322                     "channels (%u) available\n", rss_getnumbuckets(),
2323                     priv->params.num_channels);
2324         }
2325 #endif
2326         err = mlx5e_open_tises(priv);
2327         if (err) {
2328                 if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2329                     __func__, err);
2330                 return (err);
2331         }
2332         err = mlx5_vport_alloc_q_counter(priv->mdev,
2333             MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
2334         if (err) {
2335                 if_printf(priv->ifp,
2336                     "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2337                     __func__, err);
2338                 goto err_close_tises;
2339         }
2340         /* store counter set ID */
2341         priv->counter_set_id = set_id;
2342
2343         err = mlx5e_open_channels(priv);
2344         if (err) {
2345                 if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2346                     __func__, err);
2347                 goto err_dealloc_q_counter;
2348         }
2349         err = mlx5e_open_rqt(priv);
2350         if (err) {
2351                 if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2352                     __func__, err);
2353                 goto err_close_channels;
2354         }
2355         err = mlx5e_open_tirs(priv);
2356         if (err) {
2357                 if_printf(ifp, "%s: mlx5e_open_tirs failed, %d\n",
2358                     __func__, err);
2359                 goto err_close_rqt;
2360         }
2361         err = mlx5e_open_flow_table(priv);
2362         if (err) {
2363                 if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2364                     __func__, err);
2365                 goto err_close_tirs;
2366         }
2367         err = mlx5e_add_all_vlan_rules(priv);
2368         if (err) {
2369                 if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2370                     __func__, err);
2371                 goto err_close_flow_table;
2372         }
2373         set_bit(MLX5E_STATE_OPENED, &priv->state);
2374
2375         mlx5e_update_carrier(priv);
2376         mlx5e_set_rx_mode_core(priv);
2377
2378         return (0);
2379
2380 err_close_flow_table:
2381         mlx5e_close_flow_table(priv);
2382
2383 err_close_tirs:
2384         mlx5e_close_tirs(priv);
2385
2386 err_close_rqt:
2387         mlx5e_close_rqt(priv);
2388
2389 err_close_channels:
2390         mlx5e_close_channels(priv);
2391
2392 err_dealloc_q_counter:
2393         mlx5_vport_dealloc_q_counter(priv->mdev,
2394             MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2395
2396 err_close_tises:
2397         mlx5e_close_tises(priv);
2398
2399         return (err);
2400 }
2401
2402 static void
2403 mlx5e_open(void *arg)
2404 {
2405         struct mlx5e_priv *priv = arg;
2406
2407         PRIV_LOCK(priv);
2408         if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2409                 if_printf(priv->ifp,
2410                     "%s: Setting port status to up failed\n",
2411                     __func__);
2412
2413         mlx5e_open_locked(priv->ifp);
2414         priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2415         PRIV_UNLOCK(priv);
2416 }
2417
2418 int
2419 mlx5e_close_locked(struct ifnet *ifp)
2420 {
2421         struct mlx5e_priv *priv = ifp->if_softc;
2422
2423         /* check if already closed */
2424         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2425                 return (0);
2426
2427         clear_bit(MLX5E_STATE_OPENED, &priv->state);
2428
2429         mlx5e_set_rx_mode_core(priv);
2430         mlx5e_del_all_vlan_rules(priv);
2431         if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2432         mlx5e_close_flow_table(priv);
2433         mlx5e_close_tirs(priv);
2434         mlx5e_close_rqt(priv);
2435         mlx5e_close_channels(priv);
2436         mlx5_vport_dealloc_q_counter(priv->mdev,
2437             MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2438         mlx5e_close_tises(priv);
2439
2440         return (0);
2441 }
2442
2443 #if (__FreeBSD_version >= 1100000)
2444 static uint64_t
2445 mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
2446 {
2447         struct mlx5e_priv *priv = ifp->if_softc;
2448         u64 retval;
2449
2450         /* PRIV_LOCK(priv); XXX not allowed */
2451         switch (cnt) {
2452         case IFCOUNTER_IPACKETS:
2453                 retval = priv->stats.vport.rx_packets;
2454                 break;
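        /*
         * Input errors aggregate the vport RX error count with all of
         * the physical-port error counters:
         */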
2455         case IFCOUNTER_IERRORS:
2456                 retval = priv->stats.vport.rx_error_packets +
2457                     priv->stats.pport.alignment_err +
2458                     priv->stats.pport.check_seq_err +
2459                     priv->stats.pport.crc_align_errors +
2460                     priv->stats.pport.drop_events +
2461                     priv->stats.pport.in_range_len_errors +
2462                     priv->stats.pport.jabbers +
2463                     priv->stats.pport.out_of_range_len +
2464                     priv->stats.pport.oversize_pkts +
2465                     priv->stats.pport.symbol_err +
2466                     priv->stats.pport.too_long_errors +
2467                     priv->stats.pport.undersize_pkts +
2468                     priv->stats.pport.unsupported_op_rx;
2469                 break;
2470         case IFCOUNTER_IQDROPS:
2471                 retval = priv->stats.vport.rx_out_of_buffer;
2472                 break;
2473         case IFCOUNTER_OPACKETS:
2474                 retval = priv->stats.vport.tx_packets;
2475                 break;
2476         case IFCOUNTER_OERRORS:
2477                 retval = priv->stats.vport.tx_error_packets;
2478                 break;
2479         case IFCOUNTER_IBYTES:
2480                 retval = priv->stats.vport.rx_bytes;
2481                 break;
2482         case IFCOUNTER_OBYTES:
2483                 retval = priv->stats.vport.tx_bytes;
2484                 break;
2485         case IFCOUNTER_IMCASTS:
2486                 retval = priv->stats.vport.rx_multicast_packets;
2487                 break;
2488         case IFCOUNTER_OMCASTS:
2489                 retval = priv->stats.vport.tx_multicast_packets;
2490                 break;
2491         case IFCOUNTER_OQDROPS:
2492                 retval = priv->stats.vport.tx_queue_dropped;
2493                 break;
2494         case IFCOUNTER_COLLISIONS:
2495                 retval = priv->stats.pport.collisions;
2496                 break;
2497         default:
2498                 retval = if_get_counter_default(ifp, cnt);
2499                 break;
2500         }
2501         /* PRIV_UNLOCK(priv); XXX not allowed */
2502         return (retval);
2503 }
2504 #endif
2505
2506 static void
2507 mlx5e_set_rx_mode(struct ifnet *ifp)
2508 {
2509         struct mlx5e_priv *priv = ifp->if_softc;
2510
2511         schedule_work(&priv->set_rx_mode_work);
2512 }
2513
2514 static int
2515 mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2516 {
2517         struct mlx5e_priv *priv;
2518         struct ifreq *ifr;
2519         struct ifi2creq i2c;
2520         int error = 0;
2521         int mask = 0;
2522         int size_read = 0;
2523         int module_num;
2524         int max_mtu;
2525         uint8_t read_addr;
2526
2527         priv = ifp->if_softc;
2528
2529         /* check if detaching */
2530         if (priv == NULL || priv->gone != 0)
2531                 return (ENXIO);
2532
2533         switch (command) {
2534         case SIOCSIFMTU:
2535                 ifr = (struct ifreq *)data;
2536
2537                 PRIV_LOCK(priv);
2538                 mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2539
2540                 if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2541                     ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2542                         int was_opened;
2543
2544                         was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2545                         if (was_opened)
2546                                 mlx5e_close_locked(ifp);
2547
2548                         /* set new MTU */
2549                         mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2550
2551                         if (was_opened)
2552                                 mlx5e_open_locked(ifp);
2553                 } else {
2554                         error = EINVAL;
2555                         if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2556                             MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2557                 }
2558                 PRIV_UNLOCK(priv);
2559                 break;
2560         case SIOCSIFFLAGS:
2561                 if ((ifp->if_flags & IFF_UP) &&
2562                     (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2563                         mlx5e_set_rx_mode(ifp);
2564                         break;
2565                 }
2566                 PRIV_LOCK(priv);
2567                 if (ifp->if_flags & IFF_UP) {
2568                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2569                                 if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2570                                         mlx5e_open_locked(ifp);
2571                                 ifp->if_drv_flags |= IFF_DRV_RUNNING;
2572                                 mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2573                         }
2574                 } else {
2575                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2576                                 mlx5_set_port_status(priv->mdev,
2577                                     MLX5_PORT_DOWN);
2578                                 if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2579                                         mlx5e_close_locked(ifp);
2580                                 mlx5e_update_carrier(priv);
2581                                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2582                         }
2583                 }
2584                 PRIV_UNLOCK(priv);
2585                 break;
2586         case SIOCADDMULTI:
2587         case SIOCDELMULTI:
2588                 mlx5e_set_rx_mode(ifp);
2589                 break;
2590         case SIOCSIFMEDIA:
2591         case SIOCGIFMEDIA:
2592         case SIOCGIFXMEDIA:
2593                 ifr = (struct ifreq *)data;
2594                 error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2595                 break;
2596         case SIOCSIFCAP:
2597                 ifr = (struct ifreq *)data;
2598                 PRIV_LOCK(priv);
2599                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2600
2601                 if (mask & IFCAP_TXCSUM) {
2602                         ifp->if_capenable ^= IFCAP_TXCSUM;
2603                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2604
2605                         if (IFCAP_TSO4 & ifp->if_capenable &&
2606                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2607                                 ifp->if_capenable &= ~IFCAP_TSO4;
2608                                 ifp->if_hwassist &= ~CSUM_IP_TSO;
2609                                 if_printf(ifp,
2610                                     "tso4 disabled due to -txcsum.\n");
2611                         }
2612                 }
2613                 if (mask & IFCAP_TXCSUM_IPV6) {
2614                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2615                         ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2616
2617                         if (IFCAP_TSO6 & ifp->if_capenable &&
2618                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2619                                 ifp->if_capenable &= ~IFCAP_TSO6;
2620                                 ifp->if_hwassist &= ~CSUM_IP6_TSO;
2621                                 if_printf(ifp,
2622                                     "tso6 disabled due to -txcsum6.\n");
2623                         }
2624                 }
2625                 if (mask & IFCAP_RXCSUM)
2626                         ifp->if_capenable ^= IFCAP_RXCSUM;
2627                 if (mask & IFCAP_RXCSUM_IPV6)
2628                         ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2629                 if (mask & IFCAP_TSO4) {
2630                         if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2631                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2632                                 if_printf(ifp, "enable txcsum first.\n");
2633                                 error = EAGAIN;
2634                                 goto out;
2635                         }
2636                         ifp->if_capenable ^= IFCAP_TSO4;
2637                         ifp->if_hwassist ^= CSUM_IP_TSO;
2638                 }
2639                 if (mask & IFCAP_TSO6) {
2640                         if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2641                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2642                                 if_printf(ifp, "enable txcsum6 first.\n");
2643                                 error = EAGAIN;
2644                                 goto out;
2645                         }
2646                         ifp->if_capenable ^= IFCAP_TSO6;
2647                         ifp->if_hwassist ^= CSUM_IP6_TSO;
2648                 }
2649                 if (mask & IFCAP_VLAN_HWFILTER) {
2650                         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2651                                 mlx5e_disable_vlan_filter(priv);
2652                         else
2653                                 mlx5e_enable_vlan_filter(priv);
2654
2655                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2656                 }
2657                 if (mask & IFCAP_VLAN_HWTAGGING)
2658                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2659                 if (mask & IFCAP_WOL_MAGIC)
2660                         ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2661
2662                 VLAN_CAPABILITIES(ifp);
2663                 /* turning off LRO also turns off HW LRO, if it is on */
2664                 if (mask & IFCAP_LRO) {
2665                         int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2666                         bool need_restart = false;
2667
2668                         ifp->if_capenable ^= IFCAP_LRO;
2669                         if (!(ifp->if_capenable & IFCAP_LRO)) {
2670                                 if (priv->params.hw_lro_en) {
2671                                         priv->params.hw_lro_en = false;
2672                                         need_restart = true;
2673                                         /* Not sure this is the correct way */
2674                                         priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
2675                                 }
2676                         }
2677                         if (was_opened && need_restart) {
2678                                 mlx5e_close_locked(ifp);
2679                                 mlx5e_open_locked(ifp);
2680                         }
2681                 }
2682 out:
2683                 PRIV_UNLOCK(priv);
2684                 break;
2685
2686         case SIOCGI2C:
2687                 ifr = (struct ifreq *)data;
2688
2689                 /*
2690                  * Copy from the user-space address ifr_data to the
2691                  * kernel-space address i2c
2692                  */
2693                 error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
2694                 if (error)
2695                         break;
2696
2697                 if (i2c.len > sizeof(i2c.data)) {
2698                         error = EINVAL;
2699                         break;
2700                 }
2701
2702                 PRIV_LOCK(priv);
2703                 /* Get module_num, which is required for the eeprom query */
2704                 error = mlx5_query_module_num(priv->mdev, &module_num);
2705                 if (error) {
2706                         if_printf(ifp, "Query module num failed, eeprom "
2707                             "reading is not supported\n");
2708                         error = EINVAL;
2709                         goto err_i2c;
2710                 }
2711                 /* Check if module is present before doing an access */
2712                 if (mlx5_query_module_status(priv->mdev, module_num) !=
2713                     MLX5_MODULE_STATUS_PLUGGED) {
2714                         error = EINVAL;
2715                         goto err_i2c;
2716                 }
2717                 /*
2718                  * Currently 0xA0 and 0xA2 are the only addresses permitted.
2719                  * The internal conversion is as follows:
2720                  */
2721                 if (i2c.dev_addr == 0xA0)
2722                         read_addr = MLX5E_I2C_ADDR_LOW;
2723                 else if (i2c.dev_addr == 0xA2)
2724                         read_addr = MLX5E_I2C_ADDR_HIGH;
2725                 else {
2726                         if_printf(ifp, "Query eeprom failed, "
2727                             "Invalid Address: %X\n", i2c.dev_addr);
2728                         error = EINVAL;
2729                         goto err_i2c;
2730                 }
2731                 error = mlx5_query_eeprom(priv->mdev,
2732                     read_addr, MLX5E_EEPROM_LOW_PAGE,
2733                     (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2734                     (uint32_t *)i2c.data, &size_read);
2735                 if (error) {
2736                         if_printf(ifp, "Query eeprom failed, eeprom "
2737                             "reading is not supported\n");
2738                         error = EINVAL;
2739                         goto err_i2c;
2740                 }
2741
2742                 if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2743                         error = mlx5_query_eeprom(priv->mdev,
2744                             read_addr, MLX5E_EEPROM_LOW_PAGE,
2745                             (uint32_t)(i2c.offset + size_read),
2746                             (uint32_t)(i2c.len - size_read), module_num,
2747                             (uint32_t *)(i2c.data + size_read), &size_read);
2748                 }
2749                 if (error) {
2750                         if_printf(ifp, "Query eeprom failed, eeprom "
2751                             "reading is not supported\n");
2752                         error = EINVAL;
2753                         goto err_i2c;
2754                 }
2755
2756                 error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
2757 err_i2c:
2758                 PRIV_UNLOCK(priv);
2759                 break;
2760
2761         default:
2762                 error = ether_ioctl(ifp, command, data);
2763                 break;
2764         }
2765         return (error);
2766 }
2767
2768 static int
2769 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2770 {
2771         /*
2772          * TODO: uncomment once FW really sets all these bits:
2773          * if (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
2774          *     !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
2775          *     !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD))
2776          *         return (-ENOTSUPP);
2777          */
2778
2779         /* TODO: add more must-have features */
2780
2781         if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
2782                 return (-ENODEV);
2783
2784         return (0);
2785 }
2786
2787 static void
2788 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
2789     struct mlx5e_priv *priv,
2790     int num_comp_vectors)
2791 {
2792         /*
2793          * TODO: Consider link speed for setting "log_sq_size",
2794          * "log_rq_size" and "cq_moderation_xxx":
2795          */
2796         priv->params.log_sq_size =
2797             MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
2798         priv->params.log_rq_size =
2799             MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
2800         priv->params.rx_cq_moderation_usec =
2801             MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
2802             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
2803             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
2804         priv->params.rx_cq_moderation_mode =
2805             MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
2806         priv->params.rx_cq_moderation_pkts =
2807             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
2808         priv->params.tx_cq_moderation_usec =
2809             MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
2810         priv->params.tx_cq_moderation_pkts =
2811             MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
2812         priv->params.min_rx_wqes =
2813             MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
2814         priv->params.rx_hash_log_tbl_sz =
2815             (order_base_2(num_comp_vectors) >
2816             MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
2817             order_base_2(num_comp_vectors) :
2818             MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
2819         priv->params.num_tc = 1;
2820         priv->params.default_vlan_prio = 0;
2821         priv->counter_set_id = -1;
2822
2823         /*
2824          * HW LRO is currently defaulted to off. When that changes, we will
2825          * consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
2826          */
2827         priv->params.hw_lro_en = false;
2828         priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
2829
2830         priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
2831
2832         priv->mdev = mdev;
2833         priv->params.num_channels = num_comp_vectors;
2834         priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
2835         priv->queue_mapping_channel_mask =
2836             roundup_pow_of_two(num_comp_vectors) - 1;
2837         priv->num_tc = priv->params.num_tc;
2838         priv->default_vlan_prio = priv->params.default_vlan_prio;
2839
2840         INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
2841         INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
2842         INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
2843 }
2844
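/*
 * Create a single memory key in pass-through (MLX5_ACCESS_MODE_PA)
 * mode with the full-length (MLX5_MKEY_LEN64) flag set. Every channel
 * references it through "c->mkey_be", so no per-buffer memory
 * registration is needed on the data path.
 */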
2845 static int
2846 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
2847     struct mlx5_core_mr *mr)
2848 {
2849         struct ifnet *ifp = priv->ifp;
2850         struct mlx5_core_dev *mdev = priv->mdev;
2851         struct mlx5_create_mkey_mbox_in *in;
2852         int err;
2853
2854         in = mlx5_vzalloc(sizeof(*in));
2855         if (in == NULL) {
2856                 if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
2857                 return (-ENOMEM);
2858         }
2859         in->seg.flags = MLX5_PERM_LOCAL_WRITE |
2860             MLX5_PERM_LOCAL_READ |
2861             MLX5_ACCESS_MODE_PA;
2862         in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
2863         in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
2864
2865         err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
2866             NULL);
2867         if (err)
2868                 if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
2869                     __func__, err);
2870
2871         kvfree(in);
2872
2873         return (err);
2874 }
2875
2876 static const char *mlx5e_vport_stats_desc[] = {
2877         MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
2878 };
2879
2880 static const char *mlx5e_pport_stats_desc[] = {
2881         MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
2882 };
2883
2884 static void
2885 mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
2886 {
2887         mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
2888         sx_init(&priv->state_lock, "mlx5state");
2889         callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
2890         MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
2891 }
2892
2893 static void
2894 mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
2895 {
2896         mtx_destroy(&priv->async_events_mtx);
2897         sx_destroy(&priv->state_lock);
2898 }
2899
2900 static int
2901 sysctl_firmware(SYSCTL_HANDLER_ARGS)
2902 {
2903         /*
2904          * The string format is "%d.%d.%d".
2905          * fw_rev_{maj,min,sub} each return a u16, and 2^16 = 65536 takes
2906          * at most 5 chars to print.
2907          * Adding the two "." separators and the terminating NUL, we need
2908          * at most 18 (5*3 + 3) chars in total.
2909          */
2910         char fw[18];
2911         struct mlx5e_priv *priv = arg1;
2912         int error;
2913
2914         snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
2915             fw_rev_sub(priv->mdev));
2916         error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
2917         return (error);
2918 }
2919
2920 static void
2921 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
2922 {
2923         SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2924             OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
2925             sysctl_firmware, "A", "HCA firmware version");
2926
2927         SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2928             OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
2929             "Board ID");
2930 }
2931
2932 static void
2933 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
2934 {
2935 #if (__FreeBSD_version < 1100000)
2936         char path[64];
2937
2938 #endif
2939         /* Only receiving pauseframes is enabled by default */
2940         priv->params.tx_pauseframe_control = 0;
2941         priv->params.rx_pauseframe_control = 1;
2942
2943 #if (__FreeBSD_version < 1100000)
2944         /* compute path for sysctl */
2945         snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
2946             device_get_unit(priv->mdev->pdev->dev.bsddev));
2947
2948         /* try to fetch tunable, if any */
2949         TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
2950
2951         /* compute path for sysctl */
2952         snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
2953             device_get_unit(priv->mdev->pdev->dev.bsddev));
2954
2955         /* try to fetch tunable, if any */
2956         TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
2957 #endif
2958
2959         /* register pauseframe SYSCTLs */
2960         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2961             OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
2962             &priv->params.tx_pauseframe_control, 0,
2963             "Set to enable TX pause frames. Clear to disable.");
2964
2965         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2966             OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
2967             &priv->params.rx_pauseframe_control, 0,
2968             "Set to enable RX pause frames. Clear to disable.");
2969
2970         /* range check */
2971         priv->params.tx_pauseframe_control =
2972             priv->params.tx_pauseframe_control ? 1 : 0;
2973         priv->params.rx_pauseframe_control =
2974             priv->params.rx_pauseframe_control ? 1 : 0;
2975
2976         /* update firmware */
2977         mlx5_set_port_pause(priv->mdev, 1,
2978             priv->params.rx_pauseframe_control,
2979             priv->params.tx_pauseframe_control);
2980 }
2981
2982 static void *
2983 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
2984 {
2985         static volatile int mlx5_en_unit;
2986         struct ifnet *ifp;
2987         struct mlx5e_priv *priv;
2988         u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
2989         struct sysctl_oid_list *child;
2990         int ncv = mdev->priv.eq_table.num_comp_vectors;
2991         char unit[16];
2992         int err;
2993         int i;
2994         u32 eth_proto_cap;
2995
2996         if (mlx5e_check_required_hca_cap(mdev)) {
2997                 mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
2998                 return (NULL);
2999         }
3000         priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
3001         mlx5e_priv_mtx_init(priv);
3002
3003         ifp = priv->ifp = if_alloc(IFT_ETHER);
3004         if (ifp == NULL) {
3005                 mlx5_core_err(mdev, "if_alloc() failed\n");
3006                 goto err_free_priv;
3007         }
3008         ifp->if_softc = priv;
3009         if_initname(ifp, "mce", atomic_fetchadd_int(&mlx5_en_unit, 1));
3010         ifp->if_mtu = ETHERMTU;
3011         ifp->if_init = mlx5e_open;
3012         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3013         ifp->if_ioctl = mlx5e_ioctl;
3014         ifp->if_transmit = mlx5e_xmit;
3015         ifp->if_qflush = if_qflush;
3016 #if (__FreeBSD_version >= 1100000)
3017         ifp->if_get_counter = mlx5e_get_counter;
3018 #endif
3019         ifp->if_snd.ifq_maxlen = ifqmaxlen;
3020         /*
3021          * Set driver features
3022          */
3023         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
3024         ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
3025         ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
3026         ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
3027         ifp->if_capabilities |= IFCAP_LRO;
3028         ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
3029         ifp->if_capabilities |= IFCAP_HWSTATS;
3030
3031         /* set TSO limits so that we don't have to drop TX packets */
3032         ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3033         ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
3034         ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
3035
3036         ifp->if_capenable = ifp->if_capabilities;
3037         ifp->if_hwassist = 0;
3038         if (ifp->if_capenable & IFCAP_TSO)
3039                 ifp->if_hwassist |= CSUM_TSO;
3040         if (ifp->if_capenable & IFCAP_TXCSUM)
3041                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
3042         if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3043                 ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
3044
3045         /* ifnet sysctl tree */
3046         sysctl_ctx_init(&priv->sysctl_ctx);
3047         priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
3048             OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
3049         if (priv->sysctl_ifnet == NULL) {
3050                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3051                 goto err_free_sysctl;
3052         }
3053         snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
3054         priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3055             OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
3056         if (priv->sysctl_ifnet == NULL) {
3057                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3058                 goto err_free_sysctl;
3059         }
3060
3061         /* HW sysctl tree */
3062         child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
3063         priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
3064             OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
3065         if (priv->sysctl_hw == NULL) {
3066                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3067                 goto err_free_sysctl;
3068         }
3069         mlx5e_build_ifp_priv(mdev, priv, ncv);
3070         err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
3071         if (err) {
3072                 if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
3073                     __func__, err);
3074                 goto err_free_sysctl;
3075         }
3076         err = mlx5_core_alloc_pd(mdev, &priv->pdn);
3077         if (err) {
3078                 if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
3079                     __func__, err);
3080                 goto err_unmap_free_uar;
3081         }
3082         err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
3083         if (err) {
3084                 if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
3085                     __func__, err);
3086                 goto err_dealloc_pd;
3087         }
3088         err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
3089         if (err) {
3090                 if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
3091                     __func__, err);
3092                 goto err_dealloc_transport_domain;
3093         }
3094         mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
3095
3096         /* check if we should generate a random MAC address */
3097         if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
3098             is_zero_ether_addr(dev_addr)) {
3099                 random_ether_addr(dev_addr);
3100                 if_printf(ifp, "Assigned random MAC address\n");
3101         }
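             /*
              * An all-zero address is typically read when no MAC has
              * been provisioned, e.g. on an SR-IOV virtual function;
              * the random address keeps the interface usable.
              */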
3102
3103         /* set default MTU */
3104         mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
3105
3106         /* Set the device description */
3107         device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
3108
3109         /* Set default media status */
3110         priv->media_status_last = IFM_AVALID;
3111         priv->media_active_last = IFM_ETHER | IFM_AUTO |
3112             IFM_ETH_RXPAUSE | IFM_FDX;
3113
3114         /* set up the default pause frame configuration */
3115         mlx5e_setup_pauseframes(priv);
3116
3117         err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
3118         if (err) {
3119                 eth_proto_cap = 0;
3120                 if_printf(ifp, "%s: Query port media capability failed, %d\n",
3121                     __func__, err);
3122         }
3123
3124         /* Set up the supported media types */
3125         ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
3126             mlx5e_media_change, mlx5e_media_status);
3127
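             /*
              * Walk the PTYS capability mask and register each mode the
              * port supports, both plain and as a full-duplex variant
              * with RX/TX pause.
              */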
3128         for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
3129                 if (mlx5e_mode_table[i].baudrate == 0)
3130                         continue;
3131                 if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
3132                         ifmedia_add(&priv->media,
3133                             mlx5e_mode_table[i].subtype |
3134                             IFM_ETHER, 0, NULL);
3135                         ifmedia_add(&priv->media,
3136                             mlx5e_mode_table[i].subtype |
3137                             IFM_ETHER | IFM_FDX |
3138                             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3139                 }
3140         }
3141
3142         ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3143         ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3144             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3145
3146         /* Set autoselect by default */
3147         ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3148             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
3149         ether_ifattach(ifp, dev_addr);
3150
3151         /* Register for VLAN events */
3152         priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
3153             mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3154         priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3155             mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3156
3157         /* Link is down by default */
3158         if_link_state_change(ifp, LINK_STATE_DOWN);
3159
3160         mlx5e_enable_async_events(priv);
3161
3162         mlx5e_add_hw_stats(priv);
3163
3164         mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3165             "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3166             priv->stats.vport.arg);
3167
3168         mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3169             "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3170             priv->stats.pport.arg);
3171
3172         mlx5e_create_ethtool(priv);
3173
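             /*
              * Fetch the counters once so the sysctl nodes report data
              * immediately after attach; the async-events lock
              * serializes this with the firmware event handler.
              */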
3174         mtx_lock(&priv->async_events_mtx);
3175         mlx5e_update_stats(priv);
3176         mtx_unlock(&priv->async_events_mtx);
3177
3178         return (priv);
3179
3180 err_dealloc_transport_domain:
3181         mlx5_dealloc_transport_domain(mdev, priv->tdn);
3182
3183 err_dealloc_pd:
3184         mlx5_core_dealloc_pd(mdev, priv->pdn);
3185
3186 err_unmap_free_uar:
3187         mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3188
3189 err_free_sysctl:
3190         sysctl_ctx_free(&priv->sysctl_ctx);
3191
3192         if_free(ifp);
3193
3194 err_free_priv:
3195         mlx5e_priv_mtx_destroy(priv);
3196         free(priv, M_MLX5EN);
3197         return (NULL);
3198 }
3199
3200 static void
3201 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3202 {
3203         struct mlx5e_priv *priv = vpriv;
3204         struct ifnet *ifp = priv->ifp;
3205
3206         /* don't allow more IOCTLs */
3207         priv->gone = 1;
3208
3209         /*
3210          * Clear the device description to avoid a use-after-free;
3211          * the bsddev is not destroyed when this module is
3212          * unloaded.
3213          */
3214         device_set_desc(mdev->pdev->dev.bsddev, NULL);
3215
3216         /* XXX wait one second (hz ticks) to allow pending IOCTL handlers to complete */
3217         pause("W", hz);
3218
3219         /* stop watchdog timer */
3220         callout_drain(&priv->watchdog);
3221
3222         if (priv->vlan_attach != NULL)
3223                 EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3224         if (priv->vlan_detach != NULL)
3225                 EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3226
3227         /* make sure device gets closed */
3228         PRIV_LOCK(priv);
3229         mlx5e_close_locked(ifp);
3230         PRIV_UNLOCK(priv);
3231
3232         /* unregister device */
3233         ifmedia_removeall(&priv->media);
3234         ether_ifdetach(ifp);
3235         if_free(ifp);
3236
3237         /* destroy all remaining sysctl nodes */
3238         if (priv->sysctl_debug)
3239                 sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3240         sysctl_ctx_free(&priv->stats.vport.ctx);
3241         sysctl_ctx_free(&priv->stats.pport.ctx);
3242         sysctl_ctx_free(&priv->sysctl_ctx);
3243
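             /* Release hardware resources in reverse order of allocation. */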
3244         mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3245         mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3246         mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3247         mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3248         mlx5e_disable_async_events(priv);
3249         flush_scheduled_work();
3250         mlx5e_priv_mtx_destroy(priv);
3251         free(priv, M_MLX5EN);
3252 }
3253
3254 static void *
3255 mlx5e_get_ifp(void *vpriv)
3256 {
3257         struct mlx5e_priv *priv = vpriv;
3258
3259         return (priv->ifp);
3260 }
3261
3262 static struct mlx5_interface mlx5e_interface = {
3263         .add = mlx5e_create_ifp,
3264         .remove = mlx5e_destroy_ifp,
3265         .event = mlx5e_async_event,
3266         .protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3267         .get_dev = mlx5e_get_ifp,
3268 };
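     /*
      * The mlx5 core invokes .add once for every probed device and
      * .remove at detach; .event forwards asynchronous firmware
      * events (port state changes and the like) to this layer.
      */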
3269
3270 void
3271 mlx5e_init(void)
3272 {
3273         mlx5_register_interface(&mlx5e_interface);
3274 }
3275
3276 void
3277 mlx5e_cleanup(void)
3278 {
3279         mlx5_unregister_interface(&mlx5e_interface);
3280 }
3281
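     /*
      * Hook module load/unload to interface (un)registration; the
      * SI_ORDER_THIRD ordering presumably ensures the mlx5 core is
      * initialized before this driver registers with it.
      */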
3282 module_init_order(mlx5e_init, SI_ORDER_THIRD);
3283 module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3284
3285 #if (__FreeBSD_version >= 1100000)
3286 MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3287 #endif
3288 MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3289 MODULE_VERSION(mlx5en, 1);