/*-
 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/vport.h>
#include <asm/pgtable.h>
#include <linux/fs.h>
#undef inode

#include <rdma/ib_user_verbs.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include "user.h"
#include "mlx5_ib.h"

#include <sys/unistd.h>
#include <sys/kthread.h>

#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "3.2-rc1"
#define DRIVER_RELDATE  "May 2016"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DEPEND(mlx5ib, linuxkpi, 1, 1, 1);
MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
MODULE_VERSION(mlx5ib, 1);

static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");

enum {
        MLX5_STANDARD_ATOMIC_SIZE = 0x8,
};

struct workqueue_struct *mlx5_ib_wq;

static char mlx5_version[] =
        DRIVER_NAME ": Mellanox Connect-IB InfiniBand driver v"
        DRIVER_VERSION " (" DRIVER_RELDATE ")\n";

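/*
 * Derive the device's atomic capabilities from the firmware capability
 * bits.  Plain and masked 8-byte compare-and-swap / fetch-and-add are
 * only advertised (IB_ATOMIC_HCA) when the firmware also supports the
 * 8-byte requestor endianness mode or the host is big-endian; otherwise
 * IB_ATOMIC_NONE is reported.
 */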
static void get_atomic_caps(struct mlx5_ib_dev *dev,
                            struct ib_device_attr *props)
{
        int tmp;
        u8 atomic_operations;
        u8 atomic_size_qp;
        u8 atomic_req_endianess;

        atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
        atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
        atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
                                               atomic_req_8B_endianess_mode) ||
                               !mlx5_host_is_le();

        tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
        if (((atomic_operations & tmp) == tmp) &&
            (atomic_size_qp & MLX5_STANDARD_ATOMIC_SIZE) &&
            atomic_req_endianess)
                props->atomic_cap = IB_ATOMIC_HCA;
        else
                props->atomic_cap = IB_ATOMIC_NONE;

        tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
        if (((atomic_operations & tmp) == tmp) &&
            (atomic_size_qp & MLX5_STANDARD_ATOMIC_SIZE) &&
            atomic_req_endianess)
                props->masked_atomic_cap = IB_ATOMIC_HCA;
        else
                props->masked_atomic_cap = IB_ATOMIC_NONE;
}

static enum rdma_link_layer
mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
        struct mlx5_ib_dev *dev = to_mdev(device);

        switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
        case MLX5_CAP_PORT_TYPE_IB:
                return IB_LINK_LAYER_INFINIBAND;
        case MLX5_CAP_PORT_TYPE_ETH:
                return IB_LINK_LAYER_ETHERNET;
        default:
                return IB_LINK_LAYER_UNSPECIFIED;
        }
}

static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
        return !dev->mdev->issi;
}

enum {
        MLX5_VPORT_ACCESS_METHOD_MAD,
        MLX5_VPORT_ACCESS_METHOD_HCA,
        MLX5_VPORT_ACCESS_METHOD_NIC,
};

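/*
 * Select how vport attributes are queried: through MAD commands when
 * the firmware runs the legacy interface (ISSI, the interface step
 * sequence ID, is zero), through the NIC vport context when the first
 * port runs RoCE (Ethernet link layer), and through the HCA vport
 * context otherwise.
 */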
static int mlx5_get_vport_access_method(struct ib_device *ibdev)
{
        if (mlx5_use_mad_ifc(to_mdev(ibdev)))
                return MLX5_VPORT_ACCESS_METHOD_MAD;

        if (mlx5_ib_port_link_layer(ibdev, 1) ==
            IB_LINK_LAYER_ETHERNET)
                return MLX5_VPORT_ACCESS_METHOD_NIC;

        return MLX5_VPORT_ACCESS_METHOD_HCA;
}

static int mlx5_query_system_image_guid(struct ib_device *ibdev,
                                        __be64 *sys_image_guid)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_core_dev *mdev = dev->mdev;
        u64 tmp;
        int err;

        switch (mlx5_get_vport_access_method(ibdev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_system_image_guid_mad_ifc(ibdev,
                                                            sys_image_guid);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
                err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
                if (!err)
                        *sys_image_guid = cpu_to_be64(tmp);
                return err;

        case MLX5_VPORT_ACCESS_METHOD_NIC:
                err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
                if (!err)
                        *sys_image_guid = cpu_to_be64(tmp);
                return err;

        default:
                return -EINVAL;
        }
}

static int mlx5_query_max_pkeys(struct ib_device *ibdev,
                                u16 *max_pkeys)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_core_dev *mdev = dev->mdev;

        switch (mlx5_get_vport_access_method(ibdev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
        case MLX5_VPORT_ACCESS_METHOD_NIC:
                *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
                                                pkey_table_size));
                return 0;

        default:
                return -EINVAL;
        }
}

static int mlx5_query_vendor_id(struct ib_device *ibdev,
                                u32 *vendor_id)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);

        switch (mlx5_get_vport_access_method(ibdev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
        case MLX5_VPORT_ACCESS_METHOD_NIC:
                return mlx5_core_query_vendor_id(dev->mdev, vendor_id);

        default:
                return -EINVAL;
        }
}

static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
                                __be64 *node_guid)
{
        u64 tmp;
        int err;

        switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_node_guid_mad_ifc(dev, node_guid);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
                err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
                if (!err)
                        *node_guid = cpu_to_be64(tmp);
                return err;

        case MLX5_VPORT_ACCESS_METHOD_NIC:
                err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
                if (!err)
                        *node_guid = cpu_to_be64(tmp);
                return err;

        default:
                return -EINVAL;
        }
}

struct mlx5_reg_node_desc {
        u8      desc[64];
};

static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
        struct mlx5_reg_node_desc in;

        if (mlx5_use_mad_ifc(dev))
                return mlx5_query_node_desc_mad_ifc(dev, node_desc);

        memset(&in, 0, sizeof(in));

        return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
                                    sizeof(struct mlx5_reg_node_desc),
                                    MLX5_REG_NODE_DESC, 0, 0);
}

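/*
 * Fill in the ib_device_attr structure from the firmware's general,
 * atomic and port capabilities.  Most limits are powers of two taken
 * from the corresponding log_* capability fields.
 */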
static int mlx5_ib_query_device(struct ib_device *ibdev,
                                struct ib_device_attr *props)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_core_dev *mdev = dev->mdev;
        int max_sq_desc;
        int max_rq_sg;
        int max_sq_sg;
        int err;

        memset(props, 0, sizeof(*props));

        err = mlx5_query_system_image_guid(ibdev,
                                           &props->sys_image_guid);
        if (err)
                return err;

        err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
        if (err)
                return err;

        err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
        if (err)
                return err;

        props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
                ((u64)fw_rev_min(dev->mdev) << 16) |
                fw_rev_sub(dev->mdev);
        props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
                IB_DEVICE_PORT_ACTIVE_EVENT             |
                IB_DEVICE_SYS_IMAGE_GUID                |
                IB_DEVICE_RC_RNR_NAK_GEN;

        if (MLX5_CAP_GEN(mdev, pkv))
                props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
        if (MLX5_CAP_GEN(mdev, qkv))
                props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
        if (MLX5_CAP_GEN(mdev, apm))
                props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
        props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
        if (MLX5_CAP_GEN(mdev, xrc))
                props->device_cap_flags |= IB_DEVICE_XRC;
        props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
        if (MLX5_CAP_GEN(mdev, block_lb_mc))
                props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;

        props->vendor_part_id      = mdev->pdev->device;
        props->hw_ver              = mdev->pdev->revision;

        props->max_mr_size         = ~0ull;
        props->page_size_cap       = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) - 1);
        props->max_qp              = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
        props->max_qp_wr           = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
        max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
                    sizeof(struct mlx5_wqe_data_seg);
        max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
        max_sq_sg = (max_sq_desc -
                     sizeof(struct mlx5_wqe_ctrl_seg) -
                     sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg);
        props->max_sge = min(max_rq_sg, max_sq_sg);
        props->max_cq              = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
        props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
        props->max_mr              = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
        props->max_pd              = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
        props->max_qp_rd_atom      = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
        props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
        props->max_srq             = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
        props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
        props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
        props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
        props->max_srq_sge         = max_rq_sg - 1;
        props->max_fast_reg_page_list_len = (unsigned int)-1;
        get_atomic_caps(dev, props);
        props->max_mcast_grp       = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
        props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                                           props->max_mcast_grp;
        props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
        props->max_ah           = INT_MAX;

        return 0;
}

enum mlx5_ib_width {
        MLX5_IB_WIDTH_1X        = 1 << 0,
        MLX5_IB_WIDTH_2X        = 1 << 1,
        MLX5_IB_WIDTH_4X        = 1 << 2,
        MLX5_IB_WIDTH_8X        = 1 << 3,
        MLX5_IB_WIDTH_12X       = 1 << 4
};

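/*
 * Map the MLX5 active link width bitmask onto the IB width enum.  2X is
 * a valid hardware width but has no encoding in the IB spec used here,
 * so it is rejected with a warning.
 */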
static int translate_active_width(struct ib_device *ibdev, u8 active_width,
                                  u8 *ib_width)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        int err = 0;

        if (active_width & MLX5_IB_WIDTH_1X) {
                *ib_width = IB_WIDTH_1X;
        } else if (active_width & MLX5_IB_WIDTH_2X) {
                mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
                             (int)active_width);
                err = -EINVAL;
        } else if (active_width & MLX5_IB_WIDTH_4X) {
                *ib_width = IB_WIDTH_4X;
        } else if (active_width & MLX5_IB_WIDTH_8X) {
                *ib_width = IB_WIDTH_8X;
        } else if (active_width & MLX5_IB_WIDTH_12X) {
                *ib_width = IB_WIDTH_12X;
        } else {
                mlx5_ib_dbg(dev, "Invalid active_width %d\n",
                            (int)active_width);
                err = -EINVAL;
        }

        return err;
}

/*
 * TODO: Move to IB core
 */
enum ib_max_vl_num {
        __IB_MAX_VL_0           = 1,
        __IB_MAX_VL_0_1         = 2,
        __IB_MAX_VL_0_3         = 3,
        __IB_MAX_VL_0_7         = 4,
        __IB_MAX_VL_0_14        = 5,
};

enum mlx5_vl_hw_cap {
        MLX5_VL_HW_0    = 1,
        MLX5_VL_HW_0_1  = 2,
        MLX5_VL_HW_0_2  = 3,
        MLX5_VL_HW_0_3  = 4,
        MLX5_VL_HW_0_4  = 5,
        MLX5_VL_HW_0_5  = 6,
        MLX5_VL_HW_0_6  = 7,
        MLX5_VL_HW_0_7  = 8,
        MLX5_VL_HW_0_14 = 15
};

static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
                                u8 *max_vl_num)
{
        switch (vl_hw_cap) {
        case MLX5_VL_HW_0:
                *max_vl_num = __IB_MAX_VL_0;
                break;
        case MLX5_VL_HW_0_1:
                *max_vl_num = __IB_MAX_VL_0_1;
                break;
        case MLX5_VL_HW_0_3:
                *max_vl_num = __IB_MAX_VL_0_3;
                break;
        case MLX5_VL_HW_0_7:
                *max_vl_num = __IB_MAX_VL_0_7;
                break;
        case MLX5_VL_HW_0_14:
                *max_vl_num = __IB_MAX_VL_0_14;
                break;
        default:
                return -EINVAL;
        }

        return 0;
}

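/*
 * Query IB port attributes by combining the HCA vport context with the
 * PTYS, PMTU and PVLC access registers (link width/speed, MTU and VL
 * capability, respectively).
 */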
static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
                              struct ib_port_attr *props)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_core_dev *mdev = dev->mdev;
        u32 *rep;
        int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
        struct mlx5_ptys_reg *ptys;
        struct mlx5_pmtu_reg *pmtu;
        struct mlx5_pvlc_reg pvlc;
        void *ctx;
        int err;

        rep = mlx5_vzalloc(outlen);
        ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
        pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
        if (!rep || !ptys || !pmtu) {
                err = -ENOMEM;
                goto out;
        }

        memset(props, 0, sizeof(*props));

        /* TODO: what if this is a PF with a dual port? */
        err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
        if (err)
                goto out;

        ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);

        props->lid              = MLX5_GET(hca_vport_context, ctx, lid);
        props->lmc              = MLX5_GET(hca_vport_context, ctx, lmc);
        props->sm_lid           = MLX5_GET(hca_vport_context, ctx, sm_lid);
        props->sm_sl            = MLX5_GET(hca_vport_context, ctx, sm_sl);
        props->state            = MLX5_GET(hca_vport_context, ctx, vport_state);
        props->phys_state       = MLX5_GET(hca_vport_context, ctx,
                                           port_physical_state);
        props->port_cap_flags   = MLX5_GET(hca_vport_context, ctx, cap_mask1);
        props->gid_tbl_len      = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
        props->max_msg_sz       = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
        props->pkey_tbl_len     = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
        props->bad_pkey_cntr    = MLX5_GET(hca_vport_context, ctx,
                                           pkey_violation_counter);
        props->qkey_viol_cntr   = MLX5_GET(hca_vport_context, ctx,
                                           qkey_violation_counter);
        props->subnet_timeout   = MLX5_GET(hca_vport_context, ctx,
                                           subnet_timeout);
        props->init_type_reply  = MLX5_GET(hca_vport_context, ctx,
                                           init_type_reply);

        ptys->proto_mask |= MLX5_PTYS_IB;
        ptys->local_port = port;
        err = mlx5_core_access_ptys(mdev, ptys, 0);
        if (err)
                goto out;

        err = translate_active_width(ibdev, ptys->ib_link_width_oper,
                                     &props->active_width);
        if (err)
                goto out;

        props->active_speed     = (u8)ptys->ib_proto_oper;

        pmtu->local_port = port;
        err = mlx5_core_access_pmtu(mdev, pmtu, 0);
        if (err)
                goto out;

        props->max_mtu          = pmtu->max_mtu;
        props->active_mtu       = pmtu->oper_mtu;

        memset(&pvlc, 0, sizeof(pvlc));
        pvlc.local_port = port;
        err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
        if (err)
                goto out;

        err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
                                   &props->max_vl_num);
out:
        kvfree(rep);
        kfree(ptys);
        kfree(pmtu);
        return err;
}

int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
                       struct ib_port_attr *props)
{
        switch (mlx5_get_vport_access_method(ibdev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_port_mad_ifc(ibdev, port, props);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
                return mlx5_query_port_ib(ibdev, port, props);

        case MLX5_VPORT_ACCESS_METHOD_NIC:
                return mlx5_query_port_roce(ibdev, port, props);

        default:
                return -EINVAL;
        }
}

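/*
 * Build the interface-identifier half of a link-local GID from a 6-byte
 * Ethernet MAC, following the modified EUI-64 construction of RFC 4291:
 * split the MAC in the middle, insert 0xFF 0xFE, and flip the
 * universal/local bit.  For example (hypothetical MAC),
 * 00:25:90:aa:bb:cc becomes 02:25:90:ff:fe:aa:bb:cc.
 */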
static inline int
mlx5_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
{
        if (dev->if_addrlen != ETH_ALEN)
                return -1;
        memcpy(eui, IF_LLADDR(dev), 3);
        memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);

        /* NOTE: The scope ID is added by the GID to IP conversion */

        eui[3] = 0xFF;
        eui[4] = 0xFE;
        eui[0] ^= 2;
        return 0;
}

static void
mlx5_make_default_gid(struct net_device *dev, union ib_gid *gid)
{
        gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
        mlx5_addrconf_ifid_eui48(&gid->raw[8], dev);
}

static inline int
mlx5_ip2gid(const struct sockaddr *addr, union ib_gid *gid)
{
        switch (addr->sa_family) {
        case AF_INET:
                ipv6_addr_set_v4mapped(((const struct sockaddr_in *)addr)->sin_addr.s_addr,
                    (struct in6_addr *)gid->raw);
                break;
        case AF_INET6:
                memcpy(gid->raw, &((const struct sockaddr_in6 *)addr)->sin6_addr, 16);
                /* clear SCOPE ID */
                gid->raw[2] = 0;
                gid->raw[3] = 0;
                break;
        default:
                return -EINVAL;
        }
        return 0;
}

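/*
 * Per-port kthread that keeps the RoCE GID table in sync with the
 * network stack.  Once a second it rebuilds the table from the default
 * (MAC-derived) GID plus the IPv4/IPv6 addresses of the Ethernet
 * interface and any VLANs stacked on it, pushes changed entries to the
 * firmware via modify_gid_roce() and dispatches IB_EVENT_GID_CHANGE so
 * that ibcore refreshes its cache.  The loop runs until port_gone is
 * set (by the detach code, not shown here), which the thread
 * acknowledges by setting port_gone to 2 before exiting.
 */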
static void
mlx5_ib_roce_port_update(void *arg)
{
        struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg;
        struct mlx5_ib_dev *dev = port->dev;
        struct mlx5_core_dev *mdev = dev->mdev;
        struct net_device *xdev[MLX5_IB_GID_MAX];
        struct net_device *idev;
        struct net_device *ndev;
        struct ifaddr *ifa;
        union ib_gid gid_temp;

        while (port->port_gone == 0) {
                int update = 0;
                int gid_index = 0;
                int j;
                int error;

                ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH);
                if (ndev == NULL) {
                        pause("W", hz);
                        continue;
                }

                CURVNET_SET_QUIET(ndev->if_vnet);

                memset(&gid_temp, 0, sizeof(gid_temp));
                mlx5_make_default_gid(ndev, &gid_temp);
                if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
                        port->gid_table[gid_index] = gid_temp;
                        update = 1;
                }
                xdev[gid_index] = ndev;
                gid_index++;

                IFNET_RLOCK();
                TAILQ_FOREACH(idev, &V_ifnet, if_link) {
                        if (idev == ndev)
                                break;
                }
                if (idev != NULL) {
                    TAILQ_FOREACH(idev, &V_ifnet, if_link) {
                        if (idev != ndev) {
                                if (idev->if_type != IFT_L2VLAN)
                                        continue;
                                if (ndev != rdma_vlan_dev_real_dev(idev))
                                        continue;
                        }
                        /* clone address information for IPv4 and IPv6 */
                        IF_ADDR_RLOCK(idev);
                        TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
                                if (ifa->ifa_addr == NULL ||
                                    (ifa->ifa_addr->sa_family != AF_INET &&
                                     ifa->ifa_addr->sa_family != AF_INET6) ||
                                    gid_index >= MLX5_IB_GID_MAX)
                                        continue;
                                memset(&gid_temp, 0, sizeof(gid_temp));
                                mlx5_ip2gid(ifa->ifa_addr, &gid_temp);
                                /* check for existing entry */
                                for (j = 0; j != gid_index; j++) {
                                        if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0)
                                                break;
                                }
                                /* check if new entry must be added */
                                if (j == gid_index) {
                                        if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
                                                port->gid_table[gid_index] = gid_temp;
                                                update = 1;
                                        }
                                        xdev[gid_index] = idev;
                                        gid_index++;
                                }
                        }
                        IF_ADDR_RUNLOCK(idev);
                    }
                }
                IFNET_RUNLOCK();
                CURVNET_RESTORE();

                if (update != 0 &&
                    mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) {
                        struct ib_event event = {
                            .device = &dev->ib_dev,
                            .element.port_num = port->port_num + 1,
                            .event = IB_EVENT_GID_CHANGE,
                        };

                        /* add new entries, if any */
                        for (j = 0; j != gid_index; j++) {
                                error = modify_gid_roce(&dev->ib_dev, port->port_num, j,
                                    port->gid_table + j, xdev[j]);
                                if (error != 0)
                                        printf("mlx5_ib: Failed to update RoCE GID table: %d\n", error);
                        }
                        memset(&gid_temp, 0, sizeof(gid_temp));

                        /* clear old entries, if any */
                        for (; j != MLX5_IB_GID_MAX; j++) {
                                if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0)
                                        continue;
                                port->gid_table[j] = gid_temp;
                                (void) modify_gid_roce(&dev->ib_dev, port->port_num, j,
                                    port->gid_table + j, ndev);
                        }

                        /* make sure ibcore gets updated */
                        ib_dispatch_event(&event);
                }
                pause("W", hz);
        }
        do {
                struct ib_event event = {
                        .device = &dev->ib_dev,
                        .element.port_num = port->port_num + 1,
                        .event = IB_EVENT_GID_CHANGE,
                };
                /* make sure ibcore gets updated */
                ib_dispatch_event(&event);

                /* wait a bit */
                pause("W", hz);
        } while (0);
        port->port_gone = 2;
        kthread_exit();
}

static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
                             union ib_gid *gid)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_core_dev *mdev = dev->mdev;

        switch (mlx5_get_vport_access_method(ibdev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
                return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);

        case MLX5_VPORT_ACCESS_METHOD_NIC:
                if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
                    index < 0 || index >= MLX5_IB_GID_MAX ||
                    dev->port[port - 1].port_gone != 0)
                        memset(gid, 0, sizeof(*gid));
                else
                        *gid = dev->port[port - 1].gid_table[index];
                return 0;

        default:
                return -EINVAL;
        }
}

static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
                              u16 *pkey)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_core_dev *mdev = dev->mdev;

        switch (mlx5_get_vport_access_method(ibdev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
        case MLX5_VPORT_ACCESS_METHOD_NIC:
                return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
                                                 pkey);

        default:
                return -EINVAL;
        }
}

static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
                                 struct ib_device_modify *props)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_reg_node_desc in;
        struct mlx5_reg_node_desc out;
        int err;

        if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
                return -EOPNOTSUPP;

        if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
                return 0;

        /*
         * Pass the node descriptor to the firmware if possible, so that
         * it can generate an SM trap 144 (node description changed).
         */
        memcpy(&in, props->node_desc, 64);
        err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
                                   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
        if (err)
                return err;

        memcpy(ibdev->node_desc, props->node_desc, 64);

        return err;
}

static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
                               struct ib_port_modify *props)
{
        u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
                     IB_LINK_LAYER_ETHERNET);
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct ib_port_attr attr;
        u32 tmp;
        int err;

        /* return OK if this is RoCE. CM calls ib_modify_port() regardless
         * of whether port link layer is ETH or IB. For ETH ports, qkey
         * violations and port capabilities are not valid.
         */
        if (is_eth)
                return 0;

        mutex_lock(&dev->cap_mask_mutex);

        err = mlx5_ib_query_port(ibdev, port, &attr);
        if (err)
                goto out;

        tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
                ~props->clr_port_cap_mask;

        err = mlx5_set_port_caps(dev->mdev, port, tmp);

out:
        mutex_unlock(&dev->cap_mask_mutex);
        return err;
}

enum mlx5_cap_flags {
        MLX5_CAP_COMPACT_AV = 1 << 0,
};

static void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
{
        *flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
                  MLX5_CAP_COMPACT_AV : 0;
}

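/*
 * Allocate a user context: parse the v0/v2 request, allocate the
 * requested UAR pages, mark the fast-path BF registers (uuarn 2 and 3
 * of every group of four) as reserved in the bitmap, and return the
 * device limits in the response.  On RoCE ports a transport domain is
 * allocated as well.
 */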
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
                                                  struct ib_udata *udata)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_alloc_ucontext_req_v2 req;
        struct mlx5_ib_alloc_ucontext_resp resp;
        struct mlx5_ib_ucontext *context;
        struct mlx5_uuar_info *uuari;
        struct mlx5_uar *uars;
        int gross_uuars;
        int num_uars;
        int ver;
        int uuarn;
        int err;
        int i;
        size_t reqlen;

        if (!dev->ib_active)
                return ERR_PTR(-EAGAIN);

        memset(&req, 0, sizeof(req));
        memset(&resp, 0, sizeof(resp));

        reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
        if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
                ver = 0;
        else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
                ver = 2;
        else {
                mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen);
                return ERR_PTR(-EINVAL);
        }

        err = ib_copy_from_udata(&req, udata, reqlen);
        if (err) {
                mlx5_ib_err(dev, "copy failed\n");
                return ERR_PTR(err);
        }

        if (req.reserved) {
                mlx5_ib_err(dev, "request corrupted\n");
                return ERR_PTR(-EINVAL);
        }

        if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
                mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
                return ERR_PTR(-ENOMEM);
        }

        req.total_num_uuars = ALIGN(req.total_num_uuars,
                                    MLX5_NON_FP_BF_REGS_PER_PAGE);
        if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
                mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
                             req.num_low_latency_uuars,
                             req.total_num_uuars - 1);
                return ERR_PTR(-EINVAL);
        }

        num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
        gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
        resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
        if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
                resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
        resp.cache_line_size = L1_CACHE_BYTES;
        resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
        resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
        resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
        resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
        resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
        set_mlx5_flags(&resp.flags, dev->mdev);

        if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
                resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);

        if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
                resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);

        context = kzalloc(sizeof(*context), GFP_KERNEL);
        if (!context)
                return ERR_PTR(-ENOMEM);

        uuari = &context->uuari;
        mutex_init(&uuari->lock);
        uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
        if (!uars) {
                err = -ENOMEM;
                goto out_ctx;
        }

        uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
                                sizeof(*uuari->bitmap),
                                GFP_KERNEL);
        if (!uuari->bitmap) {
                err = -ENOMEM;
                goto out_uar_ctx;
        }
        /*
         * clear all fast path uuars
         */
        for (i = 0; i < gross_uuars; i++) {
                uuarn = i & 3;
                if (uuarn == 2 || uuarn == 3)
                        set_bit(i, uuari->bitmap);
        }

        uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
        if (!uuari->count) {
                err = -ENOMEM;
                goto out_bitmap;
        }

        /*
         * Initialize the dynamic UAR indices before 'i' is reused by the
         * UAR allocation loop below; the out_uars unwind path relies on
         * 'i' still holding the number of UARs actually allocated.
         */
        for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
                context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;

        for (i = 0; i < num_uars; i++) {
                err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
                if (err) {
                        mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
                        goto out_uars;
                }
        }

        INIT_LIST_HEAD(&context->db_page_list);
        mutex_init(&context->db_page_mutex);

        resp.tot_uuars = req.total_num_uuars;
        resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
        err = ib_copy_to_udata(udata, &resp,
                               min_t(size_t, udata->outlen, sizeof(resp)));
        if (err)
                goto out_uars;

        uuari->ver = ver;
        uuari->num_low_latency_uuars = req.num_low_latency_uuars;
        uuari->uars = uars;
        uuari->num_uars = num_uars;

        if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
            IB_LINK_LAYER_ETHERNET) {
                err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
                if (err)
                        goto out_uars;
        }

        return &context->ibucontext;

out_uars:
        for (i--; i >= 0; i--)
                mlx5_cmd_free_uar(dev->mdev, uars[i].index);
        kfree(uuari->count);

out_bitmap:
        kfree(uuari->bitmap);

out_uar_ctx:
        kfree(uars);

out_ctx:
        kfree(context);
        return ERR_PTR(err);
}

static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
        struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
        struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
        struct mlx5_uuar_info *uuari = &context->uuari;
        int i;

        if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
            IB_LINK_LAYER_ETHERNET)
                mlx5_dealloc_transport_domain(dev->mdev, context->tdn);

        for (i = 0; i < uuari->num_uars; i++) {
                if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
                        mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
        }
        for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
                if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
                        mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
        }

        kfree(uuari->count);
        kfree(uuari->bitmap);
        kfree(uuari->uars);
        kfree(context);

        return 0;
}

static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
{
        return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
}

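/*
 * The mmap offset (in pages) encodes a command in the bits above
 * MLX5_IB_MMAP_CMD_SHIFT and an argument (here a UAR index) in the
 * bits below it, i.e. offset = (cmd << MLX5_IB_MMAP_CMD_SHIFT) | index.
 */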
static int get_command(unsigned long offset)
{
        return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
}

static int get_arg(unsigned long offset)
{
        return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
}

static int get_index(unsigned long offset)
{
        return get_arg(offset);
}

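/*
 * Map one UAR page into user space: validate that exactly one page is
 * requested, translate the UAR index to a physical frame number and
 * remap it with the caller-supplied protection (write-combining or
 * non-cached).
 */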
static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
                    struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
                    struct mlx5_ib_ucontext *context)
{
        unsigned long idx;
        phys_addr_t pfn;

        if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
                mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
                             (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
                return -EINVAL;
        }

        idx = get_index(vma->vm_pgoff);
        if (idx >= uuari->num_uars) {
                mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
                             idx, uuari->num_uars);
                return -EINVAL;
        }

        pfn = uar_index2pfn(dev, uuari->uars[idx].index);
        mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
                    (unsigned long long)pfn);

        vma->vm_page_prot = prot;
        if (io_remap_pfn_range(vma, vma->vm_start, pfn,
                               PAGE_SIZE, vma->vm_page_prot)) {
                mlx5_ib_err(dev, "io remap failed\n");
                return -EAGAIN;
        }

        mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
                    (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);

        return 0;
}

static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
        struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
        struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
        struct mlx5_uuar_info *uuari = &context->uuari;
        unsigned long command;

        command = get_command(vma->vm_pgoff);
        switch (command) {
        case MLX5_IB_MMAP_REGULAR_PAGE:
        case MLX5_IB_MMAP_WC_PAGE:
                return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
                                true, uuari, dev, context);

        case MLX5_IB_MMAP_NC_PAGE:
                return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
                                false, uuari, dev, context);

        default:
                return -EINVAL;
        }
}

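/*
 * Create a physical-address memory key for a kernel PD: a 2^64-byte,
 * local-read region in PA access mode.  It is used to back the PD's
 * pa_lkey, which mlx5_ib_alloc_pd() below fills in for kernel
 * (non-userspace) PDs.
 */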
static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
{
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_mkey_seg *seg;
        struct mlx5_core_mr mr;
        int err;

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        seg = &in->seg;
        seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
        seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
        seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        seg->start_addr = 0;

        err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
                                    NULL, NULL, NULL);
        if (err) {
                mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
                goto err_in;
        }

        kfree(in);
        *key = mr.key;

        return 0;

err_in:
        kfree(in);

        return err;
}

static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
{
        struct mlx5_core_mr mr;
        int err;

        memset(&mr, 0, sizeof(mr));
        mr.key = key;
        err = mlx5_core_destroy_mkey(dev->mdev, &mr);
        if (err)
                mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
}

static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
                                      struct ib_ucontext *context,
                                      struct ib_udata *udata)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_alloc_pd_resp resp;
        struct mlx5_ib_pd *pd;
        int err;

        pd = kmalloc(sizeof(*pd), GFP_KERNEL);
        if (!pd)
                return ERR_PTR(-ENOMEM);

        err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
        if (err) {
                mlx5_ib_warn(dev, "pd alloc failed\n");
                kfree(pd);
                return ERR_PTR(err);
        }

        if (context) {
                resp.pdn = pd->pdn;
                if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
                        mlx5_ib_err(dev, "copy failed\n");
                        mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
                        kfree(pd);
                        return ERR_PTR(-EFAULT);
                }
        } else {
                err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
                if (err) {
                        mlx5_ib_err(dev, "alloc mkey failed\n");
                        mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
                        kfree(pd);
                        return ERR_PTR(err);
                }
        }

        return &pd->ibpd;
}

static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
{
        struct mlx5_ib_dev *mdev = to_mdev(pd->device);
        struct mlx5_ib_pd *mpd = to_mpd(pd);

        if (!pd->uobject)
                free_pa_mkey(mdev, mpd->pa_lkey);

        mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
        kfree(mpd);

        return 0;
}

static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
        int err;

        if (ibqp->qp_type == IB_QPT_RAW_PACKET)
                err = -EOPNOTSUPP;
        else
                err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
        if (err)
                mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
                             ibqp->qp_num, gid->raw);

        return err;
}

static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
        int err;

        if (ibqp->qp_type == IB_QPT_RAW_PACKET)
                err = -EOPNOTSUPP;
        else
                err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
        if (err)
                mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
                             ibqp->qp_num, gid->raw);

        return err;
}

static int init_node_data(struct mlx5_ib_dev *dev)
{
        int err;

        err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
        if (err)
                return err;

        return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
}

static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
                             char *buf)
{
        struct mlx5_ib_dev *dev =
                container_of(device, struct mlx5_ib_dev, ib_dev.dev);

        return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
}

static ssize_t show_reg_pages(struct device *device,
                              struct device_attribute *attr, char *buf)
{
        struct mlx5_ib_dev *dev =
                container_of(device, struct mlx5_ib_dev, ib_dev.dev);

        return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}

static ssize_t show_hca(struct device *device, struct device_attribute *attr,
                        char *buf)
{
        struct mlx5_ib_dev *dev =
                container_of(device, struct mlx5_ib_dev, ib_dev.dev);
        return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}

static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
                           char *buf)
{
        struct mlx5_ib_dev *dev =
                container_of(device, struct mlx5_ib_dev, ib_dev.dev);
        return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
                       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
}

static ssize_t show_rev(struct device *device, struct device_attribute *attr,
                        char *buf)
{
        struct mlx5_ib_dev *dev =
                container_of(device, struct mlx5_ib_dev, ib_dev.dev);
        return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
}

static ssize_t show_board(struct device *device, struct device_attribute *attr,
                          char *buf)
{
        struct mlx5_ib_dev *dev =
                container_of(device, struct mlx5_ib_dev, ib_dev.dev);
        return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
                       dev->mdev->board_id);
}

static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);

static struct device_attribute *mlx5_class_attributes[] = {
        &dev_attr_hw_rev,
        &dev_attr_fw_ver,
        &dev_attr_hca_type,
        &dev_attr_board_id,
        &dev_attr_fw_pages,
        &dev_attr_reg_pages,
};

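/*
 * Reset-flow handling for a fatal device error: walk every QP on the
 * device and, for each send or receive queue with outstanding work,
 * collect its CQ (once) on cq_armed_list, then invoke the completion
 * handlers so that consumers see their pending work flushed.
 */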
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
{
        struct mlx5_ib_qp *mqp;
        struct mlx5_ib_cq *send_mcq, *recv_mcq;
        struct mlx5_core_cq *mcq;
        struct list_head cq_armed_list;
        unsigned long flags_qp;
        unsigned long flags_cq;
        unsigned long flags;

        mlx5_ib_warn(ibdev, " started\n");
        INIT_LIST_HEAD(&cq_armed_list);

        /* Go over the qp list residing on this ibdev, sync with create/destroy qp. */
        spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
        list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
                spin_lock_irqsave(&mqp->sq.lock, flags_qp);
                if (mqp->sq.tail != mqp->sq.head) {
                        send_mcq = to_mcq(mqp->ibqp.send_cq);
                        spin_lock_irqsave(&send_mcq->lock, flags_cq);
                        if (send_mcq->mcq.comp &&
                            mqp->ibqp.send_cq->comp_handler) {
                                if (!send_mcq->mcq.reset_notify_added) {
                                        send_mcq->mcq.reset_notify_added = 1;
                                        list_add_tail(&send_mcq->mcq.reset_notify,
                                                      &cq_armed_list);
                                }
                        }
                        spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
                }
                spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
                spin_lock_irqsave(&mqp->rq.lock, flags_qp);
                /* no handling is needed for SRQ */
                if (!mqp->ibqp.srq) {
                        if (mqp->rq.tail != mqp->rq.head) {
                                recv_mcq = to_mcq(mqp->ibqp.recv_cq);
                                spin_lock_irqsave(&recv_mcq->lock, flags_cq);
                                if (recv_mcq->mcq.comp &&
                                    mqp->ibqp.recv_cq->comp_handler) {
                                        if (!recv_mcq->mcq.reset_notify_added) {
                                                recv_mcq->mcq.reset_notify_added = 1;
                                                list_add_tail(&recv_mcq->mcq.reset_notify,
                                                              &cq_armed_list);
                                        }
                                }
                                spin_unlock_irqrestore(&recv_mcq->lock,
                                                       flags_cq);
                        }
                }
                spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
        }
        /*
         * At this point all in-flight post-send requests have been observed
         * by the lock/unlock cycles above; now run the completion handlers
         * of all the CQs involved.
         */
        list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
                mcq->comp(mcq);
        }
        spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
        mlx5_ib_warn(ibdev, " ended\n");
}

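/*
 * Translate mlx5 core device events into IB events and dispatch them to
 * ibcore.  Port events carry the port number in 'param'; a SYS_ERROR
 * marks the device inactive and triggers the reset flow above.
 */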
1383 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1384                           enum mlx5_dev_event event, unsigned long param)
1385 {
1386         struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
1387         struct ib_event ibev;
1388
1389         u8 port = 0;
1390
1391         switch (event) {
1392         case MLX5_DEV_EVENT_SYS_ERROR:
1393                 ibdev->ib_active = false;
1394                 ibev.event = IB_EVENT_DEVICE_FATAL;
1395                 mlx5_ib_handle_internal_error(ibdev);
1396                 break;
1397
1398         case MLX5_DEV_EVENT_PORT_UP:
1399                 ibev.event = IB_EVENT_PORT_ACTIVE;
1400                 port = (u8)param;
1401                 break;
1402
1403         case MLX5_DEV_EVENT_PORT_DOWN:
1404         case MLX5_DEV_EVENT_PORT_INITIALIZED:
1405                 ibev.event = IB_EVENT_PORT_ERR;
1406                 port = (u8)param;
1407                 break;
1408
1409         case MLX5_DEV_EVENT_LID_CHANGE:
1410                 ibev.event = IB_EVENT_LID_CHANGE;
1411                 port = (u8)param;
1412                 break;
1413
1414         case MLX5_DEV_EVENT_PKEY_CHANGE:
1415                 ibev.event = IB_EVENT_PKEY_CHANGE;
1416                 port = (u8)param;
1417                 break;
1418
1419         case MLX5_DEV_EVENT_GUID_CHANGE:
1420                 ibev.event = IB_EVENT_GID_CHANGE;
1421                 port = (u8)param;
1422                 break;
1423
1424         case MLX5_DEV_EVENT_CLIENT_REREG:
1425                 ibev.event = IB_EVENT_CLIENT_REREGISTER;
1426                 port = (u8)param;
1427                 break;
1428
1429         default:
1430                 break;
1431         }
1432
1433         ibev.device           = &ibdev->ib_dev;
1434         ibev.element.port_num = port;
1435
1436         if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
1437             (port < 1 || port > ibdev->num_ports)) {
1438                 mlx5_ib_warn(ibdev, "event %d on invalid port %d\n", event, port);
1439                 return;
1440         }
1441
1442         if (ibdev->ib_active)
1443                 ib_dispatch_event(&ibev);
1444 }
1445
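/* Query extended port capabilities for every port on the device. */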
1446 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
1447 {
1448         int port;
1449
1450         for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
1451                 mlx5_query_ext_port_caps(dev, port);
1452 }
1453
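/*
 * Record the atomic capability reported by query_device.  The
 * capability check below is compiled out, so the atomic responder is
 * currently enabled unconditionally.
 */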
1454 static void config_atomic_responder(struct mlx5_ib_dev *dev,
1455                                     struct ib_device_attr *props)
1456 {
1457         enum ib_atomic_cap cap = props->atomic_cap;
1458
1459 #if 0
1460         if (cap == IB_ATOMIC_HCA ||
1461             cap == IB_ATOMIC_GLOB)
1462 #endif
1463                 dev->enable_atomic_resp = 1;
1464
1465         dev->atomic_cap = cap;
1466 }
1467
1468 enum mlx5_addr_align {
1469         MLX5_ADDR_ALIGN_0       = 0,
1470         MLX5_ADDR_ALIGN_64      = 64,
1471         MLX5_ADDR_ALIGN_128     = 128,
1472 };
1473
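/*
 * Query device- and port-level attributes and cache the pkey and GID
 * table sizes in the core driver's per-port capability array.
 */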
1474 static int get_port_caps(struct mlx5_ib_dev *dev)
1475 {
1476         struct ib_device_attr *dprops = NULL;
1477         struct ib_port_attr *pprops = NULL;
1478         int err = -ENOMEM;
1479         int port;
1480
1481         pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1482         if (!pprops)
1483                 goto out;
1484
1485         dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1486         if (!dprops)
1487                 goto out;
1488
1489         err = mlx5_ib_query_device(&dev->ib_dev, dprops);
1490         if (err) {
1491                 mlx5_ib_warn(dev, "query_device failed %d\n", err);
1492                 goto out;
1493         }
1494         config_atomic_responder(dev, dprops);
1495
1496         for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
1497                 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1498                 if (err) {
1499                         mlx5_ib_warn(dev, "query_port %d failed %d\n",
1500                                      port, err);
1501                         break;
1502                 }
1503                 dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
1504                 dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
1505                 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1506                             dprops->max_pkeys, pprops->gid_tbl_len);
1507         }
1508
1509 out:
1510         kfree(pprops);
1511         kfree(dprops);
1512
1513         return err;
1514 }
1515
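/* Release the MR cache and the PD/MR backing UMR operations. */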
1516 static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1517 {
1518         int err;
1519
1520         err = mlx5_mr_cache_cleanup(dev);
1521         if (err)
1522                 mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1523
1524         ib_dereg_mr(dev->umrc.mr);
1525         ib_dealloc_pd(dev->umrc.pd);
1526 }
1527
1528 enum {
1529         MAX_UMR_WR = 128,
1530 };
1531
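/*
 * Set up the resources used for user-memory registration (UMR): a PD,
 * a local DMA MR, and the MR cache built on top of them.
 */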
1532 static int create_umr_res(struct mlx5_ib_dev *dev)
1533 {
1534         struct ib_pd *pd;
1535         struct ib_mr *mr;
1536         int ret;
1537
1538         pd = ib_alloc_pd(&dev->ib_dev);
1539         if (IS_ERR(pd)) {
1540                 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1541                 ret = PTR_ERR(pd);
1542                 goto error_0;
1543         }
1544
1545         mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
1546         if (IS_ERR(mr)) {
1547                 mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
1548                 ret = PTR_ERR(mr);
1549                 goto error_1;
1550         }
1551
1552         dev->umrc.mr = mr;
1553         dev->umrc.pd = pd;
1554
1555         ret = mlx5_mr_cache_init(dev);
1556         if (ret) {
1557                 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1558                 goto error_2;
1559         }
1560
1561         return 0;
1562
1563 error_2:
1564         ib_dereg_mr(mr);
1565 error_1:
1566         ib_dealloc_pd(pd);
1567 error_0:
1568         return ret;
1569 }
1570
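/*
 * Create the shared device resources handed out to kernel consumers:
 * PD p0, CQ c0, XRC domains x0/x1, an XRC SRQ s0 and a basic SRQ s1.
 * The error unwind releases them in reverse order of creation.
 */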
1571 static int create_dev_resources(struct mlx5_ib_resources *devr)
1572 {
1573         struct ib_srq_init_attr attr;
1574         struct mlx5_ib_dev *dev;
1575         int ret = 0;
1576         struct ib_cq_init_attr cq_attr = { .cqe = 1 };
1577
1578         dev = container_of(devr, struct mlx5_ib_dev, devr);
1579
1580         devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1581         if (IS_ERR(devr->p0)) {
1582                 ret = PTR_ERR(devr->p0);
1583                 goto error0;
1584         }
1585         devr->p0->device  = &dev->ib_dev;
1586         devr->p0->uobject = NULL;
1587         atomic_set(&devr->p0->usecnt, 0);
1588
1589         devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
1590         if (IS_ERR(devr->c0)) {
1591                 ret = PTR_ERR(devr->c0);
1592                 goto error1;
1593         }
1594         devr->c0->device        = &dev->ib_dev;
1595         devr->c0->uobject       = NULL;
1596         devr->c0->comp_handler  = NULL;
1597         devr->c0->event_handler = NULL;
1598         devr->c0->cq_context    = NULL;
1599         atomic_set(&devr->c0->usecnt, 0);
1600
1601         devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1602         if (IS_ERR(devr->x0)) {
1603                 ret = PTR_ERR(devr->x0);
1604                 goto error2;
1605         }
1606         devr->x0->device = &dev->ib_dev;
1607         devr->x0->inode = NULL;
1608         atomic_set(&devr->x0->usecnt, 0);
1609         mutex_init(&devr->x0->tgt_qp_mutex);
1610         INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
1611
1612         devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1613         if (IS_ERR(devr->x1)) {
1614                 ret = PTR_ERR(devr->x1);
1615                 goto error3;
1616         }
1617         devr->x1->device = &dev->ib_dev;
1618         devr->x1->inode = NULL;
1619         atomic_set(&devr->x1->usecnt, 0);
1620         mutex_init(&devr->x1->tgt_qp_mutex);
1621         INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
1622
1623         memset(&attr, 0, sizeof(attr));
1624         attr.attr.max_sge = 1;
1625         attr.attr.max_wr = 1;
1626         attr.srq_type = IB_SRQT_XRC;
1627         attr.ext.xrc.cq = devr->c0;
1628         attr.ext.xrc.xrcd = devr->x0;
1629
1630         devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1631         if (IS_ERR(devr->s0)) {
1632                 ret = PTR_ERR(devr->s0);
1633                 goto error4;
1634         }
1635         devr->s0->device        = &dev->ib_dev;
1636         devr->s0->pd            = devr->p0;
1637         devr->s0->uobject       = NULL;
1638         devr->s0->event_handler = NULL;
1639         devr->s0->srq_context   = NULL;
1640         devr->s0->srq_type      = IB_SRQT_XRC;
1641         devr->s0->ext.xrc.xrcd  = devr->x0;
1642         devr->s0->ext.xrc.cq    = devr->c0;
1643         atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
1644         atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
1645         atomic_inc(&devr->p0->usecnt);
1646         atomic_set(&devr->s0->usecnt, 0);
1647
1648         memset(&attr, 0, sizeof(attr));
1649         attr.attr.max_sge = 1;
1650         attr.attr.max_wr = 1;
1651         attr.srq_type = IB_SRQT_BASIC;
1652         devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1653         if (IS_ERR(devr->s1)) {
1654                 ret = PTR_ERR(devr->s1);
1655                 goto error5;
1656         }
1657         devr->s1->device        = &dev->ib_dev;
1658         devr->s1->pd            = devr->p0;
1659         devr->s1->uobject       = NULL;
1660         devr->s1->event_handler = NULL;
1661         devr->s1->srq_context   = NULL;
1662         devr->s1->srq_type      = IB_SRQT_BASIC;
1663         devr->s1->ext.xrc.cq    = devr->c0;
1664         atomic_inc(&devr->p0->usecnt);
1665         atomic_set(&devr->s1->usecnt, 0);
1666
1667         return 0;
1668
1669 error5:
1670         mlx5_ib_destroy_srq(devr->s0);
1671 error4:
1672         mlx5_ib_dealloc_xrcd(devr->x1);
1673 error3:
1674         mlx5_ib_dealloc_xrcd(devr->x0);
1675 error2:
1676         mlx5_ib_destroy_cq(devr->c0);
1677 error1:
1678         mlx5_ib_dealloc_pd(devr->p0);
1679 error0:
1680         return ret;
1681 }
1682
1683 static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1684 {
1685         mlx5_ib_destroy_srq(devr->s1);
1686         mlx5_ib_destroy_srq(devr->s0);
1687         mlx5_ib_dealloc_xrcd(devr->x0);
1688         mlx5_ib_dealloc_xrcd(devr->x1);
1689         mlx5_ib_destroy_cq(devr->c0);
1690         mlx5_ib_dealloc_pd(devr->p0);
1691 }
1692
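/*
 * Arm the firmware's DC CNAK tracer: allocate one 4KiB page per port
 * (order 0 or 1, i.e. at most two pages), map the buffer for device
 * writes and hand its DMA address to firmware.  On any failure the
 * tracer is simply left disabled.
 */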
1693 static void enable_dc_tracer(struct mlx5_ib_dev *dev)
1694 {
1695         struct device *device = dev->ib_dev.dma_device;
1696         struct mlx5_dc_tracer *dct = &dev->dctr;
1697         int order;
1698         void *tmp;
1699         int size;
1700         int err;
1701
1702         size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
1703         if (size <= PAGE_SIZE)
1704                 order = 0;
1705         else
1706                 order = 1;
1707
1708         dct->pg = alloc_pages(GFP_KERNEL, order);
1709         if (!dct->pg) {
1710                 mlx5_ib_err(dev, "failed to allocate %d page(s)\n", 1 << order);
1711                 return;
1712         }
1713
1714         tmp = page_address(dct->pg);
1715         memset(tmp, 0xff, size);
1716
1717         dct->size = size;
1718         dct->order = order;
1719         dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
1720         if (dma_mapping_error(device, dct->dma)) {
1721                 mlx5_ib_err(dev, "dma mapping error\n");
1722                 goto map_err;
1723         }
1724
1725         err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
1726         if (err) {
1727                 mlx5_ib_warn(dev, "failed to enable DC tracer\n");
1728                 goto cmd_err;
1729         }
1730
1731         return;
1732
1733 cmd_err:
1734         dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
1735 map_err:
1736         __free_pages(dct->pg, dct->order);
1737         dct->pg = NULL;
1738 }
1739
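/* Tell firmware to stop DC CNAK tracing and release the trace buffer. */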
1740 static void disable_dc_tracer(struct mlx5_ib_dev *dev)
1741 {
1742         struct device *device = dev->ib_dev.dma_device;
1743         struct mlx5_dc_tracer *dct = &dev->dctr;
1744         int err;
1745
1746         if (!dct->pg)
1747                 return;
1748
1749         err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
1750         if (err) {
1751                 mlx5_ib_warn(dev, "failed to disable DC tracer\n");
1752                 return;
1753         }
1754
1755         dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
1756         __free_pages(dct->pg, dct->order);
1757         dct->pg = NULL;
1758 }
1759
1760 enum {
1761         MLX5_DC_CNAK_SIZE               = 128,
1762         MLX5_NUM_BUF_IN_PAGE            = PAGE_SIZE / MLX5_DC_CNAK_SIZE,
1763         MLX5_CNAK_TX_CQ_SIGNAL_FACTOR   = 128,
1764         MLX5_DC_CNAK_SL                 = 0,
1765         MLX5_DC_CNAK_VL                 = 0,
1766 };
1767
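/* Enable the DC CNAK tracer on physical functions that support it. */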
1768 static int init_dc_improvements(struct mlx5_ib_dev *dev)
1769 {
1770         if (!mlx5_core_is_pf(dev->mdev))
1771                 return 0;
1772
1773         if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
1774                 return 0;
1775
1776         enable_dc_tracer(dev);
1777
1778         return 0;
1779 }
1780
1781 static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
1782 {
1784         disable_dc_tracer(dev);
1785 }
1786
1787 static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
1788 {
1789         mlx5_vport_dealloc_q_counter(dev->mdev,
1790                                      MLX5_INTERFACE_PROTOCOL_IB,
1791                                      dev->port[port_num].q_cnt_id);
1792         dev->port[port_num].q_cnt_id = 0;
1793 }
1794
1795 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
1796 {
1797         unsigned int i;
1798
1799         for (i = 0; i < dev->num_ports; i++)
1800                 mlx5_ib_dealloc_q_port_counter(dev, i);
1801 }
1802
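/* Allocate one firmware queue counter per port, unwinding on failure. */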
1803 static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
1804 {
1805         int i;
1806         int ret;
1807
1808         for (i = 0; i < dev->num_ports; i++) {
1809                 ret = mlx5_vport_alloc_q_counter(dev->mdev,
1810                                                  MLX5_INTERFACE_PROTOCOL_IB,
1811                                                  &dev->port[i].q_cnt_id);
1812                 if (ret) {
1813                         mlx5_ib_warn(dev,
1814                                      "couldn't allocate queue counter for port %d\n",
1815                                      i + 1);
1816                         goto dealloc_counters;
1817                 }
1818         }
1819
1820         return 0;
1821
1822 dealloc_counters:
1823         while (--i >= 0)
1824                 mlx5_ib_dealloc_q_port_counter(dev, i);
1825
1826         return ret;
1827 }
1828
1829 struct port_attribute {
1830         struct attribute attr;
1831         ssize_t (*show)(struct mlx5_ib_port *,
1832                         struct port_attribute *, char *buf);
1833         ssize_t (*store)(struct mlx5_ib_port *,
1834                          struct port_attribute *,
1835                          const char *buf, size_t count);
1836 };
1837
1838 struct port_counter_attribute {
1839         struct port_attribute   attr;
1840         size_t                  offset;
1841 };
1842
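/*
 * Generic sysfs show dispatcher: recover the mlx5_ib_port from the
 * embedded kobject and forward to the attribute's ->show method.
 */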
1843 static ssize_t port_attr_show(struct kobject *kobj,
1844                               struct attribute *attr, char *buf)
1845 {
1846         struct port_attribute *port_attr =
1847                 container_of(attr, struct port_attribute, attr);
1848         struct mlx5_ib_port_sysfs_group *p =
1849                 container_of(kobj, struct mlx5_ib_port_sysfs_group,
1850                              kobj);
1851         struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
1852                                                     group);
1853
1854         if (!port_attr->show)
1855                 return -EIO;
1856
1857         return port_attr->show(mibport, port_attr, buf);
1858 }
1859
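/*
 * sysfs show handler for a single queue counter: query the port's
 * counter set from firmware and print the 32-bit field located at the
 * attribute's offset within the query output.
 */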
1860 static ssize_t show_port_counter(struct mlx5_ib_port *p,
1861                                  struct port_attribute *port_attr,
1862                                  char *buf)
1863 {
1864         int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
1865         struct port_counter_attribute *counter_attr =
1866                 container_of(port_attr, struct port_counter_attribute, attr);
1867         void *out;
1868         int ret;
1869
1870         out = mlx5_vzalloc(outlen);
1871         if (!out)
1872                 return -ENOMEM;
1873
1874         ret = mlx5_vport_query_q_counter(p->dev->mdev,
1875                                          p->q_cnt_id, 0,
1876                                          out, outlen);
1877         if (ret)
1878                 goto free;
1879
1880         ret = sprintf(buf, "%u\n",
1881                       be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));
1882
1883 free:
1884         kvfree(out);    /* buffer comes from mlx5_vzalloc() */
1885         return ret;
1886 }
1887
1888 #define PORT_COUNTER_ATTR(_name)                                        \
1889 struct port_counter_attribute port_counter_attr_##_name = {             \
1890         .attr  = __ATTR(_name, S_IRUGO, show_port_counter, NULL),       \
1891         .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)             \
1892 }
1893
1894 static PORT_COUNTER_ATTR(rx_write_requests);
1895 static PORT_COUNTER_ATTR(rx_read_requests);
1896 static PORT_COUNTER_ATTR(rx_atomic_requests);
1897 static PORT_COUNTER_ATTR(rx_dct_connect);
1898 static PORT_COUNTER_ATTR(out_of_buffer);
1899 static PORT_COUNTER_ATTR(out_of_sequence);
1900 static PORT_COUNTER_ATTR(duplicate_request);
1901 static PORT_COUNTER_ATTR(rnr_nak_retry_err);
1902 static PORT_COUNTER_ATTR(packet_seq_err);
1903 static PORT_COUNTER_ATTR(implied_nak_seq_err);
1904 static PORT_COUNTER_ATTR(local_ack_timeout_err);
1905
1906 static struct attribute *counter_attrs[] = {
1907         &port_counter_attr_rx_write_requests.attr.attr,
1908         &port_counter_attr_rx_read_requests.attr.attr,
1909         &port_counter_attr_rx_atomic_requests.attr.attr,
1910         &port_counter_attr_rx_dct_connect.attr.attr,
1911         &port_counter_attr_out_of_buffer.attr.attr,
1912         &port_counter_attr_out_of_sequence.attr.attr,
1913         &port_counter_attr_duplicate_request.attr.attr,
1914         &port_counter_attr_rnr_nak_retry_err.attr.attr,
1915         &port_counter_attr_packet_seq_err.attr.attr,
1916         &port_counter_attr_implied_nak_seq_err.attr.attr,
1917         &port_counter_attr_local_ack_timeout_err.attr.attr,
1918         NULL
1919 };
1920
1921 static struct attribute_group port_counters_group = {
1922         .name  = "counters",
1923         .attrs  = counter_attrs
1924 };
1925
1926 static const struct sysfs_ops port_sysfs_ops = {
1927         .show = port_attr_show
1928 };
1929
1930 static struct kobj_type port_type = {
1931         .sysfs_ops     = &port_sysfs_ops,
1932 };
1933
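/*
 * Register the sysfs kobject for one port and, when the device
 * exposes the out-of-sequence and retransmission counters, attach the
 * "counters" attribute group to it.
 */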
1934 static int add_port_attrs(struct mlx5_ib_dev *dev,
1935                           struct kobject *parent,
1936                           struct mlx5_ib_port_sysfs_group *port,
1937                           u8 port_num)
1938 {
1939         int ret;
1940
1941         ret = kobject_init_and_add(&port->kobj, &port_type,
1942                                    parent,
1943                                    "%d", port_num);
1944         if (ret)
1945                 return ret;
1946
1947         if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
1948             MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
1949                 ret = sysfs_create_group(&port->kobj, &port_counters_group);
1950                 if (ret)
1951                         goto put_kobj;
1952         }
1953
1954         port->enabled = true;
1955         return ret;
1956
1957 put_kobj:
1958         kobject_put(&port->kobj);
1959         return ret;
1960 }
1961
1962 static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
1963                                 unsigned int num_ports)
1964 {
1965         unsigned int i;
1966
1967         for (i = 0; i < num_ports; i++) {
1968                 struct mlx5_ib_port_sysfs_group *port =
1969                         &dev->port[i].group;
1970
1971                 if (!port->enabled)
1972                         continue;
1973
1974                 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
1975                     MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
1976                         sysfs_remove_group(&port->kobj,
1977                                            &port_counters_group);
1978                 kobject_put(&port->kobj);
1979                 port->enabled = false;
1980         }
1981
1982         if (dev->ports_parent) {
1983                 kobject_put(dev->ports_parent);
1984                 dev->ports_parent = NULL;
1985         }
1986 }
1987
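/*
 * Build the "mlx5_ports/<N>" sysfs tree under the IB device and
 * populate the per-port attribute groups.
 */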
1988 static int create_port_attrs(struct mlx5_ib_dev *dev)
1989 {
1990         int ret = 0;
1991         unsigned int i = 0;
1992         struct device *device = &dev->ib_dev.dev;
1993
1994         dev->ports_parent = kobject_create_and_add("mlx5_ports",
1995                                                    &device->kobj);
1996         if (!dev->ports_parent)
1997                 return -ENOMEM;
1998
1999         for (i = 0; i < dev->num_ports; i++) {
2000                 ret = add_port_attrs(dev,
2001                                      dev->ports_parent,
2002                                      &dev->port[i].group,
2003                                      i + 1);
2004
2005                 if (ret)
2006                         goto _destroy_ports_attrs;
2007         }
2008
2009         return 0;
2010
2011 _destroy_ports_attrs:
2012         destroy_ports_attrs(dev, i);
2013         return ret;
2014 }
2015
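/*
 * Attach callback invoked by mlx5_core for every probed device:
 * allocate the IB device, fill in the verbs entry points, create the
 * supporting resources and register with the IB core.  Returns the
 * new device context, or NULL on failure.
 */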
2016 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2017 {
2018         struct mlx5_ib_dev *dev;
2019         int err;
2020         int i;
2021
2022         printk_once(KERN_INFO "%s", mlx5_version);
2023
2024         dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
2025         if (!dev)
2026                 return NULL;
2027
2028         dev->mdev = mdev;
2029
2030         dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
2031                              GFP_KERNEL);
2032         if (!dev->port)
2033                 goto err_dealloc;
2034
2035         for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2036                 dev->port[i].dev = dev;
2037                 dev->port[i].port_num = i;
2038                 dev->port[i].port_gone = 0;
2039                 memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table));
2040         }
2041
2042         err = get_port_caps(dev);
2043         if (err)
2044                 goto err_free_port;
2045
2046         if (mlx5_use_mad_ifc(dev))
2047                 get_ext_port_caps(dev);
2048
2049         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2050             IB_LINK_LAYER_ETHERNET) {
2051                 if (MLX5_CAP_GEN(mdev, roce)) {
2052                         err = mlx5_nic_vport_enable_roce(mdev);
2053                         if (err)
2054                                 goto err_free_port;
2055                 } else {
2056                         goto err_free_port;
2057                 }
2058         }
2059
2060         MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
2061
2062         strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
2063         dev->ib_dev.owner               = THIS_MODULE;
2064         dev->ib_dev.node_type           = RDMA_NODE_IB_CA;
2065         dev->ib_dev.local_dma_lkey      = mdev->special_contexts.resd_lkey;
2066         dev->num_ports          = MLX5_CAP_GEN(mdev, num_ports);
2067         dev->ib_dev.phys_port_cnt     = dev->num_ports;
2068         dev->ib_dev.num_comp_vectors    =
2069                 dev->mdev->priv.eq_table.num_comp_vectors;
2070         dev->ib_dev.dma_device  = &mdev->pdev->dev;
2071
2072         dev->ib_dev.uverbs_abi_ver      = MLX5_IB_UVERBS_ABI_VERSION;
2073         dev->ib_dev.uverbs_cmd_mask     =
2074                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
2075                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
2076                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
2077                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
2078                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
2079                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
2080                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
2081                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2082                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
2083                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
2084                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
2085                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
2086                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
2087                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
2088                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
2089                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
2090                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
2091                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
2092                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
2093                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
2094                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
2095                 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
2096                 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
2097
2098         dev->ib_dev.query_device        = mlx5_ib_query_device;
2099         dev->ib_dev.query_port          = mlx5_ib_query_port;
2100         dev->ib_dev.get_link_layer      = mlx5_ib_port_link_layer;
2101         dev->ib_dev.get_netdev          = mlx5_ib_get_netdev;
2102         dev->ib_dev.query_gid           = mlx5_ib_query_gid;
2103         dev->ib_dev.query_pkey          = mlx5_ib_query_pkey;
2104         dev->ib_dev.modify_device       = mlx5_ib_modify_device;
2105         dev->ib_dev.modify_port         = mlx5_ib_modify_port;
2106         dev->ib_dev.alloc_ucontext      = mlx5_ib_alloc_ucontext;
2107         dev->ib_dev.dealloc_ucontext    = mlx5_ib_dealloc_ucontext;
2108         dev->ib_dev.mmap                = mlx5_ib_mmap;
2109         dev->ib_dev.alloc_pd            = mlx5_ib_alloc_pd;
2110         dev->ib_dev.dealloc_pd          = mlx5_ib_dealloc_pd;
2111         dev->ib_dev.create_ah           = mlx5_ib_create_ah;
2112         dev->ib_dev.query_ah            = mlx5_ib_query_ah;
2113         dev->ib_dev.destroy_ah          = mlx5_ib_destroy_ah;
2114         dev->ib_dev.create_srq          = mlx5_ib_create_srq;
2115         dev->ib_dev.modify_srq          = mlx5_ib_modify_srq;
2116         dev->ib_dev.query_srq           = mlx5_ib_query_srq;
2117         dev->ib_dev.destroy_srq         = mlx5_ib_destroy_srq;
2118         dev->ib_dev.post_srq_recv       = mlx5_ib_post_srq_recv;
2119         dev->ib_dev.create_qp           = mlx5_ib_create_qp;
2120         dev->ib_dev.modify_qp           = mlx5_ib_modify_qp;
2121         dev->ib_dev.query_qp            = mlx5_ib_query_qp;
2122         dev->ib_dev.destroy_qp          = mlx5_ib_destroy_qp;
2123         dev->ib_dev.post_send           = mlx5_ib_post_send;
2124         dev->ib_dev.post_recv           = mlx5_ib_post_recv;
2125         dev->ib_dev.create_cq           = mlx5_ib_create_cq;
2126         dev->ib_dev.modify_cq           = mlx5_ib_modify_cq;
2127         dev->ib_dev.resize_cq           = mlx5_ib_resize_cq;
2128         dev->ib_dev.destroy_cq          = mlx5_ib_destroy_cq;
2129         dev->ib_dev.poll_cq             = mlx5_ib_poll_cq;
2130         dev->ib_dev.req_notify_cq       = mlx5_ib_arm_cq;
2131         dev->ib_dev.get_dma_mr          = mlx5_ib_get_dma_mr;
2132         dev->ib_dev.reg_user_mr         = mlx5_ib_reg_user_mr;
2133         dev->ib_dev.reg_phys_mr         = mlx5_ib_reg_phys_mr;
2134         dev->ib_dev.dereg_mr            = mlx5_ib_dereg_mr;
2135         dev->ib_dev.attach_mcast        = mlx5_ib_mcg_attach;
2136         dev->ib_dev.detach_mcast        = mlx5_ib_mcg_detach;
2137         dev->ib_dev.process_mad         = mlx5_ib_process_mad;
2138         dev->ib_dev.alloc_fast_reg_mr   = mlx5_ib_alloc_fast_reg_mr;
2139         dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
2140         dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
2141
2142         if (MLX5_CAP_GEN(mdev, xrc)) {
2143                 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
2144                 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
2145                 dev->ib_dev.uverbs_cmd_mask |=
2146                         (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2147                         (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2148         }
2149
2150         err = init_node_data(dev);
2151         if (err)
2152                 goto err_disable_roce;
2153
2154         mutex_init(&dev->cap_mask_mutex);
2155         INIT_LIST_HEAD(&dev->qp_list);
2156         spin_lock_init(&dev->reset_flow_resource_lock);
2157
2158         err = create_dev_resources(&dev->devr);
2159         if (err)
2160                 goto err_disable_roce;
2161
2163         err = mlx5_ib_alloc_q_counters(dev);
2164         if (err)
2165                 goto err_rsrc;
2166
2167         err = ib_register_device(&dev->ib_dev, NULL);
2168         if (err)
2169                 goto err_q_cnt;
2170
2171         err = create_umr_res(dev);
2172         if (err)
2173                 goto err_dev;
2174
2175         if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2176             MLX5_CAP_PORT_TYPE_IB) {
2177                 if (init_dc_improvements(dev))
2178                         mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
2179         }
2180
2181         err = create_port_attrs(dev);
2182         if (err)
2183                 goto err_dc;
2184
2185         for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2186                 err = device_create_file(&dev->ib_dev.dev,
2187                                          mlx5_class_attributes[i]);
2188                 if (err)
2189                         goto err_port_attrs;
2190         }
2191
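        /* Start one kernel thread per port to run mlx5_ib_roce_port_update(). */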
2192         {
2193                 struct thread *rl_thread = NULL;
2194                 struct proc *rl_proc = NULL;
2195
2196                 for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2197                         (void) kproc_kthread_add(mlx5_ib_roce_port_update, dev->port + i, &rl_proc, &rl_thread,
2198                             RFHIGHPID, 0, "mlx5-ib-roce-port", "mlx5-ib-roce_port-%d", i);
2199                 }
2200         }
2201
2202         dev->ib_active = true;
2203
2204         return dev;
2205
2206 err_port_attrs:
2207         destroy_ports_attrs(dev, dev->num_ports);
2208
2209 err_dc:
2210         if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2211             MLX5_CAP_PORT_TYPE_IB)
2212                 cleanup_dc_improvements(dev);
2213         destroy_umrc_res(dev);
2214
2215 err_dev:
2216         ib_unregister_device(&dev->ib_dev);
2217
2218 err_q_cnt:
2219         mlx5_ib_dealloc_q_counters(dev);
2220
2221 err_rsrc:
2222         destroy_dev_resources(&dev->devr);
2223
2224 err_disable_roce:
2225         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2226             IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2227                 mlx5_nic_vport_disable_roce(mdev);
2228 err_free_port:
2229         kfree(dev->port);
2230
2231 err_dealloc:
2232         ib_dealloc_device((struct ib_device *)dev);
2233
2234         return NULL;
2235 }
2236
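/*
 * Detach callback: signal the per-port update threads to exit and
 * wait for them, then tear down everything mlx5_ib_add() created, in
 * reverse order.
 */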
2237 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
2238 {
2239         struct mlx5_ib_dev *dev = context;
2240         int i;
2241
2242         for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2243                 dev->port[i].port_gone = 1;
2244                 while (dev->port[i].port_gone != 2)
2245                         pause("W", hz);
2246         }
2247
2248         for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2249                 device_remove_file(&dev->ib_dev.dev,
2250                     mlx5_class_attributes[i]);
2251         }
2252
2253         destroy_ports_attrs(dev, dev->num_ports);
2254         if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2255             MLX5_CAP_PORT_TYPE_IB)
2256                 cleanup_dc_improvements(dev);
2257         mlx5_ib_dealloc_q_counters(dev);
2258         ib_unregister_device(&dev->ib_dev);
2259         destroy_umrc_res(dev);
2260         destroy_dev_resources(&dev->devr);
2261
2262         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2263             IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2264                 mlx5_nic_vport_disable_roce(mdev);
2265
2266         kfree(dev->port);
2267         ib_dealloc_device(&dev->ib_dev);
2268 }
2269
2270 static struct mlx5_interface mlx5_ib_interface = {
2271         .add            = mlx5_ib_add,
2272         .remove         = mlx5_ib_remove,
2273         .event          = mlx5_ib_event,
2274         .protocol       = MLX5_INTERFACE_PROTOCOL_IB,
2275 };
2276
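/* Module load: register with mlx5_core, then create the driver workqueue. */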
2277 static int __init mlx5_ib_init(void)
2278 {
2279         int err;
2280
2281         if (deprecated_prof_sel != 2)
2282                 printf("mlx5_ib: WARN: prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
2283
2284         err = mlx5_register_interface(&mlx5_ib_interface);
2285         if (err)
2286                 return err;
2287
2288         mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
2289         if (!mlx5_ib_wq) {
2290                 printf("mlx5_ib: ERR: %s: failed to create mlx5_ib_wq\n", __func__);
2291                 /* 'err' is still 0 here; report the failure explicitly. */
2292                 err = -ENOMEM;
2293                 goto err_unreg;
2294         }
2293
2294         return err;
2295
2296 err_unreg:
2297         mlx5_unregister_interface(&mlx5_ib_interface);
2298         return err;
2301 }
2302
2303 static void __exit mlx5_ib_cleanup(void)
2304 {
2305         destroy_workqueue(mlx5_ib_wq);
2306         mlx5_unregister_interface(&mlx5_ib_interface);
2307 }
2308
2309 module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
2310 module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);