1 /*-
2  * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27
28 #include <linux/errno.h>
29 #include <linux/pci.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/slab.h>
32 #include <linux/io-mapping.h>
33 #include <linux/sched.h>
34 #include <linux/netdevice.h>
35 #include <linux/etherdevice.h>
36 #include <net/ipv6.h>
37 #include <linux/list.h>
38 #include <dev/mlx5/driver.h>
39 #include <dev/mlx5/vport.h>
40 #include <asm/pgtable.h>
41 #include <linux/fs.h>
42 #undef inode
43
44 #include <rdma/ib_user_verbs.h>
45 #include <rdma/ib_smi.h>
46 #include <rdma/ib_umem.h>
47 #include "user.h"
48 #include "mlx5_ib.h"
49
50 #include <sys/unistd.h>
51
52 #define DRIVER_NAME "mlx5_ib"
53 #define DRIVER_VERSION "3.2-rc1"
54 #define DRIVER_RELDATE  "May 2016"
55
56 #undef MODULE_VERSION
57 #include <sys/module.h>
58
59 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
60 MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
61 MODULE_LICENSE("Dual BSD/GPL");
62 MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
63 MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
64 MODULE_VERSION(mlx5ib, 1);
65
66 static int deprecated_prof_sel = 2;
67 module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
68 MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
69
70 enum {
71         MLX5_STANDARD_ATOMIC_SIZE = 0x8,
72 };
73
74 struct workqueue_struct *mlx5_ib_wq;
75
76 static char mlx5_version[] =
77         DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
78         DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
79
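/*
 * Derive the IB atomic capabilities from the HCA atomic caps: atomics are
 * only advertised when both compare-swap and fetch-add (plain or masked)
 * are supported, the QP supports 8-byte atomic operands, and, on
 * little-endian hosts, the requester 8B endianness mode is available.
 */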
80 static void get_atomic_caps(struct mlx5_ib_dev *dev,
81                             struct ib_device_attr *props)
82 {
83         int tmp;
84         u8 atomic_operations;
85         u8 atomic_size_qp;
86         u8 atomic_req_endianess;
87
88         atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
89         atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
90         atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
91                                                atomic_req_8B_endianess_mode) ||
92                                !mlx5_host_is_le();
93
94         tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
95         if (((atomic_operations & tmp) == tmp)
96             && (atomic_size_qp & 8)) {
97                 if (atomic_req_endianess) {
98                         props->atomic_cap = IB_ATOMIC_HCA;
99                 } else {
100                         props->atomic_cap = IB_ATOMIC_NONE;
101                 }
102         } else {
103                 props->atomic_cap = IB_ATOMIC_NONE;
104         }
105
106         tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
107         if (((atomic_operations & tmp) == tmp)
108             && (atomic_size_qp & 8)) {
109                 if (atomic_req_endianess) {
110                         props->masked_atomic_cap = IB_ATOMIC_HCA;
111                 } else {
112                         props->masked_atomic_cap = IB_ATOMIC_NONE;
113                 }
114         } else {
115                 props->masked_atomic_cap = IB_ATOMIC_NONE;
116         }
117 }
118
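/* Map the firmware port_type capability to the ibcore link layer type. */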
119 static enum rdma_link_layer
120 mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
121 {
122         struct mlx5_ib_dev *dev = to_mdev(device);
123
124         switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
125         case MLX5_CAP_PORT_TYPE_IB:
126                 return IB_LINK_LAYER_INFINIBAND;
127         case MLX5_CAP_PORT_TYPE_ETH:
128                 return IB_LINK_LAYER_ETHERNET;
129         default:
130                 return IB_LINK_LAYER_UNSPECIFIED;
131         }
132 }
133
134 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
135 {
136         return !dev->mdev->issi;
137 }
138
139 enum {
140         MLX5_VPORT_ACCESS_METHOD_MAD,
141         MLX5_VPORT_ACCESS_METHOD_HCA,
142         MLX5_VPORT_ACCESS_METHOD_NIC,
143 };
144
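/*
 * Select how vport attributes are queried: MAD commands when the ISSI is
 * zero, the NIC vport commands when port 1 runs RoCE (Ethernet link
 * layer), and the HCA vport commands otherwise.
 */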
145 static int mlx5_get_vport_access_method(struct ib_device *ibdev)
146 {
147         if (mlx5_use_mad_ifc(to_mdev(ibdev)))
148                 return MLX5_VPORT_ACCESS_METHOD_MAD;
149
150         if (mlx5_ib_port_link_layer(ibdev, 1) ==
151             IB_LINK_LAYER_ETHERNET)
152                 return MLX5_VPORT_ACCESS_METHOD_NIC;
153
154         return MLX5_VPORT_ACCESS_METHOD_HCA;
155 }
156
157 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
158                                         __be64 *sys_image_guid)
159 {
160         struct mlx5_ib_dev *dev = to_mdev(ibdev);
161         struct mlx5_core_dev *mdev = dev->mdev;
162         u64 tmp;
163         int err;
164
165         switch (mlx5_get_vport_access_method(ibdev)) {
166         case MLX5_VPORT_ACCESS_METHOD_MAD:
167                 return mlx5_query_system_image_guid_mad_ifc(ibdev,
168                                                             sys_image_guid);
169
170         case MLX5_VPORT_ACCESS_METHOD_HCA:
171                 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
172                 if (!err)
173                         *sys_image_guid = cpu_to_be64(tmp);
174                 return err;
175
176         case MLX5_VPORT_ACCESS_METHOD_NIC:
177                 err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
178                 if (!err)
179                         *sys_image_guid = cpu_to_be64(tmp);
180                 return err;
181
182         default:
183                 return -EINVAL;
184         }
185 }
186
187 static int mlx5_query_max_pkeys(struct ib_device *ibdev,
188                                 u16 *max_pkeys)
189 {
190         struct mlx5_ib_dev *dev = to_mdev(ibdev);
191         struct mlx5_core_dev *mdev = dev->mdev;
192
193         switch (mlx5_get_vport_access_method(ibdev)) {
194         case MLX5_VPORT_ACCESS_METHOD_MAD:
195                 return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);
196
197         case MLX5_VPORT_ACCESS_METHOD_HCA:
198         case MLX5_VPORT_ACCESS_METHOD_NIC:
199                 *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
200                                                 pkey_table_size));
201                 return 0;
202
203         default:
204                 return -EINVAL;
205         }
206 }
207
208 static int mlx5_query_vendor_id(struct ib_device *ibdev,
209                                 u32 *vendor_id)
210 {
211         struct mlx5_ib_dev *dev = to_mdev(ibdev);
212
213         switch (mlx5_get_vport_access_method(ibdev)) {
214         case MLX5_VPORT_ACCESS_METHOD_MAD:
215                 return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);
216
217         case MLX5_VPORT_ACCESS_METHOD_HCA:
218         case MLX5_VPORT_ACCESS_METHOD_NIC:
219                 return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
220
221         default:
222                 return -EINVAL;
223         }
224 }
225
226 static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
227                                 __be64 *node_guid)
228 {
229         u64 tmp;
230         int err;
231
232         switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
233         case MLX5_VPORT_ACCESS_METHOD_MAD:
234                 return mlx5_query_node_guid_mad_ifc(dev, node_guid);
235
236         case MLX5_VPORT_ACCESS_METHOD_HCA:
237                 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
238                 if (!err)
239                         *node_guid = cpu_to_be64(tmp);
240                 return err;
241
242         case MLX5_VPORT_ACCESS_METHOD_NIC:
243                 err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
244                 if (!err)
245                         *node_guid = cpu_to_be64(tmp);
246                 return err;
247
248         default:
249                 return -EINVAL;
250         }
251 }
252
253 struct mlx5_reg_node_desc {
254         u8      desc[64];
255 };
256
257 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
258 {
259         struct mlx5_reg_node_desc in;
260
261         if (mlx5_use_mad_ifc(dev))
262                 return mlx5_query_node_desc_mad_ifc(dev, node_desc);
263
264         memset(&in, 0, sizeof(in));
265
266         return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
267                                     sizeof(struct mlx5_reg_node_desc),
268                                     MLX5_REG_NODE_DESC, 0, 0);
269 }
270
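/*
 * Fill out ib_device_attr from the general device capabilities; most of
 * the limits below are reported by firmware as log2 values.
 */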
271 static int mlx5_ib_query_device(struct ib_device *ibdev,
272                                 struct ib_device_attr *props)
273 {
274         struct mlx5_ib_dev *dev = to_mdev(ibdev);
275         struct mlx5_core_dev *mdev = dev->mdev;
276         int max_sq_desc;
277         int max_rq_sg;
278         int max_sq_sg;
279         int err;
280
281
282         memset(props, 0, sizeof(*props));
283
284         err = mlx5_query_system_image_guid(ibdev,
285                                            &props->sys_image_guid);
286         if (err)
287                 return err;
288
289         err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
290         if (err)
291                 return err;
292
293         err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
294         if (err)
295                 return err;
296
297         props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
298                 ((u64)fw_rev_min(dev->mdev) << 16) |
299                 fw_rev_sub(dev->mdev);
300         props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
301                 IB_DEVICE_PORT_ACTIVE_EVENT             |
302                 IB_DEVICE_SYS_IMAGE_GUID                |
303                 IB_DEVICE_RC_RNR_NAK_GEN;
304
305         if (MLX5_CAP_GEN(mdev, pkv))
306                 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
307         if (MLX5_CAP_GEN(mdev, qkv))
308                 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
309         if (MLX5_CAP_GEN(mdev, apm))
310                 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
311         props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
312         if (MLX5_CAP_GEN(mdev, xrc))
313                 props->device_cap_flags |= IB_DEVICE_XRC;
314         props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
315         if (MLX5_CAP_GEN(mdev, block_lb_mc))
316                 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
317
318         props->vendor_part_id      = mdev->pdev->device;
319         props->hw_ver              = mdev->pdev->revision;
320
321         props->max_mr_size         = ~0ull;
322         props->page_size_cap       = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) -1);
323         props->max_qp              = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
324         props->max_qp_wr           = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
325         max_rq_sg =  MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
326                      sizeof(struct mlx5_wqe_data_seg);
327         max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
328         max_sq_sg = (max_sq_desc -
329                      sizeof(struct mlx5_wqe_ctrl_seg) -
330                      sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg);
331         props->max_sge = min(max_rq_sg, max_sq_sg);
332         props->max_cq              = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
333         props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
334         props->max_mr              = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
335         props->max_pd              = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
336         props->max_qp_rd_atom      = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
337         props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
338         props->max_srq             = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
339         props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
340         props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
341         props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
342         props->max_srq_sge         = max_rq_sg - 1;
343         props->max_fast_reg_page_list_len = (unsigned int)-1;
344         get_atomic_caps(dev, props);
345         props->max_mcast_grp       = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
346         props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
347         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
348                                            props->max_mcast_grp;
349         props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
350         props->max_ah           = INT_MAX;
351
352         return 0;
353 }
354
355 enum mlx5_ib_width {
356         MLX5_IB_WIDTH_1X        = 1 << 0,
357         MLX5_IB_WIDTH_2X        = 1 << 1,
358         MLX5_IB_WIDTH_4X        = 1 << 2,
359         MLX5_IB_WIDTH_8X        = 1 << 3,
360         MLX5_IB_WIDTH_12X       = 1 << 4
361 };
362
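/*
 * Translate the PTYS active link width bitmap into an ib_port_width
 * value. A 2X width is rejected because it has no IB verbs equivalent
 * here.
 */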
363 static int translate_active_width(struct ib_device *ibdev, u8 active_width,
364                                   u8 *ib_width)
365 {
366         struct mlx5_ib_dev *dev = to_mdev(ibdev);
367         int err = 0;
368
369         if (active_width & MLX5_IB_WIDTH_1X) {
370                 *ib_width = IB_WIDTH_1X;
371         } else if (active_width & MLX5_IB_WIDTH_2X) {
372                 mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
373                              (int)active_width);
374                 err = -EINVAL;
375         } else if (active_width & MLX5_IB_WIDTH_4X) {
376                 *ib_width = IB_WIDTH_4X;
377         } else if (active_width & MLX5_IB_WIDTH_8X) {
378                 *ib_width = IB_WIDTH_8X;
379         } else if (active_width & MLX5_IB_WIDTH_12X) {
380                 *ib_width = IB_WIDTH_12X;
381         } else {
382                 mlx5_ib_dbg(dev, "Invalid active_width %d\n",
383                             (int)active_width);
384                 err = -EINVAL;
385         }
386
387         return err;
388 }
389
390 /*
391  * TODO: Move to IB core
392  */
393 enum ib_max_vl_num {
394         __IB_MAX_VL_0           = 1,
395         __IB_MAX_VL_0_1         = 2,
396         __IB_MAX_VL_0_3         = 3,
397         __IB_MAX_VL_0_7         = 4,
398         __IB_MAX_VL_0_14        = 5,
399 };
400
401 enum mlx5_vl_hw_cap {
402         MLX5_VL_HW_0    = 1,
403         MLX5_VL_HW_0_1  = 2,
404         MLX5_VL_HW_0_2  = 3,
405         MLX5_VL_HW_0_3  = 4,
406         MLX5_VL_HW_0_4  = 5,
407         MLX5_VL_HW_0_5  = 6,
408         MLX5_VL_HW_0_6  = 7,
409         MLX5_VL_HW_0_7  = 8,
410         MLX5_VL_HW_0_14 = 15
411 };
412
413 static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
414                                 u8 *max_vl_num)
415 {
416         switch (vl_hw_cap) {
417         case MLX5_VL_HW_0:
418                 *max_vl_num = __IB_MAX_VL_0;
419                 break;
420         case MLX5_VL_HW_0_1:
421                 *max_vl_num = __IB_MAX_VL_0_1;
422                 break;
423         case MLX5_VL_HW_0_3:
424                 *max_vl_num = __IB_MAX_VL_0_3;
425                 break;
426         case MLX5_VL_HW_0_7:
427                 *max_vl_num = __IB_MAX_VL_0_7;
428                 break;
429         case MLX5_VL_HW_0_14:
430                 *max_vl_num = __IB_MAX_VL_0_14;
431                 break;
432
433         default:
434                 return -EINVAL;
435         }
436
437         return 0;
438 }
439
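/*
 * Query IB port attributes using the HCA vport context together with the
 * PTYS (link width/speed), PMTU and PVLC (VL capability) access registers.
 */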
440 static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
441                               struct ib_port_attr *props)
442 {
443         struct mlx5_ib_dev *dev = to_mdev(ibdev);
444         struct mlx5_core_dev *mdev = dev->mdev;
445         u32 *rep;
446         int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
447         struct mlx5_ptys_reg *ptys;
448         struct mlx5_pmtu_reg *pmtu;
449         struct mlx5_pvlc_reg pvlc;
450         void *ctx;
451         int err;
452
453         rep = mlx5_vzalloc(outlen);
454         ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
455         pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
456         if (!rep || !ptys || !pmtu) {
457                 err = -ENOMEM;
458                 goto out;
459         }
460
461         memset(props, 0, sizeof(*props));
462
463         /* XXX: what if this is a PF with a dual port? */
464         err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
465         if (err)
466                 goto out;
467
468         ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);
469
470         props->lid              = MLX5_GET(hca_vport_context, ctx, lid);
471         props->lmc              = MLX5_GET(hca_vport_context, ctx, lmc);
472         props->sm_lid           = MLX5_GET(hca_vport_context, ctx, sm_lid);
473         props->sm_sl            = MLX5_GET(hca_vport_context, ctx, sm_sl);
474         props->state            = MLX5_GET(hca_vport_context, ctx, vport_state);
475         props->phys_state       = MLX5_GET(hca_vport_context, ctx,
476                                         port_physical_state);
477         props->port_cap_flags   = MLX5_GET(hca_vport_context, ctx, cap_mask1);
478         props->gid_tbl_len      = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
479         props->max_msg_sz       = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
480         props->pkey_tbl_len     = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
481         props->bad_pkey_cntr    = MLX5_GET(hca_vport_context, ctx,
482                                               pkey_violation_counter);
483         props->qkey_viol_cntr   = MLX5_GET(hca_vport_context, ctx,
484                                               qkey_violation_counter);
485         props->subnet_timeout   = MLX5_GET(hca_vport_context, ctx,
486                                               subnet_timeout);
487         props->init_type_reply  = MLX5_GET(hca_vport_context, ctx,
488                                            init_type_reply);
489
490         ptys->proto_mask |= MLX5_PTYS_IB;
491         ptys->local_port = port;
492         err = mlx5_core_access_ptys(mdev, ptys, 0);
493         if (err)
494                 goto out;
495
496         err = translate_active_width(ibdev, ptys->ib_link_width_oper,
497                                      &props->active_width);
498         if (err)
499                 goto out;
500
501         props->active_speed     = (u8)ptys->ib_proto_oper;
502
503         pmtu->local_port = port;
504         err = mlx5_core_access_pmtu(mdev, pmtu, 0);
505         if (err)
506                 goto out;
507
508         props->max_mtu          = pmtu->max_mtu;
509         props->active_mtu       = pmtu->oper_mtu;
510
511         memset(&pvlc, 0, sizeof(pvlc));
512         pvlc.local_port = port;
513         err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
514         if (err)
515                 goto out;
516
517         err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
518                                    &props->max_vl_num);
519 out:
520         kvfree(rep);
521         kfree(ptys);
522         kfree(pmtu);
523         return err;
524 }
525
526 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
527                        struct ib_port_attr *props)
528 {
529         switch (mlx5_get_vport_access_method(ibdev)) {
530         case MLX5_VPORT_ACCESS_METHOD_MAD:
531                 return mlx5_query_port_mad_ifc(ibdev, port, props);
532
533         case MLX5_VPORT_ACCESS_METHOD_HCA:
534                 return mlx5_query_port_ib(ibdev, port, props);
535
536         case MLX5_VPORT_ACCESS_METHOD_NIC:
537                 return mlx5_query_port_roce(ibdev, port, props);
538
539         default:
540                 return -EINVAL;
541         }
542 }
543
544 static inline int
545 mlx5_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
546 {
547         if (dev->if_addrlen != ETH_ALEN)
548                 return -1;
549         memcpy(eui, IF_LLADDR(dev), 3);
550         memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
551
552         /* NOTE: The scope ID is added by the GID to IP conversion */
553
554         eui[3] = 0xFF;
555         eui[4] = 0xFE;
556         eui[0] ^= 2;
557         return 0;
558 }
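/*
 * Example of the EUI-48 to modified EUI-64 mapping performed above, using
 * the hypothetical MAC address 00:11:22:33:44:55:
 *
 *	00:11:22:33:44:55 -> 02:11:22:ff:fe:33:44:55
 *
 * i.e. ff:fe is inserted after the OUI and the universal/local bit is
 * toggled; with the fe80::/64 prefix below this yields the link-local
 * GID fe80::211:22ff:fe33:4455.
 */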
559
560 static void
561 mlx5_make_default_gid(struct net_device *dev, union ib_gid *gid)
562 {
563         gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
564         mlx5_addrconf_ifid_eui48(&gid->raw[8], dev);
565 }
566
567 static inline int
568 mlx5_ip2gid(const struct sockaddr *addr, union ib_gid *gid)
569 {
570         switch (addr->sa_family) {
571         case AF_INET:
572                 ipv6_addr_set_v4mapped(((const struct sockaddr_in *)addr)->sin_addr.s_addr,
573                     (struct in6_addr *)gid->raw);
574                 break;
575         case AF_INET6:
576                 memcpy(gid->raw, &((const struct sockaddr_in6 *)addr)->sin6_addr, 16);
577                 /* clear SCOPE ID */
578                 gid->raw[2] = 0;
579                 gid->raw[3] = 0;
580                 break;
581         default:
582                 return -EINVAL;
583         }
584         return 0;
585 }
586
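/*
 * Per-port kernel thread: while the port is alive it periodically rebuilds
 * the RoCE GID table from the default GID of the underlying Ethernet
 * interface plus the IPv4/IPv6 addresses configured on that interface and
 * its VLAN children, programs changed entries with modify_gid_roce() and
 * dispatches IB_EVENT_GID_CHANGE so ibcore picks up the new table.
 */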
587 static void
588 mlx5_ib_roce_port_update(void *arg)
589 {
590         struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg;
591         struct mlx5_ib_dev *dev = port->dev;
592         struct mlx5_core_dev *mdev = dev->mdev;
593         struct net_device *xdev[MLX5_IB_GID_MAX];
594         struct net_device *idev;
595         struct net_device *ndev;
596         struct ifaddr *ifa;
597         union ib_gid gid_temp;
598
599         while (port->port_gone == 0) {
600                 int update = 0;
601                 int gid_index = 0;
602                 int j;
603                 int error;
604
605                 ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH);
606                 if (ndev == NULL) {
607                         pause("W", hz);
608                         continue;
609                 }
610
611                 CURVNET_SET_QUIET(ndev->if_vnet);
612
613                 memset(&gid_temp, 0, sizeof(gid_temp));
614                 mlx5_make_default_gid(ndev, &gid_temp);
615                 if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
616                         port->gid_table[gid_index] = gid_temp;
617                         update = 1;
618                 }
619                 xdev[gid_index] = ndev;
620                 gid_index++;
621
622                 IFNET_RLOCK();
623                 TAILQ_FOREACH(idev, &V_ifnet, if_link) {
624                         if (idev == ndev)
625                                 break;
626                 }
627                 if (idev != NULL) {
628                     TAILQ_FOREACH(idev, &V_ifnet, if_link) {
629                         if (idev != ndev) {
630                                 if (idev->if_type != IFT_L2VLAN)
631                                         continue;
632                                 if (ndev != rdma_vlan_dev_real_dev(idev))
633                                         continue;
634                         }
635                         /* clone address information for IPv4 and IPv6 */
636                         IF_ADDR_RLOCK(idev);
637                         TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
638                                 if (ifa->ifa_addr == NULL ||
639                                     (ifa->ifa_addr->sa_family != AF_INET &&
640                                      ifa->ifa_addr->sa_family != AF_INET6) ||
641                                     gid_index >= MLX5_IB_GID_MAX)
642                                         continue;
643                                 memset(&gid_temp, 0, sizeof(gid_temp));
644                                 mlx5_ip2gid(ifa->ifa_addr, &gid_temp);
645                                 /* check for existing entry */
646                                 for (j = 0; j != gid_index; j++) {
647                                         if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0)
648                                                 break;
649                                 }
650                                 /* check if new entry must be added */
651                                 if (j == gid_index) {
652                                         if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
653                                                 port->gid_table[gid_index] = gid_temp;
654                                                 update = 1;
655                                         }
656                                         xdev[gid_index] = idev;
657                                         gid_index++;
658                                 }
659                         }
660                         IF_ADDR_RUNLOCK(idev);
661                     }
662                 }
663                 IFNET_RUNLOCK();
664                 CURVNET_RESTORE();
665
666                 if (update != 0 &&
667                     mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) {
668                         struct ib_event event = {
669                             .device = &dev->ib_dev,
670                             .element.port_num = port->port_num + 1,
671                             .event = IB_EVENT_GID_CHANGE,
672                         };
673
674                         /* add new entries, if any */
675                         for (j = 0; j != gid_index; j++) {
676                                 error = modify_gid_roce(&dev->ib_dev, port->port_num, j,
677                                     port->gid_table + j, xdev[j]);
678                                 if (error != 0)
679                                         printf("mlx5_ib: Failed to update ROCE GID table: %d\n", error);
680                         }
681                         memset(&gid_temp, 0, sizeof(gid_temp));
682
683                         /* clear old entries, if any */
684                         for (; j != MLX5_IB_GID_MAX; j++) {
685                                 if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0)
686                                         continue;
687                                 port->gid_table[j] = gid_temp;
688                                 (void) modify_gid_roce(&dev->ib_dev, port->port_num, j,
689                                     port->gid_table + j, ndev);
690                         }
691
692                         /* make sure ibcore gets updated */
693                         ib_dispatch_event(&event);
694                 }
695                 pause("W", hz);
696         }
697         do {
698                 struct ib_event event = {
699                         .device = &dev->ib_dev,
700                         .element.port_num = port->port_num + 1,
701                         .event = IB_EVENT_GID_CHANGE,
702                 };
703                 /* make sure ibcore gets updated */
704                 ib_dispatch_event(&event);
705
706                 /* wait a bit */
707                 pause("W", hz);
708         } while (0);
709         port->port_gone = 2;
710         kthread_exit();
711 }
712
713 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
714                              union ib_gid *gid)
715 {
716         struct mlx5_ib_dev *dev = to_mdev(ibdev);
717         struct mlx5_core_dev *mdev = dev->mdev;
718
719         switch (mlx5_get_vport_access_method(ibdev)) {
720         case MLX5_VPORT_ACCESS_METHOD_MAD:
721                 return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);
722
723         case MLX5_VPORT_ACCESS_METHOD_HCA:
724                 return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);
725
726         case MLX5_VPORT_ACCESS_METHOD_NIC:
727                 if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
728                     index < 0 || index >= MLX5_IB_GID_MAX ||
729                     dev->port[port - 1].port_gone != 0)
730                         memset(gid, 0, sizeof(*gid));
731                 else
732                         *gid = dev->port[port - 1].gid_table[index];
733                 return 0;
734
735         default:
736                 return -EINVAL;
737         }
738 }
739
740 static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
741                               u16 *pkey)
742 {
743         struct mlx5_ib_dev *dev = to_mdev(ibdev);
744         struct mlx5_core_dev *mdev = dev->mdev;
745
746         switch (mlx5_get_vport_access_method(ibdev)) {
747         case MLX5_VPORT_ACCESS_METHOD_MAD:
748                 return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);
749
750         case MLX5_VPORT_ACCESS_METHOD_HCA:
751         case MLX5_VPORT_ACCESS_METHOD_NIC:
752                 return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
753                                                  pkey);
754
755         default:
756                 return -EINVAL;
757         }
758 }
759
760 static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
761                                  struct ib_device_modify *props)
762 {
763         struct mlx5_ib_dev *dev = to_mdev(ibdev);
764         struct mlx5_reg_node_desc in;
765         struct mlx5_reg_node_desc out;
766         int err;
767
768         if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
769                 return -EOPNOTSUPP;
770
771         if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
772                 return 0;
773
774         /*
775          * If possible, pass node desc to FW, so it can generate
776          * a 144 trap.  If cmd fails, just ignore.
777          */
778         memcpy(&in, props->node_desc, 64);
779         err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
780                                    sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
781         if (err)
782                 return err;
783
784         memcpy(ibdev->node_desc, props->node_desc, 64);
785
786         return err;
787 }
788
789 static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
790                                struct ib_port_modify *props)
791 {
792         u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
793                      IB_LINK_LAYER_ETHERNET);
794         struct mlx5_ib_dev *dev = to_mdev(ibdev);
795         struct ib_port_attr attr;
796         u32 tmp;
797         int err;
798
799         /* return OK if this is RoCE. CM calls ib_modify_port() regardless
800          * of whether port link layer is ETH or IB. For ETH ports, qkey
801          * violations and port capabilities are not valid.
802          */
803         if (is_eth)
804                 return 0;
805
806         mutex_lock(&dev->cap_mask_mutex);
807
808         err = mlx5_ib_query_port(ibdev, port, &attr);
809         if (err)
810                 goto out;
811
812         tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
813                 ~props->clr_port_cap_mask;
814
815         err = mlx5_set_port_caps(dev->mdev, port, tmp);
816
817 out:
818         mutex_unlock(&dev->cap_mask_mutex);
819         return err;
820 }
821
822 enum mlx5_cap_flags {
823         MLX5_CAP_COMPACT_AV = 1 << 0,
824 };
825
826 static void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
827 {
828         *flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
829                   MLX5_CAP_COMPACT_AV : 0;
830 }
831
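/*
 * Allocate a verbs user context: validate the (v0 or v2) request, allocate
 * the requested UAR pages and UUAR bookkeeping, and return the mapping
 * parameters to user space in the alloc_ucontext response.
 */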
832 static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
833                                                   struct ib_udata *udata)
834 {
835         struct mlx5_ib_dev *dev = to_mdev(ibdev);
836         struct mlx5_ib_alloc_ucontext_req_v2 req;
837         struct mlx5_ib_alloc_ucontext_resp resp;
838         struct mlx5_ib_ucontext *context;
839         struct mlx5_uuar_info *uuari;
840         struct mlx5_uar *uars;
841         int gross_uuars;
842         int num_uars;
843         int ver;
844         int uuarn;
845         int err;
846         int i;
847         size_t reqlen;
848
849         if (!dev->ib_active)
850                 return ERR_PTR(-EAGAIN);
851
852         memset(&req, 0, sizeof(req));
853         memset(&resp, 0, sizeof(resp));
854
855         reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
856         if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
857                 ver = 0;
858         else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
859                 ver = 2;
860         else {
861                 mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen);
862                 return ERR_PTR(-EINVAL);
863         }
864
865         err = ib_copy_from_udata(&req, udata, reqlen);
866         if (err) {
867                 mlx5_ib_err(dev, "copy failed\n");
868                 return ERR_PTR(err);
869         }
870
871         if (req.reserved) {
872                 mlx5_ib_err(dev, "request corrupted\n");
873                 return ERR_PTR(-EINVAL);
874         }
875
876         if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
877                 mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
878                 return ERR_PTR(-ENOMEM);
879         }
880
881         req.total_num_uuars = ALIGN(req.total_num_uuars,
882                                     MLX5_NON_FP_BF_REGS_PER_PAGE);
883         if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
884                 mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
885                              req.num_low_latency_uuars, req.total_num_uuars - 1);
886                 return ERR_PTR(-EINVAL);
887         }
888
889         num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
890         gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
891         resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
892         if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
893                 resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
894         resp.cache_line_size = L1_CACHE_BYTES;
895         resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
896         resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
897         resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
898         resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
899         resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
900         set_mlx5_flags(&resp.flags, dev->mdev);
901
902         if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
903                 resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);
904
905         if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
906                 resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
907
908         context = kzalloc(sizeof(*context), GFP_KERNEL);
909         if (!context)
910                 return ERR_PTR(-ENOMEM);
911
912         uuari = &context->uuari;
913         mutex_init(&uuari->lock);
914         uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
915         if (!uars) {
916                 err = -ENOMEM;
917                 goto out_ctx;
918         }
919
920         uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
921                                 sizeof(*uuari->bitmap),
922                                 GFP_KERNEL);
923         if (!uuari->bitmap) {
924                 err = -ENOMEM;
925                 goto out_uar_ctx;
926         }
927         /*
928          * clear all fast path uuars
929          */
930         for (i = 0; i < gross_uuars; i++) {
931                 uuarn = i & 3;
932                 if (uuarn == 2 || uuarn == 3)
933                         set_bit(i, uuari->bitmap);
934         }
935
936         uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
937         if (!uuari->count) {
938                 err = -ENOMEM;
939                 goto out_bitmap;
940         }
941
942         for (i = 0; i < num_uars; i++) {
943                 err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
944                 if (err) {
945                         mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
946                         goto out_uars;
947                 }
948         }
949         for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
950                 context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;
951
952         INIT_LIST_HEAD(&context->db_page_list);
953         mutex_init(&context->db_page_mutex);
954
955         resp.tot_uuars = req.total_num_uuars;
956         resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
957         err = ib_copy_to_udata(udata, &resp,
958                                min_t(size_t, udata->outlen, sizeof(resp)));
959         if (err)
960                 goto out_uars;
961
962         uuari->ver = ver;
963         uuari->num_low_latency_uuars = req.num_low_latency_uuars;
964         uuari->uars = uars;
965         uuari->num_uars = num_uars;
966
967         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
968             IB_LINK_LAYER_ETHERNET) {
969                 err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
970                 if (err)
971                         goto out_uars;
972         }
973
974         return &context->ibucontext;
975
976 out_uars:
977         for (i--; i >= 0; i--)
978                 mlx5_cmd_free_uar(dev->mdev, uars[i].index);
979         kfree(uuari->count);
980
981 out_bitmap:
982         kfree(uuari->bitmap);
983
984 out_uar_ctx:
985         kfree(uars);
986
987 out_ctx:
988         kfree(context);
989         return ERR_PTR(err);
990 }
991
992 static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
993 {
994         struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
995         struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
996         struct mlx5_uuar_info *uuari = &context->uuari;
997         int i;
998
999         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
1000             IB_LINK_LAYER_ETHERNET)
1001                 mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
1002
1003         for (i = 0; i < uuari->num_uars; i++) {
1004                 if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
1005                         mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
1006         }
1007         for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
1008                 if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
1009                         mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
1010         }
1011
1012         kfree(uuari->count);
1013         kfree(uuari->bitmap);
1014         kfree(uuari->uars);
1015         kfree(context);
1016
1017         return 0;
1018 }
1019
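/*
 * Translate a UAR index into the page frame number of the corresponding
 * page inside BAR 0 of the device.
 */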
1020 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
1021 {
1022         return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
1023 }
1024
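/*
 * The mmap page offset encodes a command in its upper bits and an argument
 * (typically a UAR index) in the lower MLX5_IB_MMAP_CMD_SHIFT bits, i.e.
 * user space passes vm_pgoff = (command << MLX5_IB_MMAP_CMD_SHIFT) | index.
 */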
1025 static int get_command(unsigned long offset)
1026 {
1027         return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
1028 }
1029
1030 static int get_arg(unsigned long offset)
1031 {
1032         return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
1033 }
1034
1035 static int get_index(unsigned long offset)
1036 {
1037         return get_arg(offset);
1038 }
1039
1040 static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
1041                     struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
1042                     struct mlx5_ib_ucontext *context)
1043 {
1044         unsigned long idx;
1045         phys_addr_t pfn;
1046
1047         if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
1048                 mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
1049                              (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
1050                 return -EINVAL;
1051         }
1052
1053         idx = get_index(vma->vm_pgoff);
1054         if (idx >= uuari->num_uars) {
1055                 mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
1056                              idx, uuari->num_uars);
1057                 return -EINVAL;
1058         }
1059
1060         pfn = uar_index2pfn(dev, uuari->uars[idx].index);
1061         mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
1062                     (unsigned long long)pfn);
1063
1064         vma->vm_page_prot = prot;
1065         if (io_remap_pfn_range(vma, vma->vm_start, pfn,
1066                                PAGE_SIZE, vma->vm_page_prot)) {
1067                 mlx5_ib_err(dev, "io remap failed\n");
1068                 return -EAGAIN;
1069         }
1070
1071         mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
1072                     (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);
1073
1074         return 0;
1075 }
1076
1077 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
1078 {
1079         struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1080         struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1081         struct mlx5_uuar_info *uuari = &context->uuari;
1082         unsigned long command;
1083
1084         command = get_command(vma->vm_pgoff);
1085         switch (command) {
1086         case MLX5_IB_MMAP_REGULAR_PAGE:
1087                 return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1088                                 true,
1089                                 uuari, dev, context);
1090
1091                 break;
1092
1093         case MLX5_IB_MMAP_WC_PAGE:
1094                 return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1095                                 true, uuari, dev, context);
1096                 break;
1097
1098         case MLX5_IB_MMAP_NC_PAGE:
1099                 return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
1100                                 false, uuari, dev, context);
1101                 break;
1102
1103         default:
1104                 return -EINVAL;
1105         }
1106
1107         return 0;
1108 }
1109
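/*
 * Create a physical-address (PA) mkey with local read access for the given
 * PD; it is used as the pa_lkey of PDs allocated without a user context
 * (in-kernel PDs).
 */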
1110 static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
1111 {
1112         struct mlx5_create_mkey_mbox_in *in;
1113         struct mlx5_mkey_seg *seg;
1114         struct mlx5_core_mr mr;
1115         int err;
1116
1117         in = kzalloc(sizeof(*in), GFP_KERNEL);
1118         if (!in)
1119                 return -ENOMEM;
1120
1121         seg = &in->seg;
1122         seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
1123         seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
1124         seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1125         seg->start_addr = 0;
1126
1127         err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
1128                                     NULL, NULL, NULL);
1129         if (err) {
1130                 mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
1131                 goto err_in;
1132         }
1133
1134         kfree(in);
1135         *key = mr.key;
1136
1137         return 0;
1138
1139 err_in:
1140         kfree(in);
1141
1142         return err;
1143 }
1144
1145 static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
1146 {
1147         struct mlx5_core_mr mr;
1148         int err;
1149
1150         memset(&mr, 0, sizeof(mr));
1151         mr.key = key;
1152         err = mlx5_core_destroy_mkey(dev->mdev, &mr);
1153         if (err)
1154                 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
1155 }
1156
1157 static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
1158                                       struct ib_ucontext *context,
1159                                       struct ib_udata *udata)
1160 {
1161         struct mlx5_ib_dev *dev = to_mdev(ibdev);
1162         struct mlx5_ib_alloc_pd_resp resp;
1163         struct mlx5_ib_pd *pd;
1164         int err;
1165
1166         pd = kmalloc(sizeof(*pd), GFP_KERNEL);
1167         if (!pd)
1168                 return ERR_PTR(-ENOMEM);
1169
1170         err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
1171         if (err) {
1172                 mlx5_ib_warn(dev, "pd alloc failed\n");
1173                 kfree(pd);
1174                 return ERR_PTR(err);
1175         }
1176
1177         if (context) {
1178                 resp.pdn = pd->pdn;
1179                 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
1180                         mlx5_ib_err(dev, "copy failed\n");
1181                         mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1182                         kfree(pd);
1183                         return ERR_PTR(-EFAULT);
1184                 }
1185         } else {
1186                 err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
1187                 if (err) {
1188                         mlx5_ib_err(dev, "alloc mkey failed\n");
1189                         mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1190                         kfree(pd);
1191                         return ERR_PTR(err);
1192                 }
1193         }
1194
1195         return &pd->ibpd;
1196 }
1197
1198 static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
1199 {
1200         struct mlx5_ib_dev *mdev = to_mdev(pd->device);
1201         struct mlx5_ib_pd *mpd = to_mpd(pd);
1202
1203         if (!pd->uobject)
1204                 free_pa_mkey(mdev, mpd->pa_lkey);
1205
1206         mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
1207         kfree(mpd);
1208
1209         return 0;
1210 }
1211
1212 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1213 {
1214         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1215         int err;
1216
1217         if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1218                 err = -EOPNOTSUPP;
1219         else
1220                 err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
1221         if (err)
1222                 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
1223                              ibqp->qp_num, gid->raw);
1224
1225         return err;
1226 }
1227
1228 static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1229 {
1230         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1231         int err;
1232
1233         if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1234                 err = -EOPNOTSUPP;
1235         else
1236                 err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
1237         if (err)
1238                 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
1239                              ibqp->qp_num, gid->raw);
1240
1241         return err;
1242 }
1243
1244 static int init_node_data(struct mlx5_ib_dev *dev)
1245 {
1246         int err;
1247
1248         err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
1249         if (err)
1250                 return err;
1251
1252         return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
1253 }
1254
1255 static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
1256                              char *buf)
1257 {
1258         struct mlx5_ib_dev *dev =
1259                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1260
1261         return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
1262 }
1263
1264 static ssize_t show_reg_pages(struct device *device,
1265                               struct device_attribute *attr, char *buf)
1266 {
1267         struct mlx5_ib_dev *dev =
1268                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1269
1270         return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
1271 }
1272
1273 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1274                         char *buf)
1275 {
1276         struct mlx5_ib_dev *dev =
1277                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1278         return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
1279 }
1280
1281 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1282                            char *buf)
1283 {
1284         struct mlx5_ib_dev *dev =
1285                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1286         return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
1287                        fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
1288 }
1289
1290 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
1291                         char *buf)
1292 {
1293         struct mlx5_ib_dev *dev =
1294                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1295         return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
1296 }
1297
1298 static ssize_t show_board(struct device *device, struct device_attribute *attr,
1299                           char *buf)
1300 {
1301         struct mlx5_ib_dev *dev =
1302                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1303         return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
1304                        dev->mdev->board_id);
1305 }
1306
1307 static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
1308 static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
1309 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
1310 static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
1311 static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
1312 static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
1313
1314 static struct device_attribute *mlx5_class_attributes[] = {
1315         &dev_attr_hw_rev,
1316         &dev_attr_fw_ver,
1317         &dev_attr_hca_type,
1318         &dev_attr_board_id,
1319         &dev_attr_fw_pages,
1320         &dev_attr_reg_pages,
1321 };
1322
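/*
 * Reset flow for a fatal device error: walk all QPs of this ibdev and
 * force a completion event on every CQ that still has outstanding work,
 * so consumers blocked on completions can observe the error state.
 */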
1323 static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
1324 {
1325         struct mlx5_ib_qp *mqp;
1326         struct mlx5_ib_cq *send_mcq, *recv_mcq;
1327         struct mlx5_core_cq *mcq;
1328         struct list_head cq_armed_list;
1329         unsigned long flags_qp;
1330         unsigned long flags_cq;
1331         unsigned long flags;
1332
1333         mlx5_ib_warn(ibdev, " started\n");
1334         INIT_LIST_HEAD(&cq_armed_list);
1335
1336         /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/
1337         spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
1338         list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
1339                 spin_lock_irqsave(&mqp->sq.lock, flags_qp);
1340                 if (mqp->sq.tail != mqp->sq.head) {
1341                         send_mcq = to_mcq(mqp->ibqp.send_cq);
1342                         spin_lock_irqsave(&send_mcq->lock, flags_cq);
1343                         if (send_mcq->mcq.comp &&
1344                             mqp->ibqp.send_cq->comp_handler) {
1345                                 if (!send_mcq->mcq.reset_notify_added) {
1346                                         send_mcq->mcq.reset_notify_added = 1;
1347                                         list_add_tail(&send_mcq->mcq.reset_notify,
1348                                                       &cq_armed_list);
1349                                 }
1350                         }
1351                         spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
1352                 }
1353                 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
1354                 spin_lock_irqsave(&mqp->rq.lock, flags_qp);
1355                 /* no handling is needed for SRQ */
1356                 if (!mqp->ibqp.srq) {
1357                         if (mqp->rq.tail != mqp->rq.head) {
1358                                 recv_mcq = to_mcq(mqp->ibqp.recv_cq);
1359                                 spin_lock_irqsave(&recv_mcq->lock, flags_cq);
1360                                 if (recv_mcq->mcq.comp &&
1361                                     mqp->ibqp.recv_cq->comp_handler) {
1362                                         if (!recv_mcq->mcq.reset_notify_added) {
1363                                                 recv_mcq->mcq.reset_notify_added = 1;
1364                                                 list_add_tail(&recv_mcq->mcq.reset_notify,
1365                                                               &cq_armed_list);
1366                                         }
1367                                 }
1368                                 spin_unlock_irqrestore(&recv_mcq->lock,
1369                                                        flags_cq);
1370                         }
1371                 }
1372                 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
1373         }
1374         /* At this point all in-flight post-send requests have been covered by the
1375          * lock/unlock sequence on the locks above. Now arm all involved CQs.
1376          */
1377         list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
1378                 mcq->comp(mcq);
1379         }
1380         spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
1381         mlx5_ib_warn(ibdev, " ended\n");
1382         return;
1383 }
1384
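/*
 * Translate mlx5 core events into IB events and dispatch them to ibcore;
 * a system error additionally marks the device inactive and triggers the
 * internal error reset flow above.
 */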
1385 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1386                           enum mlx5_dev_event event, unsigned long param)
1387 {
1388         struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
1389         struct ib_event ibev;
1390
1391         u8 port = 0;
1392
1393         switch (event) {
1394         case MLX5_DEV_EVENT_SYS_ERROR:
1395                 ibdev->ib_active = false;
1396                 ibev.event = IB_EVENT_DEVICE_FATAL;
1397                 mlx5_ib_handle_internal_error(ibdev);
1398                 break;
1399
1400         case MLX5_DEV_EVENT_PORT_UP:
1401                 ibev.event = IB_EVENT_PORT_ACTIVE;
1402                 port = (u8)param;
1403                 break;
1404
1405         case MLX5_DEV_EVENT_PORT_DOWN:
1406         case MLX5_DEV_EVENT_PORT_INITIALIZED:
1407                 ibev.event = IB_EVENT_PORT_ERR;
1408                 port = (u8)param;
1409                 break;
1410
1411         case MLX5_DEV_EVENT_LID_CHANGE:
1412                 ibev.event = IB_EVENT_LID_CHANGE;
1413                 port = (u8)param;
1414                 break;
1415
1416         case MLX5_DEV_EVENT_PKEY_CHANGE:
1417                 ibev.event = IB_EVENT_PKEY_CHANGE;
1418                 port = (u8)param;
1419                 break;
1420
1421         case MLX5_DEV_EVENT_GUID_CHANGE:
1422                 ibev.event = IB_EVENT_GID_CHANGE;
1423                 port = (u8)param;
1424                 break;
1425
1426         case MLX5_DEV_EVENT_CLIENT_REREG:
1427                 ibev.event = IB_EVENT_CLIENT_REREGISTER;
1428                 port = (u8)param;
1429                 break;
1430
1431         default:
1432                 break;
1433         }
1434
1435         ibev.device           = &ibdev->ib_dev;
1436         ibev.element.port_num = port;
1437
1438         if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
1439             (port < 1 || port > ibdev->num_ports)) {
1440                 mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
1441                 return;
1442         }
1443
1444         if (ibdev->ib_active)
1445                 ib_dispatch_event(&ibev);
1446 }
1447
1448 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
1449 {
1450         int port;
1451
1452         for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
1453                 mlx5_query_ext_port_caps(dev, port);
1454 }
1455
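/*
 * Record the reported atomic capability and enable the atomic responder
 * unconditionally (the capability check below is currently compiled out).
 */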
1456 static void config_atomic_responder(struct mlx5_ib_dev *dev,
1457                                     struct ib_device_attr *props)
1458 {
1459         enum ib_atomic_cap cap = props->atomic_cap;
1460
1461 #if 0
1462         if (cap == IB_ATOMIC_HCA ||
1463             cap == IB_ATOMIC_GLOB)
1464 #endif
1465                 dev->enable_atomic_resp = 1;
1466
1467         dev->atomic_cap = cap;
1468 }
1469
1470 enum mlx5_addr_align {
1471         MLX5_ADDR_ALIGN_0       = 0,
1472         MLX5_ADDR_ALIGN_64      = 64,
1473         MLX5_ADDR_ALIGN_128     = 128,
1474 };
1475
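/*
 * Query the device-wide attributes once and then each port, caching the
 * pkey and GID table lengths in the core device's port capability array.
 */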
1476 static int get_port_caps(struct mlx5_ib_dev *dev)
1477 {
1478         struct ib_device_attr *dprops = NULL;
1479         struct ib_port_attr *pprops = NULL;
1480         int err = -ENOMEM;
1481         int port;
1482
1483         pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1484         if (!pprops)
1485                 goto out;
1486
1487         dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1488         if (!dprops)
1489                 goto out;
1490
1491         err = mlx5_ib_query_device(&dev->ib_dev, dprops);
1492         if (err) {
1493                 mlx5_ib_warn(dev, "query_device failed %d\n", err);
1494                 goto out;
1495         }
1496         config_atomic_responder(dev, dprops);
1497
1498         for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
1499                 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1500                 if (err) {
1501                         mlx5_ib_warn(dev, "query_port %d failed %d\n",
1502                                      port, err);
1503                         break;
1504                 }
1505                 dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
1506                 dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
1507                 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1508                             dprops->max_pkeys, pprops->gid_tbl_len);
1509         }
1510
1511 out:
1512         kfree(pprops);
1513         kfree(dprops);
1514
1515         return err;
1516 }
1517
1518 static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1519 {
1520         int err;
1521
1522         err = mlx5_mr_cache_cleanup(dev);
1523         if (err)
1524                 mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1525
1526         ib_dereg_mr(dev->umrc.mr);
1527         ib_dealloc_pd(dev->umrc.pd);
1528 }
1529
1530 enum {
1531         MAX_UMR_WR = 128,
1532 };
1533
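/*
 * Allocate the protection domain and local DMA MR used by the UMR machinery
 * and initialize the MR cache.
 */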
1534 static int create_umr_res(struct mlx5_ib_dev *dev)
1535 {
1536         struct ib_pd *pd;
1537         struct ib_mr *mr;
1538         int ret;
1539
1540         pd = ib_alloc_pd(&dev->ib_dev);
1541         if (IS_ERR(pd)) {
1542                 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1543                 ret = PTR_ERR(pd);
1544                 goto error_0;
1545         }
1546
1547         mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
1548         if (IS_ERR(mr)) {
1549                 mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
1550                 ret = PTR_ERR(mr);
1551                 goto error_1;
1552         }
1553
1554         dev->umrc.mr = mr;
1555         dev->umrc.pd = pd;
1556
1557         ret = mlx5_mr_cache_init(dev);
1558         if (ret) {
1559                 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1560                 goto error_4;
1561         }
1562
1563         return 0;
1564
1565 error_4:
1566         ib_dereg_mr(mr);
1567 error_1:
1568         ib_dealloc_pd(pd);
1569 error_0:
1570         return ret;
1571 }
1572
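/*
 * Allocate the device-internal verbs resources: a PD, a CQ, two XRC domains
 * and two SRQs (one XRC, one basic). Since these objects never pass through
 * the uverbs layer, their back-pointers and reference counts are set by hand.
 */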
1573 static int create_dev_resources(struct mlx5_ib_resources *devr)
1574 {
1575         struct ib_srq_init_attr attr;
1576         struct mlx5_ib_dev *dev;
1577         int ret = 0;
1578
1579         dev = container_of(devr, struct mlx5_ib_dev, devr);
1580
1581         devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1582         if (IS_ERR(devr->p0)) {
1583                 ret = PTR_ERR(devr->p0);
1584                 goto error0;
1585         }
1586         devr->p0->device  = &dev->ib_dev;
1587         devr->p0->uobject = NULL;
1588         atomic_set(&devr->p0->usecnt, 0);
1589
1590         devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL);
1591         if (IS_ERR(devr->c0)) {
1592                 ret = PTR_ERR(devr->c0);
1593                 goto error1;
1594         }
1595         devr->c0->device        = &dev->ib_dev;
1596         devr->c0->uobject       = NULL;
1597         devr->c0->comp_handler  = NULL;
1598         devr->c0->event_handler = NULL;
1599         devr->c0->cq_context    = NULL;
1600         atomic_set(&devr->c0->usecnt, 0);
1601
1602         devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1603         if (IS_ERR(devr->x0)) {
1604                 ret = PTR_ERR(devr->x0);
1605                 goto error2;
1606         }
1607         devr->x0->device = &dev->ib_dev;
1608         devr->x0->inode = NULL;
1609         atomic_set(&devr->x0->usecnt, 0);
1610         mutex_init(&devr->x0->tgt_qp_mutex);
1611         INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
1612
1613         devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1614         if (IS_ERR(devr->x1)) {
1615                 ret = PTR_ERR(devr->x1);
1616                 goto error3;
1617         }
1618         devr->x1->device = &dev->ib_dev;
1619         devr->x1->inode = NULL;
1620         atomic_set(&devr->x1->usecnt, 0);
1621         mutex_init(&devr->x1->tgt_qp_mutex);
1622         INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
1623
1624         memset(&attr, 0, sizeof(attr));
1625         attr.attr.max_sge = 1;
1626         attr.attr.max_wr = 1;
1627         attr.srq_type = IB_SRQT_XRC;
1628         attr.ext.xrc.cq = devr->c0;
1629         attr.ext.xrc.xrcd = devr->x0;
1630
1631         devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1632         if (IS_ERR(devr->s0)) {
1633                 ret = PTR_ERR(devr->s0);
1634                 goto error4;
1635         }
1636         devr->s0->device        = &dev->ib_dev;
1637         devr->s0->pd            = devr->p0;
1638         devr->s0->uobject       = NULL;
1639         devr->s0->event_handler = NULL;
1640         devr->s0->srq_context   = NULL;
1641         devr->s0->srq_type      = IB_SRQT_XRC;
1642         devr->s0->ext.xrc.xrcd  = devr->x0;
1643         devr->s0->ext.xrc.cq    = devr->c0;
1644         atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
1645         atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
1646         atomic_inc(&devr->p0->usecnt);
1647         atomic_set(&devr->s0->usecnt, 0);
1648
1649         memset(&attr, 0, sizeof(attr));
1650         attr.attr.max_sge = 1;
1651         attr.attr.max_wr = 1;
1652         attr.srq_type = IB_SRQT_BASIC;
1653         devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1654         if (IS_ERR(devr->s1)) {
1655                 ret = PTR_ERR(devr->s1);
1656                 goto error5;
1657         }
1658         devr->s1->device        = &dev->ib_dev;
1659         devr->s1->pd            = devr->p0;
1660         devr->s1->uobject       = NULL;
1661         devr->s1->event_handler = NULL;
1662         devr->s1->srq_context   = NULL;
1663         devr->s1->srq_type      = IB_SRQT_BASIC;
1664         devr->s1->ext.xrc.cq    = devr->c0;
1665         atomic_inc(&devr->p0->usecnt);
1666         atomic_set(&devr->s1->usecnt, 0);
1667
1668         return 0;
1669
1670 error5:
1671         mlx5_ib_destroy_srq(devr->s0);
1672 error4:
1673         mlx5_ib_dealloc_xrcd(devr->x1);
1674 error3:
1675         mlx5_ib_dealloc_xrcd(devr->x0);
1676 error2:
1677         mlx5_ib_destroy_cq(devr->c0);
1678 error1:
1679         mlx5_ib_dealloc_pd(devr->p0);
1680 error0:
1681         return ret;
1682 }
1683
1684 static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1685 {
1686         mlx5_ib_destroy_srq(devr->s1);
1687         mlx5_ib_destroy_srq(devr->s0);
1688         mlx5_ib_dealloc_xrcd(devr->x0);
1689         mlx5_ib_dealloc_xrcd(devr->x1);
1690         mlx5_ib_destroy_cq(devr->c0);
1691         mlx5_ib_dealloc_pd(devr->p0);
1692 }
1693
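/*
 * Allocate and DMA-map a trace buffer (one 4KB page per port) and ask
 * firmware to start DC CNAK tracing into it. On any failure the buffer is
 * released and the tracer is simply left disabled.
 */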
1694 static void enable_dc_tracer(struct mlx5_ib_dev *dev)
1695 {
1696         struct device *device = dev->ib_dev.dma_device;
1697         struct mlx5_dc_tracer *dct = &dev->dctr;
1698         int order;
1699         void *tmp;
1700         int size;
1701         int err;
1702
1703         size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
1704         if (size <= PAGE_SIZE)
1705                 order = 0;
1706         else
1707                 order = 1;
1708
1709         dct->pg = alloc_pages(GFP_KERNEL, order);
1710         if (!dct->pg) {
1711                 mlx5_ib_err(dev, "failed to allocate pages (order %d)\n", order);
1712                 return;
1713         }
1714
1715         tmp = page_address(dct->pg);
1716         memset(tmp, 0xff, size);
1717
1718         dct->size = size;
1719         dct->order = order;
1720         dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
1721         if (dma_mapping_error(device, dct->dma)) {
1722                 mlx5_ib_err(dev, "dma mapping error\n");
1723                 goto map_err;
1724         }
1725
1726         err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
1727         if (err) {
1728                 mlx5_ib_warn(dev, "failed to enable DC tracer\n");
1729                 goto cmd_err;
1730         }
1731
1732         return;
1733
1734 cmd_err:
1735         dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
1736 map_err:
1737         __free_pages(dct->pg, dct->order);
1738         dct->pg = NULL;
1739 }
1740
1741 static void disable_dc_tracer(struct mlx5_ib_dev *dev)
1742 {
1743         struct device *device = dev->ib_dev.dma_device;
1744         struct mlx5_dc_tracer *dct = &dev->dctr;
1745         int err;
1746
1747         if (!dct->pg)
1748                 return;
1749
1750         err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
1751         if (err) {
1752                 mlx5_ib_warn(dev, "failed to disable DC tracer\n");
1753                 return;
1754         }
1755
1756         dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
1757         __free_pages(dct->pg, dct->order);
1758         dct->pg = NULL;
1759 }
1760
1761 enum {
1762         MLX5_DC_CNAK_SIZE               = 128,
1763         MLX5_NUM_BUF_IN_PAGE            = PAGE_SIZE / MLX5_DC_CNAK_SIZE,
1764         MLX5_CNAK_TX_CQ_SIGNAL_FACTOR   = 128,
1765         MLX5_DC_CNAK_SL                 = 0,
1766         MLX5_DC_CNAK_VL                 = 0,
1767 };
1768
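/* Enable the DC CNAK tracer, but only on a PF that advertises dc_cnak_trace. */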
1769 static int init_dc_improvements(struct mlx5_ib_dev *dev)
1770 {
1771         if (!mlx5_core_is_pf(dev->mdev))
1772                 return 0;
1773
1774         if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
1775                 return 0;
1776
1777         enable_dc_tracer(dev);
1778
1779         return 0;
1780 }
1781
1782 static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
1783 {
1784
1785         disable_dc_tracer(dev);
1786 }
1787
1788 static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
1789 {
1790         mlx5_vport_dealloc_q_counter(dev->mdev,
1791                                      MLX5_INTERFACE_PROTOCOL_IB,
1792                                      dev->port[port_num].q_cnt_id);
1793         dev->port[port_num].q_cnt_id = 0;
1794 }
1795
1796 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
1797 {
1798         unsigned int i;
1799
1800         for (i = 0; i < dev->num_ports; i++)
1801                 mlx5_ib_dealloc_q_port_counter(dev, i);
1802 }
1803
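/*
 * Allocate one IB queue counter per port; on failure release the counters
 * that were already allocated.
 */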
1804 static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
1805 {
1806         int i;
1807         int ret;
1808
1809         for (i = 0; i < dev->num_ports; i++) {
1810                 ret = mlx5_vport_alloc_q_counter(dev->mdev,
1811                                                  MLX5_INTERFACE_PROTOCOL_IB,
1812                                                  &dev->port[i].q_cnt_id);
1813                 if (ret) {
1814                         mlx5_ib_warn(dev,
1815                                      "couldn't allocate queue counter for port %d\n",
1816                                      i + 1);
1817                         goto dealloc_counters;
1818                 }
1819         }
1820
1821         return 0;
1822
1823 dealloc_counters:
1824         while (--i >= 0)
1825                 mlx5_ib_dealloc_q_port_counter(dev, i);
1826
1827         return ret;
1828 }
1829
1830 struct port_attribute {
1831         struct attribute attr;
1832         ssize_t (*show)(struct mlx5_ib_port *,
1833                         struct port_attribute *, char *buf);
1834         ssize_t (*store)(struct mlx5_ib_port *,
1835                          struct port_attribute *,
1836                          const char *buf, size_t count);
1837 };
1838
1839 struct port_counter_attribute {
1840         struct port_attribute   attr;
1841         size_t                  offset;
1842 };
1843
1844 static ssize_t port_attr_show(struct kobject *kobj,
1845                               struct attribute *attr, char *buf)
1846 {
1847         struct port_attribute *port_attr =
1848                 container_of(attr, struct port_attribute, attr);
1849         struct mlx5_ib_port_sysfs_group *p =
1850                 container_of(kobj, struct mlx5_ib_port_sysfs_group,
1851                              kobj);
1852         struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
1853                                                     group);
1854
1855         if (!port_attr->show)
1856                 return -EIO;
1857
1858         return port_attr->show(mibport, port_attr, buf);
1859 }
1860
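/*
 * sysfs "show" handler for a single queue counter field: query the port's
 * queue counter from firmware and print the big-endian field selected by
 * the attribute's offset into the query output.
 */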
1861 static ssize_t show_port_counter(struct mlx5_ib_port *p,
1862                                  struct port_attribute *port_attr,
1863                                  char *buf)
1864 {
1865         int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
1866         struct port_counter_attribute *counter_attr =
1867                 container_of(port_attr, struct port_counter_attribute, attr);
1868         void *out;
1869         int ret;
1870
1871         out = mlx5_vzalloc(outlen);
1872         if (!out)
1873                 return -ENOMEM;
1874
1875         ret = mlx5_vport_query_q_counter(p->dev->mdev,
1876                                          p->q_cnt_id, 0,
1877                                          out, outlen);
1878         if (ret)
1879                 goto free;
1880
1881         ret = sprintf(buf, "%d\n",
1882                       be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));
1883
1884 free:
1885         kfree(out);
1886         return ret;
1887 }
1888
1889 #define PORT_COUNTER_ATTR(_name)                                        \
1890 struct port_counter_attribute port_counter_attr_##_name = {             \
1891         .attr  = __ATTR(_name, S_IRUGO, show_port_counter, NULL),       \
1892         .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)             \
1893 }
1894
1895 static PORT_COUNTER_ATTR(rx_write_requests);
1896 static PORT_COUNTER_ATTR(rx_read_requests);
1897 static PORT_COUNTER_ATTR(rx_atomic_requests);
1898 static PORT_COUNTER_ATTR(rx_dct_connect);
1899 static PORT_COUNTER_ATTR(out_of_buffer);
1900 static PORT_COUNTER_ATTR(out_of_sequence);
1901 static PORT_COUNTER_ATTR(duplicate_request);
1902 static PORT_COUNTER_ATTR(rnr_nak_retry_err);
1903 static PORT_COUNTER_ATTR(packet_seq_err);
1904 static PORT_COUNTER_ATTR(implied_nak_seq_err);
1905 static PORT_COUNTER_ATTR(local_ack_timeout_err);
1906
1907 static struct attribute *counter_attrs[] = {
1908         &port_counter_attr_rx_write_requests.attr.attr,
1909         &port_counter_attr_rx_read_requests.attr.attr,
1910         &port_counter_attr_rx_atomic_requests.attr.attr,
1911         &port_counter_attr_rx_dct_connect.attr.attr,
1912         &port_counter_attr_out_of_buffer.attr.attr,
1913         &port_counter_attr_out_of_sequence.attr.attr,
1914         &port_counter_attr_duplicate_request.attr.attr,
1915         &port_counter_attr_rnr_nak_retry_err.attr.attr,
1916         &port_counter_attr_packet_seq_err.attr.attr,
1917         &port_counter_attr_implied_nak_seq_err.attr.attr,
1918         &port_counter_attr_local_ack_timeout_err.attr.attr,
1919         NULL
1920 };
1921
1922 static struct attribute_group port_counters_group = {
1923         .name  = "counters",
1924         .attrs  = counter_attrs
1925 };
1926
1927 static const struct sysfs_ops port_sysfs_ops = {
1928         .show = port_attr_show
1929 };
1930
1931 static struct kobj_type port_type = {
1932         .sysfs_ops     = &port_sysfs_ops,
1933 };
1934
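/*
 * Create the per-port sysfs kobject and, when the device reports the
 * out-of-sequence and retransmission counters, attach the "counters" group.
 */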
1935 static int add_port_attrs(struct mlx5_ib_dev *dev,
1936                           struct kobject *parent,
1937                           struct mlx5_ib_port_sysfs_group *port,
1938                           u8 port_num)
1939 {
1940         int ret;
1941
1942         ret = kobject_init_and_add(&port->kobj, &port_type,
1943                                    parent,
1944                                    "%d", port_num);
1945         if (ret)
1946                 return ret;
1947
1948         if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
1949             MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
1950                 ret = sysfs_create_group(&port->kobj, &port_counters_group);
1951                 if (ret)
1952                         goto put_kobj;
1953         }
1954
1955         port->enabled = true;
1956         return ret;
1957
1958 put_kobj:
1959         kobject_put(&port->kobj);
1960         return ret;
1961 }
1962
1963 static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
1964                                 unsigned int num_ports)
1965 {
1966         unsigned int i;
1967
1968         for (i = 0; i < num_ports; i++) {
1969                 struct mlx5_ib_port_sysfs_group *port =
1970                         &dev->port[i].group;
1971
1972                 if (!port->enabled)
1973                         continue;
1974
1975                 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
1976                     MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
1977                         sysfs_remove_group(&port->kobj,
1978                                            &port_counters_group);
1979                 kobject_put(&port->kobj);
1980                 port->enabled = false;
1981         }
1982
1983         if (dev->ports_parent) {
1984                 kobject_put(dev->ports_parent);
1985                 dev->ports_parent = NULL;
1986         }
1987 }
1988
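/*
 * Create the "mlx5_ports" sysfs directory under the IB device and populate
 * one numbered entry per port, unwinding on failure.
 */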
1989 static int create_port_attrs(struct mlx5_ib_dev *dev)
1990 {
1991         int ret = 0;
1992         unsigned int i = 0;
1993         struct device *device = &dev->ib_dev.dev;
1994
1995         dev->ports_parent = kobject_create_and_add("mlx5_ports",
1996                                                    &device->kobj);
1997         if (!dev->ports_parent)
1998                 return -ENOMEM;
1999
2000         for (i = 0; i < dev->num_ports; i++) {
2001                 ret = add_port_attrs(dev,
2002                                      dev->ports_parent,
2003                                      &dev->port[i].group,
2004                                      i + 1);
2005
2006                 if (ret)
2007                         goto _destroy_ports_attrs;
2008         }
2009
2010         return 0;
2011
2012 _destroy_ports_attrs:
2013         destroy_ports_attrs(dev, i);
2014         return ret;
2015 }
2016
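/*
 * mlx5 core "add" callback: allocate and populate the ib_device, query port
 * capabilities, enable RoCE when port 1 uses an Ethernet link layer, fill in
 * the verbs entry points, register with ibcore, and set up the UMR, queue
 * counter, sysfs and RoCE port-update thread resources. Errors unwind in
 * reverse order of creation.
 */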
2017 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2018 {
2019         struct mlx5_ib_dev *dev;
2020         int err;
2021         int i;
2022
2023         printk_once(KERN_INFO "%s", mlx5_version);
2024
2025         dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
2026         if (!dev)
2027                 return NULL;
2028
2029         dev->mdev = mdev;
2030
2031         dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
2032                              GFP_KERNEL);
2033         if (!dev->port)
2034                 goto err_dealloc;
2035
2036         for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2037                 dev->port[i].dev = dev;
2038                 dev->port[i].port_num = i;
2039                 dev->port[i].port_gone = 0;
2040                 memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table));
2041         }
2042
2043         err = get_port_caps(dev);
2044         if (err)
2045                 goto err_free_port;
2046
2047         if (mlx5_use_mad_ifc(dev))
2048                 get_ext_port_caps(dev);
2049
2050         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2051             IB_LINK_LAYER_ETHERNET) {
2052                 if (MLX5_CAP_GEN(mdev, roce)) {
2053                         err = mlx5_nic_vport_enable_roce(mdev);
2054                         if (err)
2055                                 goto err_free_port;
2056                 } else {
2057                         goto err_free_port;
2058                 }
2059         }
2060
2061         MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
2062
2063         strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
2064         dev->ib_dev.owner               = THIS_MODULE;
2065         dev->ib_dev.node_type           = RDMA_NODE_IB_CA;
2066         dev->ib_dev.local_dma_lkey      = mdev->special_contexts.resd_lkey;
2067         dev->num_ports          = MLX5_CAP_GEN(mdev, num_ports);
2068         dev->ib_dev.phys_port_cnt     = dev->num_ports;
2069         dev->ib_dev.num_comp_vectors    =
2070                 dev->mdev->priv.eq_table.num_comp_vectors;
2071         dev->ib_dev.dma_device  = &mdev->pdev->dev;
2072
2073         dev->ib_dev.uverbs_abi_ver      = MLX5_IB_UVERBS_ABI_VERSION;
2074         dev->ib_dev.uverbs_cmd_mask     =
2075                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
2076                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
2077                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
2078                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
2079                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
2080                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
2081                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
2082                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2083                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
2084                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
2085                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
2086                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
2087                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
2088                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
2089                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
2090                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
2091                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
2092                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
2093                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
2094                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
2095                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
2096                 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
2097                 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
2098
2099         dev->ib_dev.query_device        = mlx5_ib_query_device;
2100         dev->ib_dev.query_port          = mlx5_ib_query_port;
2101         dev->ib_dev.get_link_layer      = mlx5_ib_port_link_layer;
2102         dev->ib_dev.query_gid           = mlx5_ib_query_gid;
2103         dev->ib_dev.query_pkey          = mlx5_ib_query_pkey;
2104         dev->ib_dev.modify_device       = mlx5_ib_modify_device;
2105         dev->ib_dev.modify_port         = mlx5_ib_modify_port;
2106         dev->ib_dev.alloc_ucontext      = mlx5_ib_alloc_ucontext;
2107         dev->ib_dev.dealloc_ucontext    = mlx5_ib_dealloc_ucontext;
2108         dev->ib_dev.mmap                = mlx5_ib_mmap;
2109         dev->ib_dev.alloc_pd            = mlx5_ib_alloc_pd;
2110         dev->ib_dev.dealloc_pd          = mlx5_ib_dealloc_pd;
2111         dev->ib_dev.create_ah           = mlx5_ib_create_ah;
2112         dev->ib_dev.query_ah            = mlx5_ib_query_ah;
2113         dev->ib_dev.destroy_ah          = mlx5_ib_destroy_ah;
2114         dev->ib_dev.create_srq          = mlx5_ib_create_srq;
2115         dev->ib_dev.modify_srq          = mlx5_ib_modify_srq;
2116         dev->ib_dev.query_srq           = mlx5_ib_query_srq;
2117         dev->ib_dev.destroy_srq         = mlx5_ib_destroy_srq;
2118         dev->ib_dev.post_srq_recv       = mlx5_ib_post_srq_recv;
2119         dev->ib_dev.create_qp           = mlx5_ib_create_qp;
2120         dev->ib_dev.modify_qp           = mlx5_ib_modify_qp;
2121         dev->ib_dev.query_qp            = mlx5_ib_query_qp;
2122         dev->ib_dev.destroy_qp          = mlx5_ib_destroy_qp;
2123         dev->ib_dev.post_send           = mlx5_ib_post_send;
2124         dev->ib_dev.post_recv           = mlx5_ib_post_recv;
2125         dev->ib_dev.create_cq           = mlx5_ib_create_cq;
2126         dev->ib_dev.modify_cq           = mlx5_ib_modify_cq;
2127         dev->ib_dev.resize_cq           = mlx5_ib_resize_cq;
2128         dev->ib_dev.destroy_cq          = mlx5_ib_destroy_cq;
2129         dev->ib_dev.poll_cq             = mlx5_ib_poll_cq;
2130         dev->ib_dev.req_notify_cq       = mlx5_ib_arm_cq;
2131         dev->ib_dev.get_dma_mr          = mlx5_ib_get_dma_mr;
2132         dev->ib_dev.reg_user_mr         = mlx5_ib_reg_user_mr;
2133         dev->ib_dev.reg_phys_mr         = mlx5_ib_reg_phys_mr;
2134         dev->ib_dev.dereg_mr            = mlx5_ib_dereg_mr;
2135         dev->ib_dev.attach_mcast        = mlx5_ib_mcg_attach;
2136         dev->ib_dev.detach_mcast        = mlx5_ib_mcg_detach;
2137         dev->ib_dev.process_mad         = mlx5_ib_process_mad;
2138         dev->ib_dev.alloc_fast_reg_mr   = mlx5_ib_alloc_fast_reg_mr;
2139         dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
2140         dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
2141
2142         if (MLX5_CAP_GEN(mdev, xrc)) {
2143                 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
2144                 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
2145                 dev->ib_dev.uverbs_cmd_mask |=
2146                         (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2147                         (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2148         }
2149
2150         err = init_node_data(dev);
2151         if (err)
2152                 goto err_disable_roce;
2153
2154         mutex_init(&dev->cap_mask_mutex);
2155         INIT_LIST_HEAD(&dev->qp_list);
2156         spin_lock_init(&dev->reset_flow_resource_lock);
2157
2158         err = create_dev_resources(&dev->devr);
2159         if (err)
2160                 goto err_disable_roce;
2161
2162
2163         err = mlx5_ib_alloc_q_counters(dev);
2164         if (err)
2165                 goto err_odp;
2166
2167         err = ib_register_device(&dev->ib_dev, NULL);
2168         if (err)
2169                 goto err_q_cnt;
2170
2171         err = create_umr_res(dev);
2172         if (err)
2173                 goto err_dev;
2174
2175         if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2176             MLX5_CAP_PORT_TYPE_IB) {
2177                 if (init_dc_improvements(dev))
2178                         mlx5_ib_dbg(dev, "init_dc_improvements failed - continuing\n");
2179         }
2180
2181         err = create_port_attrs(dev);
2182         if (err)
2183                 goto err_dc;
2184
2185         for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2186                 err = device_create_file(&dev->ib_dev.dev,
2187                                          mlx5_class_attributes[i]);
2188                 if (err)
2189                         goto err_port_attrs;
2190         }
2191
2192         if (1) {
2193                 struct thread *rl_thread = NULL;
2194                 struct proc *rl_proc = NULL;
2195
2196                 for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2197                         (void) kproc_kthread_add(mlx5_ib_roce_port_update, dev->port + i, &rl_proc, &rl_thread,
2198                             RFHIGHPID, 0, "mlx5-ib-roce-port", "mlx5-ib-roce_port-%d", i);
2199                 }
2200         }
2201
2202         dev->ib_active = true;
2203
2204         return dev;
2205
2206 err_port_attrs:
2207         destroy_ports_attrs(dev, dev->num_ports);
2208
2209 err_dc:
2210         if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2211             MLX5_CAP_PORT_TYPE_IB)
2212                 cleanup_dc_improvements(dev);
2213         destroy_umrc_res(dev);
2214
2215 err_dev:
2216         ib_unregister_device(&dev->ib_dev);
2217
2218 err_q_cnt:
2219         mlx5_ib_dealloc_q_counters(dev);
2220
2221 err_odp:
2222         destroy_dev_resources(&dev->devr);
2223
2224 err_disable_roce:
2225         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2226             IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2227                 mlx5_nic_vport_disable_roce(mdev);
2228 err_free_port:
2229         kfree(dev->port);
2230
2231 err_dealloc:
2232         ib_dealloc_device((struct ib_device *)dev);
2233
2234         return NULL;
2235 }
2236
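/*
 * mlx5 core "remove" callback: wait for the per-port RoCE update threads to
 * acknowledge shutdown, then tear down everything created by mlx5_ib_add in
 * reverse order.
 */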
2237 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
2238 {
2239         struct mlx5_ib_dev *dev = context;
2240         int i;
2241
2242         for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2243                 dev->port[i].port_gone = 1;
2244                 while (dev->port[i].port_gone != 2)
2245                         pause("W", hz);
2246         }
2247
2248         for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2249                 device_remove_file(&dev->ib_dev.dev,
2250                     mlx5_class_attributes[i]);
2251         }
2252
2253         destroy_ports_attrs(dev, dev->num_ports);
2254         if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2255             MLX5_CAP_PORT_TYPE_IB)
2256                 cleanup_dc_improvements(dev);
2257         mlx5_ib_dealloc_q_counters(dev);
2258         ib_unregister_device(&dev->ib_dev);
2259         destroy_umrc_res(dev);
2260         destroy_dev_resources(&dev->devr);
2261
2262         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2263             IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2264                 mlx5_nic_vport_disable_roce(mdev);
2265
2266         kfree(dev->port);
2267         ib_dealloc_device(&dev->ib_dev);
2268 }
2269
2270 static struct mlx5_interface mlx5_ib_interface = {
2271         .add            = mlx5_ib_add,
2272         .remove         = mlx5_ib_remove,
2273         .event          = mlx5_ib_event,
2274         .protocol       = MLX5_INTERFACE_PROTOCOL_IB,
2275 };
2276
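/*
 * Module load: register the IB interface with the mlx5 core and create the
 * driver workqueue.
 */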
2277 static int __init mlx5_ib_init(void)
2278 {
2279         int err;
2280
2281         if (deprecated_prof_sel != 2)
2282                 printf("mlx5_ib: WARN: ""prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
2283
2284         err = mlx5_register_interface(&mlx5_ib_interface);
2285         if (err)
2286                 goto clean_odp;
2287
2288         mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
2289         if (!mlx5_ib_wq) {
2290                 printf("mlx5_ib: ERR: ""%s: failed to create mlx5_ib_wq\n", __func__);
2291                 goto err_unreg;
2292         }
2293
2294         return err;
2295
2296 err_unreg:
2297         mlx5_unregister_interface(&mlx5_ib_interface);
2298
2299 clean_odp:
2300         return err;
2301 }
2302
2303 static void __exit mlx5_ib_cleanup(void)
2304 {
2305         destroy_workqueue(mlx5_ib_wq);
2306         mlx5_unregister_interface(&mlx5_ib_interface);
2307 }
2308
2309 module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
2310 module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);