sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
1 /*-
2  * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27
28 #include <linux/errno.h>
29 #include <linux/pci.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/slab.h>
32 #include <linux/io-mapping.h>
33 #include <linux/sched.h>
34 #include <linux/netdevice.h>
35 #include <linux/etherdevice.h>
36 #include <linux/list.h>
37 #include <dev/mlx5/driver.h>
38 #include <dev/mlx5/vport.h>
39 #include <asm/pgtable.h>
40 #include <linux/fs.h>
41 #undef inode
42
43 #include <rdma/ib_user_verbs.h>
44 #include <rdma/ib_smi.h>
45 #include <rdma/ib_umem.h>
46 #include "user.h"
47 #include "mlx5_ib.h"
48
49 #include <sys/unistd.h>
50 #include <sys/kthread.h>
51
52 #define DRIVER_NAME "mlx5_ib"
53 #define DRIVER_VERSION "3.2-rc1"
54 #define DRIVER_RELDATE  "May 2016"
55
56 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
57 MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
58 MODULE_LICENSE("Dual BSD/GPL");
59 MODULE_DEPEND(mlx5ib, linuxkpi, 1, 1, 1);
60 MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
61 MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
62 MODULE_VERSION(mlx5ib, 1);
63
64 static int deprecated_prof_sel = 2;
65 module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
66 MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
67
68 enum {
69         MLX5_STANDARD_ATOMIC_SIZE = 0x8,
70 };
71
72 struct workqueue_struct *mlx5_ib_wq;
73
74 static char mlx5_version[] =
75         DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
76         DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
77
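/*
 * Derive the IB atomic capabilities advertised to consumers: plain and
 * masked compare-and-swap / fetch-and-add are reported as IB_ATOMIC_HCA
 * only when the HCA supports both operations on 8-byte operands and the
 * required requester endianness mode; otherwise IB_ATOMIC_NONE is
 * reported.
 */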
78 static void get_atomic_caps(struct mlx5_ib_dev *dev,
79                             struct ib_device_attr *props)
80 {
81         int tmp;
82         u8 atomic_operations;
83         u8 atomic_size_qp;
84         u8 atomic_req_endianess;
85
86         atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
87         atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
88         atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
89                                                atomic_req_8B_endianess_mode) ||
90                                !mlx5_host_is_le();
91
92         tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
93         if (((atomic_operations & tmp) == tmp)
94             && (atomic_size_qp & 8)) {
95                 if (atomic_req_endianess) {
96                         props->atomic_cap = IB_ATOMIC_HCA;
97                 } else {
98                         props->atomic_cap = IB_ATOMIC_NONE;
99                 }
100         } else {
101                 props->atomic_cap = IB_ATOMIC_NONE;
102         }
103
104         tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
105         if (((atomic_operations & tmp) == tmp)
106             && (atomic_size_qp & 8)) {
107                 if (atomic_req_endianess)
108                         props->masked_atomic_cap = IB_ATOMIC_HCA;
109                 else {
110                         props->masked_atomic_cap = IB_ATOMIC_NONE;
111                 }
112         } else {
113                 props->masked_atomic_cap = IB_ATOMIC_NONE;
114         }
115 }
116
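/*
 * Map the firmware port_type capability to the RDMA link layer: native
 * InfiniBand ports report IB_LINK_LAYER_INFINIBAND, Ethernet (RoCE)
 * ports report IB_LINK_LAYER_ETHERNET.
 */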
117 static enum rdma_link_layer
118 mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
119 {
120         struct mlx5_ib_dev *dev = to_mdev(device);
121
122         switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
123         case MLX5_CAP_PORT_TYPE_IB:
124                 return IB_LINK_LAYER_INFINIBAND;
125         case MLX5_CAP_PORT_TYPE_ETH:
126                 return IB_LINK_LAYER_ETHERNET;
127         default:
128                 return IB_LINK_LAYER_UNSPECIFIED;
129         }
130 }
131
132 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
133 {
134         return !dev->mdev->issi;
135 }
136
137 enum {
138         MLX5_VPORT_ACCESS_METHOD_MAD,
139         MLX5_VPORT_ACCESS_METHOD_HCA,
140         MLX5_VPORT_ACCESS_METHOD_NIC,
141 };
142
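/*
 * Select how vport attributes are queried: the MAD interface is used
 * when the firmware does not implement ISSI (issi == 0), NIC vport
 * commands are used for Ethernet/RoCE ports, and HCA vport commands
 * are used otherwise.
 */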
143 static int mlx5_get_vport_access_method(struct ib_device *ibdev)
144 {
145         if (mlx5_use_mad_ifc(to_mdev(ibdev)))
146                 return MLX5_VPORT_ACCESS_METHOD_MAD;
147
148         if (mlx5_ib_port_link_layer(ibdev, 1) ==
149             IB_LINK_LAYER_ETHERNET)
150                 return MLX5_VPORT_ACCESS_METHOD_NIC;
151
152         return MLX5_VPORT_ACCESS_METHOD_HCA;
153 }
154
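/*
 * The helpers below (system image GUID, max pkeys, vendor id, node GUID
 * and node description) all dispatch on the vport access method selected
 * above and, where needed, convert the firmware values to the big-endian
 * representation expected by the IB core.
 */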
155 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
156                                         __be64 *sys_image_guid)
157 {
158         struct mlx5_ib_dev *dev = to_mdev(ibdev);
159         struct mlx5_core_dev *mdev = dev->mdev;
160         u64 tmp;
161         int err;
162
163         switch (mlx5_get_vport_access_method(ibdev)) {
164         case MLX5_VPORT_ACCESS_METHOD_MAD:
165                 return mlx5_query_system_image_guid_mad_ifc(ibdev,
166                                                             sys_image_guid);
167
168         case MLX5_VPORT_ACCESS_METHOD_HCA:
169                 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
170                 if (!err)
171                         *sys_image_guid = cpu_to_be64(tmp);
172                 return err;
173
174         case MLX5_VPORT_ACCESS_METHOD_NIC:
175                 err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
176                 if (!err)
177                         *sys_image_guid = cpu_to_be64(tmp);
178                 return err;
179
180         default:
181                 return -EINVAL;
182         }
183 }
184
185 static int mlx5_query_max_pkeys(struct ib_device *ibdev,
186                                 u16 *max_pkeys)
187 {
188         struct mlx5_ib_dev *dev = to_mdev(ibdev);
189         struct mlx5_core_dev *mdev = dev->mdev;
190
191         switch (mlx5_get_vport_access_method(ibdev)) {
192         case MLX5_VPORT_ACCESS_METHOD_MAD:
193                 return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);
194
195         case MLX5_VPORT_ACCESS_METHOD_HCA:
196         case MLX5_VPORT_ACCESS_METHOD_NIC:
197                 *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
198                                                 pkey_table_size));
199                 return 0;
200
201         default:
202                 return -EINVAL;
203         }
204 }
205
206 static int mlx5_query_vendor_id(struct ib_device *ibdev,
207                                 u32 *vendor_id)
208 {
209         struct mlx5_ib_dev *dev = to_mdev(ibdev);
210
211         switch (mlx5_get_vport_access_method(ibdev)) {
212         case MLX5_VPORT_ACCESS_METHOD_MAD:
213                 return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);
214
215         case MLX5_VPORT_ACCESS_METHOD_HCA:
216         case MLX5_VPORT_ACCESS_METHOD_NIC:
217                 return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
218
219         default:
220                 return -EINVAL;
221         }
222 }
223
224 static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
225                                 __be64 *node_guid)
226 {
227         u64 tmp;
228         int err;
229
230         switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
231         case MLX5_VPORT_ACCESS_METHOD_MAD:
232                 return mlx5_query_node_guid_mad_ifc(dev, node_guid);
233
234         case MLX5_VPORT_ACCESS_METHOD_HCA:
235                 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
236                 if (!err)
237                         *node_guid = cpu_to_be64(tmp);
238                 return err;
239
240         case MLX5_VPORT_ACCESS_METHOD_NIC:
241                 err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
242                 if (!err)
243                         *node_guid = cpu_to_be64(tmp);
244                 return err;
245
246         default:
247                 return -EINVAL;
248         }
249 }
250
251 struct mlx5_reg_node_desc {
252         u8      desc[64];
253 };
254
255 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
256 {
257         struct mlx5_reg_node_desc in;
258
259         if (mlx5_use_mad_ifc(dev))
260                 return mlx5_query_node_desc_mad_ifc(dev, node_desc);
261
262         memset(&in, 0, sizeof(in));
263
264         return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
265                                     sizeof(struct mlx5_reg_node_desc),
266                                     MLX5_REG_NODE_DESC, 0, 0);
267 }
268
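/*
 * Fill struct ib_device_attr from the firmware capability pages.  Most
 * maxima are log2 encoded in the general capabilities, the supported
 * SGE count is derived from the RQ/SQ WQE sizes, and the atomic
 * capabilities are filled in by get_atomic_caps() above.
 */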
269 static int mlx5_ib_query_device(struct ib_device *ibdev,
270                                 struct ib_device_attr *props)
271 {
272         struct mlx5_ib_dev *dev = to_mdev(ibdev);
273         struct mlx5_core_dev *mdev = dev->mdev;
274         int max_sq_desc;
275         int max_rq_sg;
276         int max_sq_sg;
277         int err;
278
279
280         memset(props, 0, sizeof(*props));
281
282         err = mlx5_query_system_image_guid(ibdev,
283                                            &props->sys_image_guid);
284         if (err)
285                 return err;
286
287         err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
288         if (err)
289                 return err;
290
291         err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
292         if (err)
293                 return err;
294
295         props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
296                 ((u64)fw_rev_min(dev->mdev) << 16) |
297                 fw_rev_sub(dev->mdev);
298         props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
299                 IB_DEVICE_PORT_ACTIVE_EVENT             |
300                 IB_DEVICE_SYS_IMAGE_GUID                |
301                 IB_DEVICE_RC_RNR_NAK_GEN;
302
303         if (MLX5_CAP_GEN(mdev, pkv))
304                 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
305         if (MLX5_CAP_GEN(mdev, qkv))
306                 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
307         if (MLX5_CAP_GEN(mdev, apm))
308                 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
309         props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
310         if (MLX5_CAP_GEN(mdev, xrc))
311                 props->device_cap_flags |= IB_DEVICE_XRC;
312         props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
313         if (MLX5_CAP_GEN(mdev, block_lb_mc))
314                 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
315
316         props->vendor_part_id      = mdev->pdev->device;
317         props->hw_ver              = mdev->pdev->revision;
318
319         props->max_mr_size         = ~0ull;
320         props->page_size_cap       = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) -1);
321         props->max_qp              = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
322         props->max_qp_wr           = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
323         max_rq_sg =  MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
324                      sizeof(struct mlx5_wqe_data_seg);
325         max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
326         max_sq_sg = (max_sq_desc -
327                      sizeof(struct mlx5_wqe_ctrl_seg) -
328                      sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg);
329         props->max_sge = min(max_rq_sg, max_sq_sg);
330         props->max_cq              = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
331         props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
332         props->max_mr              = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
333         props->max_pd              = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
334         props->max_qp_rd_atom      = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
335         props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
336         props->max_srq             = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
337         props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
338         props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
339         props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
340         props->max_srq_sge         = max_rq_sg - 1;
341         props->max_fast_reg_page_list_len = (unsigned int)-1;
342         get_atomic_caps(dev, props);
343         props->max_mcast_grp       = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
344         props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
345         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
346                                            props->max_mcast_grp;
347         props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
348         props->max_ah           = INT_MAX;
349
350         return 0;
351 }
352
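/*
 * Translate the hardware link width bitmask into the IB encoding.  A 2X
 * width is rejected because, as the warning below notes, it has no
 * counterpart in the IB specification this driver targets.
 */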
353 enum mlx5_ib_width {
354         MLX5_IB_WIDTH_1X        = 1 << 0,
355         MLX5_IB_WIDTH_2X        = 1 << 1,
356         MLX5_IB_WIDTH_4X        = 1 << 2,
357         MLX5_IB_WIDTH_8X        = 1 << 3,
358         MLX5_IB_WIDTH_12X       = 1 << 4
359 };
360
361 static int translate_active_width(struct ib_device *ibdev, u8 active_width,
362                                   u8 *ib_width)
363 {
364         struct mlx5_ib_dev *dev = to_mdev(ibdev);
365         int err = 0;
366
367         if (active_width & MLX5_IB_WIDTH_1X) {
368                 *ib_width = IB_WIDTH_1X;
369         } else if (active_width & MLX5_IB_WIDTH_2X) {
370                 mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
371                              (int)active_width);
372                 err = -EINVAL;
373         } else if (active_width & MLX5_IB_WIDTH_4X) {
374                 *ib_width = IB_WIDTH_4X;
375         } else if (active_width & MLX5_IB_WIDTH_8X) {
376                 *ib_width = IB_WIDTH_8X;
377         } else if (active_width & MLX5_IB_WIDTH_12X) {
378                 *ib_width = IB_WIDTH_12X;
379         } else {
380                 mlx5_ib_dbg(dev, "Invalid active_width %d\n",
381                             (int)active_width);
382                 err = -EINVAL;
383         }
384
385         return err;
386 }
387
388 /*
389  * TODO: Move to IB core
390  */
391 enum ib_max_vl_num {
392         __IB_MAX_VL_0           = 1,
393         __IB_MAX_VL_0_1         = 2,
394         __IB_MAX_VL_0_3         = 3,
395         __IB_MAX_VL_0_7         = 4,
396         __IB_MAX_VL_0_14        = 5,
397 };
398
399 enum mlx5_vl_hw_cap {
400         MLX5_VL_HW_0    = 1,
401         MLX5_VL_HW_0_1  = 2,
402         MLX5_VL_HW_0_2  = 3,
403         MLX5_VL_HW_0_3  = 4,
404         MLX5_VL_HW_0_4  = 5,
405         MLX5_VL_HW_0_5  = 6,
406         MLX5_VL_HW_0_6  = 7,
407         MLX5_VL_HW_0_7  = 8,
408         MLX5_VL_HW_0_14 = 15
409 };
410
411 static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
412                                 u8 *max_vl_num)
413 {
414         switch (vl_hw_cap) {
415         case MLX5_VL_HW_0:
416                 *max_vl_num = __IB_MAX_VL_0;
417                 break;
418         case MLX5_VL_HW_0_1:
419                 *max_vl_num = __IB_MAX_VL_0_1;
420                 break;
421         case MLX5_VL_HW_0_3:
422                 *max_vl_num = __IB_MAX_VL_0_3;
423                 break;
424         case MLX5_VL_HW_0_7:
425                 *max_vl_num = __IB_MAX_VL_0_7;
426                 break;
427         case MLX5_VL_HW_0_14:
428                 *max_vl_num = __IB_MAX_VL_0_14;
429                 break;
430
431         default:
432                 return -EINVAL;
433         }
434
435         return 0;
436 }
437
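/*
 * Query an InfiniBand port: the HCA vport context supplies the LID, SM
 * and capability information, while the PTYS, PMTU and PVLC access
 * registers supply the active width/speed, the MTU and the VL
 * capabilities.
 */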
438 static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
439                               struct ib_port_attr *props)
440 {
441         struct mlx5_ib_dev *dev = to_mdev(ibdev);
442         struct mlx5_core_dev *mdev = dev->mdev;
443         u32 *rep;
444         int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
445         struct mlx5_ptys_reg *ptys;
446         struct mlx5_pmtu_reg *pmtu;
447         struct mlx5_pvlc_reg pvlc;
448         void *ctx;
449         int err;
450
451         rep = mlx5_vzalloc(outlen);
452         ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
453         pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
454         if (!rep || !ptys || !pmtu) {
455                 err = -ENOMEM;
456                 goto out;
457         }
458
459         memset(props, 0, sizeof(*props));
460
461         /* TODO: handle the dual-port PF (physical function) case */
462         err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
463         if (err)
464                 goto out;
465
466         ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);
467
468         props->lid              = MLX5_GET(hca_vport_context, ctx, lid);
469         props->lmc              = MLX5_GET(hca_vport_context, ctx, lmc);
470         props->sm_lid           = MLX5_GET(hca_vport_context, ctx, sm_lid);
471         props->sm_sl            = MLX5_GET(hca_vport_context, ctx, sm_sl);
472         props->state            = MLX5_GET(hca_vport_context, ctx, vport_state);
473         props->phys_state       = MLX5_GET(hca_vport_context, ctx,
474                                         port_physical_state);
475         props->port_cap_flags   = MLX5_GET(hca_vport_context, ctx, cap_mask1);
476         props->gid_tbl_len      = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
477         props->max_msg_sz       = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
478         props->pkey_tbl_len     = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
479         props->bad_pkey_cntr    = MLX5_GET(hca_vport_context, ctx,
480                                               pkey_violation_counter);
481         props->qkey_viol_cntr   = MLX5_GET(hca_vport_context, ctx,
482                                               qkey_violation_counter);
483         props->subnet_timeout   = MLX5_GET(hca_vport_context, ctx,
484                                               subnet_timeout);
485         props->init_type_reply  = MLX5_GET(hca_vport_context, ctx,
486                                            init_type_reply);
487
488         ptys->proto_mask |= MLX5_PTYS_IB;
489         ptys->local_port = port;
490         err = mlx5_core_access_ptys(mdev, ptys, 0);
491         if (err)
492                 goto out;
493
494         err = translate_active_width(ibdev, ptys->ib_link_width_oper,
495                                      &props->active_width);
496         if (err)
497                 goto out;
498
499         props->active_speed     = (u8)ptys->ib_proto_oper;
500
501         pmtu->local_port = port;
502         err = mlx5_core_access_pmtu(mdev, pmtu, 0);
503         if (err)
504                 goto out;
505
506         props->max_mtu          = pmtu->max_mtu;
507         props->active_mtu       = pmtu->oper_mtu;
508
509         memset(&pvlc, 0, sizeof(pvlc));
510         pvlc.local_port = port;
511         err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
512         if (err)
513                 goto out;
514
515         err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
516                                    &props->max_vl_num);
517 out:
518         kvfree(rep);
519         kfree(ptys);
520         kfree(pmtu);
521         return err;
522 }
523
524 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
525                        struct ib_port_attr *props)
526 {
527         switch (mlx5_get_vport_access_method(ibdev)) {
528         case MLX5_VPORT_ACCESS_METHOD_MAD:
529                 return mlx5_query_port_mad_ifc(ibdev, port, props);
530
531         case MLX5_VPORT_ACCESS_METHOD_HCA:
532                 return mlx5_query_port_ib(ibdev, port, props);
533
534         case MLX5_VPORT_ACCESS_METHOD_NIC:
535                 return mlx5_query_port_roce(ibdev, port, props);
536
537         default:
538                 return -EINVAL;
539         }
540 }
541
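/*
 * Build the modified EUI-64 interface identifier from the interface MAC
 * address: insert 0xFFFE between the OUI and NIC parts and flip the
 * universal/local bit.  For example, MAC 00:11:22:33:44:55 yields
 * 02:11:22:ff:fe:33:44:55, which mlx5_make_default_gid() places behind
 * the fe80::/64 link-local prefix.
 */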
542 static inline int
543 mlx5_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
544 {
545         if (dev->if_addrlen != ETH_ALEN)
546                 return -1;
547         memcpy(eui, IF_LLADDR(dev), 3);
548         memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
549
550         /* NOTE: The scope ID is added by the GID to IP conversion */
551
552         eui[3] = 0xFF;
553         eui[4] = 0xFE;
554         eui[0] ^= 2;
555         return 0;
556 }
557
558 static void
559 mlx5_make_default_gid(struct net_device *dev, union ib_gid *gid)
560 {
561         gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
562         mlx5_addrconf_ifid_eui48(&gid->raw[8], dev);
563 }
564
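/*
 * RoCE GID table maintenance thread.  Roughly once a second it rebuilds
 * the software GID table from the default (EUI-64 based) GID plus the
 * IPv4/IPv6 addresses configured on the underlying ifnet and its VLAN
 * interfaces, pushes any changes to firmware through modify_gid_roce()
 * and dispatches IB_EVENT_GID_CHANGE so the IB core refreshes its cache.
 * When port_gone is set the thread sends a final GID change event, marks
 * the port with port_gone = 2 and exits.
 */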
565 static void
566 mlx5_ib_roce_port_update(void *arg)
567 {
568         struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg;
569         struct mlx5_ib_dev *dev = port->dev;
570         struct mlx5_core_dev *mdev = dev->mdev;
571         struct net_device *xdev[MLX5_IB_GID_MAX];
572         struct net_device *idev;
573         struct net_device *ndev;
574         struct ifaddr *ifa;
575         union ib_gid gid_temp;
576
577         while (port->port_gone == 0) {
578                 int update = 0;
579                 int gid_index = 0;
580                 int j;
581                 int error;
582
583                 ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH);
584                 if (ndev == NULL) {
585                         pause("W", hz);
586                         continue;
587                 }
588
589                 CURVNET_SET_QUIET(ndev->if_vnet);
590
591                 memset(&gid_temp, 0, sizeof(gid_temp));
592                 mlx5_make_default_gid(ndev, &gid_temp);
593                 if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
594                         port->gid_table[gid_index] = gid_temp;
595                         update = 1;
596                 }
597                 xdev[gid_index] = ndev;
598                 gid_index++;
599
600                 IFNET_RLOCK();
601                 TAILQ_FOREACH(idev, &V_ifnet, if_link) {
602                         if (idev == ndev)
603                                 break;
604                 }
605                 if (idev != NULL) {
606                     TAILQ_FOREACH(idev, &V_ifnet, if_link) {
607                         if (idev != ndev) {
608                                 if (idev->if_type != IFT_L2VLAN)
609                                         continue;
610                                 if (ndev != rdma_vlan_dev_real_dev(idev))
611                                         continue;
612                         }
613                         /* clone address information for IPv4 and IPv6 */
614                         IF_ADDR_RLOCK(idev);
615                         TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
616                                 if (ifa->ifa_addr == NULL ||
617                                     (ifa->ifa_addr->sa_family != AF_INET &&
618                                      ifa->ifa_addr->sa_family != AF_INET6) ||
619                                     gid_index >= MLX5_IB_GID_MAX)
620                                         continue;
621                                 memset(&gid_temp, 0, sizeof(gid_temp));
622                                 rdma_ip2gid(ifa->ifa_addr, &gid_temp);
623                                 /* check for existing entry */
624                                 for (j = 0; j != gid_index; j++) {
625                                         if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0)
626                                                 break;
627                                 }
628                                 /* check if new entry must be added */
629                                 if (j == gid_index) {
630                                         if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
631                                                 port->gid_table[gid_index] = gid_temp;
632                                                 update = 1;
633                                         }
634                                         xdev[gid_index] = idev;
635                                         gid_index++;
636                                 }
637                         }
638                         IF_ADDR_RUNLOCK(idev);
639                     }
640                 }
641                 IFNET_RUNLOCK();
642                 CURVNET_RESTORE();
643
644                 if (update != 0 &&
645                     mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) {
646                         struct ib_event event = {
647                             .device = &dev->ib_dev,
648                             .element.port_num = port->port_num + 1,
649                             .event = IB_EVENT_GID_CHANGE,
650                         };
651
652                         /* add new entries, if any */
653                         for (j = 0; j != gid_index; j++) {
654                                 error = modify_gid_roce(&dev->ib_dev, port->port_num, j,
655                                     port->gid_table + j, xdev[j]);
656                                 if (error != 0)
657                                         printf("mlx5_ib: Failed to update ROCE GID table: %d\n", error);
658                         }
659                         memset(&gid_temp, 0, sizeof(gid_temp));
660
661                         /* clear old entries, if any */
662                         for (; j != MLX5_IB_GID_MAX; j++) {
663                                 if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0)
664                                         continue;
665                                 port->gid_table[j] = gid_temp;
666                                 (void) modify_gid_roce(&dev->ib_dev, port->port_num, j,
667                                     port->gid_table + j, ndev);
668                         }
669
670                         /* make sure ibcore gets updated */
671                         ib_dispatch_event(&event);
672                 }
673                 pause("W", hz);
674         }
675         do {
676                 struct ib_event event = {
677                         .device = &dev->ib_dev,
678                         .element.port_num = port->port_num + 1,
679                         .event = IB_EVENT_GID_CHANGE,
680                 };
681                 /* make sure ibcore gets updated */
682                 ib_dispatch_event(&event);
683
684                 /* wait a bit */
685                 pause("W", hz);
686         } while (0);
687         port->port_gone = 2;
688         kthread_exit();
689 }
690
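/*
 * For RoCE ports the GID is served from the software gid_table that the
 * update thread above maintains; out-of-range or stale requests return
 * the zero GID.  InfiniBand ports query the firmware (or the MAD
 * interface) directly.
 */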
691 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
692                              union ib_gid *gid)
693 {
694         struct mlx5_ib_dev *dev = to_mdev(ibdev);
695         struct mlx5_core_dev *mdev = dev->mdev;
696
697         switch (mlx5_get_vport_access_method(ibdev)) {
698         case MLX5_VPORT_ACCESS_METHOD_MAD:
699                 return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);
700
701         case MLX5_VPORT_ACCESS_METHOD_HCA:
702                 return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);
703
704         case MLX5_VPORT_ACCESS_METHOD_NIC:
705                 if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
706                     index < 0 || index >= MLX5_IB_GID_MAX ||
707                     dev->port[port - 1].port_gone != 0)
708                         memset(gid, 0, sizeof(*gid));
709                 else
710                         *gid = dev->port[port - 1].gid_table[index];
711                 return 0;
712
713         default:
714                 return -EINVAL;
715         }
716 }
717
718 static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
719                               u16 *pkey)
720 {
721         struct mlx5_ib_dev *dev = to_mdev(ibdev);
722         struct mlx5_core_dev *mdev = dev->mdev;
723
724         switch (mlx5_get_vport_access_method(ibdev)) {
725         case MLX5_VPORT_ACCESS_METHOD_MAD:
726                 return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);
727
728         case MLX5_VPORT_ACCESS_METHOD_HCA:
729         case MLX5_VPORT_ACCESS_METHOD_NIC:
730                 return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
731                                                  pkey);
732
733         default:
734                 return -EINVAL;
735         }
736 }
737
738 static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
739                                  struct ib_device_modify *props)
740 {
741         struct mlx5_ib_dev *dev = to_mdev(ibdev);
742         struct mlx5_reg_node_desc in;
743         struct mlx5_reg_node_desc out;
744         int err;
745
746         if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
747                 return -EOPNOTSUPP;
748
749         if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
750                 return 0;
751
752         /*
753          * If possible, pass the node description to the FW so it can
754          * generate a trap 144 (node description changed); ignore failures.
755          */
756         memcpy(&in, props->node_desc, 64);
757         err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
758                                    sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
759         if (err)
760                 return err;
761
762         memcpy(ibdev->node_desc, props->node_desc, 64);
763
764         return err;
765 }
766
767 static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
768                                struct ib_port_modify *props)
769 {
770         u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
771                      IB_LINK_LAYER_ETHERNET);
772         struct mlx5_ib_dev *dev = to_mdev(ibdev);
773         struct ib_port_attr attr;
774         u32 tmp;
775         int err;
776
777         /* return OK if this is RoCE. CM calls ib_modify_port() regardless
778          * of whether port link layer is ETH or IB. For ETH ports, qkey
779          * violations and port capabilities are not valid.
780          */
781         if (is_eth)
782                 return 0;
783
784         mutex_lock(&dev->cap_mask_mutex);
785
786         err = mlx5_ib_query_port(ibdev, port, &attr);
787         if (err)
788                 goto out;
789
790         tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
791                 ~props->clr_port_cap_mask;
792
793         err = mlx5_set_port_caps(dev->mdev, port, tmp);
794
795 out:
796         mutex_unlock(&dev->cap_mask_mutex);
797         return err;
798 }
799
800 enum mlx5_cap_flags {
801         MLX5_CAP_COMPACT_AV = 1 << 0,
802 };
803
804 static void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
805 {
806         *flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
807                   MLX5_CAP_COMPACT_AV : 0;
808 }
809
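/*
 * Allocate a user context: validate the v0/v2 ABI request, round the
 * requested number of uuars up to whole UAR pages, allocate the UARs and
 * the blue-flame allocation bitmap, report the device limits back to
 * user space and, for RoCE, allocate a transport domain as well.
 */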
810 static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
811                                                   struct ib_udata *udata)
812 {
813         struct mlx5_ib_dev *dev = to_mdev(ibdev);
814         struct mlx5_ib_alloc_ucontext_req_v2 req;
815         struct mlx5_ib_alloc_ucontext_resp resp;
816         struct mlx5_ib_ucontext *context;
817         struct mlx5_uuar_info *uuari;
818         struct mlx5_uar *uars;
819         int gross_uuars;
820         int num_uars;
821         int ver;
822         int uuarn;
823         int err;
824         int i;
825         size_t reqlen;
826
827         if (!dev->ib_active)
828                 return ERR_PTR(-EAGAIN);
829
830         memset(&req, 0, sizeof(req));
831         memset(&resp, 0, sizeof(resp));
832
833         reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
834         if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
835                 ver = 0;
836         else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
837                 ver = 2;
838         else {
839                 mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen);
840                 return ERR_PTR(-EINVAL);
841         }
842
843         err = ib_copy_from_udata(&req, udata, reqlen);
844         if (err) {
845                 mlx5_ib_err(dev, "copy failed\n");
846                 return ERR_PTR(err);
847         }
848
849         if (req.reserved) {
850                 mlx5_ib_err(dev, "request corrupted\n");
851                 return ERR_PTR(-EINVAL);
852         }
853
854         if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
855                 mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
856                 return ERR_PTR(-ENOMEM);
857         }
858
859         req.total_num_uuars = ALIGN(req.total_num_uuars,
860                                     MLX5_NON_FP_BF_REGS_PER_PAGE);
861         if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
862                 mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
863                              req.num_low_latency_uuars, req.total_num_uuars - 1);
864                 return ERR_PTR(-EINVAL);
865         }
866
867         num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
868         gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
869         resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
870         if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
871                 resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
872         resp.cache_line_size = L1_CACHE_BYTES;
873         resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
874         resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
875         resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
876         resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
877         resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
878         set_mlx5_flags(&resp.flags, dev->mdev);
879
880         if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
881                 resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);
882
883         if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
884                 resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
885
886         context = kzalloc(sizeof(*context), GFP_KERNEL);
887         if (!context)
888                 return ERR_PTR(-ENOMEM);
889
890         uuari = &context->uuari;
891         mutex_init(&uuari->lock);
892         uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
893         if (!uars) {
894                 err = -ENOMEM;
895                 goto out_ctx;
896         }
897
898         uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
899                                 sizeof(*uuari->bitmap),
900                                 GFP_KERNEL);
901         if (!uuari->bitmap) {
902                 err = -ENOMEM;
903                 goto out_uar_ctx;
904         }
905         /*
906          * mark all fast path uuars as reserved in the allocation bitmap
907          */
908         for (i = 0; i < gross_uuars; i++) {
909                 uuarn = i & 3;
910                 if (uuarn == 2 || uuarn == 3)
911                         set_bit(i, uuari->bitmap);
912         }
913
914         uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
915         if (!uuari->count) {
916                 err = -ENOMEM;
917                 goto out_bitmap;
918         }
919
920         for (i = 0; i < num_uars; i++) {
921                 err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
922                 if (err) {
923                         mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
924                         goto out_uars;
925                 }
926         }
927         for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
928                 context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;
929
930         INIT_LIST_HEAD(&context->db_page_list);
931         mutex_init(&context->db_page_mutex);
932
933         resp.tot_uuars = req.total_num_uuars;
934         resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
935         err = ib_copy_to_udata(udata, &resp,
936                                min_t(size_t, udata->outlen, sizeof(resp)));
937         if (err)
938                 goto out_uars;
939
940         uuari->ver = ver;
941         uuari->num_low_latency_uuars = req.num_low_latency_uuars;
942         uuari->uars = uars;
943         uuari->num_uars = num_uars;
944
945         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
946             IB_LINK_LAYER_ETHERNET) {
947                 err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
948                 if (err)
949                         goto out_uars;
950         }
951
952         return &context->ibucontext;
953
954 out_uars:
955         for (i--; i >= 0; i--)
956                 mlx5_cmd_free_uar(dev->mdev, uars[i].index);
957         kfree(uuari->count);
958
959 out_bitmap:
960         kfree(uuari->bitmap);
961
962 out_uar_ctx:
963         kfree(uars);
964
965 out_ctx:
966         kfree(context);
967         return ERR_PTR(err);
968 }
969
970 static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
971 {
972         struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
973         struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
974         struct mlx5_uuar_info *uuari = &context->uuari;
975         int i;
976
977         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
978             IB_LINK_LAYER_ETHERNET)
979                 mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
980
981         for (i = 0; i < uuari->num_uars; i++) {
982                 if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
983                         mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
984         }
985         for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
986                 if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
987                         mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
988         }
989
990         kfree(uuari->count);
991         kfree(uuari->bitmap);
992         kfree(uuari->uars);
993         kfree(context);
994
995         return 0;
996 }
997
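/*
 * mmap offset encoding: the command is carried in the page-offset bits
 * above MLX5_IB_MMAP_CMD_SHIFT and the UAR index in the bits below it.
 * A user-space consumer would compute the offset along these lines
 * (illustrative sketch only, not taken from libmlx5; "idx" and "cmd_fd"
 * are placeholders):
 *
 *      off_t off = ((off_t)MLX5_IB_MMAP_WC_PAGE << MLX5_IB_MMAP_CMD_SHIFT) | idx;
 *      void *uar = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, cmd_fd,
 *          off * page_size);
 *
 * uar_index2pfn() turns a UAR index into a physical page frame number
 * inside BAR 0 of the device.
 */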
998 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
999 {
1000         return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
1001 }
1002
1003 static int get_command(unsigned long offset)
1004 {
1005         return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
1006 }
1007
1008 static int get_arg(unsigned long offset)
1009 {
1010         return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
1011 }
1012
1013 static int get_index(unsigned long offset)
1014 {
1015         return get_arg(offset);
1016 }
1017
1018 static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
1019                     struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
1020                     struct mlx5_ib_ucontext *context)
1021 {
1022         unsigned long idx;
1023         phys_addr_t pfn;
1024
1025         if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
1026                 mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
1027                              (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
1028                 return -EINVAL;
1029         }
1030
1031         idx = get_index(vma->vm_pgoff);
1032         if (idx >= uuari->num_uars) {
1033                 mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
1034                              idx, uuari->num_uars);
1035                 return -EINVAL;
1036         }
1037
1038         pfn = uar_index2pfn(dev, uuari->uars[idx].index);
1039         mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
1040                     (unsigned long long)pfn);
1041
1042         vma->vm_page_prot = prot;
1043         if (io_remap_pfn_range(vma, vma->vm_start, pfn,
1044                                PAGE_SIZE, vma->vm_page_prot)) {
1045                 mlx5_ib_err(dev, "io remap failed\n");
1046                 return -EAGAIN;
1047         }
1048
1049         mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
1050                     (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);
1051
1052         return 0;
1053 }
1054
1055 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
1056 {
1057         struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1058         struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1059         struct mlx5_uuar_info *uuari = &context->uuari;
1060         unsigned long command;
1061
1062         command = get_command(vma->vm_pgoff);
1063         switch (command) {
1064         case MLX5_IB_MMAP_REGULAR_PAGE:
1065                 return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1066                                 true,
1067                                 uuari, dev, context);
1068
1069                 break;
1070
1071         case MLX5_IB_MMAP_WC_PAGE:
1072                 return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1073                                 true, uuari, dev, context);
1074                 break;
1075
1076         case MLX5_IB_MMAP_NC_PAGE:
1077                 return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
1078                                 false, uuari, dev, context);
1079                 break;
1080
1081         default:
1082                 return -EINVAL;
1083         }
1084
1085         return 0;
1086 }
1087
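/*
 * Create a physical-address (PA) memory key with local read permission
 * covering the whole address space (MLX5_MKEY_LEN64).  It is attached to
 * protection domains allocated from the kernel, i.e. when no user
 * context is supplied to mlx5_ib_alloc_pd().
 */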
1088 static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
1089 {
1090         struct mlx5_create_mkey_mbox_in *in;
1091         struct mlx5_mkey_seg *seg;
1092         struct mlx5_core_mr mr;
1093         int err;
1094
1095         in = kzalloc(sizeof(*in), GFP_KERNEL);
1096         if (!in)
1097                 return -ENOMEM;
1098
1099         seg = &in->seg;
1100         seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
1101         seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
1102         seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1103         seg->start_addr = 0;
1104
1105         err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
1106                                     NULL, NULL, NULL);
1107         if (err) {
1108                 mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
1109                 goto err_in;
1110         }
1111
1112         kfree(in);
1113         *key = mr.key;
1114
1115         return 0;
1116
1117 err_in:
1118         kfree(in);
1119
1120         return err;
1121 }
1122
1123 static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
1124 {
1125         struct mlx5_core_mr mr;
1126         int err;
1127
1128         memset(&mr, 0, sizeof(mr));
1129         mr.key = key;
1130         err = mlx5_core_destroy_mkey(dev->mdev, &mr);
1131         if (err)
1132                 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
1133 }
1134
1135 static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
1136                                       struct ib_ucontext *context,
1137                                       struct ib_udata *udata)
1138 {
1139         struct mlx5_ib_dev *dev = to_mdev(ibdev);
1140         struct mlx5_ib_alloc_pd_resp resp;
1141         struct mlx5_ib_pd *pd;
1142         int err;
1143
1144         pd = kmalloc(sizeof(*pd), GFP_KERNEL);
1145         if (!pd)
1146                 return ERR_PTR(-ENOMEM);
1147
1148         err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
1149         if (err) {
1150                 mlx5_ib_warn(dev, "pd alloc failed\n");
1151                 kfree(pd);
1152                 return ERR_PTR(err);
1153         }
1154
1155         if (context) {
1156                 resp.pdn = pd->pdn;
1157                 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
1158                         mlx5_ib_err(dev, "copy failed\n");
1159                         mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1160                         kfree(pd);
1161                         return ERR_PTR(-EFAULT);
1162                 }
1163         } else {
1164                 err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
1165                 if (err) {
1166                         mlx5_ib_err(dev, "alloc mkey failed\n");
1167                         mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1168                         kfree(pd);
1169                         return ERR_PTR(err);
1170                 }
1171         }
1172
1173         return &pd->ibpd;
1174 }
1175
1176 static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
1177 {
1178         struct mlx5_ib_dev *mdev = to_mdev(pd->device);
1179         struct mlx5_ib_pd *mpd = to_mpd(pd);
1180
1181         if (!pd->uobject)
1182                 free_pa_mkey(mdev, mpd->pa_lkey);
1183
1184         mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
1185         kfree(mpd);
1186
1187         return 0;
1188 }
1189
1190 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1191 {
1192         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1193         int err;
1194
1195         if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1196                 err = -EOPNOTSUPP;
1197         else
1198                 err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
1199         if (err)
1200                 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
1201                              ibqp->qp_num, gid->raw);
1202
1203         return err;
1204 }
1205
1206 static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1207 {
1208         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1209         int err;
1210
1211         if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1212                 err = -EOPNOTSUPP;
1213         else
1214                 err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
1215         if (err)
1216                 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
1217                              ibqp->qp_num, gid->raw);
1218
1219         return err;
1220 }
1221
1222 static int init_node_data(struct mlx5_ib_dev *dev)
1223 {
1224         int err;
1225
1226         err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
1227         if (err)
1228                 return err;
1229
1230         return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
1231 }
1232
1233 static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
1234                              char *buf)
1235 {
1236         struct mlx5_ib_dev *dev =
1237                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1238
1239         return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
1240 }
1241
1242 static ssize_t show_reg_pages(struct device *device,
1243                               struct device_attribute *attr, char *buf)
1244 {
1245         struct mlx5_ib_dev *dev =
1246                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1247
1248         return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
1249 }
1250
1251 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1252                         char *buf)
1253 {
1254         struct mlx5_ib_dev *dev =
1255                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1256         return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
1257 }
1258
1259 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1260                            char *buf)
1261 {
1262         struct mlx5_ib_dev *dev =
1263                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1264         return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
1265                        fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
1266 }
1267
1268 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
1269                         char *buf)
1270 {
1271         struct mlx5_ib_dev *dev =
1272                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1273         return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
1274 }
1275
1276 static ssize_t show_board(struct device *device, struct device_attribute *attr,
1277                           char *buf)
1278 {
1279         struct mlx5_ib_dev *dev =
1280                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1281         return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
1282                        dev->mdev->board_id);
1283 }
1284
1285 static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
1286 static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
1287 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
1288 static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
1289 static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
1290 static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
1291
1292 static struct device_attribute *mlx5_class_attributes[] = {
1293         &dev_attr_hw_rev,
1294         &dev_attr_fw_ver,
1295         &dev_attr_hca_type,
1296         &dev_attr_board_id,
1297         &dev_attr_fw_pages,
1298         &dev_attr_reg_pages,
1299 };
1300
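/*
 * On a fatal device error, walk every QP on this ibdev and, for each
 * send or receive queue with outstanding work, put its CQ on a local
 * list; then invoke the completion handlers so consumers can poll out
 * the flush errors.  reset_flow_resource_lock serializes this walk
 * against QP creation and destruction.
 */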
1301 static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
1302 {
1303         struct mlx5_ib_qp *mqp;
1304         struct mlx5_ib_cq *send_mcq, *recv_mcq;
1305         struct mlx5_core_cq *mcq;
1306         struct list_head cq_armed_list;
1307         unsigned long flags_qp;
1308         unsigned long flags_cq;
1309         unsigned long flags;
1310
1311         mlx5_ib_warn(ibdev, " started\n");
1312         INIT_LIST_HEAD(&cq_armed_list);
1313
1314         /* Go over the QP list of this ibdev, synchronized with QP create/destroy. */
1315         spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
1316         list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
1317                 spin_lock_irqsave(&mqp->sq.lock, flags_qp);
1318                 if (mqp->sq.tail != mqp->sq.head) {
1319                         send_mcq = to_mcq(mqp->ibqp.send_cq);
1320                         spin_lock_irqsave(&send_mcq->lock, flags_cq);
1321                         if (send_mcq->mcq.comp &&
1322                             mqp->ibqp.send_cq->comp_handler) {
1323                                 if (!send_mcq->mcq.reset_notify_added) {
1324                                         send_mcq->mcq.reset_notify_added = 1;
1325                                         list_add_tail(&send_mcq->mcq.reset_notify,
1326                                                       &cq_armed_list);
1327                                 }
1328                         }
1329                         spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
1330                 }
1331                 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
1332                 spin_lock_irqsave(&mqp->rq.lock, flags_qp);
1333                 /* no handling is needed for SRQ */
1334                 if (!mqp->ibqp.srq) {
1335                         if (mqp->rq.tail != mqp->rq.head) {
1336                                 recv_mcq = to_mcq(mqp->ibqp.recv_cq);
1337                                 spin_lock_irqsave(&recv_mcq->lock, flags_cq);
1338                                 if (recv_mcq->mcq.comp &&
1339                                     mqp->ibqp.recv_cq->comp_handler) {
1340                                         if (!recv_mcq->mcq.reset_notify_added) {
1341                                                 recv_mcq->mcq.reset_notify_added = 1;
1342                                                 list_add_tail(&recv_mcq->mcq.reset_notify,
1343                                                               &cq_armed_list);
1344                                         }
1345                                 }
1346                                 spin_unlock_irqrestore(&recv_mcq->lock,
1347                                                        flags_cq);
1348                         }
1349                 }
1350                 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
1351         }
1352         /* At this point all in-flight post-send requests have been flushed
1353          * by the lock/unlock sequence above; now arm all involved CQs.
1354          */
1355         list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
1356                 mcq->comp(mcq);
1357         }
1358         spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
1359         mlx5_ib_warn(ibdev, " ended\n");
1360         return;
1361 }
1362
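/*
 * Translate mlx5 core events (port up/down, LID/PKEY/GUID changes,
 * client re-register, system error) into IB events and dispatch them to
 * the IB core while the device is marked active.
 */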
1363 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1364                           enum mlx5_dev_event event, unsigned long param)
1365 {
1366         struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
1367         struct ib_event ibev;
1368
1369         u8 port = 0;
1370
1371         switch (event) {
1372         case MLX5_DEV_EVENT_SYS_ERROR:
1373                 ibdev->ib_active = false;
1374                 ibev.event = IB_EVENT_DEVICE_FATAL;
1375                 mlx5_ib_handle_internal_error(ibdev);
1376                 break;
1377
1378         case MLX5_DEV_EVENT_PORT_UP:
1379                 ibev.event = IB_EVENT_PORT_ACTIVE;
1380                 port = (u8)param;
1381                 break;
1382
1383         case MLX5_DEV_EVENT_PORT_DOWN:
1384         case MLX5_DEV_EVENT_PORT_INITIALIZED:
1385                 ibev.event = IB_EVENT_PORT_ERR;
1386                 port = (u8)param;
1387                 break;
1388
1389         case MLX5_DEV_EVENT_LID_CHANGE:
1390                 ibev.event = IB_EVENT_LID_CHANGE;
1391                 port = (u8)param;
1392                 break;
1393
1394         case MLX5_DEV_EVENT_PKEY_CHANGE:
1395                 ibev.event = IB_EVENT_PKEY_CHANGE;
1396                 port = (u8)param;
1397                 break;
1398
1399         case MLX5_DEV_EVENT_GUID_CHANGE:
1400                 ibev.event = IB_EVENT_GID_CHANGE;
1401                 port = (u8)param;
1402                 break;
1403
1404         case MLX5_DEV_EVENT_CLIENT_REREG:
1405                 ibev.event = IB_EVENT_CLIENT_REREGISTER;
1406                 port = (u8)param;
1407                 break;
1408
1409         default:
1410                 break;
1411         }
1412
1413         ibev.device           = &ibdev->ib_dev;
1414         ibev.element.port_num = port;
1415
1416         if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
1417             (port < 1 || port > ibdev->num_ports)) {
1418                 mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
1419                 return;
1420         }
1421
1422         if (ibdev->ib_active)
1423                 ib_dispatch_event(&ibev);
1424 }
1425
1426 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
1427 {
1428         int port;
1429
1430         for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
1431                 mlx5_query_ext_port_caps(dev, port);
1432 }
1433
1434 static void config_atomic_responder(struct mlx5_ib_dev *dev,
1435                                     struct ib_device_attr *props)
1436 {
1437         enum ib_atomic_cap cap = props->atomic_cap;
1438
1439 #if 0
1440         if (cap == IB_ATOMIC_HCA ||
1441             cap == IB_ATOMIC_GLOB)
1442 #endif
1443                 dev->enable_atomic_resp = 1;
1444
1445         dev->atomic_cap = cap;
1446 }
1447
1448 enum mlx5_addr_align {
1449         MLX5_ADDR_ALIGN_0       = 0,
1450         MLX5_ADDR_ALIGN_64      = 64,
1451         MLX5_ADDR_ALIGN_128     = 128,
1452 };
1453
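/*
 * Query the device attributes and every port once at load time, configure
 * the atomic responder from the reported atomic capability, and cache the
 * pkey and GID table sizes in mdev->port_caps[] for later use.
 */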
1454 static int get_port_caps(struct mlx5_ib_dev *dev)
1455 {
1456         struct ib_device_attr *dprops = NULL;
1457         struct ib_port_attr *pprops = NULL;
1458         int err = -ENOMEM;
1459         int port;
1460
1461         pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1462         if (!pprops)
1463                 goto out;
1464
1465         dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1466         if (!dprops)
1467                 goto out;
1468
1469         err = mlx5_ib_query_device(&dev->ib_dev, dprops);
1470         if (err) {
1471                 mlx5_ib_warn(dev, "query_device failed %d\n", err);
1472                 goto out;
1473         }
1474         config_atomic_responder(dev, dprops);
1475
1476         for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
1477                 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1478                 if (err) {
1479                         mlx5_ib_warn(dev, "query_port %d failed %d\n",
1480                                      port, err);
1481                         break;
1482                 }
1483                 dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
1484                 dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
1485                 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1486                             dprops->max_pkeys, pprops->gid_tbl_len);
1487         }
1488
1489 out:
1490         kfree(pprops);
1491         kfree(dprops);
1492
1493         return err;
1494 }
1495
1496 static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1497 {
1498         int err;
1499
1500         err = mlx5_mr_cache_cleanup(dev);
1501         if (err)
1502                 mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1503
1504         ib_dereg_mr(dev->umrc.mr);
1505         ib_dealloc_pd(dev->umrc.pd);
1506 }
1507
1508 enum {
1509         MAX_UMR_WR = 128,
1510 };
1511
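/*
 * Set up the resources backing the UMR (user memory registration) machinery:
 * a dedicated PD, a local-write DMA MR, and the MR cache.  destroy_umrc_res()
 * above releases them in the opposite order.
 */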
1512 static int create_umr_res(struct mlx5_ib_dev *dev)
1513 {
1514         struct ib_pd *pd;
1515         struct ib_mr *mr;
1516         int ret;
1517
1518         pd = ib_alloc_pd(&dev->ib_dev);
1519         if (IS_ERR(pd)) {
1520                 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1521                 ret = PTR_ERR(pd);
1522                 goto error_0;
1523         }
1524
1525         mr = ib_get_dma_mr(pd,  IB_ACCESS_LOCAL_WRITE);
1526         if (IS_ERR(mr)) {
1527                 mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
1528                 ret = PTR_ERR(mr);
1529                 goto error_1;
1530         }
1531
1532         dev->umrc.mr = mr;
1533         dev->umrc.pd = pd;
1534
1535         ret = mlx5_mr_cache_init(dev);
1536         if (ret) {
1537                 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1538                 goto error_4;
1539         }
1540
1541         return 0;
1542
1543 error_4:
1544         ib_dereg_mr(mr);
1545 error_1:
1546         ib_dealloc_pd(pd);
1547 error_0:
1548         return ret;
1549 }
1550
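/*
 * Create the device-internal verbs objects kept in mlx5_ib_resources: a PD
 * (p0), a one-entry CQ (c0), two XRC domains (x0, x1), an XRC SRQ (s0) and a
 * basic SRQ (s1).  These are allocated through the driver entry points with
 * no user context, so the bookkeeping fields that ib_uverbs would normally
 * fill in (device, uobject, reference counts) are initialized by hand here.
 */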
1551 static int create_dev_resources(struct mlx5_ib_resources *devr)
1552 {
1553         struct ib_srq_init_attr attr;
1554         struct mlx5_ib_dev *dev;
1555         int ret = 0;
1556         struct ib_cq_init_attr cq_attr = { .cqe = 1 };
1557
1558         dev = container_of(devr, struct mlx5_ib_dev, devr);
1559
1560         devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1561         if (IS_ERR(devr->p0)) {
1562                 ret = PTR_ERR(devr->p0);
1563                 goto error0;
1564         }
1565         devr->p0->device  = &dev->ib_dev;
1566         devr->p0->uobject = NULL;
1567         atomic_set(&devr->p0->usecnt, 0);
1568
1569         devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
1570         if (IS_ERR(devr->c0)) {
1571                 ret = PTR_ERR(devr->c0);
1572                 goto error1;
1573         }
1574         devr->c0->device        = &dev->ib_dev;
1575         devr->c0->uobject       = NULL;
1576         devr->c0->comp_handler  = NULL;
1577         devr->c0->event_handler = NULL;
1578         devr->c0->cq_context    = NULL;
1579         atomic_set(&devr->c0->usecnt, 0);
1580
1581         devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1582         if (IS_ERR(devr->x0)) {
1583                 ret = PTR_ERR(devr->x0);
1584                 goto error2;
1585         }
1586         devr->x0->device = &dev->ib_dev;
1587         devr->x0->inode = NULL;
1588         atomic_set(&devr->x0->usecnt, 0);
1589         mutex_init(&devr->x0->tgt_qp_mutex);
1590         INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
1591
1592         devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1593         if (IS_ERR(devr->x1)) {
1594                 ret = PTR_ERR(devr->x1);
1595                 goto error3;
1596         }
1597         devr->x1->device = &dev->ib_dev;
1598         devr->x1->inode = NULL;
1599         atomic_set(&devr->x1->usecnt, 0);
1600         mutex_init(&devr->x1->tgt_qp_mutex);
1601         INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
1602
1603         memset(&attr, 0, sizeof(attr));
1604         attr.attr.max_sge = 1;
1605         attr.attr.max_wr = 1;
1606         attr.srq_type = IB_SRQT_XRC;
1607         attr.ext.xrc.cq = devr->c0;
1608         attr.ext.xrc.xrcd = devr->x0;
1609
1610         devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1611         if (IS_ERR(devr->s0)) {
1612                 ret = PTR_ERR(devr->s0);
1613                 goto error4;
1614         }
1615         devr->s0->device        = &dev->ib_dev;
1616         devr->s0->pd            = devr->p0;
1617         devr->s0->uobject       = NULL;
1618         devr->s0->event_handler = NULL;
1619         devr->s0->srq_context   = NULL;
1620         devr->s0->srq_type      = IB_SRQT_XRC;
1621         devr->s0->ext.xrc.xrcd  = devr->x0;
1622         devr->s0->ext.xrc.cq    = devr->c0;
1623         atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
1624         atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
1625         atomic_inc(&devr->p0->usecnt);
1626         atomic_set(&devr->s0->usecnt, 0);
1627
1628         memset(&attr, 0, sizeof(attr));
1629         attr.attr.max_sge = 1;
1630         attr.attr.max_wr = 1;
1631         attr.srq_type = IB_SRQT_BASIC;
1632         devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1633         if (IS_ERR(devr->s1)) {
1634                 ret = PTR_ERR(devr->s1);
1635                 goto error5;
1636         }
1637         devr->s1->device        = &dev->ib_dev;
1638         devr->s1->pd            = devr->p0;
1639         devr->s1->uobject       = NULL;
1640         devr->s1->event_handler = NULL;
1641         devr->s1->srq_context   = NULL;
1642         devr->s1->srq_type      = IB_SRQT_BASIC;
1643         devr->s1->ext.xrc.cq    = devr->c0;
1644         atomic_inc(&devr->p0->usecnt);
1645         atomic_set(&devr->s1->usecnt, 0);
1646
1647         return 0;
1648
1649 error5:
1650         mlx5_ib_destroy_srq(devr->s0);
1651 error4:
1652         mlx5_ib_dealloc_xrcd(devr->x1);
1653 error3:
1654         mlx5_ib_dealloc_xrcd(devr->x0);
1655 error2:
1656         mlx5_ib_destroy_cq(devr->c0);
1657 error1:
1658         mlx5_ib_dealloc_pd(devr->p0);
1659 error0:
1660         return ret;
1661 }
1662
1663 static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1664 {
1665         mlx5_ib_destroy_srq(devr->s1);
1666         mlx5_ib_destroy_srq(devr->s0);
1667         mlx5_ib_dealloc_xrcd(devr->x0);
1668         mlx5_ib_dealloc_xrcd(devr->x1);
1669         mlx5_ib_destroy_cq(devr->c0);
1670         mlx5_ib_dealloc_pd(devr->p0);
1671 }
1672
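/*
 * Compute the RDMA core capability flags for port 1.  An InfiniBand link
 * layer reports plain IB; an Ethernet link layer always reports raw packet
 * support and additionally reports RoCE v1 and/or RoCE v2 (UDP encap) only
 * when the HCA advertises both IPv4 and IPv6 L3 types together with the
 * corresponding roce_version capability bits.
 */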
1673 static u32 get_core_cap_flags(struct ib_device *ibdev)
1674 {
1675         struct mlx5_ib_dev *dev = to_mdev(ibdev);
1676         enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
1677         u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
1678         u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
1679         u32 ret = 0;
1680
1681         if (ll == IB_LINK_LAYER_INFINIBAND)
1682                 return RDMA_CORE_PORT_IBA_IB;
1683
1684         ret = RDMA_CORE_PORT_RAW_PACKET;
1685
1686         if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
1687                 return ret;
1688
1689         if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
1690                 return ret;
1691
1692         if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
1693                 ret |= RDMA_CORE_PORT_IBA_ROCE;
1694
1695         if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
1696                 ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
1697
1698         return ret;
1699 }
1700
1701 static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
1702                                struct ib_port_immutable *immutable)
1703 {
1704         struct ib_port_attr attr;
1705         struct mlx5_ib_dev *dev = to_mdev(ibdev);
1706         enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
1707         int err;
1708
1709         immutable->core_cap_flags = get_core_cap_flags(ibdev);
1710
1711         err = ib_query_port(ibdev, port_num, &attr);
1712         if (err)
1713                 return err;
1714
1715         immutable->pkey_tbl_len = attr.pkey_tbl_len;
1716         immutable->gid_tbl_len = attr.gid_tbl_len;
1717         immutable->core_cap_flags = get_core_cap_flags(ibdev);
1718         if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
1719                 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
1720
1721         return 0;
1722 }
1723
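/*
 * DC CNAK tracing: allocate one or two pages (depending on the number of
 * ports), fill them with 0xff, DMA-map them for device writes and hand the
 * bus address to firmware through mlx5_core_set_dc_cnak_trace().
 * disable_dc_tracer() tears the buffer down again.  Any failure here is
 * logged and the feature is simply left disabled.
 */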
1724 static void enable_dc_tracer(struct mlx5_ib_dev *dev)
1725 {
1726         struct device *device = dev->ib_dev.dma_device;
1727         struct mlx5_dc_tracer *dct = &dev->dctr;
1728         int order;
1729         void *tmp;
1730         int size;
1731         int err;
1732
1733         size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
1734         if (size <= PAGE_SIZE)
1735                 order = 0;
1736         else
1737                 order = 1;
1738
1739         dct->pg = alloc_pages(GFP_KERNEL, order);
1740         if (!dct->pg) {
1741                 mlx5_ib_err(dev, "failed to allocate pages (order %d)\n", order);
1742                 return;
1743         }
1744
1745         tmp = page_address(dct->pg);
1746         memset(tmp, 0xff, size);
1747
1748         dct->size = size;
1749         dct->order = order;
1750         dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
1751         if (dma_mapping_error(device, dct->dma)) {
1752                 mlx5_ib_err(dev, "dma mapping error\n");
1753                 goto map_err;
1754         }
1755
1756         err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
1757         if (err) {
1758                 mlx5_ib_warn(dev, "failed to enable DC tracer\n");
1759                 goto cmd_err;
1760         }
1761
1762         return;
1763
1764 cmd_err:
1765         dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
1766 map_err:
1767         __free_pages(dct->pg, dct->order);
1768         dct->pg = NULL;
1769 }
1770
1771 static void disable_dc_tracer(struct mlx5_ib_dev *dev)
1772 {
1773         struct device *device = dev->ib_dev.dma_device;
1774         struct mlx5_dc_tracer *dct = &dev->dctr;
1775         int err;
1776
1777         if (!dct->pg)
1778                 return;
1779
1780         err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
1781         if (err) {
1782                 mlx5_ib_warn(dev, "failed to disable DC tracer\n");
1783                 return;
1784         }
1785
1786         dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
1787         __free_pages(dct->pg, dct->order);
1788         dct->pg = NULL;
1789 }
1790
1791 enum {
1792         MLX5_DC_CNAK_SIZE               = 128,
1793         MLX5_NUM_BUF_IN_PAGE            = PAGE_SIZE / MLX5_DC_CNAK_SIZE,
1794         MLX5_CNAK_TX_CQ_SIGNAL_FACTOR   = 128,
1795         MLX5_DC_CNAK_SL                 = 0,
1796         MLX5_DC_CNAK_VL                 = 0,
1797 };
1798
1799 static int init_dc_improvements(struct mlx5_ib_dev *dev)
1800 {
1801         if (!mlx5_core_is_pf(dev->mdev))
1802                 return 0;
1803
1804         if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
1805                 return 0;
1806
1807         enable_dc_tracer(dev);
1808
1809         return 0;
1810 }
1811
1812 static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
1813 {
1814
1815         disable_dc_tracer(dev);
1816 }
1817
1818 static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
1819 {
1820         mlx5_vport_dealloc_q_counter(dev->mdev,
1821                                      MLX5_INTERFACE_PROTOCOL_IB,
1822                                      dev->port[port_num].q_cnt_id);
1823         dev->port[port_num].q_cnt_id = 0;
1824 }
1825
1826 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
1827 {
1828         unsigned int i;
1829
1830         for (i = 0; i < dev->num_ports; i++)
1831                 mlx5_ib_dealloc_q_port_counter(dev, i);
1832 }
1833
1834 static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
1835 {
1836         int i;
1837         int ret;
1838
1839         for (i = 0; i < dev->num_ports; i++) {
1840                 ret = mlx5_vport_alloc_q_counter(dev->mdev,
1841                                                  MLX5_INTERFACE_PROTOCOL_IB,
1842                                                  &dev->port[i].q_cnt_id);
1843                 if (ret) {
1844                         mlx5_ib_warn(dev,
1845                                      "couldn't allocate queue counter for port %d\n",
1846                                      i + 1);
1847                         goto dealloc_counters;
1848                 }
1849         }
1850
1851         return 0;
1852
1853 dealloc_counters:
1854         while (--i >= 0)
1855                 mlx5_ib_dealloc_q_port_counter(dev, i);
1856
1857         return ret;
1858 }
1859
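/*
 * Per-port queue-counter statistics exported through sysfs.  For each port,
 * create_port_attrs() adds a numbered kobject under a "mlx5_ports" directory
 * beneath the IB device and, when the HCA supports the out-of-sequence and
 * retransmission counters, attaches the "counters" attribute group.  Reading
 * one of its files queries the hardware counter set with
 * mlx5_vport_query_q_counter() and prints a single decimal value; the files
 * appear at paths of the form (relative to the device's sysfs node, exact
 * prefix depends on the sysfs emulation in use):
 *
 *	mlx5_ports/1/counters/out_of_sequence
 */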
1860 struct port_attribute {
1861         struct attribute attr;
1862         ssize_t (*show)(struct mlx5_ib_port *,
1863                         struct port_attribute *, char *buf);
1864         ssize_t (*store)(struct mlx5_ib_port *,
1865                          struct port_attribute *,
1866                          const char *buf, size_t count);
1867 };
1868
1869 struct port_counter_attribute {
1870         struct port_attribute   attr;
1871         size_t                  offset;
1872 };
1873
1874 static ssize_t port_attr_show(struct kobject *kobj,
1875                               struct attribute *attr, char *buf)
1876 {
1877         struct port_attribute *port_attr =
1878                 container_of(attr, struct port_attribute, attr);
1879         struct mlx5_ib_port_sysfs_group *p =
1880                 container_of(kobj, struct mlx5_ib_port_sysfs_group,
1881                              kobj);
1882         struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
1883                                                     group);
1884
1885         if (!port_attr->show)
1886                 return -EIO;
1887
1888         return port_attr->show(mibport, port_attr, buf);
1889 }
1890
1891 static ssize_t show_port_counter(struct mlx5_ib_port *p,
1892                                  struct port_attribute *port_attr,
1893                                  char *buf)
1894 {
1895         int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
1896         struct port_counter_attribute *counter_attr =
1897                 container_of(port_attr, struct port_counter_attribute, attr);
1898         void *out;
1899         int ret;
1900
1901         out = mlx5_vzalloc(outlen);
1902         if (!out)
1903                 return -ENOMEM;
1904
1905         ret = mlx5_vport_query_q_counter(p->dev->mdev,
1906                                          p->q_cnt_id, 0,
1907                                          out, outlen);
1908         if (ret)
1909                 goto free;
1910
1911         ret = sprintf(buf, "%d\n",
1912                       be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));
1913
1914 free:
1915         kfree(out);
1916         return ret;
1917 }
1918
1919 #define PORT_COUNTER_ATTR(_name)                                        \
1920 struct port_counter_attribute port_counter_attr_##_name = {             \
1921         .attr  = __ATTR(_name, S_IRUGO, show_port_counter, NULL),       \
1922         .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)             \
1923 }
1924
1925 static PORT_COUNTER_ATTR(rx_write_requests);
1926 static PORT_COUNTER_ATTR(rx_read_requests);
1927 static PORT_COUNTER_ATTR(rx_atomic_requests);
1928 static PORT_COUNTER_ATTR(rx_dct_connect);
1929 static PORT_COUNTER_ATTR(out_of_buffer);
1930 static PORT_COUNTER_ATTR(out_of_sequence);
1931 static PORT_COUNTER_ATTR(duplicate_request);
1932 static PORT_COUNTER_ATTR(rnr_nak_retry_err);
1933 static PORT_COUNTER_ATTR(packet_seq_err);
1934 static PORT_COUNTER_ATTR(implied_nak_seq_err);
1935 static PORT_COUNTER_ATTR(local_ack_timeout_err);
1936
1937 static struct attribute *counter_attrs[] = {
1938         &port_counter_attr_rx_write_requests.attr.attr,
1939         &port_counter_attr_rx_read_requests.attr.attr,
1940         &port_counter_attr_rx_atomic_requests.attr.attr,
1941         &port_counter_attr_rx_dct_connect.attr.attr,
1942         &port_counter_attr_out_of_buffer.attr.attr,
1943         &port_counter_attr_out_of_sequence.attr.attr,
1944         &port_counter_attr_duplicate_request.attr.attr,
1945         &port_counter_attr_rnr_nak_retry_err.attr.attr,
1946         &port_counter_attr_packet_seq_err.attr.attr,
1947         &port_counter_attr_implied_nak_seq_err.attr.attr,
1948         &port_counter_attr_local_ack_timeout_err.attr.attr,
1949         NULL
1950 };
1951
1952 static struct attribute_group port_counters_group = {
1953         .name  = "counters",
1954         .attrs  = counter_attrs
1955 };
1956
1957 static const struct sysfs_ops port_sysfs_ops = {
1958         .show = port_attr_show
1959 };
1960
1961 static struct kobj_type port_type = {
1962         .sysfs_ops     = &port_sysfs_ops,
1963 };
1964
1965 static int add_port_attrs(struct mlx5_ib_dev *dev,
1966                           struct kobject *parent,
1967                           struct mlx5_ib_port_sysfs_group *port,
1968                           u8 port_num)
1969 {
1970         int ret;
1971
1972         ret = kobject_init_and_add(&port->kobj, &port_type,
1973                                    parent,
1974                                    "%d", port_num);
1975         if (ret)
1976                 return ret;
1977
1978         if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
1979             MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
1980                 ret = sysfs_create_group(&port->kobj, &port_counters_group);
1981                 if (ret)
1982                         goto put_kobj;
1983         }
1984
1985         port->enabled = true;
1986         return ret;
1987
1988 put_kobj:
1989         kobject_put(&port->kobj);
1990         return ret;
1991 }
1992
1993 static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
1994                                 unsigned int num_ports)
1995 {
1996         unsigned int i;
1997
1998         for (i = 0; i < num_ports; i++) {
1999                 struct mlx5_ib_port_sysfs_group *port =
2000                         &dev->port[i].group;
2001
2002                 if (!port->enabled)
2003                         continue;
2004
2005                 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
2006                     MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
2007                         sysfs_remove_group(&port->kobj,
2008                                            &port_counters_group);
2009                 kobject_put(&port->kobj);
2010                 port->enabled = false;
2011         }
2012
2013         if (dev->ports_parent) {
2014                 kobject_put(dev->ports_parent);
2015                 dev->ports_parent = NULL;
2016         }
2017 }
2018
2019 static int create_port_attrs(struct mlx5_ib_dev *dev)
2020 {
2021         int ret = 0;
2022         unsigned int i = 0;
2023         struct device *device = &dev->ib_dev.dev;
2024
2025         dev->ports_parent = kobject_create_and_add("mlx5_ports",
2026                                                    &device->kobj);
2027         if (!dev->ports_parent)
2028                 return -ENOMEM;
2029
2030         for (i = 0; i < dev->num_ports; i++) {
2031                 ret = add_port_attrs(dev,
2032                                      dev->ports_parent,
2033                                      &dev->port[i].group,
2034                                      i + 1);
2035
2036                 if (ret)
2037                         goto _destroy_ports_attrs;
2038         }
2039
2040         return 0;
2041
2042 _destroy_ports_attrs:
2043         destroy_ports_attrs(dev, i);
2044         return ret;
2045 }
2046
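/*
 * mlx5_ib_add() - attach the IB driver to an mlx5 core device.
 *
 * Allocates the ib_device and per-port state, queries port capabilities,
 * enables RoCE (or bails out) when port 1 uses an Ethernet link layer, fills
 * in the verbs entry points, and then builds the runtime pieces in order:
 * device resources, queue counters, ib_core registration, UMR resources, DC
 * improvements (IB port type only), sysfs port attributes, class attribute
 * files and one RoCE port-update kthread per port.  Every error path unwinds
 * the steps already completed, in reverse order.
 */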
2047 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2048 {
2049         struct mlx5_ib_dev *dev;
2050         int err;
2051         int i;
2052
2053         printk_once(KERN_INFO "%s", mlx5_version);
2054
2055         dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
2056         if (!dev)
2057                 return NULL;
2058
2059         dev->mdev = mdev;
2060
2061         dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
2062                              GFP_KERNEL);
2063         if (!dev->port)
2064                 goto err_dealloc;
2065
2066         for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2067                 dev->port[i].dev = dev;
2068                 dev->port[i].port_num = i;
2069                 dev->port[i].port_gone = 0;
2070                 memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table));
2071         }
2072
2073         err = get_port_caps(dev);
2074         if (err)
2075                 goto err_free_port;
2076
2077         if (mlx5_use_mad_ifc(dev))
2078                 get_ext_port_caps(dev);
2079
2080         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2081             IB_LINK_LAYER_ETHERNET) {
2082                 if (MLX5_CAP_GEN(mdev, roce)) {
2083                         err = mlx5_nic_vport_enable_roce(mdev);
2084                         if (err)
2085                                 goto err_free_port;
2086                 } else {
2087                         goto err_free_port;
2088                 }
2089         }
2090
2091         MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
2092
2093         strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
2094         dev->ib_dev.owner               = THIS_MODULE;
2095         dev->ib_dev.node_type           = RDMA_NODE_IB_CA;
2096         dev->ib_dev.local_dma_lkey      = mdev->special_contexts.resd_lkey;
2097         dev->num_ports          = MLX5_CAP_GEN(mdev, num_ports);
2098         dev->ib_dev.phys_port_cnt     = dev->num_ports;
2099         dev->ib_dev.num_comp_vectors    =
2100                 dev->mdev->priv.eq_table.num_comp_vectors;
2101         dev->ib_dev.dma_device  = &mdev->pdev->dev;
2102
2103         dev->ib_dev.uverbs_abi_ver      = MLX5_IB_UVERBS_ABI_VERSION;
2104         dev->ib_dev.uverbs_cmd_mask     =
2105                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
2106                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
2107                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
2108                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
2109                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
2110                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
2111                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
2112                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2113                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
2114                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
2115                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
2116                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
2117                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
2118                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
2119                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
2120                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
2121                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
2122                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
2123                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
2124                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
2125                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
2126                 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
2127                 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
2128
2129         dev->ib_dev.query_device        = mlx5_ib_query_device;
2130         dev->ib_dev.query_port          = mlx5_ib_query_port;
2131         dev->ib_dev.get_link_layer      = mlx5_ib_port_link_layer;
2132         dev->ib_dev.get_netdev          = mlx5_ib_get_netdev;
2133         dev->ib_dev.query_gid           = mlx5_ib_query_gid;
2134         dev->ib_dev.query_pkey          = mlx5_ib_query_pkey;
2135         dev->ib_dev.modify_device       = mlx5_ib_modify_device;
2136         dev->ib_dev.modify_port         = mlx5_ib_modify_port;
2137         dev->ib_dev.alloc_ucontext      = mlx5_ib_alloc_ucontext;
2138         dev->ib_dev.dealloc_ucontext    = mlx5_ib_dealloc_ucontext;
2139         dev->ib_dev.mmap                = mlx5_ib_mmap;
2140         dev->ib_dev.alloc_pd            = mlx5_ib_alloc_pd;
2141         dev->ib_dev.dealloc_pd          = mlx5_ib_dealloc_pd;
2142         dev->ib_dev.create_ah           = mlx5_ib_create_ah;
2143         dev->ib_dev.query_ah            = mlx5_ib_query_ah;
2144         dev->ib_dev.destroy_ah          = mlx5_ib_destroy_ah;
2145         dev->ib_dev.create_srq          = mlx5_ib_create_srq;
2146         dev->ib_dev.modify_srq          = mlx5_ib_modify_srq;
2147         dev->ib_dev.query_srq           = mlx5_ib_query_srq;
2148         dev->ib_dev.destroy_srq         = mlx5_ib_destroy_srq;
2149         dev->ib_dev.post_srq_recv       = mlx5_ib_post_srq_recv;
2150         dev->ib_dev.create_qp           = mlx5_ib_create_qp;
2151         dev->ib_dev.modify_qp           = mlx5_ib_modify_qp;
2152         dev->ib_dev.query_qp            = mlx5_ib_query_qp;
2153         dev->ib_dev.destroy_qp          = mlx5_ib_destroy_qp;
2154         dev->ib_dev.post_send           = mlx5_ib_post_send;
2155         dev->ib_dev.post_recv           = mlx5_ib_post_recv;
2156         dev->ib_dev.create_cq           = mlx5_ib_create_cq;
2157         dev->ib_dev.modify_cq           = mlx5_ib_modify_cq;
2158         dev->ib_dev.resize_cq           = mlx5_ib_resize_cq;
2159         dev->ib_dev.destroy_cq          = mlx5_ib_destroy_cq;
2160         dev->ib_dev.poll_cq             = mlx5_ib_poll_cq;
2161         dev->ib_dev.req_notify_cq       = mlx5_ib_arm_cq;
2162         dev->ib_dev.get_dma_mr          = mlx5_ib_get_dma_mr;
2163         dev->ib_dev.reg_user_mr         = mlx5_ib_reg_user_mr;
2164         dev->ib_dev.reg_phys_mr         = mlx5_ib_reg_phys_mr;
2165         dev->ib_dev.dereg_mr            = mlx5_ib_dereg_mr;
2166         dev->ib_dev.attach_mcast        = mlx5_ib_mcg_attach;
2167         dev->ib_dev.detach_mcast        = mlx5_ib_mcg_detach;
2168         dev->ib_dev.process_mad         = mlx5_ib_process_mad;
2169         dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
2170         dev->ib_dev.alloc_fast_reg_mr   = mlx5_ib_alloc_fast_reg_mr;
2171         dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
2172         dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
2173
2174         if (MLX5_CAP_GEN(mdev, xrc)) {
2175                 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
2176                 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
2177                 dev->ib_dev.uverbs_cmd_mask |=
2178                         (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2179                         (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2180         }
2181
2182         err = init_node_data(dev);
2183         if (err)
2184                 goto err_disable_roce;
2185
2186         mutex_init(&dev->cap_mask_mutex);
2187         INIT_LIST_HEAD(&dev->qp_list);
2188         spin_lock_init(&dev->reset_flow_resource_lock);
2189
2190         err = create_dev_resources(&dev->devr);
2191         if (err)
2192                 goto err_disable_roce;
2193
2194
2195         err = mlx5_ib_alloc_q_counters(dev);
2196         if (err)
2197                 goto err_odp;
2198
2199         err = ib_register_device(&dev->ib_dev, NULL);
2200         if (err)
2201                 goto err_q_cnt;
2202
2203         err = create_umr_res(dev);
2204         if (err)
2205                 goto err_dev;
2206
2207         if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2208             MLX5_CAP_PORT_TYPE_IB) {
2209                 if (init_dc_improvements(dev))
2210                         mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
2211         }
2212
2213         err = create_port_attrs(dev);
2214         if (err)
2215                 goto err_dc;
2216
2217         for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2218                 err = device_create_file(&dev->ib_dev.dev,
2219                                          mlx5_class_attributes[i]);
2220                 if (err)
2221                         goto err_port_attrs;
2222         }
2223
2224         if (1) {
2225                 struct thread *rl_thread = NULL;
2226                 struct proc *rl_proc = NULL;
2227
2228                 for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2229                         (void) kproc_kthread_add(mlx5_ib_roce_port_update, dev->port + i, &rl_proc, &rl_thread,
2230                             RFHIGHPID, 0, "mlx5-ib-roce-port", "mlx5-ib-roce_port-%d", i);
2231                 }
2232         }
2233
2234         dev->ib_active = true;
2235
2236         return dev;
2237
2238 err_port_attrs:
2239         destroy_ports_attrs(dev, dev->num_ports);
2240
2241 err_dc:
2242         if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2243             MLX5_CAP_PORT_TYPE_IB)
2244                 cleanup_dc_improvements(dev);
2245         destroy_umrc_res(dev);
2246
2247 err_dev:
2248         ib_unregister_device(&dev->ib_dev);
2249
2250 err_q_cnt:
2251         mlx5_ib_dealloc_q_counters(dev);
2252
2253 err_odp:
2254         destroy_dev_resources(&dev->devr);
2255
2256 err_disable_roce:
2257         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2258             IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2259                 mlx5_nic_vport_disable_roce(mdev);
2260 err_free_port:
2261         kfree(dev->port);
2262
2263 err_dealloc:
2264         ib_dealloc_device((struct ib_device *)dev);
2265
2266         return NULL;
2267 }
2268
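/*
 * mlx5_ib_remove() - detach from the core device.  First ask every RoCE
 * port-update kthread to exit by setting port_gone and waiting until the
 * thread acknowledges (presumably by setting port_gone to 2 before it
 * terminates), then undo the steps of mlx5_ib_add() in roughly reverse
 * order and free the device.
 */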
2269 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
2270 {
2271         struct mlx5_ib_dev *dev = context;
2272         int i;
2273
2274         for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2275                 dev->port[i].port_gone = 1;
2276                 while (dev->port[i].port_gone != 2)
2277                         pause("W", hz);
2278         }
2279
2280         for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2281                 device_remove_file(&dev->ib_dev.dev,
2282                     mlx5_class_attributes[i]);
2283         }
2284
2285         destroy_ports_attrs(dev, dev->num_ports);
2286         if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2287             MLX5_CAP_PORT_TYPE_IB)
2288                 cleanup_dc_improvements(dev);
2289         mlx5_ib_dealloc_q_counters(dev);
2290         ib_unregister_device(&dev->ib_dev);
2291         destroy_umrc_res(dev);
2292         destroy_dev_resources(&dev->devr);
2293
2294         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2295             IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2296                 mlx5_nic_vport_disable_roce(mdev);
2297
2298         kfree(dev->port);
2299         ib_dealloc_device(&dev->ib_dev);
2300 }
2301
2302 static struct mlx5_interface mlx5_ib_interface = {
2303         .add            = mlx5_ib_add,
2304         .remove         = mlx5_ib_remove,
2305         .event          = mlx5_ib_event,
2306         .protocol       = MLX5_INTERFACE_PROTOCOL_IB,
2307 };
2308
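/*
 * Module load/unload.  mlx5_ib_init() warns about the deprecated prof_sel
 * parameter, registers with mlx5_core and creates the single-threaded work
 * queue used by the driver; mlx5_ib_cleanup() reverses both steps.
 */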
2309 static int __init mlx5_ib_init(void)
2310 {
2311         int err;
2312
2313         if (deprecated_prof_sel != 2)
2314                 printf("mlx5_ib: WARN: ""prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
2315
2316         err = mlx5_register_interface(&mlx5_ib_interface);
2317         if (err)
2318                 goto clean_odp;
2319
2320         mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
2321         if (!mlx5_ib_wq) {
2322                 printf("mlx5_ib: ERR: ""%s: failed to create mlx5_ib_wq\n", __func__);
2323                 err = -ENOMEM;
                     goto err_unreg;
2324         }
2325
2326         return err;
2327
2328 err_unreg:
2329         mlx5_unregister_interface(&mlx5_ib_interface);
2330
2331 clean_odp:
2332         return err;
2333 }
2334
2335 static void __exit mlx5_ib_cleanup(void)
2336 {
2337         destroy_workqueue(mlx5_ib_wq);
2338         mlx5_unregister_interface(&mlx5_ib_interface);
2339 }
2340
2341 module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
2342 module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);