2 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <linux/errno.h>
29 #include <linux/pci.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/slab.h>
32 #include <linux/io-mapping.h>
33 #include <linux/sched.h>
34 #include <linux/netdevice.h>
35 #include <linux/etherdevice.h>
36 #include <linux/list.h>
37 #include <dev/mlx5/driver.h>
38 #include <dev/mlx5/vport.h>
39 #include <asm/pgtable.h>
43 #include <rdma/ib_user_verbs.h>
44 #include <rdma/ib_smi.h>
45 #include <rdma/ib_umem.h>
49 #include <sys/unistd.h>
50 #include <sys/kthread.h>
/* Driver identification strings and FreeBSD kernel-module plumbing. */
52 #define DRIVER_NAME "mlx5_ib"
53 #define DRIVER_VERSION "3.2-rc1"
54 #define DRIVER_RELDATE "May 2016"
56 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
57 MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
58 MODULE_LICENSE("Dual BSD/GPL");
/* Load-order dependencies: linuxkpi compat layer, mlx5 core, ibcore. */
59 MODULE_DEPEND(mlx5ib, linuxkpi, 1, 1, 1);
60 MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
61 MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
62 MODULE_VERSION(mlx5ib, 1);
/* Kept read-only (0444) for compatibility; the knob moved to mlx5_core. */
64 static int deprecated_prof_sel = 2;
65 module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
66 MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
/* Enum fragment: standard atomic argument size is 8 bytes. */
69 MLX5_STANDARD_ATOMIC_SIZE = 0x8,
/* Driver-global workqueue, shared by the mlx5_ib subunits. */
72 struct workqueue_struct *mlx5_ib_wq;
/* Banner string printed when the driver attaches. */
74 static char mlx5_version[] =
75 DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
76 DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
/*
 * get_atomic_caps() - populate props->atomic_cap and props->masked_atomic_cap
 * from the HCA's atomic capability bits.  A capability is reported only when
 * both compare-swap and fetch-add are supported at 8-byte QP atomic size.
 *
 * NOTE(review): the original line numbering in this extract is non-contiguous;
 * some statements (braces, else branches, declarations) are not visible here.
 */
78 static void get_atomic_caps(struct mlx5_ib_dev *dev,
79 struct ib_device_attr *props)
84 u8 atomic_req_endianess;
/* Query the atomic operation mask, per-QP atomic sizes and endianness mode. */
86 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
87 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
88 atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
89 atomic_req_8B_endianess_mode) ||
/* Plain atomics: both CMP_SWAP and FETCH_ADD must be supported. */
92 tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
93 if (((atomic_operations & tmp) == tmp)
94 && (atomic_size_qp & 8)) {
95 if (atomic_req_endianess) {
96 props->atomic_cap = IB_ATOMIC_HCA;
98 props->atomic_cap = IB_ATOMIC_NONE;
101 props->atomic_cap = IB_ATOMIC_NONE;
/* Masked atomics: same rule applied to the masked operation variants. */
104 tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
105 if (((atomic_operations & tmp) == tmp)
106 &&(atomic_size_qp & 8)) {
107 if (atomic_req_endianess)
108 props->masked_atomic_cap = IB_ATOMIC_HCA;
110 props->masked_atomic_cap = IB_ATOMIC_NONE;
113 props->masked_atomic_cap = IB_ATOMIC_NONE;
/*
 * mlx5_ib_port_link_layer() - map the core device's port_type capability to
 * the IB core link-layer enum (InfiniBand, Ethernet, or unspecified).
 */
117 static enum rdma_link_layer
118 mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
120 struct mlx5_ib_dev *dev = to_mdev(device);
122 switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
123 case MLX5_CAP_PORT_TYPE_IB:
124 return IB_LINK_LAYER_INFINIBAND;
125 case MLX5_CAP_PORT_TYPE_ETH:
126 return IB_LINK_LAYER_ETHERNET;
128 return IB_LINK_LAYER_UNSPECIFIED;
/*
 * mlx5_use_mad_ifc() - true when the device reports ISSI 0, meaning queries
 * must go through the MAD interface rather than vport commands.
 */
132 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
134 return !dev->mdev->issi;
/* Access methods for querying vport/port attributes (enum body fragment). */
138 MLX5_VPORT_ACCESS_METHOD_MAD,
139 MLX5_VPORT_ACCESS_METHOD_HCA,
140 MLX5_VPORT_ACCESS_METHOD_NIC,
/*
 * mlx5_get_vport_access_method() - choose how attributes are queried:
 * MAD when ISSI==0, NIC commands for Ethernet link layer, HCA otherwise.
 */
143 static int mlx5_get_vport_access_method(struct ib_device *ibdev)
145 if (mlx5_use_mad_ifc(to_mdev(ibdev)))
146 return MLX5_VPORT_ACCESS_METHOD_MAD;
/* The link layer of port 1 decides for the whole device here. */
148 if (mlx5_ib_port_link_layer(ibdev, 1) ==
149 IB_LINK_LAYER_ETHERNET)
150 return MLX5_VPORT_ACCESS_METHOD_NIC;
152 return MLX5_VPORT_ACCESS_METHOD_HCA;
/*
 * mlx5_query_system_image_guid() - fetch the system image GUID via the
 * selected access method and store it big-endian in *sys_image_guid.
 * NOTE(review): error-return and default-case lines are elided in this
 * extract (non-contiguous numbering).
 */
155 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
156 __be64 *sys_image_guid)
158 struct mlx5_ib_dev *dev = to_mdev(ibdev);
159 struct mlx5_core_dev *mdev = dev->mdev;
163 switch (mlx5_get_vport_access_method(ibdev)) {
164 case MLX5_VPORT_ACCESS_METHOD_MAD:
165 return mlx5_query_system_image_guid_mad_ifc(ibdev,
168 case MLX5_VPORT_ACCESS_METHOD_HCA:
169 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
/* Firmware returns host order; convert for the IB core. */
171 *sys_image_guid = cpu_to_be64(tmp);
174 case MLX5_VPORT_ACCESS_METHOD_NIC:
175 err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
177 *sys_image_guid = cpu_to_be64(tmp);
/*
 * mlx5_query_max_pkeys() - report the P_Key table size: via MAD when
 * required, otherwise converted from the pkey_table_size capability.
 */
185 static int mlx5_query_max_pkeys(struct ib_device *ibdev,
188 struct mlx5_ib_dev *dev = to_mdev(ibdev);
189 struct mlx5_core_dev *mdev = dev->mdev;
191 switch (mlx5_get_vport_access_method(ibdev)) {
192 case MLX5_VPORT_ACCESS_METHOD_MAD:
193 return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);
/* HCA and NIC methods share the capability-based path. */
195 case MLX5_VPORT_ACCESS_METHOD_HCA:
196 case MLX5_VPORT_ACCESS_METHOD_NIC:
197 *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
/*
 * mlx5_query_vendor_id() - read the PCI vendor ID through the selected
 * access method (MAD interface or core vendor-id query).
 */
206 static int mlx5_query_vendor_id(struct ib_device *ibdev,
209 struct mlx5_ib_dev *dev = to_mdev(ibdev);
211 switch (mlx5_get_vport_access_method(ibdev)) {
212 case MLX5_VPORT_ACCESS_METHOD_MAD:
213 return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);
/* HCA and NIC methods both use the core query. */
215 case MLX5_VPORT_ACCESS_METHOD_HCA:
216 case MLX5_VPORT_ACCESS_METHOD_NIC:
217 return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
/*
 * mlx5_query_node_guid() - fetch the node GUID via the selected access
 * method and store it big-endian in *node_guid.
 * NOTE(review): error-return lines are elided in this extract.
 */
224 static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
230 switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
231 case MLX5_VPORT_ACCESS_METHOD_MAD:
232 return mlx5_query_node_guid_mad_ifc(dev, node_guid);
234 case MLX5_VPORT_ACCESS_METHOD_HCA:
235 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
237 *node_guid = cpu_to_be64(tmp);
240 case MLX5_VPORT_ACCESS_METHOD_NIC:
241 err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
243 *node_guid = cpu_to_be64(tmp);
/* Register layout for MLX5_REG_NODE_DESC access (body elided in extract). */
251 struct mlx5_reg_node_desc {
/*
 * mlx5_query_node_desc() - read the node description string: via MAD on
 * old (ISSI 0) firmware, otherwise through the NODE_DESC access register.
 */
255 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
257 struct mlx5_reg_node_desc in;
259 if (mlx5_use_mad_ifc(dev))
260 return mlx5_query_node_desc_mad_ifc(dev, node_desc);
/* Read access: zeroed input, output written directly into node_desc. */
262 memset(&in, 0, sizeof(in));
264 return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
265 sizeof(struct mlx5_reg_node_desc),
266 MLX5_REG_NODE_DESC, 0, 0);
/*
 * mlx5_ib_query_device() - fill struct ib_device_attr from firmware
 * capabilities: GUIDs, capability flags, resource limits and atomic caps.
 * NOTE(review): error-handling lines between the query calls are elided in
 * this extract (non-contiguous numbering).
 */
269 static int mlx5_ib_query_device(struct ib_device *ibdev,
270 struct ib_device_attr *props)
272 struct mlx5_ib_dev *dev = to_mdev(ibdev);
273 struct mlx5_core_dev *mdev = dev->mdev;
280 memset(props, 0, sizeof(*props));
282 err = mlx5_query_system_image_guid(ibdev,
283 &props->sys_image_guid);
287 err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
291 err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
/* Pack major/minor/sub firmware revision into one 64-bit value. */
295 props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
296 ((u64)fw_rev_min(dev->mdev) << 16) |
297 fw_rev_sub(dev->mdev);
298 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
299 IB_DEVICE_PORT_ACTIVE_EVENT |
300 IB_DEVICE_SYS_IMAGE_GUID |
301 IB_DEVICE_RC_RNR_NAK_GEN;
/* Optional capabilities, gated on the general capability bits. */
303 if (MLX5_CAP_GEN(mdev, pkv))
304 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
305 if (MLX5_CAP_GEN(mdev, qkv))
306 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
307 if (MLX5_CAP_GEN(mdev, apm))
308 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
309 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
310 if (MLX5_CAP_GEN(mdev, xrc))
311 props->device_cap_flags |= IB_DEVICE_XRC;
312 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
313 if (MLX5_CAP_GEN(mdev, block_lb_mc))
314 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
316 props->vendor_part_id = mdev->pdev->device;
317 props->hw_ver = mdev->pdev->revision;
319 props->max_mr_size = ~0ull;
320 props->page_size_cap = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) -1);
321 props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
322 props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
/* Max SGEs: RQ bounded by WQE size; SQ also loses ctrl+raddr segments. */
323 max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
324 sizeof(struct mlx5_wqe_data_seg);
325 max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
326 max_sq_sg = (max_sq_desc -
327 sizeof(struct mlx5_wqe_ctrl_seg) -
328 sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg);
329 props->max_sge = min(max_rq_sg, max_sq_sg);
330 props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
331 props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
332 props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
333 props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
334 props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
335 props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
336 props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
337 props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
338 props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
339 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
340 props->max_srq_sge = max_rq_sg - 1;
341 props->max_fast_reg_page_list_len = (unsigned int)-1;
342 get_atomic_caps(dev, props);
343 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
344 props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
345 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
346 props->max_mcast_grp;
347 props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
348 props->max_ah = INT_MAX;
/* Hardware active-width bitmask values as reported by the PTYS register. */
354 MLX5_IB_WIDTH_1X = 1 << 0,
355 MLX5_IB_WIDTH_2X = 1 << 1,
356 MLX5_IB_WIDTH_4X = 1 << 2,
357 MLX5_IB_WIDTH_8X = 1 << 3,
358 MLX5_IB_WIDTH_12X = 1 << 4
/*
 * translate_active_width() - convert a hardware width bitmask into the IB
 * core IB_WIDTH_* enum.  2X is warned about as non-IB-spec; unknown values
 * are logged at debug level.
 * NOTE(review): fallback assignments in the 2X/invalid branches are elided
 * in this extract.
 */
361 static int translate_active_width(struct ib_device *ibdev, u8 active_width,
364 struct mlx5_ib_dev *dev = to_mdev(ibdev);
367 if (active_width & MLX5_IB_WIDTH_1X) {
368 *ib_width = IB_WIDTH_1X;
369 } else if (active_width & MLX5_IB_WIDTH_2X) {
/* 2X has no IB-spec equivalent in this API revision. */
370 mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
373 } else if (active_width & MLX5_IB_WIDTH_4X) {
374 *ib_width = IB_WIDTH_4X;
375 } else if (active_width & MLX5_IB_WIDTH_8X) {
376 *ib_width = IB_WIDTH_8X;
377 } else if (active_width & MLX5_IB_WIDTH_12X) {
378 *ib_width = IB_WIDTH_12X;
380 mlx5_ib_dbg(dev, "Invalid active_width %d\n",
389 * TODO: Move to IB core
/* Enum fragments: IB max-VL encodings and hardware vl_hw_cap values. */
396 __IB_MAX_VL_0_14 = 5,
399 enum mlx5_vl_hw_cap {
/*
 * translate_max_vl_num() - map the hardware vl_hw_cap value to the IB
 * max-VL enum.  Case labels other than MLX5_VL_HW_0_14 are elided in this
 * extract (non-contiguous numbering).
 */
411 static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
416 *max_vl_num = __IB_MAX_VL_0;
419 *max_vl_num = __IB_MAX_VL_0_1;
422 *max_vl_num = __IB_MAX_VL_0_3;
425 *max_vl_num = __IB_MAX_VL_0_7;
427 case MLX5_VL_HW_0_14:
428 *max_vl_num = __IB_MAX_VL_0_14;
/*
 * mlx5_query_port_ib() - fill ib_port_attr for an IB-link-layer port by
 * combining the HCA vport context with the PTYS (width/speed), PMTU (MTU)
 * and PVLC (VL capability) access registers.
 * NOTE(review): error-path labels and frees are elided in this extract.
 */
438 static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
439 struct ib_port_attr *props)
441 struct mlx5_ib_dev *dev = to_mdev(ibdev);
442 struct mlx5_core_dev *mdev = dev->mdev;
444 int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
445 struct mlx5_ptys_reg *ptys;
446 struct mlx5_pmtu_reg *pmtu;
447 struct mlx5_pvlc_reg pvlc;
/* Allocate the vport-context reply and the register scratch buffers. */
451 rep = mlx5_vzalloc(outlen);
452 ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
453 pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
454 if (!rep || !ptys || !pmtu) {
459 memset(props, 0, sizeof(*props));
461 /* what if I am pf with dual port */
462 err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
466 ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);
/* Copy the per-port attributes out of the vport context. */
468 props->lid = MLX5_GET(hca_vport_context, ctx, lid);
469 props->lmc = MLX5_GET(hca_vport_context, ctx, lmc);
470 props->sm_lid = MLX5_GET(hca_vport_context, ctx, sm_lid);
471 props->sm_sl = MLX5_GET(hca_vport_context, ctx, sm_sl);
472 props->state = MLX5_GET(hca_vport_context, ctx, vport_state);
473 props->phys_state = MLX5_GET(hca_vport_context, ctx,
474 port_physical_state);
475 props->port_cap_flags = MLX5_GET(hca_vport_context, ctx, cap_mask1);
476 props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
477 props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
478 props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
479 props->bad_pkey_cntr = MLX5_GET(hca_vport_context, ctx,
480 pkey_violation_counter);
481 props->qkey_viol_cntr = MLX5_GET(hca_vport_context, ctx,
482 qkey_violation_counter);
483 props->subnet_timeout = MLX5_GET(hca_vport_context, ctx,
485 props->init_type_reply = MLX5_GET(hca_vport_context, ctx,
/* PTYS register: operational link width and speed. */
488 ptys->proto_mask |= MLX5_PTYS_IB;
489 ptys->local_port = port;
490 err = mlx5_core_access_ptys(mdev, ptys, 0);
494 err = translate_active_width(ibdev, ptys->ib_link_width_oper,
495 &props->active_width);
499 props->active_speed = (u8)ptys->ib_proto_oper;
/* PMTU register: maximum and operational MTU. */
501 pmtu->local_port = port;
502 err = mlx5_core_access_pmtu(mdev, pmtu, 0);
506 props->max_mtu = pmtu->max_mtu;
507 props->active_mtu = pmtu->oper_mtu;
/* PVLC register: supported VL count. */
509 memset(&pvlc, 0, sizeof(pvlc));
510 pvlc.local_port = port;
511 err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
515 err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
/*
 * mlx5_ib_query_port() - dispatch a port query to the MAD, IB (HCA) or
 * RoCE implementation according to the device's access method.
 */
524 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
525 struct ib_port_attr *props)
527 switch (mlx5_get_vport_access_method(ibdev)) {
528 case MLX5_VPORT_ACCESS_METHOD_MAD:
529 return mlx5_query_port_mad_ifc(ibdev, port, props);
531 case MLX5_VPORT_ACCESS_METHOD_HCA:
532 return mlx5_query_port_ib(ibdev, port, props);
534 case MLX5_VPORT_ACCESS_METHOD_NIC:
535 return mlx5_query_port_roce(ibdev, port, props);
/*
 * mlx5_addrconf_ifid_eui48() - build an interface identifier from the
 * netdev's 6-byte MAC: first 3 bytes at eui[0..2], last 3 at eui[5..7].
 * NOTE(review): the EUI-64 filler bytes (eui[3..4]) and the local-bit
 * toggle are not visible in this extract — verify against the full source.
 */
543 mlx5_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
545 if (dev->if_addrlen != ETH_ALEN)
547 memcpy(eui, IF_LLADDR(dev), 3);
548 memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
550 /* NOTE: The scope ID is added by the GID to IP conversion */
/*
 * mlx5_make_default_gid() - build a link-local GID (fe80::/64 prefix) with
 * the interface identifier derived from the netdev's MAC address.
 */
559 mlx5_make_default_gid(struct net_device *dev, union ib_gid *gid)
561 gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
562 mlx5_addrconf_ifid_eui48(&gid->raw[8], dev);
/*
 * mlx5_ip2gid() - convert a socket address to a GID: IPv4 becomes a
 * v4-mapped IPv6 address, IPv6 is copied verbatim.
 */
566 mlx5_ip2gid(const struct sockaddr *addr, union ib_gid *gid)
568 switch (addr->sa_family) {
570 ipv6_addr_set_v4mapped(((const struct sockaddr_in *)addr)->sin_addr.s_addr,
571 (struct in6_addr *)gid->raw);
574 memcpy(gid->raw, &((const struct sockaddr_in6 *)addr)->sin6_addr, 16);
/*
 * mlx5_ib_roce_port_update() - worker loop that keeps the RoCE GID table
 * in sync with the underlying Ethernet interface.  Runs until
 * port->port_gone is set: rebuilds the default (link-local) GID, scans all
 * netdevs (including VLAN children of ndev) for IPv4/IPv6 addresses,
 * pushes added/removed entries to hardware via modify_gid_roce(), and
 * dispatches IB_EVENT_GID_CHANGE so ibcore refreshes its cache.
 * NOTE(review): loop-control, locking-acquire and sleep lines are elided
 * in this extract (non-contiguous numbering).
 */
586 mlx5_ib_roce_port_update(void *arg)
588 struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg;
589 struct mlx5_ib_dev *dev = port->dev;
590 struct mlx5_core_dev *mdev = dev->mdev;
591 struct net_device *xdev[MLX5_IB_GID_MAX];
592 struct net_device *idev;
593 struct net_device *ndev;
595 union ib_gid gid_temp;
597 while (port->port_gone == 0) {
/* Resolve the Ethernet netdev backing this IB port. */
603 ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH);
609 CURVNET_SET_QUIET(ndev->if_vnet);
/* Slot 0: default link-local GID derived from the MAC. */
611 memset(&gid_temp, 0, sizeof(gid_temp));
612 mlx5_make_default_gid(ndev, &gid_temp);
613 if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
614 port->gid_table[gid_index] = gid_temp;
617 xdev[gid_index] = ndev;
/* First pass over all interfaces, then a pass over VLAN children. */
621 TAILQ_FOREACH(idev, &V_ifnet, if_link) {
626 TAILQ_FOREACH(idev, &V_ifnet, if_link) {
628 if (idev->if_type != IFT_L2VLAN)
630 if (ndev != rdma_vlan_dev_real_dev(idev))
633 /* clone address information for IPv4 and IPv6 */
635 TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
636 if (ifa->ifa_addr == NULL ||
637 (ifa->ifa_addr->sa_family != AF_INET &&
638 ifa->ifa_addr->sa_family != AF_INET6) ||
639 gid_index >= MLX5_IB_GID_MAX)
641 memset(&gid_temp, 0, sizeof(gid_temp));
642 mlx5_ip2gid(ifa->ifa_addr, &gid_temp);
643 /* check for existing entry */
644 for (j = 0; j != gid_index; j++) {
645 if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0)
648 /* check if new entry must be added */
649 if (j == gid_index) {
650 if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
651 port->gid_table[gid_index] = gid_temp;
654 xdev[gid_index] = idev;
658 IF_ADDR_RUNLOCK(idev);
/* Push table changes to firmware only for Ethernet link layer. */
665 mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) {
666 struct ib_event event = {
667 .device = &dev->ib_dev,
668 .element.port_num = port->port_num + 1,
669 .event = IB_EVENT_GID_CHANGE,
672 /* add new entries, if any */
673 for (j = 0; j != gid_index; j++) {
674 error = modify_gid_roce(&dev->ib_dev, port->port_num, j,
675 port->gid_table + j, xdev[j]);
677 printf("mlx5_ib: Failed to update ROCE GID table: %d\n", error);
679 memset(&gid_temp, 0, sizeof(gid_temp));
681 /* clear old entries, if any */
682 for (; j != MLX5_IB_GID_MAX; j++) {
683 if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0)
685 port->gid_table[j] = gid_temp;
686 (void) modify_gid_roce(&dev->ib_dev, port->port_num, j,
687 port->gid_table + j, ndev);
690 /* make sure ibcore gets updated */
691 ib_dispatch_event(&event);
/* Final GID_CHANGE event on exit so ibcore drops stale entries. */
696 struct ib_event event = {
697 .device = &dev->ib_dev,
698 .element.port_num = port->port_num + 1,
699 .event = IB_EVENT_GID_CHANGE,
701 /* make sure ibcore gets updated */
702 ib_dispatch_event(&event);
/*
 * mlx5_ib_query_gid() - return the GID at (port, index): via MAD or HCA
 * vport query, or from the driver's cached RoCE gid_table for NIC mode.
 * Out-of-range or gone-port NIC requests yield a zeroed GID.
 */
711 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
714 struct mlx5_ib_dev *dev = to_mdev(ibdev);
715 struct mlx5_core_dev *mdev = dev->mdev;
717 switch (mlx5_get_vport_access_method(ibdev)) {
718 case MLX5_VPORT_ACCESS_METHOD_MAD:
719 return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);
721 case MLX5_VPORT_ACCESS_METHOD_HCA:
722 return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);
724 case MLX5_VPORT_ACCESS_METHOD_NIC:
/* Validate port/index and the port's liveness before using the cache. */
725 if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
726 index < 0 || index >= MLX5_IB_GID_MAX ||
727 dev->port[port - 1].port_gone != 0)
728 memset(gid, 0, sizeof(*gid));
730 *gid = dev->port[port - 1].gid_table[index];
/*
 * mlx5_ib_query_pkey() - return the P_Key at (port, index), via MAD or
 * the HCA vport P_Key query (shared by HCA and NIC methods).
 */
738 static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
741 struct mlx5_ib_dev *dev = to_mdev(ibdev);
742 struct mlx5_core_dev *mdev = dev->mdev;
744 switch (mlx5_get_vport_access_method(ibdev)) {
745 case MLX5_VPORT_ACCESS_METHOD_MAD:
746 return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);
748 case MLX5_VPORT_ACCESS_METHOD_HCA:
749 case MLX5_VPORT_ACCESS_METHOD_NIC:
750 return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
/*
 * mlx5_ib_modify_device() - only IB_DEVICE_MODIFY_NODE_DESC is supported.
 * The new node description is pushed to firmware (best effort, so the FW
 * can raise a trap 144) and then mirrored into ibdev->node_desc.
 */
758 static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
759 struct ib_device_modify *props)
761 struct mlx5_ib_dev *dev = to_mdev(ibdev);
762 struct mlx5_reg_node_desc in;
763 struct mlx5_reg_node_desc out;
/* Reject any mask bits other than NODE_DESC; no-op if it is not set. */
766 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
769 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
773 * If possible, pass node desc to FW, so it can generate
774 * a 144 trap. If cmd fails, just ignore.
776 memcpy(&in, props->node_desc, 64);
777 err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
778 sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
782 memcpy(ibdev->node_desc, props->node_desc, 64);
/*
 * mlx5_ib_modify_port() - update port capability flags.  RoCE ports return
 * success without touching hardware (the CM calls this unconditionally but
 * qkey counters / cap flags are IB-only).  cap_mask_mutex serializes the
 * read-modify-write of the capability mask.
 */
787 static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
788 struct ib_port_modify *props)
790 u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
791 IB_LINK_LAYER_ETHERNET);
792 struct mlx5_ib_dev *dev = to_mdev(ibdev);
793 struct ib_port_attr attr;
797 /* return OK if this is RoCE. CM calls ib_modify_port() regardless
798 * of whether port link layer is ETH or IB. For ETH ports, qkey
799 * violations and port capabilities are not valid.
804 mutex_lock(&dev->cap_mask_mutex);
806 err = mlx5_ib_query_port(ibdev, port, &attr);
/* Apply set bits, then clear bits, over the current capability mask. */
810 tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
811 ~props->clr_port_cap_mask;
813 err = mlx5_set_port_caps(dev->mdev, port, tmp);
816 mutex_unlock(&dev->cap_mask_mutex);
/* User-visible capability flags advertised in the alloc_ucontext reply. */
820 enum mlx5_cap_flags {
821 MLX5_CAP_COMPACT_AV = 1 << 0,
/*
 * set_mlx5_flags() - advertise the compact address vector capability to
 * userspace when the firmware supports it.
 */
824 static void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
826 *flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
827 MLX5_CAP_COMPACT_AV : 0;
/*
 * mlx5_ib_alloc_ucontext() - create a user context: validate the (v1 or
 * v2) request, size the UUAR/UAR sets, allocate UAR pages from firmware,
 * reply with device limits, and (for RoCE) allocate a transport domain.
 * Returns the new ib_ucontext or an ERR_PTR.
 * NOTE(review): several declarations, error labels and kfree calls are
 * elided in this extract (non-contiguous numbering).
 */
830 static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
831 struct ib_udata *udata)
833 struct mlx5_ib_dev *dev = to_mdev(ibdev);
834 struct mlx5_ib_alloc_ucontext_req_v2 req;
835 struct mlx5_ib_alloc_ucontext_resp resp;
836 struct mlx5_ib_ucontext *context;
837 struct mlx5_uuar_info *uuari;
838 struct mlx5_uar *uars;
848 return ERR_PTR(-EAGAIN);
850 memset(&req, 0, sizeof(req));
851 memset(&resp, 0, sizeof(resp));
/* Accept either the v1 or the v2 request layout, judged by length. */
853 reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
854 if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
856 else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
859 mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen);
860 return ERR_PTR(-EINVAL);
863 err = ib_copy_from_udata(&req, udata, reqlen);
865 mlx5_ib_err(dev, "copy failed\n");
870 mlx5_ib_err(dev, "request corrupted\n");
871 return ERR_PTR(-EINVAL);
/* Validate and round up the requested UUAR counts. */
874 if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
875 mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
876 return ERR_PTR(-ENOMEM);
879 req.total_num_uuars = ALIGN(req.total_num_uuars,
880 MLX5_NON_FP_BF_REGS_PER_PAGE);
881 if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
882 mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
883 req.total_num_uuars, req.total_num_uuars);
884 return ERR_PTR(-EINVAL);
887 num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
888 gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
/* Report device limits back to userspace in the response. */
889 resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
890 if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
891 resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
892 resp.cache_line_size = L1_CACHE_BYTES;
893 resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
894 resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
895 resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
896 resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
897 resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
898 set_mlx5_flags(&resp.flags, dev->mdev);
/* Newer response fields only if the caller's buffer is long enough. */
900 if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
901 resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);
903 if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
904 resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
906 context = kzalloc(sizeof(*context), GFP_KERNEL);
908 return ERR_PTR(-ENOMEM);
910 uuari = &context->uuari;
911 mutex_init(&uuari->lock);
912 uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
918 uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
919 sizeof(*uuari->bitmap),
921 if (!uuari->bitmap) {
926 * clear all fast path uuars
928 for (i = 0; i < gross_uuars; i++) {
/* UUARs 2 and 3 of each page are fast-path; mark them reserved. */
930 if (uuarn == 2 || uuarn == 3)
931 set_bit(i, uuari->bitmap);
934 uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
/* Allocate the UAR pages from firmware, one command per UAR. */
940 for (i = 0; i < num_uars; i++) {
941 err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
943 mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
947 for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
948 context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;
950 INIT_LIST_HEAD(&context->db_page_list);
951 mutex_init(&context->db_page_mutex);
953 resp.tot_uuars = req.total_num_uuars;
954 resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
955 err = ib_copy_to_udata(udata, &resp,
956 min_t(size_t, udata->outlen, sizeof(resp)));
961 uuari->num_low_latency_uuars = req.num_low_latency_uuars;
963 uuari->num_uars = num_uars;
/* RoCE contexts additionally need a transport domain. */
965 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
966 IB_LINK_LAYER_ETHERNET) {
967 err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
972 return &context->ibucontext;
/* Error unwind: free the UARs allocated so far, then the bitmaps. */
975 for (i--; i >= 0; i--)
976 mlx5_cmd_free_uar(dev->mdev, uars[i].index);
980 kfree(uuari->bitmap);
/*
 * mlx5_ib_dealloc_ucontext() - tear down a user context: release the RoCE
 * transport domain, free every static and dynamic UAR, then free the
 * UUAR bookkeeping arrays.
 */
990 static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
992 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
993 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
994 struct mlx5_uuar_info *uuari = &context->uuari;
997 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
998 IB_LINK_LAYER_ETHERNET)
999 mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
1001 for (i = 0; i < uuari->num_uars; i++) {
1002 if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
1003 mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
/* Dynamic write-combining UARs are freed only if actually allocated. */
1005 for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
1006 if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
1007 mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
1010 kfree(uuari->count);
1011 kfree(uuari->bitmap);
/*
 * uar_index2pfn() - page frame number of a UAR: BAR0 base (in pages) plus
 * the UAR index.
 */
1018 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
1020 return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
/* The mmap offset encodes a command in the high bits and an argument
 * (e.g. a UAR index) in the low MLX5_IB_MMAP_CMD_SHIFT bits. */
1023 static int get_command(unsigned long offset)
1025 return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
1028 static int get_arg(unsigned long offset)
1030 return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
1033 static int get_index(unsigned long offset)
1035 return get_arg(offset);
/*
 * uar_mmap() - map one UAR page into userspace.  Validates that the VMA is
 * exactly one page and that the encoded index is within the context's UAR
 * set, then remaps the corresponding physical page with the given
 * protection (WC or NC per the caller).
 */
1038 static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
1039 struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
1040 struct mlx5_ib_ucontext *context)
1045 if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
1046 mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
1047 (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
1051 idx = get_index(vma->vm_pgoff);
1052 if (idx >= uuari->num_uars) {
1053 mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
1054 idx, uuari->num_uars);
1058 pfn = uar_index2pfn(dev, uuari->uars[idx].index);
1059 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
1060 (unsigned long long)pfn);
1062 vma->vm_page_prot = prot;
1063 if (io_remap_pfn_range(vma, vma->vm_start, pfn,
1064 PAGE_SIZE, vma->vm_page_prot)) {
1065 mlx5_ib_err(dev, "io remap failed\n");
1069 mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
1070 (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);
/*
 * mlx5_ib_mmap() - dispatch userspace mmap requests by the command encoded
 * in the page offset: regular/WC pages map write-combining, NC pages map
 * non-cached.
 */
1075 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
1077 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1078 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1079 struct mlx5_uuar_info *uuari = &context->uuari;
1080 unsigned long command;
1082 command = get_command(vma->vm_pgoff);
1084 case MLX5_IB_MMAP_REGULAR_PAGE:
1085 return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1087 uuari, dev, context);
1091 case MLX5_IB_MMAP_WC_PAGE:
1092 return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1093 true, uuari, dev, context);
1096 case MLX5_IB_MMAP_NC_PAGE:
1097 return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
1098 false, uuari, dev, context);
/*
 * alloc_pa_mkey() - create a physical-address memory key bound to PD
 * `pdn`: local-read, PA access mode, full 64-bit length, start address 0.
 * On success the mkey is returned via *key.
 * NOTE(review): the seg initialization and return-path lines are partially
 * elided in this extract.
 */
1108 static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
1110 struct mlx5_create_mkey_mbox_in *in;
1111 struct mlx5_mkey_seg *seg;
1112 struct mlx5_core_mr mr;
1115 in = kzalloc(sizeof(*in), GFP_KERNEL);
1120 seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
1121 seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
1122 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1123 seg->start_addr = 0;
1125 err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
1128 mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
/*
 * free_pa_mkey() - destroy a PA mkey previously created by
 * alloc_pa_mkey(); failures are only logged.
 */
1143 static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
1145 struct mlx5_core_mr mr;
1148 memset(&mr, 0, sizeof(mr));
1150 err = mlx5_core_destroy_mkey(dev->mdev, &mr);
1152 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
/*
 * mlx5_ib_alloc_pd() - allocate a protection domain.  For user PDs the PD
 * number is copied back through udata; kernel PDs additionally get a PA
 * mkey.  Each failure path deallocates the core PD before returning an
 * ERR_PTR.
 * NOTE(review): kfree(pd) calls on the error paths are elided in this
 * extract (non-contiguous numbering).
 */
1155 static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
1156 struct ib_ucontext *context,
1157 struct ib_udata *udata)
1159 struct mlx5_ib_dev *dev = to_mdev(ibdev);
1160 struct mlx5_ib_alloc_pd_resp resp;
1161 struct mlx5_ib_pd *pd;
1164 pd = kmalloc(sizeof(*pd), GFP_KERNEL);
1166 return ERR_PTR(-ENOMEM);
1168 err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
1170 mlx5_ib_warn(dev, "pd alloc failed\n");
1172 return ERR_PTR(err);
/* User PD: report the PD number to the caller. */
1177 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
1178 mlx5_ib_err(dev, "copy failed\n");
1179 mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1181 return ERR_PTR(-EFAULT);
/* Kernel PD: also create the physical-address lkey. */
1184 err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
1186 mlx5_ib_err(dev, "alloc mkey failed\n");
1187 mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1189 return ERR_PTR(err);
/*
 * mlx5_ib_dealloc_pd() - free a protection domain: release the PA mkey
 * (kernel PDs) and then the core PD.
 */
1196 static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
1198 struct mlx5_ib_dev *mdev = to_mdev(pd->device);
1199 struct mlx5_ib_pd *mpd = to_mpd(pd);
1202 free_pa_mkey(mdev, mpd->pa_lkey);
1204 mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
/*
 * mlx5_ib_mcg_attach() - attach a QP to a multicast group; not supported
 * for raw-packet QPs.  Failures are logged with the QPN and MGID.
 */
1210 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1212 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1215 if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1218 err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
1220 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
1221 ibqp->qp_num, gid->raw);
/*
 * mlx5_ib_mcg_detach() - detach a QP from a multicast group; mirrors
 * mlx5_ib_mcg_attach(), including the raw-packet QP exclusion.
 */
1226 static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1228 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1231 if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1234 err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
1236 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
1237 ibqp->qp_num, gid->raw);
/*
 * init_node_data() - populate the ib_device's node description and node
 * GUID from firmware at registration time.
 */
1242 static int init_node_data(struct mlx5_ib_dev *dev)
1246 err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
1250 return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
/* Sysfs attribute show handlers: each recovers the mlx5_ib_dev from the
 * embedded ib_dev.dev and formats a single device property. */
/* Number of pages currently given to firmware. */
1253 static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
1256 struct mlx5_ib_dev *dev =
1257 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1259 return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
/* Number of registered (pinned) pages. */
1262 static ssize_t show_reg_pages(struct device *device,
1263 struct device_attribute *attr, char *buf)
1265 struct mlx5_ib_dev *dev =
1266 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1268 return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
/* HCA type, derived from the PCI device ID ("MT<id>"). */
1271 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1274 struct mlx5_ib_dev *dev =
1275 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1276 return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
/* Firmware version as major.minor.sub. */
1279 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1282 struct mlx5_ib_dev *dev =
1283 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1284 return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
1285 fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
/* PCI revision ID in hex. */
1288 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
1291 struct mlx5_ib_dev *dev =
1292 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1293 return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
/* Board ID string (bounded by MLX5_BOARD_ID_LEN). */
1296 static ssize_t show_board(struct device *device, struct device_attribute *attr,
1299 struct mlx5_ib_dev *dev =
1300 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1301 return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
1302 dev->mdev->board_id);
/* Read-only sysfs attributes registered at device-add time. */
1305 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
1306 static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
1307 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
1308 static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
1309 static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
1310 static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
/* Attribute table (entries partially elided in this extract). */
1312 static struct device_attribute *mlx5_class_attributes[] = {
1318 &dev_attr_reg_pages,
/*
 * mlx5_ib_handle_internal_error() - reset-flow recovery.  Walks every QP
 * on the device (under reset_flow_resource_lock), and for each QP with
 * outstanding send or receive work collects its CQ (once, guarded by
 * reset_notify_added) onto cq_armed_list so completion handlers can be
 * invoked after the walk.  SRQ-attached receive queues are skipped.
 */
1321 static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
1323 struct mlx5_ib_qp *mqp;
1324 struct mlx5_ib_cq *send_mcq, *recv_mcq;
1325 struct mlx5_core_cq *mcq;
1326 struct list_head cq_armed_list;
1327 unsigned long flags_qp;
1328 unsigned long flags_cq;
1329 unsigned long flags;
1331 mlx5_ib_warn(ibdev, " started\n");
1332 INIT_LIST_HEAD(&cq_armed_list);
1334 /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/
1335 spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
1336 list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
/* Send side: queue the send CQ if work is outstanding. */
1337 spin_lock_irqsave(&mqp->sq.lock, flags_qp);
1338 if (mqp->sq.tail != mqp->sq.head) {
1339 send_mcq = to_mcq(mqp->ibqp.send_cq);
1340 spin_lock_irqsave(&send_mcq->lock, flags_cq);
1341 if (send_mcq->mcq.comp &&
1342 mqp->ibqp.send_cq->comp_handler) {
1343 if (!send_mcq->mcq.reset_notify_added) {
1344 send_mcq->mcq.reset_notify_added = 1;
1345 list_add_tail(&send_mcq->mcq.reset_notify,
1349 spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
1351 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
/* Receive side: same treatment, unless the QP uses an SRQ. */
1352 spin_lock_irqsave(&mqp->rq.lock, flags_qp);
1353 /* no handling is needed for SRQ */
1354 if (!mqp->ibqp.srq) {
1355 if (mqp->rq.tail != mqp->rq.head) {
1356 recv_mcq = to_mcq(mqp->ibqp.recv_cq);
1357 spin_lock_irqsave(&recv_mcq->lock, flags_cq);
1358 if (recv_mcq->mcq.comp &&
1359 mqp->ibqp.recv_cq->comp_handler) {
1360 if (!recv_mcq->mcq.reset_notify_added) {
1361 recv_mcq->mcq.reset_notify_added = 1;
1362 list_add_tail(&recv_mcq->mcq.reset_notify,
1366 spin_unlock_irqrestore(&recv_mcq->lock,
1370 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
1372 /*At that point all inflight post send were put to be executed as of we
1373 * lock/unlock above locks Now need to arm all involved CQs.
1375 list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
1378 spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
1379 mlx5_ib_warn(ibdev, " ended\n");
/*
 * Core-driver event callback: translate mlx5 core events into IB verbs
 * events and dispatch them to registered consumers via
 * ib_dispatch_event().  SYS_ERROR additionally marks the device
 * inactive and runs the internal-error CQ flush.
 */
1383 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1384 enum mlx5_dev_event event, unsigned long param)
1386 struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
1387 struct ib_event ibev;
1392 case MLX5_DEV_EVENT_SYS_ERROR:
1393 ibdev->ib_active = false;
1394 ibev.event = IB_EVENT_DEVICE_FATAL;
1395 mlx5_ib_handle_internal_error(ibdev);
1398 case MLX5_DEV_EVENT_PORT_UP:
1399 ibev.event = IB_EVENT_PORT_ACTIVE;
1403 case MLX5_DEV_EVENT_PORT_DOWN:
1404 case MLX5_DEV_EVENT_PORT_INITIALIZED:
1405 ibev.event = IB_EVENT_PORT_ERR;
1409 case MLX5_DEV_EVENT_LID_CHANGE:
1410 ibev.event = IB_EVENT_LID_CHANGE;
1414 case MLX5_DEV_EVENT_PKEY_CHANGE:
1415 ibev.event = IB_EVENT_PKEY_CHANGE;
1419 case MLX5_DEV_EVENT_GUID_CHANGE:
1420 ibev.event = IB_EVENT_GID_CHANGE;
1424 case MLX5_DEV_EVENT_CLIENT_REREG:
1425 ibev.event = IB_EVENT_CLIENT_REREGISTER;
1433 ibev.device = &ibdev->ib_dev;
1434 ibev.element.port_num = port;
/* Port-scoped events must carry a valid port number; SYS_ERROR is
 * device-scoped so its port is not validated. */
1436 if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
1437 (port < 1 || port > ibdev->num_ports)) {
1438 mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
/* Only deliver events while the device is registered and active. */
1442 if (ibdev->ib_active)
1443 ib_dispatch_event(&ibev);
/* Query extended (MAD-based) port capabilities for every physical port. */
1446 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
1450 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
1451 mlx5_query_ext_port_caps(dev, port);
/*
 * Cache the queried atomic capability on the device and turn on the
 * atomic responder when atomics are supported at HCA or global scope.
 */
1454 static void config_atomic_responder(struct mlx5_ib_dev *dev,
1455 struct ib_device_attr *props)
1457 enum ib_atomic_cap cap = props->atomic_cap;
1460 if (cap == IB_ATOMIC_HCA ||
1461 cap == IB_ATOMIC_GLOB)
1463 dev->enable_atomic_resp = 1;
1465 dev->atomic_cap = cap;
/* Address alignment requirements, in bytes (0 == no alignment needed). */
1468 enum mlx5_addr_align {
1469 MLX5_ADDR_ALIGN_0 = 0,
1470 MLX5_ADDR_ALIGN_64 = 64,
1471 MLX5_ADDR_ALIGN_128 = 128,
/*
 * Query device-wide and per-port attributes and cache the pkey/gid
 * table sizes in mdev->port_caps; also configures the atomic responder
 * from the queried device attributes.  Returns 0 or a negative errno.
 */
1474 static int get_port_caps(struct mlx5_ib_dev *dev)
1476 struct ib_device_attr *dprops = NULL;
1477 struct ib_port_attr *pprops = NULL;
1481 pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1485 dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1489 err = mlx5_ib_query_device(&dev->ib_dev, dprops);
1491 mlx5_ib_warn(dev, "query_device failed %d\n", err);
1494 config_atomic_responder(dev, dprops);
1496 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
1497 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1499 mlx5_ib_warn(dev, "query_port %d failed %d\n",
/* pkey table size comes from the device-wide limit; gid table size is
 * taken from the per-port query. */
1503 dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
1504 dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
1505 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1506 dprops->max_pkeys, pprops->gid_tbl_len);
/* Tear down UMR resources: the MR cache, then the DMA MR and its PD. */
1516 static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1520 err = mlx5_mr_cache_cleanup(dev);
1522 mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1524 ib_dereg_mr(dev->umrc.mr);
1525 ib_dealloc_pd(dev->umrc.pd);
/*
 * Create the resources used for UMR (user-memory-registration)
 * operations: a PD, a local-write DMA MR, and the MR cache.
 */
1532 static int create_umr_res(struct mlx5_ib_dev *dev)
1538 pd = ib_alloc_pd(&dev->ib_dev);
1540 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1545 mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
1547 mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
1555 ret = mlx5_mr_cache_init(dev);
1557 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
/*
 * Create kernel-owned resources shared across the device: PD p0, CQ c0,
 * two XRC domains x0/x1, an XRC SRQ s0 and a basic SRQ s1.  The objects
 * are created through the driver's own verbs entry points with NULL
 * ucontext/udata, so the generic ib_* bookkeeping fields (device,
 * uobject, usecnt, ...) that the uverbs layer would normally fill in
 * are initialized by hand here.  Error paths unwind in reverse order.
 */
1571 static int create_dev_resources(struct mlx5_ib_resources *devr)
1573 struct ib_srq_init_attr attr;
1574 struct mlx5_ib_dev *dev;
1576 struct ib_cq_init_attr cq_attr = { .cqe = 1 };
1578 dev = container_of(devr, struct mlx5_ib_dev, devr);
1580 devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1581 if (IS_ERR(devr->p0)) {
1582 ret = PTR_ERR(devr->p0);
1585 devr->p0->device = &dev->ib_dev;
1586 devr->p0->uobject = NULL;
1587 atomic_set(&devr->p0->usecnt, 0);
1589 devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
1590 if (IS_ERR(devr->c0)) {
1591 ret = PTR_ERR(devr->c0);
1594 devr->c0->device = &dev->ib_dev;
1595 devr->c0->uobject = NULL;
1596 devr->c0->comp_handler = NULL;
1597 devr->c0->event_handler = NULL;
1598 devr->c0->cq_context = NULL;
1599 atomic_set(&devr->c0->usecnt, 0);
1601 devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1602 if (IS_ERR(devr->x0)) {
1603 ret = PTR_ERR(devr->x0);
1606 devr->x0->device = &dev->ib_dev;
1607 devr->x0->inode = NULL;
1608 atomic_set(&devr->x0->usecnt, 0);
1609 mutex_init(&devr->x0->tgt_qp_mutex);
1610 INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
1612 devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1613 if (IS_ERR(devr->x1)) {
1614 ret = PTR_ERR(devr->x1);
1617 devr->x1->device = &dev->ib_dev;
1618 devr->x1->inode = NULL;
1619 atomic_set(&devr->x1->usecnt, 0);
1620 mutex_init(&devr->x1->tgt_qp_mutex);
1621 INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
/* s0: minimal XRC SRQ bound to c0/x0. */
1623 memset(&attr, 0, sizeof(attr));
1624 attr.attr.max_sge = 1;
1625 attr.attr.max_wr = 1;
1626 attr.srq_type = IB_SRQT_XRC;
1627 attr.ext.xrc.cq = devr->c0;
1628 attr.ext.xrc.xrcd = devr->x0;
1630 devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1631 if (IS_ERR(devr->s0)) {
1632 ret = PTR_ERR(devr->s0);
1635 devr->s0->device = &dev->ib_dev;
1636 devr->s0->pd = devr->p0;
1637 devr->s0->uobject = NULL;
1638 devr->s0->event_handler = NULL;
1639 devr->s0->srq_context = NULL;
1640 devr->s0->srq_type = IB_SRQT_XRC;
1641 devr->s0->ext.xrc.xrcd = devr->x0;
1642 devr->s0->ext.xrc.cq = devr->c0;
/* Take references on everything s0 points at, mirroring uverbs. */
1643 atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
1644 atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
1645 atomic_inc(&devr->p0->usecnt);
1646 atomic_set(&devr->s0->usecnt, 0);
/* s1: minimal basic SRQ on p0. */
1648 memset(&attr, 0, sizeof(attr));
1649 attr.attr.max_sge = 1;
1650 attr.attr.max_wr = 1;
1651 attr.srq_type = IB_SRQT_BASIC;
1652 devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1653 if (IS_ERR(devr->s1)) {
1654 ret = PTR_ERR(devr->s1);
1657 devr->s1->device = &dev->ib_dev;
1658 devr->s1->pd = devr->p0;
1659 devr->s1->uobject = NULL;
1660 devr->s1->event_handler = NULL;
1661 devr->s1->srq_context = NULL;
1662 devr->s1->srq_type = IB_SRQT_BASIC;
1663 devr->s1->ext.xrc.cq = devr->c0;
1664 atomic_inc(&devr->p0->usecnt);
1665 atomic_set(&devr->s1->usecnt, 0);
/* Error unwind: destroy in reverse order of creation. */
1670 mlx5_ib_destroy_srq(devr->s0);
1672 mlx5_ib_dealloc_xrcd(devr->x1);
1674 mlx5_ib_dealloc_xrcd(devr->x0);
1676 mlx5_ib_destroy_cq(devr->c0);
1678 mlx5_ib_dealloc_pd(devr->p0);
/* Release the shared device resources created by create_dev_resources(). */
1683 static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1685 mlx5_ib_destroy_srq(devr->s1);
1686 mlx5_ib_destroy_srq(devr->s0);
1687 mlx5_ib_dealloc_xrcd(devr->x0);
1688 mlx5_ib_dealloc_xrcd(devr->x1);
1689 mlx5_ib_destroy_cq(devr->c0);
1690 mlx5_ib_dealloc_pd(devr->p0);
/*
 * Allocate and DMA-map a trace buffer (4KB per port) and hand it to
 * firmware as the DC CNAK trace buffer.  The buffer is pre-filled with
 * 0xff so firmware writes stand out.  Best-effort: failures only log.
 */
1693 static void enable_dc_tracer(struct mlx5_ib_dev *dev)
1695 struct device *device = dev->ib_dev.dma_device;
1696 struct mlx5_dc_tracer *dct = &dev->dctr;
1702 size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
1703 if (size <= PAGE_SIZE)
1708 dct->pg = alloc_pages(GFP_KERNEL, order);
1710 mlx5_ib_err(dev, "failed to allocate %d pages\n", order);
1714 tmp = page_address(dct->pg);
1715 memset(tmp, 0xff, size);
/* Device writes into the buffer, hence DMA_FROM_DEVICE. */
1719 dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
1720 if (dma_mapping_error(device, dct->dma)) {
1721 mlx5_ib_err(dev, "dma mapping error\n");
1725 err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
1727 mlx5_ib_warn(dev, "failed to enable DC tracer\n");
/* Error unwind: unmap and free the trace buffer. */
1734 dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
1736 __free_pages(dct->pg, dct->order);
/* Tell firmware to stop DC CNAK tracing, then unmap and free the buffer. */
1740 static void disable_dc_tracer(struct mlx5_ib_dev *dev)
1742 struct device *device = dev->ib_dev.dma_device;
1743 struct mlx5_dc_tracer *dct = &dev->dctr;
1749 err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
1751 mlx5_ib_warn(dev, "failed to disable DC tracer\n");
1755 dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
1756 __free_pages(dct->pg, dct->order);
/* DC CNAK tuning constants: per-entry buffer size, entries per page,
 * TX CQ signalling interval, and the SL/VL used for CNAK packets. */
1761 MLX5_DC_CNAK_SIZE = 128,
1762 MLX5_NUM_BUF_IN_PAGE = PAGE_SIZE / MLX5_DC_CNAK_SIZE,
1763 MLX5_CNAK_TX_CQ_SIGNAL_FACTOR = 128,
1764 MLX5_DC_CNAK_SL = 0,
1765 MLX5_DC_CNAK_VL = 0,
/* Enable DC improvements (CNAK tracing) on PFs whose firmware supports it. */
1768 static int init_dc_improvements(struct mlx5_ib_dev *dev)
1770 if (!mlx5_core_is_pf(dev->mdev))
1773 if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
1776 enable_dc_tracer(dev);
/* Counterpart of init_dc_improvements(): stop the DC tracer. */
1781 static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
1784 disable_dc_tracer(dev);
/* Free one port's queue counter and clear its cached id. */
1787 static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
1789 mlx5_vport_dealloc_q_counter(dev->mdev,
1790 MLX5_INTERFACE_PROTOCOL_IB,
1791 dev->port[port_num].q_cnt_id);
1792 dev->port[port_num].q_cnt_id = 0;
/* Free the queue counters of all ports. */
1795 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
1799 for (i = 0; i < dev->num_ports; i++)
1800 mlx5_ib_dealloc_q_port_counter(dev, i);
/*
 * Allocate one queue counter per port; on failure, release the
 * counters allocated so far and return the error.
 */
1803 static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
1808 for (i = 0; i < dev->num_ports; i++) {
1809 ret = mlx5_vport_alloc_q_counter(dev->mdev,
1810 MLX5_INTERFACE_PROTOCOL_IB,
1811 &dev->port[i].q_cnt_id);
1814 "couldn't allocate queue counter for port %d\n",
1816 goto dealloc_counters;
/* Unwind: free the counters of ports allocated before the failure. */
1824 mlx5_ib_dealloc_q_port_counter(dev, i);
/* Generic sysfs attribute for a per-port kobject, with optional
 * show/store callbacks that receive the owning mlx5_ib_port. */
1829 struct port_attribute {
1830 struct attribute attr;
1831 ssize_t (*show)(struct mlx5_ib_port *,
1832 struct port_attribute *, char *buf);
1833 ssize_t (*store)(struct mlx5_ib_port *,
1834 struct port_attribute *,
1835 const char *buf, size_t count);
/* Port attribute bound to one queue-counter field, identified by its
 * byte offset into the query_q_counter_out mailbox. */
1838 struct port_counter_attribute {
1839 struct port_attribute attr;
/*
 * sysfs_ops.show trampoline: recover the port_attribute and the owning
 * mlx5_ib_port from the embedded kobject, then forward to ->show.
 */
1843 static ssize_t port_attr_show(struct kobject *kobj,
1844 struct attribute *attr, char *buf)
1846 struct port_attribute *port_attr =
1847 container_of(attr, struct port_attribute, attr);
1848 struct mlx5_ib_port_sysfs_group *p =
1849 container_of(kobj, struct mlx5_ib_port_sysfs_group,
1851 struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
1854 if (!port_attr->show)
1857 return port_attr->show(mibport, port_attr, buf);
/*
 * Query this port's queue counter from firmware and print the 32-bit
 * big-endian field found at the attribute's mailbox offset.
 */
1860 static ssize_t show_port_counter(struct mlx5_ib_port *p,
1861 struct port_attribute *port_attr,
1864 int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
1865 struct port_counter_attribute *counter_attr =
1866 container_of(port_attr, struct port_counter_attribute, attr);
1870 out = mlx5_vzalloc(outlen);
1874 ret = mlx5_vport_query_q_counter(p->dev->mdev,
/* Counter fields are big-endian in the firmware mailbox. */
1880 ret = sprintf(buf, "%d\n",
1881 be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));
/* Declare a read-only counter attribute whose offset is the named
 * field's position in the query_q_counter_out mailbox layout. */
1888 #define PORT_COUNTER_ATTR(_name) \
1889 struct port_counter_attribute port_counter_attr_##_name = { \
1890 .attr = __ATTR(_name, S_IRUGO, show_port_counter, NULL), \
1891 .offset = MLX5_BYTE_OFF(query_q_counter_out, _name) \
1894 static PORT_COUNTER_ATTR(rx_write_requests);
1895 static PORT_COUNTER_ATTR(rx_read_requests);
1896 static PORT_COUNTER_ATTR(rx_atomic_requests);
1897 static PORT_COUNTER_ATTR(rx_dct_connect);
1898 static PORT_COUNTER_ATTR(out_of_buffer);
1899 static PORT_COUNTER_ATTR(out_of_sequence);
1900 static PORT_COUNTER_ATTR(duplicate_request);
1901 static PORT_COUNTER_ATTR(rnr_nak_retry_err);
1902 static PORT_COUNTER_ATTR(packet_seq_err);
1903 static PORT_COUNTER_ATTR(implied_nak_seq_err);
1904 static PORT_COUNTER_ATTR(local_ack_timeout_err);
/* Attribute list backing the per-port counters sysfs group. */
1906 static struct attribute *counter_attrs[] = {
1907 &port_counter_attr_rx_write_requests.attr.attr,
1908 &port_counter_attr_rx_read_requests.attr.attr,
1909 &port_counter_attr_rx_atomic_requests.attr.attr,
1910 &port_counter_attr_rx_dct_connect.attr.attr,
1911 &port_counter_attr_out_of_buffer.attr.attr,
1912 &port_counter_attr_out_of_sequence.attr.attr,
1913 &port_counter_attr_duplicate_request.attr.attr,
1914 &port_counter_attr_rnr_nak_retry_err.attr.attr,
1915 &port_counter_attr_packet_seq_err.attr.attr,
1916 &port_counter_attr_implied_nak_seq_err.attr.attr,
1917 &port_counter_attr_local_ack_timeout_err.attr.attr,
/* sysfs plumbing: the counters group, show-only ops, and the kobj type
 * used for each per-port kobject. */
1921 static struct attribute_group port_counters_group = {
1923 .attrs = counter_attrs
1926 static const struct sysfs_ops port_sysfs_ops = {
1927 .show = port_attr_show
1930 static struct kobj_type port_type = {
1931 .sysfs_ops = &port_sysfs_ops,
/*
 * Create one port's kobject under @parent and, when the device exposes
 * the relevant counters, attach the counters attribute group to it.
 */
1934 static int add_port_attrs(struct mlx5_ib_dev *dev,
1935 struct kobject *parent,
1936 struct mlx5_ib_port_sysfs_group *port,
1941 ret = kobject_init_and_add(&port->kobj, &port_type,
/* The counter group requires both capability bits. */
1947 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
1948 MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
1949 ret = sysfs_create_group(&port->kobj, &port_counters_group);
1954 port->enabled = true;
/* Error unwind: drop the reference taken by kobject_init_and_add(). */
1958 kobject_put(&port->kobj);
/*
 * Remove the sysfs groups/kobjects of the first @num_ports ports and
 * drop the shared "mlx5_ports" parent kobject.
 */
1962 static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
1963 unsigned int num_ports)
1967 for (i = 0; i < num_ports; i++) {
1968 struct mlx5_ib_port_sysfs_group *port =
1969 &dev->port[i].group;
1974 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
1975 MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
1976 sysfs_remove_group(&port->kobj,
1977 &port_counters_group);
1978 kobject_put(&port->kobj);
1979 port->enabled = false;
1982 if (dev->ports_parent) {
1983 kobject_put(dev->ports_parent);
1984 dev->ports_parent = NULL;
/*
 * Create the "mlx5_ports" parent kobject plus one child per port; on a
 * partial failure tear down only the i ports created so far.
 */
1988 static int create_port_attrs(struct mlx5_ib_dev *dev)
1992 struct device *device = &dev->ib_dev.dev;
1994 dev->ports_parent = kobject_create_and_add("mlx5_ports",
1996 if (!dev->ports_parent)
1999 for (i = 0; i < dev->num_ports; i++) {
2000 ret = add_port_attrs(dev,
2002 &dev->port[i].group,
2006 goto _destroy_ports_attrs;
2011 _destroy_ports_attrs:
2012 destroy_ports_attrs(dev, i);
/*
 * mlx5_core attach callback: allocate and populate the mlx5_ib_dev,
 * query capabilities, wire up all ib_device verbs entry points,
 * register with the IB core, create device/UMR/DC/counter/sysfs
 * resources, and (FreeBSD) spawn one RoCE port-update kthread per
 * port.  Error labels unwind the setup steps in reverse order.
 */
2016 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2018 struct mlx5_ib_dev *dev;
2022 printk_once(KERN_INFO "%s", mlx5_version);
2024 dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
2030 dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
2035 for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2036 dev->port[i].dev = dev;
2037 dev->port[i].port_num = i;
2038 dev->port[i].port_gone = 0;
2039 memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table));
2042 err = get_port_caps(dev);
2046 if (mlx5_use_mad_ifc(dev))
2047 get_ext_port_caps(dev);
/* RoCE mode: enable RoCE on the NIC vport when supported. */
2049 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2050 IB_LINK_LAYER_ETHERNET) {
2051 if (MLX5_CAP_GEN(mdev, roce)) {
2052 err = mlx5_nic_vport_enable_roce(mdev);
2060 MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
/* Fill in the ib_device identity, limits and uverbs command mask. */
2062 strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
2063 dev->ib_dev.owner = THIS_MODULE;
2064 dev->ib_dev.node_type = RDMA_NODE_IB_CA;
2065 dev->ib_dev.local_dma_lkey = mdev->special_contexts.resd_lkey;
2066 dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
2067 dev->ib_dev.phys_port_cnt = dev->num_ports;
2068 dev->ib_dev.num_comp_vectors =
2069 dev->mdev->priv.eq_table.num_comp_vectors;
2070 dev->ib_dev.dma_device = &mdev->pdev->dev;
2072 dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
2073 dev->ib_dev.uverbs_cmd_mask =
2074 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
2075 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
2076 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
2077 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
2078 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
2079 (1ull << IB_USER_VERBS_CMD_REG_MR) |
2080 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
2081 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2082 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
2083 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
2084 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
2085 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
2086 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
2087 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
2088 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
2089 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
2090 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
2091 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
2092 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
2093 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
2094 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
2095 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
2096 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
/* Verbs entry points implemented by this driver. */
2098 dev->ib_dev.query_device = mlx5_ib_query_device;
2099 dev->ib_dev.query_port = mlx5_ib_query_port;
2100 dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
2101 dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
2102 dev->ib_dev.query_gid = mlx5_ib_query_gid;
2103 dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
2104 dev->ib_dev.modify_device = mlx5_ib_modify_device;
2105 dev->ib_dev.modify_port = mlx5_ib_modify_port;
2106 dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
2107 dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
2108 dev->ib_dev.mmap = mlx5_ib_mmap;
2109 dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
2110 dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
2111 dev->ib_dev.create_ah = mlx5_ib_create_ah;
2112 dev->ib_dev.query_ah = mlx5_ib_query_ah;
2113 dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
2114 dev->ib_dev.create_srq = mlx5_ib_create_srq;
2115 dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
2116 dev->ib_dev.query_srq = mlx5_ib_query_srq;
2117 dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
2118 dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
2119 dev->ib_dev.create_qp = mlx5_ib_create_qp;
2120 dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
2121 dev->ib_dev.query_qp = mlx5_ib_query_qp;
2122 dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
2123 dev->ib_dev.post_send = mlx5_ib_post_send;
2124 dev->ib_dev.post_recv = mlx5_ib_post_recv;
2125 dev->ib_dev.create_cq = mlx5_ib_create_cq;
2126 dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
2127 dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
2128 dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
2129 dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
2130 dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
2131 dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
2132 dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
2133 dev->ib_dev.reg_phys_mr = mlx5_ib_reg_phys_mr;
2134 dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
2135 dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
2136 dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
2137 dev->ib_dev.process_mad = mlx5_ib_process_mad;
2138 dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
2139 dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
2140 dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
/* XRC verbs only when the device reports the capability. */
2142 if (MLX5_CAP_GEN(mdev, xrc)) {
2143 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
2144 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
2145 dev->ib_dev.uverbs_cmd_mask |=
2146 (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2147 (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2150 err = init_node_data(dev);
2152 goto err_disable_roce;
2154 mutex_init(&dev->cap_mask_mutex);
2155 INIT_LIST_HEAD(&dev->qp_list);
2156 spin_lock_init(&dev->reset_flow_resource_lock);
2158 err = create_dev_resources(&dev->devr);
2160 goto err_disable_roce;
2163 err = mlx5_ib_alloc_q_counters(dev);
2167 err = ib_register_device(&dev->ib_dev, NULL);
2171 err = create_umr_res(dev);
2175 if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2176 MLX5_CAP_PORT_TYPE_IB) {
2177 if (init_dc_improvements(dev))
2178 mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
2181 err = create_port_attrs(dev);
2185 for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2186 err = device_create_file(&dev->ib_dev.dev,
2187 mlx5_class_attributes[i]);
2189 goto err_port_attrs;
/* FreeBSD: one kernel thread per port to track RoCE port state. */
2193 struct thread *rl_thread = NULL;
2194 struct proc *rl_proc = NULL;
2196 for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2197 (void) kproc_kthread_add(mlx5_ib_roce_port_update, dev->port + i, &rl_proc, &rl_thread,
2198 RFHIGHPID, 0, "mlx5-ib-roce-port", "mlx5-ib-roce_port-%d", i);
2202 dev->ib_active = true;
/* Error unwind labels: reverse order of the setup above. */
2207 destroy_ports_attrs(dev, dev->num_ports);
2210 if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2211 MLX5_CAP_PORT_TYPE_IB)
2212 cleanup_dc_improvements(dev);
2213 destroy_umrc_res(dev);
2216 ib_unregister_device(&dev->ib_dev);
2219 mlx5_ib_dealloc_q_counters(dev);
2222 destroy_dev_resources(&dev->devr);
2225 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2226 IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2227 mlx5_nic_vport_disable_roce(mdev);
2232 ib_dealloc_device((struct ib_device *)dev);
/*
 * mlx5_core detach callback: signal the per-port RoCE update threads
 * to exit, then tear down sysfs, DC improvements, counters, the IB
 * registration, UMR and device resources, disable RoCE if it was
 * enabled, and free the ib_device.
 */
2237 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
2239 struct mlx5_ib_dev *dev = context;
/* Handshake with the port threads: each thread observes port_gone == 1
 * and sets it to 2 before exiting; wait for that transition. */
2242 for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2243 dev->port[i].port_gone = 1;
2244 while (dev->port[i].port_gone != 2)
2248 for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2249 device_remove_file(&dev->ib_dev.dev,
2250 mlx5_class_attributes[i]);
2253 destroy_ports_attrs(dev, dev->num_ports);
2254 if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2255 MLX5_CAP_PORT_TYPE_IB)
2256 cleanup_dc_improvements(dev);
2257 mlx5_ib_dealloc_q_counters(dev);
2258 ib_unregister_device(&dev->ib_dev);
2259 destroy_umrc_res(dev);
2260 destroy_dev_resources(&dev->devr);
2262 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2263 IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2264 mlx5_nic_vport_disable_roce(mdev);
2267 ib_dealloc_device(&dev->ib_dev);
/* Registration descriptor hooking this IB driver into mlx5_core. */
2270 static struct mlx5_interface mlx5_ib_interface = {
2272 .remove = mlx5_ib_remove,
2273 .event = mlx5_ib_event,
2274 .protocol = MLX5_INTERFACE_PROTOCOL_IB,
/*
 * Module init: register with mlx5_core, then create the driver-wide
 * single-threaded workqueue; unregisters again if the workqueue fails.
 */
2277 static int __init mlx5_ib_init(void)
2281 if (deprecated_prof_sel != 2)
2282 printf("mlx5_ib: WARN: ""prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
2284 err = mlx5_register_interface(&mlx5_ib_interface);
2288 mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
2290 printf("mlx5_ib: ERR: ""%s: failed to create mlx5_ib_wq\n", __func__);
/* Error unwind: undo the interface registration. */
2297 mlx5_unregister_interface(&mlx5_ib_interface);
/* Module teardown: destroy the workqueue, then detach from mlx5_core. */
2303 static void __exit mlx5_ib_cleanup(void)
2305 destroy_workqueue(mlx5_ib_wq);
2306 mlx5_unregister_interface(&mlx5_ib_interface);
/* FreeBSD: run init/cleanup at SI_ORDER_THIRD in module load/unload. */
2309 module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
2310 module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);