/*-
 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/vport.h>
#include <asm/pgtable.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <sys/unistd.h>

#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "3.2-rc1"
#define DRIVER_RELDATE "May 2016"

#include <sys/module.h>

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
MODULE_VERSION(mlx5ib, 1);
static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");

enum {
    MLX5_STANDARD_ATOMIC_SIZE = 0x8,
};

struct workqueue_struct *mlx5_ib_wq;

static char mlx5_version[] =
    DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
    DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
static void get_atomic_caps(struct mlx5_ib_dev *dev,
                            struct ib_device_attr *props)
{
    int tmp;
    u8 atomic_operations;
    u8 atomic_size_qp;
    u8 atomic_req_endianess;

    atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
    atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
    atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
                                           atomic_req_8B_endianess_mode) ||
                           !mlx5_host_is_le();

    tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
    if (((atomic_operations & tmp) == tmp) &&
        (atomic_size_qp & 8)) {
        if (atomic_req_endianess) {
            props->atomic_cap = IB_ATOMIC_HCA;
        } else {
            props->atomic_cap = IB_ATOMIC_NONE;
        }
    } else {
        props->atomic_cap = IB_ATOMIC_NONE;
    }

    tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
    if (((atomic_operations & tmp) == tmp) &&
        (atomic_size_qp & 8)) {
        if (atomic_req_endianess)
            props->masked_atomic_cap = IB_ATOMIC_HCA;
        else
            props->masked_atomic_cap = IB_ATOMIC_NONE;
    } else {
        props->masked_atomic_cap = IB_ATOMIC_NONE;
    }
}
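/*
 * Decision summary (illustrative, not from the original source): the
 * device reports IB_ATOMIC_HCA only when it supports both 8-byte
 * compare-swap and fetch-add and can honor host-endian atomic
 * requests; every other combination degrades to IB_ATOMIC_NONE.
 */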
static enum rdma_link_layer
mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
    struct mlx5_ib_dev *dev = to_mdev(device);

    switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
    case MLX5_CAP_PORT_TYPE_IB:
        return IB_LINK_LAYER_INFINIBAND;
    case MLX5_CAP_PORT_TYPE_ETH:
        return IB_LINK_LAYER_ETHERNET;
    default:
        return IB_LINK_LAYER_UNSPECIFIED;
    }
}
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
    /* devices at ISSI 0 are managed through the MAD interface */
    return !dev->mdev->issi;
}

enum {
    MLX5_VPORT_ACCESS_METHOD_MAD,
    MLX5_VPORT_ACCESS_METHOD_HCA,
    MLX5_VPORT_ACCESS_METHOD_NIC,
};
static int mlx5_get_vport_access_method(struct ib_device *ibdev)
{
    if (mlx5_use_mad_ifc(to_mdev(ibdev)))
        return MLX5_VPORT_ACCESS_METHOD_MAD;

    if (mlx5_ib_port_link_layer(ibdev, 1) ==
        IB_LINK_LAYER_ETHERNET)
        return MLX5_VPORT_ACCESS_METHOD_NIC;

    return MLX5_VPORT_ACCESS_METHOD_HCA;
}
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
                                        __be64 *sys_image_guid)
{
    struct mlx5_ib_dev *dev = to_mdev(ibdev);
    struct mlx5_core_dev *mdev = dev->mdev;
    u64 tmp;
    int err;

    switch (mlx5_get_vport_access_method(ibdev)) {
    case MLX5_VPORT_ACCESS_METHOD_MAD:
        return mlx5_query_system_image_guid_mad_ifc(ibdev,
                                                    sys_image_guid);

    case MLX5_VPORT_ACCESS_METHOD_HCA:
        err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
        if (!err)
            *sys_image_guid = cpu_to_be64(tmp);
        return err;

    case MLX5_VPORT_ACCESS_METHOD_NIC:
        err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
        if (!err)
            *sys_image_guid = cpu_to_be64(tmp);
        return err;

    default:
        return -EINVAL;
    }
}
static int mlx5_query_max_pkeys(struct ib_device *ibdev,
                                u16 *max_pkeys)
{
    struct mlx5_ib_dev *dev = to_mdev(ibdev);
    struct mlx5_core_dev *mdev = dev->mdev;

    switch (mlx5_get_vport_access_method(ibdev)) {
    case MLX5_VPORT_ACCESS_METHOD_MAD:
        return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);

    case MLX5_VPORT_ACCESS_METHOD_HCA:
    case MLX5_VPORT_ACCESS_METHOD_NIC:
        *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
                                        pkey_table_size));
        return 0;

    default:
        return -EINVAL;
    }
}
static int mlx5_query_vendor_id(struct ib_device *ibdev,
                                u32 *vendor_id)
{
    struct mlx5_ib_dev *dev = to_mdev(ibdev);

    switch (mlx5_get_vport_access_method(ibdev)) {
    case MLX5_VPORT_ACCESS_METHOD_MAD:
        return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);

    case MLX5_VPORT_ACCESS_METHOD_HCA:
    case MLX5_VPORT_ACCESS_METHOD_NIC:
        return mlx5_core_query_vendor_id(dev->mdev, vendor_id);

    default:
        return -EINVAL;
    }
}
static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
                                __be64 *node_guid)
{
    u64 tmp;
    int err;

    switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
    case MLX5_VPORT_ACCESS_METHOD_MAD:
        return mlx5_query_node_guid_mad_ifc(dev, node_guid);

    case MLX5_VPORT_ACCESS_METHOD_HCA:
        err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
        if (!err)
            *node_guid = cpu_to_be64(tmp);
        return err;

    case MLX5_VPORT_ACCESS_METHOD_NIC:
        err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
        if (!err)
            *node_guid = cpu_to_be64(tmp);
        return err;

    default:
        return -EINVAL;
    }
}
struct mlx5_reg_node_desc {
    u8 desc[64];
};
static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
    struct mlx5_reg_node_desc in;

    if (mlx5_use_mad_ifc(dev))
        return mlx5_query_node_desc_mad_ifc(dev, node_desc);

    memset(&in, 0, sizeof(in));

    return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
                                sizeof(struct mlx5_reg_node_desc),
                                MLX5_REG_NODE_DESC, 0, 0);
}
static int mlx5_ib_query_device(struct ib_device *ibdev,
                                struct ib_device_attr *props)
{
    struct mlx5_ib_dev *dev = to_mdev(ibdev);
    struct mlx5_core_dev *mdev = dev->mdev;
    int max_sq_desc;
    int max_rq_sg;
    int max_sq_sg;
    int err;

    memset(props, 0, sizeof(*props));

    err = mlx5_query_system_image_guid(ibdev,
                                       &props->sys_image_guid);
    if (err)
        return err;

    err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
    if (err)
        return err;

    err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
    if (err)
        return err;

    props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
        ((u64)fw_rev_min(dev->mdev) << 16) |
        fw_rev_sub(dev->mdev);
    props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
        IB_DEVICE_PORT_ACTIVE_EVENT |
        IB_DEVICE_SYS_IMAGE_GUID |
        IB_DEVICE_RC_RNR_NAK_GEN;

    if (MLX5_CAP_GEN(mdev, pkv))
        props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
    if (MLX5_CAP_GEN(mdev, qkv))
        props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
    if (MLX5_CAP_GEN(mdev, apm))
        props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
    props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
    if (MLX5_CAP_GEN(mdev, xrc))
        props->device_cap_flags |= IB_DEVICE_XRC;
    props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
    if (MLX5_CAP_GEN(mdev, block_lb_mc))
        props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;

    props->vendor_part_id = mdev->pdev->device;
    props->hw_ver = mdev->pdev->revision;

    props->max_mr_size = ~0ull;
    props->page_size_cap = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) - 1);
    props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
    props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
    max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
        sizeof(struct mlx5_wqe_data_seg);
    max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
    max_sq_sg = (max_sq_desc -
                 sizeof(struct mlx5_wqe_ctrl_seg) -
                 sizeof(struct mlx5_wqe_raddr_seg)) /
        sizeof(struct mlx5_wqe_data_seg);
    props->max_sge = min(max_rq_sg, max_sq_sg);
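    /*
     * Worked example (illustrative; assumes the standard 16-byte mlx5
     * WQE segments): with the send descriptor capped at 512 bytes,
     * max_sq_sg = (512 - 16 - 16) / 16 = 30, so max_sge is bounded by
     * 30 on the send side.
     */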
    props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
    props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
    props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
    props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
    props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
    props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
    props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
    props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
    props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
    props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
    props->max_srq_sge = max_rq_sg - 1;
    props->max_fast_reg_page_list_len = (unsigned int)-1;
    get_atomic_caps(dev, props);
    props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
    props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
    props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                                       props->max_mcast_grp;
    props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
    props->max_ah = INT_MAX;

    return 0;
}
enum mlx5_ib_width {
    MLX5_IB_WIDTH_1X  = 1 << 0,
    MLX5_IB_WIDTH_2X  = 1 << 1,
    MLX5_IB_WIDTH_4X  = 1 << 2,
    MLX5_IB_WIDTH_8X  = 1 << 3,
    MLX5_IB_WIDTH_12X = 1 << 4
};
static int translate_active_width(struct ib_device *ibdev, u8 active_width,
                                  u8 *ib_width)
{
    struct mlx5_ib_dev *dev = to_mdev(ibdev);
    int err = 0;

    if (active_width & MLX5_IB_WIDTH_1X) {
        *ib_width = IB_WIDTH_1X;
    } else if (active_width & MLX5_IB_WIDTH_2X) {
        mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
                     (int)active_width);
        err = -EINVAL;
    } else if (active_width & MLX5_IB_WIDTH_4X) {
        *ib_width = IB_WIDTH_4X;
    } else if (active_width & MLX5_IB_WIDTH_8X) {
        *ib_width = IB_WIDTH_8X;
    } else if (active_width & MLX5_IB_WIDTH_12X) {
        *ib_width = IB_WIDTH_12X;
    } else {
        mlx5_ib_dbg(dev, "Invalid active_width %d\n",
                    (int)active_width);
        err = -EINVAL;
    }

    return err;
}
/*
 * TODO: Move to IB core
 */
enum ib_max_vl_num {
    __IB_MAX_VL_0    = 1,
    __IB_MAX_VL_0_1  = 2,
    __IB_MAX_VL_0_3  = 3,
    __IB_MAX_VL_0_7  = 4,
    __IB_MAX_VL_0_14 = 5,
};

enum mlx5_vl_hw_cap {
    MLX5_VL_HW_0    = 1,
    MLX5_VL_HW_0_1  = 2,
    MLX5_VL_HW_0_2  = 3,
    MLX5_VL_HW_0_3  = 4,
    MLX5_VL_HW_0_4  = 5,
    MLX5_VL_HW_0_5  = 6,
    MLX5_VL_HW_0_6  = 7,
    MLX5_VL_HW_0_7  = 8,
    MLX5_VL_HW_0_14 = 15
};
static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
                                u8 *max_vl_num)
{
    switch (vl_hw_cap) {
    case MLX5_VL_HW_0:
        *max_vl_num = __IB_MAX_VL_0;
        break;
    case MLX5_VL_HW_0_1:
        *max_vl_num = __IB_MAX_VL_0_1;
        break;
    case MLX5_VL_HW_0_3:
        *max_vl_num = __IB_MAX_VL_0_3;
        break;
    case MLX5_VL_HW_0_7:
        *max_vl_num = __IB_MAX_VL_0_7;
        break;
    case MLX5_VL_HW_0_14:
        *max_vl_num = __IB_MAX_VL_0_14;
        break;

    default:
        return -EINVAL;
    }

    return 0;
}
static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
                              struct ib_port_attr *props)
{
    struct mlx5_ib_dev *dev = to_mdev(ibdev);
    struct mlx5_core_dev *mdev = dev->mdev;
    u32 *rep;
    int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
    struct mlx5_ptys_reg *ptys;
    struct mlx5_pmtu_reg *pmtu;
    struct mlx5_pvlc_reg pvlc;
    void *ctx;
    int err;

    rep = mlx5_vzalloc(outlen);
    ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
    pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
    if (!rep || !ptys || !pmtu) {
        err = -ENOMEM;
        goto out;
    }

    memset(props, 0, sizeof(*props));

    /* what if I am pf with dual port */
    err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
    if (err)
        goto out;

    ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);

    props->lid = MLX5_GET(hca_vport_context, ctx, lid);
    props->lmc = MLX5_GET(hca_vport_context, ctx, lmc);
    props->sm_lid = MLX5_GET(hca_vport_context, ctx, sm_lid);
    props->sm_sl = MLX5_GET(hca_vport_context, ctx, sm_sl);
    props->state = MLX5_GET(hca_vport_context, ctx, vport_state);
    props->phys_state = MLX5_GET(hca_vport_context, ctx,
                                 port_physical_state);
    props->port_cap_flags = MLX5_GET(hca_vport_context, ctx, cap_mask1);
    props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
    props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
    props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
    props->bad_pkey_cntr = MLX5_GET(hca_vport_context, ctx,
                                    pkey_violation_counter);
    props->qkey_viol_cntr = MLX5_GET(hca_vport_context, ctx,
                                     qkey_violation_counter);
    props->subnet_timeout = MLX5_GET(hca_vport_context, ctx,
                                     subnet_timeout);
    props->init_type_reply = MLX5_GET(hca_vport_context, ctx,
                                      init_type_reply);

    ptys->proto_mask |= MLX5_PTYS_IB;
    ptys->local_port = port;
    err = mlx5_core_access_ptys(mdev, ptys, 0);
    if (err)
        goto out;

    err = translate_active_width(ibdev, ptys->ib_link_width_oper,
                                 &props->active_width);
    if (err)
        goto out;

    props->active_speed = (u8)ptys->ib_proto_oper;

    pmtu->local_port = port;
    err = mlx5_core_access_pmtu(mdev, pmtu, 0);
    if (err)
        goto out;

    props->max_mtu = pmtu->max_mtu;
    props->active_mtu = pmtu->oper_mtu;

    memset(&pvlc, 0, sizeof(pvlc));
    pvlc.local_port = port;
    err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
    if (err)
        goto out;

    err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
                               &props->max_vl_num);
out:
    kvfree(rep);
    kfree(ptys);
    kfree(pmtu);
    return err;
}
int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
                       struct ib_port_attr *props)
{
    switch (mlx5_get_vport_access_method(ibdev)) {
    case MLX5_VPORT_ACCESS_METHOD_MAD:
        return mlx5_query_port_mad_ifc(ibdev, port, props);

    case MLX5_VPORT_ACCESS_METHOD_HCA:
        return mlx5_query_port_ib(ibdev, port, props);

    case MLX5_VPORT_ACCESS_METHOD_NIC:
        return mlx5_query_port_roce(ibdev, port, props);

    default:
        return -EINVAL;
    }
}
static int
mlx5_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
{
    if (dev->if_addrlen != ETH_ALEN)
        return -1;
    memcpy(eui, IF_LLADDR(dev), 3);
    memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);

    /* NOTE: The scope ID is added by the GID to IP conversion */

    eui[3] = 0xFF;
    eui[4] = 0xFE;
    eui[0] ^= 2;    /* toggle the universal/local bit */
    return 0;
}
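/*
 * Worked example (illustrative only): the MAC address 00:25:90:aa:bb:cc
 * yields the interface ID 02:25:90:ff:fe:aa:bb:cc -- the first and last
 * three MAC bytes land in eui[0..2] and eui[5..7], 0xff/0xfe fill
 * eui[3..4], and the universal/local bit of eui[0] is flipped.
 */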
static void
mlx5_make_default_gid(struct net_device *dev, union ib_gid *gid)
{
    gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
    mlx5_addrconf_ifid_eui48(&gid->raw[8], dev);
}
static void
mlx5_ip2gid(const struct sockaddr *addr, union ib_gid *gid)
{
    switch (addr->sa_family) {
    case AF_INET:
        ipv6_addr_set_v4mapped(((const struct sockaddr_in *)addr)->sin_addr.s_addr,
            (struct in6_addr *)gid->raw);
        break;
    case AF_INET6:
        memcpy(gid->raw, &((const struct sockaddr_in6 *)addr)->sin6_addr, 16);
        break;
    default:
        break;
    }
}
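/*
 * Layout example (illustrative only): for the IPv4 address 192.0.2.1,
 * ipv6_addr_set_v4mapped() produces the v4-mapped GID ::ffff:192.0.2.1,
 * i.e. raw[0..9] = 0x00, raw[10..11] = 0xff 0xff and
 * raw[12..15] = 0xc0 0x00 0x02 0x01.
 */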
static void
mlx5_ib_roce_port_update(void *arg)
{
    struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg;
    struct mlx5_ib_dev *dev = port->dev;
    struct mlx5_core_dev *mdev = dev->mdev;
    struct net_device *xdev[MLX5_IB_GID_MAX];
    struct net_device *idev;
    struct net_device *ndev;
    struct ifaddr *ifa;
    union ib_gid gid_temp;

    while (port->port_gone == 0) {
        int update = 0;
        int gid_index = 0;
        int j;
        int error;

        ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH);
        if (ndev == NULL) {
            pause("W", hz);
            continue;
        }

        CURVNET_SET_QUIET(ndev->if_vnet);

        memset(&gid_temp, 0, sizeof(gid_temp));
        mlx5_make_default_gid(ndev, &gid_temp);
        if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
            port->gid_table[gid_index] = gid_temp;
            update = 1;
        }
        xdev[gid_index] = ndev;
        gid_index++;

        IFNET_RLOCK();
        TAILQ_FOREACH(idev, &V_ifnet, if_link) {
            if (idev == ndev)
                break;
        }
        if (idev != NULL) {
            TAILQ_FOREACH(idev, &V_ifnet, if_link) {
                if (idev != ndev) {
                    if (idev->if_type != IFT_L2VLAN)
                        continue;
                    if (ndev != rdma_vlan_dev_real_dev(idev))
                        continue;
                }
                /* clone address information for IPv4 and IPv6 */
                IF_ADDR_RLOCK(idev);
                TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
                    if (ifa->ifa_addr == NULL ||
                        (ifa->ifa_addr->sa_family != AF_INET &&
                         ifa->ifa_addr->sa_family != AF_INET6) ||
                        gid_index >= MLX5_IB_GID_MAX)
                        continue;
                    memset(&gid_temp, 0, sizeof(gid_temp));
                    mlx5_ip2gid(ifa->ifa_addr, &gid_temp);
                    /* check for existing entry */
                    for (j = 0; j != gid_index; j++) {
                        if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0)
                            break;
                    }
                    /* check if new entry must be added */
                    if (j == gid_index) {
                        if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
                            port->gid_table[gid_index] = gid_temp;
                            update = 1;
                        }
                        xdev[gid_index] = idev;
                        gid_index++;
                    }
                }
                IF_ADDR_RUNLOCK(idev);
            }
        }
        IFNET_RUNLOCK();
        CURVNET_RESTORE();

        if (update != 0 &&
            mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) {
            struct ib_event event = {
                .device = &dev->ib_dev,
                .element.port_num = port->port_num + 1,
                .event = IB_EVENT_GID_CHANGE,
            };

            /* add new entries, if any */
            for (j = 0; j != gid_index; j++) {
                error = modify_gid_roce(&dev->ib_dev, port->port_num, j,
                    port->gid_table + j, xdev[j]);
                if (error != 0)
                    printf("mlx5_ib: Failed to update ROCE GID table: %d\n", error);
            }

            memset(&gid_temp, 0, sizeof(gid_temp));

            /* clear old entries, if any */
            for (; j != MLX5_IB_GID_MAX; j++) {
                if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0)
                    continue;
                port->gid_table[j] = gid_temp;
                (void) modify_gid_roce(&dev->ib_dev, port->port_num, j,
                    port->gid_table + j, ndev);
            }

            /* make sure ibcore gets updated */
            ib_dispatch_event(&event);
        }
        pause("W", hz);
    }

    {
        struct ib_event event = {
            .device = &dev->ib_dev,
            .element.port_num = port->port_num + 1,
            .event = IB_EVENT_GID_CHANGE,
        };

        /* make sure ibcore gets updated */
        ib_dispatch_event(&event);
    }

    port->port_gone = 2;
    kthread_exit();
}
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
                             union ib_gid *gid)
{
    struct mlx5_ib_dev *dev = to_mdev(ibdev);
    struct mlx5_core_dev *mdev = dev->mdev;

    switch (mlx5_get_vport_access_method(ibdev)) {
    case MLX5_VPORT_ACCESS_METHOD_MAD:
        return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);

    case MLX5_VPORT_ACCESS_METHOD_HCA:
        return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);

    case MLX5_VPORT_ACCESS_METHOD_NIC:
        if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
            index < 0 || index >= MLX5_IB_GID_MAX ||
            dev->port[port - 1].port_gone != 0)
            memset(gid, 0, sizeof(*gid));
        else
            *gid = dev->port[port - 1].gid_table[index];
        return 0;

    default:
        return -EINVAL;
    }
}
static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
                              u16 *pkey)
{
    struct mlx5_ib_dev *dev = to_mdev(ibdev);
    struct mlx5_core_dev *mdev = dev->mdev;

    switch (mlx5_get_vport_access_method(ibdev)) {
    case MLX5_VPORT_ACCESS_METHOD_MAD:
        return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);

    case MLX5_VPORT_ACCESS_METHOD_HCA:
    case MLX5_VPORT_ACCESS_METHOD_NIC:
        return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
                                         pkey);
    default:
        return -EINVAL;
    }
}
static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
                                 struct ib_device_modify *props)
{
    struct mlx5_ib_dev *dev = to_mdev(ibdev);
    struct mlx5_reg_node_desc in;
    struct mlx5_reg_node_desc out;
    int err;

    if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
        return -EOPNOTSUPP;

    if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
        return 0;

    /*
     * If possible, pass node desc to FW, so it can generate
     * a 144 trap.  If cmd fails, just ignore.
     */
    memcpy(&in, props->node_desc, 64);
    err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
                               sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
    if (err)
        return err;

    memcpy(ibdev->node_desc, props->node_desc, 64);

    return err;
}
static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
                               struct ib_port_modify *props)
{
    u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
                 IB_LINK_LAYER_ETHERNET);
    struct mlx5_ib_dev *dev = to_mdev(ibdev);
    struct ib_port_attr attr;
    u32 tmp;
    int err;

    /* return OK if this is RoCE. CM calls ib_modify_port() regardless
     * of whether port link layer is ETH or IB. For ETH ports, qkey
     * violations and port capabilities are not valid.
     */
    if (is_eth)
        return 0;

    mutex_lock(&dev->cap_mask_mutex);

    err = mlx5_ib_query_port(ibdev, port, &attr);
    if (err)
        goto out;

    tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
          ~props->clr_port_cap_mask;

    err = mlx5_set_port_caps(dev->mdev, port, tmp);

out:
    mutex_unlock(&dev->cap_mask_mutex);
    return err;
}
enum mlx5_cap_flags {
    MLX5_CAP_COMPACT_AV = 1 << 0,
};

static void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
{
    *flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
              MLX5_CAP_COMPACT_AV : 0;
}
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
                                                  struct ib_udata *udata)
{
    struct mlx5_ib_dev *dev = to_mdev(ibdev);
    struct mlx5_ib_alloc_ucontext_req_v2 req;
    struct mlx5_ib_alloc_ucontext_resp resp;
    struct mlx5_ib_ucontext *context;
    struct mlx5_uuar_info *uuari;
    struct mlx5_uar *uars;
    int gross_uuars;
    int num_uars;
    int ver;
    int uuarn;
    int err;
    int i;
    size_t reqlen;

    if (!dev->ib_active)
        return ERR_PTR(-EAGAIN);

    memset(&req, 0, sizeof(req));
    memset(&resp, 0, sizeof(resp));

    reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
    if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
        ver = 0;
    else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
        ver = 2;
    else {
        mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen);
        return ERR_PTR(-EINVAL);
    }

    err = ib_copy_from_udata(&req, udata, reqlen);
    if (err) {
        mlx5_ib_err(dev, "copy failed\n");
        return ERR_PTR(err);
    }

    if (req.flags || req.reserved) {
        mlx5_ib_err(dev, "request corrupted\n");
        return ERR_PTR(-EINVAL);
    }

    if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
        mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
        return ERR_PTR(-ENOMEM);
    }

    req.total_num_uuars = ALIGN(req.total_num_uuars,
                                MLX5_NON_FP_BF_REGS_PER_PAGE);
    if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
        mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
                     req.num_low_latency_uuars, req.total_num_uuars - 1);
        return ERR_PTR(-EINVAL);
    }

    num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
    gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
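    /*
     * Arithmetic sketch (illustrative; assumes the usual mlx5 constants
     * MLX5_NON_FP_BF_REGS_PER_PAGE == 2 and MLX5_BF_REGS_PER_PAGE == 4):
     * a request for 9 uuars is aligned up to 10, giving num_uars = 5
     * UAR pages and gross_uuars = 20 slots, half of which are the
     * fast-path registers marked off in the bitmap below.
     */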
    resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
    if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
        resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
    resp.cache_line_size = L1_CACHE_BYTES;
    resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
    resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
    resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
    resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
    resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
    set_mlx5_flags(&resp.flags, dev->mdev);

    if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
        resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);

    if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
        resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);

    context = kzalloc(sizeof(*context), GFP_KERNEL);
    if (!context)
        return ERR_PTR(-ENOMEM);

    uuari = &context->uuari;
    mutex_init(&uuari->lock);
    uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
    if (!uars) {
        err = -ENOMEM;
        goto out_ctx;
    }

    uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
                            sizeof(*uuari->bitmap),
                            GFP_KERNEL);
    if (!uuari->bitmap) {
        err = -ENOMEM;
        goto out_uar_ctx;
    }
    /*
     * clear all fast path uuars
     */
    for (i = 0; i < gross_uuars; i++) {
        uuarn = i & 3;
        if (uuarn == 2 || uuarn == 3)
            set_bit(i, uuari->bitmap);
    }

    uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
    if (!uuari->count) {
        err = -ENOMEM;
        goto out_bitmap;
    }

    for (i = 0; i < num_uars; i++) {
        err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
        if (err) {
            mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
            goto out_uars;
        }
    }
    for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
        context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;

    INIT_LIST_HEAD(&context->db_page_list);
    mutex_init(&context->db_page_mutex);

    resp.tot_uuars = req.total_num_uuars;
    resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
    err = ib_copy_to_udata(udata, &resp,
                           min_t(size_t, udata->outlen, sizeof(resp)));
    if (err)
        goto out_uars;

    uuari->ver = ver;
    uuari->num_low_latency_uuars = req.num_low_latency_uuars;
    uuari->uars = uars;
    uuari->num_uars = num_uars;

    if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
        IB_LINK_LAYER_ETHERNET) {
        err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
        if (err)
            goto out_uars;
    }

    return &context->ibucontext;

out_uars:
    for (i--; i >= 0; i--)
        mlx5_cmd_free_uar(dev->mdev, uars[i].index);
    kfree(uuari->count);
out_bitmap:
    kfree(uuari->bitmap);
out_uar_ctx:
    kfree(uars);
out_ctx:
    kfree(context);
    return ERR_PTR(err);
}
static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
    struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
    struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
    struct mlx5_uuar_info *uuari = &context->uuari;
    int i;

    if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
        IB_LINK_LAYER_ETHERNET)
        mlx5_dealloc_transport_domain(dev->mdev, context->tdn);

    for (i = 0; i < uuari->num_uars; i++) {
        if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
            mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
    }
    for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
        if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
            mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
    }

    kfree(uuari->count);
    kfree(uuari->bitmap);
    kfree(uuari->uars);
    kfree(context);

    return 0;
}
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
{
    return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
}
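/*
 * Example (illustrative only): if BAR 0 starts at physical address
 * 0xf0000000 and PAGE_SHIFT is 12, UAR index 5 maps to PFN
 * 0xf0000 + 5 = 0xf0005, i.e. the 4 KB page at 0xf0005000.
 */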
static int get_command(unsigned long offset)
{
    return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
}

static int get_arg(unsigned long offset)
{
    return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
}

static int get_index(unsigned long offset)
{
    return get_arg(offset);
}
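/*
 * Decoding sketch (illustrative; assumes MLX5_IB_MMAP_CMD_SHIFT == 8
 * and MLX5_IB_MMAP_CMD_MASK == 0xff as in mlx5_ib.h): a vm_pgoff of
 * 0x203 decodes to command 2 with index 3, i.e. userspace encodes a
 * request as (cmd << MLX5_IB_MMAP_CMD_SHIFT) | index.
 */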
static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
                    struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
                    struct mlx5_ib_ucontext *context)
{
    unsigned long idx;
    phys_addr_t pfn;

    if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
        mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
                     (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
        return -EINVAL;
    }

    idx = get_index(vma->vm_pgoff);
    if (idx >= uuari->num_uars) {
        mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
                     idx, uuari->num_uars);
        return -EINVAL;
    }

    pfn = uar_index2pfn(dev, uuari->uars[idx].index);
    mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
                (unsigned long long)pfn);

    vma->vm_page_prot = prot;
    if (io_remap_pfn_range(vma, vma->vm_start, pfn,
                           PAGE_SIZE, vma->vm_page_prot)) {
        mlx5_ib_err(dev, "io remap failed\n");
        return -EAGAIN;
    }

    mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
                (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);

    return 0;
}
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
    struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
    struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
    struct mlx5_uuar_info *uuari = &context->uuari;
    unsigned long command;

    command = get_command(vma->vm_pgoff);
    switch (command) {
    case MLX5_IB_MMAP_REGULAR_PAGE:
        return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
                        true,
                        uuari, dev, context);

    case MLX5_IB_MMAP_WC_PAGE:
        return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
                        true, uuari, dev, context);

    case MLX5_IB_MMAP_NC_PAGE:
        return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
                        false, uuari, dev, context);

    default:
        return -EINVAL;
    }
}
static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
{
    struct mlx5_create_mkey_mbox_in *in;
    struct mlx5_mkey_seg *seg;
    struct mlx5_core_mr mr;
    int err;

    in = kzalloc(sizeof(*in), GFP_KERNEL);
    if (!in)
        return -ENOMEM;

    seg = &in->seg;
    seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
    seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
    seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
    seg->start_addr = 0;

    err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
                                NULL, NULL, NULL);
    if (err) {
        mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
        goto err_in;
    }

    kfree(in);
    *key = mr.key;

    return 0;

err_in:
    kfree(in);
    return err;
}

static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
{
    struct mlx5_core_mr mr;
    int err;

    memset(&mr, 0, sizeof(mr));
    mr.key = key;
    err = mlx5_core_destroy_mkey(dev->mdev, &mr);
    if (err)
        mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
}
static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
                                      struct ib_ucontext *context,
                                      struct ib_udata *udata)
{
    struct mlx5_ib_dev *dev = to_mdev(ibdev);
    struct mlx5_ib_alloc_pd_resp resp;
    struct mlx5_ib_pd *pd;
    int err;

    pd = kmalloc(sizeof(*pd), GFP_KERNEL);
    if (!pd)
        return ERR_PTR(-ENOMEM);

    err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
    if (err) {
        mlx5_ib_warn(dev, "pd alloc failed\n");
        kfree(pd);
        return ERR_PTR(err);
    }

    if (context) {
        resp.pdn = pd->pdn;
        if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
            mlx5_ib_err(dev, "copy failed\n");
            mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
            kfree(pd);
            return ERR_PTR(-EFAULT);
        }
    } else {
        err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
        if (err) {
            mlx5_ib_err(dev, "alloc mkey failed\n");
            mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
            kfree(pd);
            return ERR_PTR(err);
        }
    }

    return &pd->ibpd;
}

static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
{
    struct mlx5_ib_dev *mdev = to_mdev(pd->device);
    struct mlx5_ib_pd *mpd = to_mpd(pd);

    if (!pd->uobject)
        free_pa_mkey(mdev, mpd->pa_lkey);

    mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
    kfree(mpd);

    return 0;
}
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
    struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
    int err;

    if (ibqp->qp_type == IB_QPT_RAW_PACKET)
        return -EOPNOTSUPP;

    err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
    if (err)
        mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
                     ibqp->qp_num, gid->raw);

    return err;
}

static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
    struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
    int err;

    if (ibqp->qp_type == IB_QPT_RAW_PACKET)
        return -EOPNOTSUPP;

    err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
    if (err)
        mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
                     ibqp->qp_num, gid->raw);

    return err;
}
static int init_node_data(struct mlx5_ib_dev *dev)
{
    int err;

    err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
    if (err)
        return err;

    return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
}
static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
                             char *buf)
{
    struct mlx5_ib_dev *dev =
        container_of(device, struct mlx5_ib_dev, ib_dev.dev);

    return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
}

static ssize_t show_reg_pages(struct device *device,
                              struct device_attribute *attr, char *buf)
{
    struct mlx5_ib_dev *dev =
        container_of(device, struct mlx5_ib_dev, ib_dev.dev);

    return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}

static ssize_t show_hca(struct device *device, struct device_attribute *attr,
                        char *buf)
{
    struct mlx5_ib_dev *dev =
        container_of(device, struct mlx5_ib_dev, ib_dev.dev);

    return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}

static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
                           char *buf)
{
    struct mlx5_ib_dev *dev =
        container_of(device, struct mlx5_ib_dev, ib_dev.dev);

    return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
                   fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
}

static ssize_t show_rev(struct device *device, struct device_attribute *attr,
                        char *buf)
{
    struct mlx5_ib_dev *dev =
        container_of(device, struct mlx5_ib_dev, ib_dev.dev);

    return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
}

static ssize_t show_board(struct device *device, struct device_attribute *attr,
                          char *buf)
{
    struct mlx5_ib_dev *dev =
        container_of(device, struct mlx5_ib_dev, ib_dev.dev);

    return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
                   dev->mdev->board_id);
}
static DEVICE_ATTR(hw_rev,    S_IRUGO, show_rev,       NULL);
static DEVICE_ATTR(fw_ver,    S_IRUGO, show_fw_ver,    NULL);
static DEVICE_ATTR(hca_type,  S_IRUGO, show_hca,       NULL);
static DEVICE_ATTR(board_id,  S_IRUGO, show_board,     NULL);
static DEVICE_ATTR(fw_pages,  S_IRUGO, show_fw_pages,  NULL);
static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);

static struct device_attribute *mlx5_class_attributes[] = {
    &dev_attr_hw_rev,
    &dev_attr_fw_ver,
    &dev_attr_hca_type,
    &dev_attr_board_id,
    &dev_attr_fw_pages,
    &dev_attr_reg_pages,
};
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
{
    struct mlx5_ib_qp *mqp;
    struct mlx5_ib_cq *send_mcq, *recv_mcq;
    struct mlx5_core_cq *mcq;
    struct list_head cq_armed_list;
    unsigned long flags_qp;
    unsigned long flags_cq;
    unsigned long flags;

    mlx5_ib_warn(ibdev, " started\n");
    INIT_LIST_HEAD(&cq_armed_list);

    /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/
    spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
    list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
        spin_lock_irqsave(&mqp->sq.lock, flags_qp);
        if (mqp->sq.tail != mqp->sq.head) {
            send_mcq = to_mcq(mqp->ibqp.send_cq);
            spin_lock_irqsave(&send_mcq->lock, flags_cq);
            if (send_mcq->mcq.comp &&
                mqp->ibqp.send_cq->comp_handler) {
                if (!send_mcq->mcq.reset_notify_added) {
                    send_mcq->mcq.reset_notify_added = 1;
                    list_add_tail(&send_mcq->mcq.reset_notify,
                                  &cq_armed_list);
                }
            }
            spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
        }
        spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
        spin_lock_irqsave(&mqp->rq.lock, flags_qp);
        /* no handling is needed for SRQ */
        if (!mqp->ibqp.srq) {
            if (mqp->rq.tail != mqp->rq.head) {
                recv_mcq = to_mcq(mqp->ibqp.recv_cq);
                spin_lock_irqsave(&recv_mcq->lock, flags_cq);
                if (recv_mcq->mcq.comp &&
                    mqp->ibqp.recv_cq->comp_handler) {
                    if (!recv_mcq->mcq.reset_notify_added) {
                        recv_mcq->mcq.reset_notify_added = 1;
                        list_add_tail(&recv_mcq->mcq.reset_notify,
                                      &cq_armed_list);
                    }
                }
                spin_unlock_irqrestore(&recv_mcq->lock,
                                       flags_cq);
            }
        }
        spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
    }
    /*At that point all inflight post send were put to be executed as of we
     * lock/unlock above locks Now need to arm all involved CQs.
     */
    list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
        mcq->comp(mcq);
    }
    spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
    mlx5_ib_warn(ibdev, " ended\n");
}
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
                          enum mlx5_dev_event event, unsigned long param)
{
    struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
    struct ib_event ibev;
    u8 port = 0;

    switch (event) {
    case MLX5_DEV_EVENT_SYS_ERROR:
        ibdev->ib_active = false;
        ibev.event = IB_EVENT_DEVICE_FATAL;
        mlx5_ib_handle_internal_error(ibdev);
        break;

    case MLX5_DEV_EVENT_PORT_UP:
        ibev.event = IB_EVENT_PORT_ACTIVE;
        port = (u8)param;
        break;

    case MLX5_DEV_EVENT_PORT_DOWN:
    case MLX5_DEV_EVENT_PORT_INITIALIZED:
        ibev.event = IB_EVENT_PORT_ERR;
        port = (u8)param;
        break;

    case MLX5_DEV_EVENT_LID_CHANGE:
        ibev.event = IB_EVENT_LID_CHANGE;
        port = (u8)param;
        break;

    case MLX5_DEV_EVENT_PKEY_CHANGE:
        ibev.event = IB_EVENT_PKEY_CHANGE;
        port = (u8)param;
        break;

    case MLX5_DEV_EVENT_GUID_CHANGE:
        ibev.event = IB_EVENT_GID_CHANGE;
        port = (u8)param;
        break;

    case MLX5_DEV_EVENT_CLIENT_REREG:
        ibev.event = IB_EVENT_CLIENT_REREGISTER;
        port = (u8)param;
        break;

    default:
        return;
    }

    ibev.device = &ibdev->ib_dev;
    ibev.element.port_num = port;

    if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
        (port < 1 || port > ibdev->num_ports)) {
        mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
        return;
    }

    if (ibdev->ib_active)
        ib_dispatch_event(&ibev);
}
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
    int port;

    for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
        mlx5_query_ext_port_caps(dev, port);
}

static void config_atomic_responder(struct mlx5_ib_dev *dev,
                                    struct ib_device_attr *props)
{
    enum ib_atomic_cap cap = props->atomic_cap;

    if (cap == IB_ATOMIC_HCA ||
        cap == IB_ATOMIC_GLOB)
        dev->enable_atomic_resp = 1;

    dev->atomic_cap = cap;
}
enum mlx5_addr_align {
    MLX5_ADDR_ALIGN_0   = 0,
    MLX5_ADDR_ALIGN_64  = 64,
    MLX5_ADDR_ALIGN_128 = 128,
};
static int get_port_caps(struct mlx5_ib_dev *dev)
{
    struct ib_device_attr *dprops = NULL;
    struct ib_port_attr *pprops = NULL;
    int err = -ENOMEM;
    int port;

    pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
    if (!pprops)
        goto out;

    dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
    if (!dprops)
        goto out;

    err = mlx5_ib_query_device(&dev->ib_dev, dprops);
    if (err) {
        mlx5_ib_warn(dev, "query_device failed %d\n", err);
        goto out;
    }
    config_atomic_responder(dev, dprops);

    for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
        err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
        if (err) {
            mlx5_ib_warn(dev, "query_port %d failed %d\n",
                         port, err);
            break;
        }
        dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
        dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
        mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
                    dprops->max_pkeys, pprops->gid_tbl_len);
    }

out:
    kfree(pprops);
    kfree(dprops);

    return err;
}
static void destroy_umrc_res(struct mlx5_ib_dev *dev)
{
    int err;

    err = mlx5_mr_cache_cleanup(dev);
    if (err)
        mlx5_ib_warn(dev, "mr cache cleanup failed\n");

    ib_dereg_mr(dev->umrc.mr);
    ib_dealloc_pd(dev->umrc.pd);
}
static int create_umr_res(struct mlx5_ib_dev *dev)
{
    struct ib_pd *pd;
    struct ib_mr *mr;
    int ret;

    pd = ib_alloc_pd(&dev->ib_dev);
    if (IS_ERR(pd)) {
        mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
        ret = PTR_ERR(pd);
        goto error_0;
    }

    mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
    if (IS_ERR(mr)) {
        mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
        ret = PTR_ERR(mr);
        goto error_1;
    }

    dev->umrc.mr = mr;
    dev->umrc.pd = pd;

    ret = mlx5_mr_cache_init(dev);
    if (ret) {
        mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
        goto error_2;
    }

    return 0;

error_2:
    ib_dereg_mr(mr);
error_1:
    ib_dealloc_pd(pd);
error_0:
    return ret;
}
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
    struct ib_srq_init_attr attr;
    struct mlx5_ib_dev *dev;
    int ret = 0;

    dev = container_of(devr, struct mlx5_ib_dev, devr);

    devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
    if (IS_ERR(devr->p0)) {
        ret = PTR_ERR(devr->p0);
        goto error0;
    }
    devr->p0->device = &dev->ib_dev;
    devr->p0->uobject = NULL;
    atomic_set(&devr->p0->usecnt, 0);

    devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL);
    if (IS_ERR(devr->c0)) {
        ret = PTR_ERR(devr->c0);
        goto error1;
    }
    devr->c0->device = &dev->ib_dev;
    devr->c0->uobject = NULL;
    devr->c0->comp_handler = NULL;
    devr->c0->event_handler = NULL;
    devr->c0->cq_context = NULL;
    atomic_set(&devr->c0->usecnt, 0);

    devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
    if (IS_ERR(devr->x0)) {
        ret = PTR_ERR(devr->x0);
        goto error2;
    }
    devr->x0->device = &dev->ib_dev;
    devr->x0->inode = NULL;
    atomic_set(&devr->x0->usecnt, 0);
    mutex_init(&devr->x0->tgt_qp_mutex);
    INIT_LIST_HEAD(&devr->x0->tgt_qp_list);

    devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
    if (IS_ERR(devr->x1)) {
        ret = PTR_ERR(devr->x1);
        goto error3;
    }
    devr->x1->device = &dev->ib_dev;
    devr->x1->inode = NULL;
    atomic_set(&devr->x1->usecnt, 0);
    mutex_init(&devr->x1->tgt_qp_mutex);
    INIT_LIST_HEAD(&devr->x1->tgt_qp_list);

    memset(&attr, 0, sizeof(attr));
    attr.attr.max_sge = 1;
    attr.attr.max_wr = 1;
    attr.srq_type = IB_SRQT_XRC;
    attr.ext.xrc.cq = devr->c0;
    attr.ext.xrc.xrcd = devr->x0;

    devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
    if (IS_ERR(devr->s0)) {
        ret = PTR_ERR(devr->s0);
        goto error4;
    }
    devr->s0->device = &dev->ib_dev;
    devr->s0->pd = devr->p0;
    devr->s0->uobject = NULL;
    devr->s0->event_handler = NULL;
    devr->s0->srq_context = NULL;
    devr->s0->srq_type = IB_SRQT_XRC;
    devr->s0->ext.xrc.xrcd = devr->x0;
    devr->s0->ext.xrc.cq = devr->c0;
    atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
    atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
    atomic_inc(&devr->p0->usecnt);
    atomic_set(&devr->s0->usecnt, 0);

    memset(&attr, 0, sizeof(attr));
    attr.attr.max_sge = 1;
    attr.attr.max_wr = 1;
    attr.srq_type = IB_SRQT_BASIC;
    devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
    if (IS_ERR(devr->s1)) {
        ret = PTR_ERR(devr->s1);
        goto error5;
    }
    devr->s1->device = &dev->ib_dev;
    devr->s1->pd = devr->p0;
    devr->s1->uobject = NULL;
    devr->s1->event_handler = NULL;
    devr->s1->srq_context = NULL;
    devr->s1->srq_type = IB_SRQT_BASIC;
    devr->s1->ext.xrc.cq = devr->c0;
    atomic_inc(&devr->p0->usecnt);
    atomic_set(&devr->s1->usecnt, 0);

    return 0;

error5:
    mlx5_ib_destroy_srq(devr->s0);
error4:
    mlx5_ib_dealloc_xrcd(devr->x1);
error3:
    mlx5_ib_dealloc_xrcd(devr->x0);
error2:
    mlx5_ib_destroy_cq(devr->c0);
error1:
    mlx5_ib_dealloc_pd(devr->p0);
error0:
    return ret;
}

static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
    mlx5_ib_destroy_srq(devr->s1);
    mlx5_ib_destroy_srq(devr->s0);
    mlx5_ib_dealloc_xrcd(devr->x0);
    mlx5_ib_dealloc_xrcd(devr->x1);
    mlx5_ib_destroy_cq(devr->c0);
    mlx5_ib_dealloc_pd(devr->p0);
}
static void enable_dc_tracer(struct mlx5_ib_dev *dev)
{
    struct device *device = dev->ib_dev.dma_device;
    struct mlx5_dc_tracer *dct = &dev->dctr;
    int order;
    void *tmp;
    int size;
    int err;

    size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
    if (size <= PAGE_SIZE)
        order = 0;
    else
        order = 1;

    dct->pg = alloc_pages(GFP_KERNEL, order);
    if (!dct->pg) {
        mlx5_ib_err(dev, "failed to allocate %d pages\n", order);
        return;
    }

    tmp = page_address(dct->pg);
    memset(tmp, 0xff, size);

    dct->size = size;
    dct->order = order;
    dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
    if (dma_mapping_error(device, dct->dma)) {
        mlx5_ib_err(dev, "dma mapping error\n");
        goto map_err;
    }

    err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
    if (err) {
        mlx5_ib_warn(dev, "failed to enable DC tracer\n");
        goto cmd_err;
    }

    return;

cmd_err:
    dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
map_err:
    __free_pages(dct->pg, dct->order);
    dct->pg = NULL;
}

static void disable_dc_tracer(struct mlx5_ib_dev *dev)
{
    struct device *device = dev->ib_dev.dma_device;
    struct mlx5_dc_tracer *dct = &dev->dctr;
    int err;

    if (!dct->pg)
        return;

    err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
    if (err) {
        mlx5_ib_warn(dev, "failed to disable DC tracer\n");
        return;
    }

    dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
    __free_pages(dct->pg, dct->order);
    dct->pg = NULL;
}
enum {
    MLX5_DC_CNAK_SIZE             = 128,
    MLX5_NUM_BUF_IN_PAGE          = PAGE_SIZE / MLX5_DC_CNAK_SIZE,
    MLX5_CNAK_TX_CQ_SIGNAL_FACTOR = 128,
    MLX5_DC_CNAK_SL               = 0,
    MLX5_DC_CNAK_VL               = 0,
};
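/*
 * Quick arithmetic check (illustrative only): with 4 KB pages,
 * MLX5_NUM_BUF_IN_PAGE = 4096 / 128 = 32 CNAK buffers per page.
 */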
static int init_dc_improvements(struct mlx5_ib_dev *dev)
{
    if (!mlx5_core_is_pf(dev->mdev))
        return 0;

    if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
        return 0;

    enable_dc_tracer(dev);

    return 0;
}

static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
{
    disable_dc_tracer(dev);
}
static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
{
    mlx5_vport_dealloc_q_counter(dev->mdev,
                                 MLX5_INTERFACE_PROTOCOL_IB,
                                 dev->port[port_num].q_cnt_id);
    dev->port[port_num].q_cnt_id = 0;
}

static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
{
    unsigned int i;

    for (i = 0; i < dev->num_ports; i++)
        mlx5_ib_dealloc_q_port_counter(dev, i);
}

static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
{
    int ret;
    int i;

    for (i = 0; i < dev->num_ports; i++) {
        ret = mlx5_vport_alloc_q_counter(dev->mdev,
                                         MLX5_INTERFACE_PROTOCOL_IB,
                                         &dev->port[i].q_cnt_id);
        if (ret) {
            mlx5_ib_warn(dev,
                         "couldn't allocate queue counter for port %d\n",
                         i + 1);
            goto dealloc_counters;
        }
    }

    return 0;

dealloc_counters:
    while (--i >= 0)
        mlx5_ib_dealloc_q_port_counter(dev, i);

    return ret;
}
struct port_attribute {
    struct attribute attr;
    ssize_t (*show)(struct mlx5_ib_port *,
                    struct port_attribute *, char *buf);
    ssize_t (*store)(struct mlx5_ib_port *,
                     struct port_attribute *,
                     const char *buf, size_t count);
};

struct port_counter_attribute {
    struct port_attribute attr;
    size_t                offset;
};
static ssize_t port_attr_show(struct kobject *kobj,
                              struct attribute *attr, char *buf)
{
    struct port_attribute *port_attr =
        container_of(attr, struct port_attribute, attr);
    struct mlx5_ib_port_sysfs_group *p =
        container_of(kobj, struct mlx5_ib_port_sysfs_group,
                     kobj);
    struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
                                                group);

    if (!port_attr->show)
        return -EIO;

    return port_attr->show(mibport, port_attr, buf);
}
static ssize_t show_port_counter(struct mlx5_ib_port *p,
                                 struct port_attribute *port_attr,
                                 char *buf)
{
    int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
    struct port_counter_attribute *counter_attr =
        container_of(port_attr, struct port_counter_attribute, attr);
    void *out;
    int ret;

    out = mlx5_vzalloc(outlen);
    if (!out)
        return -ENOMEM;

    ret = mlx5_vport_query_q_counter(p->dev->mdev,
                                     p->q_cnt_id, 0,
                                     out, outlen);
    if (ret)
        goto out;

    ret = sprintf(buf, "%d\n",
                  be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));

out:
    kvfree(out);
    return ret;
}
#define PORT_COUNTER_ATTR(_name)                                        \
struct port_counter_attribute port_counter_attr_##_name = {             \
    .attr   = __ATTR(_name, S_IRUGO, show_port_counter, NULL),          \
    .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)                 \
}
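/*
 * Expansion example (illustrative only): PORT_COUNTER_ATTR(out_of_buffer)
 * defines port_counter_attr_out_of_buffer, a read-only sysfs attribute
 * whose show() callback is show_port_counter and whose offset points at
 * the out_of_buffer field of the query_q_counter_out layout.
 */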
static PORT_COUNTER_ATTR(rx_write_requests);
static PORT_COUNTER_ATTR(rx_read_requests);
static PORT_COUNTER_ATTR(rx_atomic_requests);
static PORT_COUNTER_ATTR(rx_dct_connect);
static PORT_COUNTER_ATTR(out_of_buffer);
static PORT_COUNTER_ATTR(out_of_sequence);
static PORT_COUNTER_ATTR(duplicate_request);
static PORT_COUNTER_ATTR(rnr_nak_retry_err);
static PORT_COUNTER_ATTR(packet_seq_err);
static PORT_COUNTER_ATTR(implied_nak_seq_err);
static PORT_COUNTER_ATTR(local_ack_timeout_err);

static struct attribute *counter_attrs[] = {
    &port_counter_attr_rx_write_requests.attr.attr,
    &port_counter_attr_rx_read_requests.attr.attr,
    &port_counter_attr_rx_atomic_requests.attr.attr,
    &port_counter_attr_rx_dct_connect.attr.attr,
    &port_counter_attr_out_of_buffer.attr.attr,
    &port_counter_attr_out_of_sequence.attr.attr,
    &port_counter_attr_duplicate_request.attr.attr,
    &port_counter_attr_rnr_nak_retry_err.attr.attr,
    &port_counter_attr_packet_seq_err.attr.attr,
    &port_counter_attr_implied_nak_seq_err.attr.attr,
    &port_counter_attr_local_ack_timeout_err.attr.attr,
    NULL
};

static struct attribute_group port_counters_group = {
    .name  = "counters",
    .attrs = counter_attrs
};

static const struct sysfs_ops port_sysfs_ops = {
    .show = port_attr_show
};

static struct kobj_type port_type = {
    .sysfs_ops = &port_sysfs_ops,
};
static int add_port_attrs(struct mlx5_ib_dev *dev,
                          struct kobject *parent,
                          struct mlx5_ib_port_sysfs_group *port,
                          u8 port_num)
{
    int ret;

    ret = kobject_init_and_add(&port->kobj, &port_type,
                               parent,
                               "%d", port_num);
    if (ret)
        return ret;

    if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
        MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
        ret = sysfs_create_group(&port->kobj, &port_counters_group);
        if (ret)
            goto put_kobj;
    }

    port->enabled = true;
    return ret;

put_kobj:
    kobject_put(&port->kobj);
    return ret;
}

static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
                                unsigned int num_ports)
{
    unsigned int i;

    for (i = 0; i < num_ports; i++) {
        struct mlx5_ib_port_sysfs_group *port =
            &dev->port[i].group;

        if (!port->enabled)
            continue;

        if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
            MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
            sysfs_remove_group(&port->kobj,
                               &port_counters_group);
        kobject_put(&port->kobj);
        port->enabled = false;
    }

    if (dev->ports_parent) {
        kobject_put(dev->ports_parent);
        dev->ports_parent = NULL;
    }
}

static int create_port_attrs(struct mlx5_ib_dev *dev)
{
    int ret = 0;
    unsigned int i = 0;
    struct device *device = &dev->ib_dev.dev;

    dev->ports_parent = kobject_create_and_add("mlx5_ports",
                                               &device->kobj);
    if (!dev->ports_parent)
        return -ENOMEM;

    for (i = 0; i < dev->num_ports; i++) {
        ret = add_port_attrs(dev,
                             dev->ports_parent,
                             &dev->port[i].group,
                             i + 1);
        if (ret)
            goto _destroy_ports_attrs;
    }

    return 0;

_destroy_ports_attrs:
    destroy_ports_attrs(dev, i);
    return ret;
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
    struct mlx5_ib_dev *dev;
    int err;
    int i;

    printk_once(KERN_INFO "%s", mlx5_version);

    dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
    if (!dev)
        return NULL;

    dev->mdev = mdev;

    dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
                        GFP_KERNEL);
    if (!dev->port)
        goto err_dealloc;

    for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
        dev->port[i].dev = dev;
        dev->port[i].port_num = i;
        dev->port[i].port_gone = 0;
        memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table));
    }

    err = get_port_caps(dev);
    if (err)
        goto err_free_port;

    if (mlx5_use_mad_ifc(dev))
        get_ext_port_caps(dev);

    if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
        IB_LINK_LAYER_ETHERNET) {
        if (MLX5_CAP_GEN(mdev, roce)) {
            err = mlx5_nic_vport_enable_roce(mdev);
            if (err)
                goto err_free_port;
        }
    }

    MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);

    strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
    dev->ib_dev.owner = THIS_MODULE;
    dev->ib_dev.node_type = RDMA_NODE_IB_CA;
    dev->ib_dev.local_dma_lkey = mdev->special_contexts.resd_lkey;
    dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
    dev->ib_dev.phys_port_cnt = dev->num_ports;
    dev->ib_dev.num_comp_vectors =
        dev->mdev->priv.eq_table.num_comp_vectors;
    dev->ib_dev.dma_device = &mdev->pdev->dev;

    dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
    dev->ib_dev.uverbs_cmd_mask =
        (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
        (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
        (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
        (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
        (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
        (1ull << IB_USER_VERBS_CMD_REG_MR)              |
        (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
        (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
        (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
        (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
        (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
        (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
        (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
        (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
        (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
        (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
        (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
        (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
        (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
        (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
        (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
        (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
        (1ull << IB_USER_VERBS_CMD_OPEN_QP);

    dev->ib_dev.query_device = mlx5_ib_query_device;
    dev->ib_dev.query_port = mlx5_ib_query_port;
    dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
    dev->ib_dev.query_gid = mlx5_ib_query_gid;
    dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
    dev->ib_dev.modify_device = mlx5_ib_modify_device;
    dev->ib_dev.modify_port = mlx5_ib_modify_port;
    dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
    dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
    dev->ib_dev.mmap = mlx5_ib_mmap;
    dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
    dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
    dev->ib_dev.create_ah = mlx5_ib_create_ah;
    dev->ib_dev.query_ah = mlx5_ib_query_ah;
    dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
    dev->ib_dev.create_srq = mlx5_ib_create_srq;
    dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
    dev->ib_dev.query_srq = mlx5_ib_query_srq;
    dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
    dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
    dev->ib_dev.create_qp = mlx5_ib_create_qp;
    dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
    dev->ib_dev.query_qp = mlx5_ib_query_qp;
    dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
    dev->ib_dev.post_send = mlx5_ib_post_send;
    dev->ib_dev.post_recv = mlx5_ib_post_recv;
    dev->ib_dev.create_cq = mlx5_ib_create_cq;
    dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
    dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
    dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
    dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
    dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
    dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
    dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
    dev->ib_dev.reg_phys_mr = mlx5_ib_reg_phys_mr;
    dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
    dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
    dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
    dev->ib_dev.process_mad = mlx5_ib_process_mad;
    dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
    dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
    dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;

    if (MLX5_CAP_GEN(mdev, xrc)) {
        dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
        dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
        dev->ib_dev.uverbs_cmd_mask |=
            (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
            (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
    }

    err = init_node_data(dev);
    if (err)
        goto err_disable_roce;

    mutex_init(&dev->cap_mask_mutex);
    INIT_LIST_HEAD(&dev->qp_list);
    spin_lock_init(&dev->reset_flow_resource_lock);

    err = create_dev_resources(&dev->devr);
    if (err)
        goto err_disable_roce;

    err = mlx5_ib_alloc_q_counters(dev);
    if (err)
        goto err_rsrc;

    err = ib_register_device(&dev->ib_dev, NULL);
    if (err)
        goto err_q_cnt;

    err = create_umr_res(dev);
    if (err)
        goto err_unreg;

    if (MLX5_CAP_GEN(dev->mdev, port_type) ==
        MLX5_CAP_PORT_TYPE_IB) {
        if (init_dc_improvements(dev))
            mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
    }

    err = create_port_attrs(dev);
    if (err)
        goto err_dc;

    for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
        err = device_create_file(&dev->ib_dev.dev,
                                 mlx5_class_attributes[i]);
        if (err)
            goto err_port_attrs;
    }

    {
        struct thread *rl_thread = NULL;
        struct proc *rl_proc = NULL;

        for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
            (void) kproc_kthread_add(mlx5_ib_roce_port_update, dev->port + i, &rl_proc, &rl_thread,
                RFHIGHPID, 0, "mlx5-ib-roce-port", "mlx5-ib-roce_port-%d", i);
        }
    }

    dev->ib_active = true;

    return dev;

err_port_attrs:
    destroy_ports_attrs(dev, dev->num_ports);

err_dc:
    if (MLX5_CAP_GEN(dev->mdev, port_type) ==
        MLX5_CAP_PORT_TYPE_IB)
        cleanup_dc_improvements(dev);
    destroy_umrc_res(dev);

err_unreg:
    ib_unregister_device(&dev->ib_dev);

err_q_cnt:
    mlx5_ib_dealloc_q_counters(dev);

err_rsrc:
    destroy_dev_resources(&dev->devr);

err_disable_roce:
    if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
        IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
        mlx5_nic_vport_disable_roce(mdev);

err_free_port:
    kfree(dev->port);

err_dealloc:
    ib_dealloc_device((struct ib_device *)dev);

    return NULL;
}
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
    struct mlx5_ib_dev *dev = context;
    int i;

    /* ask the RoCE port-update threads to exit and wait for them */
    for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
        dev->port[i].port_gone = 1;
        while (dev->port[i].port_gone != 2)
            pause("W", hz);
    }

    for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
        device_remove_file(&dev->ib_dev.dev,
                           mlx5_class_attributes[i]);
    }

    destroy_ports_attrs(dev, dev->num_ports);
    if (MLX5_CAP_GEN(dev->mdev, port_type) ==
        MLX5_CAP_PORT_TYPE_IB)
        cleanup_dc_improvements(dev);
    mlx5_ib_dealloc_q_counters(dev);
    ib_unregister_device(&dev->ib_dev);
    destroy_umrc_res(dev);
    destroy_dev_resources(&dev->devr);

    if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
        IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
        mlx5_nic_vport_disable_roce(mdev);

    kfree(dev->port);
    ib_dealloc_device(&dev->ib_dev);
}
static struct mlx5_interface mlx5_ib_interface = {
    .add      = mlx5_ib_add,
    .remove   = mlx5_ib_remove,
    .event    = mlx5_ib_event,
    .protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
static int __init mlx5_ib_init(void)
{
    int err;

    if (deprecated_prof_sel != 2)
        printf("mlx5_ib: WARN: ""prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");

    err = mlx5_register_interface(&mlx5_ib_interface);
    if (err)
        goto clean;

    mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
    if (!mlx5_ib_wq) {
        printf("mlx5_ib: ERR: ""%s: failed to create mlx5_ib_wq\n", __func__);
        err = -ENOMEM;
        goto err_unreg;
    }

    return err;

err_unreg:
    mlx5_unregister_interface(&mlx5_ib_interface);

clean:
    return err;
}

static void __exit mlx5_ib_cleanup(void)
{
    destroy_workqueue(mlx5_ib_wq);
    mlx5_unregister_interface(&mlx5_ib_interface);
}
module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);