2 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <linux/errno.h>
29 #include <linux/pci.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/slab.h>
32 #include <linux/io-mapping.h>
33 #include <linux/sched.h>
34 #include <linux/netdevice.h>
35 #include <linux/etherdevice.h>
36 #include <linux/list.h>
37 #include <dev/mlx5/driver.h>
38 #include <dev/mlx5/vport.h>
39 #include <asm/pgtable.h>
43 #include <rdma/ib_user_verbs.h>
44 #include <rdma/ib_smi.h>
45 #include <rdma/ib_umem.h>
49 #include <sys/unistd.h>
50 #include <sys/kthread.h>
52 #define DRIVER_NAME "mlx5_ib"
53 #define DRIVER_VERSION "3.2-rc1"
54 #define DRIVER_RELDATE "May 2016"
56 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
57 MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
58 MODULE_LICENSE("Dual BSD/GPL");
59 MODULE_DEPEND(mlx5ib, linuxkpi, 1, 1, 1);
60 MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
61 MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
62 MODULE_VERSION(mlx5ib, 1);
64 static int deprecated_prof_sel = 2;
65 module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
66 MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
69 MLX5_STANDARD_ATOMIC_SIZE = 0x8,
72 struct workqueue_struct *mlx5_ib_wq;
74 static char mlx5_version[] =
75 DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
76 DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
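/*
 * Translate the device's atomic capabilities (8-byte compare-swap /
 * fetch-add and their masked variants, plus the requester endianness
 * mode) into the ib_device_attr atomic caps.
 */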
78 static void get_atomic_caps(struct mlx5_ib_dev *dev,
79 struct ib_device_attr *props)
84 u8 atomic_req_endianess;
86 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
87 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
88 atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
89 atomic_req_8B_endianess_mode) ||
92 tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
93 if (((atomic_operations & tmp) == tmp)
94 && (atomic_size_qp & 8)) {
95 if (atomic_req_endianess) {
96 props->atomic_cap = IB_ATOMIC_HCA;
98 props->atomic_cap = IB_ATOMIC_NONE;
101 props->atomic_cap = IB_ATOMIC_NONE;
104 tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
105 if (((atomic_operations & tmp) == tmp)
106 && (atomic_size_qp & 8)) {
107 if (atomic_req_endianess)
108 props->masked_atomic_cap = IB_ATOMIC_HCA;
110 props->masked_atomic_cap = IB_ATOMIC_NONE;
113 props->masked_atomic_cap = IB_ATOMIC_NONE;
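/* Report the RDMA link layer (IB or Ethernet) based on the port_type capability. */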
117 static enum rdma_link_layer
118 mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
120 struct mlx5_ib_dev *dev = to_mdev(device);
122 switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
123 case MLX5_CAP_PORT_TYPE_IB:
124 return IB_LINK_LAYER_INFINIBAND;
125 case MLX5_CAP_PORT_TYPE_ETH:
126 return IB_LINK_LAYER_ETHERNET;
128 return IB_LINK_LAYER_UNSPECIFIED;
132 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
134 return !dev->mdev->issi;
138 MLX5_VPORT_ACCESS_METHOD_MAD,
139 MLX5_VPORT_ACCESS_METHOD_HCA,
140 MLX5_VPORT_ACCESS_METHOD_NIC,
143 static int mlx5_get_vport_access_method(struct ib_device *ibdev)
145 if (mlx5_use_mad_ifc(to_mdev(ibdev)))
146 return MLX5_VPORT_ACCESS_METHOD_MAD;
148 if (mlx5_ib_port_link_layer(ibdev, 1) ==
149 IB_LINK_LAYER_ETHERNET)
150 return MLX5_VPORT_ACCESS_METHOD_NIC;
152 return MLX5_VPORT_ACCESS_METHOD_HCA;
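/*
 * The query helpers below fetch their data through MADs, the HCA vport
 * context or the NIC vport context, depending on the access method
 * selected above.
 */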
155 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
156 __be64 *sys_image_guid)
158 struct mlx5_ib_dev *dev = to_mdev(ibdev);
159 struct mlx5_core_dev *mdev = dev->mdev;
163 switch (mlx5_get_vport_access_method(ibdev)) {
164 case MLX5_VPORT_ACCESS_METHOD_MAD:
165 return mlx5_query_system_image_guid_mad_ifc(ibdev,
168 case MLX5_VPORT_ACCESS_METHOD_HCA:
169 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
171 *sys_image_guid = cpu_to_be64(tmp);
174 case MLX5_VPORT_ACCESS_METHOD_NIC:
175 err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
177 *sys_image_guid = cpu_to_be64(tmp);
185 static int mlx5_query_max_pkeys(struct ib_device *ibdev,
188 struct mlx5_ib_dev *dev = to_mdev(ibdev);
189 struct mlx5_core_dev *mdev = dev->mdev;
191 switch (mlx5_get_vport_access_method(ibdev)) {
192 case MLX5_VPORT_ACCESS_METHOD_MAD:
193 return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);
195 case MLX5_VPORT_ACCESS_METHOD_HCA:
196 case MLX5_VPORT_ACCESS_METHOD_NIC:
197 *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
206 static int mlx5_query_vendor_id(struct ib_device *ibdev,
209 struct mlx5_ib_dev *dev = to_mdev(ibdev);
211 switch (mlx5_get_vport_access_method(ibdev)) {
212 case MLX5_VPORT_ACCESS_METHOD_MAD:
213 return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);
215 case MLX5_VPORT_ACCESS_METHOD_HCA:
216 case MLX5_VPORT_ACCESS_METHOD_NIC:
217 return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
224 static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
230 switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
231 case MLX5_VPORT_ACCESS_METHOD_MAD:
232 return mlx5_query_node_guid_mad_ifc(dev, node_guid);
234 case MLX5_VPORT_ACCESS_METHOD_HCA:
235 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
237 *node_guid = cpu_to_be64(tmp);
240 case MLX5_VPORT_ACCESS_METHOD_NIC:
241 err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
243 *node_guid = cpu_to_be64(tmp);
251 struct mlx5_reg_node_desc {
255 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
257 struct mlx5_reg_node_desc in;
259 if (mlx5_use_mad_ifc(dev))
260 return mlx5_query_node_desc_mad_ifc(dev, node_desc);
262 memset(&in, 0, sizeof(in));
264 return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
265 sizeof(struct mlx5_reg_node_desc),
266 MLX5_REG_NODE_DESC, 0, 0);
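/* Fill ib_device_attr from the general, atomic and vport capabilities of the device. */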
269 static int mlx5_ib_query_device(struct ib_device *ibdev,
270 struct ib_device_attr *props)
272 struct mlx5_ib_dev *dev = to_mdev(ibdev);
273 struct mlx5_core_dev *mdev = dev->mdev;
280 memset(props, 0, sizeof(*props));
282 err = mlx5_query_system_image_guid(ibdev,
283 &props->sys_image_guid);
287 err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
291 err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
295 props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
296 ((u64)fw_rev_min(dev->mdev) << 16) |
297 fw_rev_sub(dev->mdev);
298 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
299 IB_DEVICE_PORT_ACTIVE_EVENT |
300 IB_DEVICE_SYS_IMAGE_GUID |
301 IB_DEVICE_RC_RNR_NAK_GEN;
303 if (MLX5_CAP_GEN(mdev, pkv))
304 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
305 if (MLX5_CAP_GEN(mdev, qkv))
306 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
307 if (MLX5_CAP_GEN(mdev, apm))
308 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
309 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
310 if (MLX5_CAP_GEN(mdev, xrc))
311 props->device_cap_flags |= IB_DEVICE_XRC;
312 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
313 if (MLX5_CAP_GEN(mdev, block_lb_mc))
314 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
316 props->vendor_part_id = mdev->pdev->device;
317 props->hw_ver = mdev->pdev->revision;
319 props->max_mr_size = ~0ull;
320 props->page_size_cap = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) -1);
321 props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
322 props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
323 max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
324 sizeof(struct mlx5_wqe_data_seg);
325 max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
326 max_sq_sg = (max_sq_desc -
327 sizeof(struct mlx5_wqe_ctrl_seg) -
328 sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg);
329 props->max_sge = min(max_rq_sg, max_sq_sg);
330 props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
331 props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
332 props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
333 props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
334 props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
335 props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
336 props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
337 props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
338 props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
339 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
340 props->max_srq_sge = max_rq_sg - 1;
341 props->max_fast_reg_page_list_len = (unsigned int)-1;
342 get_atomic_caps(dev, props);
343 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
344 props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
345 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
346 props->max_mcast_grp;
347 props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
348 props->max_ah = INT_MAX;
354 MLX5_IB_WIDTH_1X = 1 << 0,
355 MLX5_IB_WIDTH_2X = 1 << 1,
356 MLX5_IB_WIDTH_4X = 1 << 2,
357 MLX5_IB_WIDTH_8X = 1 << 3,
358 MLX5_IB_WIDTH_12X = 1 << 4
361 static int translate_active_width(struct ib_device *ibdev, u8 active_width,
364 struct mlx5_ib_dev *dev = to_mdev(ibdev);
367 if (active_width & MLX5_IB_WIDTH_1X) {
368 *ib_width = IB_WIDTH_1X;
369 } else if (active_width & MLX5_IB_WIDTH_2X) {
370 mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
373 } else if (active_width & MLX5_IB_WIDTH_4X) {
374 *ib_width = IB_WIDTH_4X;
375 } else if (active_width & MLX5_IB_WIDTH_8X) {
376 *ib_width = IB_WIDTH_8X;
377 } else if (active_width & MLX5_IB_WIDTH_12X) {
378 *ib_width = IB_WIDTH_12X;
380 mlx5_ib_dbg(dev, "Invalid active_width %d\n",
389 * TODO: Move to IB core
396 __IB_MAX_VL_0_14 = 5,
399 enum mlx5_vl_hw_cap {
411 static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
416 *max_vl_num = __IB_MAX_VL_0;
419 *max_vl_num = __IB_MAX_VL_0_1;
422 *max_vl_num = __IB_MAX_VL_0_3;
425 *max_vl_num = __IB_MAX_VL_0_7;
427 case MLX5_VL_HW_0_14:
428 *max_vl_num = __IB_MAX_VL_0_14;
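/*
 * Query IB port attributes from the HCA vport context and the PTYS,
 * PMTU and PVLC access registers.
 */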
438 static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
439 struct ib_port_attr *props)
441 struct mlx5_ib_dev *dev = to_mdev(ibdev);
442 struct mlx5_core_dev *mdev = dev->mdev;
444 int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
445 struct mlx5_ptys_reg *ptys;
446 struct mlx5_pmtu_reg *pmtu;
447 struct mlx5_pvlc_reg pvlc;
451 rep = mlx5_vzalloc(outlen);
452 ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
453 pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
454 if (!rep || !ptys || !pmtu) {
459 memset(props, 0, sizeof(*props));
461 /* What if I am a PF with dual ports? */
462 err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
466 ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);
468 props->lid = MLX5_GET(hca_vport_context, ctx, lid);
469 props->lmc = MLX5_GET(hca_vport_context, ctx, lmc);
470 props->sm_lid = MLX5_GET(hca_vport_context, ctx, sm_lid);
471 props->sm_sl = MLX5_GET(hca_vport_context, ctx, sm_sl);
472 props->state = MLX5_GET(hca_vport_context, ctx, vport_state);
473 props->phys_state = MLX5_GET(hca_vport_context, ctx,
474 port_physical_state);
475 props->port_cap_flags = MLX5_GET(hca_vport_context, ctx, cap_mask1);
476 props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
477 props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
478 props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
479 props->bad_pkey_cntr = MLX5_GET(hca_vport_context, ctx,
480 pkey_violation_counter);
481 props->qkey_viol_cntr = MLX5_GET(hca_vport_context, ctx,
482 qkey_violation_counter);
483 props->subnet_timeout = MLX5_GET(hca_vport_context, ctx,
485 props->init_type_reply = MLX5_GET(hca_vport_context, ctx,
488 ptys->proto_mask |= MLX5_PTYS_IB;
489 ptys->local_port = port;
490 err = mlx5_core_access_ptys(mdev, ptys, 0);
494 err = translate_active_width(ibdev, ptys->ib_link_width_oper,
495 &props->active_width);
499 props->active_speed = (u8)ptys->ib_proto_oper;
501 pmtu->local_port = port;
502 err = mlx5_core_access_pmtu(mdev, pmtu, 0);
506 props->max_mtu = pmtu->max_mtu;
507 props->active_mtu = pmtu->oper_mtu;
509 memset(&pvlc, 0, sizeof(pvlc));
510 pvlc.local_port = port;
511 err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
515 err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
524 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
525 struct ib_port_attr *props)
527 switch (mlx5_get_vport_access_method(ibdev)) {
528 case MLX5_VPORT_ACCESS_METHOD_MAD:
529 return mlx5_query_port_mad_ifc(ibdev, port, props);
531 case MLX5_VPORT_ACCESS_METHOD_HCA:
532 return mlx5_query_port_ib(ibdev, port, props);
534 case MLX5_VPORT_ACCESS_METHOD_NIC:
535 return mlx5_query_port_roce(ibdev, port, props);
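/* Derive a modified EUI-64 interface identifier from the netdev's 48-bit MAC address. */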
543 mlx5_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
545 if (dev->if_addrlen != ETH_ALEN)
547 memcpy(eui, IF_LLADDR(dev), 3);
548 memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
550 /* NOTE: The scope ID is added by the GID to IP conversion */
559 mlx5_make_default_gid(struct net_device *dev, union ib_gid *gid)
561 gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
562 mlx5_addrconf_ifid_eui48(&gid->raw[8], dev);
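/*
 * Per-port kernel thread: rebuild the RoCE GID table from the default
 * link-local GID and the IPv4/IPv6 addresses of the underlying (and
 * VLAN) interfaces, push changes to firmware and report them to ibcore
 * as IB_EVENT_GID_CHANGE.  Runs until port_gone is set by remove().
 */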
566 mlx5_ib_roce_port_update(void *arg)
568 struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg;
569 struct mlx5_ib_dev *dev = port->dev;
570 struct mlx5_core_dev *mdev = dev->mdev;
571 struct net_device *xdev[MLX5_IB_GID_MAX];
572 struct net_device *idev;
573 struct net_device *ndev;
575 union ib_gid gid_temp;
577 while (port->port_gone == 0) {
583 ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH);
589 CURVNET_SET_QUIET(ndev->if_vnet);
591 memset(&gid_temp, 0, sizeof(gid_temp));
592 mlx5_make_default_gid(ndev, &gid_temp);
593 if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
594 port->gid_table[gid_index] = gid_temp;
597 xdev[gid_index] = ndev;
601 TAILQ_FOREACH(idev, &V_ifnet, if_link) {
606 TAILQ_FOREACH(idev, &V_ifnet, if_link) {
608 if (idev->if_type != IFT_L2VLAN)
610 if (ndev != rdma_vlan_dev_real_dev(idev))
613 /* clone address information for IPv4 and IPv6 */
615 TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
616 if (ifa->ifa_addr == NULL ||
617 (ifa->ifa_addr->sa_family != AF_INET &&
618 ifa->ifa_addr->sa_family != AF_INET6) ||
619 gid_index >= MLX5_IB_GID_MAX)
621 memset(&gid_temp, 0, sizeof(gid_temp));
622 rdma_ip2gid(ifa->ifa_addr, &gid_temp);
623 /* check for existing entry */
624 for (j = 0; j != gid_index; j++) {
625 if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0)
628 /* check if new entry must be added */
629 if (j == gid_index) {
630 if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
631 port->gid_table[gid_index] = gid_temp;
634 xdev[gid_index] = idev;
638 IF_ADDR_RUNLOCK(idev);
645 mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) {
646 struct ib_event event = {
647 .device = &dev->ib_dev,
648 .element.port_num = port->port_num + 1,
649 .event = IB_EVENT_GID_CHANGE,
652 /* add new entries, if any */
653 for (j = 0; j != gid_index; j++) {
654 error = modify_gid_roce(&dev->ib_dev, port->port_num, j,
655 port->gid_table + j, xdev[j]);
657 printf("mlx5_ib: Failed to update ROCE GID table: %d\n", error);
659 memset(&gid_temp, 0, sizeof(gid_temp));
661 /* clear old entries, if any */
662 for (; j != MLX5_IB_GID_MAX; j++) {
663 if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0)
665 port->gid_table[j] = gid_temp;
666 (void) modify_gid_roce(&dev->ib_dev, port->port_num, j,
667 port->gid_table + j, ndev);
670 /* make sure ibcore gets updated */
671 ib_dispatch_event(&event);
676 struct ib_event event = {
677 .device = &dev->ib_dev,
678 .element.port_num = port->port_num + 1,
679 .event = IB_EVENT_GID_CHANGE,
681 /* make sure ibcore gets updated */
682 ib_dispatch_event(&event);
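/* GID queries are served from MADs, the HCA vport context or, for RoCE, the locally maintained gid_table. */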
691 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
694 struct mlx5_ib_dev *dev = to_mdev(ibdev);
695 struct mlx5_core_dev *mdev = dev->mdev;
697 switch (mlx5_get_vport_access_method(ibdev)) {
698 case MLX5_VPORT_ACCESS_METHOD_MAD:
699 return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);
701 case MLX5_VPORT_ACCESS_METHOD_HCA:
702 return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);
704 case MLX5_VPORT_ACCESS_METHOD_NIC:
705 if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
706 index < 0 || index >= MLX5_IB_GID_MAX ||
707 dev->port[port - 1].port_gone != 0)
708 memset(gid, 0, sizeof(*gid));
710 *gid = dev->port[port - 1].gid_table[index];
718 static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
721 struct mlx5_ib_dev *dev = to_mdev(ibdev);
722 struct mlx5_core_dev *mdev = dev->mdev;
724 switch (mlx5_get_vport_access_method(ibdev)) {
725 case MLX5_VPORT_ACCESS_METHOD_MAD:
726 return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);
728 case MLX5_VPORT_ACCESS_METHOD_HCA:
729 case MLX5_VPORT_ACCESS_METHOD_NIC:
730 return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
738 static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
739 struct ib_device_modify *props)
741 struct mlx5_ib_dev *dev = to_mdev(ibdev);
742 struct mlx5_reg_node_desc in;
743 struct mlx5_reg_node_desc out;
746 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
749 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
753 * If possible, pass node desc to FW, so it can generate
754 * a 144 trap. If cmd fails, just ignore.
756 memcpy(&in, props->node_desc, 64);
757 err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
758 sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
762 memcpy(ibdev->node_desc, props->node_desc, 64);
767 static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
768 struct ib_port_modify *props)
770 u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
771 IB_LINK_LAYER_ETHERNET);
772 struct mlx5_ib_dev *dev = to_mdev(ibdev);
773 struct ib_port_attr attr;
777 /* return OK if this is RoCE. CM calls ib_modify_port() regardless
778 * of whether port link layer is ETH or IB. For ETH ports, qkey
779 * violations and port capabilities are not valid.
784 mutex_lock(&dev->cap_mask_mutex);
786 err = mlx5_ib_query_port(ibdev, port, &attr);
790 tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
791 ~props->clr_port_cap_mask;
793 err = mlx5_set_port_caps(dev->mdev, port, tmp);
796 mutex_unlock(&dev->cap_mask_mutex);
800 enum mlx5_cap_flags {
801 MLX5_CAP_COMPACT_AV = 1 << 0,
804 static void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
806 *flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
807 MLX5_CAP_COMPACT_AV : 0;
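/*
 * Allocate a user context: parse the v1/v2 request, allocate the
 * requested UAR pages and UUAR accounting, and return the device
 * limits to user space in the response.
 */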
810 static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
811 struct ib_udata *udata)
813 struct mlx5_ib_dev *dev = to_mdev(ibdev);
814 struct mlx5_ib_alloc_ucontext_req_v2 req;
815 struct mlx5_ib_alloc_ucontext_resp resp;
816 struct mlx5_ib_ucontext *context;
817 struct mlx5_uuar_info *uuari;
818 struct mlx5_uar *uars;
828 return ERR_PTR(-EAGAIN);
830 memset(&req, 0, sizeof(req));
831 memset(&resp, 0, sizeof(resp));
833 reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
834 if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
836 else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
839 mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen);
840 return ERR_PTR(-EINVAL);
843 err = ib_copy_from_udata(&req, udata, reqlen);
845 mlx5_ib_err(dev, "copy failed\n");
850 mlx5_ib_err(dev, "request corrupted\n");
851 return ERR_PTR(-EINVAL);
854 if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
855 mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
856 return ERR_PTR(-ENOMEM);
859 req.total_num_uuars = ALIGN(req.total_num_uuars,
860 MLX5_NON_FP_BF_REGS_PER_PAGE);
861 if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
862 mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
863 req.num_low_latency_uuars, req.total_num_uuars - 1);
864 return ERR_PTR(-EINVAL);
867 num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
868 gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
869 resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
870 if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
871 resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
872 resp.cache_line_size = L1_CACHE_BYTES;
873 resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
874 resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
875 resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
876 resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
877 resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
878 set_mlx5_flags(&resp.flags, dev->mdev);
880 if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
881 resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);
883 if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
884 resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
886 context = kzalloc(sizeof(*context), GFP_KERNEL);
888 return ERR_PTR(-ENOMEM);
890 uuari = &context->uuari;
891 mutex_init(&uuari->lock);
892 uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
898 uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
899 sizeof(*uuari->bitmap),
901 if (!uuari->bitmap) {
906 * clear all fast path uuars
908 for (i = 0; i < gross_uuars; i++) {
910 if (uuarn == 2 || uuarn == 3)
911 set_bit(i, uuari->bitmap);
914 uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
920 for (i = 0; i < num_uars; i++) {
921 err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
923 mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
927 for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
928 context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;
930 INIT_LIST_HEAD(&context->db_page_list);
931 mutex_init(&context->db_page_mutex);
933 resp.tot_uuars = req.total_num_uuars;
934 resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
935 err = ib_copy_to_udata(udata, &resp,
936 min_t(size_t, udata->outlen, sizeof(resp)));
941 uuari->num_low_latency_uuars = req.num_low_latency_uuars;
943 uuari->num_uars = num_uars;
945 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
946 IB_LINK_LAYER_ETHERNET) {
947 err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
952 return &context->ibucontext;
955 for (i--; i >= 0; i--)
956 mlx5_cmd_free_uar(dev->mdev, uars[i].index);
960 kfree(uuari->bitmap);
970 static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
972 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
973 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
974 struct mlx5_uuar_info *uuari = &context->uuari;
977 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
978 IB_LINK_LAYER_ETHERNET)
979 mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
981 for (i = 0; i < uuari->num_uars; i++) {
982 if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
983 mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
985 for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
986 if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
987 mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
991 kfree(uuari->bitmap);
998 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
1000 return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
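/*
 * The mmap page offset encodes a command in the bits above
 * MLX5_IB_MMAP_CMD_SHIFT and an argument (e.g. a UAR index) in the
 * bits below it.
 */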
1003 static int get_command(unsigned long offset)
1005 return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
1008 static int get_arg(unsigned long offset)
1010 return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
1013 static int get_index(unsigned long offset)
1015 return get_arg(offset);
1018 static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
1019 struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
1020 struct mlx5_ib_ucontext *context)
1025 if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
1026 mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
1027 (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
1031 idx = get_index(vma->vm_pgoff);
1032 if (idx >= uuari->num_uars) {
1033 mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
1034 idx, uuari->num_uars);
1038 pfn = uar_index2pfn(dev, uuari->uars[idx].index);
1039 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
1040 (unsigned long long)pfn);
1042 vma->vm_page_prot = prot;
1043 if (io_remap_pfn_range(vma, vma->vm_start, pfn,
1044 PAGE_SIZE, vma->vm_page_prot)) {
1045 mlx5_ib_err(dev, "io remap failed\n");
1049 mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
1050 (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);
1055 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
1057 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1058 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1059 struct mlx5_uuar_info *uuari = &context->uuari;
1060 unsigned long command;
1062 command = get_command(vma->vm_pgoff);
1064 case MLX5_IB_MMAP_REGULAR_PAGE:
1065 return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1067 uuari, dev, context);
1071 case MLX5_IB_MMAP_WC_PAGE:
1072 return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1073 true, uuari, dev, context);
1076 case MLX5_IB_MMAP_NC_PAGE:
1077 return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
1078 false, uuari, dev, context);
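/*
 * Create a physical-address (PA) mkey owned by the given PD: local
 * read access, covering the full address range (MLX5_MKEY_LEN64,
 * start address 0).
 */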
1088 static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
1090 struct mlx5_create_mkey_mbox_in *in;
1091 struct mlx5_mkey_seg *seg;
1092 struct mlx5_core_mr mr;
1095 in = kzalloc(sizeof(*in), GFP_KERNEL);
1100 seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
1101 seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
1102 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1103 seg->start_addr = 0;
1105 err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
1108 mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
1123 static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
1125 struct mlx5_core_mr mr;
1128 memset(&mr, 0, sizeof(mr));
1130 err = mlx5_core_destroy_mkey(dev->mdev, &mr);
1132 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
1135 static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
1136 struct ib_ucontext *context,
1137 struct ib_udata *udata)
1139 struct mlx5_ib_dev *dev = to_mdev(ibdev);
1140 struct mlx5_ib_alloc_pd_resp resp;
1141 struct mlx5_ib_pd *pd;
1144 pd = kmalloc(sizeof(*pd), GFP_KERNEL);
1146 return ERR_PTR(-ENOMEM);
1148 err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
1150 mlx5_ib_warn(dev, "pd alloc failed\n");
1152 return ERR_PTR(err);
1157 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
1158 mlx5_ib_err(dev, "copy failed\n");
1159 mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1161 return ERR_PTR(-EFAULT);
1164 err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
1166 mlx5_ib_err(dev, "alloc mkey failed\n");
1167 mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1169 return ERR_PTR(err);
1176 static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
1178 struct mlx5_ib_dev *mdev = to_mdev(pd->device);
1179 struct mlx5_ib_pd *mpd = to_mpd(pd);
1182 free_pa_mkey(mdev, mpd->pa_lkey);
1184 mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
1190 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1192 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1195 if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1198 err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
1200 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
1201 ibqp->qp_num, gid->raw);
1206 static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1208 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1211 if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1214 err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
1216 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
1217 ibqp->qp_num, gid->raw);
1222 static int init_node_data(struct mlx5_ib_dev *dev)
1226 err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
1230 return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
1233 static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
1236 struct mlx5_ib_dev *dev =
1237 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1239 return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
1242 static ssize_t show_reg_pages(struct device *device,
1243 struct device_attribute *attr, char *buf)
1245 struct mlx5_ib_dev *dev =
1246 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1248 return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
1251 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1254 struct mlx5_ib_dev *dev =
1255 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1256 return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
1259 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1262 struct mlx5_ib_dev *dev =
1263 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1264 return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
1265 fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
1268 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
1271 struct mlx5_ib_dev *dev =
1272 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1273 return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
1276 static ssize_t show_board(struct device *device, struct device_attribute *attr,
1279 struct mlx5_ib_dev *dev =
1280 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1281 return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
1282 dev->mdev->board_id);
1285 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
1286 static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
1287 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
1288 static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
1289 static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
1290 static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
1292 static struct device_attribute *mlx5_class_attributes[] = {
1298 &dev_attr_reg_pages,
1301 static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
1303 struct mlx5_ib_qp *mqp;
1304 struct mlx5_ib_cq *send_mcq, *recv_mcq;
1305 struct mlx5_core_cq *mcq;
1306 struct list_head cq_armed_list;
1307 unsigned long flags_qp;
1308 unsigned long flags_cq;
1309 unsigned long flags;
1311 mlx5_ib_warn(ibdev, " started\n");
1312 INIT_LIST_HEAD(&cq_armed_list);
1314 /* Go over the qp list residing on this ibdev, synced against qp create/destroy. */
1315 spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
1316 list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
1317 spin_lock_irqsave(&mqp->sq.lock, flags_qp);
1318 if (mqp->sq.tail != mqp->sq.head) {
1319 send_mcq = to_mcq(mqp->ibqp.send_cq);
1320 spin_lock_irqsave(&send_mcq->lock, flags_cq);
1321 if (send_mcq->mcq.comp &&
1322 mqp->ibqp.send_cq->comp_handler) {
1323 if (!send_mcq->mcq.reset_notify_added) {
1324 send_mcq->mcq.reset_notify_added = 1;
1325 list_add_tail(&send_mcq->mcq.reset_notify,
1329 spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
1331 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
1332 spin_lock_irqsave(&mqp->rq.lock, flags_qp);
1333 /* no handling is needed for SRQ */
1334 if (!mqp->ibqp.srq) {
1335 if (mqp->rq.tail != mqp->rq.head) {
1336 recv_mcq = to_mcq(mqp->ibqp.recv_cq);
1337 spin_lock_irqsave(&recv_mcq->lock, flags_cq);
1338 if (recv_mcq->mcq.comp &&
1339 mqp->ibqp.recv_cq->comp_handler) {
1340 if (!recv_mcq->mcq.reset_notify_added) {
1341 recv_mcq->mcq.reset_notify_added = 1;
1342 list_add_tail(&recv_mcq->mcq.reset_notify,
1346 spin_unlock_irqrestore(&recv_mcq->lock,
1350 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
1352 /* At this point all in-flight post-send operations have been flushed by the
1353  * lock/unlock sequence above.  Now arm all involved CQs.
1355 list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
1358 spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
1359 mlx5_ib_warn(ibdev, " ended\n");
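/*
 * mlx5_core event callback: translate core device events into IB
 * events and dispatch them to ibcore once the device is active.
 */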
1363 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1364 enum mlx5_dev_event event, unsigned long param)
1366 struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
1367 struct ib_event ibev;
1372 case MLX5_DEV_EVENT_SYS_ERROR:
1373 ibdev->ib_active = false;
1374 ibev.event = IB_EVENT_DEVICE_FATAL;
1375 mlx5_ib_handle_internal_error(ibdev);
1378 case MLX5_DEV_EVENT_PORT_UP:
1379 ibev.event = IB_EVENT_PORT_ACTIVE;
1383 case MLX5_DEV_EVENT_PORT_DOWN:
1384 case MLX5_DEV_EVENT_PORT_INITIALIZED:
1385 ibev.event = IB_EVENT_PORT_ERR;
1389 case MLX5_DEV_EVENT_LID_CHANGE:
1390 ibev.event = IB_EVENT_LID_CHANGE;
1394 case MLX5_DEV_EVENT_PKEY_CHANGE:
1395 ibev.event = IB_EVENT_PKEY_CHANGE;
1399 case MLX5_DEV_EVENT_GUID_CHANGE:
1400 ibev.event = IB_EVENT_GID_CHANGE;
1404 case MLX5_DEV_EVENT_CLIENT_REREG:
1405 ibev.event = IB_EVENT_CLIENT_REREGISTER;
1413 ibev.device = &ibdev->ib_dev;
1414 ibev.element.port_num = port;
1416 if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
1417 (port < 1 || port > ibdev->num_ports)) {
1418 mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
1422 if (ibdev->ib_active)
1423 ib_dispatch_event(&ibev);
1426 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
1430 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
1431 mlx5_query_ext_port_caps(dev, port);
1434 static void config_atomic_responder(struct mlx5_ib_dev *dev,
1435 struct ib_device_attr *props)
1437 enum ib_atomic_cap cap = props->atomic_cap;
1440 if (cap == IB_ATOMIC_HCA ||
1441 cap == IB_ATOMIC_GLOB)
1443 dev->enable_atomic_resp = 1;
1445 dev->atomic_cap = cap;
1448 enum mlx5_addr_align {
1449 MLX5_ADDR_ALIGN_0 = 0,
1450 MLX5_ADDR_ALIGN_64 = 64,
1451 MLX5_ADDR_ALIGN_128 = 128,
1454 static int get_port_caps(struct mlx5_ib_dev *dev)
1456 struct ib_device_attr *dprops = NULL;
1457 struct ib_port_attr *pprops = NULL;
1461 pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1465 dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1469 err = mlx5_ib_query_device(&dev->ib_dev, dprops);
1471 mlx5_ib_warn(dev, "query_device failed %d\n", err);
1474 config_atomic_responder(dev, dprops);
1476 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
1477 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1479 mlx5_ib_warn(dev, "query_port %d failed %d\n",
1483 dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
1484 dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
1485 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1486 dprops->max_pkeys, pprops->gid_tbl_len);
1496 static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1500 err = mlx5_mr_cache_cleanup(dev);
1502 mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1504 ib_dereg_mr(dev->umrc.mr);
1505 ib_dealloc_pd(dev->umrc.pd);
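/*
 * UMR resources: a dedicated PD and DMA MR used by the driver's
 * internal UMR QP, plus the MR cache.
 */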
1512 static int create_umr_res(struct mlx5_ib_dev *dev)
1518 pd = ib_alloc_pd(&dev->ib_dev);
1520 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1525 mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
1527 mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
1535 ret = mlx5_mr_cache_init(dev);
1537 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1551 static int create_dev_resources(struct mlx5_ib_resources *devr)
1553 struct ib_srq_init_attr attr;
1554 struct mlx5_ib_dev *dev;
1556 struct ib_cq_init_attr cq_attr = { .cqe = 1 };
1558 dev = container_of(devr, struct mlx5_ib_dev, devr);
1560 devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1561 if (IS_ERR(devr->p0)) {
1562 ret = PTR_ERR(devr->p0);
1565 devr->p0->device = &dev->ib_dev;
1566 devr->p0->uobject = NULL;
1567 atomic_set(&devr->p0->usecnt, 0);
1569 devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
1570 if (IS_ERR(devr->c0)) {
1571 ret = PTR_ERR(devr->c0);
1574 devr->c0->device = &dev->ib_dev;
1575 devr->c0->uobject = NULL;
1576 devr->c0->comp_handler = NULL;
1577 devr->c0->event_handler = NULL;
1578 devr->c0->cq_context = NULL;
1579 atomic_set(&devr->c0->usecnt, 0);
1581 devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1582 if (IS_ERR(devr->x0)) {
1583 ret = PTR_ERR(devr->x0);
1586 devr->x0->device = &dev->ib_dev;
1587 devr->x0->inode = NULL;
1588 atomic_set(&devr->x0->usecnt, 0);
1589 mutex_init(&devr->x0->tgt_qp_mutex);
1590 INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
1592 devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1593 if (IS_ERR(devr->x1)) {
1594 ret = PTR_ERR(devr->x1);
1597 devr->x1->device = &dev->ib_dev;
1598 devr->x1->inode = NULL;
1599 atomic_set(&devr->x1->usecnt, 0);
1600 mutex_init(&devr->x1->tgt_qp_mutex);
1601 INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
1603 memset(&attr, 0, sizeof(attr));
1604 attr.attr.max_sge = 1;
1605 attr.attr.max_wr = 1;
1606 attr.srq_type = IB_SRQT_XRC;
1607 attr.ext.xrc.cq = devr->c0;
1608 attr.ext.xrc.xrcd = devr->x0;
1610 devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1611 if (IS_ERR(devr->s0)) {
1612 ret = PTR_ERR(devr->s0);
1615 devr->s0->device = &dev->ib_dev;
1616 devr->s0->pd = devr->p0;
1617 devr->s0->uobject = NULL;
1618 devr->s0->event_handler = NULL;
1619 devr->s0->srq_context = NULL;
1620 devr->s0->srq_type = IB_SRQT_XRC;
1621 devr->s0->ext.xrc.xrcd = devr->x0;
1622 devr->s0->ext.xrc.cq = devr->c0;
1623 atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
1624 atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
1625 atomic_inc(&devr->p0->usecnt);
1626 atomic_set(&devr->s0->usecnt, 0);
1628 memset(&attr, 0, sizeof(attr));
1629 attr.attr.max_sge = 1;
1630 attr.attr.max_wr = 1;
1631 attr.srq_type = IB_SRQT_BASIC;
1632 devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1633 if (IS_ERR(devr->s1)) {
1634 ret = PTR_ERR(devr->s1);
1637 devr->s1->device = &dev->ib_dev;
1638 devr->s1->pd = devr->p0;
1639 devr->s1->uobject = NULL;
1640 devr->s1->event_handler = NULL;
1641 devr->s1->srq_context = NULL;
1642 devr->s1->srq_type = IB_SRQT_BASIC;
1643 devr->s1->ext.xrc.cq = devr->c0;
1644 atomic_inc(&devr->p0->usecnt);
1645 atomic_set(&devr->s1->usecnt, 0);
1650 mlx5_ib_destroy_srq(devr->s0);
1652 mlx5_ib_dealloc_xrcd(devr->x1);
1654 mlx5_ib_dealloc_xrcd(devr->x0);
1656 mlx5_ib_destroy_cq(devr->c0);
1658 mlx5_ib_dealloc_pd(devr->p0);
1663 static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1665 mlx5_ib_destroy_srq(devr->s1);
1666 mlx5_ib_destroy_srq(devr->s0);
1667 mlx5_ib_dealloc_xrcd(devr->x0);
1668 mlx5_ib_dealloc_xrcd(devr->x1);
1669 mlx5_ib_destroy_cq(devr->c0);
1670 mlx5_ib_dealloc_pd(devr->p0);
1673 static u32 get_core_cap_flags(struct ib_device *ibdev)
1675 struct mlx5_ib_dev *dev = to_mdev(ibdev);
1676 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
1677 u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
1678 u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
1681 if (ll == IB_LINK_LAYER_INFINIBAND)
1682 return RDMA_CORE_PORT_IBA_IB;
1684 ret = RDMA_CORE_PORT_RAW_PACKET;
1686 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
1689 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
1692 if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
1693 ret |= RDMA_CORE_PORT_IBA_ROCE;
1695 if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
1696 ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
1701 static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
1702 struct ib_port_immutable *immutable)
1704 struct ib_port_attr attr;
1705 struct mlx5_ib_dev *dev = to_mdev(ibdev);
1706 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
1709 immutable->core_cap_flags = get_core_cap_flags(ibdev);
1711 err = ib_query_port(ibdev, port_num, &attr);
1715 immutable->pkey_tbl_len = attr.pkey_tbl_len;
1716 immutable->gid_tbl_len = attr.gid_tbl_len;
1717 immutable->core_cap_flags = get_core_cap_flags(ibdev);
1718 if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
1719 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
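/*
 * DC tracer: allocate and DMA-map a per-port buffer that firmware
 * fills with DC CNAK trace events (mlx5_core_set_dc_cnak_trace).
 */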
1724 static void enable_dc_tracer(struct mlx5_ib_dev *dev)
1726 struct device *device = dev->ib_dev.dma_device;
1727 struct mlx5_dc_tracer *dct = &dev->dctr;
1733 size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
1734 if (size <= PAGE_SIZE)
1739 dct->pg = alloc_pages(GFP_KERNEL, order);
1741 mlx5_ib_err(dev, "failed to allocate %d pages\n", order);
1745 tmp = page_address(dct->pg);
1746 memset(tmp, 0xff, size);
1750 dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
1751 if (dma_mapping_error(device, dct->dma)) {
1752 mlx5_ib_err(dev, "dma mapping error\n");
1756 err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
1758 mlx5_ib_warn(dev, "failed to enable DC tracer\n");
1765 dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
1767 __free_pages(dct->pg, dct->order);
1771 static void disable_dc_tracer(struct mlx5_ib_dev *dev)
1773 struct device *device = dev->ib_dev.dma_device;
1774 struct mlx5_dc_tracer *dct = &dev->dctr;
1780 err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
1782 mlx5_ib_warn(dev, "failed to disable DC tracer\n");
1786 dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
1787 __free_pages(dct->pg, dct->order);
1792 MLX5_DC_CNAK_SIZE = 128,
1793 MLX5_NUM_BUF_IN_PAGE = PAGE_SIZE / MLX5_DC_CNAK_SIZE,
1794 MLX5_CNAK_TX_CQ_SIGNAL_FACTOR = 128,
1795 MLX5_DC_CNAK_SL = 0,
1796 MLX5_DC_CNAK_VL = 0,
1799 static int init_dc_improvements(struct mlx5_ib_dev *dev)
1801 if (!mlx5_core_is_pf(dev->mdev))
1804 if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
1807 enable_dc_tracer(dev);
1812 static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
1815 disable_dc_tracer(dev);
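/* Per-port queue counters back the sysfs diagnostic counters exposed below. */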
1818 static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
1820 mlx5_vport_dealloc_q_counter(dev->mdev,
1821 MLX5_INTERFACE_PROTOCOL_IB,
1822 dev->port[port_num].q_cnt_id);
1823 dev->port[port_num].q_cnt_id = 0;
1826 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
1830 for (i = 0; i < dev->num_ports; i++)
1831 mlx5_ib_dealloc_q_port_counter(dev, i);
1834 static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
1839 for (i = 0; i < dev->num_ports; i++) {
1840 ret = mlx5_vport_alloc_q_counter(dev->mdev,
1841 MLX5_INTERFACE_PROTOCOL_IB,
1842 &dev->port[i].q_cnt_id);
1845 "couldn't allocate queue counter for port %d\n",
1847 goto dealloc_counters;
1855 mlx5_ib_dealloc_q_port_counter(dev, i);
1860 struct port_attribute {
1861 struct attribute attr;
1862 ssize_t (*show)(struct mlx5_ib_port *,
1863 struct port_attribute *, char *buf);
1864 ssize_t (*store)(struct mlx5_ib_port *,
1865 struct port_attribute *,
1866 const char *buf, size_t count);
1869 struct port_counter_attribute {
1870 struct port_attribute attr;
1874 static ssize_t port_attr_show(struct kobject *kobj,
1875 struct attribute *attr, char *buf)
1877 struct port_attribute *port_attr =
1878 container_of(attr, struct port_attribute, attr);
1879 struct mlx5_ib_port_sysfs_group *p =
1880 container_of(kobj, struct mlx5_ib_port_sysfs_group,
1882 struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
1885 if (!port_attr->show)
1888 return port_attr->show(mibport, port_attr, buf);
1891 static ssize_t show_port_counter(struct mlx5_ib_port *p,
1892 struct port_attribute *port_attr,
1895 int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
1896 struct port_counter_attribute *counter_attr =
1897 container_of(port_attr, struct port_counter_attribute, attr);
1901 out = mlx5_vzalloc(outlen);
1905 ret = mlx5_vport_query_q_counter(p->dev->mdev,
1911 ret = sprintf(buf, "%d\n",
1912 be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));
1919 #define PORT_COUNTER_ATTR(_name) \
1920 struct port_counter_attribute port_counter_attr_##_name = { \
1921 .attr = __ATTR(_name, S_IRUGO, show_port_counter, NULL), \
1922 .offset = MLX5_BYTE_OFF(query_q_counter_out, _name) \
1925 static PORT_COUNTER_ATTR(rx_write_requests);
1926 static PORT_COUNTER_ATTR(rx_read_requests);
1927 static PORT_COUNTER_ATTR(rx_atomic_requests);
1928 static PORT_COUNTER_ATTR(rx_dct_connect);
1929 static PORT_COUNTER_ATTR(out_of_buffer);
1930 static PORT_COUNTER_ATTR(out_of_sequence);
1931 static PORT_COUNTER_ATTR(duplicate_request);
1932 static PORT_COUNTER_ATTR(rnr_nak_retry_err);
1933 static PORT_COUNTER_ATTR(packet_seq_err);
1934 static PORT_COUNTER_ATTR(implied_nak_seq_err);
1935 static PORT_COUNTER_ATTR(local_ack_timeout_err);
1937 static struct attribute *counter_attrs[] = {
1938 &port_counter_attr_rx_write_requests.attr.attr,
1939 &port_counter_attr_rx_read_requests.attr.attr,
1940 &port_counter_attr_rx_atomic_requests.attr.attr,
1941 &port_counter_attr_rx_dct_connect.attr.attr,
1942 &port_counter_attr_out_of_buffer.attr.attr,
1943 &port_counter_attr_out_of_sequence.attr.attr,
1944 &port_counter_attr_duplicate_request.attr.attr,
1945 &port_counter_attr_rnr_nak_retry_err.attr.attr,
1946 &port_counter_attr_packet_seq_err.attr.attr,
1947 &port_counter_attr_implied_nak_seq_err.attr.attr,
1948 &port_counter_attr_local_ack_timeout_err.attr.attr,
1952 static struct attribute_group port_counters_group = {
1954 .attrs = counter_attrs
1957 static const struct sysfs_ops port_sysfs_ops = {
1958 .show = port_attr_show
1961 static struct kobj_type port_type = {
1962 .sysfs_ops = &port_sysfs_ops,
1965 static int add_port_attrs(struct mlx5_ib_dev *dev,
1966 struct kobject *parent,
1967 struct mlx5_ib_port_sysfs_group *port,
1972 ret = kobject_init_and_add(&port->kobj, &port_type,
1978 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
1979 MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
1980 ret = sysfs_create_group(&port->kobj, &port_counters_group);
1985 port->enabled = true;
1989 kobject_put(&port->kobj);
1993 static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
1994 unsigned int num_ports)
1998 for (i = 0; i < num_ports; i++) {
1999 struct mlx5_ib_port_sysfs_group *port =
2000 &dev->port[i].group;
2005 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
2006 MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
2007 sysfs_remove_group(&port->kobj,
2008 &port_counters_group);
2009 kobject_put(&port->kobj);
2010 port->enabled = false;
2013 if (dev->ports_parent) {
2014 kobject_put(dev->ports_parent);
2015 dev->ports_parent = NULL;
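/*
 * Create the per-port sysfs hierarchy under "mlx5_ports", exposing the
 * queue counters when the device supports them.
 */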
2019 static int create_port_attrs(struct mlx5_ib_dev *dev)
2023 struct device *device = &dev->ib_dev.dev;
2025 dev->ports_parent = kobject_create_and_add("mlx5_ports",
2027 if (!dev->ports_parent)
2030 for (i = 0; i < dev->num_ports; i++) {
2031 ret = add_port_attrs(dev,
2033 &dev->port[i].group,
2037 goto _destroy_ports_attrs;
2042 _destroy_ports_attrs:
2043 destroy_ports_attrs(dev, i);
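/*
 * Device add: allocate the ib_device, discover port capabilities,
 * enable RoCE when the port link layer is Ethernet, register the verbs
 * entry points, create device/UMR resources, counters and sysfs
 * attributes, and start the RoCE GID update threads.
 */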
2047 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2049 struct mlx5_ib_dev *dev;
2053 printk_once(KERN_INFO "%s", mlx5_version);
2055 dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
2061 dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
2066 for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2067 dev->port[i].dev = dev;
2068 dev->port[i].port_num = i;
2069 dev->port[i].port_gone = 0;
2070 memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table));
2073 err = get_port_caps(dev);
2077 if (mlx5_use_mad_ifc(dev))
2078 get_ext_port_caps(dev);
2080 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2081 IB_LINK_LAYER_ETHERNET) {
2082 if (MLX5_CAP_GEN(mdev, roce)) {
2083 err = mlx5_nic_vport_enable_roce(mdev);
2091 MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
2093 strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
2094 dev->ib_dev.owner = THIS_MODULE;
2095 dev->ib_dev.node_type = RDMA_NODE_IB_CA;
2096 dev->ib_dev.local_dma_lkey = mdev->special_contexts.resd_lkey;
2097 dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
2098 dev->ib_dev.phys_port_cnt = dev->num_ports;
2099 dev->ib_dev.num_comp_vectors =
2100 dev->mdev->priv.eq_table.num_comp_vectors;
2101 dev->ib_dev.dma_device = &mdev->pdev->dev;
2103 dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
2104 dev->ib_dev.uverbs_cmd_mask =
2105 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
2106 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
2107 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
2108 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
2109 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
2110 (1ull << IB_USER_VERBS_CMD_REG_MR) |
2111 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
2112 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2113 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
2114 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
2115 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
2116 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
2117 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
2118 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
2119 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
2120 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
2121 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
2122 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
2123 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
2124 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
2125 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
2126 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
2127 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
2129 dev->ib_dev.query_device = mlx5_ib_query_device;
2130 dev->ib_dev.query_port = mlx5_ib_query_port;
2131 dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
2132 dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
2133 dev->ib_dev.query_gid = mlx5_ib_query_gid;
2134 dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
2135 dev->ib_dev.modify_device = mlx5_ib_modify_device;
2136 dev->ib_dev.modify_port = mlx5_ib_modify_port;
2137 dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
2138 dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
2139 dev->ib_dev.mmap = mlx5_ib_mmap;
2140 dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
2141 dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
2142 dev->ib_dev.create_ah = mlx5_ib_create_ah;
2143 dev->ib_dev.query_ah = mlx5_ib_query_ah;
2144 dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
2145 dev->ib_dev.create_srq = mlx5_ib_create_srq;
2146 dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
2147 dev->ib_dev.query_srq = mlx5_ib_query_srq;
2148 dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
2149 dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
2150 dev->ib_dev.create_qp = mlx5_ib_create_qp;
2151 dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
2152 dev->ib_dev.query_qp = mlx5_ib_query_qp;
2153 dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
2154 dev->ib_dev.post_send = mlx5_ib_post_send;
2155 dev->ib_dev.post_recv = mlx5_ib_post_recv;
2156 dev->ib_dev.create_cq = mlx5_ib_create_cq;
2157 dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
2158 dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
2159 dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
2160 dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
2161 dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
2162 dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
2163 dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
2164 dev->ib_dev.reg_phys_mr = mlx5_ib_reg_phys_mr;
2165 dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
2166 dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
2167 dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
2168 dev->ib_dev.process_mad = mlx5_ib_process_mad;
2169 dev->ib_dev.get_port_immutable = mlx5_port_immutable;
2170 dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
2171 dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
2172 dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
2174 if (MLX5_CAP_GEN(mdev, xrc)) {
2175 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
2176 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
2177 dev->ib_dev.uverbs_cmd_mask |=
2178 (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2179 (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2182 err = init_node_data(dev);
2184 goto err_disable_roce;
2186 mutex_init(&dev->cap_mask_mutex);
2187 INIT_LIST_HEAD(&dev->qp_list);
2188 spin_lock_init(&dev->reset_flow_resource_lock);
2190 err = create_dev_resources(&dev->devr);
2192 goto err_disable_roce;
2195 err = mlx5_ib_alloc_q_counters(dev);
2199 err = ib_register_device(&dev->ib_dev, NULL);
2203 err = create_umr_res(dev);
2207 if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2208 MLX5_CAP_PORT_TYPE_IB) {
2209 if (init_dc_improvements(dev))
2210 mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
2213 err = create_port_attrs(dev);
2217 for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2218 err = device_create_file(&dev->ib_dev.dev,
2219 mlx5_class_attributes[i]);
2221 goto err_port_attrs;
2225 struct thread *rl_thread = NULL;
2226 struct proc *rl_proc = NULL;
2228 for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2229 (void) kproc_kthread_add(mlx5_ib_roce_port_update, dev->port + i, &rl_proc, &rl_thread,
2230 RFHIGHPID, 0, "mlx5-ib-roce-port", "mlx5-ib-roce_port-%d", i);
2234 dev->ib_active = true;
2239 destroy_ports_attrs(dev, dev->num_ports);
2242 if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2243 MLX5_CAP_PORT_TYPE_IB)
2244 cleanup_dc_improvements(dev);
2245 destroy_umrc_res(dev);
2248 ib_unregister_device(&dev->ib_dev);
2251 mlx5_ib_dealloc_q_counters(dev);
2254 destroy_dev_resources(&dev->devr);
2257 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2258 IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2259 mlx5_nic_vport_disable_roce(mdev);
2264 ib_dealloc_device((struct ib_device *)dev);
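/*
 * Device removal: signal the RoCE port threads to exit via port_gone,
 * then tear everything down in reverse order of mlx5_ib_add().
 */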
2269 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
2271 struct mlx5_ib_dev *dev = context;
2274 for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2275 dev->port[i].port_gone = 1;
2276 while (dev->port[i].port_gone != 2)
2280 for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2281 device_remove_file(&dev->ib_dev.dev,
2282 mlx5_class_attributes[i]);
2285 destroy_ports_attrs(dev, dev->num_ports);
2286 if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2287 MLX5_CAP_PORT_TYPE_IB)
2288 cleanup_dc_improvements(dev);
2289 mlx5_ib_dealloc_q_counters(dev);
2290 ib_unregister_device(&dev->ib_dev);
2291 destroy_umrc_res(dev);
2292 destroy_dev_resources(&dev->devr);
2294 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2295 IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2296 mlx5_nic_vport_disable_roce(mdev);
2299 ib_dealloc_device(&dev->ib_dev);
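/* Register with mlx5_core as the consumer of the IB protocol on this device. */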
2302 static struct mlx5_interface mlx5_ib_interface = {
2304 .remove = mlx5_ib_remove,
2305 .event = mlx5_ib_event,
2306 .protocol = MLX5_INTERFACE_PROTOCOL_IB,
2309 static int __init mlx5_ib_init(void)
2313 if (deprecated_prof_sel != 2)
2314 printf("mlx5_ib: WARN: ""prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
2316 err = mlx5_register_interface(&mlx5_ib_interface);
2320 mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
2322 printf("mlx5_ib: ERR: ""%s: failed to create mlx5_ib_wq\n", __func__);
2329 mlx5_unregister_interface(&mlx5_ib_interface);
2335 static void __exit mlx5_ib_cleanup(void)
2337 destroy_workqueue(mlx5_ib_wq);
2338 mlx5_unregister_interface(&mlx5_ib_interface);
2341 module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
2342 module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);