/*
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define LINUXKPI_PARAM_PREFIX mlx4_

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/if_vlan.h>
#include <linux/fs.h>
#include <net/ipv6.h>

#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_user_verbs_exp.h>
#include <rdma/ib_addr.h>

#include <dev/mlx4/driver.h>
#include <dev/mlx4/cmd.h>
#include <linux/sched.h>
#include <linux/page.h>
#include <linux/printk.h>
#include "mlx4_ib.h"
#include "mlx4_exp.h"
#include "user.h"
#include "wc.h"

#define DRV_NAME        MLX4_IB_DRV_NAME
#define DRV_VERSION     "1.0"

#define MLX4_IB_DRIVER_PROC_DIR_NAME "driver/mlx4_ib"
#define MLX4_IB_MRS_PROC_DIR_NAME "mrs"
#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
#ifdef __linux__
MODULE_VERSION(DRV_VERSION);
#endif

int mlx4_ib_sm_guid_assign = 1;

module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");

enum {
        MAX_NUM_STR_BITMAP = 1 << 15,
        DEFAULT_TBL_VAL = -1
};

static struct mlx4_dbdf2val_lst dev_assign_str = {
        .name           = "dev_assign_str param",
        .num_vals       = 1,
        .def_val        = {DEFAULT_TBL_VAL},
        .range          = {0, MAX_NUM_STR_BITMAP - 1}
};
module_param_string(dev_assign_str, dev_assign_str.str,
                    sizeof(dev_assign_str.str), 0444);
MODULE_PARM_DESC(dev_assign_str,
                 "Map device function numbers to IB device numbers (e.g. '0000:04:00.0-0,002b:1c:0b.a-1,...').\n"
                 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for IB device numbers (e.g. 1).\n"
                 "\t\tMax supported devices - 32");

static unsigned long *dev_num_str_bitmap;
static spinlock_t dev_num_str_lock;

static const char mlx4_ib_version[] =
        DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
        DRV_VERSION "\n";

struct update_gid_work {
        struct work_struct      work;
        union ib_gid            gids[128];
        struct mlx4_ib_dev     *dev;
        int                     port;
};

struct dev_rec {
        int     bus;
        int     dev;
        int     func;
        int     nr;
};

static int dr_active;

static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);

static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, struct net_device*,
                                 unsigned long);

static u8 mlx4_ib_get_dev_port(struct net_device *dev,
                                        struct mlx4_ib_dev *ibdev);

static struct workqueue_struct *wq;

static void init_query_mad(struct ib_smp *mad)
{
        mad->base_version  = 1;
        mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        mad->class_version = 1;
        mad->method        = IB_MGMT_METHOD_GET;
}

static union ib_gid zgid;

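/*
 * Device-managed flow steering (DMFS) is only reported as supported when
 * the firmware can steer every link type in use: IPoIB ports require
 * MLX4_DEV_CAP_FLAG2_DMFS_IPOIB, Ethernet ports require
 * MLX4_DEV_CAP_FLAG2_FS_EN, and multi-function (SR-IOV) devices with IB
 * ports disable DMFS entirely.
 */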
static int check_flow_steering_support(struct mlx4_dev *dev)
{
        int eth_num_ports = 0;
        int ib_num_ports = 0;
        int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;

        if (dmfs) {
                int i;
                mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
                        eth_num_ports++;
                mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
                        ib_num_ports++;
                dmfs &= (!ib_num_ports ||
                         (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
                        (!eth_num_ports ||
                         (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
                if (ib_num_ports && mlx4_is_mfunc(dev)) {
                        dmfs = 0;
                }
        }
        return dmfs;
}

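/*
 * Query device capabilities, typically reached from userspace through
 * ibv_query_device().  Most limits come from the cached mlx4_dev
 * caps/quotas; the vendor id, hardware revision and system image GUID
 * are read from the firmware with a NodeInfo MAD via mlx4_MAD_IFC().
 */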
int mlx4_ib_query_device(struct ib_device *ibdev,
                                struct ib_device_attr *props)
{
        struct mlx4_ib_dev *dev = to_mdev(ibdev);
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        init_query_mad(in_mad);
        in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;

        err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
                           1, NULL, NULL, in_mad, out_mad);
        if (err)
                goto out;

        memset(props, 0, sizeof *props);

        props->fw_ver = dev->dev->caps.fw_ver;
        props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
                IB_DEVICE_PORT_ACTIVE_EVENT             |
                IB_DEVICE_SYS_IMAGE_GUID                |
                IB_DEVICE_RC_RNR_NAK_GEN                |
                IB_DEVICE_BLOCK_MULTICAST_LOOPBACK      |
                IB_DEVICE_SHARED_MR;

        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
                props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
                props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM)
                props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
                props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
                props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
        if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
                props->device_cap_flags |= IB_DEVICE_UD_TSO;
        if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
                props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
        if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
            (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
            (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
                props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
                props->device_cap_flags |= IB_DEVICE_XRC;
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_CROSS_CHANNEL)
                props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;

        if (check_flow_steering_support(dev->dev))
                props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;

        props->device_cap_flags |= IB_DEVICE_QPG;
        if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
                props->device_cap_flags |= IB_DEVICE_UD_RSS;
                props->max_rss_tbl_sz = dev->dev->caps.max_rss_tbl_sz;
        }
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
                props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
        if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
                if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
                        props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
                else
                        props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
        }
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
                0xffffff;
        props->vendor_part_id      = dev->dev->pdev->device;
        props->hw_ver              = be32_to_cpup((__be32 *) (out_mad->data + 32));
        memcpy(&props->sys_image_guid, out_mad->data +  4, 8);

        props->max_mr_size         = ~0ull;
        props->page_size_cap       = dev->dev->caps.page_size_cap;
        props->max_qp              = dev->dev->quotas.qp;
        props->max_qp_wr           = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
        props->max_sge             = min(dev->dev->caps.max_sq_sg,
                                         dev->dev->caps.max_rq_sg);
        props->max_cq              = dev->dev->quotas.cq;
        props->max_cqe             = dev->dev->caps.max_cqes;
        props->max_mr              = dev->dev->quotas.mpt;
        props->max_pd              = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
        props->max_qp_rd_atom      = dev->dev->caps.max_qp_dest_rdma;
        props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
        props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
        props->max_srq             = dev->dev->quotas.srq;
        props->max_srq_wr          = dev->dev->caps.max_srq_wqes - 1;
        props->max_srq_sge         = dev->dev->caps.max_srq_sge;
        props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
        props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
        props->atomic_cap          = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
                IB_ATOMIC_HCA : IB_ATOMIC_NONE;
        props->masked_atomic_cap   = props->atomic_cap;
        props->max_pkeys           = dev->dev->caps.pkey_table_len[1];
        props->max_mcast_grp       = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
        props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                                           props->max_mcast_grp;
        props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
        props->hca_core_clock = dev->dev->caps.hca_core_clock;
        if (dev->dev->caps.hca_core_clock > 0)
                props->comp_mask |= IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK;
        if (dev->dev->caps.cq_timestamp) {
                props->timestamp_mask = 0xFFFFFFFFFFFF;
                props->comp_mask |= IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK;
        }

out:
        kfree(in_mad);
        kfree(out_mad);

        return err;
}

static enum rdma_link_layer
mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
        struct mlx4_dev *dev = to_mdev(device)->dev;

        return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
                IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
}

static int ib_link_query_port(struct ib_device *ibdev, u8 port,
                              struct ib_port_attr *props, int netw_view)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int ext_active_speed;
        int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
        int err = -ENOMEM;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
        in_mad->attr_mod = cpu_to_be32(port);

        if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
                mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

        err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
                                in_mad, out_mad);
        if (err)
                goto out;

        props->lid              = be16_to_cpup((__be16 *) (out_mad->data + 16));
        props->lmc              = out_mad->data[34] & 0x7;
        props->sm_lid           = be16_to_cpup((__be16 *) (out_mad->data + 18));
        props->sm_sl            = out_mad->data[36] & 0xf;
        props->state            = out_mad->data[32] & 0xf;
        props->phys_state       = out_mad->data[33] >> 4;
        props->port_cap_flags   = be32_to_cpup((__be32 *) (out_mad->data + 20));
        if (netw_view)
                props->gid_tbl_len = out_mad->data[50];
        else
                props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
        props->max_msg_sz       = to_mdev(ibdev)->dev->caps.max_msg_sz;
        props->pkey_tbl_len     = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
        props->bad_pkey_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 46));
        props->qkey_viol_cntr   = be16_to_cpup((__be16 *) (out_mad->data + 48));
        props->active_width     = out_mad->data[31] & 0xf;
        props->active_speed     = out_mad->data[35] >> 4;
        props->max_mtu          = out_mad->data[41] & 0xf;
        props->active_mtu       = out_mad->data[36] >> 4;
        props->subnet_timeout   = out_mad->data[51] & 0x1f;
        props->max_vl_num       = out_mad->data[37] >> 4;
        props->init_type_reply  = out_mad->data[41] >> 4;

        /* Check if extended speeds (EDR/FDR/...) are supported */
        if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
                ext_active_speed = out_mad->data[62] >> 4;

                switch (ext_active_speed) {
                case 1:
                        props->active_speed = IB_SPEED_FDR;
                        break;
                case 2:
                        props->active_speed = IB_SPEED_EDR;
                        break;
                }
        }

        /* If the reported active speed is QDR, check whether it is FDR-10 */
        if (props->active_speed == IB_SPEED_QDR) {
                init_query_mad(in_mad);
                in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
                in_mad->attr_mod = cpu_to_be32(port);

                err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
                                   NULL, NULL, in_mad, out_mad);
                if (err)
                        goto out;

                /* Checking LinkSpeedActive for FDR-10 */
                if (out_mad->data[15] & 0x1)
                        props->active_speed = IB_SPEED_FDR10;
        }

        /* Avoid wrong speed value returned by FW if the IB link is down. */
        if (props->state == IB_PORT_DOWN)
                props->active_speed = IB_SPEED_SDR;

out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

static u8 state_to_phys_state(enum ib_port_state state)
{
        return state == IB_PORT_ACTIVE ? 5 : 3;
}

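/*
 * RoCE ports have no subnet management agent, so the IB port attributes
 * are synthesized: width and speed come from a QUERY_PORT firmware
 * command, while state and active MTU are derived from the companion
 * net_device (carrier state and if_mtu) under iboe->lock.
 */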
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
                               struct ib_port_attr *props, int netw_view)
{
        struct mlx4_ib_dev *mdev = to_mdev(ibdev);
        struct mlx4_ib_iboe *iboe = &mdev->iboe;
        struct net_device *ndev;
        enum ib_mtu tmp;
        struct mlx4_cmd_mailbox *mailbox;
        unsigned long flags;
        int err = 0;

        mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);

        err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
                           MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
                           MLX4_CMD_WRAPPED);
        if (err)
                goto out;

        props->active_width     =  (((u8 *)mailbox->buf)[5] == 0x40) ?
                                                IB_WIDTH_4X : IB_WIDTH_1X;
        props->active_speed     = IB_SPEED_QDR;
        props->port_cap_flags   = IB_PORT_CM_SUP;
        if (netw_view)
                props->gid_tbl_len = MLX4_ROCE_MAX_GIDS;
        else
                props->gid_tbl_len   = mdev->dev->caps.gid_table_len[port];

        props->max_msg_sz       = mdev->dev->caps.max_msg_sz;
        props->pkey_tbl_len     = 1;
        props->max_mtu          = IB_MTU_4096;
        props->max_vl_num       = 2;
        props->state            = IB_PORT_DOWN;
        props->phys_state       = state_to_phys_state(props->state);
        props->active_mtu       = IB_MTU_256;
        spin_lock_irqsave(&iboe->lock, flags);
        ndev = iboe->netdevs[port - 1];
        if (!ndev)
                goto out_unlock;

        tmp = iboe_get_mtu(ndev->if_mtu);
        props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;

        props->state            = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
                                        IB_PORT_ACTIVE : IB_PORT_DOWN;
        props->phys_state       = state_to_phys_state(props->state);
out_unlock:
        spin_unlock_irqrestore(&iboe->lock, flags);
out:
        mlx4_free_cmd_mailbox(mdev->dev, mailbox);
        return err;
}

int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
                         struct ib_port_attr *props, int netw_view)
{
        int err;

        memset(props, 0, sizeof *props);

        err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
                ib_link_query_port(ibdev, port, props, netw_view) :
                                eth_link_query_port(ibdev, port, props, netw_view);

        return err;
}

static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
                              struct ib_port_attr *props)
{
        /* returns host view */
        return __mlx4_ib_query_port(ibdev, port, props, 0);
}

int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
                        union ib_gid *gid, int netw_view)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        struct mlx4_ib_dev *dev = to_mdev(ibdev);
        int clear = 0;
        int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
        in_mad->attr_mod = cpu_to_be32(port);

        if (mlx4_is_mfunc(dev->dev) && netw_view)
                mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

        err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
        if (err)
                goto out;

        memcpy(gid->raw, out_mad->data + 8, 8);

        if (mlx4_is_mfunc(dev->dev) && !netw_view) {
                if (index) {
                        /* For any index > 0, return the null guid */
                        err = 0;
                        clear = 1;
                        goto out;
                }
        }

        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
        in_mad->attr_mod = cpu_to_be32(index / 8);

        err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
                           NULL, NULL, in_mad, out_mad);
        if (err)
                goto out;

        memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);

out:
        if (clear)
                memset(gid->raw + 8, 0, 8);
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

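/*
 * For Ethernet (RoCE) ports the GID table is maintained in software, so
 * a GID query is a plain copy from the cached iboe gid_table and
 * involves no MAD traffic.
 */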
static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index,
                          union ib_gid *gid)
{
        struct mlx4_ib_dev *dev = to_mdev(ibdev);

        *gid = dev->iboe.gid_table[port - 1][index];

        return 0;
}

static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
                             union ib_gid *gid)
{
        if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
                return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
        else
                return iboe_query_gid(ibdev, port, index, gid);
}

int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
                         u16 *pkey, int netw_view)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
        int err = -ENOMEM;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
        in_mad->attr_mod = cpu_to_be32(index / 32);

        if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
                mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

        err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
                           in_mad, out_mad);
        if (err)
                goto out;

        *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);

out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
        return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
}

static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
                                 struct ib_device_modify *props)
{
        struct mlx4_cmd_mailbox *mailbox;
        unsigned long flags;

        if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
                return -EOPNOTSUPP;

        if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
                return 0;

        if (mlx4_is_slave(to_mdev(ibdev)->dev))
                return -EOPNOTSUPP;

        spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
        memcpy(ibdev->node_desc, props->node_desc, 64);
        spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);

        /*
         * If possible, pass the node desc to the FW so that it can
         * generate a trap 144.  If the command fails, just ignore it.
         */
        mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
        if (IS_ERR(mailbox))
                return 0;

        memset(mailbox->buf, 0, 256);
        memcpy(mailbox->buf, props->node_desc, 64);
        mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
                 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);

        mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);

        return 0;
}

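/*
 * Wrapper around the SET_PORT firmware command, used to update the port
 * capability mask and optionally reset the QKey violation counter.  The
 * mailbox layout depends on the firmware generation: with
 * MLX4_FLAG_OLD_PORT_CMDS the reset bit is in byte 0 and the capability
 * mask in dword 2; otherwise byte 3 and dword 1 are used.
 */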
static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
                         u32 cap_mask)
{
        struct mlx4_cmd_mailbox *mailbox;
        int err;
        u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;

        mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);

        memset(mailbox->buf, 0, 256);

        if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
                *(u8 *) mailbox->buf         = !!reset_qkey_viols << 6;
                ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
        } else {
                ((u8 *) mailbox->buf)[3]     = !!reset_qkey_viols;
                ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
        }

        err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
                       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);

        mlx4_free_cmd_mailbox(dev->dev, mailbox);
        return err;
}

static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
                               struct ib_port_modify *props)
{
        struct ib_port_attr attr;
        u32 cap_mask;
        int err;

        mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);

        err = mlx4_ib_query_port(ibdev, port, &attr);
        if (err)
                goto out;

        cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
                ~props->clr_port_cap_mask;

        err = mlx4_SET_PORT(to_mdev(ibdev), port,
                            !!(mask & IB_PORT_RESET_QKEY_CNTR),
                            cap_mask);

out:
        mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
        return err;
}

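/*
 * Allocate a user context and return device parameters through udata.
 * Two response layouts exist: ABI version
 * MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION predates the dev_caps and
 * cqe_size fields, so the response struct is chosen by
 * ibdev->uverbs_abi_ver.  Blue flame parameters are advertised only
 * when write combining is usable (mlx4_wc_enabled()).
 */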
static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
                                                  struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(ibdev);
        struct mlx4_ib_ucontext *context;
        struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
        struct mlx4_ib_alloc_ucontext_resp resp;
        int err;

        if (!dev->ib_active)
                return ERR_PTR(-EAGAIN);

        if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
                resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
                if (mlx4_wc_enabled()) {
                        resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
                        resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
                } else {
                        resp_v3.bf_reg_size      = 0;
                        resp_v3.bf_regs_per_page = 0;
                }
        } else {
                resp.dev_caps         = dev->dev->caps.userspace_caps;
                resp.qp_tab_size      = dev->dev->caps.num_qps;
                if (mlx4_wc_enabled()) {
                        resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
                        resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
                } else {
                        resp.bf_reg_size      = 0;
                        resp.bf_regs_per_page = 0;
                }
                resp.cqe_size         = dev->dev->caps.cqe_size;
        }

        context = kmalloc(sizeof *context, GFP_KERNEL);
        if (!context)
                return ERR_PTR(-ENOMEM);

        err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
        if (err) {
                kfree(context);
                return ERR_PTR(err);
        }

        INIT_LIST_HEAD(&context->db_page_list);
        mutex_init(&context->db_page_mutex);

        if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
                err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
        else
                err = ib_copy_to_udata(udata, &resp, sizeof(resp));

        if (err) {
                mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
                kfree(context);
                return ERR_PTR(-EFAULT);
        }

        return &context->ibucontext;
}

static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
        struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);

        mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
        kfree(context);

        return 0;
}

/* XXX FreeBSD has no support for the get_unmapped_area function */
#if 0
static unsigned long mlx4_ib_get_unmapped_area(struct file *file,
                        unsigned long addr,
                        unsigned long len, unsigned long pgoff,
                        unsigned long flags)
{
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        unsigned long start_addr;
        unsigned long page_size_order;
        unsigned long  command;

        mm = current->mm;
        if (addr)
                return current->mm->get_unmapped_area(file, addr, len,
                                                pgoff, flags);

        /* The last 8 bits hold the command; the rest is data for that command */
        command = pgoff & MLX4_IB_MMAP_CMD_MASK;
        if (command != MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES)
                return current->mm->get_unmapped_area(file, addr, len,
                                                pgoff, flags);

        page_size_order = pgoff >> MLX4_IB_MMAP_CMD_BITS;
        /* code is based on the huge-pages get_unmapped_area code */
        start_addr = mm->free_area_cache;

        if (len <= mm->cached_hole_size)
                start_addr = TASK_UNMAPPED_BASE;

full_search:
        addr = ALIGN(start_addr, 1 << page_size_order);

        for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
                /* At this point:  (!vma || addr < vma->vm_end). */
                if (TASK_SIZE - len < addr) {
                        /*
                         * Start a new search - just in case we missed
                         * some holes.
                         */
                        if (start_addr != TASK_UNMAPPED_BASE) {
                                start_addr = TASK_UNMAPPED_BASE;
                                goto full_search;
                        }
                        return -ENOMEM;
                }

                if (!vma || addr + len <= vma->vm_start)
                        return addr;
                addr = ALIGN(vma->vm_end, 1 << page_size_order);
        }
}
#endif

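/*
 * mmap() requests are multiplexed through the page offset: the low
 * MLX4_IB_MMAP_CMD_MASK bits of vm_pgoff select the UAR doorbell page,
 * the blue flame page or the internal HW clock BAR.  A minimal
 * userspace sketch (assuming MLX4_IB_MMAP_BLUE_FLAME_PAGE == 1, per the
 * "commands 0 & 1" note below, and an already-open uverbs fd):
 *
 *      bf_page = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
 *                     MLX4_IB_MMAP_BLUE_FLAME_PAGE * page_size);
 */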
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
        struct mlx4_ib_dev *dev = to_mdev(context->device);

        /* The last 8 bits hold the command; the rest is data for that command */
        unsigned long  command = vma->vm_pgoff & MLX4_IB_MMAP_CMD_MASK;

        if (command < MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) {
                /* compatibility handling for commands 0 & 1 */
                if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                        return -EINVAL;
        }
        if (command == MLX4_IB_MMAP_UAR_PAGE) {
                vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

                if (io_remap_pfn_range(vma, vma->vm_start,
                                       to_mucontext(context)->uar.pfn,
                                       PAGE_SIZE, vma->vm_page_prot))
                        return -EAGAIN;
        } else if (command == MLX4_IB_MMAP_BLUE_FLAME_PAGE &&
                        dev->dev->caps.bf_reg_size != 0) {
                vma->vm_page_prot = pgprot_wc(vma->vm_page_prot);

                if (io_remap_pfn_range(vma, vma->vm_start,
                                       to_mucontext(context)->uar.pfn +
                                       dev->dev->caps.num_uars,
                                       PAGE_SIZE, vma->vm_page_prot))
                        return -EAGAIN;
        } else if (command == MLX4_IB_MMAP_GET_HW_CLOCK) {
                struct mlx4_clock_params params;
                int ret;

                ret = mlx4_get_internal_clock_params(dev->dev, &params);
                if (ret)
                        return ret;

                vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

                if (io_remap_pfn_range(vma, vma->vm_start,
                                       (pci_resource_start(dev->dev->pdev,
                                       params.bar) + params.offset)
                                       >> PAGE_SHIFT,
                                       PAGE_SIZE, vma->vm_page_prot))
                        return -EAGAIN;
        } else
                return -EINVAL;

        return 0;
}

static int mlx4_ib_ioctl(struct ib_ucontext *context, unsigned int cmd,
                         unsigned long arg)
{
        struct mlx4_ib_dev *dev = to_mdev(context->device);
        int ret;
        int offset;

        switch (cmd) {
        case MLX4_IOCHWCLOCKOFFSET: {
                struct mlx4_clock_params params;
                int ret;
                ret = mlx4_get_internal_clock_params(dev->dev, &params);
                if (!ret) {
                        offset = params.offset % PAGE_SIZE;
                        ret = put_user(offset,
                                         (int *)arg);
                        return sizeof(int);
                } else {
                        return ret;
                }
        }
        default: {
                pr_err("mlx4_ib: invalid ioctl %u command with arg %lX\n",
                       cmd, arg);
                return -ENOTTY;
        }
        }

        return ret;
}

static int mlx4_ib_query_values(struct ib_device *device, int q_values,
                                struct ib_device_values *values)
{
        struct mlx4_ib_dev *dev = to_mdev(device);
        cycle_t cycles;

        values->values_mask = 0;
        if (q_values & IBV_VALUES_HW_CLOCK) {
                cycles = mlx4_read_clock(dev->dev);
                /* mlx4_read_clock() is assumed to return a negative
                 * value on failure; report the clock only on success. */
                if (cycles >= 0) {
                        values->hwclock = cycles & CORE_CLOCK_MASK;
                        values->values_mask |= IBV_VALUES_HW_CLOCK;
                }
                q_values &= ~IBV_VALUES_HW_CLOCK;
        }

        if (q_values)
                return -ENOTTY;

        return 0;
}

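/*
 * Protection domain allocation.  The hardware PD number is copied back
 * through udata when a ucontext is present so that the userspace
 * provider can use the PD number directly.
 */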
static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
                                      struct ib_ucontext *context,
                                      struct ib_udata *udata)
{
        struct mlx4_ib_pd *pd;
        int err;

        pd = kmalloc(sizeof *pd, GFP_KERNEL);
        if (!pd)
                return ERR_PTR(-ENOMEM);

        err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
        if (err) {
                kfree(pd);
                return ERR_PTR(err);
        }

        if (context)
                if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
                        mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
                        kfree(pd);
                        return ERR_PTR(-EFAULT);
                }

        return &pd->ibpd;
}

static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
{
        mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
        kfree(pd);

        return 0;
}

static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
                                          struct ib_ucontext *context,
                                          struct ib_udata *udata)
{
        struct mlx4_ib_xrcd *xrcd;
        int err;

        if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
                return ERR_PTR(-ENOSYS);

        xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
        if (!xrcd)
                return ERR_PTR(-ENOMEM);

        err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
        if (err)
                goto err1;

        xrcd->pd = ib_alloc_pd(ibdev);
        if (IS_ERR(xrcd->pd)) {
                err = PTR_ERR(xrcd->pd);
                goto err2;
        }

        xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0);
        if (IS_ERR(xrcd->cq)) {
                err = PTR_ERR(xrcd->cq);
                goto err3;
        }

        return &xrcd->ibxrcd;

err3:
        ib_dealloc_pd(xrcd->pd);
err2:
        mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
err1:
        kfree(xrcd);
        return ERR_PTR(err);
}

static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
        ib_destroy_cq(to_mxrcd(xrcd)->cq);
        ib_dealloc_pd(to_mxrcd(xrcd)->pd);
        mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
        kfree(xrcd);

        return 0;
}

static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
{
        struct mlx4_ib_qp *mqp = to_mqp(ibqp);
        struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
        struct mlx4_ib_gid_entry *ge;

        ge = kzalloc(sizeof *ge, GFP_KERNEL);
        if (!ge)
                return -ENOMEM;

        ge->gid = *gid;
        if (mlx4_ib_add_mc(mdev, mqp, gid)) {
                ge->port = mqp->port;
                ge->added = 1;
        }

        mutex_lock(&mqp->mutex);
        list_add_tail(&ge->list, &mqp->gid_list);
        mutex_unlock(&mqp->mutex);

        return 0;
}

int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
                   union ib_gid *gid)
{
        u8 mac[6];
        struct net_device *ndev;
        int ret = 0;

        if (!mqp->port)
                return 0;

        spin_lock(&mdev->iboe.lock);
        ndev = mdev->iboe.netdevs[mqp->port - 1];
        if (ndev)
                dev_hold(ndev);
        spin_unlock(&mdev->iboe.lock);

        if (ndev) {
                rdma_get_mcast_mac((struct in6_addr *)gid, mac);
                rtnl_lock();
                dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac, 6, 0);
                ret = 1;
                rtnl_unlock();
                dev_put(ndev);
        }

        return ret;
}

struct mlx4_ib_steering {
        struct list_head list;
        u64 reg_id;
        union ib_gid gid;
};

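/*
 * Translate one verbs flow specification (ETH, IB, IPv4, TCP/UDP) into
 * the firmware _rule_hw wire format.  Returns the number of bytes the
 * spec occupies in the rule buffer, or -EINVAL for unknown spec types
 * or types the device cannot steer on.
 */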
static int parse_flow_attr(struct mlx4_dev *dev,
                           union ib_flow_spec *ib_spec,
                           struct _rule_hw *mlx4_spec)
{
        enum mlx4_net_trans_rule_id type;

        switch (ib_spec->type) {
        case IB_FLOW_SPEC_ETH:
                type = MLX4_NET_TRANS_RULE_ID_ETH;
                memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
                       ETH_ALEN);
                memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
                       ETH_ALEN);
                mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
                mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
                break;

        case IB_FLOW_SPEC_IB:
                type = MLX4_NET_TRANS_RULE_ID_IB;
                mlx4_spec->ib.l3_qpn = ib_spec->ib.val.l3_type_qpn;
                mlx4_spec->ib.qpn_mask = ib_spec->ib.mask.l3_type_qpn;
                memcpy(&mlx4_spec->ib.dst_gid, ib_spec->ib.val.dst_gid, 16);
                memcpy(&mlx4_spec->ib.dst_gid_msk,
                       ib_spec->ib.mask.dst_gid, 16);
                break;

        case IB_FLOW_SPEC_IPV4:
                type = MLX4_NET_TRANS_RULE_ID_IPV4;
                mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
                mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
                mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
                mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
                break;

        case IB_FLOW_SPEC_TCP:
        case IB_FLOW_SPEC_UDP:
                type = ib_spec->type == IB_FLOW_SPEC_TCP ?
                                        MLX4_NET_TRANS_RULE_ID_TCP :
                                        MLX4_NET_TRANS_RULE_ID_UDP;
                mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
                mlx4_spec->tcp_udp.dst_port_msk =
                        ib_spec->tcp_udp.mask.dst_port;
                mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
                mlx4_spec->tcp_udp.src_port_msk =
                        ib_spec->tcp_udp.mask.src_port;
                break;

        default:
                return -EINVAL;
        }
        if (map_sw_to_hw_steering_id(dev, type) < 0 ||
            hw_rule_sz(dev, type) < 0)
                return -EINVAL;
        mlx4_spec->id = cpu_to_be16(map_sw_to_hw_steering_id(dev, type));
        mlx4_spec->size = hw_rule_sz(dev, type) >> 2;
        return hw_rule_sz(dev, type);
}

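/*
 * Attach a single flow rule in firmware.  The command mailbox carries a
 * mlx4_net_trans_rule_hw_ctrl header (domain/priority, rule type, port
 * and destination QPN) followed by one translated _rule_hw entry per
 * spec; the total size is passed to MLX4_QP_FLOW_STEERING_ATTACH in
 * dwords (size >> 2).  On success, *reg_id holds the handle needed to
 * detach the rule later.
 */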
static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
                          int domain,
                          enum mlx4_net_trans_promisc_mode flow_type,
                          u64 *reg_id)
{
        int ret, i;
        int size = 0;
        void *ib_flow;
        struct mlx4_ib_dev *mdev = to_mdev(qp->device);
        struct mlx4_cmd_mailbox *mailbox;
        struct mlx4_net_trans_rule_hw_ctrl *ctrl;
        size_t rule_size = sizeof(struct mlx4_net_trans_rule_hw_ctrl) +
                           (sizeof(struct _rule_hw) * flow_attr->num_of_specs);

        static const u16 __mlx4_domain[] = {
                [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
                [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
                [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
                [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
        };

        if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
                pr_err("Invalid priority value.\n");
                return -EINVAL;
        }
        if (domain >= IB_FLOW_DOMAIN_NUM) {
                pr_err("Invalid domain value.\n");
                return -EINVAL;
        }
        if (map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
                return -EINVAL;

        mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);
        memset(mailbox->buf, 0, rule_size);
        ctrl = mailbox->buf;

        ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
                                 flow_attr->priority);
        ctrl->type = map_sw_to_hw_steering_mode(mdev->dev, flow_type);
        ctrl->port = flow_attr->port;
        ctrl->qpn = cpu_to_be32(qp->qp_num);

        if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK)
                ctrl->flags = (1 << 3);

        ib_flow = flow_attr + 1;
        size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
        for (i = 0; i < flow_attr->num_of_specs; i++) {
                ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
                if (ret < 0) {
                        mlx4_free_cmd_mailbox(mdev->dev, mailbox);
                        return -EINVAL;
                }
                ib_flow += ((union ib_flow_spec *)ib_flow)->size;
                size += ret;
        }

        ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
                           MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
                           MLX4_CMD_NATIVE);
        if (ret == -ENOMEM)
                pr_err("mcg table is full. Failed to register network rule.\n");
        else if (ret == -ENXIO)
                pr_err("Device managed flow steering is disabled. Failed to register network rule.\n");
        else if (ret)
                pr_err("Invalid argument. Failed to register network rule.\n");
        mlx4_free_cmd_mailbox(mdev->dev, mailbox);
        return ret;
}

static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
{
        int err;
        err = mlx4_cmd(dev, reg_id, 0, 0,
                       MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
                       MLX4_CMD_NATIVE);
        if (err)
                pr_err("Failed to detach network rule, registration id = 0x%llx\n",
                       (unsigned long long)reg_id);
        return err;
}

static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
                                    struct ib_flow_attr *flow_attr,
                                    int domain)
{
        int err = 0, i = 0;
        struct mlx4_ib_flow *mflow;
        enum mlx4_net_trans_promisc_mode type[2];

        memset(type, 0, sizeof(type));

        mflow = kzalloc(sizeof(struct mlx4_ib_flow), GFP_KERNEL);
        if (!mflow) {
                err = -ENOMEM;
                goto err_free;
        }

        switch (flow_attr->type) {
        case IB_FLOW_ATTR_NORMAL:
                type[0] = MLX4_FS_REGULAR;
                break;

        case IB_FLOW_ATTR_ALL_DEFAULT:
                type[0] = MLX4_FS_ALL_DEFAULT;
                break;

        case IB_FLOW_ATTR_MC_DEFAULT:
                type[0] = MLX4_FS_MC_DEFAULT;
                break;

        case IB_FLOW_ATTR_SNIFFER:
                type[0] = MLX4_FS_UC_SNIFFER;
                type[1] = MLX4_FS_MC_SNIFFER;
                break;

        default:
                err = -EINVAL;
                goto err_free;
        }

        while (i < ARRAY_SIZE(type) && type[i]) {
                err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
                                            &mflow->reg_id[i]);
                if (err)
                        goto err_free;
                i++;
        }

        return &mflow->ibflow;

err_free:
        kfree(mflow);
        return ERR_PTR(err);
}

static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
{
        int err, ret = 0;
        int i = 0;
        struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
        struct mlx4_ib_flow *mflow = to_mflow(flow_id);

        while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) {
                err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]);
                if (err)
                        ret = err;
                i++;
        }

        kfree(mflow);
        return ret;
}

static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
{
        struct mlx4_ib_gid_entry *ge;
        struct mlx4_ib_gid_entry *tmp;
        struct mlx4_ib_gid_entry *ret = NULL;

        list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
                if (!memcmp(raw, ge->gid.raw, 16)) {
                        ret = ge;
                        break;
                }
        }

        return ret;
}

static int del_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
{
        struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
        struct mlx4_ib_qp *mqp = to_mqp(ibqp);
        struct mlx4_ib_gid_entry *ge;
        struct net_device *ndev;
        u8 mac[6];

        mutex_lock(&mqp->mutex);
        ge = find_gid_entry(mqp, gid->raw);
        if (ge) {
                spin_lock(&mdev->iboe.lock);
                ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
                if (ndev)
                        dev_hold(ndev);
                spin_unlock(&mdev->iboe.lock);
                rdma_get_mcast_mac((struct in6_addr *)gid, mac);
                if (ndev) {
                        rtnl_lock();
                        dev_mc_delete(mdev->iboe.netdevs[ge->port - 1], mac, 6, 0);
                        rtnl_unlock();
                        dev_put(ndev);
                }
                list_del(&ge->list);
                kfree(ge);
        } else
                pr_warn("could not find mgid entry\n");

        mutex_unlock(&mqp->mutex);
        return ge != NULL ? 0 : -EINVAL;
}

static int _mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid,
                               int count)
{
        int err;
        struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
        struct mlx4_ib_qp *mqp = to_mqp(ibqp);
        u64 reg_id = 0;
        int record_err = 0;

        if (mdev->dev->caps.steering_mode ==
            MLX4_STEERING_MODE_DEVICE_MANAGED) {
                struct mlx4_ib_steering *ib_steering;
                struct mlx4_ib_steering *tmp;
                LIST_HEAD(temp);

                mutex_lock(&mqp->mutex);
                list_for_each_entry_safe(ib_steering, tmp, &mqp->steering_rules,
                                         list) {
                        if (memcmp(ib_steering->gid.raw, gid->raw, 16))
                                continue;

                        if (--count < 0)
                                break;

                        list_del(&ib_steering->list);
                        list_add(&ib_steering->list, &temp);
                }
                mutex_unlock(&mqp->mutex);
                list_for_each_entry_safe(ib_steering, tmp, &temp,
                                         list) {
                        reg_id = ib_steering->reg_id;

                        err = mlx4_multicast_detach(mdev->dev, &mqp->mqp,
                                        gid->raw,
                                        (ibqp->qp_type == IB_QPT_RAW_PACKET) ?
                                        MLX4_PROT_ETH : MLX4_PROT_IB_IPV6,
                                        reg_id);
                        if (err) {
                                record_err = record_err ?: err;
                                continue;
                        }

                        err = del_gid_entry(ibqp, gid);
                        if (err) {
                                record_err = record_err ?: err;
                                continue;
                        }

                        list_del(&ib_steering->list);
                        kfree(ib_steering);
                }
                mutex_lock(&mqp->mutex);
                list_for_each_entry(ib_steering, &temp, list) {
                        list_add(&ib_steering->list, &mqp->steering_rules);
                }
                mutex_unlock(&mqp->mutex);
                if (count) {
                        pr_warn("Couldn't release all reg_ids for mgid. Steering rule is left attached\n");
                        return -EINVAL;
                }

        } else {
                if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_B0 &&
                    ibqp->qp_type == IB_QPT_RAW_PACKET)
                        gid->raw[5] = mqp->port;

                err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
                                (ibqp->qp_type == IB_QPT_RAW_PACKET) ?
                                MLX4_PROT_ETH : MLX4_PROT_IB_IPV6,
                                reg_id);
                if (err)
                        return err;

                err = del_gid_entry(ibqp, gid);

                if (err)
                        return err;
        }

        return record_err;
}

static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
        struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
        int count = (mdev->dev->caps.steering_mode ==
                     MLX4_STEERING_MODE_DEVICE_MANAGED) ?
                    mdev->dev->caps.num_ports : 1;

        return _mlx4_ib_mcg_detach(ibqp, gid, lid, count);
}

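/*
 * Attach a QP to a multicast group.  With device-managed steering the
 * attach is performed on every port, and each port's reg_id is recorded
 * on mqp->steering_rules so the detach path can release them all;
 * otherwise only the QP's own port is attached.
 */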
1396 static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1397 {
1398         int err = -ENODEV;
1399         struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1400         struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1401         DECLARE_BITMAP(ports, MLX4_MAX_PORTS);
1402         int i = 0;
1403
1404         if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_B0 &&
1405             ibqp->qp_type == IB_QPT_RAW_PACKET)
1406                 gid->raw[5] = mqp->port;
1407
1408         if (mdev->dev->caps.steering_mode ==
1409             MLX4_STEERING_MODE_DEVICE_MANAGED) {
1410                 bitmap_fill(ports, mdev->dev->caps.num_ports);
1411         } else {
1412                 if (mqp->port <= mdev->dev->caps.num_ports) {
1413                         bitmap_zero(ports, mdev->dev->caps.num_ports);
1414                         set_bit(0, ports);
1415                 } else {
1416                         return -EINVAL;
1417                 }
1418         }
1419
1420         for (; i < mdev->dev->caps.num_ports; i++) {
1421                 u64 reg_id;
1422                 struct mlx4_ib_steering *ib_steering = NULL;
1423                 if (!test_bit(i, ports))
1424                         continue;
1425                 if (mdev->dev->caps.steering_mode ==
1426                     MLX4_STEERING_MODE_DEVICE_MANAGED) {
1427                         ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
1428                         if (!ib_steering)
1429                                 goto err_add;
1430                 }
1431
1432                 err = mlx4_multicast_attach(mdev->dev, &mqp->mqp,
1433                         gid->raw, i + 1,
1434                         !!(mqp->flags &
1435                                 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1436                         (ibqp->qp_type == IB_QPT_RAW_PACKET) ?
1437                         MLX4_PROT_ETH : MLX4_PROT_IB_IPV6,
1438                         &reg_id);
1439                 if (err) {
1440                         kfree(ib_steering);
1441                         goto err_add;
1442                 }
1443
1444                 err = add_gid_entry(ibqp, gid);
1445                 if (err) {
1446                         mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1447                                               (ibqp->qp_type == IB_QPT_RAW_PACKET) ? MLX4_PROT_ETH : MLX4_PROT_IB_IPV6, reg_id);
1448                         kfree(ib_steering);
1449                         goto err_add;
1450                 }
1451
1452                 if (ib_steering) {
1453                         memcpy(ib_steering->gid.raw, gid->raw, 16);
1454                         mutex_lock(&mqp->mutex);
1455                         list_add(&ib_steering->list, &mqp->steering_rules);
1456                         mutex_unlock(&mqp->mutex);
1457                         ib_steering->reg_id = reg_id;
1458                 }
1459         }
1460
1461
1462         return 0;
1463
1464 err_add:
1465         if (i > 0)
1466                 _mlx4_ib_mcg_detach(ibqp, gid, lid, i);
1467
1468         return err;
1469 }
1470
1471 static int init_node_data(struct mlx4_ib_dev *dev)
1472 {
1473         struct ib_smp *in_mad  = NULL;
1474         struct ib_smp *out_mad = NULL;
1475         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
1476         int err = -ENOMEM;
1477
1478         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
1479         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
1480         if (!in_mad || !out_mad)
1481                 goto out;
1482
1483         init_query_mad(in_mad);
1484         in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
1485         if (mlx4_is_master(dev->dev))
1486                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
1487
1488         err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
1489         if (err)
1490                 goto out;
1491
1492         memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
1493
1494         in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
1495
1496         err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
1497         if (err)
1498                 goto out;
1499
1500         dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
1501         memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
1502
1503 out:
1504         kfree(in_mad);
1505         kfree(out_mad);
1506         return err;
1507 }
1508
1509 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1510                         char *buf)
1511 {
1512         struct mlx4_ib_dev *dev =
1513                 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
1514         return sprintf(buf, "MT%d\n", dev->dev->pdev->device);
1515 }
1516
1517 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1518                            char *buf)
1519 {
1520         struct mlx4_ib_dev *dev =
1521                 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
1522         return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32),
1523                        (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
1524                        (int) dev->dev->caps.fw_ver & 0xffff);
1525 }
1526
1527 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
1528                         char *buf)
1529 {
1530         struct mlx4_ib_dev *dev =
1531                 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
1532         return sprintf(buf, "%x\n", dev->dev->rev_id);
1533 }
1534
1535 static ssize_t show_board(struct device *device, struct device_attribute *attr,
1536                           char *buf)
1537 {
1538         struct mlx4_ib_dev *dev =
1539                 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
1540         return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
1541                        dev->dev->board_id);
1542 }
1543
1544 static ssize_t show_vsd(struct device *device, struct device_attribute *attr,
1545                           char *buf)
1546 {
1547         struct mlx4_ib_dev *dev =
1548                 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
1549         ssize_t len = MLX4_VSD_LEN;
1550
1551         if (dev->dev->vsd_vendor_id == PCI_VENDOR_ID_MELLANOX)
1552                 len = sprintf(buf, "%.*s\n", MLX4_VSD_LEN, dev->dev->vsd);
1553         else
1554                 memcpy(buf, dev->dev->vsd, MLX4_VSD_LEN);
1555
1556         return len;
1557 }
1558
1559 static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
1560 static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
1561 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
1562 static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
1563 static DEVICE_ATTR(vsd,      S_IRUGO, show_vsd,    NULL);
1564
1565 static struct device_attribute *mlx4_class_attributes[] = {
1566         &dev_attr_hw_rev,
1567         &dev_attr_fw_ver,
1568         &dev_attr_hca_type,
1569         &dev_attr_board_id,
1570         &dev_attr_vsd
1571 };
1572
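/*
 * Build the low 8 bytes of a link-local GID from the netdev's MAC address:
 * the MAC is split in two, 0xfffe (or the VLAN id, when one is set) is
 * inserted in the middle, and the universal/local bit is flipped; a
 * VLAN-keyed variant of the RFC 4291 modified EUI-64 mapping.  For example,
 * MAC 00:11:22:33:44:55 with no VLAN yields 02:11:22:ff:fe:33:44:55.
 */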
1573 static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev, u8 port)
1574 {
1575         memcpy(eui, IF_LLADDR(dev), 3);
1576         memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
1577         if (vlan_id < 0x1000) {
1578                 eui[3] = vlan_id >> 8;
1579                 eui[4] = vlan_id & 0xff;
1580         } else {
1581                 eui[3] = 0xff;
1582                 eui[4] = 0xfe;
1583         }
1584         eui[0] ^= 2;
1585 }
1586
1587 static void update_gids_task(struct work_struct *work)
1588 {
1589         struct update_gid_work *gw = container_of(work, struct update_gid_work, work);
1590         struct mlx4_cmd_mailbox *mailbox;
1591         union ib_gid *gids;
1592         int err;
1593         struct mlx4_dev *dev = gw->dev->dev;
1594
1595
1596         mailbox = mlx4_alloc_cmd_mailbox(dev);
1597         if (IS_ERR(mailbox)) {
1598                 pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
1599                 goto free;
1600         }
1601
1602         gids = mailbox->buf;
1603         memcpy(gids, gw->gids, sizeof gw->gids);
1604
1605         if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) ==
1606                                         IB_LINK_LAYER_ETHERNET) {
1607                 err = mlx4_cmd(dev, mailbox->dma,
1608                                MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
1609                                1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
1610                                MLX4_CMD_WRAPPED);
1611
1612                 if (err)
1613                         pr_warn("set port command failed\n");
1614                 else
1615                         mlx4_ib_dispatch_event(gw->dev, gw->port,
1616                                                IB_EVENT_GID_CHANGE);
1617         }
1618
1619         mlx4_free_cmd_mailbox(dev, mailbox);
1620 free:
1621         kfree(gw);
1622 }
1623
1624 static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
1625 {
1626         struct mlx4_ib_dev *ibdev = to_mdev(device);
1627         return mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
1628 }
1629
1630 static void reset_gids_task(struct work_struct *work)
1631 {
1632         struct update_gid_work *gw =
1633                         container_of(work, struct update_gid_work, work);
1634         struct mlx4_cmd_mailbox *mailbox;
1635         union ib_gid *gids;
1636         int err;
1637         struct mlx4_dev *dev = gw->dev->dev;
1638
1639         mailbox = mlx4_alloc_cmd_mailbox(dev);
1640         if (IS_ERR(mailbox)) {
1641                 pr_warn("reset gid table failed\n");
1642                 goto free;
1643         }
1644
1645         gids = mailbox->buf;
1646         memcpy(gids, gw->gids, sizeof(gw->gids));
1647
1648         if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, 1) ==
1649                                         IB_LINK_LAYER_ETHERNET &&
1650                                         dev->caps.num_ports > 0) {
1651                 err = mlx4_cmd(dev, mailbox->dma,
1652                                MLX4_SET_PORT_GID_TABLE << 8 | 1,
1653                                1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
1654                                MLX4_CMD_WRAPPED);
1655                 if (err)
1656                         pr_warn("set port 1 command failed\n");
1657         }
1658
1659         if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, 2) ==
1660                                         IB_LINK_LAYER_ETHERNET &&
1661                                         dev->caps.num_ports > 1) {
1662                 err = mlx4_cmd(dev, mailbox->dma,
1663                                MLX4_SET_PORT_GID_TABLE << 8 | 2,
1664                                1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
1665                                MLX4_CMD_WRAPPED);
1666                 if (err)
1667                         pr_warn("set port 2 command failed\n");
1668         }
1669
1670         mlx4_free_cmd_mailbox(dev, mailbox);
1671 free:
1672         kfree(gw);
1673 }
1674
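/*
 * Look up @gid in the cached copy of the port's GID table.  With @clear set,
 * zero the matching entry; otherwise store the GID in the first free slot
 * (slot 0 is reserved for the default GID).  If the cached table changed,
 * queue update_gids_task() to push it to firmware.
 */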
1675 static int update_gid_table(struct mlx4_ib_dev *dev, int port,
1676                 union ib_gid *gid, int clear, int default_gid)
1677 {
1678         struct update_gid_work *work;
1679         int i;
1680         int need_update = 0;
1681         int free = -1;
1682         int found = -1;
1683         int max_gids;
1684         int start_index = !default_gid;
1685
1686         max_gids = dev->dev->caps.gid_table_len[port];
1687         for (i = start_index; i < max_gids; ++i) {
1688                 if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
1689                     sizeof(*gid)))
1690                         found = i;
1691
1692                 if (clear) {
1693                         if (found >= 0) {
1694                                 need_update = 1;
1695                                 dev->iboe.gid_table[port - 1][found] = zgid;
1696                                 break;
1697                         }
1698                 } else {
1699                         if (found >= 0)
1700                                 break;
1701
1702                         if (free < 0 &&
1703                             !memcmp(&dev->iboe.gid_table[port - 1][i],
1704                                     &zgid, sizeof(*gid)))
1705                                 free = i;
1706                 }
1707         }
1708
1709         if (found == -1 && !clear && free < 0) {
1710                 pr_err("GID table of port %d is full. Can't add "GID_PRINT_FMT"\n",
1711                        port, GID_PRINT_ARGS(gid));
1712                 return -ENOMEM;
1713         }
1714         if (found == -1 && clear) {
1715                 pr_err(GID_PRINT_FMT" is not in GID table of port %d\n", GID_PRINT_ARGS(gid), port);
1716                 return -EINVAL;
1717         }
1718         if (found == -1 && !clear && free >= 0) {
1719                 dev->iboe.gid_table[port - 1][free] = *gid;
1720                 need_update = 1;
1721         }
1722
1723         if (!need_update)
1724                 return 0;
1725
1726         work = kzalloc(sizeof *work, GFP_ATOMIC);
1727         if (!work)
1728                 return -ENOMEM;
1729
1730         memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
1731         INIT_WORK(&work->work, update_gids_task);
1732         work->port = port;
1733         work->dev = dev;
1734         queue_work(wq, &work->work);
1735
1736         return 0;
1737 }
1738
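/*
 * Zero the cached GID tables and queue reset_gids_task() to clear the
 * per-port GID tables in firmware as well.
 */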
1739 static int reset_gid_table(struct mlx4_ib_dev *dev)
1740 {
1741         struct update_gid_work *work;
1742
1743
1744         work = kzalloc(sizeof(*work), GFP_ATOMIC);
1745         if (!work)
1746                 return -ENOMEM;
1747
1748         memset(dev->iboe.gid_table, 0, sizeof(dev->iboe.gid_table));
1749         memset(work->gids, 0, sizeof(work->gids));
1750         INIT_WORK(&work->work, reset_gids_task);
1751         work->dev = dev;
1752         queue_work(wq, &work->work);
1753         return 0;
1754 }
1755
1756 /* XXX BOND Related - stub (no support for these flags in FBSD) */
1757 static inline int netif_is_bond_master(struct net_device *dev)
1758 {
1759 #if 0
1760         return (dev->flags & IFF_MASTER) && (dev->priv_flags & IFF_BONDING);
1761 #endif
1762         return 0;
1763 }
1764
1765 static void mlx4_make_default_gid(struct  net_device *dev, union ib_gid *gid, u8 port)
1766 {
1767         gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
1768         mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev, port);
1769 }
1770
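/*
 * Map a net_device (or the real device behind a VLAN) to its IB port number
 * by matching against iboe->netdevs[]; returns 0 if the device does not
 * belong to this HCA.
 */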
1771 static u8 mlx4_ib_get_dev_port(struct net_device *dev, struct mlx4_ib_dev *ibdev)
1772 {
1773         u8 port = 0;
1774         struct mlx4_ib_iboe *iboe;
1775         struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
1776                                 rdma_vlan_dev_real_dev(dev) : dev;
1777
1778         iboe = &ibdev->iboe;
1779
1780         for (port = 1; port <= MLX4_MAX_PORTS; ++port)
1781                 if ((netif_is_bond_master(real_dev) && (real_dev == iboe->masters[port - 1])) ||
1782                     (!netif_is_bond_master(real_dev) && (real_dev == iboe->netdevs[port - 1])))
1783                         break;
1784
1785         return port > MLX4_MAX_PORTS ? 0 : port;
1786 }
1787
1788 static void mlx4_ib_get_dev_addr(struct net_device *dev, struct mlx4_ib_dev *ibdev, u8 port)
1789 {
1790         struct ifaddr *ifa;
1791 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1792         struct inet6_dev *in6_dev;
1793         union ib_gid  *pgid;
1794         struct inet6_ifaddr *ifp;
1795 #endif
1796         union ib_gid gid;
1797
1798
1799         if ((port == 0) || (port > MLX4_MAX_PORTS))
1800                 return;
1801
1802         /* IPv4 gids */
1803         TAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) {
1804                 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
1805                         ipv6_addr_set_v4mapped(
1806                                 ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr,
1807                                 (struct in6_addr *)&gid);
1808                         update_gid_table(ibdev, port, &gid, 0, 0);
1809                 }
1810
1811         }
1812 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1813         /* IPv6 gids */
1814         in6_dev = in6_dev_get(dev);
1815         if (in6_dev) {
1816                 read_lock_bh(&in6_dev->lock);
1817                 list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
1818                         pgid = (union ib_gid *)&ifp->addr;
1819                         update_gid_table(ibdev, port, pgid, 0, 0);
1820                 }
1821                 read_unlock_bh(&in6_dev->lock);
1822                 in6_dev_put(in6_dev);
1823         }
1824 #endif
1825 }
1826
1827 static void mlx4_set_default_gid(struct mlx4_ib_dev *ibdev,
1828                                  struct  net_device *dev, u8 port)
1829 {
1830         union ib_gid gid;
1831         mlx4_make_default_gid(dev, &gid, port);
1832         update_gid_table(ibdev, port, &gid, 0, 1);
1833 }
1834
1835 static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
1836 {
1837         struct  net_device *dev;
1838
1839         if (reset_gid_table(ibdev))
1840                 return -1;
1841
1842         IFNET_RLOCK_NOSLEEP();
1843         TAILQ_FOREACH(dev, &V_ifnet, if_link) {
1844                 u8 port = mlx4_ib_get_dev_port(dev, ibdev);
1845                 if (port) {
1846                         if (!rdma_vlan_dev_real_dev(dev) &&
1847                             !netif_is_bond_master(dev))
1848                                 mlx4_set_default_gid(ibdev, dev, port);
1849                         mlx4_ib_get_dev_addr(dev, ibdev, port);
1850                 }
1851         }
1852
1853         IFNET_RUNLOCK_NOSLEEP();
1854
1855         return 0;
1856 }
1857
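/*
 * Re-sample the Ethernet net_device for every IBoE port and rebuild the
 * whole GID table whenever a port's net_device (or its MAC address) changed.
 */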
1858 static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
1859                                  struct net_device *dev, unsigned long event)
1860 {
1861         struct mlx4_ib_iboe *iboe;
1862         int port;
1863         int init = 0;
1864         unsigned long flags;
1865
1866         iboe = &ibdev->iboe;
1867
1868         spin_lock_irqsave(&iboe->lock, flags);
1869         mlx4_foreach_ib_transport_port(port, ibdev->dev) {
1870                 struct net_device *old_netdev = iboe->netdevs[port - 1];
1871 /* XXX BOND related */
1872 #if 0
1873                 struct net_device *old_master = iboe->masters[port - 1];
1874 #endif
1875                 iboe->masters[port - 1] = NULL;
1876                 iboe->netdevs[port - 1] =
1877                         mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
1878
1879
1880                 if (old_netdev != iboe->netdevs[port - 1])
1881                         init = 1;
1882                 if (dev == iboe->netdevs[port - 1] &&
1883                     event == NETDEV_CHANGEADDR)
1884                         init = 1;
1885 /* XXX BOND related */
1886 #if 0
1887                 if (iboe->netdevs[port - 1] && netif_is_bond_slave(iboe->netdevs[port - 1]))
1888                         iboe->masters[port - 1] = iboe->netdevs[port - 1]->master;
1889
1890                 /* If bonding is used, the device may be added to masters only
1891                    after an IP address is assigned to the bonding net interface. */
1892                 if (old_master != iboe->masters[port - 1])
1893                         init = 1;
1894 #endif
1895         }
1896
1897         spin_unlock_irqrestore(&iboe->lock, flags);
1898
1899         if (init)
1900                 if (mlx4_ib_init_gid_table(ibdev))
1901                         pr_warn("Failed to reset gid table\n");
1902 }
1903
1904 static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
1905                                 void *ptr)
1906 {
1907         struct net_device *dev = ptr;
1908         struct mlx4_ib_dev *ibdev;
1909
1910         ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
1911
1912         mlx4_ib_scan_netdevs(ibdev, dev, event);
1913
1914         return NOTIFY_DONE;
1915 }
1916
1917 /* This function initializes the GID table only if the event_netdev's real
1918  * device is an iboe device; it is invoked from the inet/inet6 notifiers. */
1919 static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
1920                                 void *ptr)
1921 {
1922         struct net_device *event_netdev = ptr;
1923         struct net_device *real_dev;
1924         struct mlx4_ib_dev *ibdev;
1925         struct mlx4_ib_iboe *ibdev_iboe;
1926         int port = 0;
1927
1928         ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
1929
1930         real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
1931                         rdma_vlan_dev_real_dev(event_netdev) : event_netdev;
1932
1933         ibdev_iboe = &ibdev->iboe;
1934
1935         port = mlx4_ib_get_dev_port(real_dev, ibdev);
1936
1937         /* Perform init_gid_table only if the event's real_dev is the net_device
1938          * that represents this port; otherwise the event is unrelated and ignored. */
1939         if (port && (real_dev == ibdev_iboe->netdevs[port - 1]))
1940                 if (mlx4_ib_init_gid_table(ibdev))
1941                         pr_warn("Failed to reset gid table\n");
1942
1943         return NOTIFY_DONE;
1944 }
1945
1946
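/*
 * Build the per-slave virt2phys pkey mapping: the master (and pkey index 0)
 * get the identity mapping, every other slave entry points at the last
 * physical pkey, and the pkey cache starts out with only the default 0xFFFF
 * pkey valid at index 0.
 */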
1947 static void init_pkeys(struct mlx4_ib_dev *ibdev)
1948 {
1949         int port;
1950         int slave;
1951         int i;
1952
1953         if (mlx4_is_master(ibdev->dev)) {
1954                 for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
1955                         for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
1956                                 for (i = 0;
1957                                      i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
1958                                      ++i) {
1959                                         ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
1960                                         /* master has the identity virt2phys pkey mapping */
1961                                                 (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
1962                                                         ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
1963                                         mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
1964                                                              ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
1965                                 }
1966                         }
1967                 }
1968                 /* initialize pkey cache */
1969                 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
1970                         for (i = 0;
1971                              i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
1972                              ++i)
1973                                 ibdev->pkeys.phys_pkey_cache[port-1][i] =
1974                                         (i) ? 0 : 0xFFFF;
1975                 }
1976         }
1977 }
1978
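/*
 * Carve dedicated completion EQs out of the device's EQ pool: each IB port
 * gets eq_per_port vectors (comp_pool / num_ports, rounded down to a power
 * of two), the legacy vectors are appended after them, and any vector that
 * cannot be assigned falls back to a legacy one.
 */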
1979 static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1980 {
1981         char name[32];
1982         int eq_per_port = 0;
1983         int added_eqs = 0;
1984         int total_eqs = 0;
1985         int i, j, eq;
1986
1987         /* Legacy mode or comp_pool is not large enough */
1988         if (dev->caps.comp_pool == 0 ||
1989             dev->caps.num_ports > dev->caps.comp_pool)
1990                 return;
1991
1992         eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
1993                                         dev->caps.num_ports);
1994
1995         /* Init eq table */
1996         added_eqs = 0;
1997         mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
1998                 added_eqs += eq_per_port;
1999
2000         total_eqs = dev->caps.num_comp_vectors + added_eqs;
2001
2002         ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL);
2003         if (!ibdev->eq_table)
2004                 return;
2005
2006         ibdev->eq_added = added_eqs;
2007
2008         eq = 0;
2009         mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
2010                 for (j = 0; j < eq_per_port; j++) {
2011                         sprintf(name, "mlx4-ib-%d-%d@%d:%d:%d:%d", i, j,
2012                                                 pci_get_domain(dev->pdev->dev.bsddev),
2013                                                 pci_get_bus(dev->pdev->dev.bsddev),
2014                                                 PCI_SLOT(dev->pdev->devfn),
2015                                                 PCI_FUNC(dev->pdev->devfn));
2016
2017                         /* Set IRQ for specific name (per ring) */
2018                         if (mlx4_assign_eq(dev, name,
2019                                            &ibdev->eq_table[eq])) {
2020                                 /* Use legacy (same as mlx4_en driver) */
2021                                 pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
2022                                 ibdev->eq_table[eq] =
2023                                         (eq % dev->caps.num_comp_vectors);
2024                         }
2025                         eq++;
2026                 }
2027         }
2028
2029         /* Fill the rest of the vector with legacy EQs */
2030         for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++)
2031                 ibdev->eq_table[eq++] = i;
2032
2033         /* Advertise the new number of EQs to clients */
2034         ibdev->ib_dev.num_comp_vectors = total_eqs;
2035 }
2036
2037 static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2038 {
2039         int i;
2040
2041         /* no additional eqs were added */
2042         if (!ibdev->eq_table)
2043                 return;
2044
2045         /* Reset the advertised EQ number */
2046         ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
2047
2048         /* Free only the added eqs */
2049         for (i = 0; i < ibdev->eq_added; i++) {
2050                 /* Don't free legacy eqs if used */
2051                 if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors)
2052                         continue;
2053                 mlx4_release_eq(dev, ibdev->eq_table[i]);
2054         }
2055
2056         kfree(ibdev->eq_table);
2057 }
2058
2059 /*
2060  * Create a show function for _name, plus a read-only device_attribute
2061  * struct pointing to it.
2062  */
2063 #define DEVICE_DIAG_RPRT_ATTR(_name, _offset, _op_mod)          \
2064 static ssize_t show_rprt_##_name(struct device *dev,            \
2065                                  struct device_attribute *attr, \
2066                                  char *buf){                    \
2067         return show_diag_rprt(dev, buf, _offset, _op_mod);      \
2068 }                                                               \
2069 static DEVICE_ATTR(_name, S_IRUGO, show_rprt_##_name, NULL);
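/*
 * For example, DEVICE_DIAG_RPRT_ATTR(rq_num_lle, 0x00, 2) expands to a
 * show_rprt_rq_num_lle() wrapper around show_diag_rprt() plus a read-only
 * dev_attr_rq_num_lle attribute, exposed via sysfs as "rq_num_lle".
 */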
2070
2071 #define MLX4_DIAG_RPRT_CLEAR_DIAGS 3
2072
2073 static ssize_t show_diag_rprt(struct device *device, char *buf,
2074                               u32 offset, u8 op_modifier)
2075 {
2076         ssize_t ret;    /* signed, so negative errnos are returned intact */
2077         u32 counter_offset = offset;
2078         u32 diag_counter = 0;
2079         struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
2080                                                ib_dev.dev);
2081
2082         ret = mlx4_query_diag_counters(dev->dev, 1, op_modifier,
2083                                        &counter_offset, &diag_counter);
2084         if (ret)
2085                 return ret;
2086
2087         return sprintf(buf, "%u\n", diag_counter);
2088 }
2089
2090 static ssize_t clear_diag_counters(struct device *device,
2091                                    struct device_attribute *attr,
2092                                    const char *buf, size_t length)
2093 {
2094         ssize_t ret;
2095         struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
2096                                                ib_dev.dev);
2097
2098         ret = mlx4_query_diag_counters(dev->dev, 0, MLX4_DIAG_RPRT_CLEAR_DIAGS,
2099                                        NULL, NULL);
2100         if (ret)
2101                 return ret;
2102
2103         return length;
2104 }
2105
2106 DEVICE_DIAG_RPRT_ATTR(rq_num_lle        , 0x00, 2);
2107 DEVICE_DIAG_RPRT_ATTR(sq_num_lle        , 0x04, 2);
2108 DEVICE_DIAG_RPRT_ATTR(rq_num_lqpoe      , 0x08, 2);
2109 DEVICE_DIAG_RPRT_ATTR(sq_num_lqpoe      , 0x0C, 2);
2110 DEVICE_DIAG_RPRT_ATTR(rq_num_lpe        , 0x18, 2);
2111 DEVICE_DIAG_RPRT_ATTR(sq_num_lpe        , 0x1C, 2);
2112 DEVICE_DIAG_RPRT_ATTR(rq_num_wrfe       , 0x20, 2);
2113 DEVICE_DIAG_RPRT_ATTR(sq_num_wrfe       , 0x24, 2);
2114 DEVICE_DIAG_RPRT_ATTR(sq_num_mwbe       , 0x2C, 2);
2115 DEVICE_DIAG_RPRT_ATTR(sq_num_bre        , 0x34, 2);
2116 DEVICE_DIAG_RPRT_ATTR(rq_num_lae        , 0x38, 2);
2117 DEVICE_DIAG_RPRT_ATTR(sq_num_rire       , 0x44, 2);
2118 DEVICE_DIAG_RPRT_ATTR(rq_num_rire       , 0x48, 2);
2119 DEVICE_DIAG_RPRT_ATTR(sq_num_rae        , 0x4C, 2);
2120 DEVICE_DIAG_RPRT_ATTR(rq_num_rae        , 0x50, 2);
2121 DEVICE_DIAG_RPRT_ATTR(sq_num_roe        , 0x54, 2);
2122 DEVICE_DIAG_RPRT_ATTR(sq_num_tree       , 0x5C, 2);
2123 DEVICE_DIAG_RPRT_ATTR(sq_num_rree       , 0x64, 2);
2124 DEVICE_DIAG_RPRT_ATTR(rq_num_rnr        , 0x68, 2);
2125 DEVICE_DIAG_RPRT_ATTR(sq_num_rnr        , 0x6C, 2);
2126 DEVICE_DIAG_RPRT_ATTR(rq_num_oos        , 0x100, 2);
2127 DEVICE_DIAG_RPRT_ATTR(sq_num_oos        , 0x104, 2);
2128 DEVICE_DIAG_RPRT_ATTR(rq_num_mce        , 0x108, 2);
2129 DEVICE_DIAG_RPRT_ATTR(rq_num_udsdprd    , 0x118, 2);
2130 DEVICE_DIAG_RPRT_ATTR(rq_num_ucsdprd    , 0x120, 2);
2131 DEVICE_DIAG_RPRT_ATTR(num_cqovf         , 0x1A0, 2);
2132 DEVICE_DIAG_RPRT_ATTR(num_eqovf         , 0x1A4, 2);
2133 DEVICE_DIAG_RPRT_ATTR(num_baddb         , 0x1A8, 2);
2134
2135 static DEVICE_ATTR(clear_diag, S_IWUSR, NULL, clear_diag_counters);
2136
2137 static struct attribute *diag_rprt_attrs[] = {
2138         &dev_attr_rq_num_lle.attr,
2139         &dev_attr_sq_num_lle.attr,
2140         &dev_attr_rq_num_lqpoe.attr,
2141         &dev_attr_sq_num_lqpoe.attr,
2142         &dev_attr_rq_num_lpe.attr,
2143         &dev_attr_sq_num_lpe.attr,
2144         &dev_attr_rq_num_wrfe.attr,
2145         &dev_attr_sq_num_wrfe.attr,
2146         &dev_attr_sq_num_mwbe.attr,
2147         &dev_attr_sq_num_bre.attr,
2148         &dev_attr_rq_num_lae.attr,
2149         &dev_attr_sq_num_rire.attr,
2150         &dev_attr_rq_num_rire.attr,
2151         &dev_attr_sq_num_rae.attr,
2152         &dev_attr_rq_num_rae.attr,
2153         &dev_attr_sq_num_roe.attr,
2154         &dev_attr_sq_num_tree.attr,
2155         &dev_attr_sq_num_rree.attr,
2156         &dev_attr_rq_num_rnr.attr,
2157         &dev_attr_sq_num_rnr.attr,
2158         &dev_attr_rq_num_oos.attr,
2159         &dev_attr_sq_num_oos.attr,
2160         &dev_attr_rq_num_mce.attr,
2161         &dev_attr_rq_num_udsdprd.attr,
2162         &dev_attr_rq_num_ucsdprd.attr,
2163         &dev_attr_num_cqovf.attr,
2164         &dev_attr_num_eqovf.attr,
2165         &dev_attr_num_baddb.attr,
2166         &dev_attr_clear_diag.attr,
2167         NULL
2168 };
2169
2170 static struct attribute_group diag_counters_group = {
2171         .name  = "diag_counters",
2172         .attrs  = diag_rprt_attrs
2173 };
2174
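/*
 * Parse the dev_assign_str module parameter and pre-reserve the requested
 * device numbers in dev_num_str_bitmap so that mlx4_ib_dev_idx() can hand
 * out stable mlx4_<N> device names; on an invalid value the whole parameter
 * is discarded.
 */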
2175 static void init_dev_assign(void)
2176 {
2177         int i = 1;
2178
2179         spin_lock_init(&dev_num_str_lock);
2180         if (mlx4_fill_dbdf2val_tbl(&dev_assign_str))
2181                 return;
2182         dev_num_str_bitmap =
2183                 kmalloc(BITS_TO_LONGS(MAX_NUM_STR_BITMAP) * sizeof(long),
2184                         GFP_KERNEL);
2185         if (!dev_num_str_bitmap) {
2186                 pr_warn("bitmap alloc failed -- cannot apply dev_assign_str parameter\n");
2187                 return;
2188         }
2189         bitmap_zero(dev_num_str_bitmap, MAX_NUM_STR_BITMAP);
2190         while ((i < MLX4_DEVS_TBL_SIZE) && (dev_assign_str.tbl[i].dbdf !=
2191                MLX4_ENDOF_TBL)) {
2192                 if (bitmap_allocate_region(dev_num_str_bitmap,
2193                                            dev_assign_str.tbl[i].val[0], 0))
2194                         goto err;
2195                 i++;
2196         }
2197         dr_active = 1;
2198         return;
2199
2200 err:
2201         kfree(dev_num_str_bitmap);
2202         dev_num_str_bitmap = NULL;
2203         pr_warn("mlx4_ib: The value of the 'dev_assign_str' parameter "
2204                             "is incorrect. The parameter value is discarded!\n");
2205 }
2206
2207 static int mlx4_ib_dev_idx(struct mlx4_dev *dev)
2208 {
2209         int i, val;
2210
2211         if (!dr_active)
2212                 return -1;
2213         if (!dev)
2214                 return -1;
2215         if (mlx4_get_val(dev_assign_str.tbl, dev->pdev, 0, &val))
2216                 return -1;
2217
2218         if (val != DEFAULT_TBL_VAL) {
2219                 dev->flags |= MLX4_FLAG_DEV_NUM_STR;
2220                 return val;
2221         }
2222
2223         spin_lock(&dev_num_str_lock);
2224         i = bitmap_find_free_region(dev_num_str_bitmap, MAX_NUM_STR_BITMAP, 0);
2225         spin_unlock(&dev_num_str_lock);
2226         if (i >= 0)
2227                 return i;
2228
2229         return -1;
2230 }
2231
2232 static void *mlx4_ib_add(struct mlx4_dev *dev)
2233 {
2234         struct mlx4_ib_dev *ibdev;
2235         int num_ports = 0;
2236         int i, j;
2237         int err;
2238         struct mlx4_ib_iboe *iboe;
2239         int dev_idx;
2240
2241         pr_info_once("%s", mlx4_ib_version);
2242
2243         mlx4_foreach_ib_transport_port(i, dev)
2244                 num_ports++;
2245
2246         /* No point in registering a device with no ports... */
2247         if (num_ports == 0)
2248                 return NULL;
2249
2250         ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
2251         if (!ibdev) {
2252                 dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
2253                 return NULL;
2254         }
2255
2256         iboe = &ibdev->iboe;
2257
2258         if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
2259                 goto err_dealloc;
2260
2261         if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
2262                 goto err_pd;
2263
2264         ibdev->priv_uar.map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT,
2265                 PAGE_SIZE);
2266
2267         if (!ibdev->priv_uar.map)
2268                 goto err_uar;
2269
2270         MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
2271
2272         ibdev->dev = dev;
2273
2274         dev_idx = mlx4_ib_dev_idx(dev);
2275         if (dev_idx >= 0)
2276                 sprintf(ibdev->ib_dev.name, "mlx4_%d", dev_idx);
2277         else
2278                 strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
2279
2280         ibdev->ib_dev.owner             = THIS_MODULE;
2281         ibdev->ib_dev.node_type         = RDMA_NODE_IB_CA;
2282         ibdev->ib_dev.local_dma_lkey    = dev->caps.reserved_lkey;
2283         ibdev->num_ports                = num_ports;
2284         ibdev->ib_dev.phys_port_cnt     = ibdev->num_ports;
2285         ibdev->ib_dev.num_comp_vectors  = dev->caps.num_comp_vectors;
2286         ibdev->ib_dev.dma_device        = &dev->pdev->dev;
2287
2288         if (dev->caps.userspace_caps)
2289                 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
2290         else
2291                 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
2292
2293         ibdev->ib_dev.uverbs_cmd_mask   =
2294                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
2295                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
2296                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
2297                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
2298                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
2299                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
2300                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
2301                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2302                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
2303                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
2304                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
2305                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
2306                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
2307                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
2308                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
2309                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
2310                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
2311                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
2312                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
2313                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
2314                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
2315                 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
2316                 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
2317
2318         ibdev->ib_dev.query_device      = mlx4_ib_query_device;
2319         ibdev->ib_dev.query_port        = mlx4_ib_query_port;
2320         ibdev->ib_dev.get_link_layer    = mlx4_ib_port_link_layer;
2321         ibdev->ib_dev.query_gid         = mlx4_ib_query_gid;
2322         ibdev->ib_dev.query_pkey        = mlx4_ib_query_pkey;
2323         ibdev->ib_dev.modify_device     = mlx4_ib_modify_device;
2324         ibdev->ib_dev.modify_port       = mlx4_ib_modify_port;
2325         ibdev->ib_dev.alloc_ucontext    = mlx4_ib_alloc_ucontext;
2326         ibdev->ib_dev.dealloc_ucontext  = mlx4_ib_dealloc_ucontext;
2327         ibdev->ib_dev.mmap              = mlx4_ib_mmap;
2328 /* XXX FBSD has no support for get_unmapped_area function */
2329 #if 0
2330         ibdev->ib_dev.get_unmapped_area = mlx4_ib_get_unmapped_area;
2331 #endif
2332         ibdev->ib_dev.alloc_pd          = mlx4_ib_alloc_pd;
2333         ibdev->ib_dev.dealloc_pd        = mlx4_ib_dealloc_pd;
2334         ibdev->ib_dev.create_ah         = mlx4_ib_create_ah;
2335         ibdev->ib_dev.query_ah          = mlx4_ib_query_ah;
2336         ibdev->ib_dev.destroy_ah        = mlx4_ib_destroy_ah;
2337         ibdev->ib_dev.create_srq        = mlx4_ib_create_srq;
2338         ibdev->ib_dev.modify_srq        = mlx4_ib_modify_srq;
2339         ibdev->ib_dev.query_srq         = mlx4_ib_query_srq;
2340         ibdev->ib_dev.destroy_srq       = mlx4_ib_destroy_srq;
2341         ibdev->ib_dev.post_srq_recv     = mlx4_ib_post_srq_recv;
2342         ibdev->ib_dev.create_qp         = mlx4_ib_create_qp;
2343         ibdev->ib_dev.modify_qp         = mlx4_ib_modify_qp;
2344         ibdev->ib_dev.query_qp          = mlx4_ib_query_qp;
2345         ibdev->ib_dev.destroy_qp        = mlx4_ib_destroy_qp;
2346         ibdev->ib_dev.post_send         = mlx4_ib_post_send;
2347         ibdev->ib_dev.post_recv         = mlx4_ib_post_recv;
2348         ibdev->ib_dev.create_cq         = mlx4_ib_create_cq;
2349         ibdev->ib_dev.modify_cq         = mlx4_ib_modify_cq;
2350         ibdev->ib_dev.resize_cq         = mlx4_ib_resize_cq;
2351         ibdev->ib_dev.destroy_cq        = mlx4_ib_destroy_cq;
2352         ibdev->ib_dev.poll_cq           = mlx4_ib_poll_cq;
2353         ibdev->ib_dev.req_notify_cq     = mlx4_ib_arm_cq;
2354         ibdev->ib_dev.get_dma_mr        = mlx4_ib_get_dma_mr;
2355         ibdev->ib_dev.reg_user_mr       = mlx4_ib_reg_user_mr;
2356         ibdev->ib_dev.dereg_mr          = mlx4_ib_dereg_mr;
2357         ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
2358         ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
2359         ibdev->ib_dev.free_fast_reg_page_list  = mlx4_ib_free_fast_reg_page_list;
2360         ibdev->ib_dev.attach_mcast      = mlx4_ib_mcg_attach;
2361         ibdev->ib_dev.detach_mcast      = mlx4_ib_mcg_detach;
2362         ibdev->ib_dev.process_mad       = mlx4_ib_process_mad;
2363         ibdev->ib_dev.get_netdev        = mlx4_ib_get_netdev;
2364         ibdev->ib_dev.ioctl             = mlx4_ib_ioctl;
2365         ibdev->ib_dev.query_values      = mlx4_ib_query_values;
2366
2367         if (!mlx4_is_slave(ibdev->dev)) {
2368                 ibdev->ib_dev.alloc_fmr         = mlx4_ib_fmr_alloc;
2369                 ibdev->ib_dev.map_phys_fmr      = mlx4_ib_map_phys_fmr;
2370                 ibdev->ib_dev.unmap_fmr         = mlx4_ib_unmap_fmr;
2371                 ibdev->ib_dev.dealloc_fmr       = mlx4_ib_fmr_dealloc;
2372         }
2373
2374         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW) {
2375                 ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
2376                 ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
2377                 ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
2378
2379                 ibdev->ib_dev.uverbs_cmd_mask |=
2380                         (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
2381                         (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
2382         }
2383
2384         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
2385                 ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
2386                 ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
2387                 ibdev->ib_dev.uverbs_cmd_mask |=
2388                         (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2389                         (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2390         }
2391
2392         /*
2393          * Set experimental data
2394          */
2395         ibdev->ib_dev.uverbs_exp_cmd_mask       =
2396                 (1ull << IB_USER_VERBS_EXP_CMD_CREATE_QP)       |
2397                 (1ull << IB_USER_VERBS_EXP_CMD_MODIFY_CQ)       |
2398                 (1ull << IB_USER_VERBS_EXP_CMD_QUERY_DEVICE)    |
2399                 (1ull << IB_USER_VERBS_EXP_CMD_CREATE_CQ);
2400         ibdev->ib_dev.exp_create_qp     = mlx4_ib_exp_create_qp;
2401         ibdev->ib_dev.exp_query_device  = mlx4_ib_exp_query_device;
2402         if (check_flow_steering_support(dev)) {
2403                 ibdev->ib_dev.uverbs_ex_cmd_mask        |=
2404                         (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
2405                         (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
2406                 ibdev->ib_dev.create_flow       = mlx4_ib_create_flow;
2407                 ibdev->ib_dev.destroy_flow      = mlx4_ib_destroy_flow;
2408         } else {
2409                 pr_debug("Device managed flow steering is unavailable for this configuration.\n");
2410         }
2411         /*
2412          * End of experimental data
2413          */
2414
2415         mlx4_ib_alloc_eqs(dev, ibdev);
2416
2417         spin_lock_init(&iboe->lock);
2418
2419         if (init_node_data(ibdev))
2420                 goto err_map;
2421
2422         for (i = 0; i < ibdev->num_ports; ++i) {
2423                 if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
2424                                                 IB_LINK_LAYER_ETHERNET) {
2425                         if (mlx4_is_slave(dev)) {
2426                                 ibdev->counters[i].status = mlx4_counter_alloc(ibdev->dev,
2427                                                                                i + 1,
2428                                                                                &ibdev->counters[i].counter_index);
2429                         } else { /* allocating the PF IB default counter indices reserved in mlx4_init_counters_table */
2430                                 ibdev->counters[i].counter_index = ((i + 1) << 1) - 1;
2431                                 ibdev->counters[i].status = 0;
2432                         }
2433
2434                         dev_info(&dev->pdev->dev,
2435                                  "%s: allocated counter index %d for port %d\n",
2436                                  __func__, ibdev->counters[i].counter_index, i+1);
2437                 } else {
2438                         ibdev->counters[i].counter_index = MLX4_SINK_COUNTER_INDEX;
2439                         ibdev->counters[i].status = -ENOSPC;
2440                 }
2441         }
2442
2443         spin_lock_init(&ibdev->sm_lock);
2444         mutex_init(&ibdev->cap_mask_mutex);
2445
2446         if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2447             !mlx4_is_mfunc(dev)) {
2448                 ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
2449                 err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
2450                                             MLX4_IB_UC_STEER_QPN_ALIGN, &ibdev->steer_qpn_base, 0);
2451                 if (err)
2452                         goto err_counter;
2453
2454                 ibdev->ib_uc_qpns_bitmap =
2455                         kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
2456                                 sizeof(long),
2457                                 GFP_KERNEL);
2458                 if (!ibdev->ib_uc_qpns_bitmap) {
2459                         dev_err(&dev->pdev->dev, "bit map alloc failed\n");
2460                         goto err_steer_qp_release;
2461                 }
2462
2463                 bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
2464
2465                 err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(dev, ibdev->steer_qpn_base,
2466                                 ibdev->steer_qpn_base + ibdev->steer_qpn_count - 1);
2467                 if (err)
2468                         goto err_steer_free_bitmap;
2469         }
2470
2471         if (ib_register_device(&ibdev->ib_dev, NULL))
2472                 goto err_steer_free_bitmap;
2473
2474         if (mlx4_ib_mad_init(ibdev))
2475                 goto err_reg;
2476
2477         if (mlx4_ib_init_sriov(ibdev))
2478                 goto err_mad;
2479
2480         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
2481                 if (!iboe->nb.notifier_call) {
2482                         iboe->nb.notifier_call = mlx4_ib_netdev_event;
2483                         err = register_netdevice_notifier(&iboe->nb);
2484                         if (err) {
2485                                 iboe->nb.notifier_call = NULL;
2486                                 goto err_notify;
2487                         }
2488                 }
2489                 if (!iboe->nb_inet.notifier_call) {
2490                         iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
2491                         err = register_inetaddr_notifier(&iboe->nb_inet);
2492                         if (err) {
2493                                 iboe->nb_inet.notifier_call = NULL;
2494                                 goto err_notify;
2495                         }
2496                 }
2497                 mlx4_ib_scan_netdevs(ibdev, NULL, 0);
2498         }
2499         for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
2500                 if (device_create_file(&ibdev->ib_dev.dev,
2501                                        mlx4_class_attributes[j]))
2502                         goto err_notify;
2503         }
2504         if (sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group))
2505                 goto err_notify;
2506
2507         ibdev->ib_active = true;
2508
2509         if (mlx4_is_mfunc(ibdev->dev))
2510                 init_pkeys(ibdev);
2511
2512         /* create paravirt contexts for any VFs which are active */
2513         if (mlx4_is_master(ibdev->dev)) {
2514                 for (j = 0; j < MLX4_MFUNC_MAX; j++) {
2515                         if (j == mlx4_master_func_num(ibdev->dev))
2516                                 continue;
2517                         if (mlx4_is_slave_active(ibdev->dev, j))
2518                                 do_slave_init(ibdev, j, 1);
2519                 }
2520         }
2521         return ibdev;
2522
2523 err_notify:
2524         for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
2525                 device_remove_file(&ibdev->ib_dev.dev,
2526                         mlx4_class_attributes[j]);
2527         }
2528
2529         if (ibdev->iboe.nb.notifier_call) {
2530                 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2531                         pr_warn("failure unregistering notifier\n");
2532                 ibdev->iboe.nb.notifier_call = NULL;
2533         }
2534         if (ibdev->iboe.nb_inet.notifier_call) {
2535                 if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
2536                         pr_warn("failure unregistering notifier\n");
2537                 ibdev->iboe.nb_inet.notifier_call = NULL;
2538         }
2539         flush_workqueue(wq);
2540
2541         mlx4_ib_close_sriov(ibdev);
2542
2543 err_mad:
2544         mlx4_ib_mad_cleanup(ibdev);
2545
2546 err_reg:
2547         ib_unregister_device(&ibdev->ib_dev);
2548
2549 err_steer_free_bitmap:
2550         kfree(ibdev->ib_uc_qpns_bitmap);
2551
2552 err_steer_qp_release:
2553         if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED)
2554                 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
2555                                 ibdev->steer_qpn_count);
2556 err_counter:
2557         for (; i; --i) {
2558                 if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i) ==
2559                                                 IB_LINK_LAYER_ETHERNET) {
2560                         mlx4_counter_free(ibdev->dev,
2561                                           i,
2562                                           ibdev->counters[i - 1].counter_index);
2563                 }
2564         }
2565
2566 err_map:
2567         iounmap(ibdev->priv_uar.map);
2568         mlx4_ib_free_eqs(dev, ibdev);
2569
2570 err_uar:
2571         mlx4_uar_free(dev, &ibdev->priv_uar);
2572
2573 err_pd:
2574         mlx4_pd_free(dev, ibdev->priv_pdn);
2575
2576 err_dealloc:
2577         ib_dealloc_device(&ibdev->ib_dev);
2578
2579         return NULL;
2580 }
2581
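/*
 * Allocate a naturally aligned block of @count QPNs from the steering range
 * reserved in mlx4_ib_add(); mlx4_ib_steer_qp_free() below returns it.
 */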
2582 int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
2583 {
2584         int offset;
2585
2586         WARN_ON(!dev->ib_uc_qpns_bitmap);
2587
2588         offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
2589                                          dev->steer_qpn_count,
2590                                          get_count_order(count));
2591         if (offset < 0)
2592                 return offset;
2593
2594         *qpn = dev->steer_qpn_base + offset;
2595         return 0;
2596 }
2597
2598 void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
2599 {
2600         if (!qpn ||
2601             dev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED)
2602                 return;
2603
2604         BUG_ON(qpn < dev->steer_qpn_base);
2605
2606         bitmap_release_region(dev->ib_uc_qpns_bitmap,
2607                         qpn - dev->steer_qpn_base, get_count_order(count));
2608 }
2609
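/*
 * Attach (or detach) the device-managed steering rule that matches the QP's
 * own QPN, so traffic steered to this QP actually reaches it: a single IB
 * flow spec with the QPN masked by MLX4_IB_FLOW_QPN_MASK.
 */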
2610 int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
2611                          int is_attach)
2612 {
2613         int err;
2614         size_t flow_size;
2615         struct ib_flow_attr *flow = NULL;
2616         struct ib_flow_spec_ib *ib_spec;
2617
2618         if (is_attach) {
2619                 flow_size = sizeof(struct ib_flow_attr) +
2620                             sizeof(struct ib_flow_spec_ib);
2621                 flow = kzalloc(flow_size, GFP_KERNEL);
2622                 if (!flow)
2623                         return -ENOMEM;
2624                 flow->port = mqp->port;
2625                 flow->num_of_specs = 1;
2626                 flow->size = flow_size;
2627                 ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
2628                 ib_spec->type = IB_FLOW_SPEC_IB;
2629                 ib_spec->size = sizeof(struct ib_flow_spec_ib);
2630                 ib_spec->val.l3_type_qpn = mqp->ibqp.qp_num;
2631                 ib_spec->mask.l3_type_qpn = MLX4_IB_FLOW_QPN_MASK;
2632
2633                 err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
2634                                             IB_FLOW_DOMAIN_NIC,
2635                                             MLX4_FS_REGULAR,
2636                                             &mqp->reg_id);
2637         } else {
2638                 err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
2639         }
2640         kfree(flow);
2641         return err;
2642 }
2643
2644 static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
2645 {
2646         struct mlx4_ib_dev *ibdev = ibdev_ptr;
2647         int p, j;
2648         int dev_idx, ret;
2649
2650         if (ibdev->iboe.nb_inet.notifier_call) {
2651                 if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
2652                         pr_warn("failure unregistering notifier\n");
2653                 ibdev->iboe.nb_inet.notifier_call = NULL;
2654         }
2655
2656         mlx4_ib_close_sriov(ibdev);
2657         sysfs_remove_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group);
2658         mlx4_ib_mad_cleanup(ibdev);
2659
2660         for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
2661                 device_remove_file(&ibdev->ib_dev.dev,
2662                         mlx4_class_attributes[j]);
2663         }
2664
2665
2666         dev_idx = -1;
2667         if (dr_active && !(ibdev->dev->flags & MLX4_FLAG_DEV_NUM_STR)) {
2668                 ret = sscanf(ibdev->ib_dev.name, "mlx4_%d", &dev_idx);
2669                 if (ret != 1)
2670                         dev_idx = -1;
2671         }
2672         ib_unregister_device(&ibdev->ib_dev);
2673         if (dev_idx >= 0) {
2674                 spin_lock(&dev_num_str_lock);
2675                 bitmap_release_region(dev_num_str_bitmap, dev_idx, 0);
2676                 spin_unlock(&dev_num_str_lock);
2677         }
2678
2679         if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
2680                 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
2681                                 ibdev->steer_qpn_count);
2682                 kfree(ibdev->ib_uc_qpns_bitmap);
2683         }
2684
2685         if (ibdev->iboe.nb.notifier_call) {
2686                 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2687                         pr_warn("failure unregistering notifier\n");
2688                 ibdev->iboe.nb.notifier_call = NULL;
2689         }
2690         iounmap(ibdev->priv_uar.map);
2691
2692         for (p = 0; p < ibdev->num_ports; ++p) {
2693                 if (mlx4_ib_port_link_layer(&ibdev->ib_dev, p + 1) ==
2694                                                 IB_LINK_LAYER_ETHERNET) {
2695                         mlx4_counter_free(ibdev->dev,
2696                                           p + 1,
2697                                           ibdev->counters[p].counter_index);
2698                 }
2699         }
2700
2701         mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
2702                 mlx4_CLOSE_PORT(dev, p);
2703
2704         mlx4_ib_free_eqs(dev, ibdev);
2705
2706         mlx4_uar_free(dev, &ibdev->priv_uar);
2707         mlx4_pd_free(dev, ibdev->priv_pdn);
2708         ib_dealloc_device(&ibdev->ib_dev);
2709 }
2710
2711 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
2712 {
2713         struct mlx4_ib_demux_work **dm = NULL;
2714         struct mlx4_dev *dev = ibdev->dev;
2715         int i;
2716         unsigned long flags;
2717
2718         if (!mlx4_is_master(dev))
2719                 return;
2720
2721         dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
2722         if (!dm) {
2723                 pr_err("failed to allocate memory for tunneling qp update\n");
2724                 goto out;
2725         }
2726
2727         for (i = 0; i < dev->caps.num_ports; i++) {
2728                 dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
2729                 if (!dm[i]) {
2730                         pr_err("failed to allocate memory for tunneling qp update work struct\n");
2731                         for (i = 0; i < dev->caps.num_ports; i++) {
2732                                 if (dm[i])
2733                                         kfree(dm[i]);
2734                         }
2735                         goto out;
2736                 }
2737         }
2738         /* initialize or tear down tunnel QPs for the slave */
2739         for (i = 0; i < dev->caps.num_ports; i++) {
2740                 INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
2741                 dm[i]->port = i + 1;
2742                 dm[i]->slave = slave;
2743                 dm[i]->do_init = do_init;
2744                 dm[i]->dev = ibdev;
2745                 spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
2746                 if (!ibdev->sriov.is_going_down)
2747                         queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
2748                 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
2749         }
2750 out:
2751         /* kfree(NULL) is a no-op, so no need to test dm first */
2752         kfree(dm);
2753         return;
2754 }
2755
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
                          enum mlx4_dev_event event, unsigned long param)
{
        struct ib_event ibev;
        struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
        struct mlx4_eqe *eqe = NULL;
        struct ib_event_work *ew;
        int p = 0;

        if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
                eqe = (struct mlx4_eqe *)param;
        else
                p = (int) param;

        switch (event) {
        case MLX4_DEV_EVENT_PORT_UP:
                if (p > ibdev->num_ports)
                        return;
                if (mlx4_is_master(dev) &&
                    rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
                        IB_LINK_LAYER_INFINIBAND) {
                        mlx4_ib_invalidate_all_guid_record(ibdev, p);
                }
                mlx4_ib_info((struct ib_device *) ibdev_ptr,
                             "Port %d logical link is up\n", p);
                ibev.event = IB_EVENT_PORT_ACTIVE;
                break;

        case MLX4_DEV_EVENT_PORT_DOWN:
                if (p > ibdev->num_ports)
                        return;
                mlx4_ib_info((struct ib_device *) ibdev_ptr,
                             "Port %d logical link is down\n", p);
                ibev.event = IB_EVENT_PORT_ERR;
                break;

        case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
                ibdev->ib_active = false;
                ibev.event = IB_EVENT_DEVICE_FATAL;
                break;

        case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
                ew = kmalloc(sizeof *ew, GFP_ATOMIC);
                if (!ew) {
                        pr_err("failed to allocate memory for events work\n");
                        break;
                }

                INIT_WORK(&ew->work, handle_port_mgmt_change_event);
                memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
                ew->ib_dev = ibdev;
                /* need to queue only for port owner, which uses GEN_EQE */
                if (mlx4_is_master(dev))
                        queue_work(wq, &ew->work);
                else
                        handle_port_mgmt_change_event(&ew->work);
                return;

        case MLX4_DEV_EVENT_SLAVE_INIT:
                /* here, p is the slave id */
                do_slave_init(ibdev, p, 1);
                return;

        case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
                /* here, p is the slave id */
                do_slave_init(ibdev, p, 0);
                return;

        default:
                return;
        }

        ibev.device           = ibdev_ptr;
        ibev.element.port_num = (u8) p;

        ib_dispatch_event(&ibev);
}

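/*
 * Callbacks registered with the mlx4 core: .add/.remove run for each
 * ConnectX device that comes and goes, .event receives asynchronous
 * device events.
 */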
static struct mlx4_interface mlx4_ib_interface = {
        .add            = mlx4_ib_add,
        .remove         = mlx4_ib_remove,
        .event          = mlx4_ib_event,
        .protocol       = MLX4_PROT_IB_IPV6
};

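/*
 * Module load: create the event workqueue, initialize multicast group
 * (MCG) handling and the dev_assign table, then register with the mlx4
 * core, which invokes mlx4_ib_add() for each existing device.
 */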
static int __init mlx4_ib_init(void)
{
        int err;

        wq = create_singlethread_workqueue("mlx4_ib");
        if (!wq)
                return -ENOMEM;

        err = mlx4_ib_mcg_init();
        if (err)
                goto clean_wq;

        init_dev_assign();

        err = mlx4_register_interface(&mlx4_ib_interface);
        if (err)
                goto clean_mcg;

        return 0;

clean_mcg:
        mlx4_ib_mcg_destroy();

clean_wq:
        destroy_workqueue(wq);
        return err;
}

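/*
 * Module unload: tear everything down in the reverse order of
 * mlx4_ib_init() and free the dev_assign device-number bitmap.
 */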
static void __exit mlx4_ib_cleanup(void)
{
        mlx4_unregister_interface(&mlx4_ib_interface);
        mlx4_ib_mcg_destroy();
        destroy_workqueue(wq);

        kfree(dev_num_str_bitmap);
}

module_init_order(mlx4_ib_init, SI_ORDER_MIDDLE);
module_exit(mlx4_ib_cleanup);

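/*
 * FreeBSD module event handler.  Load and unload are driven by the
 * linuxkpi module_init()/module_exit() hooks above, so every event is
 * simply acknowledged here.
 */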
static int
mlx4ib_evhand(module_t mod, int event, void *arg)
{
        return (0);
}

static moduledata_t mlx4ib_mod = {
        .name = "mlx4ib",
        .evhand = mlx4ib_evhand,
};

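/*
 * Register the module with the kernel linker and declare its
 * dependencies so mlx4, ibcore and linuxkpi are loaded first.
 */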
DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_LAST, SI_ORDER_ANY);
MODULE_DEPEND(mlx4ib, mlx4, 1, 1, 1);
MODULE_DEPEND(mlx4ib, ibcore, 1, 1, 1);
MODULE_DEPEND(mlx4ib, linuxkpi, 1, 1, 1);