]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/mlx4/mlx4_core/mlx4_main.c
Merge llvm, clang, lld, lldb, compiler-rt and libc++ r303571, and update
[FreeBSD/FreeBSD.git] / sys / dev / mlx4 / mlx4_core / mlx4_main.c
1 /*
2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4  * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
5  * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35
36 #define LINUXKPI_PARAM_PREFIX mlx4_
37
38 #include <linux/kmod.h>
39 #include <linux/module.h>
40 #include <linux/errno.h>
41 #include <linux/pci.h>
42 #include <linux/dma-mapping.h>
43 #include <linux/slab.h>
44 #include <linux/io-mapping.h>
45 #include <linux/delay.h>
46 #include <linux/netdevice.h>
47 #include <linux/string.h>
48 #include <linux/fs.h>
49
50 #include <dev/mlx4/device.h>
51 #include <dev/mlx4/doorbell.h>
52
53 #include "mlx4.h"
54 #include "fw.h"
55 #include "icm.h"
56 #include <dev/mlx4/stats.h>
57
58 /* Mellanox ConnectX HCA low-level driver */
59
/* Driver-wide workqueue; shared by deferred tasks throughout mlx4_core. */
struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

/* Debug trace verbosity; > 0 enables debug output (runtime-writable, 0644). */
int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

/* MSI-X usage: 0 = off, 1 = on, > 1 = cap the number of MSI-X vectors. */
static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x (non-SRIOV only)");

#else /* CONFIG_PCI_MSI */

/* Without MSI support, force every consumer of msi_x down the INTx path. */
#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

/* Opt-in CPU tuning for performance; read-only after module load (0444). */
static int enable_sys_tune = 0;
module_param(enable_sys_tune, int, 0444);
MODULE_PARM_DESC(enable_sys_tune, "Tune the cpu's for better performance (default 0)");

/* Non-zero blocks multicast loopback packets (default: enabled). */
int mlx4_blck_lb = 1;
module_param_named(block_loopback, mlx4_blck_lb, int, 0644);
MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 "
				 "(default: 1)");
/* Prefix lengths used when parsing "bb:dd.f-" / "mmmm:bb:dd.f-" strings. */
enum {
	DEFAULT_DOMAIN	= 0,
	BDF_STR_SIZE	= 8, /* bb:dd.f- */
	DBDF_STR_SIZE	= 13 /* mmmm:bb:dd.f- */
};

/* Identifiers for the per-device-function string parameters below. */
enum {
	NUM_VFS,
	PROBE_VF,
	PORT_TYPE_ARRAY
};

/* Parse outcomes returned by update_defaults(). */
enum {
	VALID_DATA,
	INVALID_DATA,
	INVALID_STR
};

/* Couples a parameter id with its parsed device-function -> value table. */
struct param_data {
	int				id;
	struct mlx4_dbdf2val_lst	dbdf2val;
};
112
/* num_vfs: number of virtual functions to enable, either one uniform
 * value or per-device-function "dddd:bb:dd.f-N" entries; parsed at load
 * time into the dbdf2val table. */
static struct param_data num_vfs = {
	.id		= NUM_VFS,
	.dbdf2val = {
		.name		= "num_vfs param",
		.num_vals	= 1,
		.def_val	= {0},
		.range		= {0, MLX4_MAX_NUM_VF}
	}
};
module_param_string(num_vfs, num_vfs.dbdf2val.str,
		    sizeof(num_vfs.dbdf2val.str), 0444);
MODULE_PARM_DESC(num_vfs,
		 "Either single value (e.g. '5') to define uniform num_vfs value for all devices functions\n"
		 "\t\tor a string to map device function numbers to their num_vfs values (e.g. '0000:04:00.0-5,002b:1c:0b.a-15').\n"
		 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for num_vfs value (e.g. 15).");

/* probe_vf: how many VFs the PF driver itself probes; same single-value
 * or per-device string syntax as num_vfs. */
static struct param_data probe_vf = {
	.id		= PROBE_VF,
	.dbdf2val = {
		.name		= "probe_vf param",
		.num_vals	= 1,
		.def_val	= {0},
		.range		= {0, MLX4_MAX_NUM_VF}
	}
};
module_param_string(probe_vf, probe_vf.dbdf2val.str,
		    sizeof(probe_vf.dbdf2val.str), 0444);
MODULE_PARM_DESC(probe_vf,
		 "Either single value (e.g. '3') to define uniform number of VFs to probe by the pf driver for all devices functions\n"
		 "\t\tor a string to map device function numbers to their probe_vf values (e.g. '0000:04:00.0-3,002b:1c:0b.a-13').\n"
		 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for probe_vf value (e.g. 13).");
144
/* Log2 of the MCG entry size, which bounds QPs per multicast group;
 * -1 requests device-managed flow steering when available. */
int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;

module_param_named(log_num_mgm_entry_size,
			mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
					 " of qp per mcg, for example:"
					 " 10 gives 248.range: 7 <="
					 " log_num_mgm_entry_size <= 12."
					 " To activate device managed"
					 " flow steering when available, set to -1");

/* Select the steering mode intended for higher packet rates (default off). */
static int high_rate_steer;
module_param(high_rate_steer, int, 0444);
MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate"
				  " (default off)");

/* Request fast packet drop when no receive WQEs are posted; only takes
 * effect if the device reports MLX4_DEV_CAP_FLAG_FAST_DROP. */
static int fast_drop;
module_param_named(fast_drop, fast_drop, int, 0444);
MODULE_PARM_DESC(fast_drop,
		 "Enable fast packet drop when no receive WQEs are posted");

/* Use 64-byte CQEs/EQEs when the firmware supports them (default: on). */
int mlx4_enable_64b_cqe_eqe = 1;
module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
		 "Enable 64 byte CQEs/EQEs when the FW supports this if non-zero (default: 1)");
170
#define HCA_GLOBAL_CAP_MASK            0

#define PF_CONTEXT_BEHAVIOUR_MASK       MLX4_FUNC_CAP_64B_EQE_CQE

/* Human-readable driver version string. */
static char mlx4_version[] __devinitdata =
	DRV_NAME ": Mellanox ConnectX VPI driver v"
	DRV_VERSION "\n";

/* Log2 max number of MACs per ETH port (valid range 1-7). */
static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

/* Obsolete knob kept for compatibility; the real limit is the fixed
 * MLX4_LOG_NUM_VLANS below. */
static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan,
	"(Obsolete) Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7

/* Log2 number of MTT entries per segment (0-7, default 0 via ilog2(1)). */
int log_mtts_per_seg = ilog2(1);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment "
		 "(0-7) (default: 0)");
194
/* port_type_array: per-port link type (1-IB, 2-ETH, 3-auto, 4-N/A);
 * two values per entry (port1, port2), uniform or per device function. */
static struct param_data port_type_array = {
	.id		= PORT_TYPE_ARRAY,
	.dbdf2val = {
		.name		= "port_type_array param",
		.num_vals	= 2,
		.def_val	= {MLX4_PORT_TYPE_ETH, MLX4_PORT_TYPE_ETH},
		.range		= {MLX4_PORT_TYPE_IB, MLX4_PORT_TYPE_NA}
	}
};
module_param_string(port_type_array, port_type_array.dbdf2val.str,
		    sizeof(port_type_array.dbdf2val.str), 0444);
MODULE_PARM_DESC(port_type_array,
		 "Either pair of values (e.g. '1,2') to define uniform port1/port2 types configuration for all devices functions\n"
		 "\t\tor a string to map device function numbers to their pair of port types values (e.g. '0000:04:00.0-1;2,002b:1c:0b.a-1;1').\n"
		 "\t\tValid port types: 1-ib, 2-eth, 3-auto, 4-N/A\n"
		 "\t\tIn case that only one port is available use the N/A port type for port2 (e.g '1,4').");


/* Per-device port-type configuration; the list_head suggests instances
 * are linked on a driver-wide list keyed by pdev (see users elsewhere). */
struct mlx4_port_config {
	struct list_head list;
	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
	struct pci_dev *pdev;
};
218
/* Default log2 number of MTT segments (see process_mod_param_profile). */
#define MLX4_LOG_NUM_MTT 20
/* We limit to 30 as of a bit map issue which uses int and not uint.
     see mlx4_buddy_init -> bitmap_zero which gets int.
*/
#define MLX4_MAX_LOG_NUM_MTT 30

/* HCA resource profile; each field is a log2 count exposed below as a
 * module parameter.  num_mtt_segs == 0 means "derive from host memory"
 * (process_mod_param_profile writes the computed value back). */
static struct mlx4_profile mod_param_profile = {
	.num_qp		= 19,
	.num_srq	= 16,
	.rdmarc_per_qp	= 4,
	.num_cq		= 16,
	.num_mcg	= 13,
	.num_mpt	= 19,
	.num_mtt_segs	= 0, /* max(20, 2*MTTs for host memory)) */
};

module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444);
MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)");

module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444);
MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA "
		 "(default: 16)");

module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int,
		   0444);
MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP "
		 "(default: 4)");

module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444);
MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)");

module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444);
MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA "
		 "(default: 13)");

module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444);
MODULE_PARM_DESC(log_num_mpt,
		 "log maximum number of memory protection table entries per "
		 "HCA (default: 19)");

module_param_named(log_num_mtt, mod_param_profile.num_mtt_segs, int, 0444);
MODULE_PARM_DESC(log_num_mtt,
		 "log maximum number of memory translation table segments per "
		 "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))");

/* NOTE(review): appears to distinguish basic vs. extended interface
 * registration; confirm against the users of these constants. */
enum {
	MLX4_IF_STATE_BASIC,
	MLX4_IF_STATE_EXTENDED
};
267
/*
 * Pack a PCI domain:bus:device.function tuple into a single u64 key.
 * Layout: domain at bit 20 and up, bus in bits 12-19, device in 4-11,
 * function in 0-3.
 *
 * Cast to u64 before shifting: "domain << 20" is evaluated in (signed)
 * int, so a 16-bit domain (sscanf "%04x" allows up to 0xffff) overflows
 * int — undefined behavior that in practice sign-extends and corrupts
 * the key used for table lookups.
 */
static inline u64 dbdf_to_u64(int domain, int bus, int dev, int fn)
{
	return ((u64)domain << 20) | ((u64)bus << 12) | ((u64)dev << 4) |
	       (u64)fn;
}
272
/* Warn that @dbdf is not a valid [domain:]bus:dev.fn prefix in param @pname. */
static inline void pr_bdf_err(const char *dbdf, const char *pname)
{
	pr_warn("mlx4_core: '%s' is not valid bdf in '%s'\n", dbdf, pname);
}
277
/* Warn that value string @val for device @dbdf in param @pname is invalid. */
static inline void pr_val_err(const char *dbdf, const char *pname,
			      const char *val)
{
	pr_warn("mlx4_core: value '%s' of bdf '%s' in '%s' is not valid\n",
		val, dbdf, pname);
}
284
285 static inline void pr_out_of_range_bdf(const char *dbdf, int val,
286                                        struct mlx4_dbdf2val_lst *dbdf2val)
287 {
288         pr_warn("mlx4_core: value %d in bdf '%s' of '%s' is out of its valid range (%d,%d)\n"
289                 , val, dbdf, dbdf2val->name , dbdf2val->range.min,
290                 dbdf2val->range.max);
291 }
292
293 static inline void pr_out_of_range(struct mlx4_dbdf2val_lst *dbdf2val)
294 {
295         pr_warn("mlx4_core: value of '%s' is out of its valid range (%d,%d)\n"
296                 , dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max);
297 }
298
299 static inline int is_in_range(int val, struct mlx4_range *r)
300 {
301         return (val >= r->min && val <= r->max);
302 }
303
304 static int update_defaults(struct param_data *pdata)
305 {
306         long int val[MLX4_MAX_BDF_VALS];
307         int ret;
308         char *t, *p = pdata->dbdf2val.str;
309         char sval[32];
310         int val_len;
311
312         if (!strlen(p) || strchr(p, ':') || strchr(p, '.') || strchr(p, ';'))
313                 return INVALID_STR;
314
315         switch (pdata->id) {
316         case PORT_TYPE_ARRAY:
317                 t = strchr(p, ',');
318                 if (!t || t == p || (t - p) > sizeof(sval))
319                         return INVALID_STR;
320
321                 val_len = t - p;
322                 strncpy(sval, p, val_len);
323                 sval[val_len] = 0;
324
325                 ret = kstrtol(sval, 0, &val[0]);
326                 if (ret == -EINVAL)
327                         return INVALID_STR;
328                 if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) {
329                         pr_out_of_range(&pdata->dbdf2val);
330                         return INVALID_DATA;
331                 }
332
333                 ret = kstrtol(t + 1, 0, &val[1]);
334                 if (ret == -EINVAL)
335                         return INVALID_STR;
336                 if (ret || !is_in_range(val[1], &pdata->dbdf2val.range)) {
337                         pr_out_of_range(&pdata->dbdf2val);
338                         return INVALID_DATA;
339                 }
340
341                 pdata->dbdf2val.tbl[0].val[0] = val[0];
342                 pdata->dbdf2val.tbl[0].val[1] = val[1];
343                 break;
344
345         case NUM_VFS:
346         case PROBE_VF:
347                 ret = kstrtol(p, 0, &val[0]);
348                 if (ret == -EINVAL)
349                         return INVALID_STR;
350                 if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) {
351                         pr_out_of_range(&pdata->dbdf2val);
352                         return INVALID_DATA;
353                 }
354                 pdata->dbdf2val.tbl[0].val[0] = val[0];
355                 break;
356         }
357         pdata->dbdf2val.tbl[1].dbdf = MLX4_ENDOF_TBL;
358
359         return VALID_DATA;
360 }
361
362 int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst)
363 {
364         int domain, bus, dev, fn;
365         u64 dbdf;
366         char *p, *t, *v;
367         char tmp[32];
368         char sbdf[32];
369         char sep = ',';
370         int j, k, str_size, i = 1;
371         int prfx_size;
372
373         p = dbdf2val_lst->str;
374
375         for (j = 0; j < dbdf2val_lst->num_vals; j++)
376                 dbdf2val_lst->tbl[0].val[j] = dbdf2val_lst->def_val[j];
377         dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL;
378
379         str_size = strlen(dbdf2val_lst->str);
380
381         if (str_size == 0)
382                 return 0;
383
384         while (strlen(p)) {
385                 prfx_size = BDF_STR_SIZE;
386                 sbdf[prfx_size] = 0;
387                 strncpy(sbdf, p, prfx_size);
388                 domain = DEFAULT_DOMAIN;
389                 if (sscanf(sbdf, "%02x:%02x.%x-", &bus, &dev, &fn) != 3) {
390                         prfx_size = DBDF_STR_SIZE;
391                         sbdf[prfx_size] = 0;
392                         strncpy(sbdf, p, prfx_size);
393                         if (sscanf(sbdf, "%04x:%02x:%02x.%x-", &domain, &bus,
394                                    &dev, &fn) != 4) {
395                                 pr_bdf_err(sbdf, dbdf2val_lst->name);
396                                 goto err;
397                         }
398                         sprintf(tmp, "%04x:%02x:%02x.%x-", domain, bus, dev,
399                                 fn);
400                 } else {
401                         sprintf(tmp, "%02x:%02x.%x-", bus, dev, fn);
402                 }
403
404                 if (strnicmp(sbdf, tmp, sizeof(tmp))) {
405                         pr_bdf_err(sbdf, dbdf2val_lst->name);
406                         goto err;
407                 }
408
409                 dbdf = dbdf_to_u64(domain, bus, dev, fn);
410
411                 for (j = 1; j < i; j++)
412                         if (dbdf2val_lst->tbl[j].dbdf == dbdf) {
413                                 pr_warn("mlx4_core: in '%s', %s appears multiple times\n"
414                                         , dbdf2val_lst->name, sbdf);
415                                 goto err;
416                         }
417
418                 if (i >= MLX4_DEVS_TBL_SIZE) {
419                         pr_warn("mlx4_core: Too many devices in '%s'\n"
420                                 , dbdf2val_lst->name);
421                         goto err;
422                 }
423
424                 p += prfx_size;
425                 t = strchr(p, sep);
426                 t = t ? t : p + strlen(p);
427                 if (p >= t) {
428                         pr_val_err(sbdf, dbdf2val_lst->name, "");
429                         goto err;
430                 }
431
432                 for (k = 0; k < dbdf2val_lst->num_vals; k++) {
433                         char sval[32];
434                         long int val;
435                         int ret, val_len;
436                         char vsep = ';';
437
438                         v = (k == dbdf2val_lst->num_vals - 1) ? t : strchr(p, vsep);
439                         if (!v || v > t || v == p || (v - p) > sizeof(sval)) {
440                                 pr_val_err(sbdf, dbdf2val_lst->name, p);
441                                 goto err;
442                         }
443                         val_len = v - p;
444                         strncpy(sval, p, val_len);
445                         sval[val_len] = 0;
446
447                         ret = kstrtol(sval, 0, &val);
448                         if (ret) {
449                                 if (strchr(p, vsep))
450                                         pr_warn("mlx4_core: too many vals in bdf '%s' of '%s'\n"
451                                                 , sbdf, dbdf2val_lst->name);
452                                 else
453                                         pr_val_err(sbdf, dbdf2val_lst->name,
454                                                    sval);
455                                 goto err;
456                         }
457                         if (!is_in_range(val, &dbdf2val_lst->range)) {
458                                 pr_out_of_range_bdf(sbdf, val, dbdf2val_lst);
459                                 goto err;
460                         }
461
462                         dbdf2val_lst->tbl[i].val[k] = val;
463                         p = v;
464                         if (p[0] == vsep)
465                                 p++;
466                 }
467
468                 dbdf2val_lst->tbl[i].dbdf = dbdf;
469                 if (strlen(p)) {
470                         if (p[0] != sep) {
471                                 pr_warn("mlx4_core: expect separator '%c' before '%s' in '%s'\n"
472                                         , sep, p, dbdf2val_lst->name);
473                                 goto err;
474                         }
475                         p++;
476                 }
477                 i++;
478                 if (i < MLX4_DEVS_TBL_SIZE)
479                         dbdf2val_lst->tbl[i].dbdf = MLX4_ENDOF_TBL;
480         }
481
482         return 0;
483
484 err:
485         dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL;
486         pr_warn("mlx4_core: The value of '%s' is incorrect. The value is discarded!\n"
487                 , dbdf2val_lst->name);
488
489         return -EINVAL;
490 }
491 EXPORT_SYMBOL(mlx4_fill_dbdf2val_tbl);
492
493 int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx,
494                  int *val)
495 {
496         u64 dbdf;
497         int i = 1;
498
499         *val = tbl[0].val[idx];
500         if (!pdev)
501                 return -EINVAL;
502
503         dbdf = dbdf_to_u64(pci_get_domain(pdev->dev.bsddev), pci_get_bus(pdev->dev.bsddev),
504                            PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
505
506         while ((i < MLX4_DEVS_TBL_SIZE) && (tbl[i].dbdf != MLX4_ENDOF_TBL)) {
507                 if (tbl[i].dbdf == dbdf) {
508                         *val = tbl[i].val[idx];
509                         return 0;
510                 }
511                 i++;
512         }
513
514         return 0;
515 }
516 EXPORT_SYMBOL(mlx4_get_val);
517
/*
 * Expand the log2 values in mod_param_profile into absolute counts in
 * @profile.  The MTT segment count is either taken directly from the
 * log_num_mtt module parameter or, when that is 0, derived from the
 * host physical memory size (FreeBSD "hw.realmem" tunable).
 */
static void process_mod_param_profile(struct mlx4_profile *profile)
{
	vm_size_t hwphyssz;
	hwphyssz = 0;
	/* Real memory size; stays 0 if the tunable is not set. */
	TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz);

	profile->num_qp        = 1 << mod_param_profile.num_qp;
	profile->num_srq       = 1 << mod_param_profile.num_srq;
	profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp;
	profile->num_cq        = 1 << mod_param_profile.num_cq;
	profile->num_mcg       = 1 << mod_param_profile.num_mcg;
	profile->num_mpt       = 1 << mod_param_profile.num_mpt;
	/*
	 * We want to scale the number of MTTs with the size of the
	 * system memory, since it makes sense to register a lot of
	 * memory on a system with a lot of memory.  As a heuristic,
	 * make sure we have enough MTTs to register twice the system
	 * memory (with PAGE_SIZE entries).
	 *
	 * This number has to be a power of two and fit into 32 bits
	 * due to device limitations. We cap this at 2^30 as of bit map
	 * limitation to work with int instead of uint (mlx4_buddy_init -> bitmap_zero)
	 * That limits us to 4TB of memory registration per HCA with
	 * 4KB pages, which is probably OK for the next few months.
	 */
	if (mod_param_profile.num_mtt_segs)
		profile->num_mtt_segs = 1 << mod_param_profile.num_mtt_segs;
	else {
		/* max(2^20 MTTs, enough segments to cover 2x real memory),
		 * capped at 2^MLX4_MAX_LOG_NUM_MTT, rounded up to a power
		 * of two; all counts are in segments of 2^log_mtts_per_seg. */
		profile->num_mtt_segs =
			roundup_pow_of_two(max_t(unsigned,
						1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg),
						min(1UL <<
						(MLX4_MAX_LOG_NUM_MTT -
						log_mtts_per_seg),
						(hwphyssz << 1)
						>> log_mtts_per_seg)));
		/* set the actual value, so it will be reflected to the user
		   using the sysfs */
		mod_param_profile.num_mtt_segs = ilog2(profile->num_mtt_segs);
	}
}
559
560 int mlx4_check_port_params(struct mlx4_dev *dev,
561                            enum mlx4_port_type *port_type)
562 {
563         int i;
564
565         for (i = 0; i < dev->caps.num_ports - 1; i++) {
566                 if (port_type[i] != port_type[i + 1]) {
567                         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
568                                 mlx4_err(dev, "Only same port types supported "
569                                          "on this HCA, aborting.\n");
570                                 return -EINVAL;
571                         }
572                 }
573         }
574
575         for (i = 0; i < dev->caps.num_ports; i++) {
576                 if (!(port_type[i] & dev->caps.supported_type[i+1])) {
577                         mlx4_err(dev, "Requested port type for port %d is not "
578                                       "supported on this HCA\n", i + 1);
579                         return -EINVAL;
580                 }
581         }
582         return 0;
583 }
584
585 static void mlx4_set_port_mask(struct mlx4_dev *dev)
586 {
587         int i;
588
589         for (i = 1; i <= dev->caps.num_ports; ++i)
590                 dev->caps.port_mask[i] = dev->caps.port_type[i];
591 }
592
593 enum {
594         MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
595 };
596
597 static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
598 {
599         int err = 0;
600         struct mlx4_func func;
601
602         if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
603                 err = mlx4_QUERY_FUNC(dev, &func, 0);
604                 if (err) {
605                         mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
606                         return err;
607                 }
608                 dev_cap->max_eqs = func.max_eq;
609                 dev_cap->reserved_eqs = func.rsvd_eqs;
610                 dev_cap->reserved_uars = func.rsvd_uars;
611                 err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
612         }
613         return err;
614 }
615
616 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
617 {
618         int err;
619         int i;
620
621         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
622         if (err) {
623                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
624                 return err;
625         }
626
627         if (dev_cap->min_page_sz > PAGE_SIZE) {
628                 mlx4_err(dev, "HCA minimum page size of %d bigger than "
629                          "kernel PAGE_SIZE of %d, aborting.\n",
630                          dev_cap->min_page_sz, (int)PAGE_SIZE);
631                 return -ENODEV;
632         }
633         if (dev_cap->num_ports > MLX4_MAX_PORTS) {
634                 mlx4_err(dev, "HCA has %d ports, but we only support %d, "
635                          "aborting.\n",
636                          dev_cap->num_ports, MLX4_MAX_PORTS);
637                 return -ENODEV;
638         }
639
640         if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) {
641                 mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than "
642                          "PCI resource 2 size of 0x%llx, aborting.\n",
643                          dev_cap->uar_size,
644                          (unsigned long long) pci_resource_len(dev->pdev, 2));
645                 return -ENODEV;
646         }
647
648         dev->caps.num_ports          = dev_cap->num_ports;
649        dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
650        dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
651                                      dev->caps.num_sys_eqs :
652                                      MLX4_MAX_EQ_NUM;
653         for (i = 1; i <= dev->caps.num_ports; ++i) {
654                 dev->caps.vl_cap[i]         = dev_cap->max_vl[i];
655                 dev->caps.ib_mtu_cap[i]     = dev_cap->ib_mtu[i];
656                 dev->phys_caps.gid_phys_table_len[i]  = dev_cap->max_gids[i];
657                 dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i];
658                 /* set gid and pkey table operating lengths by default
659                  * to non-sriov values */
660                 dev->caps.gid_table_len[i]  = dev_cap->max_gids[i];
661                 dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
662                 dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
663                 dev->caps.eth_mtu_cap[i]    = dev_cap->eth_mtu[i];
664                 dev->caps.def_mac[i]        = dev_cap->def_mac[i];
665                 dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
666                 dev->caps.suggested_type[i] = dev_cap->suggested_type[i];
667                 dev->caps.default_sense[i] = dev_cap->default_sense[i];
668                 dev->caps.trans_type[i]     = dev_cap->trans_type[i];
669                 dev->caps.vendor_oui[i]     = dev_cap->vendor_oui[i];
670                 dev->caps.wavelength[i]     = dev_cap->wavelength[i];
671                 dev->caps.trans_code[i]     = dev_cap->trans_code[i];
672         }
673
674         dev->caps.uar_page_size      = PAGE_SIZE;
675         dev->caps.num_uars           = dev_cap->uar_size / PAGE_SIZE;
676         dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
677         dev->caps.bf_reg_size        = dev_cap->bf_reg_size;
678         dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
679         dev->caps.max_sq_sg          = dev_cap->max_sq_sg;
680         dev->caps.max_rq_sg          = dev_cap->max_rq_sg;
681         dev->caps.max_wqes           = dev_cap->max_qp_sz;
682         dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
683         dev->caps.max_srq_wqes       = dev_cap->max_srq_sz;
684         dev->caps.max_srq_sge        = dev_cap->max_rq_sg - 1;
685         dev->caps.reserved_srqs      = dev_cap->reserved_srqs;
686         dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
687         dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
688         /*
689          * Subtract 1 from the limit because we need to allocate a
690          * spare CQE to enable resizing the CQ
691          */
692         dev->caps.max_cqes           = dev_cap->max_cq_sz - 1;
693         dev->caps.reserved_cqs       = dev_cap->reserved_cqs;
694         dev->caps.reserved_eqs       = dev_cap->reserved_eqs;
695         dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
696         dev->caps.reserved_mrws      = dev_cap->reserved_mrws;
697
698         /* The first 128 UARs are used for EQ doorbells */
699         dev->caps.reserved_uars      = max_t(int, 128, dev_cap->reserved_uars);
700         dev->caps.reserved_pds       = dev_cap->reserved_pds;
701         dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
702                                         dev_cap->reserved_xrcds : 0;
703         dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
704                                         dev_cap->max_xrcds : 0;
705         dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;
706
707         dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
708         dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
709         dev->caps.flags              = dev_cap->flags;
710         dev->caps.flags2             = dev_cap->flags2;
711         dev->caps.bmme_flags         = dev_cap->bmme_flags;
712         dev->caps.reserved_lkey      = dev_cap->reserved_lkey;
713         dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
714         dev->caps.cq_timestamp       = dev_cap->timestamp_support;
715         dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
716         dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
717
718         /* Sense port always allowed on supported devices for ConnectX-1 and -2 */
719         if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
720                 dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
721         /* Don't do sense port on multifunction devices (for now at least) */
722         if (mlx4_is_mfunc(dev))
723                 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
724
725         dev->caps.log_num_macs  = log_num_mac;
726         dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
727
728         dev->caps.fast_drop     = fast_drop ?
729                                   !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) :
730                                   0;
731
732         for (i = 1; i <= dev->caps.num_ports; ++i) {
733                 dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
734                 if (dev->caps.supported_type[i]) {
735                         /* if only ETH is supported - assign ETH */
736                         if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
737                                 dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
738                         /* if only IB is supported, assign IB */
739                         else if (dev->caps.supported_type[i] ==
740                                  MLX4_PORT_TYPE_IB)
741                                 dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
742                         else {
743                                 /*
744                                  * if IB and ETH are supported, we set the port
745                                  * type according to user selection of port type;
746                                  * if there is no user selection, take the FW hint
747                                  */
748                                 int pta;
749                                 mlx4_get_val(port_type_array.dbdf2val.tbl,
750                                              pci_physfn(dev->pdev), i - 1,
751                                              &pta);
752                                 if (pta == MLX4_PORT_TYPE_NONE) {
753                                         dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
754                                                 MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
755                                 } else if (pta == MLX4_PORT_TYPE_NA) {
756                                         mlx4_err(dev, "Port %d is valid port. "
757                                                  "It is not allowed to configure its type to N/A(%d)\n",
758                                                  i, MLX4_PORT_TYPE_NA);
759                                         return -EINVAL;
760                                 } else {
761                                         dev->caps.port_type[i] = pta;
762                                 }
763                         }
764                 }
765                 /*
766                  * Link sensing is allowed on the port if 3 conditions are true:
767                  * 1. Both protocols are supported on the port.
768                  * 2. Different types are supported on the port
769                  * 3. FW declared that it supports link sensing
770                  */
771                 mlx4_priv(dev)->sense.sense_allowed[i] =
772                         ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
773                          (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
774                          (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
775
776                 /* Disablling auto sense for default Eth ports support */
777                 mlx4_priv(dev)->sense.sense_allowed[i] = 0;
778
779                 /*
780                  * If "default_sense" bit is set, we move the port to "AUTO" mode
781                  * and perform sense_port FW command to try and set the correct
782                  * port type from beginning
783                  */
784                 if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
785                         enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
786                         dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
787                         mlx4_SENSE_PORT(dev, i, &sensed_port);
788                         if (sensed_port != MLX4_PORT_TYPE_NONE)
789                                 dev->caps.port_type[i] = sensed_port;
790                 } else {
791                         dev->caps.possible_type[i] = dev->caps.port_type[i];
792                 }
793
794                 if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
795                         dev->caps.log_num_macs = dev_cap->log_max_macs[i];
796                         mlx4_warn(dev, "Requested number of MACs is too much "
797                                   "for port %d, reducing to %d.\n",
798                                   i, 1 << dev->caps.log_num_macs);
799                 }
800                 if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
801                         dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
802                         mlx4_warn(dev, "Requested number of VLANs is too much "
803                                   "for port %d, reducing to %d.\n",
804                                   i, 1 << dev->caps.log_num_vlans);
805                 }
806         }
807
808         dev->caps.max_basic_counters = dev_cap->max_basic_counters;
809         dev->caps.max_extended_counters = dev_cap->max_extended_counters;
810         /* support extended counters if available */
811         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT)
812                 dev->caps.max_counters = dev->caps.max_extended_counters;
813         else
814                 dev->caps.max_counters = dev->caps.max_basic_counters;
815
816         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
817         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
818                 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
819                 (1 << dev->caps.log_num_macs) *
820                 (1 << dev->caps.log_num_vlans) *
821                 dev->caps.num_ports;
822         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
823
824         dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
825                 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
826                 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
827                 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
828
829         dev->caps.sync_qp = dev_cap->sync_qp;
830         if (dev->pdev->device == 0x1003)
831                 dev->caps.cq_flags |= MLX4_DEV_CAP_CQ_FLAG_IO;
832
833         dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
834
835         if (!mlx4_enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
836                 if (dev_cap->flags &
837                     (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
838                         mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
839                         dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
840                         dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
841                 }
842         }
843
844         if ((dev->caps.flags &
845             (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
846             mlx4_is_master(dev))
847                 dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
848
849         if (!mlx4_is_slave(dev)) {
850                 for (i = 0; i < dev->caps.num_ports; ++i)
851                         dev->caps.def_counter_index[i] = i << 1;
852
853                 dev->caps.alloc_res_qp_mask =
854                         (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0);
855         } else {
856                 dev->caps.alloc_res_qp_mask = 0;
857         }
858
859         return 0;
860 }
861 /*The function checks if there are live vf, return the num of them*/
862 static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
863 {
864         struct mlx4_priv *priv = mlx4_priv(dev);
865         struct mlx4_slave_state *s_state;
866         int i;
867         int ret = 0;
868
869         for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
870                 s_state = &priv->mfunc.master.slave_state[i];
871                 if (s_state->active && s_state->last_cmd !=
872                     MLX4_COMM_CMD_RESET) {
873                         mlx4_warn(dev, "%s: slave: %d is still active\n",
874                                   __func__, i);
875                         ret++;
876                 }
877         }
878         return ret;
879 }
880
881 int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
882 {
883         u32 qk = MLX4_RESERVED_QKEY_BASE;
884
885         if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
886             qpn < dev->phys_caps.base_proxy_sqpn)
887                 return -EINVAL;
888
889         if (qpn >= dev->phys_caps.base_tunnel_sqpn)
890                 /* tunnel qp */
891                 qk += qpn - dev->phys_caps.base_tunnel_sqpn;
892         else
893                 qk += qpn - dev->phys_caps.base_proxy_sqpn;
894         *qkey = qk;
895         return 0;
896 }
897 EXPORT_SYMBOL(mlx4_get_parav_qkey);
898
899 void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
900 {
901         struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
902
903         if (!mlx4_is_master(dev))
904                 return;
905
906         priv->virt2phys_pkey[slave][port - 1][i] = val;
907 }
908 EXPORT_SYMBOL(mlx4_sync_pkey_table);
909
910 void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
911 {
912         struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
913
914         if (!mlx4_is_master(dev))
915                 return;
916
917         priv->slave_node_guids[slave] = guid;
918 }
919 EXPORT_SYMBOL(mlx4_put_slave_node_guid);
920
921 __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
922 {
923         struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
924
925         if (!mlx4_is_master(dev))
926                 return 0;
927
928         return priv->slave_node_guids[slave];
929 }
930 EXPORT_SYMBOL(mlx4_get_slave_node_guid);
931
932 int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
933 {
934         struct mlx4_priv *priv = mlx4_priv(dev);
935         struct mlx4_slave_state *s_slave;
936
937         if (!mlx4_is_master(dev))
938                 return 0;
939
940         s_slave = &priv->mfunc.master.slave_state[slave];
941         return !!s_slave->active;
942 }
943 EXPORT_SYMBOL(mlx4_is_slave_active);
944
945 static void slave_adjust_steering_mode(struct mlx4_dev *dev,
946                                        struct mlx4_dev_cap *dev_cap,
947                                        struct mlx4_init_hca_param *hca_param)
948 {
949         dev->caps.steering_mode = hca_param->steering_mode;
950         if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED)
951                 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
952         else
953                 dev->caps.num_qp_per_mgm =
954                         4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);
955
956         mlx4_dbg(dev, "Steering mode is: %s\n",
957                  mlx4_steering_mode_str(dev->caps.steering_mode));
958 }
959
/*
 * Derive device capabilities for a slave (multi-function/VF) instance.
 *
 * Queries the HCA parameters and function capabilities from firmware
 * (through the master), validates that the configuration is usable
 * (known capability masks, page sizes, port count), fills in dev->caps
 * and dev->quotas, and allocates the per-port proxy/tunnel special-QP
 * number arrays.
 *
 * Returns 0 on success or a negative errno; on failure after the QP
 * arrays were allocated they are freed and reset to NULL.
 */
static int mlx4_slave_cap(struct mlx4_dev *dev)
{
	int                        err;
	u32                        page_size;
	struct mlx4_dev_cap        dev_cap;
	struct mlx4_func_cap       func_cap;
	struct mlx4_init_hca_param hca_param;
	int                        i;

	memset(&hca_param, 0, sizeof(hca_param));
	err = mlx4_QUERY_HCA(dev, &hca_param);
	if (err) {
		mlx4_err(dev, "QUERY_HCA command failed, aborting.\n");
		return err;
	}

	/* Fail if the HCA reports a global capability bit we do not know. */
	if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) !=
	    HCA_GLOBAL_CAP_MASK) {
		mlx4_err(dev, "Unknown hca global capabilities\n");
		return -ENOSYS;
	}

	mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;

	dev->caps.hca_core_clock = hca_param.hca_core_clock;

	memset(&dev_cap, 0, sizeof(dev_cap));
	dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
	err = mlx4_dev_cap(dev, &dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
		return err;
	}

	err = mlx4_QUERY_FW(dev);
	if (err)
		/* Non-fatal: only the FW version information is missing. */
		mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n");

	/* Memory windows are only usable when the master enabled them. */
	if (!hca_param.mw_enable) {
		dev->caps.flags      &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW;
		dev->caps.bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN;
	}

	/* page_size_cap is a mask of supported sizes; recover the minimum. */
	page_size = ~dev->caps.page_size_cap + 1;
	mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
	if (page_size > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than "
			 "kernel PAGE_SIZE of %d, aborting.\n",
			 page_size, (int)PAGE_SIZE);
		return -ENODEV;
	}

	/* slave gets uar page size from QUERY_HCA fw command */
	dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);

	/* TODO: relax this assumption */
	if (dev->caps.uar_page_size != PAGE_SIZE) {
		mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n",
			 dev->caps.uar_page_size, (int)PAGE_SIZE);
		return -ENODEV;
	}

	/* Function-wide capabilities/quotas (port 0 = general query). */
	memset(&func_cap, 0, sizeof(func_cap));
	err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
	if (err) {
		mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n",
			  err);
		return err;
	}

	/* Fail on unknown PF context behaviour bits, as above. */
	if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
	    PF_CONTEXT_BEHAVIOUR_MASK) {
		mlx4_err(dev, "Unknown pf context behaviour\n");
		return -ENOSYS;
	}

	dev->caps.num_ports             = func_cap.num_ports;
	dev->quotas.qp                  = func_cap.qp_quota;
	dev->quotas.srq                 = func_cap.srq_quota;
	dev->quotas.cq                  = func_cap.cq_quota;
	dev->quotas.mpt                 = func_cap.mpt_quota;
	dev->quotas.mtt                 = func_cap.mtt_quota;
	dev->caps.num_qps               = 1 << hca_param.log_num_qps;
	dev->caps.num_srqs              = 1 << hca_param.log_num_srqs;
	dev->caps.num_cqs               = 1 << hca_param.log_num_cqs;
	dev->caps.num_mpts              = 1 << hca_param.log_mpt_sz;
	dev->caps.num_eqs               = func_cap.max_eq;
	dev->caps.reserved_eqs          = func_cap.reserved_eq;
	dev->caps.num_pds               = MLX4_NUM_PDS;
	/* Slaves do not manage multicast groups themselves. */
	dev->caps.num_mgms              = 0;
	dev->caps.num_amgms             = 0;

	if (dev->caps.num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, "
			 "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	/* Per-port special QP number tables (proxy/tunnel QP0 and QP1). */
	dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
	dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
	dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
	dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);

	if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
		err = -ENOMEM;
		goto err_mem;
	}

	/* Per-port queries: special QPNs, counters, gid/pkey table sizes. */
	for (i = 1; i <= dev->caps.num_ports; ++i) {
		err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap);
		if (err) {
			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for"
				 " port %d, aborting (%d).\n", i, err);
			goto err_mem;
		}
		dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
		dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
		dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
		dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
		dev->caps.def_counter_index[i - 1] = func_cap.def_counter_index;

		dev->caps.port_mask[i] = dev->caps.port_type[i];
		err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
						      &dev->caps.gid_table_len[i],
						      &dev->caps.pkey_table_len[i]);
		if (err)
			goto err_mem;
	}

	/* The usable UAR pages must fit in PCI BAR 2. */
	if (dev->caps.uar_page_size * (dev->caps.num_uars -
				       dev->caps.reserved_uars) >
				       pci_resource_len(dev->pdev, 2)) {
		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than "
			 "PCI resource 2 size of 0x%llx, aborting.\n",
			 dev->caps.uar_page_size * dev->caps.num_uars,
			 (unsigned long long) pci_resource_len(dev->pdev, 2));
		err = -ENOMEM;
		goto err_mem;
	}

	/* EQE/CQE sizes follow whatever the master enabled for us. */
	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
		dev->caps.eqe_size   = 64;
		dev->caps.eqe_factor = 1;
	} else {
		dev->caps.eqe_size   = 32;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
		dev->caps.cqe_size   = 64;
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
	} else {
		dev->caps.cqe_size   = 32;
	}

	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
	mlx4_warn(dev, "Timestamping is not supported in slave mode.\n");

	slave_adjust_steering_mode(dev, &dev_cap, &hca_param);

	if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
	    dev->caps.bf_reg_size)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;

	return 0;

err_mem:
	/* kfree(NULL) is a no-op, so partial allocation is handled too. */
	kfree(dev->caps.qp0_tunnel);
	kfree(dev->caps.qp0_proxy);
	kfree(dev->caps.qp1_tunnel);
	kfree(dev->caps.qp1_proxy);
	dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
		dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;

	return err;
}
1138
1139 static void mlx4_request_modules(struct mlx4_dev *dev)
1140 {
1141         int port;
1142         int has_ib_port = false;
1143         int has_eth_port = false;
1144 #define EN_DRV_NAME     "mlx4_en"
1145 #define IB_DRV_NAME     "mlx4_ib"
1146
1147         for (port = 1; port <= dev->caps.num_ports; port++) {
1148                 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
1149                         has_ib_port = true;
1150                 else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
1151                         has_eth_port = true;
1152         }
1153
1154         if (has_ib_port)
1155                 request_module_nowait(IB_DRV_NAME);
1156         if (has_eth_port)
1157                 request_module_nowait(EN_DRV_NAME);
1158 }
1159
/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 *
 * If any port's requested type differs from its current type, the
 * device is unregistered, every port is closed and reconfigured with
 * its new type, and the device is registered again.  Returns 0 on
 * success or the first error from SET_PORT / register_device.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_types)
{
	int err = 0;
	int change = 0;
	int port;

	for (port = 0; port < dev->caps.num_ports; port++) {
		/* Flag a change if the new type differs from the current
		 * one.  (port_types[] is 0-based, caps.port_type[] is
		 * 1-based.) */
		if (port_types[port] != dev->caps.port_type[port + 1])
			change = 1;
	}
	if (change) {
		mlx4_unregister_device(dev);
		for (port = 1; port <= dev->caps.num_ports; port++) {
			mlx4_CLOSE_PORT(dev, port);
			dev->caps.port_type[port] = port_types[port - 1];
			err = mlx4_SET_PORT(dev, port, -1);
			if (err) {
				mlx4_err(dev, "Failed to set port %d, "
					      "aborting\n", port);
				goto out;
			}
		}
		mlx4_set_port_mask(dev);
		err = mlx4_register_device(dev);
		if (err) {
			mlx4_err(dev, "Failed to register device\n");
			goto out;
		}
		/* Pull in the ULP modules matching the new port types. */
		mlx4_request_modules(dev);
	}

out:
	return err;
}
1201
1202 static ssize_t show_port_type(struct device *dev,
1203                               struct device_attribute *attr,
1204                               char *buf)
1205 {
1206         struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1207                                                    port_attr);
1208         struct mlx4_dev *mdev = info->dev;
1209         char type[8];
1210
1211         sprintf(type, "%s",
1212                 (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
1213                 "ib" : "eth");
1214         if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
1215                 sprintf(buf, "auto (%s)\n", type);
1216         else
1217                 sprintf(buf, "%s\n", type);
1218
1219         return strlen(buf);
1220 }
1221
/*
 * sysfs "store" handler for a port's type.  Accepts "ib", "eth" or
 * "auto", records the request as a temporary type, validates it
 * against the HCA's supported types and the other ports' pending
 * requests, then applies the whole configuration via
 * mlx4_change_port_types().  Sensing is stopped around the change and
 * restarted afterwards.  Returns @count on success or a negative errno.
 */
static ssize_t set_port_type(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	enum mlx4_port_type types[MLX4_MAX_PORTS];
	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
	int i;
	int err = 0;

	/* sysfs delivers the value with a trailing newline. */
	if (!strcmp(buf, "ib\n"))
		info->tmp_type = MLX4_PORT_TYPE_IB;
	else if (!strcmp(buf, "eth\n"))
		info->tmp_type = MLX4_PORT_TYPE_ETH;
	else if (!strcmp(buf, "auto\n"))
		info->tmp_type = MLX4_PORT_TYPE_AUTO;
	else {
		mlx4_err(mdev, "%s is not supported port type\n", buf);
		return -EINVAL;
	}

	/* The requested type must be a subset of the supported mask. */
	if ((info->tmp_type & mdev->caps.supported_type[info->port]) !=
	    info->tmp_type) {
		mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n",
			 info->port);
		return -EINVAL;
	}

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	/* Possible type is always the one that was delivered */
	mdev->caps.possible_type[info->port] = info->tmp_type;

	/* Build the candidate type list: a port's pending (tmp) type if
	 * set, otherwise its possible type; AUTO falls back to the
	 * currently active type. */
	for (i = 0; i < mdev->caps.num_ports; i++) {
		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
					mdev->caps.possible_type[i+1];
		if (types[i] == MLX4_PORT_TYPE_AUTO)
			types[i] = mdev->caps.port_type[i+1];
	}

	/* Without DPDP and sensing support, AUTO cannot be honoured;
	 * revert any AUTO requests and report the error. */
	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
	    !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
		for (i = 1; i <= mdev->caps.num_ports; i++) {
			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
				err = -EINVAL;
			}
		}
	}
	if (err) {
		mlx4_err(mdev, "Auto sensing is not supported on this HCA. "
			       "Set only 'eth' or 'ib' for both ports "
			       "(should be the same)\n");
		goto out;
	}

	mlx4_do_sense_ports(mdev, new_types, types);

	err = mlx4_check_port_params(mdev, new_types);
	if (err)
		goto out;

	/* We are about to apply the changes after the configuration
	 * was verified, no need to remember the temporary types
	 * any more */
	for (i = 0; i < mdev->caps.num_ports; i++)
		priv->port[i + 1].tmp_type = 0;

	err = mlx4_change_port_types(mdev, new_types);

out:
	mlx4_start_sense(mdev);
	mutex_unlock(&priv->port_mutex);
	return err ? err : count;
}
1300
/* IBTA encoding of IB MTU values: 256..4096 bytes map to codes 1..5. */
enum ibta_mtu {
	IB_MTU_256  = 1,
	IB_MTU_512  = 2,
	IB_MTU_1024 = 3,
	IB_MTU_2048 = 4,
	IB_MTU_4096 = 5
};

/* Byte MTU -> IBTA code; -1 when @mtu is not a valid IB MTU. */
static inline int int_to_ibta_mtu(int mtu)
{
	if (mtu == 256)
		return IB_MTU_256;
	if (mtu == 512)
		return IB_MTU_512;
	if (mtu == 1024)
		return IB_MTU_1024;
	if (mtu == 2048)
		return IB_MTU_2048;
	if (mtu == 4096)
		return IB_MTU_4096;
	return -1;
}

/* IBTA code -> byte MTU; -1 for an unknown code. */
static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
{
	if (mtu == IB_MTU_256)
		return 256;
	if (mtu == IB_MTU_512)
		return 512;
	if (mtu == IB_MTU_1024)
		return 1024;
	if (mtu == IB_MTU_2048)
		return 2048;
	if (mtu == IB_MTU_4096)
		return 4096;
	return -1;
}
1332
1333 static ssize_t
1334 show_board(struct device *device, struct device_attribute *attr,
1335                           char *buf)
1336 {
1337         struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info,
1338                                                    board_attr);
1339         struct mlx4_dev *mdev = info->dev;
1340
1341         return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
1342                        mdev->board_id);
1343 }
1344
1345 static ssize_t
1346 show_hca(struct device *device, struct device_attribute *attr,
1347                         char *buf)
1348 {
1349         struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info,
1350                                                    hca_attr);
1351         struct mlx4_dev *mdev = info->dev;
1352
1353         return sprintf(buf, "MT%d\n", mdev->pdev->device);
1354 }
1355
1356 static ssize_t
1357 show_firmware_version(struct device *dev,
1358                                 struct device_attribute *attr,
1359                                 char *buf)
1360 {
1361         struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info,
1362                                                    firmware_attr);
1363         struct mlx4_dev *mdev = info->dev;
1364
1365         return sprintf(buf, "%d.%d.%d\n", (int)(mdev->caps.fw_ver >> 32),
1366                        (int)(mdev->caps.fw_ver >> 16) & 0xffff,
1367                        (int)mdev->caps.fw_ver & 0xffff);
1368 }
1369
1370 static ssize_t show_port_ib_mtu(struct device *dev,
1371                              struct device_attribute *attr,
1372                              char *buf)
1373 {
1374         struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1375                                                    port_mtu_attr);
1376         struct mlx4_dev *mdev = info->dev;
1377
1378         /* When port type is eth, port mtu value isn't used. */
1379         if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
1380                 return -EINVAL;
1381
1382         sprintf(buf, "%d\n",
1383                         ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
1384         return strlen(buf);
1385 }
1386
/*
 * sysfs "store" handler for a port's IB MTU.  Parses the value,
 * validates it against the IBTA MTU encodings, then re-applies the
 * port configuration (close + SET_PORT for every port, device
 * unregister/register) so the new MTU takes effect.  Returns @count
 * on success or a negative errno.
 */
static ssize_t set_port_ib_mtu(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_mtu_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	int err, port, mtu, ibta_mtu = -1;

	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
		return -EINVAL;
	}

	/* int_to_ibta_mtu() rejects anything that is not a valid IB MTU,
	 * so unparsable input (simple_strtol -> 0) is caught below. */
	mtu = (int) simple_strtol(buf, NULL, 0);
	ibta_mtu = int_to_ibta_mtu(mtu);

	if (ibta_mtu < 0) {
		mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf);
		return -EINVAL;
	}

	mdev->caps.port_ib_mtu[info->port] = ibta_mtu;

	/* Reconfigure all ports under the port mutex with sensing paused. */
	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	mlx4_unregister_device(mdev);
	for (port = 1; port <= mdev->caps.num_ports; port++) {
		mlx4_CLOSE_PORT(mdev, port);
		err = mlx4_SET_PORT(mdev, port, -1);
		if (err) {
			mlx4_err(mdev, "Failed to set port %d, "
				      "aborting\n", port);
			/* NOTE(review): device is left unregistered on
			 * this path — confirm intended. */
			goto err_set_port;
		}
	}
	err = mlx4_register_device(mdev);
err_set_port:
	mutex_unlock(&priv->port_mutex);
	mlx4_start_sense(mdev);
	return err ? err : count;
}
1430
/*
 * Allocate ICM backing for the firmware area, map it (MAP_FA) and
 * start the firmware (RUN_FW).
 *
 * On failure after a successful MAP_FA, the area is unmapped first;
 * the ICM chunk is freed only when it was never mapped or the unmap
 * succeeded — if UNMAP_FA fails the firmware may still reference the
 * memory, so it is deliberately not freed.
 */
static int mlx4_load_fw(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err, unmap_flag = 0;

	priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
					 GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.fw_icm) {
		mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
		return -ENOMEM;
	}

	err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
	if (err) {
		mlx4_err(dev, "MAP_FA command failed, aborting.\n");
		goto err_free;
	}

	err = mlx4_RUN_FW(dev);
	if (err) {
		mlx4_err(dev, "RUN_FW command failed, aborting.\n");
		goto err_unmap_fa;
	}

	return 0;

err_unmap_fa:
	unmap_flag = mlx4_UNMAP_FA(dev);
	if (unmap_flag)
		pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");

err_free:
	/* Free only if the FW area is not (possibly) still mapped. */
	if (!unmap_flag)
		mlx4_free_icm(dev, priv->fw.fw_icm, 0);
	return err;
}
1467
/*
 * Map the four cMPT (context Memory Protection Table) ICM regions — one
 * each for QPs, SRQs, CQs and EQs — at their type-specific offsets from
 * cmpt_base.  Each region's offset is (type * cmpt_entry_sz) << MLX4_CMPT_SHIFT.
 *
 * On failure, every table mapped so far is cleaned up in reverse order
 * via the cascading error labels.  Returns 0 on success or the negative
 * errno from the failing mlx4_init_icm_table() call.
 */
static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
				int cmpt_entry_sz)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;
	int num_eqs;

	/* QP cMPT: num_qps entries, FW-reserved range pre-allocated. */
	err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_QP *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err)
		goto err;

	/* SRQ cMPT. */
	err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_SRQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_srqs,
				  dev->caps.reserved_srqs, 0, 0);
	if (err)
		goto err_qp;

	/* CQ cMPT. */
	err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_CQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_cqs,
				  dev->caps.reserved_cqs, 0, 0);
	if (err)
		goto err_srq;

	/* EQ cMPT: sized by the physical EQ count, all entries reserved. */
	num_eqs = dev->phys_caps.num_phys_eqs;
	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_EQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
	if (err)
		goto err_cq;

	return 0;

	/* Unwind in reverse order of setup. */
err_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);

err_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);

err_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err:
	return err;
}
1526
/*
 * Size, allocate and map the entire ICM (InfiniHost Context Memory)
 * layout for the HCA: auxiliary area, cMPT tables, EQ/MTT/dMPT contexts,
 * the QP context family (QPC/AUXC/ALTC/RDMARC), CQ/SRQ contexts and the
 * multicast group table.
 *
 * The mapping order matters and the ten cascading error labels undo it
 * in exact reverse order.  As in mlx4_load_fw(), if UNMAP_ICM_AUX fails
 * the aux ICM chunk is intentionally leaked rather than freed while the
 * device might still reference it.
 *
 * Returns 0 on success or a negative errno.
 */
static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
			 struct mlx4_init_hca_param *init_hca, u64 icm_size)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 aux_pages;
	int num_eqs;
	int err, unmap_flag = 0;

	/* Ask FW how many aux pages the requested ICM size needs. */
	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
	if (err) {
		mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n");
		return err;
	}

	mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n",
		 (unsigned long long) icm_size >> 10,
		 (unsigned long long) aux_pages << 2);

	priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
					  GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.aux_icm) {
		mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
		return -ENOMEM;
	}

	err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
	if (err) {
		mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n");
		goto err_free_aux;
	}

	err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
	if (err) {
		mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n");
		goto err_unmap_aux;
	}


	num_eqs = dev->phys_caps.num_phys_eqs;
	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
				  num_eqs, num_eqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
		goto err_unmap_cmpt;
	}

	/*
	 * Reserved MTT entries must be aligned up to a cacheline
	 * boundary, since the FW will write to them, while the driver
	 * writes to all other MTT entries. (The variable
	 * dev->caps.mtt_entry_sz below is really the MTT segment
	 * size, not the raw entry size)
	 */
	dev->caps.reserved_mtts =
		ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
		      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;

	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
				  init_hca->mtt_base,
				  dev->caps.mtt_entry_sz,
				  dev->caps.num_mtts,
				  dev->caps.reserved_mtts, 1, 0);
	if (err) {
		mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
		goto err_unmap_eq;
	}

	err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
				  init_hca->dmpt_base,
				  dev_cap->dmpt_entry_sz,
				  dev->caps.num_mpts,
				  dev->caps.reserved_mrws, 1, 1);
	if (err) {
		mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
		goto err_unmap_mtt;
	}

	/* QP context family: all four tables span num_qps entries with
	 * the FW region reserved. */
	err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
				  init_hca->qpc_base,
				  dev_cap->qpc_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
		goto err_unmap_dmpt;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
				  init_hca->auxc_base,
				  dev_cap->aux_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
		goto err_unmap_qp;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
				  init_hca->altc_base,
				  dev_cap->altc_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
		goto err_unmap_auxc;
	}

	/* RDMARC entry size scales with the configured rdmarc_shift. */
	err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
				  init_hca->rdmarc_base,
				  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
		goto err_unmap_altc;
	}

	err = mlx4_init_icm_table(dev, &priv->cq_table.table,
				  init_hca->cqc_base,
				  dev_cap->cqc_entry_sz,
				  dev->caps.num_cqs,
				  dev->caps.reserved_cqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
		goto err_unmap_rdmarc;
	}

	err = mlx4_init_icm_table(dev, &priv->srq_table.table,
				  init_hca->srqc_base,
				  dev_cap->srq_entry_sz,
				  dev->caps.num_srqs,
				  dev->caps.reserved_srqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
		goto err_unmap_cq;
	}

	/*
	 * For flow steering device managed mode it is required to use
	 * mlx4_init_icm_table. For B0 steering mode it's not strictly
	 * required, but for simplicity just map the whole multicast
	 * group table now.  The table isn't very big and it's a lot
	 * easier than trying to track ref counts.
	 */
	err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
				  init_hca->mc_base,
				  mlx4_get_mgm_entry_size(dev),
				  dev->caps.num_mgms + dev->caps.num_amgms,
				  dev->caps.num_mgms + dev->caps.num_amgms,
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
		goto err_unmap_srq;
	}

	return 0;

	/* Unwind strictly in reverse order of the mappings above. */
err_unmap_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);

err_unmap_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);

err_unmap_rdmarc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);

err_unmap_altc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);

err_unmap_auxc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);

err_unmap_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);

err_unmap_dmpt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);

err_unmap_mtt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);

err_unmap_eq:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);

err_unmap_cmpt:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err_unmap_aux:
	unmap_flag = mlx4_UNMAP_ICM_AUX(dev);
	if (unmap_flag)
		pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n");

err_free_aux:
	/* Leak the aux ICM if the device may still be mapping it. */
	if (!unmap_flag)
		mlx4_free_icm(dev, priv->fw.aux_icm, 0);

	return err;
}
1733
1734 static void mlx4_free_icms(struct mlx4_dev *dev)
1735 {
1736         struct mlx4_priv *priv = mlx4_priv(dev);
1737
1738         mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
1739         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1740         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1741         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1742         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1743         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1744         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1745         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1746         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1747         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1748         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1749         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1750         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1751         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1752
1753         if (!mlx4_UNMAP_ICM_AUX(dev))
1754                 mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1755         else
1756                 pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n");
1757 }
1758
1759 static void mlx4_slave_exit(struct mlx4_dev *dev)
1760 {
1761         struct mlx4_priv *priv = mlx4_priv(dev);
1762
1763         mutex_lock(&priv->cmd.slave_cmd_mutex);
1764         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
1765                 mlx4_warn(dev, "Failed to close slave function.\n");
1766         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1767 }
1768
1769 static int map_bf_area(struct mlx4_dev *dev)
1770 {
1771         struct mlx4_priv *priv = mlx4_priv(dev);
1772         resource_size_t bf_start;
1773         resource_size_t bf_len;
1774         int err = 0;
1775
1776         if (!dev->caps.bf_reg_size)
1777                 return -ENXIO;
1778
1779         bf_start = pci_resource_start(dev->pdev, 2) +
1780                         (dev->caps.num_uars << PAGE_SHIFT);
1781         bf_len = pci_resource_len(dev->pdev, 2) -
1782                         (dev->caps.num_uars << PAGE_SHIFT);
1783         priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
1784         if (!priv->bf_mapping)
1785                 err = -ENOMEM;
1786
1787         return err;
1788 }
1789
1790 static void unmap_bf_area(struct mlx4_dev *dev)
1791 {
1792         if (mlx4_priv(dev)->bf_mapping)
1793                 io_mapping_free(mlx4_priv(dev)->bf_mapping);
1794 }
1795
1796 int mlx4_read_clock(struct mlx4_dev *dev)
1797 {
1798         u32 clockhi, clocklo, clockhi1;
1799         cycle_t cycles;
1800         int i;
1801         struct mlx4_priv *priv = mlx4_priv(dev);
1802
1803         if (!priv->clock_mapping)
1804                 return -ENOTSUPP;
1805
1806         for (i = 0; i < 10; i++) {
1807                 clockhi = swab32(readl(priv->clock_mapping));
1808                 clocklo = swab32(readl(priv->clock_mapping + 4));
1809                 clockhi1 = swab32(readl(priv->clock_mapping));
1810                 if (clockhi == clockhi1)
1811                         break;
1812         }
1813
1814         cycles = (u64) clockhi << 32 | (u64) clocklo;
1815
1816         return cycles;
1817 }
1818 EXPORT_SYMBOL_GPL(mlx4_read_clock);
1819
1820
1821 static int map_internal_clock(struct mlx4_dev *dev)
1822 {
1823         struct mlx4_priv *priv = mlx4_priv(dev);
1824
1825         priv->clock_mapping = ioremap(pci_resource_start(dev->pdev,
1826                                 priv->fw.clock_bar) +
1827                                 priv->fw.clock_offset, MLX4_CLOCK_SIZE);
1828
1829         if (!priv->clock_mapping)
1830                 return -ENOMEM;
1831
1832         return 0;
1833 }
1834
1835
1836 int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
1837                                    struct mlx4_clock_params *params)
1838 {
1839         struct mlx4_priv *priv = mlx4_priv(dev);
1840
1841         if (mlx4_is_slave(dev))
1842                 return -ENOTSUPP;
1843         if (!params)
1844                 return -EINVAL;
1845
1846         params->bar = priv->fw.clock_bar;
1847         params->offset = priv->fw.clock_offset;
1848         params->size = MLX4_CLOCK_SIZE;
1849
1850         return 0;
1851 }
1852 EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params);
1853
1854 static void unmap_internal_clock(struct mlx4_dev *dev)
1855 {
1856         struct mlx4_priv *priv = mlx4_priv(dev);
1857
1858         if (priv->clock_mapping)
1859                 iounmap(priv->clock_mapping);
1860 }
1861
1862 static void mlx4_close_hca(struct mlx4_dev *dev)
1863 {
1864         unmap_internal_clock(dev);
1865         unmap_bf_area(dev);
1866         if (mlx4_is_slave(dev)) {
1867                 mlx4_slave_exit(dev);
1868         } else {
1869                 mlx4_CLOSE_HCA(dev, 0);
1870                 mlx4_free_icms(dev);
1871
1872                 if (!mlx4_UNMAP_FA(dev))
1873                          mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
1874                 else
1875                         pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
1876         }
1877 }
1878
1879 static int mlx4_init_slave(struct mlx4_dev *dev)
1880 {
1881         struct mlx4_priv *priv = mlx4_priv(dev);
1882         u64 dma = (u64) priv->mfunc.vhcr_dma;
1883         int num_of_reset_retries = NUM_OF_RESET_RETRIES;
1884         int ret_from_reset = 0;
1885         u32 slave_read;
1886         u32 cmd_channel_ver;
1887
1888         mutex_lock(&priv->cmd.slave_cmd_mutex);
1889         priv->cmd.max_cmds = 1;
1890         mlx4_warn(dev, "Sending reset\n");
1891         ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
1892                                        MLX4_COMM_TIME);
1893         /* if we are in the middle of flr the slave will try
1894          * NUM_OF_RESET_RETRIES times before leaving.*/
1895         if (ret_from_reset) {
1896                 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
1897                         msleep(SLEEP_TIME_IN_RESET);
1898                         while (ret_from_reset && num_of_reset_retries) {
1899                                 mlx4_warn(dev, "slave is currently in the"
1900                                           "middle of FLR. retrying..."
1901                                           "(try num:%d)\n",
1902                                           (NUM_OF_RESET_RETRIES -
1903                                            num_of_reset_retries  + 1));
1904                                 ret_from_reset =
1905                                         mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET,
1906                                                       0, MLX4_COMM_TIME);
1907                                 num_of_reset_retries = num_of_reset_retries - 1;
1908                         }
1909                 } else
1910                         goto err;
1911         }
1912
1913         /* check the driver version - the slave I/F revision
1914          * must match the master's */
1915         slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
1916         cmd_channel_ver = mlx4_comm_get_version();
1917
1918         if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
1919                 MLX4_COMM_GET_IF_REV(slave_read)) {
1920                 mlx4_err(dev, "slave driver version is not supported"
1921                          " by the master\n");
1922                 goto err;
1923         }
1924
1925         mlx4_warn(dev, "Sending vhcr0\n");
1926         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
1927                                                     MLX4_COMM_TIME))
1928                 goto err;
1929         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
1930                                                     MLX4_COMM_TIME))
1931                 goto err;
1932         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
1933                                                     MLX4_COMM_TIME))
1934                 goto err;
1935         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
1936                 goto err;
1937
1938         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1939         return 0;
1940
1941 err:
1942         mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
1943         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1944         return -EIO;
1945 }
1946
1947 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
1948 {
1949         int i;
1950
1951         for (i = 1; i <= dev->caps.num_ports; i++) {
1952                 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
1953                         dev->caps.gid_table_len[i] =
1954                                 mlx4_get_slave_num_gids(dev, 0);
1955                 else
1956                         dev->caps.gid_table_len[i] = 1;
1957                 dev->caps.pkey_table_len[i] =
1958                         dev->phys_caps.pkey_phys_table_len[i] - 1;
1959         }
1960 }
1961
1962 static int choose_log_fs_mgm_entry_size(int qp_per_entry)
1963 {
1964         int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
1965
1966         for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
1967               i++) {
1968                 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
1969                         break;
1970         }
1971
1972         return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
1973 }
1974
/*
 * Select the packet steering mode for the device, in order of
 * preference: device-managed flow steering (DMFS), then B0, then A0.
 *
 * DMFS is chosen only when the user did not force an MGM entry size
 * (mlx4_log_num_mgm_entry_size == -1), the device advertises FS_EN,
 * each flow-steering entry can hold a QP per function (for multi-
 * function setups), and a legal log entry size exists.  B0 requires
 * both unicast and multicast VEP steering flags; otherwise A0 is used.
 */
static void choose_steering_mode(struct mlx4_dev *dev,
				 struct mlx4_dev_cap *dev_cap)
{
	int nvfs;

	mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(dev->pdev), 0, &nvfs);
	/* High-rate steering tuning: strip the steering capability flags
	 * on single-function devices so A0 mode is selected below. */
	if (high_rate_steer && !mlx4_is_mfunc(dev)) {
		dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER |
				     MLX4_DEV_CAP_FLAG_VEP_UC_STEER);
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN;
	}

	if (mlx4_log_num_mgm_entry_size == -1 &&
	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
	    (!mlx4_is_mfunc(dev) ||
	     (dev_cap->fs_max_num_qp_per_entry >= (nvfs + 1))) &&
	    choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
		MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
		dev->oper_log_mgm_entry_size =
			choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
		dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
	} else {
		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
		    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
			dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
		else {
			dev->caps.steering_mode = MLX4_STEERING_MODE_A0;

			if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
			    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
				mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
					  "set to use B0 steering. Falling back to A0 steering mode.\n");
		}
		/* Non-DMFS modes honor the module parameter when it is
		 * positive, otherwise use the built-in default. */
		dev->oper_log_mgm_entry_size =
			mlx4_log_num_mgm_entry_size > 0 ?
			mlx4_log_num_mgm_entry_size :
			MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
		dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
	}
	mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
		 "log_num_mgm_entry_size = %d\n",
		 mlx4_steering_mode_str(dev->caps.steering_mode),
		 dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size);
}
2020
2021 static int mlx4_init_hca(struct mlx4_dev *dev)
2022 {
2023         struct mlx4_priv          *priv = mlx4_priv(dev);
2024         struct mlx4_dev_cap        *dev_cap = NULL;
2025         struct mlx4_adapter        adapter;
2026         struct mlx4_mod_stat_cfg   mlx4_cfg;
2027         struct mlx4_profile        profile;
2028         struct mlx4_init_hca_param init_hca;
2029         u64 icm_size;
2030         int err;
2031
2032         if (!mlx4_is_slave(dev)) {
2033                 err = mlx4_QUERY_FW(dev);
2034                 if (err) {
2035                         if (err == -EACCES)
2036                                 mlx4_info(dev, "non-primary physical function, skipping.\n");
2037                         else
2038                                 mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
2039                         return err;
2040                 }
2041
2042                 err = mlx4_load_fw(dev);
2043                 if (err) {
2044                         mlx4_err(dev, "Failed to start FW, aborting.\n");
2045                         return err;
2046                 }
2047
2048                 mlx4_cfg.log_pg_sz_m = 1;
2049                 mlx4_cfg.log_pg_sz = 0;
2050                 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
2051                 if (err)
2052                         mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
2053
2054                 dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL);
2055                 if (!dev_cap) {
2056                         mlx4_err(dev, "Failed to allocate memory for dev_cap\n");
2057                         err = -ENOMEM;
2058                         goto err_stop_fw;
2059                 }
2060
2061                 err = mlx4_dev_cap(dev, dev_cap);
2062                 if (err) {
2063                         mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
2064                         goto err_stop_fw;
2065                 }
2066
2067                 choose_steering_mode(dev, dev_cap);
2068
2069                 if (mlx4_is_master(dev))
2070                         mlx4_parav_master_pf_caps(dev);
2071
2072                 process_mod_param_profile(&profile);
2073                 if (dev->caps.steering_mode ==
2074                     MLX4_STEERING_MODE_DEVICE_MANAGED)
2075                         profile.num_mcg = MLX4_FS_NUM_MCG;
2076
2077                 icm_size = mlx4_make_profile(dev, &profile, dev_cap,
2078                                              &init_hca);
2079                 if ((long long) icm_size < 0) {
2080                         err = icm_size;
2081                         goto err_stop_fw;
2082                 }
2083
2084                 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
2085
2086                 init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
2087                 init_hca.uar_page_sz = PAGE_SHIFT - 12;
2088
2089                 err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size);
2090                 if (err)
2091                         goto err_stop_fw;
2092
2093                 init_hca.mw_enable = 1;
2094
2095                 err = mlx4_INIT_HCA(dev, &init_hca);
2096                 if (err) {
2097                         mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
2098                         goto err_free_icm;
2099                 }
2100
2101                 if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
2102                         err = mlx4_query_func(dev, dev_cap);
2103                         if (err < 0) {
2104                                 mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
2105                                 goto err_stop_fw;
2106                         } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
2107                                 dev->caps.num_eqs = dev_cap->max_eqs;
2108                                 dev->caps.reserved_eqs = dev_cap->reserved_eqs;
2109                                 dev->caps.reserved_uars = dev_cap->reserved_uars;
2110                         }
2111                 }
2112
2113                 /*
2114                  * Read HCA frequency by QUERY_HCA command
2115                  */
2116                 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
2117                         memset(&init_hca, 0, sizeof(init_hca));
2118                         err = mlx4_QUERY_HCA(dev, &init_hca);
2119                         if (err) {
2120                                 mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n");
2121                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2122                         } else {
2123                                 dev->caps.hca_core_clock =
2124                                         init_hca.hca_core_clock;
2125                         }
2126
2127                         /* In case we got HCA frequency 0 - disable timestamping
2128                          * to avoid dividing by zero
2129                          */
2130                         if (!dev->caps.hca_core_clock) {
2131                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2132                                 mlx4_err(dev, "HCA frequency is 0. Timestamping is not supported.");
2133                         } else if (map_internal_clock(dev)) {
2134                                 /* Map internal clock,
2135                                  * in case of failure disable timestamping
2136                                  */
2137                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2138                                 mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n");
2139                         }
2140                 }
2141         } else {
2142                 err = mlx4_init_slave(dev);
2143                 if (err) {
2144                         mlx4_err(dev, "Failed to initialize slave\n");
2145                         return err;
2146                 }
2147
2148                 err = mlx4_slave_cap(dev);
2149                 if (err) {
2150                         mlx4_err(dev, "Failed to obtain slave caps\n");
2151                         goto err_close;
2152                 }
2153         }
2154
2155         if (map_bf_area(dev))
2156                 mlx4_dbg(dev, "Failed to map blue flame area\n");
2157
2158         /* Only the master set the ports, all the rest got it from it.*/
2159         if (!mlx4_is_slave(dev))
2160                 mlx4_set_port_mask(dev);
2161
2162         err = mlx4_QUERY_ADAPTER(dev, &adapter);
2163         if (err) {
2164                 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
2165                 goto unmap_bf;
2166         }
2167
2168         priv->eq_table.inta_pin = adapter.inta_pin;
2169         memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
2170         memcpy(dev->vsd, adapter.vsd, sizeof(dev->vsd));
2171         dev->vsd_vendor_id = adapter.vsd_vendor_id;
2172
2173         if (!mlx4_is_slave(dev))
2174                 kfree(dev_cap);
2175
2176         return 0;
2177
2178 unmap_bf:
2179         if (!mlx4_is_slave(dev))
2180                 unmap_internal_clock(dev);
2181         unmap_bf_area(dev);
2182
2183         if (mlx4_is_slave(dev)) {
2184                 kfree(dev->caps.qp0_tunnel);
2185                 kfree(dev->caps.qp0_proxy);
2186                 kfree(dev->caps.qp1_tunnel);
2187                 kfree(dev->caps.qp1_proxy);
2188         }
2189
2190 err_close:
2191         if (mlx4_is_slave(dev))
2192                 mlx4_slave_exit(dev);
2193         else
2194                 mlx4_CLOSE_HCA(dev, 0);
2195
2196 err_free_icm:
2197         if (!mlx4_is_slave(dev))
2198                 mlx4_free_icms(dev);
2199
2200 err_stop_fw:
2201         if (!mlx4_is_slave(dev)) {
2202                 if (!mlx4_UNMAP_FA(dev))
2203                         mlx4_free_icm(dev, priv->fw.fw_icm, 0);
2204                 else
2205                         pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
2206                 kfree(dev_cap);
2207         }
2208         return err;
2209 }
2210
2211 static int mlx4_init_counters_table(struct mlx4_dev *dev)
2212 {
2213         struct mlx4_priv *priv = mlx4_priv(dev);
2214         int nent_pow2, port_indx, vf_index, num_counters;
2215         int res, index = 0;
2216         struct counter_index *new_counter_index;
2217
2218
2219         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2220                 return -ENOENT;
2221
2222         if (!mlx4_is_slave(dev) &&
2223             dev->caps.max_counters == dev->caps.max_extended_counters) {
2224                 res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0,
2225                                MLX4_CMD_SET_IF_STAT,
2226                                MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
2227                 if (res) {
2228                         mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res);
2229                         return res;
2230                 }
2231         }
2232
2233         mutex_init(&priv->counters_table.mutex);
2234
2235         if (mlx4_is_slave(dev)) {
2236                 for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
2237                         INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
2238                         if (dev->caps.def_counter_index[port_indx] != 0xFF) {
2239                                 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
2240                                 if (!new_counter_index)
2241                                         return -ENOMEM;
2242                                 new_counter_index->index = dev->caps.def_counter_index[port_indx];
2243                                 list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]);
2244                         }
2245                 }
2246                 mlx4_dbg(dev, "%s: slave allocated %d counters for %d ports\n",
2247                          __func__, dev->caps.num_ports, dev->caps.num_ports);
2248                 return 0;
2249         }
2250
2251         nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
2252
2253         for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
2254                 INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
2255                 /* allocating 2 counters per port for PFs */
2256                 /* For the PF, the ETH default counters are 0,2; */
2257                 /* and the RoCE default counters are 1,3 */
2258                 for (num_counters = 0; num_counters < 2; num_counters++, index++) {
2259                         new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
2260                         if (!new_counter_index)
2261                                 return -ENOMEM;
2262                         new_counter_index->index = index;
2263                         list_add_tail(&new_counter_index->list,
2264                                       &priv->counters_table.global_port_list[port_indx]);
2265                 }
2266         }
2267
2268         if (mlx4_is_master(dev)) {
2269                 for (vf_index = 0; vf_index < dev->num_vfs; vf_index++) {
2270                         for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
2271                                 INIT_LIST_HEAD(&priv->counters_table.vf_list[vf_index][port_indx]);
2272                                 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
2273                                 if (!new_counter_index)
2274                                         return -ENOMEM;
2275                                 if (index <  nent_pow2 - 2) {
2276                                         new_counter_index->index = index;
2277                                         index++;
2278                                 } else {
2279                                         new_counter_index->index = MLX4_SINK_COUNTER_INDEX;
2280                                 }
2281
2282                                 list_add_tail(&new_counter_index->list,
2283                                               &priv->counters_table.vf_list[vf_index][port_indx]);
2284                         }
2285                 }
2286
2287                 res = mlx4_bitmap_init(&priv->counters_table.bitmap,
2288                                        nent_pow2, nent_pow2 - 1,
2289                                        index, 1);
2290                 mlx4_dbg(dev, "%s: master allocated %d counters for %d VFs\n",
2291                          __func__, index, dev->num_vfs);
2292         } else {
2293                 res = mlx4_bitmap_init(&priv->counters_table.bitmap,
2294                                 nent_pow2, nent_pow2 - 1,
2295                                 index, 1);
2296                 mlx4_dbg(dev, "%s: native allocated %d counters for %d ports\n",
2297                          __func__, index, dev->caps.num_ports);
2298         }
2299
2300         return 0;
2301
2302 }
2303
2304 static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
2305 {
2306         struct mlx4_priv *priv = mlx4_priv(dev);
2307         int i, j;
2308         struct counter_index *port, *tmp_port;
2309         struct counter_index *vf, *tmp_vf;
2310
2311         mutex_lock(&priv->counters_table.mutex);
2312
2313         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) {
2314                 for (i = 0; i < dev->caps.num_ports; i++) {
2315                         list_for_each_entry_safe(port, tmp_port,
2316                                                  &priv->counters_table.global_port_list[i],
2317                                                  list) {
2318                                 list_del(&port->list);
2319                                 kfree(port);
2320                         }
2321                 }
2322                 if (!mlx4_is_slave(dev)) {
2323                         for (i = 0; i < dev->num_vfs; i++) {
2324                                 for (j = 0; j < dev->caps.num_ports; j++) {
2325                                         list_for_each_entry_safe(vf, tmp_vf,
2326                                                                  &priv->counters_table.vf_list[i][j],
2327                                                                  list) {
2328                                                 /* clear the counter statistic */
2329                                                 if (__mlx4_clear_if_stat(dev, vf->index))
2330                                                         mlx4_dbg(dev, "%s: reset counter %d failed\n",
2331                                                                  __func__, vf->index);
2332                                                 list_del(&vf->list);
2333                                                 kfree(vf);
2334                                         }
2335                                 }
2336                         }
2337                         mlx4_bitmap_cleanup(&priv->counters_table.bitmap);
2338                 }
2339         }
2340         mutex_unlock(&priv->counters_table.mutex);
2341 }
2342
/*
 * Release all counters held by a slave (VF) so they can be reused by
 * the next owner of that function.  'slave' is 1-based; slave 0 (the
 * PF itself) and out-of-range values are ignored.  Always returns 0.
 */
int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int i, first;
	struct counter_index *vf, *tmp_vf;

	/* clean VF's counters for the next usage */
	if (slave > 0 && slave <= dev->num_vfs) {
		mlx4_dbg(dev, "%s: free counters of slave(%d)\n"
			 , __func__, slave);

		mutex_lock(&priv->counters_table.mutex);
		for (i = 0; i < dev->caps.num_ports; i++) {
			first = 0;
			list_for_each_entry_safe(vf, tmp_vf,
						 &priv->counters_table.vf_list[slave - 1][i],
						 list) {
				/* clear the counter statistic */
				if (__mlx4_clear_if_stat(dev, vf->index))
					mlx4_dbg(dev, "%s: reset counter %d failed\n",
						 __func__, vf->index);
				/* 'first++' evaluates to 0 (false) on the head
				 * entry, so the VF's default counter is kept;
				 * every later, non-sink entry is returned to
				 * the bitmap and freed. */
				if (first++ && vf->index != MLX4_SINK_COUNTER_INDEX) {
					mlx4_dbg(dev, "%s: delete counter index %d for slave %d and port %d\n"
						 , __func__, vf->index, slave, i + 1);
					mlx4_bitmap_free(&priv->counters_table.bitmap, vf->index, MLX4_USE_RR);
					list_del(&vf->list);
					kfree(vf);
				} else {
					mlx4_dbg(dev, "%s: can't delete default counter index %d for slave %d and port %d\n"
						 , __func__, vf->index, slave, i + 1);
				}
			}
		}
		mutex_unlock(&priv->counters_table.mutex);
	}

	return 0;
}
2381
2382 int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx)
2383 {
2384         struct mlx4_priv *priv = mlx4_priv(dev);
2385         struct counter_index *new_counter_index;
2386
2387         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2388                 return -ENOENT;
2389
2390         if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) ||
2391             (port < 0) || (port > MLX4_MAX_PORTS)) {
2392                 mlx4_dbg(dev, "%s: invalid slave(%d) or port(%d) index\n",
2393                          __func__, slave, port);
2394                 return -EINVAL;
2395         }
2396
2397         /* handle old guest request does not support request by port index */
2398         if (port == 0) {
2399                 *idx = MLX4_SINK_COUNTER_INDEX;
2400                 mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n"
2401                          , __func__, *idx, slave, port);
2402                 return 0;
2403         }
2404
2405         mutex_lock(&priv->counters_table.mutex);
2406
2407         *idx = mlx4_bitmap_alloc(&priv->counters_table.bitmap);
2408         /* if no resources return the default counter of the slave and port */
2409         if (*idx == -1) {
2410                 if (slave == 0) { /* its the ethernet counter ?????? */
2411                         new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
2412                                                        struct counter_index,
2413                                                        list);
2414                 } else {
2415                         new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next,
2416                                                        struct counter_index,
2417                                                        list);
2418                 }
2419
2420                 *idx = new_counter_index->index;
2421                 mlx4_dbg(dev, "%s: allocated defualt counter index %d for slave %d port %d\n"
2422                          , __func__, *idx, slave, port);
2423                 goto out;
2424         }
2425
2426         if (slave == 0) { /* native or master */
2427                 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
2428                 if (!new_counter_index)
2429                         goto no_mem;
2430                 new_counter_index->index = *idx;
2431                 list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]);
2432         } else {
2433                 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
2434                 if (!new_counter_index)
2435                         goto no_mem;
2436                 new_counter_index->index = *idx;
2437                 list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[slave - 1][port - 1]);
2438         }
2439
2440         mlx4_dbg(dev, "%s: allocated counter index %d for slave %d port %d\n"
2441                  , __func__, *idx, slave, port);
2442 out:
2443         mutex_unlock(&priv->counters_table.mutex);
2444         return 0;
2445
2446 no_mem:
2447         mlx4_bitmap_free(&priv->counters_table.bitmap, *idx, MLX4_USE_RR);
2448         mutex_unlock(&priv->counters_table.mutex);
2449         *idx = MLX4_SINK_COUNTER_INDEX;
2450         mlx4_dbg(dev, "%s: failed err (%d)\n"
2451                  , __func__, -ENOMEM);
2452         return -ENOMEM;
2453 }
2454
/*
 * Exported counter allocator.  On a multi-function device the request
 * goes through the wrapped resource-management firmware command; a
 * slave additionally mirrors the granted index on its local per-port
 * list so it can be found and freed later.  Otherwise the allocation
 * is performed directly via __mlx4_counter_alloc() with slave 0.
 *
 * Returns 0 on success, -ENOSPC when the master handed back the sink
 * counter, -EEXIST when the granted index equals the port's default
 * counter, -ENOMEM on tracking-allocation failure, or the error from
 * the command path.
 */
int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx)
{
	u64 out_param;
	int err;
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct counter_index *new_counter_index, *c_index;

	if (mlx4_is_mfunc(dev)) {
		err = mlx4_cmd_imm(dev, 0, &out_param,
				   ((u32) port) << 8 | (u32) RES_COUNTER,
				   RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
		if (!err) {
			*idx = get_param_l(&out_param);
			if (*idx == MLX4_SINK_COUNTER_INDEX)
				return -ENOSPC;

			/* Compare the granted index against the port's
			 * default counter (head of the global list). */
			mutex_lock(&priv->counters_table.mutex);
			c_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
					     struct counter_index,
					     list);
			mutex_unlock(&priv->counters_table.mutex);
			if (c_index->index == *idx)
				return -EEXIST;

			if (mlx4_is_slave(dev)) {
				/* Track the index locally; on allocation
				 * failure give it back to the master. */
				new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
				if (!new_counter_index) {
					mlx4_counter_free(dev, port, *idx);
					return -ENOMEM;
				}
				new_counter_index->index = *idx;
				mutex_lock(&priv->counters_table.mutex);
				list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]);
				mutex_unlock(&priv->counters_table.mutex);
				mlx4_dbg(dev, "%s: allocated counter index %d for port %d\n"
					 , __func__, *idx, port);
			}
		}
		return err;
	}
	return __mlx4_counter_alloc(dev, 0, port, idx);
}
EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
2498 EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
2499
2500 void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx)
2501 {
2502         /* check if native or slave and deletes accordingly */
2503         struct mlx4_priv *priv = mlx4_priv(dev);
2504         struct counter_index *pf, *tmp_pf;
2505         struct counter_index *vf, *tmp_vf;
2506         int first;
2507
2508
2509         if (idx == MLX4_SINK_COUNTER_INDEX) {
2510                 mlx4_dbg(dev, "%s: try to delete default counter index %d for port %d\n"
2511                          , __func__, idx, port);
2512                         return;
2513         }
2514
2515         if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) ||
2516             (port < 0) || (port > MLX4_MAX_PORTS)) {
2517                 mlx4_warn(dev, "%s: deletion failed due to invalid slave(%d) or port(%d) index\n"
2518                          , __func__, slave, idx);
2519                         return;
2520         }
2521
2522         mutex_lock(&priv->counters_table.mutex);
2523         if (slave == 0) {
2524                 first = 0;
2525                 list_for_each_entry_safe(pf, tmp_pf,
2526                                          &priv->counters_table.global_port_list[port - 1],
2527                                          list) {
2528                         /* the first 2 counters are reserved */
2529                         if (pf->index == idx) {
2530                                 /* clear the counter statistic */
2531                                 if (__mlx4_clear_if_stat(dev, pf->index))
2532                                         mlx4_dbg(dev, "%s: reset counter %d failed\n",
2533                                                  __func__, pf->index);
2534                                 if (1 < first && idx != MLX4_SINK_COUNTER_INDEX) {
2535                                         list_del(&pf->list);
2536                                         kfree(pf);
2537                                         mlx4_dbg(dev, "%s: delete counter index %d for native device (%d) port %d\n"
2538                                                  , __func__, idx, slave, port);
2539                                         mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR);
2540                                         goto out;
2541                                 } else {
2542                                         mlx4_dbg(dev, "%s: can't delete default counter index %d for native device (%d) port %d\n"
2543                                                  , __func__, idx, slave, port);
2544                                         goto out;
2545                                 }
2546                         }
2547                         first++;
2548                 }
2549                 mlx4_dbg(dev, "%s: can't delete counter index %d for native device (%d) port %d\n"
2550                          , __func__, idx, slave, port);
2551         } else {
2552                 first = 0;
2553                 list_for_each_entry_safe(vf, tmp_vf,
2554                                          &priv->counters_table.vf_list[slave - 1][port - 1],
2555                                          list) {
2556                         /* the first element is reserved */
2557                         if (vf->index == idx) {
2558                                 /* clear the counter statistic */
2559                                 if (__mlx4_clear_if_stat(dev, vf->index))
2560                                         mlx4_dbg(dev, "%s: reset counter %d failed\n",
2561                                                  __func__, vf->index);
2562                                 if (first) {
2563                                         list_del(&vf->list);
2564                                         kfree(vf);
2565                                         mlx4_dbg(dev, "%s: delete counter index %d for slave %d port %d\n",
2566                                                  __func__, idx, slave, port);
2567                                         mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR);
2568                                         goto out;
2569                                 } else {
2570                                         mlx4_dbg(dev, "%s: can't delete default slave (%d) counter index %d for port %d\n"
2571                                                  , __func__, slave, idx, port);
2572                                         goto out;
2573                                 }
2574                         }
2575                         first++;
2576                 }
2577                 mlx4_dbg(dev, "%s: can't delete slave (%d) counter index %d for port %d\n"
2578                          , __func__, slave, idx, port);
2579         }
2580
2581 out:
2582         mutex_unlock(&priv->counters_table.mutex);
2583 }
2584
/*
 * Exported counter free.  On a multi-function device the index is
 * released through the wrapped resource-management command; a slave
 * then also removes its local tracking entry for the index.  Otherwise
 * the counter is freed directly with slave 0.
 */
void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx)
{
	u64 in_param = 0;
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct counter_index *counter, *tmp_counter;
	int first = 0;

	if (mlx4_is_mfunc(dev)) {
		set_param_l(&in_param, idx);
		/* NOTE(review): the command status is discarded here --
		 * presumably the free is best-effort; confirm. */
		mlx4_cmd(dev, in_param,
			 ((u32) port) << 8 | (u32) RES_COUNTER,
			 RES_OP_RESERVE,
			 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
			 MLX4_CMD_WRAPPED);

		if (mlx4_is_slave(dev) && idx != MLX4_SINK_COUNTER_INDEX) {
			mutex_lock(&priv->counters_table.mutex);
			/* 'first++' is only evaluated when the index matches
			 * (short-circuit) and is 0 (false) the first time, so
			 * the first occurrence of idx in the list is kept --
			 * presumably the port's default -- and only a later
			 * duplicate entry is unlinked and freed. */
			list_for_each_entry_safe(counter, tmp_counter,
						 &priv->counters_table.global_port_list[port - 1],
						 list) {
				if (counter->index == idx && first++) {
					list_del(&counter->list);
					kfree(counter);
					mlx4_dbg(dev, "%s: delete counter index %d for port %d\n"
						 , __func__, idx, port);
					mutex_unlock(&priv->counters_table.mutex);
					return;
				}
			}
			mutex_unlock(&priv->counters_table.mutex);
		}

		return;
	}
	__mlx4_counter_free(dev, 0, port, idx);
}
EXPORT_SYMBOL_GPL(mlx4_counter_free);
2622
2623 int __mlx4_clear_if_stat(struct mlx4_dev *dev,
2624                          u8 counter_index)
2625 {
2626         struct mlx4_cmd_mailbox *if_stat_mailbox = NULL;
2627         int err = 0;
2628         u32 if_stat_in_mod = (counter_index & 0xff) | (1 << 31);
2629
2630         if (counter_index == MLX4_SINK_COUNTER_INDEX)
2631                 return -EINVAL;
2632
2633         if (mlx4_is_slave(dev))
2634                 return 0;
2635
2636         if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
2637         if (IS_ERR(if_stat_mailbox)) {
2638                 err = PTR_ERR(if_stat_mailbox);
2639                 return err;
2640         }
2641
2642         err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
2643                            MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
2644                            MLX4_CMD_NATIVE);
2645
2646         mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
2647         return err;
2648 }
2649
/*
 * Return the default counter index for a slave/port (slave 0 = PF or
 * native).  For IB ports the sink counter is always returned; for
 * other port types the default is the head entry of the owner's
 * per-port counter list.
 */
u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct counter_index *new_counter_index;

	if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) {
		mlx4_dbg(dev, "%s: return counter index %d for slave %d port (MLX4_PORT_TYPE_IB) %d\n",
			 __func__, MLX4_SINK_COUNTER_INDEX, slave, port);
		return (u8)MLX4_SINK_COUNTER_INDEX;
	}

	mutex_lock(&priv->counters_table.mutex);
	if (slave == 0) {
		new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
					       struct counter_index,
					       list);
	} else {
		new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next,
					       struct counter_index,
					       list);
	}
	mutex_unlock(&priv->counters_table.mutex);

	/* NOTE(review): new_counter_index is dereferenced after the mutex
	 * is released; this is safe only if default (head) entries are
	 * never freed concurrently -- confirm against the cleanup paths. */
	mlx4_dbg(dev, "%s: return counter index %d for slave %d port %d\n",
		 __func__, new_counter_index->index, slave, port);


	return (u8)new_counter_index->index;
}
2679
/*
 * Accumulate ethtool-style traffic statistics for 'port' into
 * *vport_stats by issuing QUERY_IF_STAT for every counter on the
 * port's global list (the sink counter is skipped).  When 'reset' is
 * nonzero, bit 31 of the input modifier is set, which also clears the
 * hardware counters.  Results are summed across all counters; the
 * caller is expected to have zeroed *vport_stats beforehand.
 *
 * Returns 0 on success or a negative error from the mailbox
 * allocation / firmware command.
 */
int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port,
			 struct mlx4_en_vport_stats *vport_stats,
			 int reset)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_cmd_mailbox *if_stat_mailbox = NULL;
	union  mlx4_counter *counter;
	int err = 0;
	u32 if_stat_in_mod;
	struct counter_index *vport, *tmp_vport;

	if (!vport_stats)
		return -EINVAL;

	if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(if_stat_mailbox)) {
		err = PTR_ERR(if_stat_mailbox);
		return err;
	}

	mutex_lock(&priv->counters_table.mutex);
	list_for_each_entry_safe(vport, tmp_vport,
				 &priv->counters_table.global_port_list[port - 1],
				 list) {
		if (vport->index == MLX4_SINK_COUNTER_INDEX)
			continue;

		memset(if_stat_mailbox->buf, 0, sizeof(union  mlx4_counter));
		/* low byte: counter index; bit 31: clear-after-read flag */
		if_stat_in_mod = (vport->index & 0xff) | ((reset & 1) << 31);
		err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma,
				   if_stat_in_mod, 0,
				   MLX4_CMD_QUERY_IF_STAT,
				   MLX4_CMD_TIME_CLASS_C,
				   MLX4_CMD_NATIVE);
		if (err) {
			mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n",
				 __func__, vport->index);
			goto if_stat_out;
		}
		counter = (union mlx4_counter *)if_stat_mailbox->buf;
		/* cnt_mode 1 selects the extended-counter layout; other
		 * modes are silently skipped here */
		if ((counter->control.cnt_mode & 0xf) == 1) {
			vport_stats->rx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastFrames);
			vport_stats->rx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxUnicastFrames);
			vport_stats->rx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxMulticastFrames);
			vport_stats->tx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastFrames);
			vport_stats->tx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxUnicastFrames);
			vport_stats->tx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxMulticastFrames);
			vport_stats->rx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastOctets);
			vport_stats->rx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxUnicastOctets);
			vport_stats->rx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxMulticastOctets);
			vport_stats->tx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastOctets);
			vport_stats->tx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxUnicastOctets);
			vport_stats->tx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxMulticastOctets);
			vport_stats->rx_errors += be64_to_cpu(counter->ext.counters[0].IfRxErrorFrames);
			vport_stats->rx_dropped += be64_to_cpu(counter->ext.counters[0].IfRxNoBufferFrames);
			vport_stats->tx_errors += be64_to_cpu(counter->ext.counters[0].IfTxDroppedFrames);
		}
	}

if_stat_out:
	mutex_unlock(&priv->counters_table.mutex);
	mlx4_free_cmd_mailbox(dev, if_stat_mailbox);

	return err;
}
EXPORT_SYMBOL_GPL(mlx4_get_vport_ethtool_stats);
2746
2747 static int mlx4_setup_hca(struct mlx4_dev *dev)
2748 {
2749         struct mlx4_priv *priv = mlx4_priv(dev);
2750         int err;
2751         int port;
2752         __be32 ib_port_default_caps;
2753
2754         err = mlx4_init_uar_table(dev);
2755         if (err) {
2756                 mlx4_err(dev, "Failed to initialize "
2757                          "user access region table (err=%d), aborting.\n",
2758                          err);
2759                 return err;
2760         }
2761
2762         err = mlx4_uar_alloc(dev, &priv->driver_uar);
2763         if (err) {
2764                 mlx4_err(dev, "Failed to allocate driver access region "
2765                          "(err=%d), aborting.\n", err);
2766                 goto err_uar_table_free;
2767         }
2768
2769         priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
2770         if (!priv->kar) {
2771                 mlx4_err(dev, "Couldn't map kernel access region, "
2772                          "aborting.\n");
2773                 err = -ENOMEM;
2774                 goto err_uar_free;
2775         }
2776
2777         err = mlx4_init_pd_table(dev);
2778         if (err) {
2779                 mlx4_err(dev, "Failed to initialize "
2780                          "protection domain table (err=%d), aborting.\n", err);
2781                 goto err_kar_unmap;
2782         }
2783
2784         err = mlx4_init_xrcd_table(dev);
2785         if (err) {
2786                 mlx4_err(dev, "Failed to initialize "
2787                          "reliable connection domain table (err=%d), "
2788                          "aborting.\n", err);
2789                 goto err_pd_table_free;
2790         }
2791
2792         err = mlx4_init_mr_table(dev);
2793         if (err) {
2794                 mlx4_err(dev, "Failed to initialize "
2795                          "memory region table (err=%d), aborting.\n", err);
2796                 goto err_xrcd_table_free;
2797         }
2798
2799         if (!mlx4_is_slave(dev)) {
2800                 err = mlx4_init_mcg_table(dev);
2801                 if (err) {
2802                         mlx4_err(dev, "Failed to initialize "
2803                                  "multicast group table (err=%d), aborting.\n",
2804                                  err);
2805                         goto err_mr_table_free;
2806                 }
2807         }
2808
2809         err = mlx4_init_eq_table(dev);
2810         if (err) {
2811                 mlx4_err(dev, "Failed to initialize "
2812                          "event queue table (err=%d), aborting.\n", err);
2813                 goto err_mcg_table_free;
2814         }
2815
2816         err = mlx4_cmd_use_events(dev);
2817         if (err) {
2818                 mlx4_err(dev, "Failed to switch to event-driven "
2819                          "firmware commands (err=%d), aborting.\n", err);
2820                 goto err_eq_table_free;
2821         }
2822
2823         err = mlx4_NOP(dev);
2824         if (err) {
2825                 if (dev->flags & MLX4_FLAG_MSI_X) {
2826                         mlx4_warn(dev, "NOP command failed to generate MSI-X "
2827                                   "interrupt IRQ %d).\n",
2828                                   priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
2829                         mlx4_warn(dev, "Trying again without MSI-X.\n");
2830                 } else {
2831                         mlx4_err(dev, "NOP command failed to generate interrupt "
2832                                  "(IRQ %d), aborting.\n",
2833                                  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
2834                         mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
2835                 }
2836
2837                 goto err_cmd_poll;
2838         }
2839
2840         mlx4_dbg(dev, "NOP command IRQ test passed\n");
2841
2842         err = mlx4_init_cq_table(dev);
2843         if (err) {
2844                 mlx4_err(dev, "Failed to initialize "
2845                          "completion queue table (err=%d), aborting.\n", err);
2846                 goto err_cmd_poll;
2847         }
2848
2849         err = mlx4_init_srq_table(dev);
2850         if (err) {
2851                 mlx4_err(dev, "Failed to initialize "
2852                          "shared receive queue table (err=%d), aborting.\n",
2853                          err);
2854                 goto err_cq_table_free;
2855         }
2856
2857         err = mlx4_init_qp_table(dev);
2858         if (err) {
2859                 mlx4_err(dev, "Failed to initialize "
2860                          "queue pair table (err=%d), aborting.\n", err);
2861                 goto err_srq_table_free;
2862         }
2863
2864         err = mlx4_init_counters_table(dev);
2865         if (err && err != -ENOENT) {
2866                 mlx4_err(dev, "Failed to initialize counters table (err=%d), "
2867                          "aborting.\n", err);
2868                 goto err_qp_table_free;
2869         }
2870
2871         if (!mlx4_is_slave(dev)) {
2872                 for (port = 1; port <= dev->caps.num_ports; port++) {
2873                         ib_port_default_caps = 0;
2874                         err = mlx4_get_port_ib_caps(dev, port,
2875                                                     &ib_port_default_caps);
2876                         if (err)
2877                                 mlx4_warn(dev, "failed to get port %d default "
2878                                           "ib capabilities (%d). Continuing "
2879                                           "with caps = 0\n", port, err);
2880                         dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
2881
2882                         /* initialize per-slave default ib port capabilities */
2883                         if (mlx4_is_master(dev)) {
2884                                 int i;
2885                                 for (i = 0; i < dev->num_slaves; i++) {
2886                                         if (i == mlx4_master_func_num(dev))
2887                                                 continue;
2888                                         priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
2889                                                         ib_port_default_caps;
2890                                 }
2891                         }
2892
2893                         dev->caps.port_ib_mtu[port] = IB_MTU_4096;
2894
2895                         err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
2896                                             dev->caps.pkey_table_len[port] : -1);
2897                         if (err) {
2898                                 mlx4_err(dev, "Failed to set port %d (err=%d), "
2899                                          "aborting\n", port, err);
2900                                 goto err_counters_table_free;
2901                         }
2902                 }
2903         }
2904
2905         return 0;
2906
2907 err_counters_table_free:
2908         mlx4_cleanup_counters_table(dev);
2909
2910 err_qp_table_free:
2911         mlx4_cleanup_qp_table(dev);
2912
2913 err_srq_table_free:
2914         mlx4_cleanup_srq_table(dev);
2915
2916 err_cq_table_free:
2917         mlx4_cleanup_cq_table(dev);
2918
2919 err_cmd_poll:
2920         mlx4_cmd_use_polling(dev);
2921
2922 err_eq_table_free:
2923         mlx4_cleanup_eq_table(dev);
2924
2925 err_mcg_table_free:
2926         if (!mlx4_is_slave(dev))
2927                 mlx4_cleanup_mcg_table(dev);
2928
2929 err_mr_table_free:
2930         mlx4_cleanup_mr_table(dev);
2931
2932 err_xrcd_table_free:
2933         mlx4_cleanup_xrcd_table(dev);
2934
2935 err_pd_table_free:
2936         mlx4_cleanup_pd_table(dev);
2937
2938 err_kar_unmap:
2939         iounmap(priv->kar);
2940
2941 err_uar_free:
2942         mlx4_uar_free(dev, &priv->driver_uar);
2943
2944 err_uar_table_free:
2945         mlx4_cleanup_uar_table(dev);
2946         return err;
2947 }
2948
/*
 * Try to switch the device to MSI-X interrupts; fall back to a single
 * shared (INTx/MSI) IRQ if MSI-X cannot be enabled.
 *
 * On success sets MLX4_FLAG_MSI_X in dev->flags and fills in
 * dev->caps.num_comp_vectors, dev->caps.comp_pool and the per-EQ IRQ
 * numbers in priv->eq_table.  On total failure (no IRQ at all) both
 * num_comp_vectors and comp_pool are left at 0, which the caller
 * (__mlx4_init_one) treats as fatal.
 */
static void mlx4_enable_msi_x(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct msix_entry *entries;
        int err;
        int i;

        if (msi_x) {
                /* One vector per port per CPU, plus the legacy/async set. */
                int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ;

                /* Never ask for more EQs than the firmware exposes. */
                nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
                             nreq);

                /* msi_x > 1 acts as an explicit vector-count cap
                 * (single-function devices only). */
                if (msi_x > 1 && !mlx4_is_mfunc(dev))
                        nreq = min_t(int, nreq, msi_x);

                entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
                if (!entries)
                        goto no_msi;

                for (i = 0; i < nreq; ++i)
                        entries[i].entry = i;

        retry:
                err = pci_enable_msix(dev->pdev, entries, nreq);
                if (err) {
                        /* Try again if at least 2 vectors are available */
                        /* pci_enable_msix() returns a positive value when
                         * fewer vectors than requested are available. */
                        if (err > 1) {
                                mlx4_info(dev, "Requested %d vectors, "
                                          "but only %d MSI-X vectors available, "
                                          "trying again\n", nreq, err);
                                nreq = err;
                                goto retry;
                        }
                        kfree(entries);
                        /* if error, or can't alloc even 1 IRQ */
                        if (err < 0) {
                                mlx4_err(dev, "No IRQs left, device can't "
                                    "be started.\n");
                                goto no_irq;
                        }
                        goto no_msi;
                }

                if (nreq <
                    MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {
                        /*Working in legacy mode , all EQ's shared*/
                        dev->caps.comp_pool           = 0;
                        dev->caps.num_comp_vectors = nreq - 1;
                } else {
                        /* Enough vectors for a dedicated per-port pool. */
                        dev->caps.comp_pool           = nreq - MSIX_LEGACY_SZ;
                        dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
                }
                for (i = 0; i < nreq; ++i)
                        priv->eq_table.eq[i].irq = entries[i].vector;

                dev->flags |= MLX4_FLAG_MSI_X;

                kfree(entries);
                return;
        }

no_msi:
        /* Fall back to one completion vector sharing the legacy PCI IRQ. */
        dev->caps.num_comp_vectors = 1;
        dev->caps.comp_pool        = 0;

        for (i = 0; i < 2; ++i)
                priv->eq_table.eq[i].irq = dev->pdev->irq;
        return;
no_irq:
        /* No interrupt source at all; caller will abort device init. */
        dev->caps.num_comp_vectors = 0;
        dev->caps.comp_pool        = 0;
        return;
}
3023
3024 static void
3025 mlx4_init_hca_info(struct mlx4_dev *dev)
3026 {
3027         struct mlx4_hca_info *info = &mlx4_priv(dev)->hca_info;
3028
3029         info->dev = dev;
3030
3031         info->firmware_attr = (struct device_attribute)__ATTR(fw_ver, S_IRUGO,
3032                                                         show_firmware_version, NULL);
3033         if (device_create_file(&dev->pdev->dev, &info->firmware_attr))
3034                 mlx4_err(dev, "Failed to add file firmware version");
3035
3036         info->hca_attr = (struct device_attribute)__ATTR(hca, S_IRUGO, show_hca,
3037                                                                                 NULL);
3038         if (device_create_file(&dev->pdev->dev, &info->hca_attr))
3039                 mlx4_err(dev, "Failed to add file hca type");
3040
3041         info->board_attr = (struct device_attribute)__ATTR(board_id, S_IRUGO,
3042                                                             show_board, NULL);
3043         if (device_create_file(&dev->pdev->dev, &info->board_attr))
3044                 mlx4_err(dev, "Failed to add file board id type");
3045 }
3046
3047 static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
3048 {
3049         struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
3050         int err = 0;
3051
3052         info->dev = dev;
3053         info->port = port;
3054         if (!mlx4_is_slave(dev)) {
3055                 mlx4_init_mac_table(dev, &info->mac_table);
3056                 mlx4_init_vlan_table(dev, &info->vlan_table);
3057                 info->base_qpn = mlx4_get_base_qpn(dev, port);
3058         }
3059
3060         sprintf(info->dev_name, "mlx4_port%d", port);
3061         info->port_attr.attr.name = info->dev_name;
3062         if (mlx4_is_mfunc(dev))
3063                 info->port_attr.attr.mode = S_IRUGO;
3064         else {
3065                 info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
3066                 info->port_attr.store     = set_port_type;
3067         }
3068         info->port_attr.show      = show_port_type;
3069         sysfs_attr_init(&info->port_attr.attr);
3070
3071         err = device_create_file(&dev->pdev->dev, &info->port_attr);
3072         if (err) {
3073                 mlx4_err(dev, "Failed to create file for port %d\n", port);
3074                 info->port = -1;
3075         }
3076
3077         sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
3078         info->port_mtu_attr.attr.name = info->dev_mtu_name;
3079         if (mlx4_is_mfunc(dev))
3080                 info->port_mtu_attr.attr.mode = S_IRUGO;
3081         else {
3082                 info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
3083                 info->port_mtu_attr.store     = set_port_ib_mtu;
3084         }
3085         info->port_mtu_attr.show      = show_port_ib_mtu;
3086         sysfs_attr_init(&info->port_mtu_attr.attr);
3087
3088         err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr);
3089         if (err) {
3090                 mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
3091                 device_remove_file(&info->dev->pdev->dev, &info->port_attr);
3092                 info->port = -1;
3093         }
3094
3095         return err;
3096 }
3097
3098 static void
3099 mlx4_cleanup_hca_info(struct mlx4_hca_info *info)
3100 {
3101         device_remove_file(&info->dev->pdev->dev, &info->firmware_attr);
3102         device_remove_file(&info->dev->pdev->dev, &info->board_attr);
3103         device_remove_file(&info->dev->pdev->dev, &info->hca_attr);
3104 }
3105
3106 static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
3107 {
3108         if (info->port < 0)
3109                 return;
3110
3111         device_remove_file(&info->dev->pdev->dev, &info->port_attr);
3112         device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr);
3113 }
3114
3115 static int mlx4_init_steering(struct mlx4_dev *dev)
3116 {
3117         struct mlx4_priv *priv = mlx4_priv(dev);
3118         int num_entries = dev->caps.num_ports;
3119         int i, j;
3120
3121         priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
3122         if (!priv->steer)
3123                 return -ENOMEM;
3124
3125         for (i = 0; i < num_entries; i++)
3126                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
3127                         INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
3128                         INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
3129                 }
3130         return 0;
3131 }
3132
3133 static void mlx4_clear_steering(struct mlx4_dev *dev)
3134 {
3135         struct mlx4_priv *priv = mlx4_priv(dev);
3136         struct mlx4_steer_index *entry, *tmp_entry;
3137         struct mlx4_promisc_qp *pqp, *tmp_pqp;
3138         int num_entries = dev->caps.num_ports;
3139         int i, j;
3140
3141         for (i = 0; i < num_entries; i++) {
3142                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
3143                         list_for_each_entry_safe(pqp, tmp_pqp,
3144                                                  &priv->steer[i].promisc_qps[j],
3145                                                  list) {
3146                                 list_del(&pqp->list);
3147                                 kfree(pqp);
3148                         }
3149                         list_for_each_entry_safe(entry, tmp_entry,
3150                                                  &priv->steer[i].steer_entries[j],
3151                                                  list) {
3152                                 list_del(&entry->list);
3153                                 list_for_each_entry_safe(pqp, tmp_pqp,
3154                                                          &entry->duplicates,
3155                                                          list) {
3156                                         list_del(&pqp->list);
3157                                         kfree(pqp);
3158                                 }
3159                                 kfree(entry);
3160                         }
3161                 }
3162         }
3163         kfree(priv->steer);
3164 }
3165
3166 static int extended_func_num(struct pci_dev *pdev)
3167 {
3168         return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
3169 }
3170
3171 #define MLX4_OWNER_BASE 0x8069c
3172 #define MLX4_OWNER_SIZE 4
3173
3174 static int mlx4_get_ownership(struct mlx4_dev *dev)
3175 {
3176         void __iomem *owner;
3177         u32 ret;
3178
3179         if (pci_channel_offline(dev->pdev))
3180                 return -EIO;
3181
3182         owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
3183                         MLX4_OWNER_SIZE);
3184         if (!owner) {
3185                 mlx4_err(dev, "Failed to obtain ownership bit\n");
3186                 return -ENOMEM;
3187         }
3188
3189         ret = readl(owner);
3190         iounmap(owner);
3191         return (int) !!ret;
3192 }
3193
3194 static void mlx4_free_ownership(struct mlx4_dev *dev)
3195 {
3196         void __iomem *owner;
3197
3198         if (pci_channel_offline(dev->pdev))
3199                 return;
3200
3201         owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
3202                         MLX4_OWNER_SIZE);
3203         if (!owner) {
3204                 mlx4_err(dev, "Failed to obtain ownership bit\n");
3205                 return;
3206         }
3207         writel(0, owner);
3208         msleep(1000);
3209         iounmap(owner);
3210 }
3211
3212 static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
3213 {
3214         struct mlx4_priv *priv;
3215         struct mlx4_dev *dev;
3216         int err;
3217         int port;
3218         int nvfs, prb_vf;
3219
3220         pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
3221
3222         err = pci_enable_device(pdev);
3223         if (err) {
3224                 dev_err(&pdev->dev, "Cannot enable PCI device, "
3225                         "aborting.\n");
3226                 return err;
3227         }
3228
3229         mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(pdev), 0, &nvfs);
3230         mlx4_get_val(probe_vf.dbdf2val.tbl, pci_physfn(pdev), 0, &prb_vf);
3231         if (nvfs > MLX4_MAX_NUM_VF) {
3232                 dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n",
3233                         nvfs, MLX4_MAX_NUM_VF);
3234                 return -EINVAL;
3235         }
3236
3237         if (nvfs < 0) {
3238                 dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
3239                 return -EINVAL;
3240         }
3241         /*
3242          * Check for BARs.
3243          */
3244         if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
3245             !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
3246                 dev_err(&pdev->dev, "Missing DCS, aborting."
3247                         "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n",
3248                         pci_dev_data, pci_resource_flags(pdev, 0));
3249                 err = -ENODEV;
3250                 goto err_disable_pdev;
3251         }
3252         if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
3253                 dev_err(&pdev->dev, "Missing UAR, aborting.\n");
3254                 err = -ENODEV;
3255                 goto err_disable_pdev;
3256         }
3257
3258         err = pci_request_regions(pdev, DRV_NAME);
3259         if (err) {
3260                 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
3261                 goto err_disable_pdev;
3262         }
3263
3264         pci_set_master(pdev);
3265
3266         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3267         if (err) {
3268                 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
3269                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3270                 if (err) {
3271                         dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
3272                         goto err_release_regions;
3273                 }
3274         }
3275         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3276         if (err) {
3277                 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
3278                          "consistent PCI DMA mask.\n");
3279                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3280                 if (err) {
3281                         dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
3282                                 "aborting.\n");
3283                         goto err_release_regions;
3284                 }
3285         }
3286
3287         /* Allow large DMA segments, up to the firmware limit of 1 GB */
3288         dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
3289
3290         priv = kzalloc(sizeof *priv, GFP_KERNEL);
3291         if (!priv) {
3292                 dev_err(&pdev->dev, "Device struct alloc failed, "
3293                         "aborting.\n");
3294                 err = -ENOMEM;
3295                 goto err_release_regions;
3296         }
3297
3298         dev       = &priv->dev;
3299         dev->pdev = pdev;
3300         INIT_LIST_HEAD(&priv->dev_list);
3301         INIT_LIST_HEAD(&priv->ctx_list);
3302         spin_lock_init(&priv->ctx_lock);
3303
3304         mutex_init(&priv->port_mutex);
3305
3306         INIT_LIST_HEAD(&priv->pgdir_list);
3307         mutex_init(&priv->pgdir_mutex);
3308
3309         INIT_LIST_HEAD(&priv->bf_list);
3310         mutex_init(&priv->bf_mutex);
3311
3312         dev->rev_id = pdev->revision;
3313         dev->numa_node = dev_to_node(&pdev->dev);
3314         /* Detect if this device is a virtual function */
3315         if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3316                 /* When acting as pf, we normally skip vfs unless explicitly
3317                  * requested to probe them. */
3318                 if (nvfs && extended_func_num(pdev) > prb_vf) {
3319                         mlx4_warn(dev, "Skipping virtual function:%d\n",
3320                                                 extended_func_num(pdev));
3321                         err = -ENODEV;
3322                         goto err_free_dev;
3323                 }
3324                 mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
3325                 dev->flags |= MLX4_FLAG_SLAVE;
3326         } else {
3327                 /* We reset the device and enable SRIOV only for physical
3328                  * devices.  Try to claim ownership on the device;
3329                  * if already taken, skip -- do not allow multiple PFs */
3330                 err = mlx4_get_ownership(dev);
3331                 if (err) {
3332                         if (err < 0)
3333                                 goto err_free_dev;
3334                         else {
3335                                 mlx4_warn(dev, "Multiple PFs not yet supported."
3336                                           " Skipping PF.\n");
3337                                 err = -EINVAL;
3338                                 goto err_free_dev;
3339                         }
3340                 }
3341
3342                 if (nvfs) {
3343                         mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", nvfs);
3344                         err = pci_enable_sriov(pdev, nvfs);
3345                         if (err) {
3346                                 mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
3347                                          err);
3348                                 err = 0;
3349                         } else {
3350                                 mlx4_warn(dev, "Running in master mode\n");
3351                                 dev->flags |= MLX4_FLAG_SRIOV |
3352                                               MLX4_FLAG_MASTER;
3353                                 dev->num_vfs = nvfs;
3354                         }
3355                 }
3356
3357                 atomic_set(&priv->opreq_count, 0);
3358                 INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
3359
3360                 /*
3361                  * Now reset the HCA before we touch the PCI capabilities or
3362                  * attempt a firmware command, since a boot ROM may have left
3363                  * the HCA in an undefined state.
3364                  */
3365                 err = mlx4_reset(dev);
3366                 if (err) {
3367                         mlx4_err(dev, "Failed to reset HCA, aborting.\n");
3368                         goto err_sriov;
3369                 }
3370         }
3371
3372 slave_start:
3373         err = mlx4_cmd_init(dev);
3374         if (err) {
3375                 mlx4_err(dev, "Failed to init command interface, aborting.\n");
3376                 goto err_sriov;
3377         }
3378
3379         /* In slave functions, the communication channel must be initialized
3380          * before posting commands. Also, init num_slaves before calling
3381          * mlx4_init_hca */
3382         if (mlx4_is_mfunc(dev)) {
3383                 if (mlx4_is_master(dev))
3384                         dev->num_slaves = MLX4_MAX_NUM_SLAVES;
3385                 else {
3386                         dev->num_slaves = 0;
3387                         err = mlx4_multi_func_init(dev);
3388                         if (err) {
3389                                 mlx4_err(dev, "Failed to init slave mfunc"
3390                                          " interface, aborting.\n");
3391                                 goto err_cmd;
3392                         }
3393                 }
3394         }
3395
3396         err = mlx4_init_hca(dev);
3397         if (err) {
3398                 if (err == -EACCES) {
3399                         /* Not primary Physical function
3400                          * Running in slave mode */
3401                         mlx4_cmd_cleanup(dev);
3402                         dev->flags |= MLX4_FLAG_SLAVE;
3403                         dev->flags &= ~MLX4_FLAG_MASTER;
3404                         goto slave_start;
3405                 } else
3406                         goto err_mfunc;
3407         }
3408
3409         /* In master functions, the communication channel must be initialized
3410          * after obtaining its address from fw */
3411         if (mlx4_is_master(dev)) {
3412                 err = mlx4_multi_func_init(dev);
3413                 if (err) {
3414                         mlx4_err(dev, "Failed to init master mfunc"
3415                                  "interface, aborting.\n");
3416                         goto err_close;
3417                 }
3418         }
3419
3420         err = mlx4_alloc_eq_table(dev);
3421         if (err)
3422                 goto err_master_mfunc;
3423
3424         priv->msix_ctl.pool_bm = 0;
3425         mutex_init(&priv->msix_ctl.pool_lock);
3426
3427         mlx4_enable_msi_x(dev);
3428
3429         /* no MSIX and no shared IRQ */
3430         if (!dev->caps.num_comp_vectors && !dev->caps.comp_pool) {
3431                 err = -ENOSPC;
3432                 goto err_free_eq;
3433         }
3434
3435         if ((mlx4_is_mfunc(dev)) &&
3436             !(dev->flags & MLX4_FLAG_MSI_X)) {
3437                 err = -ENOSYS;
3438                 mlx4_err(dev, "INTx is not supported in multi-function mode."
3439                          " aborting.\n");
3440                 goto err_free_eq;
3441         }
3442
3443         if (!mlx4_is_slave(dev)) {
3444                 err = mlx4_init_steering(dev);
3445                 if (err)
3446                         goto err_free_eq;
3447         }
3448
3449         mlx4_init_quotas(dev);
3450
3451         err = mlx4_setup_hca(dev);
3452         if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
3453             !mlx4_is_mfunc(dev)) {
3454                 dev->flags &= ~MLX4_FLAG_MSI_X;
3455                 dev->caps.num_comp_vectors = 1;
3456                 dev->caps.comp_pool        = 0;
3457                 pci_disable_msix(pdev);
3458                 err = mlx4_setup_hca(dev);
3459         }
3460
3461         if (err)
3462                 goto err_steer;
3463
3464         mlx4_init_hca_info(dev);
3465
3466         for (port = 1; port <= dev->caps.num_ports; port++) {
3467                 err = mlx4_init_port_info(dev, port);
3468                 if (err)
3469                         goto err_port;
3470         }
3471
3472         err = mlx4_register_device(dev);
3473         if (err)
3474                 goto err_port;
3475
3476         mlx4_request_modules(dev);
3477
3478         mlx4_sense_init(dev);
3479         mlx4_start_sense(dev);
3480
3481         priv->pci_dev_data = pci_dev_data;
3482         pci_set_drvdata(pdev, dev);
3483
3484         return 0;
3485
3486 err_port:
3487         for (--port; port >= 1; --port)
3488                 mlx4_cleanup_port_info(&priv->port[port]);
3489
3490         mlx4_cleanup_counters_table(dev);
3491         mlx4_cleanup_qp_table(dev);
3492         mlx4_cleanup_srq_table(dev);
3493         mlx4_cleanup_cq_table(dev);
3494         mlx4_cmd_use_polling(dev);
3495         mlx4_cleanup_eq_table(dev);
3496         mlx4_cleanup_mcg_table(dev);
3497         mlx4_cleanup_mr_table(dev);
3498         mlx4_cleanup_xrcd_table(dev);
3499         mlx4_cleanup_pd_table(dev);
3500         mlx4_cleanup_uar_table(dev);
3501
3502 err_steer:
3503         if (!mlx4_is_slave(dev))
3504                 mlx4_clear_steering(dev);
3505
3506 err_free_eq:
3507         mlx4_free_eq_table(dev);
3508
3509 err_master_mfunc:
3510         if (mlx4_is_master(dev)) {
3511                 mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
3512                 mlx4_multi_func_cleanup(dev);
3513         }
3514
3515         if (mlx4_is_slave(dev)) {
3516                 kfree(dev->caps.qp0_tunnel);
3517                 kfree(dev->caps.qp0_proxy);
3518                 kfree(dev->caps.qp1_tunnel);
3519                 kfree(dev->caps.qp1_proxy);
3520         }
3521
3522 err_close:
3523         if (dev->flags & MLX4_FLAG_MSI_X)
3524                 pci_disable_msix(pdev);
3525
3526         mlx4_close_hca(dev);
3527
3528 err_mfunc:
3529         if (mlx4_is_slave(dev))
3530                 mlx4_multi_func_cleanup(dev);
3531
3532 err_cmd:
3533         mlx4_cmd_cleanup(dev);
3534
3535 err_sriov:
3536         if (dev->flags & MLX4_FLAG_SRIOV)
3537                 pci_disable_sriov(pdev);
3538
3539         if (!mlx4_is_slave(dev))
3540                 mlx4_free_ownership(dev);
3541
3542 err_free_dev:
3543         kfree(priv);
3544
3545 err_release_regions:
3546         pci_release_regions(pdev);
3547
3548 err_disable_pdev:
3549         pci_disable_device(pdev);
3550         pci_set_drvdata(pdev, NULL);
3551         return err;
3552 }
3553
3554 static int __devinit mlx4_init_one(struct pci_dev *pdev,
3555                                    const struct pci_device_id *id)
3556 {
3557         device_set_desc(pdev->dev.bsddev, mlx4_version);
3558         return __mlx4_init_one(pdev, id->driver_data);
3559 }
3560
/*
 * PCI remove entry point: tear down everything __mlx4_init_one() set
 * up, in reverse order of acquisition.  The statement order here
 * mirrors the init path's error-unwind chain and must not be changed.
 */
static void mlx4_remove_one(struct pci_dev *pdev)
{
        struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
        struct mlx4_priv *priv = mlx4_priv(dev);
        int p;

        if (dev) {
                /* in SRIOV it is not allowed to unload the pf's
                 * driver while there are alive vf's */
                if (mlx4_is_master(dev)) {
                        if (mlx4_how_many_lives_vf(dev))
                                mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n");
                }
                mlx4_stop_sense(dev);
                mlx4_unregister_device(dev);

                mlx4_cleanup_hca_info(&priv->hca_info);
                for (p = 1; p <= dev->caps.num_ports; p++) {
                        mlx4_cleanup_port_info(&priv->port[p]);
                        mlx4_CLOSE_PORT(dev, p);
                }

                /* Release per-slave resources before tearing down the
                 * resource tables the slaves were using. */
                if (mlx4_is_master(dev))
                        mlx4_free_resource_tracker(dev,
                                                   RES_TR_FREE_SLAVES_ONLY);

                mlx4_cleanup_counters_table(dev);
                mlx4_cleanup_qp_table(dev);
                mlx4_cleanup_srq_table(dev);
                mlx4_cleanup_cq_table(dev);
                mlx4_cmd_use_polling(dev);
                mlx4_cleanup_eq_table(dev);
                mlx4_cleanup_mcg_table(dev);
                mlx4_cleanup_mr_table(dev);
                mlx4_cleanup_xrcd_table(dev);
                mlx4_cleanup_pd_table(dev);

                /* Second pass frees the tracker's own structures. */
                if (mlx4_is_master(dev))
                        mlx4_free_resource_tracker(dev,
                                                   RES_TR_FREE_STRUCTS_ONLY);

                iounmap(priv->kar);
                mlx4_uar_free(dev, &priv->driver_uar);
                mlx4_cleanup_uar_table(dev);
                if (!mlx4_is_slave(dev))
                        mlx4_clear_steering(dev);
                mlx4_free_eq_table(dev);
                if (mlx4_is_master(dev))
                        mlx4_multi_func_cleanup(dev);
                mlx4_close_hca(dev);
                /* Slaves clean up the comm channel only after closing
                 * the HCA (it is needed for the CLOSE commands). */
                if (mlx4_is_slave(dev))
                        mlx4_multi_func_cleanup(dev);
                mlx4_cmd_cleanup(dev);

                if (dev->flags & MLX4_FLAG_MSI_X)
                        pci_disable_msix(pdev);
                if (dev->flags & MLX4_FLAG_SRIOV) {
                        mlx4_warn(dev, "Disabling SR-IOV\n");
                        pci_disable_sriov(pdev);
                }

                if (!mlx4_is_slave(dev))
                        mlx4_free_ownership(dev);

                /* Proxy/tunnel QP arrays are allocated during init_hca
                 * and owned by this function at this point. */
                kfree(dev->caps.qp0_tunnel);
                kfree(dev->caps.qp0_proxy);
                kfree(dev->caps.qp1_tunnel);
                kfree(dev->caps.qp1_proxy);

                kfree(priv);
                pci_release_regions(pdev);
                pci_disable_device(pdev);
                pci_set_drvdata(pdev, NULL);
        }
}
3636
3637 static int restore_current_port_types(struct mlx4_dev *dev,
3638                                       enum mlx4_port_type *types,
3639                                       enum mlx4_port_type *poss_types)
3640 {
3641         struct mlx4_priv *priv = mlx4_priv(dev);
3642         int err, i;
3643
3644         mlx4_stop_sense(dev);
3645         mutex_lock(&priv->port_mutex);
3646         for (i = 0; i < dev->caps.num_ports; i++)
3647                 dev->caps.possible_type[i + 1] = poss_types[i];
3648         err = mlx4_change_port_types(dev, types);
3649         mlx4_start_sense(dev);
3650         mutex_unlock(&priv->port_mutex);
3651         return err;
3652 }
3653
3654 int mlx4_restart_one(struct pci_dev *pdev)
3655 {
3656         struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
3657         struct mlx4_priv *priv = mlx4_priv(dev);
3658         enum mlx4_port_type curr_type[MLX4_MAX_PORTS];
3659         enum mlx4_port_type poss_type[MLX4_MAX_PORTS];
3660         int pci_dev_data, err, i;
3661
3662         pci_dev_data = priv->pci_dev_data;
3663         for (i = 0; i < dev->caps.num_ports; i++) {
3664                 curr_type[i] = dev->caps.port_type[i + 1];
3665                 poss_type[i] = dev->caps.possible_type[i + 1];
3666         }
3667
3668         mlx4_remove_one(pdev);
3669         err = __mlx4_init_one(pdev, pci_dev_data);
3670         if (err)
3671                 return err;
3672
3673         dev = pci_get_drvdata(pdev);
3674         err = restore_current_port_types(dev, curr_type, poss_type);
3675         if (err)
3676                 mlx4_err(dev, "mlx4_restart_one: could not restore original port types (%d)\n",
3677                          err);
3678         return 0;
3679 }
3680
/*
 * PCI device IDs claimed by this driver.  driver_data flags:
 *   MLX4_PCI_DEV_FORCE_SENSE_PORT - older ConnectX parts that need
 *                                   port-type sensing forced on.
 *   MLX4_PCI_DEV_IS_VF            - SR-IOV virtual-function IDs.
 * Entries with no driver_data (ConnectX-3 family) use neither flag.
 */
static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
        /* MT25408 "Hermon" SDR */
        { PCI_VDEVICE(MELLANOX, 0x6340),
                .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
        /* MT25408 "Hermon" DDR */
        { PCI_VDEVICE(MELLANOX, 0x634a),
                .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
        /* MT25408 "Hermon" QDR */
        { PCI_VDEVICE(MELLANOX, 0x6354),
                .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
        /* MT25408 "Hermon" DDR PCIe gen2 */
        { PCI_VDEVICE(MELLANOX, 0x6732),
                .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
        /* MT25408 "Hermon" QDR PCIe gen2 */
        { PCI_VDEVICE(MELLANOX, 0x673c),
                .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
        /* MT25408 "Hermon" EN 10GigE */
        { PCI_VDEVICE(MELLANOX, 0x6368),
                .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
        /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
        { PCI_VDEVICE(MELLANOX, 0x6750),
                .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
        /* MT25458 ConnectX EN 10GBASE-T 10GigE */
        { PCI_VDEVICE(MELLANOX, 0x6372),
                .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
        /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
        { PCI_VDEVICE(MELLANOX, 0x675a),
                .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
        /* MT26468 ConnectX EN 10GigE PCIe gen2*/
        { PCI_VDEVICE(MELLANOX, 0x6764),
                .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
        /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
        { PCI_VDEVICE(MELLANOX, 0x6746),
                .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
        /* MT26478 ConnectX2 40GigE PCIe gen2 */
        { PCI_VDEVICE(MELLANOX, 0x676e),
                .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
        /* MT25400 Family [ConnectX-2 Virtual Function] */
        { PCI_VDEVICE(MELLANOX, 0x1002),
                .driver_data = MLX4_PCI_DEV_IS_VF },
        /* MT27500 Family [ConnectX-3] */
        { PCI_VDEVICE(MELLANOX, 0x1003) },
        /* MT27500 Family [ConnectX-3 Virtual Function] */
        { PCI_VDEVICE(MELLANOX, 0x1004),
                .driver_data = MLX4_PCI_DEV_IS_VF },
        { PCI_VDEVICE(MELLANOX, 0x1005) }, /* MT27510 Family */
        { PCI_VDEVICE(MELLANOX, 0x1006) }, /* MT27511 Family */
        { PCI_VDEVICE(MELLANOX, 0x1007) }, /* MT27520 Family */
        { PCI_VDEVICE(MELLANOX, 0x1008) }, /* MT27521 Family */
        { PCI_VDEVICE(MELLANOX, 0x1009) }, /* MT27530 Family */
        { PCI_VDEVICE(MELLANOX, 0x100a) }, /* MT27531 Family */
        { PCI_VDEVICE(MELLANOX, 0x100b) }, /* MT27540 Family */
        { PCI_VDEVICE(MELLANOX, 0x100c) }, /* MT27541 Family */
        { PCI_VDEVICE(MELLANOX, 0x100d) }, /* MT27550 Family */
        { PCI_VDEVICE(MELLANOX, 0x100e) }, /* MT27551 Family */
        { PCI_VDEVICE(MELLANOX, 0x100f) }, /* MT27560 Family */
        { PCI_VDEVICE(MELLANOX, 0x1010) }, /* MT27561 Family */
        { 0, }
};

MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
3742
3743 static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
3744                                               pci_channel_state_t state)
3745 {
3746         mlx4_remove_one(pdev);
3747
3748         return state == pci_channel_io_perm_failure ?
3749                 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
3750 }
3751
3752 static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
3753 {
3754         int ret = __mlx4_init_one(pdev, 0);
3755
3756         return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
3757 }
3758
/* PCI AER callbacks: full teardown on error, full re-probe on reset. */
static const struct pci_error_handlers mlx4_err_handler = {
        .error_detected = mlx4_pci_err_detected,
        .slot_reset     = mlx4_pci_slot_reset,
};
3763
/*
 * PM suspend hook: the device is torn down completely and rebuilt by
 * resume().  The pm_message_t argument is ignored.  Always returns 0.
 */
static int suspend(struct pci_dev *pdev, pm_message_t state)
{
        mlx4_remove_one(pdev);

        return 0;
}
3770
/*
 * PM resume hook: re-probe the device from scratch (counterpart of
 * suspend() above; pci_dev_data 0 = no special flags).
 */
static int resume(struct pci_dev *pdev)
{
        return __mlx4_init_one(pdev, 0);
}
3775
/* PCI driver glue; registered/unregistered in mlx4_init()/mlx4_cleanup(). */
static struct pci_driver mlx4_driver = {
        .name           = DRV_NAME,
        .id_table       = mlx4_pci_table,
        .probe          = mlx4_init_one,
        .remove         = __devexit_p(mlx4_remove_one),
        .suspend        = suspend,
        .resume         = resume,
        .err_handler    = &mlx4_err_handler,
};
3785
3786 static int __init mlx4_verify_params(void)
3787 {
3788         int status;
3789
3790         status = update_defaults(&port_type_array);
3791         if (status == INVALID_STR) {
3792                 if (mlx4_fill_dbdf2val_tbl(&port_type_array.dbdf2val))
3793                         return -1;
3794         } else if (status == INVALID_DATA) {
3795                 return -1;
3796         }
3797
3798         status = update_defaults(&num_vfs);
3799         if (status == INVALID_STR) {
3800                 if (mlx4_fill_dbdf2val_tbl(&num_vfs.dbdf2val))
3801                         return -1;
3802         } else if (status == INVALID_DATA) {
3803                 return -1;
3804         }
3805
3806         status = update_defaults(&probe_vf);
3807         if (status == INVALID_STR) {
3808                 if (mlx4_fill_dbdf2val_tbl(&probe_vf.dbdf2val))
3809                         return -1;
3810         } else if (status == INVALID_DATA) {
3811                 return -1;
3812         }
3813
3814         if (msi_x < 0) {
3815                 pr_warn("mlx4_core: bad msi_x: %d\n", msi_x);
3816                 return -1;
3817         }
3818
3819         if ((log_num_mac < 0) || (log_num_mac > 7)) {
3820                 pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac);
3821                 return -1;
3822         }
3823
3824         if (log_num_vlan != 0)
3825                 pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
3826                            MLX4_LOG_NUM_VLANS);
3827
3828         if (mlx4_set_4k_mtu != -1)
3829                 pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n");
3830
3831         if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) {
3832                 pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
3833                 return -1;
3834         }
3835
3836         if (mlx4_log_num_mgm_entry_size != -1 &&
3837             (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
3838              mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
3839                 pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
3840                            "in legal range (-1 or %d..%d)\n",
3841                            mlx4_log_num_mgm_entry_size,
3842                            MLX4_MIN_MGM_LOG_ENTRY_SIZE,
3843                            MLX4_MAX_MGM_LOG_ENTRY_SIZE);
3844                 return -1;
3845         }
3846
3847         if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) {
3848                 pr_warning("mlx4_core: bad log_num_qp: %d\n",
3849                            mod_param_profile.num_qp);
3850                 return -1;
3851         }
3852
3853         if (mod_param_profile.num_srq < 10) {
3854                 pr_warning("mlx4_core: too low log_num_srq: %d\n",
3855                            mod_param_profile.num_srq);
3856                 return -1;
3857         }
3858
3859         if (mod_param_profile.num_cq < 10) {
3860                 pr_warning("mlx4_core: too low log_num_cq: %d\n",
3861                            mod_param_profile.num_cq);
3862                 return -1;
3863         }
3864
3865         if (mod_param_profile.num_mpt < 10) {
3866                 pr_warning("mlx4_core: too low log_num_mpt: %d\n",
3867                            mod_param_profile.num_mpt);
3868                 return -1;
3869         }
3870
3871         if (mod_param_profile.num_mtt_segs &&
3872             mod_param_profile.num_mtt_segs < 15) {
3873                 pr_warning("mlx4_core: too low log_num_mtt: %d\n",
3874                            mod_param_profile.num_mtt_segs);
3875                 return -1;
3876         }
3877
3878         if (mod_param_profile.num_mtt_segs > MLX4_MAX_LOG_NUM_MTT) {
3879                 pr_warning("mlx4_core: too high log_num_mtt: %d\n",
3880                            mod_param_profile.num_mtt_segs);
3881                 return -1;
3882         }
3883         return 0;
3884 }
3885
3886 static int __init mlx4_init(void)
3887 {
3888         int ret;
3889
3890         if (mlx4_verify_params())
3891                 return -EINVAL;
3892
3893         mlx4_catas_init();
3894
3895         mlx4_wq = create_singlethread_workqueue("mlx4");
3896         if (!mlx4_wq)
3897                 return -ENOMEM;
3898
3899         if (enable_sys_tune)
3900                 sys_tune_init();
3901
3902         ret = pci_register_driver(&mlx4_driver);
3903         if (ret < 0)
3904                 goto err;
3905
3906         return 0;
3907
3908 err:
3909         if (enable_sys_tune)
3910                 sys_tune_fini();
3911
3912         destroy_workqueue(mlx4_wq);
3913
3914         return ret;
3915 }
3916
/*
 * Module unload: reverse of mlx4_init().  The workqueue is destroyed
 * only after the driver is unregistered, so no per-device work can
 * still be queued when it goes away.
 */
static void __exit mlx4_cleanup(void)
{
        if (enable_sys_tune)
                sys_tune_fini();

        pci_unregister_driver(&mlx4_driver);
        destroy_workqueue(mlx4_wq);
}
3925
/* Hook init/exit into the linuxkpi module machinery. */
module_init_order(mlx4_init, SI_ORDER_MIDDLE);
module_exit(mlx4_cleanup);

/*
 * No-op FreeBSD module event handler; the real work happens in the
 * linuxkpi-driven mlx4_init()/mlx4_cleanup() above.  The DECLARE_MODULE
 * below exists so dependent modules (mlx4en/mlx4ib) can MODULE_DEPEND
 * on "mlx4" and get the right load ordering.
 */
static int
mlx4_evhand(module_t mod, int event, void *arg)
{
        return (0);
}

static moduledata_t mlx4_mod = {
        .name = "mlx4",
        .evhand = mlx4_evhand,
};
MODULE_VERSION(mlx4, 1);
DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY);
MODULE_DEPEND(mlx4, linuxkpi, 1, 1, 1);
3942