/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
36 #include <linux/types.h>
37 #include <linux/gfp.h>
38 #include <linux/module.h>
40 #include <linux/mlx4/cmd.h>
41 #include <linux/mlx4/qp.h>
/*
 * QP to support BF should have bits 6,7 cleared.
 *
 * MLX4_BF_QP_SKIP_MASK is handed to the bitmap allocator as the skip mask
 * when a BF-capable QP range is reserved.  Requests for more than
 * MLX4_MAX_BF_QP_RANGE QPs fall back to a plain allocation without the
 * skip mask.
 */
#define MLX4_BF_QP_SKIP_MASK	0xc0
#define MLX4_MAX_BF_QP_RANGE	0x40
52 void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
54 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
57 spin_lock(&qp_table->lock);
59 qp = __mlx4_qp_lookup(dev, qpn);
61 atomic_inc(&qp->refcount);
63 spin_unlock(&qp_table->lock);
66 mlx4_dbg(dev, "Async event for none existent QP %08x\n", qpn);
70 qp->event(qp, event_type);
72 if (atomic_dec_and_test(&qp->refcount))
76 /* used for INIT/CLOSE port logic */
77 static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0)
79 /* this procedure is called after we already know we are on the master */
80 /* qp0 is either the proxy qp0, or the real qp0 */
81 u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev);
82 *proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1;
84 *real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn &&
85 qp->qpn <= dev->phys_caps.base_sqpn + 1;
87 return *real_qp0 || *proxy_qp0;
90 static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
91 enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
92 struct mlx4_qp_context *context,
93 enum mlx4_qp_optpar optpar,
94 int sqd_event, struct mlx4_qp *qp, int native)
96 static const u16 op[MLX4_QP_NUM_STATE][MLX4_QP_NUM_STATE] = {
97 [MLX4_QP_STATE_RST] = {
98 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
99 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
100 [MLX4_QP_STATE_INIT] = MLX4_CMD_RST2INIT_QP,
102 [MLX4_QP_STATE_INIT] = {
103 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
104 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
105 [MLX4_QP_STATE_INIT] = MLX4_CMD_INIT2INIT_QP,
106 [MLX4_QP_STATE_RTR] = MLX4_CMD_INIT2RTR_QP,
108 [MLX4_QP_STATE_RTR] = {
109 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
110 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
111 [MLX4_QP_STATE_RTS] = MLX4_CMD_RTR2RTS_QP,
113 [MLX4_QP_STATE_RTS] = {
114 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
115 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
116 [MLX4_QP_STATE_RTS] = MLX4_CMD_RTS2RTS_QP,
117 [MLX4_QP_STATE_SQD] = MLX4_CMD_RTS2SQD_QP,
119 [MLX4_QP_STATE_SQD] = {
120 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
121 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
122 [MLX4_QP_STATE_RTS] = MLX4_CMD_SQD2RTS_QP,
123 [MLX4_QP_STATE_SQD] = MLX4_CMD_SQD2SQD_QP,
125 [MLX4_QP_STATE_SQER] = {
126 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
127 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
128 [MLX4_QP_STATE_RTS] = MLX4_CMD_SQERR2RTS_QP,
130 [MLX4_QP_STATE_ERR] = {
131 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
132 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
136 struct mlx4_priv *priv = mlx4_priv(dev);
137 struct mlx4_cmd_mailbox *mailbox;
143 if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE ||
144 !op[cur_state][new_state])
147 if (op[cur_state][new_state] == MLX4_CMD_2RST_QP) {
148 ret = mlx4_cmd(dev, 0, qp->qpn, 2,
149 MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native);
150 if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR &&
151 cur_state != MLX4_QP_STATE_RST &&
152 is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
153 port = (qp->qpn & 1) + 1;
155 priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
157 priv->mfunc.master.qp0_state[port].qp0_active = 0;
162 mailbox = mlx4_alloc_cmd_mailbox(dev);
164 return PTR_ERR(mailbox);
166 if (cur_state == MLX4_QP_STATE_RST && new_state == MLX4_QP_STATE_INIT) {
167 u64 mtt_addr = mlx4_mtt_addr(dev, mtt);
168 context->mtt_base_addr_h = mtt_addr >> 32;
169 context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
170 context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
173 *(__be32 *) mailbox->buf = cpu_to_be32(optpar);
174 memcpy(mailbox->buf + 8, context, sizeof *context);
176 ((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn =
177 cpu_to_be32(qp->qpn);
179 ret = mlx4_cmd(dev, mailbox->dma,
180 qp->qpn | (!!sqd_event << 31),
181 new_state == MLX4_QP_STATE_RST ? 2 : 0,
182 op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native);
184 if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
185 port = (qp->qpn & 1) + 1;
186 if (cur_state != MLX4_QP_STATE_ERR &&
187 cur_state != MLX4_QP_STATE_RST &&
188 new_state == MLX4_QP_STATE_ERR) {
190 priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
192 priv->mfunc.master.qp0_state[port].qp0_active = 0;
193 } else if (new_state == MLX4_QP_STATE_RTR) {
195 priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1;
197 priv->mfunc.master.qp0_state[port].qp0_active = 1;
201 mlx4_free_cmd_mailbox(dev, mailbox);
205 int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
206 enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
207 struct mlx4_qp_context *context,
208 enum mlx4_qp_optpar optpar,
209 int sqd_event, struct mlx4_qp *qp)
211 return __mlx4_qp_modify(dev, mtt, cur_state, new_state, context,
212 optpar, sqd_event, qp, 0);
214 EXPORT_SYMBOL_GPL(mlx4_qp_modify);
216 int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
219 int bf_qp = !!(flags & (u8) MLX4_RESERVE_BF_QP);
221 struct mlx4_priv *priv = mlx4_priv(dev);
222 struct mlx4_qp_table *qp_table = &priv->qp_table;
224 /* Only IPoIB uses a large cnt. In this case, just allocate
225 * as usual, ignoring bf skipping, since IPoIB does not run over RoCE
227 if (cnt > MLX4_MAX_BF_QP_RANGE && bf_qp)
230 *base = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align,
231 bf_qp ? MLX4_BF_QP_SKIP_MASK : 0);
238 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
245 if (mlx4_is_mfunc(dev)) {
246 set_param_l(&in_param, (((u32) flags) << 24) | (u32) cnt);
247 set_param_h(&in_param, align);
248 err = mlx4_cmd_imm(dev, in_param, &out_param,
249 RES_QP, RES_OP_RESERVE,
251 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
255 *base = get_param_l(&out_param);
258 return __mlx4_qp_reserve_range(dev, cnt, align, base, flags);
260 EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range);
262 void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
264 struct mlx4_priv *priv = mlx4_priv(dev);
265 struct mlx4_qp_table *qp_table = &priv->qp_table;
267 if (mlx4_is_qp_reserved(dev, (u32) base_qpn))
269 mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, MLX4_USE_RR);
272 void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
277 if (mlx4_is_mfunc(dev)) {
278 set_param_l(&in_param, base_qpn);
279 set_param_h(&in_param, cnt);
280 err = mlx4_cmd(dev, in_param, RES_QP, RES_OP_RESERVE,
282 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
284 mlx4_warn(dev, "Failed to release qp range"
285 " base:%d cnt:%d\n", base_qpn, cnt);
288 __mlx4_qp_release_range(dev, base_qpn, cnt);
290 EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
292 int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
294 struct mlx4_priv *priv = mlx4_priv(dev);
295 struct mlx4_qp_table *qp_table = &priv->qp_table;
298 err = mlx4_table_get(dev, &qp_table->qp_table, qpn);
302 err = mlx4_table_get(dev, &qp_table->auxc_table, qpn);
306 err = mlx4_table_get(dev, &qp_table->altc_table, qpn);
310 err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn);
314 err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn);
321 mlx4_table_put(dev, &qp_table->rdmarc_table, qpn);
324 mlx4_table_put(dev, &qp_table->altc_table, qpn);
327 mlx4_table_put(dev, &qp_table->auxc_table, qpn);
330 mlx4_table_put(dev, &qp_table->qp_table, qpn);
336 static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
340 if (mlx4_is_mfunc(dev)) {
341 set_param_l(¶m, qpn);
342 return mlx4_cmd_imm(dev, param, ¶m, RES_QP, RES_OP_MAP_ICM,
343 MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A,
346 return __mlx4_qp_alloc_icm(dev, qpn);
349 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
351 struct mlx4_priv *priv = mlx4_priv(dev);
352 struct mlx4_qp_table *qp_table = &priv->qp_table;
354 mlx4_table_put(dev, &qp_table->cmpt_table, qpn);
355 mlx4_table_put(dev, &qp_table->rdmarc_table, qpn);
356 mlx4_table_put(dev, &qp_table->altc_table, qpn);
357 mlx4_table_put(dev, &qp_table->auxc_table, qpn);
358 mlx4_table_put(dev, &qp_table->qp_table, qpn);
361 static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
365 if (mlx4_is_mfunc(dev)) {
366 set_param_l(&in_param, qpn);
367 if (mlx4_cmd(dev, in_param, RES_QP, RES_OP_MAP_ICM,
368 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
370 mlx4_warn(dev, "Failed to free icm of qp:%d\n", qpn);
372 __mlx4_qp_free_icm(dev, qpn);
375 int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
377 struct mlx4_priv *priv = mlx4_priv(dev);
378 struct mlx4_qp_table *qp_table = &priv->qp_table;
386 err = mlx4_qp_alloc_icm(dev, qpn);
390 spin_lock_irq(&qp_table->lock);
391 err = radix_tree_insert(&dev->qp_table_tree, qp->qpn &
392 (dev->caps.num_qps - 1), qp);
393 spin_unlock_irq(&qp_table->lock);
397 atomic_set(&qp->refcount, 1);
398 init_completion(&qp->free);
403 mlx4_qp_free_icm(dev, qpn);
407 EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
409 void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp)
411 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
414 spin_lock_irqsave(&qp_table->lock, flags);
415 radix_tree_delete(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1));
416 spin_unlock_irqrestore(&qp_table->lock, flags);
418 EXPORT_SYMBOL_GPL(mlx4_qp_remove);
420 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
422 if (atomic_dec_and_test(&qp->refcount))
424 wait_for_completion(&qp->free);
426 mlx4_qp_free_icm(dev, qp->qpn);
428 EXPORT_SYMBOL_GPL(mlx4_qp_free);
430 static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
432 return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP,
433 MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
436 int mlx4_init_qp_table(struct mlx4_dev *dev)
438 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
440 int reserved_from_top = 0;
441 int reserved_from_bot;
444 spin_lock_init(&qp_table->lock);
445 INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
446 if (mlx4_is_slave(dev))
450 * We reserve 2 extra QPs per port for the special QPs. The
451 * block of special QPs must be aligned to a multiple of 8, so
454 * We also reserve the MSB of the 24-bit QP number to indicate
455 * that a QP is an XRC QP.
457 dev->phys_caps.base_sqpn =
458 ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8);
461 int sort[MLX4_NUM_QP_REGION];
463 int last_base = dev->caps.num_qps;
465 for (i = 1; i < MLX4_NUM_QP_REGION; ++i)
468 for (i = MLX4_NUM_QP_REGION; i > 0; --i) {
469 for (j = 2; j < i; ++j) {
470 if (dev->caps.reserved_qps_cnt[sort[j]] >
471 dev->caps.reserved_qps_cnt[sort[j - 1]]) {
473 sort[j] = sort[j - 1];
479 for (i = 1; i < MLX4_NUM_QP_REGION; ++i) {
480 last_base -= dev->caps.reserved_qps_cnt[sort[i]];
481 dev->caps.reserved_qps_base[sort[i]] = last_base;
483 dev->caps.reserved_qps_cnt[sort[i]];
488 /* Reserve 8 real SQPs in both native and SRIOV modes.
489 * In addition, in SRIOV mode, reserve 8 proxy SQPs per function
490 * (for all PFs and VFs), and 8 corresponding tunnel QPs.
491 * Each proxy SQP works opposite its own tunnel QP.
493 * The QPs are arranged as follows:
495 * b. All the proxy SQPs (8 per function)
496 * c. All the tunnel QPs (8 per function)
498 reserved_from_bot = mlx4_num_reserved_sqps(dev);
499 if (reserved_from_bot + reserved_from_top > dev->caps.num_qps) {
500 mlx4_err(dev, "Number of reserved QPs is higher than number "
501 "of QPs, increase the value of log_num_qp\n");
505 err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
506 (1 << 23) - 1, reserved_from_bot,
511 if (mlx4_is_mfunc(dev)) {
513 dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8;
514 dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX;
516 /* In mfunc, calculate proxy and tunnel qp offsets for the PF here,
517 * since the PF does not call mlx4_slave_caps */
518 dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
519 dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
520 dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
521 dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
523 if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
524 !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
529 for (k = 0; k < dev->caps.num_ports; k++) {
530 dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn +
531 8 * mlx4_master_func_num(dev) + k;
532 dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX;
533 dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn +
534 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k;
535 dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX;
540 err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn);
546 kfree(dev->caps.qp0_tunnel);
547 kfree(dev->caps.qp0_proxy);
548 kfree(dev->caps.qp1_tunnel);
549 kfree(dev->caps.qp1_proxy);
550 dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
551 dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
555 void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
557 if (mlx4_is_slave(dev))
560 mlx4_CONF_SPECIAL_QP(dev, 0);
561 mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap);
564 int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
565 struct mlx4_qp_context *context)
567 struct mlx4_cmd_mailbox *mailbox;
570 mailbox = mlx4_alloc_cmd_mailbox(dev);
572 return PTR_ERR(mailbox);
574 err = mlx4_cmd_box(dev, 0, mailbox->dma, qp->qpn, 0,
575 MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A,
578 memcpy(context, mailbox->buf + 8, sizeof *context);
580 mlx4_free_cmd_mailbox(dev, mailbox);
583 EXPORT_SYMBOL_GPL(mlx4_qp_query);
585 int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
586 struct mlx4_qp_context *context,
587 struct mlx4_qp *qp, enum mlx4_qp_state *qp_state)
591 enum mlx4_qp_state states[] = {
598 for (i = 0; i < ARRAY_SIZE(states) - 1; i++) {
599 context->flags &= cpu_to_be32(~(0xf << 28));
600 context->flags |= cpu_to_be32(states[i + 1] << 28);
601 err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1],
604 mlx4_err(dev, "Failed to bring QP to state: "
605 "%d with error: %d\n",
610 *qp_state = states[i + 1];
615 EXPORT_SYMBOL_GPL(mlx4_qp_to_ready);