2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
4 * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
36 #include <linux/types.h>
37 #include <linux/gfp.h>
38 #include <linux/module.h>
40 #include <dev/mlx4/cmd.h>
41 #include <dev/mlx4/qp.h>
47 * QP to support BF should have bits 6,7 cleared
/* 0xc0 has exactly bits 6 and 7 set; it is passed as the skip mask when a BF range is reserved. */
49 #define MLX4_BF_QP_SKIP_MASK 0xc0
/* Threshold on the requested count: larger requests are allocated ignoring BF skipping. */
50 #define MLX4_MAX_BF_QP_RANGE 0x40
/*
 * mlx4_qp_event - deliver an asynchronous event to the QP numbered qpn.
 *
 * The QP is looked up and its refcount raised while qp_table->lock is held;
 * the QP's own event callback is then invoked with event_type after the lock
 * has been released, and the reference is dropped again.
 *
 * NOTE(review): this listing has lost its short lines (braces, the
 * declaration of qp, the conditions guarding atomic_inc() and mlx4_dbg(),
 * and the statement controlled by atomic_dec_and_test()).  Restore them from
 * the upstream file before building.
 */
52 void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
54 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
/* Lookup and reference are taken under the table lock. */
57 spin_lock(&qp_table->lock);
59 qp = __mlx4_qp_lookup(dev, qpn);
61 atomic_inc(&qp->refcount);
63 spin_unlock(&qp_table->lock);
/* Debug message for a qpn with no QP behind it (its guard is on a dropped line). */
66 mlx4_dbg(dev, "Async event for none existent QP %08x\n", qpn);
/* Hand the event to the callback stored in the QP. */
70 qp->event(qp, event_type);
/* Release the reference taken above; the last-reference action is on a dropped line. */
72 if (atomic_dec_and_test(&qp->refcount))
76 /* used for INIT/CLOSE port logic */
77 static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0)
79 /* this procedure is called after we already know we are on the master */
80 /* qp0 is either the proxy qp0, or the real qp0 */
81 u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev);
82 *proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1;
84 *real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn &&
85 qp->qpn <= dev->phys_caps.base_sqpn + 1;
87 return *real_qp0 || *proxy_qp0;
/*
 * __mlx4_qp_modify - issue the firmware command for a QP state transition.
 *
 * @mtt:       read only on RST -> INIT, to fill the MTT base address and
 *             log page size into @context
 * @cur_state: state the QP is in
 * @new_state: state requested
 * @context:   QP context; copied into the mailbox at offset 8 (and modified
 *             in place on RST -> INIT)
 * @optpar:    written big-endian into the first 32 bits of the mailbox
 * @sqd_event: non-zero sets bit 31 of the value sent alongside the QP number
 * @qp:        supplies the QP number
 * @native:    forwarded as the last argument of mlx4_cmd()
 *
 * Returns PTR_ERR() of the mailbox if it cannot be allocated.
 * NOTE(review): the other return statements, the local declarations
 * (ret, real_qp0, proxy_qp0, port), the IS_ERR-style check on the mailbox and
 * the selectors between the proxy/real QP0 assignments are on lines dropped
 * from this listing - confirm against the upstream file.
 */
90 static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
91 enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
92 struct mlx4_qp_context *context,
93 enum mlx4_qp_optpar optpar,
94 int sqd_event, struct mlx4_qp *qp, int native)
/*
 * op[cur_state][new_state] is the command opcode for that transition.
 * Pairs not listed stay zero and fail the !op[...] test further down.
 */
96 static const u16 op[MLX4_QP_NUM_STATE][MLX4_QP_NUM_STATE] = {
97 [MLX4_QP_STATE_RST] = {
98 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
99 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
100 [MLX4_QP_STATE_INIT] = MLX4_CMD_RST2INIT_QP,
102 [MLX4_QP_STATE_INIT] = {
103 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
104 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
105 [MLX4_QP_STATE_INIT] = MLX4_CMD_INIT2INIT_QP,
106 [MLX4_QP_STATE_RTR] = MLX4_CMD_INIT2RTR_QP,
108 [MLX4_QP_STATE_RTR] = {
109 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
110 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
111 [MLX4_QP_STATE_RTS] = MLX4_CMD_RTR2RTS_QP,
113 [MLX4_QP_STATE_RTS] = {
114 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
115 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
116 [MLX4_QP_STATE_RTS] = MLX4_CMD_RTS2RTS_QP,
117 [MLX4_QP_STATE_SQD] = MLX4_CMD_RTS2SQD_QP,
119 [MLX4_QP_STATE_SQD] = {
120 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
121 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
122 [MLX4_QP_STATE_RTS] = MLX4_CMD_SQD2RTS_QP,
123 [MLX4_QP_STATE_SQD] = MLX4_CMD_SQD2SQD_QP,
125 [MLX4_QP_STATE_SQER] = {
126 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
127 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
128 [MLX4_QP_STATE_RTS] = MLX4_CMD_SQERR2RTS_QP,
130 [MLX4_QP_STATE_ERR] = {
131 [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
132 [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
136 struct mlx4_priv *priv = mlx4_priv(dev);
137 struct mlx4_cmd_mailbox *mailbox;
/* Reject out-of-range states and transitions with no opcode in the table. */
143 if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE ||
144 !op[cur_state][new_state])
/* Any transition to reset is sent without a mailbox, with op modifier 2. */
147 if (op[cur_state][new_state] == MLX4_CMD_2RST_QP) {
148 ret = mlx4_cmd(dev, 0, qp->qpn, 2,
149 MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native);
/*
 * On the master, resetting a QP0 that was in neither ERR nor RST clears the
 * matching "active" flag for its port; port is (qpn & 1) + 1.
 */
150 if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR &&
151 cur_state != MLX4_QP_STATE_RST &&
152 is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
153 port = (qp->qpn & 1) + 1;
155 priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
157 priv->mfunc.master.qp0_state[port].qp0_active = 0;
/* Every other transition carries optpar and the context in a mailbox. */
162 mailbox = mlx4_alloc_cmd_mailbox(dev);
164 return PTR_ERR(mailbox);
/* RST -> INIT: record where the MTT lives and the page size in the context. */
166 if (cur_state == MLX4_QP_STATE_RST && new_state == MLX4_QP_STATE_INIT) {
167 u64 mtt_addr = mlx4_mtt_addr(dev, mtt);
168 context->mtt_base_addr_h = mtt_addr >> 32;
169 context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
170 context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
/* Mailbox layout: optpar in the first word, context copied at byte offset 8. */
173 *(__be32 *) mailbox->buf = cpu_to_be32(optpar);
174 memcpy(mailbox->buf + 8, context, sizeof *context);
/* The copy in the mailbox (not the caller's context) gets the QP number. */
176 ((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn =
177 cpu_to_be32(qp->qpn);
/* Bit 31 next to the QP number carries sqd_event. */
179 ret = mlx4_cmd(dev, mailbox->dma,
180 qp->qpn | (!!sqd_event << 31),
181 new_state == MLX4_QP_STATE_RST ? 2 : 0,
182 op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native);
/*
 * Master QP0 bookkeeping: the active flag is cleared when a QP0 goes to ERR
 * from a state other than ERR/RST, and set when it goes to RTR.
 */
184 if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
185 port = (qp->qpn & 1) + 1;
186 if (cur_state != MLX4_QP_STATE_ERR &&
187 cur_state != MLX4_QP_STATE_RST &&
188 new_state == MLX4_QP_STATE_ERR) {
190 priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
192 priv->mfunc.master.qp0_state[port].qp0_active = 0;
193 } else if (new_state == MLX4_QP_STATE_RTR) {
195 priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1;
197 priv->mfunc.master.qp0_state[port].qp0_active = 1;
201 mlx4_free_cmd_mailbox(dev, mailbox);
205 int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
206 enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
207 struct mlx4_qp_context *context,
208 enum mlx4_qp_optpar optpar,
209 int sqd_event, struct mlx4_qp *qp)
211 return __mlx4_qp_modify(dev, mtt, cur_state, new_state, context,
212 optpar, sqd_event, qp, 0);
214 EXPORT_SYMBOL_GPL(mlx4_qp_modify);
/*
 * __mlx4_qp_reserve_range - take a range of QP numbers from the QP bitmap.
 *
 * cnt and align are handed to mlx4_bitmap_alloc_range() and the result is
 * stored in *base.  When the MLX4_RESERVE_BF_QP bit is set in flags,
 * MLX4_BF_QP_SKIP_MASK is passed as the last argument, otherwise 0.
 *
 * NOTE(review): the rest of the parameter list, the statement controlled by
 * the large-cnt test, the check of *base and the return statements are on
 * lines dropped from this listing - confirm against the upstream file.
 */
216 int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
/* Normalised to 0/1: was the BF flag requested? */
219 int bf_qp = !!(flags & (u8) MLX4_RESERVE_BF_QP);
221 struct mlx4_priv *priv = mlx4_priv(dev);
222 struct mlx4_qp_table *qp_table = &priv->qp_table;
224 /* Only IPoIB uses a large cnt. In this case, just allocate
225 * as usual, ignoring bf skipping, since IPoIB does not run over RoCE
227 if (cnt > MLX4_MAX_BF_QP_RANGE && bf_qp)
230 *base = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align,
231 bf_qp ? MLX4_BF_QP_SKIP_MASK : 0);
238 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
245 /* Turn off all unsupported QP allocation flags */
/*
 * mlx4_qp_reserve_range - exported entry point for reserving QP numbers.
 *
 * flags is first masked with dev->caps.alloc_res_qp_mask.  On a
 * multi-function device the request goes out as a wrapped RES_QP /
 * RES_OP_RESERVE command and *base is read from the low half of the
 * returned parameter; otherwise __mlx4_qp_reserve_range() is called
 * directly and its result returned.
 *
 * NOTE(review): the rest of the parameter list, the local declarations, one
 * mlx4_cmd_imm() argument and the error check after the command are on lines
 * dropped from this listing.
 */
246 flags &= dev->caps.alloc_res_qp_mask;
248 if (mlx4_is_mfunc(dev)) {
/* Low half of in_param: flags in bits 24 and up, ORed with cnt; high half: align. */
249 set_param_l(&in_param, (((u32) flags) << 24) | (u32) cnt);
250 set_param_h(&in_param, align);
251 err = mlx4_cmd_imm(dev, in_param, &out_param,
252 RES_QP, RES_OP_RESERVE,
254 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
258 *base = get_param_l(&out_param);
261 return __mlx4_qp_reserve_range(dev, cnt, align, base, flags);
263 EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range);
/*
 * __mlx4_qp_release_range - give cnt QP numbers starting at base_qpn back to
 * the QP bitmap, using MLX4_USE_RR.
 *
 * NOTE(review): the statement controlled by the mlx4_is_qp_reserved() test
 * is on a dropped line; presumably reserved QPs are not freed - confirm.
 */
265 void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
267 struct mlx4_priv *priv = mlx4_priv(dev);
268 struct mlx4_qp_table *qp_table = &priv->qp_table;
270 if (mlx4_is_qp_reserved(dev, (u32) base_qpn))
272 mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, MLX4_USE_RR);
/*
 * mlx4_qp_release_range - exported entry point for releasing QP numbers.
 *
 * On a multi-function device a wrapped RES_QP / RES_OP_RESERVE command is
 * sent with base_qpn in the low half and cnt in the high half of the
 * parameter, and a warning is logged on failure; otherwise
 * __mlx4_qp_release_range() is called directly.
 *
 * NOTE(review): the local declarations, one mlx4_cmd() argument and the
 * if/else structure around the warning are on lines dropped from this listing.
 */
275 void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
280 if (mlx4_is_mfunc(dev)) {
281 set_param_l(&in_param, base_qpn);
282 set_param_h(&in_param, cnt);
283 err = mlx4_cmd(dev, in_param, RES_QP, RES_OP_RESERVE,
285 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
287 mlx4_warn(dev, "Failed to release qp range"
288 " base:%d cnt:%d\n", base_qpn, cnt);
291 __mlx4_qp_release_range(dev, base_qpn, cnt);
293 EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
/*
 * __mlx4_qp_alloc_icm - call mlx4_table_get() for qpn on each per-QP table.
 *
 * Tables are taken in the order qp, auxc, altc, rdmarc, cmpt.  The
 * mlx4_table_put() calls that follow release rdmarc, altc, auxc and qp, i.e.
 * the earlier tables in reverse order.
 *
 * NOTE(review): the error checks after each get, the goto labels that
 * separate the puts, and the return statements are on lines dropped from
 * this listing; the puts are presumably the error unwind - confirm.
 */
295 int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
297 struct mlx4_priv *priv = mlx4_priv(dev);
298 struct mlx4_qp_table *qp_table = &priv->qp_table;
301 err = mlx4_table_get(dev, &qp_table->qp_table, qpn);
305 err = mlx4_table_get(dev, &qp_table->auxc_table, qpn);
309 err = mlx4_table_get(dev, &qp_table->altc_table, qpn);
313 err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn);
317 err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn);
/* Release of the tables taken before a later step (labels not shown here). */
324 mlx4_table_put(dev, &qp_table->rdmarc_table, qpn);
327 mlx4_table_put(dev, &qp_table->altc_table, qpn);
330 mlx4_table_put(dev, &qp_table->auxc_table, qpn);
333 mlx4_table_put(dev, &qp_table->qp_table, qpn);
339 static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
343 if (mlx4_is_mfunc(dev)) {
344 set_param_l(¶m, qpn);
345 return mlx4_cmd_imm(dev, param, ¶m, RES_QP, RES_OP_MAP_ICM,
346 MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A,
349 return __mlx4_qp_alloc_icm(dev, qpn);
352 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
354 struct mlx4_priv *priv = mlx4_priv(dev);
355 struct mlx4_qp_table *qp_table = &priv->qp_table;
357 mlx4_table_put(dev, &qp_table->cmpt_table, qpn);
358 mlx4_table_put(dev, &qp_table->rdmarc_table, qpn);
359 mlx4_table_put(dev, &qp_table->altc_table, qpn);
360 mlx4_table_put(dev, &qp_table->auxc_table, qpn);
361 mlx4_table_put(dev, &qp_table->qp_table, qpn);
/*
 * mlx4_qp_free_icm - counterpart of mlx4_qp_alloc_icm().
 *
 * On a multi-function device a RES_QP / RES_OP_MAP_ICM command with opcode
 * MLX4_CMD_FREE_RES is sent with qpn in the low half of the parameter, and a
 * warning is logged if it fails; otherwise __mlx4_qp_free_icm() is called.
 *
 * NOTE(review): the declaration of in_param, the last mlx4_cmd() argument
 * and the else branch structure are on lines dropped from this listing.
 */
364 static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
368 if (mlx4_is_mfunc(dev)) {
369 set_param_l(&in_param, qpn);
370 if (mlx4_cmd(dev, in_param, RES_QP, RES_OP_MAP_ICM,
371 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
373 mlx4_warn(dev, "Failed to free icm of qp:%d\n", qpn);
375 __mlx4_qp_free_icm(dev, qpn);
/*
 * mlx4_qp_alloc - set up ICM for qpn and register qp in the device's QP tree.
 *
 * After mlx4_qp_alloc_icm(), qp is inserted into dev->qp_table_tree under
 * qp_table->lock (interrupts disabled), its refcount is set to 1 and its
 * "free" completion is initialised.
 *
 * NOTE(review): the early argument checks, the assignment of qp->qpn, the
 * error tests, the goto labels and the return statements are on lines
 * dropped from this listing; the trailing mlx4_qp_free_icm() call is
 * presumably the unwind for a failed tree insert - confirm.
 */
378 int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
380 struct mlx4_priv *priv = mlx4_priv(dev);
381 struct mlx4_qp_table *qp_table = &priv->qp_table;
389 err = mlx4_qp_alloc_icm(dev, qpn);
393 spin_lock_irq(&qp_table->lock);
/* Tree key is the QP number masked with num_qps - 1. */
394 err = radix_tree_insert(&dev->qp_table_tree, qp->qpn &
395 (dev->caps.num_qps - 1), qp);
396 spin_unlock_irq(&qp_table->lock);
/* One initial reference; mlx4_qp_free() drops it and waits on qp->free. */
400 atomic_set(&qp->refcount, 1);
401 init_completion(&qp->free);
406 mlx4_qp_free_icm(dev, qpn);
410 EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
/*
 * mlx4_qp_remove - delete qp from dev->qp_table_tree.
 *
 * The entry is removed under qp_table->lock with interrupt state saved and
 * restored, using the same masked key (qpn & (num_qps - 1)) that
 * mlx4_qp_alloc() inserts with.
 *
 * NOTE(review): the declaration of flags is on a line dropped from this
 * listing.
 */
412 void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp)
414 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
417 spin_lock_irqsave(&qp_table->lock, flags);
418 radix_tree_delete(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1));
419 spin_unlock_irqrestore(&qp_table->lock, flags);
421 EXPORT_SYMBOL_GPL(mlx4_qp_remove);
/*
 * mlx4_qp_free - drop a reference on qp, wait for qp->free to complete,
 * then release the QP's ICM via mlx4_qp_free_icm().
 *
 * NOTE(review): the statement controlled by atomic_dec_and_test() is on a
 * line dropped from this listing.
 */
423 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
425 if (atomic_dec_and_test(&qp->refcount))
/* Blocks until the completion is signalled. */
427 wait_for_completion(&qp->free);
429 mlx4_qp_free_icm(dev, qp->qpn);
431 EXPORT_SYMBOL_GPL(mlx4_qp_free);
433 static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
435 return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP,
436 MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
/*
 * mlx4_init_qp_table - initialise the QP table: lock, lookup tree, reserved
 * regions, allocation bitmap and (multi-function only) the per-port
 * proxy/tunnel special QP numbers, then configure the special QP base.
 *
 * NOTE(review): this listing has lost its short lines (several local
 * declarations, return statements, parts of the swap in the sort, the error
 * label before the kfree() calls, and comment delimiters).  Restore them from
 * the upstream file before building.
 */
439 int mlx4_init_qp_table(struct mlx4_dev *dev)
441 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
443 int reserved_from_top = 0;
444 int reserved_from_bot;
/* The lock and the lookup tree are set up before the slave check. */
447 spin_lock_init(&qp_table->lock);
448 INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
449 if (mlx4_is_slave(dev))
453 * We reserve 2 extra QPs per port for the special QPs. The
454 * block of special QPs must be aligned to a multiple of 8, so
457 * We also reserve the MSB of the 24-bit QP number to indicate
458 * that a QP is an XRC QP.
/* Special QP base: the firmware-reserved count rounded up to a multiple of 8. */
460 dev->phys_caps.base_sqpn =
461 ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8);
/*
 * Order region indices 1..MLX4_NUM_QP_REGION-1 by comparing adjacent
 * reserved counts (a larger count moves toward the lower index), then hand
 * each region its base by stepping last_base down from num_qps.
 */
464 int sort[MLX4_NUM_QP_REGION];
466 int last_base = dev->caps.num_qps;
468 for (i = 1; i < MLX4_NUM_QP_REGION; ++i)
471 for (i = MLX4_NUM_QP_REGION; i > 0; --i) {
472 for (j = 2; j < i; ++j) {
473 if (dev->caps.reserved_qps_cnt[sort[j]] >
474 dev->caps.reserved_qps_cnt[sort[j - 1]]) {
476 sort[j] = sort[j - 1];
482 for (i = 1; i < MLX4_NUM_QP_REGION; ++i) {
483 last_base -= dev->caps.reserved_qps_cnt[sort[i]];
484 dev->caps.reserved_qps_base[sort[i]] = last_base;
486 dev->caps.reserved_qps_cnt[sort[i]];
491 /* Reserve 8 real SQPs in both native and SRIOV modes.
492 * In addition, in SRIOV mode, reserve 8 proxy SQPs per function
493 * (for all PFs and VFs), and 8 corresponding tunnel QPs.
494 * Each proxy SQP works opposite its own tunnel QP.
496 * The QPs are arranged as follows:
498 * b. All the proxy SQPs (8 per function)
499 * c. All the tunnel QPs (8 per function)
501 reserved_from_bot = mlx4_num_reserved_sqps(dev);
502 if (reserved_from_bot + reserved_from_top > dev->caps.num_qps) {
503 mlx4_err(dev, "Number of reserved QPs is higher than number "
504 "of QPs, increase the value of log_num_qp\n");
508 err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
509 (1 << 23) - 1, reserved_from_bot,
514 if (mlx4_is_mfunc(dev)) {
516 dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8;
517 dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX;
519 /* In mfunc, calculate proxy and tunnel qp offsets for the PF here,
520 * since the PF does not call mlx4_slave_caps */
/* One zeroed u32 per port for each of the four special QP number arrays. */
521 dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
522 dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
523 dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
524 dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
526 if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
527 !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
/*
 * Per port k: proxy QP0 is base_proxy_sqpn + 8 * master function + k, proxy
 * QP1 adds MLX4_MAX_PORTS to that, and each tunnel QP is its proxy number
 * plus 8 * MLX4_MFUNC_MAX.
 */
532 for (k = 0; k < dev->caps.num_ports; k++) {
533 dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn +
534 8 * mlx4_master_func_num(dev) + k;
535 dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX;
536 dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn +
537 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k;
538 dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX;
/* Tell the firmware where the special QP block starts. */
543 err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn);
/* Cleanup: free the four arrays and reset the pointers (label not shown here). */
549 kfree(dev->caps.qp0_tunnel);
550 kfree(dev->caps.qp0_proxy);
551 kfree(dev->caps.qp1_tunnel);
552 kfree(dev->caps.qp1_proxy);
553 dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
554 dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
/*
 * mlx4_cleanup_qp_table - undo mlx4_init_qp_table(): issue
 * CONF_SPECIAL_QP with base 0 and tear down the QP bitmap.
 *
 * NOTE(review): the statement controlled by the mlx4_is_slave() test is on a
 * line dropped from this listing; presumably slaves skip the cleanup - confirm.
 */
558 void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
560 if (mlx4_is_slave(dev))
563 mlx4_CONF_SPECIAL_QP(dev, 0);
564 mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap);
/*
 * mlx4_qp_query - read a QP's context from the device.
 *
 * Issues MLX4_CMD_QUERY_QP for qp->qpn with a freshly allocated mailbox as
 * the output buffer and copies sizeof *context bytes from byte offset 8 of
 * the mailbox into context.  The mailbox is freed before returning.
 *
 * Returns PTR_ERR() of the mailbox if it cannot be allocated.
 * NOTE(review): the declaration of err, the mailbox error test, the last
 * mlx4_cmd_box() argument, the condition guarding the memcpy() and the final
 * return are on lines dropped from this listing.
 */
567 int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
568 struct mlx4_qp_context *context)
570 struct mlx4_cmd_mailbox *mailbox;
573 mailbox = mlx4_alloc_cmd_mailbox(dev);
575 return PTR_ERR(mailbox);
577 err = mlx4_cmd_box(dev, 0, mailbox->dma, qp->qpn, 0,
578 MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A,
/* Same offset at which __mlx4_qp_modify() places the context in its mailbox. */
581 memcpy(context, mailbox->buf + 8, sizeof *context);
583 mlx4_free_cmd_mailbox(dev, mailbox);
586 EXPORT_SYMBOL_GPL(mlx4_qp_query);
/*
 * mlx4_qp_to_ready - step a QP through the consecutive states in states[].
 *
 * For each adjacent pair (states[i], states[i + 1]) the top four bits
 * (28..31) of context->flags are replaced with the target state, then
 * mlx4_qp_modify() is called for that transition.  *qp_state is updated to
 * the state of the step just performed.
 *
 * NOTE(review): the entries of states[], the declarations of err and i, the
 * remaining mlx4_qp_modify() arguments, the error test around mlx4_err() and
 * the return statements are on lines dropped from this listing.
 */
588 int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
589 struct mlx4_qp_context *context,
590 struct mlx4_qp *qp, enum mlx4_qp_state *qp_state)
594 enum mlx4_qp_state states[] = {
601 for (i = 0; i < ARRAY_SIZE(states) - 1; i++) {
/* Clear the state nibble, then write the target state into it (big-endian field). */
602 context->flags &= cpu_to_be32(~(0xf << 28));
603 context->flags |= cpu_to_be32(states[i + 1] << 28);
604 err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1],
607 mlx4_err(dev, "Failed to bring QP to state: "
608 "%d with error: %d\n",
613 *qp_state = states[i + 1];
618 EXPORT_SYMBOL_GPL(mlx4_qp_to_ready);