/*
 * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <sys/types.h>

int mlx5_single_threaded = 0;

static inline int is_xrc_tgt(int type)
{
	return type == IBV_QPT_XRC_RECV;
}

int mlx5_query_device(struct ibv_context *context, struct ibv_device_attr *attr)
{
	struct ibv_query_device cmd;
	uint64_t raw_fw_ver;
	unsigned major, minor, sub_minor;
	int ret;

	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, sizeof cmd);
	if (ret)
		return ret;

	major = (raw_fw_ver >> 32) & 0xffff;
	minor = (raw_fw_ver >> 16) & 0xffff;
	sub_minor = raw_fw_ver & 0xffff;

	snprintf(attr->fw_ver, sizeof attr->fw_ver,
		 "%d.%d.%04d", major, minor, sub_minor);

	return 0;
}

#define READL(ptr) (*((uint32_t *)(ptr)))
static int mlx5_read_clock(struct ibv_context *context, uint64_t *cycles)
{
	unsigned int clockhi, clocklo, clockhi1;
	int i;
	struct mlx5_context *ctx = to_mctx(context);

	if (!ctx->hca_core_clock)
		return -EOPNOTSUPP;

	/* Handle wraparound */
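	/*
	 * The free-running counter is exposed as two 32-bit words. Read
	 * high, then low, then high again: if both high reads agree, no
	 * carry occurred in between and the combined value is consistent.
	 */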
	for (i = 0; i < 2; i++) {
		clockhi = be32toh(READL(ctx->hca_core_clock));
		clocklo = be32toh(READL(ctx->hca_core_clock + 4));
		clockhi1 = be32toh(READL(ctx->hca_core_clock));
		if (clockhi == clockhi1)
			break;
	}

	*cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo;

	return 0;
}

int mlx5_query_rt_values(struct ibv_context *context,
			 struct ibv_values_ex *values)
{
	uint32_t comp_mask = 0;
	int err = 0;

	if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) {
		uint64_t cycles;

		err = mlx5_read_clock(context, &cycles);
		if (!err) {
			values->raw_clock.tv_sec = 0;
			values->raw_clock.tv_nsec = cycles;
			comp_mask |= IBV_VALUES_MASK_RAW_CLOCK;
		}
	}

	values->comp_mask = comp_mask;

	return err;
}
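/*
 * Caller-side sketch (illustrative, not part of this file): reading the
 * raw cycle counter through the generic verbs API. The value comes back
 * in raw_clock.tv_nsec and is in HCA clock cycles, not nanoseconds.
 *
 *	struct ibv_values_ex v = { .comp_mask = IBV_VALUES_MASK_RAW_CLOCK };
 *
 *	if (!ibv_query_rt_values_ex(ctx, &v) &&
 *	    (v.comp_mask & IBV_VALUES_MASK_RAW_CLOCK))
 *		use(v.raw_clock.tv_nsec);
 */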
int mlx5_query_port(struct ibv_context *context, uint8_t port,
		    struct ibv_port_attr *attr)
{
	struct ibv_query_port cmd;

	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd);
}

struct ibv_pd *mlx5_alloc_pd(struct ibv_context *context)
{
	struct ibv_alloc_pd cmd;
	struct mlx5_alloc_pd_resp resp;
	struct mlx5_pd *pd;

	pd = calloc(1, sizeof *pd);
	if (!pd)
		return NULL;

	if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd,
			     &resp.ibv_resp, sizeof resp)) {
		free(pd);
		return NULL;
	}

	pd->pdn = resp.pdn;

	return &pd->ibv_pd;
}

int mlx5_free_pd(struct ibv_pd *pd)
{
	int ret;

	ret = ibv_cmd_dealloc_pd(pd);
	if (ret)
		return ret;

	free(to_mpd(pd));
	return 0;
}

struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
			   int acc)
{
	struct mlx5_mr *mr;
	struct ibv_reg_mr cmd;
	int ret;
	enum ibv_access_flags access = (enum ibv_access_flags)acc;
	struct ibv_reg_mr_resp resp;

	mr = calloc(1, sizeof(*mr));
	if (!mr)
		return NULL;

	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access,
			     &(mr->ibv_mr), &cmd, sizeof(cmd), &resp,
			     sizeof(resp));
	if (ret) {
		mlx5_free_buf(&(mr->buf));
		free(mr);
		return NULL;
	}
	mr->alloc_flags = acc;

	return &mr->ibv_mr;
}

int mlx5_rereg_mr(struct ibv_mr *ibmr, int flags, struct ibv_pd *pd, void *addr,
		  size_t length, int access)
{
	struct ibv_rereg_mr cmd;
	struct ibv_rereg_mr_resp resp;

	if (flags & IBV_REREG_MR_KEEP_VALID)
		return ENOSYS;

	return ibv_cmd_rereg_mr(ibmr, flags, addr, length, (uintptr_t)addr,
				access, pd, &cmd, sizeof(cmd), &resp,
				sizeof(resp));
}

int mlx5_dereg_mr(struct ibv_mr *ibmr)
{
	int ret;
	struct mlx5_mr *mr = to_mmr(ibmr);

	ret = ibv_cmd_dereg_mr(ibmr);
	if (ret)
		return ret;

	free(mr);
	return 0;
}

struct ibv_mw *mlx5_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type)
{
	struct ibv_mw *mw;
	struct ibv_alloc_mw cmd;
	struct ibv_alloc_mw_resp resp;
	int ret;

	mw = malloc(sizeof(*mw));
	if (!mw)
		return NULL;

	memset(mw, 0, sizeof(*mw));

	ret = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp,
			       sizeof(resp));
	if (ret) {
		free(mw);
		return NULL;
	}

	return mw;
}

int mlx5_dealloc_mw(struct ibv_mw *mw)
{
	int ret;
	struct ibv_dealloc_mw cmd;

	ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd));
	if (ret)
		return ret;

	free(mw);
	return 0;
}

int mlx5_round_up_power_of_two(long long sz)
{
	long long ret;

	for (ret = 1; ret < sz; ret <<= 1)
		; /* nothing */

	if (ret > INT_MAX) {
		fprintf(stderr, "%s: roundup overflow\n", __func__);
		return -ENOMEM;
	}

	return (int)ret;
}

static int align_queue_size(long long req)
{
	return mlx5_round_up_power_of_two(req);
}
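/*
 * Note that queue depths are always rounded up to the next power of
 * two, e.g. align_queue_size(100) == 128, so a consumer may be granted
 * a deeper queue than it requested.
 */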
static int get_cqe_size(void)
{
	char *env;
	int size = 64;

	env = getenv("MLX5_CQE_SIZE");
	if (env)
		size = atoi(env);

	switch (size) {
	case 64:
	case 128:
		return size;

	default:
		return -EINVAL;
	}
}

static int use_scatter_to_cqe(void)
{
	char *env;

	env = getenv("MLX5_SCATTER_TO_CQE");
	if (env && !strcmp(env, "0"))
		return 0;

	return 1;
}

static int srq_sig_enabled(void)
{
	char *env;

	env = getenv("MLX5_SRQ_SIGNATURE");
	if (env)
		return 1;

	return 0;
}

static int qp_sig_enabled(void)
{
	char *env;

	env = getenv("MLX5_QP_SIGNATURE");
	if (env)
		return 1;

	return 0;
}
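/*
 * Summary of the environment knobs read above: MLX5_CQE_SIZE selects
 * the CQE stride (64 or 128 bytes), MLX5_SCATTER_TO_CQE=0 disables
 * scattering small receive payloads into the CQE, and
 * MLX5_SRQ_SIGNATURE / MLX5_QP_SIGNATURE enable software signature
 * protection on the respective work queues.
 */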
enum {
	CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
				       IBV_WC_EX_WITH_COMPLETION_TIMESTAMP |
				       IBV_WC_EX_WITH_CVLAN |
				       IBV_WC_EX_WITH_FLOW_TAG
};

enum {
	CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS
};

enum {
	CREATE_CQ_SUPPORTED_FLAGS = IBV_CREATE_CQ_ATTR_SINGLE_THREADED
};

static struct ibv_cq_ex *create_cq(struct ibv_context *context,
				   const struct ibv_cq_init_attr_ex *cq_attr,
				   int cq_alloc_flags,
				   struct mlx5dv_cq_init_attr *mlx5cq_attr)
{
	struct mlx5_create_cq cmd;
	struct mlx5_create_cq_resp resp;
	struct mlx5_cq *cq;
	int cqe_sz;
	int ret;
	int ncqe;
	struct mlx5_context *mctx = to_mctx(context);
	FILE *fp = to_mctx(context)->dbg_fp;

	if (!cq_attr->cqe) {
		mlx5_dbg(fp, MLX5_DBG_CQ, "CQE invalid\n");
		errno = EINVAL;
		return NULL;
	}

	if (cq_attr->comp_mask & ~CREATE_CQ_SUPPORTED_COMP_MASK) {
		mlx5_dbg(fp, MLX5_DBG_CQ,
			 "Unsupported comp_mask for create_cq\n");
		errno = EINVAL;
		return NULL;
	}

	if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS &&
	    cq_attr->flags & ~CREATE_CQ_SUPPORTED_FLAGS) {
		mlx5_dbg(fp, MLX5_DBG_CQ,
			 "Unsupported creation flags requested for create_cq\n");
		errno = EINVAL;
		return NULL;
	}

	if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) {
		mlx5_dbg(fp, MLX5_DBG_CQ, "\n");
		errno = ENOTSUP;
		return NULL;
	}

	cq = calloc(1, sizeof *cq);
	if (!cq) {
		mlx5_dbg(fp, MLX5_DBG_CQ, "\n");
		return NULL;
	}

	memset(&cmd, 0, sizeof cmd);
	cq->cons_index = 0;

	if (mlx5_spinlock_init(&cq->lock))
		goto err;

	ncqe = align_queue_size(cq_attr->cqe + 1);
	if ((ncqe > (1 << 24)) || (ncqe < (cq_attr->cqe + 1))) {
		mlx5_dbg(fp, MLX5_DBG_CQ, "ncqe %d\n", ncqe);
		errno = EINVAL;
		goto err_spl;
	}

	cqe_sz = get_cqe_size();
	if (cqe_sz < 0) {
		mlx5_dbg(fp, MLX5_DBG_CQ, "\n");
		errno = -cqe_sz;
		goto err_spl;
	}

	if (mlx5_alloc_cq_buf(to_mctx(context), cq, &cq->buf_a, ncqe, cqe_sz)) {
		mlx5_dbg(fp, MLX5_DBG_CQ, "\n");
		goto err_spl;
	}

	cq->dbrec = mlx5_alloc_dbrec(to_mctx(context));
	if (!cq->dbrec) {
		mlx5_dbg(fp, MLX5_DBG_CQ, "\n");
		goto err_buf;
	}

	cq->dbrec[MLX5_CQ_SET_CI] = 0;
	cq->dbrec[MLX5_CQ_ARM_DB] = 0;
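	/*
	 * The doorbell record is a pair of 32-bit words in host memory
	 * that the HCA fetches with DMA: dbrec[MLX5_CQ_SET_CI] publishes
	 * the software consumer index, dbrec[MLX5_CQ_ARM_DB] the last
	 * arm request.
	 */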
	cq->arm_sn = 0;
	cq->cqe_sz = cqe_sz;
	cq->flags = cq_alloc_flags;

	if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS &&
	    cq_attr->flags & IBV_CREATE_CQ_ATTR_SINGLE_THREADED)
		cq->flags |= MLX5_CQ_FLAGS_SINGLE_THREADED;
	cmd.buf_addr = (uintptr_t) cq->buf_a.buf;
	cmd.db_addr = (uintptr_t) cq->dbrec;
	cmd.cqe_size = cqe_sz;

	if (mlx5cq_attr) {
		if (mlx5cq_attr->comp_mask & ~(MLX5DV_CQ_INIT_ATTR_MASK_RESERVED - 1)) {
			mlx5_dbg(fp, MLX5_DBG_CQ,
				 "Unsupported vendor comp_mask for create_cq\n");
			errno = EINVAL;
			goto err_db;
		}

		if (mlx5cq_attr->comp_mask & MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE) {
			if (mctx->cqe_comp_caps.max_num &&
			    (mlx5cq_attr->cqe_comp_res_format &
			     mctx->cqe_comp_caps.supported_format)) {
				cmd.cqe_comp_en = 1;
				cmd.cqe_comp_res_format = mlx5cq_attr->cqe_comp_res_format;
			} else {
				mlx5_dbg(fp, MLX5_DBG_CQ, "CQE Compression is not supported\n");
				errno = EINVAL;
				goto err_db;
			}
		}
	}

	ret = ibv_cmd_create_cq(context, ncqe - 1, cq_attr->channel,
				cq_attr->comp_vector,
				ibv_cq_ex_to_cq(&cq->ibv_cq), &cmd.ibv_cmd,
				sizeof(cmd), &resp.ibv_resp, sizeof(resp));
	if (ret) {
		mlx5_dbg(fp, MLX5_DBG_CQ, "ret %d\n", ret);
		goto err_db;
	}

	cq->active_buf = &cq->buf_a;
	cq->resize_buf = NULL;

	cq->stall_enable = to_mctx(context)->stall_enable;
	cq->stall_adaptive_enable = to_mctx(context)->stall_adaptive_enable;
	cq->stall_cycles = to_mctx(context)->stall_cycles;

	if (cq_alloc_flags & MLX5_CQ_FLAGS_EXTENDED)
		mlx5_cq_fill_pfns(cq, cq_attr);

	return &cq->ibv_cq;

err_db:
	mlx5_free_db(to_mctx(context), cq->dbrec);

err_buf:
	mlx5_free_cq_buf(to_mctx(context), &cq->buf_a);

err_spl:
	mlx5_spinlock_destroy(&cq->lock);

err:
	free(cq);

	return NULL;
}

struct ibv_cq *mlx5_create_cq(struct ibv_context *context, int cqe,
			      struct ibv_comp_channel *channel,
			      int comp_vector)
{
	struct ibv_cq_ex *cq;
	struct ibv_cq_init_attr_ex cq_attr = {.cqe = cqe, .channel = channel,
					      .comp_vector = comp_vector,
					      .wc_flags = IBV_WC_STANDARD_FLAGS};

	if (cqe <= 0) {
		errno = EINVAL;
		return NULL;
	}

	cq = create_cq(context, &cq_attr, 0, NULL);
	return cq ? ibv_cq_ex_to_cq(cq) : NULL;
}

struct ibv_cq_ex *mlx5_create_cq_ex(struct ibv_context *context,
				    struct ibv_cq_init_attr_ex *cq_attr)
{
	return create_cq(context, cq_attr, MLX5_CQ_FLAGS_EXTENDED, NULL);
}

struct ibv_cq_ex *mlx5dv_create_cq(struct ibv_context *context,
				   struct ibv_cq_init_attr_ex *cq_attr,
				   struct mlx5dv_cq_init_attr *mlx5_cq_attr)
{
	struct ibv_cq_ex *cq;

	cq = create_cq(context, cq_attr, MLX5_CQ_FLAGS_EXTENDED, mlx5_cq_attr);
	if (!cq)
		return NULL;

	verbs_init_cq(ibv_cq_ex_to_cq(cq), context,
		      cq_attr->channel, cq_attr->cq_context);
	return cq;
}
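/*
 * Usage sketch for the DV entry point above (illustrative; assumes the
 * device reports CQE compression support): request compressed CQEs in
 * hash result format.
 *
 *	struct ibv_cq_init_attr_ex attr = {
 *		.cqe = 256,
 *		.wc_flags = IBV_WC_STANDARD_FLAGS,
 *	};
 *	struct mlx5dv_cq_init_attr dv_attr = {
 *		.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE,
 *		.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH,
 *	};
 *	struct ibv_cq_ex *cq = mlx5dv_create_cq(ctx, &attr, &dv_attr);
 */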
int mlx5_resize_cq(struct ibv_cq *ibcq, int cqe)
{
	struct mlx5_cq *cq = to_mcq(ibcq);
	struct mlx5_resize_cq_resp resp;
	struct mlx5_resize_cq cmd;
	struct mlx5_context *mctx = to_mctx(ibcq->context);
	int err;

	if (cqe < 0) {
		errno = EINVAL;
		return errno;
	}

	memset(&cmd, 0, sizeof(cmd));
	memset(&resp, 0, sizeof(resp));

	if (((long long)cqe * 64) > INT_MAX)
		return EINVAL;

	mlx5_spin_lock(&cq->lock);
	cq->active_cqes = cq->ibv_cq.cqe;
	if (cq->active_buf == &cq->buf_a)
		cq->resize_buf = &cq->buf_b;
	else
		cq->resize_buf = &cq->buf_a;

	cqe = align_queue_size(cqe + 1);
	if (cqe == ibcq->cqe + 1) {
		cq->resize_buf = NULL;
		err = 0;
		goto out;
	}

	/* currently we don't change cqe size */
	cq->resize_cqe_sz = cq->cqe_sz;
	cq->resize_cqes = cqe;
	err = mlx5_alloc_cq_buf(mctx, cq, cq->resize_buf, cq->resize_cqes, cq->resize_cqe_sz);
	if (err) {
		cq->resize_buf = NULL;
		errno = ENOMEM;
		goto out;
	}

	cmd.buf_addr = (uintptr_t)cq->resize_buf->buf;
	cmd.cqe_size = cq->resize_cqe_sz;

	err = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof(cmd),
				&resp.ibv_resp, sizeof(resp));
	if (err)
		goto out_buf;

	mlx5_cq_resize_copy_cqes(cq);
	mlx5_free_cq_buf(mctx, cq->active_buf);
	cq->active_buf = cq->resize_buf;
	cq->ibv_cq.cqe = cqe - 1;
	mlx5_spin_unlock(&cq->lock);
	cq->resize_buf = NULL;
	return 0;

out_buf:
	mlx5_free_cq_buf(mctx, cq->resize_buf);
	cq->resize_buf = NULL;

out:
	mlx5_spin_unlock(&cq->lock);
	return err;
}

int mlx5_destroy_cq(struct ibv_cq *cq)
{
	int ret;

	ret = ibv_cmd_destroy_cq(cq);
	if (ret)
		return ret;

	mlx5_free_db(to_mctx(cq->context), to_mcq(cq)->dbrec);
	mlx5_free_cq_buf(to_mctx(cq->context), to_mcq(cq)->active_buf);
	free(to_mcq(cq));

	return 0;
}

struct ibv_srq *mlx5_create_srq(struct ibv_pd *pd,
				struct ibv_srq_init_attr *attr)
{
	struct mlx5_create_srq cmd;
	struct mlx5_create_srq_resp resp;
	struct mlx5_srq *srq;
	int ret;
	struct mlx5_context *ctx;
	int max_sge;
	struct ibv_srq *ibsrq;

	ctx = to_mctx(pd->context);
	srq = calloc(1, sizeof *srq);
	if (!srq) {
		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
		return NULL;
	}
	ibsrq = &srq->vsrq.srq;

	memset(&cmd, 0, sizeof cmd);
	if (mlx5_spinlock_init(&srq->lock)) {
		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
		goto err;
	}

	if (attr->attr.max_wr > ctx->max_srq_recv_wr) {
		fprintf(stderr, "%s-%d:max_wr %d, max_srq_recv_wr %d\n", __func__, __LINE__,
			attr->attr.max_wr, ctx->max_srq_recv_wr);
		errno = EINVAL;
		goto err;
	}
	/*
	 * this calculation does not consider required control segments. The
	 * final calculation is done again later. This is done to avoid
	 * overflow of intermediate variables.
	 */
	max_sge = ctx->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
	if (attr->attr.max_sge > max_sge) {
		fprintf(stderr, "%s-%d:max_sge %d, max supported %d\n",
			__func__, __LINE__, attr->attr.max_sge, max_sge);
		errno = EINVAL;
		goto err;
	}

	srq->max = align_queue_size(attr->attr.max_wr + 1);
	srq->max_gs = attr->attr.max_sge;
	srq->counter = 0;

	if (mlx5_alloc_srq_buf(pd->context, srq)) {
		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
		goto err;
	}

	srq->db = mlx5_alloc_dbrec(to_mctx(pd->context));
	if (!srq->db) {
		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
		goto err_free;
	}

	*srq->db = 0;

	cmd.buf_addr = (uintptr_t) srq->buf.buf;
	cmd.db_addr = (uintptr_t) srq->db;
	srq->wq_sig = srq_sig_enabled();
	if (srq->wq_sig)
		cmd.flags = MLX5_SRQ_FLAG_SIGNATURE;

	attr->attr.max_sge = srq->max_gs;
	pthread_mutex_lock(&ctx->srq_table_mutex);
	ret = ibv_cmd_create_srq(pd, ibsrq, attr, &cmd.ibv_cmd, sizeof(cmd),
				 &resp.ibv_resp, sizeof(resp));
	if (ret)
		goto err_db;

	ret = mlx5_store_srq(ctx, resp.srqn, srq);
	if (ret)
		goto err_destroy;

	pthread_mutex_unlock(&ctx->srq_table_mutex);

	srq->srqn = resp.srqn;
	srq->rsc.rsn = resp.srqn;
	srq->rsc.type = MLX5_RSC_TYPE_SRQ;

	return ibsrq;

err_destroy:
	ibv_cmd_destroy_srq(ibsrq);

err_db:
	pthread_mutex_unlock(&ctx->srq_table_mutex);
	mlx5_free_db(to_mctx(pd->context), srq->db);

err_free:
	free(srq->wrid);
	mlx5_free_buf(&srq->buf);

err:
	free(srq);

	return NULL;
}

int mlx5_modify_srq(struct ibv_srq *srq,
		    struct ibv_srq_attr *attr,
		    int attr_mask)
{
	struct ibv_modify_srq cmd;

	return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
}

int mlx5_query_srq(struct ibv_srq *srq,
		   struct ibv_srq_attr *attr)
{
	struct ibv_query_srq cmd;

	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
}

int mlx5_destroy_srq(struct ibv_srq *srq)
{
	int ret;
	struct mlx5_srq *msrq = to_msrq(srq);
	struct mlx5_context *ctx = to_mctx(srq->context);

	ret = ibv_cmd_destroy_srq(srq);
	if (ret)
		return ret;

	if (ctx->cqe_version && msrq->rsc.type == MLX5_RSC_TYPE_XSRQ)
		mlx5_clear_uidx(ctx, msrq->rsc.rsn);
	else
		mlx5_clear_srq(ctx, msrq->srqn);

	mlx5_free_db(ctx, msrq->db);
	mlx5_free_buf(&msrq->buf);
	free(msrq->wrid);
	free(msrq);

	return 0;
}
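/*
 * Per-WQE overhead in bytes for each QP type: every send WQE starts
 * with a control segment, followed by the type-specific segments
 * (RDMA address, atomic, datagram, XRC or Ethernet) that precede any
 * data/gather segments.
 */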
static int sq_overhead(enum ibv_qp_type qp_type)
{
	size_t size = 0;
	size_t mw_bind_size =
		sizeof(struct mlx5_wqe_umr_ctrl_seg) +
		sizeof(struct mlx5_wqe_mkey_context_seg) +
		max_t(size_t, sizeof(struct mlx5_wqe_umr_klm_seg), 64);

	switch (qp_type) {
	case IBV_QPT_RC:
		size += sizeof(struct mlx5_wqe_ctrl_seg) +
			max(sizeof(struct mlx5_wqe_atomic_seg) +
			    sizeof(struct mlx5_wqe_raddr_seg),
			    mw_bind_size);
		break;

	case IBV_QPT_UC:
		size = sizeof(struct mlx5_wqe_ctrl_seg) +
			max(sizeof(struct mlx5_wqe_raddr_seg),
			    mw_bind_size);
		break;

	case IBV_QPT_UD:
		size = sizeof(struct mlx5_wqe_ctrl_seg) +
			sizeof(struct mlx5_wqe_datagram_seg);
		break;

	case IBV_QPT_XRC_SEND:
		size = sizeof(struct mlx5_wqe_ctrl_seg) + mw_bind_size;
		SWITCH_FALLTHROUGH;

	case IBV_QPT_XRC_RECV:
		size = max(size, sizeof(struct mlx5_wqe_ctrl_seg) +
			   sizeof(struct mlx5_wqe_xrc_seg) +
			   sizeof(struct mlx5_wqe_raddr_seg));
		break;

	case IBV_QPT_RAW_PACKET:
		size = sizeof(struct mlx5_wqe_ctrl_seg) +
			sizeof(struct mlx5_wqe_eth_seg);
		break;

	default:
		return -EINVAL;
	}

	return size;
}

static int mlx5_calc_send_wqe(struct mlx5_context *ctx,
			      struct ibv_qp_init_attr_ex *attr,
			      struct mlx5_qp *qp)
{
	int size;
	int inl_size = 0;
	int max_gather;
	int tot_size;

	size = sq_overhead(attr->qp_type);
	if (size < 0)
		return size;

	if (attr->cap.max_inline_data) {
		inl_size = size + align(sizeof(struct mlx5_wqe_inl_data_seg) +
			attr->cap.max_inline_data, 16);
	}

	if (attr->comp_mask & IBV_QP_INIT_ATTR_MAX_TSO_HEADER) {
		size += align(attr->max_tso_header, 16);
		qp->max_tso_header = attr->max_tso_header;
	}

	max_gather = (ctx->max_sq_desc_sz - size) /
		sizeof(struct mlx5_wqe_data_seg);
	if (attr->cap.max_send_sge > max_gather)
		return -EINVAL;

	size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
	tot_size = max_int(size, inl_size);

	if (tot_size > ctx->max_sq_desc_sz)
		return -EINVAL;

	return align(tot_size, MLX5_SEND_WQE_BB);
}

static int mlx5_calc_rcv_wqe(struct mlx5_context *ctx,
			     struct ibv_qp_init_attr_ex *attr,
			     struct mlx5_qp *qp)
{
	int size;
	int num_scatter;

	num_scatter = max_t(uint32_t, attr->cap.max_recv_sge, 1);
	size = sizeof(struct mlx5_wqe_data_seg) * num_scatter;
	if (qp->wq_sig)
		size += sizeof(struct mlx5_rwqe_sig);

	if (size > ctx->max_rq_desc_sz)
		return -EINVAL;

	size = mlx5_round_up_power_of_two(size);

	return size;
}

static int mlx5_calc_sq_size(struct mlx5_context *ctx,
			     struct ibv_qp_init_attr_ex *attr,
			     struct mlx5_qp *qp)
{
	int wqe_size;
	int wq_size;
	FILE *fp = ctx->dbg_fp;

	if (!attr->cap.max_send_wr)
		return 0;

	wqe_size = mlx5_calc_send_wqe(ctx, attr, qp);
	if (wqe_size < 0) {
		mlx5_dbg(fp, MLX5_DBG_QP, "\n");
		return wqe_size;
	}

	if (wqe_size > ctx->max_sq_desc_sz) {
		mlx5_dbg(fp, MLX5_DBG_QP, "\n");
		return -EINVAL;
	}

	qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
		sizeof(struct mlx5_wqe_inl_data_seg);
	attr->cap.max_inline_data = qp->max_inline_data;

	/*
	 * to avoid overflow, we limit max_send_wr so
	 * that the multiplication will fit in int
	 */
	if (attr->cap.max_send_wr > 0x7fffffff / ctx->max_sq_desc_sz) {
		mlx5_dbg(fp, MLX5_DBG_QP, "\n");
		return -EINVAL;
	}

	wq_size = mlx5_round_up_power_of_two(attr->cap.max_send_wr * wqe_size);
	qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
	if (qp->sq.wqe_cnt > ctx->max_send_wqebb) {
		mlx5_dbg(fp, MLX5_DBG_QP, "\n");
		return -EINVAL;
	}

	qp->sq.wqe_shift = mlx5_ilog2(MLX5_SEND_WQE_BB);
	qp->sq.max_gs = attr->cap.max_send_sge;
	qp->sq.max_post = wq_size / wqe_size;
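	/*
	 * The send queue is allocated in 64-byte basic blocks
	 * (MLX5_SEND_WQE_BB): wqe_cnt counts basic blocks, while
	 * max_post counts maximum-size WQEs guaranteed to fit.
	 */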
	return wq_size;
}

static int mlx5_calc_rwq_size(struct mlx5_context *ctx,
			      struct mlx5_rwq *rwq,
			      struct ibv_wq_init_attr *attr)
{
	size_t wqe_size;
	int wq_size;
	uint32_t num_scatter;
	int scat_spc;

	if (!attr->max_wr)
		return -EINVAL;

	/* TBD: check caps for RQ */
	num_scatter = max_t(uint32_t, attr->max_sge, 1);
	wqe_size = sizeof(struct mlx5_wqe_data_seg) * num_scatter;

	if (rwq->wq_sig)
		wqe_size += sizeof(struct mlx5_rwqe_sig);

	if (wqe_size <= 0 || wqe_size > ctx->max_rq_desc_sz)
		return -EINVAL;

	wqe_size = mlx5_round_up_power_of_two(wqe_size);
	wq_size = mlx5_round_up_power_of_two(attr->max_wr) * wqe_size;
	wq_size = max(wq_size, MLX5_SEND_WQE_BB);
	rwq->rq.wqe_cnt = wq_size / wqe_size;
	rwq->rq.wqe_shift = mlx5_ilog2(wqe_size);
	rwq->rq.max_post = 1 << mlx5_ilog2(wq_size / wqe_size);
	scat_spc = wqe_size -
		((rwq->wq_sig) ? sizeof(struct mlx5_rwqe_sig) : 0);
	rwq->rq.max_gs = scat_spc / sizeof(struct mlx5_wqe_data_seg);

	return wq_size;
}

static int mlx5_calc_rq_size(struct mlx5_context *ctx,
			     struct ibv_qp_init_attr_ex *attr,
			     struct mlx5_qp *qp)
{
	int wqe_size;
	int wq_size;
	int scat_spc;
	FILE *fp = ctx->dbg_fp;

	if (!attr->cap.max_recv_wr)
		return 0;

	if (attr->cap.max_recv_wr > ctx->max_recv_wr) {
		mlx5_dbg(fp, MLX5_DBG_QP, "\n");
		return -EINVAL;
	}

	wqe_size = mlx5_calc_rcv_wqe(ctx, attr, qp);
	if (wqe_size < 0 || wqe_size > ctx->max_rq_desc_sz) {
		mlx5_dbg(fp, MLX5_DBG_QP, "\n");
		return -EINVAL;
	}

	wq_size = mlx5_round_up_power_of_two(attr->cap.max_recv_wr) * wqe_size;
	if (wqe_size) {
		wq_size = max(wq_size, MLX5_SEND_WQE_BB);
		qp->rq.wqe_cnt = wq_size / wqe_size;
		qp->rq.wqe_shift = mlx5_ilog2(wqe_size);
		qp->rq.max_post = 1 << mlx5_ilog2(wq_size / wqe_size);
		scat_spc = wqe_size -
			(qp->wq_sig ? sizeof(struct mlx5_rwqe_sig) : 0);
		qp->rq.max_gs = scat_spc / sizeof(struct mlx5_wqe_data_seg);
	} else {
		qp->rq.wqe_cnt = 0;
		qp->rq.wqe_shift = 0;
		qp->rq.max_post = 0;
		qp->rq.max_gs = 0;
	}

	return wq_size;
}

static int mlx5_calc_wq_size(struct mlx5_context *ctx,
			     struct ibv_qp_init_attr_ex *attr,
			     struct mlx5_qp *qp)
{
	int ret;
	int result;

	ret = mlx5_calc_sq_size(ctx, attr, qp);
	if (ret < 0)
		return ret;

	result = ret;

	ret = mlx5_calc_rq_size(ctx, attr, qp);
	if (ret < 0)
		return ret;

	result += ret;

	qp->sq.offset = ret;
	qp->rq.offset = 0;

	return result;
}

static void map_uuar(struct ibv_context *context, struct mlx5_qp *qp,
		     int uuar_index)
{
	struct mlx5_context *ctx = to_mctx(context);

	qp->bf = &ctx->bfs[uuar_index];
}

static const char *qptype2key(enum ibv_qp_type type)
{
	switch (type) {
	case IBV_QPT_RC: return "HUGE_RC";
	case IBV_QPT_UC: return "HUGE_UC";
	case IBV_QPT_UD: return "HUGE_UD";
	case IBV_QPT_RAW_PACKET: return "HUGE_RAW_ETH";
	default: return "HUGE_NA";
	}
}

static int mlx5_alloc_qp_buf(struct ibv_context *context,
			     struct ibv_qp_init_attr_ex *attr,
			     struct mlx5_qp *qp,
			     int size)
{
	int err;
	enum mlx5_alloc_type alloc_type;
	enum mlx5_alloc_type default_alloc_type = MLX5_ALLOC_TYPE_ANON;
	const char *qp_huge_key;

	if (qp->sq.wqe_cnt) {
		qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid));
		if (!qp->sq.wrid) {
			errno = ENOMEM;
			err = -1;
			return err;
		}

		qp->sq.wr_data = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data));
		if (!qp->sq.wr_data) {
			errno = ENOMEM;
			err = -1;
			goto ex_wrid;
		}
	}

	qp->sq.wqe_head = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head));
	if (!qp->sq.wqe_head) {
		errno = ENOMEM;
		err = -1;
		goto ex_wrid;
	}

	if (qp->rq.wqe_cnt) {
		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof(uint64_t));
		if (!qp->rq.wrid) {
			errno = ENOMEM;
			err = -1;
			goto ex_wrid;
		}
	}

	/* compatibility support */
	qp_huge_key = qptype2key(qp->ibv_qp->qp_type);
	if (mlx5_use_huge(qp_huge_key))
		default_alloc_type = MLX5_ALLOC_TYPE_HUGE;

	mlx5_get_alloc_type(MLX5_QP_PREFIX, &alloc_type,
			    default_alloc_type);

	err = mlx5_alloc_prefered_buf(to_mctx(context), &qp->buf,
				      align(qp->buf_size,
					    to_mdev(context->device)->page_size),
				      to_mdev(context->device)->page_size,
				      alloc_type, MLX5_QP_PREFIX);
	if (err) {
		err = -ENOMEM;
		goto ex_wrid;
	}

	memset(qp->buf.buf, 0, qp->buf_size);

	if (attr->qp_type == IBV_QPT_RAW_PACKET) {
		size_t aligned_sq_buf_size = align(qp->sq_buf_size,
						   to_mdev(context->device)->page_size);
		/* For Raw Packet QP, allocate a separate buffer for the SQ */
		err = mlx5_alloc_prefered_buf(to_mctx(context), &qp->sq_buf,
					      aligned_sq_buf_size,
					      to_mdev(context->device)->page_size,
					      alloc_type, MLX5_QP_PREFIX);
		if (err) {
			err = -ENOMEM;
			goto rq_buf;
		}

		memset(qp->sq_buf.buf, 0, aligned_sq_buf_size);
	}

	return 0;

rq_buf:
	mlx5_free_actual_buf(to_mctx(qp->verbs_qp.qp.context), &qp->buf);

ex_wrid:
	if (qp->rq.wrid)
		free(qp->rq.wrid);

	if (qp->sq.wqe_head)
		free(qp->sq.wqe_head);

	if (qp->sq.wr_data)
		free(qp->sq.wr_data);

	if (qp->sq.wrid)
		free(qp->sq.wrid);

	return err;
}

static void mlx5_free_qp_buf(struct mlx5_qp *qp)
{
	struct mlx5_context *ctx = to_mctx(qp->ibv_qp->context);

	mlx5_free_actual_buf(ctx, &qp->buf);

	if (qp->sq_buf.buf)
		mlx5_free_actual_buf(ctx, &qp->sq_buf);

	if (qp->rq.wrid)
		free(qp->rq.wrid);

	if (qp->sq.wqe_head)
		free(qp->sq.wqe_head);

	if (qp->sq.wrid)
		free(qp->sq.wrid);

	if (qp->sq.wr_data)
		free(qp->sq.wr_data);
}

static int mlx5_cmd_create_rss_qp(struct ibv_context *context,
				  struct ibv_qp_init_attr_ex *attr,
				  struct mlx5_qp *qp)
{
	struct mlx5_create_qp_ex_rss cmd_ex_rss = {};
	struct mlx5_create_qp_resp_ex resp = {};
	int ret;

	if (attr->rx_hash_conf.rx_hash_key_len > sizeof(cmd_ex_rss.rx_hash_key)) {
		errno = EINVAL;
		return errno;
	}

	cmd_ex_rss.rx_hash_fields_mask = attr->rx_hash_conf.rx_hash_fields_mask;
	cmd_ex_rss.rx_hash_function = attr->rx_hash_conf.rx_hash_function;
	cmd_ex_rss.rx_key_len = attr->rx_hash_conf.rx_hash_key_len;
	memcpy(cmd_ex_rss.rx_hash_key, attr->rx_hash_conf.rx_hash_key,
	       attr->rx_hash_conf.rx_hash_key_len);

	ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp,
				    sizeof(qp->verbs_qp), attr,
				    &cmd_ex_rss.ibv_cmd, sizeof(cmd_ex_rss.ibv_cmd),
				    sizeof(cmd_ex_rss), &resp.ibv_resp,
				    sizeof(resp.ibv_resp), sizeof(resp));
	if (ret)
		return ret;

	qp->rss_qp = 1;
	return 0;
}

static int mlx5_cmd_create_qp_ex(struct ibv_context *context,
				 struct ibv_qp_init_attr_ex *attr,
				 struct mlx5_create_qp *cmd,
				 struct mlx5_qp *qp,
				 struct mlx5_create_qp_resp_ex *resp)
{
	struct mlx5_create_qp_ex cmd_ex;
	int ret;

	memset(&cmd_ex, 0, sizeof(cmd_ex));
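	/*
	 * Repack the legacy create command into the extended layout:
	 * the verbs fields from user_handle through is_srq go into the
	 * base command, the driver words starting at buf_addr into
	 * drv_ex.
	 */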
	memcpy(&cmd_ex.ibv_cmd.base, &cmd->ibv_cmd.user_handle,
	       offsetof(typeof(cmd->ibv_cmd), is_srq) +
	       sizeof(cmd->ibv_cmd.is_srq) -
	       offsetof(typeof(cmd->ibv_cmd), user_handle));

	memcpy(&cmd_ex.drv_ex, &cmd->buf_addr,
	       offsetof(typeof(*cmd), sq_buf_addr) +
	       sizeof(cmd->sq_buf_addr) - sizeof(cmd->ibv_cmd));

	ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp,
				    sizeof(qp->verbs_qp), attr,
				    &cmd_ex.ibv_cmd, sizeof(cmd_ex.ibv_cmd),
				    sizeof(cmd_ex), &resp->ibv_resp,
				    sizeof(resp->ibv_resp), sizeof(*resp));

	return ret;
}

enum {
	MLX5_CREATE_QP_SUP_COMP_MASK = (IBV_QP_INIT_ATTR_PD |
					IBV_QP_INIT_ATTR_XRCD |
					IBV_QP_INIT_ATTR_CREATE_FLAGS |
					IBV_QP_INIT_ATTR_MAX_TSO_HEADER |
					IBV_QP_INIT_ATTR_IND_TABLE |
					IBV_QP_INIT_ATTR_RX_HASH),
};

enum {
	MLX5_CREATE_QP_EX2_COMP_MASK = (IBV_QP_INIT_ATTR_CREATE_FLAGS |
					IBV_QP_INIT_ATTR_MAX_TSO_HEADER |
					IBV_QP_INIT_ATTR_IND_TABLE |
					IBV_QP_INIT_ATTR_RX_HASH),
};

static struct ibv_qp *create_qp(struct ibv_context *context,
				struct ibv_qp_init_attr_ex *attr)
{
	struct mlx5_create_qp cmd;
	struct mlx5_create_qp_resp resp;
	struct mlx5_create_qp_resp_ex resp_ex;
	struct mlx5_qp *qp;
	int ret;
	struct mlx5_context *ctx = to_mctx(context);
	struct ibv_qp *ibqp;
	int32_t usr_idx = 0;
	uint32_t uuar_index;
	FILE *fp = ctx->dbg_fp;

	if (attr->comp_mask & ~MLX5_CREATE_QP_SUP_COMP_MASK)
		return NULL;

	if ((attr->comp_mask & IBV_QP_INIT_ATTR_MAX_TSO_HEADER) &&
	    (attr->qp_type != IBV_QPT_RAW_PACKET))
		return NULL;

	qp = calloc(1, sizeof(*qp));
	if (!qp) {
		mlx5_dbg(fp, MLX5_DBG_QP, "\n");
		return NULL;
	}
	ibqp = (struct ibv_qp *)&qp->verbs_qp;
	qp->ibv_qp = ibqp;

	memset(&cmd, 0, sizeof(cmd));
	memset(&resp, 0, sizeof(resp));
	memset(&resp_ex, 0, sizeof(resp_ex));

	if (attr->comp_mask & IBV_QP_INIT_ATTR_RX_HASH) {
		ret = mlx5_cmd_create_rss_qp(context, attr, qp);
		if (ret)
			goto err;

		return ibqp;
	}

	qp->wq_sig = qp_sig_enabled();
	if (qp->wq_sig)
		cmd.flags |= MLX5_QP_FLAG_SIGNATURE;

	if (use_scatter_to_cqe())
		cmd.flags |= MLX5_QP_FLAG_SCATTER_CQE;

	ret = mlx5_calc_wq_size(ctx, attr, qp);
	if (ret < 0) {
		errno = -ret;
		goto err;
	}

	if (attr->qp_type == IBV_QPT_RAW_PACKET) {
		qp->buf_size = qp->sq.offset;
		qp->sq_buf_size = ret - qp->buf_size;
	} else {
		qp->buf_size = ret;
		qp->sq_buf_size = 0;
	}

	if (mlx5_alloc_qp_buf(context, attr, qp, ret)) {
		mlx5_dbg(fp, MLX5_DBG_QP, "\n");
		goto err;
	}

	if (attr->qp_type == IBV_QPT_RAW_PACKET) {
		qp->sq_start = qp->sq_buf.buf;
		qp->sq.qend = qp->sq_buf.buf +
			(qp->sq.wqe_cnt << qp->sq.wqe_shift);
	} else {
		qp->sq_start = qp->buf.buf + qp->sq.offset;
		qp->sq.qend = qp->buf.buf + qp->sq.offset +
			(qp->sq.wqe_cnt << qp->sq.wqe_shift);
	}

	mlx5_init_qp_indices(qp);

	if (mlx5_spinlock_init(&qp->sq.lock) ||
	    mlx5_spinlock_init(&qp->rq.lock))
		goto err_free_qp_buf;

	qp->db = mlx5_alloc_dbrec(ctx);
	if (!qp->db) {
		mlx5_dbg(fp, MLX5_DBG_QP, "\n");
		goto err_free_qp_buf;
	}

	qp->db[MLX5_RCV_DBR] = 0;
	qp->db[MLX5_SND_DBR] = 0;

	cmd.buf_addr = (uintptr_t) qp->buf.buf;
	cmd.sq_buf_addr = (attr->qp_type == IBV_QPT_RAW_PACKET) ?
			  (uintptr_t) qp->sq_buf.buf : 0;
	cmd.db_addr = (uintptr_t) qp->db;
	cmd.sq_wqe_count = qp->sq.wqe_cnt;
	cmd.rq_wqe_count = qp->rq.wqe_cnt;
	cmd.rq_wqe_shift = qp->rq.wqe_shift;

	if (ctx->atomic_cap == IBV_ATOMIC_HCA)
		qp->atomics_enabled = 1;

	if (!ctx->cqe_version) {
		cmd.uidx = 0xffffff;
		pthread_mutex_lock(&ctx->qp_table_mutex);
	} else if (!is_xrc_tgt(attr->qp_type)) {
		usr_idx = mlx5_store_uidx(ctx, qp);
		if (usr_idx < 0) {
			mlx5_dbg(fp, MLX5_DBG_QP, "Couldn't find free user index\n");
			goto err_rq_db;
		}

		cmd.uidx = usr_idx;
	}

	if (attr->comp_mask & MLX5_CREATE_QP_EX2_COMP_MASK)
		ret = mlx5_cmd_create_qp_ex(context, attr, &cmd, qp, &resp_ex);
	else
		ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, sizeof(qp->verbs_qp),
					   attr, &cmd.ibv_cmd, sizeof(cmd),
					   &resp.ibv_resp, sizeof(resp));
	if (ret) {
		mlx5_dbg(fp, MLX5_DBG_QP, "ret %d\n", ret);
		goto err_free_uidx;
	}

	uuar_index = (attr->comp_mask & MLX5_CREATE_QP_EX2_COMP_MASK) ?
		     resp_ex.uuar_index : resp.uuar_index;
	if (!ctx->cqe_version) {
		if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) {
			ret = mlx5_store_qp(ctx, ibqp->qp_num, qp);
			if (ret) {
				mlx5_dbg(fp, MLX5_DBG_QP, "ret %d\n", ret);
				goto err_destroy;
			}
		}

		pthread_mutex_unlock(&ctx->qp_table_mutex);
	}

	map_uuar(context, qp, uuar_index);

	qp->rq.max_post = qp->rq.wqe_cnt;
	if (attr->sq_sig_all)
		qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
	else
		qp->sq_signal_bits = 0;

	attr->cap.max_send_wr = qp->sq.max_post;
	attr->cap.max_recv_wr = qp->rq.max_post;
	attr->cap.max_recv_sge = qp->rq.max_gs;

	qp->rsc.type = MLX5_RSC_TYPE_QP;
	qp->rsc.rsn = (ctx->cqe_version && !is_xrc_tgt(attr->qp_type)) ?
		      usr_idx : ibqp->qp_num;
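	/*
	 * With CQE version 1 the hardware reports the user-assigned
	 * index in the completion, so the resource is tracked under
	 * usr_idx; otherwise completions carry the QP number itself.
	 */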
	return ibqp;

err_destroy:
	ibv_cmd_destroy_qp(ibqp);

err_free_uidx:
	if (!ctx->cqe_version)
		pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
	else if (!is_xrc_tgt(attr->qp_type))
		mlx5_clear_uidx(ctx, usr_idx);

err_rq_db:
	mlx5_free_db(to_mctx(context), qp->db);

err_free_qp_buf:
	mlx5_free_qp_buf(qp);

err:
	free(qp);

	return NULL;
}

struct ibv_qp *mlx5_create_qp(struct ibv_pd *pd,
			      struct ibv_qp_init_attr *attr)
{
	struct ibv_qp *qp;
	struct ibv_qp_init_attr_ex attrx;

	memset(&attrx, 0, sizeof(attrx));
	memcpy(&attrx, attr, sizeof(*attr));
	attrx.comp_mask = IBV_QP_INIT_ATTR_PD;
	attrx.pd = pd;
	qp = create_qp(pd->context, &attrx);
	if (qp)
		memcpy(attr, &attrx, sizeof(*attr));

	return qp;
}

static void mlx5_lock_cqs(struct ibv_qp *qp)
{
	struct mlx5_cq *send_cq = to_mcq(qp->send_cq);
	struct mlx5_cq *recv_cq = to_mcq(qp->recv_cq);
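	/*
	 * When the send and receive CQs differ, always take the locks
	 * in ascending CQN order (and release them in reverse, see
	 * mlx5_unlock_cqs below) so that two threads can never deadlock
	 * on a CQ pair.
	 */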
	if (send_cq && recv_cq) {
		if (send_cq == recv_cq) {
			mlx5_spin_lock(&send_cq->lock);
		} else if (send_cq->cqn < recv_cq->cqn) {
			mlx5_spin_lock(&send_cq->lock);
			mlx5_spin_lock(&recv_cq->lock);
		} else {
			mlx5_spin_lock(&recv_cq->lock);
			mlx5_spin_lock(&send_cq->lock);
		}
	} else if (send_cq) {
		mlx5_spin_lock(&send_cq->lock);
	} else if (recv_cq) {
		mlx5_spin_lock(&recv_cq->lock);
	}
}

static void mlx5_unlock_cqs(struct ibv_qp *qp)
{
	struct mlx5_cq *send_cq = to_mcq(qp->send_cq);
	struct mlx5_cq *recv_cq = to_mcq(qp->recv_cq);

	if (send_cq && recv_cq) {
		if (send_cq == recv_cq) {
			mlx5_spin_unlock(&send_cq->lock);
		} else if (send_cq->cqn < recv_cq->cqn) {
			mlx5_spin_unlock(&recv_cq->lock);
			mlx5_spin_unlock(&send_cq->lock);
		} else {
			mlx5_spin_unlock(&send_cq->lock);
			mlx5_spin_unlock(&recv_cq->lock);
		}
	} else if (send_cq) {
		mlx5_spin_unlock(&send_cq->lock);
	} else if (recv_cq) {
		mlx5_spin_unlock(&recv_cq->lock);
	}
}

int mlx5_destroy_qp(struct ibv_qp *ibqp)
{
	struct mlx5_qp *qp = to_mqp(ibqp);
	struct mlx5_context *ctx = to_mctx(ibqp->context);
	int ret;

	if (qp->rss_qp) {
		ret = ibv_cmd_destroy_qp(ibqp);
		if (ret)
			return ret;
		goto free;
	}

	if (!ctx->cqe_version)
		pthread_mutex_lock(&ctx->qp_table_mutex);

	ret = ibv_cmd_destroy_qp(ibqp);
	if (ret) {
		if (!ctx->cqe_version)
			pthread_mutex_unlock(&ctx->qp_table_mutex);
		return ret;
	}

	mlx5_lock_cqs(ibqp);

	__mlx5_cq_clean(to_mcq(ibqp->recv_cq), qp->rsc.rsn,
			ibqp->srq ? to_msrq(ibqp->srq) : NULL);
	if (ibqp->send_cq != ibqp->recv_cq)
		__mlx5_cq_clean(to_mcq(ibqp->send_cq), qp->rsc.rsn, NULL);

	if (!ctx->cqe_version) {
		if (qp->sq.wqe_cnt || qp->rq.wqe_cnt)
			mlx5_clear_qp(ctx, ibqp->qp_num);
	}

	mlx5_unlock_cqs(ibqp);
	if (!ctx->cqe_version)
		pthread_mutex_unlock(&ctx->qp_table_mutex);
	else if (!is_xrc_tgt(ibqp->qp_type))
		mlx5_clear_uidx(ctx, qp->rsc.rsn);

	mlx5_free_db(ctx, qp->db);
	mlx5_free_qp_buf(qp);

free:
	free(qp);

	return 0;
}

int mlx5_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
		  int attr_mask, struct ibv_qp_init_attr *init_attr)
{
	struct ibv_query_qp cmd;
	struct mlx5_qp *qp = to_mqp(ibqp);
	int ret;

	if (qp->rss_qp)
		return ENOSYS;

	ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof(cmd));
	if (ret)
		return ret;

	init_attr->cap.max_send_wr = qp->sq.max_post;
	init_attr->cap.max_send_sge = qp->sq.max_gs;
	init_attr->cap.max_inline_data = qp->max_inline_data;

	attr->cap = init_attr->cap;

	return 0;
}

enum {
	MLX5_MODIFY_QP_EX_ATTR_MASK = IBV_QP_RATE_LIMIT,
};

int mlx5_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
		   int attr_mask)
{
	struct ibv_modify_qp cmd = {};
	struct ibv_modify_qp_ex cmd_ex = {};
	struct ibv_modify_qp_resp_ex resp = {};
	struct mlx5_qp *mqp = to_mqp(qp);
	struct mlx5_context *context = to_mctx(qp->context);
	int ret;
	__be32 *db;

	if (attr_mask & IBV_QP_PORT) {
		switch (qp->qp_type) {
		case IBV_QPT_RAW_PACKET:
			if (context->cached_link_layer[attr->port_num - 1] ==
			    IBV_LINK_LAYER_ETHERNET) {
				if (context->cached_device_cap_flags &
				    IBV_DEVICE_RAW_IP_CSUM)
					mqp->qp_cap_cache |=
						MLX5_CSUM_SUPPORT_RAW_OVER_ETH |
						MLX5_RX_CSUM_VALID;

				if (ibv_is_qpt_supported(
				    context->cached_tso_caps.supported_qpts,
				    IBV_QPT_RAW_PACKET))
					mqp->max_tso =
						context->cached_tso_caps.max_tso;
			}
			break;
		default:
			break;
		}
	}

	if (attr_mask & MLX5_MODIFY_QP_EX_ATTR_MASK)
		ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask,
					   &cmd_ex,
					   sizeof(cmd_ex), sizeof(cmd_ex),
					   &resp,
					   sizeof(resp), sizeof(resp));
	else
		ret = ibv_cmd_modify_qp(qp, attr, attr_mask,
					&cmd, sizeof(cmd));

	if (!ret &&
	    (attr_mask & IBV_QP_STATE) &&
	    attr->qp_state == IBV_QPS_RESET) {
		if (qp->recv_cq)
			mlx5_cq_clean(to_mcq(qp->recv_cq), mqp->rsc.rsn,
				      qp->srq ? to_msrq(qp->srq) : NULL);
		if (qp->send_cq != qp->recv_cq && qp->send_cq)
			mlx5_cq_clean(to_mcq(qp->send_cq),
				      to_mqp(qp)->rsc.rsn, NULL);

		mlx5_init_qp_indices(mqp);
		db = mqp->db;
		db[MLX5_RCV_DBR] = 0;
		db[MLX5_SND_DBR] = 0;
	}

	/*
	 * When the Raw Packet QP is in INIT state, its RQ
	 * underneath is already in RDY, which means it can
	 * receive packets. According to the IB spec, a QP can't
	 * receive packets until moved to RTR state. To achieve this,
	 * for Raw Packet QPs, we update the doorbell record
	 * once the QP is moved to RTR.
	 */
	if (!ret &&
	    (attr_mask & IBV_QP_STATE) &&
	    attr->qp_state == IBV_QPS_RTR &&
	    qp->qp_type == IBV_QPT_RAW_PACKET) {
		mlx5_spin_lock(&mqp->rq.lock);
		mqp->db[MLX5_RCV_DBR] = htobe32(mqp->rq.head & 0xffff);
		mlx5_spin_unlock(&mqp->rq.lock);
	}

	return ret;
}

#define RROCE_UDP_SPORT_MIN 0xC000
#define RROCE_UDP_SPORT_MAX 0xFFFF
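/*
 * RoCE v2 encapsulates traffic in UDP; the source port, drawn at random
 * from the dynamic range above and stored in the AV's rlid field, gives
 * switches per-AH entropy for ECMP hashing.
 */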
struct ibv_ah *mlx5_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
{
	struct mlx5_context *ctx = to_mctx(pd->context);
	struct ibv_port_attr port_attr;
	struct mlx5_ah *ah;
	uint32_t gid_type;
	__be32 tmp;
	uint8_t grh;
	int is_eth;

	if (attr->port_num < 1 || attr->port_num > ctx->num_ports)
		return NULL;

	if (ctx->cached_link_layer[attr->port_num - 1]) {
		is_eth = ctx->cached_link_layer[attr->port_num - 1] ==
			 IBV_LINK_LAYER_ETHERNET;
	} else {
		if (ibv_query_port(pd->context, attr->port_num, &port_attr))
			return NULL;

		is_eth = (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET);
	}

	if (unlikely((!attr->is_global) && is_eth)) {
		errno = EINVAL;
		return NULL;
	}

	ah = calloc(1, sizeof *ah);
	if (!ah)
		return NULL;

	if (is_eth) {
		if (ibv_query_gid_type(pd->context, attr->port_num,
				       attr->grh.sgid_index, &gid_type))
			goto err;

		if (gid_type == IBV_GID_TYPE_ROCE_V2)
			ah->av.rlid = htobe16(rand() % (RROCE_UDP_SPORT_MAX + 1
							- RROCE_UDP_SPORT_MIN)
					      + RROCE_UDP_SPORT_MIN);
		/* Since RoCE packets must contain GRH, this bit is reserved
		 * for RoCE and shouldn't be set.
		 */
		grh = 0;
	} else {
		ah->av.fl_mlid = attr->src_path_bits & 0x7f;
		ah->av.rlid = htobe16(attr->dlid);
		grh = 1;
	}
	ah->av.stat_rate_sl = (attr->static_rate << 4) | attr->sl;
	if (attr->is_global) {
		ah->av.tclass = attr->grh.traffic_class;
		ah->av.hop_limit = attr->grh.hop_limit;
		tmp = htobe32((grh << 30) |
			      ((attr->grh.sgid_index & 0xff) << 20) |
			      (attr->grh.flow_label & 0xfffff));
		ah->av.grh_gid_fl = tmp;
		memcpy(ah->av.rgid, attr->grh.dgid.raw, 16);
	}

	if (is_eth) {
		if (ctx->cmds_supp_uhw & MLX5_USER_CMDS_SUPP_UHW_CREATE_AH) {
			struct mlx5_create_ah_resp resp = {};

			if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp.ibv_resp, sizeof(resp)))
				goto err;

			memcpy(ah->av.rmac, resp.dmac, ETHERNET_LL_SIZE);
		} else {
			uint16_t vid;

			if (ibv_resolve_eth_l2_from_gid(pd->context, attr,
							ah->av.rmac, &vid))
				goto err;
		}
	}

	return &ah->ibv_ah;

err:
	free(ah);
	return NULL;
}

int mlx5_destroy_ah(struct ibv_ah *ah)
{
	struct mlx5_ah *mah = to_mah(ah);
	int err;

	err = ibv_cmd_destroy_ah(ah);
	if (err)
		return err;

	free(mah);
	return 0;
}

int mlx5_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
{
	return ibv_cmd_attach_mcast(qp, gid, lid);
}

int mlx5_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
{
	return ibv_cmd_detach_mcast(qp, gid, lid);
}

struct ibv_qp *mlx5_create_qp_ex(struct ibv_context *context,
				 struct ibv_qp_init_attr_ex *attr)
{
	return create_qp(context, attr);
}

int mlx5_get_srq_num(struct ibv_srq *srq, uint32_t *srq_num)
{
	struct mlx5_srq *msrq = to_msrq(srq);

	*srq_num = msrq->srqn;

	return 0;
}

struct ibv_xrcd *
mlx5_open_xrcd(struct ibv_context *context,
	       struct ibv_xrcd_init_attr *xrcd_init_attr)
{
	int err;
	struct verbs_xrcd *xrcd;
	struct ibv_open_xrcd cmd = {};
	struct ibv_open_xrcd_resp resp = {};

	xrcd = calloc(1, sizeof(*xrcd));
	if (!xrcd)
		return NULL;

	err = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), xrcd_init_attr,
				&cmd, sizeof(cmd), &resp, sizeof(resp));
	if (err) {
		free(xrcd);
		return NULL;
	}

	return &xrcd->xrcd;
}

int mlx5_close_xrcd(struct ibv_xrcd *ib_xrcd)
{
	struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd);
	int ret;

	ret = ibv_cmd_close_xrcd(xrcd);
	if (!ret)
		free(xrcd);

	return ret;
}

static struct ibv_srq *
mlx5_create_xrc_srq(struct ibv_context *context,
		    struct ibv_srq_init_attr_ex *attr)
{
	int err;
	struct mlx5_create_srq_ex cmd;
	struct mlx5_create_srq_resp resp;
	struct mlx5_srq *msrq;
	struct mlx5_context *ctx = to_mctx(context);
	int max_sge;
	struct ibv_srq *ibsrq;
	int uidx;
	FILE *fp = ctx->dbg_fp;

	msrq = calloc(1, sizeof(*msrq));
	if (!msrq)
		return NULL;

	ibsrq = (struct ibv_srq *)&msrq->vsrq;

	memset(&cmd, 0, sizeof(cmd));
	memset(&resp, 0, sizeof(resp));

	if (mlx5_spinlock_init(&msrq->lock)) {
		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
		goto err;
	}

	if (attr->attr.max_wr > ctx->max_srq_recv_wr) {
		fprintf(stderr, "%s-%d:max_wr %d, max_srq_recv_wr %d\n",
			__func__, __LINE__, attr->attr.max_wr,
			ctx->max_srq_recv_wr);
		errno = EINVAL;
		goto err;
	}
	/*
	 * this calculation does not consider required control segments. The
	 * final calculation is done again later. This is done to avoid
	 * overflow of intermediate variables.
	 */
	max_sge = ctx->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
	if (attr->attr.max_sge > max_sge) {
		fprintf(stderr, "%s-%d:max_sge %d, max supported %d\n",
			__func__, __LINE__, attr->attr.max_sge, max_sge);
		errno = EINVAL;
		goto err;
	}

	msrq->max = align_queue_size(attr->attr.max_wr + 1);
	msrq->max_gs = attr->attr.max_sge;
	msrq->counter = 0;

	if (mlx5_alloc_srq_buf(context, msrq)) {
		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
		goto err;
	}

	msrq->db = mlx5_alloc_dbrec(ctx);
	if (!msrq->db) {
		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
		goto err_free;
	}

	*msrq->db = 0;

	cmd.buf_addr = (uintptr_t)msrq->buf.buf;
	cmd.db_addr = (uintptr_t)msrq->db;
	msrq->wq_sig = srq_sig_enabled();
	if (msrq->wq_sig)
		cmd.flags = MLX5_SRQ_FLAG_SIGNATURE;

	attr->attr.max_sge = msrq->max_gs;
	if (ctx->cqe_version) {
		uidx = mlx5_store_uidx(ctx, msrq);
		if (uidx < 0) {
			mlx5_dbg(fp, MLX5_DBG_QP, "Couldn't find free user index\n");
			goto err_free_db;
		}
		cmd.uidx = uidx;
	} else {
		cmd.uidx = 0xffffff;
		pthread_mutex_lock(&ctx->srq_table_mutex);
	}

	err = ibv_cmd_create_srq_ex(context, &msrq->vsrq, sizeof(msrq->vsrq),
				    attr, &cmd.ibv_cmd, sizeof(cmd),
				    &resp.ibv_resp, sizeof(resp));
	if (err)
		goto err_free_uidx;

	if (!ctx->cqe_version) {
		err = mlx5_store_srq(to_mctx(context), resp.srqn, msrq);
		if (err)
			goto err_destroy;

		pthread_mutex_unlock(&ctx->srq_table_mutex);
	}

	msrq->srqn = resp.srqn;
	msrq->rsc.type = MLX5_RSC_TYPE_XSRQ;
	msrq->rsc.rsn = ctx->cqe_version ? cmd.uidx : resp.srqn;

	return ibsrq;

err_destroy:
	ibv_cmd_destroy_srq(ibsrq);

err_free_uidx:
	if (ctx->cqe_version)
		mlx5_clear_uidx(ctx, cmd.uidx);
	else
		pthread_mutex_unlock(&ctx->srq_table_mutex);

err_free_db:
	mlx5_free_db(ctx, msrq->db);

err_free:
	free(msrq->wrid);
	mlx5_free_buf(&msrq->buf);

err:
	free(msrq);

	return NULL;
}

struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context,
				   struct ibv_srq_init_attr_ex *attr)
{
	if (!(attr->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ||
	    (attr->srq_type == IBV_SRQT_BASIC))
		return mlx5_create_srq(attr->pd,
				       (struct ibv_srq_init_attr *)attr);
	else if (attr->srq_type == IBV_SRQT_XRC)
		return mlx5_create_xrc_srq(context, attr);

	errno = EINVAL;
	return NULL;
}

int mlx5_query_device_ex(struct ibv_context *context,
			 const struct ibv_query_device_ex_input *input,
			 struct ibv_device_attr_ex *attr,
			 size_t attr_size)
{
	struct mlx5_context *mctx = to_mctx(context);
	struct mlx5_query_device_ex_resp resp;
	struct mlx5_query_device_ex cmd;
	struct ibv_device_attr *a;
	uint64_t raw_fw_ver;
	unsigned major, minor, sub_minor;
	int err;
	int cmd_supp_uhw = mctx->cmds_supp_uhw &
		MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE;

	memset(&cmd, 0, sizeof(cmd));
	memset(&resp, 0, sizeof(resp));
	err = ibv_cmd_query_device_ex(context, input, attr, attr_size,
				      &raw_fw_ver,
				      &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd),
				      &resp.ibv_resp, sizeof(resp.ibv_resp),
				      cmd_supp_uhw ? sizeof(resp) : sizeof(resp.ibv_resp));
	if (err)
		return err;

	attr->tso_caps = resp.tso_caps;
	attr->rss_caps.rx_hash_fields_mask = resp.rss_caps.rx_hash_fields_mask;
	attr->rss_caps.rx_hash_function = resp.rss_caps.rx_hash_function;
	attr->packet_pacing_caps = resp.packet_pacing_caps.caps;

	if (resp.support_multi_pkt_send_wqe)
		mctx->vendor_cap_flags |= MLX5_VENDOR_CAP_FLAGS_MPW;

	mctx->cqe_comp_caps = resp.cqe_comp_caps;

	major = (raw_fw_ver >> 32) & 0xffff;
	minor = (raw_fw_ver >> 16) & 0xffff;
	sub_minor = raw_fw_ver & 0xffff;
	a = &attr->orig_attr;
	snprintf(a->fw_ver, sizeof(a->fw_ver), "%d.%d.%04d",
		 major, minor, sub_minor);

	return 0;
}

static int rwq_sig_enabled(struct ibv_context *context)
{
	char *env;

	env = getenv("MLX5_RWQ_SIGNATURE");
	if (env)
		return 1;

	return 0;
}

static void mlx5_free_rwq_buf(struct mlx5_rwq *rwq, struct ibv_context *context)
{
	struct mlx5_context *ctx = to_mctx(context);

	mlx5_free_actual_buf(ctx, &rwq->buf);
	free(rwq->rq.wrid);
}

static int mlx5_alloc_rwq_buf(struct ibv_context *context,
			      struct mlx5_rwq *rwq,
			      int size)
{
	int err;
	enum mlx5_alloc_type alloc_type;
	enum mlx5_alloc_type default_alloc_type = MLX5_ALLOC_TYPE_PREFER_CONTIG;

	mlx5_get_alloc_type(MLX5_RWQ_PREFIX, &alloc_type, default_alloc_type);

	rwq->rq.wrid = malloc(rwq->rq.wqe_cnt * sizeof(uint64_t));
	if (!rwq->rq.wrid) {
		errno = ENOMEM;
		return ENOMEM;
	}

	err = mlx5_alloc_prefered_buf(to_mctx(context), &rwq->buf,
				      align(rwq->buf_size,
					    to_mdev(context->device)->page_size),
				      to_mdev(context->device)->page_size,
				      alloc_type, MLX5_RWQ_PREFIX);
	if (err) {
		free(rwq->rq.wrid);
		errno = ENOMEM;
		return ENOMEM;
	}

	return 0;
}

struct ibv_wq *mlx5_create_wq(struct ibv_context *context,
			      struct ibv_wq_init_attr *attr)
{
	struct mlx5_create_wq cmd;
	struct mlx5_create_wq_resp resp;
	int ret;
	struct mlx5_rwq *rwq;
	struct mlx5_context *ctx = to_mctx(context);
	int err;
	int32_t usr_idx = 0;
	FILE *fp = ctx->dbg_fp;

	if (attr->wq_type != IBV_WQT_RQ)
		return NULL;

	memset(&cmd, 0, sizeof(cmd));
	memset(&resp, 0, sizeof(resp));

	rwq = calloc(1, sizeof(*rwq));
	if (!rwq)
		return NULL;

	rwq->wq_sig = rwq_sig_enabled(context);
	if (rwq->wq_sig)
		cmd.drv.flags = MLX5_RWQ_FLAG_SIGNATURE;

	ret = mlx5_calc_rwq_size(ctx, rwq, attr);
	if (ret < 0) {
		errno = -ret;
		goto err;
	}

	rwq->buf_size = ret;
	if (mlx5_alloc_rwq_buf(context, rwq, ret))
		goto err;

	mlx5_init_rwq_indices(rwq);

	if (mlx5_spinlock_init(&rwq->rq.lock))
		goto err_free_rwq_buf;

	rwq->db = mlx5_alloc_dbrec(ctx);
	if (!rwq->db)
		goto err_free_rwq_buf;

	rwq->db[MLX5_RCV_DBR] = 0;
	rwq->db[MLX5_SND_DBR] = 0;
	rwq->pbuff = rwq->buf.buf + rwq->rq.offset;
	rwq->recv_db = &rwq->db[MLX5_RCV_DBR];
	cmd.drv.buf_addr = (uintptr_t)rwq->buf.buf;
	cmd.drv.db_addr = (uintptr_t)rwq->db;
	cmd.drv.rq_wqe_count = rwq->rq.wqe_cnt;
	cmd.drv.rq_wqe_shift = rwq->rq.wqe_shift;
	usr_idx = mlx5_store_uidx(ctx, rwq);
	if (usr_idx < 0) {
		mlx5_dbg(fp, MLX5_DBG_QP, "Couldn't find free user index\n");
		goto err_free_db_rec;
	}

	cmd.drv.user_index = usr_idx;
	err = ibv_cmd_create_wq(context, attr, &rwq->wq, &cmd.ibv_cmd,
				sizeof(cmd.ibv_cmd),
				sizeof(cmd),
				&resp.ibv_resp, sizeof(resp.ibv_resp),
				sizeof(resp));
	if (err)
		goto err_create;

	rwq->rsc.type = MLX5_RSC_TYPE_RWQ;
	rwq->rsc.rsn = cmd.drv.user_index;

	rwq->wq.post_recv = mlx5_post_wq_recv;
	return &rwq->wq;

err_create:
	mlx5_clear_uidx(ctx, cmd.drv.user_index);
err_free_db_rec:
	mlx5_free_db(to_mctx(context), rwq->db);
err_free_rwq_buf:
	mlx5_free_rwq_buf(rwq, context);
err:
	free(rwq);
	return NULL;
}

int mlx5_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr)
{
	struct mlx5_modify_wq cmd = {};
	struct mlx5_rwq *rwq = to_mrwq(wq);

	if ((attr->attr_mask & IBV_WQ_ATTR_STATE) &&
	    attr->wq_state == IBV_WQS_RDY) {
		if ((attr->attr_mask & IBV_WQ_ATTR_CURR_STATE) &&
		    attr->curr_wq_state != wq->state)
			return -EINVAL;

		if (wq->state == IBV_WQS_RESET) {
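			/*
			 * Leaving RESET: purge stale CQEs that still
			 * carry this WQ's rsn and rewind the ring
			 * indices before the WQ starts receiving again.
			 */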
			mlx5_spin_lock(&to_mcq(wq->cq)->lock);
			__mlx5_cq_clean(to_mcq(wq->cq),
					rwq->rsc.rsn, NULL);
			mlx5_spin_unlock(&to_mcq(wq->cq)->lock);
			mlx5_init_rwq_indices(rwq);
			rwq->db[MLX5_RCV_DBR] = 0;
			rwq->db[MLX5_SND_DBR] = 0;
		}
	}

	return ibv_cmd_modify_wq(wq, attr, &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd));
}

int mlx5_destroy_wq(struct ibv_wq *wq)
{
	struct mlx5_rwq *rwq = to_mrwq(wq);
	int ret;

	ret = ibv_cmd_destroy_wq(wq);
	if (ret)
		return ret;

	mlx5_spin_lock(&to_mcq(wq->cq)->lock);
	__mlx5_cq_clean(to_mcq(wq->cq), rwq->rsc.rsn, NULL);
	mlx5_spin_unlock(&to_mcq(wq->cq)->lock);
	mlx5_clear_uidx(to_mctx(wq->context), rwq->rsc.rsn);
	mlx5_free_db(to_mctx(wq->context), rwq->db);
	mlx5_free_rwq_buf(rwq, wq->context);
	free(rwq);

	return 0;
}

struct ibv_rwq_ind_table *mlx5_create_rwq_ind_table(struct ibv_context *context,
						    struct ibv_rwq_ind_table_init_attr *init_attr)
{
	struct ibv_create_rwq_ind_table *cmd;
	struct mlx5_create_rwq_ind_table_resp resp;
	struct ibv_rwq_ind_table *ind_table;
	uint32_t required_tbl_size;
	int num_tbl_entries;
	int cmd_size;
	int err;

	num_tbl_entries = 1 << init_attr->log_ind_tbl_size;
	/* Data must be u64 aligned */
	required_tbl_size = (num_tbl_entries * sizeof(uint32_t)) < sizeof(uint64_t) ?
		sizeof(uint64_t) : (num_tbl_entries * sizeof(uint32_t));

	cmd_size = required_tbl_size + sizeof(*cmd);
	cmd = calloc(1, cmd_size);
	if (!cmd)
		return NULL;

	memset(&resp, 0, sizeof(resp));
	ind_table = calloc(1, sizeof(*ind_table));
	if (!ind_table)
		goto free_cmd;

	err = ibv_cmd_create_rwq_ind_table(context, init_attr, ind_table, cmd,
					   cmd_size, cmd_size, &resp.ibv_resp, sizeof(resp.ibv_resp),
					   sizeof(resp));
	if (err)
		goto err;

	free(cmd);
	return ind_table;

err:
	free(ind_table);
free_cmd:
	free(cmd);
	return NULL;
}

int mlx5_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table)
{
	int ret;

	ret = ibv_cmd_destroy_rwq_ind_table(rwq_ind_table);
	if (ret)
		return ret;

	free(rwq_ind_table);

	return 0;
}