2 * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
42 #include <infiniband/opcode.h>
/* Fields of the MODIFY_CQ command the kernel understands.
 * NOTE(review): "RESEIZE" spelling kept as-is — it may be referenced elsewhere. */
enum {
	MLX5_CQ_MODIFY_RESEIZE = 0,
	MLX5_CQ_MODIFY_MODER = 1,
	MLX5_CQ_MODIFY_MAPPING = 2,
};
/* Tunables for the CQ-poll stall (busy-wait back-off) heuristic.
 * mlx5_stall_num_loop is an iteration count for the fixed stall;
 * the cq_poll_{min,max} and {inc,dec}_step values are in TSC cycles
 * and drive the adaptive stall. NOTE(review): defaults look empirical. */
int mlx5_stall_num_loop = 60;
int mlx5_stall_cq_poll_min = 60;
int mlx5_stall_cq_poll_max = 100000;
int mlx5_stall_cq_inc_step = 100;
int mlx5_stall_cq_dec_step = 10;
66 static inline uint8_t get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe)
68 return (cqe->l4_hdr_type_etc >> 2) & 0x3;
71 static void *get_buf_cqe(struct mlx5_buf *buf, int n, int cqe_sz)
73 return buf->buf + n * cqe_sz;
76 static void *get_cqe(struct mlx5_cq *cq, int n)
78 return cq->active_buf->buf + n * cq->cqe_sz;
81 static void *get_sw_cqe(struct mlx5_cq *cq, int n)
83 void *cqe = get_cqe(cq, n & cq->ibv_cq.cqe);
84 struct mlx5_cqe64 *cqe64;
86 cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
88 if (likely(mlx5dv_get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
89 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibv_cq.cqe + 1)))) {
96 static void *next_cqe_sw(struct mlx5_cq *cq)
98 return get_sw_cqe(cq, cq->cons_index);
101 static void update_cons_index(struct mlx5_cq *cq)
103 cq->dbrec[MLX5_CQ_SET_CI] = htobe32(cq->cons_index & 0xffffff);
106 static inline void handle_good_req(struct ibv_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_wq *wq, int idx)
108 switch (be32toh(cqe->sop_drop_qpn) >> 24) {
109 case MLX5_OPCODE_RDMA_WRITE_IMM:
110 wc->wc_flags |= IBV_WC_WITH_IMM;
112 case MLX5_OPCODE_RDMA_WRITE:
113 wc->opcode = IBV_WC_RDMA_WRITE;
115 case MLX5_OPCODE_SEND_IMM:
116 wc->wc_flags |= IBV_WC_WITH_IMM;
118 case MLX5_OPCODE_SEND:
119 case MLX5_OPCODE_SEND_INVAL:
120 wc->opcode = IBV_WC_SEND;
122 case MLX5_OPCODE_RDMA_READ:
123 wc->opcode = IBV_WC_RDMA_READ;
124 wc->byte_len = be32toh(cqe->byte_cnt);
126 case MLX5_OPCODE_ATOMIC_CS:
127 wc->opcode = IBV_WC_COMP_SWAP;
130 case MLX5_OPCODE_ATOMIC_FA:
131 wc->opcode = IBV_WC_FETCH_ADD;
134 case MLX5_OPCODE_UMR:
135 wc->opcode = wq->wr_data[idx];
137 case MLX5_OPCODE_TSO:
138 wc->opcode = IBV_WC_TSO;
143 static inline int handle_responder_lazy(struct mlx5_cq *cq, struct mlx5_cqe64 *cqe,
144 struct mlx5_resource *cur_rsc, struct mlx5_srq *srq)
148 struct mlx5_qp *qp = rsc_to_mqp(cur_rsc);
149 int err = IBV_WC_SUCCESS;
152 wqe_ctr = be16toh(cqe->wqe_counter);
153 cq->ibv_cq.wr_id = srq->wrid[wqe_ctr];
154 mlx5_free_srq_wqe(srq, wqe_ctr);
155 if (cqe->op_own & MLX5_INLINE_SCATTER_32)
156 err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe,
157 be32toh(cqe->byte_cnt));
158 else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
159 err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe - 1,
160 be32toh(cqe->byte_cnt));
162 if (likely(cur_rsc->type == MLX5_RSC_TYPE_QP)) {
164 if (qp->qp_cap_cache & MLX5_RX_CSUM_VALID)
165 cq->flags |= MLX5_CQ_FLAGS_RX_CSUM_VALID;
167 wq = &(rsc_to_mrwq(cur_rsc)->rq);
170 wqe_ctr = wq->tail & (wq->wqe_cnt - 1);
171 cq->ibv_cq.wr_id = wq->wrid[wqe_ctr];
173 if (cqe->op_own & MLX5_INLINE_SCATTER_32)
174 err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe,
175 be32toh(cqe->byte_cnt));
176 else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
177 err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe - 1,
178 be32toh(cqe->byte_cnt));
184 static inline int handle_responder(struct ibv_wc *wc, struct mlx5_cqe64 *cqe,
185 struct mlx5_resource *cur_rsc, struct mlx5_srq *srq)
189 struct mlx5_qp *qp = rsc_to_mqp(cur_rsc);
193 wc->byte_len = be32toh(cqe->byte_cnt);
195 wqe_ctr = be16toh(cqe->wqe_counter);
196 wc->wr_id = srq->wrid[wqe_ctr];
197 mlx5_free_srq_wqe(srq, wqe_ctr);
198 if (cqe->op_own & MLX5_INLINE_SCATTER_32)
199 err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe,
201 else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
202 err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe - 1,
205 if (likely(cur_rsc->type == MLX5_RSC_TYPE_QP)) {
207 if (qp->qp_cap_cache & MLX5_RX_CSUM_VALID)
208 wc->wc_flags |= (!!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) &
209 !!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) &
210 (get_cqe_l3_hdr_type(cqe) ==
211 MLX5_CQE_L3_HDR_TYPE_IPV4)) <<
212 IBV_WC_IP_CSUM_OK_SHIFT;
214 wq = &(rsc_to_mrwq(cur_rsc)->rq);
217 wqe_ctr = wq->tail & (wq->wqe_cnt - 1);
218 wc->wr_id = wq->wrid[wqe_ctr];
220 if (cqe->op_own & MLX5_INLINE_SCATTER_32)
221 err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe,
223 else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
224 err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe - 1,
230 switch (cqe->op_own >> 4) {
231 case MLX5_CQE_RESP_WR_IMM:
232 wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
233 wc->wc_flags |= IBV_WC_WITH_IMM;
234 wc->imm_data = cqe->imm_inval_pkey;
236 case MLX5_CQE_RESP_SEND:
237 wc->opcode = IBV_WC_RECV;
239 case MLX5_CQE_RESP_SEND_IMM:
240 wc->opcode = IBV_WC_RECV;
241 wc->wc_flags |= IBV_WC_WITH_IMM;
242 wc->imm_data = cqe->imm_inval_pkey;
244 case MLX5_CQE_RESP_SEND_INV:
245 wc->opcode = IBV_WC_RECV;
246 wc->wc_flags |= IBV_WC_WITH_INV;
247 wc->imm_data = be32toh(cqe->imm_inval_pkey);
250 wc->slid = be16toh(cqe->slid);
251 wc->sl = (be32toh(cqe->flags_rqpn) >> 24) & 0xf;
252 wc->src_qp = be32toh(cqe->flags_rqpn) & 0xffffff;
253 wc->dlid_path_bits = cqe->ml_path & 0x7f;
254 g = (be32toh(cqe->flags_rqpn) >> 28) & 3;
255 wc->wc_flags |= g ? IBV_WC_GRH : 0;
256 wc->pkey_index = be32toh(cqe->imm_inval_pkey) & 0xffff;
258 return IBV_WC_SUCCESS;
/* Dump a 64-byte CQE as 16 big-endian 32-bit words, 4 per line. */
static void dump_cqe(FILE *fp, void *buf)
{
	uint32_t *p = buf;
	int i;

	for (i = 0; i < 16; i += 4)
		fprintf(fp, "%08x %08x %08x %08x\n", be32toh(p[i]), be32toh(p[i + 1]),
			be32toh(p[i + 2]), be32toh(p[i + 3]));
}
271 static enum ibv_wc_status mlx5_handle_error_cqe(struct mlx5_err_cqe *cqe)
273 switch (cqe->syndrome) {
274 case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
275 return IBV_WC_LOC_LEN_ERR;
276 case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
277 return IBV_WC_LOC_QP_OP_ERR;
278 case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
279 return IBV_WC_LOC_PROT_ERR;
280 case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
281 return IBV_WC_WR_FLUSH_ERR;
282 case MLX5_CQE_SYNDROME_MW_BIND_ERR:
283 return IBV_WC_MW_BIND_ERR;
284 case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
285 return IBV_WC_BAD_RESP_ERR;
286 case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
287 return IBV_WC_LOC_ACCESS_ERR;
288 case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
289 return IBV_WC_REM_INV_REQ_ERR;
290 case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
291 return IBV_WC_REM_ACCESS_ERR;
292 case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
293 return IBV_WC_REM_OP_ERR;
294 case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
295 return IBV_WC_RETRY_EXC_ERR;
296 case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
297 return IBV_WC_RNR_RETRY_EXC_ERR;
298 case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
299 return IBV_WC_REM_ABORT_ERR;
301 return IBV_WC_GENERAL_ERR;
305 #if defined(__x86_64__) || defined (__i386__)
306 static inline unsigned long get_cycles(void)
310 asm volatile ("rdtsc" : "=a" (low), "=d" (high));
312 val = (val << 32) | low;
316 static void mlx5_stall_poll_cq(void)
320 for (i = 0; i < mlx5_stall_num_loop; i++)
323 static void mlx5_stall_cycles_poll_cq(uint64_t cycles)
325 while (get_cycles() < cycles)
328 static void mlx5_get_cycles(uint64_t *cycles)
330 *cycles = get_cycles();
333 static void mlx5_stall_poll_cq(void)
336 static void mlx5_stall_cycles_poll_cq(uint64_t cycles)
339 static void mlx5_get_cycles(uint64_t *cycles)
344 static inline struct mlx5_qp *get_req_context(struct mlx5_context *mctx,
345 struct mlx5_resource **cur_rsc,
346 uint32_t rsn, int cqe_ver)
348 static inline struct mlx5_qp *get_req_context(struct mlx5_context *mctx,
349 struct mlx5_resource **cur_rsc,
350 uint32_t rsn, int cqe_ver)
352 if (!*cur_rsc || (rsn != (*cur_rsc)->rsn))
353 *cur_rsc = cqe_ver ? mlx5_find_uidx(mctx, rsn) :
354 (struct mlx5_resource *)mlx5_find_qp(mctx, rsn);
356 return rsc_to_mqp(*cur_rsc);
359 static inline int get_resp_ctx_v1(struct mlx5_context *mctx,
360 struct mlx5_resource **cur_rsc,
361 struct mlx5_srq **cur_srq,
362 uint32_t uidx, uint8_t *is_srq)
364 static inline int get_resp_ctx_v1(struct mlx5_context *mctx,
365 struct mlx5_resource **cur_rsc,
366 struct mlx5_srq **cur_srq,
367 uint32_t uidx, uint8_t *is_srq)
371 if (!*cur_rsc || (uidx != (*cur_rsc)->rsn)) {
372 *cur_rsc = mlx5_find_uidx(mctx, uidx);
373 if (unlikely(!*cur_rsc))
377 switch ((*cur_rsc)->type) {
378 case MLX5_RSC_TYPE_QP:
379 mqp = rsc_to_mqp(*cur_rsc);
380 if (mqp->verbs_qp.qp.srq) {
381 *cur_srq = to_msrq(mqp->verbs_qp.qp.srq);
385 case MLX5_RSC_TYPE_XSRQ:
386 *cur_srq = rsc_to_msrq(*cur_rsc);
389 case MLX5_RSC_TYPE_RWQ:
398 static inline int get_qp_ctx(struct mlx5_context *mctx,
399 struct mlx5_resource **cur_rsc,
402 static inline int get_qp_ctx(struct mlx5_context *mctx,
403 struct mlx5_resource **cur_rsc,
406 if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) {
408 * We do not have to take the QP table lock here,
409 * because CQs will be locked while QPs are removed
412 *cur_rsc = (struct mlx5_resource *)mlx5_find_qp(mctx, qpn);
413 if (unlikely(!*cur_rsc))
420 static inline int get_srq_ctx(struct mlx5_context *mctx,
421 struct mlx5_srq **cur_srq,
424 static inline int get_srq_ctx(struct mlx5_context *mctx,
425 struct mlx5_srq **cur_srq,
428 if (!*cur_srq || (srqn != (*cur_srq)->srqn)) {
429 *cur_srq = mlx5_find_srq(mctx, srqn);
430 if (unlikely(!*cur_srq))
437 static inline int get_cur_rsc(struct mlx5_context *mctx,
441 struct mlx5_resource **cur_rsc,
442 struct mlx5_srq **cur_srq,
448 err = get_resp_ctx_v1(mctx, cur_rsc, cur_srq, srqn_uidx,
453 err = get_srq_ctx(mctx, cur_srq, srqn_uidx);
455 err = get_qp_ctx(mctx, cur_rsc, qpn);
463 static inline int mlx5_get_next_cqe(struct mlx5_cq *cq,
464 struct mlx5_cqe64 **pcqe64,
467 static inline int mlx5_get_next_cqe(struct mlx5_cq *cq,
468 struct mlx5_cqe64 **pcqe64,
472 struct mlx5_cqe64 *cqe64;
474 cqe = next_cqe_sw(cq);
478 cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
482 VALGRIND_MAKE_MEM_DEFINED(cqe64, sizeof *cqe64);
485 * Make sure we read CQ entry contents after we've checked the
488 udma_from_device_barrier();
492 struct mlx5_context *mctx = to_mctx(cq->ibv_cq.context);
494 if (mlx5_debug_mask & MLX5_DBG_CQ_CQE) {
495 FILE *fp = mctx->dbg_fp;
497 mlx5_dbg(fp, MLX5_DBG_CQ_CQE, "dump cqe for cqn 0x%x:\n", cq->cqn);
508 static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
509 struct mlx5_cqe64 *cqe64,
511 struct mlx5_resource **cur_rsc,
512 struct mlx5_srq **cur_srq,
514 int cqe_ver, int lazy)
516 static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
517 struct mlx5_cqe64 *cqe64,
519 struct mlx5_resource **cur_rsc,
520 struct mlx5_srq **cur_srq,
522 int cqe_ver, int lazy)
530 struct mlx5_err_cqe *ecqe;
533 struct mlx5_context *mctx;
536 mctx = to_mctx(ibv_cq_ex_to_cq(&cq->ibv_cq)->context);
537 qpn = be32toh(cqe64->sop_drop_qpn) & 0xffffff;
540 cq->flags &= (~MLX5_CQ_FLAGS_RX_CSUM_VALID);
546 opcode = mlx5dv_get_cqe_opcode(cqe64);
550 mqp = get_req_context(mctx, cur_rsc,
551 (cqe_ver ? (be32toh(cqe64->srqn_uidx) & 0xffffff) : qpn),
556 wqe_ctr = be16toh(cqe64->wqe_counter);
557 idx = wqe_ctr & (wq->wqe_cnt - 1);
559 uint32_t wc_byte_len;
561 switch (be32toh(cqe64->sop_drop_qpn) >> 24) {
562 case MLX5_OPCODE_UMR:
563 cq->umr_opcode = wq->wr_data[idx];
566 case MLX5_OPCODE_RDMA_READ:
567 wc_byte_len = be32toh(cqe64->byte_cnt);
569 case MLX5_OPCODE_ATOMIC_CS:
570 case MLX5_OPCODE_ATOMIC_FA:
574 if (cqe64->op_own & MLX5_INLINE_SCATTER_32)
575 err = mlx5_copy_to_send_wqe(
576 mqp, wqe_ctr, cqe, wc_byte_len);
577 else if (cqe64->op_own & MLX5_INLINE_SCATTER_64)
578 err = mlx5_copy_to_send_wqe(
579 mqp, wqe_ctr, cqe - 1, wc_byte_len);
583 cq->ibv_cq.wr_id = wq->wrid[idx];
584 cq->ibv_cq.status = err;
586 handle_good_req(wc, cqe64, wq, idx);
588 if (cqe64->op_own & MLX5_INLINE_SCATTER_32)
589 err = mlx5_copy_to_send_wqe(mqp, wqe_ctr, cqe,
591 else if (cqe64->op_own & MLX5_INLINE_SCATTER_64)
592 err = mlx5_copy_to_send_wqe(
593 mqp, wqe_ctr, cqe - 1, wc->byte_len);
595 wc->wr_id = wq->wrid[idx];
599 wq->tail = wq->wqe_head[idx] + 1;
602 case MLX5_CQE_RESP_WR_IMM:
603 case MLX5_CQE_RESP_SEND:
604 case MLX5_CQE_RESP_SEND_IMM:
605 case MLX5_CQE_RESP_SEND_INV:
606 srqn_uidx = be32toh(cqe64->srqn_uidx) & 0xffffff;
607 err = get_cur_rsc(mctx, cqe_ver, qpn, srqn_uidx, cur_rsc,
613 cq->ibv_cq.status = handle_responder_lazy(cq, cqe64,
615 is_srq ? *cur_srq : NULL);
617 wc->status = handle_responder(wc, cqe64, *cur_rsc,
618 is_srq ? *cur_srq : NULL);
620 case MLX5_CQE_RESIZE_CQ:
622 case MLX5_CQE_REQ_ERR:
623 case MLX5_CQE_RESP_ERR:
624 srqn_uidx = be32toh(cqe64->srqn_uidx) & 0xffffff;
625 ecqe = (struct mlx5_err_cqe *)cqe64;
627 enum ibv_wc_status *pstatus = lazy ? &cq->ibv_cq.status : &wc->status;
629 *pstatus = mlx5_handle_error_cqe(ecqe);
633 wc->vendor_err = ecqe->vendor_err_synd;
635 if (unlikely(ecqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR &&
636 ecqe->syndrome != MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR)) {
637 FILE *fp = mctx->dbg_fp;
638 fprintf(fp, PFX "%s: got completion with error:\n",
641 if (mlx5_freeze_on_error_cqe) {
642 fprintf(fp, PFX "freezing at poll cq...");
648 if (opcode == MLX5_CQE_REQ_ERR) {
649 mqp = get_req_context(mctx, cur_rsc,
650 (cqe_ver ? srqn_uidx : qpn), cqe_ver);
654 wqe_ctr = be16toh(cqe64->wqe_counter);
655 idx = wqe_ctr & (wq->wqe_cnt - 1);
657 cq->ibv_cq.wr_id = wq->wrid[idx];
659 wc->wr_id = wq->wrid[idx];
660 wq->tail = wq->wqe_head[idx] + 1;
662 err = get_cur_rsc(mctx, cqe_ver, qpn, srqn_uidx,
663 cur_rsc, cur_srq, &is_srq);
668 wqe_ctr = be16toh(cqe64->wqe_counter);
670 cq->ibv_cq.wr_id = (*cur_srq)->wrid[wqe_ctr];
672 wc->wr_id = (*cur_srq)->wrid[wqe_ctr];
673 mlx5_free_srq_wqe(*cur_srq, wqe_ctr);
675 switch ((*cur_rsc)->type) {
676 case MLX5_RSC_TYPE_RWQ:
677 wq = &(rsc_to_mrwq(*cur_rsc)->rq);
680 wq = &(rsc_to_mqp(*cur_rsc)->rq);
685 cq->ibv_cq.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
687 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
697 static inline int mlx5_parse_lazy_cqe(struct mlx5_cq *cq,
698 struct mlx5_cqe64 *cqe64,
699 void *cqe, int cqe_ver)
701 static inline int mlx5_parse_lazy_cqe(struct mlx5_cq *cq,
702 struct mlx5_cqe64 *cqe64,
703 void *cqe, int cqe_ver)
705 return mlx5_parse_cqe(cq, cqe64, cqe, &cq->cur_rsc, &cq->cur_srq, NULL, cqe_ver, 1);
708 static inline int mlx5_poll_one(struct mlx5_cq *cq,
709 struct mlx5_resource **cur_rsc,
710 struct mlx5_srq **cur_srq,
711 struct ibv_wc *wc, int cqe_ver)
713 static inline int mlx5_poll_one(struct mlx5_cq *cq,
714 struct mlx5_resource **cur_rsc,
715 struct mlx5_srq **cur_srq,
716 struct ibv_wc *wc, int cqe_ver)
718 struct mlx5_cqe64 *cqe64;
722 err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
726 return mlx5_parse_cqe(cq, cqe64, cqe, cur_rsc, cur_srq, wc, cqe_ver, 0);
729 static inline int poll_cq(struct ibv_cq *ibcq, int ne,
730 struct ibv_wc *wc, int cqe_ver)
732 static inline int poll_cq(struct ibv_cq *ibcq, int ne,
733 struct ibv_wc *wc, int cqe_ver)
735 struct mlx5_cq *cq = to_mcq(ibcq);
736 struct mlx5_resource *rsc = NULL;
737 struct mlx5_srq *srq = NULL;
741 if (cq->stall_enable) {
742 if (cq->stall_adaptive_enable) {
743 if (cq->stall_last_count)
744 mlx5_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles);
745 } else if (cq->stall_next_poll) {
746 cq->stall_next_poll = 0;
747 mlx5_stall_poll_cq();
751 mlx5_spin_lock(&cq->lock);
753 for (npolled = 0; npolled < ne; ++npolled) {
754 err = mlx5_poll_one(cq, &rsc, &srq, wc + npolled, cqe_ver);
759 update_cons_index(cq);
761 mlx5_spin_unlock(&cq->lock);
763 if (cq->stall_enable) {
764 if (cq->stall_adaptive_enable) {
766 cq->stall_cycles = max(cq->stall_cycles-mlx5_stall_cq_dec_step,
767 mlx5_stall_cq_poll_min);
768 mlx5_get_cycles(&cq->stall_last_count);
769 } else if (npolled < ne) {
770 cq->stall_cycles = min(cq->stall_cycles+mlx5_stall_cq_inc_step,
771 mlx5_stall_cq_poll_max);
772 mlx5_get_cycles(&cq->stall_last_count);
774 cq->stall_cycles = max(cq->stall_cycles-mlx5_stall_cq_dec_step,
775 mlx5_stall_cq_poll_min);
776 cq->stall_last_count = 0;
778 } else if (err == CQ_EMPTY) {
779 cq->stall_next_poll = 1;
783 return err == CQ_POLL_ERR ? err : npolled;
/* How a lazy-poll CQ throttles empty polls (used to parameterize the
 * start/next/end poll function templates below). */
enum polling_mode {
	POLLING_MODE_NO_STALL,
	POLLING_MODE_STALL,
	POLLING_MODE_STALL_ADAPTIVE
};
792 static inline void _mlx5_end_poll(struct ibv_cq_ex *ibcq,
793 int lock, enum polling_mode stall)
795 static inline void _mlx5_end_poll(struct ibv_cq_ex *ibcq,
796 int lock, enum polling_mode stall)
798 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
800 update_cons_index(cq);
803 mlx5_spin_unlock(&cq->lock);
806 if (stall == POLLING_MODE_STALL_ADAPTIVE) {
807 if (!(cq->flags & MLX5_CQ_FLAGS_FOUND_CQES)) {
808 cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
809 mlx5_stall_cq_poll_min);
810 mlx5_get_cycles(&cq->stall_last_count);
811 } else if (cq->flags & MLX5_CQ_FLAGS_EMPTY_DURING_POLL) {
812 cq->stall_cycles = min(cq->stall_cycles + mlx5_stall_cq_inc_step,
813 mlx5_stall_cq_poll_max);
814 mlx5_get_cycles(&cq->stall_last_count);
816 cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
817 mlx5_stall_cq_poll_min);
818 cq->stall_last_count = 0;
820 } else if (!(cq->flags & MLX5_CQ_FLAGS_FOUND_CQES)) {
821 cq->stall_next_poll = 1;
824 cq->flags &= ~(MLX5_CQ_FLAGS_FOUND_CQES | MLX5_CQ_FLAGS_EMPTY_DURING_POLL);
828 static inline int mlx5_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr,
829 int lock, enum polling_mode stall, int cqe_version)
831 static inline int mlx5_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr,
832 int lock, enum polling_mode stall, int cqe_version)
834 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
835 struct mlx5_cqe64 *cqe64;
839 if (unlikely(attr->comp_mask))
843 if (stall == POLLING_MODE_STALL_ADAPTIVE) {
844 if (cq->stall_last_count)
845 mlx5_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles);
846 } else if (cq->stall_next_poll) {
847 cq->stall_next_poll = 0;
848 mlx5_stall_poll_cq();
853 mlx5_spin_lock(&cq->lock);
858 err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
859 if (err == CQ_EMPTY) {
861 mlx5_spin_unlock(&cq->lock);
864 if (stall == POLLING_MODE_STALL_ADAPTIVE) {
865 cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
866 mlx5_stall_cq_poll_min);
867 mlx5_get_cycles(&cq->stall_last_count);
869 cq->stall_next_poll = 1;
877 cq->flags |= MLX5_CQ_FLAGS_FOUND_CQES;
879 err = mlx5_parse_lazy_cqe(cq, cqe64, cqe, cqe_version);
881 mlx5_spin_unlock(&cq->lock);
884 if (stall == POLLING_MODE_STALL_ADAPTIVE) {
885 cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
886 mlx5_stall_cq_poll_min);
887 cq->stall_last_count = 0;
890 cq->flags &= ~(MLX5_CQ_FLAGS_FOUND_CQES);
896 static inline int mlx5_next_poll(struct ibv_cq_ex *ibcq,
897 enum polling_mode stall, int cqe_version)
899 static inline int mlx5_next_poll(struct ibv_cq_ex *ibcq,
900 enum polling_mode stall,
903 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
904 struct mlx5_cqe64 *cqe64;
908 err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
909 if (err == CQ_EMPTY) {
910 if (stall == POLLING_MODE_STALL_ADAPTIVE)
911 cq->flags |= MLX5_CQ_FLAGS_EMPTY_DURING_POLL;
916 return mlx5_parse_lazy_cqe(cq, cqe64, cqe, cqe_version);
919 static inline int mlx5_next_poll_adaptive_v0(struct ibv_cq_ex *ibcq)
921 return mlx5_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 0);
924 static inline int mlx5_next_poll_adaptive_v1(struct ibv_cq_ex *ibcq)
926 return mlx5_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 1);
929 static inline int mlx5_next_poll_v0(struct ibv_cq_ex *ibcq)
931 return mlx5_next_poll(ibcq, 0, 0);
934 static inline int mlx5_next_poll_v1(struct ibv_cq_ex *ibcq)
936 return mlx5_next_poll(ibcq, 0, 1);
939 static inline int mlx5_start_poll_v0(struct ibv_cq_ex *ibcq,
940 struct ibv_poll_cq_attr *attr)
942 return mlx5_start_poll(ibcq, attr, 0, 0, 0);
945 static inline int mlx5_start_poll_v1(struct ibv_cq_ex *ibcq,
946 struct ibv_poll_cq_attr *attr)
948 return mlx5_start_poll(ibcq, attr, 0, 0, 1);
951 static inline int mlx5_start_poll_v0_lock(struct ibv_cq_ex *ibcq,
952 struct ibv_poll_cq_attr *attr)
954 return mlx5_start_poll(ibcq, attr, 1, 0, 0);
957 static inline int mlx5_start_poll_v1_lock(struct ibv_cq_ex *ibcq,
958 struct ibv_poll_cq_attr *attr)
960 return mlx5_start_poll(ibcq, attr, 1, 0, 1);
963 static inline int mlx5_start_poll_adaptive_stall_v0_lock(struct ibv_cq_ex *ibcq,
964 struct ibv_poll_cq_attr *attr)
966 return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0);
969 static inline int mlx5_start_poll_stall_v0_lock(struct ibv_cq_ex *ibcq,
970 struct ibv_poll_cq_attr *attr)
972 return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0);
975 static inline int mlx5_start_poll_adaptive_stall_v1_lock(struct ibv_cq_ex *ibcq,
976 struct ibv_poll_cq_attr *attr)
978 return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1);
981 static inline int mlx5_start_poll_stall_v1_lock(struct ibv_cq_ex *ibcq,
982 struct ibv_poll_cq_attr *attr)
984 return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1);
987 static inline int mlx5_start_poll_stall_v0(struct ibv_cq_ex *ibcq,
988 struct ibv_poll_cq_attr *attr)
990 return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0);
993 static inline int mlx5_start_poll_adaptive_stall_v0(struct ibv_cq_ex *ibcq,
994 struct ibv_poll_cq_attr *attr)
996 return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0);
999 static inline int mlx5_start_poll_adaptive_stall_v1(struct ibv_cq_ex *ibcq,
1000 struct ibv_poll_cq_attr *attr)
1002 return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1);
1005 static inline int mlx5_start_poll_stall_v1(struct ibv_cq_ex *ibcq,
1006 struct ibv_poll_cq_attr *attr)
1008 return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1);
1011 static inline void mlx5_end_poll_adaptive_stall_lock(struct ibv_cq_ex *ibcq)
1013 _mlx5_end_poll(ibcq, 1, POLLING_MODE_STALL_ADAPTIVE);
1016 static inline void mlx5_end_poll_stall_lock(struct ibv_cq_ex *ibcq)
1018 _mlx5_end_poll(ibcq, 1, POLLING_MODE_STALL);
1021 static inline void mlx5_end_poll_adaptive_stall(struct ibv_cq_ex *ibcq)
1023 _mlx5_end_poll(ibcq, 0, POLLING_MODE_STALL_ADAPTIVE);
1026 static inline void mlx5_end_poll_stall(struct ibv_cq_ex *ibcq)
1028 _mlx5_end_poll(ibcq, 0, POLLING_MODE_STALL);
1031 static inline void mlx5_end_poll(struct ibv_cq_ex *ibcq)
1033 _mlx5_end_poll(ibcq, 0, 0);
1036 static inline void mlx5_end_poll_lock(struct ibv_cq_ex *ibcq)
1038 _mlx5_end_poll(ibcq, 1, 0);
/* Public ibv_poll_cq entry points for CQE version 0 and 1. */
int mlx5_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
{
	return poll_cq(ibcq, ne, wc, 0);
}

int mlx5_poll_cq_v1(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
{
	return poll_cq(ibcq, ne, wc, 1);
}
1051 static inline enum ibv_wc_opcode mlx5_cq_read_wc_opcode(struct ibv_cq_ex *ibcq)
1053 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1055 switch (mlx5dv_get_cqe_opcode(cq->cqe64)) {
1056 case MLX5_CQE_RESP_WR_IMM:
1057 return IBV_WC_RECV_RDMA_WITH_IMM;
1058 case MLX5_CQE_RESP_SEND:
1059 case MLX5_CQE_RESP_SEND_IMM:
1060 case MLX5_CQE_RESP_SEND_INV:
1063 switch (be32toh(cq->cqe64->sop_drop_qpn) >> 24) {
1064 case MLX5_OPCODE_RDMA_WRITE_IMM:
1065 case MLX5_OPCODE_RDMA_WRITE:
1066 return IBV_WC_RDMA_WRITE;
1067 case MLX5_OPCODE_SEND_IMM:
1068 case MLX5_OPCODE_SEND:
1069 case MLX5_OPCODE_SEND_INVAL:
1071 case MLX5_OPCODE_RDMA_READ:
1072 return IBV_WC_RDMA_READ;
1073 case MLX5_OPCODE_ATOMIC_CS:
1074 return IBV_WC_COMP_SWAP;
1075 case MLX5_OPCODE_ATOMIC_FA:
1076 return IBV_WC_FETCH_ADD;
1077 case MLX5_OPCODE_UMR:
1078 return cq->umr_opcode;
1079 case MLX5_OPCODE_TSO:
1086 struct mlx5_context *ctx = to_mctx(ibcq->context);
1088 mlx5_dbg(ctx->dbg_fp, MLX5_DBG_CQ_CQE, "un-expected opcode in cqe\n");
1094 static inline uint32_t mlx5_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq)
1096 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1098 return be32toh(cq->cqe64->sop_drop_qpn) & 0xffffff;
1101 static inline int mlx5_cq_read_wc_flags(struct ibv_cq_ex *ibcq)
1103 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1106 if (cq->flags & MLX5_CQ_FLAGS_RX_CSUM_VALID)
1107 wc_flags = (!!(cq->cqe64->hds_ip_ext & MLX5_CQE_L4_OK) &
1108 !!(cq->cqe64->hds_ip_ext & MLX5_CQE_L3_OK) &
1109 (get_cqe_l3_hdr_type(cq->cqe64) ==
1110 MLX5_CQE_L3_HDR_TYPE_IPV4)) <<
1111 IBV_WC_IP_CSUM_OK_SHIFT;
1113 switch (mlx5dv_get_cqe_opcode(cq->cqe64)) {
1114 case MLX5_CQE_RESP_WR_IMM:
1115 case MLX5_CQE_RESP_SEND_IMM:
1116 wc_flags |= IBV_WC_WITH_IMM;
1118 case MLX5_CQE_RESP_SEND_INV:
1119 wc_flags |= IBV_WC_WITH_INV;
1123 wc_flags |= ((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ? IBV_WC_GRH : 0;
1127 static inline uint32_t mlx5_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq)
1129 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1131 return be32toh(cq->cqe64->byte_cnt);
1134 static inline uint32_t mlx5_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq)
1136 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1137 struct mlx5_err_cqe *ecqe = (struct mlx5_err_cqe *)cq->cqe64;
1139 return ecqe->vendor_err_synd;
1142 static inline uint32_t mlx5_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq)
1144 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1146 switch (mlx5dv_get_cqe_opcode(cq->cqe64)) {
1147 case MLX5_CQE_RESP_SEND_INV:
1148 return be32toh(cq->cqe64->imm_inval_pkey);
1150 return cq->cqe64->imm_inval_pkey;
1154 static inline uint32_t mlx5_cq_read_wc_slid(struct ibv_cq_ex *ibcq)
1156 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1158 return (uint32_t)be16toh(cq->cqe64->slid);
1161 static inline uint8_t mlx5_cq_read_wc_sl(struct ibv_cq_ex *ibcq)
1163 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1165 return (be32toh(cq->cqe64->flags_rqpn) >> 24) & 0xf;
1168 static inline uint32_t mlx5_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq)
1170 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1172 return be32toh(cq->cqe64->flags_rqpn) & 0xffffff;
1175 static inline uint8_t mlx5_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq)
1177 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1179 return cq->cqe64->ml_path & 0x7f;
1182 static inline uint64_t mlx5_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq)
1184 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1186 return be64toh(cq->cqe64->timestamp);
1189 static inline uint16_t mlx5_cq_read_wc_cvlan(struct ibv_cq_ex *ibcq)
1191 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1193 return be16toh(cq->cqe64->vlan_info);
1196 static inline uint32_t mlx5_cq_read_flow_tag(struct ibv_cq_ex *ibcq)
1198 struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1200 return be32toh(cq->cqe64->sop_drop_qpn) & MLX5_FLOW_TAG_MASK;
#define BIT(i) (1UL << (i))

/* Index bits for the ops[] dispatch table below.
 * V1 (BIT(2)) is required: it is referenced by the table initializers
 * and mlx5_cq_fill_pfns(). */
#define SINGLE_THREADED	BIT(0)
#define STALL		BIT(1)
#define V1		BIT(2)
#define ADAPTIVE	BIT(3)
/* Token-paste helpers that assemble the monomorphized poll function
 * names (e.g. mlx5_start_poll_adaptive_stall_v1_lock) from suffix
 * fragments, plus a table-entry constructor. */
#define mlx5_start_poll_name(cqe_ver, lock, stall, adaptive) \
	mlx5_start_poll##adaptive##stall##cqe_ver##lock
#define mlx5_next_poll_name(cqe_ver, adaptive) \
	mlx5_next_poll##adaptive##cqe_ver
#define mlx5_end_poll_name(lock, stall, adaptive) \
	mlx5_end_poll##adaptive##stall##lock

#define POLL_FN_ENTRY(cqe_ver, lock, stall, adaptive) { \
		.start_poll = &mlx5_start_poll_name(cqe_ver, lock, stall, adaptive), \
		.next_poll = &mlx5_next_poll_name(cqe_ver, adaptive), \
		.end_poll = &mlx5_end_poll_name(lock, stall, adaptive), \
	}
/* Dispatch table mapping each combination of CQE version, locking and
 * stall mode to its monomorphized start/next/end poll functions. */
static const struct op
{
	int (*start_poll)(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr);
	int (*next_poll)(struct ibv_cq_ex *ibcq);
	void (*end_poll)(struct ibv_cq_ex *ibcq);
} ops[ADAPTIVE + V1 + STALL + SINGLE_THREADED + 1] = {
	[V1] = POLL_FN_ENTRY(_v1, _lock, , ),
	[0] = POLL_FN_ENTRY(_v0, _lock, , ),
	[V1 | SINGLE_THREADED] = POLL_FN_ENTRY(_v1, , , ),
	[SINGLE_THREADED] = POLL_FN_ENTRY(_v0, , , ),
	[V1 | STALL] = POLL_FN_ENTRY(_v1, _lock, _stall, ),
	[STALL] = POLL_FN_ENTRY(_v0, _lock, _stall, ),
	[V1 | SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v1, , _stall, ),
	[SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v0, , _stall, ),
	[V1 | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive),
	[STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive),
	[V1 | SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive),
	[SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive),
};
1243 void mlx5_cq_fill_pfns(struct mlx5_cq *cq, const struct ibv_cq_init_attr_ex *cq_attr)
1245 struct mlx5_context *mctx = to_mctx(ibv_cq_ex_to_cq(&cq->ibv_cq)->context);
1246 const struct op *poll_ops = &ops[((cq->stall_enable && cq->stall_adaptive_enable) ? ADAPTIVE : 0) |
1247 (mctx->cqe_version ? V1 : 0) |
1248 (cq->flags & MLX5_CQ_FLAGS_SINGLE_THREADED ?
1249 SINGLE_THREADED : 0) |
1250 (cq->stall_enable ? STALL : 0)];
1252 cq->ibv_cq.start_poll = poll_ops->start_poll;
1253 cq->ibv_cq.next_poll = poll_ops->next_poll;
1254 cq->ibv_cq.end_poll = poll_ops->end_poll;
1256 cq->ibv_cq.read_opcode = mlx5_cq_read_wc_opcode;
1257 cq->ibv_cq.read_vendor_err = mlx5_cq_read_wc_vendor_err;
1258 cq->ibv_cq.read_wc_flags = mlx5_cq_read_wc_flags;
1259 if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
1260 cq->ibv_cq.read_byte_len = mlx5_cq_read_wc_byte_len;
1261 if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM)
1262 cq->ibv_cq.read_imm_data = mlx5_cq_read_wc_imm_data;
1263 if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM)
1264 cq->ibv_cq.read_qp_num = mlx5_cq_read_wc_qp_num;
1265 if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP)
1266 cq->ibv_cq.read_src_qp = mlx5_cq_read_wc_src_qp;
1267 if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID)
1268 cq->ibv_cq.read_slid = mlx5_cq_read_wc_slid;
1269 if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL)
1270 cq->ibv_cq.read_sl = mlx5_cq_read_wc_sl;
1271 if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
1272 cq->ibv_cq.read_dlid_path_bits = mlx5_cq_read_wc_dlid_path_bits;
1273 if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
1274 cq->ibv_cq.read_completion_ts = mlx5_cq_read_wc_completion_ts;
1275 if (cq_attr->wc_flags & IBV_WC_EX_WITH_CVLAN)
1276 cq->ibv_cq.read_cvlan = mlx5_cq_read_wc_cvlan;
1277 if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG)
1278 cq->ibv_cq.read_flow_tag = mlx5_cq_read_flow_tag;
1281 int mlx5_arm_cq(struct ibv_cq *ibvcq, int solicited)
1283 struct mlx5_cq *cq = to_mcq(ibvcq);
1284 struct mlx5_context *ctx = to_mctx(ibvcq->context);
1285 uint32_t doorbell[2];
1290 sn = cq->arm_sn & 3;
1291 ci = cq->cons_index & 0xffffff;
1292 cmd = solicited ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT;
1294 cq->dbrec[MLX5_CQ_ARM_DB] = htobe32(sn << 28 | cmd | ci);
1297 * Make sure that the doorbell record in host memory is
1298 * written before ringing the doorbell via PCI WC MMIO.
1302 doorbell[0] = htobe32(sn << 28 | cmd | ci);
1303 doorbell[1] = htobe32(cq->cqn);
1305 mlx5_write64(doorbell, ctx->uar[0] + MLX5_CQ_DOORBELL, &ctx->lock32);
1307 mmio_flush_writes();
1312 void mlx5_cq_event(struct ibv_cq *cq)
1314 to_mcq(cq)->arm_sn++;
1317 static int is_equal_rsn(struct mlx5_cqe64 *cqe64, uint32_t rsn)
1319 return rsn == (be32toh(cqe64->sop_drop_qpn) & 0xffffff);
1322 static inline int is_equal_uidx(struct mlx5_cqe64 *cqe64, uint32_t uidx)
1324 return uidx == (be32toh(cqe64->srqn_uidx) & 0xffffff);
1327 static inline int is_responder(uint8_t opcode)
1330 case MLX5_CQE_RESP_WR_IMM:
1331 case MLX5_CQE_RESP_SEND:
1332 case MLX5_CQE_RESP_SEND_IMM:
1333 case MLX5_CQE_RESP_SEND_INV:
1334 case MLX5_CQE_RESP_ERR:
1341 static inline int free_res_cqe(struct mlx5_cqe64 *cqe64, uint32_t rsn,
1342 struct mlx5_srq *srq, int cqe_version)
1345 if (is_equal_uidx(cqe64, rsn)) {
1346 if (srq && is_responder(mlx5dv_get_cqe_opcode(cqe64)))
1347 mlx5_free_srq_wqe(srq,
1348 be16toh(cqe64->wqe_counter));
1352 if (is_equal_rsn(cqe64, rsn)) {
1353 if (srq && (be32toh(cqe64->srqn_uidx) & 0xffffff))
1354 mlx5_free_srq_wqe(srq,
1355 be16toh(cqe64->wqe_counter));
/*
 * Remove every CQE belonging to the QP/SRQ identified by rsn from the CQ,
 * compacting the surviving entries and advancing the consumer index past
 * the freed ones.  Caller must hold the CQ lock (see mlx5_cq_clean()).
 * No-op when the CQ is absent or owned by direct-verbs users.
 */
void __mlx5_cq_clean(struct mlx5_cq *cq, uint32_t rsn, struct mlx5_srq *srq)
{
	uint32_t prod_index;
	int nfreed = 0;
	struct mlx5_cqe64 *cqe64, *dest64;
	void *cqe, *dest;
	uint8_t owner_bit;
	int cqe_version;

	if (!cq || cq->flags & MLX5_CQ_FLAGS_DV_OWNED)
		return;

	/*
	 * First we need to find the current producer index, so we
	 * know where to start cleaning from.  It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
		if (prod_index == cq->cons_index + cq->ibv_cq.cqe)
			break;

	/*
	 * Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	cqe_version = (to_mctx(cq->ibv_cq.context))->cqe_version;
	while ((int) --prod_index - (int) cq->cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
		/* For 128-byte CQEs the 64-byte payload sits in the upper half. */
		cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
		if (free_res_cqe(cqe64, rsn, srq, cqe_version)) {
			++nfreed;
		} else if (nfreed) {
			/*
			 * Shift this surviving CQE forward over the freed
			 * slots, but keep the destination slot's ownership
			 * bit so HW/SW ownership tracking stays intact.
			 */
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe);
			dest64 = (cq->cqe_sz == 64) ? dest : dest + 64;
			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
			memcpy(dest, cqe, cq->cqe_sz);
			dest64->op_own = owner_bit |
				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->cons_index += nfreed;
		/*
		 * Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		udma_to_device_barrier();
		update_cons_index(cq);
	}
}
/* Locked wrapper around __mlx5_cq_clean(); see that function for details. */
void mlx5_cq_clean(struct mlx5_cq *cq, uint32_t qpn, struct mlx5_srq *srq)
{
	mlx5_spin_lock(&cq->lock);
	__mlx5_cq_clean(cq, qpn, srq);
	mlx5_spin_unlock(&cq->lock);
}
static uint8_t sw_ownership_bit(int n, int nent)
{
	/*
	 * nent is the (power-of-two) ring size, so n & nent extracts the
	 * wrap bit: the SW ownership bit flips on every pass over the ring.
	 */
	return !!(n & nent);
}
1429 static int is_hw(uint8_t own, int n, int mask)
1431 return (own & MLX5_CQE_OWNER_MASK) ^ !!(n & (mask + 1));
/*
 * During CQ resize, copy all software-owned CQEs from the active buffer
 * into the resize buffer, stopping at the special RESIZE_CQ CQE that HW
 * places in the old buffer.  Prints to stderr and bails out if a CQE is
 * unexpectedly HW-owned or the resize CQE is never found.
 */
void mlx5_cq_resize_copy_cqes(struct mlx5_cq *cq)
{
	struct mlx5_cqe64 *scqe64;
	struct mlx5_cqe64 *dcqe64;
	void *start_cqe;
	void *scqe;
	void *dcqe;
	int ssize;
	int dsize;
	int i;
	uint8_t sw_own;

	ssize = cq->cqe_sz;
	dsize = cq->resize_cqe_sz;

	i = cq->cons_index;
	scqe = get_buf_cqe(cq->active_buf, i & cq->active_cqes, ssize);
	/* For 128-byte CQEs the 64-byte payload sits in the upper half. */
	scqe64 = ssize == 64 ? scqe : scqe + 64;
	start_cqe = scqe;
	if (is_hw(scqe64->op_own, i, cq->active_cqes)) {
		fprintf(stderr, "expected cqe in sw ownership\n");
		return;
	}

	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
		/*
		 * Destination slot is i + 1: the RESIZE_CQ CQE itself is
		 * consumed from the old buffer, so copied entries land one
		 * slot ahead in the new ring.
		 */
		dcqe = get_buf_cqe(cq->resize_buf, (i + 1) & (cq->resize_cqes - 1), dsize);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_cqes);
		memcpy(dcqe, scqe, ssize);
		/* Re-stamp ownership for the new ring's wrap parity. */
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_buf_cqe(cq->active_buf, i & cq->active_cqes, ssize);
		scqe64 = ssize == 64 ? scqe : scqe + 64;
		if (is_hw(scqe64->op_own, i, cq->active_cqes)) {
			fprintf(stderr, "expected cqe in sw ownership\n");
			return;
		}

		/* Wrapped all the way around without seeing RESIZE_CQ. */
		if (scqe == start_cqe) {
			fprintf(stderr, "resize CQ failed to get resize CQE\n");
			return;
		}
	}
	++cq->cons_index;
}
/*
 * Allocate and initialize a CQ buffer of nent CQEs of cqe_sz bytes each.
 * Honors the MLX5 allocation-type environment overrides (huge pages via
 * HUGE_CQ).  Every CQE's opcode field is stamped MLX5_CQE_INVALID so the
 * whole ring initially appears HW-owned to the poll path.
 * Returns 0 on success, -1 on allocation failure.
 */
int mlx5_alloc_cq_buf(struct mlx5_context *mctx, struct mlx5_cq *cq,
		      struct mlx5_buf *buf, int nent, int cqe_sz)
{
	struct mlx5_cqe64 *cqe;
	int i;
	struct mlx5_device *dev = to_mdev(mctx->ibv_ctx.device);
	int ret;
	enum mlx5_alloc_type type;
	enum mlx5_alloc_type default_type = MLX5_ALLOC_TYPE_ANON;

	if (mlx5_use_huge("HUGE_CQ"))
		default_type = MLX5_ALLOC_TYPE_HUGE;

	/* Environment may override the default allocation type. */
	mlx5_get_alloc_type(MLX5_CQ_PREFIX, &type, default_type);

	ret = mlx5_alloc_prefered_buf(mctx, buf,
				      align(nent * cqe_sz, dev->page_size),
				      dev->page_size,
				      type,
				      MLX5_CQ_PREFIX);
	if (ret)
		return -1;

	memset(buf->buf, 0, nent * cqe_sz);

	for (i = 0; i < nent; ++i) {
		cqe = buf->buf + i * cqe_sz;
		/* For 128-byte CQEs the 64-byte payload is the second half. */
		cqe += cqe_sz == 128 ? 1 : 0;
		cqe->op_own = MLX5_CQE_INVALID << 4;
	}

	return 0;
}
int mlx5_free_cq_buf(struct mlx5_context *ctx, struct mlx5_buf *buf)
{
	/* CQ buffers carry no extra per-CQ state; just release the memory. */
	int err = mlx5_free_actual_buf(ctx, buf);

	return err;
}