/*
 * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>

#include <util/compiler.h>

#include "mlx5.h"
#include "wqe.h"
#include "doorbell.h"

enum {
	CQ_OK		=  0,
	CQ_EMPTY	= -1,
	CQ_POLL_ERR	= -2
};

enum {
	MLX5_CQ_MODIFY_RESEIZE	= 0,
	MLX5_CQ_MODIFY_MODER	= 1,
	MLX5_CQ_MODIFY_MAPPING	= 2,
};

int mlx5_stall_num_loop = 60;
int mlx5_stall_cq_poll_min = 60;
int mlx5_stall_cq_poll_max = 100000;
int mlx5_stall_cq_inc_step = 100;
int mlx5_stall_cq_dec_step = 10;

static inline uint8_t get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe)
{
	return (cqe->l4_hdr_type_etc >> 2) & 0x3;
}

static void *get_buf_cqe(struct mlx5_buf *buf, int n, int cqe_sz)
{
	return buf->buf + n * cqe_sz;
}

static void *get_cqe(struct mlx5_cq *cq, int n)
{
	return cq->active_buf->buf + n * cq->cqe_sz;
}

/* Return the CQE at index n if it is valid and owned by software, else NULL. */
static void *get_sw_cqe(struct mlx5_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & cq->ibv_cq.cqe);
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->cqe_sz == 64) ?
cqe : cqe + 64; if (likely(mlx5dv_get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibv_cq.cqe + 1)))) { return cqe; } else { return NULL; } } static void *next_cqe_sw(struct mlx5_cq *cq) { return get_sw_cqe(cq, cq->cons_index); } static void update_cons_index(struct mlx5_cq *cq) { cq->dbrec[MLX5_CQ_SET_CI] = htobe32(cq->cons_index & 0xffffff); } static inline void handle_good_req(struct ibv_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_wq *wq, int idx) { switch (be32toh(cqe->sop_drop_qpn) >> 24) { case MLX5_OPCODE_RDMA_WRITE_IMM: wc->wc_flags |= IBV_WC_WITH_IMM; SWITCH_FALLTHROUGH; case MLX5_OPCODE_RDMA_WRITE: wc->opcode = IBV_WC_RDMA_WRITE; break; case MLX5_OPCODE_SEND_IMM: wc->wc_flags |= IBV_WC_WITH_IMM; SWITCH_FALLTHROUGH; case MLX5_OPCODE_SEND: case MLX5_OPCODE_SEND_INVAL: wc->opcode = IBV_WC_SEND; break; case MLX5_OPCODE_RDMA_READ: wc->opcode = IBV_WC_RDMA_READ; wc->byte_len = be32toh(cqe->byte_cnt); break; case MLX5_OPCODE_ATOMIC_CS: wc->opcode = IBV_WC_COMP_SWAP; wc->byte_len = 8; break; case MLX5_OPCODE_ATOMIC_FA: wc->opcode = IBV_WC_FETCH_ADD; wc->byte_len = 8; break; case MLX5_OPCODE_UMR: wc->opcode = wq->wr_data[idx]; break; case MLX5_OPCODE_TSO: wc->opcode = IBV_WC_TSO; break; } } static inline int handle_responder_lazy(struct mlx5_cq *cq, struct mlx5_cqe64 *cqe, struct mlx5_resource *cur_rsc, struct mlx5_srq *srq) { uint16_t wqe_ctr; struct mlx5_wq *wq; struct mlx5_qp *qp = rsc_to_mqp(cur_rsc); int err = IBV_WC_SUCCESS; if (srq) { wqe_ctr = be16toh(cqe->wqe_counter); cq->ibv_cq.wr_id = srq->wrid[wqe_ctr]; mlx5_free_srq_wqe(srq, wqe_ctr); if (cqe->op_own & MLX5_INLINE_SCATTER_32) err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe, be32toh(cqe->byte_cnt)); else if (cqe->op_own & MLX5_INLINE_SCATTER_64) err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe - 1, be32toh(cqe->byte_cnt)); } else { if (likely(cur_rsc->type == MLX5_RSC_TYPE_QP)) { wq = &qp->rq; if (qp->qp_cap_cache & MLX5_RX_CSUM_VALID) cq->flags |= MLX5_CQ_FLAGS_RX_CSUM_VALID; } else { wq = &(rsc_to_mrwq(cur_rsc)->rq); } wqe_ctr = wq->tail & (wq->wqe_cnt - 1); cq->ibv_cq.wr_id = wq->wrid[wqe_ctr]; ++wq->tail; if (cqe->op_own & MLX5_INLINE_SCATTER_32) err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe, be32toh(cqe->byte_cnt)); else if (cqe->op_own & MLX5_INLINE_SCATTER_64) err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe - 1, be32toh(cqe->byte_cnt)); } return err; } static inline int handle_responder(struct ibv_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_resource *cur_rsc, struct mlx5_srq *srq) { uint16_t wqe_ctr; struct mlx5_wq *wq; struct mlx5_qp *qp = rsc_to_mqp(cur_rsc); uint8_t g; int err = 0; wc->byte_len = be32toh(cqe->byte_cnt); if (srq) { wqe_ctr = be16toh(cqe->wqe_counter); wc->wr_id = srq->wrid[wqe_ctr]; mlx5_free_srq_wqe(srq, wqe_ctr); if (cqe->op_own & MLX5_INLINE_SCATTER_32) err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe, wc->byte_len); else if (cqe->op_own & MLX5_INLINE_SCATTER_64) err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe - 1, wc->byte_len); } else { if (likely(cur_rsc->type == MLX5_RSC_TYPE_QP)) { wq = &qp->rq; if (qp->qp_cap_cache & MLX5_RX_CSUM_VALID) wc->wc_flags |= (!!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) & !!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) & (get_cqe_l3_hdr_type(cqe) == MLX5_CQE_L3_HDR_TYPE_IPV4)) << IBV_WC_IP_CSUM_OK_SHIFT; } else { wq = &(rsc_to_mrwq(cur_rsc)->rq); } wqe_ctr = wq->tail & (wq->wqe_cnt - 1); wc->wr_id = wq->wrid[wqe_ctr]; ++wq->tail; if (cqe->op_own & MLX5_INLINE_SCATTER_32) err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe, wc->byte_len); else if 
(cqe->op_own & MLX5_INLINE_SCATTER_64) err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe - 1, wc->byte_len); } if (err) return err; switch (cqe->op_own >> 4) { case MLX5_CQE_RESP_WR_IMM: wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM; wc->wc_flags |= IBV_WC_WITH_IMM; wc->imm_data = cqe->imm_inval_pkey; break; case MLX5_CQE_RESP_SEND: wc->opcode = IBV_WC_RECV; break; case MLX5_CQE_RESP_SEND_IMM: wc->opcode = IBV_WC_RECV; wc->wc_flags |= IBV_WC_WITH_IMM; wc->imm_data = cqe->imm_inval_pkey; break; case MLX5_CQE_RESP_SEND_INV: wc->opcode = IBV_WC_RECV; wc->wc_flags |= IBV_WC_WITH_INV; wc->imm_data = be32toh(cqe->imm_inval_pkey); break; } wc->slid = be16toh(cqe->slid); wc->sl = (be32toh(cqe->flags_rqpn) >> 24) & 0xf; wc->src_qp = be32toh(cqe->flags_rqpn) & 0xffffff; wc->dlid_path_bits = cqe->ml_path & 0x7f; g = (be32toh(cqe->flags_rqpn) >> 28) & 3; wc->wc_flags |= g ? IBV_WC_GRH : 0; wc->pkey_index = be32toh(cqe->imm_inval_pkey) & 0xffff; return IBV_WC_SUCCESS; } static void dump_cqe(FILE *fp, void *buf) { uint32_t *p = buf; int i; for (i = 0; i < 16; i += 4) fprintf(fp, "%08x %08x %08x %08x\n", be32toh(p[i]), be32toh(p[i + 1]), be32toh(p[i + 2]), be32toh(p[i + 3])); } static enum ibv_wc_status mlx5_handle_error_cqe(struct mlx5_err_cqe *cqe) { switch (cqe->syndrome) { case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR: return IBV_WC_LOC_LEN_ERR; case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR: return IBV_WC_LOC_QP_OP_ERR; case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR: return IBV_WC_LOC_PROT_ERR; case MLX5_CQE_SYNDROME_WR_FLUSH_ERR: return IBV_WC_WR_FLUSH_ERR; case MLX5_CQE_SYNDROME_MW_BIND_ERR: return IBV_WC_MW_BIND_ERR; case MLX5_CQE_SYNDROME_BAD_RESP_ERR: return IBV_WC_BAD_RESP_ERR; case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR: return IBV_WC_LOC_ACCESS_ERR; case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: return IBV_WC_REM_INV_REQ_ERR; case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR: return IBV_WC_REM_ACCESS_ERR; case MLX5_CQE_SYNDROME_REMOTE_OP_ERR: return IBV_WC_REM_OP_ERR; case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: return IBV_WC_RETRY_EXC_ERR; case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR: return IBV_WC_RNR_RETRY_EXC_ERR; case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR: return IBV_WC_REM_ABORT_ERR; default: return IBV_WC_GENERAL_ERR; } } #if defined(__x86_64__) || defined (__i386__) static inline unsigned long get_cycles(void) { uint32_t low, high; uint64_t val; asm volatile ("rdtsc" : "=a" (low), "=d" (high)); val = high; val = (val << 32) | low; return val; } static void mlx5_stall_poll_cq(void) { int i; for (i = 0; i < mlx5_stall_num_loop; i++) (void)get_cycles(); } static void mlx5_stall_cycles_poll_cq(uint64_t cycles) { while (get_cycles() < cycles) ; /* Nothing */ } static void mlx5_get_cycles(uint64_t *cycles) { *cycles = get_cycles(); } #else static void mlx5_stall_poll_cq(void) { } static void mlx5_stall_cycles_poll_cq(uint64_t cycles) { } static void mlx5_get_cycles(uint64_t *cycles) { } #endif static inline struct mlx5_qp *get_req_context(struct mlx5_context *mctx, struct mlx5_resource **cur_rsc, uint32_t rsn, int cqe_ver) ALWAYS_INLINE; static inline struct mlx5_qp *get_req_context(struct mlx5_context *mctx, struct mlx5_resource **cur_rsc, uint32_t rsn, int cqe_ver) { if (!*cur_rsc || (rsn != (*cur_rsc)->rsn)) *cur_rsc = cqe_ver ? 
mlx5_find_uidx(mctx, rsn) : (struct mlx5_resource *)mlx5_find_qp(mctx, rsn); return rsc_to_mqp(*cur_rsc); } static inline int get_resp_ctx_v1(struct mlx5_context *mctx, struct mlx5_resource **cur_rsc, struct mlx5_srq **cur_srq, uint32_t uidx, uint8_t *is_srq) ALWAYS_INLINE; static inline int get_resp_ctx_v1(struct mlx5_context *mctx, struct mlx5_resource **cur_rsc, struct mlx5_srq **cur_srq, uint32_t uidx, uint8_t *is_srq) { struct mlx5_qp *mqp; if (!*cur_rsc || (uidx != (*cur_rsc)->rsn)) { *cur_rsc = mlx5_find_uidx(mctx, uidx); if (unlikely(!*cur_rsc)) return CQ_POLL_ERR; } switch ((*cur_rsc)->type) { case MLX5_RSC_TYPE_QP: mqp = rsc_to_mqp(*cur_rsc); if (mqp->verbs_qp.qp.srq) { *cur_srq = to_msrq(mqp->verbs_qp.qp.srq); *is_srq = 1; } break; case MLX5_RSC_TYPE_XSRQ: *cur_srq = rsc_to_msrq(*cur_rsc); *is_srq = 1; break; case MLX5_RSC_TYPE_RWQ: break; default: return CQ_POLL_ERR; } return CQ_OK; } static inline int get_qp_ctx(struct mlx5_context *mctx, struct mlx5_resource **cur_rsc, uint32_t qpn) ALWAYS_INLINE; static inline int get_qp_ctx(struct mlx5_context *mctx, struct mlx5_resource **cur_rsc, uint32_t qpn) { if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) { /* * We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. */ *cur_rsc = (struct mlx5_resource *)mlx5_find_qp(mctx, qpn); if (unlikely(!*cur_rsc)) return CQ_POLL_ERR; } return CQ_OK; } static inline int get_srq_ctx(struct mlx5_context *mctx, struct mlx5_srq **cur_srq, uint32_t srqn_uidx) ALWAYS_INLINE; static inline int get_srq_ctx(struct mlx5_context *mctx, struct mlx5_srq **cur_srq, uint32_t srqn) { if (!*cur_srq || (srqn != (*cur_srq)->srqn)) { *cur_srq = mlx5_find_srq(mctx, srqn); if (unlikely(!*cur_srq)) return CQ_POLL_ERR; } return CQ_OK; } static inline int get_cur_rsc(struct mlx5_context *mctx, int cqe_ver, uint32_t qpn, uint32_t srqn_uidx, struct mlx5_resource **cur_rsc, struct mlx5_srq **cur_srq, uint8_t *is_srq) { int err; if (cqe_ver) { err = get_resp_ctx_v1(mctx, cur_rsc, cur_srq, srqn_uidx, is_srq); } else { if (srqn_uidx) { *is_srq = 1; err = get_srq_ctx(mctx, cur_srq, srqn_uidx); } else { err = get_qp_ctx(mctx, cur_rsc, qpn); } } return err; } static inline int mlx5_get_next_cqe(struct mlx5_cq *cq, struct mlx5_cqe64 **pcqe64, void **pcqe) ALWAYS_INLINE; static inline int mlx5_get_next_cqe(struct mlx5_cq *cq, struct mlx5_cqe64 **pcqe64, void **pcqe) { void *cqe; struct mlx5_cqe64 *cqe64; cqe = next_cqe_sw(cq); if (!cqe) return CQ_EMPTY; cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64; ++cq->cons_index; VALGRIND_MAKE_MEM_DEFINED(cqe64, sizeof *cqe64); /* * Make sure we read CQ entry contents after we've checked the * ownership bit. 
*/ udma_from_device_barrier(); #ifdef MLX5_DEBUG { struct mlx5_context *mctx = to_mctx(cq->ibv_cq.context); if (mlx5_debug_mask & MLX5_DBG_CQ_CQE) { FILE *fp = mctx->dbg_fp; mlx5_dbg(fp, MLX5_DBG_CQ_CQE, "dump cqe for cqn 0x%x:\n", cq->cqn); dump_cqe(fp, cqe64); } } #endif *pcqe64 = cqe64; *pcqe = cqe; return CQ_OK; } static inline int mlx5_parse_cqe(struct mlx5_cq *cq, struct mlx5_cqe64 *cqe64, void *cqe, struct mlx5_resource **cur_rsc, struct mlx5_srq **cur_srq, struct ibv_wc *wc, int cqe_ver, int lazy) ALWAYS_INLINE; static inline int mlx5_parse_cqe(struct mlx5_cq *cq, struct mlx5_cqe64 *cqe64, void *cqe, struct mlx5_resource **cur_rsc, struct mlx5_srq **cur_srq, struct ibv_wc *wc, int cqe_ver, int lazy) { struct mlx5_wq *wq; uint16_t wqe_ctr; uint32_t qpn; uint32_t srqn_uidx; int idx; uint8_t opcode; struct mlx5_err_cqe *ecqe; int err = 0; struct mlx5_qp *mqp; struct mlx5_context *mctx; uint8_t is_srq = 0; mctx = to_mctx(ibv_cq_ex_to_cq(&cq->ibv_cq)->context); qpn = be32toh(cqe64->sop_drop_qpn) & 0xffffff; if (lazy) { cq->cqe64 = cqe64; cq->flags &= (~MLX5_CQ_FLAGS_RX_CSUM_VALID); } else { wc->wc_flags = 0; wc->qp_num = qpn; } opcode = mlx5dv_get_cqe_opcode(cqe64); switch (opcode) { case MLX5_CQE_REQ: { mqp = get_req_context(mctx, cur_rsc, (cqe_ver ? (be32toh(cqe64->srqn_uidx) & 0xffffff) : qpn), cqe_ver); if (unlikely(!mqp)) return CQ_POLL_ERR; wq = &mqp->sq; wqe_ctr = be16toh(cqe64->wqe_counter); idx = wqe_ctr & (wq->wqe_cnt - 1); if (lazy) { uint32_t wc_byte_len; switch (be32toh(cqe64->sop_drop_qpn) >> 24) { case MLX5_OPCODE_UMR: cq->umr_opcode = wq->wr_data[idx]; break; case MLX5_OPCODE_RDMA_READ: wc_byte_len = be32toh(cqe64->byte_cnt); goto scatter_out; case MLX5_OPCODE_ATOMIC_CS: case MLX5_OPCODE_ATOMIC_FA: wc_byte_len = 8; scatter_out: if (cqe64->op_own & MLX5_INLINE_SCATTER_32) err = mlx5_copy_to_send_wqe( mqp, wqe_ctr, cqe, wc_byte_len); else if (cqe64->op_own & MLX5_INLINE_SCATTER_64) err = mlx5_copy_to_send_wqe( mqp, wqe_ctr, cqe - 1, wc_byte_len); break; } cq->ibv_cq.wr_id = wq->wrid[idx]; cq->ibv_cq.status = err; } else { handle_good_req(wc, cqe64, wq, idx); if (cqe64->op_own & MLX5_INLINE_SCATTER_32) err = mlx5_copy_to_send_wqe(mqp, wqe_ctr, cqe, wc->byte_len); else if (cqe64->op_own & MLX5_INLINE_SCATTER_64) err = mlx5_copy_to_send_wqe( mqp, wqe_ctr, cqe - 1, wc->byte_len); wc->wr_id = wq->wrid[idx]; wc->status = err; } wq->tail = wq->wqe_head[idx] + 1; break; } case MLX5_CQE_RESP_WR_IMM: case MLX5_CQE_RESP_SEND: case MLX5_CQE_RESP_SEND_IMM: case MLX5_CQE_RESP_SEND_INV: srqn_uidx = be32toh(cqe64->srqn_uidx) & 0xffffff; err = get_cur_rsc(mctx, cqe_ver, qpn, srqn_uidx, cur_rsc, cur_srq, &is_srq); if (unlikely(err)) return CQ_POLL_ERR; if (lazy) cq->ibv_cq.status = handle_responder_lazy(cq, cqe64, *cur_rsc, is_srq ? *cur_srq : NULL); else wc->status = handle_responder(wc, cqe64, *cur_rsc, is_srq ? *cur_srq : NULL); break; case MLX5_CQE_RESIZE_CQ: break; case MLX5_CQE_REQ_ERR: case MLX5_CQE_RESP_ERR: srqn_uidx = be32toh(cqe64->srqn_uidx) & 0xffffff; ecqe = (struct mlx5_err_cqe *)cqe64; { enum ibv_wc_status *pstatus = lazy ? 
&cq->ibv_cq.status : &wc->status; *pstatus = mlx5_handle_error_cqe(ecqe); } if (!lazy) wc->vendor_err = ecqe->vendor_err_synd; if (unlikely(ecqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR && ecqe->syndrome != MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR)) { FILE *fp = mctx->dbg_fp; fprintf(fp, PFX "%s: got completion with error:\n", mctx->hostname); dump_cqe(fp, ecqe); if (mlx5_freeze_on_error_cqe) { fprintf(fp, PFX "freezing at poll cq..."); while (1) sleep(10); } } if (opcode == MLX5_CQE_REQ_ERR) { mqp = get_req_context(mctx, cur_rsc, (cqe_ver ? srqn_uidx : qpn), cqe_ver); if (unlikely(!mqp)) return CQ_POLL_ERR; wq = &mqp->sq; wqe_ctr = be16toh(cqe64->wqe_counter); idx = wqe_ctr & (wq->wqe_cnt - 1); if (lazy) cq->ibv_cq.wr_id = wq->wrid[idx]; else wc->wr_id = wq->wrid[idx]; wq->tail = wq->wqe_head[idx] + 1; } else { err = get_cur_rsc(mctx, cqe_ver, qpn, srqn_uidx, cur_rsc, cur_srq, &is_srq); if (unlikely(err)) return CQ_POLL_ERR; if (is_srq) { wqe_ctr = be16toh(cqe64->wqe_counter); if (lazy) cq->ibv_cq.wr_id = (*cur_srq)->wrid[wqe_ctr]; else wc->wr_id = (*cur_srq)->wrid[wqe_ctr]; mlx5_free_srq_wqe(*cur_srq, wqe_ctr); } else { switch ((*cur_rsc)->type) { case MLX5_RSC_TYPE_RWQ: wq = &(rsc_to_mrwq(*cur_rsc)->rq); break; default: wq = &(rsc_to_mqp(*cur_rsc)->rq); break; } if (lazy) cq->ibv_cq.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; else wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++wq->tail; } } break; } return CQ_OK; } static inline int mlx5_parse_lazy_cqe(struct mlx5_cq *cq, struct mlx5_cqe64 *cqe64, void *cqe, int cqe_ver) ALWAYS_INLINE; static inline int mlx5_parse_lazy_cqe(struct mlx5_cq *cq, struct mlx5_cqe64 *cqe64, void *cqe, int cqe_ver) { return mlx5_parse_cqe(cq, cqe64, cqe, &cq->cur_rsc, &cq->cur_srq, NULL, cqe_ver, 1); } static inline int mlx5_poll_one(struct mlx5_cq *cq, struct mlx5_resource **cur_rsc, struct mlx5_srq **cur_srq, struct ibv_wc *wc, int cqe_ver) ALWAYS_INLINE; static inline int mlx5_poll_one(struct mlx5_cq *cq, struct mlx5_resource **cur_rsc, struct mlx5_srq **cur_srq, struct ibv_wc *wc, int cqe_ver) { struct mlx5_cqe64 *cqe64; void *cqe; int err; err = mlx5_get_next_cqe(cq, &cqe64, &cqe); if (err == CQ_EMPTY) return err; return mlx5_parse_cqe(cq, cqe64, cqe, cur_rsc, cur_srq, wc, cqe_ver, 0); } static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc, int cqe_ver) ALWAYS_INLINE; static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc, int cqe_ver) { struct mlx5_cq *cq = to_mcq(ibcq); struct mlx5_resource *rsc = NULL; struct mlx5_srq *srq = NULL; int npolled; int err = CQ_OK; if (cq->stall_enable) { if (cq->stall_adaptive_enable) { if (cq->stall_last_count) mlx5_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles); } else if (cq->stall_next_poll) { cq->stall_next_poll = 0; mlx5_stall_poll_cq(); } } mlx5_spin_lock(&cq->lock); for (npolled = 0; npolled < ne; ++npolled) { err = mlx5_poll_one(cq, &rsc, &srq, wc + npolled, cqe_ver); if (err != CQ_OK) break; } update_cons_index(cq); mlx5_spin_unlock(&cq->lock); if (cq->stall_enable) { if (cq->stall_adaptive_enable) { if (npolled == 0) { cq->stall_cycles = max(cq->stall_cycles-mlx5_stall_cq_dec_step, mlx5_stall_cq_poll_min); mlx5_get_cycles(&cq->stall_last_count); } else if (npolled < ne) { cq->stall_cycles = min(cq->stall_cycles+mlx5_stall_cq_inc_step, mlx5_stall_cq_poll_max); mlx5_get_cycles(&cq->stall_last_count); } else { cq->stall_cycles = max(cq->stall_cycles-mlx5_stall_cq_dec_step, mlx5_stall_cq_poll_min); cq->stall_last_count = 0; } } else if (err == 
CQ_EMPTY) { cq->stall_next_poll = 1; } } return err == CQ_POLL_ERR ? err : npolled; } enum polling_mode { POLLING_MODE_NO_STALL, POLLING_MODE_STALL, POLLING_MODE_STALL_ADAPTIVE }; static inline void _mlx5_end_poll(struct ibv_cq_ex *ibcq, int lock, enum polling_mode stall) ALWAYS_INLINE; static inline void _mlx5_end_poll(struct ibv_cq_ex *ibcq, int lock, enum polling_mode stall) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); update_cons_index(cq); if (lock) mlx5_spin_unlock(&cq->lock); if (stall) { if (stall == POLLING_MODE_STALL_ADAPTIVE) { if (!(cq->flags & MLX5_CQ_FLAGS_FOUND_CQES)) { cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step, mlx5_stall_cq_poll_min); mlx5_get_cycles(&cq->stall_last_count); } else if (cq->flags & MLX5_CQ_FLAGS_EMPTY_DURING_POLL) { cq->stall_cycles = min(cq->stall_cycles + mlx5_stall_cq_inc_step, mlx5_stall_cq_poll_max); mlx5_get_cycles(&cq->stall_last_count); } else { cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step, mlx5_stall_cq_poll_min); cq->stall_last_count = 0; } } else if (!(cq->flags & MLX5_CQ_FLAGS_FOUND_CQES)) { cq->stall_next_poll = 1; } cq->flags &= ~(MLX5_CQ_FLAGS_FOUND_CQES | MLX5_CQ_FLAGS_EMPTY_DURING_POLL); } } static inline int mlx5_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, int lock, enum polling_mode stall, int cqe_version) ALWAYS_INLINE; static inline int mlx5_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, int lock, enum polling_mode stall, int cqe_version) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); struct mlx5_cqe64 *cqe64; void *cqe; int err; if (unlikely(attr->comp_mask)) return EINVAL; if (stall) { if (stall == POLLING_MODE_STALL_ADAPTIVE) { if (cq->stall_last_count) mlx5_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles); } else if (cq->stall_next_poll) { cq->stall_next_poll = 0; mlx5_stall_poll_cq(); } } if (lock) mlx5_spin_lock(&cq->lock); cq->cur_rsc = NULL; cq->cur_srq = NULL; err = mlx5_get_next_cqe(cq, &cqe64, &cqe); if (err == CQ_EMPTY) { if (lock) mlx5_spin_unlock(&cq->lock); if (stall) { if (stall == POLLING_MODE_STALL_ADAPTIVE) { cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step, mlx5_stall_cq_poll_min); mlx5_get_cycles(&cq->stall_last_count); } else { cq->stall_next_poll = 1; } } return ENOENT; } if (stall) cq->flags |= MLX5_CQ_FLAGS_FOUND_CQES; err = mlx5_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); if (lock && err) mlx5_spin_unlock(&cq->lock); if (stall && err) { if (stall == POLLING_MODE_STALL_ADAPTIVE) { cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step, mlx5_stall_cq_poll_min); cq->stall_last_count = 0; } cq->flags &= ~(MLX5_CQ_FLAGS_FOUND_CQES); } return err; } static inline int mlx5_next_poll(struct ibv_cq_ex *ibcq, enum polling_mode stall, int cqe_version) ALWAYS_INLINE; static inline int mlx5_next_poll(struct ibv_cq_ex *ibcq, enum polling_mode stall, int cqe_version) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); struct mlx5_cqe64 *cqe64; void *cqe; int err; err = mlx5_get_next_cqe(cq, &cqe64, &cqe); if (err == CQ_EMPTY) { if (stall == POLLING_MODE_STALL_ADAPTIVE) cq->flags |= MLX5_CQ_FLAGS_EMPTY_DURING_POLL; return ENOENT; } return mlx5_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); } static inline int mlx5_next_poll_adaptive_v0(struct ibv_cq_ex *ibcq) { return mlx5_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 0); } static inline int mlx5_next_poll_adaptive_v1(struct ibv_cq_ex *ibcq) { return mlx5_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 1); } static inline int 
mlx5_next_poll_v0(struct ibv_cq_ex *ibcq) { return mlx5_next_poll(ibcq, 0, 0); } static inline int mlx5_next_poll_v1(struct ibv_cq_ex *ibcq) { return mlx5_next_poll(ibcq, 0, 1); } static inline int mlx5_start_poll_v0(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr) { return mlx5_start_poll(ibcq, attr, 0, 0, 0); } static inline int mlx5_start_poll_v1(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr) { return mlx5_start_poll(ibcq, attr, 0, 0, 1); } static inline int mlx5_start_poll_v0_lock(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr) { return mlx5_start_poll(ibcq, attr, 1, 0, 0); } static inline int mlx5_start_poll_v1_lock(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr) { return mlx5_start_poll(ibcq, attr, 1, 0, 1); } static inline int mlx5_start_poll_adaptive_stall_v0_lock(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr) { return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0); } static inline int mlx5_start_poll_stall_v0_lock(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr) { return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0); } static inline int mlx5_start_poll_adaptive_stall_v1_lock(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr) { return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1); } static inline int mlx5_start_poll_stall_v1_lock(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr) { return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1); } static inline int mlx5_start_poll_stall_v0(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr) { return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0); } static inline int mlx5_start_poll_adaptive_stall_v0(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr) { return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0); } static inline int mlx5_start_poll_adaptive_stall_v1(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr) { return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1); } static inline int mlx5_start_poll_stall_v1(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr) { return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1); } static inline void mlx5_end_poll_adaptive_stall_lock(struct ibv_cq_ex *ibcq) { _mlx5_end_poll(ibcq, 1, POLLING_MODE_STALL_ADAPTIVE); } static inline void mlx5_end_poll_stall_lock(struct ibv_cq_ex *ibcq) { _mlx5_end_poll(ibcq, 1, POLLING_MODE_STALL); } static inline void mlx5_end_poll_adaptive_stall(struct ibv_cq_ex *ibcq) { _mlx5_end_poll(ibcq, 0, POLLING_MODE_STALL_ADAPTIVE); } static inline void mlx5_end_poll_stall(struct ibv_cq_ex *ibcq) { _mlx5_end_poll(ibcq, 0, POLLING_MODE_STALL); } static inline void mlx5_end_poll(struct ibv_cq_ex *ibcq) { _mlx5_end_poll(ibcq, 0, 0); } static inline void mlx5_end_poll_lock(struct ibv_cq_ex *ibcq) { _mlx5_end_poll(ibcq, 1, 0); } int mlx5_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) { return poll_cq(ibcq, ne, wc, 0); } int mlx5_poll_cq_v1(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) { return poll_cq(ibcq, ne, wc, 1); } static inline enum ibv_wc_opcode mlx5_cq_read_wc_opcode(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); switch (mlx5dv_get_cqe_opcode(cq->cqe64)) { case MLX5_CQE_RESP_WR_IMM: return IBV_WC_RECV_RDMA_WITH_IMM; case MLX5_CQE_RESP_SEND: case MLX5_CQE_RESP_SEND_IMM: case MLX5_CQE_RESP_SEND_INV: return IBV_WC_RECV; case MLX5_CQE_REQ: switch (be32toh(cq->cqe64->sop_drop_qpn) >> 24) { case MLX5_OPCODE_RDMA_WRITE_IMM: case MLX5_OPCODE_RDMA_WRITE: return IBV_WC_RDMA_WRITE; case 
MLX5_OPCODE_SEND_IMM: case MLX5_OPCODE_SEND: case MLX5_OPCODE_SEND_INVAL: return IBV_WC_SEND; case MLX5_OPCODE_RDMA_READ: return IBV_WC_RDMA_READ; case MLX5_OPCODE_ATOMIC_CS: return IBV_WC_COMP_SWAP; case MLX5_OPCODE_ATOMIC_FA: return IBV_WC_FETCH_ADD; case MLX5_OPCODE_UMR: return cq->umr_opcode; case MLX5_OPCODE_TSO: return IBV_WC_TSO; } } #ifdef MLX5_DEBUG { struct mlx5_context *ctx = to_mctx(ibcq->context); mlx5_dbg(ctx->dbg_fp, MLX5_DBG_CQ_CQE, "un-expected opcode in cqe\n"); } #endif return 0; } static inline uint32_t mlx5_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); return be32toh(cq->cqe64->sop_drop_qpn) & 0xffffff; } static inline int mlx5_cq_read_wc_flags(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); int wc_flags = 0; if (cq->flags & MLX5_CQ_FLAGS_RX_CSUM_VALID) wc_flags = (!!(cq->cqe64->hds_ip_ext & MLX5_CQE_L4_OK) & !!(cq->cqe64->hds_ip_ext & MLX5_CQE_L3_OK) & (get_cqe_l3_hdr_type(cq->cqe64) == MLX5_CQE_L3_HDR_TYPE_IPV4)) << IBV_WC_IP_CSUM_OK_SHIFT; switch (mlx5dv_get_cqe_opcode(cq->cqe64)) { case MLX5_CQE_RESP_WR_IMM: case MLX5_CQE_RESP_SEND_IMM: wc_flags |= IBV_WC_WITH_IMM; break; case MLX5_CQE_RESP_SEND_INV: wc_flags |= IBV_WC_WITH_INV; break; } wc_flags |= ((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ? IBV_WC_GRH : 0; return wc_flags; } static inline uint32_t mlx5_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); return be32toh(cq->cqe64->byte_cnt); } static inline uint32_t mlx5_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); struct mlx5_err_cqe *ecqe = (struct mlx5_err_cqe *)cq->cqe64; return ecqe->vendor_err_synd; } static inline uint32_t mlx5_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); switch (mlx5dv_get_cqe_opcode(cq->cqe64)) { case MLX5_CQE_RESP_SEND_INV: return be32toh(cq->cqe64->imm_inval_pkey); default: return cq->cqe64->imm_inval_pkey; } } static inline uint32_t mlx5_cq_read_wc_slid(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); return (uint32_t)be16toh(cq->cqe64->slid); } static inline uint8_t mlx5_cq_read_wc_sl(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); return (be32toh(cq->cqe64->flags_rqpn) >> 24) & 0xf; } static inline uint32_t mlx5_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); return be32toh(cq->cqe64->flags_rqpn) & 0xffffff; } static inline uint8_t mlx5_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); return cq->cqe64->ml_path & 0x7f; } static inline uint64_t mlx5_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); return be64toh(cq->cqe64->timestamp); } static inline uint16_t mlx5_cq_read_wc_cvlan(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); return be16toh(cq->cqe64->vlan_info); } static inline uint32_t mlx5_cq_read_flow_tag(struct ibv_cq_ex *ibcq) { struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq)); return be32toh(cq->cqe64->sop_drop_qpn) & MLX5_FLOW_TAG_MASK; } #define BIT(i) (1UL << (i)) #define SINGLE_THREADED BIT(0) #define STALL BIT(1) #define V1 BIT(2) #define ADAPTIVE BIT(3) #define mlx5_start_poll_name(cqe_ver, lock, stall, adaptive) \ mlx5_start_poll##adaptive##stall##cqe_ver##lock #define mlx5_next_poll_name(cqe_ver, adaptive) \ mlx5_next_poll##adaptive##cqe_ver 
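/*
 * These token-pasting *_poll_name() helpers compose the names of the
 * specialized wrappers defined above (e.g. mlx5_start_poll_adaptive_stall_v1_lock)
 * from the _adaptive/_stall/_v0|_v1/_lock suffixes; POLL_FN_ENTRY() uses them
 * to populate the ops[] dispatch table, which mlx5_cq_fill_pfns() indexes by
 * the CQ's single-threaded, stall/adaptive and CQE-version properties.
 */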
#define mlx5_end_poll_name(lock, stall, adaptive) \ mlx5_end_poll##adaptive##stall##lock #define POLL_FN_ENTRY(cqe_ver, lock, stall, adaptive) { \ .start_poll = &mlx5_start_poll_name(cqe_ver, lock, stall, adaptive), \ .next_poll = &mlx5_next_poll_name(cqe_ver, adaptive), \ .end_poll = &mlx5_end_poll_name(lock, stall, adaptive), \ } static const struct op { int (*start_poll)(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr); int (*next_poll)(struct ibv_cq_ex *ibcq); void (*end_poll)(struct ibv_cq_ex *ibcq); } ops[ADAPTIVE + V1 + STALL + SINGLE_THREADED + 1] = { [V1] = POLL_FN_ENTRY(_v1, _lock, , ), [0] = POLL_FN_ENTRY(_v0, _lock, , ), [V1 | SINGLE_THREADED] = POLL_FN_ENTRY(_v1, , , ), [SINGLE_THREADED] = POLL_FN_ENTRY(_v0, , , ), [V1 | STALL] = POLL_FN_ENTRY(_v1, _lock, _stall, ), [STALL] = POLL_FN_ENTRY(_v0, _lock, _stall, ), [V1 | SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v1, , _stall, ), [SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v0, , _stall, ), [V1 | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive), [STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive), [V1 | SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive), [SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive), }; void mlx5_cq_fill_pfns(struct mlx5_cq *cq, const struct ibv_cq_init_attr_ex *cq_attr) { struct mlx5_context *mctx = to_mctx(ibv_cq_ex_to_cq(&cq->ibv_cq)->context); const struct op *poll_ops = &ops[((cq->stall_enable && cq->stall_adaptive_enable) ? ADAPTIVE : 0) | (mctx->cqe_version ? V1 : 0) | (cq->flags & MLX5_CQ_FLAGS_SINGLE_THREADED ? SINGLE_THREADED : 0) | (cq->stall_enable ? STALL : 0)]; cq->ibv_cq.start_poll = poll_ops->start_poll; cq->ibv_cq.next_poll = poll_ops->next_poll; cq->ibv_cq.end_poll = poll_ops->end_poll; cq->ibv_cq.read_opcode = mlx5_cq_read_wc_opcode; cq->ibv_cq.read_vendor_err = mlx5_cq_read_wc_vendor_err; cq->ibv_cq.read_wc_flags = mlx5_cq_read_wc_flags; if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) cq->ibv_cq.read_byte_len = mlx5_cq_read_wc_byte_len; if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM) cq->ibv_cq.read_imm_data = mlx5_cq_read_wc_imm_data; if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM) cq->ibv_cq.read_qp_num = mlx5_cq_read_wc_qp_num; if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP) cq->ibv_cq.read_src_qp = mlx5_cq_read_wc_src_qp; if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID) cq->ibv_cq.read_slid = mlx5_cq_read_wc_slid; if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL) cq->ibv_cq.read_sl = mlx5_cq_read_wc_sl; if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) cq->ibv_cq.read_dlid_path_bits = mlx5_cq_read_wc_dlid_path_bits; if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) cq->ibv_cq.read_completion_ts = mlx5_cq_read_wc_completion_ts; if (cq_attr->wc_flags & IBV_WC_EX_WITH_CVLAN) cq->ibv_cq.read_cvlan = mlx5_cq_read_wc_cvlan; if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG) cq->ibv_cq.read_flow_tag = mlx5_cq_read_flow_tag; } int mlx5_arm_cq(struct ibv_cq *ibvcq, int solicited) { struct mlx5_cq *cq = to_mcq(ibvcq); struct mlx5_context *ctx = to_mctx(ibvcq->context); uint32_t doorbell[2]; uint32_t sn; uint32_t ci; uint32_t cmd; sn = cq->arm_sn & 3; ci = cq->cons_index & 0xffffff; cmd = solicited ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT; cq->dbrec[MLX5_CQ_ARM_DB] = htobe32(sn << 28 | cmd | ci); /* * Make sure that the doorbell record in host memory is * written before ringing the doorbell via PCI WC MMIO. 
*/ mmio_wc_start(); doorbell[0] = htobe32(sn << 28 | cmd | ci); doorbell[1] = htobe32(cq->cqn); mlx5_write64(doorbell, ctx->uar[0] + MLX5_CQ_DOORBELL, &ctx->lock32); mmio_flush_writes(); return 0; } void mlx5_cq_event(struct ibv_cq *cq) { to_mcq(cq)->arm_sn++; } static int is_equal_rsn(struct mlx5_cqe64 *cqe64, uint32_t rsn) { return rsn == (be32toh(cqe64->sop_drop_qpn) & 0xffffff); } static inline int is_equal_uidx(struct mlx5_cqe64 *cqe64, uint32_t uidx) { return uidx == (be32toh(cqe64->srqn_uidx) & 0xffffff); } static inline int is_responder(uint8_t opcode) { switch (opcode) { case MLX5_CQE_RESP_WR_IMM: case MLX5_CQE_RESP_SEND: case MLX5_CQE_RESP_SEND_IMM: case MLX5_CQE_RESP_SEND_INV: case MLX5_CQE_RESP_ERR: return 1; } return 0; } static inline int free_res_cqe(struct mlx5_cqe64 *cqe64, uint32_t rsn, struct mlx5_srq *srq, int cqe_version) { if (cqe_version) { if (is_equal_uidx(cqe64, rsn)) { if (srq && is_responder(mlx5dv_get_cqe_opcode(cqe64))) mlx5_free_srq_wqe(srq, be16toh(cqe64->wqe_counter)); return 1; } } else { if (is_equal_rsn(cqe64, rsn)) { if (srq && (be32toh(cqe64->srqn_uidx) & 0xffffff)) mlx5_free_srq_wqe(srq, be16toh(cqe64->wqe_counter)); return 1; } } return 0; } void __mlx5_cq_clean(struct mlx5_cq *cq, uint32_t rsn, struct mlx5_srq *srq) { uint32_t prod_index; int nfreed = 0; struct mlx5_cqe64 *cqe64, *dest64; void *cqe, *dest; uint8_t owner_bit; int cqe_version; if (!cq || cq->flags & MLX5_CQ_FLAGS_DV_OWNED) return; /* * First we need to find the current producer index, so we * know where to start cleaning from. It doesn't matter if HW * adds new entries after this loop -- the QP we're worried * about is already in RESET, so the new entries won't come * from our QP and therefore don't need to be checked. */ for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index) if (prod_index == cq->cons_index + cq->ibv_cq.cqe) break; /* * Now sweep backwards through the CQ, removing CQ entries * that match our QP by copying older entries on top of them. */ cqe_version = (to_mctx(cq->ibv_cq.context))->cqe_version; while ((int) --prod_index - (int) cq->cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe); cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64; if (free_res_cqe(cqe64, rsn, srq, cqe_version)) { ++nfreed; } else if (nfreed) { dest = get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe); dest64 = (cq->cqe_sz == 64) ? dest : dest + 64; owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK; memcpy(dest, cqe, cq->cqe_sz); dest64->op_own = owner_bit | (dest64->op_own & ~MLX5_CQE_OWNER_MASK); } } if (nfreed) { cq->cons_index += nfreed; /* * Make sure update of buffer contents is done before * updating consumer index. */ udma_to_device_barrier(); update_cons_index(cq); } } void mlx5_cq_clean(struct mlx5_cq *cq, uint32_t qpn, struct mlx5_srq *srq) { mlx5_spin_lock(&cq->lock); __mlx5_cq_clean(cq, qpn, srq); mlx5_spin_unlock(&cq->lock); } static uint8_t sw_ownership_bit(int n, int nent) { return (n & nent) ? 1 : 0; } static int is_hw(uint8_t own, int n, int mask) { return (own & MLX5_CQE_OWNER_MASK) ^ !!(n & (mask + 1)); } void mlx5_cq_resize_copy_cqes(struct mlx5_cq *cq) { struct mlx5_cqe64 *scqe64; struct mlx5_cqe64 *dcqe64; void *start_cqe; void *scqe; void *dcqe; int ssize; int dsize; int i; uint8_t sw_own; ssize = cq->cqe_sz; dsize = cq->resize_cqe_sz; i = cq->cons_index; scqe = get_buf_cqe(cq->active_buf, i & cq->active_cqes, ssize); scqe64 = ssize == 64 ? 
	scqe : scqe + 64;
	start_cqe = scqe;
	if (is_hw(scqe64->op_own, i, cq->active_cqes)) {
		fprintf(stderr, "expected cqe in sw ownership\n");
		return;
	}

	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
		dcqe = get_buf_cqe(cq->resize_buf,
				   (i + 1) & (cq->resize_cqes - 1), dsize);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_cqes);
		memcpy(dcqe, scqe, ssize);
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_buf_cqe(cq->active_buf, i & cq->active_cqes, ssize);
		scqe64 = ssize == 64 ? scqe : scqe + 64;
		if (is_hw(scqe64->op_own, i, cq->active_cqes)) {
			fprintf(stderr, "expected cqe in sw ownership\n");
			return;
		}

		if (scqe == start_cqe) {
			fprintf(stderr, "resize CQ failed to get resize CQE\n");
			return;
		}
	}

	++cq->cons_index;
}

int mlx5_alloc_cq_buf(struct mlx5_context *mctx, struct mlx5_cq *cq,
		      struct mlx5_buf *buf, int nent, int cqe_sz)
{
	struct mlx5_cqe64 *cqe;
	int i;
	struct mlx5_device *dev = to_mdev(mctx->ibv_ctx.device);
	int ret;
	enum mlx5_alloc_type type;
	enum mlx5_alloc_type default_type = MLX5_ALLOC_TYPE_ANON;

	if (mlx5_use_huge("HUGE_CQ"))
		default_type = MLX5_ALLOC_TYPE_HUGE;

	mlx5_get_alloc_type(MLX5_CQ_PREFIX, &type, default_type);

	ret = mlx5_alloc_prefered_buf(mctx, buf,
				      align(nent * cqe_sz, dev->page_size),
				      dev->page_size, type, MLX5_CQ_PREFIX);
	if (ret)
		return -1;

	memset(buf->buf, 0, nent * cqe_sz);

	for (i = 0; i < nent; ++i) {
		cqe = buf->buf + i * cqe_sz;
		cqe += cqe_sz == 128 ? 1 : 0;
		cqe->op_own = MLX5_CQE_INVALID << 4;
	}

	return 0;
}

int mlx5_free_cq_buf(struct mlx5_context *ctx, struct mlx5_buf *buf)
{
	return mlx5_free_actual_buf(ctx, buf);
}
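
/*
 * Illustrative sketch (not part of the provider): how an application might
 * drain a CQ through the extended-API hooks that mlx5_cq_fill_pfns() installs
 * above (start_poll/next_poll/end_poll plus the read_* accessors).  The helper
 * name app_drain_cq() and its return convention are hypothetical; only the
 * ibv_start_poll()/ibv_next_poll()/ibv_end_poll() calls and the cq->status /
 * cq->wr_id fields come from the verbs extended CQ API.
 *
 *	static int app_drain_cq(struct ibv_cq_ex *cq)
 *	{
 *		struct ibv_poll_cq_attr attr = {};
 *		int polled = 0;
 *		int ret;
 *
 *		ret = ibv_start_poll(cq, &attr);
 *		if (ret == ENOENT)
 *			return 0;
 *		if (ret)
 *			return ret;
 *
 *		do {
 *			if (cq->status == IBV_WC_SUCCESS)
 *				polled++;
 *			ret = ibv_next_poll(cq);
 *		} while (!ret);
 *
 *		ibv_end_poll(cq);
 *		return polled;
 *	}
 */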