/*
 * Copyright (c) 2012 Mellanox Technologies, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>

#include <infiniband/opcode.h>

#include "mlx5.h"
#include "wqe.h"
#include "doorbell.h"

enum {
        CQ_OK                                   =  0,
        CQ_EMPTY                                = -1,
        CQ_POLL_ERR                             = -2
};

enum {
        MLX5_CQ_MODIFY_RESEIZE = 0,
        MLX5_CQ_MODIFY_MODER = 1,
        MLX5_CQ_MODIFY_MAPPING = 2,
};

int mlx5_stall_num_loop = 60;
int mlx5_stall_cq_poll_min = 60;
int mlx5_stall_cq_poll_max = 100000;
int mlx5_stall_cq_inc_step = 100;
int mlx5_stall_cq_dec_step = 10;
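
/*
 * Adaptive-stall tuning sketch: between polls of an empty CQ the
 * library busy-waits for stall_cycles TSC ticks, clamped to
 * [mlx5_stall_cq_poll_min, mlx5_stall_cq_poll_max].  Worked example
 * (illustrative): starting from 60 cycles, a poll that only partially
 * fills the request grows the stall to min(60 + 100, 100000) = 160,
 * while an empty or fully satisfied poll shrinks it to
 * max(160 - 10, 60) = 150.  See poll_cq() and _mlx5_end_poll() below.
 */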

static inline uint8_t get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe)
{
        return (cqe->l4_hdr_type_etc >> 2) & 0x3;
}

static void *get_buf_cqe(struct mlx5_buf *buf, int n, int cqe_sz)
{
        return buf->buf + n * cqe_sz;
}

static void *get_cqe(struct mlx5_cq *cq, int n)
{
        return cq->active_buf->buf + n * cq->cqe_sz;
}

static void *get_sw_cqe(struct mlx5_cq *cq, int n)
{
        void *cqe = get_cqe(cq, n & cq->ibv_cq.cqe);
        struct mlx5_cqe64 *cqe64;

        cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;

        if (likely(mlx5dv_get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
            !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibv_cq.cqe + 1)))) {
                return cqe;
        } else {
                return NULL;
        }
}
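
/*
 * Ownership example (illustrative): with ibv_cq.cqe == 255 (a
 * 256-entry ring), consumer index n = 260 maps to slot n & 255 = 4
 * and the expected owner bit is !!(260 & 256) = 1.  A CQE is
 * software-owned when its MLX5_CQE_OWNER_MASK bit matches that
 * parity, so get_sw_cqe() only returns entries hardware has finished
 * writing on the current lap around the ring.  For 128-byte CQEs the
 * struct mlx5_cqe64 sits in the second half of the entry, hence the
 * "cqe + 64" adjustment.
 */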

static void *next_cqe_sw(struct mlx5_cq *cq)
{
        return get_sw_cqe(cq, cq->cons_index);
}

static void update_cons_index(struct mlx5_cq *cq)
{
        cq->dbrec[MLX5_CQ_SET_CI] = htobe32(cq->cons_index & 0xffffff);
}
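
/*
 * The consumer index is published to the device through the CQ
 * doorbell record; only its low 24 bits are significant, so the
 * 32-bit cons_index may wrap freely.
 */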

static inline void handle_good_req(struct ibv_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_wq *wq, int idx)
{
        switch (be32toh(cqe->sop_drop_qpn) >> 24) {
        case MLX5_OPCODE_RDMA_WRITE_IMM:
                wc->wc_flags |= IBV_WC_WITH_IMM;
                SWITCH_FALLTHROUGH;
        case MLX5_OPCODE_RDMA_WRITE:
                wc->opcode    = IBV_WC_RDMA_WRITE;
                break;
        case MLX5_OPCODE_SEND_IMM:
                wc->wc_flags |= IBV_WC_WITH_IMM;
                SWITCH_FALLTHROUGH;
        case MLX5_OPCODE_SEND:
        case MLX5_OPCODE_SEND_INVAL:
                wc->opcode    = IBV_WC_SEND;
                break;
        case MLX5_OPCODE_RDMA_READ:
                wc->opcode    = IBV_WC_RDMA_READ;
                wc->byte_len  = be32toh(cqe->byte_cnt);
                break;
        case MLX5_OPCODE_ATOMIC_CS:
                wc->opcode    = IBV_WC_COMP_SWAP;
                wc->byte_len  = 8;
                break;
        case MLX5_OPCODE_ATOMIC_FA:
                wc->opcode    = IBV_WC_FETCH_ADD;
                wc->byte_len  = 8;
                break;
        case MLX5_OPCODE_UMR:
                wc->opcode = wq->wr_data[idx];
                break;
        case MLX5_OPCODE_TSO:
                wc->opcode    = IBV_WC_TSO;
                break;
        }
}

static inline int handle_responder_lazy(struct mlx5_cq *cq, struct mlx5_cqe64 *cqe,
                                        struct mlx5_resource *cur_rsc, struct mlx5_srq *srq)
{
        uint16_t        wqe_ctr;
        struct mlx5_wq *wq;
        struct mlx5_qp *qp = rsc_to_mqp(cur_rsc);
        int err = IBV_WC_SUCCESS;

        if (srq) {
                wqe_ctr = be16toh(cqe->wqe_counter);
                cq->ibv_cq.wr_id = srq->wrid[wqe_ctr];
                mlx5_free_srq_wqe(srq, wqe_ctr);
                if (cqe->op_own & MLX5_INLINE_SCATTER_32)
                        err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe,
                                                    be32toh(cqe->byte_cnt));
                else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
                        err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe - 1,
                                                    be32toh(cqe->byte_cnt));
        } else {
                if (likely(cur_rsc->type == MLX5_RSC_TYPE_QP)) {
                        wq = &qp->rq;
                        if (qp->qp_cap_cache & MLX5_RX_CSUM_VALID)
                                cq->flags |= MLX5_CQ_FLAGS_RX_CSUM_VALID;
                } else {
                        wq = &(rsc_to_mrwq(cur_rsc)->rq);
                }

                wqe_ctr = wq->tail & (wq->wqe_cnt - 1);
                cq->ibv_cq.wr_id = wq->wrid[wqe_ctr];
                ++wq->tail;
                if (cqe->op_own & MLX5_INLINE_SCATTER_32)
                        err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe,
                                                    be32toh(cqe->byte_cnt));
                else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
                        err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe - 1,
                                                    be32toh(cqe->byte_cnt));
        }

        return err;
}

static inline int handle_responder(struct ibv_wc *wc, struct mlx5_cqe64 *cqe,
                                   struct mlx5_resource *cur_rsc, struct mlx5_srq *srq)
{
        uint16_t        wqe_ctr;
        struct mlx5_wq *wq;
        struct mlx5_qp *qp = rsc_to_mqp(cur_rsc);
        uint8_t g;
        int err = 0;

        wc->byte_len = be32toh(cqe->byte_cnt);
        if (srq) {
                wqe_ctr = be16toh(cqe->wqe_counter);
                wc->wr_id = srq->wrid[wqe_ctr];
                mlx5_free_srq_wqe(srq, wqe_ctr);
                if (cqe->op_own & MLX5_INLINE_SCATTER_32)
                        err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe,
                                                    wc->byte_len);
                else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
                        err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe - 1,
                                                    wc->byte_len);
        } else {
                if (likely(cur_rsc->type == MLX5_RSC_TYPE_QP)) {
                        wq = &qp->rq;
                        if (qp->qp_cap_cache & MLX5_RX_CSUM_VALID)
                                wc->wc_flags |= (!!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) &
                                                 !!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) &
                                                (get_cqe_l3_hdr_type(cqe) ==
                                                MLX5_CQE_L3_HDR_TYPE_IPV4)) <<
                                                IBV_WC_IP_CSUM_OK_SHIFT;
                } else {
                        wq = &(rsc_to_mrwq(cur_rsc)->rq);
                }

                wqe_ctr = wq->tail & (wq->wqe_cnt - 1);
                wc->wr_id = wq->wrid[wqe_ctr];
                ++wq->tail;
                if (cqe->op_own & MLX5_INLINE_SCATTER_32)
                        err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe,
                                                    wc->byte_len);
                else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
                        err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe - 1,
                                                    wc->byte_len);
        }
        if (err)
                return err;

        switch (cqe->op_own >> 4) {
        case MLX5_CQE_RESP_WR_IMM:
                wc->opcode      = IBV_WC_RECV_RDMA_WITH_IMM;
                wc->wc_flags    |= IBV_WC_WITH_IMM;
                wc->imm_data = cqe->imm_inval_pkey;
                break;
        case MLX5_CQE_RESP_SEND:
                wc->opcode   = IBV_WC_RECV;
                break;
        case MLX5_CQE_RESP_SEND_IMM:
                wc->opcode      = IBV_WC_RECV;
                wc->wc_flags    |= IBV_WC_WITH_IMM;
                wc->imm_data = cqe->imm_inval_pkey;
                break;
        case MLX5_CQE_RESP_SEND_INV:
                wc->opcode = IBV_WC_RECV;
                wc->wc_flags |= IBV_WC_WITH_INV;
                wc->imm_data = be32toh(cqe->imm_inval_pkey);
                break;
        }
        wc->slid           = be16toh(cqe->slid);
        wc->sl             = (be32toh(cqe->flags_rqpn) >> 24) & 0xf;
        wc->src_qp         = be32toh(cqe->flags_rqpn) & 0xffffff;
        wc->dlid_path_bits = cqe->ml_path & 0x7f;
        g = (be32toh(cqe->flags_rqpn) >> 28) & 3;
        wc->wc_flags |= g ? IBV_WC_GRH : 0;
        wc->pkey_index     = be32toh(cqe->imm_inval_pkey) & 0xffff;

        return IBV_WC_SUCCESS;
}
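
/*
 * Checksum reporting note: IBV_WC_IP_CSUM_OK is set only when the CQE
 * reports both the L3 and L4 checksums as valid and the L3 header is
 * IPv4, exactly as composed above.  Consumer sketch (illustrative):
 *
 *      if (wc.wc_flags & IBV_WC_IP_CSUM_OK)
 *              ;       // hardware already validated IP/L4 checksums
 */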

static void dump_cqe(FILE *fp, void *buf)
{
        uint32_t *p = buf;
        int i;

        for (i = 0; i < 16; i += 4)
                fprintf(fp, "%08x %08x %08x %08x\n", be32toh(p[i]), be32toh(p[i + 1]),
                        be32toh(p[i + 2]), be32toh(p[i + 3]));
}

static enum ibv_wc_status mlx5_handle_error_cqe(struct mlx5_err_cqe *cqe)
{
        switch (cqe->syndrome) {
        case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
                return IBV_WC_LOC_LEN_ERR;
        case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
                return IBV_WC_LOC_QP_OP_ERR;
        case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
                return IBV_WC_LOC_PROT_ERR;
        case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
                return IBV_WC_WR_FLUSH_ERR;
        case MLX5_CQE_SYNDROME_MW_BIND_ERR:
                return IBV_WC_MW_BIND_ERR;
        case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
                return IBV_WC_BAD_RESP_ERR;
        case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
                return IBV_WC_LOC_ACCESS_ERR;
        case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
                return IBV_WC_REM_INV_REQ_ERR;
        case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
                return IBV_WC_REM_ACCESS_ERR;
        case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
                return IBV_WC_REM_OP_ERR;
        case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
                return IBV_WC_RETRY_EXC_ERR;
        case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
                return IBV_WC_RNR_RETRY_EXC_ERR;
        case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
                return IBV_WC_REM_ABORT_ERR;
        default:
                return IBV_WC_GENERAL_ERR;
        }
}

#if defined(__x86_64__) || defined(__i386__)
static inline unsigned long get_cycles(void)
{
        uint32_t low, high;
        uint64_t val;
        asm volatile ("rdtsc" : "=a" (low), "=d" (high));
        val = high;
        val = (val << 32) | low;
        return val;
}

static void mlx5_stall_poll_cq(void)
{
        int i;

        for (i = 0; i < mlx5_stall_num_loop; i++)
                (void)get_cycles();
}

static void mlx5_stall_cycles_poll_cq(uint64_t cycles)
{
        while (get_cycles() < cycles)
                ; /* Nothing */
}

static void mlx5_get_cycles(uint64_t *cycles)
{
        *cycles = get_cycles();
}
#else
static void mlx5_stall_poll_cq(void)
{
}

static void mlx5_stall_cycles_poll_cq(uint64_t cycles)
{
}

static void mlx5_get_cycles(uint64_t *cycles)
{
}
#endif

static inline struct mlx5_qp *get_req_context(struct mlx5_context *mctx,
                                              struct mlx5_resource **cur_rsc,
                                              uint32_t rsn, int cqe_ver)
                                              ALWAYS_INLINE;
static inline struct mlx5_qp *get_req_context(struct mlx5_context *mctx,
                                              struct mlx5_resource **cur_rsc,
                                              uint32_t rsn, int cqe_ver)
{
        if (!*cur_rsc || (rsn != (*cur_rsc)->rsn))
                *cur_rsc = cqe_ver ? mlx5_find_uidx(mctx, rsn) :
                                      (struct mlx5_resource *)mlx5_find_qp(mctx, rsn);

        return rsc_to_mqp(*cur_rsc);
}

static inline int get_resp_ctx_v1(struct mlx5_context *mctx,
                                  struct mlx5_resource **cur_rsc,
                                  struct mlx5_srq **cur_srq,
                                  uint32_t uidx, uint8_t *is_srq)
                                  ALWAYS_INLINE;
static inline int get_resp_ctx_v1(struct mlx5_context *mctx,
                                  struct mlx5_resource **cur_rsc,
                                  struct mlx5_srq **cur_srq,
                                  uint32_t uidx, uint8_t *is_srq)
{
        struct mlx5_qp *mqp;

        if (!*cur_rsc || (uidx != (*cur_rsc)->rsn)) {
                *cur_rsc = mlx5_find_uidx(mctx, uidx);
                if (unlikely(!*cur_rsc))
                        return CQ_POLL_ERR;
        }

        switch ((*cur_rsc)->type) {
        case MLX5_RSC_TYPE_QP:
                mqp = rsc_to_mqp(*cur_rsc);
                if (mqp->verbs_qp.qp.srq) {
                        *cur_srq = to_msrq(mqp->verbs_qp.qp.srq);
                        *is_srq = 1;
                }
                break;
        case MLX5_RSC_TYPE_XSRQ:
                *cur_srq = rsc_to_msrq(*cur_rsc);
                *is_srq = 1;
                break;
        case MLX5_RSC_TYPE_RWQ:
                break;
        default:
                return CQ_POLL_ERR;
        }

        return CQ_OK;
}

static inline int get_qp_ctx(struct mlx5_context *mctx,
                             struct mlx5_resource **cur_rsc,
                             uint32_t qpn)
                             ALWAYS_INLINE;
static inline int get_qp_ctx(struct mlx5_context *mctx,
                             struct mlx5_resource **cur_rsc,
                             uint32_t qpn)
{
        if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) {
                /*
                 * We do not have to take the QP table lock here,
                 * because CQs will be locked while QPs are removed
                 * from the table.
                 */
                *cur_rsc = (struct mlx5_resource *)mlx5_find_qp(mctx, qpn);
                if (unlikely(!*cur_rsc))
                        return CQ_POLL_ERR;
        }

        return CQ_OK;
}
static inline int get_srq_ctx(struct mlx5_context *mctx,
                              struct mlx5_srq **cur_srq,
                              uint32_t srqn)
                              ALWAYS_INLINE;
static inline int get_srq_ctx(struct mlx5_context *mctx,
                              struct mlx5_srq **cur_srq,
                              uint32_t srqn)
{
        if (!*cur_srq || (srqn != (*cur_srq)->srqn)) {
                *cur_srq = mlx5_find_srq(mctx, srqn);
                if (unlikely(!*cur_srq))
                        return CQ_POLL_ERR;
        }

        return CQ_OK;
}

static inline int get_cur_rsc(struct mlx5_context *mctx,
                              int cqe_ver,
                              uint32_t qpn,
                              uint32_t srqn_uidx,
                              struct mlx5_resource **cur_rsc,
                              struct mlx5_srq **cur_srq,
                              uint8_t *is_srq)
{
        int err;

        if (cqe_ver) {
                err = get_resp_ctx_v1(mctx, cur_rsc, cur_srq, srqn_uidx,
                                      is_srq);
        } else {
                if (srqn_uidx) {
                        *is_srq = 1;
                        err = get_srq_ctx(mctx, cur_srq, srqn_uidx);
                } else {
                        err = get_qp_ctx(mctx, cur_rsc, qpn);
                }
        }

        return err;
}

static inline int mlx5_get_next_cqe(struct mlx5_cq *cq,
                                    struct mlx5_cqe64 **pcqe64,
                                    void **pcqe)
                                    ALWAYS_INLINE;
static inline int mlx5_get_next_cqe(struct mlx5_cq *cq,
                                    struct mlx5_cqe64 **pcqe64,
                                    void **pcqe)
{
        void *cqe;
        struct mlx5_cqe64 *cqe64;

        cqe = next_cqe_sw(cq);
        if (!cqe)
                return CQ_EMPTY;

        cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;

        ++cq->cons_index;

        VALGRIND_MAKE_MEM_DEFINED(cqe64, sizeof *cqe64);

        /*
         * Make sure we read CQ entry contents after we've checked the
         * ownership bit.
         */
        udma_from_device_barrier();

#ifdef MLX5_DEBUG
        {
                struct mlx5_context *mctx = to_mctx(cq->ibv_cq.context);

                if (mlx5_debug_mask & MLX5_DBG_CQ_CQE) {
                        FILE *fp = mctx->dbg_fp;

                        mlx5_dbg(fp, MLX5_DBG_CQ_CQE, "dump cqe for cqn 0x%x:\n", cq->cqn);
                        dump_cqe(fp, cqe64);
                }
        }
#endif
        *pcqe64 = cqe64;
        *pcqe = cqe;

        return CQ_OK;
}

static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
                                 struct mlx5_cqe64 *cqe64,
                                 void *cqe,
                                 struct mlx5_resource **cur_rsc,
                                 struct mlx5_srq **cur_srq,
                                 struct ibv_wc *wc,
                                 int cqe_ver, int lazy)
                                 ALWAYS_INLINE;
static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
                                 struct mlx5_cqe64 *cqe64,
                                 void *cqe,
                                 struct mlx5_resource **cur_rsc,
                                 struct mlx5_srq **cur_srq,
                                 struct ibv_wc *wc,
                                 int cqe_ver, int lazy)
{
        struct mlx5_wq *wq;
        uint16_t wqe_ctr;
        uint32_t qpn;
        uint32_t srqn_uidx;
        int idx;
        uint8_t opcode;
        struct mlx5_err_cqe *ecqe;
        int err = 0;
        struct mlx5_qp *mqp;
        struct mlx5_context *mctx;
        uint8_t is_srq = 0;

        mctx = to_mctx(ibv_cq_ex_to_cq(&cq->ibv_cq)->context);
        qpn = be32toh(cqe64->sop_drop_qpn) & 0xffffff;
        if (lazy) {
                cq->cqe64 = cqe64;
                cq->flags &= (~MLX5_CQ_FLAGS_RX_CSUM_VALID);
        } else {
                wc->wc_flags = 0;
                wc->qp_num = qpn;
        }

        opcode = mlx5dv_get_cqe_opcode(cqe64);
        switch (opcode) {
        case MLX5_CQE_REQ:
        {
                mqp = get_req_context(mctx, cur_rsc,
                                      (cqe_ver ? (be32toh(cqe64->srqn_uidx) & 0xffffff) : qpn),
                                      cqe_ver);
                if (unlikely(!mqp))
                        return CQ_POLL_ERR;
                wq = &mqp->sq;
                wqe_ctr = be16toh(cqe64->wqe_counter);
                idx = wqe_ctr & (wq->wqe_cnt - 1);
                if (lazy) {
                        uint32_t wc_byte_len;

                        switch (be32toh(cqe64->sop_drop_qpn) >> 24) {
                        case MLX5_OPCODE_UMR:
                                cq->umr_opcode = wq->wr_data[idx];
                                break;

                        case MLX5_OPCODE_RDMA_READ:
                                wc_byte_len = be32toh(cqe64->byte_cnt);
                                goto scatter_out;
                        case MLX5_OPCODE_ATOMIC_CS:
                        case MLX5_OPCODE_ATOMIC_FA:
                                wc_byte_len = 8;

                        scatter_out:
                                if (cqe64->op_own & MLX5_INLINE_SCATTER_32)
                                        err = mlx5_copy_to_send_wqe(
                                            mqp, wqe_ctr, cqe, wc_byte_len);
                                else if (cqe64->op_own & MLX5_INLINE_SCATTER_64)
                                        err = mlx5_copy_to_send_wqe(
                                            mqp, wqe_ctr, cqe - 1, wc_byte_len);
                                break;
                        }

                        cq->ibv_cq.wr_id = wq->wrid[idx];
                        cq->ibv_cq.status = err;
                } else {
                        handle_good_req(wc, cqe64, wq, idx);

                        if (cqe64->op_own & MLX5_INLINE_SCATTER_32)
                                err = mlx5_copy_to_send_wqe(mqp, wqe_ctr, cqe,
                                                            wc->byte_len);
                        else if (cqe64->op_own & MLX5_INLINE_SCATTER_64)
                                err = mlx5_copy_to_send_wqe(
                                    mqp, wqe_ctr, cqe - 1, wc->byte_len);

                        wc->wr_id = wq->wrid[idx];
                        wc->status = err;
                }

                wq->tail = wq->wqe_head[idx] + 1;
                break;
        }
        case MLX5_CQE_RESP_WR_IMM:
        case MLX5_CQE_RESP_SEND:
        case MLX5_CQE_RESP_SEND_IMM:
        case MLX5_CQE_RESP_SEND_INV:
                srqn_uidx = be32toh(cqe64->srqn_uidx) & 0xffffff;
                err = get_cur_rsc(mctx, cqe_ver, qpn, srqn_uidx, cur_rsc,
                                  cur_srq, &is_srq);
                if (unlikely(err))
                        return CQ_POLL_ERR;

                if (lazy)
                        cq->ibv_cq.status = handle_responder_lazy(cq, cqe64,
                                                              *cur_rsc,
                                                              is_srq ? *cur_srq : NULL);
                else
                        wc->status = handle_responder(wc, cqe64, *cur_rsc,
                                              is_srq ? *cur_srq : NULL);
                break;
        case MLX5_CQE_RESIZE_CQ:
                break;
        case MLX5_CQE_REQ_ERR:
        case MLX5_CQE_RESP_ERR:
                srqn_uidx = be32toh(cqe64->srqn_uidx) & 0xffffff;
                ecqe = (struct mlx5_err_cqe *)cqe64;
                {
                        enum ibv_wc_status *pstatus = lazy ? &cq->ibv_cq.status : &wc->status;

                        *pstatus = mlx5_handle_error_cqe(ecqe);
                }

                if (!lazy)
                        wc->vendor_err = ecqe->vendor_err_synd;

                if (unlikely(ecqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR &&
                             ecqe->syndrome != MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR)) {
                        FILE *fp = mctx->dbg_fp;
                        fprintf(fp, PFX "%s: got completion with error:\n",
                                mctx->hostname);
                        dump_cqe(fp, ecqe);
                        if (mlx5_freeze_on_error_cqe) {
                                fprintf(fp, PFX "freezing at poll cq...");
                                while (1)
                                        sleep(10);
                        }
                }

                if (opcode == MLX5_CQE_REQ_ERR) {
                        mqp = get_req_context(mctx, cur_rsc,
                                              (cqe_ver ? srqn_uidx : qpn), cqe_ver);
                        if (unlikely(!mqp))
                                return CQ_POLL_ERR;
                        wq = &mqp->sq;
                        wqe_ctr = be16toh(cqe64->wqe_counter);
                        idx = wqe_ctr & (wq->wqe_cnt - 1);
                        if (lazy)
                                cq->ibv_cq.wr_id = wq->wrid[idx];
                        else
                                wc->wr_id = wq->wrid[idx];
                        wq->tail = wq->wqe_head[idx] + 1;
                } else {
                        err = get_cur_rsc(mctx, cqe_ver, qpn, srqn_uidx,
                                          cur_rsc, cur_srq, &is_srq);
                        if (unlikely(err))
                                return CQ_POLL_ERR;

                        if (is_srq) {
                                wqe_ctr = be16toh(cqe64->wqe_counter);
                                if (lazy)
                                        cq->ibv_cq.wr_id = (*cur_srq)->wrid[wqe_ctr];
                                else
                                        wc->wr_id = (*cur_srq)->wrid[wqe_ctr];
                                mlx5_free_srq_wqe(*cur_srq, wqe_ctr);
                        } else {
                                switch ((*cur_rsc)->type) {
                                case MLX5_RSC_TYPE_RWQ:
                                        wq = &(rsc_to_mrwq(*cur_rsc)->rq);
                                        break;
                                default:
                                        wq = &(rsc_to_mqp(*cur_rsc)->rq);
                                        break;
                                }

                                if (lazy)
                                        cq->ibv_cq.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
                                else
                                        wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
                                ++wq->tail;
                        }
                }
                break;
        }

        return CQ_OK;
}

static inline int mlx5_parse_lazy_cqe(struct mlx5_cq *cq,
                                      struct mlx5_cqe64 *cqe64,
                                      void *cqe, int cqe_ver)
                                      ALWAYS_INLINE;
static inline int mlx5_parse_lazy_cqe(struct mlx5_cq *cq,
                                      struct mlx5_cqe64 *cqe64,
                                      void *cqe, int cqe_ver)
{
        return mlx5_parse_cqe(cq, cqe64, cqe, &cq->cur_rsc, &cq->cur_srq, NULL, cqe_ver, 1);
}

static inline int mlx5_poll_one(struct mlx5_cq *cq,
                                struct mlx5_resource **cur_rsc,
                                struct mlx5_srq **cur_srq,
                                struct ibv_wc *wc, int cqe_ver)
                                ALWAYS_INLINE;
static inline int mlx5_poll_one(struct mlx5_cq *cq,
                                struct mlx5_resource **cur_rsc,
                                struct mlx5_srq **cur_srq,
                                struct ibv_wc *wc, int cqe_ver)
{
        struct mlx5_cqe64 *cqe64;
        void *cqe;
        int err;

        err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
        if (err == CQ_EMPTY)
                return err;

        return mlx5_parse_cqe(cq, cqe64, cqe, cur_rsc, cur_srq, wc, cqe_ver, 0);
}

static inline int poll_cq(struct ibv_cq *ibcq, int ne,
                      struct ibv_wc *wc, int cqe_ver)
                      ALWAYS_INLINE;
static inline int poll_cq(struct ibv_cq *ibcq, int ne,
                      struct ibv_wc *wc, int cqe_ver)
{
        struct mlx5_cq *cq = to_mcq(ibcq);
        struct mlx5_resource *rsc = NULL;
        struct mlx5_srq *srq = NULL;
        int npolled;
        int err = CQ_OK;

        if (cq->stall_enable) {
                if (cq->stall_adaptive_enable) {
                        if (cq->stall_last_count)
                                mlx5_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles);
                } else if (cq->stall_next_poll) {
                        cq->stall_next_poll = 0;
                        mlx5_stall_poll_cq();
                }
        }

        mlx5_spin_lock(&cq->lock);

        for (npolled = 0; npolled < ne; ++npolled) {
                err = mlx5_poll_one(cq, &rsc, &srq, wc + npolled, cqe_ver);
                if (err != CQ_OK)
                        break;
        }

        update_cons_index(cq);

        mlx5_spin_unlock(&cq->lock);

        if (cq->stall_enable) {
                if (cq->stall_adaptive_enable) {
                        if (npolled == 0) {
                                cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
                                                       mlx5_stall_cq_poll_min);
                                mlx5_get_cycles(&cq->stall_last_count);
                        } else if (npolled < ne) {
                                cq->stall_cycles = min(cq->stall_cycles + mlx5_stall_cq_inc_step,
                                                       mlx5_stall_cq_poll_max);
                                mlx5_get_cycles(&cq->stall_last_count);
                        } else {
                                cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
                                                       mlx5_stall_cq_poll_min);
                                cq->stall_last_count = 0;
                        }
                } else if (err == CQ_EMPTY) {
                        cq->stall_next_poll = 1;
                }
        }

        return err == CQ_POLL_ERR ? err : npolled;
}
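
/*
 * poll_cq() backs the classic completion path.  Caller sketch using
 * the standard verbs API (illustrative; process_wc() is hypothetical):
 *
 *      struct ibv_wc wc[16];
 *      int i, n;
 *
 *      n = ibv_poll_cq(ibcq, 16, wc);          // dispatches to mlx5_poll_cq*
 *      for (i = 0; i < n; i++) {
 *              if (wc[i].status != IBV_WC_SUCCESS)
 *                      fprintf(stderr, "wr %llu failed\n",
 *                              (unsigned long long)wc[i].wr_id);
 *              else
 *                      process_wc(&wc[i]);
 *      }
 *
 * A negative return corresponds to CQ_POLL_ERR; 0..ne is the number
 * of completions consumed.
 */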

enum polling_mode {
        POLLING_MODE_NO_STALL,
        POLLING_MODE_STALL,
        POLLING_MODE_STALL_ADAPTIVE
};

static inline void _mlx5_end_poll(struct ibv_cq_ex *ibcq,
                                  int lock, enum polling_mode stall)
                                  ALWAYS_INLINE;
static inline void _mlx5_end_poll(struct ibv_cq_ex *ibcq,
                                  int lock, enum polling_mode stall)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

        update_cons_index(cq);

        if (lock)
                mlx5_spin_unlock(&cq->lock);

        if (stall) {
                if (stall == POLLING_MODE_STALL_ADAPTIVE) {
                        if (!(cq->flags & MLX5_CQ_FLAGS_FOUND_CQES)) {
                                cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
                                                       mlx5_stall_cq_poll_min);
                                mlx5_get_cycles(&cq->stall_last_count);
                        } else if (cq->flags & MLX5_CQ_FLAGS_EMPTY_DURING_POLL) {
                                cq->stall_cycles = min(cq->stall_cycles + mlx5_stall_cq_inc_step,
                                                       mlx5_stall_cq_poll_max);
                                mlx5_get_cycles(&cq->stall_last_count);
                        } else {
                                cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
                                                       mlx5_stall_cq_poll_min);
                                cq->stall_last_count = 0;
                        }
                } else if (!(cq->flags & MLX5_CQ_FLAGS_FOUND_CQES)) {
                        cq->stall_next_poll = 1;
                }

                cq->flags &= ~(MLX5_CQ_FLAGS_FOUND_CQES | MLX5_CQ_FLAGS_EMPTY_DURING_POLL);
        }
}

static inline int mlx5_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr,
                                  int lock, enum polling_mode stall, int cqe_version)
                                  ALWAYS_INLINE;
static inline int mlx5_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr,
                                  int lock, enum polling_mode stall, int cqe_version)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
        struct mlx5_cqe64 *cqe64;
        void *cqe;
        int err;

        if (unlikely(attr->comp_mask))
                return EINVAL;

        if (stall) {
                if (stall == POLLING_MODE_STALL_ADAPTIVE) {
                        if (cq->stall_last_count)
                                mlx5_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles);
                } else if (cq->stall_next_poll) {
                        cq->stall_next_poll = 0;
                        mlx5_stall_poll_cq();
                }
        }

        if (lock)
                mlx5_spin_lock(&cq->lock);

        cq->cur_rsc = NULL;
        cq->cur_srq = NULL;

        err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
        if (err == CQ_EMPTY) {
                if (lock)
                        mlx5_spin_unlock(&cq->lock);

                if (stall) {
                        if (stall == POLLING_MODE_STALL_ADAPTIVE) {
                                cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
                                                mlx5_stall_cq_poll_min);
                                mlx5_get_cycles(&cq->stall_last_count);
                        } else {
                                cq->stall_next_poll = 1;
                        }
                }

                return ENOENT;
        }

        if (stall)
                cq->flags |= MLX5_CQ_FLAGS_FOUND_CQES;

        err = mlx5_parse_lazy_cqe(cq, cqe64, cqe, cqe_version);
        if (lock && err)
                mlx5_spin_unlock(&cq->lock);

        if (stall && err) {
                if (stall == POLLING_MODE_STALL_ADAPTIVE) {
                        cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
                                                mlx5_stall_cq_poll_min);
                        cq->stall_last_count = 0;
                }

                cq->flags &= ~(MLX5_CQ_FLAGS_FOUND_CQES);
        }

        return err;
}

static inline int mlx5_next_poll(struct ibv_cq_ex *ibcq,
                                 enum polling_mode stall, int cqe_version)
                                 ALWAYS_INLINE;
static inline int mlx5_next_poll(struct ibv_cq_ex *ibcq,
                                 enum polling_mode stall,
                                 int cqe_version)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
        struct mlx5_cqe64 *cqe64;
        void *cqe;
        int err;

        err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
        if (err == CQ_EMPTY) {
                if (stall == POLLING_MODE_STALL_ADAPTIVE)
                        cq->flags |= MLX5_CQ_FLAGS_EMPTY_DURING_POLL;

                return ENOENT;
        }

        return mlx5_parse_lazy_cqe(cq, cqe64, cqe, cqe_version);
}
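
/*
 * mlx5_start_poll/mlx5_next_poll/_mlx5_end_poll implement the
 * extended ("lazy") CQ polling API.  Caller sketch using the
 * libibverbs wrappers (illustrative; consume() is hypothetical):
 *
 *      struct ibv_poll_cq_attr attr = {};
 *
 *      if (ibv_start_poll(cq_ex, &attr) == 0) {
 *              do {
 *                      if (cq_ex->status == IBV_WC_SUCCESS)
 *                              consume(cq_ex->wr_id);
 *              } while (ibv_next_poll(cq_ex) == 0);
 *              ibv_end_poll(cq_ex);
 *      }
 *
 * Per-field data (opcode, byte length, ...) is fetched on demand
 * through the read_* callbacks installed by mlx5_cq_fill_pfns().
 */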

static inline int mlx5_next_poll_adaptive_v0(struct ibv_cq_ex *ibcq)
{
        return mlx5_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 0);
}

static inline int mlx5_next_poll_adaptive_v1(struct ibv_cq_ex *ibcq)
{
        return mlx5_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 1);
}

static inline int mlx5_next_poll_v0(struct ibv_cq_ex *ibcq)
{
        return mlx5_next_poll(ibcq, 0, 0);
}

static inline int mlx5_next_poll_v1(struct ibv_cq_ex *ibcq)
{
        return mlx5_next_poll(ibcq, 0, 1);
}

static inline int mlx5_start_poll_v0(struct ibv_cq_ex *ibcq,
                                     struct ibv_poll_cq_attr *attr)
{
        return mlx5_start_poll(ibcq, attr, 0, 0, 0);
}

static inline int mlx5_start_poll_v1(struct ibv_cq_ex *ibcq,
                                     struct ibv_poll_cq_attr *attr)
{
        return mlx5_start_poll(ibcq, attr, 0, 0, 1);
}

static inline int mlx5_start_poll_v0_lock(struct ibv_cq_ex *ibcq,
                                          struct ibv_poll_cq_attr *attr)
{
        return mlx5_start_poll(ibcq, attr, 1, 0, 0);
}

static inline int mlx5_start_poll_v1_lock(struct ibv_cq_ex *ibcq,
                                          struct ibv_poll_cq_attr *attr)
{
        return mlx5_start_poll(ibcq, attr, 1, 0, 1);
}

static inline int mlx5_start_poll_adaptive_stall_v0_lock(struct ibv_cq_ex *ibcq,
                                                         struct ibv_poll_cq_attr *attr)
{
        return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0);
}

static inline int mlx5_start_poll_stall_v0_lock(struct ibv_cq_ex *ibcq,
                                                struct ibv_poll_cq_attr *attr)
{
        return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0);
}

static inline int mlx5_start_poll_adaptive_stall_v1_lock(struct ibv_cq_ex *ibcq,
                                                         struct ibv_poll_cq_attr *attr)
{
        return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1);
}

static inline int mlx5_start_poll_stall_v1_lock(struct ibv_cq_ex *ibcq,
                                                struct ibv_poll_cq_attr *attr)
{
        return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1);
}

static inline int mlx5_start_poll_stall_v0(struct ibv_cq_ex *ibcq,
                                           struct ibv_poll_cq_attr *attr)
{
        return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0);
}

static inline int mlx5_start_poll_adaptive_stall_v0(struct ibv_cq_ex *ibcq,
                                                    struct ibv_poll_cq_attr *attr)
{
        return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0);
}

static inline int mlx5_start_poll_adaptive_stall_v1(struct ibv_cq_ex *ibcq,
                                                    struct ibv_poll_cq_attr *attr)
{
        return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1);
}

static inline int mlx5_start_poll_stall_v1(struct ibv_cq_ex *ibcq,
                                           struct ibv_poll_cq_attr *attr)
{
        return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1);
}

static inline void mlx5_end_poll_adaptive_stall_lock(struct ibv_cq_ex *ibcq)
{
        _mlx5_end_poll(ibcq, 1, POLLING_MODE_STALL_ADAPTIVE);
}

static inline void mlx5_end_poll_stall_lock(struct ibv_cq_ex *ibcq)
{
        _mlx5_end_poll(ibcq, 1, POLLING_MODE_STALL);
}

static inline void mlx5_end_poll_adaptive_stall(struct ibv_cq_ex *ibcq)
{
        _mlx5_end_poll(ibcq, 0, POLLING_MODE_STALL_ADAPTIVE);
}

static inline void mlx5_end_poll_stall(struct ibv_cq_ex *ibcq)
{
        _mlx5_end_poll(ibcq, 0, POLLING_MODE_STALL);
}

static inline void mlx5_end_poll(struct ibv_cq_ex *ibcq)
{
        _mlx5_end_poll(ibcq, 0, 0);
}

static inline void mlx5_end_poll_lock(struct ibv_cq_ex *ibcq)
{
        _mlx5_end_poll(ibcq, 1, 0);
}

int mlx5_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
{
        return poll_cq(ibcq, ne, wc, 0);
}

int mlx5_poll_cq_v1(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
{
        return poll_cq(ibcq, ne, wc, 1);
}

static inline enum ibv_wc_opcode mlx5_cq_read_wc_opcode(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

        switch (mlx5dv_get_cqe_opcode(cq->cqe64)) {
        case MLX5_CQE_RESP_WR_IMM:
                return IBV_WC_RECV_RDMA_WITH_IMM;
        case MLX5_CQE_RESP_SEND:
        case MLX5_CQE_RESP_SEND_IMM:
        case MLX5_CQE_RESP_SEND_INV:
                return IBV_WC_RECV;
        case MLX5_CQE_REQ:
                switch (be32toh(cq->cqe64->sop_drop_qpn) >> 24) {
                case MLX5_OPCODE_RDMA_WRITE_IMM:
                case MLX5_OPCODE_RDMA_WRITE:
                        return IBV_WC_RDMA_WRITE;
                case MLX5_OPCODE_SEND_IMM:
                case MLX5_OPCODE_SEND:
                case MLX5_OPCODE_SEND_INVAL:
                        return IBV_WC_SEND;
                case MLX5_OPCODE_RDMA_READ:
                        return IBV_WC_RDMA_READ;
                case MLX5_OPCODE_ATOMIC_CS:
                        return IBV_WC_COMP_SWAP;
                case MLX5_OPCODE_ATOMIC_FA:
                        return IBV_WC_FETCH_ADD;
                case MLX5_OPCODE_UMR:
                        return cq->umr_opcode;
                case MLX5_OPCODE_TSO:
                        return IBV_WC_TSO;
                }
        }

#ifdef MLX5_DEBUG
        {
                struct mlx5_context *ctx = to_mctx(ibcq->context);

                mlx5_dbg(ctx->dbg_fp, MLX5_DBG_CQ_CQE, "unexpected opcode in cqe\n");
        }
#endif
        return 0;
}

static inline uint32_t mlx5_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

        return be32toh(cq->cqe64->sop_drop_qpn) & 0xffffff;
}

static inline int mlx5_cq_read_wc_flags(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
        int wc_flags = 0;

        if (cq->flags & MLX5_CQ_FLAGS_RX_CSUM_VALID)
                wc_flags = (!!(cq->cqe64->hds_ip_ext & MLX5_CQE_L4_OK) &
                                 !!(cq->cqe64->hds_ip_ext & MLX5_CQE_L3_OK) &
                                 (get_cqe_l3_hdr_type(cq->cqe64) ==
                                  MLX5_CQE_L3_HDR_TYPE_IPV4)) <<
                                IBV_WC_IP_CSUM_OK_SHIFT;

        switch (mlx5dv_get_cqe_opcode(cq->cqe64)) {
        case MLX5_CQE_RESP_WR_IMM:
        case MLX5_CQE_RESP_SEND_IMM:
                wc_flags        |= IBV_WC_WITH_IMM;
                break;
        case MLX5_CQE_RESP_SEND_INV:
                wc_flags |= IBV_WC_WITH_INV;
                break;
        }

        wc_flags |= ((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ? IBV_WC_GRH : 0;
        return wc_flags;
}

static inline uint32_t mlx5_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

        return be32toh(cq->cqe64->byte_cnt);
}

static inline uint32_t mlx5_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
        struct mlx5_err_cqe *ecqe = (struct mlx5_err_cqe *)cq->cqe64;

        return ecqe->vendor_err_synd;
}

static inline uint32_t mlx5_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

        switch (mlx5dv_get_cqe_opcode(cq->cqe64)) {
        case MLX5_CQE_RESP_SEND_INV:
                return be32toh(cq->cqe64->imm_inval_pkey);
        default:
                return cq->cqe64->imm_inval_pkey;
        }
}
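
/*
 * Byte-order note: for MLX5_CQE_RESP_SEND_INV the field carries the
 * invalidated rkey and is converted to host order; all other opcodes
 * return the immediate data still in network byte order, matching how
 * handle_responder() fills ibv_wc.imm_data.
 */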

static inline uint32_t mlx5_cq_read_wc_slid(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

        return (uint32_t)be16toh(cq->cqe64->slid);
}

static inline uint8_t mlx5_cq_read_wc_sl(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

        return (be32toh(cq->cqe64->flags_rqpn) >> 24) & 0xf;
}

static inline uint32_t mlx5_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

        return be32toh(cq->cqe64->flags_rqpn) & 0xffffff;
}

static inline uint8_t mlx5_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

        return cq->cqe64->ml_path & 0x7f;
}

static inline uint64_t mlx5_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

        return be64toh(cq->cqe64->timestamp);
}

static inline uint16_t mlx5_cq_read_wc_cvlan(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

        return be16toh(cq->cqe64->vlan_info);
}

static inline uint32_t mlx5_cq_read_flow_tag(struct ibv_cq_ex *ibcq)
{
        struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

        return be32toh(cq->cqe64->sop_drop_qpn) & MLX5_FLOW_TAG_MASK;
}

#define BIT(i) (1UL << (i))

#define SINGLE_THREADED BIT(0)
#define STALL BIT(1)
#define V1 BIT(2)
#define ADAPTIVE BIT(3)

#define mlx5_start_poll_name(cqe_ver, lock, stall, adaptive) \
        mlx5_start_poll##adaptive##stall##cqe_ver##lock
#define mlx5_next_poll_name(cqe_ver, adaptive) \
        mlx5_next_poll##adaptive##cqe_ver
#define mlx5_end_poll_name(lock, stall, adaptive) \
        mlx5_end_poll##adaptive##stall##lock

#define POLL_FN_ENTRY(cqe_ver, lock, stall, adaptive) { \
                .start_poll = &mlx5_start_poll_name(cqe_ver, lock, stall, adaptive), \
                .next_poll = &mlx5_next_poll_name(cqe_ver, adaptive), \
                .end_poll = &mlx5_end_poll_name(lock, stall, adaptive), \
        }

static const struct op
{
        int (*start_poll)(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr);
        int (*next_poll)(struct ibv_cq_ex *ibcq);
        void (*end_poll)(struct ibv_cq_ex *ibcq);
} ops[ADAPTIVE + V1 + STALL + SINGLE_THREADED + 1] = {
        [V1] =  POLL_FN_ENTRY(_v1, _lock, , ),
        [0] =  POLL_FN_ENTRY(_v0, _lock, , ),
        [V1 | SINGLE_THREADED] =  POLL_FN_ENTRY(_v1, , , ),
        [SINGLE_THREADED] =  POLL_FN_ENTRY(_v0, , , ),
        [V1 | STALL] =  POLL_FN_ENTRY(_v1, _lock, _stall, ),
        [STALL] =  POLL_FN_ENTRY(_v0, _lock, _stall, ),
        [V1 | SINGLE_THREADED | STALL] =  POLL_FN_ENTRY(_v1, , _stall, ),
        [SINGLE_THREADED | STALL] =  POLL_FN_ENTRY(_v0, , _stall, ),
        [V1 | STALL | ADAPTIVE] =  POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive),
        [STALL | ADAPTIVE] =  POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive),
        [V1 | SINGLE_THREADED | STALL | ADAPTIVE] =  POLL_FN_ENTRY(_v1, , _stall, _adaptive),
        [SINGLE_THREADED | STALL | ADAPTIVE] =  POLL_FN_ENTRY(_v0, , _stall, _adaptive),
};
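
/*
 * Index-composition example (illustrative): a CQ created as
 * single-threaded on a CQE-version-1 context with stalling enabled
 * selects ops[V1 | SINGLE_THREADED | STALL], i.e. the _v1/_stall
 * entry points that skip the spinlock.
 */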

void mlx5_cq_fill_pfns(struct mlx5_cq *cq, const struct ibv_cq_init_attr_ex *cq_attr)
{
        struct mlx5_context *mctx = to_mctx(ibv_cq_ex_to_cq(&cq->ibv_cq)->context);
        const struct op *poll_ops = &ops[((cq->stall_enable && cq->stall_adaptive_enable) ? ADAPTIVE : 0) |
                                         (mctx->cqe_version ? V1 : 0) |
                                         (cq->flags & MLX5_CQ_FLAGS_SINGLE_THREADED ?
                                                      SINGLE_THREADED : 0) |
                                         (cq->stall_enable ? STALL : 0)];

        cq->ibv_cq.start_poll = poll_ops->start_poll;
        cq->ibv_cq.next_poll = poll_ops->next_poll;
        cq->ibv_cq.end_poll = poll_ops->end_poll;

        cq->ibv_cq.read_opcode = mlx5_cq_read_wc_opcode;
        cq->ibv_cq.read_vendor_err = mlx5_cq_read_wc_vendor_err;
        cq->ibv_cq.read_wc_flags = mlx5_cq_read_wc_flags;
        if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
                cq->ibv_cq.read_byte_len = mlx5_cq_read_wc_byte_len;
        if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM)
                cq->ibv_cq.read_imm_data = mlx5_cq_read_wc_imm_data;
        if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM)
                cq->ibv_cq.read_qp_num = mlx5_cq_read_wc_qp_num;
        if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP)
                cq->ibv_cq.read_src_qp = mlx5_cq_read_wc_src_qp;
        if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID)
                cq->ibv_cq.read_slid = mlx5_cq_read_wc_slid;
        if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL)
                cq->ibv_cq.read_sl = mlx5_cq_read_wc_sl;
        if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
                cq->ibv_cq.read_dlid_path_bits = mlx5_cq_read_wc_dlid_path_bits;
        if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
                cq->ibv_cq.read_completion_ts = mlx5_cq_read_wc_completion_ts;
        if (cq_attr->wc_flags & IBV_WC_EX_WITH_CVLAN)
                cq->ibv_cq.read_cvlan = mlx5_cq_read_wc_cvlan;
        if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG)
                cq->ibv_cq.read_flow_tag = mlx5_cq_read_flow_tag;
}

int mlx5_arm_cq(struct ibv_cq *ibvcq, int solicited)
{
        struct mlx5_cq *cq = to_mcq(ibvcq);
        struct mlx5_context *ctx = to_mctx(ibvcq->context);
        uint32_t doorbell[2];
        uint32_t sn;
        uint32_t ci;
        uint32_t cmd;

        sn  = cq->arm_sn & 3;
        ci  = cq->cons_index & 0xffffff;
        cmd = solicited ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT;

        cq->dbrec[MLX5_CQ_ARM_DB] = htobe32(sn << 28 | cmd | ci);

        /*
         * Make sure that the doorbell record in host memory is
         * written before ringing the doorbell via PCI WC MMIO.
         */
        mmio_wc_start();

        doorbell[0] = htobe32(sn << 28 | cmd | ci);
        doorbell[1] = htobe32(cq->cqn);

        mlx5_write64(doorbell, ctx->uar[0] + MLX5_CQ_DOORBELL, &ctx->lock32);

        mmio_flush_writes();

        return 0;
}
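
/*
 * Event-driven usage sketch: mlx5_arm_cq() backs ibv_req_notify_cq().
 * A typical consumer (illustrative):
 *
 *      ibv_req_notify_cq(cq, 0);               // arm; rings the doorbell above
 *      ibv_get_cq_event(channel, &ev_cq, &ev_ctx);
 *      ibv_ack_cq_events(ev_cq, 1);
 *      ibv_req_notify_cq(ev_cq, 0);            // re-arm before draining
 *      // then drain completions with ibv_poll_cq()
 *
 * The 2-bit arm sequence number (arm_sn, advanced in mlx5_cq_event()
 * below) distinguishes consecutive arm requests for the device.
 */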

void mlx5_cq_event(struct ibv_cq *cq)
{
        to_mcq(cq)->arm_sn++;
}

static int is_equal_rsn(struct mlx5_cqe64 *cqe64, uint32_t rsn)
{
        return rsn == (be32toh(cqe64->sop_drop_qpn) & 0xffffff);
}

static inline int is_equal_uidx(struct mlx5_cqe64 *cqe64, uint32_t uidx)
{
        return uidx == (be32toh(cqe64->srqn_uidx) & 0xffffff);
}

static inline int is_responder(uint8_t opcode)
{
        switch (opcode) {
        case MLX5_CQE_RESP_WR_IMM:
        case MLX5_CQE_RESP_SEND:
        case MLX5_CQE_RESP_SEND_IMM:
        case MLX5_CQE_RESP_SEND_INV:
        case MLX5_CQE_RESP_ERR:
                return 1;
        }

        return 0;
}

static inline int free_res_cqe(struct mlx5_cqe64 *cqe64, uint32_t rsn,
                               struct mlx5_srq *srq, int cqe_version)
{
        if (cqe_version) {
                if (is_equal_uidx(cqe64, rsn)) {
                        if (srq && is_responder(mlx5dv_get_cqe_opcode(cqe64)))
                                mlx5_free_srq_wqe(srq,
                                                  be16toh(cqe64->wqe_counter));
                        return 1;
                }
        } else {
                if (is_equal_rsn(cqe64, rsn)) {
                        if (srq && (be32toh(cqe64->srqn_uidx) & 0xffffff))
                                mlx5_free_srq_wqe(srq,
                                                  be16toh(cqe64->wqe_counter));
                        return 1;
                }
        }

        return 0;
}

void __mlx5_cq_clean(struct mlx5_cq *cq, uint32_t rsn, struct mlx5_srq *srq)
{
        uint32_t prod_index;
        int nfreed = 0;
        struct mlx5_cqe64 *cqe64, *dest64;
        void *cqe, *dest;
        uint8_t owner_bit;
        int cqe_version;

        if (!cq || cq->flags & MLX5_CQ_FLAGS_DV_OWNED)
                return;

        /*
         * First we need to find the current producer index, so we
         * know where to start cleaning from.  It doesn't matter if HW
         * adds new entries after this loop -- the QP we're worried
         * about is already in RESET, so the new entries won't come
         * from our QP and therefore don't need to be checked.
         */
        for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
                if (prod_index == cq->cons_index + cq->ibv_cq.cqe)
                        break;

        /*
         * Now sweep backwards through the CQ, removing CQ entries
         * that match our QP by copying older entries on top of them.
         */
        cqe_version = (to_mctx(cq->ibv_cq.context))->cqe_version;
        while ((int) --prod_index - (int) cq->cons_index >= 0) {
                cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
                cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
                if (free_res_cqe(cqe64, rsn, srq, cqe_version)) {
                        ++nfreed;
                } else if (nfreed) {
                        dest = get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe);
                        dest64 = (cq->cqe_sz == 64) ? dest : dest + 64;
                        owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
                        memcpy(dest, cqe, cq->cqe_sz);
                        dest64->op_own = owner_bit |
                                (dest64->op_own & ~MLX5_CQE_OWNER_MASK);
                }
        }

        if (nfreed) {
                cq->cons_index += nfreed;
                /*
                 * Make sure update of buffer contents is done before
                 * updating consumer index.
                 */
                udma_to_device_barrier();
                update_cons_index(cq);
        }
}
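
/*
 * Sweep example (illustrative): with cons_index 0 and entries 0..9
 * where entries 5 and 7 belong to the QP being cleaned, the backwards
 * pass copies entry 6 up one slot (over 7) and entries 0..4 up two
 * slots, then advances cons_index by nfreed = 2 so the now-stale
 * leading slots are skipped on the next poll.
 */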

void mlx5_cq_clean(struct mlx5_cq *cq, uint32_t qpn, struct mlx5_srq *srq)
{
        mlx5_spin_lock(&cq->lock);
        __mlx5_cq_clean(cq, qpn, srq);
        mlx5_spin_unlock(&cq->lock);
}

static uint8_t sw_ownership_bit(int n, int nent)
{
        return (n & nent) ? 1 : 0;
}

static int is_hw(uint8_t own, int n, int mask)
{
        return (own & MLX5_CQE_OWNER_MASK) ^ !!(n & (mask + 1));
}

void mlx5_cq_resize_copy_cqes(struct mlx5_cq *cq)
{
        struct mlx5_cqe64 *scqe64;
        struct mlx5_cqe64 *dcqe64;
        void *start_cqe;
        void *scqe;
        void *dcqe;
        int ssize;
        int dsize;
        int i;
        uint8_t sw_own;

        ssize = cq->cqe_sz;
        dsize = cq->resize_cqe_sz;

        i = cq->cons_index;
        scqe = get_buf_cqe(cq->active_buf, i & cq->active_cqes, ssize);
        scqe64 = ssize == 64 ? scqe : scqe + 64;
        start_cqe = scqe;
        if (is_hw(scqe64->op_own, i, cq->active_cqes)) {
                fprintf(stderr, "expected cqe in sw ownership\n");
                return;
        }

        while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
                dcqe = get_buf_cqe(cq->resize_buf, (i + 1) & (cq->resize_cqes - 1), dsize);
                dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
                sw_own = sw_ownership_bit(i + 1, cq->resize_cqes);
                memcpy(dcqe, scqe, ssize);
                dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

                ++i;
                scqe = get_buf_cqe(cq->active_buf, i & cq->active_cqes, ssize);
                scqe64 = ssize == 64 ? scqe : scqe + 64;
                if (is_hw(scqe64->op_own, i, cq->active_cqes)) {
                        fprintf(stderr, "expected cqe in sw ownership\n");
                        return;
                }

                if (scqe == start_cqe) {
                        fprintf(stderr, "resize CQ failed to get resize CQE\n");
                        return;
                }
        }
        ++cq->cons_index;
}

int mlx5_alloc_cq_buf(struct mlx5_context *mctx, struct mlx5_cq *cq,
                      struct mlx5_buf *buf, int nent, int cqe_sz)
{
        struct mlx5_cqe64 *cqe;
        int i;
        struct mlx5_device *dev = to_mdev(mctx->ibv_ctx.device);
        int ret;
        enum mlx5_alloc_type type;
        enum mlx5_alloc_type default_type = MLX5_ALLOC_TYPE_ANON;

        if (mlx5_use_huge("HUGE_CQ"))
                default_type = MLX5_ALLOC_TYPE_HUGE;

        mlx5_get_alloc_type(MLX5_CQ_PREFIX, &type, default_type);

        ret = mlx5_alloc_prefered_buf(mctx, buf,
                                      align(nent * cqe_sz, dev->page_size),
                                      dev->page_size,
                                      type,
                                      MLX5_CQ_PREFIX);

        if (ret)
                return -1;

        memset(buf->buf, 0, nent * cqe_sz);

        for (i = 0; i < nent; ++i) {
                cqe = buf->buf + i * cqe_sz;
                cqe += cqe_sz == 128 ? 1 : 0;
                cqe->op_own = MLX5_CQE_INVALID << 4;
        }

        return 0;
}
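
/*
 * Initialization note: each entry is stamped MLX5_CQE_INVALID in the
 * high nibble of op_own; for 128-byte CQEs the "cqe += 1" pointer
 * arithmetic (on a struct mlx5_cqe64 *) lands the stamp in the second
 * 64-byte half, which is where get_sw_cqe() looks.  Invalid entries
 * are never handed to the consumer regardless of their owner bit.
 */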

int mlx5_free_cq_buf(struct mlx5_context *ctx, struct mlx5_buf *buf)
{
        return mlx5_free_actual_buf(ctx, buf);
}