sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c  (FreeBSD stable/10, MFC of r322810 and r322830)
1 /*-
2  * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27
28 #include <linux/module.h>
29 #include <rdma/ib_cache.h>
30 #include <rdma/ib_umem.h>
31 #include "mlx5_ib.h"
32 #include "user.h"
33 #include <dev/mlx5/mlx5_core/transobj.h>
34 #include <sys/priv.h>
35
36 #define IPV6_DEFAULT_HOPLIMIT 64
37
38
39 static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
40                                const struct ib_qp_attr *attr, int attr_mask,
41                                enum ib_qp_state cur_state, enum ib_qp_state new_state);
42
43 /* not supported currently */
44 static int workqueue_signature;
45
46 enum {
47         MLX5_IB_ACK_REQ_FREQ    = 8,
48 };
49
50 enum {
51         MLX5_IB_DEFAULT_SCHED_QUEUE     = 0x83,
52         MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
53         MLX5_IB_LINK_TYPE_IB            = 0,
54         MLX5_IB_LINK_TYPE_ETH           = 1
55 };
56
57 enum {
58         MLX5_IB_SQ_STRIDE       = 6,
59         MLX5_IB_CACHE_LINE_SIZE = 64,
60 };
61
62 enum {
63         MLX5_RQ_NUM_STATE       = MLX5_RQC_STATE_ERR + 1,
64         MLX5_SQ_NUM_STATE       = MLX5_SQC_STATE_ERR + 1,
65         MLX5_QP_STATE           = MLX5_QP_NUM_STATE + 1,
66         MLX5_QP_STATE_BAD       = MLX5_QP_STATE + 1,
67 };
68
69 static const u32 mlx5_ib_opcode[] = {
70         [IB_WR_SEND]                            = MLX5_OPCODE_SEND,
71         [IB_WR_SEND_WITH_IMM]                   = MLX5_OPCODE_SEND_IMM,
72         [IB_WR_RDMA_WRITE]                      = MLX5_OPCODE_RDMA_WRITE,
73         [IB_WR_RDMA_WRITE_WITH_IMM]             = MLX5_OPCODE_RDMA_WRITE_IMM,
74         [IB_WR_RDMA_READ]                       = MLX5_OPCODE_RDMA_READ,
75         [IB_WR_ATOMIC_CMP_AND_SWP]              = MLX5_OPCODE_ATOMIC_CS,
76         [IB_WR_ATOMIC_FETCH_AND_ADD]            = MLX5_OPCODE_ATOMIC_FA,
77         [IB_WR_SEND_WITH_INV]                   = MLX5_OPCODE_SEND_INVAL,
78         [IB_WR_LOCAL_INV]                       = MLX5_OPCODE_UMR,
79         [IB_WR_FAST_REG_MR]                     = MLX5_OPCODE_UMR,
80         [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = MLX5_OPCODE_ATOMIC_MASKED_CS,
81         [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = MLX5_OPCODE_ATOMIC_MASKED_FA,
82 };
83
84 struct umr_wr {
85         u64                             virt_addr;
86         struct ib_pd                   *pd;
87         unsigned int                    page_shift;
88         unsigned int                    npages;
89         u32                             length;
90         int                             access_flags;
91         u32                             mkey;
92 };
93
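/*
 * QP0 (SMI) and QP1 (GSI) are the per-port special QPs defined by the IB
 * spec; several paths below need to know whether a QP is one of them.
 */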
94 static int is_qp0(enum ib_qp_type qp_type)
95 {
96         return qp_type == IB_QPT_SMI;
97 }
98
99 static int is_qp1(enum ib_qp_type qp_type)
100 {
101         return qp_type == IB_QPT_GSI;
102 }
103
104 static int is_sqp(enum ib_qp_type qp_type)
105 {
106         return is_qp0(qp_type) || is_qp1(qp_type);
107 }
108
109 static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
110 {
111         return mlx5_buf_offset(&qp->buf, offset);
112 }
113
114 static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
115 {
116         return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
117 }
118
119 void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
120 {
121         return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
122 }
123
124
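/*
 * Query the QP context from firmware and return the hardware's current SQ
 * WQE counter, masked down to a ring index (sq.wqe_cnt is a power of two,
 * so "& (wqe_cnt - 1)" performs the wrap-around).
 */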
125 static int
126 query_wqe_idx(struct mlx5_ib_qp *qp)
127 {
128         struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
129         struct mlx5_query_qp_mbox_out *outb;
130         struct mlx5_qp_context *context;
131         int ret;
132
133         outb = kzalloc(sizeof(*outb), GFP_KERNEL);
134         if (!outb)
135                 return -ENOMEM;
136
137         context = &outb->ctx;
138
139         mutex_lock(&qp->mutex);
140         ret = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb));
141         if (ret)
142                 goto out_free;
143
144         ret = be16_to_cpu(context->hw_sq_wqe_counter) & (qp->sq.wqe_cnt - 1);
145
146 out_free:
147         mutex_unlock(&qp->mutex);
148         kfree(outb);
149
150         return ret;
151 }
152
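/*
 * Runs from the SQ-drained work item: if the WQE the hardware stopped at was
 * posted with signature pipelining, zero the opcode byte in its control
 * segment and mark the software WR context as signature-cancelled so that
 * completion processing can tell the request was aborted.
 */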
153 static int mlx5_handle_sig_pipelining(struct mlx5_ib_qp *qp)
154 {
155         int wqe_idx;
156
157         wqe_idx = query_wqe_idx(qp);
158         if (wqe_idx < 0) {
159                 printf("mlx5_ib: ERR: ""Failed to query QP 0x%x wqe index\n", qp->mqp.qpn);
160                 return wqe_idx;
161         }
162
163         if (qp->sq.swr_ctx[wqe_idx].sig_piped) {
164                 struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
165                 struct mlx5_wqe_ctrl_seg *cwqe;
166
167                 cwqe = mlx5_get_send_wqe(qp, wqe_idx);
168                 cwqe->opmod_idx_opcode = cpu_to_be32(be32_to_cpu(cwqe->opmod_idx_opcode) & 0xffffff00);
169                 qp->sq.swr_ctx[wqe_idx].w_list.opcode |= MLX5_OPCODE_SIGNATURE_CANCELED;
170                 mlx5_ib_dbg(dev, "Cancel QP 0x%x wqe_index 0x%x\n",
171                             qp->mqp.qpn, wqe_idx);
172         }
173
174         return 0;
175 }
176
177 static void mlx5_ib_sqd_work(struct work_struct *work)
178 {
179         struct mlx5_ib_sqd *sqd;
180         struct mlx5_ib_qp *qp;
181         struct ib_qp_attr qp_attr;
182
183         sqd = container_of(work, struct mlx5_ib_sqd, work);
184         qp = sqd->qp;
185
186         if (mlx5_handle_sig_pipelining(qp))
187                 goto out;
188
189         mutex_lock(&qp->mutex);
190         if (__mlx5_ib_modify_qp(&qp->ibqp, &qp_attr, 0, IB_QPS_SQD, IB_QPS_RTS))
191                 printf("mlx5_ib: ERR: ""Failed to resume QP 0x%x\n", qp->mqp.qpn);
192         mutex_unlock(&qp->mutex);
193 out:
194         kfree(sqd);
195 }
196
197 static void mlx5_ib_sigerr_sqd_event(struct mlx5_ib_qp *qp)
198 {
199         struct mlx5_ib_sqd *sqd;
200
201         sqd = kzalloc(sizeof(*sqd), GFP_ATOMIC);
202         if (!sqd)
203                 return;
204
205         sqd->qp = qp;
206         INIT_WORK(&sqd->work, mlx5_ib_sqd_work);
207         queue_work(mlx5_ib_wq, &sqd->work);
208 }
209
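/*
 * Firmware QP async-event handler: an SQ_DRAINED event while the QP was not
 * explicitly moved to SQD is the signature-pipelining drain handled by the
 * work item above; everything else is translated to the corresponding IB
 * event and handed to the consumer's event handler, with PATH_MIG also
 * switching the active port to the alternate one.
 */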
210 static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
211 {
212         struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
213         struct ib_event event;
214
215         if (type == MLX5_EVENT_TYPE_SQ_DRAINED &&
216             to_mibqp(qp)->state != IB_QPS_SQD) {
217                 mlx5_ib_sigerr_sqd_event(to_mibqp(qp));
218                 return;
219         }
220
221         if (type == MLX5_EVENT_TYPE_PATH_MIG)
222                 to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
223
224         if (ibqp->event_handler) {
225                 event.device     = ibqp->device;
226                 event.element.qp = ibqp;
227                 switch (type) {
228                 case MLX5_EVENT_TYPE_PATH_MIG:
229                         event.event = IB_EVENT_PATH_MIG;
230                         break;
231                 case MLX5_EVENT_TYPE_COMM_EST:
232                         event.event = IB_EVENT_COMM_EST;
233                         break;
234                 case MLX5_EVENT_TYPE_SQ_DRAINED:
235                         event.event = IB_EVENT_SQ_DRAINED;
236                         break;
237                 case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
238                         event.event = IB_EVENT_QP_LAST_WQE_REACHED;
239                         break;
240                 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
241                         event.event = IB_EVENT_QP_FATAL;
242                         break;
243                 case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
244                         event.event = IB_EVENT_PATH_MIG_ERR;
245                         break;
246                 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
247                         event.event = IB_EVENT_QP_REQ_ERR;
248                         break;
249                 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
250                         event.event = IB_EVENT_QP_ACCESS_ERR;
251                         break;
252                 default:
253                         printf("mlx5_ib: WARN: ""Unexpected event type %d on QP %06x\n", type, qp->qpn);
254                         return;
255                 }
256
257                 ibqp->event_handler(&event, ibqp->qp_context);
258         }
259 }
260
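/*
 * Work out the RQ geometry.  For user QPs the WQE count and stride come from
 * the create command; for kernel QPs the stride is derived from max_recv_sge
 * (plus an optional signature segment), rounded up to a power of two, and the
 * ring is sized from max_recv_wr.  Illustrative example, assuming a 16-byte
 * data segment: max_recv_sge = 3 gives wqe_size = 48, rounded up to a 64-byte
 * stride, i.e. rq.wqe_shift = 6.
 */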
261 static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
262                        int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
263 {
264         int wqe_size;
265         int wq_size;
266
267         /* Sanity check RQ size before proceeding */
268         if (cap->max_recv_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)))
269                 return -EINVAL;
270
271         if (!has_rq) {
272                 qp->rq.max_gs = 0;
273                 qp->rq.wqe_cnt = 0;
274                 qp->rq.wqe_shift = 0;
275                 cap->max_recv_wr = 0;
276                 cap->max_recv_sge = 0;
277         } else {
278                 if (ucmd) {
279                         qp->rq.wqe_cnt = ucmd->rq_wqe_count;
280                         qp->rq.wqe_shift = ucmd->rq_wqe_shift;
281                         qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
282                         qp->rq.max_post = qp->rq.wqe_cnt;
283                 } else {
284                         wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
285                         wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
286                         wqe_size = roundup_pow_of_two(wqe_size);
287                         wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
288                         wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
289                         qp->rq.wqe_cnt = wq_size / wqe_size;
290                         if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq)) {
291                                 mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
292                                             wqe_size,
293                                             MLX5_CAP_GEN(dev->mdev,
294                                                          max_wqe_sz_rq));
295                                 return -EINVAL;
296                         }
297                         qp->rq.wqe_shift = ilog2(wqe_size);
298                         qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
299                         qp->rq.max_post = qp->rq.wqe_cnt;
300                 }
301         }
302
303         return 0;
304 }
305
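/*
 * Fixed (non-data) segment overhead of a send WQE for each transport type.
 * XRC initiator adds an XRC segment on top of the RC overhead, hence the
 * fall-through; XRC target QPs have no send queue at all.
 */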
306 static int sq_overhead(enum ib_qp_type qp_type)
307 {
308         int size = 0;
309
310         switch (qp_type) {
311         case IB_QPT_XRC_INI:
312                 size += sizeof(struct mlx5_wqe_xrc_seg);
313                 /* fall through */
314         case IB_QPT_RC:
315                 size += sizeof(struct mlx5_wqe_ctrl_seg) +
316                         sizeof(struct mlx5_wqe_atomic_seg) +
317                         sizeof(struct mlx5_wqe_raddr_seg) +
318                         sizeof(struct mlx5_wqe_umr_ctrl_seg) +
319                         sizeof(struct mlx5_mkey_seg);
320                 break;
321
322         case IB_QPT_XRC_TGT:
323                 return 0;
324
325         case IB_QPT_UC:
326                 size += sizeof(struct mlx5_wqe_ctrl_seg) +
327                         sizeof(struct mlx5_wqe_raddr_seg) +
328                         sizeof(struct mlx5_wqe_umr_ctrl_seg) +
329                         sizeof(struct mlx5_mkey_seg);
330                 break;
331
332         case IB_QPT_UD:
333         case IB_QPT_SMI:
334         case IB_QPT_GSI:
335                 size += sizeof(struct mlx5_wqe_ctrl_seg) +
336                         sizeof(struct mlx5_wqe_datagram_seg);
337                 break;
338
339         default:
340                 return -EINVAL;
341         }
342
343         return size;
344 }
345
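/*
 * Per-WQE send size: the larger of the scatter/gather layout (overhead plus
 * max_send_sge data segments) and the inline layout (overhead plus the inline
 * header and max_inline_data), aligned up to the 64-byte send WQE basic block
 * (MLX5_SEND_WQE_BB).
 */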
346 static int calc_send_wqe(struct ib_qp_init_attr *attr)
347 {
348         int inl_size = 0;
349         int size;
350
351         size = sq_overhead(attr->qp_type);
352         if (size < 0)
353                 return size;
354
355         if (attr->cap.max_inline_data) {
356                 inl_size = size + sizeof(struct mlx5_wqe_inline_seg) +
357                         attr->cap.max_inline_data;
358         }
359
360         size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
361         return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
362 }
363
364 static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size)
365 {
366         int max_sge;
367
368         if (attr->qp_type == IB_QPT_RC)
369                 max_sge = (min_t(int, wqe_size, 512) -
370                            sizeof(struct mlx5_wqe_ctrl_seg) -
371                            sizeof(struct mlx5_wqe_raddr_seg)) /
372                         sizeof(struct mlx5_wqe_data_seg);
373         else if (attr->qp_type == IB_QPT_XRC_INI)
374                 max_sge = (min_t(int, wqe_size, 512) -
375                            sizeof(struct mlx5_wqe_ctrl_seg) -
376                            sizeof(struct mlx5_wqe_xrc_seg) -
377                            sizeof(struct mlx5_wqe_raddr_seg)) /
378                         sizeof(struct mlx5_wqe_data_seg);
379         else
380                 max_sge = (wqe_size - sq_overhead(attr->qp_type)) /
381                         sizeof(struct mlx5_wqe_data_seg);
382
383         return min_t(int, max_sge, wqe_size - sq_overhead(attr->qp_type) /
384                      sizeof(struct mlx5_wqe_data_seg));
385 }
386
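/*
 * Size the send queue: sq.wqe_cnt counts 64-byte basic blocks, sq.max_post
 * counts whole (possibly multi-block) WQEs, and the function returns the SQ
 * buffer size in bytes (or a negative errno).  max_inline_data is what is
 * left of a WQE after the fixed overhead and the inline header.
 */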
387 static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
388                         struct mlx5_ib_qp *qp)
389 {
390         int wqe_size;
391         int wq_size;
392
393         if (!attr->cap.max_send_wr)
394                 return 0;
395
396         wqe_size = calc_send_wqe(attr);
397         mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size);
398         if (wqe_size < 0)
399                 return wqe_size;
400
401         if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
402                 mlx5_ib_warn(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
403                              wqe_size, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
404                 return -EINVAL;
405         }
406
407         qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
408                 sizeof(struct mlx5_wqe_inline_seg);
409         attr->cap.max_inline_data = qp->max_inline_data;
410
411         wq_size = roundup_pow_of_two(attr->cap.max_send_wr * (u64)wqe_size);
412         qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
413         if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
414                 mlx5_ib_warn(dev, "wqe count(%d) exceeds limits(%d)\n",
415                              qp->sq.wqe_cnt,
416                              1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
417                 return -ENOMEM;
418         }
419         qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
420         qp->sq.max_gs = get_send_sge(attr, wqe_size);
421         if (qp->sq.max_gs < attr->cap.max_send_sge) {
422                 mlx5_ib_warn(dev, "max sge(%d) exceeds limits(%d)\n",
423                              qp->sq.max_gs, attr->cap.max_send_sge);
424                 return -ENOMEM;
425         }
426
427         attr->cap.max_send_sge = qp->sq.max_gs;
428         qp->sq.max_post = wq_size / wqe_size;
429         attr->cap.max_send_wr = qp->sq.max_post;
430
431         return wq_size;
432 }
433
434 static int set_user_buf_size(struct mlx5_ib_dev *dev,
435                             struct mlx5_ib_qp *qp,
436                             struct mlx5_ib_create_qp *ucmd,
437                             struct ib_qp_init_attr *attr)
438 {
439         int desc_sz = 1 << qp->sq.wqe_shift;
440
441         if (desc_sz > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
442                 mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
443                              desc_sz, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
444                 return -EINVAL;
445         }
446
447         if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) {
448                         mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n",
449                              ucmd->sq_wqe_count);
450                 return -EINVAL;
451         }
452
453         qp->sq.wqe_cnt = ucmd->sq_wqe_count;
454
455         if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
456                 mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
457                              qp->sq.wqe_cnt,
458                              1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
459                 return -EINVAL;
460         }
461
462
463         if (attr->qp_type == IB_QPT_RAW_PACKET) {
464                 qp->buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
465                 qp->sq_buf_size = qp->sq.wqe_cnt << 6;
466         } else {
467                 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
468                         (qp->sq.wqe_cnt << 6);
469                 qp->sq_buf_size = 0;
470         }
471
472         return 0;
473 }
474
475 static int qp_has_rq(struct ib_qp_init_attr *attr)
476 {
477         if (attr->qp_type == IB_QPT_XRC_INI ||
478             attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
479             !attr->cap.max_recv_wr)
480                 return 0;
481
482         return 1;
483 }
484
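/*
 * UUAR (micro UAR / blue-flame register) bookkeeping for user contexts.
 * Each UAR page carries MLX5_BF_REGS_PER_PAGE registers, of which only
 * MLX5_NON_FP_BF_REGS_PER_PAGE are handed out here, so next_uuar() skips
 * every index with n % 4 == 2 or 3.  Index 0 is the shared low-latency
 * (doorbell-only) register, index 2 is returned for the fast-path class,
 * medium-class registers start at first_med_uuar() and the high-latency
 * class occupies the remainder starting at first_hi_uuar().
 */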
485 static int first_med_uuar(void)
486 {
487         return 1;
488 }
489
490 static int next_uuar(int n)
491 {
492         n++;
493
494         while (((n % 4) & 2))
495                 n++;
496
497         return n;
498 }
499
500 static int num_med_uuar(struct mlx5_uuar_info *uuari)
501 {
502         int n;
503
504         n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
505                 uuari->num_low_latency_uuars - 1;
506
507         return n >= 0 ? n : 0;
508 }
509
510 static int max_uuari(struct mlx5_uuar_info *uuari)
511 {
512         return uuari->num_uars * 4;
513 }
514
515 static int first_hi_uuar(struct mlx5_uuar_info *uuari)
516 {
517         int med;
518         int i;
519         int t;
520
521         med = num_med_uuar(uuari);
522         for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
523                 t++;
524                 if (t == med)
525                         return next_uuar(i);
526         }
527
528         return 0;
529 }
530
531 static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
532 {
533         int i;
534
535         for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
536                 if (!test_bit(i, uuari->bitmap)) {
537                         set_bit(i, uuari->bitmap);
538                         uuari->count[i]++;
539                         return i;
540                 }
541         }
542
543         return -ENOMEM;
544 }
545
546 static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
547 {
548         int minidx = first_med_uuar();
549         int i;
550
551         for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
552                 if (uuari->count[i] < uuari->count[minidx])
553                         minidx = i;
554         }
555
556         uuari->count[minidx]++;
557
558         return minidx;
559 }
560
561 static int alloc_uuar(struct mlx5_uuar_info *uuari,
562                       enum mlx5_ib_latency_class lat)
563 {
564         int uuarn = -EINVAL;
565
566         mutex_lock(&uuari->lock);
567         switch (lat) {
568         case MLX5_IB_LATENCY_CLASS_LOW:
569                 uuarn = 0;
570                 uuari->count[uuarn]++;
571                 break;
572
573         case MLX5_IB_LATENCY_CLASS_MEDIUM:
574                 if (uuari->ver < 2)
575                         uuarn = -ENOMEM;
576                 else
577                         uuarn = alloc_med_class_uuar(uuari);
578                 break;
579
580         case MLX5_IB_LATENCY_CLASS_HIGH:
581                 if (uuari->ver < 2)
582                         uuarn = -ENOMEM;
583                 else
584                         uuarn = alloc_high_class_uuar(uuari);
585                 break;
586
587         case MLX5_IB_LATENCY_CLASS_FAST_PATH:
588                 uuarn = 2;
589                 break;
590         }
591         mutex_unlock(&uuari->lock);
592
593         return uuarn;
594 }
595
596 static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
597 {
598         clear_bit(uuarn, uuari->bitmap);
599         --uuari->count[uuarn];
600 }
601
602 static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
603 {
604         clear_bit(uuarn, uuari->bitmap);
605         --uuari->count[uuarn];
606 }
607
608 static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
609 {
610         int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
611         int high_uuar = nuuars - uuari->num_low_latency_uuars;
612
613         mutex_lock(&uuari->lock);
614         if (uuarn == 0) {
615                 --uuari->count[uuarn];
616                 goto out;
617         }
618
619         if (uuarn < high_uuar) {
620                 free_med_class_uuar(uuari, uuarn);
621                 goto out;
622         }
623
624         free_high_class_uuar(uuari, uuarn);
625
626 out:
627         mutex_unlock(&uuari->lock);
628 }
629
630 static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
631 {
632         switch (state) {
633         case IB_QPS_RESET:      return MLX5_QP_STATE_RST;
634         case IB_QPS_INIT:       return MLX5_QP_STATE_INIT;
635         case IB_QPS_RTR:        return MLX5_QP_STATE_RTR;
636         case IB_QPS_RTS:        return MLX5_QP_STATE_RTS;
637         case IB_QPS_SQD:        return MLX5_QP_STATE_SQD;
638         case IB_QPS_SQE:        return MLX5_QP_STATE_SQER;
639         case IB_QPS_ERR:        return MLX5_QP_STATE_ERR;
640         default:                return -1;
641         }
642 }
643
644 static int to_mlx5_st(enum ib_qp_type type)
645 {
646         switch (type) {
647         case IB_QPT_RC:                 return MLX5_QP_ST_RC;
648         case IB_QPT_UC:                 return MLX5_QP_ST_UC;
649         case IB_QPT_UD:                 return MLX5_QP_ST_UD;
650         case IB_QPT_XRC_INI:
651         case IB_QPT_XRC_TGT:            return MLX5_QP_ST_XRC;
652         case IB_QPT_SMI:                return MLX5_QP_ST_QP0;
653         case IB_QPT_GSI:                return MLX5_QP_ST_QP1;
654         case IB_QPT_RAW_IPV6:           return MLX5_QP_ST_RAW_IPV6;
655         case IB_QPT_RAW_PACKET:
656         case IB_QPT_RAW_ETHERTYPE:      return MLX5_QP_ST_RAW_ETHERTYPE;
657         case IB_QPT_MAX:
658         default:                return -EINVAL;
659         }
660 }
661
662 static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
663                              struct mlx5_ib_cq *recv_cq);
664 static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
665                                struct mlx5_ib_cq *recv_cq);
666
667 static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
668 {
669         return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
670 }
671
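/*
 * Build a user-space QP: pick a UUAR (honouring an explicit write-combining
 * UAR request from the experimental create command, otherwise falling back
 * from the high to the medium to the low latency class), size the buffers
 * from the user command, pin the user buffer with ib_umem_get(), fill the
 * page list and doorbell mapping in the CREATE_QP mailbox, and report the
 * chosen UUAR index back to user space.
 */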
672 static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
673                           struct mlx5_ib_qp *qp, struct ib_udata *udata,
674                           struct ib_qp_init_attr *attr,
675                           struct mlx5_create_qp_mbox_in **in,
676                           int *inlen,
677                           struct mlx5_exp_ib_create_qp *ucmd)
678 {
679         struct mlx5_exp_ib_create_qp_resp resp;
680         struct mlx5_ib_ucontext *context;
681         int page_shift = 0;
682         int uar_index;
683         int npages;
684         u32 offset = 0;
685         int uuarn;
686         int ncont = 0;
687         int err;
688
689         context = to_mucontext(pd->uobject->context);
690         memset(&resp, 0, sizeof(resp));
691         resp.size_of_prefix = offsetof(struct mlx5_exp_ib_create_qp_resp, prefix_reserved);
692         /*
693          * TBD: should come from the verbs when we have the API
694          */
695         if (ucmd->exp.comp_mask & MLX5_EXP_CREATE_QP_MASK_WC_UAR_IDX) {
696                 if (ucmd->exp.wc_uar_index == MLX5_EXP_CREATE_QP_DB_ONLY_UUAR) {
697                         /* Assign LATENCY_CLASS_LOW (DB only UUAR) to this QP */
698                         uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
699                         if (uuarn < 0) {
700                                 mlx5_ib_warn(dev, "DB only uuar allocation failed\n");
701                                 return uuarn;
702                         }
703                         uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
704                 } else if (ucmd->exp.wc_uar_index >= MLX5_IB_MAX_CTX_DYNAMIC_UARS ||
705                            context->dynamic_wc_uar_index[ucmd->exp.wc_uar_index] ==
706                            MLX5_IB_INVALID_UAR_INDEX) {
707                         mlx5_ib_warn(dev, "dynamic uuar allocation failed\n");
708                         return -EINVAL;
709                 } else {
710                         uar_index = context->dynamic_wc_uar_index[ucmd->exp.wc_uar_index];
711                         uuarn = MLX5_EXP_INVALID_UUAR;
712                 }
713         } else {
714                 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
715                 if (uuarn < 0) {
716                         mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
717                         mlx5_ib_dbg(dev, "reverting to medium latency\n");
718                         uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
719                         if (uuarn < 0) {
720                                 mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
721                                 mlx5_ib_dbg(dev, "reverting to high latency\n");
722                                 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
723                                 if (uuarn < 0) {
724                                         mlx5_ib_warn(dev, "uuar allocation failed\n");
725                                         return uuarn;
726                                 }
727                         }
728                 }
729                 uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
730         }
731         mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
732
733         qp->rq.offset = 0;
734         qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
735         qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
736
737         err = set_user_buf_size(dev, qp, (struct mlx5_ib_create_qp *)ucmd, attr);
738         if (err)
739                 goto err_uuar;
740
741         if (ucmd->buf_addr && qp->buf_size) {
742                 qp->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr,
743                                        qp->buf_size, 0, 0);
744                 if (IS_ERR(qp->umem)) {
745                         mlx5_ib_warn(dev, "umem_get failed\n");
746                         err = PTR_ERR(qp->umem);
747                         goto err_uuar;
748                 }
749         } else {
750                 qp->umem = NULL;
751         }
752
753         if (qp->umem) {
754                 mlx5_ib_cont_pages(qp->umem, ucmd->buf_addr, &npages, &page_shift,
755                                    &ncont, NULL);
756                 err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, &offset);
757                 if (err) {
758                         mlx5_ib_warn(dev, "bad offset\n");
759                         goto err_umem;
760                 }
761                 mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
762                             (unsigned long long)ucmd->buf_addr, qp->buf_size,
763                             npages, page_shift, ncont, offset);
764         }
765
766         *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
767         *in = mlx5_vzalloc(*inlen);
768         if (!*in) {
769                 err = -ENOMEM;
770                 goto err_umem;
771         }
772         if (qp->umem)
773                 mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
774         (*in)->ctx.log_pg_sz_remote_qpn =
775                 cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
776         (*in)->ctx.params2 = cpu_to_be32(offset << 6);
777
778         (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
779         resp.uuar_index = uuarn;
780         qp->uuarn = uuarn;
781
782         err = mlx5_ib_db_map_user(context, ucmd->db_addr, &qp->db);
783         if (err) {
784                 mlx5_ib_warn(dev, "map failed\n");
785                 goto err_free;
786         }
787
788         err = ib_copy_to_udata(udata, &resp, sizeof(struct mlx5_ib_create_qp_resp));
789         if (err) {
790                 mlx5_ib_err(dev, "copy failed\n");
791                 goto err_unmap;
792         }
793         qp->create_type = MLX5_QP_USER;
794
795         return 0;
796
797 err_unmap:
798         mlx5_ib_db_unmap_user(context, &qp->db);
799
800 err_free:
801         kvfree(*in);
802
803 err_umem:
804         if (qp->umem)
805                 ib_umem_release(qp->umem);
806
807 err_uuar:
808         free_uuar(&context->uuari, uuarn);
809         return err;
810 }
811
812 static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
813 {
814         struct mlx5_ib_ucontext *context;
815
816         context = to_mucontext(pd->uobject->context);
817         mlx5_ib_db_unmap_user(context, &qp->db);
818         if (qp->umem)
819                 ib_umem_release(qp->umem);
820         if (qp->sq_umem)
821                 ib_umem_release(qp->sq_umem);
822         /*
823          * Free only the UUARs handled by the kernel.
824          * UUARs of UARs allocated dynamically are handled by user.
825          */
826         if (qp->uuarn != MLX5_EXP_INVALID_UUAR)
827                 free_uuar(&context->uuari, qp->uuarn);
828 }
829
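/*
 * Build a kernel-space QP: kernel QPs always take a low-latency UUAR, the
 * work queue buffer is allocated with mlx5_buf_alloc(), and per-WR context
 * arrays (swr_ctx/rwr_ctx) are kept for completion processing.  Only
 * IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK is accepted in create_flags.
 */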
830 static int create_kernel_qp(struct mlx5_ib_dev *dev,
831                             struct ib_qp_init_attr *init_attr,
832                             struct mlx5_ib_qp *qp,
833                             struct mlx5_create_qp_mbox_in **in, int *inlen)
834 {
835         enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
836         struct mlx5_uuar_info *uuari;
837         int uar_index;
838         int uuarn;
839         int err;
840
841         uuari = &dev->mdev->priv.uuari;
842         if (init_attr->create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
843                 return -EINVAL;
844
845         uuarn = alloc_uuar(uuari, lc);
846         if (uuarn < 0) {
847                 mlx5_ib_warn(dev, "\n");
848                 return -ENOMEM;
849         }
850
851         qp->bf = &uuari->bfs[uuarn];
852         uar_index = qp->bf->uar->index;
853
854         err = calc_sq_size(dev, init_attr, qp);
855         if (err < 0) {
856                 mlx5_ib_warn(dev, "err %d\n", err);
857                 goto err_uuar;
858         }
859
860         qp->rq.offset = 0;
861         qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
862         qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
863
864         err = mlx5_buf_alloc(dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf);
865         if (err) {
866                 mlx5_ib_warn(dev, "err %d\n", err);
867                 goto err_uuar;
868         }
869
870         qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
871         *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
872         *in = mlx5_vzalloc(*inlen);
873         if (!*in) {
874                 err = -ENOMEM;
875                 goto err_buf;
876         }
877         (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
878         (*in)->ctx.log_pg_sz_remote_qpn =
879                 cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
880         /* Set "fast registration enabled" for all kernel QPs */
881         (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
882         (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
883
884         mlx5_fill_page_array(&qp->buf, (*in)->pas);
885
886         err = mlx5_db_alloc(dev->mdev, &qp->db);
887         if (err) {
888                 mlx5_ib_warn(dev, "err %d\n", err);
889                 goto err_free;
890         }
891
892         qp->sq.swr_ctx = kcalloc(qp->sq.wqe_cnt, sizeof(*qp->sq.swr_ctx),
893                                  GFP_KERNEL);
894         qp->rq.rwr_ctx = kcalloc(qp->rq.wqe_cnt, sizeof(*qp->rq.rwr_ctx),
895                                  GFP_KERNEL);
896         if (!qp->sq.swr_ctx || !qp->rq.rwr_ctx) {
897                 err = -ENOMEM;
898                 goto err_wrid;
899         }
900         qp->create_type = MLX5_QP_KERNEL;
901
902         return 0;
903
904 err_wrid:
905         mlx5_db_free(dev->mdev, &qp->db);
906         kfree(qp->sq.swr_ctx);
907         kfree(qp->rq.rwr_ctx);
908
909 err_free:
910         kvfree(*in);
911
912 err_buf:
913         mlx5_buf_free(dev->mdev, &qp->buf);
914
915 err_uuar:
916         free_uuar(&dev->mdev->priv.uuari, uuarn);
917         return err;
918 }
919
920 static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
921 {
922         mlx5_db_free(dev->mdev, &qp->db);
923         kfree(qp->sq.swr_ctx);
924         kfree(qp->rq.rwr_ctx);
925         mlx5_buf_free(dev->mdev, &qp->buf);
926         free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn);
927 }
928
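/*
 * RQ type for the QP context: SRQ-attached and XRC QPs point at an SRQ, QPs
 * created without a receive queue get a zero-length RQ, and everything else
 * gets a regular RQ.
 */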
929 static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
930 {
931         enum ib_qp_type qt = attr->qp_type;
932
933         if (attr->srq || (qt == IB_QPT_XRC_TGT) || (qt == IB_QPT_XRC_INI))
934                 return cpu_to_be32(MLX5_SRQ_RQ);
935         else if (!qp->has_rq)
936                 return cpu_to_be32(MLX5_ZERO_LEN_RQ);
937         else
938                 return cpu_to_be32(MLX5_NON_ZERO_RQ);
939 }
940
941 static int is_connected(enum ib_qp_type qp_type)
942 {
943         if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC)
944                 return 1;
945
946         return 0;
947 }
948
949 static void get_cqs(enum ib_qp_type qp_type,
950                     struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
951                     struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
952 {
953         switch (qp_type) {
954         case IB_QPT_XRC_TGT:
955                 *send_cq = NULL;
956                 *recv_cq = NULL;
957                 break;
958         case IB_QPT_XRC_INI:
959                 *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
960                 *recv_cq = NULL;
961                 break;
962
963         case IB_QPT_SMI:
964         case IB_QPT_GSI:
965         case IB_QPT_RC:
966         case IB_QPT_UC:
967         case IB_QPT_UD:
968         case IB_QPT_RAW_IPV6:
969         case IB_QPT_RAW_ETHERTYPE:
970         case IB_QPT_RAW_PACKET:
971                 *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
972                 *recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
973                 break;
974
975         case IB_QPT_MAX:
976         default:
977                 *send_cq = NULL;
978                 *recv_cq = NULL;
979                 break;
980         }
981 }
982
983 enum {
984         MLX5_QP_END_PAD_MODE_ALIGN      = MLX5_WQ_END_PAD_MODE_ALIGN,
985         MLX5_QP_END_PAD_MODE_NONE       = MLX5_WQ_END_PAD_MODE_NONE,
986 };
987
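/*
 * Common creation path for all QP types: validate the create flags, copy in
 * the (experimental) user command when present, size the RQ and SQ, build
 * the CREATE_QP mailbox through the user or kernel helper above, fill in the
 * transport, CQ, SRQ and XRCD fields, issue the firmware command, and link
 * the new QP into the per-device and per-CQ lists used by the reset flow.
 */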
988 static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
989                             struct ib_qp_init_attr *init_attr,
990                             struct ib_udata *udata, struct mlx5_ib_qp *qp)
991 {
992         struct mlx5_ib_resources *devr = &dev->devr;
993         struct mlx5_core_dev *mdev = dev->mdev;
994         struct mlx5_create_qp_mbox_in *in = NULL;
995         struct mlx5_exp_ib_create_qp ucmd;
996         struct mlx5_ib_create_qp *pucmd = NULL;
997         struct mlx5_ib_cq *send_cq;
998         struct mlx5_ib_cq *recv_cq;
999         unsigned long flags;
1000         int inlen = sizeof(*in);
1001         size_t ucmd_size;
1002         int err;
1003         int st;
1004         u32 uidx;
1005         void *qpc;
1006
1007         mutex_init(&qp->mutex);
1008         spin_lock_init(&qp->sq.lock);
1009         spin_lock_init(&qp->rq.lock);
1010
1011         if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
1012                 if (!MLX5_CAP_GEN(mdev, block_lb_mc)) {
1013                         mlx5_ib_warn(dev, "block multicast loopback isn't supported\n");
1014                         return -EINVAL;
1015                 } else {
1016                         qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
1017                 }
1018         }
1019
1020         if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
1021                 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
1022
1023         if (pd && pd->uobject) {
1024                 memset(&ucmd, 0, sizeof(ucmd));
1025                 ucmd_size = sizeof(struct mlx5_ib_create_qp);
1026                 if (ucmd_size > offsetof(struct mlx5_exp_ib_create_qp, size_of_prefix)) {
1027                         mlx5_ib_warn(dev, "mlx5_ib_create_qp is too big to fit as prefix of mlx5_exp_ib_create_qp\n");
1028                         return -EINVAL;
1029                 }
1030                 err = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, ucmd_size));
1031                 if (err) {
1032                         mlx5_ib_err(dev, "copy failed\n");
1033                         return err;
1034                 }
1035                 pucmd = (struct mlx5_ib_create_qp *)&ucmd;
1036                 if (ucmd.exp.comp_mask & MLX5_EXP_CREATE_QP_MASK_UIDX)
1037                         uidx = ucmd.exp.uidx;
1038                 else
1039                         uidx = 0xffffff;
1040
1041                 qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
1042         } else {
1043                 qp->wq_sig = !!workqueue_signature;
1044                 uidx = 0xffffff;
1045         }
1046
1047         qp->has_rq = qp_has_rq(init_attr);
1048         err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
1049                           qp, (pd && pd->uobject) ? pucmd : NULL);
1050         if (err) {
1051                 mlx5_ib_warn(dev, "err %d\n", err);
1052                 return err;
1053         }
1054
1055         if (pd) {
1056                 if (pd->uobject) {
1057                         __u32 max_wqes =
1058                                 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
1059                         mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
1060                         if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
1061                             ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
1062                                 mlx5_ib_warn(dev, "invalid rq params\n");
1063                                 return -EINVAL;
1064                         }
1065                         if (ucmd.sq_wqe_count > max_wqes) {
1066                                 mlx5_ib_warn(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
1067                                              ucmd.sq_wqe_count, max_wqes);
1068                                 return -EINVAL;
1069                         }
1070                         err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
1071                                              &inlen, &ucmd);
1072                         if (err)
1073                                 mlx5_ib_warn(dev, "err %d\n", err);
1074                 } else {
1075                         if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
1076                                 mlx5_ib_warn(dev, "Raw Eth QP is disabled for Kernel consumers\n");
1077                                 return -EINVAL;
1078                         }
1079                         err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
1080                         if (err)
1081                                 mlx5_ib_warn(dev, "err %d\n", err);
1082                         else
1083                                 qp->pa_lkey = to_mpd(pd)->pa_lkey;
1084                 }
1085
1086                 if (err)
1087                         return err;
1088         } else {
1089                 in = mlx5_vzalloc(sizeof(*in));
1090                 if (!in)
1091                         return -ENOMEM;
1092
1093                 qp->create_type = MLX5_QP_EMPTY;
1094         }
1095
1096         if (is_sqp(init_attr->qp_type))
1097                 qp->port = init_attr->port_num;
1098
1099         st = to_mlx5_st(init_attr->qp_type);
1100         if (st < 0) {
1101                 mlx5_ib_warn(dev, "invalid service type\n");
1102                 err = st;
1103                 goto err_create;
1104         }
1105         in->ctx.flags |= cpu_to_be32(st << 16 | MLX5_QP_PM_MIGRATED << 11);
1106
1107         in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
1108
1109         if (qp->wq_sig)
1110                 in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
1111
1112         if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
1113                 in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
1114
1115         if (qp->flags &  MLX5_IB_QP_CAP_RX_END_PADDING)
1116                 in->ctx.flags |= cpu_to_be32(MLX5_QP_END_PAD_MODE_ALIGN << 2);
1117         else
1118                 in->ctx.flags |= cpu_to_be32(MLX5_QP_END_PAD_MODE_NONE << 2);
1119
1120         if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
1121                 int rcqe_sz;
1122                 int scqe_sz;
1123
1124                 rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
1125                 scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
1126
1127                 if (rcqe_sz == 128) {
1128                         in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
1129                 } else {
1130                         in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
1131                 }
1132
1133                 if (init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
1134                         in->ctx.cs_req = 0;
1135                 } else {
1136                         if (scqe_sz == 128)
1137                                 in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
1138                         else
1139                                 in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
1140                 }
1141         }
1142
1143         if (qp->rq.wqe_cnt) {
1144                 in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
1145                 in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
1146         }
1147
1148         in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
1149
1150         if (qp->sq.wqe_cnt)
1151                 in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
1152         else
1153                 in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
1154
1155         /* Set default resources */
1156         switch (init_attr->qp_type) {
1157         case IB_QPT_XRC_TGT:
1158                 in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
1159                 in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
1160                 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
1161                 in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
1162                 break;
1163         case IB_QPT_XRC_INI:
1164                 in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
1165                 in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
1166                 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
1167                 break;
1168         default:
1169                 if (init_attr->srq) {
1170                         in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
1171                         in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
1172                 } else {
1173                         in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
1174                         in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s1)->msrq.srqn);
1175                 }
1176         }
1177
1178         if (init_attr->send_cq)
1179                 in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
1180
1181         if (init_attr->recv_cq)
1182                 in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
1183
1184         in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
1185
1186         if (MLX5_CAP_GEN(mdev, cqe_version)) {
1187                 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
1188                 /* 0xffffff means we ask to work with cqe version 0 */
1189                 MLX5_SET(qpc, qpc, user_index, uidx);
1190         }
1191
1192         if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
1193                 if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) {
1194                         mlx5_ib_warn(dev, "Raw Ethernet QP is allowed only for Ethernet link layer\n");
1195                         return -ENOSYS;
1196                 }
1197                 if (ucmd.exp.comp_mask & MLX5_EXP_CREATE_QP_MASK_SQ_BUFF_ADD) {
1198                         qp->sq_buf_addr = ucmd.exp.sq_buf_addr;
1199                 } else {
1200                         mlx5_ib_warn(dev, "Raw Ethernet QP needs SQ buff address\n");
1201                         return -EINVAL;
1202                 }
1203                 err = -EOPNOTSUPP;
1204         } else {
1205                 err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen);
1206                 qp->mqp.event = mlx5_ib_qp_event;
1207         }
1208
1209         if (err) {
1210                 mlx5_ib_warn(dev, "create qp failed\n");
1211                 goto err_create;
1212         }
1213
1214         kvfree(in);
1215         /* Hardware wants QPN written in big-endian order (after
1216          * shifting) for send doorbell.  Precompute this value to save
1217          * a little bit when posting sends.
1218          */
1219         qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
1220
1221         get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq,
1222                 &send_cq, &recv_cq);
1223         spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
1224         mlx5_ib_lock_cqs(send_cq, recv_cq);
1225         /* Maintain device to QPs access, needed for further handling via reset
1226          * flow
1227          */
1228         list_add_tail(&qp->qps_list, &dev->qp_list);
1229         /* Maintain CQ to QPs access, needed for further handling via reset flow
1230          */
1231         if (send_cq)
1232                 list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
1233         if (recv_cq)
1234                 list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
1235         mlx5_ib_unlock_cqs(send_cq, recv_cq);
1236         spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
1237
1238         return 0;
1239
1240 err_create:
1241         if (qp->create_type == MLX5_QP_USER)
1242                 destroy_qp_user(pd, qp);
1243         else if (qp->create_type == MLX5_QP_KERNEL)
1244                 destroy_qp_kernel(dev, qp);
1245
1246         kvfree(in);
1247         return err;
1248 }
1249
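/*
 * Take both CQ locks in a fixed order (lower CQN first) so that paths which
 * lock the send and receive CQs of a QP concurrently cannot deadlock.  The
 * __acquire()/__release() annotations balance the sparse lock checker when
 * one CQ is missing or both pointers refer to the same CQ.
 */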
1250 static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
1251         __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
1252 {
1253         if (send_cq) {
1254                 if (recv_cq) {
1255                         if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
1256                                 spin_lock(&send_cq->lock);
1257                                 spin_lock_nested(&recv_cq->lock,
1258                                                  SINGLE_DEPTH_NESTING);
1259                         } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
1260                                 spin_lock(&send_cq->lock);
1261                                 __acquire(&recv_cq->lock);
1262                         } else {
1263                                 spin_lock(&recv_cq->lock);
1264                                 spin_lock_nested(&send_cq->lock,
1265                                                  SINGLE_DEPTH_NESTING);
1266                         }
1267                 } else {
1268                         spin_lock(&send_cq->lock);
1269                         __acquire(&recv_cq->lock);
1270                 }
1271         } else if (recv_cq) {
1272                 spin_lock(&recv_cq->lock);
1273                 __acquire(&send_cq->lock);
1274         } else {
1275                 __acquire(&send_cq->lock);
1276                 __acquire(&recv_cq->lock);
1277         }
1278 }
1279
1280 static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
1281         __releases(&send_cq->lock) __releases(&recv_cq->lock)
1282 {
1283         if (send_cq) {
1284                 if (recv_cq) {
1285                         if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
1286                                 spin_unlock(&recv_cq->lock);
1287                                 spin_unlock(&send_cq->lock);
1288                         } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
1289                                 __release(&recv_cq->lock);
1290                                 spin_unlock(&send_cq->lock);
1291                         } else {
1292                                 spin_unlock(&send_cq->lock);
1293                                 spin_unlock(&recv_cq->lock);
1294                         }
1295                 } else {
1296                         __release(&recv_cq->lock);
1297                         spin_unlock(&send_cq->lock);
1298                 }
1299         } else if (recv_cq) {
1300                 __release(&send_cq->lock);
1301                 spin_unlock(&recv_cq->lock);
1302         } else {
1303                 __release(&recv_cq->lock);
1304                 __release(&send_cq->lock);
1305         }
1306 }
1307
1308 static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
1309 {
1310         return to_mpd(qp->ibqp.pd);
1311 }
1312
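/*
 * Common teardown: move the QP back to RESET if needed, unlink it from the
 * device and CQ lists used by the reset flow, flush its entries from the
 * CQs for kernel QPs, destroy the firmware QP object and release the user
 * or kernel resources.
 */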
1313 static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1314 {
1315         struct mlx5_ib_cq *send_cq, *recv_cq;
1316         struct mlx5_modify_qp_mbox_in *in;
1317         unsigned long flags;
1318         int err;
1319
1320         in = kzalloc(sizeof(*in), GFP_KERNEL);
1321         if (!in)
1322                 return;
1323
1324         if (qp->state != IB_QPS_RESET) {
1325                 if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
1326                         if (mlx5_core_qp_modify(dev->mdev, MLX5_CMD_OP_2RST_QP, in, 0,
1327                                                 &qp->mqp))
1328                                 mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
1329                                              qp->mqp.qpn);
1330                 }
1331         }
1332
1333         get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
1334                 &send_cq, &recv_cq);
1335
1336         spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
1337         mlx5_ib_lock_cqs(send_cq, recv_cq);
1338         /* del from lists under both locks above to protect reset flow paths */
1339         list_del(&qp->qps_list);
1340         if (send_cq)
1341                 list_del(&qp->cq_send_list);
1342
1343         if (recv_cq)
1344                 list_del(&qp->cq_recv_list);
1345
1346         if (qp->create_type == MLX5_QP_KERNEL) {
1347                 __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
1348                                    qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1349                 if (send_cq != recv_cq)
1350                         __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
1351         }
1352         mlx5_ib_unlock_cqs(send_cq, recv_cq);
1353         spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
1354
1355         if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
1356         } else {
1357                 err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp);
1358                 if (err)
1359                         mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
1360                                      qp->mqp.qpn);
1361         }
1362
1363         kfree(in);
1364
1365         if (qp->create_type == MLX5_QP_KERNEL)
1366                 destroy_qp_kernel(dev, qp);
1367         else if (qp->create_type == MLX5_QP_USER)
1368                 destroy_qp_user(&get_pd(qp)->ibpd, qp);
1369 }
1370
1371 static const char *ib_qp_type_str(enum ib_qp_type type)
1372 {
1373         switch (type) {
1374         case IB_QPT_SMI:
1375                 return "IB_QPT_SMI";
1376         case IB_QPT_GSI:
1377                 return "IB_QPT_GSI";
1378         case IB_QPT_RC:
1379                 return "IB_QPT_RC";
1380         case IB_QPT_UC:
1381                 return "IB_QPT_UC";
1382         case IB_QPT_UD:
1383                 return "IB_QPT_UD";
1384         case IB_QPT_RAW_IPV6:
1385                 return "IB_QPT_RAW_IPV6";
1386         case IB_QPT_RAW_ETHERTYPE:
1387                 return "IB_QPT_RAW_ETHERTYPE";
1388         case IB_QPT_XRC_INI:
1389                 return "IB_QPT_XRC_INI";
1390         case IB_QPT_XRC_TGT:
1391                 return "IB_QPT_XRC_TGT";
1392         case IB_QPT_RAW_PACKET:
1393                 return "IB_QPT_RAW_PACKET";
1394         case IB_QPT_MAX:
1395         default:
1396                 return "Invalid QP type";
1397         }
1398 }
1399
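/*
 * Verbs create-QP entry point: resolve the device from the PD, or from the
 * XRCD for XRC_TGT QPs which have no PD, reject unsupported types and
 * delegate the real work to create_qp_common().
 */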
1400 struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
1401                                 struct ib_qp_init_attr *init_attr,
1402                                 struct ib_udata *udata)
1403 {
1404         struct mlx5_ib_dev *dev;
1405         struct mlx5_ib_qp *qp;
1406         u16 xrcdn = 0;
1407         int err;
1408         u32 rcqn;
1409         u32 scqn;
1410
1411         init_attr->qpg_type = IB_QPG_NONE;
1412
1413         if (pd) {
1414                 dev = to_mdev(pd->device);
1415         } else {
1416                 /* being cautious here */
1417                 if (init_attr->qp_type != IB_QPT_XRC_TGT) {
1418                         printf("mlx5_ib: WARN: ""%s: no PD for transport %s\n", __func__, ib_qp_type_str(init_attr->qp_type));
1419                         return ERR_PTR(-EINVAL);
1420                 }
1421                 dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
1422         }
1423
1424         switch (init_attr->qp_type) {
1425         case IB_QPT_XRC_TGT:
1426         case IB_QPT_XRC_INI:
1427                 if (!MLX5_CAP_GEN(dev->mdev, xrc)) {
1428                         mlx5_ib_warn(dev, "XRC not supported\n");
1429                         return ERR_PTR(-ENOSYS);
1430                 }
1431                 init_attr->recv_cq = NULL;
1432                 if (init_attr->qp_type == IB_QPT_XRC_TGT) {
1433                         xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
1434                         init_attr->send_cq = NULL;
1435                 }
1436
1437                 /* fall through */
1438         case IB_QPT_RC:
1439         case IB_QPT_UC:
1440         case IB_QPT_UD:
1441         case IB_QPT_SMI:
1442         case IB_QPT_GSI:
1443         case IB_QPT_RAW_ETHERTYPE:
1444         case IB_QPT_RAW_PACKET:
1445                 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1446                 if (!qp)
1447                         return ERR_PTR(-ENOMEM);
1448
1449                 err = create_qp_common(dev, pd, init_attr, udata, qp);
1450                 if (err) {
1451                         mlx5_ib_warn(dev, "create_qp_common failed\n");
1452                         kfree(qp);
1453                         return ERR_PTR(err);
1454                 }
1455
1456                 if (is_qp0(init_attr->qp_type))
1457                         qp->ibqp.qp_num = 0;
1458                 else if (is_qp1(init_attr->qp_type))
1459                         qp->ibqp.qp_num = 1;
1460                 else
1461                         qp->ibqp.qp_num = qp->mqp.qpn;
1462
1463                 rcqn = init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1;
1464                 scqn = init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1;
1465                 mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
1466                             qp->ibqp.qp_num, qp->mqp.qpn, rcqn, scqn);
1467
1468                 qp->xrcdn = xrcdn;
1469
1470                 break;
1471
1472         case IB_QPT_RAW_IPV6:
1473         case IB_QPT_MAX:
1474         default:
1475                 mlx5_ib_warn(dev, "unsupported qp type %d\n",
1476                              init_attr->qp_type);
1477                 /* Don't support raw QPs */
1478                 return ERR_PTR(-EINVAL);
1479         }
1480
1481         return &qp->ibqp;
1482 }
1483
1484 int mlx5_ib_destroy_qp(struct ib_qp *qp)
1485 {
1486         struct mlx5_ib_dev *dev = to_mdev(qp->device);
1487         struct mlx5_ib_qp *mqp = to_mqp(qp);
1488
1489         destroy_qp_common(dev, mqp);
1490
1491         kfree(mqp);
1492
1493         return 0;
1494 }
1495
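/*
 * Choose the atomic mode for the QP context: take the highest bit set in
 * both the atomic_size_qp and atomic_size_dc capability masks.  Bits below
 * 2 mean atomics cannot be used, bit 2 selects CX mode, and higher bits are
 * shifted into place with MLX5_ATOMIC_MODE_OFF.
 */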
1496 static u32 atomic_mode_qp(struct mlx5_ib_dev *dev)
1497 {
1498         unsigned long mask;
1499         unsigned long tmp;
1500
1501         mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp) &
1502                 MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
1503
1504         tmp = find_last_bit(&mask, BITS_PER_LONG);
1505         if (tmp < 2 || tmp >= BITS_PER_LONG)
1506                 return MLX5_ATOMIC_MODE_NONE;
1507
1508         if (tmp == 2)
1509                 return MLX5_ATOMIC_MODE_CX;
1510
1511         return tmp << MLX5_ATOMIC_MODE_OFF;
1512 }
1513
1514 static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
1515                                    int attr_mask)
1516 {
1517         struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
1518         u32 hw_access_flags = 0;
1519         u8 dest_rd_atomic;
1520         u32 access_flags;
1521
1522         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1523                 dest_rd_atomic = attr->max_dest_rd_atomic;
1524         else
1525                 dest_rd_atomic = qp->resp_depth;
1526
1527         if (attr_mask & IB_QP_ACCESS_FLAGS)
1528                 access_flags = attr->qp_access_flags;
1529         else
1530                 access_flags = qp->atomic_rd_en;
1531
1532         if (!dest_rd_atomic)
1533                 access_flags &= IB_ACCESS_REMOTE_WRITE;
1534
1535         if (access_flags & IB_ACCESS_REMOTE_READ)
1536                 hw_access_flags |= MLX5_QP_BIT_RRE;
1537         if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
1538                 hw_access_flags |= (MLX5_QP_BIT_RAE |
1539                                     atomic_mode_qp(dev));
1540         if (access_flags & IB_ACCESS_REMOTE_WRITE)
1541                 hw_access_flags |= MLX5_QP_BIT_RWE;
1542
1543         return cpu_to_be32(hw_access_flags);
1544 }
1545
1546 enum {
1547         MLX5_PATH_FLAG_FL       = 1 << 0,
1548         MLX5_PATH_FLAG_FREE_AR  = 1 << 1,
1549         MLX5_PATH_FLAG_COUNTER  = 1 << 2,
1550 };
1551
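/*
 * Convert an ib_rate value to the device static-rate encoding.
 * IB_RATE_PORT_CURRENT maps to 0, values outside the 2.5 - 300 Gbps range
 * are rejected, and rates the device does not support fall back to the
 * next lower supported rate.
 */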
1552 static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
1553 {
1554         if (rate == IB_RATE_PORT_CURRENT) {
1555                 return 0;
1556         } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
1557                 return -EINVAL;
1558         } else {
1559                 while (rate != IB_RATE_2_5_GBPS &&
1560                        !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
1561                          MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
1562                         --rate;
1563         }
1564
1565         return rate + MLX5_STAT_RATE_OFFSET;
1566 }
1567
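/*
 * Fill an mlx5_qp_path (address vector) from an ib_ah_attr.  For RoCE ports
 * this resolves the destination MAC and the RoCE UDP source port; for IB
 * ports it fills the LID, source path bits and, when requested, the P_Key
 * index.  GRH fields, static rate, port number and (optionally) the ACK
 * timeout are set for both link layers.
 */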
1568 static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
1569                          struct mlx5_qp_path *path, u8 port, int attr_mask,
1570                          u32 path_flags, const struct ib_qp_attr *attr,
1571                          int alt)
1572 {
1573         enum rdma_link_layer ll = dev->ib_dev.get_link_layer(&dev->ib_dev,
1574                                                              port);
1575         int err;
1576         int gid_type;
1577
1578         if ((ll == IB_LINK_LAYER_ETHERNET) || (ah->ah_flags & IB_AH_GRH)) {
1579                 int len = dev->ib_dev.gid_tbl_len[port - 1];
1580                 if (ah->grh.sgid_index >= len) {
1581                         printf("mlx5_ib: ERR: sgid_index (%u) too large. max is %d\n",
                                    ah->grh.sgid_index, len - 1);
1582                         return -EINVAL;
1583                 }
1584         }
1585
1586         if (ll == IB_LINK_LAYER_ETHERNET) {
1587                 if (!(ah->ah_flags & IB_AH_GRH))
1588                         return -EINVAL;
1589
1590                 err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index,
1591                                              &gid_type);
1592                 if (err)
1593                         return err;
1594                 err = mlx5_ib_resolve_grh(ah, path->rmac, NULL);
1595                 if (err)
1596                         return err;
1597                 path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
1598                                                           ah->grh.sgid_index,
1599                                                           0);
1600                 path->dci_cfi_prio_sl = (ah->sl & 0xf) << 4;
1601         } else {
1602                 path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
1603                 path->grh_mlid  = ah->src_path_bits & 0x7f;
1604                 path->rlid      = cpu_to_be16(ah->dlid);
1605                 if (ah->ah_flags & IB_AH_GRH)
1606                         path->grh_mlid  |= 1 << 7;
1607                 if (attr_mask & IB_QP_PKEY_INDEX)
1608                         path->pkey_index = cpu_to_be16(alt ?
1609                                                        attr->alt_pkey_index :
1610                                                        attr->pkey_index);
1611
1612                 path->dci_cfi_prio_sl = ah->sl & 0xf;
1613         }
1614
1615         path->fl_free_ar |= (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0;
1616
1617         if (ah->ah_flags & IB_AH_GRH) {
1618                 path->mgid_index = ah->grh.sgid_index;
1619                 path->hop_limit  = ah->grh.hop_limit;
1620                 path->tclass_flowlabel =
1621                         cpu_to_be32((ah->grh.traffic_class << 20) |
1622                                     (ah->grh.flow_label));
1623                 memcpy(path->rgid, ah->grh.dgid.raw, 16);
1624         }
1625
1626         err = ib_rate_to_mlx5(dev, ah->static_rate);
1627         if (err < 0)
1628                 return err;
1629         path->static_rate = err;
1630         path->port = port;
1631
1632         if (attr_mask & IB_QP_TIMEOUT)
1633                 path->ackto_lt = alt ? attr->alt_timeout << 3 : attr->timeout << 3;
1634
1635         return 0;
1636 }
1637
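/*
 * For every legal (current state, next state, service type) combination,
 * the set of optional-parameter bits the firmware accepts.  The mask built
 * from attr_mask in __mlx5_ib_modify_qp() is ANDed with the matching entry
 * before being written to the modify-QP mailbox.
 */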
1638 static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
1639         [MLX5_QP_STATE_INIT] = {
1640                 [MLX5_QP_STATE_INIT] = {
1641                         [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE            |
1642                                           MLX5_QP_OPTPAR_RAE            |
1643                                           MLX5_QP_OPTPAR_RWE            |
1644                                           MLX5_QP_OPTPAR_PKEY_INDEX     |
1645                                           MLX5_QP_OPTPAR_PRI_PORT,
1646                         [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE            |
1647                                           MLX5_QP_OPTPAR_PKEY_INDEX     |
1648                                           MLX5_QP_OPTPAR_PRI_PORT,
1649                         [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX     |
1650                                           MLX5_QP_OPTPAR_Q_KEY          |
1651                                           MLX5_QP_OPTPAR_PRI_PORT,
1652                         [MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_PRI_PORT      |
1653                                           MLX5_QP_OPTPAR_DC_KEY         |
1654                                           MLX5_QP_OPTPAR_PKEY_INDEX     |
1655                                           MLX5_QP_OPTPAR_RAE,
1656                 },
1657                 [MLX5_QP_STATE_RTR] = {
1658                         [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
1659                                           MLX5_QP_OPTPAR_RRE            |
1660                                           MLX5_QP_OPTPAR_RAE            |
1661                                           MLX5_QP_OPTPAR_RWE            |
1662                                           MLX5_QP_OPTPAR_PKEY_INDEX,
1663                         [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
1664                                           MLX5_QP_OPTPAR_RWE            |
1665                                           MLX5_QP_OPTPAR_PKEY_INDEX,
1666                         [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX     |
1667                                           MLX5_QP_OPTPAR_Q_KEY,
1668                         [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX    |
1669                                            MLX5_QP_OPTPAR_Q_KEY,
1670                         [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1671                                           MLX5_QP_OPTPAR_RRE            |
1672                                           MLX5_QP_OPTPAR_RAE            |
1673                                           MLX5_QP_OPTPAR_RWE            |
1674                                           MLX5_QP_OPTPAR_PKEY_INDEX,
1675                         [MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_PKEY_INDEX    |
1676                                           MLX5_QP_OPTPAR_RAE            |
1677                                           MLX5_QP_OPTPAR_DC_KEY,
1678                 },
1679         },
1680         [MLX5_QP_STATE_RTR] = {
1681                 [MLX5_QP_STATE_RTS] = {
1682                         [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
1683                                           MLX5_QP_OPTPAR_RRE            |
1684                                           MLX5_QP_OPTPAR_RAE            |
1685                                           MLX5_QP_OPTPAR_RWE            |
1686                                           MLX5_QP_OPTPAR_PM_STATE       |
1687                                           MLX5_QP_OPTPAR_RNR_TIMEOUT,
1688                         [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
1689                                           MLX5_QP_OPTPAR_RWE            |
1690                                           MLX5_QP_OPTPAR_PM_STATE,
1691                         [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
1692                         [MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_DC_KEY                |
1693                                           MLX5_QP_OPTPAR_PM_STATE       |
1694                                           MLX5_QP_OPTPAR_RAE,
1695                 },
1696         },
1697         [MLX5_QP_STATE_RTS] = {
1698                 [MLX5_QP_STATE_RTS] = {
1699                         [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE            |
1700                                           MLX5_QP_OPTPAR_RAE            |
1701                                           MLX5_QP_OPTPAR_RWE            |
1702                                           MLX5_QP_OPTPAR_RNR_TIMEOUT    |
1703                                           MLX5_QP_OPTPAR_PM_STATE       |
1704                                           MLX5_QP_OPTPAR_ALT_ADDR_PATH,
1705                         [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE            |
1706                                           MLX5_QP_OPTPAR_PM_STATE       |
1707                                           MLX5_QP_OPTPAR_ALT_ADDR_PATH,
1708                         [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY          |
1709                                           MLX5_QP_OPTPAR_SRQN           |
1710                                           MLX5_QP_OPTPAR_CQN_RCV,
1711                         [MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_DC_KEY                |
1712                                           MLX5_QP_OPTPAR_PM_STATE       |
1713                                           MLX5_QP_OPTPAR_RAE,
1714                 },
1715         },
1716         [MLX5_QP_STATE_SQER] = {
1717                 [MLX5_QP_STATE_RTS] = {
1718                         [MLX5_QP_ST_UD]  = MLX5_QP_OPTPAR_Q_KEY,
1719                         [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
1720                         [MLX5_QP_ST_UC]  = MLX5_QP_OPTPAR_RWE,
1721                         [MLX5_QP_ST_RC]  = MLX5_QP_OPTPAR_RNR_TIMEOUT   |
1722                                            MLX5_QP_OPTPAR_RWE           |
1723                                            MLX5_QP_OPTPAR_RAE           |
1724                                            MLX5_QP_OPTPAR_RRE,
1725                         [MLX5_QP_ST_DCI]  = MLX5_QP_OPTPAR_DC_KEY       |
1726                                            MLX5_QP_OPTPAR_RAE,
1727
1728                 },
1729         },
1730         [MLX5_QP_STATE_SQD] = {
1731                 [MLX5_QP_STATE_RTS] = {
1732                         [MLX5_QP_ST_UD]  = MLX5_QP_OPTPAR_Q_KEY,
1733                         [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
1734                         [MLX5_QP_ST_UC]  = MLX5_QP_OPTPAR_RWE,
1735                         [MLX5_QP_ST_RC]  = MLX5_QP_OPTPAR_RNR_TIMEOUT   |
1736                                            MLX5_QP_OPTPAR_RWE           |
1737                                            MLX5_QP_OPTPAR_RAE           |
1738                                            MLX5_QP_OPTPAR_RRE,
1739                 },
1740         },
1741 };
1742
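/*
 * Translate a single IB_QP_* attribute flag into the MLX5_QP_OPTPAR_* bits
 * it requires (zero when no optional-parameter bit is needed).
 * ib_mask_to_mlx5_opt() below ORs these together for every bit set in the
 * attribute mask.
 */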
1743 static int ib_nr_to_mlx5_nr(int ib_mask)
1744 {
1745         switch (ib_mask) {
1746         case IB_QP_STATE:
1747                 return 0;
1748         case IB_QP_CUR_STATE:
1749                 return 0;
1750         case IB_QP_EN_SQD_ASYNC_NOTIFY:
1751                 return 0;
1752         case IB_QP_ACCESS_FLAGS:
1753                 return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
1754                         MLX5_QP_OPTPAR_RAE;
1755         case IB_QP_PKEY_INDEX:
1756                 return MLX5_QP_OPTPAR_PKEY_INDEX;
1757         case IB_QP_PORT:
1758                 return MLX5_QP_OPTPAR_PRI_PORT;
1759         case IB_QP_QKEY:
1760                 return MLX5_QP_OPTPAR_Q_KEY;
1761         case IB_QP_AV:
1762                 return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
1763                         MLX5_QP_OPTPAR_PRI_PORT;
1764         case IB_QP_PATH_MTU:
1765                 return 0;
1766         case IB_QP_TIMEOUT:
1767                 return MLX5_QP_OPTPAR_ACK_TIMEOUT;
1768         case IB_QP_RETRY_CNT:
1769                 return MLX5_QP_OPTPAR_RETRY_COUNT;
1770         case IB_QP_RNR_RETRY:
1771                 return MLX5_QP_OPTPAR_RNR_RETRY;
1772         case IB_QP_RQ_PSN:
1773                 return 0;
1774         case IB_QP_MAX_QP_RD_ATOMIC:
1775                 return MLX5_QP_OPTPAR_SRA_MAX;
1776         case IB_QP_ALT_PATH:
1777                 return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
1778         case IB_QP_MIN_RNR_TIMER:
1779                 return MLX5_QP_OPTPAR_RNR_TIMEOUT;
1780         case IB_QP_SQ_PSN:
1781                 return 0;
1782         case IB_QP_MAX_DEST_RD_ATOMIC:
1783                 return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
1784                         MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
1785         case IB_QP_PATH_MIG_STATE:
1786                 return MLX5_QP_OPTPAR_PM_STATE;
1787         case IB_QP_CAP:
1788                 return 0;
1789         case IB_QP_DEST_QPN:
1790                 return 0;
1791         }
1792         return 0;
1793 }
1794
1795 static int ib_mask_to_mlx5_opt(int ib_mask)
1796 {
1797         int result = 0;
1798         int i;
1799
1800         for (i = 0; i < 8 * sizeof(int); i++) {
1801                 if ((1 << i) & ib_mask)
1802                         result |= ib_nr_to_mlx5_nr(1 << i);
1803         }
1804
1805         return result;
1806 }
1807
1808 static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
1809                                const struct ib_qp_attr *attr, int attr_mask,
1810                                enum ib_qp_state cur_state, enum ib_qp_state new_state)
1811 {
1812         static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
1813                 [MLX5_QP_STATE_RST] = {
1814                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
1815                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
1816                         [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_RST2INIT_QP,
1817                 },
1818                 [MLX5_QP_STATE_INIT]  = {
1819                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
1820                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
1821                         [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_INIT2INIT_QP,
1822                         [MLX5_QP_STATE_RTR]     = MLX5_CMD_OP_INIT2RTR_QP,
1823                 },
1824                 [MLX5_QP_STATE_RTR]   = {
1825                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
1826                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
1827                         [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTR2RTS_QP,
1828                 },
1829                 [MLX5_QP_STATE_RTS]   = {
1830                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
1831                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
1832                         [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTS2RTS_QP,
1833                 },
1834                 [MLX5_QP_STATE_SQD] = {
1835                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
1836                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
1837                         [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQD_RTS_QP,
1838                 },
1839                 [MLX5_QP_STATE_SQER] = {
1840                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
1841                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
1842                         [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQERR2RTS_QP,
1843                 },
1844                 [MLX5_QP_STATE_ERR] = {
1845                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
1846                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
1847                 }
1848         };
1849
1850         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1851         struct mlx5_ib_qp *qp = to_mqp(ibqp);
1852         struct mlx5_ib_cq *send_cq, *recv_cq;
1853         struct mlx5_qp_context *context;
1854         struct mlx5_modify_qp_mbox_in *in;
1855         struct mlx5_ib_pd *pd;
1856         enum mlx5_qp_state mlx5_cur, mlx5_new;
1857         enum mlx5_qp_optpar optpar;
1858         int sqd_event;
1859         int mlx5_st;
1860         int err;
1861         u16 op;
1862
1863         in = kzalloc(sizeof(*in), GFP_KERNEL);
1864         if (!in)
1865                 return -ENOMEM;
1866
1867         context = &in->ctx;
1868         err = to_mlx5_st(ibqp->qp_type);
1869         if (err < 0)
1870                 goto out;
1871
1872         context->flags = cpu_to_be32(err << 16);
1873
1874         if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
1875                 context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
1876         } else {
1877                 switch (attr->path_mig_state) {
1878                 case IB_MIG_MIGRATED:
1879                         context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
1880                         break;
1881                 case IB_MIG_REARM:
1882                         context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11);
1883                         break;
1884                 case IB_MIG_ARMED:
1885                         context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11);
1886                         break;
1887                 }
1888         }
1889
1890         if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
1891                 context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
1892         } else if (ibqp->qp_type == IB_QPT_UD) {
1893                 context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
1894         } else if (attr_mask & IB_QP_PATH_MTU) {
1895                 if (attr->path_mtu < IB_MTU_256 ||
1896                     attr->path_mtu > IB_MTU_4096) {
1897                         mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
1898                         err = -EINVAL;
1899                         goto out;
1900                 }
1901                 context->mtu_msgmax = (attr->path_mtu << 5) |
1902                                       (u8)MLX5_CAP_GEN(dev->mdev, log_max_msg);
1903         }
1904
1905         if (attr_mask & IB_QP_DEST_QPN)
1906                 context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
1907
1908         if (attr_mask & IB_QP_PKEY_INDEX)
1909                 context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index);
1910
1911         /* todo implement counter_index functionality */
1912
1913         if (is_sqp(ibqp->qp_type))
1914                 context->pri_path.port = qp->port;
1915
1916         if (attr_mask & IB_QP_PORT)
1917                 context->pri_path.port = attr->port_num;
1918
1919         if (attr_mask & IB_QP_AV) {
1920                 err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
1921                                     attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
1922                                     attr_mask, 0, attr, 0);
1923                 if (err)
1924                         goto out;
1925         }
1926
1927         if (attr_mask & IB_QP_TIMEOUT)
1928                 context->pri_path.ackto_lt |= attr->timeout << 3;
1929
1930         if (attr_mask & IB_QP_ALT_PATH) {
1931                 err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
1932                                     attr->alt_port_num,
1933                                     attr_mask  | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT,
1934                                     0, attr, 1);
1935                 if (err)
1936                         goto out;
1937         }
1938
1939         pd = get_pd(qp);
1940         get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
1941                 &send_cq, &recv_cq);
1942
1943         context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
1944         context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
1945         context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0;
1946         context->params1  = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28);
1947
1948         if (attr_mask & IB_QP_RNR_RETRY)
1949                 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
1950
1951         if (attr_mask & IB_QP_RETRY_CNT)
1952                 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
1953
1954         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1955                 if (attr->max_rd_atomic)
1956                         context->params1 |=
1957                                 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
1958         }
1959
1960         if (attr_mask & IB_QP_SQ_PSN)
1961                 context->next_send_psn = cpu_to_be32(attr->sq_psn & 0xffffff);
1962
1963         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1964                 if (attr->max_dest_rd_atomic)
1965                         context->params2 |=
1966                                 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
1967         }
1968
1969         if ((attr_mask & IB_QP_ACCESS_FLAGS) &&
1970             (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
1971             !dev->enable_atomic_resp) {
1972                 mlx5_ib_warn(dev, "atomic responder is not supported\n");
1973                 err = -EINVAL;
1974                 goto out;
1975         }
1976
1977         if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
1978                 context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
1979
1980         if (attr_mask & IB_QP_MIN_RNR_TIMER)
1981                 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
1982
1983         if (attr_mask & IB_QP_RQ_PSN)
1984                 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn & 0xffffff);
1985
1986         if (attr_mask & IB_QP_QKEY)
1987                 context->qkey = cpu_to_be32(attr->qkey);
1988
1989         if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1990                 context->db_rec_addr = cpu_to_be64(qp->db.dma);
1991
1992         if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD  &&
1993             attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
1994                 sqd_event = 1;
1995         else
1996                 sqd_event = 0;
1997
1998         if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1999                 context->sq_crq_size |= cpu_to_be16(1 << 4);
2000
2001         if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
2002                 u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
2003                                qp->port) - 1;
2004                 struct mlx5_ib_port *mibport = &dev->port[port_num];
2005
2006                 context->qp_counter_set_usr_page |=
2007                         cpu_to_be32(mibport->q_cnt_id << 24);
2008         }
2009
2010         mlx5_cur = to_mlx5_state(cur_state);
2011         mlx5_new = to_mlx5_state(new_state);
2012         mlx5_st = to_mlx5_st(ibqp->qp_type);
2013         if (mlx5_st < 0) {
2014                 err = -EINVAL;
                     goto out;
             }
2015
2016         if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
2017             !optab[mlx5_cur][mlx5_new]) {
2018                 err = -EINVAL;
                     goto out;       /* free the mailbox instead of leaking it */
             }
2019
2020         op = optab[mlx5_cur][mlx5_new];
2021         optpar = ib_mask_to_mlx5_opt(attr_mask);
2022         optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
2023         in->optparam = cpu_to_be32(optpar);
2024
2025         if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
2026                 err = -EOPNOTSUPP;
2027         else
2028                 err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
2029                                   &qp->mqp);
2030         if (err)
2031                 goto out;
2032
2033         qp->state = new_state;
2034
2035         if (attr_mask & IB_QP_ACCESS_FLAGS)
2036                 qp->atomic_rd_en = attr->qp_access_flags;
2037         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
2038                 qp->resp_depth = attr->max_dest_rd_atomic;
2039         if (attr_mask & IB_QP_PORT)
2040                 qp->port = attr->port_num;
2041         if (attr_mask & IB_QP_ALT_PATH)
2042                 qp->alt_port = attr->alt_port_num;
2043
2044         /*
2045          * If we moved a kernel QP to RESET, clean up all old CQ
2046          * entries and reinitialize the QP.
2047          */
2048         if (new_state == IB_QPS_RESET && !ibqp->uobject) {
2049                 mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
2050                                  ibqp->srq ? to_msrq(ibqp->srq) : NULL);
2051                 if (send_cq != recv_cq)
2052                         mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
2053
2054                 qp->rq.head = 0;
2055                 qp->rq.tail = 0;
2056                 qp->sq.head = 0;
2057                 qp->sq.tail = 0;
2058                 qp->sq.cur_post = 0;
2059                 qp->sq.last_poll = 0;
2060                 if (qp->db.db) {
2061                         qp->db.db[MLX5_RCV_DBR] = 0;
2062                         qp->db.db[MLX5_SND_DBR] = 0;
2063                 }
2064         }
2065
2066 out:
2067         kfree(in);
2068         return err;
2069 }
2070
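/*
 * Hook for skipping the ib_modify_qp_is_ok() transport check; it always
 * returns 0, so the check is applied to every QP type.
 */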
2071 static int ignored_ts_check(enum ib_qp_type qp_type)
2072 {
2073         return 0;
2074 }
2075
2076 int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2077                       int attr_mask, struct ib_udata *udata)
2078 {
2079         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2080         struct mlx5_ib_qp *qp = to_mqp(ibqp);
2081         enum ib_qp_state cur_state, new_state;
2082         int err = -EINVAL;
2083         int port;
2084  
2085         mutex_lock(&qp->mutex);
2086
2087         cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
2088         new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
2089  
2090         if (!ignored_ts_check(ibqp->qp_type) &&
2091             !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
2092                 goto out;
2093
2094         if ((attr_mask & IB_QP_PORT) &&
2095             (attr->port_num == 0 ||
2096              attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)))
2097                 goto out;
2098
2099         if (attr_mask & IB_QP_PKEY_INDEX) {
2100                 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
2101                 if (attr->pkey_index >=
2102                     dev->mdev->port_caps[port - 1].pkey_table_len)
2103                         goto out;
2104         }
2105
2106         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
2107             attr->max_rd_atomic >
2108             (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp)))
2109                 goto out;
2110
2111         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
2112             attr->max_dest_rd_atomic >
2113             (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp)))
2114                 goto out;
2115
2116         if (cur_state == new_state && cur_state == IB_QPS_RESET) {
2117                 err = 0;
2118                 goto out;
2119         }
2120
2121         err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
2122
2123 out:
2124         mutex_unlock(&qp->mutex);
2125         return err;
2126 }
2127
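/*
 * Return nonzero if posting another nreq WQEs would overflow the work
 * queue.  When the fast check fails, head and tail are re-read under the
 * completion queue lock to pick up progress made by the CQ poller.
 */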
2128 static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
2129 {
2130         struct mlx5_ib_cq *cq;
2131         unsigned cur;
2132
2133         cur = wq->head - wq->tail;
2134         if (likely(cur + nreq < wq->max_post))
2135                 return 0;
2136
2137         cq = to_mcq(ib_cq);
2138         spin_lock(&cq->lock);
2139         cur = wq->head - wq->tail;
2140         spin_unlock(&cq->lock);
2141
2142         return cur + nreq >= wq->max_post;
2143 }
2144
2145 static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
2146                                           u64 remote_addr, u32 rkey)
2147 {
2148         rseg->raddr    = cpu_to_be64(remote_addr);
2149         rseg->rkey     = cpu_to_be32(rkey);
2150         rseg->reserved = 0;
2151 }
2152
2153 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
2154                              struct ib_send_wr *wr)
2155 {
2156         memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av));
2157         dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
2158         dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2159 }
2160
2161 static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
2162 {
2163         dseg->byte_count = cpu_to_be32(sg->length);
2164         dseg->lkey       = cpu_to_be32(sg->lkey);
2165         dseg->addr       = cpu_to_be64(sg->addr);
2166 }
2167
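/*
 * Convert a page-entry count to 16-byte octowords (two 8-byte entries per
 * octoword), padding the count up to a multiple of 8 entries first.
 */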
2168 static __be16 get_klm_octo(int npages)
2169 {
2170         return cpu_to_be16(ALIGN(npages, 8) / 2);
2171 }
2172
2173 static __be64 frwr_mkey_mask(void)
2174 {
2175         u64 result;
2176
2177         result = MLX5_MKEY_MASK_LEN             |
2178                 MLX5_MKEY_MASK_PAGE_SIZE        |
2179                 MLX5_MKEY_MASK_START_ADDR       |
2180                 MLX5_MKEY_MASK_EN_RINVAL        |
2181                 MLX5_MKEY_MASK_KEY              |
2182                 MLX5_MKEY_MASK_LR               |
2183                 MLX5_MKEY_MASK_LW               |
2184                 MLX5_MKEY_MASK_RR               |
2185                 MLX5_MKEY_MASK_RW               |
2186                 MLX5_MKEY_MASK_A                |
2187                 MLX5_MKEY_MASK_SMALL_FENCE      |
2188                 MLX5_MKEY_MASK_FREE;
2189
2190         return cpu_to_be64(result);
2191 }
2192
2193 static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
2194                                  struct ib_send_wr *wr, int li)
2195 {
2196         memset(umr, 0, sizeof(*umr));
2197
2198         if (li) {
2199                 umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
2200                 umr->flags = 1 << 7;
2201                 return;
2202         }
2203
2204         umr->flags = (1 << 5); /* fail if not free */
2205         umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len);
2206         umr->mkey_mask = frwr_mkey_mask();
2207 }
2208
2209 static u8 get_umr_flags(int acc)
2210 {
2211         return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
2212                (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
2213                (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
2214                (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
2215                 MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
2216 }
2217
2218 static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
2219                              int li, int *writ)
2220 {
2221         memset(seg, 0, sizeof(*seg));
2222         if (li) {
2223                 seg->status = MLX5_MKEY_STATUS_FREE;
2224                 return;
2225         }
2226
2227         seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) |
2228                      MLX5_ACCESS_MODE_MTT;
2229         *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
2230         seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
2231         seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
2232         seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
2233         seg->len = cpu_to_be64(wr->wr.fast_reg.length);
2234         seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2);
2235         seg->log2_page_size = wr->wr.fast_reg.page_shift;
2236 }
2237
2238 static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
2239                            struct ib_send_wr *wr,
2240                            struct mlx5_core_dev *mdev,
2241                            struct mlx5_ib_pd *pd,
2242                            int writ)
2243 {
2244         struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
2245         u64 *page_list = wr->wr.fast_reg.page_list->page_list;
2246         u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
2247         int i;
2248
2249         for (i = 0; i < wr->wr.fast_reg.page_list_len; i++)
2250                 mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
2251         dseg->addr = cpu_to_be64(mfrpl->map);
2252         dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
2253         dseg->lkey = cpu_to_be32(pd->pa_lkey);
2254 }
2255
2256 static __be32 send_ieth(struct ib_send_wr *wr)
2257 {
2258         switch (wr->opcode) {
2259         case IB_WR_SEND_WITH_IMM:
2260         case IB_WR_RDMA_WRITE_WITH_IMM:
2261                 return wr->ex.imm_data;
2262
2263         case IB_WR_SEND_WITH_INV:
2264                 return cpu_to_be32(wr->ex.invalidate_rkey);
2265
2266         default:
2267                 return 0;
2268         }
2269 }
2270
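/*
 * Simple integrity signature: XOR of every byte in the range, inverted.
 * A buffer of all zeroes therefore yields 0xff.
 */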
2271 static u8 calc_sig(void *wqe, int size)
2272 {
2273         u8 *p = wqe;
2274         u8 res = 0;
2275         int i;
2276
2277         for (i = 0; i < size; i++)
2278                 res ^= p[i];
2279
2280         return ~res;
2281 }
2282
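/*
 * Signature over a whole WQE; the length is taken from the descriptor
 * itself (low 6 bits of byte 8, in units of 16 bytes).
 */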
2283 static u8 calc_wq_sig(void *wqe)
2284 {
2285         return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
2286 }
2287
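/*
 * Copy the scatter/gather payload inline into the send WQE, wrapping
 * around at the end of the SQ buffer.  Fails with -ENOMEM if the total
 * length exceeds the QP's max_inline_data.  *sz returns the segment size
 * in 16-byte units.
 */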
2288 static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
2289                             void *wqe, int *sz)
2290 {
2291         struct mlx5_wqe_inline_seg *seg;
2292         void *qend = qp->sq.qend;
2293         void *addr;
2294         int inl = 0;
2295         int copy;
2296         int len;
2297         int i;
2298
2299         seg = wqe;
2300         wqe += sizeof(*seg);
2301         for (i = 0; i < wr->num_sge; i++) {
2302                 addr = (void *)(uintptr_t)(wr->sg_list[i].addr);
2303                 len  = wr->sg_list[i].length;
2304                 inl += len;
2305
2306                 if (unlikely(inl > qp->max_inline_data))
2307                         return -ENOMEM;
2308
2309                 if (unlikely(wqe + len > qend)) {
2310                         copy = (int)(qend - wqe);
2311                         memcpy(wqe, addr, copy);
2312                         addr += copy;
2313                         len -= copy;
2314                         wqe = mlx5_get_send_wqe(qp, 0);
2315                 }
2316                 memcpy(wqe, addr, len);
2317                 wqe += len;
2318         }
2319
2320         seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
2321
2322         *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
2323
2324         return 0;
2325 }
2326
2327 static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
2328                           struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
2329 {
2330         int writ = 0;
2331         int li;
2332
2333         li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0;
2334         if (unlikely(wr->send_flags & IB_SEND_INLINE))
2335                 return -EINVAL;
2336
2337         set_frwr_umr_segment(*seg, wr, li);
2338         *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
2339         *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
2340         if (unlikely((*seg == qp->sq.qend)))
2341                 *seg = mlx5_get_send_wqe(qp, 0);
2342         set_mkey_segment(*seg, wr, li, &writ);
2343         *seg += sizeof(struct mlx5_mkey_seg);
2344         *size += sizeof(struct mlx5_mkey_seg) / 16;
2345         if (unlikely((*seg == qp->sq.qend)))
2346                 *seg = mlx5_get_send_wqe(qp, 0);
2347         if (!li) {
2348                 if (unlikely(wr->wr.fast_reg.page_list_len >
2349                              wr->wr.fast_reg.page_list->max_page_list_len))
2350                         return  -ENOMEM;
2351
2352                 set_frwr_pages(*seg, wr, mdev, pd, writ);
2353                 *seg += sizeof(struct mlx5_wqe_data_seg);
2354                 *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
2355         }
2356         return 0;
2357 }
2358
2359 static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
2360 {
2361         __be32 *p = NULL;
2362         int tidx = idx;
2363         int i, j;
2364
2365         pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
2366         for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
2367                 if ((i & 0xf) == 0) {
2368                         void *buf = mlx5_get_send_wqe(qp, tidx);
2369                         tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
2370                         p = buf;
2371                         j = 0;
2372                 }
2373                 pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
2374                          be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
2375                          be32_to_cpu(p[j + 3]));
2376         }
2377 }
2378
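/*
 * Copy a WQE into the BlueFlame register 64 bytes per iteration, wrapping
 * back to the start of the SQ buffer when the source pointer reaches the
 * end of the queue.
 */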
2379 static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
2380                          unsigned bytecnt, struct mlx5_ib_qp *qp)
2381 {
2382         while (bytecnt > 0) {
2383                 __iowrite64_copy(dst++, src++, 8);
2384                 __iowrite64_copy(dst++, src++, 8);
2385                 __iowrite64_copy(dst++, src++, 8);
2386                 __iowrite64_copy(dst++, src++, 8);
2387                 __iowrite64_copy(dst++, src++, 8);
2388                 __iowrite64_copy(dst++, src++, 8);
2389                 __iowrite64_copy(dst++, src++, 8);
2390                 __iowrite64_copy(dst++, src++, 8);
2391                 bytecnt -= 64;
2392                 if (unlikely(src == qp->sq.qend))
2393                         src = mlx5_get_send_wqe(qp, 0);
2394         }
2395 }
2396
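/*
 * Select the fence mode for a WQE: a fenced local invalidate forces strong
 * ordering; otherwise, if a fence is already pending, a fenced request uses
 * "small and fence" and an unfenced one inherits the cached value.
 */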
2397 static u8 get_fence(u8 fence, struct ib_send_wr *wr)
2398 {
2399         if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
2400                      wr->send_flags & IB_SEND_FENCE))
2401                 return MLX5_FENCE_MODE_STRONG_ORDERING;
2402
2403         if (unlikely(fence)) {
2404                 if (wr->send_flags & IB_SEND_FENCE)
2405                         return MLX5_FENCE_MODE_SMALL_AND_FENCE;
2406                 else
2407                         return fence;
2408
2409         } else {
2410                 return 0;
2411         }
2412 }
2413
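/*
 * Reserve the next send WQE slot: check for overflow, locate the slot and
 * pre-fill the control segment with the immediate data and the
 * completion/solicited event flags.  *idx and *size are set for the caller
 * to finish the WQE.
 */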
2414 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
2415                      struct mlx5_wqe_ctrl_seg **ctrl,
2416                      struct ib_send_wr *wr, unsigned *idx,
2417                      int *size, int nreq)
2418 {
2419         int err = 0;
2420
2421         if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
2422                 mlx5_ib_warn(to_mdev(qp->ibqp.device), "work queue overflow\n");
2423                 err = -ENOMEM;
2424                 return err;
2425         }
2426
2427         *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
2428         *seg = mlx5_get_send_wqe(qp, *idx);
2429         *ctrl = *seg;
2430         *(u32 *)(*seg + 8) = 0;
2431         (*ctrl)->imm = send_ieth(wr);
2432         (*ctrl)->fm_ce_se = qp->sq_signal_bits |
2433                 (wr->send_flags & IB_SEND_SIGNALED ?
2434                  MLX5_WQE_CTRL_CQ_UPDATE : 0) |
2435                 (wr->send_flags & IB_SEND_SOLICITED ?
2436                  MLX5_WQE_CTRL_SOLICITED : 0);
2437
2438         *seg += sizeof(**ctrl);
2439         *size = sizeof(**ctrl) / 16;
2440
2441         return err;
2442 }
2443
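/*
 * Complete the control segment (WQE index, opcode, DS count and fence),
 * record the wr_id, opcode and wqe_head needed at completion time, and
 * advance sq.cur_post by the number of basic blocks the WQE occupies.
 */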
2444 static void finish_wqe(struct mlx5_ib_qp *qp,
2445                        struct mlx5_wqe_ctrl_seg *ctrl,
2446                        u8 size, unsigned idx,
2447                        struct ib_send_wr *wr,
2448                        int nreq, u8 fence, u8 next_fence,
2449                        u32 mlx5_opcode)
2450 {
2451         u8 opmod = 0;
2452
2453         ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
2454                                              mlx5_opcode | ((u32)opmod << 24));
2455         ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
2456         ctrl->fm_ce_se |= fence;
2457         qp->fm_cache = next_fence;
2458         if (unlikely(qp->wq_sig))
2459                 ctrl->signature = calc_wq_sig(ctrl);
2460
2461         qp->sq.swr_ctx[idx].wrid = wr->wr_id;
2462         qp->sq.swr_ctx[idx].w_list.opcode = mlx5_opcode;
2463         qp->sq.swr_ctx[idx].wqe_head = qp->sq.head + nreq;
2464         qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
2465         qp->sq.swr_ctx[idx].w_list.next = qp->sq.cur_post;
2466         qp->sq.swr_ctx[idx].sig_piped = 0;
2467 }
2468
2469 int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2470                       struct ib_send_wr **bad_wr)
2471 {
2472         struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
2473         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2474         struct mlx5_core_dev *mdev = dev->mdev;
2475         struct mlx5_ib_qp *qp = to_mqp(ibqp);
2476         struct mlx5_wqe_data_seg *dpseg;
2477         struct mlx5_wqe_xrc_seg *xrc;
2478         struct mlx5_bf *bf = qp->bf;
2479         int uninitialized_var(size);
2480         void *qend = qp->sq.qend;
2481         unsigned long flags;
2482         unsigned idx;
2483         int err = 0;
2484         int inl = 0;
2485         int num_sge;
2486         void *seg;
2487         int nreq;
2488         int i;
2489         u8 next_fence = 0;
2490         u8 fence;
2491
2492
2493         spin_lock_irqsave(&qp->sq.lock, flags);
2494
2495         if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
2496                 err = -EIO;
2497                 *bad_wr = wr;
2498                 nreq = 0;
2499                 goto out;
2500         }
2501
2502         for (nreq = 0; wr; nreq++, wr = wr->next) {
2503                 if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
2504                         mlx5_ib_warn(dev, "Invalid opcode 0x%x\n", wr->opcode);
2505                         err = -EINVAL;
2506                         *bad_wr = wr;
2507                         goto out;
2508                 }
2509
2510                 fence = qp->fm_cache;
2511                 num_sge = wr->num_sge;
2512                 if (unlikely(num_sge > qp->sq.max_gs)) {
2513                         mlx5_ib_warn(dev, "Max gs exceeded %d (max = %d)\n", wr->num_sge, qp->sq.max_gs);
2514                         err = -ENOMEM;
2515                         *bad_wr = wr;
2516                         goto out;
2517                 }
2518
2519                 err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
2520                 if (err) {
2521                         mlx5_ib_warn(dev, "Failed to prepare WQE\n");
2522                         err = -ENOMEM;
2523                         *bad_wr = wr;
2524                         goto out;
2525                 }
2526
2527                 switch (ibqp->qp_type) {
2528                 case IB_QPT_XRC_INI:
2529                         xrc = seg;
2530                         xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num);
2531                         seg += sizeof(*xrc);
2532                         size += sizeof(*xrc) / 16;
2533                         /* fall through */
2534                 case IB_QPT_RC:
2535                         switch (wr->opcode) {
2536                         case IB_WR_RDMA_READ:
2537                         case IB_WR_RDMA_WRITE:
2538                         case IB_WR_RDMA_WRITE_WITH_IMM:
2539                                 set_raddr_seg(seg, wr->wr.rdma.remote_addr,
2540                                               wr->wr.rdma.rkey);
2541                                 seg += sizeof(struct mlx5_wqe_raddr_seg);
2542                                 size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
2543                                 break;
2544
2545                         case IB_WR_ATOMIC_CMP_AND_SWP:
2546                         case IB_WR_ATOMIC_FETCH_AND_ADD:
2547                         case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
2548                                 mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
2549                                 err = -ENOSYS;
2550                                 *bad_wr = wr;
2551                                 goto out;
2552
2553                         case IB_WR_LOCAL_INV:
2554                                 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2555                                 qp->sq.swr_ctx[idx].wr_data = IB_WR_LOCAL_INV;
2556                                 ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
2557                                 err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
2558                                 if (err) {
2559                                         mlx5_ib_warn(dev, "Failed to prepare LOCAL_INV WQE\n");
2560                                         *bad_wr = wr;
2561                                         goto out;
2562                                 }
2563                                 num_sge = 0;
2564                                 break;
2565
2566                         case IB_WR_FAST_REG_MR:
2567                                 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2568                                 qp->sq.swr_ctx[idx].wr_data = IB_WR_FAST_REG_MR;
2569                                 ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
2570                                 err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
2571                                 if (err) {
2572                                         mlx5_ib_warn(dev, "Failed to prepare FAST_REG_MR WQE\n");
2573                                         *bad_wr = wr;
2574                                         goto out;
2575                                 }
2576                                 num_sge = 0;
2577                                 break;
2578
2579                         default:
2580                                 break;
2581                         }
2582                         break;
2583
2584                 case IB_QPT_UC:
2585                         switch (wr->opcode) {
2586                         case IB_WR_RDMA_WRITE:
2587                         case IB_WR_RDMA_WRITE_WITH_IMM:
2588                                 set_raddr_seg(seg, wr->wr.rdma.remote_addr,
2589                                               wr->wr.rdma.rkey);
2590                                 seg  += sizeof(struct mlx5_wqe_raddr_seg);
2591                                 size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
2592                                 break;
2593
2594                         default:
2595                                 break;
2596                         }
2597                         break;
2598
2599                 case IB_QPT_SMI:
2600                         if (!mlx5_core_is_pf(mdev)) {
2601                                 err = -EINVAL;
2602                                 mlx5_ib_warn(dev, "Only physical function is allowed to send SMP MADs\n");
2603                                 *bad_wr = wr;
2604                                 goto out;
2605                         }
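                             /* FALLTHROUGH: SMI shares the datagram segment setup with GSI and UD */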
2606                 case IB_QPT_GSI:
2607                 case IB_QPT_UD:
2608                         set_datagram_seg(seg, wr);
2609                         seg += sizeof(struct mlx5_wqe_datagram_seg);
2610                         size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
2611                         if (unlikely((seg == qend)))
2612                                 seg = mlx5_get_send_wqe(qp, 0);
2613                         break;
2614                 default:
2615                         break;
2616                 }
2617
2618                 if (wr->send_flags & IB_SEND_INLINE && num_sge) {
2619                         int uninitialized_var(sz);
2620
2621                         err = set_data_inl_seg(qp, wr, seg, &sz);
2622                         if (unlikely(err)) {
2623                                 mlx5_ib_warn(dev, "Failed to prepare inline data segment\n");
2624                                 *bad_wr = wr;
2625                                 goto out;
2626                         }
2627                         inl = 1;
2628                         size += sz;
2629                 } else {
2630                         dpseg = seg;
2631                         for (i = 0; i < num_sge; i++) {
2632                                 if (unlikely(dpseg == qend)) {
2633                                         seg = mlx5_get_send_wqe(qp, 0);
2634                                         dpseg = seg;
2635                                 }
2636                                 if (likely(wr->sg_list[i].length)) {
2637                                         set_data_ptr_seg(dpseg, wr->sg_list + i);
2638                                         size += sizeof(struct mlx5_wqe_data_seg) / 16;
2639                                         dpseg++;
2640                                 }
2641                         }
2642                 }
2643
2644                 finish_wqe(qp, ctrl, size, idx, wr, nreq,
2645                            get_fence(fence, wr), next_fence,
2646                            mlx5_ib_opcode[wr->opcode]);
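                     /* compiled out; change to if (1) to hex-dump posted WQEs when debugging */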
2647                 if (0)
2648                         dump_wqe(qp, idx, size);
2649         }
2650
2651 out:
2652         if (likely(nreq)) {
2653                 qp->sq.head += nreq;
2654
2655                 /* Make sure that descriptors are written before
2656                  * updating doorbell record and ringing the doorbell
2657                  */
2658                 wmb();
2659
2660                 qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
2661
2662                 /* Make sure doorbell record is visible to the HCA before
2663                  * we hit doorbell */
2664                 wmb();
2665
2666                 if (bf->need_lock)
2667                         spin_lock(&bf->lock);
2668                 else
2669                         __acquire(&bf->lock);
2670
2671                 /* TBD enable WC */
2672                 if (BF_ENABLE && nreq == 1 && bf->uuarn && inl && size > 1 &&
2673                     size <= bf->buf_size / 16) {
2674                         mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
2675                         /* wc_wmb(); */
2676                 } else {
2677                         mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
2678                                      MLX5_GET_DOORBELL_LOCK(&bf->lock32));
2679                         /* Make sure doorbells don't leak out of SQ spinlock
2680                          * and reach the HCA out of order.
2681                          */
2682                         mmiowb();
2683                 }
2684                 bf->offset ^= bf->buf_size;
2685                 if (bf->need_lock)
2686                         spin_unlock(&bf->lock);
2687                 else
2688                         __release(&bf->lock);
2689         }
2690
2691         spin_unlock_irqrestore(&qp->sq.lock, flags);
2692
2693         return err;
2694 }
2695
2696 static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
2697 {
2698         sig->signature = calc_sig(sig, size);
2699 }
2700
2701 int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2702                       struct ib_recv_wr **bad_wr)
2703 {
2704         struct mlx5_ib_qp *qp = to_mqp(ibqp);
2705         struct mlx5_wqe_data_seg *scat;
2706         struct mlx5_rwqe_sig *sig;
2707         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2708         struct mlx5_core_dev *mdev = dev->mdev;
2709         unsigned long flags;
2710         int err = 0;
2711         int nreq;
2712         int ind;
2713         int i;
2714
2715         spin_lock_irqsave(&qp->rq.lock, flags);
2716
2717         if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
2718                 err = -EIO;
2719                 *bad_wr = wr;
2720                 nreq = 0;
2721                 goto out;
2722         }
2723
2724         ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
2725
2726         for (nreq = 0; wr; nreq++, wr = wr->next) {
2727                 if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
2728                         err = -ENOMEM;
2729                         *bad_wr = wr;
2730                         goto out;
2731                 }
2732
2733                 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
2734                         err = -EINVAL;
2735                         *bad_wr = wr;
2736                         goto out;
2737                 }
2738
2739                 scat = get_recv_wqe(qp, ind);
2740                 if (qp->wq_sig)
2741                         scat++;
2742
2743                 for (i = 0; i < wr->num_sge; i++)
2744                         set_data_ptr_seg(scat + i, wr->sg_list + i);
2745
2746                 if (i < qp->rq.max_gs) {
2747                         scat[i].byte_count = 0;
2748                         scat[i].lkey       = cpu_to_be32(MLX5_INVALID_LKEY);
2749                         scat[i].addr       = 0;
2750                 }
2751
2752                 if (qp->wq_sig) {
2753                         sig = (struct mlx5_rwqe_sig *)scat;
2754                         set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
2755                 }
2756
2757                 qp->rq.rwr_ctx[ind].wrid = wr->wr_id;
2758
2759                 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
2760         }
2761
2762 out:
2763         if (likely(nreq)) {
2764                 qp->rq.head += nreq;
2765
2766                 /* Make sure that descriptors are written before
2767                  * the doorbell record is updated.
2768                  */
2769                 wmb();
2770
2771                 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
2772         }
2773
2774         spin_unlock_irqrestore(&qp->rq.lock, flags);
2775
2776         return err;
2777 }
2778
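/*
 * Translate hardware QP states, migration states and access bits back into
 * their IB verbs equivalents for mlx5_ib_query_qp() below.  Note that both
 * SQD and SQ_DRAINING map to IB_QPS_SQD; whether the SQ is still draining
 * is reported separately through qp_attr->sq_draining.
 */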
2779 static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
2780 {
2781         switch (mlx5_state) {
2782         case MLX5_QP_STATE_RST:      return IB_QPS_RESET;
2783         case MLX5_QP_STATE_INIT:     return IB_QPS_INIT;
2784         case MLX5_QP_STATE_RTR:      return IB_QPS_RTR;
2785         case MLX5_QP_STATE_RTS:      return IB_QPS_RTS;
2786         case MLX5_QP_STATE_SQ_DRAINING:
2787         case MLX5_QP_STATE_SQD:      return IB_QPS_SQD;
2788         case MLX5_QP_STATE_SQER:     return IB_QPS_SQE;
2789         case MLX5_QP_STATE_ERR:      return IB_QPS_ERR;
2790         default:                     return -1;
2791         }
2792 }
2793
2794 static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
2795 {
2796         switch (mlx5_mig_state) {
2797         case MLX5_QP_PM_ARMED:          return IB_MIG_ARMED;
2798         case MLX5_QP_PM_REARM:          return IB_MIG_REARM;
2799         case MLX5_QP_PM_MIGRATED:       return IB_MIG_MIGRATED;
2800         default: return -1;
2801         }
2802 }
2803
2804 static int to_ib_qp_access_flags(int mlx5_flags)
2805 {
2806         int ib_flags = 0;
2807
2808         if (mlx5_flags & MLX5_QP_BIT_RRE)
2809                 ib_flags |= IB_ACCESS_REMOTE_READ;
2810         if (mlx5_flags & MLX5_QP_BIT_RWE)
2811                 ib_flags |= IB_ACCESS_REMOTE_WRITE;
2812         if (mlx5_flags & MLX5_QP_BIT_RAE)
2813                 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
2814
2815         return ib_flags;
2816 }
2817
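/*
 * Rebuild an ib_ah_attr from a hardware path record.  If the stored port
 * number is out of range the attribute is left zeroed; GRH fields are only
 * filled in when the GRH bit of grh_mlid is set.
 */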
2818 static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
2819                                 struct mlx5_qp_path *path)
2820 {
2821         struct mlx5_core_dev *dev = ibdev->mdev;
2822
2823         memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
2824         ib_ah_attr->port_num      = path->port;
2825
2826         if (ib_ah_attr->port_num == 0 ||
2827             ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
2828                 return;
2829
2830         ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
2831
2832         ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
2833         ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
2834         ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;
2835         ib_ah_attr->ah_flags      = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
2836         if (ib_ah_attr->ah_flags) {
2837                 ib_ah_attr->grh.sgid_index = path->mgid_index;
2838                 ib_ah_attr->grh.hop_limit  = path->hop_limit;
2839                 ib_ah_attr->grh.traffic_class =
2840                         (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
2841                 ib_ah_attr->grh.flow_label =
2842                         be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
2843                 memcpy(ib_ah_attr->grh.dgid.raw,
2844                        path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
2845         }
2846 }
2847
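/*
 * Query the current attributes of a QP.  Raw packet QPs are not supported
 * by this path and return -EOPNOTSUPP.  A minimal sketch of a caller going
 * through the verbs layer (illustrative only):
 *
 *      struct ib_qp_attr attr;
 *      struct ib_qp_init_attr init_attr;
 *
 *      if (!ib_query_qp(qp, &attr, IB_QP_STATE, &init_attr) &&
 *          attr.qp_state == IB_QPS_ERR)
 *              flush and re-create the QP
 */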
2848 int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
2849                      struct ib_qp_init_attr *qp_init_attr)
2850 {
2851         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2852         struct mlx5_ib_qp *qp = to_mqp(ibqp);
2853         struct mlx5_query_qp_mbox_out *outb;
2854         struct mlx5_qp_context *context;
2855         int mlx5_state;
2856         int err = 0;
2857
2858         mutex_lock(&qp->mutex);
2859         if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
2860                 err = -EOPNOTSUPP;
2861                 goto out;
2862         } else {
2863                 outb = kzalloc(sizeof(*outb), GFP_KERNEL);
2864                 if (!outb) {
2865                         err = -ENOMEM;
2866                         goto out;
2867                 }
2868
2869                 context = &outb->ctx;
2870                 err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb,
2871                                          sizeof(*outb));
2872                 if (err) {
2873                         kfree(outb);
2874                         goto out;
2875                 }
2876
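                /* The hardware QP state lives in the top four bits of the
                 * context flags word. */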
2877                 mlx5_state = be32_to_cpu(context->flags) >> 28;
2878
2879                 qp->state                    = to_ib_qp_state(mlx5_state);
2880                 qp_attr->path_mtu            = context->mtu_msgmax >> 5;
2881                 qp_attr->path_mig_state      =
2882                         to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
2883                 qp_attr->qkey                = be32_to_cpu(context->qkey);
2884                 qp_attr->rq_psn              = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
2885                 qp_attr->sq_psn              = be32_to_cpu(context->next_send_psn) & 0xffffff;
2886                 qp_attr->dest_qp_num         = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff;
2887                 qp_attr->qp_access_flags     =
2888                         to_ib_qp_access_flags(be32_to_cpu(context->params2));
2889
2890                 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
2891                         to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
2892                         to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
2893                         qp_attr->alt_pkey_index = be16_to_cpu(context->alt_path.pkey_index);
2894                         qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;
2895                 }
2896
2897                 qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index);
2898                 qp_attr->port_num = context->pri_path.port;
2899
2900                 /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
2901                 qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING;
2902
2903                 qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
2904
2905                 qp_attr->max_dest_rd_atomic =
2906                         1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
2907                 qp_attr->min_rnr_timer      =
2908                         (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
2909                 qp_attr->timeout            = context->pri_path.ackto_lt >> 3;
2910                 qp_attr->retry_cnt          = (be32_to_cpu(context->params1) >> 16) & 0x7;
2911                 qp_attr->rnr_retry          = (be32_to_cpu(context->params1) >> 13) & 0x7;
2912                 qp_attr->alt_timeout        = context->alt_path.ackto_lt >> 3;
2913
2914
2915                 kfree(outb);
2916         }
2917
2918         qp_attr->qp_state            = qp->state;
2919         qp_attr->cur_qp_state        = qp_attr->qp_state;
2920         qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
2921         qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
2922
2923         if (!ibqp->uobject) {
2924                 qp_attr->cap.max_send_wr  = qp->sq.max_post;
2925                 qp_attr->cap.max_send_sge = qp->sq.max_gs;
2926                 qp_init_attr->qp_context = ibqp->qp_context;
2927         } else {
2928                 qp_attr->cap.max_send_wr  = 0;
2929                 qp_attr->cap.max_send_sge = 0;
2930         }
2931
2932         qp_init_attr->qp_type = ibqp->qp_type;
2933         qp_init_attr->recv_cq = ibqp->recv_cq;
2934         qp_init_attr->send_cq = ibqp->send_cq;
2935         qp_init_attr->srq = ibqp->srq;
2936         qp_attr->cap.max_inline_data = qp->max_inline_data;
2937
2938         qp_init_attr->cap            = qp_attr->cap;
2939
2940         qp_init_attr->create_flags = 0;
2941         if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
2942                 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
2943
2944         qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
2945                 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
2946
2947 out:
2948         mutex_unlock(&qp->mutex);
2949         return err;
2950 }
2951
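/*
 * Allocate an XRC domain.  This requires the device to report the xrc
 * capability; the domain number obtained from the core is wrapped in an
 * mlx5_ib_xrcd and handed back to the verbs layer.
 */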
2952 struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
2953                                           struct ib_ucontext *context,
2954                                           struct ib_udata *udata)
2955 {
2956         struct mlx5_ib_dev *dev = to_mdev(ibdev);
2957         struct mlx5_ib_xrcd *xrcd;
2958         int err;
2959
2960         if (!MLX5_CAP_GEN(dev->mdev, xrc))
2961                 return ERR_PTR(-ENOSYS);
2962
2963         xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
2964         if (!xrcd)
2965                 return ERR_PTR(-ENOMEM);
2966
2967         err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn);
2968         if (err) {
2969                 kfree(xrcd);
2970                 return ERR_PTR(-ENOMEM);
2971         }
2972
2973         return &xrcd->ibxrcd;
2974 }
2975
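/*
 * Release an XRC domain.  If the core fails to free the domain number the
 * error is logged and propagated and the wrapper is not freed.
 */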
2976 int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
2977 {
2978         struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
2979         u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
2980         int err;
2981
2982         err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn);
2983         if (err) {
2984                 mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
2985                 return err;
2986         }
2987
2988         kfree(xrcd);
2989
2990         return 0;
2991 }