1 /*-
2  * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27
28 #include <linux/module.h>
29 #include <rdma/ib_umem.h>
30 #include <rdma/ib_cache.h>
31 #include <rdma/ib_user_verbs.h>
32 #include "mlx5_ib.h"
33
34 /* not supported currently */
35 static int wq_signature;
36
37 enum {
38         MLX5_IB_ACK_REQ_FREQ    = 8,
39 };
40
41 enum {
42         MLX5_IB_DEFAULT_SCHED_QUEUE     = 0x83,
43         MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
44         MLX5_IB_LINK_TYPE_IB            = 0,
45         MLX5_IB_LINK_TYPE_ETH           = 1
46 };
47
48 enum {
49         MLX5_IB_SQ_STRIDE       = 6,
50 };
51
52 static const u32 mlx5_ib_opcode[] = {
53         [IB_WR_SEND]                            = MLX5_OPCODE_SEND,
54         [IB_WR_LSO]                             = MLX5_OPCODE_LSO,
55         [IB_WR_SEND_WITH_IMM]                   = MLX5_OPCODE_SEND_IMM,
56         [IB_WR_RDMA_WRITE]                      = MLX5_OPCODE_RDMA_WRITE,
57         [IB_WR_RDMA_WRITE_WITH_IMM]             = MLX5_OPCODE_RDMA_WRITE_IMM,
58         [IB_WR_RDMA_READ]                       = MLX5_OPCODE_RDMA_READ,
59         [IB_WR_ATOMIC_CMP_AND_SWP]              = MLX5_OPCODE_ATOMIC_CS,
60         [IB_WR_ATOMIC_FETCH_AND_ADD]            = MLX5_OPCODE_ATOMIC_FA,
61         [IB_WR_SEND_WITH_INV]                   = MLX5_OPCODE_SEND_INVAL,
62         [IB_WR_LOCAL_INV]                       = MLX5_OPCODE_UMR,
63         [IB_WR_REG_MR]                          = MLX5_OPCODE_UMR,
64         [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = MLX5_OPCODE_ATOMIC_MASKED_CS,
65         [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = MLX5_OPCODE_ATOMIC_MASKED_FA,
66         [MLX5_IB_WR_UMR]                        = MLX5_OPCODE_UMR,
67 };
68
69 struct mlx5_wqe_eth_pad {
70         u8 rsvd0[16];
71 };
72
73 enum raw_qp_set_mask_map {
74         MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID         = 1UL << 0,
75 };
76
77 struct mlx5_modify_raw_qp_param {
78         u16 operation;
79
80         u32 set_mask; /* raw_qp_set_mask_map */
81         u8 rq_q_ctr_id;
82 };
83
84 static void get_cqs(enum ib_qp_type qp_type,
85                     struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
86                     struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
87
88 static int is_qp0(enum ib_qp_type qp_type)
89 {
90         return qp_type == IB_QPT_SMI;
91 }
92
93 static int is_sqp(enum ib_qp_type qp_type)
94 {
95         return is_qp0(qp_type) || is_qp1(qp_type);
96 }
97
98 static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
99 {
100         return mlx5_buf_offset(&qp->buf, offset);
101 }
102
103 static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
104 {
105         return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
106 }
107
108 void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
109 {
110         return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
111 }
112
113 /**
114  * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
115  *
116  * @qp: QP to copy from.
117  * @send: copy from the send queue when non-zero, use the receive queue
118  *        otherwise.
119  * @wqe_index:  index to start copying from. For send work queues, the
120  *              wqe_index is in units of MLX5_SEND_WQE_BB.
121  *              For receive work queues, it is the index of the work
122  *              queue element in the queue.
123  * @buffer: destination buffer.
124  * @length: maximum number of bytes to copy.
125  *
126  * Copies at least a single WQE, but may copy more data.
127  *
128  * Return: the number of bytes copied, or an error code.
129  */
130 int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
131                           void *buffer, u32 length,
132                           struct mlx5_ib_qp_base *base)
133 {
134         struct ib_device *ibdev = qp->ibqp.device;
135         struct mlx5_ib_dev *dev = to_mdev(ibdev);
136         struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
137         size_t offset;
138         size_t wq_end;
139         struct ib_umem *umem = base->ubuffer.umem;
140         u32 first_copy_length;
141         int wqe_length;
142         int ret;
143
144         if (wq->wqe_cnt == 0) {
145                 mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n",
146                             qp->ibqp.qp_type);
147                 return -EINVAL;
148         }
149
150         offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift);
151         wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift);
152
153         if (send && length < sizeof(struct mlx5_wqe_ctrl_seg))
154                 return -EINVAL;
155
156         if (offset > umem->length ||
157             (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length))
158                 return -EINVAL;
159
160         first_copy_length = min_t(u32, offset + length, wq_end) - offset;
161         ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length);
162         if (ret)
163                 return ret;
164
165         if (send) {
166                 struct mlx5_wqe_ctrl_seg *ctrl = buffer;
167                 int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
168
169                 wqe_length = ds * MLX5_WQE_DS_UNITS;
170         } else {
171                 wqe_length = 1 << wq->wqe_shift;
172         }
173
174         if (wqe_length <= first_copy_length)
175                 return first_copy_length;
176
177         ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset,
178                                 wqe_length - first_copy_length);
179         if (ret)
180                 return ret;
181
182         return wqe_length;
183 }
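/*
 * Worked example (editorial sketch, not part of the driver): how the
 * wrap-around copy above behaves.  Assume a send queue with wq->offset = 0,
 * wqe_cnt = 64 and wqe_shift = 6 (64-byte strides), and a caller passing
 * wqe_index = 63 and length = 256:
 *
 *     offset            = 63 << 6                       = 4032
 *     wq_end            = 64 << 6                       = 4096
 *     first_copy_length = min(4032 + 256, 4096) - 4032  = 64
 *
 * If the ctrl segment of that WQE reports ds = 8, then wqe_length =
 * 8 * MLX5_WQE_DS_UNITS = 128 > 64, so the remaining 64 bytes are fetched
 * from wq->offset (the start of the ring) and 128 is returned.
 */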
184
185 static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
186 {
187         struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
188         struct ib_event event;
189
190         if (type == MLX5_EVENT_TYPE_PATH_MIG) {
191                 /* This event is only valid for trans_qps */
192                 to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
193         }
194
195         if (ibqp->event_handler) {
196                 event.device     = ibqp->device;
197                 event.element.qp = ibqp;
198                 switch (type) {
199                 case MLX5_EVENT_TYPE_PATH_MIG:
200                         event.event = IB_EVENT_PATH_MIG;
201                         break;
202                 case MLX5_EVENT_TYPE_COMM_EST:
203                         event.event = IB_EVENT_COMM_EST;
204                         break;
205                 case MLX5_EVENT_TYPE_SQ_DRAINED:
206                         event.event = IB_EVENT_SQ_DRAINED;
207                         break;
208                 case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
209                         event.event = IB_EVENT_QP_LAST_WQE_REACHED;
210                         break;
211                 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
212                         event.event = IB_EVENT_QP_FATAL;
213                         break;
214                 case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
215                         event.event = IB_EVENT_PATH_MIG_ERR;
216                         break;
217                 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
218                         event.event = IB_EVENT_QP_REQ_ERR;
219                         break;
220                 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
221                         event.event = IB_EVENT_QP_ACCESS_ERR;
222                         break;
223                 default:
224                         pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
225                         return;
226                 }
227
228                 ibqp->event_handler(&event, ibqp->qp_context);
229         }
230 }
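/*
 * Consumer-side sketch (hypothetical code, not taken from this file): the
 * handler invoked above is whatever the QP owner registered through
 * ib_qp_init_attr at creation time, e.g.
 *
 *     static void my_qp_event(struct ib_event *ev, void *ctx)
 *     {
 *             pr_info("QP event %d on context %p\n", ev->event, ctx);
 *     }
 *     ...
 *     init_attr.event_handler = my_qp_event;
 *     init_attr.qp_context    = my_ctx;
 *
 * The verbs core copies those two fields into the ib_qp, and this function
 * maps the raw mlx5 event type to the generic IB event code before
 * calling back.
 */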
231
232 static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
233                        int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
234 {
235         int wqe_size;
236         int wq_size;
237
238         /* Sanity check RQ size before proceeding */
239         if (cap->max_recv_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)))
240                 return -EINVAL;
241
242         if (!has_rq) {
243                 qp->rq.max_gs = 0;
244                 qp->rq.wqe_cnt = 0;
245                 qp->rq.wqe_shift = 0;
246                 cap->max_recv_wr = 0;
247                 cap->max_recv_sge = 0;
248         } else {
249                 if (ucmd) {
250                         qp->rq.wqe_cnt = ucmd->rq_wqe_count;
251                         qp->rq.wqe_shift = ucmd->rq_wqe_shift;
252                         qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
253                         qp->rq.max_post = qp->rq.wqe_cnt;
254                 } else {
255                         wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
256                         wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
257                         wqe_size = roundup_pow_of_two(wqe_size);
258                         wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
259                         wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
260                         qp->rq.wqe_cnt = wq_size / wqe_size;
261                         if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq)) {
262                                 mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
263                                             wqe_size,
264                                             MLX5_CAP_GEN(dev->mdev,
265                                                          max_wqe_sz_rq));
266                                 return -EINVAL;
267                         }
268                         qp->rq.wqe_shift = ilog2(wqe_size);
269                         qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
270                         qp->rq.max_post = qp->rq.wqe_cnt;
271                 }
272         }
273
274         return 0;
275 }
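/*
 * Worked example (illustrative numbers only): a kernel QP asking for
 * cap->max_recv_wr = 100 and cap->max_recv_sge = 3 with wq_sig = 0, and
 * assuming the usual 16-byte mlx5_wqe_data_seg, takes the !ucmd branch:
 *
 *     wqe_size  = roundup_pow_of_two(3 * 16)    = 64
 *     wq_size   = roundup_pow_of_two(100) * 64  = 8192
 *     wqe_cnt   = 8192 / 64                     = 128
 *     wqe_shift = ilog2(64)                     = 6
 *     max_gs    = 64 / 16                       = 4
 *
 * i.e. rounding up to powers of two means the queue ends up supporting
 * more receive WRs and SGEs than were requested.
 */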
276
277 static int sq_overhead(struct ib_qp_init_attr *attr)
278 {
279         int size = 0;
280
281         switch (attr->qp_type) {
282         case IB_QPT_XRC_INI:
283                 size += sizeof(struct mlx5_wqe_xrc_seg);
284                 /* fall through */
285         case IB_QPT_RC:
286                 size += sizeof(struct mlx5_wqe_ctrl_seg) +
287                         max(sizeof(struct mlx5_wqe_atomic_seg) +
288                             sizeof(struct mlx5_wqe_raddr_seg),
289                             sizeof(struct mlx5_wqe_umr_ctrl_seg) +
290                             sizeof(struct mlx5_mkey_seg));
291                 break;
292
293         case IB_QPT_XRC_TGT:
294                 return 0;
295
296         case IB_QPT_UC:
297                 size += sizeof(struct mlx5_wqe_ctrl_seg) +
298                         max(sizeof(struct mlx5_wqe_raddr_seg),
299                             sizeof(struct mlx5_wqe_umr_ctrl_seg) +
300                             sizeof(struct mlx5_mkey_seg));
301                 break;
302
303         case IB_QPT_UD:
304                 if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
305                         size += sizeof(struct mlx5_wqe_eth_pad) +
306                                 sizeof(struct mlx5_wqe_eth_seg);
307                 /* fall through */
308         case IB_QPT_SMI:
309         case MLX5_IB_QPT_HW_GSI:
310                 size += sizeof(struct mlx5_wqe_ctrl_seg) +
311                         sizeof(struct mlx5_wqe_datagram_seg);
312                 break;
313
314         case MLX5_IB_QPT_REG_UMR:
315                 size += sizeof(struct mlx5_wqe_ctrl_seg) +
316                         sizeof(struct mlx5_wqe_umr_ctrl_seg) +
317                         sizeof(struct mlx5_mkey_seg);
318                 break;
319
320         default:
321                 return -EINVAL;
322         }
323
324         return size;
325 }
326
327 static int calc_send_wqe(struct ib_qp_init_attr *attr)
328 {
329         int inl_size = 0;
330         int size;
331
332         size = sq_overhead(attr);
333         if (size < 0)
334                 return size;
335
336         if (attr->cap.max_inline_data) {
337                 inl_size = size + sizeof(struct mlx5_wqe_inline_seg) +
338                         attr->cap.max_inline_data;
339         }
340
341         size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
342         if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN &&
343             ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
344                 return MLX5_SIG_WQE_SIZE;
345         else
346                 return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
347 }
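/*
 * Illustrative sketch of the send WQE sizing above, assuming the common
 * mlx5 segment sizes (16-byte ctrl/raddr/atomic/data segments, 48-byte
 * UMR ctrl segment, 64-byte mkey segment, 4-byte inline header):
 *
 *   RC QP, max_send_sge = 2, no inline data:
 *     sq_overhead = 16 + max(16 + 16, 48 + 64)         = 128
 *     size        = 128 + 2 * 16                       = 160
 *     WQE size    = ALIGN(160, MLX5_SEND_WQE_BB = 64)  = 192
 *
 *   Same QP with max_inline_data = 64:
 *     inl_size    = 128 + 4 + 64                       = 196
 *     WQE size    = ALIGN(max(196, 160), 64)           = 256
 */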
348
349 static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size)
350 {
351         int max_sge;
352
353         if (attr->qp_type == IB_QPT_RC)
354                 max_sge = (min_t(int, wqe_size, 512) -
355                            sizeof(struct mlx5_wqe_ctrl_seg) -
356                            sizeof(struct mlx5_wqe_raddr_seg)) /
357                         sizeof(struct mlx5_wqe_data_seg);
358         else if (attr->qp_type == IB_QPT_XRC_INI)
359                 max_sge = (min_t(int, wqe_size, 512) -
360                            sizeof(struct mlx5_wqe_ctrl_seg) -
361                            sizeof(struct mlx5_wqe_xrc_seg) -
362                            sizeof(struct mlx5_wqe_raddr_seg)) /
363                         sizeof(struct mlx5_wqe_data_seg);
364         else
365                 max_sge = (wqe_size - sq_overhead(attr)) /
366                         sizeof(struct mlx5_wqe_data_seg);
367
368         return min_t(int, max_sge, (wqe_size - sq_overhead(attr)) /
369                      sizeof(struct mlx5_wqe_data_seg));
370 }
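/*
 * Worked example (editorial, using the same hypothetical RC QP as above
 * with wqe_size = 192): the type-specific bound is
 * (min(192, 512) - 16 - 16) / 16 = 10 SGEs, while the generic bound of
 * (wqe_size - sq_overhead) / 16 = (192 - 128) / 16 = 4 is tighter, so 4
 * is returned.  The min_t() keeps the advertised SGE count consistent
 * with the overhead charged in calc_send_wqe().
 */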
371
372 static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
373                         struct mlx5_ib_qp *qp)
374 {
375         int wqe_size;
376         int wq_size;
377
378         if (!attr->cap.max_send_wr)
379                 return 0;
380
381         wqe_size = calc_send_wqe(attr);
382         mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size);
383         if (wqe_size < 0)
384                 return wqe_size;
385
386         if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
387                 mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
388                             wqe_size, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
389                 return -EINVAL;
390         }
391
392         qp->max_inline_data = wqe_size - sq_overhead(attr) -
393                               sizeof(struct mlx5_wqe_inline_seg);
394         attr->cap.max_inline_data = qp->max_inline_data;
395
396         if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
397                 qp->signature_en = true;
398
399         wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
400         qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
401         if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
402                 mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n",
403                             qp->sq.wqe_cnt,
404                             1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
405                 return -ENOMEM;
406         }
407         qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
408         qp->sq.max_gs = get_send_sge(attr, wqe_size);
409         if (qp->sq.max_gs < attr->cap.max_send_sge)
410                 return -ENOMEM;
411
412         attr->cap.max_send_sge = qp->sq.max_gs;
413         qp->sq.max_post = wq_size / wqe_size;
414         attr->cap.max_send_wr = qp->sq.max_post;
415
416         return wq_size;
417 }
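/*
 * Continuing the sketch (hypothetical values): with wqe_size = 192 and
 * attr->cap.max_send_wr = 64,
 *
 *     wq_size     = roundup_pow_of_two(64 * 192)  = 16384
 *     sq.wqe_cnt  = 16384 / MLX5_SEND_WQE_BB      = 256
 *     sq.max_post = 16384 / 192                   = 85
 *
 * i.e. the queue is accounted in 64-byte basic blocks, while the number
 * of postable WRs is bounded by the worst-case WQE size.
 */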
418
419 static int set_user_buf_size(struct mlx5_ib_dev *dev,
420                             struct mlx5_ib_qp *qp,
421                             struct mlx5_ib_create_qp *ucmd,
422                             struct mlx5_ib_qp_base *base,
423                             struct ib_qp_init_attr *attr)
424 {
425         int desc_sz = 1 << qp->sq.wqe_shift;
426
427         if (desc_sz > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
428                 mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
429                              desc_sz, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
430                 return -EINVAL;
431         }
432
433         if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) {
434                 mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n",
435                              ucmd->sq_wqe_count);
436                 return -EINVAL;
437         }
438
439         qp->sq.wqe_cnt = ucmd->sq_wqe_count;
440
441         if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
442                 mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
443                              qp->sq.wqe_cnt,
444                              1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
445                 return -EINVAL;
446         }
447
448         if (attr->qp_type == IB_QPT_RAW_PACKET) {
449                 base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
450                 qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
451         } else {
452                 base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
453                                          (qp->sq.wqe_cnt << 6);
454         }
455
456         return 0;
457 }
458
459 static int qp_has_rq(struct ib_qp_init_attr *attr)
460 {
461         if (attr->qp_type == IB_QPT_XRC_INI ||
462             attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
463             attr->qp_type == MLX5_IB_QPT_REG_UMR ||
464             !attr->cap.max_recv_wr)
465                 return 0;
466
467         return 1;
468 }
469
470 static int first_med_uuar(void)
471 {
472         return 1;
473 }
474
475 static int next_uuar(int n)
476 {
477         n++;
478
479         while (((n % 4) & 2))
480                 n++;
481
482         return n;
483 }
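/*
 * Editorial note: starting from first_med_uuar() == 1, successive
 * next_uuar() values are
 *
 *     1 -> 4 -> 5 -> 8 -> 9 -> 12 -> 13 -> ...
 *
 * i.e. every index with (n % 4) & 2 set is skipped, leaving two usable
 * slots out of every group of four; uuarn_to_uar_index() below maps each
 * group of MLX5_BF_REGS_PER_PAGE indices onto one UAR page, so the skipped
 * slots are never handed out by the medium/high class allocators.
 */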
484
485 static int num_med_uuar(struct mlx5_uuar_info *uuari)
486 {
487         int n;
488
489         n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
490                 uuari->num_low_latency_uuars - 1;
491
492         return n >= 0 ? n : 0;
493 }
494
495 static int max_uuari(struct mlx5_uuar_info *uuari)
496 {
497         return uuari->num_uars * 4;
498 }
499
500 static int first_hi_uuar(struct mlx5_uuar_info *uuari)
501 {
502         int med;
503         int i;
504         int t;
505
506         med = num_med_uuar(uuari);
507         for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
508                 t++;
509                 if (t == med)
510                         return next_uuar(i);
511         }
512
513         return 0;
514 }
515
516 static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
517 {
518         int i;
519
520         for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
521                 if (!test_bit(i, uuari->bitmap)) {
522                         set_bit(i, uuari->bitmap);
523                         uuari->count[i]++;
524                         return i;
525                 }
526         }
527
528         return -ENOMEM;
529 }
530
531 static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
532 {
533         int minidx = first_med_uuar();
534         int i;
535
536         for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
537                 if (uuari->count[i] < uuari->count[minidx])
538                         minidx = i;
539         }
540
541         uuari->count[minidx]++;
542         return minidx;
543 }
544
545 static int alloc_uuar(struct mlx5_uuar_info *uuari,
546                       enum mlx5_ib_latency_class lat)
547 {
548         int uuarn = -EINVAL;
549
550         mutex_lock(&uuari->lock);
551         switch (lat) {
552         case MLX5_IB_LATENCY_CLASS_LOW:
553                 uuarn = 0;
554                 uuari->count[uuarn]++;
555                 break;
556
557         case MLX5_IB_LATENCY_CLASS_MEDIUM:
558                 if (uuari->ver < 2)
559                         uuarn = -ENOMEM;
560                 else
561                         uuarn = alloc_med_class_uuar(uuari);
562                 break;
563
564         case MLX5_IB_LATENCY_CLASS_HIGH:
565                 if (uuari->ver < 2)
566                         uuarn = -ENOMEM;
567                 else
568                         uuarn = alloc_high_class_uuar(uuari);
569                 break;
570
571         case MLX5_IB_LATENCY_CLASS_FAST_PATH:
572                 uuarn = 2;
573                 break;
574         }
575         mutex_unlock(&uuari->lock);
576
577         return uuarn;
578 }
579
580 static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
581 {
582         clear_bit(uuarn, uuari->bitmap);
583         --uuari->count[uuarn];
584 }
585
586 static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
587 {
588         clear_bit(uuarn, uuari->bitmap);
589         --uuari->count[uuarn];
590 }
591
592 static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
593 {
594         int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
595         int high_uuar = nuuars - uuari->num_low_latency_uuars;
596
597         mutex_lock(&uuari->lock);
598         if (uuarn == 0) {
599                 --uuari->count[uuarn];
600                 goto out;
601         }
602
603         if (uuarn < high_uuar) {
604                 free_med_class_uuar(uuari, uuarn);
605                 goto out;
606         }
607
608         free_high_class_uuar(uuari, uuarn);
609
610 out:
611         mutex_unlock(&uuari->lock);
612 }
613
614 static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
615 {
616         switch (state) {
617         case IB_QPS_RESET:      return MLX5_QP_STATE_RST;
618         case IB_QPS_INIT:       return MLX5_QP_STATE_INIT;
619         case IB_QPS_RTR:        return MLX5_QP_STATE_RTR;
620         case IB_QPS_RTS:        return MLX5_QP_STATE_RTS;
621         case IB_QPS_SQD:        return MLX5_QP_STATE_SQD;
622         case IB_QPS_SQE:        return MLX5_QP_STATE_SQER;
623         case IB_QPS_ERR:        return MLX5_QP_STATE_ERR;
624         default:                return -1;
625         }
626 }
627
628 static int to_mlx5_st(enum ib_qp_type type)
629 {
630         switch (type) {
631         case IB_QPT_RC:                 return MLX5_QP_ST_RC;
632         case IB_QPT_UC:                 return MLX5_QP_ST_UC;
633         case IB_QPT_UD:                 return MLX5_QP_ST_UD;
634         case MLX5_IB_QPT_REG_UMR:       return MLX5_QP_ST_REG_UMR;
635         case IB_QPT_XRC_INI:
636         case IB_QPT_XRC_TGT:            return MLX5_QP_ST_XRC;
637         case IB_QPT_SMI:                return MLX5_QP_ST_QP0;
638         case MLX5_IB_QPT_HW_GSI:        return MLX5_QP_ST_QP1;
639         case IB_QPT_RAW_IPV6:           return MLX5_QP_ST_RAW_IPV6;
640         case IB_QPT_RAW_PACKET:
641         case IB_QPT_RAW_ETHERTYPE:      return MLX5_QP_ST_RAW_ETHERTYPE;
642         case IB_QPT_MAX:
643         default:                return -EINVAL;
644         }
645 }
646
647 static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
648                              struct mlx5_ib_cq *recv_cq);
649 static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
650                                struct mlx5_ib_cq *recv_cq);
651
652 static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
653 {
654         return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
655 }
656
657 static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
658                             struct ib_pd *pd,
659                             unsigned long addr, size_t size,
660                             struct ib_umem **umem,
661                             int *npages, int *page_shift, int *ncont,
662                             u32 *offset)
663 {
664         int err;
665
666         *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
667         if (IS_ERR(*umem)) {
668                 mlx5_ib_dbg(dev, "umem_get failed\n");
669                 return PTR_ERR(*umem);
670         }
671
672         mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
673
674         err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
675         if (err) {
676                 mlx5_ib_warn(dev, "bad offset\n");
677                 goto err_umem;
678         }
679
680         mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
681                     addr, size, *npages, *page_shift, *ncont, *offset);
682
683         return 0;
684
685 err_umem:
686         ib_umem_release(*umem);
687         *umem = NULL;
688
689         return err;
690 }
691
692 static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq)
693 {
694         struct mlx5_ib_ucontext *context;
695
696         context = to_mucontext(pd->uobject->context);
697         mlx5_ib_db_unmap_user(context, &rwq->db);
698         if (rwq->umem)
699                 ib_umem_release(rwq->umem);
700 }
701
702 static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
703                           struct mlx5_ib_rwq *rwq,
704                           struct mlx5_ib_create_wq *ucmd)
705 {
706         struct mlx5_ib_ucontext *context;
707         int page_shift = 0;
708         int npages;
709         u32 offset = 0;
710         int ncont = 0;
711         int err;
712
713         if (!ucmd->buf_addr)
714                 return -EINVAL;
715
716         context = to_mucontext(pd->uobject->context);
717         rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr,
718                                rwq->buf_size, 0, 0);
719         if (IS_ERR(rwq->umem)) {
720                 mlx5_ib_dbg(dev, "umem_get failed\n");
721                 err = PTR_ERR(rwq->umem);
722                 return err;
723         }
724
725         mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift,
726                            &ncont, NULL);
727         err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift,
728                                      &rwq->rq_page_offset);
729         if (err) {
730                 mlx5_ib_warn(dev, "bad offset\n");
731                 goto err_umem;
732         }
733
734         rwq->rq_num_pas = ncont;
735         rwq->page_shift = page_shift;
736         rwq->log_page_size =  page_shift - MLX5_ADAPTER_PAGE_SHIFT;
737         rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE);
738
739         mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n",
740                     (unsigned long long)ucmd->buf_addr, rwq->buf_size,
741                     npages, page_shift, ncont, offset);
742
743         err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db);
744         if (err) {
745                 mlx5_ib_dbg(dev, "map failed\n");
746                 goto err_umem;
747         }
748
749         rwq->create_type = MLX5_WQ_USER;
750         return 0;
751
752 err_umem:
753         ib_umem_release(rwq->umem);
754         return err;
755 }
756
757 static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
758                           struct mlx5_ib_qp *qp, struct ib_udata *udata,
759                           struct ib_qp_init_attr *attr,
760                           u32 **in,
761                           struct mlx5_ib_create_qp_resp *resp, int *inlen,
762                           struct mlx5_ib_qp_base *base)
763 {
764         struct mlx5_ib_ucontext *context;
765         struct mlx5_ib_create_qp ucmd;
766         struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
767         int page_shift = 0;
768         int uar_index;
769         int npages;
770         u32 offset = 0;
771         int uuarn;
772         int ncont = 0;
773         __be64 *pas;
774         void *qpc;
775         int err;
776
777         err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
778         if (err) {
779                 mlx5_ib_dbg(dev, "copy failed\n");
780                 return err;
781         }
782
783         context = to_mucontext(pd->uobject->context);
784         /*
785          * TBD: should come from the verbs when we have the API
786          */
787         if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
788                 /* In CROSS_CHANNEL CQ and QP must use the same UAR */
789                 uuarn = MLX5_CROSS_CHANNEL_UUAR;
790         else {
791                 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
792                 if (uuarn < 0) {
793                         mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
794                         mlx5_ib_dbg(dev, "reverting to medium latency\n");
795                         uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
796                         if (uuarn < 0) {
797                                 mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
798                                 mlx5_ib_dbg(dev, "reverting to high latency\n");
799                                 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
800                                 if (uuarn < 0) {
801                                         mlx5_ib_warn(dev, "uuar allocation failed\n");
802                                         return uuarn;
803                                 }
804                         }
805                 }
806         }
807
808         uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
809         mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
810
811         qp->rq.offset = 0;
812         qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
813         qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
814
815         err = set_user_buf_size(dev, qp, &ucmd, base, attr);
816         if (err)
817                 goto err_uuar;
818
819         if (ucmd.buf_addr && ubuffer->buf_size) {
820                 ubuffer->buf_addr = ucmd.buf_addr;
821                 err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
822                                        ubuffer->buf_size,
823                                        &ubuffer->umem, &npages, &page_shift,
824                                        &ncont, &offset);
825                 if (err)
826                         goto err_uuar;
827         } else {
828                 ubuffer->umem = NULL;
829         }
830
831         *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
832                  MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
833         *in = mlx5_vzalloc(*inlen);
834         if (!*in) {
835                 err = -ENOMEM;
836                 goto err_umem;
837         }
838
839         pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
840         if (ubuffer->umem)
841                 mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
842
843         qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
844
845         MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
846         MLX5_SET(qpc, qpc, page_offset, offset);
847
848         MLX5_SET(qpc, qpc, uar_page, uar_index);
849         resp->uuar_index = uuarn;
850         qp->uuarn = uuarn;
851
852         err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
853         if (err) {
854                 mlx5_ib_dbg(dev, "map failed\n");
855                 goto err_free;
856         }
857
858         err = ib_copy_to_udata(udata, resp, sizeof(*resp));
859         if (err) {
860                 mlx5_ib_dbg(dev, "copy failed\n");
861                 goto err_unmap;
862         }
863         qp->create_type = MLX5_QP_USER;
864
865         return 0;
866
867 err_unmap:
868         mlx5_ib_db_unmap_user(context, &qp->db);
869
870 err_free:
871         kvfree(*in);
872
873 err_umem:
874         if (ubuffer->umem)
875                 ib_umem_release(ubuffer->umem);
876
877 err_uuar:
878         free_uuar(&context->uuari, uuarn);
879         return err;
880 }
881
882 static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
883                             struct mlx5_ib_qp_base *base)
884 {
885         struct mlx5_ib_ucontext *context;
886
887         context = to_mucontext(pd->uobject->context);
888         mlx5_ib_db_unmap_user(context, &qp->db);
889         if (base->ubuffer.umem)
890                 ib_umem_release(base->ubuffer.umem);
891         free_uuar(&context->uuari, qp->uuarn);
892 }
893
894 static int create_kernel_qp(struct mlx5_ib_dev *dev,
895                             struct ib_qp_init_attr *init_attr,
896                             struct mlx5_ib_qp *qp,
897                             u32 **in, int *inlen,
898                             struct mlx5_ib_qp_base *base)
899 {
900         enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
901         struct mlx5_uuar_info *uuari;
902         int uar_index;
903         void *qpc;
904         int uuarn;
905         int err;
906
907         uuari = &dev->mdev->priv.uuari;
908         if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
909                                         IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
910                                         IB_QP_CREATE_IPOIB_UD_LSO |
911                                         mlx5_ib_create_qp_sqpn_qp1()))
912                 return -EINVAL;
913
914         if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
915                 lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
916
917         uuarn = alloc_uuar(uuari, lc);
918         if (uuarn < 0) {
919                 mlx5_ib_dbg(dev, "uuar allocation failed\n");
920                 return -ENOMEM;
921         }
922
923         qp->bf = &uuari->bfs[uuarn];
924         uar_index = qp->bf->uar->index;
925
926         err = calc_sq_size(dev, init_attr, qp);
927         if (err < 0) {
928                 mlx5_ib_dbg(dev, "err %d\n", err);
929                 goto err_uuar;
930         }
931
932         qp->rq.offset = 0;
933         qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
934         base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
935
936         err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size,
937             2 * PAGE_SIZE, &qp->buf);
938         if (err) {
939                 mlx5_ib_dbg(dev, "err %d\n", err);
940                 goto err_uuar;
941         }
942
943         qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
944         *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
945                  MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
946         *in = mlx5_vzalloc(*inlen);
947         if (!*in) {
948                 err = -ENOMEM;
949                 goto err_buf;
950         }
951
952         qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
953         MLX5_SET(qpc, qpc, uar_page, uar_index);
954         MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
955
956         /* Set "fast registration enabled" for all kernel QPs */
957         MLX5_SET(qpc, qpc, fre, 1);
958         MLX5_SET(qpc, qpc, rlky, 1);
959
960         if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
961                 MLX5_SET(qpc, qpc, deth_sqpn, 1);
962                 qp->flags |= MLX5_IB_QP_SQPN_QP1;
963         }
964
965         mlx5_fill_page_array(&qp->buf,
966                              (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas));
967
968         err = mlx5_db_alloc(dev->mdev, &qp->db);
969         if (err) {
970                 mlx5_ib_dbg(dev, "err %d\n", err);
971                 goto err_free;
972         }
973
974         qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL);
975         qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL);
976         qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL);
977         qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL);
978         qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL);
979
980         if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
981             !qp->sq.w_list || !qp->sq.wqe_head) {
982                 err = -ENOMEM;
983                 goto err_wrid;
984         }
985         qp->create_type = MLX5_QP_KERNEL;
986
987         return 0;
988
989 err_wrid:
990         mlx5_db_free(dev->mdev, &qp->db);
991         kfree(qp->sq.wqe_head);
992         kfree(qp->sq.w_list);
993         kfree(qp->sq.wrid);
994         kfree(qp->sq.wr_data);
995         kfree(qp->rq.wrid);
996
997 err_free:
998         kvfree(*in);
999
1000 err_buf:
1001         mlx5_buf_free(dev->mdev, &qp->buf);
1002
1003 err_uuar:
1004         free_uuar(&dev->mdev->priv.uuari, uuarn);
1005         return err;
1006 }
1007
1008 static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1009 {
1010         mlx5_db_free(dev->mdev, &qp->db);
1011         kfree(qp->sq.wqe_head);
1012         kfree(qp->sq.w_list);
1013         kfree(qp->sq.wrid);
1014         kfree(qp->sq.wr_data);
1015         kfree(qp->rq.wrid);
1016         mlx5_buf_free(dev->mdev, &qp->buf);
1017         free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn);
1018 }
1019
1020 static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
1021 {
1022         if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
1023             (attr->qp_type == IB_QPT_XRC_INI))
1024                 return MLX5_SRQ_RQ;
1025         else if (!qp->has_rq)
1026                 return MLX5_ZERO_LEN_RQ;
1027         else
1028                 return MLX5_NON_ZERO_RQ;
1029 }
1030
1031 static int is_connected(enum ib_qp_type qp_type)
1032 {
1033         if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC)
1034                 return 1;
1035
1036         return 0;
1037 }
1038
1039 static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
1040                                     struct mlx5_ib_sq *sq, u32 tdn)
1041 {
1042         u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
1043         void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
1044
1045         MLX5_SET(tisc, tisc, transport_domain, tdn);
1046         return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
1047 }
1048
1049 static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
1050                                       struct mlx5_ib_sq *sq)
1051 {
1052         mlx5_core_destroy_tis(dev->mdev, sq->tisn);
1053 }
1054
1055 static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
1056                                    struct mlx5_ib_sq *sq, void *qpin,
1057                                    struct ib_pd *pd)
1058 {
1059         struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
1060         __be64 *pas;
1061         void *in;
1062         void *sqc;
1063         void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
1064         void *wq;
1065         int inlen;
1066         int err;
1067         int page_shift = 0;
1068         int npages;
1069         int ncont = 0;
1070         u32 offset = 0;
1071
1072         err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
1073                                &sq->ubuffer.umem, &npages, &page_shift,
1074                                &ncont, &offset);
1075         if (err)
1076                 return err;
1077
1078         inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
1079         in = mlx5_vzalloc(inlen);
1080         if (!in) {
1081                 err = -ENOMEM;
1082                 goto err_umem;
1083         }
1084
1085         sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1086         MLX5_SET(sqc, sqc, flush_in_error_en, 1);
1087         MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
1088         MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
1089         MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
1090         MLX5_SET(sqc, sqc, tis_lst_sz, 1);
1091         MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
1092
1093         wq = MLX5_ADDR_OF(sqc, sqc, wq);
1094         MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1095         MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
1096         MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
1097         MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
1098         MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1099         MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
1100         MLX5_SET(wq, wq, log_wq_pg_sz,  page_shift - MLX5_ADAPTER_PAGE_SHIFT);
1101         MLX5_SET(wq, wq, page_offset, offset);
1102
1103         pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
1104         mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
1105
1106         err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
1107
1108         kvfree(in);
1109
1110         if (err)
1111                 goto err_umem;
1112
1113         return 0;
1114
1115 err_umem:
1116         ib_umem_release(sq->ubuffer.umem);
1117         sq->ubuffer.umem = NULL;
1118
1119         return err;
1120 }
1121
1122 static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
1123                                      struct mlx5_ib_sq *sq)
1124 {
1125         mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
1126         ib_umem_release(sq->ubuffer.umem);
1127 }
1128
1129 static int get_rq_pas_size(void *qpc)
1130 {
1131         u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
1132         u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
1133         u32 log_rq_size   = MLX5_GET(qpc, qpc, log_rq_size);
1134         u32 page_offset   = MLX5_GET(qpc, qpc, page_offset);
1135         u32 po_quanta     = 1 << (log_page_size - 6);
1136         u32 rq_sz         = 1 << (log_rq_size + 4 + log_rq_stride);
1137         u32 page_size     = 1 << log_page_size;
1138         u32 rq_sz_po      = rq_sz + (page_offset * po_quanta);
1139         u32 rq_num_pas    = (rq_sz_po + page_size - 1) / page_size;
1140
1141         return rq_num_pas * sizeof(u64);
1142 }
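/*
 * Worked example (illustrative): with log_page_size = 0 (i.e. 4KB pages,
 * since 12 is added back above), log_rq_stride = 2 (the +4 makes that a
 * 64-byte stride), log_rq_size = 7 (128 entries) and page_offset = 0:
 *
 *     rq_sz      = 1 << (7 + 4 + 2)      = 8192 bytes
 *     rq_num_pas = (8192 + 4095) / 4096  = 2
 *
 * so 2 * sizeof(u64) = 16 bytes of PAS entries are reserved for the RQ.
 */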
1143
1144 static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1145                                    struct mlx5_ib_rq *rq, void *qpin)
1146 {
1147         struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
1148         __be64 *pas;
1149         __be64 *qp_pas;
1150         void *in;
1151         void *rqc;
1152         void *wq;
1153         void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
1154         int inlen;
1155         int err;
1156         u32 rq_pas_size = get_rq_pas_size(qpc);
1157
1158         inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
1159         in = mlx5_vzalloc(inlen);
1160         if (!in)
1161                 return -ENOMEM;
1162
1163         rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
1164         MLX5_SET(rqc, rqc, vlan_strip_disable, 1);
1165         MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE);
1166         MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
1167         MLX5_SET(rqc, rqc, flush_in_error_en, 1);
1168         MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
1169         MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
1170
1171         if (mqp->flags & MLX5_IB_QP_CAP_SCATTER_FCS)
1172                 MLX5_SET(rqc, rqc, scatter_fcs, 1);
1173
1174         wq = MLX5_ADDR_OF(rqc, rqc, wq);
1175         MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1176         MLX5_SET(wq, wq, end_padding_mode,
1177                  MLX5_GET(qpc, qpc, end_padding_mode));
1178         MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
1179         MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
1180         MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
1181         MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
1182         MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
1183         MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
1184
1185         pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
1186         qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
1187         memcpy(pas, qp_pas, rq_pas_size);
1188
1189         err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
1190
1191         kvfree(in);
1192
1193         return err;
1194 }
1195
1196 static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1197                                      struct mlx5_ib_rq *rq)
1198 {
1199         mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
1200 }
1201
1202 static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
1203                                     struct mlx5_ib_rq *rq, u32 tdn)
1204 {
1205         u32 *in;
1206         void *tirc;
1207         int inlen;
1208         int err;
1209
1210         inlen = MLX5_ST_SZ_BYTES(create_tir_in);
1211         in = mlx5_vzalloc(inlen);
1212         if (!in)
1213                 return -ENOMEM;
1214
1215         tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
1216         MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
1217         MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
1218         MLX5_SET(tirc, tirc, transport_domain, tdn);
1219
1220         err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
1221
1222         kvfree(in);
1223
1224         return err;
1225 }
1226
1227 static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
1228                                       struct mlx5_ib_rq *rq)
1229 {
1230         mlx5_core_destroy_tir(dev->mdev, rq->tirn);
1231 }
1232
1233 static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1234                                 u32 *in,
1235                                 struct ib_pd *pd)
1236 {
1237         struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
1238         struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
1239         struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1240         struct ib_uobject *uobj = pd->uobject;
1241         struct ib_ucontext *ucontext = uobj->context;
1242         struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
1243         int err;
1244         u32 tdn = mucontext->tdn;
1245
1246         if (qp->sq.wqe_cnt) {
1247                 err = create_raw_packet_qp_tis(dev, sq, tdn);
1248                 if (err)
1249                         return err;
1250
1251                 err = create_raw_packet_qp_sq(dev, sq, in, pd);
1252                 if (err)
1253                         goto err_destroy_tis;
1254
1255                 sq->base.container_mibqp = qp;
1256         }
1257
1258         if (qp->rq.wqe_cnt) {
1259                 rq->base.container_mibqp = qp;
1260
1261                 err = create_raw_packet_qp_rq(dev, rq, in);
1262                 if (err)
1263                         goto err_destroy_sq;
1264
1265
1266                 err = create_raw_packet_qp_tir(dev, rq, tdn);
1267                 if (err)
1268                         goto err_destroy_rq;
1269         }
1270
1271         qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
1272                                                      rq->base.mqp.qpn;
1273
1274         return 0;
1275
1276 err_destroy_rq:
1277         destroy_raw_packet_qp_rq(dev, rq);
1278 err_destroy_sq:
1279         if (!qp->sq.wqe_cnt)
1280                 return err;
1281         destroy_raw_packet_qp_sq(dev, sq);
1282 err_destroy_tis:
1283         destroy_raw_packet_qp_tis(dev, sq);
1284
1285         return err;
1286 }
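/*
 * Editorial note on the unwind order above: the SQ depends on its TIS and
 * the TIR depends on the RQ, so teardown runs TIR -> RQ and SQ -> TIS
 * (mirrored in destroy_raw_packet_qp() below).  err_destroy_sq checks
 * qp->sq.wqe_cnt because that label is also reached from the RQ path of a
 * receive-only QP, where no SQ or TIS was ever created.
 */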
1287
1288 static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
1289                                   struct mlx5_ib_qp *qp)
1290 {
1291         struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
1292         struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
1293         struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1294
1295         if (qp->rq.wqe_cnt) {
1296                 destroy_raw_packet_qp_tir(dev, rq);
1297                 destroy_raw_packet_qp_rq(dev, rq);
1298         }
1299
1300         if (qp->sq.wqe_cnt) {
1301                 destroy_raw_packet_qp_sq(dev, sq);
1302                 destroy_raw_packet_qp_tis(dev, sq);
1303         }
1304 }
1305
1306 static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
1307                                     struct mlx5_ib_raw_packet_qp *raw_packet_qp)
1308 {
1309         struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
1310         struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1311
1312         sq->sq = &qp->sq;
1313         rq->rq = &qp->rq;
1314         sq->doorbell = &qp->db;
1315         rq->doorbell = &qp->db;
1316 }
1317
1318 static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1319 {
1320         mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn);
1321 }
1322
1323 static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1324                                  struct ib_pd *pd,
1325                                  struct ib_qp_init_attr *init_attr,
1326                                  struct ib_udata *udata)
1327 {
1328         struct ib_uobject *uobj = pd->uobject;
1329         struct ib_ucontext *ucontext = uobj->context;
1330         struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
1331         struct mlx5_ib_create_qp_resp resp = {};
1332         int inlen;
1333         int err;
1334         u32 *in;
1335         void *tirc;
1336         void *hfso;
1337         u32 selected_fields = 0;
1338         size_t min_resp_len;
1339         u32 tdn = mucontext->tdn;
1340         struct mlx5_ib_create_qp_rss ucmd = {};
1341         size_t required_cmd_sz;
1342
1343         if (init_attr->qp_type != IB_QPT_RAW_PACKET)
1344                 return -EOPNOTSUPP;
1345
1346         if (init_attr->create_flags || init_attr->send_cq)
1347                 return -EINVAL;
1348
1349         min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index);
1350         if (udata->outlen < min_resp_len)
1351                 return -EINVAL;
1352
1353         required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1);
1354         if (udata->inlen < required_cmd_sz) {
1355                 mlx5_ib_dbg(dev, "invalid inlen\n");
1356                 return -EINVAL;
1357         }
1358
1359         if (udata->inlen > sizeof(ucmd) &&
1360             !ib_is_udata_cleared(udata, sizeof(ucmd),
1361                                  udata->inlen - sizeof(ucmd))) {
1362                 mlx5_ib_dbg(dev, "inlen is not supported\n");
1363                 return -EOPNOTSUPP;
1364         }
1365
1366         if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
1367                 mlx5_ib_dbg(dev, "copy failed\n");
1368                 return -EFAULT;
1369         }
1370
1371         if (ucmd.comp_mask) {
1372                 mlx5_ib_dbg(dev, "invalid comp mask\n");
1373                 return -EOPNOTSUPP;
1374         }
1375
1376         if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) {
1377                 mlx5_ib_dbg(dev, "invalid reserved\n");
1378                 return -EOPNOTSUPP;
1379         }
1380
1381         err = ib_copy_to_udata(udata, &resp, min_resp_len);
1382         if (err) {
1383                 mlx5_ib_dbg(dev, "copy failed\n");
1384                 return -EINVAL;
1385         }
1386
1387         inlen = MLX5_ST_SZ_BYTES(create_tir_in);
1388         in = mlx5_vzalloc(inlen);
1389         if (!in)
1390                 return -ENOMEM;
1391
1392         tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
1393         MLX5_SET(tirc, tirc, disp_type,
1394                  MLX5_TIRC_DISP_TYPE_INDIRECT);
1395         MLX5_SET(tirc, tirc, indirect_table,
1396                  init_attr->rwq_ind_tbl->ind_tbl_num);
1397         MLX5_SET(tirc, tirc, transport_domain, tdn);
1398
1399         hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1400         switch (ucmd.rx_hash_function) {
1401         case MLX5_RX_HASH_FUNC_TOEPLITZ:
1402         {
1403                 void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1404                 size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
1405
1406                 if (len != ucmd.rx_key_len) {
1407                         err = -EINVAL;
1408                         goto err;
1409                 }
1410
1411                 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FUNC_TOEPLITZ);
1412                 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1413                 memcpy(rss_key, ucmd.rx_hash_key, len);
1414                 break;
1415         }
1416         default:
1417                 err = -EOPNOTSUPP;
1418                 goto err;
1419         }
1420
1421         if (!ucmd.rx_hash_fields_mask) {
1422                 /* special case when this TIR serves as steering entry without hashing */
1423                 if (!init_attr->rwq_ind_tbl->log_ind_tbl_size)
1424                         goto create_tir;
1425                 err = -EINVAL;
1426                 goto err;
1427         }
1428
1429         if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
1430              (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
1431              ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
1432              (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
1433                 err = -EINVAL;
1434                 goto err;
1435         }
1436
1437         /* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */
1438         if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
1439             (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
1440                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1441                          MLX5_L3_PROT_TYPE_IPV4);
1442         else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
1443                  (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
1444                 MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1445                          MLX5_L3_PROT_TYPE_IPV6);
1446
1447         if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
1448              (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) &&
1449              ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
1450              (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) {
1451                 err = -EINVAL;
1452                 goto err;
1453         }
1454
1455         /* If none of TCP & UDP SRC/DST was set - this bit field is ignored */
1456         if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
1457             (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
1458                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
1459                          MLX5_L4_PROT_TYPE_TCP);
1460         else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
1461                  (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
1462                 MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
1463                          MLX5_L4_PROT_TYPE_UDP);
1464
1465         if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
1466             (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
1467                 selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP;
1468
1469         if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
1470             (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
1471                 selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP;
1472
1473         if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
1474             (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
1475                 selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT;
1476
1477         if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
1478             (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
1479                 selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;
1480
1481         MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
1482
1483 create_tir:
1484         err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
1485
1486         if (err)
1487                 goto err;
1488
1489         kvfree(in);
1490         /* qpn is reserved for that QP */
1491         qp->trans_qp.base.mqp.qpn = 0;
1492         qp->flags |= MLX5_IB_QP_RSS;
1493         return 0;
1494
1495 err:
1496         kvfree(in);
1497         return err;
1498 }
1499
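     /*
      * create_qp_common() - shared QP creation path.
      *
      * In outline (following the code below): QPs backed by an RSS
      * indirection table are diverted to create_rss_raw_qp_tir(); otherwise
      * the requested create_flags are checked against device capabilities,
      * the RQ size is computed, user or kernel WQ buffers are set up, the
      * QP context (qpc) is filled in, and the QP is created either as a raw
      * packet QP (separate RQ/SQ objects) or via mlx5_core_create_qp().
      * Finally the QP is linked onto the device and CQ lists used by the
      * reset flow.
      */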
1500 static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1501                             struct ib_qp_init_attr *init_attr,
1502                             struct ib_udata *udata, struct mlx5_ib_qp *qp)
1503 {
1504         struct mlx5_ib_resources *devr = &dev->devr;
1505         int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
1506         struct mlx5_core_dev *mdev = dev->mdev;
1507         struct mlx5_ib_create_qp_resp resp;
1508         struct mlx5_ib_cq *send_cq;
1509         struct mlx5_ib_cq *recv_cq;
1510         unsigned long flags;
1511         u32 uidx = MLX5_IB_DEFAULT_UIDX;
1512         struct mlx5_ib_create_qp ucmd;
1513         struct mlx5_ib_qp_base *base;
1514         void *qpc;
1515         u32 *in;
1516         int err;
1517
1518         base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
1519                &qp->raw_packet_qp.rq.base :
1520                &qp->trans_qp.base;
1521
1522         if (init_attr->qp_type != IB_QPT_RAW_PACKET)
1523                 mlx5_ib_odp_create_qp(qp);
1524
1525         mutex_init(&qp->mutex);
1526         spin_lock_init(&qp->sq.lock);
1527         spin_lock_init(&qp->rq.lock);
1528
1529         if (init_attr->rwq_ind_tbl) {
1530                 if (!udata)
1531                         return -ENOSYS;
1532
1533                 err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata);
1534                 return err;
1535         }
1536
1537         if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
1538                 if (!MLX5_CAP_GEN(mdev, block_lb_mc)) {
1539                         mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
1540                         return -EINVAL;
1541                 } else {
1542                         qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
1543                 }
1544         }
1545
1546         if (init_attr->create_flags &
1547                         (IB_QP_CREATE_CROSS_CHANNEL |
1548                          IB_QP_CREATE_MANAGED_SEND |
1549                          IB_QP_CREATE_MANAGED_RECV)) {
1550                 if (!MLX5_CAP_GEN(mdev, cd)) {
1551                         mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
1552                         return -EINVAL;
1553                 }
1554                 if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
1555                         qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
1556                 if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
1557                         qp->flags |= MLX5_IB_QP_MANAGED_SEND;
1558                 if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
1559                         qp->flags |= MLX5_IB_QP_MANAGED_RECV;
1560         }
1561
1562         if (init_attr->qp_type == IB_QPT_UD &&
1563             (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO))
1564                 if (!MLX5_CAP_GEN(mdev, ipoib_ipoib_offloads)) {
1565                         mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n");
1566                         return -EOPNOTSUPP;
1567                 }
1568
1569         if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
1570                 if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
1571                         mlx5_ib_dbg(dev, "Scatter FCS is supported only for Raw Packet QPs\n");
1572                         return -EOPNOTSUPP;
1573                 }
1574                 if (!MLX5_CAP_GEN(dev->mdev, eth_net_offloads) ||
1575                     !MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
1576                         mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n");
1577                         return -EOPNOTSUPP;
1578                 }
1579                 qp->flags |= MLX5_IB_QP_CAP_SCATTER_FCS;
1580         }
1581
1582         if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
1583                 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
1584
1585         if (pd && pd->uobject) {
1586                 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
1587                         mlx5_ib_dbg(dev, "copy failed\n");
1588                         return -EFAULT;
1589                 }
1590
1591                 err = get_qp_user_index(to_mucontext(pd->uobject->context),
1592                                         &ucmd, udata->inlen, &uidx);
1593                 if (err)
1594                         return err;
1595
1596                 qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
1597                 qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
1598         } else {
1599                 qp->wq_sig = !!wq_signature;
1600         }
1601
1602         qp->has_rq = qp_has_rq(init_attr);
1603         err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
1604                           qp, (pd && pd->uobject) ? &ucmd : NULL);
1605         if (err) {
1606                 mlx5_ib_dbg(dev, "err %d\n", err);
1607                 return err;
1608         }
1609
1610         if (pd) {
1611                 if (pd->uobject) {
1612                         __u32 max_wqes =
1613                                 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
1614                         mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
1615                         if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
1616                             ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
1617                                 mlx5_ib_dbg(dev, "invalid rq params\n");
1618                                 return -EINVAL;
1619                         }
1620                         if (ucmd.sq_wqe_count > max_wqes) {
1621                                 mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
1622                                             ucmd.sq_wqe_count, max_wqes);
1623                                 return -EINVAL;
1624                         }
1625                         if (init_attr->create_flags &
1626                             mlx5_ib_create_qp_sqpn_qp1()) {
1627                                 mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n");
1628                                 return -EINVAL;
1629                         }
1630                         err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
1631                                              &resp, &inlen, base);
1632                         if (err)
1633                                 mlx5_ib_dbg(dev, "err %d\n", err);
1634                 } else {
1635                         err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
1636                                                base);
1637                         if (err)
1638                                 mlx5_ib_dbg(dev, "err %d\n", err);
1639                 }
1640
1641                 if (err)
1642                         return err;
1643         } else {
1644                 in = mlx5_vzalloc(inlen);
1645                 if (!in)
1646                         return -ENOMEM;
1647
1648                 qp->create_type = MLX5_QP_EMPTY;
1649         }
1650
1651         if (is_sqp(init_attr->qp_type))
1652                 qp->port = init_attr->port_num;
1653
1654         qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
1655
1656         MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type));
1657         MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
1658
1659         if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
1660                 MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
1661         else
1662                 MLX5_SET(qpc, qpc, latency_sensitive, 1);
1663
1664
1665         if (qp->wq_sig)
1666                 MLX5_SET(qpc, qpc, wq_signature, 1);
1667
1668         if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
1669                 MLX5_SET(qpc, qpc, block_lb_mc, 1);
1670
1671         if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
1672                 MLX5_SET(qpc, qpc, cd_master, 1);
1673         if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
1674                 MLX5_SET(qpc, qpc, cd_slave_send, 1);
1675         if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
1676                 MLX5_SET(qpc, qpc, cd_slave_receive, 1);
1677
1678         if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
1679                 int rcqe_sz;
1680                 int scqe_sz;
1681
1682                 rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
1683                 scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
1684
1685                 if (rcqe_sz == 128)
1686                         MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
1687                 else
1688                         MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
1689
1690                 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
1691                         if (scqe_sz == 128)
1692                                 MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
1693                         else
1694                                 MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
1695                 }
1696         }
1697
1698         if (qp->rq.wqe_cnt) {
1699                 MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
1700                 MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
1701         }
1702
1703         MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
1704
1705         if (qp->sq.wqe_cnt)
1706                 MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
1707         else
1708                 MLX5_SET(qpc, qpc, no_sq, 1);
1709
1710         /* Set default resources */
1711         switch (init_attr->qp_type) {
1712         case IB_QPT_XRC_TGT:
1713                 MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
1714                 MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
1715                 MLX5_SET(qpc, qpc, srqn_rmpn, to_msrq(devr->s0)->msrq.srqn);
1716                 MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn);
1717                 break;
1718         case IB_QPT_XRC_INI:
1719                 MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
1720                 MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
1721                 MLX5_SET(qpc, qpc, srqn_rmpn, to_msrq(devr->s0)->msrq.srqn);
1722                 break;
1723         default:
1724                 if (init_attr->srq) {
1725                         MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
1726                         MLX5_SET(qpc, qpc, srqn_rmpn, to_msrq(init_attr->srq)->msrq.srqn);
1727                 } else {
1728                         MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
1729                         MLX5_SET(qpc, qpc, srqn_rmpn, to_msrq(devr->s1)->msrq.srqn);
1730                 }
1731         }
1732
1733         if (init_attr->send_cq)
1734                 MLX5_SET(qpc, qpc, cqn_snd, to_mcq(init_attr->send_cq)->mcq.cqn);
1735
1736         if (init_attr->recv_cq)
1737                 MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(init_attr->recv_cq)->mcq.cqn);
1738
1739         MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
1740
1741         /* the default uidx, 0xffffff, asks the device to work with CQE version 0 */
1742         if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
1743                 MLX5_SET(qpc, qpc, user_index, uidx);
1744
1745         /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicate an IPoIB QP */
1746         if (init_attr->qp_type == IB_QPT_UD &&
1747             (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) {
1748                 MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
1749                 qp->flags |= MLX5_IB_QP_LSO;
1750         }
1751
1752         if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
1753                 qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
1754                 raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
1755                 err = create_raw_packet_qp(dev, qp, in, pd);
1756         } else {
1757                 err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
1758         }
1759
1760         if (err) {
1761                 mlx5_ib_dbg(dev, "create qp failed\n");
1762                 goto err_create;
1763         }
1764
1765         kvfree(in);
1766
1767         base->container_mibqp = qp;
1768         base->mqp.event = mlx5_ib_qp_event;
1769
1770         get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq,
1771                 &send_cq, &recv_cq);
1772         spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
1773         mlx5_ib_lock_cqs(send_cq, recv_cq);
1774         /* Maintain the device-to-QP list; it is needed for further
1775          * handling via the reset flow.
1776          */
1777         list_add_tail(&qp->qps_list, &dev->qp_list);
1778         /* Maintain the CQ-to-QP lists, needed for further handling via the
1779          * reset flow. */
1780         if (send_cq)
1781                 list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
1782         if (recv_cq)
1783                 list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
1784         mlx5_ib_unlock_cqs(send_cq, recv_cq);
1785         spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
1786
1787         return 0;
1788
1789 err_create:
1790         if (qp->create_type == MLX5_QP_USER)
1791                 destroy_qp_user(pd, qp, base);
1792         else if (qp->create_type == MLX5_QP_KERNEL)
1793                 destroy_qp_kernel(dev, qp);
1794
1795         kvfree(in);
1796         return err;
1797 }
1798
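     /*
      * Lock the send and receive CQs in a consistent order (lowest CQN
      * first) so that concurrent lockers cannot deadlock.  When a CQ is
      * missing, or send and receive share the same CQ, the matching
      * __acquire()/__release() annotations keep sparse's lock balance
      * checking happy without taking a second lock.
      */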
1799 static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
1800         __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
1801 {
1802         if (send_cq) {
1803                 if (recv_cq) {
1804                         if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
1805                                 spin_lock(&send_cq->lock);
1806                                 spin_lock_nested(&recv_cq->lock,
1807                                                  SINGLE_DEPTH_NESTING);
1808                         } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
1809                                 spin_lock(&send_cq->lock);
1810                                 __acquire(&recv_cq->lock);
1811                         } else {
1812                                 spin_lock(&recv_cq->lock);
1813                                 spin_lock_nested(&send_cq->lock,
1814                                                  SINGLE_DEPTH_NESTING);
1815                         }
1816                 } else {
1817                         spin_lock(&send_cq->lock);
1818                         __acquire(&recv_cq->lock);
1819                 }
1820         } else if (recv_cq) {
1821                 spin_lock(&recv_cq->lock);
1822                 __acquire(&send_cq->lock);
1823         } else {
1824                 __acquire(&send_cq->lock);
1825                 __acquire(&recv_cq->lock);
1826         }
1827 }
1828
1829 static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
1830         __releases(&send_cq->lock) __releases(&recv_cq->lock)
1831 {
1832         if (send_cq) {
1833                 if (recv_cq) {
1834                         if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
1835                                 spin_unlock(&recv_cq->lock);
1836                                 spin_unlock(&send_cq->lock);
1837                         } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
1838                                 __release(&recv_cq->lock);
1839                                 spin_unlock(&send_cq->lock);
1840                         } else {
1841                                 spin_unlock(&send_cq->lock);
1842                                 spin_unlock(&recv_cq->lock);
1843                         }
1844                 } else {
1845                         __release(&recv_cq->lock);
1846                         spin_unlock(&send_cq->lock);
1847                 }
1848         } else if (recv_cq) {
1849                 __release(&send_cq->lock);
1850                 spin_unlock(&recv_cq->lock);
1851         } else {
1852                 __release(&recv_cq->lock);
1853                 __release(&send_cq->lock);
1854         }
1855 }
1856
1857 static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
1858 {
1859         return to_mpd(qp->ibqp.pd);
1860 }
1861
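     /*
      * Pick the CQs actually attached to a QP of the given type: XRC
      * targets use neither CQ, UMR and XRC initiator QPs use only the send
      * CQ, and the remaining supported types use whichever CQs the caller
      * provided.
      */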
1862 static void get_cqs(enum ib_qp_type qp_type,
1863                     struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
1864                     struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
1865 {
1866         switch (qp_type) {
1867         case IB_QPT_XRC_TGT:
1868                 *send_cq = NULL;
1869                 *recv_cq = NULL;
1870                 break;
1871         case MLX5_IB_QPT_REG_UMR:
1872         case IB_QPT_XRC_INI:
1873                 *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
1874                 *recv_cq = NULL;
1875                 break;
1876
1877         case IB_QPT_SMI:
1878         case MLX5_IB_QPT_HW_GSI:
1879         case IB_QPT_RC:
1880         case IB_QPT_UC:
1881         case IB_QPT_UD:
1882         case IB_QPT_RAW_IPV6:
1883         case IB_QPT_RAW_ETHERTYPE:
1884         case IB_QPT_RAW_PACKET:
1885                 *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
1886                 *recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
1887                 break;
1888
1889         case IB_QPT_MAX:
1890         default:
1891                 *send_cq = NULL;
1892                 *recv_cq = NULL;
1893                 break;
1894         }
1895 }
1896
1897 static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1898                                 const struct mlx5_modify_raw_qp_param *raw_qp_param,
1899                                 u8 lag_tx_affinity);
1900
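     /*
      * Tear-down mirror of create_qp_common().  RSS QPs only need their TIR
      * destroyed; for the rest, the QP (or its raw packet RQ/SQ) is moved to
      * RESET if necessary, unlinked from the device and CQ reset-flow lists
      * under the same locks taken at creation time, stale CQEs are cleaned
      * for kernel QPs, the hardware object is destroyed and the user or
      * kernel resources are released.
      */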
1901 static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1902 {
1903         struct mlx5_ib_cq *send_cq, *recv_cq;
1904         struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
1905         unsigned long flags;
1906         int err;
1907
1908         if (qp->ibqp.rwq_ind_tbl) {
1909                 destroy_rss_raw_qp_tir(dev, qp);
1910                 return;
1911         }
1912
1913         base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
1914                &qp->raw_packet_qp.rq.base :
1915                &qp->trans_qp.base;
1916
1917         if (qp->state != IB_QPS_RESET) {
1918                 if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
1919                         mlx5_ib_qp_disable_pagefaults(qp);
1920                         err = mlx5_core_qp_modify(dev->mdev,
1921                                                   MLX5_CMD_OP_2RST_QP, 0,
1922                                                   NULL, &base->mqp);
1923                 } else {
1924                         struct mlx5_modify_raw_qp_param raw_qp_param = {
1925                                 .operation = MLX5_CMD_OP_2RST_QP
1926                         };
1927
1928                         err = modify_raw_packet_qp(dev, qp, &raw_qp_param, 0);
1929                 }
1930                 if (err)
1931                         mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
1932                                      base->mqp.qpn);
1933         }
1934
1935         get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
1936                 &send_cq, &recv_cq);
1937
1938         spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
1939         mlx5_ib_lock_cqs(send_cq, recv_cq);
1940         /* delete from the lists under both locks above to protect the reset flow paths */
1941         list_del(&qp->qps_list);
1942         if (send_cq)
1943                 list_del(&qp->cq_send_list);
1944
1945         if (recv_cq)
1946                 list_del(&qp->cq_recv_list);
1947
1948         if (qp->create_type == MLX5_QP_KERNEL) {
1949                 __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
1950                                    qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1951                 if (send_cq != recv_cq)
1952                         __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
1953                                            NULL);
1954         }
1955         mlx5_ib_unlock_cqs(send_cq, recv_cq);
1956         spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
1957
1958         if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
1959                 destroy_raw_packet_qp(dev, qp);
1960         } else {
1961                 err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
1962                 if (err)
1963                         mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
1964                                      base->mqp.qpn);
1965         }
1966
1967         if (qp->create_type == MLX5_QP_KERNEL)
1968                 destroy_qp_kernel(dev, qp);
1969         else if (qp->create_type == MLX5_QP_USER)
1970                 destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
1971 }
1972
1973 static const char *ib_qp_type_str(enum ib_qp_type type)
1974 {
1975         switch (type) {
1976         case IB_QPT_SMI:
1977                 return "IB_QPT_SMI";
1978         case IB_QPT_GSI:
1979                 return "IB_QPT_GSI";
1980         case IB_QPT_RC:
1981                 return "IB_QPT_RC";
1982         case IB_QPT_UC:
1983                 return "IB_QPT_UC";
1984         case IB_QPT_UD:
1985                 return "IB_QPT_UD";
1986         case IB_QPT_RAW_IPV6:
1987                 return "IB_QPT_RAW_IPV6";
1988         case IB_QPT_RAW_ETHERTYPE:
1989                 return "IB_QPT_RAW_ETHERTYPE";
1990         case IB_QPT_XRC_INI:
1991                 return "IB_QPT_XRC_INI";
1992         case IB_QPT_XRC_TGT:
1993                 return "IB_QPT_XRC_TGT";
1994         case IB_QPT_RAW_PACKET:
1995                 return "IB_QPT_RAW_PACKET";
1996         case MLX5_IB_QPT_REG_UMR:
1997                 return "MLX5_IB_QPT_REG_UMR";
1998         case IB_QPT_MAX:
1999         default:
2000                 return "Invalid QP type";
2001         }
2002 }
2003
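     /*
      * Verbs entry point for QP creation.  Dispatch is by qp_type: XRC QPs
      * are gated on the xrc capability, IB_QPT_GSI is handled by
      * mlx5_ib_gsi_create_qp(), raw packet QPs are limited to user contexts
      * with CQE version > 0, and everything else goes through
      * create_qp_common().
      */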
2004 struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
2005                                 struct ib_qp_init_attr *init_attr,
2006                                 struct ib_udata *udata)
2007 {
2008         struct mlx5_ib_dev *dev;
2009         struct mlx5_ib_qp *qp;
2010         u16 xrcdn = 0;
2011         int err;
2012
2013         if (pd) {
2014                 dev = to_mdev(pd->device);
2015
2016                 if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
2017                         if (!pd->uobject) {
2018                                 mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
2019                                 return ERR_PTR(-EINVAL);
2020                         } else if (!to_mucontext(pd->uobject->context)->cqe_version) {
2021                                 mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
2022                                 return ERR_PTR(-EINVAL);
2023                         }
2024                 }
2025         } else {
2026                 /* being cautious here */
2027                 if (init_attr->qp_type != IB_QPT_XRC_TGT &&
2028                     init_attr->qp_type != MLX5_IB_QPT_REG_UMR) {
2029                         pr_warn("%s: no PD for transport %s\n", __func__,
2030                                 ib_qp_type_str(init_attr->qp_type));
2031                         return ERR_PTR(-EINVAL);
2032                 }
2033                 dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
2034         }
2035
2036         switch (init_attr->qp_type) {
2037         case IB_QPT_XRC_TGT:
2038         case IB_QPT_XRC_INI:
2039                 if (!MLX5_CAP_GEN(dev->mdev, xrc)) {
2040                         mlx5_ib_dbg(dev, "XRC not supported\n");
2041                         return ERR_PTR(-ENOSYS);
2042                 }
2043                 init_attr->recv_cq = NULL;
2044                 if (init_attr->qp_type == IB_QPT_XRC_TGT) {
2045                         xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
2046                         init_attr->send_cq = NULL;
2047                 }
2048
2049                 /* fall through */
2050         case IB_QPT_RAW_PACKET:
2051         case IB_QPT_RC:
2052         case IB_QPT_UC:
2053         case IB_QPT_UD:
2054         case IB_QPT_SMI:
2055         case MLX5_IB_QPT_HW_GSI:
2056         case MLX5_IB_QPT_REG_UMR:
2057                 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
2058                 if (!qp)
2059                         return ERR_PTR(-ENOMEM);
2060
2061                 err = create_qp_common(dev, pd, init_attr, udata, qp);
2062                 if (err) {
2063                         mlx5_ib_dbg(dev, "create_qp_common failed\n");
2064                         kfree(qp);
2065                         return ERR_PTR(err);
2066                 }
2067
2068                 if (is_qp0(init_attr->qp_type))
2069                         qp->ibqp.qp_num = 0;
2070                 else if (is_qp1(init_attr->qp_type))
2071                         qp->ibqp.qp_num = 1;
2072                 else
2073                         qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
2074
2075                 mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
2076                             qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
2077                             init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1,
2078                             init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1);
2079
2080                 qp->trans_qp.xrcdn = xrcdn;
2081
2082                 break;
2083
2084         case IB_QPT_GSI:
2085                 return mlx5_ib_gsi_create_qp(pd, init_attr);
2086
2087         case IB_QPT_RAW_IPV6:
2088         case IB_QPT_RAW_ETHERTYPE:
2089         case IB_QPT_MAX:
2090         default:
2091                 mlx5_ib_dbg(dev, "unsupported qp type %d\n",
2092                             init_attr->qp_type);
2093                 /* raw IPv6/Ethertype QPs are not supported */
2094                 return ERR_PTR(-EINVAL);
2095         }
2096
2097         return &qp->ibqp;
2098 }
2099
2100 int mlx5_ib_destroy_qp(struct ib_qp *qp)
2101 {
2102         struct mlx5_ib_dev *dev = to_mdev(qp->device);
2103         struct mlx5_ib_qp *mqp = to_mqp(qp);
2104
2105         if (unlikely(qp->qp_type == IB_QPT_GSI))
2106                 return mlx5_ib_gsi_destroy_qp(qp);
2107
2108         destroy_qp_common(dev, mqp);
2109
2110         kfree(mqp);
2111
2112         return 0;
2113 }
2114
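     /*
      * Translate a QP's IB access flags into the RRE/RAE/RWE bits of the
      * hardware QP context.  Values absent from attr_mask fall back to the
      * currently cached ones, and with a responder depth of zero only
      * remote writes remain enabled.
      */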
2115 static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
2116                                    int attr_mask)
2117 {
2118         u32 hw_access_flags = 0;
2119         u8 dest_rd_atomic;
2120         u32 access_flags;
2121
2122         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
2123                 dest_rd_atomic = attr->max_dest_rd_atomic;
2124         else
2125                 dest_rd_atomic = qp->trans_qp.resp_depth;
2126
2127         if (attr_mask & IB_QP_ACCESS_FLAGS)
2128                 access_flags = attr->qp_access_flags;
2129         else
2130                 access_flags = qp->trans_qp.atomic_rd_en;
2131
2132         if (!dest_rd_atomic)
2133                 access_flags &= IB_ACCESS_REMOTE_WRITE;
2134
2135         if (access_flags & IB_ACCESS_REMOTE_READ)
2136                 hw_access_flags |= MLX5_QP_BIT_RRE;
2137         if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
2138                 hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX);
2139         if (access_flags & IB_ACCESS_REMOTE_WRITE)
2140                 hw_access_flags |= MLX5_QP_BIT_RWE;
2141
2142         return cpu_to_be32(hw_access_flags);
2143 }
2144
2145 enum {
2146         MLX5_PATH_FLAG_FL       = 1 << 0,
2147         MLX5_PATH_FLAG_FREE_AR  = 1 << 1,
2148         MLX5_PATH_FLAG_COUNTER  = 1 << 2,
2149 };
2150
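     /*
      * Map an ib_rate value to the device's stat_rate encoding:
      * IB_RATE_PORT_CURRENT becomes 0, out-of-range rates are rejected, and
      * any other rate is stepped down to the nearest value advertised in
      * the stat_rate_support capability before MLX5_STAT_RATE_OFFSET is
      * added.
      */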
2151 static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
2152 {
2153         if (rate == IB_RATE_PORT_CURRENT) {
2154                 return 0;
2155         } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS) {
2156                 return -EINVAL;
2157         } else {
2158                 while (rate != IB_RATE_2_5_GBPS &&
2159                        !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
2160                          MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
2161                         --rate;
2162         }
2163
2164         return rate + MLX5_STAT_RATE_OFFSET;
2165 }
2166
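     /*
      * Raw packet QP send queues hang off a TIS, so an SL change for such a
      * QP is applied by rewriting the TIS prio field with a MODIFY_TIS
      * command (see the call at the end of mlx5_set_path()).
      */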
2167 static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
2168                                       struct mlx5_ib_sq *sq, u8 sl)
2169 {
2170         void *in;
2171         void *tisc;
2172         int inlen;
2173         int err;
2174
2175         inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
2176         in = mlx5_vzalloc(inlen);
2177         if (!in)
2178                 return -ENOMEM;
2179
2180         MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
2181
2182         tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
2183         MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
2184
2185         err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
2186
2187         kvfree(in);
2188
2189         return err;
2190 }
2191
2192 static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
2193                                          struct mlx5_ib_sq *sq, u8 tx_affinity)
2194 {
2195         void *in;
2196         void *tisc;
2197         int inlen;
2198         int err;
2199
2200         inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
2201         in = mlx5_vzalloc(inlen);
2202         if (!in)
2203                 return -ENOMEM;
2204
2205         MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1);
2206
2207         tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
2208         MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);
2209
2210         err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
2211
2212         kvfree(in);
2213
2214         return err;
2215 }
2216
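     /*
      * Fill a hardware address path from an ib_ah_attr.  On Ethernet (RoCE)
      * ports the destination MAC, UDP source port, SL and DSCP are derived
      * from the AH and its GID type; on IB ports the LID, source path bits
      * and FL/free-AR flags are used instead.  GRH fields, the static rate
      * and the ack timeout are handled for both, and for raw packet QPs
      * with a send queue the SL is additionally pushed to the TIS.
      */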
2217 static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2218                          const struct ib_ah_attr *ah,
2219                          struct mlx5_qp_path *path, u8 port, int attr_mask,
2220                          u32 path_flags, const struct ib_qp_attr *attr,
2221                          bool alt)
2222 {
2223         enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
2224         int err;
2225         enum ib_gid_type gid_type;
2226
2227         if (attr_mask & IB_QP_PKEY_INDEX)
2228                 path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
2229                                                      attr->pkey_index);
2230
2231         if (ah->ah_flags & IB_AH_GRH) {
2232                 if (ah->grh.sgid_index >=
2233                     dev->mdev->port_caps[port - 1].gid_table_len) {
2234                         pr_err("sgid_index (%u) too large. max is %d\n",
2235                                ah->grh.sgid_index,
2236                                dev->mdev->port_caps[port - 1].gid_table_len);
2237                         return -EINVAL;
2238                 }
2239         }
2240
2241         if (ll == IB_LINK_LAYER_ETHERNET) {
2242                 if (!(ah->ah_flags & IB_AH_GRH))
2243                         return -EINVAL;
2244                 err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index,
2245                                              &gid_type);
2246                 if (err)
2247                         return err;
2248                 memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
2249                 path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
2250                                                           ah->grh.sgid_index);
2251                 path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
2252                 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
2253                         path->ecn_dscp = (ah->grh.traffic_class >> 2) & 0x3f;
2254         } else {
2255                 path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
2256                 path->fl_free_ar |=
2257                         (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0;
2258                 path->rlid = cpu_to_be16(ah->dlid);
2259                 path->grh_mlid = ah->src_path_bits & 0x7f;
2260                 if (ah->ah_flags & IB_AH_GRH)
2261                         path->grh_mlid  |= 1 << 7;
2262                 path->dci_cfi_prio_sl = ah->sl & 0xf;
2263         }
2264
2265         if (ah->ah_flags & IB_AH_GRH) {
2266                 path->mgid_index = ah->grh.sgid_index;
2267                 path->hop_limit  = ah->grh.hop_limit;
2268                 path->tclass_flowlabel =
2269                         cpu_to_be32((ah->grh.traffic_class << 20) |
2270                                     (ah->grh.flow_label));
2271                 memcpy(path->rgid, ah->grh.dgid.raw, 16);
2272         }
2273
2274         err = ib_rate_to_mlx5(dev, ah->static_rate);
2275         if (err < 0)
2276                 return err;
2277         path->static_rate = err;
2278         path->port = port;
2279
2280         if (attr_mask & IB_QP_TIMEOUT)
2281                 path->ackto_lt = (alt ? attr->alt_timeout : attr->timeout) << 3;
2282
2283         if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
2284                 return modify_raw_packet_eth_prio(dev->mdev,
2285                                                   &qp->raw_packet_qp.sq,
2286                                                   ah->sl & 0xf);
2287
2288         return 0;
2289 }
2290
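     /*
      * opt_mask[] narrows which optional-parameter bits may accompany a
      * given state transition for a given transport type.
      * __mlx5_ib_modify_qp() consults it roughly as:
      *
      *	optpar = ib_mask_to_mlx5_opt(attr_mask);
      *	optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
      */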
2291 static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
2292         [MLX5_QP_STATE_INIT] = {
2293                 [MLX5_QP_STATE_INIT] = {
2294                         [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE            |
2295                                           MLX5_QP_OPTPAR_RAE            |
2296                                           MLX5_QP_OPTPAR_RWE            |
2297                                           MLX5_QP_OPTPAR_PKEY_INDEX     |
2298                                           MLX5_QP_OPTPAR_PRI_PORT,
2299                         [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE            |
2300                                           MLX5_QP_OPTPAR_PKEY_INDEX     |
2301                                           MLX5_QP_OPTPAR_PRI_PORT,
2302                         [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX     |
2303                                           MLX5_QP_OPTPAR_Q_KEY          |
2304                                           MLX5_QP_OPTPAR_PRI_PORT,
2305                 },
2306                 [MLX5_QP_STATE_RTR] = {
2307                         [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
2308                                           MLX5_QP_OPTPAR_RRE            |
2309                                           MLX5_QP_OPTPAR_RAE            |
2310                                           MLX5_QP_OPTPAR_RWE            |
2311                                           MLX5_QP_OPTPAR_PKEY_INDEX,
2312                         [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
2313                                           MLX5_QP_OPTPAR_RWE            |
2314                                           MLX5_QP_OPTPAR_PKEY_INDEX,
2315                         [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX     |
2316                                           MLX5_QP_OPTPAR_Q_KEY,
2317                         [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX    |
2318                                            MLX5_QP_OPTPAR_Q_KEY,
2319                         [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
2320                                           MLX5_QP_OPTPAR_RRE            |
2321                                           MLX5_QP_OPTPAR_RAE            |
2322                                           MLX5_QP_OPTPAR_RWE            |
2323                                           MLX5_QP_OPTPAR_PKEY_INDEX,
2324                 },
2325         },
2326         [MLX5_QP_STATE_RTR] = {
2327                 [MLX5_QP_STATE_RTS] = {
2328                         [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
2329                                           MLX5_QP_OPTPAR_RRE            |
2330                                           MLX5_QP_OPTPAR_RAE            |
2331                                           MLX5_QP_OPTPAR_RWE            |
2332                                           MLX5_QP_OPTPAR_PM_STATE       |
2333                                           MLX5_QP_OPTPAR_RNR_TIMEOUT,
2334                         [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
2335                                           MLX5_QP_OPTPAR_RWE            |
2336                                           MLX5_QP_OPTPAR_PM_STATE,
2337                         [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
2338                 },
2339         },
2340         [MLX5_QP_STATE_RTS] = {
2341                 [MLX5_QP_STATE_RTS] = {
2342                         [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE            |
2343                                           MLX5_QP_OPTPAR_RAE            |
2344                                           MLX5_QP_OPTPAR_RWE            |
2345                                           MLX5_QP_OPTPAR_RNR_TIMEOUT    |
2346                                           MLX5_QP_OPTPAR_PM_STATE       |
2347                                           MLX5_QP_OPTPAR_ALT_ADDR_PATH,
2348                         [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE            |
2349                                           MLX5_QP_OPTPAR_PM_STATE       |
2350                                           MLX5_QP_OPTPAR_ALT_ADDR_PATH,
2351                         [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY          |
2352                                           MLX5_QP_OPTPAR_SRQN           |
2353                                           MLX5_QP_OPTPAR_CQN_RCV,
2354                 },
2355         },
2356         [MLX5_QP_STATE_SQER] = {
2357                 [MLX5_QP_STATE_RTS] = {
2358                         [MLX5_QP_ST_UD]  = MLX5_QP_OPTPAR_Q_KEY,
2359                         [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
2360                         [MLX5_QP_ST_UC]  = MLX5_QP_OPTPAR_RWE,
2361                         [MLX5_QP_ST_RC]  = MLX5_QP_OPTPAR_RNR_TIMEOUT   |
2362                                            MLX5_QP_OPTPAR_RWE           |
2363                                            MLX5_QP_OPTPAR_RAE           |
2364                                            MLX5_QP_OPTPAR_RRE,
2365                 },
2366         },
2367 };
2368
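     /*
      * Map a single IB_QP_* attribute-mask bit to the corresponding
      * MLX5_QP_OPTPAR_* bits; masks with no optional-parameter equivalent
      * map to 0.  ib_mask_to_mlx5_opt() below ORs the results for every
      * bit set in attr_mask.
      */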
2369 static int ib_nr_to_mlx5_nr(int ib_mask)
2370 {
2371         switch (ib_mask) {
2372         case IB_QP_STATE:
2373                 return 0;
2374         case IB_QP_CUR_STATE:
2375                 return 0;
2376         case IB_QP_EN_SQD_ASYNC_NOTIFY:
2377                 return 0;
2378         case IB_QP_ACCESS_FLAGS:
2379                 return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
2380                         MLX5_QP_OPTPAR_RAE;
2381         case IB_QP_PKEY_INDEX:
2382                 return MLX5_QP_OPTPAR_PKEY_INDEX;
2383         case IB_QP_PORT:
2384                 return MLX5_QP_OPTPAR_PRI_PORT;
2385         case IB_QP_QKEY:
2386                 return MLX5_QP_OPTPAR_Q_KEY;
2387         case IB_QP_AV:
2388                 return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
2389                         MLX5_QP_OPTPAR_PRI_PORT;
2390         case IB_QP_PATH_MTU:
2391                 return 0;
2392         case IB_QP_TIMEOUT:
2393                 return MLX5_QP_OPTPAR_ACK_TIMEOUT;
2394         case IB_QP_RETRY_CNT:
2395                 return MLX5_QP_OPTPAR_RETRY_COUNT;
2396         case IB_QP_RNR_RETRY:
2397                 return MLX5_QP_OPTPAR_RNR_RETRY;
2398         case IB_QP_RQ_PSN:
2399                 return 0;
2400         case IB_QP_MAX_QP_RD_ATOMIC:
2401                 return MLX5_QP_OPTPAR_SRA_MAX;
2402         case IB_QP_ALT_PATH:
2403                 return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
2404         case IB_QP_MIN_RNR_TIMER:
2405                 return MLX5_QP_OPTPAR_RNR_TIMEOUT;
2406         case IB_QP_SQ_PSN:
2407                 return 0;
2408         case IB_QP_MAX_DEST_RD_ATOMIC:
2409                 return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
2410                         MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
2411         case IB_QP_PATH_MIG_STATE:
2412                 return MLX5_QP_OPTPAR_PM_STATE;
2413         case IB_QP_CAP:
2414                 return 0;
2415         case IB_QP_DEST_QPN:
2416                 return 0;
2417         }
2418         return 0;
2419 }
2420
2421 static int ib_mask_to_mlx5_opt(int ib_mask)
2422 {
2423         int result = 0;
2424         int i;
2425
2426         for (i = 0; i < 8 * sizeof(int); i++) {
2427                 if ((1 << i) & ib_mask)
2428                         result |= ib_nr_to_mlx5_nr(1 << i);
2429         }
2430
2431         return result;
2432 }
2433
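     /*
      * Drive the RQ of a raw packet QP to new_state with a MODIFY_RQ
      * command.  When requested and the firmware supports
      * modify_rq_counters_set_id, the RQ counter set id is updated in the
      * same command; otherwise a one-time informational message is logged.
      */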
2434 static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
2435                                    struct mlx5_ib_rq *rq, int new_state,
2436                                    const struct mlx5_modify_raw_qp_param *raw_qp_param)
2437 {
2438         void *in;
2439         void *rqc;
2440         int inlen;
2441         int err;
2442
2443         inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
2444         in = mlx5_vzalloc(inlen);
2445         if (!in)
2446                 return -ENOMEM;
2447
2448         MLX5_SET(modify_rq_in, in, rqn, rq->base.mqp.qpn);
2449         MLX5_SET(modify_rq_in, in, rq_state, rq->state);
2450
2451         rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
2452         MLX5_SET(rqc, rqc, state, new_state);
2453
2454         if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
2455                 if (MLX5_CAP_GEN(dev->mdev, modify_rq_counters_set_id)) {
2456                         MLX5_SET64(modify_rq_in, in, modify_bitmask,
2457                                    MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID);
2458                         MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
2459                 } else
2460                         pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
2461                                      dev->ib_dev.name);
2462         }
2463
2464         err = mlx5_core_modify_rq(dev->mdev, in, inlen);
2465         if (err)
2466                 goto out;
2467
2468         rq->state = new_state;
2469
2470 out:
2471         kvfree(in);
2472         return err;
2473 }
2474
2475 static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
2476                                    struct mlx5_ib_sq *sq, int new_state)
2477 {
2478         void *in;
2479         void *sqc;
2480         int inlen;
2481         int err;
2482
2483         inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
2484         in = mlx5_vzalloc(inlen);
2485         if (!in)
2486                 return -ENOMEM;
2487
2488         MLX5_SET(modify_sq_in, in, sqn, sq->base.mqp.qpn);
2489         MLX5_SET(modify_sq_in, in, sq_state, sq->state);
2490
2491         sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
2492         MLX5_SET(sqc, sqc, state, new_state);
2493
2494         err = mlx5_core_modify_sq(dev, in, inlen);
2495         if (err)
2496                 goto out;
2497
2498         sq->state = new_state;
2499
2500 out:
2501         kvfree(in);
2502         return err;
2503 }
2504
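     /*
      * Raw packet QPs are built from separate RQ and SQ objects, so a verbs
      * state change is translated into per-object transitions: the
      * RST/INIT/ERR operations map to matching RQC/SQC states, the
      * INIT->RTR/RTS-style transitions are no-ops here, and an optional LAG
      * tx port affinity is programmed on the TIS before the SQ is moved.
      */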
2505 static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2506                                 const struct mlx5_modify_raw_qp_param *raw_qp_param,
2507                                 u8 tx_affinity)
2508 {
2509         struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
2510         struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
2511         struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
2512         int rq_state;
2513         int sq_state;
2514         int err;
2515
2516         switch (raw_qp_param->operation) {
2517         case MLX5_CMD_OP_RST2INIT_QP:
2518                 rq_state = MLX5_RQC_STATE_RDY;
2519                 sq_state = MLX5_SQC_STATE_RDY;
2520                 break;
2521         case MLX5_CMD_OP_2ERR_QP:
2522                 rq_state = MLX5_RQC_STATE_ERR;
2523                 sq_state = MLX5_SQC_STATE_ERR;
2524                 break;
2525         case MLX5_CMD_OP_2RST_QP:
2526                 rq_state = MLX5_RQC_STATE_RST;
2527                 sq_state = MLX5_SQC_STATE_RST;
2528                 break;
2529         case MLX5_CMD_OP_INIT2INIT_QP:
2530         case MLX5_CMD_OP_INIT2RTR_QP:
2531         case MLX5_CMD_OP_RTR2RTS_QP:
2532         case MLX5_CMD_OP_RTS2RTS_QP:
2533                 if (raw_qp_param->set_mask)
2534                         return -EINVAL;
2535                 else
2536                         return 0;
2537         default:
2538                 WARN_ON(1);
2539                 return -EINVAL;
2540         }
2541
2542         if (qp->rq.wqe_cnt) {
2543                 err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
2544                 if (err)
2545                         return err;
2546         }
2547
2548         if (qp->sq.wqe_cnt) {
2549                 if (tx_affinity) {
2550                         err = modify_raw_packet_tx_affinity(dev->mdev, sq,
2551                                                             tx_affinity);
2552                         if (err)
2553                                 return err;
2554                 }
2555
2556                 return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
2557         }
2558
2559         return 0;
2560 }
2561
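     /*
      * Core of modify_qp: build an mlx5_qp_context from the ib_qp_attr
      * fields named in attr_mask, pick the firmware opcode for the
      * cur_state -> new_state transition from optab[], mask the optional
      * parameters through opt_mask[], and execute the change (via
      * modify_raw_packet_qp() for raw packet QPs).  Page faults are
      * disabled before transitions into RESET or ERR and re-enabled on
      * RESET -> INIT, and a kernel QP moved to RESET gets its CQs cleaned
      * and its software queue state reinitialized.
      */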
2562 static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
2563                                const struct ib_qp_attr *attr, int attr_mask,
2564                                enum ib_qp_state cur_state, enum ib_qp_state new_state)
2565 {
2566         static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
2567                 [MLX5_QP_STATE_RST] = {
2568                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
2569                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
2570                         [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_RST2INIT_QP,
2571                 },
2572                 [MLX5_QP_STATE_INIT]  = {
2573                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
2574                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
2575                         [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_INIT2INIT_QP,
2576                         [MLX5_QP_STATE_RTR]     = MLX5_CMD_OP_INIT2RTR_QP,
2577                 },
2578                 [MLX5_QP_STATE_RTR]   = {
2579                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
2580                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
2581                         [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTR2RTS_QP,
2582                 },
2583                 [MLX5_QP_STATE_RTS]   = {
2584                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
2585                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
2586                         [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTS2RTS_QP,
2587                 },
2588                 [MLX5_QP_STATE_SQD] = {
2589                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
2590                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
2591                 },
2592                 [MLX5_QP_STATE_SQER] = {
2593                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
2594                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
2595                         [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQERR2RTS_QP,
2596                 },
2597                 [MLX5_QP_STATE_ERR] = {
2598                         [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
2599                         [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
2600                 }
2601         };
2602
2603         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2604         struct mlx5_ib_qp *qp = to_mqp(ibqp);
2605         struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
2606         struct mlx5_ib_cq *send_cq, *recv_cq;
2607         struct mlx5_qp_context *context;
2608         struct mlx5_ib_pd *pd;
2609         struct mlx5_ib_port *mibport = NULL;
2610         enum mlx5_qp_state mlx5_cur, mlx5_new;
2611         enum mlx5_qp_optpar optpar;
2612         int sqd_event;
2613         int mlx5_st;
2614         int err;
2615         u16 op;
2616
2617         context = kzalloc(sizeof(*context), GFP_KERNEL);
2618         if (!context)
2619                 return -ENOMEM;
2620
2621         err = to_mlx5_st(ibqp->qp_type);
2622         if (err < 0) {
2623                 mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
2624                 goto out;
2625         }
2626
2627         context->flags = cpu_to_be32(err << 16);
2628
2629         if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
2630                 context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
2631         } else {
2632                 switch (attr->path_mig_state) {
2633                 case IB_MIG_MIGRATED:
2634                         context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
2635                         break;
2636                 case IB_MIG_REARM:
2637                         context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11);
2638                         break;
2639                 case IB_MIG_ARMED:
2640                         context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11);
2641                         break;
2642                 }
2643         }
2644
2645         if (is_sqp(ibqp->qp_type)) {
2646                 context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
2647         } else if (ibqp->qp_type == IB_QPT_UD ||
2648                    ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
2649                 context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
2650         } else if (attr_mask & IB_QP_PATH_MTU) {
2651                 if (attr->path_mtu < IB_MTU_256 ||
2652                     attr->path_mtu > IB_MTU_4096) {
2653                         mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
2654                         err = -EINVAL;
2655                         goto out;
2656                 }
2657                 context->mtu_msgmax = (attr->path_mtu << 5) |
2658                                       (u8)MLX5_CAP_GEN(dev->mdev, log_max_msg);
2659         }
2660
2661         if (attr_mask & IB_QP_DEST_QPN)
2662                 context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
2663
2664         if (attr_mask & IB_QP_PKEY_INDEX)
2665                 context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index);
2666
2667         /* TODO: implement counter_index functionality */
2668
2669         if (is_sqp(ibqp->qp_type))
2670                 context->pri_path.port = qp->port;
2671
2672         if (attr_mask & IB_QP_PORT)
2673                 context->pri_path.port = attr->port_num;
2674
2675         if (attr_mask & IB_QP_AV) {
2676                 err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
2677                                     attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
2678                                     attr_mask, 0, attr, false);
2679                 if (err)
2680                         goto out;
2681         }
2682
2683         if (attr_mask & IB_QP_TIMEOUT)
2684                 context->pri_path.ackto_lt |= attr->timeout << 3;
2685
2686         if (attr_mask & IB_QP_ALT_PATH) {
2687                 err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
2688                                     &context->alt_path,
2689                                     attr->alt_port_num,
2690                                     attr_mask | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT,
2691                                     0, attr, true);
2692                 if (err)
2693                         goto out;
2694         }
2695
2696         pd = get_pd(qp);
2697         get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
2698                 &send_cq, &recv_cq);
2699
2700         context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
2701         context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
2702         context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0;
2703         context->params1  = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28);
2704
2705         if (attr_mask & IB_QP_RNR_RETRY)
2706                 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
2707
2708         if (attr_mask & IB_QP_RETRY_CNT)
2709                 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
2710
2711         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2712                 if (attr->max_rd_atomic)
2713                         context->params1 |=
2714                                 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
2715         }
2716
2717         if (attr_mask & IB_QP_SQ_PSN)
2718                 context->next_send_psn = cpu_to_be32(attr->sq_psn);
2719
2720         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2721                 if (attr->max_dest_rd_atomic)
2722                         context->params2 |=
2723                                 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
2724         }
2725
2726         if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
2727                 context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
2728
2729         if (attr_mask & IB_QP_MIN_RNR_TIMER)
2730                 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
2731
2732         if (attr_mask & IB_QP_RQ_PSN)
2733                 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
2734
2735         if (attr_mask & IB_QP_QKEY)
2736                 context->qkey = cpu_to_be32(attr->qkey);
2737
2738         if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
2739                 context->db_rec_addr = cpu_to_be64(qp->db.dma);
2740
2741         if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD  &&
2742             attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
2743                 sqd_event = 1;
2744         else
2745                 sqd_event = 0;
2746
2747         if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
2748                 u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
2749                                qp->port) - 1;
2750                 mibport = &dev->port[port_num];
2751                 context->qp_counter_set_usr_page |=
2752                         cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
2753         }
2754
2755         if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
2756                 context->sq_crq_size |= cpu_to_be16(1 << 4);
2757
2758         if (qp->flags & MLX5_IB_QP_SQPN_QP1)
2759                 context->deth_sqpn = cpu_to_be32(1);
2760
2761         mlx5_cur = to_mlx5_state(cur_state);
2762         mlx5_new = to_mlx5_state(new_state);
2763         mlx5_st = to_mlx5_st(ibqp->qp_type);
2764         if (mlx5_st < 0)
2765                 goto out;
2766
2767         /* If moving to a reset or error state, we must disable page faults on
2768          * this QP and flush all current page faults. Otherwise a stale page
2769          * fault may attempt to work on this QP after it is reset and moved
2770          * again to RTS, and may cause the driver and the device to get out of
2771          * sync. */
2772         if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
2773             (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
2774             (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
2775                 mlx5_ib_qp_disable_pagefaults(qp);
2776
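        /*
         * optab[][] maps the (current, new) mlx5 state pair to the firmware
         * modify operation, and opt_mask[][][] masks off optional-parameter
         * bits that are not valid for that transition and transport type.
         */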
2777         if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
2778             !optab[mlx5_cur][mlx5_new])
2779                 goto out;
2780
2781         op = optab[mlx5_cur][mlx5_new];
2782         optpar = ib_mask_to_mlx5_opt(attr_mask);
2783         optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
2784
2785         if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
2786                 struct mlx5_modify_raw_qp_param raw_qp_param = {};
2787
2788                 raw_qp_param.operation = op;
2789                 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
2790                         raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
2791                         raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
2792                 }
2793                 err = modify_raw_packet_qp(dev, qp, &raw_qp_param, 0);
2794         } else {
2795                 err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
2796                                           &base->mqp);
2797         }
2798
2799         if (err)
2800                 goto out;
2801
2802         if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
2803             (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
2804                 mlx5_ib_qp_enable_pagefaults(qp);
2805
2806         qp->state = new_state;
2807
2808         if (attr_mask & IB_QP_ACCESS_FLAGS)
2809                 qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
2810         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
2811                 qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
2812         if (attr_mask & IB_QP_PORT)
2813                 qp->port = attr->port_num;
2814         if (attr_mask & IB_QP_ALT_PATH)
2815                 qp->trans_qp.alt_port = attr->alt_port_num;
2816
2817         /*
2818          * If we moved a kernel QP to RESET, clean up all old CQ
2819          * entries and reinitialize the QP.
2820          */
2821         if (new_state == IB_QPS_RESET && !ibqp->uobject) {
2822                 mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
2823                                  ibqp->srq ? to_msrq(ibqp->srq) : NULL);
2824                 if (send_cq != recv_cq)
2825                         mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);
2826
2827                 qp->rq.head = 0;
2828                 qp->rq.tail = 0;
2829                 qp->sq.head = 0;
2830                 qp->sq.tail = 0;
2831                 qp->sq.cur_post = 0;
2832                 qp->sq.last_poll = 0;
2833                 qp->db.db[MLX5_RCV_DBR] = 0;
2834                 qp->db.db[MLX5_SND_DBR] = 0;
2835         }
2836
2837 out:
2838         kfree(context);
2839         return err;
2840 }
2841
2842 int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2843                       int attr_mask, struct ib_udata *udata)
2844 {
2845         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2846         struct mlx5_ib_qp *qp = to_mqp(ibqp);
2847         enum ib_qp_type qp_type;
2848         enum ib_qp_state cur_state, new_state;
2849         int err = -EINVAL;
2850         int port;
2851         enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
2852
2853         if (ibqp->rwq_ind_tbl)
2854                 return -ENOSYS;
2855
2856         if (unlikely(ibqp->qp_type == IB_QPT_GSI))
2857                 return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
2858
2859         qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ?
2860                 IB_QPT_GSI : ibqp->qp_type;
2861
2862         mutex_lock(&qp->mutex);
2863
2864         cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
2865         new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
2866
2867         if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
2868                 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
2869                 ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
2870         }
2871
2872         if (qp_type != MLX5_IB_QPT_REG_UMR &&
2873             !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
2874                 mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
2875                             cur_state, new_state, ibqp->qp_type, attr_mask);
2876                 goto out;
2877         }
2878
2879         if ((attr_mask & IB_QP_PORT) &&
2880             (attr->port_num == 0 ||
2881              attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) {
2882                 mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
2883                             attr->port_num, dev->num_ports);
2884                 goto out;
2885         }
2886
2887         if (attr_mask & IB_QP_PKEY_INDEX) {
2888                 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
2889                 if (attr->pkey_index >=
2890                     dev->mdev->port_caps[port - 1].pkey_table_len) {
2891                         mlx5_ib_dbg(dev, "invalid pkey index %d\n",
2892                                     attr->pkey_index);
2893                         goto out;
2894                 }
2895         }
2896
2897         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
2898             attr->max_rd_atomic >
2899             (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
2900                 mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
2901                             attr->max_rd_atomic);
2902                 goto out;
2903         }
2904
2905         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
2906             attr->max_dest_rd_atomic >
2907             (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
2908                 mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
2909                             attr->max_dest_rd_atomic);
2910                 goto out;
2911         }
2912
2913         if (cur_state == new_state && cur_state == IB_QPS_RESET) {
2914                 err = 0;
2915                 goto out;
2916         }
2917
2918         err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
2919
2920 out:
2921         mutex_unlock(&qp->mutex);
2922         return err;
2923 }
2924
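/*
 * wq->head and wq->tail are free-running counters; the unsigned difference
 * "head - tail" gives the number of posted-but-unconsumed WQEs and stays
 * correct across counter wrap-around.
 */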
2925 static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
2926 {
2927         struct mlx5_ib_cq *cq;
2928         unsigned cur;
2929
2930         cur = wq->head - wq->tail;
2931         if (likely(cur + nreq < wq->max_post))
2932                 return 0;
2933
2934         cq = to_mcq(ib_cq);
2935         spin_lock(&cq->lock);
2936         cur = wq->head - wq->tail;
2937         spin_unlock(&cq->lock);
2938
2939         return cur + nreq >= wq->max_post;
2940 }
2941
2942 static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
2943                                           u64 remote_addr, u32 rkey)
2944 {
2945         rseg->raddr    = cpu_to_be64(remote_addr);
2946         rseg->rkey     = cpu_to_be32(rkey);
2947         rseg->reserved = 0;
2948 }
2949
2950 static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
2951                          struct ib_send_wr *wr, void *qend,
2952                          struct mlx5_ib_qp *qp, int *size)
2953 {
2954         void *seg = eseg;
2955
2956         memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
2957
2958         if (wr->send_flags & IB_SEND_IP_CSUM)
2959                 eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
2960                                  MLX5_ETH_WQE_L4_CSUM;
2961
2962         seg += sizeof(struct mlx5_wqe_eth_seg);
2963         *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
2964
2965         if (wr->opcode == IB_WR_LSO) {
2966                 struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
2967                 int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start);
2968                 u64 left, leftlen, copysz;
2969                 void *pdata = ud_wr->header;
2970
2971                 left = ud_wr->hlen;
2972                 eseg->mss = cpu_to_be16(ud_wr->mss);
2973                 eseg->inline_hdr_sz = cpu_to_be16(left);
2974
2975                 /*
2976                  * Check whether there is room up to the end of the queue;
2977                  * if so, copy everything in one shot, otherwise copy up to
2978                  * the end of the queue, wrap around, and then copy the rest.
2979                  */
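                /*
                 * Illustrative example: with 32 bytes left before qend and
                 * hlen = 80, copysz is 32; the remaining 48 bytes are copied
                 * to the start of the queue and 'seg' advances past them.
                 */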
2980                 leftlen = qend - (void *)eseg->inline_hdr_start;
2981                 copysz = min_t(u64, leftlen, left);
2982
2983                 memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
2984
2985                 if (likely(copysz > size_of_inl_hdr_start)) {
2986                         seg += ALIGN(copysz - size_of_inl_hdr_start, 16);
2987                         *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16;
2988                 }
2989
2990                 if (unlikely(copysz < left)) { /* the last wqe in the queue */
2991                         seg = mlx5_get_send_wqe(qp, 0);
2992                         left -= copysz;
2993                         pdata += copysz;
2994                         memcpy(seg, pdata, left);
2995                         seg += ALIGN(left, 16);
2996                         *size += ALIGN(left, 16) / 16;
2997                 }
2998         }
2999
3000         return seg;
3001 }
3002
3003 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
3004                              struct ib_send_wr *wr)
3005 {
3006         memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
3007         dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
3008         dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
3009 }
3010
3011 static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
3012 {
3013         dseg->byte_count = cpu_to_be32(sg->length);
3014         dseg->lkey       = cpu_to_be32(sg->lkey);
3015         dseg->addr       = cpu_to_be64(sg->addr);
3016 }
3017
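/*
 * An octoword is 16 bytes, i.e. two 8-byte MTT entries; npages is rounded
 * up to a multiple of 8, presumably to keep the descriptor list 64-byte
 * aligned. Callers double the count for 16-byte KLM entries.
 */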
3018 static __be16 get_klm_octo(int npages)
3019 {
3020         return cpu_to_be16(ALIGN(npages, 8) / 2);
3021 }
3022
3023 static __be64 frwr_mkey_mask(void)
3024 {
3025         u64 result;
3026
3027         result = MLX5_MKEY_MASK_LEN             |
3028                 MLX5_MKEY_MASK_PAGE_SIZE        |
3029                 MLX5_MKEY_MASK_START_ADDR       |
3030                 MLX5_MKEY_MASK_EN_RINVAL        |
3031                 MLX5_MKEY_MASK_KEY              |
3032                 MLX5_MKEY_MASK_LR               |
3033                 MLX5_MKEY_MASK_LW               |
3034                 MLX5_MKEY_MASK_RR               |
3035                 MLX5_MKEY_MASK_RW               |
3036                 MLX5_MKEY_MASK_A                |
3037                 MLX5_MKEY_MASK_SMALL_FENCE      |
3038                 MLX5_MKEY_MASK_FREE;
3039
3040         return cpu_to_be64(result);
3041 }
3042
3043 static __be64 sig_mkey_mask(void)
3044 {
3045         u64 result;
3046
3047         result = MLX5_MKEY_MASK_LEN             |
3048                 MLX5_MKEY_MASK_PAGE_SIZE        |
3049                 MLX5_MKEY_MASK_START_ADDR       |
3050                 MLX5_MKEY_MASK_EN_SIGERR        |
3051                 MLX5_MKEY_MASK_EN_RINVAL        |
3052                 MLX5_MKEY_MASK_KEY              |
3053                 MLX5_MKEY_MASK_LR               |
3054                 MLX5_MKEY_MASK_LW               |
3055                 MLX5_MKEY_MASK_RR               |
3056                 MLX5_MKEY_MASK_RW               |
3057                 MLX5_MKEY_MASK_SMALL_FENCE      |
3058                 MLX5_MKEY_MASK_FREE             |
3059                 MLX5_MKEY_MASK_BSF_EN;
3060
3061         return cpu_to_be64(result);
3062 }
3063
3064 static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
3065                                 struct mlx5_ib_mr *mr)
3066 {
3067         int ndescs = mr->ndescs;
3068
3069         memset(umr, 0, sizeof(*umr));
3070
3071         if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
3072                 /* KLMs take twice the size of MTTs */
3073                 ndescs *= 2;
3074
3075         umr->flags = MLX5_UMR_CHECK_NOT_FREE;
3076         umr->klm_octowords = get_klm_octo(ndescs);
3077         umr->mkey_mask = frwr_mkey_mask();
3078 }
3079
3080 static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
3081 {
3082         memset(umr, 0, sizeof(*umr));
3083         umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
3084         umr->flags = 1 << 7;
3085 }
3086
3087 static __be64 get_umr_reg_mr_mask(void)
3088 {
3089         u64 result;
3090
3091         result = MLX5_MKEY_MASK_LEN             |
3092                  MLX5_MKEY_MASK_PAGE_SIZE       |
3093                  MLX5_MKEY_MASK_START_ADDR      |
3094                  MLX5_MKEY_MASK_PD              |
3095                  MLX5_MKEY_MASK_LR              |
3096                  MLX5_MKEY_MASK_LW              |
3097                  MLX5_MKEY_MASK_KEY             |
3098                  MLX5_MKEY_MASK_RR              |
3099                  MLX5_MKEY_MASK_RW              |
3100                  MLX5_MKEY_MASK_A               |
3101                  MLX5_MKEY_MASK_FREE;
3102
3103         return cpu_to_be64(result);
3104 }
3105
3106 static __be64 get_umr_unreg_mr_mask(void)
3107 {
3108         u64 result;
3109
3110         result = MLX5_MKEY_MASK_FREE;
3111
3112         return cpu_to_be64(result);
3113 }
3114
3115 static __be64 get_umr_update_mtt_mask(void)
3116 {
3117         u64 result;
3118
3119         result = MLX5_MKEY_MASK_FREE;
3120
3121         return cpu_to_be64(result);
3122 }
3123
3124 static __be64 get_umr_update_translation_mask(void)
3125 {
3126         u64 result;
3127
3128         result = MLX5_MKEY_MASK_LEN |
3129                  MLX5_MKEY_MASK_PAGE_SIZE |
3130                  MLX5_MKEY_MASK_START_ADDR |
3131                  MLX5_MKEY_MASK_KEY |
3132                  MLX5_MKEY_MASK_FREE;
3133
3134         return cpu_to_be64(result);
3135 }
3136
3137 static __be64 get_umr_update_access_mask(void)
3138 {
3139         u64 result;
3140
3141         result = MLX5_MKEY_MASK_LW |
3142                  MLX5_MKEY_MASK_RR |
3143                  MLX5_MKEY_MASK_RW |
3144                  MLX5_MKEY_MASK_A |
3145                  MLX5_MKEY_MASK_KEY |
3146                  MLX5_MKEY_MASK_FREE;
3147
3148         return cpu_to_be64(result);
3149 }
3150
3151 static __be64 get_umr_update_pd_mask(void)
3152 {
3153         u64 result;
3154
3155         result = MLX5_MKEY_MASK_PD |
3156                  MLX5_MKEY_MASK_KEY |
3157                  MLX5_MKEY_MASK_FREE;
3158
3159         return cpu_to_be64(result);
3160 }
3161
3162 static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
3163                                 struct ib_send_wr *wr)
3164 {
3165         struct mlx5_umr_wr *umrwr = umr_wr(wr);
3166
3167         memset(umr, 0, sizeof(*umr));
3168
3169         if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
3170                 umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */
3171         else
3172                 umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */
3173
3174         if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
3175                 umr->klm_octowords = get_klm_octo(umrwr->npages);
3176                 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT) {
3177                         umr->mkey_mask = get_umr_update_mtt_mask();
3178                         umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
3179                         umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
3180                 }
3181                 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
3182                         umr->mkey_mask |= get_umr_update_translation_mask();
3183                 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS)
3184                         umr->mkey_mask |= get_umr_update_access_mask();
3185                 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD)
3186                         umr->mkey_mask |= get_umr_update_pd_mask();
3187                 if (!umr->mkey_mask)
3188                         umr->mkey_mask = get_umr_reg_mr_mask();
3189         } else {
3190                 umr->mkey_mask = get_umr_unreg_mr_mask();
3191         }
3192
3193         if (!wr->num_sge)
3194                 umr->flags |= MLX5_UMR_INLINE;
3195 }
3196
3197 static u8 get_umr_flags(int acc)
3198 {
3199         return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
3200                (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
3201                (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
3202                (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
3203                 MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
3204 }
3205
3206 static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
3207                              struct mlx5_ib_mr *mr,
3208                              u32 key, int access)
3209 {
3210         int ndescs = ALIGN(mr->ndescs, 8) >> 1;
3211
3212         memset(seg, 0, sizeof(*seg));
3213
3214         if (mr->access_mode == MLX5_ACCESS_MODE_MTT)
3215                 seg->log2_page_size = ilog2(mr->ibmr.page_size);
3216         else if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
3217                 /* KLMs take twice the size of MTTs */
3218                 ndescs *= 2;
3219
3220         seg->flags = get_umr_flags(access) | mr->access_mode;
3221         seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
3222         seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
3223         seg->start_addr = cpu_to_be64(mr->ibmr.iova);
3224         seg->len = cpu_to_be64(mr->ibmr.length);
3225         seg->xlt_oct_size = cpu_to_be32(ndescs);
3226 }
3227
3228 static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
3229 {
3230         memset(seg, 0, sizeof(*seg));
3231         seg->status = MLX5_MKEY_STATUS_FREE;
3232 }
3233
3234 static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
3235 {
3236         struct mlx5_umr_wr *umrwr = umr_wr(wr);
3237
3238         memset(seg, 0, sizeof(*seg));
3239         if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
3240                 seg->status = MLX5_MKEY_STATUS_FREE;
3241                 return;
3242         }
3243
3244         seg->flags = convert_access(umrwr->access_flags);
3245         if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
3246                 if (umrwr->pd)
3247                         seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
3248                 seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
3249         }
3250         seg->len = cpu_to_be64(umrwr->length);
3251         seg->log2_page_size = umrwr->page_shift;
3252         seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
3253                                        mlx5_mkey_variant(umrwr->mkey));
3254 }
3255
3256 static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
3257                              struct mlx5_ib_mr *mr,
3258                              struct mlx5_ib_pd *pd)
3259 {
3260         int bcount = mr->desc_size * mr->ndescs;
3261
3262         dseg->addr = cpu_to_be64(mr->desc_map);
3263         dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
3264         dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
3265 }
3266
3267 static __be32 send_ieth(struct ib_send_wr *wr)
3268 {
3269         switch (wr->opcode) {
3270         case IB_WR_SEND_WITH_IMM:
3271         case IB_WR_RDMA_WRITE_WITH_IMM:
3272                 return wr->ex.imm_data;
3273
3274         case IB_WR_SEND_WITH_INV:
3275                 return cpu_to_be32(wr->ex.invalidate_rkey);
3276
3277         default:
3278                 return 0;
3279         }
3280 }
3281
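/*
 * calc_sig() returns the bitwise complement of the XOR of all covered
 * bytes, so XOR-ing the covered bytes together with the stored signature
 * always yields 0xff.
 */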
3282 static u8 calc_sig(void *wqe, int size)
3283 {
3284         u8 *p = wqe;
3285         u8 res = 0;
3286         int i;
3287
3288         for (i = 0; i < size; i++)
3289                 res ^= p[i];
3290
3291         return ~res;
3292 }
3293
3294 static u8 wq_sig(void *wqe)
3295 {
3296         return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
3297 }
3298
3299 static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
3300                             void *wqe, int *sz)
3301 {
3302         struct mlx5_wqe_inline_seg *seg;
3303         void *qend = qp->sq.qend;
3304         void *addr;
3305         int inl = 0;
3306         int copy;
3307         int len;
3308         int i;
3309
3310         seg = wqe;
3311         wqe += sizeof(*seg);
3312         for (i = 0; i < wr->num_sge; i++) {
3313                 addr = (void *)(unsigned long)(wr->sg_list[i].addr);
3314                 len  = wr->sg_list[i].length;
3315                 inl += len;
3316
3317                 if (unlikely(inl > qp->max_inline_data))
3318                         return -ENOMEM;
3319
3320                 if (unlikely(wqe + len > qend)) {
3321                         copy = qend - wqe;
3322                         memcpy(wqe, addr, copy);
3323                         addr += copy;
3324                         len -= copy;
3325                         wqe = mlx5_get_send_wqe(qp, 0);
3326                 }
3327                 memcpy(wqe, addr, len);
3328                 wqe += len;
3329         }
3330
3331         seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
3332
3333         *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
3334
3335         return 0;
3336 }
3337
3338 static u16 prot_field_size(enum ib_signature_type type)
3339 {
3340         switch (type) {
3341         case IB_SIG_TYPE_T10_DIF:
3342                 return MLX5_DIF_SIZE;
3343         default:
3344                 return 0;
3345         }
3346 }
3347
3348 static u8 bs_selector(int block_size)
3349 {
3350         switch (block_size) {
3351         case 512:           return 0x1;
3352         case 520:           return 0x2;
3353         case 4096:          return 0x3;
3354         case 4160:          return 0x4;
3355         case 1073741824:    return 0x5;
3356         default:            return 0;
3357         }
3358 }
3359
3360 static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
3361                               struct mlx5_bsf_inl *inl)
3362 {
3363         /* Valid inline section and allow BSF refresh */
3364         inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
3365                                        MLX5_BSF_REFRESH_DIF);
3366         inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
3367         inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
3368         /* repeating block */
3369         inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
3370         inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
3371                         MLX5_DIF_CRC : MLX5_DIF_IPCS;
3372
3373         if (domain->sig.dif.ref_remap)
3374                 inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
3375
3376         if (domain->sig.dif.app_escape) {
3377                 if (domain->sig.dif.ref_escape)
3378                         inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
3379                 else
3380                         inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
3381         }
3382
3383         inl->dif_app_bitmask_check =
3384                 cpu_to_be16(domain->sig.dif.apptag_check_mask);
3385 }
3386
3387 static int mlx5_set_bsf(struct ib_mr *sig_mr,
3388                         struct ib_sig_attrs *sig_attrs,
3389                         struct mlx5_bsf *bsf, u32 data_size)
3390 {
3391         struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
3392         struct mlx5_bsf_basic *basic = &bsf->basic;
3393         struct ib_sig_domain *mem = &sig_attrs->mem;
3394         struct ib_sig_domain *wire = &sig_attrs->wire;
3395
3396         memset(bsf, 0, sizeof(*bsf));
3397
3398         /* Basic + Extended + Inline */
3399         basic->bsf_size_sbs = 1 << 7;
3400         /* Input domain check byte mask */
3401         basic->check_byte_mask = sig_attrs->check_mask;
3402         basic->raw_data_size = cpu_to_be32(data_size);
3403
3404         /* Memory domain */
3405         switch (sig_attrs->mem.sig_type) {
3406         case IB_SIG_TYPE_NONE:
3407                 break;
3408         case IB_SIG_TYPE_T10_DIF:
3409                 basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
3410                 basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
3411                 mlx5_fill_inl_bsf(mem, &bsf->m_inl);
3412                 break;
3413         default:
3414                 return -EINVAL;
3415         }
3416
3417         /* Wire domain */
3418         switch (sig_attrs->wire.sig_type) {
3419         case IB_SIG_TYPE_NONE:
3420                 break;
3421         case IB_SIG_TYPE_T10_DIF:
3422                 if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
3423                     mem->sig_type == wire->sig_type) {
3424                         /* Same block structure */
3425                         basic->bsf_size_sbs |= 1 << 4;
3426                         if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
3427                                 basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
3428                         if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
3429                                 basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
3430                         if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
3431                                 basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
3432                 } else
3433                         basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
3434
3435                 basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
3436                 mlx5_fill_inl_bsf(wire, &bsf->w_inl);
3437                 break;
3438         default:
3439                 return -EINVAL;
3440         }
3441
3442         return 0;
3443 }
3444
3445 static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
3446                                 struct mlx5_ib_qp *qp, void **seg, int *size)
3447 {
3448         struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
3449         struct ib_mr *sig_mr = wr->sig_mr;
3450         struct mlx5_bsf *bsf;
3451         u32 data_len = wr->wr.sg_list->length;
3452         u32 data_key = wr->wr.sg_list->lkey;
3453         u64 data_va = wr->wr.sg_list->addr;
3454         int ret;
3455         int wqe_size;
3456
3457         if (!wr->prot ||
3458             (data_key == wr->prot->lkey &&
3459              data_va == wr->prot->addr &&
3460              data_len == wr->prot->length)) {
3461                 /**
3462                  * The source domain doesn't contain signature information,
3463                  * or data and protection are interleaved in memory.
3464                  * So we need to construct:
3465                  *                  ------------------
3466                  *                 |     data_klm     |
3467                  *                  ------------------
3468                  *                 |       BSF        |
3469                  *                  ------------------
3470                  **/
3471                 struct mlx5_klm *data_klm = *seg;
3472
3473                 data_klm->bcount = cpu_to_be32(data_len);
3474                 data_klm->key = cpu_to_be32(data_key);
3475                 data_klm->va = cpu_to_be64(data_va);
3476                 wqe_size = ALIGN(sizeof(*data_klm), 64);
3477         } else {
3478                 /**
3479                  * The source domain contains signature information,
3480                  * so we need to construct a strided block format:
3481                  *               ---------------------------
3482                  *              |     stride_block_ctrl     |
3483                  *               ---------------------------
3484                  *              |          data_klm         |
3485                  *               ---------------------------
3486                  *              |          prot_klm         |
3487                  *               ---------------------------
3488                  *              |             BSF           |
3489                  *               ---------------------------
3490                  **/
3491                 struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
3492                 struct mlx5_stride_block_entry *data_sentry;
3493                 struct mlx5_stride_block_entry *prot_sentry;
3494                 u32 prot_key = wr->prot->lkey;
3495                 u64 prot_va = wr->prot->addr;
3496                 u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
3497                 int prot_size;
3498
3499                 sblock_ctrl = *seg;
3500                 data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
3501                 prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
3502
3503                 prot_size = prot_field_size(sig_attrs->mem.sig_type);
3504                 if (!prot_size) {
3505                         pr_err("Bad block size given: %u\n", block_size);
3506                         return -EINVAL;
3507                 }
3508                 sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
3509                                                             prot_size);
3510                 sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
3511                 sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
3512                 sblock_ctrl->num_entries = cpu_to_be16(2);
3513
3514                 data_sentry->bcount = cpu_to_be16(block_size);
3515                 data_sentry->key = cpu_to_be32(data_key);
3516                 data_sentry->va = cpu_to_be64(data_va);
3517                 data_sentry->stride = cpu_to_be16(block_size);
3518
3519                 prot_sentry->bcount = cpu_to_be16(prot_size);
3520                 prot_sentry->key = cpu_to_be32(prot_key);
3521                 prot_sentry->va = cpu_to_be64(prot_va);
3522                 prot_sentry->stride = cpu_to_be16(prot_size);
3523
3524                 wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
3525                                  sizeof(*prot_sentry), 64);
3526         }
3527
3528         *seg += wqe_size;
3529         *size += wqe_size / 16;
3530         if (unlikely((*seg == qp->sq.qend)))
3531                 *seg = mlx5_get_send_wqe(qp, 0);
3532
3533         bsf = *seg;
3534         ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
3535         if (ret)
3536                 return -EINVAL;
3537
3538         *seg += sizeof(*bsf);
3539         *size += sizeof(*bsf) / 16;
3540         if (unlikely((*seg == qp->sq.qend)))
3541                 *seg = mlx5_get_send_wqe(qp, 0);
3542
3543         return 0;
3544 }
3545
3546 static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
3547                                  struct ib_sig_handover_wr *wr, u32 nelements,
3548                                  u32 length, u32 pdn)
3549 {
3550         struct ib_mr *sig_mr = wr->sig_mr;
3551         u32 sig_key = sig_mr->rkey;
3552         u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
3553
3554         memset(seg, 0, sizeof(*seg));
3555
3556         seg->flags = get_umr_flags(wr->access_flags) |
3557                                    MLX5_ACCESS_MODE_KLM;
3558         seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
3559         seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
3560                                     MLX5_MKEY_BSF_EN | pdn);
3561         seg->len = cpu_to_be64(length);
3562         seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements)));
3563         seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
3564 }
3565
3566 static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
3567                                 u32 nelements)
3568 {
3569         memset(umr, 0, sizeof(*umr));
3570
3571         umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
3572         umr->klm_octowords = get_klm_octo(nelements);
3573         umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
3574         umr->mkey_mask = sig_mkey_mask();
3575 }
3576
3577
3578 static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
3579                           void **seg, int *size)
3580 {
3581         struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
3582         struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
3583         u32 pdn = get_pd(qp)->pdn;
3584         u32 klm_oct_size;
3585         int region_len, ret;
3586
3587         if (unlikely(wr->wr.num_sge != 1) ||
3588             unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) ||
3589             unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
3590             unlikely(!sig_mr->sig->sig_status_checked))
3591                 return -EINVAL;
3592
3593         /* length of the protected region, data + protection */
3594         region_len = wr->wr.sg_list->length;
3595         if (wr->prot &&
3596             (wr->prot->lkey != wr->wr.sg_list->lkey  ||
3597              wr->prot->addr != wr->wr.sg_list->addr  ||
3598              wr->prot->length != wr->wr.sg_list->length))
3599                 region_len += wr->prot->length;
3600
3601         /**
3602          * KLM octoword size - if protection was provided we use the
3603          * strided block format (3 octowords), otherwise a single
3604          * KLM (1 octoword)
3605          **/
3606         klm_oct_size = wr->prot ? 3 : 1;
3607
3608         set_sig_umr_segment(*seg, klm_oct_size);
3609         *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
3610         *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
3611         if (unlikely((*seg == qp->sq.qend)))
3612                 *seg = mlx5_get_send_wqe(qp, 0);
3613
3614         set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn);
3615         *seg += sizeof(struct mlx5_mkey_seg);
3616         *size += sizeof(struct mlx5_mkey_seg) / 16;
3617         if (unlikely((*seg == qp->sq.qend)))
3618                 *seg = mlx5_get_send_wqe(qp, 0);
3619
3620         ret = set_sig_data_segment(wr, qp, seg, size);
3621         if (ret)
3622                 return ret;
3623
3624         sig_mr->sig->sig_status_checked = false;
3625         return 0;
3626 }
3627
3628 static int set_psv_wr(struct ib_sig_domain *domain,
3629                       u32 psv_idx, void **seg, int *size)
3630 {
3631         struct mlx5_seg_set_psv *psv_seg = *seg;
3632
3633         memset(psv_seg, 0, sizeof(*psv_seg));
3634         psv_seg->psv_num = cpu_to_be32(psv_idx);
3635         switch (domain->sig_type) {
3636         case IB_SIG_TYPE_NONE:
3637                 break;
3638         case IB_SIG_TYPE_T10_DIF:
3639                 psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
3640                                                      domain->sig.dif.app_tag);
3641                 psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
3642                 break;
3643         default:
3644                 pr_err("Bad signature type given.\n");
3645                 return 1;
3646         }
3647
3648         *seg += sizeof(*psv_seg);
3649         *size += sizeof(*psv_seg) / 16;
3650
3651         return 0;
3652 }
3653
3654 static int set_reg_wr(struct mlx5_ib_qp *qp,
3655                       struct ib_reg_wr *wr,
3656                       void **seg, int *size)
3657 {
3658         struct mlx5_ib_mr *mr = to_mmr(wr->mr);
3659         struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
3660
3661         if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
3662                 mlx5_ib_warn(to_mdev(qp->ibqp.device),
3663                              "Invalid IB_SEND_INLINE send flag\n");
3664                 return -EINVAL;
3665         }
3666
3667         set_reg_umr_seg(*seg, mr);
3668         *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
3669         *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
3670         if (unlikely((*seg == qp->sq.qend)))
3671                 *seg = mlx5_get_send_wqe(qp, 0);
3672
3673         set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
3674         *seg += sizeof(struct mlx5_mkey_seg);
3675         *size += sizeof(struct mlx5_mkey_seg) / 16;
3676         if (unlikely((*seg == qp->sq.qend)))
3677                 *seg = mlx5_get_send_wqe(qp, 0);
3678
3679         set_reg_data_seg(*seg, mr, pd);
3680         *seg += sizeof(struct mlx5_wqe_data_seg);
3681         *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
3682
3683         return 0;
3684 }
3685
3686 static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
3687 {
3688         set_linv_umr_seg(*seg);
3689         *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
3690         *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
3691         if (unlikely((*seg == qp->sq.qend)))
3692                 *seg = mlx5_get_send_wqe(qp, 0);
3693         set_linv_mkey_seg(*seg);
3694         *seg += sizeof(struct mlx5_mkey_seg);
3695         *size += sizeof(struct mlx5_mkey_seg) / 16;
3696         if (unlikely((*seg == qp->sq.qend)))
3697                 *seg = mlx5_get_send_wqe(qp, 0);
3698 }
3699
3700 static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
3701 {
3702         __be32 *p = NULL;
3703         int tidx = idx;
3704         int i, j;
3705
3706         pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
3707         for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
3708                 if ((i & 0xf) == 0) {
3709                         void *buf = mlx5_get_send_wqe(qp, tidx);
3710                         tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
3711                         p = buf;
3712                         j = 0;
3713                 }
3714                 pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
3715                          be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
3716                          be32_to_cpu(p[j + 3]));
3717         }
3718 }
3719
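/*
 * Copy the WQE to the BlueFlame register 64 bytes at a time, wrapping the
 * source pointer back to the start of the send queue buffer when it
 * reaches qend.
 */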
3720 static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
3721                          unsigned bytecnt, struct mlx5_ib_qp *qp)
3722 {
3723         while (bytecnt > 0) {
3724                 __iowrite64_copy(dst++, src++, 8);
3725                 __iowrite64_copy(dst++, src++, 8);
3726                 __iowrite64_copy(dst++, src++, 8);
3727                 __iowrite64_copy(dst++, src++, 8);
3728                 __iowrite64_copy(dst++, src++, 8);
3729                 __iowrite64_copy(dst++, src++, 8);
3730                 __iowrite64_copy(dst++, src++, 8);
3731                 __iowrite64_copy(dst++, src++, 8);
3732                 bytecnt -= 64;
3733                 if (unlikely(src == qp->sq.qend))
3734                         src = mlx5_get_send_wqe(qp, 0);
3735         }
3736 }
3737
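/*
 * Fence selection: a fenced local invalidate gets strong ordering; if a
 * fence mode is already cached from a previous WR, a fenced WR upgrades it
 * to "small and fence" while an unfenced WR carries it forward unchanged;
 * otherwise a fenced WR gets a plain fence.
 */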
3738 static u8 get_fence(u8 fence, struct ib_send_wr *wr)
3739 {
3740         if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
3741                      wr->send_flags & IB_SEND_FENCE))
3742                 return MLX5_FENCE_MODE_STRONG_ORDERING;
3743
3744         if (unlikely(fence)) {
3745                 if (wr->send_flags & IB_SEND_FENCE)
3746                         return MLX5_FENCE_MODE_SMALL_AND_FENCE;
3747                 else
3748                         return fence;
3749         } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) {
3750                 return MLX5_FENCE_MODE_FENCE;
3751         }
3752
3753         return 0;
3754 }
3755
3756 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
3757                      struct mlx5_wqe_ctrl_seg **ctrl,
3758                      struct ib_send_wr *wr, unsigned *idx,
3759                      int *size, int nreq)
3760 {
3761         if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
3762                 return -ENOMEM;
3763
3764         *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
3765         *seg = mlx5_get_send_wqe(qp, *idx);
3766         *ctrl = *seg;
3767         *(uint32_t *)(*seg + 8) = 0;
3768         (*ctrl)->imm = send_ieth(wr);
3769         (*ctrl)->fm_ce_se = qp->sq_signal_bits |
3770                 (wr->send_flags & IB_SEND_SIGNALED ?
3771                  MLX5_WQE_CTRL_CQ_UPDATE : 0) |
3772                 (wr->send_flags & IB_SEND_SOLICITED ?
3773                  MLX5_WQE_CTRL_SOLICITED : 0);
3774
3775         *seg += sizeof(**ctrl);
3776         *size = sizeof(**ctrl) / 16;
3777
3778         return 0;
3779 }
3780
3781 static void finish_wqe(struct mlx5_ib_qp *qp,
3782                        struct mlx5_wqe_ctrl_seg *ctrl,
3783                        u8 size, unsigned idx, u64 wr_id,
3784                        int nreq, u8 fence, u8 next_fence,
3785                        u32 mlx5_opcode)
3786 {
3787         u8 opmod = 0;
3788
3789         ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
3790                                              mlx5_opcode | ((u32)opmod << 24));
3791         ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
3792         ctrl->fm_ce_se |= fence;
3793         qp->fm_cache = next_fence;
3794         if (unlikely(qp->wq_sig))
3795                 ctrl->signature = wq_sig(ctrl);
3796
3797         qp->sq.wrid[idx] = wr_id;
3798         qp->sq.w_list[idx].opcode = mlx5_opcode;
3799         qp->sq.wqe_head[idx] = qp->sq.head + nreq;
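        /* size is in units of 16 bytes; advance cur_post by the number of
         * MLX5_SEND_WQE_BB-sized basic blocks this WQE consumes.
         */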
3800         qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
3801         qp->sq.w_list[idx].next = qp->sq.cur_post;
3802 }
3803
3804
3805 int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3806                       struct ib_send_wr **bad_wr)
3807 {
3808         struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
3809         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
3810         struct mlx5_core_dev *mdev = dev->mdev;
3811         struct mlx5_ib_qp *qp;
3812         struct mlx5_ib_mr *mr;
3813         struct mlx5_wqe_data_seg *dpseg;
3814         struct mlx5_wqe_xrc_seg *xrc;
3815         struct mlx5_bf *bf;
3816         int uninitialized_var(size);
3817         void *qend;
3818         unsigned long flags;
3819         unsigned idx;
3820         int err = 0;
3821         int inl = 0;
3822         int num_sge;
3823         void *seg;
3824         int nreq;
3825         int i;
3826         u8 next_fence = 0;
3827         u8 fence;
3828
3829         if (unlikely(ibqp->qp_type == IB_QPT_GSI))
3830                 return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
3831
3832         qp = to_mqp(ibqp);
3833         bf = qp->bf;
3834         qend = qp->sq.qend;
3835
3836         spin_lock_irqsave(&qp->sq.lock, flags);
3837
3838         if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
3839                 err = -EIO;
3840                 *bad_wr = wr;
3841                 nreq = 0;
3842                 goto out;
3843         }
3844
3845         for (nreq = 0; wr; nreq++, wr = wr->next) {
3846                 if (unlikely(wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
3847                         mlx5_ib_warn(dev, "\n");
3848                         err = -EINVAL;
3849                         *bad_wr = wr;
3850                         goto out;
3851                 }
3852
3853                 fence = qp->fm_cache;
3854                 num_sge = wr->num_sge;
3855                 if (unlikely(num_sge > qp->sq.max_gs)) {
3856                         mlx5_ib_warn(dev, "\n");
3857                         err = -EINVAL;
3858                         *bad_wr = wr;
3859                         goto out;
3860                 }
3861
3862                 err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
3863                 if (err) {
3864                         mlx5_ib_warn(dev, "\n");
3865                         err = -ENOMEM;
3866                         *bad_wr = wr;
3867                         goto out;
3868                 }
3869
3870                 switch (ibqp->qp_type) {
3871                 case IB_QPT_XRC_INI:
3872                         xrc = seg;
3873                         seg += sizeof(*xrc);
3874                         size += sizeof(*xrc) / 16;
3875                         /* fall through */
3876                 case IB_QPT_RC:
3877                         switch (wr->opcode) {
3878                         case IB_WR_RDMA_READ:
3879                         case IB_WR_RDMA_WRITE:
3880                         case IB_WR_RDMA_WRITE_WITH_IMM:
3881                                 set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
3882                                               rdma_wr(wr)->rkey);
3883                                 seg += sizeof(struct mlx5_wqe_raddr_seg);
3884                                 size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
3885                                 break;
3886
3887                         case IB_WR_ATOMIC_CMP_AND_SWP:
3888                         case IB_WR_ATOMIC_FETCH_AND_ADD:
3889                         case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
3890                                 mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
3891                                 err = -ENOSYS;
3892                                 *bad_wr = wr;
3893                                 goto out;
3894
3895                         case IB_WR_LOCAL_INV:
3896                                 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
3897                                 qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
3898                                 ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
3899                                 set_linv_wr(qp, &seg, &size);
3900                                 num_sge = 0;
3901                                 break;
3902
3903                         case IB_WR_REG_MR:
3904                                 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
3905                                 qp->sq.wr_data[idx] = IB_WR_REG_MR;
3906                                 ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
3907                                 err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
3908                                 if (err) {
3909                                         *bad_wr = wr;
3910                                         goto out;
3911                                 }
3912                                 num_sge = 0;
3913                                 break;
3914
3915                         case IB_WR_REG_SIG_MR:
3916                                 qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
3917                                 mr = to_mmr(sig_handover_wr(wr)->sig_mr);
3918
3919                                 ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
3920                                 err = set_sig_umr_wr(wr, qp, &seg, &size);
3921                                 if (err) {
3922                                         mlx5_ib_warn(dev, "\n");
3923                                         *bad_wr = wr;
3924                                         goto out;
3925                                 }
3926
3927                                 finish_wqe(qp, ctrl, size, idx, wr->wr_id,
3928                                            nreq, get_fence(fence, wr),
3929                                            next_fence, MLX5_OPCODE_UMR);
3930                                 /*
3931                                  * SET_PSV WQEs are not signaled and are
3932                                  * solicited only on error
3933                                  */
3934                                 wr->send_flags &= ~IB_SEND_SIGNALED;
3935                                 wr->send_flags |= IB_SEND_SOLICITED;
3936                                 err = begin_wqe(qp, &seg, &ctrl, wr,
3937                                                 &idx, &size, nreq);
3938                                 if (err) {
3939                                         mlx5_ib_warn(dev, "\n");
3940                                         err = -ENOMEM;
3941                                         *bad_wr = wr;
3942                                         goto out;
3943                                 }
3944
3945                                 err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem,
3946                                                  mr->sig->psv_memory.psv_idx, &seg,
3947                                                  &size);
3948                                 if (err) {
3949                                         mlx5_ib_warn(dev, "\n");
3950                                         *bad_wr = wr;
3951                                         goto out;
3952                                 }
3953
3954                                 finish_wqe(qp, ctrl, size, idx, wr->wr_id,
3955                                            nreq, get_fence(fence, wr),
3956                                            next_fence, MLX5_OPCODE_SET_PSV);
3957                                 err = begin_wqe(qp, &seg, &ctrl, wr,
3958                                                 &idx, &size, nreq);
3959                                 if (err) {
3960                                         mlx5_ib_warn(dev, "\n");
3961                                         err = -ENOMEM;
3962                                         *bad_wr = wr;
3963                                         goto out;
3964                                 }
3965
3966                                 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
3967                                 err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
3968                                                  mr->sig->psv_wire.psv_idx, &seg,
3969                                                  &size);
3970                                 if (err) {
3971                                         mlx5_ib_warn(dev, "\n");
3972                                         *bad_wr = wr;
3973                                         goto out;
3974                                 }
3975
3976                                 finish_wqe(qp, ctrl, size, idx, wr->wr_id,
3977                                            nreq, get_fence(fence, wr),
3978                                            next_fence, MLX5_OPCODE_SET_PSV);
3979                                 num_sge = 0;
3980                                 goto skip_psv;
3981
3982                         default:
3983                                 break;
3984                         }
3985                         break;
3986
3987                 case IB_QPT_UC:
3988                         switch (wr->opcode) {
3989                         case IB_WR_RDMA_WRITE:
3990                         case IB_WR_RDMA_WRITE_WITH_IMM:
3991                                 set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
3992                                               rdma_wr(wr)->rkey);
3993                                 seg  += sizeof(struct mlx5_wqe_raddr_seg);
3994                                 size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
3995                                 break;
3996
3997                         default:
3998                                 break;
3999                         }
4000                         break;
4001
4002                 case IB_QPT_SMI:
4003                 case MLX5_IB_QPT_HW_GSI:
4004                         set_datagram_seg(seg, wr);
4005                         seg += sizeof(struct mlx5_wqe_datagram_seg);
4006                         size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
4007                         if (unlikely((seg == qend)))
4008                                 seg = mlx5_get_send_wqe(qp, 0);
4009                         break;
4010                 case IB_QPT_UD:
4011                         set_datagram_seg(seg, wr);
4012                         seg += sizeof(struct mlx5_wqe_datagram_seg);
4013                         size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
4014
4015                         if (unlikely((seg == qend)))
4016                                 seg = mlx5_get_send_wqe(qp, 0);
4017
4018                         /* handle qp that supports ud offload */
4019                         if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
4020                                 struct mlx5_wqe_eth_pad *pad;
4021
4022                                 pad = seg;
4023                                 memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
4024                                 seg += sizeof(struct mlx5_wqe_eth_pad);
4025                                 size += sizeof(struct mlx5_wqe_eth_pad) / 16;
4026
4027                                 seg = set_eth_seg(seg, wr, qend, qp, &size);
4028
4029                                 if (unlikely((seg == qend)))
4030                                         seg = mlx5_get_send_wqe(qp, 0);
4031                         }
4032                         break;
4033                 case MLX5_IB_QPT_REG_UMR:
4034                         if (wr->opcode != MLX5_IB_WR_UMR) {
4035                                 err = -EINVAL;
4036                                 mlx5_ib_warn(dev, "bad opcode\n");
4037                                 goto out;
4038                         }
4039                         qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
4040                         ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
4041                         set_reg_umr_segment(seg, wr);
4042                         seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
4043                         size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
4044                         if (unlikely((seg == qend)))
4045                                 seg = mlx5_get_send_wqe(qp, 0);
4046                         set_reg_mkey_segment(seg, wr);
4047                         seg += sizeof(struct mlx5_mkey_seg);
4048                         size += sizeof(struct mlx5_mkey_seg) / 16;
4049                         if (unlikely((seg == qend)))
4050                                 seg = mlx5_get_send_wqe(qp, 0);
4051                         break;
4052
4053                 default:
4054                         break;
4055                 }
4056
4057                 if (wr->send_flags & IB_SEND_INLINE && num_sge) {
4058                         int uninitialized_var(sz);
4059
4060                         err = set_data_inl_seg(qp, wr, seg, &sz);
4061                         if (unlikely(err)) {
4062                                 mlx5_ib_warn(dev, "\n");
4063                                 *bad_wr = wr;
4064                                 goto out;
4065                         }
4066                         inl = 1;
4067                         size += sz;
4068                 } else {
4069                         dpseg = seg;
4070                         for (i = 0; i < num_sge; i++) {
4071                                 if (unlikely(dpseg == qend)) {
4072                                         seg = mlx5_get_send_wqe(qp, 0);
4073                                         dpseg = seg;
4074                                 }
4075                                 if (likely(wr->sg_list[i].length)) {
4076                                         set_data_ptr_seg(dpseg, wr->sg_list + i);
4077                                         size += sizeof(struct mlx5_wqe_data_seg) / 16;
4078                                         dpseg++;
4079                                 }
4080                         }
4081                 }
4082
4083                 finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
4084                            get_fence(fence, wr), next_fence,
4085                            mlx5_ib_opcode[wr->opcode]);
4086 skip_psv:
4087                 if (0)
4088                         dump_wqe(qp, idx, size);
4089         }
4090
4091 out:
4092         if (likely(nreq)) {
4093                 qp->sq.head += nreq;
4094
4095                 /* Make sure that descriptors are written before
4096                  * updating doorbell record and ringing the doorbell
4097                  */
4098                 wmb();
4099
4100                 qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
4101
4102                 /* Make sure the doorbell record is visible to the HCA
4103                  * before we ring the doorbell */
4104                 wmb();
4105
4106                 if (bf->need_lock)
4107                         spin_lock(&bf->lock);
4108                 else
4109                         __acquire(&bf->lock);
4110
4111                 /* TBD enable WC */
4112                 if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) {
4113                         mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
4114                         /* wc_wmb(); */
4115                 } else {
4116                         mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
4117                                      MLX5_GET_DOORBELL_LOCK(&bf->lock32));
4118                         /* Make sure doorbells don't leak out of SQ spinlock
4119                          * and reach the HCA out of order.
4120                          */
4121                         mmiowb();
4122                 }
4123                 bf->offset ^= bf->buf_size;
4124                 if (bf->need_lock)
4125                         spin_unlock(&bf->lock);
4126                 else
4127                         __release(&bf->lock);
4128         }
4129
4130         spin_unlock_irqrestore(&qp->sq.lock, flags);
4131
4132         return err;
4133 }
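/*
 * Illustrative sketch only (not part of the driver): a kernel ULP would
 * normally reach the post-send path above through the ib_post_send() verb.
 * The names "my_qp", "buf_dma" and "buf_lkey" below are hypothetical.
 *
 *	struct ib_sge sge = {
 *		.addr   = buf_dma,	// DMA address of a registered buffer
 *		.length = 64,		// bytes to send
 *		.lkey   = buf_lkey,	// local key of the registered MR
 *	};
 *	struct ib_send_wr wr = {
 *		.wr_id      = 1,
 *		.sg_list    = &sge,
 *		.num_sge    = 1,
 *		.opcode     = IB_WR_SEND,
 *		.send_flags = IB_SEND_SIGNALED,
 *	}, *bad_wr;
 *
 *	if (ib_post_send(my_qp, &wr, &bad_wr))
 *		handle_error();	// bad_wr points at the first WR not posted
 */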
4134
4135 static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
4136 {
4137         sig->signature = calc_sig(sig, size);
4138 }
4139
4140 int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
4141                       struct ib_recv_wr **bad_wr)
4142 {
4143         struct mlx5_ib_qp *qp = to_mqp(ibqp);
4144         struct mlx5_wqe_data_seg *scat;
4145         struct mlx5_rwqe_sig *sig;
4146         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4147         struct mlx5_core_dev *mdev = dev->mdev;
4148         unsigned long flags;
4149         int err = 0;
4150         int nreq;
4151         int ind;
4152         int i;
4153
4154         if (unlikely(ibqp->qp_type == IB_QPT_GSI))
4155                 return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
4156
4157         spin_lock_irqsave(&qp->rq.lock, flags);
4158
4159         if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
4160                 err = -EIO;
4161                 *bad_wr = wr;
4162                 nreq = 0;
4163                 goto out;
4164         }
4165
4166         ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
4167
4168         for (nreq = 0; wr; nreq++, wr = wr->next) {
4169                 if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
4170                         err = -ENOMEM;
4171                         *bad_wr = wr;
4172                         goto out;
4173                 }
4174
4175                 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
4176                         err = -EINVAL;
4177                         *bad_wr = wr;
4178                         goto out;
4179                 }
4180
4181                 scat = get_recv_wqe(qp, ind);
4182                 if (qp->wq_sig)
4183                         scat++;
4184
4185                 for (i = 0; i < wr->num_sge; i++)
4186                         set_data_ptr_seg(scat + i, wr->sg_list + i);
4187
4188                 if (i < qp->rq.max_gs) {
4189                         scat[i].byte_count = 0;
4190                         scat[i].lkey       = cpu_to_be32(MLX5_INVALID_LKEY);
4191                         scat[i].addr       = 0;
4192                 }
4193
4194                 if (qp->wq_sig) {
4195                         sig = (struct mlx5_rwqe_sig *)scat;
4196                         set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
4197                 }
4198
4199                 qp->rq.wrid[ind] = wr->wr_id;
4200
4201                 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
4202         }
4203
4204 out:
4205         if (likely(nreq)) {
4206                 qp->rq.head += nreq;
4207
4208                 /* Make sure that descriptors are written before
4209                  * doorbell record.
4210                  */
4211                 wmb();
4212
4213                 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
4214         }
4215
4216         spin_unlock_irqrestore(&qp->rq.lock, flags);
4217
4218         return err;
4219 }
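/*
 * Note on the ring arithmetic above: rq.wqe_cnt is a power of two (implied
 * by the masking here), so "head & (wqe_cnt - 1)" is a cheap modulo.  As a
 * worked example with illustrative values, wqe_cnt = 8 and rq.head = 13
 * give index 13 & 7 = 5, i.e. the ring has wrapped once.  Consumers reach
 * this path through ib_post_recv(); a minimal sketch, with "my_qp",
 * "buf_dma" and "buf_lkey" as hypothetical names:
 *
 *	struct ib_sge sge = { .addr = buf_dma, .length = 4096, .lkey = buf_lkey };
 *	struct ib_recv_wr wr = { .wr_id = 2, .sg_list = &sge, .num_sge = 1 }, *bad;
 *
 *	if (ib_post_recv(my_qp, &wr, &bad))
 *		handle_error();
 */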
4220
4221 static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
4222 {
4223         switch (mlx5_state) {
4224         case MLX5_QP_STATE_RST:      return IB_QPS_RESET;
4225         case MLX5_QP_STATE_INIT:     return IB_QPS_INIT;
4226         case MLX5_QP_STATE_RTR:      return IB_QPS_RTR;
4227         case MLX5_QP_STATE_RTS:      return IB_QPS_RTS;
4228         case MLX5_QP_STATE_SQ_DRAINING:
4229         case MLX5_QP_STATE_SQD:      return IB_QPS_SQD;
4230         case MLX5_QP_STATE_SQER:     return IB_QPS_SQE;
4231         case MLX5_QP_STATE_ERR:      return IB_QPS_ERR;
4232         default:                     return -1;
4233         }
4234 }
4235
4236 static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
4237 {
4238         switch (mlx5_mig_state) {
4239         case MLX5_QP_PM_ARMED:          return IB_MIG_ARMED;
4240         case MLX5_QP_PM_REARM:          return IB_MIG_REARM;
4241         case MLX5_QP_PM_MIGRATED:       return IB_MIG_MIGRATED;
4242         default: return -1;
4243         }
4244 }
4245
4246 static int to_ib_qp_access_flags(int mlx5_flags)
4247 {
4248         int ib_flags = 0;
4249
4250         if (mlx5_flags & MLX5_QP_BIT_RRE)
4251                 ib_flags |= IB_ACCESS_REMOTE_READ;
4252         if (mlx5_flags & MLX5_QP_BIT_RWE)
4253                 ib_flags |= IB_ACCESS_REMOTE_WRITE;
4254         if (mlx5_flags & MLX5_QP_BIT_RAE)
4255                 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
4256
4257         return ib_flags;
4258 }
4259
4260 static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
4261                                 struct mlx5_qp_path *path)
4262 {
4263         struct mlx5_core_dev *dev = ibdev->mdev;
4264
4265         memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
4266         ib_ah_attr->port_num      = path->port;
4267
4268         if (ib_ah_attr->port_num == 0 ||
4269             ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
4270                 return;
4271
4272         ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
4273
4274         ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
4275         ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
4276         ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;
4277         ib_ah_attr->ah_flags      = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
4278         if (ib_ah_attr->ah_flags) {
4279                 ib_ah_attr->grh.sgid_index = path->mgid_index;
4280                 ib_ah_attr->grh.hop_limit  = path->hop_limit;
4281                 ib_ah_attr->grh.traffic_class =
4282                         (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
4283                 ib_ah_attr->grh.flow_label =
4284                         be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
4285                 memcpy(ib_ah_attr->grh.dgid.raw,
4286                        path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
4287         }
4288 }
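/*
 * Reading of the tclass_flowlabel unpacking above (derived from the shifts
 * in this function, not from additional spec text): the 32-bit field carries
 * the 8-bit traffic class in bits 27:20 and the 20-bit flow label in bits
 * 19:0.  For example, a value of 0x0ab12345 yields a traffic class of 0xab
 * and a flow label of 0x12345.
 */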
4289
4290 static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
4291                                         struct mlx5_ib_sq *sq,
4292                                         u8 *sq_state)
4293 {
4294         void *out;
4295         void *sqc;
4296         int inlen;
4297         int err;
4298
4299         inlen = MLX5_ST_SZ_BYTES(query_sq_out);
4300         out = mlx5_vzalloc(inlen);
4301         if (!out)
4302                 return -ENOMEM;
4303
4304         err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
4305         if (err)
4306                 goto out;
4307
4308         sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
4309         *sq_state = MLX5_GET(sqc, sqc, state);
4310         sq->state = *sq_state;
4311
4312 out:
4313         kvfree(out);
4314         return err;
4315 }
4316
4317 static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
4318                                         struct mlx5_ib_rq *rq,
4319                                         u8 *rq_state)
4320 {
4321         void *out;
4322         void *rqc;
4323         int inlen;
4324         int err;
4325
4326         inlen = MLX5_ST_SZ_BYTES(query_rq_out);
4327         out = mlx5_vzalloc(inlen);
4328         if (!out)
4329                 return -ENOMEM;
4330
4331         err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
4332         if (err)
4333                 goto out;
4334
4335         rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
4336         *rq_state = MLX5_GET(rqc, rqc, state);
4337         rq->state = *rq_state;
4338
4339 out:
4340         kvfree(out);
4341         return err;
4342 }
4343
4344 static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
4345                                   struct mlx5_ib_qp *qp, u8 *qp_state)
4346 {
4347         static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
4348                 [MLX5_RQC_STATE_RST] = {
4349                         [MLX5_SQC_STATE_RST]    = IB_QPS_RESET,
4350                         [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE_BAD,
4351                         [MLX5_SQC_STATE_ERR]    = MLX5_QP_STATE_BAD,
4352                         [MLX5_SQ_STATE_NA]      = IB_QPS_RESET,
4353                 },
4354                 [MLX5_RQC_STATE_RDY] = {
4355                         [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE_BAD,
4356                         [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE,
4357                         [MLX5_SQC_STATE_ERR]    = IB_QPS_SQE,
4358                         [MLX5_SQ_STATE_NA]      = MLX5_QP_STATE,
4359                 },
4360                 [MLX5_RQC_STATE_ERR] = {
4361                         [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE_BAD,
4362                         [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE_BAD,
4363                         [MLX5_SQC_STATE_ERR]    = IB_QPS_ERR,
4364                         [MLX5_SQ_STATE_NA]      = IB_QPS_ERR,
4365                 },
4366                 [MLX5_RQ_STATE_NA] = {
4367                         [MLX5_SQC_STATE_RST]    = IB_QPS_RESET,
4368                         [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE,
4369                         [MLX5_SQC_STATE_ERR]    = MLX5_QP_STATE,
4370                         [MLX5_SQ_STATE_NA]      = MLX5_QP_STATE_BAD,
4371                 },
4372         };
4373
4374         *qp_state = sqrq_trans[rq_state][sq_state];
4375
4376         if (*qp_state == MLX5_QP_STATE_BAD) {
4377                 WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
4378                      qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
4379                      qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
4380                 return -EINVAL;
4381         }
4382
4383         if (*qp_state == MLX5_QP_STATE)
4384                 *qp_state = qp->state;
4385
4386         return 0;
4387 }
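/*
 * Note on the table above: MLX5_QP_STATE_BAD marks SQ/RQ state pairs that a
 * correctly operating device should never report, while the MLX5_QP_STATE
 * sentinel means no single IB state can be derived from the hardware and the
 * state tracked in software (qp->state) is returned instead.
 */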
4388
4389 static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
4390                                      struct mlx5_ib_qp *qp,
4391                                      u8 *raw_packet_qp_state)
4392 {
4393         struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
4394         struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
4395         struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
4396         int err;
4397         u8 sq_state = MLX5_SQ_STATE_NA;
4398         u8 rq_state = MLX5_RQ_STATE_NA;
4399
4400         if (qp->sq.wqe_cnt) {
4401                 err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
4402                 if (err)
4403                         return err;
4404         }
4405
4406         if (qp->rq.wqe_cnt) {
4407                 err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
4408                 if (err)
4409                         return err;
4410         }
4411
4412         return sqrq_state_to_qp_state(sq_state, rq_state, qp,
4413                                       raw_packet_qp_state);
4414 }
4415
4416 static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
4417                          struct ib_qp_attr *qp_attr)
4418 {
4419         int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
4420         struct mlx5_qp_context *context;
4421         int mlx5_state;
4422         u32 *outb;
4423         int err = 0;
4424
4425         outb = kzalloc(outlen, GFP_KERNEL);
4426         if (!outb)
4427                 return -ENOMEM;
4428
4429         err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
4430                                  outlen);
4431         if (err)
4432                 goto out;
4433
4434         /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */
4435         context = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, outb, qpc);
4436
4437         mlx5_state = be32_to_cpu(context->flags) >> 28;
4438
4439         qp->state                    = to_ib_qp_state(mlx5_state);
4440         qp_attr->path_mtu            = context->mtu_msgmax >> 5;
4441         qp_attr->path_mig_state      =
4442                 to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
4443         qp_attr->qkey                = be32_to_cpu(context->qkey);
4444         qp_attr->rq_psn              = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
4445         qp_attr->sq_psn              = be32_to_cpu(context->next_send_psn) & 0xffffff;
4446         qp_attr->dest_qp_num         = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff;
4447         qp_attr->qp_access_flags     =
4448                 to_ib_qp_access_flags(be32_to_cpu(context->params2));
4449
4450         if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
4451                 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
4452                 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
4453                 qp_attr->alt_pkey_index =
4454                         be16_to_cpu(context->alt_path.pkey_index);
4455                 qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;
4456         }
4457
4458         qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index);
4459         qp_attr->port_num = context->pri_path.port;
4460
4461         /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
4462         qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING;
4463
4464         qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
4465
4466         qp_attr->max_dest_rd_atomic =
4467                 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
4468         qp_attr->min_rnr_timer      =
4469                 (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
4470         qp_attr->timeout            = context->pri_path.ackto_lt >> 3;
4471         qp_attr->retry_cnt          = (be32_to_cpu(context->params1) >> 16) & 0x7;
4472         qp_attr->rnr_retry          = (be32_to_cpu(context->params1) >> 13) & 0x7;
4473         qp_attr->alt_timeout        = context->alt_path.ackto_lt >> 3;
4474
4475 out:
4476         kfree(outb);
4477         return err;
4478 }
4479
4480 int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
4481                      int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
4482 {
4483         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4484         struct mlx5_ib_qp *qp = to_mqp(ibqp);
4485         int err = 0;
4486         u8 raw_packet_qp_state;
4487
4488         if (ibqp->rwq_ind_tbl)
4489                 return -ENOSYS;
4490
4491         if (unlikely(ibqp->qp_type == IB_QPT_GSI))
4492                 return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
4493                                             qp_init_attr);
4494
4495 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
4496         /*
4497          * Wait for any outstanding page faults, in case the user frees memory
4498          * based upon this query's result.
4499          */
4500         flush_workqueue(mlx5_ib_page_fault_wq);
4501 #endif
4502
4503         mutex_lock(&qp->mutex);
4504
4505         if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
4506                 err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
4507                 if (err)
4508                         goto out;
4509                 qp->state = raw_packet_qp_state;
4510                 qp_attr->port_num = 1;
4511         } else {
4512                 err = query_qp_attr(dev, qp, qp_attr);
4513                 if (err)
4514                         goto out;
4515         }
4516
4517         qp_attr->qp_state            = qp->state;
4518         qp_attr->cur_qp_state        = qp_attr->qp_state;
4519         qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
4520         qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
4521
4522         if (!ibqp->uobject) {
4523                 qp_attr->cap.max_send_wr  = qp->sq.max_post;
4524                 qp_attr->cap.max_send_sge = qp->sq.max_gs;
4525                 qp_init_attr->qp_context = ibqp->qp_context;
4526         } else {
4527                 qp_attr->cap.max_send_wr  = 0;
4528                 qp_attr->cap.max_send_sge = 0;
4529         }
4530
4531         qp_init_attr->qp_type = ibqp->qp_type;
4532         qp_init_attr->recv_cq = ibqp->recv_cq;
4533         qp_init_attr->send_cq = ibqp->send_cq;
4534         qp_init_attr->srq = ibqp->srq;
4535         qp_attr->cap.max_inline_data = qp->max_inline_data;
4536
4537         qp_init_attr->cap            = qp_attr->cap;
4538
4539         qp_init_attr->create_flags = 0;
4540         if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
4541                 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
4542
4543         if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
4544                 qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
4545         if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
4546                 qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
4547         if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
4548                 qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
4549         if (qp->flags & MLX5_IB_QP_SQPN_QP1)
4550                 qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1();
4551
4552         qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
4553                 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
4554
4555 out:
4556         mutex_unlock(&qp->mutex);
4557         return err;
4558 }
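/*
 * Minimal usage sketch (illustrative only; "my_qp" is hypothetical): kernel
 * consumers reach mlx5_ib_query_qp() through the ib_query_qp() verb.
 *
 *	struct ib_qp_attr attr;
 *	struct ib_qp_init_attr init_attr;
 *
 *	if (!ib_query_qp(my_qp, &attr, IB_QP_STATE | IB_QP_PATH_MTU, &init_attr))
 *		pr_info("QP state %d, path MTU enum %d\n",
 *			attr.qp_state, attr.path_mtu);
 */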
4559
4560 struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
4561                                           struct ib_ucontext *context,
4562                                           struct ib_udata *udata)
4563 {
4564         struct mlx5_ib_dev *dev = to_mdev(ibdev);
4565         struct mlx5_ib_xrcd *xrcd;
4566         int err;
4567
4568         if (!MLX5_CAP_GEN(dev->mdev, xrc))
4569                 return ERR_PTR(-ENOSYS);
4570
4571         xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
4572         if (!xrcd)
4573                 return ERR_PTR(-ENOMEM);
4574
4575         err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn);
4576         if (err) {
4577                 kfree(xrcd);
4578                 return ERR_PTR(err);
4579         }
4580
4581         return &xrcd->ibxrcd;
4582 }
4583
4584 int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
4585 {
4586         struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
4587         u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
4588         int err;
4589
4590         err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn);
4591         if (err) {
4592                 mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
4593                 return err;
4594         }
4595
4596         kfree(xrcd);
4597
4598         return 0;
4599 }
4600
4601 static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
4602 {
4603         struct mlx5_ib_rwq *rwq = to_mibrwq(core_qp);
4604         struct mlx5_ib_dev *dev = to_mdev(rwq->ibwq.device);
4605         struct ib_event event;
4606
4607         if (rwq->ibwq.event_handler) {
4608                 event.device     = rwq->ibwq.device;
4609                 event.element.wq = &rwq->ibwq;
4610                 switch (type) {
4611                 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
4612                         event.event = IB_EVENT_WQ_FATAL;
4613                         break;
4614                 default:
4615                         mlx5_ib_warn(dev, "Unexpected event type %d on WQ %06x\n", type, core_qp->qpn);
4616                         return;
4617                 }
4618
4619                 rwq->ibwq.event_handler(&event, rwq->ibwq.wq_context);
4620         }
4621 }
4622
4623 static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
4624                       struct ib_wq_init_attr *init_attr)
4625 {
4626         struct mlx5_ib_dev *dev;
4627         __be64 *rq_pas0;
4628         void *in;
4629         void *rqc;
4630         void *wq;
4631         int inlen;
4632         int err;
4633
4634         dev = to_mdev(pd->device);
4635
4636         inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
4637         in = mlx5_vzalloc(inlen);
4638         if (!in)
4639                 return -ENOMEM;
4640
4641         rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
4642         MLX5_SET(rqc,  rqc, mem_rq_type,
4643                  MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE);
4644         MLX5_SET(rqc, rqc, user_index, rwq->user_index);
4645         MLX5_SET(rqc,  rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn);
4646         MLX5_SET(rqc,  rqc, state, MLX5_RQC_STATE_RST);
4647         MLX5_SET(rqc,  rqc, flush_in_error_en, 1);
4648         wq = MLX5_ADDR_OF(rqc, rqc, wq);
4649         MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
4650         MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
4651         MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride);
4652         MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size);
4653         MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn);
4654         MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset);
4655         MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
4656         MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
4657         MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
4658         rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
4659         mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
4660         err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
4661         kvfree(in);
4662         return err;
4663 }
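/*
 * The function above builds the CREATE_RQ command purely with the generated
 * MLX5_SET()/MLX5_SET64() accessors over the firmware-defined layout: the RQ
 * context is filled in first, the embedded WQ context next, and the physical
 * address (PAS) list of the user buffer is appended at the end before the
 * command is issued through mlx5_core_create_rq_tracked().
 */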
4664
4665 static int set_user_rq_size(struct mlx5_ib_dev *dev,
4666                             struct ib_wq_init_attr *wq_init_attr,
4667                             struct mlx5_ib_create_wq *ucmd,
4668                             struct mlx5_ib_rwq *rwq)
4669 {
4670         /* Sanity check RQ size before proceeding */
4671         if (wq_init_attr->max_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_wq_sz)))
4672                 return -EINVAL;
4673
4674         if (!ucmd->rq_wqe_count)
4675                 return -EINVAL;
4676
4677         rwq->wqe_count = ucmd->rq_wqe_count;
4678         rwq->wqe_shift = ucmd->rq_wqe_shift;
4679         rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift);
4680         rwq->log_rq_stride = rwq->wqe_shift;
4681         rwq->log_rq_size = ilog2(rwq->wqe_count);
4682         return 0;
4683 }
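/*
 * Worked example for the sizing above (illustrative numbers only): with
 * ucmd->rq_wqe_count = 512 and ucmd->rq_wqe_shift = 6 (64-byte strides),
 * buf_size = 512 << 6 = 32768 bytes, log_rq_stride = 6 and
 * log_rq_size = ilog2(512) = 9.
 */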
4684
4685 static int prepare_user_rq(struct ib_pd *pd,
4686                            struct ib_wq_init_attr *init_attr,
4687                            struct ib_udata *udata,
4688                            struct mlx5_ib_rwq *rwq)
4689 {
4690         struct mlx5_ib_dev *dev = to_mdev(pd->device);
4691         struct mlx5_ib_create_wq ucmd = {};
4692         int err;
4693         size_t required_cmd_sz;
4694
4695         required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
4696         if (udata->inlen < required_cmd_sz) {
4697                 mlx5_ib_dbg(dev, "invalid inlen\n");
4698                 return -EINVAL;
4699         }
4700
4701         if (udata->inlen > sizeof(ucmd) &&
4702             !ib_is_udata_cleared(udata, sizeof(ucmd),
4703                                  udata->inlen - sizeof(ucmd))) {
4704                 mlx5_ib_dbg(dev, "inlen is not supported\n");
4705                 return -EOPNOTSUPP;
4706         }
4707
4708         if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
4709                 mlx5_ib_dbg(dev, "copy failed\n");
4710                 return -EFAULT;
4711         }
4712
4713         if (ucmd.comp_mask) {
4714                 mlx5_ib_dbg(dev, "invalid comp mask\n");
4715                 return -EOPNOTSUPP;
4716         }
4717
4718         if (ucmd.reserved) {
4719                 mlx5_ib_dbg(dev, "invalid reserved\n");
4720                 return -EOPNOTSUPP;
4721         }
4722
4723         err = set_user_rq_size(dev, init_attr, &ucmd, rwq);
4724         if (err) {
4725                 mlx5_ib_dbg(dev, "err %d\n", err);
4726                 return err;
4727         }
4728
4729         err = create_user_rq(dev, pd, rwq, &ucmd);
4730         if (err) {
4731                 mlx5_ib_dbg(dev, "err %d\n", err);
4732                 return err;
4734         }
4735
4736         rwq->user_index = ucmd.user_index;
4737         return 0;
4738 }
4739
4740 struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
4741                                 struct ib_wq_init_attr *init_attr,
4742                                 struct ib_udata *udata)
4743 {
4744         struct mlx5_ib_dev *dev;
4745         struct mlx5_ib_rwq *rwq;
4746         struct mlx5_ib_create_wq_resp resp = {};
4747         size_t min_resp_len;
4748         int err;
4749
4750         if (!udata)
4751                 return ERR_PTR(-ENOSYS);
4752
4753         min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
4754         if (udata->outlen && udata->outlen < min_resp_len)
4755                 return ERR_PTR(-EINVAL);
4756
4757         dev = to_mdev(pd->device);
4758         switch (init_attr->wq_type) {
4759         case IB_WQT_RQ:
4760                 rwq = kzalloc(sizeof(*rwq), GFP_KERNEL);
4761                 if (!rwq)
4762                         return ERR_PTR(-ENOMEM);
4763                 err = prepare_user_rq(pd, init_attr, udata, rwq);
4764                 if (err)
4765                         goto err;
4766                 err = create_rq(rwq, pd, init_attr);
4767                 if (err)
4768                         goto err_user_rq;
4769                 break;
4770         default:
4771                 mlx5_ib_dbg(dev, "unsupported wq type %d\n",
4772                             init_attr->wq_type);
4773                 return ERR_PTR(-EINVAL);
4774         }
4775
4776         rwq->ibwq.wq_num = rwq->core_qp.qpn;
4777         rwq->ibwq.state = IB_WQS_RESET;
4778         if (udata->outlen) {
4779                 resp.response_length = offsetof(typeof(resp), response_length) +
4780                                 sizeof(resp.response_length);
4781                 err = ib_copy_to_udata(udata, &resp, resp.response_length);
4782                 if (err)
4783                         goto err_copy;
4784         }
4785
4786         rwq->core_qp.event = mlx5_ib_wq_event;
4787         rwq->ibwq.event_handler = init_attr->event_handler;
4788         return &rwq->ibwq;
4789
4790 err_copy:
4791         mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
4792 err_user_rq:
4793         destroy_user_rq(pd, rwq);
4794 err:
4795         kfree(rwq);
4796         return ERR_PTR(err);
4797 }
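/*
 * Illustrative sketch of the consumer side (not driver code; "my_pd",
 * "my_cq" and the sizes are hypothetical): a WQ is created through the
 * ib_create_wq() verb with an ib_wq_init_attr describing the receive ring.
 *
 *	struct ib_wq_init_attr wq_attr = {
 *		.wq_type = IB_WQT_RQ,
 *		.max_wr  = 256,
 *		.max_sge = 1,
 *		.cq      = my_cq,
 *	};
 *	struct ib_wq *wq = ib_create_wq(my_pd, &wq_attr);
 *
 *	if (IS_ERR(wq))
 *		handle_error(PTR_ERR(wq));
 */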
4798
4799 int mlx5_ib_destroy_wq(struct ib_wq *wq)
4800 {
4801         struct mlx5_ib_dev *dev = to_mdev(wq->device);
4802         struct mlx5_ib_rwq *rwq = to_mrwq(wq);
4803
4804         mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
4805         destroy_user_rq(wq->pd, rwq);
4806         kfree(rwq);
4807
4808         return 0;
4809 }
4810
4811 struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
4812                                                       struct ib_rwq_ind_table_init_attr *init_attr,
4813                                                       struct ib_udata *udata)
4814 {
4815         struct mlx5_ib_dev *dev = to_mdev(device);
4816         struct mlx5_ib_rwq_ind_table *rwq_ind_tbl;
4817         int sz = 1 << init_attr->log_ind_tbl_size;
4818         struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
4819         size_t min_resp_len;
4820         int inlen;
4821         int err;
4822         int i;
4823         u32 *in;
4824         void *rqtc;
4825
4826         if (udata->inlen > 0 &&
4827             !ib_is_udata_cleared(udata, 0,
4828                                  udata->inlen))
4829                 return ERR_PTR(-EOPNOTSUPP);
4830
4831         if (init_attr->log_ind_tbl_size >
4832             MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
4833                 mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
4834                             init_attr->log_ind_tbl_size,
4835                             MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
4836                 return ERR_PTR(-EINVAL);
4837         }
4838
4839         min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
4840         if (udata->outlen && udata->outlen < min_resp_len)
4841                 return ERR_PTR(-EINVAL);
4842
4843         rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL);
4844         if (!rwq_ind_tbl)
4845                 return ERR_PTR(-ENOMEM);
4846
4847         inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
4848         in = mlx5_vzalloc(inlen);
4849         if (!in) {
4850                 err = -ENOMEM;
4851                 goto err;
4852         }
4853
4854         rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
4855
4856         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
4857         MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
4858
4859         for (i = 0; i < sz; i++)
4860                 MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
4861
4862         err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
4863         kvfree(in);
4864
4865         if (err)
4866                 goto err;
4867
4868         rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
4869         if (udata->outlen) {
4870                 resp.response_length = offsetof(typeof(resp), response_length) +
4871                                         sizeof(resp.response_length);
4872                 err = ib_copy_to_udata(udata, &resp, resp.response_length);
4873                 if (err)
4874                         goto err_copy;
4875         }
4876
4877         return &rwq_ind_tbl->ib_rwq_ind_tbl;
4878
4879 err_copy:
4880         mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
4881 err:
4882         kfree(rwq_ind_tbl);
4883         return ERR_PTR(err);
4884 }
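/*
 * Note on the indirection table above: the RQT holds 1 << log_ind_tbl_size
 * entries, each the WQ number of one receive WQ, so e.g. log_ind_tbl_size = 3
 * programs an 8-entry table.  The table size is bounded by the device's
 * log_max_rqt_size capability checked at the top of the function.
 */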
4885
4886 int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
4887 {
4888         struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
4889         struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
4890
4891         mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
4892
4893         kfree(rwq_ind_tbl);
4894         return 0;
4895 }
4896
4897 int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
4898                       u32 wq_attr_mask, struct ib_udata *udata)
4899 {
4900         struct mlx5_ib_dev *dev = to_mdev(wq->device);
4901         struct mlx5_ib_rwq *rwq = to_mrwq(wq);
4902         struct mlx5_ib_modify_wq ucmd = {};
4903         size_t required_cmd_sz;
4904         int curr_wq_state;
4905         int wq_state;
4906         int inlen;
4907         int err;
4908         void *rqc;
4909         void *in;
4910
4911         required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
4912         if (udata->inlen < required_cmd_sz)
4913                 return -EINVAL;
4914
4915         if (udata->inlen > sizeof(ucmd) &&
4916             !ib_is_udata_cleared(udata, sizeof(ucmd),
4917                                  udata->inlen - sizeof(ucmd)))
4918                 return -EOPNOTSUPP;
4919
4920         if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)))
4921                 return -EFAULT;
4922
4923         if (ucmd.comp_mask || ucmd.reserved)
4924                 return -EOPNOTSUPP;
4925
4926         inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
4927         in = mlx5_vzalloc(inlen);
4928         if (!in)
4929                 return -ENOMEM;
4930
4931         rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
4932
4933         MLX5_SET(modify_rq_in, in, rqn, rwq->core_qp.qpn);
4934         curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ?
4935                 wq_attr->curr_wq_state : wq->state;
4936         wq_state = (wq_attr_mask & IB_WQ_STATE) ?
4937                 wq_attr->wq_state : curr_wq_state;
4938         if (curr_wq_state == IB_WQS_ERR)
4939                 curr_wq_state = MLX5_RQC_STATE_ERR;
4940         if (wq_state == IB_WQS_ERR)
4941                 wq_state = MLX5_RQC_STATE_ERR;
4942         MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
4943         MLX5_SET(rqc, rqc, state, wq_state);
4944
4945         err = mlx5_core_modify_rq(dev->mdev, in, inlen);
4946         kvfree(in);
4947         if (!err)
4948                 rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
4949
4950         return err;
4951 }
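/*
 * Minimal sketch of driving the modify path above from a kernel consumer
 * (illustrative only; "wq" is a WQ created earlier): moving a WQ from RESET
 * to RDY uses the ib_modify_wq() verb with both state fields supplied.
 *
 *	struct ib_wq_attr attr = {
 *		.wq_state      = IB_WQS_RDY,
 *		.curr_wq_state = IB_WQS_RESET,
 *	};
 *
 *	if (ib_modify_wq(wq, &attr, IB_WQ_STATE | IB_WQ_CUR_STATE))
 *		handle_error();
 */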