/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <dev/mlx4/cq.h>
#include <dev/mlx4/qp.h>
#include <dev/mlx4/srq.h>
#include <linux/slab.h>

#include "mlx4_ib.h"
#include "user.h"

/* Which firmware version adds support for Resize CQ */
#define MLX4_FW_VER_RESIZE_CQ  mlx4_fw_ver(2, 5, 0)
#define MLX4_FW_VER_IGNORE_OVERRUN_CQ mlx4_fw_ver(2, 7, 8200)

static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
{
        struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
        ibcq->comp_handler(ibcq, ibcq->cq_context);
}

static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
{
        struct ib_event event;
        struct ib_cq *ibcq;

        if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
                pr_warn("Unexpected event type %d "
                       "on CQ %06x\n", type, cq->cqn);
                return;
        }

        ibcq = &to_mibcq(cq)->ibcq;
        if (ibcq->event_handler) {
                event.device     = ibcq->device;
                event.event      = IB_EVENT_CQ_ERR;
                event.element.cq = ibcq;
                ibcq->event_handler(&event, ibcq->cq_context);
        }
}

static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
{
        return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
}

static void *get_cqe(struct mlx4_ib_cq *cq, int n)
{
        return get_cqe_from_buf(&cq->buf, n);
}

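/*
 * Return the CQE at index n if it is owned by software (its ownership
 * bit matches the expected polarity for the current pass around the
 * ring), or NULL if hardware has not completed it yet.  With 64-byte
 * CQEs the ownership byte lives in the second 32-byte half of the
 * stride.
 */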
static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
{
        struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
        struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);

        return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
                !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
}

static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
{
        return get_sw_cqe(cq, cq->mcq.cons_index);
}

int mlx4_ib_modify_cq(struct ib_cq *cq,
                      struct ib_cq_attr *cq_attr,
                      int cq_attr_mask)
{
        int err = 0;
        struct mlx4_ib_cq *mcq = to_mcq(cq);
        struct mlx4_ib_dev *dev = to_mdev(cq->device);

        if (cq_attr_mask & IB_CQ_CAP_FLAGS) {
                if (cq_attr->cq_cap_flags & IB_CQ_TIMESTAMP)
                        return -ENOTSUPP;

                if (cq_attr->cq_cap_flags & IB_CQ_IGNORE_OVERRUN) {
                        if (dev->dev->caps.cq_flags & MLX4_DEV_CAP_CQ_FLAG_IO)
                                err = mlx4_cq_ignore_overrun(dev->dev, &mcq->mcq);
                        else
                                err = -ENOSYS;
                }
        }

        if (!err)
                if (cq_attr_mask & IB_CQ_MODERATION)
                        err = mlx4_cq_modify(dev->dev, &mcq->mcq,
                                        cq_attr->moderation.cq_count,
                                        cq_attr->moderation.cq_period);

        return err;
}

static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int nent)
{
        int err;

        err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
                             PAGE_SIZE * 2, &buf->buf);

        if (err)
                goto out;

        buf->entry_size = dev->dev->caps.cqe_size;
        err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
                            &buf->mtt);
        if (err)
                goto err_buf;

        err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
        if (err)
                goto err_mtt;

        return 0;

err_mtt:
        mlx4_mtt_cleanup(dev->dev, &buf->mtt);

err_buf:
        mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf);

out:
        return err;
}

static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
{
        mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
}

static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
                               struct mlx4_ib_cq_buf *buf, struct ib_umem **umem,
                               u64 buf_addr, int cqe)
{
        int err;
        int cqe_size = dev->dev->caps.cqe_size;
        int shift;
        int n;

        *umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
                            IB_ACCESS_LOCAL_WRITE, 1);
        if (IS_ERR(*umem))
                return PTR_ERR(*umem);

        n = ib_umem_page_count(*umem);
        shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
        err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);

        if (err)
                goto err_buf;

        err = mlx4_ib_umem_write_mtt(dev, &buf->mtt, *umem);
        if (err)
                goto err_mtt;

        return 0;

err_mtt:
        mlx4_mtt_cleanup(dev->dev, &buf->mtt);

err_buf:
        ib_umem_release(*umem);

        return err;
}

/* we don't support system timestamping */
#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_TIMESTAMP

struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
                                struct ib_cq_init_attr *attr,
                                struct ib_ucontext *context,
                                struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(ibdev);
        struct mlx4_ib_cq *cq;
        struct mlx4_uar *uar;
        int err;
        int entries = attr->cqe;
        int vector = attr->comp_vector;

        if (entries < 1 || entries > dev->dev->caps.max_cqes)
                return ERR_PTR(-EINVAL);

        if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
                return ERR_PTR(-EINVAL);

        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return ERR_PTR(-ENOMEM);

        entries      = roundup_pow_of_two(entries + 1);
        cq->ibcq.cqe = entries - 1;
        mutex_init(&cq->resize_mutex);
        spin_lock_init(&cq->lock);
        cq->resize_buf = NULL;
        cq->resize_umem = NULL;
        cq->create_flags = attr->flags;

        if (context) {
                struct mlx4_ib_create_cq ucmd;

                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                        err = -EFAULT;
                        goto err_cq;
                }

                err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem,
                                          ucmd.buf_addr, entries);
                if (err)
                        goto err_cq;

                err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
                                          &cq->db);
                if (err)
                        goto err_mtt;

                uar = &to_mucontext(context)->uar;
        } else {
                err = mlx4_db_alloc(dev->dev, &cq->db, 1);
                if (err)
                        goto err_cq;

                cq->mcq.set_ci_db  = cq->db.db;
                cq->mcq.arm_db     = cq->db.db + 1;
                *cq->mcq.set_ci_db = 0;
                *cq->mcq.arm_db    = 0;

                err = mlx4_ib_alloc_cq_buf(dev, &cq->buf, entries);
                if (err)
                        goto err_db;

                uar = &dev->priv_uar;
        }

        if (dev->eq_table)
                vector = dev->eq_table[vector % ibdev->num_comp_vectors];

        err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
                            cq->db.dma, &cq->mcq, vector, 0,
                            !!(cq->create_flags & IB_CQ_TIMESTAMP));
        if (err)
                goto err_dbmap;

        cq->mcq.comp  = mlx4_ib_cq_comp;
        cq->mcq.event = mlx4_ib_cq_event;

        if (context)
                if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
                        err = -EFAULT;
                        goto err_dbmap;
                }

        return &cq->ibcq;

err_dbmap:
        if (context)
                mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);

err_mtt:
        mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);

        if (context)
                ib_umem_release(cq->umem);
        else
                mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);

err_db:
        if (!context)
                mlx4_db_free(dev->dev, &cq->db);

err_cq:
        kfree(cq);

        return ERR_PTR(err);
}

static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
                                  int entries)
{
        int err;

        if (cq->resize_buf)
                return -EBUSY;

        cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
        if (!cq->resize_buf)
                return -ENOMEM;

        err = mlx4_ib_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
        if (err) {
                kfree(cq->resize_buf);
                cq->resize_buf = NULL;
                return err;
        }

        cq->resize_buf->cqe = entries - 1;

        return 0;
}

static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
                                   int entries, struct ib_udata *udata)
{
        struct mlx4_ib_resize_cq ucmd;
        int err;

        if (cq->resize_umem)
                return -EBUSY;

        if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
                return -EFAULT;

        cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
        if (!cq->resize_buf)
                return -ENOMEM;

        err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf,
                                  &cq->resize_umem, ucmd.buf_addr, entries);
        if (err) {
                kfree(cq->resize_buf);
                cq->resize_buf = NULL;
                return err;
        }

        cq->resize_buf->cqe = entries - 1;

        return 0;
}

static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
{
        u32 i;

        i = cq->mcq.cons_index;
        while (get_sw_cqe(cq, i))
                ++i;

        return i - cq->mcq.cons_index;
}

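/*
 * Copy the CQEs still outstanding in the old buffer into the resize
 * buffer, fixing up the ownership bit for the new ring size, until the
 * RESIZE CQE written by hardware is reached.
 */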
static int mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
{
        struct mlx4_cqe *cqe, *new_cqe;
        int i;
        int cqe_size = cq->buf.entry_size;
        int cqe_inc = cqe_size == 64 ? 1 : 0;
        struct mlx4_cqe *start_cqe;

        i = cq->mcq.cons_index;
        cqe = get_cqe(cq, i & cq->ibcq.cqe);
        start_cqe = cqe;
        cqe += cqe_inc;

        while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
                new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
                                           (i + 1) & cq->resize_buf->cqe);
                memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
                new_cqe += cqe_inc;

                new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
                        (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
                cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
                if (cqe == start_cqe) {
                        pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", cq->mcq.cqn);
                        return -ENOMEM;
                }
                cqe += cqe_inc;
        }
        ++cq->mcq.cons_index;
        return 0;
}

int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(ibcq->device);
        struct mlx4_ib_cq *cq = to_mcq(ibcq);
        struct mlx4_mtt mtt;
        int outst_cqe;
        int err;

        if (dev->dev->caps.fw_ver < MLX4_FW_VER_RESIZE_CQ)
                return -ENOSYS;

        mutex_lock(&cq->resize_mutex);
        if (entries < 1 || entries > dev->dev->caps.max_cqes) {
                err = -EINVAL;
                goto out;
        }

        entries = roundup_pow_of_two(entries + 1);
        if (entries == ibcq->cqe + 1) {
                err = 0;
                goto out;
        }

        if (entries > dev->dev->caps.max_cqes + 1) {
                err = -EINVAL;
                goto out;
        }

        if (ibcq->uobject) {
                err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
                if (err)
                        goto out;
        } else {
                /* Can't be smaller than the number of outstanding CQEs */
                outst_cqe = mlx4_ib_get_outstanding_cqes(cq);
                if (entries < outst_cqe + 1) {
                        err = 0;
                        goto out;
                }

                err = mlx4_alloc_resize_buf(dev, cq, entries);
                if (err)
                        goto out;
        }

        mtt = cq->buf.mtt;

        err = mlx4_cq_resize(dev->dev, &cq->mcq, entries, &cq->resize_buf->buf.mtt);
        if (err)
                goto err_buf;

        mlx4_mtt_cleanup(dev->dev, &mtt);
        if (ibcq->uobject) {
                cq->buf      = cq->resize_buf->buf;
                cq->ibcq.cqe = cq->resize_buf->cqe;
                ib_umem_release(cq->umem);
                cq->umem     = cq->resize_umem;

                kfree(cq->resize_buf);
                cq->resize_buf = NULL;
                cq->resize_umem = NULL;
        } else {
                struct mlx4_ib_cq_buf tmp_buf;
                int tmp_cqe = 0;

                spin_lock_irq(&cq->lock);
                if (cq->resize_buf) {
                        err = mlx4_ib_cq_resize_copy_cqes(cq);
                        tmp_buf = cq->buf;
                        tmp_cqe = cq->ibcq.cqe;
                        cq->buf      = cq->resize_buf->buf;
                        cq->ibcq.cqe = cq->resize_buf->cqe;

                        kfree(cq->resize_buf);
                        cq->resize_buf = NULL;
                }
                spin_unlock_irq(&cq->lock);

                if (tmp_cqe)
                        mlx4_ib_free_cq_buf(dev, &tmp_buf, tmp_cqe);
        }

        goto out;

err_buf:
        mlx4_mtt_cleanup(dev->dev, &cq->resize_buf->buf.mtt);
        if (!ibcq->uobject)
                mlx4_ib_free_cq_buf(dev, &cq->resize_buf->buf,
                                    cq->resize_buf->cqe);

        kfree(cq->resize_buf);
        cq->resize_buf = NULL;

        if (cq->resize_umem) {
                ib_umem_release(cq->resize_umem);
                cq->resize_umem = NULL;
        }

out:
        mutex_unlock(&cq->resize_mutex);

        return err;
}

int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq)
{
        struct mlx4_ib_dev *dev = to_mdev(ibcq->device);
        struct mlx4_ib_cq *cq = to_mcq(ibcq);

        if (dev->dev->caps.fw_ver < MLX4_FW_VER_IGNORE_OVERRUN_CQ)
                return -ENOSYS;

        return mlx4_cq_ignore_overrun(dev->dev, &cq->mcq);
}

int mlx4_ib_destroy_cq(struct ib_cq *cq)
{
        struct mlx4_ib_dev *dev = to_mdev(cq->device);
        struct mlx4_ib_cq *mcq = to_mcq(cq);

        mlx4_cq_free(dev->dev, &mcq->mcq);
        mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);

        if (cq->uobject) {
                mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
                ib_umem_release(mcq->umem);
        } else {
                mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
                mlx4_db_free(dev->dev, &mcq->db);
        }

        kfree(mcq);

        return 0;
}

static void dump_cqe(void *cqe)
{
        __be32 *buf = cqe;

        pr_debug("CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
               be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
               be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
               be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
}

static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
                                     struct ib_wc *wc)
{
        if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
                pr_debug("local QP operation err "
                       "(QPN %06x, WQE index %x, vendor syndrome %02x, "
                       "opcode = %02x)\n",
                       be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
                       cqe->vendor_err_syndrome,
                       cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
                dump_cqe(cqe);
        }

        switch (cqe->syndrome) {
        case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
                wc->status = IB_WC_LOC_LEN_ERR;
                break;
        case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
                wc->status = IB_WC_LOC_QP_OP_ERR;
                break;
        case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
                wc->status = IB_WC_LOC_PROT_ERR;
                break;
        case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
                wc->status = IB_WC_WR_FLUSH_ERR;
                break;
        case MLX4_CQE_SYNDROME_MW_BIND_ERR:
                wc->status = IB_WC_MW_BIND_ERR;
                break;
        case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
                wc->status = IB_WC_BAD_RESP_ERR;
                break;
        case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
                wc->status = IB_WC_LOC_ACCESS_ERR;
                break;
        case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
                wc->status = IB_WC_REM_INV_REQ_ERR;
                break;
        case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
                wc->status = IB_WC_REM_ACCESS_ERR;
                break;
        case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
                wc->status = IB_WC_REM_OP_ERR;
                break;
        case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
                wc->status = IB_WC_RETRY_EXC_ERR;
                break;
        case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
                wc->status = IB_WC_RNR_RETRY_EXC_ERR;
                break;
        case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
                wc->status = IB_WC_REM_ABORT_ERR;
                break;
        default:
                wc->status = IB_WC_GENERAL_ERR;
                break;
        }

        wc->vendor_err = cqe->vendor_err_syndrome;
}

static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
{
        return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4      |
                                      MLX4_CQE_STATUS_IPV4F     |
                                      MLX4_CQE_STATUS_IPV4OPT   |
                                      MLX4_CQE_STATUS_IPV6      |
                                      MLX4_CQE_STATUS_IPOK)) ==
                cpu_to_be16(MLX4_CQE_STATUS_IPV4        |
                            MLX4_CQE_STATUS_IPOK))              &&
                (status & cpu_to_be16(MLX4_CQE_STATUS_UDP       |
                                      MLX4_CQE_STATUS_TCP))     &&
                checksum == cpu_to_be16(0xffff);
}

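/*
 * For proxy QPs used by the multi-function (SR-IOV) special QP
 * tunneling scheme, the real completion metadata is carried in a
 * mlx4_ib_proxy_sqp_hdr placed at the start of the proxy receive
 * buffer; pull the pkey index, source QP, GRH flag and L2 address /
 * SLID information from that header.
 */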
static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
                           unsigned tail, struct mlx4_cqe *cqe, int is_eth)
{
        struct mlx4_ib_proxy_sqp_hdr *hdr;

        ib_dma_sync_single_for_cpu(qp->ibqp.device,
                                   qp->sqp_proxy_rcv[tail].map,
                                   sizeof (struct mlx4_ib_proxy_sqp_hdr),
                                   DMA_FROM_DEVICE);
        hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
        wc->pkey_index  = be16_to_cpu(hdr->tun.pkey_index);
        wc->src_qp      = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
        wc->wc_flags   |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
        wc->dlid_path_bits = 0;

        if (is_eth) {
                wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
                memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
                memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
        } else {
                wc->slid        = be16_to_cpu(hdr->tun.slid_mac_47_32);
                wc->sl          = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
        }

        return 0;
}

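/*
 * Poll one CQE: advance the consumer index, resolve the QP (and SRQ,
 * if any) that the completion belongs to, and translate the hardware
 * CQE into an ib_wc.  Returns -EAGAIN when no software-owned CQE is
 * available.
 */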
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
                            struct mlx4_ib_qp **cur_qp,
                            struct ib_wc *wc)
{
        struct mlx4_cqe *cqe;
        struct mlx4_qp *mqp;
        struct mlx4_ib_wq *wq;
        struct mlx4_ib_srq *srq;
        struct mlx4_srq *msrq = NULL;
        int is_send;
        int is_error;
        u32 g_mlpath_rqpn;
        u16 wqe_ctr;
        unsigned tail = 0;
        int timestamp_en = !!(cq->create_flags & IB_CQ_TIMESTAMP);

repoll:
        cqe = next_cqe_sw(cq);
        if (!cqe)
                return -EAGAIN;

        if (cq->buf.entry_size == 64)
                cqe++;

        ++cq->mcq.cons_index;

        /*
         * Make sure we read CQ entry contents after we've checked the
         * ownership bit.
         */
        rmb();

        is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
        is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
                MLX4_CQE_OPCODE_ERROR;

        if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
                     is_send)) {
                pr_warn("Completion for NOP opcode detected!\n");
                return -EINVAL;
        }

        /* Resize CQ in progress */
        if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
                if (cq->resize_buf) {
                        struct mlx4_ib_dev *dev = to_mdev(cq->ibcq.device);

                        mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
                        cq->buf      = cq->resize_buf->buf;
                        cq->ibcq.cqe = cq->resize_buf->cqe;

                        kfree(cq->resize_buf);
                        cq->resize_buf = NULL;
                }

                goto repoll;
        }

        if (!*cur_qp ||
            (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) {
                /*
                 * We do not have to take the QP table lock here,
                 * because CQs will be locked while QPs are removed
                 * from the table.
                 */
                mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
                                       be32_to_cpu(cqe->vlan_my_qpn));
                if (unlikely(!mqp)) {
                        pr_warn("CQ %06x with entry for unknown QPN %06x\n",
                               cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
                        return -EINVAL;
                }

                *cur_qp = to_mibqp(mqp);
        }

        wc->qp = &(*cur_qp)->ibqp;

        if (wc->qp->qp_type == IB_QPT_XRC_TGT) {
                u32 srq_num;
                g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
                srq_num       = g_mlpath_rqpn & 0xffffff;
                /* SRQ is also in the radix tree */
                msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
                                       srq_num);
                if (unlikely(!msrq)) {
                        pr_warn("CQ %06x with entry for unknown SRQN %06x\n",
                                cq->mcq.cqn, srq_num);
                        return -EINVAL;
                }
        }

        if (is_send) {
                wq = &(*cur_qp)->sq;
                if (!(*cur_qp)->sq_signal_bits) {
                        wqe_ctr = be16_to_cpu(cqe->wqe_index);
                        wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
                }
                wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
                ++wq->tail;
        } else if ((*cur_qp)->ibqp.srq) {
                srq = to_msrq((*cur_qp)->ibqp.srq);
                wqe_ctr = be16_to_cpu(cqe->wqe_index);
                wc->wr_id = srq->wrid[wqe_ctr];
                mlx4_ib_free_srq_wqe(srq, wqe_ctr);
        } else if (msrq) {
                srq = to_mibsrq(msrq);
                wqe_ctr = be16_to_cpu(cqe->wqe_index);
                wc->wr_id = srq->wrid[wqe_ctr];
                mlx4_ib_free_srq_wqe(srq, wqe_ctr);
        } else {
                wq        = &(*cur_qp)->rq;
                tail      = wq->tail & (wq->wqe_cnt - 1);
                wc->wr_id = wq->wrid[tail];
                ++wq->tail;
        }

        if (unlikely(is_error)) {
                mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
                return 0;
        }

        wc->status = IB_WC_SUCCESS;

        if (is_send) {
                wc->wc_flags = 0;
                switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
                case MLX4_OPCODE_RDMA_WRITE_IMM:
                        wc->wc_flags |= IB_WC_WITH_IMM;
                        /* fall through */
                case MLX4_OPCODE_RDMA_WRITE:
                        wc->opcode    = IB_WC_RDMA_WRITE;
                        break;
                case MLX4_OPCODE_SEND_IMM:
                        wc->wc_flags |= IB_WC_WITH_IMM;
                        /* fall through */
                case MLX4_OPCODE_SEND:
                case MLX4_OPCODE_SEND_INVAL:
                        wc->opcode    = IB_WC_SEND;
                        break;
                case MLX4_OPCODE_RDMA_READ:
                        wc->opcode    = IB_WC_RDMA_READ;
                        wc->byte_len  = be32_to_cpu(cqe->byte_cnt);
                        break;
                case MLX4_OPCODE_ATOMIC_CS:
                        wc->opcode    = IB_WC_COMP_SWAP;
                        wc->byte_len  = 8;
                        break;
                case MLX4_OPCODE_ATOMIC_FA:
                        wc->opcode    = IB_WC_FETCH_ADD;
                        wc->byte_len  = 8;
                        break;
                case MLX4_OPCODE_MASKED_ATOMIC_CS:
                        wc->opcode    = IB_WC_MASKED_COMP_SWAP;
                        wc->byte_len  = 8;
                        break;
                case MLX4_OPCODE_MASKED_ATOMIC_FA:
                        wc->opcode    = IB_WC_MASKED_FETCH_ADD;
                        wc->byte_len  = 8;
                        break;
                case MLX4_OPCODE_BIND_MW:
                        wc->opcode    = IB_WC_BIND_MW;
                        break;
                case MLX4_OPCODE_LSO:
                        wc->opcode    = IB_WC_LSO;
                        break;
                case MLX4_OPCODE_FMR:
                        wc->opcode    = IB_WC_FAST_REG_MR;
                        break;
                case MLX4_OPCODE_LOCAL_INVAL:
                        wc->opcode    = IB_WC_LOCAL_INV;
                        break;
                }
        } else {
                wc->byte_len = be32_to_cpu(cqe->byte_cnt);

                switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
                case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
                        wc->opcode      = IB_WC_RECV_RDMA_WITH_IMM;
                        wc->wc_flags    = IB_WC_WITH_IMM;
                        wc->ex.imm_data = cqe->immed_rss_invalid;
                        break;
                case MLX4_RECV_OPCODE_SEND_INVAL:
                        wc->opcode      = IB_WC_RECV;
                        wc->wc_flags    = IB_WC_WITH_INVALIDATE;
                        wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid);
                        break;
                case MLX4_RECV_OPCODE_SEND:
                        wc->opcode   = IB_WC_RECV;
                        wc->wc_flags = 0;
                        break;
                case MLX4_RECV_OPCODE_SEND_IMM:
                        wc->opcode      = IB_WC_RECV;
                        wc->wc_flags    = IB_WC_WITH_IMM;
                        wc->ex.imm_data = cqe->immed_rss_invalid;
                        break;
                }

                if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
                        if ((*cur_qp)->mlx4_ib_qp_type &
                            (MLX4_IB_QPT_PROXY_SMI_OWNER |
                             MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
                                return use_tunnel_data
                                        (*cur_qp, cq, wc, tail, cqe,
                                         rdma_port_get_link_layer
                                                (wc->qp->device,
                                                 (*cur_qp)->port) ==
                                                IB_LINK_LAYER_ETHERNET);
                }

                if (timestamp_en) {
                        /* currently, only CQ_CREATE_WITH_TIMESTAMPING_RAW is
                         * supported. CQ_CREATE_WITH_TIMESTAMPING_SYS isn't
                         * supported */
                        if (cq->create_flags & IB_CQ_TIMESTAMP_TO_SYS_TIME) {
                                wc->ts.timestamp = 0;
                        } else {
                                wc->ts.timestamp =
                                        ((u64)(be32_to_cpu(cqe->timestamp_16_47)
                                               + !cqe->timestamp_0_15) << 16)
                                        | be16_to_cpu(cqe->timestamp_0_15);
                                wc->wc_flags |= IB_WC_WITH_TIMESTAMP;
                        }
                } else {
                        wc->wc_flags |= IB_WC_WITH_SLID;
                        wc->slid           = be16_to_cpu(cqe->rlid);
                }
                g_mlpath_rqpn      = be32_to_cpu(cqe->g_mlpath_rqpn);
                wc->src_qp         = g_mlpath_rqpn & 0xffffff;
                wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
                wc->wc_flags      |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
                wc->pkey_index     = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
                wc->wc_flags      |= mlx4_ib_ipoib_csum_ok(cqe->status,
                                        cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
                if (!timestamp_en) {
                        if (rdma_port_get_link_layer(wc->qp->device,
                                                     (*cur_qp)->port) ==
                                                      IB_LINK_LAYER_ETHERNET)
                                wc->sl  = be16_to_cpu(cqe->sl_vid) >> 13;
                        else
                                wc->sl  = be16_to_cpu(cqe->sl_vid) >> 12;
                        wc->wc_flags      |= IB_WC_WITH_SL;
                }
                if ((be32_to_cpu(cqe->vlan_my_qpn) &
                    MLX4_CQE_VLAN_PRESENT_MASK) && !timestamp_en) {
                        wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
                                MLX4_CQE_VID_MASK;
                        wc->wc_flags      |= IB_WC_WITH_VLAN;
                } else {
                        wc->vlan_id = 0xffff;
                }
                if (!timestamp_en) {
                        memcpy(wc->smac, cqe->smac, 6);
                        wc->wc_flags |= IB_WC_WITH_SMAC;
                }
        }

        return 0;
}

int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
        struct mlx4_ib_cq *cq = to_mcq(ibcq);
        struct mlx4_ib_qp *cur_qp = NULL;
        unsigned long flags;
        int npolled;
        int err = 0;

        spin_lock_irqsave(&cq->lock, flags);

        for (npolled = 0; npolled < num_entries; ++npolled) {
                err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
                if (err)
                        break;
        }

        mlx4_cq_set_ci(&cq->mcq);

        spin_unlock_irqrestore(&cq->lock, flags);

        if (err == 0 || err == -EAGAIN)
                return npolled;
        else
                return err;
}

int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        mlx4_cq_arm(&to_mcq(ibcq)->mcq,
                    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
                    MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,
                    to_mdev(ibcq->device)->priv_uar.map,
                    MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock));

        return 0;
}

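/*
 * Remove all CQEs queued for the given QPN (freeing their SRQ WQEs if
 * an SRQ is supplied) by compacting the remaining entries.  The caller
 * is expected to hold the CQ lock; mlx4_ib_cq_clean() below is the
 * locking wrapper.
 */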
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
{
        u32 prod_index;
        int nfreed = 0;
        struct mlx4_cqe *cqe, *dest;
        u8 owner_bit;
        int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;

        /*
         * First we need to find the current producer index, so we
         * know where to start cleaning from.  It doesn't matter if HW
         * adds new entries after this loop -- the QP we're worried
         * about is already in RESET, so the new entries won't come
         * from our QP and therefore don't need to be checked.
         */
        for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
                if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
                        break;

        /*
         * Now sweep backwards through the CQ, removing CQ entries
         * that match our QP by copying older entries on top of them.
         */
        while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
                cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
                cqe += cqe_inc;

                if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
                        if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
                                mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
                        ++nfreed;
                } else if (nfreed) {
                        dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
                        dest += cqe_inc;

                        owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
                        memcpy(dest, cqe, sizeof *cqe);
                        dest->owner_sr_opcode = owner_bit |
                                (dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
                }
        }

        if (nfreed) {
                cq->mcq.cons_index += nfreed;
                /*
                 * Make sure update of buffer contents is done before
                 * updating consumer index.
                 */
                wmb();
                mlx4_cq_set_ci(&cq->mcq);
        }
}

void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
{
        spin_lock_irq(&cq->lock);
        __mlx4_ib_cq_clean(cq, qpn, srq);
        spin_unlock_irq(&cq->lock);
}