/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>

#include "iw_cxgbe.h"
#include "user.h"

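/*
 * Tear down a CQ: issue a FW_RI_RES_WR with op RESET to the firmware for
 * this CQ, wait for the firmware reply, then release the host-side
 * resources (the software queue, the DMA-coherent queue memory and the
 * CQID).
 */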
static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
                      struct c4iw_dev_ucontext *uctx)
{
        struct adapter *sc = rdev->adap;
        struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev);
        struct fw_ri_res_wr *res_wr;
        struct fw_ri_res *res;
        int wr_len;
        struct c4iw_wr_wait wr_wait;
        struct wrqe *wr;

        wr_len = sizeof *res_wr + sizeof *res;
        wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
        if (wr == NULL)
                return (0);
        res_wr = wrtod(wr);
        memset(res_wr, 0, wr_len);
        res_wr->op_nres = cpu_to_be32(
                        V_FW_WR_OP(FW_RI_RES_WR) |
                        V_FW_RI_RES_WR_NRES(1) |
                        F_FW_WR_COMPL);
        res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
        res_wr->cookie = (unsigned long) &wr_wait;
        res = res_wr->res;
        res->u.cq.restype = FW_RI_RES_TYPE_CQ;
        res->u.cq.op = FW_RI_RES_OP_RESET;
        res->u.cq.iqid = cpu_to_be32(cq->cqid);

        c4iw_init_wr_wait(&wr_wait);

        t4_wrq_tx(sc, wr);

        c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__);

        kfree(cq->sw_queue);
        dma_free_coherent(rhp->ibdev.dma_device,
                          cq->memsize, cq->queue,
                          dma_unmap_addr(cq, mapping));
        c4iw_put_cqid(rdev, cq->cqid, uctx);
        return 0;
}

static int
create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
    struct c4iw_dev_ucontext *uctx)
{
        struct adapter *sc = rdev->adap;
        struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev);
        struct fw_ri_res_wr *res_wr;
        struct fw_ri_res *res;
        int wr_len;
        int user = (uctx != &rdev->uctx);
        struct c4iw_wr_wait wr_wait;
        int ret;
        struct wrqe *wr;
        u64 cq_bar2_qoffset = 0;

        cq->cqid = c4iw_get_cqid(rdev, uctx);
        if (!cq->cqid) {
                ret = -ENOMEM;
                goto err1;
        }

        if (!user) {
                cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
                if (!cq->sw_queue) {
                        ret = -ENOMEM;
                        goto err2;
                }
        }
        cq->queue = dma_alloc_coherent(rhp->ibdev.dma_device, cq->memsize,
                                       &cq->dma_addr, GFP_KERNEL);
        if (!cq->queue) {
                ret = -ENOMEM;
                goto err3;
        }
        dma_unmap_addr_set(cq, mapping, cq->dma_addr);
        memset(cq->queue, 0, cq->memsize);

        /* build fw_ri_res_wr */
        wr_len = sizeof *res_wr + sizeof *res;

        wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
        if (wr == NULL) {
                ret = -ENOMEM;
                goto err4;
        }
        res_wr = wrtod(wr);

        memset(res_wr, 0, wr_len);
        res_wr->op_nres = cpu_to_be32(
                        V_FW_WR_OP(FW_RI_RES_WR) |
                        V_FW_RI_RES_WR_NRES(1) |
                        F_FW_WR_COMPL);
        res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
        res_wr->cookie = (unsigned long) &wr_wait;
        res = res_wr->res;
        res->u.cq.restype = FW_RI_RES_TYPE_CQ;
        res->u.cq.op = FW_RI_RES_OP_WRITE;
        res->u.cq.iqid = cpu_to_be32(cq->cqid);
        /*
         * FIXME: Always use first queue id for IQANDSTINDEX.  Linux does the
         * same.
         */
        res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
                        V_FW_RI_RES_WR_IQANUS(0) |
                        V_FW_RI_RES_WR_IQANUD(1) |
                        F_FW_RI_RES_WR_IQANDST |
                        V_FW_RI_RES_WR_IQANDSTINDEX(sc->sge.ofld_rxq[0].iq.abs_id));
        res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
                        F_FW_RI_RES_WR_IQDROPRSS |
                        V_FW_RI_RES_WR_IQPCIECH(2) |
                        V_FW_RI_RES_WR_IQINTCNTTHRESH(0) |
                        F_FW_RI_RES_WR_IQO |
                        V_FW_RI_RES_WR_IQESIZE(1));
        res->u.cq.iqsize = cpu_to_be16(cq->size);
        res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);

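        /*
         * The wr_wait/cookie pair is the rendezvous with the firmware: the
         * reply to this work request carries the cookie back, which lets the
         * reply path locate wr_wait and wake the sleeper in
         * c4iw_wait_for_reply() below.
         */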
        c4iw_init_wr_wait(&wr_wait);

        t4_wrq_tx(sc, wr);

        CTR2(KTR_IW_CXGBE, "%s wait_event wr_wait %p", __func__, &wr_wait);
        ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__);
        if (ret)
                goto err4;

        cq->gen = 1;
        cq->rdev = rdev;

        /* Determine the BAR2 queue offset and qid. */
        t4_bar2_sge_qregs(rdev->adap, cq->cqid, T4_BAR2_QTYPE_INGRESS, user,
                        &cq_bar2_qoffset, &cq->bar2_qid);

        /*
         * If this is a user mapping, compute the page-aligned physical
         * address for mapping.
         */
        if (user)
                cq->bar2_pa = (rdev->bar2_pa + cq_bar2_qoffset) & PAGE_MASK;
        else
                cq->bar2_va = (void __iomem *)((u64)rdev->bar2_kva +
                        cq_bar2_qoffset);

        return 0;
err4:
        dma_free_coherent(rhp->ibdev.dma_device, cq->memsize, cq->queue,
                          dma_unmap_addr(cq, mapping));
err3:
        kfree(cq->sw_queue);
err2:
        c4iw_put_cqid(rdev, cq->cqid, uctx);
err1:
        return ret;
}

static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
{
        struct t4_cqe cqe;

        CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
            cq, cq->sw_cidx, cq->sw_pidx);
        memset(&cqe, 0, sizeof(cqe));
        cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
                                 V_CQE_OPCODE(FW_RI_SEND) |
                                 V_CQE_TYPE(0) |
                                 V_CQE_SWCQE(1) |
                                 V_CQE_QPID(wq->sq.qid));
        cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
        cq->sw_queue[cq->sw_pidx] = cqe;
        t4_swcq_produce(cq);
}

int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
{
        int flushed = 0;
        int in_use = wq->rq.in_use - count;

        BUG_ON(in_use < 0);
        CTR5(KTR_IW_CXGBE, "%s wq %p cq %p rq.in_use %u skip count %u",
            __func__, wq, cq, wq->rq.in_use, count);
        while (in_use--) {
                insert_recv_cqe(wq, cq);
                flushed++;
        }
        return flushed;
}

static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
                          struct t4_swsqe *swcqe)
{
        struct t4_cqe cqe;

        CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
            cq, cq->sw_cidx, cq->sw_pidx);
        memset(&cqe, 0, sizeof(cqe));
        cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
                                 V_CQE_OPCODE(swcqe->opcode) |
                                 V_CQE_TYPE(1) |
                                 V_CQE_SWCQE(1) |
                                 V_CQE_QPID(wq->sq.qid));
        CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
        cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
        cq->sw_queue[cq->sw_pidx] = cqe;
        t4_swcq_produce(cq);
}

static void advance_oldest_read(struct t4_wq *wq);

int c4iw_flush_sq(struct c4iw_qp *qhp)
{
        int flushed = 0;
        struct t4_wq *wq = &qhp->wq;
        struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
        struct t4_cq *cq = &chp->cq;
        int idx;
        struct t4_swsqe *swsqe;

        if (wq->sq.flush_cidx == -1)
                wq->sq.flush_cidx = wq->sq.cidx;
        idx = wq->sq.flush_cidx;
        BUG_ON(idx >= wq->sq.size);
        while (idx != wq->sq.pidx) {
                swsqe = &wq->sq.sw_sq[idx];
                BUG_ON(swsqe->flushed);
                swsqe->flushed = 1;
                insert_sq_cqe(wq, cq, swsqe);
                if (wq->sq.oldest_read == swsqe) {
                        BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
                        advance_oldest_read(wq);
                }
                flushed++;
                if (++idx == wq->sq.size)
                        idx = 0;
        }
        wq->sq.flush_cidx += flushed;
        if (wq->sq.flush_cidx >= wq->sq.size)
                wq->sq.flush_cidx -= wq->sq.size;
        return flushed;
}

static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
{
        struct t4_swsqe *swsqe;
        int cidx;

        if (wq->sq.flush_cidx == -1)
                wq->sq.flush_cidx = wq->sq.cidx;
        cidx = wq->sq.flush_cidx;
        BUG_ON(cidx > wq->sq.size);

        while (cidx != wq->sq.pidx) {
                swsqe = &wq->sq.sw_sq[cidx];
                if (!swsqe->signaled) {
                        if (++cidx == wq->sq.size)
                                cidx = 0;
                } else if (swsqe->complete) {

                        BUG_ON(swsqe->flushed);

                        /*
                         * Insert this completed cqe into the swcq.
                         */
                        CTR3(KTR_IW_CXGBE,
                                "%s moving cqe into swcq sq idx %u cq idx %u\n",
                                __func__, cidx, cq->sw_pidx);
                        swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
                        cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
                        t4_swcq_produce(cq);
                        swsqe->flushed = 1;
                        if (++cidx == wq->sq.size)
                                cidx = 0;
                        wq->sq.flush_cidx = cidx;
                } else
                        break;
        }
}

static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
                struct t4_cqe *read_cqe)
{
        read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
        read_cqe->len = htonl(wq->sq.oldest_read->read_len);
        read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
                        V_CQE_SWCQE(SW_CQE(hw_cqe)) |
                        V_CQE_OPCODE(FW_RI_READ_REQ) |
                        V_CQE_TYPE(1));
        read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
}

static void advance_oldest_read(struct t4_wq *wq)
{
        u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;

        if (rptr == wq->sq.size)
                rptr = 0;
        while (rptr != wq->sq.pidx) {
                wq->sq.oldest_read = &wq->sq.sw_sq[rptr];

                if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
                        return;
                if (++rptr == wq->sq.size)
                        rptr = 0;
        }
        wq->sq.oldest_read = NULL;
}

/*
 * Move all CQEs from the HWCQ into the SWCQ.
 * Deal with out-of-order completions and completions that complete
 * prior unsignalled WRs.
 */
void c4iw_flush_hw_cq(struct c4iw_cq *chp)
{
        struct t4_cqe *hw_cqe, *swcqe, read_cqe;
        struct c4iw_qp *qhp;
        struct t4_swsqe *swsqe;
        int ret;

        CTR3(KTR_IW_CXGBE, "%s cq %p cqid 0x%x", __func__, &chp->cq,
                        chp->cq.cqid);
        ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);

        /*
         * This logic is similar to poll_cq(), but not quite the same
         * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
         * also do any translation magic that poll_cq() normally does.
         */
        while (!ret) {
                qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));

                /*
                 * drop CQEs with no associated QP
                 */
                if (qhp == NULL)
                        goto next_cqe;

                if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
                        goto next_cqe;

                if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {

                        /*
                         * If we got here because of an async event or some
                         * other error and there is an egress error, drop it.
                         */
                        if (CQE_TYPE(hw_cqe) == 1)
                                goto next_cqe;

                        /* drop peer2peer RTR reads. */
                        if (CQE_WRID_STAG(hw_cqe) == 1)
                                goto next_cqe;

                        /*
                         * Eat completions for unsignaled read WRs.
                         */
                        if (!qhp->wq.sq.oldest_read->signaled) {
                                advance_oldest_read(&qhp->wq);
                                goto next_cqe;
                        }

                        /*
                         * Don't write to the HWCQ, create a new read req CQE
                         * in local memory and move it into the swcq.
                         */
                        create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
                        hw_cqe = &read_cqe;
                        advance_oldest_read(&qhp->wq);
                }

                /*
                 * If it's an SQ completion, then do the magic to move all the
                 * unsignaled and now in-order completions into the swcq.
                 */
                if (SQ_TYPE(hw_cqe)) {
                        swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
                        swsqe->cqe = *hw_cqe;
                        swsqe->complete = 1;
                        flush_completed_wrs(&qhp->wq, &chp->cq);
                } else {
                        swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
                        *swcqe = *hw_cqe;
                        swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
                        t4_swcq_produce(&chp->cq);
                }
next_cqe:
                t4_hwcq_consume(&chp->cq);
                ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
        }
}

static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
{
        if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
                return 0;

        if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
                return 0;

        if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
                return 0;

        if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
                return 0;
        return 1;
}

void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
{
        struct t4_cqe *cqe;
        u32 ptr;

        *count = 0;
        CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count);
        ptr = cq->sw_cidx;
        while (ptr != cq->sw_pidx) {
                cqe = &cq->sw_queue[ptr];
                if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
                    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
                        (*count)++;
                if (++ptr == cq->size)
                        ptr = 0;
        }
        CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
}

/*
 * poll_cq
 *
 * Caller must:
 *     check the validity of the first CQE,
 *     supply the wq associated with the qpid.
 *
 * credit: cq credit to return to sge.
 * cqe_flushed: 1 iff the CQE is flushed.
 * cqe: copy of the polled CQE.
 *
 * return value:
 *    0             CQE returned ok.
 *    -EAGAIN       CQE skipped, try again.
 *    -EOVERFLOW    CQ overflow detected.
 */
static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
                   u8 *cqe_flushed, u64 *cookie, u32 *credit)
{
        int ret = 0;
        struct t4_cqe *hw_cqe, read_cqe;

        *cqe_flushed = 0;
        *credit = 0;
        ret = t4_next_cqe(cq, &hw_cqe);
        if (ret)
                return ret;

        CTR6(KTR_IW_CXGBE,
            "%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x", __func__,
            CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), CQE_GENBIT(hw_cqe),
            CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe));
        CTR5(KTR_IW_CXGBE,
            "%s opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
            __func__, CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
            CQE_WRID_LOW(hw_cqe));

        /*
         * Skip CQEs not affiliated with a QP.
         */
        if (wq == NULL) {
                ret = -EAGAIN;
                goto skip_cqe;
        }

        /*
         * Skip HW CQEs if the wq is flushed.
         */
        if (wq->flushed && !SW_CQE(hw_cqe)) {
                ret = -EAGAIN;
                goto skip_cqe;
        }

        /*
         * Skip TERMINATE CQEs...
         */
        if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
                ret = -EAGAIN;
                goto skip_cqe;
        }

        /*
         * Special CQE for drain WR completions...
         */
        if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
                *cookie = CQE_DRAIN_COOKIE(hw_cqe);
                *cqe = *hw_cqe;
                goto skip_cqe;
        }

        /*
         * Gotta tweak READ completions:
         *      1) the cqe doesn't contain the sq_wptr from the wr.
         *      2) opcode not reflected from the wr.
         *      3) read_len not reflected from the wr.
         *      4) cq_type is RQ_TYPE not SQ_TYPE.
         */
        if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {

                /*
                 * If we got here because of an async event or some other
                 * error and there is an egress error, drop it.
                 */
                if (CQE_TYPE(hw_cqe) == 1) {
                        if (CQE_STATUS(hw_cqe))
                                t4_set_wq_in_error(wq);
                        ret = -EAGAIN;
                        goto skip_cqe;
                }

                /*
                 * If this is an unsolicited read response, then the read
                 * was generated by the kernel driver as part of peer-2-peer
                 * connection setup.  So ignore the completion.
                 */
                if (CQE_WRID_STAG(hw_cqe) == 1) {
                        if (CQE_STATUS(hw_cqe))
                                t4_set_wq_in_error(wq);
                        ret = -EAGAIN;
                        goto skip_cqe;
                }

                /*
                 * Eat completions for unsignaled read WRs.
                 */
                if (!wq->sq.oldest_read->signaled) {
                        advance_oldest_read(wq);
                        ret = -EAGAIN;
                        goto skip_cqe;
                }

                /*
                 * Don't write to the HWCQ, so create a new read req CQE
                 * in local memory.
                 */
                create_read_req_cqe(wq, hw_cqe, &read_cqe);
                hw_cqe = &read_cqe;
                advance_oldest_read(wq);
        }

        if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
                *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
                t4_set_wq_in_error(wq);
        }

        /*
         * RECV completion.
         */
        if (RQ_TYPE(hw_cqe)) {

                /*
                 * HW only validates 4 bits of MSN.  So we must validate that
                 * the MSN in the SEND is the next expected MSN.  If it's not,
                 * then we complete this with T4_ERR_MSN and mark the wq in
                 * error.
                 */

                if (t4_rq_empty(wq)) {
                        t4_set_wq_in_error(wq);
                        ret = -EAGAIN;
                        goto skip_cqe;
                }
                if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
                        t4_set_wq_in_error(wq);
                        hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN));
                        goto proc_cqe;
                }
                goto proc_cqe;
        }

        /*
         * If we get here it's a send completion.
         *
         * Handle out of order completion. These get stuffed
         * in the SW SQ. Then the SW SQ is walked to move any
         * now in-order completions into the SW CQ.  This handles
         * 2 cases:
         *      1) reaping unsignaled WRs when the first subsequent
         *         signaled WR is completed.
         *      2) out of order read completions.
         */
        if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
                struct t4_swsqe *swsqe;

                CTR2(KTR_IW_CXGBE,
                    "%s out of order completion going in sw_sq at idx %u",
                    __func__, CQE_WRID_SQ_IDX(hw_cqe));
                swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
                swsqe->cqe = *hw_cqe;
                swsqe->complete = 1;
                ret = -EAGAIN;
                goto flush_wq;
        }

proc_cqe:
        *cqe = *hw_cqe;

        /*
         * Reap the associated WR(s) that are freed up with this
         * completion.
         */
        if (SQ_TYPE(hw_cqe)) {
                int idx = CQE_WRID_SQ_IDX(hw_cqe);
                BUG_ON(idx >= wq->sq.size);

                /*
                 * Account for any unsignaled completions completed by
                 * this signaled completion.  In this case, cidx points
                 * to the first unsignaled one, and idx points to the
                 * signaled one.  So adjust in_use based on this delta.
                 * If this is not completing any unsignaled WRs, then the
                 * delta will be 0.  Handle wrapping also!
                 */
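                /*
                 * Worked example (illustrative numbers only): with
                 * sq.size = 64, sq.cidx = 60 and idx = 2, this completion
                 * also retires entries 60..63 and 0..1, so in_use drops by
                 * 64 + 2 - 60 = 6; without wrap (cidx = 10, idx = 14) it
                 * drops by 14 - 10 = 4.
                 */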
                if (idx < wq->sq.cidx)
                        wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
                else
                        wq->sq.in_use -= idx - wq->sq.cidx;
                BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);

                wq->sq.cidx = (uint16_t)idx;
                CTR2(KTR_IW_CXGBE, "%s completing sq idx %u\n",
                                __func__, wq->sq.cidx);
                *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
                t4_sq_consume(wq);
        } else {
                CTR2(KTR_IW_CXGBE, "%s completing rq idx %u",
                     __func__, wq->rq.cidx);
                *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
                BUG_ON(t4_rq_empty(wq));
                t4_rq_consume(wq);
                goto skip_cqe;
        }

flush_wq:
        /*
         * Flush any completed cqes that are now in-order.
         */
        flush_completed_wrs(wq, cq);

skip_cqe:
        if (SW_CQE(hw_cqe)) {
                CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip sw cqe cidx %u",
                     __func__, cq, cq->cqid, cq->sw_cidx);
                t4_swcq_consume(cq);
        } else {
                CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip hw cqe cidx %u",
                     __func__, cq, cq->cqid, cq->cidx);
                t4_hwcq_consume(cq);
        }
        return ret;
}

/*
 * Get one cq entry from c4iw and map it to openib.
 *
 * Returns:
 *      0                       cqe returned
 *      -ENODATA                CQ empty
 *      -EAGAIN                 caller must try again
 *      any other -errno        fatal error
 */
static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
{
        struct c4iw_qp *qhp = NULL;
        struct t4_cqe cqe = {0, 0}, *rd_cqe;
        struct t4_wq *wq;
        u32 credit = 0;
        u8 cqe_flushed;
        u64 cookie = 0;
        int ret;

        ret = t4_next_cqe(&chp->cq, &rd_cqe);

        if (ret)
                return ret;

        qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
        if (!qhp)
                wq = NULL;
        else {
                spin_lock(&qhp->lock);
                wq = &(qhp->wq);
        }
        ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
        if (ret)
                goto out;

        wc->wr_id = cookie;
        wc->qp = &qhp->ibqp;
        wc->vendor_err = CQE_STATUS(&cqe);
        wc->wc_flags = 0;

        CTR5(KTR_IW_CXGBE, "%s qpid 0x%x type %d opcode %d status 0x%x",
            __func__, CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
            CQE_STATUS(&cqe));
        CTR5(KTR_IW_CXGBE, "%s len %u wrid hi 0x%x lo 0x%x cookie 0x%llx",
            __func__, CQE_LEN(&cqe), CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
            (unsigned long long)cookie);

        if (CQE_TYPE(&cqe) == 0) {
                if (!CQE_STATUS(&cqe))
                        wc->byte_len = CQE_LEN(&cqe);
                else
                        wc->byte_len = 0;
                wc->opcode = IB_WC_RECV;
                if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
                    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
                        wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
                        wc->wc_flags |= IB_WC_WITH_INVALIDATE;
                        c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
                }
        } else {
                switch (CQE_OPCODE(&cqe)) {
                case FW_RI_RDMA_WRITE:
                        wc->opcode = IB_WC_RDMA_WRITE;
                        break;
                case FW_RI_READ_REQ:
                        wc->opcode = IB_WC_RDMA_READ;
                        wc->byte_len = CQE_LEN(&cqe);
                        break;
                case FW_RI_SEND_WITH_INV:
                case FW_RI_SEND_WITH_SE_INV:
                        wc->opcode = IB_WC_SEND;
                        wc->wc_flags |= IB_WC_WITH_INVALIDATE;
                        break;
                case FW_RI_SEND:
                case FW_RI_SEND_WITH_SE:
                        wc->opcode = IB_WC_SEND;
                        break;
                case FW_RI_LOCAL_INV:
                        wc->opcode = IB_WC_LOCAL_INV;
                        break;
                case FW_RI_FAST_REGISTER:
                        wc->opcode = IB_WC_REG_MR;

                        /* Invalidate the MR if the fastreg failed */
                        if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
                                c4iw_invalidate_mr(qhp->rhp,
                                                   CQE_WRID_FR_STAG(&cqe));
                        break;
                case C4IW_DRAIN_OPCODE:
                        wc->opcode = IB_WC_SEND;
                        break;
                default:
                        printf("Unexpected opcode %d "
                               "in the CQE received for QPID = 0x%0x\n",
                               CQE_OPCODE(&cqe), CQE_QPID(&cqe));
                        ret = -EINVAL;
                        goto out;
                }
        }

        if (cqe_flushed)
                wc->status = IB_WC_WR_FLUSH_ERR;
        else {

                switch (CQE_STATUS(&cqe)) {
                case T4_ERR_SUCCESS:
                        wc->status = IB_WC_SUCCESS;
                        break;
                case T4_ERR_STAG:
                        wc->status = IB_WC_LOC_ACCESS_ERR;
                        break;
                case T4_ERR_PDID:
                        wc->status = IB_WC_LOC_PROT_ERR;
                        break;
                case T4_ERR_QPID:
                case T4_ERR_ACCESS:
                        wc->status = IB_WC_LOC_ACCESS_ERR;
                        break;
                case T4_ERR_WRAP:
                        wc->status = IB_WC_GENERAL_ERR;
                        break;
                case T4_ERR_BOUND:
                        wc->status = IB_WC_LOC_LEN_ERR;
                        break;
                case T4_ERR_INVALIDATE_SHARED_MR:
                case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
                        wc->status = IB_WC_MW_BIND_ERR;
                        break;
                case T4_ERR_CRC:
                case T4_ERR_MARKER:
                case T4_ERR_PDU_LEN_ERR:
                case T4_ERR_OUT_OF_RQE:
                case T4_ERR_DDP_VERSION:
                case T4_ERR_RDMA_VERSION:
                case T4_ERR_DDP_QUEUE_NUM:
                case T4_ERR_MSN:
                case T4_ERR_TBIT:
                case T4_ERR_MO:
                case T4_ERR_MSN_RANGE:
                case T4_ERR_IRD_OVERFLOW:
                case T4_ERR_OPCODE:
                case T4_ERR_INTERNAL_ERR:
                        wc->status = IB_WC_FATAL_ERR;
                        break;
                case T4_ERR_SWFLUSH:
                        wc->status = IB_WC_WR_FLUSH_ERR;
                        break;
                default:
                        printf("Unexpected cqe_status 0x%x for QPID = 0x%0x\n",
                               CQE_STATUS(&cqe), CQE_QPID(&cqe));
                        wc->status = IB_WC_FATAL_ERR;
                }
        }
out:
        if (wq)
                spin_unlock(&qhp->lock);
        return ret;
}

int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
        struct c4iw_cq *chp;
        unsigned long flags;
        int npolled;
        int err = 0;

        chp = to_c4iw_cq(ibcq);

        spin_lock_irqsave(&chp->lock, flags);
        for (npolled = 0; npolled < num_entries; ++npolled) {
                do {
                        err = c4iw_poll_cq_one(chp, wc + npolled);
                } while (err == -EAGAIN);
                if (err)
                        break;
        }
        spin_unlock_irqrestore(&chp->lock, flags);
        return !err || err == -ENODATA ? npolled : err;
}
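
/*
 * How a kernel consumer typically reaches c4iw_poll_cq(): it calls the
 * ib_poll_cq() verb, which dispatches here through the ib_device method
 * table.  A rough sketch of a consumer draining its CQ in batches
 * (illustrative only; process_wc() is a hypothetical helper, not part of
 * this driver):
 *
 *      struct ib_wc wc[16];
 *      int i, n;
 *
 *      do {
 *              n = ib_poll_cq(cq, nitems(wc), wc);
 *              for (i = 0; i < n; i++)
 *                      process_wc(&wc[i]);
 *      } while (n > 0);
 */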

int c4iw_destroy_cq(struct ib_cq *ib_cq)
{
        struct c4iw_cq *chp;
        struct c4iw_ucontext *ucontext;

        CTR2(KTR_IW_CXGBE, "%s ib_cq %p", __func__, ib_cq);
        chp = to_c4iw_cq(ib_cq);

        remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
        atomic_dec(&chp->refcnt);
        wait_event(chp->wait, !atomic_read(&chp->refcnt));

        ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
                                  : NULL;
        destroy_cq(&chp->rhp->rdev, &chp->cq,
                   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
        kfree(chp);
        return 0;
}

struct ib_cq *
c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr,
    struct ib_ucontext *ib_context, struct ib_udata *udata)
{
        int entries = attr->cqe;
        int vector = attr->comp_vector;
        struct c4iw_dev *rhp;
        struct c4iw_cq *chp;
        struct c4iw_create_cq_resp uresp;
        struct c4iw_ucontext *ucontext = NULL;
        int ret;
        size_t memsize, hwentries;
        struct c4iw_mm_entry *mm, *mm2;

        CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries);
        if (attr->flags)
                return ERR_PTR(-EINVAL);

        rhp = to_c4iw_dev(ibdev);

        chp = kzalloc(sizeof(*chp), GFP_KERNEL);
        if (!chp)
                return ERR_PTR(-ENOMEM);

        if (ib_context)
                ucontext = to_c4iw_ucontext(ib_context);

        /* account for the status page. */
        entries++;

        /* IQ needs one extra entry to differentiate full vs empty. */
        entries++;

        /*
         * entries must be multiple of 16 for HW.
         */
        entries = roundup(entries, 16);

        /*
         * Make actual HW queue 2x to avoid cidx_inc overflows.
         */
        hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size);

        /*
         * Make HW queue at least 64 entries so GTS updates aren't too
         * frequent.
         */
        if (hwentries < 64)
                hwentries = 64;

        memsize = hwentries * sizeof *chp->cq.queue;

        /*
         * memsize must be a multiple of the page size if it's a user cq.
         */
        if (ucontext)
                memsize = roundup(memsize, PAGE_SIZE);
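        /*
         * Worked example (illustrative numbers only): a request for 126 CQEs
         * becomes 128 after the status-page and full/empty slots, stays 128
         * after rounding up to a multiple of 16, and hwentries becomes
         * min(256, t4_max_iq_size).  memsize is then 256 CQE-sized slots,
         * rounded up to a whole number of pages for a user-mapped CQ.
         */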
        chp->cq.size = hwentries;
        chp->cq.memsize = memsize;
        chp->cq.vector = vector;

        ret = create_cq(&rhp->rdev, &chp->cq,
                        ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
        if (ret)
                goto err1;

        chp->rhp = rhp;
        chp->cq.size--;                         /* status page */
        chp->ibcq.cqe = entries - 2;
        spin_lock_init(&chp->lock);
        spin_lock_init(&chp->comp_handler_lock);
        atomic_set(&chp->refcnt, 1);
        init_waitqueue_head(&chp->wait);
        ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
        if (ret)
                goto err2;

        if (ucontext) {
                ret = -ENOMEM;
                mm = kmalloc(sizeof *mm, GFP_KERNEL);
                if (!mm)
                        goto err3;
                mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
                if (!mm2)
                        goto err4;

                memset(&uresp, 0, sizeof(uresp));
                uresp.qid_mask = rhp->rdev.cqmask;
                uresp.cqid = chp->cq.cqid;
                uresp.size = chp->cq.size;
                uresp.memsize = chp->cq.memsize;
                spin_lock(&ucontext->mmap_lock);
                uresp.key = ucontext->key;
                ucontext->key += PAGE_SIZE;
                uresp.gts_key = ucontext->key;
                ucontext->key += PAGE_SIZE;
                spin_unlock(&ucontext->mmap_lock);
                ret = ib_copy_to_udata(udata, &uresp,
                                        sizeof(uresp) - sizeof(uresp.reserved));
                if (ret)
                        goto err5;

                mm->key = uresp.key;
                mm->addr = vtophys(chp->cq.queue);
                mm->len = chp->cq.memsize;
                insert_mmap(ucontext, mm);

                mm2->key = uresp.gts_key;
                mm2->addr = chp->cq.bar2_pa;
                mm2->len = PAGE_SIZE;
                insert_mmap(ucontext, mm2);
        }
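        /*
         * The two keys returned in uresp are meant to be used by the user
         * library as mmap(2) offsets (the usual iw_cxgbe convention, assumed
         * here): mapping "key" exposes the CQE ring itself and mapping
         * "gts_key" exposes the BAR2/GTS doorbell page recorded in
         * cq.bar2_pa above.
         */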
        CTR6(KTR_IW_CXGBE,
            "%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx",
            __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
            (unsigned long long) chp->cq.dma_addr);
        return &chp->ibcq;
err5:
        kfree(mm2);
err4:
        kfree(mm);
err3:
        remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
err2:
        destroy_cq(&chp->rhp->rdev, &chp->cq,
                   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
err1:
        kfree(chp);
        return ERR_PTR(ret);
}

int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
{
        return -ENOSYS;
}

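/*
 * Back end for the ib_req_notify_cq() verb: arm the CQ for a notification
 * on the next completion (or next solicited completion).  When the caller
 * passes IB_CQ_REPORT_MISSED_EVENTS, a return value > 0 indicates CQEs were
 * already pending at arm time, so the caller should poll again instead of
 * waiting for the event.
 */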
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        struct c4iw_cq *chp;
        int ret = 0;
        unsigned long flag;

        chp = to_c4iw_cq(ibcq);
        spin_lock_irqsave(&chp->lock, flag);
        t4_arm_cq(&chp->cq,
                  (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
        if (flags & IB_CQ_REPORT_MISSED_EVENTS)
                ret = t4_cq_notempty(&chp->cq);
        spin_unlock_irqrestore(&chp->lock, flag);
        return ret;
}
#endif