/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>

#include "iw_cxgbe.h"
#include "user.h"

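/*
 * Tear down a CQ: issue a FW_RI_RES_WR with op RESET to the firmware for
 * this CQ, wait for the firmware reply, then release the host-side
 * resources (the software queue, the DMA-coherent queue memory and the
 * CQID).
 */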
static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
                      struct c4iw_dev_ucontext *uctx)
{
        struct adapter *sc = rdev->adap;
        struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev);
        struct fw_ri_res_wr *res_wr;
        struct fw_ri_res *res;
        int wr_len;
        struct c4iw_wr_wait wr_wait;
        struct wrqe *wr;

        wr_len = sizeof *res_wr + sizeof *res;
        wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
        if (wr == NULL)
                return (0);
        res_wr = wrtod(wr);
        memset(res_wr, 0, wr_len);
        res_wr->op_nres = cpu_to_be32(
                        V_FW_WR_OP(FW_RI_RES_WR) |
                        V_FW_RI_RES_WR_NRES(1) |
                        F_FW_WR_COMPL);
        res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
        res_wr->cookie = (unsigned long) &wr_wait;
        res = res_wr->res;
        res->u.cq.restype = FW_RI_RES_TYPE_CQ;
        res->u.cq.op = FW_RI_RES_OP_RESET;
        res->u.cq.iqid = cpu_to_be32(cq->cqid);

        c4iw_init_wr_wait(&wr_wait);

        t4_wrq_tx(sc, wr);

        c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__);

        kfree(cq->sw_queue);
        dma_free_coherent(rhp->ibdev.dma_device,
                          cq->memsize, cq->queue,
                          dma_unmap_addr(cq, mapping));
        c4iw_put_cqid(rdev, cq->cqid, uctx);
        return 0;
}

static int
create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
    struct c4iw_dev_ucontext *uctx)
{
        struct adapter *sc = rdev->adap;
        struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev);
        struct fw_ri_res_wr *res_wr;
        struct fw_ri_res *res;
        int wr_len;
        int user = (uctx != &rdev->uctx);
        struct c4iw_wr_wait wr_wait;
        int ret;
        struct wrqe *wr;
        u64 cq_bar2_qoffset = 0;

        cq->cqid = c4iw_get_cqid(rdev, uctx);
        if (!cq->cqid) {
                ret = -ENOMEM;
                goto err1;
        }

        if (!user) {
                cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
                if (!cq->sw_queue) {
                        ret = -ENOMEM;
                        goto err2;
                }
        }
        cq->queue = dma_alloc_coherent(rhp->ibdev.dma_device, cq->memsize,
                                       &cq->dma_addr, GFP_KERNEL);
        if (!cq->queue) {
                ret = -ENOMEM;
                goto err3;
        }
        dma_unmap_addr_set(cq, mapping, cq->dma_addr);
        memset(cq->queue, 0, cq->memsize);

        /* build fw_ri_res_wr */
        wr_len = sizeof *res_wr + sizeof *res;

        wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
        if (wr == NULL) {
                ret = -ENOMEM;
                goto err4;
        }
        res_wr = wrtod(wr);

        memset(res_wr, 0, wr_len);
        res_wr->op_nres = cpu_to_be32(
                        V_FW_WR_OP(FW_RI_RES_WR) |
                        V_FW_RI_RES_WR_NRES(1) |
                        F_FW_WR_COMPL);
        res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
        res_wr->cookie = (unsigned long) &wr_wait;
        res = res_wr->res;
        res->u.cq.restype = FW_RI_RES_TYPE_CQ;
        res->u.cq.op = FW_RI_RES_OP_WRITE;
        res->u.cq.iqid = cpu_to_be32(cq->cqid);
        /*
         * FIXME: Always use first queue id for IQANDSTINDEX.  Linux does the
         * same.
         */
        res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
                        V_FW_RI_RES_WR_IQANUS(0) |
                        V_FW_RI_RES_WR_IQANUD(1) |
                        F_FW_RI_RES_WR_IQANDST |
                        V_FW_RI_RES_WR_IQANDSTINDEX(sc->sge.ofld_rxq[0].iq.abs_id));
        res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
                        F_FW_RI_RES_WR_IQDROPRSS |
                        V_FW_RI_RES_WR_IQPCIECH(2) |
                        V_FW_RI_RES_WR_IQINTCNTTHRESH(0) |
                        F_FW_RI_RES_WR_IQO |
                        V_FW_RI_RES_WR_IQESIZE(1));
        res->u.cq.iqsize = cpu_to_be16(cq->size);
        res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);

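        /*
         * The wr_wait/cookie pair is the rendezvous with the firmware: the
         * reply to this work request carries the cookie back, which lets the
         * reply path locate wr_wait and wake the sleeper in
         * c4iw_wait_for_reply() below.
         */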
        c4iw_init_wr_wait(&wr_wait);

        t4_wrq_tx(sc, wr);

        CTR2(KTR_IW_CXGBE, "%s wait_event wr_wait %p", __func__, &wr_wait);
        ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__);
        if (ret)
                goto err4;

        cq->gen = 1;
        cq->rdev = rdev;

        /* Determine the BAR2 queue offset and qid. */
        t4_bar2_sge_qregs(rdev->adap, cq->cqid, T4_BAR2_QTYPE_INGRESS, user,
                        &cq_bar2_qoffset, &cq->bar2_qid);

        /*
         * If this is a user mapping, compute the page-aligned physical
         * address for mapping.
         */
        if (user)
                cq->bar2_pa = (rdev->bar2_pa + cq_bar2_qoffset) & PAGE_MASK;
        else
                cq->bar2_va = (void __iomem *)((u64)rdev->bar2_kva +
                        cq_bar2_qoffset);

        return 0;
err4:
        dma_free_coherent(rhp->ibdev.dma_device, cq->memsize, cq->queue,
                          dma_unmap_addr(cq, mapping));
err3:
        kfree(cq->sw_queue);
err2:
        c4iw_put_cqid(rdev, cq->cqid, uctx);
err1:
        return ret;
}

static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
{
        struct t4_cqe cqe;

        CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
            cq, cq->sw_cidx, cq->sw_pidx);
        memset(&cqe, 0, sizeof(cqe));
        cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
                                 V_CQE_OPCODE(FW_RI_SEND) |
                                 V_CQE_TYPE(0) |
                                 V_CQE_SWCQE(1) |
                                 V_CQE_QPID(wq->sq.qid));
        cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
        cq->sw_queue[cq->sw_pidx] = cqe;
        t4_swcq_produce(cq);
}

int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
{
        int flushed = 0;
        int in_use = wq->rq.in_use - count;

        BUG_ON(in_use < 0);
        CTR5(KTR_IW_CXGBE, "%s wq %p cq %p rq.in_use %u skip count %u",
            __func__, wq, cq, wq->rq.in_use, count);
        while (in_use--) {
                insert_recv_cqe(wq, cq);
                flushed++;
        }
        return flushed;
}

static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
                          struct t4_swsqe *swcqe)
{
        struct t4_cqe cqe;

        CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
            cq, cq->sw_cidx, cq->sw_pidx);
        memset(&cqe, 0, sizeof(cqe));
        cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
                                 V_CQE_OPCODE(swcqe->opcode) |
                                 V_CQE_TYPE(1) |
                                 V_CQE_SWCQE(1) |
                                 V_CQE_QPID(wq->sq.qid));
        CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
        cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
        cq->sw_queue[cq->sw_pidx] = cqe;
        t4_swcq_produce(cq);
}

static void advance_oldest_read(struct t4_wq *wq);

int c4iw_flush_sq(struct c4iw_qp *qhp)
{
        int flushed = 0;
        struct t4_wq *wq = &qhp->wq;
        struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
        struct t4_cq *cq = &chp->cq;
        int idx;
        struct t4_swsqe *swsqe;

        if (wq->sq.flush_cidx == -1)
                wq->sq.flush_cidx = wq->sq.cidx;
        idx = wq->sq.flush_cidx;
        BUG_ON(idx >= wq->sq.size);
        while (idx != wq->sq.pidx) {
                swsqe = &wq->sq.sw_sq[idx];
                BUG_ON(swsqe->flushed);
                swsqe->flushed = 1;
                insert_sq_cqe(wq, cq, swsqe);
                if (wq->sq.oldest_read == swsqe) {
                        BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
                        advance_oldest_read(wq);
                }
                flushed++;
                if (++idx == wq->sq.size)
                        idx = 0;
        }
        wq->sq.flush_cidx += flushed;
        if (wq->sq.flush_cidx >= wq->sq.size)
                wq->sq.flush_cidx -= wq->sq.size;
        return flushed;
}

static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
{
        struct t4_swsqe *swsqe;
        int cidx;

        if (wq->sq.flush_cidx == -1)
                wq->sq.flush_cidx = wq->sq.cidx;
        cidx = wq->sq.flush_cidx;
        BUG_ON(cidx > wq->sq.size);

        while (cidx != wq->sq.pidx) {
                swsqe = &wq->sq.sw_sq[cidx];
                if (!swsqe->signaled) {
                        if (++cidx == wq->sq.size)
                                cidx = 0;
                } else if (swsqe->complete) {

                        BUG_ON(swsqe->flushed);

                        /*
                         * Insert this completed cqe into the swcq.
                         */
                        CTR3(KTR_IW_CXGBE,
                                "%s moving cqe into swcq sq idx %u cq idx %u\n",
                                __func__, cidx, cq->sw_pidx);
                        swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
                        cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
                        t4_swcq_produce(cq);
                        swsqe->flushed = 1;
                        if (++cidx == wq->sq.size)
                                cidx = 0;
                        wq->sq.flush_cidx = cidx;
                } else
                        break;
        }
}

static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
                struct t4_cqe *read_cqe)
{
        read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
        read_cqe->len = htonl(wq->sq.oldest_read->read_len);
        read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
                        V_CQE_SWCQE(SW_CQE(hw_cqe)) |
                        V_CQE_OPCODE(FW_RI_READ_REQ) |
                        V_CQE_TYPE(1));
        read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
}

static void advance_oldest_read(struct t4_wq *wq)
{
        u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;

        if (rptr == wq->sq.size)
                rptr = 0;
        while (rptr != wq->sq.pidx) {
                wq->sq.oldest_read = &wq->sq.sw_sq[rptr];

                if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
                        return;
                if (++rptr == wq->sq.size)
                        rptr = 0;
        }
        wq->sq.oldest_read = NULL;
}

/*
 * Move all CQEs from the HWCQ into the SWCQ.
 * Deal with out-of-order completions and completions that complete
 * prior unsignalled WRs.
 */
void c4iw_flush_hw_cq(struct c4iw_cq *chp)
{
        struct t4_cqe *hw_cqe, *swcqe, read_cqe;
        struct c4iw_qp *qhp;
        struct t4_swsqe *swsqe;
        int ret;

        CTR3(KTR_IW_CXGBE, "%s cq %p cqid 0x%x", __func__, &chp->cq,
                        chp->cq.cqid);
        ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);

        /*
         * This logic is similar to poll_cq(), but not quite the same
         * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
         * also do any translation magic that poll_cq() normally does.
         */
        while (!ret) {
                qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));

                /*
                 * drop CQEs with no associated QP
                 */
                if (qhp == NULL)
                        goto next_cqe;

                if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
                        goto next_cqe;

                if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {

                        /*
                         * If we got here because of an async event or some
                         * other error and there is an egress error, drop it.
                         */
                        if (CQE_TYPE(hw_cqe) == 1)
                                goto next_cqe;

                        /* drop peer2peer RTR reads. */
                        if (CQE_WRID_STAG(hw_cqe) == 1)
                                goto next_cqe;

                        /*
                         * Eat completions for unsignaled read WRs.
                         */
                        if (!qhp->wq.sq.oldest_read->signaled) {
                                advance_oldest_read(&qhp->wq);
                                goto next_cqe;
                        }

                        /*
                         * Don't write to the HWCQ, create a new read req CQE
                         * in local memory and move it into the swcq.
                         */
                        create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
                        hw_cqe = &read_cqe;
                        advance_oldest_read(&qhp->wq);
                }

                /*
                 * If it's an SQ completion, then do the magic to move all the
                 * unsignaled and now in-order completions into the swcq.
                 */
                if (SQ_TYPE(hw_cqe)) {
                        swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
                        swsqe->cqe = *hw_cqe;
                        swsqe->complete = 1;
                        flush_completed_wrs(&qhp->wq, &chp->cq);
                } else {
                        swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
                        *swcqe = *hw_cqe;
                        swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
                        t4_swcq_produce(&chp->cq);
                }
next_cqe:
                t4_hwcq_consume(&chp->cq);
                ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
        }
}

static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
{
        if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
                return 0;

        if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
                return 0;

        if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
                return 0;

        if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
                return 0;
        return 1;
}

void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
{
        struct t4_cqe *cqe;
        u32 ptr;

        *count = 0;
        CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count);
        ptr = cq->sw_cidx;
        while (ptr != cq->sw_pidx) {
                cqe = &cq->sw_queue[ptr];
                if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
                    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
                        (*count)++;
                if (++ptr == cq->size)
                        ptr = 0;
        }
        CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
}

/*
 * poll_cq
 *
 * Caller must:
 *     check the validity of the first CQE,
 *     supply the wq associated with the qpid.
 *
 * credit: cq credit to return to sge.
 * cqe_flushed: 1 iff the CQE is flushed.
 * cqe: copy of the polled CQE.
 *
 * return value:
 *    0             CQE returned ok.
 *    -EAGAIN       CQE skipped, try again.
 *    -EOVERFLOW    CQ overflow detected.
 */
static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
                   u8 *cqe_flushed, u64 *cookie, u32 *credit)
{
        int ret = 0;
        struct t4_cqe *hw_cqe, read_cqe;

        *cqe_flushed = 0;
        *credit = 0;
        ret = t4_next_cqe(cq, &hw_cqe);
        if (ret)
                return ret;

        CTR6(KTR_IW_CXGBE,
            "%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x", __func__,
            CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), CQE_GENBIT(hw_cqe),
            CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe));
        CTR5(KTR_IW_CXGBE,
            "%s opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
            __func__, CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
            CQE_WRID_LOW(hw_cqe));

        /*
         * Skip CQEs not affiliated with a QP.
         */
        if (wq == NULL) {
                ret = -EAGAIN;
                goto skip_cqe;
        }

        /*
         * Skip HW CQEs if the wq is flushed.
         */
        if (wq->flushed && !SW_CQE(hw_cqe)) {
                ret = -EAGAIN;
                goto skip_cqe;
        }

        /*
         * Skip TERMINATE CQEs...
         */
        if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
                ret = -EAGAIN;
                goto skip_cqe;
        }

        /*
         * Special CQE for drain WR completions...
         */
        if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
                *cookie = CQE_DRAIN_COOKIE(hw_cqe);
                *cqe = *hw_cqe;
                goto skip_cqe;
        }

        /*
         * Gotta tweak READ completions:
         *      1) the cqe doesn't contain the sq_wptr from the wr.
         *      2) opcode not reflected from the wr.
         *      3) read_len not reflected from the wr.
         *      4) cq_type is RQ_TYPE not SQ_TYPE.
         */
        if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {

                /*
                 * If we got here because of an async event or some other
                 * error and there is an egress error, drop it.
                 */
                if (CQE_TYPE(hw_cqe) == 1) {
                        if (CQE_STATUS(hw_cqe))
                                t4_set_wq_in_error(wq);
                        ret = -EAGAIN;
                        goto skip_cqe;
                }

                /*
                 * If this is an unsolicited read response, then the read
                 * was generated by the kernel driver as part of peer-2-peer
                 * connection setup.  So ignore the completion.
                 */
                if (CQE_WRID_STAG(hw_cqe) == 1) {
                        if (CQE_STATUS(hw_cqe))
                                t4_set_wq_in_error(wq);
                        ret = -EAGAIN;
                        goto skip_cqe;
                }

                /*
                 * Eat completions for unsignaled read WRs.
                 */
                if (!wq->sq.oldest_read->signaled) {
                        advance_oldest_read(wq);
                        ret = -EAGAIN;
                        goto skip_cqe;
                }

                /*
                 * Don't write to the HWCQ, so create a new read req CQE
                 * in local memory.
                 */
                create_read_req_cqe(wq, hw_cqe, &read_cqe);
                hw_cqe = &read_cqe;
                advance_oldest_read(wq);
        }

        if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
                *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
                t4_set_wq_in_error(wq);
        }

        /*
         * RECV completion.
         */
        if (RQ_TYPE(hw_cqe)) {

                /*
                 * HW only validates 4 bits of MSN.  So we must validate that
                 * the MSN in the SEND is the next expected MSN.  If it's not,
                 * then we complete this with T4_ERR_MSN and mark the wq in
                 * error.
                 */

                if (t4_rq_empty(wq)) {
                        t4_set_wq_in_error(wq);
                        ret = -EAGAIN;
                        goto skip_cqe;
                }
                if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
                        t4_set_wq_in_error(wq);
                        hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN));
                        goto proc_cqe;
                }
                goto proc_cqe;
        }

        /*
         * If we get here it's a send completion.
         *
         * Handle out of order completion. These get stuffed
         * in the SW SQ. Then the SW SQ is walked to move any
         * now in-order completions into the SW CQ.  This handles
         * 2 cases:
         *      1) reaping unsignaled WRs when the first subsequent
         *         signaled WR is completed.
         *      2) out of order read completions.
         */
        if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
                struct t4_swsqe *swsqe;

                CTR2(KTR_IW_CXGBE,
                    "%s out of order completion going in sw_sq at idx %u",
                    __func__, CQE_WRID_SQ_IDX(hw_cqe));
                swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
                swsqe->cqe = *hw_cqe;
                swsqe->complete = 1;
                ret = -EAGAIN;
                goto flush_wq;
        }

proc_cqe:
        *cqe = *hw_cqe;

        /*
         * Reap the associated WR(s) that are freed up with this
         * completion.
         */
        if (SQ_TYPE(hw_cqe)) {
                int idx = CQE_WRID_SQ_IDX(hw_cqe);
                BUG_ON(idx >= wq->sq.size);

                /*
                 * Account for any unsignaled completions completed by
                 * this signaled completion.  In this case, cidx points
                 * to the first unsignaled one, and idx points to the
                 * signaled one.  So adjust in_use based on this delta.
                 * If this is not completing any unsignaled WRs, then the
                 * delta will be 0.  Handle wrapping also!
                 */
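                /*
                 * Worked example (illustrative numbers only): with
                 * sq.size = 64, sq.cidx = 60 and idx = 2, this completion
                 * also retires entries 60..63 and 0..1, so in_use drops by
                 * 64 + 2 - 60 = 6; without wrap (cidx = 10, idx = 14) it
                 * drops by 14 - 10 = 4.
                 */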
                if (idx < wq->sq.cidx)
                        wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
                else
                        wq->sq.in_use -= idx - wq->sq.cidx;
                BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);

                wq->sq.cidx = (uint16_t)idx;
                CTR2(KTR_IW_CXGBE, "%s completing sq idx %u\n",
                                __func__, wq->sq.cidx);
                *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
                t4_sq_consume(wq);
        } else {
                CTR2(KTR_IW_CXGBE, "%s completing rq idx %u",
                     __func__, wq->rq.cidx);
                *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
                BUG_ON(t4_rq_empty(wq));
                t4_rq_consume(wq);
                goto skip_cqe;
        }

flush_wq:
        /*
         * Flush any completed cqes that are now in-order.
         */
        flush_completed_wrs(wq, cq);

skip_cqe:
        if (SW_CQE(hw_cqe)) {
                CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip sw cqe cidx %u",
                     __func__, cq, cq->cqid, cq->sw_cidx);
                t4_swcq_consume(cq);
        } else {
                CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip hw cqe cidx %u",
                     __func__, cq, cq->cqid, cq->cidx);
                t4_hwcq_consume(cq);
        }
        return ret;
}

/*
 * Get one cq entry from c4iw and map it to openib.
 *
 * Returns:
 *      0                       cqe returned
 *      -ENODATA                CQ empty
 *      -EAGAIN                 caller must try again
 *      any other -errno        fatal error
 */
static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
{
        struct c4iw_qp *qhp = NULL;
        struct t4_cqe cqe = {0, 0}, *rd_cqe;
        struct t4_wq *wq;
        u32 credit = 0;
        u8 cqe_flushed;
        u64 cookie = 0;
        int ret;

        ret = t4_next_cqe(&chp->cq, &rd_cqe);

        if (ret)
                return ret;

        qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
        if (!qhp)
                wq = NULL;
        else {
                spin_lock(&qhp->lock);
                wq = &(qhp->wq);
        }
        ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
        if (ret)
                goto out;

        wc->wr_id = cookie;
        wc->qp = &qhp->ibqp;
        wc->vendor_err = CQE_STATUS(&cqe);
        wc->wc_flags = 0;

        CTR5(KTR_IW_CXGBE, "%s qpid 0x%x type %d opcode %d status 0x%x",
            __func__, CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
            CQE_STATUS(&cqe));
        CTR5(KTR_IW_CXGBE, "%s len %u wrid hi 0x%x lo 0x%x cookie 0x%llx",
            __func__, CQE_LEN(&cqe), CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
            (unsigned long long)cookie);

        if (CQE_TYPE(&cqe) == 0) {
                if (!CQE_STATUS(&cqe))
                        wc->byte_len = CQE_LEN(&cqe);
                else
                        wc->byte_len = 0;
                wc->opcode = IB_WC_RECV;
                if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
                    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
                        wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
                        wc->wc_flags |= IB_WC_WITH_INVALIDATE;
                        c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
                }
        } else {
                switch (CQE_OPCODE(&cqe)) {
                case FW_RI_RDMA_WRITE:
                        wc->opcode = IB_WC_RDMA_WRITE;
                        break;
                case FW_RI_READ_REQ:
                        wc->opcode = IB_WC_RDMA_READ;
                        wc->byte_len = CQE_LEN(&cqe);
                        break;
                case FW_RI_SEND_WITH_INV:
                case FW_RI_SEND_WITH_SE_INV:
                        wc->opcode = IB_WC_SEND;
                        wc->wc_flags |= IB_WC_WITH_INVALIDATE;
                        break;
                case FW_RI_SEND:
                case FW_RI_SEND_WITH_SE:
                        wc->opcode = IB_WC_SEND;
                        break;
                case FW_RI_LOCAL_INV:
                        wc->opcode = IB_WC_LOCAL_INV;
                        break;
                case FW_RI_FAST_REGISTER:
                        wc->opcode = IB_WC_REG_MR;

                        /* Invalidate the MR if the fastreg failed */
                        if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
                                c4iw_invalidate_mr(qhp->rhp,
                                                   CQE_WRID_FR_STAG(&cqe));
                        break;
                case C4IW_DRAIN_OPCODE:
                        wc->opcode = IB_WC_SEND;
                        break;
                default:
                        printf("Unexpected opcode %d "
                               "in the CQE received for QPID = 0x%0x\n",
                               CQE_OPCODE(&cqe), CQE_QPID(&cqe));
                        ret = -EINVAL;
                        goto out;
                }
        }

        if (cqe_flushed)
                wc->status = IB_WC_WR_FLUSH_ERR;
        else {

                switch (CQE_STATUS(&cqe)) {
                case T4_ERR_SUCCESS:
                        wc->status = IB_WC_SUCCESS;
                        break;
                case T4_ERR_STAG:
                        wc->status = IB_WC_LOC_ACCESS_ERR;
                        break;
                case T4_ERR_PDID:
                        wc->status = IB_WC_LOC_PROT_ERR;
                        break;
                case T4_ERR_QPID:
                case T4_ERR_ACCESS:
                        wc->status = IB_WC_LOC_ACCESS_ERR;
                        break;
                case T4_ERR_WRAP:
                        wc->status = IB_WC_GENERAL_ERR;
                        break;
                case T4_ERR_BOUND:
                        wc->status = IB_WC_LOC_LEN_ERR;
                        break;
                case T4_ERR_INVALIDATE_SHARED_MR:
                case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
                        wc->status = IB_WC_MW_BIND_ERR;
                        break;
                case T4_ERR_CRC:
                case T4_ERR_MARKER:
                case T4_ERR_PDU_LEN_ERR:
                case T4_ERR_OUT_OF_RQE:
                case T4_ERR_DDP_VERSION:
                case T4_ERR_RDMA_VERSION:
                case T4_ERR_DDP_QUEUE_NUM:
                case T4_ERR_MSN:
                case T4_ERR_TBIT:
                case T4_ERR_MO:
                case T4_ERR_MSN_RANGE:
                case T4_ERR_IRD_OVERFLOW:
                case T4_ERR_OPCODE:
                case T4_ERR_INTERNAL_ERR:
                        wc->status = IB_WC_FATAL_ERR;
                        break;
                case T4_ERR_SWFLUSH:
                        wc->status = IB_WC_WR_FLUSH_ERR;
                        break;
                default:
                        printf("Unexpected cqe_status 0x%x for QPID = 0x%0x\n",
                               CQE_STATUS(&cqe), CQE_QPID(&cqe));
                        wc->status = IB_WC_FATAL_ERR;
                }
        }
out:
        if (wq)
                spin_unlock(&qhp->lock);
        return ret;
}

int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
        struct c4iw_cq *chp;
        unsigned long flags;
        int npolled;
        int err = 0;

        chp = to_c4iw_cq(ibcq);

        spin_lock_irqsave(&chp->lock, flags);
        for (npolled = 0; npolled < num_entries; ++npolled) {
                do {
                        err = c4iw_poll_cq_one(chp, wc + npolled);
                } while (err == -EAGAIN);
                if (err)
                        break;
        }
        spin_unlock_irqrestore(&chp->lock, flags);
        return !err || err == -ENODATA ? npolled : err;
}
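
/*
 * How a kernel consumer typically reaches c4iw_poll_cq(): it calls the
 * ib_poll_cq() verb, which dispatches here through the ib_device method
 * table.  A rough sketch of a consumer draining its CQ in batches
 * (illustrative only; process_wc() is a hypothetical helper, not part of
 * this driver):
 *
 *      struct ib_wc wc[16];
 *      int i, n;
 *
 *      do {
 *              n = ib_poll_cq(cq, nitems(wc), wc);
 *              for (i = 0; i < n; i++)
 *                      process_wc(&wc[i]);
 *      } while (n > 0);
 */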

int c4iw_destroy_cq(struct ib_cq *ib_cq)
{
        struct c4iw_cq *chp;
        struct c4iw_ucontext *ucontext;

        CTR2(KTR_IW_CXGBE, "%s ib_cq %p", __func__, ib_cq);
        chp = to_c4iw_cq(ib_cq);

        remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
        atomic_dec(&chp->refcnt);
        wait_event(chp->wait, !atomic_read(&chp->refcnt));

        ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
                                  : NULL;
        destroy_cq(&chp->rhp->rdev, &chp->cq,
                   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
        kfree(chp);
        return 0;
}

struct ib_cq *
c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr,
    struct ib_ucontext *ib_context, struct ib_udata *udata)
{
        int entries = attr->cqe;
        int vector = attr->comp_vector;
        struct c4iw_dev *rhp;
        struct c4iw_cq *chp;
        struct c4iw_create_cq_resp uresp;
        struct c4iw_ucontext *ucontext = NULL;
        int ret;
        size_t memsize, hwentries;
        struct c4iw_mm_entry *mm, *mm2;

        CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries);
        if (attr->flags)
                return ERR_PTR(-EINVAL);

        rhp = to_c4iw_dev(ibdev);

        chp = kzalloc(sizeof(*chp), GFP_KERNEL);
        if (!chp)
                return ERR_PTR(-ENOMEM);

        if (ib_context)
                ucontext = to_c4iw_ucontext(ib_context);

        /* account for the status page. */
        entries++;

        /* IQ needs one extra entry to differentiate full vs empty. */
        entries++;

        /*
         * entries must be multiple of 16 for HW.
         */
        entries = roundup(entries, 16);

        /*
         * Make actual HW queue 2x to avoid cidx_inc overflows.
         */
        hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size);

        /*
         * Make HW queue at least 64 entries so GTS updates aren't too
         * frequent.
         */
        if (hwentries < 64)
                hwentries = 64;

        memsize = hwentries * sizeof *chp->cq.queue;

        /*
         * memsize must be a multiple of the page size if it's a user cq.
         */
        if (ucontext)
                memsize = roundup(memsize, PAGE_SIZE);
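        /*
         * Worked example (illustrative numbers only): a request for 126 CQEs
         * becomes 128 after the status-page and full/empty slots, stays 128
         * after rounding up to a multiple of 16, and hwentries becomes
         * min(256, t4_max_iq_size).  memsize is then 256 CQE-sized slots,
         * rounded up to a whole number of pages for a user-mapped CQ.
         */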
        chp->cq.size = hwentries;
        chp->cq.memsize = memsize;
        chp->cq.vector = vector;

        ret = create_cq(&rhp->rdev, &chp->cq,
                        ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
        if (ret)
                goto err1;

        chp->rhp = rhp;
        chp->cq.size--;                         /* status page */
        chp->ibcq.cqe = entries - 2;
        spin_lock_init(&chp->lock);
        spin_lock_init(&chp->comp_handler_lock);
        atomic_set(&chp->refcnt, 1);
        init_waitqueue_head(&chp->wait);
        ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
        if (ret)
                goto err2;

        if (ucontext) {
                ret = -ENOMEM;
                mm = kmalloc(sizeof *mm, GFP_KERNEL);
                if (!mm)
                        goto err3;
                mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
                if (!mm2)
                        goto err4;

                memset(&uresp, 0, sizeof(uresp));
                uresp.qid_mask = rhp->rdev.cqmask;
                uresp.cqid = chp->cq.cqid;
                uresp.size = chp->cq.size;
                uresp.memsize = chp->cq.memsize;
                spin_lock(&ucontext->mmap_lock);
                uresp.key = ucontext->key;
                ucontext->key += PAGE_SIZE;
                uresp.gts_key = ucontext->key;
                ucontext->key += PAGE_SIZE;
                spin_unlock(&ucontext->mmap_lock);
                ret = ib_copy_to_udata(udata, &uresp,
                                        sizeof(uresp) - sizeof(uresp.reserved));
                if (ret)
                        goto err5;

                mm->key = uresp.key;
                mm->addr = vtophys(chp->cq.queue);
                mm->len = chp->cq.memsize;
                insert_mmap(ucontext, mm);

                mm2->key = uresp.gts_key;
                mm2->addr = chp->cq.bar2_pa;
                mm2->len = PAGE_SIZE;
                insert_mmap(ucontext, mm2);
        }
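        /*
         * The two keys returned in uresp are meant to be used by the user
         * library as mmap(2) offsets (the usual iw_cxgbe convention, assumed
         * here): mapping "key" exposes the CQE ring itself and mapping
         * "gts_key" exposes the BAR2/GTS doorbell page recorded in
         * cq.bar2_pa above.
         */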
        CTR6(KTR_IW_CXGBE,
            "%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx",
            __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
            (unsigned long long) chp->cq.dma_addr);
        return &chp->ibcq;
err5:
        kfree(mm2);
err4:
        kfree(mm);
err3:
        remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
err2:
        destroy_cq(&chp->rhp->rdev, &chp->cq,
                   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
err1:
        kfree(chp);
        return ERR_PTR(ret);
}

int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
{
        return -ENOSYS;
}

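/*
 * Back end for the ib_req_notify_cq() verb: arm the CQ for a notification
 * on the next completion (or next solicited completion).  When the caller
 * passes IB_CQ_REPORT_MISSED_EVENTS, a return value > 0 indicates CQEs were
 * already pending at arm time, so the caller should poll again instead of
 * waiting for the event.
 */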
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        struct c4iw_cq *chp;
        int ret = 0;
        unsigned long flag;

        chp = to_c4iw_cq(ibcq);
        spin_lock_irqsave(&chp->lock, flag);
        t4_arm_cq(&chp->cq,
                  (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
        if (flags & IB_CQ_REPORT_MISSED_EVENTS)
                ret = t4_cq_notempty(&chp->cq);
        spin_unlock_irqrestore(&chp->lock, flag);
        return ret;
}
#endif