contrib/ofed/libcxgb4/qp.c

   1 /*
   2  * Copyright (c) 2006-2016 Chelsio, Inc. All rights reserved.
   3  *
   4  * This software is available to you under a choice of one of two
   5  * licenses.  You may choose to be licensed under the terms of the GNU
   6  * General Public License (GPL) Version 2, available from the file
   7  * COPYING in the main directory of this source tree, or the
   8  * OpenIB.org BSD license below:
   9  *
  10  *     Redistribution and use in source and binary forms, with or
  11  *     without modification, are permitted provided that the following
  12  *     conditions are met:
  13  *
  14  *      - Redistributions of source code must retain the above
  15  *        copyright notice, this list of conditions and the following
  16  *        disclaimer.
  17  *
  18  *      - Redistributions in binary form must reproduce the above
  19  *        copyright notice, this list of conditions and the following
  20  *        disclaimer in the documentation and/or other materials
  21  *        provided with the distribution.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30  * SOFTWARE.
  31  */
  32 #include <config.h>
  33
  34 #include <assert.h>
  35 #include <stdlib.h>
  36 #include <pthread.h>
  37 #include <string.h>
  38 #include <stdio.h>
  39 #include "libcxgb4.h"
  40
  41 #ifdef STATS
  42 struct c4iw_stats c4iw_stats;
  43 #endif
  44
  45 static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16)
  46 {
  47         void *src, *dst;
  48         uintptr_t end;
  49         int total, len;
  50
  51         src = &wqe->flits[0];
  52         dst = &wq->sq.queue->flits[wq->sq.wq_pidx *
  53             (T4_EQ_ENTRY_SIZE / sizeof(__be64))];
  54         if (t4_sq_onchip(wq)) {
  55                 len16 = align(len16, 4);
  56
  57                 /* In onchip mode the copy below will be made to WC memory and
  58                  * could trigger DMA. In offchip mode the copy below only
  59                  * queues the WQE, DMA cannot start until t4_ring_sq_db
  60                  * happens */
  61                 mmio_wc_start();
  62         }
  63
  64         /* NOTE len16 cannot be large enough to write to the
  65            same sq.queue memory twice in this loop */
  66         total = len16 * 16;
  67         end = (uintptr_t)&wq->sq.queue[wq->sq.size];
  68         if (__predict_true((uintptr_t)dst + total <= end)) {
  69                 /* Won't wrap around. */
  70                 memcpy(dst, src, total);
  71         } else {
  72                 len = end - (uintptr_t)dst;
  73                 memcpy(dst, src, len);
  74                 memcpy(wq->sq.queue, src + len, total - len);
  75         }
  76
  77         if (t4_sq_onchip(wq))
  78                 mmio_flush_writes();
  79 }
  80
  81 static void copy_wr_to_rq(struct t4_wq *wq, union t4_recv_wr *wqe, u8 len16)
  82 {
  83         void *src, *dst;
  84         uintptr_t end;
  85         int total, len;
  86
  87         src = &wqe->flits[0];
  88         dst = &wq->rq.queue->flits[wq->rq.wq_pidx *
  89             (T4_EQ_ENTRY_SIZE / sizeof(__be64))];
  90
  91         total = len16 * 16;
  92         end = (uintptr_t)&wq->rq.queue[wq->rq.size];
  93         if (__predict_true((uintptr_t)dst + total <= end)) {
  94                 /* Won't wrap around. */
  95                 memcpy(dst, src, total);
  96         } else {
  97                 len = end - (uintptr_t)dst;
  98                 memcpy(dst, src, len);
  99                 memcpy(wq->rq.queue, src + len, total - len);
 100         }
 101 }
 102
 103 static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
 104                       struct ibv_send_wr *wr, int max, u32 *plenp)
 105 {
 106         u8 *dstp, *srcp;
 107         u32 plen = 0;
 108         int i;
 109         int len;
 110
 111         dstp = (u8 *)immdp->data;
 112         for (i = 0; i < wr->num_sge; i++) {
 113                 if ((plen + wr->sg_list[i].length) > max)
 114                         return -EMSGSIZE;
 115                 srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
 116                 plen += wr->sg_list[i].length;
 117                 len = wr->sg_list[i].length;
 118                 memcpy(dstp, srcp, len);
 119                 dstp += len;
 120                 srcp += len;
 121         }
 122         len = ROUND_UP(plen + 8, 16) - (plen + 8);
 123         if (len)
 124                 memset(dstp, 0, len);
 125         immdp->op = FW_RI_DATA_IMMD;
 126         immdp->r1 = 0;
 127         immdp->r2 = 0;
 128         immdp->immdlen = htobe32(plen);
 129         *plenp = plen;
 130         return 0;
 131 }
 132
 133 static int build_isgl(struct fw_ri_isgl *isglp, struct ibv_sge *sg_list,
 134                       int num_sge, u32 *plenp)
 135 {
 136         int i;
 137         u32 plen = 0;
 138         __be64 *flitp = (__be64 *)isglp->sge;
 139
 140         for (i = 0; i < num_sge; i++) {
 141                 if ((plen + sg_list[i].length) < plen)
 142                         return -EMSGSIZE;
 143                 plen += sg_list[i].length;
 144                 *flitp++ = htobe64(((u64)sg_list[i].lkey << 32) |
 145                                      sg_list[i].length);
 146                 *flitp++ = htobe64(sg_list[i].addr);
 147         }
 148         *flitp = 0;
 149         isglp->op = FW_RI_DATA_ISGL;
 150         isglp->r1 = 0;
 151         isglp->nsge = htobe16(num_sge);
 152         isglp->r2 = 0;
 153         if (plenp)
 154                 *plenp = plen;
 155         return 0;
 156 }
 157
 158 static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
 159                            struct ibv_send_wr *wr, u8 *len16)
 160 {
 161         u32 plen;
 162         int size;
 163         int ret;
 164
 165         if (wr->num_sge > T4_MAX_SEND_SGE)
 166                 return -EINVAL;
 167         if (wr->send_flags & IBV_SEND_SOLICITED)
 168                 wqe->send.sendop_pkd = htobe32(
 169                         FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE));
 170         else
 171                 wqe->send.sendop_pkd = htobe32(
 172                         FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND));
 173         wqe->send.stag_inv = 0;
 174         wqe->send.r3 = 0;
 175         wqe->send.r4 = 0;
 176
 177         plen = 0;
 178         if (wr->num_sge) {
 179                 if (wr->send_flags & IBV_SEND_INLINE) {
 180                         ret = build_immd(sq, wqe->send.u.immd_src, wr,
 181                                          T4_MAX_SEND_INLINE, &plen);
 182                         if (ret)
 183                                 return ret;
 184                         size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
 185                                plen;
 186                 } else {
 187                         ret = build_isgl(wqe->send.u.isgl_src,
 188                                          wr->sg_list, wr->num_sge, &plen);
 189                         if (ret)
 190                                 return ret;
 191                         size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
 192                                wr->num_sge * sizeof (struct fw_ri_sge);
 193                 }
 194         } else {
 195                 wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
 196                 wqe->send.u.immd_src[0].r1 = 0;
 197                 wqe->send.u.immd_src[0].r2 = 0;
 198                 wqe->send.u.immd_src[0].immdlen = 0;
 199                 size = sizeof wqe->send + sizeof(struct fw_ri_immd);
 200                 plen = 0;
 201         }
 202         *len16 = DIV_ROUND_UP(size, 16);
 203         wqe->send.plen = htobe32(plen);
 204         return 0;
 205 }
 206
 207 static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
 208                             struct ibv_send_wr *wr, u8 *len16)
 209 {
 210         u32 plen;
 211         int size;
 212         int ret;
 213
 214         if (wr->num_sge > T4_MAX_SEND_SGE)
 215                 return -EINVAL;
 216         wqe->write.r2 = 0;
 217         wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey);
 218         wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr);
 219         if (wr->num_sge) {
 220                 if (wr->send_flags & IBV_SEND_INLINE) {
 221                         ret = build_immd(sq, wqe->write.u.immd_src, wr,
 222                                          T4_MAX_WRITE_INLINE, &plen);
 223                         if (ret)
 224                                 return ret;
 225                         size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
 226                                plen;
 227                 } else {
 228                         ret = build_isgl(wqe->write.u.isgl_src,
 229                                          wr->sg_list, wr->num_sge, &plen);
 230                         if (ret)
 231                                 return ret;
 232                         size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
 233                                wr->num_sge * sizeof (struct fw_ri_sge);
 234                 }
 235         } else {
 236                 wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
 237                 wqe->write.u.immd_src[0].r1 = 0;
 238                 wqe->write.u.immd_src[0].r2 = 0;
 239                 wqe->write.u.immd_src[0].immdlen = 0;
 240                 size = sizeof wqe->write + sizeof(struct fw_ri_immd);
 241                 plen = 0;
 242         }
 243         *len16 = DIV_ROUND_UP(size, 16);
 244         wqe->write.plen = htobe32(plen);
 245         return 0;
 246 }
 247
 248 static int build_rdma_read(union t4_wr *wqe, struct ibv_send_wr *wr, u8 *len16)
 249 {
 250         if (wr->num_sge > 1)
 251                 return -EINVAL;
 252         if (wr->num_sge) {
 253                 wqe->read.stag_src = htobe32(wr->wr.rdma.rkey);
 254                 wqe->read.to_src_hi = htobe32((u32)(wr->wr.rdma.remote_addr >>32));
 255                 wqe->read.to_src_lo = htobe32((u32)wr->wr.rdma.remote_addr);
 256                 wqe->read.stag_sink = htobe32(wr->sg_list[0].lkey);
 257                 wqe->read.plen = htobe32(wr->sg_list[0].length);
 258                 wqe->read.to_sink_hi = htobe32((u32)(wr->sg_list[0].addr >> 32));
 259                 wqe->read.to_sink_lo = htobe32((u32)(wr->sg_list[0].addr));
 260         } else {
 261                 wqe->read.stag_src = htobe32(2);
 262                 wqe->read.to_src_hi = 0;
 263                 wqe->read.to_src_lo = 0;
 264                 wqe->read.stag_sink = htobe32(2);
 265                 wqe->read.plen = 0;
 266                 wqe->read.to_sink_hi = 0;
 267                 wqe->read.to_sink_lo = 0;
 268         }
 269         wqe->read.r2 = 0;
 270         wqe->read.r5 = 0;
 271         *len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
 272         return 0;
 273 }
 274
 275 static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
 276                            struct ibv_recv_wr *wr, u8 *len16)
 277 {
 278         int ret;
 279
 280         ret = build_isgl(&wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
 281         if (ret)
 282                 return ret;
 283         *len16 = DIV_ROUND_UP(sizeof wqe->recv +
 284                               wr->num_sge * sizeof(struct fw_ri_sge), 16);
 285         return 0;
 286 }
 287
 288 static void ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 idx)
 289 {
 290         struct ibv_modify_qp cmd = {};
 291         struct ibv_qp_attr attr;
 292         int mask;
 293         int __attribute__((unused)) ret;
 294
 295         /* FIXME: Why do we need this barrier if the kernel is going to
 296            trigger the DMA? */
 297         udma_to_device_barrier();
 298         if (qid == qhp->wq.sq.qid) {
 299                 attr.sq_psn = idx;
 300                 mask = IBV_QP_SQ_PSN;
 301         } else  {
 302                 attr.rq_psn = idx;
 303                 mask = IBV_QP_RQ_PSN;
 304         }
 305         ret = ibv_cmd_modify_qp(&qhp->ibv_qp, &attr, mask, &cmd, sizeof cmd);
 306         assert(!ret);
 307 }
 308
 309 int c4iw_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 310                    struct ibv_send_wr **bad_wr)
 311 {
 312         int err = 0;
 313         u8 len16 = 0;
 314         enum fw_wr_opcodes fw_opcode;
 315         enum fw_ri_wr_flags fw_flags;
 316         struct c4iw_qp *qhp;
 317         union t4_wr *wqe, lwqe;
 318         u32 num_wrs;
 319         struct t4_swsqe *swsqe;
 320         u16 idx = 0;
 321
 322         qhp = to_c4iw_qp(ibqp);
 323         pthread_spin_lock(&qhp->lock);
 324         if (t4_wq_in_error(&qhp->wq)) {
 325                 pthread_spin_unlock(&qhp->lock);
 326                 *bad_wr = wr;
 327                 return -EINVAL;
 328         }
 329         num_wrs = t4_sq_avail(&qhp->wq);
 330         if (num_wrs == 0) {
 331                 pthread_spin_unlock(&qhp->lock);
 332                 *bad_wr = wr;
 333                 return -ENOMEM;
 334         }
 335         while (wr) {
 336                 if (num_wrs == 0) {
 337                         err = -ENOMEM;
 338                         *bad_wr = wr;
 339                         break;
 340                 }
 341
 342                 wqe = &lwqe;
 343                 fw_flags = 0;
 344                 if (wr->send_flags & IBV_SEND_SOLICITED)
 345                         fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
 346                 if (wr->send_flags & IBV_SEND_SIGNALED || qhp->sq_sig_all)
 347                         fw_flags |= FW_RI_COMPLETION_FLAG;
 348                 swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
 349                 switch (wr->opcode) {
 350                 case IBV_WR_SEND:
 351                         INC_STAT(send);
 352                         if (wr->send_flags & IBV_SEND_FENCE)
 353                                 fw_flags |= FW_RI_READ_FENCE_FLAG;
 354                         fw_opcode = FW_RI_SEND_WR;
 355                         swsqe->opcode = FW_RI_SEND;
 356                         err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
 357                         break;
 358                 case IBV_WR_RDMA_WRITE:
 359                         INC_STAT(write);
 360                         fw_opcode = FW_RI_RDMA_WRITE_WR;
 361                         swsqe->opcode = FW_RI_RDMA_WRITE;
 362                         err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
 363                         break;
 364                 case IBV_WR_RDMA_READ:
 365                         INC_STAT(read);
 366                         fw_opcode = FW_RI_RDMA_READ_WR;
 367                         swsqe->opcode = FW_RI_READ_REQ;
 368                         fw_flags = 0;
 369                         err = build_rdma_read(wqe, wr, &len16);
 370                         if (err)
 371                                 break;
 372                         swsqe->read_len = wr->sg_list ? wr->sg_list[0].length :
 373                                           0;
 374                         if (!qhp->wq.sq.oldest_read)
 375                                 qhp->wq.sq.oldest_read = swsqe;
 376                         break;
 377                 default:
 378                         PDBG("%s post of type=%d TBD!\n", __func__,
 379                              wr->opcode);
 380                         err = -EINVAL;
 381                 }
 382                 if (err) {
 383                         *bad_wr = wr;
 384                         break;
 385                 }
 386                 swsqe->idx = qhp->wq.sq.pidx;
 387                 swsqe->complete = 0;
 388                 swsqe->signaled = (wr->send_flags & IBV_SEND_SIGNALED) ||
 389                                   qhp->sq_sig_all;
 390                 swsqe->flushed = 0;
 391                 swsqe->wr_id = wr->wr_id;
 392
 393                 init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
 394                 PDBG("%s cookie 0x%llx pidx 0x%x opcode 0x%x\n",
 395                      __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
 396                      swsqe->opcode);
 397                 wr = wr->next;
 398                 num_wrs--;
 399                 copy_wr_to_sq(&qhp->wq, wqe, len16);
 400                 t4_sq_produce(&qhp->wq, len16);
 401                 idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
 402         }
 403         if (t4_wq_db_enabled(&qhp->wq)) {
 404                 t4_ring_sq_db(&qhp->wq, idx, dev_is_t4(qhp->rhp),
 405                               len16, wqe);
 406         } else
 407                 ring_kernel_db(qhp, qhp->wq.sq.qid, idx);
 408         /* This write is only for debugging, the value does not matter for DMA
 409          */
 410         qhp->wq.sq.queue[qhp->wq.sq.size].status.host_wq_pidx = \
 411                         (qhp->wq.sq.wq_pidx);
 412
 413         pthread_spin_unlock(&qhp->lock);
 414         return err;
 415 }
 416
 417 int c4iw_post_receive(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
 418                            struct ibv_recv_wr **bad_wr)
 419 {
 420         int err = 0;
 421         struct c4iw_qp *qhp;
 422         union t4_recv_wr *wqe, lwqe;
 423         u32 num_wrs;
 424         u8 len16 = 0;
 425         u16 idx = 0;
 426
 427         qhp = to_c4iw_qp(ibqp);
 428         pthread_spin_lock(&qhp->lock);
 429         if (t4_wq_in_error(&qhp->wq)) {
 430                 pthread_spin_unlock(&qhp->lock);
 431                 *bad_wr = wr;
 432                 return -EINVAL;
 433         }
 434         INC_STAT(recv);
 435         num_wrs = t4_rq_avail(&qhp->wq);
 436         if (num_wrs == 0) {
 437                 pthread_spin_unlock(&qhp->lock);
 438                 *bad_wr = wr;
 439                 return -ENOMEM;
 440         }
 441         while (wr) {
 442                 if (wr->num_sge > T4_MAX_RECV_SGE) {
 443                         err = -EINVAL;
 444                         *bad_wr = wr;
 445                         break;
 446                 }
 447                 wqe = &lwqe;
 448                 if (num_wrs)
 449                         err = build_rdma_recv(qhp, wqe, wr, &len16);
 450                 else
 451                         err = -ENOMEM;
 452                 if (err) {
 453                         *bad_wr = wr;
 454                         break;
 455                 }
 456
 457                 qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id;
 458
 459                 wqe->recv.opcode = FW_RI_RECV_WR;
 460                 wqe->recv.r1 = 0;
 461                 wqe->recv.wrid = qhp->wq.rq.pidx;
 462                 wqe->recv.r2[0] = 0;
 463                 wqe->recv.r2[1] = 0;
 464                 wqe->recv.r2[2] = 0;
 465                 wqe->recv.len16 = len16;
 466                 PDBG("%s cookie 0x%llx pidx %u\n", __func__,
 467                      (unsigned long long) wr->wr_id, qhp->wq.rq.pidx);
 468                 copy_wr_to_rq(&qhp->wq, wqe, len16);
 469                 t4_rq_produce(&qhp->wq, len16);
 470                 idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
 471                 wr = wr->next;
 472                 num_wrs--;
 473         }
 474         if (t4_wq_db_enabled(&qhp->wq))
 475                 t4_ring_rq_db(&qhp->wq, idx, dev_is_t4(qhp->rhp),
 476                               len16, wqe);
 477         else
 478                 ring_kernel_db(qhp, qhp->wq.rq.qid, idx);
 479         qhp->wq.rq.queue[qhp->wq.rq.size].status.host_wq_pidx = \
 480                         (qhp->wq.rq.wq_pidx);
 481         pthread_spin_unlock(&qhp->lock);
 482         return err;
 483 }
 484
 485 static void update_qp_state(struct c4iw_qp *qhp)
 486 {
 487         struct ibv_query_qp cmd;
 488         struct ibv_qp_attr attr;
 489         struct ibv_qp_init_attr iattr;
 490         int ret;
 491
 492         ret = ibv_cmd_query_qp(&qhp->ibv_qp, &attr, IBV_QP_STATE, &iattr,
 493                                &cmd, sizeof cmd);
 494         assert(!ret);
 495         if (!ret)
 496                 qhp->ibv_qp.state = attr.qp_state;
 497 }
 498
 499 /*
 500  * Assumes qhp lock is held.
 501  */
 502 void c4iw_flush_qp(struct c4iw_qp *qhp)
 503 {
 504         struct c4iw_cq *rchp, *schp;
 505         int count;
 506
 507         if (qhp->wq.flushed)
 508                 return;
 509
 510         update_qp_state(qhp);
 511
 512         rchp = to_c4iw_cq(qhp->ibv_qp.recv_cq);
 513         schp = to_c4iw_cq(qhp->ibv_qp.send_cq);
 514
 515         PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
 516         qhp->wq.flushed = 1;
 517         pthread_spin_unlock(&qhp->lock);
 518
 519         /* locking heirarchy: cq lock first, then qp lock. */
 520         pthread_spin_lock(&rchp->lock);
 521         pthread_spin_lock(&qhp->lock);
 522         c4iw_flush_hw_cq(rchp);
 523         c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
 524         c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
 525         pthread_spin_unlock(&qhp->lock);
 526         pthread_spin_unlock(&rchp->lock);
 527
 528         /* locking heirarchy: cq lock first, then qp lock. */
 529         pthread_spin_lock(&schp->lock);
 530         pthread_spin_lock(&qhp->lock);
 531         if (schp != rchp)
 532                 c4iw_flush_hw_cq(schp);
 533         c4iw_flush_sq(qhp);
 534         pthread_spin_unlock(&qhp->lock);
 535         pthread_spin_unlock(&schp->lock);
 536         pthread_spin_lock(&qhp->lock);
 537 }
 538
 539 void c4iw_flush_qps(struct c4iw_dev *dev)
 540 {
 541         int i;
 542
 543         pthread_spin_lock(&dev->lock);
 544         for (i=0; i < dev->max_qp; i++) {
 545                 struct c4iw_qp *qhp = dev->qpid2ptr[i];
 546                 if (qhp) {
 547                         if (!qhp->wq.flushed && t4_wq_in_error(&qhp->wq)) {
 548                                 pthread_spin_lock(&qhp->lock);
 549                                 c4iw_flush_qp(qhp);
 550                                 pthread_spin_unlock(&qhp->lock);
 551                         }
 552                 }
 553         }
 554         pthread_spin_unlock(&dev->lock);
 555 }