/*
 * Copyright (c) 2006-2016 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <config.h>

#include <assert.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <stdio.h>
#include "libcxgb4.h"

#ifdef STATS
struct c4iw_stats c4iw_stats;
#endif

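/*
 * Copy a locally built send WQE into the software send queue at the
 * current producer index, wrapping around at the end of the queue.
 * For on-chip queues the copy targets write-combining memory, so it is
 * bracketed with mmio_wc_start()/mmio_flush_writes().
 */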
static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16)
{
        void *src, *dst;
        uintptr_t end;
        int total, len;

        src = &wqe->flits[0];
        dst = &wq->sq.queue->flits[wq->sq.wq_pidx *
            (T4_EQ_ENTRY_SIZE / sizeof(__be64))];
        if (t4_sq_onchip(wq)) {
                len16 = align(len16, 4);

                /* In onchip mode the copy below will be made to WC memory and
                 * could trigger DMA. In offchip mode the copy below only
                 * queues the WQE, DMA cannot start until t4_ring_sq_db
                 * happens. */
                mmio_wc_start();
        }

        /* NOTE: len16 can never be large enough to write the same sq.queue
           memory twice in this copy. */
        total = len16 * 16;
        end = (uintptr_t)&wq->sq.queue[wq->sq.size];
        if (__predict_true((uintptr_t)dst + total <= end)) {
                /* Won't wrap around. */
                memcpy(dst, src, total);
        } else {
                len = end - (uintptr_t)dst;
                memcpy(dst, src, len);
                memcpy(wq->sq.queue, src + len, total - len);
        }

        if (t4_sq_onchip(wq))
                mmio_flush_writes();
}

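/*
 * Copy a locally built receive WQE into the receive queue at the
 * current producer index, wrapping around at the end of the queue.
 */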
static void copy_wr_to_rq(struct t4_wq *wq, union t4_recv_wr *wqe, u8 len16)
{
        void *src, *dst;
        uintptr_t end;
        int total, len;

        src = &wqe->flits[0];
        dst = &wq->rq.queue->flits[wq->rq.wq_pidx *
            (T4_EQ_ENTRY_SIZE / sizeof(__be64))];

        total = len16 * 16;
        end = (uintptr_t)&wq->rq.queue[wq->rq.size];
        if (__predict_true((uintptr_t)dst + total <= end)) {
                /* Won't wrap around. */
                memcpy(dst, src, total);
        } else {
                len = end - (uintptr_t)dst;
                memcpy(dst, src, len);
                memcpy(wq->rq.queue, src + len, total - len);
        }
}

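/*
 * Copy the SGE payloads of an inline work request directly into the WQE
 * as immediate data, zero-padding out to a 16-byte boundary.  Returns
 * -EMSGSIZE if the combined length exceeds 'max'.
 */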
static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
                      struct ibv_send_wr *wr, int max, u32 *plenp)
{
        u8 *dstp, *srcp;
        u32 plen = 0;
        int i;
        int len;

        dstp = (u8 *)immdp->data;
        for (i = 0; i < wr->num_sge; i++) {
                if ((plen + wr->sg_list[i].length) > max)
                        return -EMSGSIZE;
                srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
                plen += wr->sg_list[i].length;
                len = wr->sg_list[i].length;
                memcpy(dstp, srcp, len);
                dstp += len;
                srcp += len;
        }
        len = ROUND_UP(plen + 8, 16) - (plen + 8);
        if (len)
                memset(dstp, 0, len);
        immdp->op = FW_RI_DATA_IMMD;
        immdp->r1 = 0;
        immdp->r2 = 0;
        immdp->immdlen = htobe32(plen);
        *plenp = plen;
        return 0;
}

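/*
 * Build a firmware ISGL (scatter/gather list) from the caller's SGEs,
 * reporting the total payload length through *plenp when requested.
 * Returns -EMSGSIZE if the summed lengths overflow.
 */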
static int build_isgl(struct fw_ri_isgl *isglp, struct ibv_sge *sg_list,
                      int num_sge, u32 *plenp)
{
        int i;
        u32 plen = 0;
        __be64 *flitp = (__be64 *)isglp->sge;

        for (i = 0; i < num_sge; i++) {
                if ((plen + sg_list[i].length) < plen)
                        return -EMSGSIZE;
                plen += sg_list[i].length;
                *flitp++ = htobe64(((u64)sg_list[i].lkey << 32) |
                                   sg_list[i].length);
                *flitp++ = htobe64(sg_list[i].addr);
        }
        *flitp = 0;
        isglp->op = FW_RI_DATA_ISGL;
        isglp->r1 = 0;
        isglp->nsge = htobe16(num_sge);
        isglp->r2 = 0;
        if (plenp)
                *plenp = plen;
        return 0;
}

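/*
 * Build a FW_RI_SEND_WR work request, using immediate data for inline
 * sends and an ISGL otherwise, and report its length in 16-byte units.
 */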
static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
                           struct ibv_send_wr *wr, u8 *len16)
{
        u32 plen;
        int size;
        int ret;

        if (wr->num_sge > T4_MAX_SEND_SGE)
                return -EINVAL;
        if (wr->send_flags & IBV_SEND_SOLICITED)
                wqe->send.sendop_pkd = htobe32(
                        FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE));
        else
                wqe->send.sendop_pkd = htobe32(
                        FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND));
        wqe->send.stag_inv = 0;
        wqe->send.r3 = 0;
        wqe->send.r4 = 0;

        plen = 0;
        if (wr->num_sge) {
                if (wr->send_flags & IBV_SEND_INLINE) {
                        ret = build_immd(sq, wqe->send.u.immd_src, wr,
                                         T4_MAX_SEND_INLINE, &plen);
                        if (ret)
                                return ret;
                        size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
                               plen;
                } else {
                        ret = build_isgl(wqe->send.u.isgl_src,
                                         wr->sg_list, wr->num_sge, &plen);
                        if (ret)
                                return ret;
                        size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
                               wr->num_sge * sizeof(struct fw_ri_sge);
                }
        } else {
                wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
                wqe->send.u.immd_src[0].r1 = 0;
                wqe->send.u.immd_src[0].r2 = 0;
                wqe->send.u.immd_src[0].immdlen = 0;
                size = sizeof wqe->send + sizeof(struct fw_ri_immd);
                plen = 0;
        }
        *len16 = DIV_ROUND_UP(size, 16);
        wqe->send.plen = htobe32(plen);
        return 0;
}

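/*
 * Build a FW_RI_RDMA_WRITE_WR work request.  As with sends, inline data
 * is placed in the WQE as immediate data; otherwise an ISGL is built.
 */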
static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
                            struct ibv_send_wr *wr, u8 *len16)
{
        u32 plen;
        int size;
        int ret;

        if (wr->num_sge > T4_MAX_SEND_SGE)
                return -EINVAL;
        wqe->write.r2 = 0;
        wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey);
        wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr);
        if (wr->num_sge) {
                if (wr->send_flags & IBV_SEND_INLINE) {
                        ret = build_immd(sq, wqe->write.u.immd_src, wr,
                                         T4_MAX_WRITE_INLINE, &plen);
                        if (ret)
                                return ret;
                        size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
                               plen;
                } else {
                        ret = build_isgl(wqe->write.u.isgl_src,
                                         wr->sg_list, wr->num_sge, &plen);
                        if (ret)
                                return ret;
                        size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
                               wr->num_sge * sizeof(struct fw_ri_sge);
                }
        } else {
                wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
                wqe->write.u.immd_src[0].r1 = 0;
                wqe->write.u.immd_src[0].r2 = 0;
                wqe->write.u.immd_src[0].immdlen = 0;
                size = sizeof wqe->write + sizeof(struct fw_ri_immd);
                plen = 0;
        }
        *len16 = DIV_ROUND_UP(size, 16);
        wqe->write.plen = htobe32(plen);
        return 0;
}

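/*
 * Build a FW_RI_RDMA_READ_WR work request.  Only a single SGE is
 * supported; with no SGE, a zero-length read using STAG 2 is built.
 */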
static int build_rdma_read(union t4_wr *wqe, struct ibv_send_wr *wr, u8 *len16)
{
        if (wr->num_sge > 1)
                return -EINVAL;
        if (wr->num_sge) {
                wqe->read.stag_src = htobe32(wr->wr.rdma.rkey);
                wqe->read.to_src_hi = htobe32((u32)(wr->wr.rdma.remote_addr >> 32));
                wqe->read.to_src_lo = htobe32((u32)wr->wr.rdma.remote_addr);
                wqe->read.stag_sink = htobe32(wr->sg_list[0].lkey);
                wqe->read.plen = htobe32(wr->sg_list[0].length);
                wqe->read.to_sink_hi = htobe32((u32)(wr->sg_list[0].addr >> 32));
                wqe->read.to_sink_lo = htobe32((u32)(wr->sg_list[0].addr));
        } else {
                wqe->read.stag_src = htobe32(2);
                wqe->read.to_src_hi = 0;
                wqe->read.to_src_lo = 0;
                wqe->read.stag_sink = htobe32(2);
                wqe->read.plen = 0;
                wqe->read.to_sink_hi = 0;
                wqe->read.to_sink_lo = 0;
        }
        wqe->read.r2 = 0;
        wqe->read.r5 = 0;
        *len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
        return 0;
}

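/*
 * Build the ISGL for a receive work request and report its length in
 * 16-byte units.
 */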
static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
                           struct ibv_recv_wr *wr, u8 *len16)
{
        int ret;

        ret = build_isgl(&wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
        if (ret)
                return ret;
        *len16 = DIV_ROUND_UP(sizeof wqe->recv +
                              wr->num_sge * sizeof(struct fw_ri_sge), 16);
        return 0;
}

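/*
 * User doorbells are disabled for this queue, so ask the kernel to ring
 * the doorbell on our behalf: the new producer index is carried to the
 * kernel driver in the SQ/RQ PSN attribute of a modify-QP command.
 */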
static void ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 idx)
{
        struct ibv_modify_qp cmd = {};
        struct ibv_qp_attr attr;
        int mask;
        int __attribute__((unused)) ret;

        /* FIXME: Why do we need this barrier if the kernel is going to
           trigger the DMA? */
        udma_to_device_barrier();
        if (qid == qhp->wq.sq.qid) {
                attr.sq_psn = idx;
                mask = IBV_QP_SQ_PSN;
        } else {
                attr.rq_psn = idx;
                mask = IBV_QP_RQ_PSN;
        }
        ret = ibv_cmd_modify_qp(&qhp->ibv_qp, &attr, mask, &cmd, sizeof cmd);
        assert(!ret);
}

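/*
 * Post a chain of send work requests.  Each WR is built into a local
 * WQE, copied into the send queue, and accounted for; the doorbell is
 * rung once at the end, either directly or via the kernel when user
 * doorbells are disabled.
 */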
int c4iw_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
                   struct ibv_send_wr **bad_wr)
{
        int err = 0;
        u8 len16 = 0;
        enum fw_wr_opcodes fw_opcode;
        enum fw_ri_wr_flags fw_flags;
        struct c4iw_qp *qhp;
        union t4_wr *wqe, lwqe;
        u32 num_wrs;
        struct t4_swsqe *swsqe;
        u16 idx = 0;

        qhp = to_c4iw_qp(ibqp);
        pthread_spin_lock(&qhp->lock);
        if (t4_wq_in_error(&qhp->wq)) {
                pthread_spin_unlock(&qhp->lock);
                *bad_wr = wr;
                return -EINVAL;
        }
        num_wrs = t4_sq_avail(&qhp->wq);
        if (num_wrs == 0) {
                pthread_spin_unlock(&qhp->lock);
                *bad_wr = wr;
                return -ENOMEM;
        }
        while (wr) {
                if (num_wrs == 0) {
                        err = -ENOMEM;
                        *bad_wr = wr;
                        break;
                }

                wqe = &lwqe;
                fw_flags = 0;
                if (wr->send_flags & IBV_SEND_SOLICITED)
                        fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
                if (wr->send_flags & IBV_SEND_SIGNALED || qhp->sq_sig_all)
                        fw_flags |= FW_RI_COMPLETION_FLAG;
                swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
                switch (wr->opcode) {
                case IBV_WR_SEND:
                        INC_STAT(send);
                        if (wr->send_flags & IBV_SEND_FENCE)
                                fw_flags |= FW_RI_READ_FENCE_FLAG;
                        fw_opcode = FW_RI_SEND_WR;
                        swsqe->opcode = FW_RI_SEND;
                        err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
                        break;
                case IBV_WR_RDMA_WRITE:
                        INC_STAT(write);
                        fw_opcode = FW_RI_RDMA_WRITE_WR;
                        swsqe->opcode = FW_RI_RDMA_WRITE;
                        err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
                        break;
                case IBV_WR_RDMA_READ:
                        INC_STAT(read);
                        fw_opcode = FW_RI_RDMA_READ_WR;
                        swsqe->opcode = FW_RI_READ_REQ;
                        fw_flags = 0;
                        err = build_rdma_read(wqe, wr, &len16);
                        if (err)
                                break;
                        swsqe->read_len = wr->sg_list ? wr->sg_list[0].length :
                                          0;
                        if (!qhp->wq.sq.oldest_read)
                                qhp->wq.sq.oldest_read = swsqe;
                        break;
                default:
                        PDBG("%s post of type=%d TBD!\n", __func__,
                             wr->opcode);
                        err = -EINVAL;
                }
                if (err) {
                        *bad_wr = wr;
                        break;
                }
                swsqe->idx = qhp->wq.sq.pidx;
                swsqe->complete = 0;
                swsqe->signaled = (wr->send_flags & IBV_SEND_SIGNALED) ||
                                  qhp->sq_sig_all;
                swsqe->flushed = 0;
                swsqe->wr_id = wr->wr_id;

                init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
                PDBG("%s cookie 0x%llx pidx 0x%x opcode 0x%x\n",
                     __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
                     swsqe->opcode);
                wr = wr->next;
                num_wrs--;
                copy_wr_to_sq(&qhp->wq, wqe, len16);
                t4_sq_produce(&qhp->wq, len16);
                idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
        }
        if (t4_wq_db_enabled(&qhp->wq)) {
                t4_ring_sq_db(&qhp->wq, idx, dev_is_t4(qhp->rhp),
                              len16, wqe);
        } else
                ring_kernel_db(qhp, qhp->wq.sq.qid, idx);
        /* This write is only for debugging, the value does not matter for DMA
         */
        qhp->wq.sq.queue[qhp->wq.sq.size].status.host_wq_pidx =
                        (qhp->wq.sq.wq_pidx);

        pthread_spin_unlock(&qhp->lock);
        return err;
}

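/*
 * Post a chain of receive work requests, mirroring c4iw_post_send():
 * build each WQE locally, copy it into the receive queue, then ring
 * the doorbell once for the whole chain.
 */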
int c4iw_post_receive(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
                      struct ibv_recv_wr **bad_wr)
{
        int err = 0;
        struct c4iw_qp *qhp;
        union t4_recv_wr *wqe, lwqe;
        u32 num_wrs;
        u8 len16 = 0;
        u16 idx = 0;

        qhp = to_c4iw_qp(ibqp);
        pthread_spin_lock(&qhp->lock);
        if (t4_wq_in_error(&qhp->wq)) {
                pthread_spin_unlock(&qhp->lock);
                *bad_wr = wr;
                return -EINVAL;
        }
        INC_STAT(recv);
        num_wrs = t4_rq_avail(&qhp->wq);
        if (num_wrs == 0) {
                pthread_spin_unlock(&qhp->lock);
                *bad_wr = wr;
                return -ENOMEM;
        }
        while (wr) {
                if (wr->num_sge > T4_MAX_RECV_SGE) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        break;
                }
                wqe = &lwqe;
                if (num_wrs)
                        err = build_rdma_recv(qhp, wqe, wr, &len16);
                else
                        err = -ENOMEM;
                if (err) {
                        *bad_wr = wr;
                        break;
                }

                qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id;

                wqe->recv.opcode = FW_RI_RECV_WR;
                wqe->recv.r1 = 0;
                wqe->recv.wrid = qhp->wq.rq.pidx;
                wqe->recv.r2[0] = 0;
                wqe->recv.r2[1] = 0;
                wqe->recv.r2[2] = 0;
                wqe->recv.len16 = len16;
                PDBG("%s cookie 0x%llx pidx %u\n", __func__,
                     (unsigned long long)wr->wr_id, qhp->wq.rq.pidx);
                copy_wr_to_rq(&qhp->wq, wqe, len16);
                t4_rq_produce(&qhp->wq, len16);
                idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
                wr = wr->next;
                num_wrs--;
        }
        if (t4_wq_db_enabled(&qhp->wq))
                t4_ring_rq_db(&qhp->wq, idx, dev_is_t4(qhp->rhp),
                              len16, wqe);
        else
                ring_kernel_db(qhp, qhp->wq.rq.qid, idx);
        qhp->wq.rq.queue[qhp->wq.rq.size].status.host_wq_pidx =
                        (qhp->wq.rq.wq_pidx);
        pthread_spin_unlock(&qhp->lock);
        return err;
}

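/*
 * Refresh the cached ibv_qp state from the kernel via a query-QP
 * command.
 */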
static void update_qp_state(struct c4iw_qp *qhp)
{
        struct ibv_query_qp cmd;
        struct ibv_qp_attr attr;
        struct ibv_qp_init_attr iattr;
        int ret;

        ret = ibv_cmd_query_qp(&qhp->ibv_qp, &attr, IBV_QP_STATE, &iattr,
                               &cmd, sizeof cmd);
        assert(!ret);
        if (!ret)
                qhp->ibv_qp.state = attr.qp_state;
}

/*
 * Assumes the qhp lock is held on entry.  The lock is dropped and
 * re-acquired internally so that the CQ locks can be taken first,
 * preserving the cq-then-qp locking hierarchy.
 */
void c4iw_flush_qp(struct c4iw_qp *qhp)
{
        struct c4iw_cq *rchp, *schp;
        int count;

        if (qhp->wq.flushed)
                return;

        update_qp_state(qhp);

        rchp = to_c4iw_cq(qhp->ibv_qp.recv_cq);
        schp = to_c4iw_cq(qhp->ibv_qp.send_cq);

        PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
        qhp->wq.flushed = 1;
        pthread_spin_unlock(&qhp->lock);

        /* Locking hierarchy: cq lock first, then qp lock. */
        pthread_spin_lock(&rchp->lock);
        pthread_spin_lock(&qhp->lock);
        c4iw_flush_hw_cq(rchp);
        c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
        c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
        pthread_spin_unlock(&qhp->lock);
        pthread_spin_unlock(&rchp->lock);

        /* Locking hierarchy: cq lock first, then qp lock. */
        pthread_spin_lock(&schp->lock);
        pthread_spin_lock(&qhp->lock);
        if (schp != rchp)
                c4iw_flush_hw_cq(schp);
        c4iw_flush_sq(qhp);
        pthread_spin_unlock(&qhp->lock);
        pthread_spin_unlock(&schp->lock);
        pthread_spin_lock(&qhp->lock);
}

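/*
 * Flush every QP on the device that has entered the error state but
 * has not yet been flushed.
 */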
void c4iw_flush_qps(struct c4iw_dev *dev)
{
        int i;

        pthread_spin_lock(&dev->lock);
        for (i = 0; i < dev->max_qp; i++) {
                struct c4iw_qp *qhp = dev->qpid2ptr[i];

                if (qhp) {
                        if (!qhp->wq.flushed && t4_wq_in_error(&qhp->wq)) {
                                pthread_spin_lock(&qhp->lock);
                                c4iw_flush_qp(qhp);
                                pthread_spin_unlock(&qhp->lock);
                        }
                }
        }
        pthread_spin_unlock(&dev->lock);
}