/**************************************************************************

Copyright (c) 2007, Chelsio Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/pciio.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/ioccom.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <netinet/in.h>

#include <contrib/rdma/ib_verbs.h>
#include <contrib/rdma/ib_umem.h>
#include <contrib/rdma/ib_user_verbs.h>
#ifdef CONFIG_DEFINED
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>
#include <ulp/iw_cxgb/iw_cxgb_resource.h>
#include <ulp/iw_cxgb/iw_cxgb_user.h>
#else
#include <dev/cxgb/cxgb_include.h>
#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h>
#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h>
#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h>
#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h>
#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h>
#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h>
#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h>
#endif
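
/*
 * The iwch_build_* helpers below translate a generic verbs work request
 * into the T3 WQE format in place at *wqe, reporting the WR size in
 * 8-byte flits through *flit_cnt.
 *
 * SEND: pick the T3 rdmaop from the IB opcode and solicited-event flag,
 * then copy either the 4-byte immediate or the SGL (byte-swapped to big
 * endian for the hardware), accumulating the payload length with
 * overflow checks.
 */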
static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
				u8 *flit_cnt)
{
	int i;
	u32 plen;

	switch (wr->opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
		if (wr->send_flags & IB_SEND_SOLICITED)
			wqe->send.rdmaop = T3_SEND_WITH_SE;
		else
			wqe->send.rdmaop = T3_SEND;
		wqe->send.rem_stag = 0;
		break;
#if 0 /* Not currently supported */
	case TYPE_SEND_INVALIDATE:
	case TYPE_SEND_INVALIDATE_IMMEDIATE:
		wqe->send.rdmaop = T3_SEND_WITH_INV;
		wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
		break;
	case TYPE_SEND_SE_INVALIDATE:
		wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
		wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
		break;
#endif
	default:
		break;
	}
	if (wr->num_sge > T3_MAX_SGE)
		return -EINVAL;
	wqe->send.reserved[0] = 0;
	wqe->send.reserved[1] = 0;
	wqe->send.reserved[2] = 0;
	if (wr->opcode == IB_WR_SEND_WITH_IMM) {
		plen = 4;
		wqe->send.sgl[0].stag = wr->imm_data;
		wqe->send.sgl[0].len = 0;
		wqe->send.num_sgle = 0;
		*flit_cnt = 5;
	} else {
		plen = 0;
		for (i = 0; i < wr->num_sge; i++) {
			if ((plen + wr->sg_list[i].length) < plen)
				return -EMSGSIZE;
			plen += wr->sg_list[i].length;
			wqe->send.sgl[i].stag =
			    htobe32(wr->sg_list[i].lkey);
			wqe->send.sgl[i].len =
			    htobe32(wr->sg_list[i].length);
			wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
		}
		wqe->send.num_sgle = htobe32(wr->num_sge);
		*flit_cnt = 4 + ((wr->num_sge) << 1);
	}
	wqe->send.plen = htobe32(plen);
	return 0;
}
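
/*
 * RDMA WRITE: the sink STag and tagged offset come from wr->wr.rdma;
 * the payload is either the 4-byte immediate or the local SGL, with the
 * same overflow-checked length accumulation as SEND.
 */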
static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
				 u8 *flit_cnt)
{
	int i;
	u32 plen;

	if (wr->num_sge > T3_MAX_SGE)
		return -EINVAL;
	wqe->write.rdmaop = T3_RDMA_WRITE;
	wqe->write.reserved[0] = 0;
	wqe->write.reserved[1] = 0;
	wqe->write.reserved[2] = 0;
	wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey);
	wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr);

	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
		plen = 4;
		wqe->write.sgl[0].stag = wr->imm_data;
		wqe->write.sgl[0].len = 0;
		wqe->write.num_sgle = 0;
		*flit_cnt = 6;
	} else {
		plen = 0;
		for (i = 0; i < wr->num_sge; i++) {
			if ((plen + wr->sg_list[i].length) < plen)
				return -EMSGSIZE;
			plen += wr->sg_list[i].length;
			wqe->write.sgl[i].stag =
			    htobe32(wr->sg_list[i].lkey);
			wqe->write.sgl[i].len =
			    htobe32(wr->sg_list[i].length);
			wqe->write.sgl[i].to =
			    htobe64(wr->sg_list[i].addr);
		}
		wqe->write.num_sgle = htobe32(wr->num_sge);
		*flit_cnt = 5 + ((wr->num_sge) << 1);
	}
	wqe->write.plen = htobe32(plen);
	return 0;
}
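
/*
 * RDMA READ: T3 read requests carry exactly one local SGE, so only
 * sg_list[0] is consumed here.
 */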
static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
				u8 *flit_cnt)
{
	if (wr->num_sge > 1)
		return -EINVAL;
	wqe->read.rdmaop = T3_READ_REQ;
	wqe->read.reserved[0] = 0;
	wqe->read.reserved[1] = 0;
	wqe->read.reserved[2] = 0;
	wqe->read.rem_stag = htobe32(wr->wr.rdma.rkey);
	wqe->read.rem_to = htobe64(wr->wr.rdma.remote_addr);
	wqe->read.local_stag = htobe32(wr->sg_list[0].lkey);
	wqe->read.local_len = htobe32(wr->sg_list[0].length);
	wqe->read.local_to = htobe64(wr->sg_list[0].addr);
	*flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
	return 0;
}
/*
 * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
 */
static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
			    u32 num_sgle, u32 *pbl_addr, u8 *page_size)
{
	int i;
	struct iwch_mr *mhp;
	u64 offset;

	for (i = 0; i < num_sgle; i++) {

		mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
		if (!mhp) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return -EIO;
		}
		if (!mhp->attr.state) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return -EIO;
		}
		if (mhp->attr.zbva) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return -EIO;
		}

		if (sg_list[i].addr < mhp->attr.va_fbo) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return -EINVAL;
		}
		if (sg_list[i].addr + ((u64) sg_list[i].length) <
		    sg_list[i].addr) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return -EINVAL;
		}
		if (sg_list[i].addr + ((u64) sg_list[i].length) >
		    mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return -EINVAL;
		}
		offset = sg_list[i].addr - mhp->attr.va_fbo;
		offset += ((u32) mhp->attr.va_fbo) %
		    (1UL << (12 + mhp->attr.page_size));
		pbl_addr[i] = ((mhp->attr.pbl_addr -
		    rhp->rdev.rnic_info.pbl_base) >> 3) +
		    (offset >> (12 + mhp->attr.page_size));
		page_size[i] = mhp->attr.page_size;
	}
	return 0;
}
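
/*
 * RECV: copy the SGL into the T3 receive WR and zero-fill the unused
 * entries up to T3_MAX_SGE so no stale data reaches the hardware.
 */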
static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
				struct ib_recv_wr *wr)
{
	int i;

	if (wr->num_sge > T3_MAX_SGE)
		return -EINVAL;
	wqe->recv.num_sgle = htobe32(wr->num_sge);
	for (i = 0; i < wr->num_sge; i++) {
		wqe->recv.sgl[i].stag = htobe32(wr->sg_list[i].lkey);
		wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
		wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
	}
	for (; i < T3_MAX_SGE; i++) {
		wqe->recv.sgl[i].stag = 0;
		wqe->recv.sgl[i].len = 0;
		wqe->recv.sgl[i].to = 0;
	}
	return 0;
}
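
/*
 * Post a chain of send work requests.  For each WR: claim a software SQ
 * slot, build the opcode-specific WQE with the helpers above, record
 * completion bookkeeping in the swsq entry, stamp the firmware header
 * (including the generation bit), and ring the doorbell once after the
 * whole chain is built.
 */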
int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		   struct ib_send_wr **bad_wr)
{
	int err = 0;
	u8 t3_wr_flit_cnt = 0;
	enum t3_wr_opcode t3_wr_opcode = 0;
	enum t3_wr_flags t3_wr_flags;
	struct iwch_qp *qhp;
	u32 idx;
	union t3_wr *wqe;
	u32 num_wrs;
	struct t3_swsq *sqp;

	qhp = to_iwch_qp(ibqp);
	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		return -EINVAL;
	}
	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
		  qhp->wq.sq_size_log2);
	if (num_wrs <= 0) {
		mtx_unlock(&qhp->lock);
		return -ENOMEM;
	}
	while (wr) {
		if (num_wrs == 0) {
			err = -ENOMEM;
			*bad_wr = wr;
			break;
		}
		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
		wqe = (union t3_wr *) (qhp->wq.queue + idx);
		t3_wr_flags = 0;
		if (wr->send_flags & IB_SEND_SOLICITED)
			t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
		if (wr->send_flags & IB_SEND_FENCE)
			t3_wr_flags |= T3_READ_FENCE_FLAG;
		if (wr->send_flags & IB_SEND_SIGNALED)
			t3_wr_flags |= T3_COMPLETION_FLAG;
		sqp = qhp->wq.sq +
		      Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
		switch (wr->opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			t3_wr_opcode = T3_WR_SEND;
			err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
			break;
		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			t3_wr_opcode = T3_WR_WRITE;
			err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
			break;
		case IB_WR_RDMA_READ:
			t3_wr_opcode = T3_WR_READ;
			t3_wr_flags = 0; /* T3 reads are always signaled */
			err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
			if (err)
				break;
			sqp->read_len = wqe->read.local_len;
			if (!qhp->wq.oldest_read)
				qhp->wq.oldest_read = sqp;
			break;
		default:
			CTR2(KTR_IW_CXGB, "%s post of type=%d TBD!", __FUNCTION__,
			     wr->opcode);
			err = -EINVAL;
		}
		if (err) {
			*bad_wr = wr;
			break;
		}
		wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
		sqp->wr_id = wr->wr_id;
		sqp->opcode = wr2opcode(t3_wr_opcode);
		sqp->sq_wptr = qhp->wq.sq_wptr;
		sqp->complete = 0;
		sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);

		build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
			       0, t3_wr_flit_cnt);
		CTR5(KTR_IW_CXGB, "%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d",
		     __FUNCTION__, (unsigned long long) wr->wr_id, idx,
		     Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
		     sqp->opcode);
		wr = wr->next;
		num_wrs--;
		++(qhp->wq.wptr);
		++(qhp->wq.sq_wptr);
	}
	mtx_unlock(&qhp->lock);
	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
	return err;
}
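
/*
 * Post a chain of receive work requests.  Receive WQEs always carry
 * T3_COMPLETION_FLAG, since every consumed RECV is reported via a CQE.
 */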
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
		      struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct iwch_qp *qhp;
	u32 idx;
	union t3_wr *wqe;
	u32 num_wrs;

	qhp = to_iwch_qp(ibqp);
	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		return -EINVAL;
	}
	num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
			    qhp->wq.rq_size_log2) - 1;
	if (!wr) {
		mtx_unlock(&qhp->lock);
		return -EINVAL;
	}
	while (wr) {
		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
		wqe = (union t3_wr *) (qhp->wq.queue + idx);
		if (num_wrs)
			err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
		else
			err = -ENOMEM;
		if (err) {
			*bad_wr = wr;
			break;
		}
		qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
			wr->wr_id;
		build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
			       0, sizeof(struct t3_receive_wr) >> 3);
		CTR6(KTR_IW_CXGB, "%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rq_rptr 0x%x "
		     "wqe %p ", __FUNCTION__, (unsigned long long) wr->wr_id,
		     idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
		++(qhp->wq.rq_wptr);
		++(qhp->wq.wptr);
		wr = wr->next;
		num_wrs--;
	}
	mtx_unlock(&qhp->lock);
	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
	return err;
}
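
/*
 * Post a memory-window bind WR.  The underlying MR is validated and
 * translated to a PBL address and page size via iwch_sgl2pbl_map(),
 * then handed to the hardware along with the MR/MW STags.
 */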
int iwch_bind_mw(struct ib_qp *qp,
		 struct ib_mw *mw,
		 struct ib_mw_bind *mw_bind)
{
	struct iwch_dev *rhp;
	struct iwch_mw *mhp;
	struct iwch_qp *qhp;
	union t3_wr *wqe;
	u32 pbl_addr;
	u8 page_size;
	u32 num_wrs;
	struct ib_sge sgl;
	int err = 0;
	enum t3_wr_flags t3_wr_flags;
	u32 idx;
	struct t3_swsq *sqp;

	qhp = to_iwch_qp(qp);
	mhp = to_iwch_mw(mw);
	rhp = qhp->rhp;

	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		return -EINVAL;
	}
	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
			    qhp->wq.sq_size_log2);
	if (num_wrs <= 0) {
		mtx_unlock(&qhp->lock);
		return -ENOMEM;
	}
	idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
	CTR4(KTR_IW_CXGB, "%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p", __FUNCTION__, idx,
	     mw, mw_bind);
	wqe = (union t3_wr *) (qhp->wq.queue + idx);

	t3_wr_flags = 0;
	if (mw_bind->send_flags & IB_SEND_SIGNALED)
		t3_wr_flags = T3_COMPLETION_FLAG;

	sgl.addr = mw_bind->addr;
	sgl.lkey = mw_bind->mr->lkey;
	sgl.length = mw_bind->length;
	wqe->bind.reserved = 0;
	wqe->bind.type = T3_VA_BASED_TO;

	/* TBD: check perms */
	wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
	wqe->bind.mr_stag = htobe32(mw_bind->mr->lkey);
	wqe->bind.mw_stag = htobe32(mw->rkey);
	wqe->bind.mw_len = htobe32(mw_bind->length);
	wqe->bind.mw_va = htobe64(mw_bind->addr);
	err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
	if (err) {
		mtx_unlock(&qhp->lock);
		return err;
	}
	wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
	sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
	sqp->wr_id = mw_bind->wr_id;
	sqp->opcode = T3_BIND_MW;
	sqp->sq_wptr = qhp->wq.sq_wptr;
	sqp->complete = 0;
	sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
	wqe->bind.mr_pbl_addr = htobe32(pbl_addr);
	wqe->bind.mr_pagesz = page_size;
	wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
	build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
		       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
		       sizeof(struct t3_bind_mw_wr) >> 3);
	++(qhp->wq.wptr);
	++(qhp->wq.sq_wptr);
	mtx_unlock(&qhp->lock);

	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);

	return err;
}
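
/*
 * Map a CQE error status to the TERMINATE message layer/etype and error
 * code defined by the iWARP (RDMAP/DDP/MPA) specs.  A NULL rsp_msg
 * falls through to the TPT_ERR_INTERNAL_ERR default initialized below.
 */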
static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
				    u8 *layer_type, u8 *ecode)
{
	int status = TPT_ERR_INTERNAL_ERR;
	int tagged = 0;
	int opcode = -1;
	int rqtype = 0;
	int send_inv = 0;

	if (rsp_msg) {
		status = CQE_STATUS(rsp_msg->cqe);
		opcode = CQE_OPCODE(rsp_msg->cqe);
		rqtype = RQ_TYPE(rsp_msg->cqe);
		send_inv = (opcode == T3_SEND_WITH_INV) ||
			   (opcode == T3_SEND_WITH_SE_INV);
		tagged = (opcode == T3_RDMA_WRITE) ||
			 (rqtype && (opcode == T3_READ_RESP));
	}

	switch (status) {
	case TPT_ERR_STAG:
		if (send_inv) {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
			*ecode = RDMAP_CANT_INV_STAG;
		} else {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
			*ecode = RDMAP_INV_STAG;
		}
		break;
	case TPT_ERR_PDID:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		if ((opcode == T3_SEND_WITH_INV) ||
		    (opcode == T3_SEND_WITH_SE_INV))
			*ecode = RDMAP_CANT_INV_STAG;
		else
			*ecode = RDMAP_STAG_NOT_ASSOC;
		break;
	case TPT_ERR_QPID:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_STAG_NOT_ASSOC;
		break;
	case TPT_ERR_ACCESS:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_ACC_VIOL;
		break;
	case TPT_ERR_WRAP:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_TO_WRAP;
		break;
	case TPT_ERR_BOUND:
		if (tagged) {
			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
			*ecode = DDPT_BASE_BOUNDS;
		} else {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
			*ecode = RDMAP_BASE_BOUNDS;
		}
		break;
	case TPT_ERR_INVALIDATE_SHARED_MR:
	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_CANT_INV_STAG;
		break;
	case TPT_ERR_ECC:
	case TPT_ERR_ECC_PSTAG:
	case TPT_ERR_INTERNAL_ERR:
		*layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
		*ecode = 0;
		break;
	case TPT_ERR_OUT_OF_RQE:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MSN_NOBUF;
		break;
	case TPT_ERR_PBL_ADDR_BOUND:
		*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
		*ecode = DDPT_BASE_BOUNDS;
		break;
	case TPT_ERR_CRC:
		*layer_type = LAYER_MPA|DDP_LLP;
		*ecode = MPA_CRC_ERR;
		break;
	case TPT_ERR_MARKER:
		*layer_type = LAYER_MPA|DDP_LLP;
		*ecode = MPA_MARKER_ERR;
		break;
	case TPT_ERR_PDU_LEN_ERR:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_MSG_TOOBIG;
		break;
	case TPT_ERR_DDP_VERSION:
		if (tagged) {
			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
			*ecode = DDPT_INV_VERS;
		} else {
			*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
			*ecode = DDPU_INV_VERS;
		}
		break;
	case TPT_ERR_RDMA_VERSION:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_INV_VERS;
		break;
	case TPT_ERR_OPCODE:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_INV_OPCODE;
		break;
	case TPT_ERR_DDP_QUEUE_NUM:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_QN;
		break;
	case TPT_ERR_MSN:
	case TPT_ERR_MSN_GAP:
	case TPT_ERR_MSN_RANGE:
	case TPT_ERR_IRD_OVERFLOW:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MSN_RANGE;
		break;
	case TPT_ERR_TBIT:
		*layer_type = LAYER_DDP|DDP_LOCAL_CATA;
		*ecode = 0;
		break;
	case TPT_ERR_MO:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MO;
		break;
	default:
		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
		*ecode = 0;
		break;
	}
}
/*
 * This posts a TERMINATE with layer=RDMA, type=catastrophic.
 */
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
{
	union t3_wr *wqe;
	struct terminate_message *term;
	struct mbuf *m;

	CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
	m = m_gethdr(MT_DATA, M_NOWAIT);
	if (!m) {
		log(LOG_ERR, "%s cannot send TERMINATE!\n", __FUNCTION__);
		return -ENOMEM;
	}
	wqe = mtod(m, union t3_wr *);
	m->m_len = m->m_pkthdr.len = 40;
	memset(wqe, 0, 40);
	wqe->send.rdmaop = T3_TERMINATE;

	/* immediate data length */
	wqe->send.plen = htonl(4);

	/* immediate data starts here. */
	term = (struct terminate_message *)wqe->send.sgl;
	build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
	wqe->send.wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_SEND) |
		V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
	wqe->send.wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(qhp->ep->hwtid));

	m_set_priority(m, CPL_PRIORITY_DATA);
	return cxgb_ofld_send(qhp->rhp->rdev.t3cdev_p, m);
}
/*
 * Assumes qhp lock is held.
 */
static void __flush_qp(struct iwch_qp *qhp)
{
	struct iwch_cq *rchp, *schp;
	int count;

	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
	schp = get_chp(qhp->rhp, qhp->attr.scq);

	CTR4(KTR_IW_CXGB, "%s qhp %p rchp %p schp %p", __FUNCTION__, qhp, rchp, schp);
	/* take a ref on the qhp since we must release the lock */
	qhp->refcnt++;
	mtx_unlock(&qhp->lock);

	/* locking hierarchy: cq lock first, then qp lock. */
	mtx_lock(&rchp->lock);
	mtx_lock(&qhp->lock);
	cxio_flush_hw_cq(&rchp->cq);
	cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
	cxio_flush_rq(&qhp->wq, &rchp->cq, count);
	mtx_unlock(&qhp->lock);
	mtx_unlock(&rchp->lock);
	(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);

	/* locking hierarchy: cq lock first, then qp lock. */
	mtx_lock(&schp->lock);
	mtx_lock(&qhp->lock);
	cxio_flush_hw_cq(&schp->cq);
	cxio_count_scqes(&schp->cq, &qhp->wq, &count);
	cxio_flush_sq(&qhp->wq, &schp->cq, count);
	mtx_unlock(&qhp->lock);
	mtx_unlock(&schp->lock);
	(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);

	/* deref */
	mtx_lock(&qhp->lock);
	if (--qhp->refcnt == 0)
		wakeup(qhp);
}
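
/*
 * For user QPs only mark the WQ in error; kernel QPs are flushed
 * directly via __flush_qp().
 */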
static void flush_qp(struct iwch_qp *qhp)
{
	if (qhp->ibqp.uobject)
		cxio_set_wq_in_error(&qhp->wq);
	else
		__flush_qp(qhp);
}
/*
 * Return non-zero if at least one RECV was pre-posted.
 */
static int rqes_posted(struct iwch_qp *qhp)
{
	return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
}
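
/*
 * Collect the QP, endpoint, and MPA attributes into a t3_rdma_init_attr
 * and hand them to the HAL to move the hardware QP into RDMA mode.
 */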
static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
		     enum iwch_qp_attr_mask mask,
		     struct iwch_qp_attributes *attrs)
{
	struct t3_rdma_init_attr init_attr;
	int ret;

	init_attr.tid = qhp->ep->hwtid;
	init_attr.qpid = qhp->wq.qpid;
	init_attr.pdid = qhp->attr.pd;
	init_attr.scqid = qhp->attr.scq;
	init_attr.rcqid = qhp->attr.rcq;
	init_attr.rq_addr = qhp->wq.rq_addr;
	init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
	init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
		qhp->attr.mpa_attr.recv_marker_enabled |
		(qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
		(qhp->attr.mpa_attr.crc_enabled << 2);

	/*
	 * XXX - The IWCM doesn't quite handle getting these
	 * attrs set before going into RTS.  For now, just turn
	 * them on always...
	 */
#if 0
	init_attr.qpcaps = qhp->attr.enableRdmaRead |
		(qhp->attr.enableRdmaWrite << 1) |
		(qhp->attr.enableBind << 2) |
		(qhp->attr.enable_stag0_fastreg << 3) |
		(qhp->attr.enable_stag0_fastreg << 4);
#else
	init_attr.qpcaps = 0x1f;
#endif
	init_attr.tcp_emss = qhp->ep->emss;
	init_attr.ord = qhp->attr.max_ord;
	init_attr.ird = qhp->attr.max_ird;
	init_attr.qp_dma_addr = qhp->wq.dma_addr;
	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
	init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
	init_attr.irs = qhp->ep->rcv_seq;
	CTR5(KTR_IW_CXGB, "%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
	     "flags 0x%x qpcaps 0x%x", __FUNCTION__,
	     init_attr.rq_addr, init_attr.rq_size,
	     init_attr.flags, init_attr.qpcaps);
	ret = cxio_rdma_init(&rhp->rdev, &init_attr);
	CTR2(KTR_IW_CXGB, "%s ret %d", __FUNCTION__, ret);
	return ret;
}
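
/*
 * QP state machine.  Attribute modifications are only legal in IDLE.
 * State transitions are applied under the QP lock; any follow-up work
 * that cannot run under the lock (posting a TERMINATE, disconnecting
 * the EP, dropping the final EP reference) is flagged here and carried
 * out after the lock is released.
 */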
int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
		   enum iwch_qp_attr_mask mask,
		   struct iwch_qp_attributes *attrs,
		   int internal)
{
	int ret = 0;
	struct iwch_qp_attributes newattr = qhp->attr;
	int disconnect = 0;
	int terminate = 0;
	int abort = 0;
	int free = 0;
	struct iwch_ep *ep = NULL;

	CTR6(KTR_IW_CXGB, "%s qhp %p qpid 0x%x ep %p state %d -> %d", __FUNCTION__,
	     qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
	     (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);

	mtx_lock(&qhp->lock);

	/* Process attr changes if in IDLE */
	if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
		if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
			ret = -EIO;
			goto out;
		}
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
			newattr.enable_rdma_read = attrs->enable_rdma_read;
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
			newattr.enable_rdma_write = attrs->enable_rdma_write;
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
			newattr.enable_bind = attrs->enable_bind;
		if (mask & IWCH_QP_ATTR_MAX_ORD) {
			if (attrs->max_ord >
			    rhp->attr.max_rdma_read_qp_depth) {
				ret = -EINVAL;
				goto out;
			}
			newattr.max_ord = attrs->max_ord;
		}
		if (mask & IWCH_QP_ATTR_MAX_IRD) {
			if (attrs->max_ird >
			    rhp->attr.max_rdma_reads_per_qp) {
				ret = -EINVAL;
				goto out;
			}
			newattr.max_ird = attrs->max_ird;
		}
		qhp->attr = newattr;
	}

	if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
		goto out;
	if (qhp->attr.state == attrs->next_state)
		goto out;

	switch (qhp->attr.state) {
	case IWCH_QP_STATE_IDLE:
		switch (attrs->next_state) {
		case IWCH_QP_STATE_RTS:
			if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
				ret = -EINVAL;
				goto out;
			}
			if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
				ret = -EINVAL;
				goto out;
			}
			qhp->attr.mpa_attr = attrs->mpa_attr;
			qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
			qhp->ep = qhp->attr.llp_stream_handle;
			qhp->attr.state = IWCH_QP_STATE_RTS;

			/*
			 * Ref the endpoint here and deref when we
			 * disassociate the endpoint from the QP.  This
			 * happens in CLOSING->IDLE transition or *->ERROR
			 * transition.
			 */
			get_ep(&qhp->ep->com);
			mtx_unlock(&qhp->lock);
			ret = rdma_init(rhp, qhp, mask, attrs);
			mtx_lock(&qhp->lock);
			if (ret)
				goto err;
			break;
		case IWCH_QP_STATE_ERROR:
			qhp->attr.state = IWCH_QP_STATE_ERROR;
			flush_qp(qhp);
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
		break;
	case IWCH_QP_STATE_RTS:
		switch (attrs->next_state) {
		case IWCH_QP_STATE_CLOSING:
			PANIC_IF(atomic_load_acq_int(&qhp->ep->com.refcount) < 2);
			qhp->attr.state = IWCH_QP_STATE_CLOSING;
			if (!internal) {
				abort = 0;
				disconnect = 1;
				ep = qhp->ep;
			}
			break;
		case IWCH_QP_STATE_TERMINATE:
			qhp->attr.state = IWCH_QP_STATE_TERMINATE;
			if (qhp->ibqp.uobject)
				cxio_set_wq_in_error(&qhp->wq);
			if (!internal)
				terminate = 1;
			break;
		case IWCH_QP_STATE_ERROR:
			qhp->attr.state = IWCH_QP_STATE_ERROR;
			if (!internal) {
				abort = 1;
				disconnect = 1;
				ep = qhp->ep;
			}
			goto err;
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
		break;
	case IWCH_QP_STATE_CLOSING:
		if (!internal) {
			ret = -EINVAL;
			goto out;
		}
		switch (attrs->next_state) {
		case IWCH_QP_STATE_IDLE:
			qhp->attr.state = IWCH_QP_STATE_IDLE;
			qhp->attr.llp_stream_handle = NULL;
			put_ep(&qhp->ep->com);
			qhp->ep = NULL;
			wakeup(qhp);
			break;
		case IWCH_QP_STATE_ERROR:
			goto err;
		default:
			ret = -EINVAL;
			goto err;
		}
		break;
	case IWCH_QP_STATE_ERROR:
		if (attrs->next_state != IWCH_QP_STATE_IDLE) {
			ret = -EINVAL;
			goto out;
		}

		if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
		    !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
			ret = -EINVAL;
			goto out;
		}
		qhp->attr.state = IWCH_QP_STATE_IDLE;
		memset(&qhp->attr, 0, sizeof(qhp->attr));
		break;
	case IWCH_QP_STATE_TERMINATE:
		if (!internal) {
			ret = -EINVAL;
			goto out;
		}
		goto err;
		break;
	default:
		log(LOG_ERR, "%s in a bad state %d\n",
		    __FUNCTION__, qhp->attr.state);
		ret = -EINVAL;
		goto err;
		break;
	}
	goto out;
err:
	CTR3(KTR_IW_CXGB, "%s disassociating ep %p qpid 0x%x", __FUNCTION__, qhp->ep,
	     qhp->wq.qpid);

	/* disassociate the LLP connection */
	qhp->attr.llp_stream_handle = NULL;
	ep = qhp->ep;
	qhp->ep = NULL;
	qhp->attr.state = IWCH_QP_STATE_ERROR;
	free = 1;
	wakeup(qhp);
	PANIC_IF(!ep);
	flush_qp(qhp);
out:
	mtx_unlock(&qhp->lock);

	if (terminate)
		iwch_post_terminate(qhp, NULL);

	/*
	 * If disconnect is 1, then we need to initiate a disconnect
	 * on the EP.  This can be a normal close (RTS->CLOSING) or
	 * an abnormal close (RTS/CLOSING->ERROR).
	 */
	if (disconnect)
		iwch_ep_disconnect(ep, abort, M_NOWAIT);

	/*
	 * If free is 1, then we've disassociated the EP from the QP
	 * and we need to dereference the EP.
	 */
	if (free)
		put_ep(&ep->com);

	CTR2(KTR_IW_CXGB, "%s exit state %d", __FUNCTION__, qhp->attr.state);
	return ret;
}
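
/*
 * Quiesce/resume stop and restart delivery on the QP's TID; the
 * QP_QUIESCED flag tracks the state for the per-CQ iterators below.
 */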
static int quiesce_qp(struct iwch_qp *qhp)
{
	mtx_lock(&qhp->lock);
	iwch_quiesce_tid(qhp->ep);
	qhp->flags |= QP_QUIESCED;
	mtx_unlock(&qhp->lock);
	return 0;
}

static int resume_qp(struct iwch_qp *qhp)
{
	mtx_lock(&qhp->lock);
	iwch_resume_tid(qhp->ep);
	qhp->flags &= ~QP_QUIESCED;
	mtx_unlock(&qhp->lock);	/* was mtx_lock(); re-acquiring here would self-deadlock */
	return 0;
}
int iwch_quiesce_qps(struct iwch_cq *chp)
{
	int i;
	struct iwch_qp *qhp;

	for (i = 0; i < T3_MAX_NUM_QP; i++) {
		qhp = get_qhp(chp->rhp, i);
		if (!qhp)
			continue;
		if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
			quiesce_qp(qhp);
			continue;
		}
		if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
			quiesce_qp(qhp);
	}
	return 0;
}

int iwch_resume_qps(struct iwch_cq *chp)
{
	int i;
	struct iwch_qp *qhp;

	for (i = 0; i < T3_MAX_NUM_QP; i++) {
		qhp = get_qhp(chp->rhp, i);
		if (!qhp)
			continue;
		if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
			resume_qp(qhp);
			continue;
		}
		if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
			resume_qp(qhp);
	}
	return 0;
}