/**************************************************************************

Copyright (c) 2007, Chelsio Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pciio.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/ioccom.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <net/route.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/toecore.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>

#include <cxgb_include.h>
#include <ulp/tom/cxgb_l2t.h>
#include <ulp/tom/cxgb_toepcb.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>
#include <ulp/iw_cxgb/iw_cxgb_resource.h>
#include <ulp/iw_cxgb/iw_cxgb_user.h>
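
/*
 * Build a T3 SEND work request from an ib_send_wr: select the T3 opcode,
 * copy the local SGL (converted to big-endian), and return the WR size in
 * 8-byte flits through *flit_cnt.
 */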
static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
				u8 *flit_cnt)
{
	int i;
	u32 plen;

	switch (wr->opcode) {
	case IB_WR_SEND:
		if (wr->send_flags & IB_SEND_SOLICITED)
			wqe->send.rdmaop = T3_SEND_WITH_SE;
		else
			wqe->send.rdmaop = T3_SEND;
		wqe->send.rem_stag = 0;
		break;
	case IB_WR_SEND_WITH_IMM:
		if (wr->send_flags & IB_SEND_SOLICITED)
			wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
		else
			wqe->send.rdmaop = T3_SEND_WITH_INV;
		wqe->send.rem_stag = 0;
		break;
	default:
		return -EINVAL;
	}
	if (wr->num_sge > T3_MAX_SGE)
		return -EINVAL;
	wqe->send.reserved[0] = 0;
	wqe->send.reserved[1] = 0;
	wqe->send.reserved[2] = 0;
	plen = 0;
	for (i = 0; i < wr->num_sge; i++) {
		if ((plen + wr->sg_list[i].length) < plen) {
			return -EMSGSIZE;
		}
		plen += wr->sg_list[i].length;
		wqe->send.sgl[i].stag =
		    htobe32(wr->sg_list[i].lkey);
		wqe->send.sgl[i].len =
		    htobe32(wr->sg_list[i].length);
		wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
	}
	wqe->send.num_sgle = htobe32(wr->num_sge);
	*flit_cnt = 4 + ((wr->num_sge) << 1);
	wqe->send.plen = htobe32(plen);
	return 0;
}
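
/*
 * Build a T3 RDMA WRITE work request: fill in the remote (sink) STAG and
 * offset, then either the immediate data or the local SGL, and return the
 * flit count through *flit_cnt.
 */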
static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
				u8 *flit_cnt)
{
	int i;
	u32 plen;

	if (wr->num_sge > T3_MAX_SGE)
		return -EINVAL;
	wqe->write.rdmaop = T3_RDMA_WRITE;
	wqe->write.reserved[0] = 0;
	wqe->write.reserved[1] = 0;
	wqe->write.reserved[2] = 0;
	wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey);
	wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr);

	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
		plen = 4;
		wqe->write.sgl[0].stag = wr->ex.imm_data;
		wqe->write.sgl[0].len = 0;
		wqe->write.num_sgle = 0;
		*flit_cnt = 6;
	} else {
		plen = 0;
		for (i = 0; i < wr->num_sge; i++) {
			if ((plen + wr->sg_list[i].length) < plen) {
				return -EMSGSIZE;
			}
			plen += wr->sg_list[i].length;
			wqe->write.sgl[i].stag =
			    htobe32(wr->sg_list[i].lkey);
			wqe->write.sgl[i].len =
			    htobe32(wr->sg_list[i].length);
			wqe->write.sgl[i].to =
			    htobe64(wr->sg_list[i].addr);
		}
		wqe->write.num_sgle = htobe32(wr->num_sge);
		*flit_cnt = 5 + ((wr->num_sge) << 1);
	}
	wqe->write.plen = htobe32(plen);
	return 0;
}
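
/*
 * Build a T3 RDMA READ request.  A read uses a single local SGE, so the
 * WR has a fixed size.
 */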
static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
				u8 *flit_cnt)
{
	if (wr->num_sge > 1)
		return -EINVAL;
	wqe->read.rdmaop = T3_READ_REQ;
	wqe->read.reserved[0] = 0;
	wqe->read.reserved[1] = 0;
	wqe->read.reserved[2] = 0;
	wqe->read.rem_stag = htobe32(wr->wr.rdma.rkey);
	wqe->read.rem_to = htobe64(wr->wr.rdma.remote_addr);
	wqe->read.local_stag = htobe32(wr->sg_list[0].lkey);
	wqe->read.local_len = htobe32(wr->sg_list[0].length);
	wqe->read.local_to = htobe64(wr->sg_list[0].addr);
	*flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
	return 0;
}
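
/*
 * Validate each SGE against its memory region (the MR must exist, be in
 * the valid state, not be zero-based, and cover the SGE) and translate
 * the SGE address into a PBL index and page size for the adapter.
 */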
static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
			    u32 num_sgle, u32 * pbl_addr, u8 * page_size)
{
	int i;
	struct iwch_mr *mhp;
	u64 offset;

	for (i = 0; i < num_sgle; i++) {
		mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
		if (!mhp) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return -EIO;
		}
		if (!mhp->attr.state) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return -EIO;
		}
		if (mhp->attr.zbva) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return -EIO;
		}
		if (sg_list[i].addr < mhp->attr.va_fbo) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return -EINVAL;
		}
		if (sg_list[i].addr + ((u64) sg_list[i].length) <
		    sg_list[i].addr) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return -EINVAL;
		}
		if (sg_list[i].addr + ((u64) sg_list[i].length) >
		    mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return -EINVAL;
		}
		offset = sg_list[i].addr - mhp->attr.va_fbo;
		offset += mhp->attr.va_fbo &
		    ((1UL << (12 + mhp->attr.page_size)) - 1);
		pbl_addr[i] = ((mhp->attr.pbl_addr -
		    rhp->rdev.rnic_info.pbl_base) >> 3) +
		    (offset >> (12 + mhp->attr.page_size));
		page_size[i] = mhp->attr.page_size;
	}
	return 0;
}
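
/*
 * Build a T3 receive WR for SGEs that reference registered memory
 * (non-zero lkey): map the SGL through the PBL and record the wr_id in
 * the software RQ entry for completion processing.
 */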
static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
				struct ib_recv_wr *wr)
{
	int i, err = 0;
	u32 pbl_addr[T3_MAX_SGE];
	u8 page_size[T3_MAX_SGE];

	if (wr->num_sge > T3_MAX_SGE)
		return -EINVAL;

	err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
			       page_size);
	if (err)
		return err;
	wqe->recv.pagesz[0] = page_size[0];
	wqe->recv.pagesz[1] = page_size[1];
	wqe->recv.pagesz[2] = page_size[2];
	wqe->recv.pagesz[3] = page_size[3];
	wqe->recv.num_sgle = htobe32(wr->num_sge);

	for (i = 0; i < wr->num_sge; i++) {
		wqe->recv.sgl[i].stag = htobe32(wr->sg_list[i].lkey);
		wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
		wqe->recv.sgl[i].to = htobe64(((u32)wr->sg_list[i].addr) &
				((1UL << (12 + page_size[i])) - 1));
		/* pbl_addr is the adapter's address in the PBL */
		wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
	}
	for (; i < T3_MAX_SGE; i++) {
		wqe->recv.sgl[i].stag = 0;
		wqe->recv.sgl[i].len = 0;
		wqe->recv.sgl[i].to = 0;
		wqe->recv.pbl_addr[i] = 0;
	}
	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
		   qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
		   qhp->wq.rq_size_log2)].pbl_addr = 0;
	return 0;
}
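
/*
 * Build a T3 receive WR for zero-stag SGEs (lkey == 0): reserve PBL space
 * for the uP to fill in, use the fixed T3_STAG0 page size, and save both
 * wr_id and pbl_addr in the software RQ entry.
 */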
static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
				struct ib_recv_wr *wr)
{
	int i;
	u32 pbl_addr;
	u32 pbl_offset;

	/*
	 * The T3 HW requires the PBL in the HW recv descriptor to reference
	 * a PBL entry.  So we allocate the max needed PBL memory here and pass
	 * it to the uP in the recv WR.  The uP will build the PBL and setup
	 * the HW recv descriptor.
	 */
	pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
	if (!pbl_addr)
		return -ENOMEM;

	/*
	 * Compute the 8B aligned offset.
	 */
	pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;

	wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);

	for (i = 0; i < wr->num_sge; i++) {

		/*
		 * Use a 128MB page size.  This and an imposed 128MB
		 * SGE length limit allows us to require only a 2-entry HW
		 * PBL for each SGE.  This restriction is acceptable since
		 * it is not possible to allocate 128MB of contiguous
		 * DMA coherent memory!
		 */
		if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
			return -EINVAL;
		wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;

		/*
		 * T3 restricts a recv to all zero-stag or all non-zero-stag.
		 */
		if (wr->sg_list[i].lkey != 0)
			return -EINVAL;
		wqe->recv.sgl[i].stag = 0;
		wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
		wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
		wqe->recv.pbl_addr[i] = htobe32(pbl_offset);
		pbl_offset += 2;
	}
	for (; i < T3_MAX_SGE; i++) {
		wqe->recv.pagesz[i] = 0;
		wqe->recv.sgl[i].stag = 0;
		wqe->recv.sgl[i].len = 0;
		wqe->recv.sgl[i].to = 0;
		wqe->recv.pbl_addr[i] = 0;
	}
	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
		   qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
		   qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
	return 0;
}
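
/*
 * Post send work requests to the SQ.  Validates QP state and available SQ
 * space under qhp->lock, builds a T3 WR for each ib_send_wr, then rings
 * the QP doorbell.
 */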
int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		   struct ib_send_wr **bad_wr)
{
	int err = 0;
	u8 t3_wr_flit_cnt = 0;
	enum t3_wr_opcode t3_wr_opcode = 0;
	enum t3_wr_flags t3_wr_flags;
	struct iwch_qp *qhp;
	u32 idx;
	union t3_wr *wqe;
	u32 num_wrs;
	struct t3_swsq *sqp;

	qhp = to_iwch_qp(ibqp);
	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		return -EINVAL;
	}
	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
		  qhp->wq.sq_size_log2);
	if (num_wrs <= 0) {
		mtx_unlock(&qhp->lock);
		return -ENOMEM;
	}
	while (wr) {
		if (num_wrs == 0) {
			err = -ENOMEM;
			*bad_wr = wr;
			break;
		}
		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
		wqe = (union t3_wr *) (qhp->wq.queue + idx);
		t3_wr_flags = 0;
		if (wr->send_flags & IB_SEND_SOLICITED)
			t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
		if (wr->send_flags & IB_SEND_FENCE)
			t3_wr_flags |= T3_READ_FENCE_FLAG;
		if (wr->send_flags & IB_SEND_SIGNALED)
			t3_wr_flags |= T3_COMPLETION_FLAG;
		sqp = qhp->wq.sq +
		      Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
		switch (wr->opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			t3_wr_opcode = T3_WR_SEND;
			err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
			break;
		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			t3_wr_opcode = T3_WR_WRITE;
			err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
			break;
		case IB_WR_RDMA_READ:
			t3_wr_opcode = T3_WR_READ;
			t3_wr_flags = 0; /* T3 reads are always signaled */
			err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
			if (err)
				break;
			sqp->read_len = wqe->read.local_len;
			if (!qhp->wq.oldest_read)
				qhp->wq.oldest_read = sqp;
			break;
		default:
			CTR2(KTR_IW_CXGB, "%s post of type=%d TBD!", __FUNCTION__,
			     wr->opcode);
			err = -EINVAL;
		}
		if (err) {
			*bad_wr = wr;
			break;
		}
		wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
		sqp->wr_id = wr->wr_id;
		sqp->opcode = wr2opcode(t3_wr_opcode);
		sqp->sq_wptr = qhp->wq.sq_wptr;
		sqp->complete = 0;
		sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);

		build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
			       0, t3_wr_flit_cnt);
		CTR5(KTR_IW_CXGB, "%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d",
		     __FUNCTION__, (unsigned long long) wr->wr_id, idx,
		     Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
		     sqp->opcode);
		wr = wr->next;
		num_wrs--;
		++(qhp->wq.wptr);
		++(qhp->wq.sq_wptr);
	}
	mtx_unlock(&qhp->lock);
	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
	return err;
}
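
/*
 * Post receive work requests to the RQ.  Each WR is built either as a
 * normal (stag-based) or zero-stag receive, then the QP doorbell is rung.
 */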
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
		      struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct iwch_qp *qhp;
	u32 idx;
	union t3_wr *wqe;
	u32 num_wrs;

	qhp = to_iwch_qp(ibqp);
	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		return -EINVAL;
	}
	num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
			    qhp->wq.rq_size_log2) - 1;
	if (!wr) {
		mtx_unlock(&qhp->lock);
		return -EINVAL;
	}
	while (wr) {
		if (wr->num_sge > T3_MAX_SGE) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}
		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
		wqe = (union t3_wr *) (qhp->wq.queue + idx);
		if (num_wrs)
			if (wr->sg_list[0].lkey)
				err = build_rdma_recv(qhp, wqe, wr);
			else
				err = build_zero_stag_recv(qhp, wqe, wr);
		else
			err = -ENOMEM;
		if (err) {
			*bad_wr = wr;
			break;
		}
		build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
			       0, sizeof(struct t3_receive_wr) >> 3);
		CTR6(KTR_IW_CXGB, "%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
		     "wqe %p ", __FUNCTION__, (unsigned long long) wr->wr_id,
		     idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
		++(qhp->wq.rq_wptr);
		++(qhp->wq.wptr);
		wr = wr->next;
		num_wrs--;
	}
	mtx_unlock(&qhp->lock);
	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
	return err;
}
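
/*
 * Post a bind-memory-window WR on the SQ: translate the bind region
 * through iwch_sgl2pbl_map(), fill in the T3 bind WR, and ring the
 * doorbell.
 */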
int iwch_bind_mw(struct ib_qp *qp,
		 struct ib_mw *mw,
		 struct ib_mw_bind *mw_bind)
{
	struct iwch_dev *rhp;
	struct iwch_mw *mhp;
	struct iwch_qp *qhp;
	union t3_wr *wqe;
	u32 pbl_addr;
	u8 page_size;
	u32 num_wrs;
	struct ib_sge sgl;
	int err = 0;
	enum t3_wr_flags t3_wr_flags;
	u32 idx;
	struct t3_swsq *sqp;

	qhp = to_iwch_qp(qp);
	mhp = to_iwch_mw(mw);
	rhp = qhp->rhp;

	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		return -EINVAL;
	}
	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
			    qhp->wq.sq_size_log2);
	if ((num_wrs) == 0) {
		mtx_unlock(&qhp->lock);
		return -ENOMEM;
	}
	idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
	CTR4(KTR_IW_CXGB, "%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p", __FUNCTION__, idx,
	     mw, mw_bind);
	wqe = (union t3_wr *) (qhp->wq.queue + idx);

	t3_wr_flags = 0;
	if (mw_bind->send_flags & IB_SEND_SIGNALED)
		t3_wr_flags = T3_COMPLETION_FLAG;

	sgl.addr = mw_bind->addr;
	sgl.lkey = mw_bind->mr->lkey;
	sgl.length = mw_bind->length;
	wqe->bind.reserved = 0;
	wqe->bind.type = T3_VA_BASED_TO;

	/* TBD: check perms */
	wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
	wqe->bind.mr_stag = htobe32(mw_bind->mr->lkey);
	wqe->bind.mw_stag = htobe32(mw->rkey);
	wqe->bind.mw_len = htobe32(mw_bind->length);
	wqe->bind.mw_va = htobe64(mw_bind->addr);
	err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
	if (err) {
		mtx_unlock(&qhp->lock);
		return err;
	}
	wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
	sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
	sqp->wr_id = mw_bind->wr_id;
	sqp->opcode = T3_BIND_MW;
	sqp->sq_wptr = qhp->wq.sq_wptr;
	sqp->complete = 0;
	sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
	wqe->bind.mr_pbl_addr = htobe32(pbl_addr);
	wqe->bind.mr_pagesz = page_size;
	wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
	build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
		       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
		       sizeof(struct t3_bind_mw_wr) >> 3);
	++(qhp->wq.wptr);
	++(qhp->wq.sq_wptr);
	mtx_unlock(&qhp->lock);

	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);

	return err;
}
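
/*
 * Map the CQE status of a failed operation onto the RDMAP/DDP/MPA layer
 * and error code fields carried in a TERMINATE message.
 */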
static void build_term_codes(struct respQ_msg_t *rsp_msg,
			     u8 *layer_type, u8 *ecode)
{
	int status = TPT_ERR_INTERNAL_ERR;
	int tagged = 0;
	int opcode = -1;
	int rqtype = 0;
	int send_inv = 0;

	if (rsp_msg) {
		status = CQE_STATUS(rsp_msg->cqe);
		opcode = CQE_OPCODE(rsp_msg->cqe);
		rqtype = RQ_TYPE(rsp_msg->cqe);
		send_inv = (opcode == T3_SEND_WITH_INV) ||
			   (opcode == T3_SEND_WITH_SE_INV);
		tagged = (opcode == T3_RDMA_WRITE) ||
			 (rqtype && (opcode == T3_READ_RESP));
	}

	switch (status) {
	case TPT_ERR_STAG:
		if (send_inv) {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
			*ecode = RDMAP_CANT_INV_STAG;
		} else {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
			*ecode = RDMAP_INV_STAG;
		}
		break;
	case TPT_ERR_PDID:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		if ((opcode == T3_SEND_WITH_INV) ||
		    (opcode == T3_SEND_WITH_SE_INV))
			*ecode = RDMAP_CANT_INV_STAG;
		else
			*ecode = RDMAP_STAG_NOT_ASSOC;
		break;
	case TPT_ERR_QPID:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_STAG_NOT_ASSOC;
		break;
	case TPT_ERR_ACCESS:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_ACC_VIOL;
		break;
	case TPT_ERR_WRAP:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_TO_WRAP;
		break;
	case TPT_ERR_BOUND:
		if (tagged) {
			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
			*ecode = DDPT_BASE_BOUNDS;
		} else {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
			*ecode = RDMAP_BASE_BOUNDS;
		}
		break;
	case TPT_ERR_INVALIDATE_SHARED_MR:
	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_CANT_INV_STAG;
		break;
	case TPT_ERR_ECC:
	case TPT_ERR_ECC_PSTAG:
	case TPT_ERR_INTERNAL_ERR:
		*layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
		*ecode = 0;
		break;
	case TPT_ERR_OUT_OF_RQE:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MSN_NOBUF;
		break;
	case TPT_ERR_PBL_ADDR_BOUND:
		*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
		*ecode = DDPT_BASE_BOUNDS;
		break;
	case TPT_ERR_CRC:
		*layer_type = LAYER_MPA|DDP_LLP;
		*ecode = MPA_CRC_ERR;
		break;
	case TPT_ERR_MARKER:
		*layer_type = LAYER_MPA|DDP_LLP;
		*ecode = MPA_MARKER_ERR;
		break;
	case TPT_ERR_PDU_LEN_ERR:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_MSG_TOOBIG;
		break;
	case TPT_ERR_DDP_VERSION:
		if (tagged) {
			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
			*ecode = DDPT_INV_VERS;
		} else {
			*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
			*ecode = DDPU_INV_VERS;
		}
		break;
	case TPT_ERR_RDMA_VERSION:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_INV_VERS;
		break;
	case TPT_ERR_OPCODE:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_INV_OPCODE;
		break;
	case TPT_ERR_DDP_QUEUE_NUM:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_QN;
		break;
	case TPT_ERR_MSN:
	case TPT_ERR_MSN_GAP:
	case TPT_ERR_MSN_RANGE:
	case TPT_ERR_IRD_OVERFLOW:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MSN_RANGE;
		break;
	case TPT_ERR_TBIT:
		*layer_type = LAYER_DDP|DDP_LOCAL_CATA;
		*ecode = 0;
		break;
	case TPT_ERR_MO:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MO;
		break;
	default:
		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
		*ecode = 0;
		break;
	}
}
/*
 * This posts a TERMINATE with layer=RDMA, type=catastrophic.
 */
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
{
	union t3_wr *wqe;
	struct terminate_message *term;
	struct ofld_hdr *oh;
	struct mbuf *m;

	CTR3(KTR_IW_CXGB, "%s: tid %u, %p", __func__, qhp->ep->hwtid, rsp_msg);
	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		log(LOG_ERR, "%s cannot send TERMINATE!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	oh = mtod(m, struct ofld_hdr *);
	m->m_pkthdr.len = m->m_len = sizeof(*oh) + 40;
	oh->flags = V_HDR_NDESC(1) | V_HDR_CTRL(CPL_PRIORITY_DATA) | V_HDR_QSET(0);
	wqe = (void *)(oh + 1);
	memset(wqe, 0, 40);
	wqe->send.rdmaop = T3_TERMINATE;

	/* immediate data length */
	wqe->send.plen = htonl(4);

	/* immediate data starts here. */
	term = (struct terminate_message *)wqe->send.sgl;
	build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
	wqe->send.wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_SEND) |
	    V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
	wqe->send.wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(qhp->ep->hwtid));

	return t3_offload_tx(qhp->rhp->rdev.adap, m);
}
/*
 * Assumes qhp lock is held.
 */
static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
		       struct iwch_cq *schp)
{
	int count;
	int flushed;

	CTR4(KTR_IW_CXGB, "%s qhp %p rchp %p schp %p", __FUNCTION__, qhp, rchp, schp);
	/* take a ref on the qhp since we must release the lock */
	qhp->refcnt++;
	mtx_unlock(&qhp->lock);

	/* locking hierarchy: cq lock first, then qp lock. */
	mtx_lock(&rchp->lock);
	mtx_lock(&qhp->lock);
	cxio_flush_hw_cq(&rchp->cq);
	cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
	flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
	mtx_unlock(&qhp->lock);
	mtx_unlock(&rchp->lock);
	if (flushed)
		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);

	/* locking hierarchy: cq lock first, then qp lock. */
	mtx_lock(&schp->lock);
	mtx_lock(&qhp->lock);
	cxio_flush_hw_cq(&schp->cq);
	cxio_count_scqes(&schp->cq, &qhp->wq, &count);
	flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
	mtx_unlock(&qhp->lock);
	mtx_unlock(&schp->lock);
	if (flushed)
		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);

	/* deref */
	mtx_lock(&qhp->lock);
	if (--qhp->refcnt == 0)
		wakeup(qhp);
}
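
/*
 * Flush both CQs attached to the QP.  For user QPs the WQ and CQs are
 * marked in error and the completion handlers are called; kernel QPs are
 * flushed in software via __flush_qp().
 */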
static void flush_qp(struct iwch_qp *qhp)
{
	struct iwch_cq *rchp, *schp;

	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
	schp = get_chp(qhp->rhp, qhp->attr.scq);

	if (qhp->ibqp.uobject) {
		cxio_set_wq_in_error(&qhp->wq);
		cxio_set_cq_in_error(&rchp->cq);
		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
		if (schp != rchp) {
			cxio_set_cq_in_error(&schp->cq);
			(*schp->ibcq.comp_handler)(&schp->ibcq,
						   schp->ibcq.cq_context);
		}
		return;
	}
	__flush_qp(qhp, rchp, schp);
}
/*
 * Return non-zero if at least one RECV was pre-posted.
 */
static int rqes_posted(struct iwch_qp *qhp)
{
	union t3_wr *wqe = qhp->wq.queue;
	u16 count = 0;

	while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
		count++;
		wqe++;
	}
	return count;
}
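
/*
 * Fill a t3_rdma_init_attr structure from the QP and endpoint state (QP
 * and CQ ids, MPA attributes, capabilities, ORD/IRD, WQ DMA address) and
 * hand it to cxio_rdma_init() to move the connection into RDMA mode.
 */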
static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
			   enum iwch_qp_attr_mask mask,
			   struct iwch_qp_attributes *attrs)
{
	struct t3_rdma_init_attr init_attr;
	int ret;
	struct socket *so = qhp->ep->com.so;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;

	init_attr.tid = qhp->ep->hwtid;
	init_attr.qpid = qhp->wq.qpid;
	init_attr.pdid = qhp->attr.pd;
	init_attr.scqid = qhp->attr.scq;
	init_attr.rcqid = qhp->attr.rcq;
	init_attr.rq_addr = qhp->wq.rq_addr;
	init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
	init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
		qhp->attr.mpa_attr.recv_marker_enabled |
		(qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
		(qhp->attr.mpa_attr.crc_enabled << 2);

	init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE |
			   uP_RI_QP_RDMA_WRITE_ENABLE |
			   uP_RI_QP_BIND_ENABLE;
	if (!qhp->ibqp.uobject)
		init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE;
	init_attr.tcp_emss = qhp->ep->emss;
	init_attr.ord = qhp->attr.max_ord;
	init_attr.ird = qhp->attr.max_ird;
	init_attr.qp_dma_addr = qhp->wq.dma_addr;
	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
	init_attr.rqe_count = rqes_posted(qhp);
	init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
	init_attr.rtr_type = 0;
	tp = intotcpcb(inp);
	toep = tp->t_toe;
	init_attr.chan = toep->tp_l2t->smt_idx;
	init_attr.irs = qhp->ep->rcv_seq;
	CTR5(KTR_IW_CXGB, "%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
	     "flags 0x%x qpcaps 0x%x", __FUNCTION__,
	     init_attr.rq_addr, init_attr.rq_size,
	     init_attr.flags, init_attr.qpcaps);
	ret = cxio_rdma_init(&rhp->rdev, &init_attr, qhp->ep->com.so);
	CTR2(KTR_IW_CXGB, "%s ret %d", __FUNCTION__, ret);
	return (ret);
}
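
/*
 * The QP state machine.  Applies attribute changes while the QP is IDLE
 * and drives IDLE/RTS/CLOSING/TERMINATE/ERROR transitions, deferring any
 * endpoint disconnect, terminate, or flush work until the lock is dropped.
 */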
int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
				enum iwch_qp_attr_mask mask,
				struct iwch_qp_attributes *attrs,
				int internal)
{
	int ret = 0;
	struct iwch_qp_attributes newattr = qhp->attr;
	int disconnect = 0;
	int terminate = 0;
	int abort = 0;
	int free = 0;
	struct iwch_ep *ep = NULL;

	CTR6(KTR_IW_CXGB, "%s qhp %p qpid 0x%x ep %p state %d -> %d", __FUNCTION__,
	     qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
	     (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);

	mtx_lock(&qhp->lock);

	/* Process attr changes if in IDLE */
	if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
		if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
			ret = -EIO;
			goto out;
		}
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
			newattr.enable_rdma_read = attrs->enable_rdma_read;
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
			newattr.enable_rdma_write = attrs->enable_rdma_write;
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
			newattr.enable_bind = attrs->enable_bind;
		if (mask & IWCH_QP_ATTR_MAX_ORD) {
			if (attrs->max_ord >
			    rhp->attr.max_rdma_read_qp_depth) {
				ret = -EINVAL;
				goto out;
			}
			newattr.max_ord = attrs->max_ord;
		}
		if (mask & IWCH_QP_ATTR_MAX_IRD) {
			if (attrs->max_ird >
			    rhp->attr.max_rdma_reads_per_qp) {
				ret = -EINVAL;
				goto out;
			}
			newattr.max_ird = attrs->max_ird;
		}
		qhp->attr = newattr;
	}

	if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
		goto out;
	if (qhp->attr.state == attrs->next_state)
		goto out;

	switch (qhp->attr.state) {
	case IWCH_QP_STATE_IDLE:
		switch (attrs->next_state) {
		case IWCH_QP_STATE_RTS:
			if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
				ret = -EINVAL;
				goto out;
			}
			if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
				ret = -EINVAL;
				goto out;
			}
			qhp->attr.mpa_attr = attrs->mpa_attr;
			qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
			qhp->ep = qhp->attr.llp_stream_handle;
			qhp->attr.state = IWCH_QP_STATE_RTS;

			/*
			 * Ref the endpoint here and deref when we
			 * disassociate the endpoint from the QP.  This
			 * happens in CLOSING->IDLE transition or *->ERROR
			 * transition.
			 */
			get_ep(&qhp->ep->com);
			mtx_unlock(&qhp->lock);
			ret = rdma_init(rhp, qhp, mask, attrs);
			mtx_lock(&qhp->lock);
			if (ret)
				goto err;
			break;
		case IWCH_QP_STATE_ERROR:
			qhp->attr.state = IWCH_QP_STATE_ERROR;
			flush_qp(qhp);
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
		break;
	case IWCH_QP_STATE_RTS:
		switch (attrs->next_state) {
		case IWCH_QP_STATE_CLOSING:
			PANIC_IF(atomic_load_acq_int(&qhp->ep->com.refcount) < 2);
			qhp->attr.state = IWCH_QP_STATE_CLOSING;
			if (!internal) {
				abort = 0;
				disconnect = 1;
				ep = qhp->ep;
				get_ep(&ep->com);
			}
			break;
		case IWCH_QP_STATE_TERMINATE:
			qhp->attr.state = IWCH_QP_STATE_TERMINATE;
			if (qhp->ibqp.uobject)
				cxio_set_wq_in_error(&qhp->wq);
			if (!internal)
				terminate = 1;
			break;
		case IWCH_QP_STATE_ERROR:
			qhp->attr.state = IWCH_QP_STATE_ERROR;
			if (!internal) {
				abort = 1;
				disconnect = 1;
				ep = qhp->ep;
				get_ep(&ep->com);
			}
			goto err;
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
		break;
	case IWCH_QP_STATE_CLOSING:
		if (!internal) {
			ret = -EINVAL;
			goto out;
		}
		switch (attrs->next_state) {
			case IWCH_QP_STATE_IDLE:
				flush_qp(qhp);
				qhp->attr.state = IWCH_QP_STATE_IDLE;
				qhp->attr.llp_stream_handle = NULL;
				put_ep(&qhp->ep->com);
				qhp->ep = NULL;
				wakeup(qhp);
				break;
			case IWCH_QP_STATE_ERROR:
				goto err;
			default:
				ret = -EINVAL;
				goto out;
		}
		break;
	case IWCH_QP_STATE_ERROR:
		if (attrs->next_state != IWCH_QP_STATE_IDLE) {
			ret = -EINVAL;
			goto out;
		}

		if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
		    !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
			ret = -EINVAL;
			goto out;
		}
		qhp->attr.state = IWCH_QP_STATE_IDLE;
		memset(&qhp->attr, 0, sizeof(qhp->attr));
		break;
	case IWCH_QP_STATE_TERMINATE:
		if (!internal) {
			ret = -EINVAL;
			goto out;
		}
		goto err;
		break;
	default:
		log(LOG_ERR, "%s in a bad state %d\n",
		    __FUNCTION__, qhp->attr.state);
		ret = -EINVAL;
		goto err;
		break;
	}
	goto out;
err:
	CTR3(KTR_IW_CXGB, "%s disassociating ep %p qpid 0x%x", __FUNCTION__, qhp->ep,
	     qhp->wq.qpid);

	/* disassociate the LLP connection */
	qhp->attr.llp_stream_handle = NULL;
	ep = qhp->ep;
	qhp->ep = NULL;
	qhp->attr.state = IWCH_QP_STATE_ERROR;
	free = 1;
	wakeup(qhp);
	PANIC_IF(!ep);
	flush_qp(qhp);
out:
	mtx_unlock(&qhp->lock);

	if (terminate)
		iwch_post_terminate(qhp, NULL);

	/*
	 * If disconnect is 1, then we need to initiate a disconnect
	 * on the EP.  This can be a normal close (RTS->CLOSING) or
	 * an abnormal close (RTS/CLOSING->ERROR).
	 */
	if (disconnect) {
		iwch_ep_disconnect(ep, abort, M_NOWAIT);
		put_ep(&ep->com);
	}

	/*
	 * If free is 1, then we've disassociated the EP from the QP
	 * and we need to dereference the EP.
	 */
	if (free)
		put_ep(&ep->com);

	CTR2(KTR_IW_CXGB, "%s exit state %d", __FUNCTION__, qhp->attr.state);