/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>

#include <netinet/in.h>

#include <contrib/rdma/ib_verbs.h>
#include <contrib/rdma/ib_umem.h>
#include <contrib/rdma/ib_user_verbs.h>

#include <cxgb_include.h>
#include <ulp/tom/cxgb_l2t.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>
#include <ulp/iw_cxgb/iw_cxgb_resource.h>
#include <ulp/iw_cxgb/iw_cxgb_user.h>

#define NO_SUPPORT -1

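/*
 * Build a T3 send WR from an ib_send_wr.  For IB_WR_SEND_WITH_IMM the
 * 32-bit immediate rides in the first SGL slot; otherwise the SGL is
 * copied in big-endian form and the payload length accumulated with an
 * overflow check.  The WR size, in 8-byte flits, is returned through
 * flit_cnt.
 */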
static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
                                u8 * flit_cnt)
{
        int i;
        u32 plen;

        switch (wr->opcode) {
        case IB_WR_SEND:
        case IB_WR_SEND_WITH_IMM:
                if (wr->send_flags & IB_SEND_SOLICITED)
                        wqe->send.rdmaop = T3_SEND_WITH_SE;
                else
                        wqe->send.rdmaop = T3_SEND;
                wqe->send.rem_stag = 0;
                break;
#if 0                           /* Not currently supported */
        case TYPE_SEND_INVALIDATE:
        case TYPE_SEND_INVALIDATE_IMMEDIATE:
                wqe->send.rdmaop = T3_SEND_WITH_INV;
                wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
                break;
        case TYPE_SEND_SE_INVALIDATE:
                wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
                wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
                break;
#endif
        default:
                break;
        }
        if (wr->num_sge > T3_MAX_SGE)
                return (-EINVAL);
        wqe->send.reserved[0] = 0;
        wqe->send.reserved[1] = 0;
        wqe->send.reserved[2] = 0;
        if (wr->opcode == IB_WR_SEND_WITH_IMM) {
                plen = 4;
                wqe->send.sgl[0].stag = wr->imm_data;
                wqe->send.sgl[0].len = 0;
                wqe->send.num_sgle = 0;
                *flit_cnt = 5;
        } else {
                plen = 0;
                for (i = 0; i < wr->num_sge; i++) {
                        if ((plen + wr->sg_list[i].length) < plen) {
                                return (-EMSGSIZE);
                        }
                        plen += wr->sg_list[i].length;
                        wqe->send.sgl[i].stag =
                            htobe32(wr->sg_list[i].lkey);
                        wqe->send.sgl[i].len =
                            htobe32(wr->sg_list[i].length);
                        wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
                }
                wqe->send.num_sgle = htobe32(wr->num_sge);
                *flit_cnt = 4 + ((wr->num_sge) << 1);
        }
        wqe->send.plen = htobe32(plen);
        return 0;
}

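/*
 * Build a T3 RDMA write WR.  The sink stag and offset come from
 * wr->wr.rdma; the source is either the 32-bit immediate or the local
 * SGL.  The WR size, in 8-byte flits, is returned through flit_cnt.
 */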
static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
                                 u8 *flit_cnt)
{
        int i;
        u32 plen;

        if (wr->num_sge > T3_MAX_SGE)
                return (-EINVAL);
        wqe->write.rdmaop = T3_RDMA_WRITE;
        wqe->write.reserved[0] = 0;
        wqe->write.reserved[1] = 0;
        wqe->write.reserved[2] = 0;
        wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey);
        wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr);

        if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
                plen = 4;
                wqe->write.sgl[0].stag = wr->imm_data;
                wqe->write.sgl[0].len = 0;
                wqe->write.num_sgle = 0;
                *flit_cnt = 6;
        } else {
                plen = 0;
                for (i = 0; i < wr->num_sge; i++) {
                        if ((plen + wr->sg_list[i].length) < plen) {
                                return (-EMSGSIZE);
                        }
                        plen += wr->sg_list[i].length;
                        wqe->write.sgl[i].stag =
                            htobe32(wr->sg_list[i].lkey);
                        wqe->write.sgl[i].len =
                            htobe32(wr->sg_list[i].length);
                        wqe->write.sgl[i].to =
                            htobe64(wr->sg_list[i].addr);
                }
                wqe->write.num_sgle = htobe32(wr->num_sge);
                *flit_cnt = 5 + ((wr->num_sge) << 1);
        }
        wqe->write.plen = htobe32(plen);
        return 0;
}

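/*
 * Build a T3 RDMA read request WR.  The hardware takes only a single
 * local SGE per read request.
 */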
static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
                                u8 *flit_cnt)
{
        if (wr->num_sge > 1)
                return (-EINVAL);
        wqe->read.rdmaop = T3_READ_REQ;
        wqe->read.reserved[0] = 0;
        wqe->read.reserved[1] = 0;
        wqe->read.reserved[2] = 0;
        wqe->read.rem_stag = htobe32(wr->wr.rdma.rkey);
        wqe->read.rem_to = htobe64(wr->wr.rdma.remote_addr);
        wqe->read.local_stag = htobe32(wr->sg_list[0].lkey);
        wqe->read.local_len = htobe32(wr->sg_list[0].length);
        wqe->read.local_to = htobe64(wr->sg_list[0].addr);
        *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
        return 0;
}

/*
 * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
 */
static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
                            u32 num_sgle, u32 * pbl_addr, u8 * page_size)
{
        int i;
        struct iwch_mr *mhp;
        u32 offset;
        for (i = 0; i < num_sgle; i++) {

                mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
                if (!mhp) {
                        CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
                        return (-EIO);
                }
                if (!mhp->attr.state) {
                        CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
                        return (-EIO);
                }
                if (mhp->attr.zbva) {
                        CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
                        return (-EIO);
                }

                if (sg_list[i].addr < mhp->attr.va_fbo) {
                        CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
                        return (-EINVAL);
                }
                if (sg_list[i].addr + ((u64) sg_list[i].length) <
                    sg_list[i].addr) {
                        CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
                        return (-EINVAL);
                }
                if (sg_list[i].addr + ((u64) sg_list[i].length) >
                    mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
                        CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
                        return (-EINVAL);
                }
                offset = sg_list[i].addr - mhp->attr.va_fbo;
                offset += ((u32) mhp->attr.va_fbo) %
                          (1UL << (12 + mhp->attr.page_size));
                pbl_addr[i] = ((mhp->attr.pbl_addr -
                                rhp->rdev.rnic_info.pbl_base) >> 3) +
                              (offset >> (12 + mhp->attr.page_size));
                page_size[i] = mhp->attr.page_size;
        }
        return 0;
}

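/*
 * Build a T3 receive WR from the ib_recv_wr SGL, zeroing any SGL slots
 * the request does not use.
 */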
static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
                                struct ib_recv_wr *wr)
{
        int i;
        if (wr->num_sge > T3_MAX_SGE)
                return (-EINVAL);
        wqe->recv.num_sgle = htobe32(wr->num_sge);
        for (i = 0; i < wr->num_sge; i++) {
                wqe->recv.sgl[i].stag = htobe32(wr->sg_list[i].lkey);
                wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
                wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
        }
        for (; i < T3_MAX_SGE; i++) {
                wqe->recv.sgl[i].stag = 0;
                wqe->recv.sgl[i].len = 0;
                wqe->recv.sgl[i].to = 0;
        }
        return 0;
}

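/*
 * Post a chain of send work requests to the SQ.  Fails with EINVAL once
 * the QP has moved past RTS; each WR consumes one WQ slot plus a
 * software SQ entry, and the doorbell is rung once after the whole
 * chain is built.  On failure *bad_wr points at the first WR that could
 * not be posted.
 *
 * Minimal usage sketch (hypothetical caller; buf_dma, buf_len, and mr
 * are illustrative names, not from this file):
 *
 *	struct ib_sge sge = { .addr = buf_dma, .length = buf_len,
 *	                      .lkey = mr->lkey };
 *	struct ib_send_wr swr = { .opcode = IB_WR_SEND, .sg_list = &sge,
 *	                          .num_sge = 1,
 *	                          .send_flags = IB_SEND_SIGNALED };
 *	struct ib_send_wr *bad_wr;
 *	error = iwch_post_send(&qhp->ibqp, &swr, &bad_wr);
 */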
int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr)
{
        int err = 0;
        u8 t3_wr_flit_cnt = 0;
        enum t3_wr_opcode t3_wr_opcode = 0;
        enum t3_wr_flags t3_wr_flags;
        struct iwch_qp *qhp;
        u32 idx;
        union t3_wr *wqe;
        u32 num_wrs;
        struct t3_swsq *sqp;

        qhp = to_iwch_qp(ibqp);
        mtx_lock(&qhp->lock);
        if (qhp->attr.state > IWCH_QP_STATE_RTS) {
                mtx_unlock(&qhp->lock);
                return (-EINVAL);
        }
        num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
                  qhp->wq.sq_size_log2);
        if (num_wrs <= 0) {
                mtx_unlock(&qhp->lock);
                return (-ENOMEM);
        }
        while (wr) {
                if (num_wrs == 0) {
                        err = -ENOMEM;
                        *bad_wr = wr;
                        break;
                }
                idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
                wqe = (union t3_wr *) (qhp->wq.queue + idx);
                t3_wr_flags = 0;
                if (wr->send_flags & IB_SEND_SOLICITED)
                        t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
                if (wr->send_flags & IB_SEND_FENCE)
                        t3_wr_flags |= T3_READ_FENCE_FLAG;
                if (wr->send_flags & IB_SEND_SIGNALED)
                        t3_wr_flags |= T3_COMPLETION_FLAG;
                sqp = qhp->wq.sq +
                      Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
                switch (wr->opcode) {
                case IB_WR_SEND:
                case IB_WR_SEND_WITH_IMM:
                        t3_wr_opcode = T3_WR_SEND;
                        err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
                        break;
                case IB_WR_RDMA_WRITE:
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        t3_wr_opcode = T3_WR_WRITE;
                        err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
                        break;
                case IB_WR_RDMA_READ:
                        t3_wr_opcode = T3_WR_READ;
                        t3_wr_flags = 0; /* T3 reads are always signaled */
                        err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
                        if (err)
                                break;
                        sqp->read_len = wqe->read.local_len;
                        if (!qhp->wq.oldest_read)
                                qhp->wq.oldest_read = sqp;
                        break;
                default:
                        CTR2(KTR_IW_CXGB, "%s post of type=%d TBD!", __FUNCTION__,
                             wr->opcode);
                        err = -EINVAL;
                }
                if (err) {
                        *bad_wr = wr;
                        break;
                }
                wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
                sqp->wr_id = wr->wr_id;
                sqp->opcode = wr2opcode(t3_wr_opcode);
                sqp->sq_wptr = qhp->wq.sq_wptr;
                sqp->complete = 0;
                sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);

                build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
                               Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
                               0, t3_wr_flit_cnt);
                CTR5(KTR_IW_CXGB, "%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d",
                     __FUNCTION__, (unsigned long long) wr->wr_id, idx,
                     Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
                     sqp->opcode);
                wr = wr->next;
                num_wrs--;
                ++(qhp->wq.wptr);
                ++(qhp->wq.sq_wptr);
        }
        mtx_unlock(&qhp->lock);
        ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
        return err;
}

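/*
 * Post a chain of receive work requests to the RQ.  One RQ slot is left
 * unused (the free count is taken minus one), the wr_id is stashed in
 * the software RQ for completion processing, and the doorbell is rung
 * once at the end.
 */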
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                      struct ib_recv_wr **bad_wr)
{
        int err = 0;
        struct iwch_qp *qhp;
        u32 idx;
        union t3_wr *wqe;
        u32 num_wrs;

        qhp = to_iwch_qp(ibqp);
        mtx_lock(&qhp->lock);
        if (qhp->attr.state > IWCH_QP_STATE_RTS) {
                mtx_unlock(&qhp->lock);
                return (-EINVAL);
        }
        num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
                            qhp->wq.rq_size_log2) - 1;
        if (!wr) {
                mtx_unlock(&qhp->lock);
                return (-EINVAL);
        }
        while (wr) {
                idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
                wqe = (union t3_wr *) (qhp->wq.queue + idx);
                if (num_wrs)
                        err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
                else
                        err = -ENOMEM;
                if (err) {
                        *bad_wr = wr;
                        break;
                }
                qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
                        wr->wr_id;
                build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
                               Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
                               0, sizeof(struct t3_receive_wr) >> 3);
                CTR6(KTR_IW_CXGB, "%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rq_rptr 0x%x "
                     "wqe %p ", __FUNCTION__, (unsigned long long) wr->wr_id,
                     idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
                ++(qhp->wq.rq_wptr);
                ++(qhp->wq.wptr);
                wr = wr->next;
                num_wrs--;
        }
        mtx_unlock(&qhp->lock);
        ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
        return err;
}

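/*
 * Post a memory-window bind WR.  The backing MR is translated to a PBL
 * address with iwch_sgl2pbl_map() and the bind is pushed through the SQ
 * as a T3_BIND_MW work request.
 */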
int iwch_bind_mw(struct ib_qp *qp,
                             struct ib_mw *mw,
                             struct ib_mw_bind *mw_bind)
{
        struct iwch_dev *rhp;
        struct iwch_mw *mhp;
        struct iwch_qp *qhp;
        union t3_wr *wqe;
        u32 pbl_addr;
        u8 page_size;
        u32 num_wrs;
        struct ib_sge sgl;
        int err = 0;
        enum t3_wr_flags t3_wr_flags;
        u32 idx;
        struct t3_swsq *sqp;

        qhp = to_iwch_qp(qp);
        mhp = to_iwch_mw(mw);
        rhp = qhp->rhp;

        mtx_lock(&qhp->lock);
        if (qhp->attr.state > IWCH_QP_STATE_RTS) {
                mtx_unlock(&qhp->lock);
                return (-EINVAL);
        }
        num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
                            qhp->wq.sq_size_log2);
        if ((num_wrs) <= 0) {
                mtx_unlock(&qhp->lock);
                return (-ENOMEM);
        }
        idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
        CTR4(KTR_IW_CXGB, "%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p", __FUNCTION__, idx,
             mw, mw_bind);
        wqe = (union t3_wr *) (qhp->wq.queue + idx);

        t3_wr_flags = 0;
        if (mw_bind->send_flags & IB_SEND_SIGNALED)
                t3_wr_flags = T3_COMPLETION_FLAG;

        sgl.addr = mw_bind->addr;
        sgl.lkey = mw_bind->mr->lkey;
        sgl.length = mw_bind->length;
        wqe->bind.reserved = 0;
        wqe->bind.type = T3_VA_BASED_TO;

        /* TBD: check perms */
        wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
        wqe->bind.mr_stag = htobe32(mw_bind->mr->lkey);
        wqe->bind.mw_stag = htobe32(mw->rkey);
        wqe->bind.mw_len = htobe32(mw_bind->length);
        wqe->bind.mw_va = htobe64(mw_bind->addr);
        err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
        if (err) {
                mtx_unlock(&qhp->lock);
                return (err);
        }
        wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
        sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
        sqp->wr_id = mw_bind->wr_id;
        sqp->opcode = T3_BIND_MW;
        sqp->sq_wptr = qhp->wq.sq_wptr;
        sqp->complete = 0;
        sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
        wqe->bind.mr_pbl_addr = htobe32(pbl_addr);
        wqe->bind.mr_pagesz = page_size;
        wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
        build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
                       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
                                sizeof(struct t3_bind_mw_wr) >> 3);
        ++(qhp->wq.wptr);
        ++(qhp->wq.sq_wptr);
        mtx_unlock(&qhp->lock);

        ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);

        return err;
}

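/*
 * Map a CQE completion status onto the RDMAP/DDP/MPA layer and error
 * code that a TERMINATE message carries.  Without an rsp_msg this
 * reports a local catastrophic RDMAP error.
 */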
static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
                                    u8 *layer_type, u8 *ecode)
{
        int status = TPT_ERR_INTERNAL_ERR;
        int tagged = 0;
        int opcode = -1;
        int rqtype = 0;
        int send_inv = 0;

        if (rsp_msg) {
                status = CQE_STATUS(rsp_msg->cqe);
                opcode = CQE_OPCODE(rsp_msg->cqe);
                rqtype = RQ_TYPE(rsp_msg->cqe);
                send_inv = (opcode == T3_SEND_WITH_INV) ||
                           (opcode == T3_SEND_WITH_SE_INV);
                tagged = (opcode == T3_RDMA_WRITE) ||
                         (rqtype && (opcode == T3_READ_RESP));
        }

        switch (status) {
        case TPT_ERR_STAG:
                if (send_inv) {
                        *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
                        *ecode = RDMAP_CANT_INV_STAG;
                } else {
                        *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
                        *ecode = RDMAP_INV_STAG;
                }
                break;
        case TPT_ERR_PDID:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
                if ((opcode == T3_SEND_WITH_INV) ||
                    (opcode == T3_SEND_WITH_SE_INV))
                        *ecode = RDMAP_CANT_INV_STAG;
                else
                        *ecode = RDMAP_STAG_NOT_ASSOC;
                break;
        case TPT_ERR_QPID:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
                *ecode = RDMAP_STAG_NOT_ASSOC;
                break;
        case TPT_ERR_ACCESS:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
                *ecode = RDMAP_ACC_VIOL;
                break;
        case TPT_ERR_WRAP:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
                *ecode = RDMAP_TO_WRAP;
                break;
        case TPT_ERR_BOUND:
                if (tagged) {
                        *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
                        *ecode = DDPT_BASE_BOUNDS;
                } else {
                        *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
                        *ecode = RDMAP_BASE_BOUNDS;
                }
                break;
        case TPT_ERR_INVALIDATE_SHARED_MR:
        case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
                *ecode = RDMAP_CANT_INV_STAG;
                break;
        case TPT_ERR_ECC:
        case TPT_ERR_ECC_PSTAG:
        case TPT_ERR_INTERNAL_ERR:
                *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
                *ecode = 0;
                break;
        case TPT_ERR_OUT_OF_RQE:
                *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
                *ecode = DDPU_INV_MSN_NOBUF;
                break;
        case TPT_ERR_PBL_ADDR_BOUND:
                *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
                *ecode = DDPT_BASE_BOUNDS;
                break;
        case TPT_ERR_CRC:
                *layer_type = LAYER_MPA|DDP_LLP;
                *ecode = MPA_CRC_ERR;
                break;
        case TPT_ERR_MARKER:
                *layer_type = LAYER_MPA|DDP_LLP;
                *ecode = MPA_MARKER_ERR;
                break;
        case TPT_ERR_PDU_LEN_ERR:
                *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
                *ecode = DDPU_MSG_TOOBIG;
                break;
        case TPT_ERR_DDP_VERSION:
                if (tagged) {
                        *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
                        *ecode = DDPT_INV_VERS;
                } else {
                        *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
                        *ecode = DDPU_INV_VERS;
                }
                break;
        case TPT_ERR_RDMA_VERSION:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
                *ecode = RDMAP_INV_VERS;
                break;
        case TPT_ERR_OPCODE:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
                *ecode = RDMAP_INV_OPCODE;
                break;
        case TPT_ERR_DDP_QUEUE_NUM:
                *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
                *ecode = DDPU_INV_QN;
                break;
        case TPT_ERR_MSN:
        case TPT_ERR_MSN_GAP:
        case TPT_ERR_MSN_RANGE:
        case TPT_ERR_IRD_OVERFLOW:
                *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
                *ecode = DDPU_INV_MSN_RANGE;
                break;
        case TPT_ERR_TBIT:
                *layer_type = LAYER_DDP|DDP_LOCAL_CATA;
                *ecode = 0;
                break;
        case TPT_ERR_MO:
                *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
                *ecode = DDPU_INV_MO;
                break;
        default:
                *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
                *ecode = 0;
                break;
        }
}

/*
 * This posts a TERMINATE with layer=RDMA, type=catastrophic.
 */
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
{
        union t3_wr *wqe;
        struct terminate_message *term;
        struct mbuf *m;

        CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
        m = m_gethdr(M_NOWAIT, MT_DATA);
        if (!m) {
                log(LOG_ERR, "%s cannot send TERMINATE!\n", __FUNCTION__);
                return (-ENOMEM);
        }
        wqe = mtod(m, union t3_wr *);
        m->m_len = m->m_pkthdr.len = 40;
        memset(wqe, 0, 40);
        wqe->send.rdmaop = T3_TERMINATE;

        /* immediate data length */
        wqe->send.plen = htonl(4);

        /* immediate data starts here. */
        term = (struct terminate_message *)wqe->send.sgl;
        build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
        wqe->send.wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_SEND) |
                V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
        wqe->send.wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(qhp->ep->hwtid));

        m_set_priority(m, CPL_PRIORITY_DATA);
        m_set_sgl(m, NULL);
        m_set_sgllen(m, 0);
        return cxgb_ofld_send(qhp->rhp->rdev.t3cdev_p, m);
}

/*
 * Assumes the qhp lock is held on entry.  The lock is dropped and
 * reacquired (under a QP reference) so that each CQ lock can be taken
 * first, per the cq-then-qp locking hierarchy.
 */
static void __flush_qp(struct iwch_qp *qhp)
{
        struct iwch_cq *rchp, *schp;
        int count;

        rchp = get_chp(qhp->rhp, qhp->attr.rcq);
        schp = get_chp(qhp->rhp, qhp->attr.scq);

        CTR4(KTR_IW_CXGB, "%s qhp %p rchp %p schp %p", __FUNCTION__, qhp, rchp, schp);
        /* take a ref on the qhp since we must release the lock */
        qhp->refcnt++;
        mtx_unlock(&qhp->lock);

        /* locking hierarchy: cq lock first, then qp lock. */
        mtx_lock(&rchp->lock);
        mtx_lock(&qhp->lock);
        cxio_flush_hw_cq(&rchp->cq);
        cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
        cxio_flush_rq(&qhp->wq, &rchp->cq, count);
        mtx_unlock(&qhp->lock);
        mtx_unlock(&rchp->lock);
        (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);

        /* locking hierarchy: cq lock first, then qp lock. */
        mtx_lock(&schp->lock);
        mtx_lock(&qhp->lock);
        cxio_flush_hw_cq(&schp->cq);
        cxio_count_scqes(&schp->cq, &qhp->wq, &count);
        cxio_flush_sq(&qhp->wq, &schp->cq, count);
        mtx_unlock(&qhp->lock);
        mtx_unlock(&schp->lock);
        (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);

        /* deref */
        mtx_lock(&qhp->lock);
        if (--qhp->refcnt == 0)
                wakeup(qhp);
}

static void flush_qp(struct iwch_qp *qhp)
{
        if (qhp->ibqp.uobject)
                cxio_set_wq_in_error(&qhp->wq);
        else
                __flush_qp(qhp);
}


/*
 * Return non-zero if at least one RECV was pre-posted.
 */
static int rqes_posted(struct iwch_qp *qhp)
{
        return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
}

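/*
 * Push the RI (RDMA init) parameters for this QP down to the hardware
 * via cxio_rdma_init(), flagging whether receives were pre-posted
 * (RECVS_POSTED).
 */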
static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
                                enum iwch_qp_attr_mask mask,
                                struct iwch_qp_attributes *attrs)
{
        struct t3_rdma_init_attr init_attr;
        int ret;

        init_attr.tid = qhp->ep->hwtid;
        init_attr.qpid = qhp->wq.qpid;
        init_attr.pdid = qhp->attr.pd;
        init_attr.scqid = qhp->attr.scq;
        init_attr.rcqid = qhp->attr.rcq;
        init_attr.rq_addr = qhp->wq.rq_addr;
        init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
        init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
                qhp->attr.mpa_attr.recv_marker_enabled |
                (qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
                (qhp->attr.mpa_attr.crc_enabled << 2);

        /*
         * XXX - The IWCM doesn't quite handle getting these
         * attrs set before going into RTS.  For now, just turn
         * them on always...
         */
#if 0
        init_attr.qpcaps = qhp->attr.enableRdmaRead |
                (qhp->attr.enableRdmaWrite << 1) |
                (qhp->attr.enableBind << 2) |
                (qhp->attr.enable_stag0_fastreg << 3) |
                (qhp->attr.enable_stag0_fastreg << 4);
#else
        init_attr.qpcaps = 0x1f;
#endif
        init_attr.tcp_emss = qhp->ep->emss;
        init_attr.ord = qhp->attr.max_ord;
        init_attr.ird = qhp->attr.max_ird;
        init_attr.qp_dma_addr = qhp->wq.dma_addr;
        init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
        init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
        init_attr.irs = qhp->ep->rcv_seq;
        CTR5(KTR_IW_CXGB, "%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
             "flags 0x%x qpcaps 0x%x", __FUNCTION__,
             init_attr.rq_addr, init_attr.rq_size,
             init_attr.flags, init_attr.qpcaps);
        ret = cxio_rdma_init(&rhp->rdev, &init_attr);
        CTR2(KTR_IW_CXGB, "%s ret %d", __FUNCTION__, ret);
        return ret;
}

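/*
 * The QP state machine.  Attribute modifies are legal only in IDLE;
 * state transitions cover IDLE/RTS/CLOSING/TERMINATE/ERROR.  "internal"
 * marks transitions initiated by the driver rather than the consumer.
 * Terminate, disconnect, and the final EP deref are deferred until the
 * QP lock has been dropped.
 *
 * Illustrative driver-internal call (a sketch, not a quote from this
 * file) to force a QP into ERROR:
 *
 *	struct iwch_qp_attributes attrs;
 *
 *	attrs.next_state = IWCH_QP_STATE_ERROR;
 *	iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
 */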
int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
                                enum iwch_qp_attr_mask mask,
                                struct iwch_qp_attributes *attrs,
                                int internal)
{
        int ret = 0;
        struct iwch_qp_attributes newattr = qhp->attr;
        int disconnect = 0;
        int terminate = 0;
        int abort = 0;
        int free = 0;
        struct iwch_ep *ep = NULL;

        CTR6(KTR_IW_CXGB, "%s qhp %p qpid 0x%x ep %p state %d -> %d", __FUNCTION__,
             qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
             (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);

        mtx_lock(&qhp->lock);

        /* Process attr changes if in IDLE */
        if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
                if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
                        ret = -EIO;
                        goto out;
                }
                if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
                        newattr.enable_rdma_read = attrs->enable_rdma_read;
                if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
                        newattr.enable_rdma_write = attrs->enable_rdma_write;
                if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
                        newattr.enable_bind = attrs->enable_bind;
                if (mask & IWCH_QP_ATTR_MAX_ORD) {
                        if (attrs->max_ord >
                            rhp->attr.max_rdma_read_qp_depth) {
                                ret = -EINVAL;
                                goto out;
                        }
                        newattr.max_ord = attrs->max_ord;
                }
                if (mask & IWCH_QP_ATTR_MAX_IRD) {
                        if (attrs->max_ird >
                            rhp->attr.max_rdma_reads_per_qp) {
                                ret = -EINVAL;
                                goto out;
                        }
                        newattr.max_ird = attrs->max_ird;
                }
                qhp->attr = newattr;
        }

        if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
                goto out;
        if (qhp->attr.state == attrs->next_state)
                goto out;

        switch (qhp->attr.state) {
        case IWCH_QP_STATE_IDLE:
                switch (attrs->next_state) {
                case IWCH_QP_STATE_RTS:
                        if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
                                ret = -EINVAL;
                                goto out;
                        }
                        if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
                                ret = -EINVAL;
                                goto out;
                        }
                        qhp->attr.mpa_attr = attrs->mpa_attr;
                        qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
                        qhp->ep = qhp->attr.llp_stream_handle;
                        qhp->attr.state = IWCH_QP_STATE_RTS;

                        /*
                         * Ref the endpoint here and deref when we
                         * disassociate the endpoint from the QP.  This
                         * happens in CLOSING->IDLE transition or *->ERROR
                         * transition.
                         */
                        get_ep(&qhp->ep->com);
                        mtx_unlock(&qhp->lock);
                        ret = rdma_init(rhp, qhp, mask, attrs);
                        mtx_lock(&qhp->lock);
                        if (ret)
                                goto err;
                        break;
                case IWCH_QP_STATE_ERROR:
                        qhp->attr.state = IWCH_QP_STATE_ERROR;
                        flush_qp(qhp);
                        break;
                default:
                        ret = -EINVAL;
                        goto out;
                }
                break;
        case IWCH_QP_STATE_RTS:
                switch (attrs->next_state) {
                case IWCH_QP_STATE_CLOSING:
                        PANIC_IF(atomic_load_acq_int(&qhp->ep->com.refcount) < 2);
                        qhp->attr.state = IWCH_QP_STATE_CLOSING;
                        if (!internal) {
                                abort = 0;
                                disconnect = 1;
                                ep = qhp->ep;
                        }
                        flush_qp(qhp);
                        break;
                case IWCH_QP_STATE_TERMINATE:
                        qhp->attr.state = IWCH_QP_STATE_TERMINATE;
                        if (qhp->ibqp.uobject)
                                cxio_set_wq_in_error(&qhp->wq);
                        if (!internal)
                                terminate = 1;
                        break;
                case IWCH_QP_STATE_ERROR:
                        qhp->attr.state = IWCH_QP_STATE_ERROR;
                        if (!internal) {
                                abort = 1;
                                disconnect = 1;
                                ep = qhp->ep;
                        }
                        goto err;
                        break;
                default:
                        ret = -EINVAL;
                        goto out;
                }
                break;
        case IWCH_QP_STATE_CLOSING:
                if (!internal) {
                        ret = -EINVAL;
                        goto out;
                }
                switch (attrs->next_state) {
                        case IWCH_QP_STATE_IDLE:
                                qhp->attr.state = IWCH_QP_STATE_IDLE;
                                qhp->attr.llp_stream_handle = NULL;
                                put_ep(&qhp->ep->com);
                                qhp->ep = NULL;
                                wakeup(qhp);
                                break;
                        case IWCH_QP_STATE_ERROR:
                                disconnect = 1;
                                goto err;
                        default:
                                ret = -EINVAL;
                                goto err;
                }
                break;
        case IWCH_QP_STATE_ERROR:
                if (attrs->next_state != IWCH_QP_STATE_IDLE) {
                        ret = -EINVAL;
                        goto out;
                }

                if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
                    !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
                        ret = -EINVAL;
                        goto out;
                }
                qhp->attr.state = IWCH_QP_STATE_IDLE;
                memset(&qhp->attr, 0, sizeof(qhp->attr));
                break;
        case IWCH_QP_STATE_TERMINATE:
                if (!internal) {
                        ret = -EINVAL;
                        goto out;
                }
                goto err;
                break;
        default:
                log(LOG_ERR, "%s in a bad state %d\n",
                       __FUNCTION__, qhp->attr.state);
                ret = -EINVAL;
                goto err;
                break;
        }
        goto out;
err:
        CTR3(KTR_IW_CXGB, "%s disassociating ep %p qpid 0x%x", __FUNCTION__, qhp->ep,
             qhp->wq.qpid);

        /* disassociate the LLP connection */
        qhp->attr.llp_stream_handle = NULL;
        ep = qhp->ep;
        qhp->ep = NULL;
        qhp->attr.state = IWCH_QP_STATE_ERROR;
        free = 1;
        wakeup(qhp);
        PANIC_IF(!ep);
        flush_qp(qhp);
out:
        mtx_unlock(&qhp->lock);

        if (terminate)
                iwch_post_terminate(qhp, NULL);

        /*
         * If disconnect is 1, then we need to initiate a disconnect
         * on the EP.  This can be a normal close (RTS->CLOSING) or
         * an abnormal close (RTS/CLOSING->ERROR).
         */
        if (disconnect)
                iwch_ep_disconnect(ep, abort, M_NOWAIT);

        /*
         * If free is 1, then we've disassociated the EP from the QP
         * and we need to dereference the EP.
         */
        if (free)
                put_ep(&ep->com);

        CTR2(KTR_IW_CXGB, "%s exit state %d", __FUNCTION__, qhp->attr.state);
        return ret;
}

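/*
 * Pause and restart delivery on the TID backing a QP.  The QP lock
 * protects the QP_QUIESCED flag; callers must not already hold it.
 */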
static int quiesce_qp(struct iwch_qp *qhp)
{
        mtx_lock(&qhp->lock);
        iwch_quiesce_tid(qhp->ep);
        qhp->flags |= QP_QUIESCED;
        mtx_unlock(&qhp->lock);
        return 0;
}

static int resume_qp(struct iwch_qp *qhp)
{
        mtx_lock(&qhp->lock);
        iwch_resume_tid(qhp->ep);
        qhp->flags &= ~QP_QUIESCED;
        mtx_unlock(&qhp->lock);
        return 0;
}

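/*
 * Quiesce (or resume) every QP whose send or receive CQ is the given
 * CQ.
 */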
int iwch_quiesce_qps(struct iwch_cq *chp)
{
        int i;
        struct iwch_qp *qhp;

        for (i = 0; i < T3_MAX_NUM_QP; i++) {
                qhp = get_qhp(chp->rhp, i);
                if (!qhp)
                        continue;
                if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
                        quiesce_qp(qhp);
                        continue;
                }
                if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
                        quiesce_qp(qhp);
        }
        return 0;
}

int iwch_resume_qps(struct iwch_cq *chp)
{
        int i;
        struct iwch_qp *qhp;

        for (i = 0; i < T3_MAX_NUM_QP; i++) {
                qhp = get_qhp(chp->rhp, i);
                if (!qhp)
                        continue;
                if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
                        resume_qp(qhp);
                        continue;
                }
                if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
                        resume_qp(qhp);
        }
        return 0;
}