/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#if HAVE_CONFIG_H
#  include <config.h>
#endif /* HAVE_CONFIG_H */

#include <stdlib.h>
#include <netinet/in.h>
#include <pthread.h>
#include <string.h>

#include "mthca.h"
#include "doorbell.h"
#include "wqe.h"

enum {
	MTHCA_SEND_DOORBELL_FENCE = 1 << 5
};

static const uint8_t mthca_opcode[] = {
	[IBV_WR_SEND]                 = MTHCA_OPCODE_SEND,
	[IBV_WR_SEND_WITH_IMM]        = MTHCA_OPCODE_SEND_IMM,
	[IBV_WR_RDMA_WRITE]           = MTHCA_OPCODE_RDMA_WRITE,
	[IBV_WR_RDMA_WRITE_WITH_IMM]  = MTHCA_OPCODE_RDMA_WRITE_IMM,
	[IBV_WR_RDMA_READ]            = MTHCA_OPCODE_RDMA_READ,
	[IBV_WR_ATOMIC_CMP_AND_SWP]   = MTHCA_OPCODE_ATOMIC_CS,
	[IBV_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA,
};

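/*
 * WQE layout: receive WQEs start at the beginning of qp->buf and send
 * WQEs start at qp->send_wqe_offset.  Each queue uses fixed-size slots
 * of 1 << wqe_shift bytes, so slot n is located with a simple shift.
 */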
static void *get_recv_wqe(struct mthca_qp *qp, int n)
{
	return qp->buf.buf + (n << qp->rq.wqe_shift);
}

static void *get_send_wqe(struct mthca_qp *qp, int n)
{
	return qp->buf.buf + qp->send_wqe_offset + (n << qp->sq.wqe_shift);
}

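/*
 * Reset the producer/consumer indices of both work queues and point
 * "last" at the final WQE slot, so that the first WQE posted after a
 * reset is linked from the end of the ring.
 */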
void mthca_init_qp_indices(struct mthca_qp *qp)
{
	qp->sq.next_ind  = 0;
	qp->sq.last_comp = qp->sq.max - 1;
	qp->sq.head      = 0;
	qp->sq.tail      = 0;
	qp->sq.last      = get_send_wqe(qp, qp->sq.max - 1);

	qp->rq.next_ind  = 0;
	qp->rq.last_comp = qp->rq.max - 1;
	qp->rq.head      = 0;
	qp->rq.tail      = 0;
	qp->rq.last      = get_recv_wqe(qp, qp->rq.max - 1);
}

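/*
 * Check whether posting nreq more WQEs would overflow the work queue.
 * The unlocked check is cheap; if the queue looks full, retake the
 * snapshot under the CQ lock, since the tail is only advanced by the
 * CQ poll path while that lock is held.
 */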
static inline int wq_overflow(struct mthca_wq *wq, int nreq, struct mthca_cq *cq)
{
	unsigned cur;

	cur = wq->head - wq->tail;
	if (cur + nreq < wq->max)
		return 0;

	pthread_spin_lock(&cq->lock);
	cur = wq->head - wq->tail;
	pthread_spin_unlock(&cq->lock);

	return cur + nreq >= wq->max;
}

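/*
 * Post a list of send work requests on a Tavor-mode QP: build each WQE
 * in the send ring, link it from the previous WQE through nda_op/ee_nds,
 * and ring the send doorbell once for the whole chain.
 */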
int mthca_tavor_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
			  struct ibv_send_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	void *wqe, *prev_wqe;
	int ind;
	int nreq;
	int ret = 0;
	int size;
	int size0 = 0;
	int i;
	/*
	 * f0 and op0 cannot be used unless nreq > 0, which means this
	 * function makes it through the loop at least once.  So the
	 * code inside the if (!size0) will be executed, and f0 and
	 * op0 will be initialized.  So any gcc warning about "may be
	 * used uninitialized" is bogus.
	 */
	uint32_t f0;
	uint32_t op0;

	pthread_spin_lock(&qp->sq.lock);

	ind = qp->sq.next_ind;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_send_wqe(qp, ind);
		prev_wqe = qp->sq.last;
		qp->sq.last = wqe;

		((struct mthca_next_seg *) wqe)->nda_op = 0;
		((struct mthca_next_seg *) wqe)->ee_nds = 0;
		((struct mthca_next_seg *) wqe)->flags =
			((wr->send_flags & IBV_SEND_SIGNALED) ?
			 htonl(MTHCA_NEXT_CQ_UPDATE) : 0) |
			((wr->send_flags & IBV_SEND_SOLICITED) ?
			 htonl(MTHCA_NEXT_SOLICIT) : 0) |
			htonl(1);
		if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
		    wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
			((struct mthca_next_seg *) wqe)->imm = wr->imm_data;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		switch (ibqp->qp_type) {
		case IBV_QPT_RC:
			switch (wr->opcode) {
			case IBV_WR_ATOMIC_CMP_AND_SWP:
			case IBV_WR_ATOMIC_FETCH_AND_ADD:
				((struct mthca_raddr_seg *) wqe)->raddr =
					htonll(wr->wr.atomic.remote_addr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					htonl(wr->wr.atomic.rkey);
				((struct mthca_raddr_seg *) wqe)->reserved = 0;

				wqe += sizeof (struct mthca_raddr_seg);

				if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						htonll(wr->wr.atomic.swap);
					((struct mthca_atomic_seg *) wqe)->compare =
						htonll(wr->wr.atomic.compare_add);
				} else {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						htonll(wr->wr.atomic.compare_add);
					((struct mthca_atomic_seg *) wqe)->compare = 0;
				}

				wqe += sizeof (struct mthca_atomic_seg);
				size += (sizeof (struct mthca_raddr_seg) +
					 sizeof (struct mthca_atomic_seg)) / 16;
				break;

			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
			case IBV_WR_RDMA_READ:
				((struct mthca_raddr_seg *) wqe)->raddr =
					htonll(wr->wr.rdma.remote_addr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					htonl(wr->wr.rdma.rkey);
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IBV_QPT_UC:
			switch (wr->opcode) {
			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
				((struct mthca_raddr_seg *) wqe)->raddr =
					htonll(wr->wr.rdma.remote_addr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					htonl(wr->wr.rdma.rkey);
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IBV_QPT_UD:
			((struct mthca_tavor_ud_seg *) wqe)->lkey =
				htonl(to_mah(wr->wr.ud.ah)->key);
			((struct mthca_tavor_ud_seg *) wqe)->av_addr =
				htonll((uintptr_t) to_mah(wr->wr.ud.ah)->av);
			((struct mthca_tavor_ud_seg *) wqe)->dqpn =
				htonl(wr->wr.ud.remote_qpn);
			((struct mthca_tavor_ud_seg *) wqe)->qkey =
				htonl(wr->wr.ud.remote_qkey);

			wqe += sizeof (struct mthca_tavor_ud_seg);
			size += sizeof (struct mthca_tavor_ud_seg) / 16;
			break;

		default:
			break;
		}

		if (wr->num_sge > qp->sq.max_gs) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		if (wr->send_flags & IBV_SEND_INLINE) {
			if (wr->num_sge) {
				struct mthca_inline_seg *seg = wqe;
				int s = 0;

				wqe += sizeof *seg;
				for (i = 0; i < wr->num_sge; ++i) {
					struct ibv_sge *sge = &wr->sg_list[i];

					s += sge->length;

					if (s > qp->max_inline_data) {
						ret = -1;
						*bad_wr = wr;
						goto out;
					}

					memcpy(wqe, (void *) (intptr_t) sge->addr,
					       sge->length);
					wqe += sge->length;
				}

				seg->byte_count = htonl(MTHCA_INLINE_SEG | s);
				size += align(s + sizeof *seg, 16) / 16;
			}
		} else {
			struct mthca_data_seg *seg;

			for (i = 0; i < wr->num_sge; ++i) {
				seg = wqe;
				seg->byte_count = htonl(wr->sg_list[i].length);
				seg->lkey = htonl(wr->sg_list[i].lkey);
				seg->addr = htonll(wr->sg_list[i].addr);
				wqe += sizeof *seg;
			}

			size += wr->num_sge * (sizeof *seg / 16);
		}

		qp->wrid[ind + qp->rq.max] = wr->wr_id;

		if (wr->opcode >= sizeof mthca_opcode / sizeof mthca_opcode[0]) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		((struct mthca_next_seg *) prev_wqe)->nda_op =
			htonl(((ind << qp->sq.wqe_shift) +
			       qp->send_wqe_offset) |
			      mthca_opcode[wr->opcode]);
		/*
		 * Make sure that nda_op is written before setting ee_nds.
		 */
		wmb();
		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			htonl((size0 ? 0 : MTHCA_NEXT_DBD) | size |
			      ((wr->send_flags & IBV_SEND_FENCE) ?
			       MTHCA_NEXT_FENCE : 0));

		if (!size0) {
			size0 = size;
			op0   = mthca_opcode[wr->opcode];
			f0    = wr->send_flags & IBV_SEND_FENCE ?
				MTHCA_SEND_DOORBELL_FENCE : 0;
		}

		++ind;
		if (ind >= qp->sq.max)
			ind -= qp->sq.max;
	}

out:
	if (nreq) {
		uint32_t doorbell[2];

		doorbell[0] = htonl(((qp->sq.next_ind << qp->sq.wqe_shift) +
				     qp->send_wqe_offset) | f0 | op0);
		doorbell[1] = htonl((ibqp->qp_num << 8) | size0);

		mthca_write64(doorbell, to_mctx(ibqp->context), MTHCA_SEND_DOORBELL);
	}

	qp->sq.next_ind = ind;
	qp->sq.head    += nreq;

	pthread_spin_unlock(&qp->sq.lock);
	return ret;
}

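/*
 * Post receive work requests on a Tavor-mode QP.  The receive doorbell
 * only has room for a small WQE count, so long chains are broken up and
 * a doorbell is rung every MTHCA_TAVOR_MAX_WQES_PER_RECV_DB WQEs.
 */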
int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
			  struct ibv_recv_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	uint32_t doorbell[2];
	int ret = 0;
	int nreq;
	int i;
	int size;
	int size0 = 0;
	int ind;
	void *wqe;
	void *prev_wqe;

	pthread_spin_lock(&qp->rq.lock);

	ind = qp->rq.next_ind;

	for (nreq = 0; wr; wr = wr->next) {
		if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_recv_wqe(qp, ind);
		prev_wqe = qp->rq.last;
		qp->rq.last = wqe;

		((struct mthca_next_seg *) wqe)->ee_nds =
			htonl(MTHCA_NEXT_DBD);
		((struct mthca_next_seg *) wqe)->flags =
			htonl(MTHCA_NEXT_CQ_UPDATE);

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		if (wr->num_sge > qp->rq.max_gs) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < wr->num_sge; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				htonl(wr->sg_list[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				htonl(wr->sg_list[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				htonll(wr->sg_list[i].addr);
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		qp->wrid[ind] = wr->wr_id;

		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			htonl(MTHCA_NEXT_DBD | size);

		if (!size0)
			size0 = size;

		++ind;
		if (ind >= qp->rq.max)
			ind -= qp->rq.max;

		++nreq;
		if (nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB) {
			nreq = 0;

			doorbell[0] = htonl((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
			doorbell[1] = htonl(ibqp->qp_num << 8);

			/*
			 * Make sure that descriptors are written
			 * before doorbell is rung.
			 */
			wmb();

			mthca_write64(doorbell, to_mctx(ibqp->context), MTHCA_RECV_DOORBELL);

			qp->rq.next_ind = ind;
			qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
			size0 = 0;
		}
	}

out:
	if (nreq) {
		doorbell[0] = htonl((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
		doorbell[1] = htonl((ibqp->qp_num << 8) | nreq);

		/*
		 * Make sure that descriptors are written before
		 * doorbell is rung.
		 */
		wmb();

		mthca_write64(doorbell, to_mctx(ibqp->context), MTHCA_RECV_DOORBELL);
	}

	qp->rq.next_ind = ind;
	qp->rq.head    += nreq;

	pthread_spin_unlock(&qp->rq.lock);
	return ret;
}

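/*
 * Post send work requests on an Arbel (mem-free) QP.  Besides the MMIO
 * doorbell, the doorbell record in memory (*qp->sq.db) has to be
 * updated, and at most MTHCA_ARBEL_MAX_WQES_PER_SEND_DB WQEs can be
 * announced with a single doorbell, so long chains are split up.
 */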
int mthca_arbel_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
			  struct ibv_send_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	uint32_t doorbell[2];
	void *wqe, *prev_wqe;
	int ind;
	int nreq;
	int ret = 0;
	int size;
	int size0 = 0;
	int i;
	/*
	 * f0 and op0 cannot be used unless nreq > 0, which means this
	 * function makes it through the loop at least once.  So the
	 * code inside the if (!size0) will be executed, and f0 and
	 * op0 will be initialized.  So any gcc warning about "may be
	 * used uninitialized" is bogus.
	 */
	uint32_t f0;
	uint32_t op0;

	pthread_spin_lock(&qp->sq.lock);

	/* XXX check that state is OK to post send */

	ind = qp->sq.head & (qp->sq.max - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB) {
			nreq = 0;

			doorbell[0] = htonl((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
					    ((qp->sq.head & 0xffff) << 8) | f0 | op0);
			doorbell[1] = htonl((ibqp->qp_num << 8) | size0);

			qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;

			/*
			 * Make sure that descriptors are written before
			 * doorbell record.
			 */
			wmb();
			*qp->sq.db = htonl(qp->sq.head & 0xffff);

			/*
			 * Make sure doorbell record is written before we
			 * write MMIO send doorbell.
			 */
			wmb();
			mthca_write64(doorbell, to_mctx(ibqp->context), MTHCA_SEND_DOORBELL);

			size0 = 0;
		}

		if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_send_wqe(qp, ind);
		prev_wqe = qp->sq.last;
		qp->sq.last = wqe;

		((struct mthca_next_seg *) wqe)->flags =
			((wr->send_flags & IBV_SEND_SIGNALED) ?
			 htonl(MTHCA_NEXT_CQ_UPDATE) : 0) |
			((wr->send_flags & IBV_SEND_SOLICITED) ?
			 htonl(MTHCA_NEXT_SOLICIT) : 0) |
			htonl(1);
		if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
		    wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
			((struct mthca_next_seg *) wqe)->imm = wr->imm_data;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		switch (ibqp->qp_type) {
		case IBV_QPT_RC:
			switch (wr->opcode) {
			case IBV_WR_ATOMIC_CMP_AND_SWP:
			case IBV_WR_ATOMIC_FETCH_AND_ADD:
				((struct mthca_raddr_seg *) wqe)->raddr =
					htonll(wr->wr.atomic.remote_addr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					htonl(wr->wr.atomic.rkey);
				((struct mthca_raddr_seg *) wqe)->reserved = 0;

				wqe += sizeof (struct mthca_raddr_seg);

				if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						htonll(wr->wr.atomic.swap);
					((struct mthca_atomic_seg *) wqe)->compare =
						htonll(wr->wr.atomic.compare_add);
				} else {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						htonll(wr->wr.atomic.compare_add);
					((struct mthca_atomic_seg *) wqe)->compare = 0;
				}

				wqe += sizeof (struct mthca_atomic_seg);
				size += (sizeof (struct mthca_raddr_seg) +
					 sizeof (struct mthca_atomic_seg)) / 16;
				break;

			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
			case IBV_WR_RDMA_READ:
				((struct mthca_raddr_seg *) wqe)->raddr =
					htonll(wr->wr.rdma.remote_addr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					htonl(wr->wr.rdma.rkey);
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IBV_QPT_UC:
			switch (wr->opcode) {
			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
				((struct mthca_raddr_seg *) wqe)->raddr =
					htonll(wr->wr.rdma.remote_addr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					htonl(wr->wr.rdma.rkey);
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case IBV_QPT_UD:
			memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
			       to_mah(wr->wr.ud.ah)->av, sizeof (struct mthca_av));
			((struct mthca_arbel_ud_seg *) wqe)->dqpn =
				htonl(wr->wr.ud.remote_qpn);
			((struct mthca_arbel_ud_seg *) wqe)->qkey =
				htonl(wr->wr.ud.remote_qkey);

			wqe += sizeof (struct mthca_arbel_ud_seg);
			size += sizeof (struct mthca_arbel_ud_seg) / 16;
			break;

		default:
			break;
		}

		if (wr->num_sge > qp->sq.max_gs) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		if (wr->send_flags & IBV_SEND_INLINE) {
			if (wr->num_sge) {
				struct mthca_inline_seg *seg = wqe;
				int s = 0;

				wqe += sizeof *seg;
				for (i = 0; i < wr->num_sge; ++i) {
					struct ibv_sge *sge = &wr->sg_list[i];

					s += sge->length;

					if (s > qp->max_inline_data) {
						ret = -1;
						*bad_wr = wr;
						goto out;
					}

					memcpy(wqe, (void *) (uintptr_t) sge->addr,
					       sge->length);
					wqe += sge->length;
				}

				seg->byte_count = htonl(MTHCA_INLINE_SEG | s);
				size += align(s + sizeof *seg, 16) / 16;
			}
		} else {
			struct mthca_data_seg *seg;

			for (i = 0; i < wr->num_sge; ++i) {
				seg = wqe;
				seg->byte_count = htonl(wr->sg_list[i].length);
				seg->lkey = htonl(wr->sg_list[i].lkey);
				seg->addr = htonll(wr->sg_list[i].addr);
				wqe += sizeof *seg;
			}

			size += wr->num_sge * (sizeof *seg / 16);
		}

		qp->wrid[ind + qp->rq.max] = wr->wr_id;

		if (wr->opcode >= sizeof mthca_opcode / sizeof mthca_opcode[0]) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		((struct mthca_next_seg *) prev_wqe)->nda_op =
			htonl(((ind << qp->sq.wqe_shift) +
			       qp->send_wqe_offset) |
			      mthca_opcode[wr->opcode]);
		wmb();
		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			htonl(MTHCA_NEXT_DBD | size |
			      ((wr->send_flags & IBV_SEND_FENCE) ?
			       MTHCA_NEXT_FENCE : 0));

		if (!size0) {
			size0 = size;
			op0   = mthca_opcode[wr->opcode];
			f0    = wr->send_flags & IBV_SEND_FENCE ?
				MTHCA_SEND_DOORBELL_FENCE : 0;
		}

		++ind;
		if (ind >= qp->sq.max)
			ind -= qp->sq.max;
	}

out:
	if (nreq) {
		doorbell[0] = htonl((nreq << 24) |
				    ((qp->sq.head & 0xffff) << 8) |
				    f0 | op0);
		doorbell[1] = htonl((ibqp->qp_num << 8) | size0);

		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();
		*qp->sq.db = htonl(qp->sq.head & 0xffff);

		/*
		 * Make sure doorbell record is written before we
		 * write MMIO send doorbell.
		 */
		wmb();
		mthca_write64(doorbell, to_mctx(ibqp->context), MTHCA_SEND_DOORBELL);
	}

	pthread_spin_unlock(&qp->sq.lock);
	return ret;
}

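/*
 * Post receive work requests on an Arbel (mem-free) QP.  No MMIO
 * doorbell is needed; updating the receive doorbell record
 * (*qp->rq.db) with the new head is enough for the HCA to pick up
 * the new WQEs.
 */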
int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
			  struct ibv_recv_wr **bad_wr)
{
	struct mthca_qp *qp = to_mqp(ibqp);
	int ret = 0;
	int nreq;
	int ind;
	int i;
	void *wqe;

	pthread_spin_lock(&qp->rq.lock);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.head & (qp->rq.max - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_recv_wqe(qp, ind);

		((struct mthca_next_seg *) wqe)->flags = 0;

		wqe += sizeof (struct mthca_next_seg);

		if (wr->num_sge > qp->rq.max_gs) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < wr->num_sge; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				htonl(wr->sg_list[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				htonl(wr->sg_list[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				htonll(wr->sg_list[i].addr);
			wqe += sizeof (struct mthca_data_seg);
		}

		if (i < qp->rq.max_gs) {
			((struct mthca_data_seg *) wqe)->byte_count = 0;
			((struct mthca_data_seg *) wqe)->lkey = htonl(MTHCA_INVAL_LKEY);
			((struct mthca_data_seg *) wqe)->addr = 0;
		}

		qp->wrid[ind] = wr->wr_id;

		++ind;
		if (ind >= qp->rq.max)
			ind -= qp->rq.max;
	}
out:
	if (nreq) {
		qp->rq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();
		*qp->rq.db = htonl(qp->rq.head & 0xffff);
	}

	pthread_spin_unlock(&qp->rq.lock);
	return ret;
}

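/*
 * Size and allocate the WQE buffer for a new QP: compute the largest
 * WQE each queue may need (scatter/gather entries plus any
 * transport-specific segments), round the WQE size up to a power of
 * two to get wqe_shift, and pre-link the WQE chains for mem-free
 * (Arbel) HCAs.
 */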
int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
		       enum ibv_qp_type type, struct mthca_qp *qp)
{
	int size;
	int max_sq_sge;
	struct mthca_next_seg *next;
	int i;

	qp->rq.max_gs = cap->max_recv_sge;
	qp->sq.max_gs = cap->max_send_sge;
	max_sq_sge = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),
			   sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg);
	if (max_sq_sge < cap->max_send_sge)
		max_sq_sge = cap->max_send_sge;

	qp->wrid = malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t));
	if (!qp->wrid)
		return -1;

	size = sizeof (struct mthca_next_seg) +
		qp->rq.max_gs * sizeof (struct mthca_data_seg);

	for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
	     qp->rq.wqe_shift++)
		; /* nothing */

	size = max_sq_sge * sizeof (struct mthca_data_seg);
	switch (type) {
	case IBV_QPT_UD:
		size += mthca_is_memfree(pd->context) ?
			sizeof (struct mthca_arbel_ud_seg) :
			sizeof (struct mthca_tavor_ud_seg);
		break;

	case IBV_QPT_UC:
		size += sizeof (struct mthca_raddr_seg);
		break;

	case IBV_QPT_RC:
		size += sizeof (struct mthca_raddr_seg);
		/*
		 * An atomic op will require an atomic segment, a
		 * remote address segment and one scatter entry.
		 */
		if (size < (sizeof (struct mthca_atomic_seg) +
			    sizeof (struct mthca_raddr_seg) +
			    sizeof (struct mthca_data_seg)))
			size = (sizeof (struct mthca_atomic_seg) +
				sizeof (struct mthca_raddr_seg) +
				sizeof (struct mthca_data_seg));
		break;

	default:
		break;
	}

	/* Make sure that we have enough space for a bind request */
	if (size < sizeof (struct mthca_bind_seg))
		size = sizeof (struct mthca_bind_seg);

	size += sizeof (struct mthca_next_seg);

	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
	     qp->sq.wqe_shift++)
		; /* nothing */

	qp->send_wqe_offset = align(qp->rq.max << qp->rq.wqe_shift,
				    1 << qp->sq.wqe_shift);

	qp->buf_size = qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift);

	if (mthca_alloc_buf(&qp->buf,
			    align(qp->buf_size, to_mdev(pd->context->device)->page_size),
			    to_mdev(pd->context->device)->page_size)) {
		free(qp->wrid);
		return -1;
	}

	memset(qp->buf.buf, 0, qp->buf_size);

	if (mthca_is_memfree(pd->context)) {
		struct mthca_data_seg *scatter;
		uint32_t sz;

		sz = htonl((sizeof (struct mthca_next_seg) +
			    qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16);

		for (i = 0; i < qp->rq.max; ++i) {
			next = get_recv_wqe(qp, i);
			next->nda_op = htonl(((i + 1) & (qp->rq.max - 1)) <<
					     qp->rq.wqe_shift);
			next->ee_nds = sz;

			for (scatter = (void *) (next + 1);
			     (void *) scatter < (void *) next + (1 << qp->rq.wqe_shift);
			     ++scatter)
				scatter->lkey = htonl(MTHCA_INVAL_LKEY);
		}

		for (i = 0; i < qp->sq.max; ++i) {
			next = get_send_wqe(qp, i);
			next->nda_op = htonl((((i + 1) & (qp->sq.max - 1)) <<
					      qp->sq.wqe_shift) +
					     qp->send_wqe_offset);
		}
	} else {
		for (i = 0; i < qp->rq.max; ++i) {
			next = get_recv_wqe(qp, i);
			next->nda_op = htonl((((i + 1) % qp->rq.max) <<
					      qp->rq.wqe_shift) | 1);
		}
	}

	qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
	qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);

	return 0;
}

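/*
 * Userspace QPN -> struct mthca_qp lookup, used by the CQ poll code to
 * map a completion back to its QP.  The table is two-level: each chunk
 * of qp_table[] is allocated on demand and refcounted by the number of
 * QPs it currently holds.
 */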
struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (ctx->qp_table[tind].refcnt)
		return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
	else
		return NULL;
}

int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (!ctx->qp_table[tind].refcnt) {
		ctx->qp_table[tind].table = calloc(ctx->qp_table_mask + 1,
						   sizeof (struct mthca_qp *));
		if (!ctx->qp_table[tind].table)
			return -1;
	}

	++ctx->qp_table[tind].refcnt;
	ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
	return 0;
}

void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (!--ctx->qp_table[tind].refcnt)
		free(ctx->qp_table[tind].table);
	else
		ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
}

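/*
 * For a WQE that completed in error, report whether it owned a doorbell
 * (dbd) and compute the next WQE in the chain from its nda_op/ee_nds
 * fields, so the CQ error handling can continue down the list.
 */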
int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
		       int index, int *dbd, uint32_t *new_wqe)
{
	struct mthca_next_seg *next;

	/*
	 * For SRQs, all receive WQEs generate a CQE, so we're always
	 * at the end of the doorbell chain.
	 */
	if (qp->ibv_qp.srq && !is_send) {
		*new_wqe = 0;
		return 0;
	}

	if (is_send)
		next = get_send_wqe(qp, index);
	else
		next = get_recv_wqe(qp, index);

	*dbd = !!(next->ee_nds & htonl(MTHCA_NEXT_DBD));
	if (next->ee_nds & htonl(0x3f))
		*new_wqe = (next->nda_op & htonl(~0x3f)) |
			(next->ee_nds & htonl(0x3f));