/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "icl_iser.h"

static MALLOC_DEFINE(M_ISER_VERBS, "iser_verbs", "iser verbs backend");
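/*
 * Upper bound on the number of completions drained per taskqueue pass;
 * see iser_cq_tasklet_fn below, which stops polling once this many
 * completions were handled in one invocation.
 */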
static int iser_cq_poll_limit = 512;
static void
iser_cq_event_callback(struct ib_event *cause, void *context)
{
	ISER_ERR("got cq event %d", cause->event);
}

static void
iser_qp_event_callback(struct ib_event *cause, void *context)
{
	ISER_ERR("got qp event %d", cause->event);
}

static void
iser_event_handler(struct ib_event_handler *handler,
		   struct ib_event *event)
{
	ISER_ERR("async event %d on device %s port %d",
		 event->event, event->device->name,
		 event->element.port_num);
}
/**
 * is_iser_tx_desc - Indicate if the completion wr_id
 *     is a TX descriptor or not.
 * @iser_conn: iser connection
 * @wr_id: completion WR identifier
 *
 * Since we cannot rely on the wc opcode in FLUSH errors
 * we must work around it by checking if the wr_id address
 * falls in the iser connection rx_descs buffer. If so
 * it is an RX descriptor, otherwise it is a TX.
 */
static inline bool
is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
{
	void *start = iser_conn->rx_descs;
	u64 len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
	void *end = (void *)((uintptr_t)start + (uintptr_t)len);

	if (start) {
		if (wr_id >= start && wr_id < end)
			return (false);
	} else {
		/* rx_descs are not allocated yet (login phase) */
		return ((uintptr_t)wr_id != (uintptr_t)iser_conn->login_resp_buf);
	}

	return (true);
}
/**
 * iser_handle_comp_error() - Handle error completion
 * @ib_conn: connection RDMA resources
 * @wc: work completion
 *
 * Notes: Update post_recv_buf_count in case of recv error completion.
 * For non-FLUSH error completion we should also notify the iscsi layer
 * that the connection failed (in case we passed the bind stage).
 */
static void
iser_handle_comp_error(struct ib_conn *ib_conn,
		       struct ib_wc *wc)
{
	void *wr_id = (void *)(uintptr_t)wc->wr_id;
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
						   ib_conn);

	if (is_iser_tx_desc(iser_conn, wr_id)) {
		ISER_DBG("conn %p got send comp error", iser_conn);
	} else {
		ISER_DBG("conn %p got recv comp error", iser_conn);
		ib_conn->post_recv_buf_count--;
	}
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		iser_conn->icl_conn.ic_error(&iser_conn->icl_conn);
}
/**
 * iser_handle_wc - handle a single work completion
 * @wc: work completion
 *
 * Soft-IRQ context, work completion can be either
 * SEND or RECV, and can turn out successful or
 * with error (or flush error).
 */
static void iser_handle_wc(struct ib_wc *wc)
{
	struct ib_conn *ib_conn;
	struct iser_tx_desc *tx_desc;
	struct iser_rx_desc *rx_desc;

	ib_conn = wc->qp->qp_context;
	if (likely(wc->status == IB_WC_SUCCESS)) {
		if (wc->opcode == IB_WC_RECV) {
			rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
			iser_rcv_completion(rx_desc, wc->byte_len,
					    ib_conn);
		} else if (wc->opcode == IB_WC_SEND) {
			tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
			iser_snd_completion(tx_desc, ib_conn);
		} else {
			ISER_ERR("Unknown wc opcode %d", wc->opcode);
		}
	} else {
		struct iser_conn *iser_conn = container_of(ib_conn,
						struct iser_conn, ib_conn);

		if (wc->status != IB_WC_WR_FLUSH_ERR) {
			ISER_ERR("conn %p wr id %llx status %d vend_err %x",
				 iser_conn, (unsigned long long)wc->wr_id,
				 wc->status, wc->vendor_err);
		} else {
			ISER_DBG("flush error: conn %p wr id %llx",
				 iser_conn, (unsigned long long)wc->wr_id);
		}

		if (wc->wr_id == ISER_BEACON_WRID) {
			/* all flush errors were consumed */
			mtx_lock(&ib_conn->beacon.flush_lock);
			ISER_DBG("conn %p got ISER_BEACON_WRID", iser_conn);
			cv_signal(&ib_conn->beacon.flush_cv);
			mtx_unlock(&ib_conn->beacon.flush_lock);
		} else {
			iser_handle_comp_error(ib_conn, wc);
		}
	}
}
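/*
 * iser_cq_tasklet_fn - CQ polling routine, run from taskqueue context.
 * Drains completions in batches of ARRAY_SIZE(comp->wcs) until the CQ
 * is empty or iser_cq_poll_limit completions were handled, then re-arms
 * the CQ for the next interrupt.
 */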
static void
iser_cq_tasklet_fn(void *data, int pending)
{
	struct iser_comp *comp = (struct iser_comp *)data;
	struct ib_cq *cq = comp->cq;
	struct ib_wc *const wcs = comp->wcs;
	int completed = 0;
	int i;
	int n;

	while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
		for (i = 0; i < n; i++)
			iser_handle_wc(&wcs[i]);

		completed += n;
		if (completed >= iser_cq_poll_limit)
			break;
	}

	/*
	 * It is assumed here that arming the CQ only once it is empty
	 * would not cause interrupts to be missed.
	 */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}
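/*
 * CQ interrupt callback: defer all completion processing to the
 * per-completion-context taskqueue so it runs in thread context.
 */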
static void
iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
	struct iser_comp *comp = cq_context;

	taskqueue_enqueue(comp->tq, &comp->task);
}
/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapter.
 *
 * returns 0 on success, non-zero on failure
 */
static int
iser_create_device_ib_res(struct iser_device *device)
{
	struct ib_device_attr *dev_attr = &device->dev_attr;
	int ret, i, max_cqe;

	ret = ib_query_device(device->ib_device, dev_attr);
	if (ret) {
		ISER_ERR("Query device failed for %s", device->ib_device->name);
		return (ret);
	}

	if (!(dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) {
		ISER_ERR("device %s doesn't support Fastreg, "
			 "can't register memory", device->ib_device->name);
		return (1);
	}

	device->comps_used = min(mp_ncpus, device->ib_device->num_comp_vectors);
	device->comps = malloc(device->comps_used * sizeof(*device->comps),
			       M_ISER_VERBS, M_WAITOK | M_ZERO);

	max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);

	ISER_DBG("using %d CQs, device %s supports %d vectors max_cqe %d",
		 device->comps_used, device->ib_device->name,
		 device->ib_device->num_comp_vectors, max_cqe);

	device->pd = ib_alloc_pd(device->ib_device);
	if (IS_ERR(device->pd))
		goto pd_err;

	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		comp->device = device;
		comp->cq = ib_create_cq(device->ib_device,
					iser_cq_callback,
					iser_cq_event_callback,
					(void *)comp,
					max_cqe, i);
		if (IS_ERR(comp->cq)) {
			comp->cq = NULL;
			goto cq_err;
		}

		if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
			goto cq_err;

		TASK_INIT(&comp->task, 0, iser_cq_tasklet_fn, comp);
		comp->tq = taskqueue_create_fast("iser_taskq", M_NOWAIT,
						 taskqueue_thread_enqueue,
						 &comp->tq);
		if (comp->tq == NULL)
			goto tq_err;
		taskqueue_start_threads(&comp->tq, 1, PI_NET, "iser taskq");
	}
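	/*
	 * A single device-wide DMA MR with local-write and remote read/write
	 * access is shared by all connections on this device; its lkey is
	 * used for buffers that do not go through the fastreg pool (e.g. the
	 * login response buffer posted in iser_post_recvl below).
	 */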
	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ);
	if (IS_ERR(device->mr))
		goto tq_err;

	INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
			      iser_event_handler);
	if (ib_register_event_handler(&device->event_handler))
		goto handler_err;

	return (0);
handler_err:
	ib_dereg_mr(device->mr);
tq_err:
	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		if (comp->tq != NULL)
			taskqueue_free(comp->tq);
	}
cq_err:
	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		if (comp->cq != NULL)
			ib_destroy_cq(comp->cq);
	}
	ib_dealloc_pd(device->pd);
pd_err:
	free(device->comps, M_ISER_VERBS);
	ISER_ERR("failed to allocate an IB resource");
	return (1);
}
/**
 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
 * CQs and PD created with the device associated with the adapter.
 */
static void
iser_free_device_ib_res(struct iser_device *device)
{
	int i;

	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		taskqueue_free(comp->tq);
		ib_destroy_cq(comp->cq);
		comp->cq = NULL;
	}

	(void)ib_unregister_event_handler(&device->event_handler);
	(void)ib_dereg_mr(device->mr);
	(void)ib_dealloc_pd(device->pd);

	free(device->comps, M_ISER_VERBS);
	device->comps = NULL;
}
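/*
 * iser_alloc_reg_res - allocate the fast registration resources of a
 * single descriptor: a fast-reg page list plus a fast-reg MR, both
 * sized for ISCSI_ISER_SG_TABLESIZE + 1 pages.
 */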
static int
iser_alloc_reg_res(struct ib_device *ib_device,
		   struct ib_pd *pd,
		   struct iser_reg_resources *res)
{
	int ret;

	res->frpl = ib_alloc_fast_reg_page_list(ib_device,
						ISCSI_ISER_SG_TABLESIZE + 1);
	if (IS_ERR(res->frpl)) {
		ret = -PTR_ERR(res->frpl);
		ISER_ERR("Failed to allocate fast reg page list err=%d", ret);
		return (ret);
	}

	res->mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);
	if (IS_ERR(res->mr)) {
		ret = -PTR_ERR(res->mr);
		ISER_ERR("Failed to allocate fast reg mr err=%d", ret);
		goto fast_reg_mr_failure;
	}
	res->mr_valid = 1;

	return (0);

fast_reg_mr_failure:
	ib_free_fast_reg_page_list(res->frpl);

	return (ret);
}
static void
iser_free_reg_res(struct iser_reg_resources *rsc)
{
	ib_dereg_mr(rsc->mr);
	ib_free_fast_reg_page_list(rsc->frpl);
}
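/*
 * iser_create_fastreg_desc - allocate one pool descriptor together with
 * its registration resources; returns NULL on failure.
 */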
static struct fast_reg_descriptor *
iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd)
{
	struct fast_reg_descriptor *desc;
	int ret;

	desc = malloc(sizeof(*desc), M_ISER_VERBS, M_WAITOK | M_ZERO);
	if (desc == NULL) {
		ISER_ERR("Failed to allocate a new fastreg descriptor");
		return (NULL);
	}

	ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc);
	if (ret) {
		ISER_ERR("failed to allocate reg_resources");
		goto err;
	}

	return (desc);

err:
	free(desc, M_ISER_VERBS);
	return (NULL);
}
/**
 * iser_create_fastreg_pool - creates the pool of fastreg descriptors
 *
 * returns 0 on success, or errno code on failure
 */
int
iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
{
	struct iser_device *device = ib_conn->device;
	struct fast_reg_descriptor *desc;
	int i;

	INIT_LIST_HEAD(&ib_conn->fastreg.pool);
	ib_conn->fastreg.pool_size = 0;
	for (i = 0; i < cmds_max; i++) {
		desc = iser_create_fastreg_desc(device->ib_device, device->pd);
		if (desc == NULL) {
			ISER_ERR("Failed to create fastreg descriptor");
			goto err;
		}

		list_add_tail(&desc->list, &ib_conn->fastreg.pool);
		ib_conn->fastreg.pool_size++;
	}

	return (0);

err:
	iser_free_fastreg_pool(ib_conn);
	return (ENOMEM);
}
/**
 * iser_free_fastreg_pool - releases the pool of fastreg descriptors
 */
void
iser_free_fastreg_pool(struct ib_conn *ib_conn)
{
	struct fast_reg_descriptor *desc, *tmp;
	int i = 0;

	if (list_empty(&ib_conn->fastreg.pool))
		return;

	ISER_DBG("freeing conn %p fr pool", ib_conn);

	list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
		list_del(&desc->list);
		iser_free_reg_res(&desc->rsc);
		free(desc, M_ISER_VERBS);
		++i;
	}

	if (i < ib_conn->fastreg.pool_size)
		ISER_WARN("pool still has %d regions registered",
			  ib_conn->fastreg.pool_size - i);
}
/**
 * iser_create_ib_conn_res - creates the Queue-Pair (QP)
 *
 * returns 0 on success, non-zero on failure
 */
static int
iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
	struct iser_conn *iser_conn;
	struct iser_device *device;
	struct ib_device_attr *dev_attr;
	struct ib_qp_init_attr init_attr;
	int index, min_index = 0;
	int ret;

	iser_conn = container_of(ib_conn, struct iser_conn, ib_conn);
	device = ib_conn->device;
	dev_attr = &device->dev_attr;

	mtx_lock(&ig.connlist_mutex);
	/* select the CQ with the minimal number of usages */
	for (index = 0; index < device->comps_used; index++)
		if (device->comps[index].active_qps <
		    device->comps[min_index].active_qps)
			min_index = index;
	ib_conn->comp = &device->comps[min_index];
	ib_conn->comp->active_qps++;
	mtx_unlock(&ig.connlist_mutex);
	ISER_INFO("cq index %d used for ib_conn %p", min_index, ib_conn);
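	/*
	 * Both send and receive completions of this QP are steered to the
	 * least-loaded CQ chosen above, which spreads connections across
	 * the device's completion vectors.
	 */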
	memset(&init_attr, 0, sizeof(init_attr));
	init_attr.event_handler = iser_qp_event_callback;
	init_attr.qp_context = (void *)ib_conn;
	init_attr.send_cq = ib_conn->comp->cq;
	init_attr.recv_cq = ib_conn->comp->cq;
	init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	init_attr.qp_type = IB_QPT_RC;
	if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
		init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
		iser_conn->max_cmds =
			ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
	} else {
		init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
		iser_conn->max_cmds =
			ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
	}
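	/*
	 * ISER_GET_MAX_XMIT_CMDS derives the iSCSI command window from the
	 * send queue depth; the exact reserve it keeps for control PDUs is
	 * defined by the macro in the iser headers.
	 */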
	ISER_DBG("device %s supports max_send_wr %d",
		 device->ib_device->name, dev_attr->max_qp_wr);

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto out_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	ISER_DBG("setting conn %p cma_id %p qp %p",
		 ib_conn, ib_conn->cma_id,
		 ib_conn->cma_id->qp);

	return (ret);
out_err:
	mtx_lock(&ig.connlist_mutex);
	ib_conn->comp->active_qps--;
	mtx_unlock(&ig.connlist_mutex);
	ISER_ERR("unable to alloc mem or create resource, err %d", ret);

	return (ret);
}
/**
 * Based on the resolved device node GUID, check whether an iser device
 * was already allocated for this IB device. If not, create one.
 */
static struct iser_device *
iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;

	sx_xlock(&ig.device_list_mutex);

	list_for_each_entry(device, &ig.device_list, ig_list)
		/* find if there's a match using the node GUID */
		if (device->ib_device->node_guid == cma_id->device->node_guid)
			goto inc_refcnt;

	device = malloc(sizeof(*device), M_ISER_VERBS, M_WAITOK | M_ZERO);
	if (device == NULL)
		goto out;

	/* remember the IB device of this connection */
	device->ib_device = cma_id->device;
	/* init the device and link it into the ig device list */
	if (iser_create_device_ib_res(device)) {
		free(device, M_ISER_VERBS);
		device = NULL;
		goto out;
	}
	list_add(&device->ig_list, &ig.device_list);

inc_refcnt:
	device->refcount++;
	ISER_INFO("device %p refcount %d", device, device->refcount);
out:
	sx_xunlock(&ig.device_list_mutex);
	return (device);
}
/* if there's no demand for this device, release it */
static void
iser_device_try_release(struct iser_device *device)
{
	sx_xlock(&ig.device_list_mutex);
	device->refcount--;
	ISER_INFO("device %p refcount %d", device, device->refcount);
	if (!device->refcount) {
		iser_free_device_ib_res(device);
		list_del(&device->ig_list);
		free(device, M_ISER_VERBS);
		device = NULL;
	}
	sx_xunlock(&ig.device_list_mutex);
}
/**
 * Called with state mutex held
 */
static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
				     enum iser_conn_state comp,
				     enum iser_conn_state exch)
{
	int ret;

	ret = (iser_conn->state == comp);
	if (ret)
		iser_conn->state = exch;

	return (ret);
}
/**
 * iser_free_ib_conn_res - release IB related resources
 * @iser_conn: iser connection struct
 * @destroy: indicator if we need to try to release the
 *     iser device and memory regions pool (only iscsi
 *     shutdown and DEVICE_REMOVAL will use this).
 *
 * This routine is called with the iser state mutex held
 * so the cm_id removal is out of here. It is safe to
 * be invoked multiple times.
 */
void
iser_free_ib_conn_res(struct iser_conn *iser_conn,
		      bool destroy)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	ISER_INFO("freeing conn %p cma_id %p qp %p",
		  iser_conn, ib_conn->cma_id, ib_conn->qp);

	if (ib_conn->qp != NULL) {
		mtx_lock(&ig.connlist_mutex);
		ib_conn->comp->active_qps--;
		mtx_unlock(&ig.connlist_mutex);
		rdma_destroy_qp(ib_conn->cma_id);
		ib_conn->qp = NULL;
	}

	if (destroy) {
		if (iser_conn->login_buf)
			iser_free_login_buf(iser_conn);

		if (iser_conn->rx_descs)
			iser_free_rx_descriptors(iser_conn);

		if (device != NULL) {
			iser_device_try_release(device);
			ib_conn->device = NULL;
		}
	}
}
/**
 * Triggers the start of the disconnect procedures and waits for them to
 * be done.
 * Called with state mutex held
 */
int
iser_conn_terminate(struct iser_conn *iser_conn)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct ib_send_wr *bad_send_wr;
	struct ib_recv_wr *bad_recv_wr;
	int err = 0;

	/* terminate the iser conn only if the conn state is UP */
	if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
				       ISER_CONN_TERMINATING))
		return (0);

	ISER_INFO("iser_conn %p state %d\n", iser_conn, iser_conn->state);

	if (ib_conn->qp == NULL) {
		/* HOW can this be??? */
		ISER_WARN("qp wasn't created");
		return (1);
	}

	/*
	 * Todo: This is a temporary workaround.
	 * We serialize the connection closure using a global lock in order
	 * to receive all posted beacon completions.
	 * Without serialization, in case we open many connections (QPs) on
	 * the same CQ, we might miss beacons because of missing interrupts.
	 */
	sx_xlock(&ig.close_conns_mutex);

	/*
	 * In case we didn't already clean up the cma_id (peer initiated
	 * a disconnection), we need to cause the CMA to change the QP
	 * state to ERROR.
	 */
	if (ib_conn->cma_id) {
		err = rdma_disconnect(ib_conn->cma_id);
		if (err)
			ISER_ERR("Failed to disconnect, conn: 0x%p err %d",
				 iser_conn, err);

		mtx_lock(&ib_conn->beacon.flush_lock);
		memset(&ib_conn->beacon.send, 0, sizeof(struct ib_send_wr));
		ib_conn->beacon.send.wr_id = ISER_BEACON_WRID;
		ib_conn->beacon.send.opcode = IB_WR_SEND;
		/* post an indication that all send flush errors were consumed */
		err = ib_post_send(ib_conn->qp, &ib_conn->beacon.send, &bad_send_wr);
		if (err) {
			ISER_ERR("conn %p failed to post send_beacon", ib_conn);
			mtx_unlock(&ib_conn->beacon.flush_lock);
			goto out;
		}

		ISER_DBG("before send cv_wait: %p", iser_conn);
		cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock);
		ISER_DBG("after send cv_wait: %p", iser_conn);

		memset(&ib_conn->beacon.recv, 0, sizeof(struct ib_recv_wr));
		ib_conn->beacon.recv.wr_id = ISER_BEACON_WRID;
		/* post an indication that all recv flush errors were consumed */
		err = ib_post_recv(ib_conn->qp, &ib_conn->beacon.recv, &bad_recv_wr);
		if (err) {
			ISER_ERR("conn %p failed to post recv_beacon", ib_conn);
			mtx_unlock(&ib_conn->beacon.flush_lock);
			goto out;
		}

		ISER_DBG("before recv cv_wait: %p", iser_conn);
		cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock);
		mtx_unlock(&ib_conn->beacon.flush_lock);
		ISER_DBG("after recv cv_wait: %p", iser_conn);
	}
out:
	sx_xunlock(&ig.close_conns_mutex);
	return (1);
}
/**
 * Called with state mutex held
 */
static void
iser_connect_error(struct rdma_cm_id *cma_id)
{
	struct iser_conn *iser_conn;

	iser_conn = cma_id->context;

	ISER_ERR("conn %p", iser_conn);

	iser_conn->state = ISER_CONN_TERMINATING;

	cv_signal(&iser_conn->up_cv);
}
/**
 * Called with state mutex held
 */
static void
iser_addr_handler(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;
	struct iser_conn *iser_conn;
	struct ib_conn *ib_conn;
	int ret;

	iser_conn = cma_id->context;

	ib_conn = &iser_conn->ib_conn;
	device = iser_device_find_by_ib_device(cma_id);
	if (device == NULL) {
		ISER_ERR("conn %p device lookup/creation failed",
			 iser_conn);
		iser_connect_error(cma_id);
		return;
	}

	ib_conn->device = device;

	ret = rdma_resolve_route(cma_id, 1000);
	if (ret) {
		ISER_ERR("conn %p resolve route failed: %d", iser_conn, ret);
		iser_connect_error(cma_id);
		return;
	}
}
/**
 * Called with state mutex held
 */
static void
iser_route_handler(struct rdma_cm_id *cma_id)
{
	struct rdma_conn_param conn_param;
	int ret;
	struct iser_cm_hdr req_hdr;
	struct iser_conn *iser_conn = cma_id->context;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	ret = iser_create_ib_conn_res(ib_conn);
	if (ret)
		goto failure;

	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
	conn_param.retry_count = 7;
	conn_param.rnr_retry_count = 6;
	/*
	 * Initiator depth should not be set, but in order to stay
	 * compatible with old targets, we keep this value set.
	 */
	conn_param.initiator_depth = 1;

	memset(&req_hdr, 0, sizeof(req_hdr));
	req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
			 ISER_SEND_W_INV_NOT_SUPPORTED);
	conn_param.private_data = (void *)&req_hdr;
	conn_param.private_data_len = sizeof(struct iser_cm_hdr);

	ret = rdma_connect(cma_id, &conn_param);
	if (ret) {
		ISER_ERR("conn %p failure connecting: %d", iser_conn, ret);
		goto failure;
	}

	return;

failure:
	iser_connect_error(cma_id);
}
/**
 * Called with state mutex held
 */
static void
iser_connected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *iser_conn;
	struct ib_qp_attr attr;
	struct ib_qp_init_attr init_attr;

	iser_conn = cma_id->context;

	(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);

	ISER_INFO("remote qpn:%x my qpn:%x",
		  attr.dest_qp_num, cma_id->qp->qp_num);

	iser_conn->state = ISER_CONN_UP;

	cv_signal(&iser_conn->up_cv);
}
/**
 * Called with state mutex held
 */
static void
iser_cleanup_handler(struct rdma_cm_id *cma_id, bool destroy)
{
	struct iser_conn *iser_conn = cma_id->context;

	if (iser_conn_terminate(iser_conn))
		iser_conn->icl_conn.ic_error(&iser_conn->icl_conn);
}
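/*
 * iser_cma_handler - dispatch RDMA CM events for a connection; the
 * per-connection state mutex is held across the handler calls.
 */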
int
iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
	struct iser_conn *iser_conn;
	int ret = 0;

	iser_conn = cma_id->context;
	ISER_INFO("event %d status %d conn %p id %p",
		  event->event, event->status, cma_id->context, cma_id);

	sx_xlock(&iser_conn->state_mutex);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		iser_addr_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		iser_route_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		iser_connected_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_REJECTED:
		iser_connect_error(cma_id);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
	case RDMA_CM_EVENT_ADDR_CHANGE:
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		iser_cleanup_handler(cma_id, false);
		break;
	default:
		ISER_ERR("Unexpected RDMA CM event (%d)", event->event);
		break;
	}
	sx_xunlock(&iser_conn->state_mutex);

	return (ret);
}
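/*
 * iser_post_recvl - post a single receive work request for the login
 * response buffer, using the device's global DMA MR lkey.
 */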
int
iser_post_recvl(struct iser_conn *iser_conn)
{
	struct ib_recv_wr rx_wr, *rx_wr_failed;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct ib_sge sge;
	int ib_ret;

	sge.addr = iser_conn->login_resp_dma;
	sge.length = ISER_RX_LOGIN_SIZE;
	sge.lkey = ib_conn->device->mr->lkey;

	rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf;
	rx_wr.sg_list = &sge;
	rx_wr.num_sge = 1;
	rx_wr.next = NULL;

	ib_conn->post_recv_buf_count++;
	ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
	if (ib_ret) {
		ISER_ERR("ib_post_recv failed ret=%d", ib_ret);
		ib_conn->post_recv_buf_count--;
	}

	return (ib_ret);
}
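/*
 * iser_post_recvm - post `count` receive work requests, chained into a
 * single list handed to ib_post_recv in one call; the ring head
 * (rx_desc_head) only advances if the post succeeds.
 */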
int
iser_post_recvm(struct iser_conn *iser_conn, int count)
{
	struct ib_recv_wr *rx_wr, *rx_wr_failed;
	int i, ib_ret;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	unsigned int my_rx_head = iser_conn->rx_desc_head;
	struct iser_rx_desc *rx_desc;

	for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
		rx_desc = &iser_conn->rx_descs[my_rx_head];
		rx_wr->wr_id = (uintptr_t)rx_desc;
		rx_wr->sg_list = &rx_desc->rx_sg;
		rx_wr->num_sge = 1;
		rx_wr->next = rx_wr + 1;
		my_rx_head = (my_rx_head + 1) % iser_conn->qp_max_recv_dtos;
	}

	rx_wr--;
	rx_wr->next = NULL; /* mark end of work requests list */

	ib_conn->post_recv_buf_count += count;
	ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
	if (ib_ret) {
		ISER_ERR("ib_post_recv failed ret=%d", ib_ret);
		ib_conn->post_recv_buf_count -= count;
	} else
		iser_conn->rx_desc_head = my_rx_head;

	return (ib_ret);
}
/**
 * iser_post_send - Initiate a Send DTO operation
 *
 * returns 0 on success, non-zero on failure
 */
int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
		   bool signal)
{
	int ib_ret;
	struct ib_send_wr send_wr, *send_wr_failed;

	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
				      tx_desc->dma_addr, ISER_HEADERS_LEN,
				      DMA_TO_DEVICE);

	send_wr.next = NULL;
	send_wr.wr_id = (uintptr_t)tx_desc;
	send_wr.sg_list = tx_desc->tx_sg;
	send_wr.num_sge = tx_desc->num_sge;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;

	ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
	if (ib_ret)
		ISER_ERR("ib_post_send failed, ret:%d", ib_ret);

	return (ib_ret);
}