3 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/* malloc(9) type tag under which all allocations in this file are accounted. */
3 static MALLOC_DEFINE(M_ISER_INITIATOR, "iser_initiator", "iser initiator backend");
31 /* Register user buffer memory and initialize passive rdma
32 * dto descriptor. Data size is stored in
33 * task->data[ISER_DIR_IN].data_len, Protection size
34 * is stored in task->prot[ISER_DIR_IN].data_len
/*
 * Prepare the Data-IN (SCSI read) side of a command PDU: DMA-map the
 * task data, register it for RDMA, and advertise the resulting
 * STag/VA pair in the iSER header.  Returns 0 on success or an error
 * from the mapping/registration helpers.
 */
37 iser_prepare_read_cmd(struct icl_iser_pdu *iser_pdu)
39 struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
40 struct iser_data_buf *buf_in = &iser_pdu->data[ISER_DIR_IN];
41 struct iser_mem_reg *mem_reg;
/* Map the IN-direction buffer for DMA before RDMA registration. */
44 err = iser_dma_map_task_data(iser_pdu,
/* Register the mapped buffer so the peer can access it remotely. */
51 err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_IN);
53 ISER_ERR("Failed to set up Data-IN RDMA");
/* Publish the registered region's handle in the iSER header. */
57 mem_reg = &iser_pdu->rdma_reg[ISER_DIR_IN];
/* ISER_RSV marks the read STag/VA fields as valid. */
59 hdr->flags |= ISER_RSV;
60 hdr->read_stag = cpu_to_be32(mem_reg->rkey);
61 hdr->read_va = cpu_to_be64(mem_reg->sge.addr);
66 /* Register user buffer memory and initialize passive rdma
67 * dto descriptor. Data size is stored in
68 * task->data[ISER_DIR_OUT].data_len, Protection size
69 * is stored at task->prot[ISER_DIR_OUT].data_len
/*
 * Prepare the Data-OUT (SCSI write) side of a command PDU: DMA-map the
 * task data, register it for RDMA, and advertise the resulting
 * STag/VA pair in the iSER header.  Mirrors iser_prepare_read_cmd()
 * for the OUT direction.  Returns 0 on success or an error from the
 * mapping/registration helpers.
 */
72 iser_prepare_write_cmd(struct icl_iser_pdu *iser_pdu)
74 struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
75 struct iser_data_buf *buf_out = &iser_pdu->data[ISER_DIR_OUT];
76 struct iser_mem_reg *mem_reg;
/* Map the OUT-direction buffer for DMA before RDMA registration. */
79 err = iser_dma_map_task_data(iser_pdu,
/* Register the mapped buffer so the peer can access it remotely. */
86 err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_OUT);
88 ISER_ERR("Failed to set up Data-out RDMA");
/* Publish the registered region's handle in the iSER header. */
92 mem_reg = &iser_pdu->rdma_reg[ISER_DIR_OUT];
/* ISER_WSV marks the write STag/VA fields as valid. */
94 hdr->flags |= ISER_WSV;
95 hdr->write_stag = cpu_to_be32(mem_reg->rkey);
96 hdr->write_va = cpu_to_be64(mem_reg->sge.addr);
101 /* creates a new tx descriptor and adds header regd buffer */
/*
 * Initialize a TX descriptor prior to posting it: zero the iSER
 * header, stamp the protocol version, and make sure the first SGE
 * carries the device's current lkey.
 */
103 iser_create_send_desc(struct iser_conn *iser_conn,
104 struct iser_tx_desc *tx_desc)
106 struct iser_device *device = iser_conn->ib_conn.device;
/* Give the CPU ownership of the header region before writing it. */
108 ib_dma_sync_single_for_cpu(device->ib_device,
109 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
111 memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
112 tx_desc->iser_header.flags = ISER_VER;
114 tx_desc->num_sge = 1;
/* Repair a stale lkey (e.g. descriptor reused across devices). */
116 if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
117 tx_desc->tx_sg[0].lkey = device->mr->lkey;
118 ISER_DBG("sdesc %p lkey mismatch, fixing", tx_desc);
/*
 * Tear down the login buffer: unmap whichever DMA mappings were
 * established (request and/or response halves), free the single
 * backing allocation, then clear all handles so a repeated call is a
 * harmless no-op.
 */
123 iser_free_login_buf(struct iser_conn *iser_conn)
125 struct iser_device *device = iser_conn->ib_conn.device;
/* Nothing was ever allocated (or it was already freed). */
127 if (!iser_conn->login_buf)
/* Request half: mapped TO the device. */
130 if (iser_conn->login_req_dma)
131 ib_dma_unmap_single(device->ib_device,
132 iser_conn->login_req_dma,
133 ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
/* Response half: mapped FROM the device. */
135 if (iser_conn->login_resp_dma)
136 ib_dma_unmap_single(device->ib_device,
137 iser_conn->login_resp_dma,
138 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
140 free(iser_conn->login_buf, M_ISER_INITIATOR);
142 /* make sure we never redo any unmapping */
143 iser_conn->login_req_dma = 0;
144 iser_conn->login_resp_dma = 0;
145 iser_conn->login_buf = NULL;
/*
 * Allocate one contiguous login buffer and DMA-map it in two halves:
 * the request half (first ISCSI_DEF_MAX_RECV_SEG_LEN bytes, mapped
 * TO the device) and the response half (ISER_RX_LOGIN_SIZE bytes,
 * mapped FROM the device).  On any mapping failure everything is
 * unwound via iser_free_login_buf().
 */
149 iser_alloc_login_buf(struct iser_conn *iser_conn)
151 struct iser_device *device = iser_conn->ib_conn.device;
152 int req_err, resp_err;
154 BUG_ON(device == NULL);
156 iser_conn->login_buf = malloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE,
157 M_ISER_INITIATOR, M_WAITOK | M_ZERO);
/* NOTE(review): malloc(9) with M_WAITOK cannot return NULL on FreeBSD,
 * so this check appears to be dead code — confirm before removing. */
159 if (!iser_conn->login_buf)
/* Split the single allocation into request/response halves. */
162 iser_conn->login_req_buf = iser_conn->login_buf;
163 iser_conn->login_resp_buf = iser_conn->login_buf +
164 ISCSI_DEF_MAX_RECV_SEG_LEN;
166 iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
167 iser_conn->login_req_buf,
168 ISCSI_DEF_MAX_RECV_SEG_LEN,
171 iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
172 iser_conn->login_resp_buf,
/* Check both mappings before using either handle. */
176 req_err = ib_dma_mapping_error(device->ib_device,
177 iser_conn->login_req_dma);
178 resp_err = ib_dma_mapping_error(device->ib_device,
179 iser_conn->login_resp_dma);
181 if (req_err || resp_err) {
/* Zero the handles so iser_free_login_buf() skips the unmap. */
183 iser_conn->login_req_dma = 0;
185 iser_conn->login_resp_dma = 0;
/* Error path: release the buffer and any successful mapping. */
192 iser_free_login_buf(iser_conn);
195 ISER_DBG("unable to alloc or map login buf");
/*
 * Allocate the fast-registration pool and one RX descriptor per
 * command (cmds_max), DMA-mapping each descriptor for receive.
 * min_posted_rx is kept at a quarter of the queue depth; see the
 * repost logic in iser_rcv_completion().  On failure, unwinds in
 * reverse order: unmap the descriptors mapped so far, free the
 * array, then free the fastreg pool.
 */
199 int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, int cmds_max)
203 struct iser_rx_desc *rx_desc;
204 struct ib_sge *rx_sg;
205 struct ib_conn *ib_conn = &iser_conn->ib_conn;
206 struct iser_device *device = ib_conn->device;
208 iser_conn->qp_max_recv_dtos = cmds_max;
/* Refill threshold: a quarter of the maximum posted receives. */
209 iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;
211 if (iser_create_fastreg_pool(ib_conn, cmds_max))
212 goto create_rdma_reg_res_failed;
215 iser_conn->num_rx_descs = cmds_max;
216 iser_conn->rx_descs = malloc(iser_conn->num_rx_descs *
217 sizeof(struct iser_rx_desc), M_ISER_INITIATOR,
219 if (!iser_conn->rx_descs)
220 goto rx_desc_alloc_fail;
222 rx_desc = iser_conn->rx_descs;
/* DMA-map each descriptor and point its SGE at the mapping. */
224 for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
225 dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
226 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
227 if (ib_dma_mapping_error(device->ib_device, dma_addr))
228 goto rx_desc_dma_map_failed;
230 rx_desc->dma_addr = dma_addr;
232 rx_sg = &rx_desc->rx_sg;
233 rx_sg->addr = rx_desc->dma_addr;
234 rx_sg->length = ISER_RX_PAYLOAD_SIZE;
235 rx_sg->lkey = device->mr->lkey;
238 iser_conn->rx_desc_head = 0;
/* Error unwind: only the first i descriptors were mapped. */
242 rx_desc_dma_map_failed:
243 rx_desc = iser_conn->rx_descs;
244 for (j = 0; j < i; j++, rx_desc++)
245 ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
246 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
247 free(iser_conn->rx_descs, M_ISER_INITIATOR);
248 iser_conn->rx_descs = NULL;
250 iser_free_fastreg_pool(ib_conn);
251 create_rdma_reg_res_failed:
252 ISER_ERR("failed allocating rx descriptors / data buffers");
/*
 * Release everything iser_alloc_rx_descriptors() set up: the fastreg
 * pool, each descriptor's DMA mapping, and the descriptor array
 * itself.  rx_descs is NULLed so a second call cannot double-unmap.
 */
258 iser_free_rx_descriptors(struct iser_conn *iser_conn)
261 struct iser_rx_desc *rx_desc;
262 struct ib_conn *ib_conn = &iser_conn->ib_conn;
263 struct iser_device *device = ib_conn->device;
265 iser_free_fastreg_pool(ib_conn);
267 rx_desc = iser_conn->rx_descs;
268 for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
269 ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
270 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
272 free(iser_conn->rx_descs, M_ISER_INITIATOR);
274 /* make sure we never redo any unmapping */
275 iser_conn->rx_descs = NULL;
/*
 * Build a scatterlist over a contiguous KVA buffer, one entry per
 * page-sized chunk.  The first entry honors the buffer's offset
 * within its page; tlen counts the bytes still to cover.
 */
279 iser_buf_to_sg(void *buf, struct iser_data_buf *data_buf)
281 struct scatterlist *sg;
286 tlen = data_buf->data_len;
288 for (i = 0; 0 < tlen; i++, tlen -= len) {
289 sg = &data_buf->sgl[i];
/* Offset of the current position within its page. */
290 offset = ((uintptr_t)buf) & ~PAGE_MASK;
/* Chunk runs at most to the end of the current page. */
291 len = min(PAGE_SIZE - offset, tlen);
292 sg_set_buf(sg, buf, len);
/* Advance the KVA cursor past the chunk just described. */
293 buf = (void *)(((u64)buf) + (u64)len);
/*
 * Build a scatterlist from a bio's page array (bio_ma), one entry
 * per page, starting at bio_ma_offset within the first page.
 * NOTE(review): offset is presumably reset to 0 after the first
 * iteration in code not visible here — confirm, otherwise every
 * entry would be shortened by the initial offset.
 */
302 iser_bio_to_sg(struct bio *bp, struct iser_data_buf *data_buf)
304 struct scatterlist *sg;
309 tlen = bp->bio_bcount;
310 offset = bp->bio_ma_offset;
312 for (i = 0; 0 < tlen; i++, tlen -= len) {
313 sg = &data_buf->sgl[i];
314 len = min(PAGE_SIZE - offset, tlen);
315 sg_set_page(sg, bp->bio_ma[i], len, offset);
/*
 * Dispatch on the CAM data-transfer style of the CCB: bio-backed
 * (page array) buffers go through iser_bio_to_sg(), plain KVA
 * buffers through iser_buf_to_sg().  Other styles are rejected with
 * an error log.
 */
324 iser_csio_to_sg(struct ccb_scsiio *csio, struct iser_data_buf *data_buf)
326 struct ccb_hdr *ccbh;
330 switch ((ccbh->flags & CAM_DATA_MASK)) {
332 iser_bio_to_sg((struct bio *) csio->data_ptr, data_buf);
336 * Support KVA buffers for various scsi commands such as:
340 * - SERVICE_ACTION_IN.
341 * The data of these commands always mapped into KVA.
343 iser_buf_to_sg(csio->data_ptr, data_buf);
/* Unsupported CAM data style. */
346 ISER_ERR("flags 0x%X unimplemented", ccbh->flags);
/*
 * True once every ISER_SIGNAL_CMD_COUNT sends — used to request a
 * signaled completion only periodically, reducing CQ traffic.
 */
353 iser_signal_comp(u8 sig_count)
355 return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
/*
 * Send a SCSI command PDU: initialize the TX descriptor, build the
 * scatterlist from the CCB's data buffer (if any), prepare the
 * RDMA side for the transfer direction indicated by the BHS flags
 * (R = Data-IN, W = Data-OUT), and post the send with periodic
 * completion signaling.
 */
359 iser_send_command(struct iser_conn *iser_conn,
360 struct icl_iser_pdu *iser_pdu)
362 struct iser_data_buf *data_buf;
363 struct iser_tx_desc *tx_desc = &iser_pdu->desc;
364 struct iscsi_bhs_scsi_command *hdr = (struct iscsi_bhs_scsi_command *) &(iser_pdu->desc.iscsi_header);
365 struct ccb_scsiio *csio = iser_pdu->csio;
/* Per-connection send counter drives the signaling cadence. */
367 u8 sig_count = ++iser_conn->ib_conn.sig_count;
369 /* build the tx desc regd header and add it to the tx desc dto */
370 tx_desc->type = ISCSI_TX_SCSI_COMMAND;
371 iser_create_send_desc(iser_conn, tx_desc);
/* Pick the data buffer matching the transfer direction. */
373 if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
374 data_buf = &iser_pdu->data[ISER_DIR_IN];
376 data_buf = &iser_pdu->data[ISER_DIR_OUT];
379 data_buf->sg = csio->data_ptr;
380 data_buf->data_len = csio->dxfer_len;
/* Only commands with a payload need a scatterlist. */
382 if (likely(csio->dxfer_len)) {
383 err = iser_csio_to_sg(csio, data_buf);
385 goto send_command_error;
/* Prepare the RDMA registration for the command's direction. */
388 if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
389 err = iser_prepare_read_cmd(iser_pdu);
391 goto send_command_error;
392 } else if (hdr->bhssc_flags & BHSSC_FLAGS_W) {
393 err = iser_prepare_write_cmd(iser_pdu);
395 goto send_command_error;
398 err = iser_post_send(&iser_conn->ib_conn, tx_desc,
399 iser_signal_comp(sig_count));
/* Common error exit: log the failing task's identifiers. */
404 ISER_ERR("iser_conn %p itt %u len %u err %d", iser_conn,
405 hdr->bhssc_initiator_task_tag,
406 hdr->bhssc_expected_data_transfer_length,
/*
 * Send a control-type PDU (login/text/etc.).  Any immediate data is
 * carried out of the pre-mapped login request buffer via a second
 * SGE.  Before full-feature handoff, a login receive buffer is
 * (re)posted so the reply can land; afterwards the normal RX
 * descriptors cover receives.
 */
412 iser_send_control(struct iser_conn *iser_conn,
413 struct icl_iser_pdu *iser_pdu)
415 struct iser_tx_desc *mdesc;
416 struct iser_device *device;
417 size_t datalen = iser_pdu->icl_pdu.ip_data_len;
420 mdesc = &iser_pdu->desc;
422 /* build the tx desc regd header and add it to the tx desc dto */
423 mdesc->type = ISCSI_TX_CONTROL;
424 iser_create_send_desc(iser_conn, mdesc);
426 device = iser_conn->ib_conn.device;
/* Attach the payload as a second SGE over the login request DMA. */
429 struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
/* CPU ownership while the payload is staged ... */
430 ib_dma_sync_single_for_cpu(device->ib_device,
431 iser_conn->login_req_dma, datalen,
/* ... then hand it back to the device for the send. */
434 ib_dma_sync_single_for_device(device->ib_device,
435 iser_conn->login_req_dma, datalen,
438 tx_dsg->addr = iser_conn->login_req_dma;
439 tx_dsg->length = datalen;
440 tx_dsg->lkey = device->mr->lkey;
444 /* For login phase and discovery session we re-use the login buffer */
445 if (!iser_conn->handoff_done) {
446 err = iser_post_recvl(iser_conn);
448 goto send_control_error;
/* Control PDUs always request a signaled completion. */
451 err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
456 ISER_ERR("conn %p failed err %d", iser_conn, err);
463 * iser_rcv_dto_completion - recv DTO completion
/*
 * Receive-completion handler: identify which buffer completed (login
 * response vs. regular RX descriptor), sync it for CPU access, wrap
 * the received BHS and any trailing data in a fresh ICL PDU, repost
 * receive buffers when the posted count drops below the low-water
 * mark, and hand the PDU up to the ICL layer.
 */
466 iser_rcv_completion(struct iser_rx_desc *rx_desc,
467 unsigned long rx_xfer_len,
468 struct ib_conn *ib_conn)
470 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
472 struct icl_conn *ic = &iser_conn->icl_conn;
473 struct icl_pdu *response;
474 struct iscsi_bhs *hdr;
477 int outstanding, count, err;
479 /* differentiate between login to all other PDUs */
480 if ((char *)rx_desc == iser_conn->login_resp_buf) {
481 rx_dma = iser_conn->login_resp_dma;
482 rx_buflen = ISER_RX_LOGIN_SIZE;
484 rx_dma = rx_desc->dma_addr;
485 rx_buflen = ISER_RX_PAYLOAD_SIZE;
/* Make the received bytes visible to the CPU before parsing. */
488 ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
489 rx_buflen, DMA_FROM_DEVICE);
491 hdr = &rx_desc->iscsi_header;
/* NOTE(review): response from M_NOWAIT allocation is dereferenced
 * without a NULL check — confirm iser_new_pdu() cannot fail here. */
493 response = iser_new_pdu(ic, M_NOWAIT);
494 response->ip_bhs = hdr;
/* Payload length = total received minus the iSER+iSCSI headers. */
495 response->ip_data_len = rx_xfer_len - ISER_HEADERS_LEN;
498 * In case we got data in the receive buffer, assign the ip_data_mbuf
499 * to the rx_buffer - later we'll copy it to upper layer buffers
501 if (response->ip_data_len)
502 response->ip_data_mbuf = (struct mbuf *)(rx_desc->data);
504 ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
505 rx_buflen, DMA_FROM_DEVICE);
507 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
508 * task eliminates the need to worry on tasks which are completed in *
509 * parallel to the execution of iser_conn_term. So the code that waits *
510 * for the posted rx bufs refcount to become zero handles everything */
511 ib_conn->post_recv_buf_count--;
/* Login responses are not part of the normal RX pool; skip repost. */
513 if (rx_dma == iser_conn->login_resp_dma)
/* Repost receives once the posted count falls to the low-water mark. */
516 outstanding = ib_conn->post_recv_buf_count;
517 if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
518 count = min(iser_conn->qp_max_recv_dtos - outstanding,
519 iser_conn->min_posted_rx);
520 err = iser_post_recvm(iser_conn, count);
522 ISER_ERR("posting %d rx bufs err %d", count, err);
/* Deliver the PDU to the ICL consumer. */
526 (ic->ic_receive)(response);
/*
 * Send-completion handler: for control PDUs, free the PDU now that
 * its send has completed.
 * NOTE(review): iser_pdu->iser_conn is dereferenced before the
 * "tx_desc &&" NULL test below, so that test cannot actually guard
 * against a NULL tx_desc (iser_pdu is derived from it via
 * container_of) — confirm whether tx_desc can ever be NULL here.
 */
530 iser_snd_completion(struct iser_tx_desc *tx_desc,
531 struct ib_conn *ib_conn)
533 struct icl_iser_pdu *iser_pdu = container_of(tx_desc, struct icl_iser_pdu, desc);
534 struct iser_conn *iser_conn = iser_pdu->iser_conn;
536 if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL)
537 iser_pdu_free(&iser_conn->icl_conn, &iser_pdu->icl_pdu);