3 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/*
 * Module-wide state: the "iser" sysctl node under _kern with its "debug"
 * knob (backed by iser_debug), the malloc type used for connection objects,
 * the PDU zone, the live-connection counter and the global iser state.
 */
29 SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
32 SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
33 &iser_debug, 0, "Enable iser debug messages");
35 static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
/* Zone that struct icl_iser_pdu objects are allocated from and freed to. */
36 static uma_zone_t icl_pdu_zone;
/*
 * Acquired in iser_new_conn(), released in iser_conn_free(); the unload
 * path checks it against zero before tearing anything down.
 */
38 static volatile u_int icl_iser_ncons;
/* Holds the global device/connection lists and their locks. */
39 struct iser_global ig;
/*
 * Forward declarations.  iser_conn_release() is an internal helper; the
 * remaining entries declare the iser implementations that are wired into
 * icl_iser_methods[] (the icl_conn_*_t function types are declared outside
 * this file - presumably by the ICL connection interface; TODO confirm).
 */
41 static void iser_conn_release(struct icl_conn *ic);
43 static icl_conn_new_pdu_t iser_conn_new_pdu;
44 static icl_conn_pdu_free_t iser_conn_pdu_free;
45 static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
46 static icl_conn_pdu_append_data_t iser_conn_pdu_append_data;
47 static icl_conn_pdu_queue_t iser_conn_pdu_queue;
48 static icl_conn_handoff_t iser_conn_handoff;
49 static icl_conn_free_t iser_conn_free;
50 static icl_conn_close_t iser_conn_close;
51 static icl_conn_connect_t iser_conn_connect;
52 static icl_conn_task_setup_t iser_conn_task_setup;
53 static icl_conn_task_done_t iser_conn_task_done;
54 static icl_conn_pdu_get_data_t iser_conn_pdu_get_data;
/*
 * kobj method table mapping each icl_conn interface method to its iser
 * implementation, followed by the class definition.  The class instance
 * size is sizeof(struct iser_conn), so kobj_create() in iser_new_conn()
 * yields storage for a whole iser connection.
 * NOTE(review): the table terminator and closing brace are not visible in
 * this excerpt.
 */
56 static kobj_method_t icl_iser_methods[] = {
57 KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
58 KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
59 KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
60 KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
61 KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
62 KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
63 KOBJMETHOD(icl_conn_free, iser_conn_free),
64 KOBJMETHOD(icl_conn_close, iser_conn_close),
65 KOBJMETHOD(icl_conn_connect, iser_conn_connect),
66 KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
67 KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
68 KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
72 DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));
75 * iser_initialize_headers() - Initialize task headers
77 * @iser_conn: iser connection
80 * This routine may race with iser teardown flow for scsi
81 * error handling TMFs. So for TMF we should acquire the
82 * state mutex to avoid dereferencing the IB device which
83 * may have already been terminated (racing teardown sequence).
/*
 * Maps the PDU's TX descriptor for DMA towards the device
 * (ISER_HEADERS_LEN bytes, DMA_TO_DEVICE).  On a successful mapping the
 * descriptor is flagged as mapped, the DMA address is recorded, and the
 * first scatter/gather entry is pointed at the headers using the device
 * memory region's lkey.
 * NOTE(review): the return type, the mapping-error branch body and the
 * return statements are not visible in this excerpt.
 */
86 iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
88 struct iser_tx_desc *tx_desc = &pdu->desc;
89 struct iser_device *device = iser_conn->ib_conn.device;
93 dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
94 ISER_HEADERS_LEN, DMA_TO_DEVICE);
95 if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
/* The mapped flag is what iser_conn_task_done() tests before unmapping. */
100 tx_desc->mapped = true;
101 tx_desc->dma_addr = dma_addr;
/* SG entry 0 always describes the header area of the descriptor. */
102 tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
103 tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
104 tx_desc->tx_sg[0].lkey = device->mr->lkey;
/*
 * icl_conn_pdu_append_data method.  When the opcode test below matches,
 * copies len bytes from addr into the connection's login request buffer
 * and records len as the PDU's data length.
 *
 * NOTE(review): the opcode is tested with bitwise '&' against the
 * LOGIN/TEXT request constants rather than compared for equality, so any
 * opcode sharing a set bit with either constant also takes this branch -
 * confirm whether an equality test on the masked opcode was intended.
 * NOTE(review): no bound on len against the capacity of login_req_buf is
 * visible here - verify the callers guarantee it fits.
 * NOTE(review): flags is not used in the visible lines; the return type
 * and return statement are not visible in this excerpt.
 */
112 iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
113 const void *addr, size_t len, int flags)
115 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
117 if (request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
118 request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_TEXT_REQUEST) {
119 ISER_DBG("copy to login buff");
120 memcpy(iser_conn->login_req_buf, addr, len);
121 request->ip_data_len = len;
/*
 * icl_conn_pdu_get_data method.  Copies len bytes of received data,
 * starting at offset off, into the caller-supplied buffer addr.  Nothing
 * is copied when the PDU carries no data pointer.
 *
 * NOTE(review): "ip->ip_data_mbuf + off" is pointer arithmetic in units of
 * whatever type ip_data_mbuf is declared with; unless that is a byte
 * pointer, a non-zero off is scaled by the element size rather than being
 * a byte offset - confirm against the struct icl_pdu declaration.
 * NOTE(review): ic is not used in the visible lines.
 */
128 iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
129 size_t off, void *addr, size_t len)
131 /* If we have received data, copy it to the upper layer buffer */
132 if (ip->ip_data_mbuf)
133 memcpy(addr, ip->ip_data_mbuf + off, len);
137 * Allocate an icl_pdu with an empty BHS to be filled in by the caller.
/*
 * Allocates a zero-filled struct icl_iser_pdu from icl_pdu_zone using the
 * caller's allocation flags, binds it to the owning iser connection and
 * points the embedded icl_pdu's BHS at the iSCSI header stored inside the
 * TX descriptor.  A failed allocation is logged with a warning.
 * NOTE(review): the declaration of ip, the failure-branch return and the
 * final return are not visible in this excerpt.
 */
140 iser_new_pdu(struct icl_conn *ic, int flags)
142 struct icl_iser_pdu *iser_pdu;
144 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
/* M_ZERO is forced on top of whatever the caller passed. */
146 iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
147 if (iser_pdu == NULL) {
148 ISER_WARN("failed to allocate %zd bytes", sizeof(*iser_pdu));
152 iser_pdu->iser_conn = iser_conn;
153 ip = &iser_pdu->icl_pdu;
/* The BHS lives inside the TX descriptor rather than in its own allocation. */
155 ip->ip_bhs = &iser_pdu->desc.iscsi_header;
/*
 * icl_conn_new_pdu method: forwards straight to iser_new_pdu() with the
 * same connection and allocation flags and returns its result.
 */
161 iser_conn_new_pdu(struct icl_conn *ic, int flags)
163 return (iser_new_pdu(ic, flags));
/*
 * Returns the icl_iser_pdu that wraps ip to icl_pdu_zone.
 * NOTE(review): ic is not used in the visible lines.
 */
167 iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
169 struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
171 uma_zfree(icl_pdu_zone, iser_pdu);
/*
 * icl_conn_pdu_data_segment_length method.  Builds the data segment length
 * from the three bhs_data_segment_len bytes of the request's BHS, taken in
 * index order 0, 1, 2.
 * NOTE(review): the declaration of len, the statements between the three
 * additions (presumably shifts that make room for the next byte - TODO
 * confirm) and the return statement are not visible in this excerpt.
 */
175 iser_conn_pdu_data_segment_length(struct icl_conn *ic,
176 const struct icl_pdu *request)
180 len += request->ip_bhs->bhs_data_segment_len[0];
182 len += request->ip_bhs->bhs_data_segment_len[1];
184 len += request->ip_bhs->bhs_data_segment_len[2];
/*
 * icl_conn_pdu_free method: forwards to iser_pdu_free() with the same
 * connection and PDU.
 */
190 iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
192 iser_pdu_free(ic, ip);
/*
 * Classifies an iSCSI opcode (after masking with ISCSI_OPCODE_MASK).
 * NOP-Out, Login, Logout and Text requests share one case group, SCSI
 * Command has its own case, and any other value is logged as unknown.
 * The result defaults to false.
 * NOTE(review): the case bodies, the default label and the return are not
 * visible in this excerpt; presumably the first group sets is_control to
 * true - TODO confirm.  The error message prints the unmasked opcode.
 */
196 is_control_opcode(uint8_t opcode)
198 bool is_control = false;
200 switch (opcode & ISCSI_OPCODE_MASK) {
201 case ISCSI_BHS_OPCODE_NOP_OUT:
202 case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
203 case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
204 case ISCSI_BHS_OPCODE_TEXT_REQUEST:
207 case ISCSI_BHS_OPCODE_SCSI_COMMAND:
211 ISER_ERR("unknown opcode %d", opcode);
/*
 * icl_conn_pdu_queue method.  Checks that the connection is in the
 * ISER_CONN_UP state, maps the PDU's TX descriptor headers through
 * iser_initialize_headers(), and then hands the PDU to iser_send_control()
 * or iser_send_command() depending on is_control_opcode().  Each failing
 * step is reported with ISER_ERR.
 * NOTE(review): the statement taken when the connection is not up, the
 * "if (ret)" tests guarding the error messages, and the else branch
 * separating the two send paths are not visible in this excerpt.
 */
218 iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
220 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
221 struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
224 if (iser_conn->state != ISER_CONN_UP)
227 ret = iser_initialize_headers(iser_pdu, iser_conn);
229 ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
233 if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
234 ret = iser_send_control(iser_conn, iser_pdu);
236 ISER_ERR("Failed to send control pdu %p", iser_pdu);
238 ret = iser_send_command(iser_conn, iser_pdu);
240 ISER_ERR("Failed to send command pdu %p", iser_pdu);
/*
 * Connection constructor registered with icl_register().  Takes a
 * reference on the module-wide connection counter, creates a zeroed
 * icl_iser kobj (which is a struct iser_conn), initialises its condition
 * variables, sx lock and mutexes, and fills in the embedded icl_conn
 * (offload name "iser", ic_unmapped = true).
 *
 * NOTE(review): the allocation uses M_WAITOK; whether the failure branch
 * that logs and drops the counter is reachable depends on kobj_create()
 * semantics - confirm.
 * NOTE(review): ic_offload is duplicated with strdup(..., M_TEMP); no
 * matching free is visible in iser_conn_free() in this excerpt - verify
 * who owns and releases that string.
 * NOTE(review): name and lock are not used in the visible lines; the
 * declaration of ic and the return statements are not visible either.
 */
244 static struct icl_conn *
245 iser_new_conn(const char *name, struct mtx *lock)
247 struct iser_conn *iser_conn;
/* Counted up front; undone on the failure path and in iser_conn_free(). */
250 refcount_acquire(&icl_iser_ncons);
252 iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class, M_ICL_ISER, M_WAITOK | M_ZERO);
254 ISER_ERR("failed to allocate iser conn");
255 refcount_release(&icl_iser_ncons);
/* iser_conn_free() destroys these in the reverse order. */
259 cv_init(&iser_conn->up_cv, "iser_cv");
260 sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
261 mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "iser_flush_lock", NULL, MTX_DEF);
262 cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
263 mtx_init(&iser_conn->ib_conn.lock, "iser_lock", NULL, MTX_DEF);
265 ic = &iser_conn->icl_conn;
268 ic->ic_offload = strdup("iser", M_TEMP);
270 ic->ic_unmapped = true;
/*
 * icl_conn_free method.  Releases the connection's resources through
 * iser_conn_release(), destroys the synchronisation primitives in the
 * reverse order of their initialisation in iser_new_conn(), deletes the
 * kobj and drops the module-wide connection reference.
 */
276 iser_conn_free(struct icl_conn *ic)
278 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
/* Must run before the locks go away: it takes state_mutex itself. */
280 iser_conn_release(ic);
281 mtx_destroy(&iser_conn->ib_conn.lock);
282 cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
283 mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
284 sx_destroy(&iser_conn->state_mutex);
285 cv_destroy(&iser_conn->up_cv);
286 kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
/* Balances the refcount_acquire() in iser_new_conn(). */
287 refcount_release(&icl_iser_ncons);
/*
 * icl_conn_handoff method.  With state_mutex held exclusively: verifies
 * the connection is still ISER_CONN_UP, allocates RX descriptors sized by
 * ic->ic_maxtags, posts min_posted_rx receive buffers and marks the
 * handoff as done before unlocking.  A second exit path frees the RX
 * descriptors and unlocks.
 * NOTE(review): the error checks, goto/labels and return statements that
 * connect these paths are not visible in this excerpt, and fd is not used
 * in the visible lines.
 */
291 iser_conn_handoff(struct icl_conn *ic, int fd)
293 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
296 sx_xlock(&iser_conn->state_mutex);
297 if (iser_conn->state != ISER_CONN_UP) {
299 ISER_ERR("iser_conn %p state is %d, teardown started\n",
300 iser_conn, iser_conn->state);
304 error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
308 error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
312 iser_conn->handoff_done = true;
314 sx_xunlock(&iser_conn->state_mutex);
/* Exit path that undoes the RX descriptor allocation. */
318 iser_free_rx_descriptors(iser_conn);
320 sx_xunlock(&iser_conn->state_mutex);
326 * Frees all conn objects
/*
 * Tears down everything attached to a connection, in three steps:
 *  1. unlink it from the global connection list, if present, under
 *     ig.connlist_mutex;
 *  2. free the IB connection resources under state_mutex;
 *  3. destroy the RDMA CM id, if any, and clear the pointer.
 * Called from iser_conn_free() and at the start of iser_conn_connect().
 */
329 iser_conn_release(struct icl_conn *ic)
331 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
332 struct ib_conn *ib_conn = &iser_conn->ib_conn;
333 struct iser_conn *curr, *tmp;
335 mtx_lock(&ig.connlist_mutex);
337 * Search for iser connection in global list.
338 * It may not be there in case of failure in connection establishment
341 list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
342 if (iser_conn == curr) {
343 ISER_WARN("found iser_conn %p", iser_conn);
344 list_del(&iser_conn->conn_list);
347 mtx_unlock(&ig.connlist_mutex);
350 * In case we reconnecting or removing session, we need to
351 * release IB resources (which is safe to call more than once).
353 sx_xlock(&iser_conn->state_mutex);
354 iser_free_ib_conn_res(iser_conn, true);
355 sx_xunlock(&iser_conn->state_mutex);
/* Clearing cma_id afterwards keeps a repeated release from destroying it twice. */
357 if (ib_conn->cma_id != NULL) {
358 rdma_destroy_id(ib_conn->cma_id);
359 ib_conn->cma_id = NULL;
/*
 * icl_conn_close method.  With state_mutex held exclusively, asks
 * iser_conn_terminate() to terminate the connection; when that returns
 * zero and the connection is in the ISER_CONN_PENDING state, signals up_cv
 * so that the thread blocked in iser_conn_connect() can proceed.
 */
365 iser_conn_close(struct icl_conn *ic)
367 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
369 ISER_INFO("closing conn %p", iser_conn);
371 sx_xlock(&iser_conn->state_mutex);
373 * In case iser connection is waiting on conditional variable
374 * (state PENDING) and we try to close it before connection establishment,
375 * we need to signal it to continue releasing connection properly.
377 if (!iser_conn_terminate(iser_conn) && iser_conn->state == ISER_CONN_PENDING)
378 cv_signal(&iser_conn->up_cv);
379 sx_xunlock(&iser_conn->state_mutex);
/*
 * icl_conn_connect method.  Releases whatever a previous attempt left
 * behind, then, with state_mutex held exclusively:
 *  - resets the per-connection state and moves it to ISER_CONN_PENDING;
 *  - creates an RDMA CM id whose event handler is iser_cma_handler with
 *    the iser connection as its context;
 *  - starts address resolution from from_sa to to_sa;
 *  - sleeps on up_cv until it is signalled;
 *  - requires the state to be ISER_CONN_UP afterwards and allocates the
 *    login buffer.
 * On success the lock is dropped and the connection is linked into the
 * global connection list.  The trailing exit path clears cma_id and
 * unlocks.
 * NOTE(review): the declaration of err, the error checks, goto/labels and
 * return statements are not visible in this excerpt; domain, socktype and
 * protocol are not used in the visible lines.
 */
384 iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
385 int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
387 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
388 struct ib_conn *ib_conn = &iser_conn->ib_conn;
/* Drop resources from any earlier connection attempt on this object. */
391 iser_conn_release(ic);
393 sx_xlock(&iser_conn->state_mutex);
394 /* the device is known only --after-- address resolution */
395 ib_conn->device = NULL;
396 iser_conn->handoff_done = false;
398 iser_conn->state = ISER_CONN_PENDING;
400 ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, (void *)iser_conn,
401 RDMA_PS_TCP, IB_QPT_RC);
402 if (IS_ERR(ib_conn->cma_id)) {
/* PTR_ERR() is negated here, so err holds the opposite sign of the encoded value. */
403 err = -PTR_ERR(ib_conn->cma_id);
404 ISER_ERR("rdma_create_id failed: %d", err);
/* Last argument is the resolution timeout - presumably milliseconds; TODO confirm. */
408 err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
410 ISER_ERR("rdma_resolve_addr failed: %d", err);
/* Signalled from iser_conn_close(); presumably also by the CM handler - TODO confirm. */
416 ISER_DBG("before cv_wait: %p", iser_conn);
417 cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
418 ISER_DBG("after cv_wait: %p", iser_conn);
420 if (iser_conn->state != ISER_CONN_UP) {
425 err = iser_alloc_login_buf(iser_conn);
428 sx_xunlock(&iser_conn->state_mutex);
/* iser_conn_release() removes this list entry again. */
430 mtx_lock(&ig.connlist_mutex);
431 list_add(&iser_conn->conn_list, &ig.connlist);
432 mtx_unlock(&ig.connlist_mutex);
437 ib_conn->cma_id = NULL;
439 sx_xunlock(&iser_conn->state_mutex);
/*
 * icl_conn_task_setup method.  Records the SCSI I/O CCB on the iser PDU
 * that wraps ip.
 * NOTE(review): ic, task_tagp and prvp are not used in the visible lines;
 * the return type and any further statements (for example storing a value
 * through prvp) are not visible in this excerpt.
 */
444 iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
445 struct ccb_scsiio *csio,
446 uint32_t *task_tagp, void **prvp)
448 struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
451 iser_pdu->csio = csio;
/*
 * icl_conn_task_done method.  prv is treated as the icl_pdu of the
 * finished task.  For each data direction flagged on the PDU (IN, then
 * OUT) the RDMA memory registration is undone and the task data is DMA
 * unmapped; the header mapping made by iser_initialize_headers() is
 * unmapped if the descriptor is still flagged as mapped; finally the PDU
 * is freed.
 * NOTE(review): the trailing arguments of both iser_dma_unmap_task_data()
 * calls are not visible in this excerpt.
 */
457 iser_conn_task_done(struct icl_conn *ic, void *prv)
459 struct icl_pdu *ip = prv;
460 struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
461 struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
462 struct iser_tx_desc *tx_desc = &iser_pdu->desc;
464 if (iser_pdu->dir[ISER_DIR_IN]) {
465 iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
466 iser_dma_unmap_task_data(iser_pdu,
467 &iser_pdu->data[ISER_DIR_IN],
471 if (iser_pdu->dir[ISER_DIR_OUT]) {
472 iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
473 iser_dma_unmap_task_data(iser_pdu,
474 &iser_pdu->data[ISER_DIR_OUT],
/* Same length and direction as the mapping in iser_initialize_headers(). */
478 if (likely(tx_desc->mapped)) {
479 ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
480 ISER_HEADERS_LEN, DMA_TO_DEVICE);
481 tx_desc->mapped = false;
484 iser_pdu_free(ic, ip);
/*
 * Limits callback handed to icl_register().  Fills in the driver limits:
 * 128 KiB for the maximum receive and send data segment lengths,
 * 262144 bytes (256 KiB) for the maximum burst length and 65536 bytes
 * (64 KiB) for the first burst length.
 * NOTE(review): the return type and return statement are not visible in
 * this excerpt.
 */
488 iser_limits(struct icl_drv_limits *idl)
491 idl->idl_max_recv_data_segment_length = 128 * 1024;
492 idl->idl_max_send_data_segment_length = 128 * 1024;
493 idl->idl_max_burst_length = 262144;
494 idl->idl_first_burst_length = 65536;
/*
 * Body of the module load routine (its header line is not visible in this
 * excerpt; icl_iser_modevent() calls it as icl_iser_load()).  Creates the
 * PDU zone, resets the connection counter, registers the "iser" offload
 * with ICL and initialises the global state: device list, connection list
 * and their locks.
 *
 * NOTE(review): the result of uma_zcreate() is not checked in the visible
 * lines (see the FIXME below).
 * NOTE(review): a failing icl_register() is handled only by KASSERT, which
 * as far as I know is active only in INVARIANTS kernels - confirm whether
 * the error should also be propagated to the caller.
 * NOTE(review): ig is zeroed and its locks are initialised after the
 * backend has already been registered - verify that no connection can be
 * created in that window.
 */
504 ISER_DBG("Starting iSER datamover...");
506 icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
507 NULL, NULL, NULL, NULL,
509 /* FIXME: Check rc */
511 refcount_init(&icl_iser_ncons, 0);
513 error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
514 KASSERT(error == 0, ("failed to register iser"));
516 memset(&ig, 0, sizeof(struct iser_global));
518 /* device init is called only after the first addr resolution */
519 sx_init(&ig.device_list_mutex, "global_device_lock");
520 INIT_LIST_HEAD(&ig.device_list);
521 mtx_init(&ig.connlist_mutex, "iser_global_conn_lock", NULL, MTX_DEF);
522 INIT_LIST_HEAD(&ig.connlist);
523 sx_init(&ig.close_conns_mutex, "global_close_conns_lock");
/*
 * Module unload routine.  Checks the live-connection counter first, then
 * destroys the global locks, unregisters the "iser" offload from ICL and
 * destroys the PDU zone.
 * NOTE(review): the statement executed when connections still exist is not
 * visible in this excerpt (presumably an early error return - TODO
 * confirm), nor is the final return.
 * NOTE(review): the global locks are destroyed before icl_unregister() is
 * called - verify nothing can still reach them through the registered
 * backend at that point.
 */
529 icl_iser_unload(void)
531 ISER_DBG("Removing iSER datamover...");
533 if (icl_iser_ncons != 0)
536 sx_destroy(&ig.close_conns_mutex);
537 mtx_destroy(&ig.connlist_mutex);
538 sx_destroy(&ig.device_list_mutex);
540 icl_unregister("iser", true);
542 uma_zdestroy(icl_pdu_zone);
/*
 * Module event handler referenced from icl_iser_data.  Returns the result
 * of icl_iser_load() or icl_iser_unload() depending on the event.
 * NOTE(review): the switch on what, its case labels and any default
 * branch are not visible in this excerpt; mod and arg are not used in the
 * visible lines.
 */
548 icl_iser_modevent(module_t mod, int what, void *arg)
552 return (icl_iser_load());
554 return (icl_iser_unload());
/*
 * Module glue: the moduledata record routes module events to
 * icl_iser_modevent(); the module is declared at SI_SUB_DRIVERS /
 * SI_ORDER_MIDDLE, depends on the icl and ibcore modules (version 1 for
 * the minimum, preferred and maximum fields) and exports version 1 itself.
 * NOTE(review): the remaining moduledata initialisers and the closing
 * brace are not visible in this excerpt.
 */
560 moduledata_t icl_iser_data = {
562 .evhand = icl_iser_modevent,
566 DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
567 MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
568 MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
569 MODULE_VERSION(icl_iser, 1);