]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/iser/icl_iser.c
Upgrade to Unbound 1.5.10.
[FreeBSD/FreeBSD.git] / sys / dev / iser / icl_iser.c
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 #include "icl_iser.h"
28
29 SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW, 0, "iSER module");
30 int iser_debug = 0;
31 SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
32     &iser_debug, 0, "Enable iser debug messages");
33
34 static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
35 static uma_zone_t icl_pdu_zone;
36
37 static volatile u_int   icl_iser_ncons;
38 struct iser_global ig;
39
40 static void iser_conn_release(struct icl_conn *ic);
41
42 static icl_conn_new_pdu_t       iser_conn_new_pdu;
43 static icl_conn_pdu_free_t      iser_conn_pdu_free;
44 static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
45 static icl_conn_pdu_append_data_t       iser_conn_pdu_append_data;
46 static icl_conn_pdu_queue_t     iser_conn_pdu_queue;
47 static icl_conn_handoff_t       iser_conn_handoff;
48 static icl_conn_free_t          iser_conn_free;
49 static icl_conn_close_t         iser_conn_close;
50 static icl_conn_connect_t       iser_conn_connect;
51 static icl_conn_task_setup_t    iser_conn_task_setup;
52 static icl_conn_task_done_t     iser_conn_task_done;
53 static icl_conn_pdu_get_data_t  iser_conn_pdu_get_data;
54
55 static kobj_method_t icl_iser_methods[] = {
56         KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
57         KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
58         KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
59         KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
60         KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
61         KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
62         KOBJMETHOD(icl_conn_free, iser_conn_free),
63         KOBJMETHOD(icl_conn_close, iser_conn_close),
64         KOBJMETHOD(icl_conn_connect, iser_conn_connect),
65         KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
66         KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
67         KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
68         { 0, 0 }
69 };
70
71 DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));
72
73 /**
74  * iser_initialize_headers() - Initialize task headers
75  * @pdu:       iser pdu
76  * @iser_conn:    iser connection
77  *
78  * Notes:
79  * This routine may race with iser teardown flow for scsi
80  * error handling TMFs. So for TMF we should acquire the
81  * state mutex to avoid dereferencing the IB device which
82  * may have already been terminated (racing teardown sequence).
83  */
84 int
85 iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
86 {
87         struct iser_tx_desc *tx_desc = &pdu->desc;
88         struct iser_device *device = iser_conn->ib_conn.device;
89         u64 dma_addr;
90         int ret = 0;
91
92         dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
93                                 ISER_HEADERS_LEN, DMA_TO_DEVICE);
94         if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
95                 ret = -ENOMEM;
96                 goto out;
97         }
98
99         tx_desc->mapped = true;
100         tx_desc->dma_addr = dma_addr;
101         tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
102         tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
103         tx_desc->tx_sg[0].lkey   = device->mr->lkey;
104
105 out:
106
107         return (ret);
108 }
109
110 int
111 iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
112                           const void *addr, size_t len, int flags)
113 {
114         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
115
116         if (request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
117             request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_TEXT_REQUEST) {
118                 ISER_DBG("copy to login buff");
119                 memcpy(iser_conn->login_req_buf, addr, len);
120                 request->ip_data_len = len;
121         }
122
123         return (0);
124 }
125
126 void
127 iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
128                        size_t off, void *addr, size_t len)
129 {
130         /* If we have a receive data, copy it to upper layer buffer */
131         if (ip->ip_data_mbuf)
132                 memcpy(addr, ip->ip_data_mbuf + off, len);
133 }
134
135 /*
136  * Allocate icl_pdu with empty BHS to fill up by the caller.
137  */
138 struct icl_pdu *
139 iser_new_pdu(struct icl_conn *ic, int flags)
140 {
141         struct icl_iser_pdu *iser_pdu;
142         struct icl_pdu *ip;
143         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
144
145         iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
146         if (iser_pdu == NULL) {
147                 ISER_WARN("failed to allocate %zd bytes", sizeof(*iser_pdu));
148                 return (NULL);
149         }
150
151         iser_pdu->iser_conn = iser_conn;
152         ip = &iser_pdu->icl_pdu;
153         ip->ip_conn = ic;
154         ip->ip_bhs = &iser_pdu->desc.iscsi_header;
155
156         return (ip);
157 }
158
/*
 * ICL method: allocate a new PDU for the connection.
 * Delegates to iser_new_pdu() and returns its result unchanged.
 */
struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_pdu *pdu;

	pdu = iser_new_pdu(ic, flags);
	return (pdu);
}
164
165 void
166 iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
167 {
168         struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
169
170         uma_zfree(icl_pdu_zone, iser_pdu);
171 }
172
173 size_t
174 iser_conn_pdu_data_segment_length(struct icl_conn *ic,
175                                   const struct icl_pdu *request)
176 {
177         uint32_t len = 0;
178
179         len += request->ip_bhs->bhs_data_segment_len[0];
180         len <<= 8;
181         len += request->ip_bhs->bhs_data_segment_len[1];
182         len <<= 8;
183         len += request->ip_bhs->bhs_data_segment_len[2];
184
185         return (len);
186 }
187
/*
 * ICL method: release a PDU.  Thin wrapper around iser_pdu_free().
 */
void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{

	iser_pdu_free(ic, ip);
}
193
194 static bool
195 is_control_opcode(uint8_t opcode)
196 {
197         bool is_control = false;
198
199         switch (opcode & ISCSI_OPCODE_MASK) {
200                 case ISCSI_BHS_OPCODE_NOP_OUT:
201                 case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
202                 case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
203                 case ISCSI_BHS_OPCODE_TEXT_REQUEST:
204                         is_control = true;
205                         break;
206                 case ISCSI_BHS_OPCODE_SCSI_COMMAND:
207                         is_control = false;
208                         break;
209                 default:
210                         ISER_ERR("unknown opcode %d", opcode);
211         }
212
213         return (is_control);
214 }
215
216 void
217 iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
218 {
219         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
220         struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
221         int ret;
222
223         if (iser_conn->state != ISER_CONN_UP)
224                 return;
225
226         ret = iser_initialize_headers(iser_pdu, iser_conn);
227         if (ret) {
228                 ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
229                 return;
230         }
231
232         if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
233                 ret = iser_send_control(iser_conn, iser_pdu);
234                 if (unlikely(ret))
235                         ISER_ERR("Failed to send control pdu %p", iser_pdu);
236         } else {
237                 ret = iser_send_command(iser_conn, iser_pdu);
238                 if (unlikely(ret))
239                         ISER_ERR("Failed to send command pdu %p", iser_pdu);
240         }
241 }
242
243 static struct icl_conn *
244 iser_new_conn(const char *name, struct mtx *lock)
245 {
246         struct iser_conn *iser_conn;
247         struct icl_conn *ic;
248
249         refcount_acquire(&icl_iser_ncons);
250
251         iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class, M_ICL_ISER, M_WAITOK | M_ZERO);
252         if (!iser_conn) {
253                 ISER_ERR("failed to allocate iser conn");
254                 refcount_release(&icl_iser_ncons);
255                 return (NULL);
256         }
257
258         cv_init(&iser_conn->up_cv, "iser_cv");
259         sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
260         mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "flush_lock", NULL, MTX_DEF);
261         cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
262         mtx_init(&iser_conn->ib_conn.lock, "lock", NULL, MTX_DEF);
263
264         ic = &iser_conn->icl_conn;
265         ic->ic_lock = lock;
266         ic->ic_name = name;
267         ic->ic_offload = strdup("iser", M_TEMP);
268         ic->ic_iser = true;
269         ic->ic_unmapped = true;
270
271         return (ic);
272 }
273
274 void
275 iser_conn_free(struct icl_conn *ic)
276 {
277         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
278
279         iser_conn_release(ic);
280         cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
281         mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
282         sx_destroy(&iser_conn->state_mutex);
283         cv_destroy(&iser_conn->up_cv);
284         kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
285         refcount_release(&icl_iser_ncons);
286 }
287
288 int
289 iser_conn_handoff(struct icl_conn *ic, int fd)
290 {
291         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
292         int error = 0;
293
294         sx_xlock(&iser_conn->state_mutex);
295         if (iser_conn->state != ISER_CONN_UP) {
296                 error = EINVAL;
297                 ISER_ERR("iser_conn %p state is %d, teardown started\n",
298                          iser_conn, iser_conn->state);
299                 goto out;
300         }
301
302         error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
303         if (error)
304                 goto out;
305
306         error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
307         if (error)
308                 goto post_error;
309
310         iser_conn->handoff_done = true;
311
312         sx_xunlock(&iser_conn->state_mutex);
313         return (error);
314
315 post_error:
316         iser_free_rx_descriptors(iser_conn);
317 out:
318         sx_xunlock(&iser_conn->state_mutex);
319         return (error);
320
321 }
322
323 /**
324  * Frees all conn objects
325  */
326 static void
327 iser_conn_release(struct icl_conn *ic)
328 {
329         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
330         struct ib_conn *ib_conn = &iser_conn->ib_conn;
331         struct iser_conn *curr, *tmp;
332
333         mtx_lock(&ig.connlist_mutex);
334         /*
335          * Search for iser connection in global list.
336          * It may not be there in case of failure in connection establishment
337          * stage.
338          */
339         list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
340                 if (iser_conn == curr) {
341                         ISER_WARN("found iser_conn %p", iser_conn);
342                         list_del(&iser_conn->conn_list);
343                 }
344         }
345         mtx_unlock(&ig.connlist_mutex);
346
347         /*
348          * In case we reconnecting or removing session, we need to
349          * release IB resources (which is safe to call more than once).
350          */
351         sx_xlock(&iser_conn->state_mutex);
352         iser_free_ib_conn_res(iser_conn, true);
353         sx_xunlock(&iser_conn->state_mutex);
354
355         if (ib_conn->cma_id != NULL) {
356                 rdma_destroy_id(ib_conn->cma_id);
357                 ib_conn->cma_id = NULL;
358         }
359
360 }
361
362 void
363 iser_conn_close(struct icl_conn *ic)
364 {
365         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
366
367         ISER_INFO("closing conn %p", iser_conn);
368
369         sx_xlock(&iser_conn->state_mutex);
370         /*
371          * In case iser connection is waiting on conditional variable
372          * (state PENDING) and we try to close it before connection establishment,
373          * we need to signal it to continue releasing connection properly.
374          */
375         if (!iser_conn_terminate(iser_conn) && iser_conn->state == ISER_CONN_PENDING)
376                 cv_signal(&iser_conn->up_cv);
377         sx_xunlock(&iser_conn->state_mutex);
378
379 }
380
381 int
382 iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
383                 int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
384 {
385         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
386         struct ib_conn *ib_conn = &iser_conn->ib_conn;
387         int err = 0;
388
389         iser_conn_release(ic);
390
391         sx_xlock(&iser_conn->state_mutex);
392          /* the device is known only --after-- address resolution */
393         ib_conn->device = NULL;
394         iser_conn->handoff_done = false;
395
396         iser_conn->state = ISER_CONN_PENDING;
397
398         ib_conn->cma_id = rdma_create_id(iser_cma_handler, (void *)iser_conn,
399                         RDMA_PS_TCP, IB_QPT_RC);
400         if (IS_ERR(ib_conn->cma_id)) {
401                 err = -PTR_ERR(ib_conn->cma_id);
402                 ISER_ERR("rdma_create_id failed: %d", err);
403                 goto id_failure;
404         }
405
406         err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
407         if (err) {
408                 ISER_ERR("rdma_resolve_addr failed: %d", err);
409                 if (err < 0)
410                         err = -err;
411                 goto addr_failure;
412         }
413
414         ISER_DBG("before cv_wait: %p", iser_conn);
415         cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
416         ISER_DBG("after cv_wait: %p", iser_conn);
417
418         if (iser_conn->state != ISER_CONN_UP) {
419                 err = EIO;
420                 goto addr_failure;
421         }
422
423         err = iser_alloc_login_buf(iser_conn);
424         if (err)
425                 goto addr_failure;
426         sx_xunlock(&iser_conn->state_mutex);
427
428         mtx_lock(&ig.connlist_mutex);
429         list_add(&iser_conn->conn_list, &ig.connlist);
430         mtx_unlock(&ig.connlist_mutex);
431
432         return (0);
433
434 id_failure:
435         ib_conn->cma_id = NULL;
436 addr_failure:
437         sx_xunlock(&iser_conn->state_mutex);
438         return (err);
439 }
440
441 int
442 iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
443                      struct ccb_scsiio *csio,
444                      uint32_t *task_tagp, void **prvp)
445 {
446         struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
447
448         *prvp = ip;
449         iser_pdu->csio = csio;
450
451         return (0);
452 }
453
454 void
455 iser_conn_task_done(struct icl_conn *ic, void *prv)
456 {
457         struct icl_pdu *ip = prv;
458         struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
459         struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
460         struct iser_tx_desc *tx_desc = &iser_pdu->desc;
461
462         if (iser_pdu->dir[ISER_DIR_IN]) {
463                 iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
464                 iser_dma_unmap_task_data(iser_pdu,
465                                          &iser_pdu->data[ISER_DIR_IN],
466                                          DMA_FROM_DEVICE);
467         }
468
469         if (iser_pdu->dir[ISER_DIR_OUT]) {
470                 iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
471                 iser_dma_unmap_task_data(iser_pdu,
472                                          &iser_pdu->data[ISER_DIR_OUT],
473                                          DMA_TO_DEVICE);
474         }
475
476         if (likely(tx_desc->mapped)) {
477                 ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
478                                     ISER_HEADERS_LEN, DMA_TO_DEVICE);
479                 tx_desc->mapped = false;
480         }
481
482         iser_pdu_free(ic, ip);
483 }
484
485 static int
486 iser_limits(struct icl_drv_limits *idl)
487 {
488         idl->idl_max_recv_data_segment_length = 128 * 1024;
489
490         return (0);
491 }
492
493 static int
494 icl_iser_load(void)
495 {
496         int error;
497
498         ISER_DBG("Starting iSER datamover...");
499
500         icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
501                                    NULL, NULL, NULL, NULL,
502                                    UMA_ALIGN_PTR, 0);
503         /* FIXME: Check rc */
504
505         refcount_init(&icl_iser_ncons, 0);
506
507         error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
508         KASSERT(error == 0, ("failed to register iser"));
509
510         memset(&ig, 0, sizeof(struct iser_global));
511
512         /* device init is called only after the first addr resolution */
513         sx_init(&ig.device_list_mutex,  "global_device_lock");
514         INIT_LIST_HEAD(&ig.device_list);
515         mtx_init(&ig.connlist_mutex, "global_conn_lock", NULL, MTX_DEF);
516         INIT_LIST_HEAD(&ig.connlist);
517         sx_init(&ig.close_conns_mutex,  "global_close_conns_lock");
518
519         return (error);
520 }
521
522 static int
523 icl_iser_unload(void)
524 {
525         ISER_DBG("Removing iSER datamover...");
526
527         if (icl_iser_ncons != 0)
528                 return (EBUSY);
529
530         sx_destroy(&ig.close_conns_mutex);
531         mtx_destroy(&ig.connlist_mutex);
532         sx_destroy(&ig.device_list_mutex);
533
534         icl_unregister("iser", true);
535
536         uma_zdestroy(icl_pdu_zone);
537
538         return (0);
539 }
540
541 static int
542 icl_iser_modevent(module_t mod, int what, void *arg)
543 {
544         switch (what) {
545         case MOD_LOAD:
546                 return (icl_iser_load());
547         case MOD_UNLOAD:
548                 return (icl_iser_unload());
549         default:
550                 return (EINVAL);
551         }
552 }
553
554 moduledata_t icl_iser_data = {
555         .name = "icl_iser",
556         .evhand = icl_iser_modevent,
557         .priv = 0
558 };
559
560 DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
561 MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
562 MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
563 MODULE_DEPEND(icl_iser, linuxkpi, 1, 1, 1);
564 MODULE_VERSION(icl_iser, 1);