]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/iser/icl_iser.c
Mark more nodes as CTLFLAG_MPSAFE or CTLFLAG_NEEDGIANT (17 of many)
[FreeBSD/FreeBSD.git] / sys / dev / iser / icl_iser.c
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 #include "icl_iser.h"
28
29 SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
30     "iSER module");
31 int iser_debug = 0;
32 SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
33     &iser_debug, 0, "Enable iser debug messages");
34
35 static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
36 static uma_zone_t icl_pdu_zone;
37
38 static volatile u_int   icl_iser_ncons;
39 struct iser_global ig;
40
41 static void iser_conn_release(struct icl_conn *ic);
42
43 static icl_conn_new_pdu_t       iser_conn_new_pdu;
44 static icl_conn_pdu_free_t      iser_conn_pdu_free;
45 static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
46 static icl_conn_pdu_append_data_t       iser_conn_pdu_append_data;
47 static icl_conn_pdu_queue_t     iser_conn_pdu_queue;
48 static icl_conn_handoff_t       iser_conn_handoff;
49 static icl_conn_free_t          iser_conn_free;
50 static icl_conn_close_t         iser_conn_close;
51 static icl_conn_connect_t       iser_conn_connect;
52 static icl_conn_task_setup_t    iser_conn_task_setup;
53 static icl_conn_task_done_t     iser_conn_task_done;
54 static icl_conn_pdu_get_data_t  iser_conn_pdu_get_data;
55
56 static kobj_method_t icl_iser_methods[] = {
57         KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
58         KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
59         KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
60         KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
61         KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
62         KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
63         KOBJMETHOD(icl_conn_free, iser_conn_free),
64         KOBJMETHOD(icl_conn_close, iser_conn_close),
65         KOBJMETHOD(icl_conn_connect, iser_conn_connect),
66         KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
67         KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
68         KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
69         { 0, 0 }
70 };
71
72 DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));
73
74 /**
75  * iser_initialize_headers() - Initialize task headers
76  * @pdu:       iser pdu
77  * @iser_conn:    iser connection
78  *
79  * Notes:
80  * This routine may race with iser teardown flow for scsi
81  * error handling TMFs. So for TMF we should acquire the
82  * state mutex to avoid dereferencing the IB device which
83  * may have already been terminated (racing teardown sequence).
84  */
85 int
86 iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
87 {
88         struct iser_tx_desc *tx_desc = &pdu->desc;
89         struct iser_device *device = iser_conn->ib_conn.device;
90         u64 dma_addr;
91         int ret = 0;
92
93         dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
94                                 ISER_HEADERS_LEN, DMA_TO_DEVICE);
95         if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
96                 ret = -ENOMEM;
97                 goto out;
98         }
99
100         tx_desc->mapped = true;
101         tx_desc->dma_addr = dma_addr;
102         tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
103         tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
104         tx_desc->tx_sg[0].lkey   = device->mr->lkey;
105
106 out:
107
108         return (ret);
109 }
110
111 int
112 iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
113                           const void *addr, size_t len, int flags)
114 {
115         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
116
117         if (request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
118             request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_TEXT_REQUEST) {
119                 ISER_DBG("copy to login buff");
120                 memcpy(iser_conn->login_req_buf, addr, len);
121                 request->ip_data_len = len;
122         }
123
124         return (0);
125 }
126
127 void
128 iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
129                        size_t off, void *addr, size_t len)
130 {
131         /* If we have a receive data, copy it to upper layer buffer */
132         if (ip->ip_data_mbuf)
133                 memcpy(addr, ip->ip_data_mbuf + off, len);
134 }
135
136 /*
137  * Allocate icl_pdu with empty BHS to fill up by the caller.
138  */
139 struct icl_pdu *
140 iser_new_pdu(struct icl_conn *ic, int flags)
141 {
142         struct icl_iser_pdu *iser_pdu;
143         struct icl_pdu *ip;
144         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
145
146         iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
147         if (iser_pdu == NULL) {
148                 ISER_WARN("failed to allocate %zd bytes", sizeof(*iser_pdu));
149                 return (NULL);
150         }
151
152         iser_pdu->iser_conn = iser_conn;
153         ip = &iser_pdu->icl_pdu;
154         ip->ip_conn = ic;
155         ip->ip_bhs = &iser_pdu->desc.iscsi_header;
156
157         return (ip);
158 }
159
/* ICL "new pdu" method: thin wrapper around iser_new_pdu(). */
struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_pdu *ip;

	ip = iser_new_pdu(ic, flags);
	return (ip);
}
165
166 void
167 iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
168 {
169         struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
170
171         uma_zfree(icl_pdu_zone, iser_pdu);
172 }
173
174 size_t
175 iser_conn_pdu_data_segment_length(struct icl_conn *ic,
176                                   const struct icl_pdu *request)
177 {
178         uint32_t len = 0;
179
180         len += request->ip_bhs->bhs_data_segment_len[0];
181         len <<= 8;
182         len += request->ip_bhs->bhs_data_segment_len[1];
183         len <<= 8;
184         len += request->ip_bhs->bhs_data_segment_len[2];
185
186         return (len);
187 }
188
/* ICL "pdu free" method: thin wrapper around iser_pdu_free(). */
void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{

	iser_pdu_free(ic, ip);
}
194
195 static bool
196 is_control_opcode(uint8_t opcode)
197 {
198         bool is_control = false;
199
200         switch (opcode & ISCSI_OPCODE_MASK) {
201                 case ISCSI_BHS_OPCODE_NOP_OUT:
202                 case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
203                 case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
204                 case ISCSI_BHS_OPCODE_TEXT_REQUEST:
205                         is_control = true;
206                         break;
207                 case ISCSI_BHS_OPCODE_SCSI_COMMAND:
208                         is_control = false;
209                         break;
210                 default:
211                         ISER_ERR("unknown opcode %d", opcode);
212         }
213
214         return (is_control);
215 }
216
217 void
218 iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
219 {
220         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
221         struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
222         int ret;
223
224         if (iser_conn->state != ISER_CONN_UP)
225                 return;
226
227         ret = iser_initialize_headers(iser_pdu, iser_conn);
228         if (ret) {
229                 ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
230                 return;
231         }
232
233         if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
234                 ret = iser_send_control(iser_conn, iser_pdu);
235                 if (unlikely(ret))
236                         ISER_ERR("Failed to send control pdu %p", iser_pdu);
237         } else {
238                 ret = iser_send_command(iser_conn, iser_pdu);
239                 if (unlikely(ret))
240                         ISER_ERR("Failed to send command pdu %p", iser_pdu);
241         }
242 }
243
244 static struct icl_conn *
245 iser_new_conn(const char *name, struct mtx *lock)
246 {
247         struct iser_conn *iser_conn;
248         struct icl_conn *ic;
249
250         refcount_acquire(&icl_iser_ncons);
251
252         iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class, M_ICL_ISER, M_WAITOK | M_ZERO);
253         if (!iser_conn) {
254                 ISER_ERR("failed to allocate iser conn");
255                 refcount_release(&icl_iser_ncons);
256                 return (NULL);
257         }
258
259         cv_init(&iser_conn->up_cv, "iser_cv");
260         sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
261         mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "iser_flush_lock", NULL, MTX_DEF);
262         cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
263         mtx_init(&iser_conn->ib_conn.lock, "iser_lock", NULL, MTX_DEF);
264
265         ic = &iser_conn->icl_conn;
266         ic->ic_lock = lock;
267         ic->ic_name = name;
268         ic->ic_offload = strdup("iser", M_TEMP);
269         ic->ic_iser = true;
270         ic->ic_unmapped = true;
271
272         return (ic);
273 }
274
275 void
276 iser_conn_free(struct icl_conn *ic)
277 {
278         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
279
280         iser_conn_release(ic);
281         mtx_destroy(&iser_conn->ib_conn.lock);
282         cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
283         mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
284         sx_destroy(&iser_conn->state_mutex);
285         cv_destroy(&iser_conn->up_cv);
286         kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
287         refcount_release(&icl_iser_ncons);
288 }
289
290 int
291 iser_conn_handoff(struct icl_conn *ic, int fd)
292 {
293         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
294         int error = 0;
295
296         sx_xlock(&iser_conn->state_mutex);
297         if (iser_conn->state != ISER_CONN_UP) {
298                 error = EINVAL;
299                 ISER_ERR("iser_conn %p state is %d, teardown started\n",
300                          iser_conn, iser_conn->state);
301                 goto out;
302         }
303
304         error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
305         if (error)
306                 goto out;
307
308         error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
309         if (error)
310                 goto post_error;
311
312         iser_conn->handoff_done = true;
313
314         sx_xunlock(&iser_conn->state_mutex);
315         return (error);
316
317 post_error:
318         iser_free_rx_descriptors(iser_conn);
319 out:
320         sx_xunlock(&iser_conn->state_mutex);
321         return (error);
322
323 }
324
325 /**
326  * Frees all conn objects
327  */
328 static void
329 iser_conn_release(struct icl_conn *ic)
330 {
331         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
332         struct ib_conn *ib_conn = &iser_conn->ib_conn;
333         struct iser_conn *curr, *tmp;
334
335         mtx_lock(&ig.connlist_mutex);
336         /*
337          * Search for iser connection in global list.
338          * It may not be there in case of failure in connection establishment
339          * stage.
340          */
341         list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
342                 if (iser_conn == curr) {
343                         ISER_WARN("found iser_conn %p", iser_conn);
344                         list_del(&iser_conn->conn_list);
345                 }
346         }
347         mtx_unlock(&ig.connlist_mutex);
348
349         /*
350          * In case we reconnecting or removing session, we need to
351          * release IB resources (which is safe to call more than once).
352          */
353         sx_xlock(&iser_conn->state_mutex);
354         iser_free_ib_conn_res(iser_conn, true);
355         sx_xunlock(&iser_conn->state_mutex);
356
357         if (ib_conn->cma_id != NULL) {
358                 rdma_destroy_id(ib_conn->cma_id);
359                 ib_conn->cma_id = NULL;
360         }
361
362 }
363
364 void
365 iser_conn_close(struct icl_conn *ic)
366 {
367         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
368
369         ISER_INFO("closing conn %p", iser_conn);
370
371         sx_xlock(&iser_conn->state_mutex);
372         /*
373          * In case iser connection is waiting on conditional variable
374          * (state PENDING) and we try to close it before connection establishment,
375          * we need to signal it to continue releasing connection properly.
376          */
377         if (!iser_conn_terminate(iser_conn) && iser_conn->state == ISER_CONN_PENDING)
378                 cv_signal(&iser_conn->up_cv);
379         sx_xunlock(&iser_conn->state_mutex);
380
381 }
382
383 int
384 iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
385                 int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
386 {
387         struct iser_conn *iser_conn = icl_to_iser_conn(ic);
388         struct ib_conn *ib_conn = &iser_conn->ib_conn;
389         int err = 0;
390
391         iser_conn_release(ic);
392
393         sx_xlock(&iser_conn->state_mutex);
394          /* the device is known only --after-- address resolution */
395         ib_conn->device = NULL;
396         iser_conn->handoff_done = false;
397
398         iser_conn->state = ISER_CONN_PENDING;
399
400         ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, (void *)iser_conn,
401                         RDMA_PS_TCP, IB_QPT_RC);
402         if (IS_ERR(ib_conn->cma_id)) {
403                 err = -PTR_ERR(ib_conn->cma_id);
404                 ISER_ERR("rdma_create_id failed: %d", err);
405                 goto id_failure;
406         }
407
408         err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
409         if (err) {
410                 ISER_ERR("rdma_resolve_addr failed: %d", err);
411                 if (err < 0)
412                         err = -err;
413                 goto addr_failure;
414         }
415
416         ISER_DBG("before cv_wait: %p", iser_conn);
417         cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
418         ISER_DBG("after cv_wait: %p", iser_conn);
419
420         if (iser_conn->state != ISER_CONN_UP) {
421                 err = EIO;
422                 goto addr_failure;
423         }
424
425         err = iser_alloc_login_buf(iser_conn);
426         if (err)
427                 goto addr_failure;
428         sx_xunlock(&iser_conn->state_mutex);
429
430         mtx_lock(&ig.connlist_mutex);
431         list_add(&iser_conn->conn_list, &ig.connlist);
432         mtx_unlock(&ig.connlist_mutex);
433
434         return (0);
435
436 id_failure:
437         ib_conn->cma_id = NULL;
438 addr_failure:
439         sx_xunlock(&iser_conn->state_mutex);
440         return (err);
441 }
442
443 int
444 iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
445                      struct ccb_scsiio *csio,
446                      uint32_t *task_tagp, void **prvp)
447 {
448         struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
449
450         *prvp = ip;
451         iser_pdu->csio = csio;
452
453         return (0);
454 }
455
456 void
457 iser_conn_task_done(struct icl_conn *ic, void *prv)
458 {
459         struct icl_pdu *ip = prv;
460         struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
461         struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
462         struct iser_tx_desc *tx_desc = &iser_pdu->desc;
463
464         if (iser_pdu->dir[ISER_DIR_IN]) {
465                 iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
466                 iser_dma_unmap_task_data(iser_pdu,
467                                          &iser_pdu->data[ISER_DIR_IN],
468                                          DMA_FROM_DEVICE);
469         }
470
471         if (iser_pdu->dir[ISER_DIR_OUT]) {
472                 iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
473                 iser_dma_unmap_task_data(iser_pdu,
474                                          &iser_pdu->data[ISER_DIR_OUT],
475                                          DMA_TO_DEVICE);
476         }
477
478         if (likely(tx_desc->mapped)) {
479                 ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
480                                     ISER_HEADERS_LEN, DMA_TO_DEVICE);
481                 tx_desc->mapped = false;
482         }
483
484         iser_pdu_free(ic, ip);
485 }
486
487 static int
488 iser_limits(struct icl_drv_limits *idl)
489 {
490
491         idl->idl_max_recv_data_segment_length = 128 * 1024;
492         idl->idl_max_send_data_segment_length = 128 * 1024;
493         idl->idl_max_burst_length = 262144;
494         idl->idl_first_burst_length = 65536;
495
496         return (0);
497 }
498
499 static int
500 icl_iser_load(void)
501 {
502         int error;
503
504         ISER_DBG("Starting iSER datamover...");
505
506         icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
507                                    NULL, NULL, NULL, NULL,
508                                    UMA_ALIGN_PTR, 0);
509         /* FIXME: Check rc */
510
511         refcount_init(&icl_iser_ncons, 0);
512
513         error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
514         KASSERT(error == 0, ("failed to register iser"));
515
516         memset(&ig, 0, sizeof(struct iser_global));
517
518         /* device init is called only after the first addr resolution */
519         sx_init(&ig.device_list_mutex,  "global_device_lock");
520         INIT_LIST_HEAD(&ig.device_list);
521         mtx_init(&ig.connlist_mutex, "iser_global_conn_lock", NULL, MTX_DEF);
522         INIT_LIST_HEAD(&ig.connlist);
523         sx_init(&ig.close_conns_mutex,  "global_close_conns_lock");
524
525         return (error);
526 }
527
528 static int
529 icl_iser_unload(void)
530 {
531         ISER_DBG("Removing iSER datamover...");
532
533         if (icl_iser_ncons != 0)
534                 return (EBUSY);
535
536         sx_destroy(&ig.close_conns_mutex);
537         mtx_destroy(&ig.connlist_mutex);
538         sx_destroy(&ig.device_list_mutex);
539
540         icl_unregister("iser", true);
541
542         uma_zdestroy(icl_pdu_zone);
543
544         return (0);
545 }
546
547 static int
548 icl_iser_modevent(module_t mod, int what, void *arg)
549 {
550         switch (what) {
551         case MOD_LOAD:
552                 return (icl_iser_load());
553         case MOD_UNLOAD:
554                 return (icl_iser_unload());
555         default:
556                 return (EINVAL);
557         }
558 }
559
560 moduledata_t icl_iser_data = {
561         .name = "icl_iser",
562         .evhand = icl_iser_modevent,
563         .priv = 0
564 };
565
566 DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
567 MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
568 MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
569 MODULE_DEPEND(icl_iser, linuxkpi, 1, 1, 1);
570 MODULE_VERSION(icl_iser, 1);