]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/iscsi/icl_soft.c
MFV r349134:
[FreeBSD/FreeBSD.git] / sys / dev / iscsi / icl_soft.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2012 The FreeBSD Foundation
5  * All rights reserved.
6  *
7  * This software was developed by Edward Tomasz Napierala under sponsorship
8  * from the FreeBSD Foundation.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  */
32
33 /*
34  * Software implementation of iSCSI Common Layer kobj(9) interface.
35  */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39
40 #include <sys/param.h>
41 #include <sys/capsicum.h>
42 #include <sys/condvar.h>
43 #include <sys/conf.h>
44 #include <sys/file.h>
45 #include <sys/kernel.h>
46 #include <sys/kthread.h>
47 #include <sys/lock.h>
48 #include <sys/mbuf.h>
49 #include <sys/mutex.h>
50 #include <sys/module.h>
51 #include <sys/protosw.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/sysctl.h>
55 #include <sys/systm.h>
56 #include <sys/sx.h>
57 #include <sys/uio.h>
58 #include <vm/uma.h>
59 #include <netinet/in.h>
60 #include <netinet/tcp.h>
61
62 #include <dev/iscsi/icl.h>
63 #include <dev/iscsi/iscsi_proto.h>
64 #include <icl_conn_if.h>
65
/*
 * Tunables, each exported as a read/write-tunable (CTLFLAG_RWTUN) integer
 * under the _kern_icl sysctl node.
 */

/* Non-zero asks the send path to coalesce queued PDUs before sending. */
66 static int coalesce = 1;
67 SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
68     &coalesce, 0, "Try to coalesce PDUs before sending");
/*
 * Cap applied by icl_pdu_data_segment_receive_len() to the number of data
 * segment bytes the receive path waits for before reading.  Note that this
 * is a signed int which that function compares against a size_t.
 */
69 static int partial_receive_len = 128 * 1024;
70 SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
71     &partial_receive_len, 0, "Minimum read size for partially received "
72     "data segment");
/* Default send socket buffer size (consumer is outside this excerpt). */
73 static int sendspace = 1048576;
74 SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
75     &sendspace, 0, "Default send socket buffer size");
/* Default receive socket buffer size (consumer is outside this excerpt). */
76 static int recvspace = 1048576;
77 SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
78     &recvspace, 0, "Default receive socket buffer size");
79
/* malloc(9) type for this backend. */
80 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend");
/* UMA zone from which struct icl_pdu is allocated and to which it is freed. */
81 static uma_zone_t icl_pdu_zone;
82
/* NOTE(review): presumably the number of live connections -- confirm at its users. */
83 static volatile u_int   icl_ncons;
84
/*
 * Helpers for the per-connection mutex reached through ic_lock.
 *
 * The macro argument is parenthesized so that any pointer-valued expression
 * (for example "base + 1"), not only a plain identifier, expands correctly.
 */
#define ICL_CONN_LOCK(X)                mtx_lock((X)->ic_lock)
#define ICL_CONN_UNLOCK(X)              mtx_unlock((X)->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)         mtx_assert((X)->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)     mtx_assert((X)->ic_lock, MA_NOTOWNED)
89
/* Declares struct icl_pdu_stailq, a singly-linked tail queue of struct icl_pdu. */
90 STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
91
/*
 * Forward declarations of this backend's implementations of the icl_conn
 * interface methods.  Each uses the interface's function typedef
 * (icl_conn_*_t) and is given internal linkage via "static".
 */
92 static icl_conn_new_pdu_t       icl_soft_conn_new_pdu;
93 static icl_conn_pdu_free_t      icl_soft_conn_pdu_free;
94 static icl_conn_pdu_data_segment_length_t
95                                     icl_soft_conn_pdu_data_segment_length;
96 static icl_conn_pdu_append_data_t       icl_soft_conn_pdu_append_data;
97 static icl_conn_pdu_get_data_t  icl_soft_conn_pdu_get_data;
98 static icl_conn_pdu_queue_t     icl_soft_conn_pdu_queue;
99 static icl_conn_handoff_t       icl_soft_conn_handoff;
100 static icl_conn_free_t          icl_soft_conn_free;
101 static icl_conn_close_t         icl_soft_conn_close;
102 static icl_conn_task_setup_t    icl_soft_conn_task_setup;
103 static icl_conn_task_done_t     icl_soft_conn_task_done;
104 static icl_conn_transfer_setup_t        icl_soft_conn_transfer_setup;
105 static icl_conn_transfer_done_t icl_soft_conn_transfer_done;
/* The connect method exists only when ICL_KERNEL_PROXY is defined. */
106 #ifdef ICL_KERNEL_PROXY
107 static icl_conn_connect_t       icl_soft_conn_connect;
108 #endif
109
/*
 * kobj method table: binds each icl_conn interface method to the
 * corresponding icl_soft_* implementation.  The table ends with a
 * { 0, 0 } sentinel entry.
 */
110 static kobj_method_t icl_soft_methods[] = {
111         KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
112         KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
113         KOBJMETHOD(icl_conn_pdu_data_segment_length,
114             icl_soft_conn_pdu_data_segment_length),
115         KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
116         KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
117         KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
118         KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
119         KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
120         KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
121         KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup),
122         KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done),
123         KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup),
124         KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done),
            /* Registered only when ICL_KERNEL_PROXY is defined. */
125 #ifdef ICL_KERNEL_PROXY
126         KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect),
127 #endif
128         { 0, 0 }
129 };
130
/* Defines the "icl_soft" kobj class; instances are sizeof(struct icl_conn). */
131 DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn));
132
133 static void
134 icl_conn_fail(struct icl_conn *ic)
135 {
136         if (ic->ic_socket == NULL)
137                 return;
138
139         /*
140          * XXX
141          */
142         ic->ic_socket->so_error = EDOOFUS;
143         (ic->ic_error)(ic);
144 }
145
146 static struct mbuf *
147 icl_conn_receive(struct icl_conn *ic, size_t len)
148 {
149         struct uio uio;
150         struct socket *so;
151         struct mbuf *m;
152         int error, flags;
153
154         so = ic->ic_socket;
155
156         memset(&uio, 0, sizeof(uio));
157         uio.uio_resid = len;
158
159         flags = MSG_DONTWAIT;
160         error = soreceive(so, NULL, &uio, &m, NULL, &flags);
161         if (error != 0) {
162                 ICL_DEBUG("soreceive error %d", error);
163                 return (NULL);
164         }
165         if (uio.uio_resid != 0) {
166                 m_freem(m);
167                 ICL_DEBUG("short read");
168                 return (NULL);
169         }
170
171         return (m);
172 }
173
174 static int
175 icl_conn_receive_buf(struct icl_conn *ic, void *buf, size_t len)
176 {
177         struct iovec iov[1];
178         struct uio uio;
179         struct socket *so;
180         int error, flags;
181
182         so = ic->ic_socket;
183
184         memset(&uio, 0, sizeof(uio));
185         iov[0].iov_base = buf;
186         iov[0].iov_len = len;
187         uio.uio_iov = iov;
188         uio.uio_iovcnt = 1;
189         uio.uio_offset = 0;
190         uio.uio_resid = len;
191         uio.uio_segflg = UIO_SYSSPACE;
192         uio.uio_rw = UIO_READ;
193
194         flags = MSG_DONTWAIT;
195         error = soreceive(so, NULL, &uio, NULL, NULL, &flags);
196         if (error != 0) {
197                 ICL_DEBUG("soreceive error %d", error);
198                 return (-1);
199         }
200         if (uio.uio_resid != 0) {
201                 ICL_DEBUG("short read");
202                 return (-1);
203         }
204
205         return (0);
206 }
207
208 static void
209 icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
210 {
211
212         m_freem(ip->ip_bhs_mbuf);
213         m_freem(ip->ip_ahs_mbuf);
214         m_freem(ip->ip_data_mbuf);
215         uma_zfree(icl_pdu_zone, ip);
216 #ifdef DIAGNOSTIC
217         refcount_release(&ic->ic_outstanding_pdus);
218 #endif
219 }
220
221 /*
222  * Allocate icl_pdu with empty BHS to fill up by the caller.
223  */
224 struct icl_pdu *
225 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags)
226 {
227         struct icl_pdu *ip;
228
229 #ifdef DIAGNOSTIC
230         refcount_acquire(&ic->ic_outstanding_pdus);
231 #endif
232         ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
233         if (ip == NULL) {
234                 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
235 #ifdef DIAGNOSTIC
236                 refcount_release(&ic->ic_outstanding_pdus);
237 #endif
238                 return (NULL);
239         }
240         ip->ip_conn = ic;
241
242         CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN);
243         ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA);
244         if (ip->ip_bhs_mbuf == NULL) {
245                 ICL_WARN("failed to allocate BHS mbuf");
246                 icl_soft_conn_pdu_free(ic, ip);
247                 return (NULL);
248         }
249         ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
250         memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
251         ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
252
253         return (ip);
254 }
255
256 static int
257 icl_pdu_ahs_length(const struct icl_pdu *request)
258 {
259
260         return (request->ip_bhs->bhs_total_ahs_len * 4);
261 }
262
263 static size_t
264 icl_pdu_data_segment_length(const struct icl_pdu *request)
265 {
266         uint32_t len = 0;
267
268         len += request->ip_bhs->bhs_data_segment_len[0];
269         len <<= 8;
270         len += request->ip_bhs->bhs_data_segment_len[1];
271         len <<= 8;
272         len += request->ip_bhs->bhs_data_segment_len[2];
273
274         return (len);
275 }
276
/*
 * icl_conn_pdu_data_segment_length method: return the data segment length
 * encoded in the BHS of request.  The connection argument is not used.
 *
 * Defined "static" to agree with the forward declaration of this method.
 */
static size_t
icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{

        return (icl_pdu_data_segment_length(request));
}
284
285 static void
286 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
287 {
288
289         response->ip_bhs->bhs_data_segment_len[2] = len;
290         response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
291         response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
292 }
293
294 static size_t
295 icl_pdu_padding(const struct icl_pdu *ip)
296 {
297
298         if ((ip->ip_data_len % 4) != 0)
299                 return (4 - (ip->ip_data_len % 4));
300
301         return (0);
302 }
303
304 static size_t
305 icl_pdu_size(const struct icl_pdu *response)
306 {
307         size_t len;
308
309         KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
310
311         len = sizeof(struct iscsi_bhs) + response->ip_data_len +
312             icl_pdu_padding(response);
313         if (response->ip_conn->ic_header_crc32c)
314                 len += ISCSI_HEADER_DIGEST_SIZE;
315         if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
316                 len += ISCSI_DATA_DIGEST_SIZE;
317
318         return (len);
319 }
320
321 static int
322 icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
323 {
324
325         if (icl_conn_receive_buf(request->ip_conn,
326             request->ip_bhs, sizeof(struct iscsi_bhs))) {
327                 ICL_DEBUG("failed to receive BHS");
328                 return (-1);
329         }
330
331         *availablep -= sizeof(struct iscsi_bhs);
332         return (0);
333 }
334
335 static int
336 icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
337 {
338
339         request->ip_ahs_len = icl_pdu_ahs_length(request);
340         if (request->ip_ahs_len == 0)
341                 return (0);
342
343         request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
344             request->ip_ahs_len);
345         if (request->ip_ahs_mbuf == NULL) {
346                 ICL_DEBUG("failed to receive AHS");
347                 return (-1);
348         }
349
350         *availablep -= request->ip_ahs_len;
351         return (0);
352 }
353
354 static uint32_t
355 icl_mbuf_to_crc32c(const struct mbuf *m0)
356 {
357         uint32_t digest = 0xffffffff;
358         const struct mbuf *m;
359
360         for (m = m0; m != NULL; m = m->m_next)
361                 digest = calculate_crc32c(digest,
362                     mtod(m, const void *), m->m_len);
363
364         digest = digest ^ 0xffffffff;
365
366         return (digest);
367 }
368
369 static int
370 icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
371 {
372         uint32_t received_digest, valid_digest;
373
374         if (request->ip_conn->ic_header_crc32c == false)
375                 return (0);
376
377         CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
378         if (icl_conn_receive_buf(request->ip_conn,
379             &received_digest, ISCSI_HEADER_DIGEST_SIZE)) {
380                 ICL_DEBUG("failed to receive header digest");
381                 return (-1);
382         }
383         *availablep -= ISCSI_HEADER_DIGEST_SIZE;
384
385         /* Temporary attach AHS to BHS to calculate header digest. */
386         request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf;
387         valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
388         request->ip_bhs_mbuf->m_next = NULL;
389         if (received_digest != valid_digest) {
390                 ICL_WARN("header digest check failed; got 0x%x, "
391                     "should be 0x%x", received_digest, valid_digest);
392                 return (-1);
393         }
394
395         return (0);
396 }
397
398 /*
399  * Return the number of bytes that should be waiting in the receive socket
400  * before icl_pdu_receive_data_segment() gets called.
401  */
402 static size_t
403 icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
404 {
405         size_t len;
406
407         len = icl_pdu_data_segment_length(request);
408         if (len == 0)
409                 return (0);
410
411         /*
412          * Account for the parts of data segment already read from
413          * the socket buffer.
414          */
415         KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
416         len -= request->ip_data_len;
417
418         /*
419          * Don't always wait for the full data segment to be delivered
420          * to the socket; this might badly affect performance due to
421          * TCP window scaling.
422          */
423         if (len > partial_receive_len) {
424 #if 0
425                 ICL_DEBUG("need %zd bytes of data, limiting to %zd",
426                     len, partial_receive_len));
427 #endif
428                 len = partial_receive_len;
429
430                 return (len);
431         }
432
433         /*
434          * Account for padding.  Note that due to the way code is written,
435          * the icl_pdu_receive_data_segment() must always receive padding
436          * along with the last part of data segment, because it would be
437          * impossible to tell whether we've already received the full data
438          * segment including padding, or without it.
439          */
440         if ((len % 4) != 0)
441                 len += 4 - (len % 4);
442
443 #if 0
444         ICL_DEBUG("need %zd bytes of data", len));
445 #endif
446
447         return (len);
448 }
449
/*
 * Receive the next part of the PDU's data segment from the socket and
 * append it to request->ip_data_mbuf.
 *
 * request      - PDU being assembled; ip_data_len tracks the data bytes
 *                received so far.
 * availablep   - in/out: byte count the caller believes is readable from
 *                the socket; reduced by the number of bytes consumed here.
 * more_neededp - out: set to true when only part of the remaining data
 *                could be read, in which case ic_receive_len is recomputed
 *                with icl_pdu_data_segment_receive_len(); false otherwise.
 *
 * Returns 0 on success (also for a PDU without data segment), -1 when
 * icl_conn_receive() fails.  ic_receive_len is reset to 0 on entry.
 */
450 static int
451 icl_pdu_receive_data_segment(struct icl_pdu *request,
452     size_t *availablep, bool *more_neededp)
453 {
454         struct icl_conn *ic;
455         size_t len, padding = 0;
456         struct mbuf *m;
457
458         ic = request->ip_conn;
459
460         *more_neededp = false;
461         ic->ic_receive_len = 0;
462
463         len = icl_pdu_data_segment_length(request);
464         if (len == 0)
465                 return (0);
466
            /*
             * Padding is derived from the full data segment length, before
             * the already received part is subtracted below.
             */
467         if ((len % 4) != 0)
468                 padding = 4 - (len % 4);
469
470         /*
471          * Account for already received parts of data segment.
472          */
473         KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
474         len -= request->ip_data_len;
475
476         if (len + padding > *availablep) {
477                 /*
478                  * Not enough data in the socket buffer.  Receive as much
479                  * as we can.  Don't receive padding, since, obviously, it's
480                  * not the end of data segment yet.
481                  */
482 #if 0
483                 ICL_DEBUG("limited from %zd to %zd",
484                     len + padding, *availablep - padding));
485 #endif
                    /*
                     * NOTE(review): this is an unsigned subtraction; it
                     * presumes *availablep >= padding.  That looks guaranteed
                     * only while ic_receive_len (bounded below by
                     * partial_receive_len) is at least 3 -- confirm.
                     */
486                 len = *availablep - padding;
487                 *more_neededp = true;
488                 padding = 0;
489         }
490
491         /*
492          * Must not try to receive padding without at least one byte
493          * of actual data segment.
494          */
495         if (len > 0) {
496                 m = icl_conn_receive(request->ip_conn, len + padding);
497                 if (m == NULL) {
498                         ICL_DEBUG("failed to receive data segment");
499                         return (-1);
500                 }
501
502                 if (request->ip_data_mbuf == NULL)
503                         request->ip_data_mbuf = m;
504                 else
505                         m_cat(request->ip_data_mbuf, m);
506
                    /*
                     * ip_data_len counts data bytes only; padding read with
                     * the final part stays in the mbuf chain uncounted.
                     */
507                 request->ip_data_len += len;
508                 *availablep -= len + padding;
509         } else
510                 ICL_DEBUG("len 0");
511
512         if (*more_neededp)
513                 ic->ic_receive_len =
514                     icl_pdu_data_segment_receive_len(request);
515
516         return (0);
517 }
518
519 static int
520 icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
521 {
522         uint32_t received_digest, valid_digest;
523
524         if (request->ip_conn->ic_data_crc32c == false)
525                 return (0);
526
527         if (request->ip_data_len == 0)
528                 return (0);
529
530         CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
531         if (icl_conn_receive_buf(request->ip_conn,
532             &received_digest, ISCSI_DATA_DIGEST_SIZE)) {
533                 ICL_DEBUG("failed to receive data digest");
534                 return (-1);
535         }
536         *availablep -= ISCSI_DATA_DIGEST_SIZE;
537
538         /*
539          * Note that ip_data_mbuf also contains padding; since digest
540          * calculation is supposed to include that, we iterate over
541          * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
542          */
543         valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
544         if (received_digest != valid_digest) {
545                 ICL_WARN("data digest check failed; got 0x%x, "
546                     "should be 0x%x", received_digest, valid_digest);
547                 return (-1);
548         }
549
550         return (0);
551 }
552
553 /*
554  * Somewhat contrary to the name, this attempts to receive only one
555  * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
556  */
557 static struct icl_pdu *
558 icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
559 {
560         struct icl_pdu *request;
561         struct socket *so;
562         size_t len;
563         int error;
564         bool more_needed;
565
566         so = ic->ic_socket;
567
568         if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
569                 KASSERT(ic->ic_receive_pdu == NULL,
570                     ("ic->ic_receive_pdu != NULL"));
571                 request = icl_soft_conn_new_pdu(ic, M_NOWAIT);
572                 if (request == NULL) {
573                         ICL_DEBUG("failed to allocate PDU; "
574                             "dropping connection");
575                         icl_conn_fail(ic);
576                         return (NULL);
577                 }
578                 ic->ic_receive_pdu = request;
579         } else {
580                 KASSERT(ic->ic_receive_pdu != NULL,
581                     ("ic->ic_receive_pdu == NULL"));
582                 request = ic->ic_receive_pdu;
583         }
584
585         if (*availablep < ic->ic_receive_len) {
586 #if 0
587                 ICL_DEBUG("not enough data; need %zd, "
588                     "have %zd", ic->ic_receive_len, *availablep);
589 #endif
590                 return (NULL);
591         }
592
593         switch (ic->ic_receive_state) {
594         case ICL_CONN_STATE_BHS:
595                 //ICL_DEBUG("receiving BHS");
596                 error = icl_pdu_receive_bhs(request, availablep);
597                 if (error != 0) {
598                         ICL_DEBUG("failed to receive BHS; "
599                             "dropping connection");
600                         break;
601                 }
602
603                 /*
604                  * We don't enforce any limit for AHS length;
605                  * its length is stored in 8 bit field.
606                  */
607
608                 len = icl_pdu_data_segment_length(request);
609                 if (len > ic->ic_max_data_segment_length) {
610                         ICL_WARN("received data segment "
611                             "length %zd is larger than negotiated "
612                             "MaxDataSegmentLength %zd; "
613                             "dropping connection",
614                             len, ic->ic_max_data_segment_length);
615                         error = EINVAL;
616                         break;
617                 }
618
619                 ic->ic_receive_state = ICL_CONN_STATE_AHS;
620                 ic->ic_receive_len = icl_pdu_ahs_length(request);
621                 break;
622
623         case ICL_CONN_STATE_AHS:
624                 //ICL_DEBUG("receiving AHS");
625                 error = icl_pdu_receive_ahs(request, availablep);
626                 if (error != 0) {
627                         ICL_DEBUG("failed to receive AHS; "
628                             "dropping connection");
629                         break;
630                 }
631                 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
632                 if (ic->ic_header_crc32c == false)
633                         ic->ic_receive_len = 0;
634                 else
635                         ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
636                 break;
637
638         case ICL_CONN_STATE_HEADER_DIGEST:
639                 //ICL_DEBUG("receiving header digest");
640                 error = icl_pdu_check_header_digest(request, availablep);
641                 if (error != 0) {
642                         ICL_DEBUG("header digest failed; "
643                             "dropping connection");
644                         break;
645                 }
646
647                 ic->ic_receive_state = ICL_CONN_STATE_DATA;
648                 ic->ic_receive_len =
649                     icl_pdu_data_segment_receive_len(request);
650                 break;
651
652         case ICL_CONN_STATE_DATA:
653                 //ICL_DEBUG("receiving data segment");
654                 error = icl_pdu_receive_data_segment(request, availablep,
655                     &more_needed);
656                 if (error != 0) {
657                         ICL_DEBUG("failed to receive data segment;"
658                             "dropping connection");
659                         break;
660                 }
661
662                 if (more_needed)
663                         break;
664
665                 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
666                 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
667                         ic->ic_receive_len = 0;
668                 else
669                         ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
670                 break;
671
672         case ICL_CONN_STATE_DATA_DIGEST:
673                 //ICL_DEBUG("receiving data digest");
674                 error = icl_pdu_check_data_digest(request, availablep);
675                 if (error != 0) {
676                         ICL_DEBUG("data digest failed; "
677                             "dropping connection");
678                         break;
679                 }
680
681                 /*
682                  * We've received complete PDU; reset the receive state machine
683                  * and return the PDU.
684                  */
685                 ic->ic_receive_state = ICL_CONN_STATE_BHS;
686                 ic->ic_receive_len = sizeof(struct iscsi_bhs);
687                 ic->ic_receive_pdu = NULL;
688                 return (request);
689
690         default:
691                 panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
692         }
693
694         if (error != 0) {
695                 /*
696                  * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
697                  * and will get freed in icl_soft_conn_close().
698                  */
699                 icl_conn_fail(ic);
700         }
701
702         return (NULL);
703 }
704
705 static void
706 icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
707 {
708         struct icl_pdu *response;
709         struct socket *so;
710
711         so = ic->ic_socket;
712
713         /*
714          * This can never happen; we're careful to only mess with ic->ic_socket
715          * pointer when the send/receive threads are not running.
716          */
717         KASSERT(so != NULL, ("NULL socket"));
718
719         for (;;) {
720                 if (ic->ic_disconnecting)
721                         return;
722
723                 if (so->so_error != 0) {
724                         ICL_DEBUG("connection error %d; "
725                             "dropping connection", so->so_error);
726                         icl_conn_fail(ic);
727                         return;
728                 }
729
730                 /*
731                  * Loop until we have a complete PDU or there is not enough
732                  * data in the socket buffer.
733                  */
734                 if (available < ic->ic_receive_len) {
735 #if 0
736                         ICL_DEBUG("not enough data; have %zd, "
737                             "need %zd", available,
738                             ic->ic_receive_len);
739 #endif
740                         return;
741                 }
742
743                 response = icl_conn_receive_pdu(ic, &available);
744                 if (response == NULL)
745                         continue;
746
747                 if (response->ip_ahs_len > 0) {
748                         ICL_WARN("received PDU with unsupported "
749                             "AHS; opcode 0x%x; dropping connection",
750                             response->ip_bhs->bhs_opcode);
751                         icl_soft_conn_pdu_free(ic, response);
752                         icl_conn_fail(ic);
753                         return;
754                 }
755
756                 (ic->ic_receive)(response);
757         }
758 }
759
/*
 * Body of the per-connection receive kernel thread.
 *
 * arg is the struct icl_conn to serve.  The thread loops until
 * ic_disconnecting is set: it samples the amount of buffered receive data,
 * sleeps on ic_receive_cv when that is below ic_receive_len, and then calls
 * icl_conn_receive_pdus().  On exit it clears ic_receive_running and
 * signals ic_send_cv under the connection lock, then terminates with
 * kthread_exit().
 */
760 static void
761 icl_receive_thread(void *arg)
762 {
763         struct icl_conn *ic;
764         size_t available;
765         struct socket *so;
766
767         ic = arg;
768         so = ic->ic_socket;
769
770         for (;;) {
771                 if (ic->ic_disconnecting) {
772                         //ICL_DEBUG("terminating");
773                         break;
774                 }
775
776                 /*
777                  * Set the low watermark, to be checked by
778                  * soreadable() in icl_soupcall_receive()
779                  * to avoid unnecessary wakeups until there
780                  * is enough data received to read the PDU.
781                  */
782                 SOCKBUF_LOCK(&so->so_rcv);
783                 available = sbavail(&so->so_rcv);
784                 if (available < ic->ic_receive_len) {
785                         so->so_rcv.sb_lowat = ic->ic_receive_len;
                            /*
                             * "available" is not refreshed after the wait, so
                             * the icl_conn_receive_pdus() call below returns
                             * immediately (its own available/ic_receive_len
                             * check) and the next iteration samples again.
                             */
786                         cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
787                 } else
                            /*
                             * NOTE(review): presumably a watermark above
                             * sb_hiwat keeps soreadable() false, silencing
                             * upcalls while PDUs are processed -- confirm.
                             */
788                         so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
789                 SOCKBUF_UNLOCK(&so->so_rcv);
790
791                 icl_conn_receive_pdus(ic, available);
792         }
793
794         ICL_CONN_LOCK(ic);
795         ic->ic_receive_running = false;
796         cv_signal(&ic->ic_send_cv);
797         ICL_CONN_UNLOCK(ic);
798         kthread_exit();
799 }
800
801 static int
802 icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
803 {
804         struct icl_conn *ic;
805
806         if (!soreadable(so))
807                 return (SU_OK);
808
809         ic = arg;
810         cv_signal(&ic->ic_receive_cv);
811         return (SU_OK);
812 }
813
814 static int
815 icl_pdu_finalize(struct icl_pdu *request)
816 {
817         size_t padding, pdu_len;
818         uint32_t digest, zero = 0;
819         int ok;
820         struct icl_conn *ic;
821
822         ic = request->ip_conn;
823
824         icl_pdu_set_data_segment_length(request, request->ip_data_len);
825
826         pdu_len = icl_pdu_size(request);
827
828         if (ic->ic_header_crc32c) {
829                 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
830                 ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
831                     (void *)&digest);
832                 if (ok != 1) {
833                         ICL_WARN("failed to append header digest");
834                         return (1);
835                 }
836         }
837
838         if (request->ip_data_len != 0) {
839                 padding = icl_pdu_padding(request);
840                 if (padding > 0) {
841                         ok = m_append(request->ip_data_mbuf, padding,
842                             (void *)&zero);
843                         if (ok != 1) {
844                                 ICL_WARN("failed to append padding");
845                                 return (1);
846                         }
847                 }
848
849                 if (ic->ic_data_crc32c) {
850                         digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
851
852                         ok = m_append(request->ip_data_mbuf, sizeof(digest),
853                             (void *)&digest);
854                         if (ok != 1) {
855                                 ICL_WARN("failed to append data digest");
856                                 return (1);
857                         }
858                 }
859
860                 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
861                 request->ip_data_mbuf = NULL;
862         }
863
864         request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
865
866         return (0);
867 }
868
869 static void
870 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
871 {
872         struct icl_pdu *request, *request2;
873         struct socket *so;
874         long available, size, size2;
875         int coalesced, error;
876
877         ICL_CONN_LOCK_ASSERT_NOT(ic);
878
879         so = ic->ic_socket;
880
881         SOCKBUF_LOCK(&so->so_snd);
882         /*
883          * Check how much space do we have for transmit.  We can't just
884          * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
885          * as it always frees the mbuf chain passed to it, even in case
886          * of error.
887          */
888         available = sbspace(&so->so_snd);
889
890         /*
891          * Notify the socket upcall that we don't need wakeups
892          * for the time being.
893          */
894         so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
895         SOCKBUF_UNLOCK(&so->so_snd);
896
897         while (!STAILQ_EMPTY(queue)) {
898                 request = STAILQ_FIRST(queue);
899                 size = icl_pdu_size(request);
900                 if (available < size) {
901
902                         /*
903                          * Set the low watermark, to be checked by
904                          * sowriteable() in icl_soupcall_send()
905                          * to avoid unnecessary wakeups until there
906                          * is enough space for the PDU to fit.
907                          */
908                         SOCKBUF_LOCK(&so->so_snd);
909                         available = sbspace(&so->so_snd);
910                         if (available < size) {
911 #if 1
912                                 ICL_DEBUG("no space to send; "
913                                     "have %ld, need %ld",
914                                     available, size);
915 #endif
916                                 so->so_snd.sb_lowat = size;
917                                 SOCKBUF_UNLOCK(&so->so_snd);
918                                 return;
919                         }
920                         SOCKBUF_UNLOCK(&so->so_snd);
921                 }
922                 STAILQ_REMOVE_HEAD(queue, ip_next);
923                 error = icl_pdu_finalize(request);
924                 if (error != 0) {
925                         ICL_DEBUG("failed to finalize PDU; "
926                             "dropping connection");
927                         icl_soft_conn_pdu_free(ic, request);
928                         icl_conn_fail(ic);
929                         return;
930                 }
931                 if (coalesce) {
932                         coalesced = 1;
933                         for (;;) {
934                                 request2 = STAILQ_FIRST(queue);
935                                 if (request2 == NULL)
936                                         break;
937                                 size2 = icl_pdu_size(request2);
938                                 if (available < size + size2)
939                                         break;
940                                 STAILQ_REMOVE_HEAD(queue, ip_next);
941                                 error = icl_pdu_finalize(request2);
942                                 if (error != 0) {
943                                         ICL_DEBUG("failed to finalize PDU; "
944                                             "dropping connection");
945                                         icl_soft_conn_pdu_free(ic, request);
946                                         icl_soft_conn_pdu_free(ic, request2);
947                                         icl_conn_fail(ic);
948                                         return;
949                                 }
950                                 m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf);
951                                 request2->ip_bhs_mbuf = NULL;
952                                 request->ip_bhs_mbuf->m_pkthdr.len += size2;
953                                 size += size2;
954                                 STAILQ_REMOVE_AFTER(queue, request, ip_next);
955                                 icl_soft_conn_pdu_free(ic, request2);
956                                 coalesced++;
957                         }
958 #if 0
959                         if (coalesced > 1) {
960                                 ICL_DEBUG("coalesced %d PDUs into %ld bytes",
961                                     coalesced, size);
962                         }
963 #endif
964                 }
965                 available -= size;
966                 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
967                     NULL, MSG_DONTWAIT, curthread);
968                 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
969                 if (error != 0) {
970                         ICL_DEBUG("failed to send PDU, error %d; "
971                             "dropping connection", error);
972                         icl_soft_conn_pdu_free(ic, request);
973                         icl_conn_fail(ic);
974                         return;
975                 }
976                 icl_soft_conn_pdu_free(ic, request);
977         }
978 }
979
980 static void
981 icl_send_thread(void *arg)
982 {
983         struct icl_conn *ic;
984         struct icl_pdu_stailq queue;
985
986         ic = arg;
987
988         STAILQ_INIT(&queue);
989
990         ICL_CONN_LOCK(ic);
991         for (;;) {
992                 for (;;) {
993                         /*
994                          * If the local queue is empty, populate it from
995                          * the main one.  This way the icl_conn_send_pdus()
996                          * can go through all the queued PDUs without holding
997                          * any locks.
998                          */
999                         if (STAILQ_EMPTY(&queue))
1000                                 STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);
1001
1002                         ic->ic_check_send_space = false;
1003                         ICL_CONN_UNLOCK(ic);
1004                         icl_conn_send_pdus(ic, &queue);
1005                         ICL_CONN_LOCK(ic);
1006
1007                         /*
1008                          * The icl_soupcall_send() was called since the last
1009                          * call to sbspace(); go around;
1010                          */
1011                         if (ic->ic_check_send_space)
1012                                 continue;
1013
1014                         /*
1015                          * Local queue is empty, but we still have PDUs
1016                          * in the main one; go around.
1017                          */
1018                         if (STAILQ_EMPTY(&queue) &&
1019                             !STAILQ_EMPTY(&ic->ic_to_send))
1020                                 continue;
1021
1022                         /*
1023                          * There might be some stuff in the local queue,
1024                          * which didn't get sent due to not having enough send
1025                          * space.  Wait for socket upcall.
1026                          */
1027                         break;
1028                 }
1029
1030                 if (ic->ic_disconnecting) {
1031                         //ICL_DEBUG("terminating");
1032                         break;
1033                 }
1034
1035                 cv_wait(&ic->ic_send_cv, ic->ic_lock);
1036         }
1037
1038         /*
1039          * We're exiting; move PDUs back to the main queue, so they can
1040          * get freed properly.  At this point ordering doesn't matter.
1041          */
1042         STAILQ_CONCAT(&ic->ic_to_send, &queue);
1043
1044         ic->ic_send_running = false;
1045         cv_signal(&ic->ic_send_cv);
1046         ICL_CONN_UNLOCK(ic);
1047         kthread_exit();
1048 }
1049
1050 static int
1051 icl_soupcall_send(struct socket *so, void *arg, int waitflag)
1052 {
1053         struct icl_conn *ic;
1054
1055         if (!sowriteable(so))
1056                 return (SU_OK);
1057
1058         ic = arg;
1059
1060         ICL_CONN_LOCK(ic);
1061         ic->ic_check_send_space = true;
1062         ICL_CONN_UNLOCK(ic);
1063
1064         cv_signal(&ic->ic_send_cv);
1065
1066         return (SU_OK);
1067 }
1068
1069 static int
1070 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
1071     const void *addr, size_t len, int flags)
1072 {
1073         struct mbuf *mb, *newmb;
1074         size_t copylen, off = 0;
1075
1076         KASSERT(len > 0, ("len == 0"));
1077
1078         newmb = m_getm2(NULL, len, flags, MT_DATA, 0);
1079         if (newmb == NULL) {
1080                 ICL_WARN("failed to allocate mbuf for %zd bytes", len);
1081                 return (ENOMEM);
1082         }
1083
1084         for (mb = newmb; mb != NULL; mb = mb->m_next) {
1085                 copylen = min(M_TRAILINGSPACE(mb), len - off);
1086                 memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
1087                 mb->m_len = copylen;
1088                 off += copylen;
1089         }
1090         KASSERT(off == len, ("%s: off != len", __func__));
1091
1092         if (request->ip_data_mbuf == NULL) {
1093                 request->ip_data_mbuf = newmb;
1094                 request->ip_data_len = len;
1095         } else {
1096                 m_cat(request->ip_data_mbuf, newmb);
1097                 request->ip_data_len += len;
1098         }
1099
1100         return (0);
1101 }
1102
1103 void
1104 icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
1105     size_t off, void *addr, size_t len)
1106 {
1107
1108         m_copydata(ip->ip_data_mbuf, off, len, addr);
1109 }
1110
1111 static void
1112 icl_pdu_queue(struct icl_pdu *ip)
1113 {
1114         struct icl_conn *ic;
1115
1116         ic = ip->ip_conn;
1117
1118         ICL_CONN_LOCK_ASSERT(ic);
1119
1120         if (ic->ic_disconnecting || ic->ic_socket == NULL) {
1121                 ICL_DEBUG("icl_pdu_queue on closed connection");
1122                 icl_soft_conn_pdu_free(ic, ip);
1123                 return;
1124         }
1125
1126         if (!STAILQ_EMPTY(&ic->ic_to_send)) {
1127                 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1128                 /*
1129                  * If the queue is not empty, someone else had already
1130                  * signaled the send thread; no need to do that again,
1131                  * just return.
1132                  */
1133                 return;
1134         }
1135
1136         STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1137         cv_signal(&ic->ic_send_cv);
1138 }
1139
/*
 * Non-static entry point for queueing a PDU; simply forwards to
 * icl_pdu_queue(), which finds the connection through the PDU itself.
 */
void
icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{

	icl_pdu_queue(ip);
}
1146
1147 static struct icl_conn *
1148 icl_soft_new_conn(const char *name, struct mtx *lock)
1149 {
1150         struct icl_conn *ic;
1151
1152         refcount_acquire(&icl_ncons);
1153
1154         ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO);
1155
1156         STAILQ_INIT(&ic->ic_to_send);
1157         ic->ic_lock = lock;
1158         cv_init(&ic->ic_send_cv, "icl_tx");
1159         cv_init(&ic->ic_receive_cv, "icl_rx");
1160 #ifdef DIAGNOSTIC
1161         refcount_init(&ic->ic_outstanding_pdus, 0);
1162 #endif
1163         ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
1164         ic->ic_name = name;
1165         ic->ic_offload = "None";
1166         ic->ic_unmapped = false;
1167
1168         return (ic);
1169 }
1170
1171 void
1172 icl_soft_conn_free(struct icl_conn *ic)
1173 {
1174
1175 #ifdef DIAGNOSTIC
1176         KASSERT(ic->ic_outstanding_pdus == 0,
1177             ("destroying session with %d outstanding PDUs",
1178              ic->ic_outstanding_pdus));
1179 #endif
1180         cv_destroy(&ic->ic_send_cv);
1181         cv_destroy(&ic->ic_receive_cv);
1182         kobj_delete((struct kobj *)ic, M_ICL_SOFT);
1183         refcount_release(&icl_ncons);
1184 }
1185
1186 static int
1187 icl_conn_start(struct icl_conn *ic)
1188 {
1189         size_t minspace;
1190         struct sockopt opt;
1191         int error, one = 1;
1192
1193         ICL_CONN_LOCK(ic);
1194
1195         /*
1196          * XXX: Ugly hack.
1197          */
1198         if (ic->ic_socket == NULL) {
1199                 ICL_CONN_UNLOCK(ic);
1200                 return (EINVAL);
1201         }
1202
1203         ic->ic_receive_state = ICL_CONN_STATE_BHS;
1204         ic->ic_receive_len = sizeof(struct iscsi_bhs);
1205         ic->ic_disconnecting = false;
1206
1207         ICL_CONN_UNLOCK(ic);
1208
1209         /*
1210          * For sendspace, this is required because the current code cannot
1211          * send a PDU in pieces; thus, the minimum buffer size is equal
1212          * to the maximum PDU size.  "+4" is to account for possible padding.
1213          *
1214          * What we should actually do here is to use autoscaling, but set
1215          * some minimal buffer size to "minspace".  I don't know a way to do
1216          * that, though.
1217          */
1218         minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
1219             ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
1220         if (sendspace < minspace) {
1221                 ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
1222                     minspace);
1223                 sendspace = minspace;
1224         }
1225         if (recvspace < minspace) {
1226                 ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
1227                     minspace);
1228                 recvspace = minspace;
1229         }
1230
1231         error = soreserve(ic->ic_socket, sendspace, recvspace);
1232         if (error != 0) {
1233                 ICL_WARN("soreserve failed with error %d", error);
1234                 icl_soft_conn_close(ic);
1235                 return (error);
1236         }
1237         ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
1238         ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;
1239
1240         /*
1241          * Disable Nagle.
1242          */
1243         bzero(&opt, sizeof(opt));
1244         opt.sopt_dir = SOPT_SET;
1245         opt.sopt_level = IPPROTO_TCP;
1246         opt.sopt_name = TCP_NODELAY;
1247         opt.sopt_val = &one;
1248         opt.sopt_valsize = sizeof(one);
1249         error = sosetopt(ic->ic_socket, &opt);
1250         if (error != 0) {
1251                 ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
1252                 icl_soft_conn_close(ic);
1253                 return (error);
1254         }
1255
1256         /*
1257          * Register socket upcall, to get notified about incoming PDUs
1258          * and free space to send outgoing ones.
1259          */
1260         SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1261         soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
1262         SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1263         SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1264         soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
1265         SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1266
1267         /*
1268          * Start threads.
1269          */
1270         ICL_CONN_LOCK(ic);
1271         ic->ic_send_running = ic->ic_receive_running = true;
1272         ICL_CONN_UNLOCK(ic);
1273         error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
1274             ic->ic_name);
1275         if (error != 0) {
1276                 ICL_WARN("kthread_add(9) failed with error %d", error);
1277                 ICL_CONN_LOCK(ic);
1278                 ic->ic_send_running = ic->ic_receive_running = false;
1279                 cv_signal(&ic->ic_send_cv);
1280                 ICL_CONN_UNLOCK(ic);
1281                 icl_soft_conn_close(ic);
1282                 return (error);
1283         }
1284         error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
1285             ic->ic_name);
1286         if (error != 0) {
1287                 ICL_WARN("kthread_add(9) failed with error %d", error);
1288                 ICL_CONN_LOCK(ic);
1289                 ic->ic_receive_running = false;
1290                 cv_signal(&ic->ic_send_cv);
1291                 ICL_CONN_UNLOCK(ic);
1292                 icl_soft_conn_close(ic);
1293                 return (error);
1294         }
1295
1296         return (0);
1297 }
1298
1299 int
1300 icl_soft_conn_handoff(struct icl_conn *ic, int fd)
1301 {
1302         struct file *fp;
1303         struct socket *so;
1304         cap_rights_t rights;
1305         int error;
1306
1307         ICL_CONN_LOCK_ASSERT_NOT(ic);
1308
1309 #ifdef ICL_KERNEL_PROXY
1310         /*
1311          * We're transitioning to Full Feature phase, and we don't
1312          * really care.
1313          */
1314         if (fd == 0) {
1315                 ICL_CONN_LOCK(ic);
1316                 if (ic->ic_socket == NULL) {
1317                         ICL_CONN_UNLOCK(ic);
1318                         ICL_WARN("proxy handoff without connect"); 
1319                         return (EINVAL);
1320                 }
1321                 ICL_CONN_UNLOCK(ic);
1322                 return (0);
1323         }
1324 #endif
1325
1326         /*
1327          * Steal the socket from userland.
1328          */
1329         error = fget(curthread, fd,
1330             cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1331         if (error != 0)
1332                 return (error);
1333         if (fp->f_type != DTYPE_SOCKET) {
1334                 fdrop(fp, curthread);
1335                 return (EINVAL);
1336         }
1337         so = fp->f_data;
1338         if (so->so_type != SOCK_STREAM) {
1339                 fdrop(fp, curthread);
1340                 return (EINVAL);
1341         }
1342
1343         ICL_CONN_LOCK(ic);
1344
1345         if (ic->ic_socket != NULL) {
1346                 ICL_CONN_UNLOCK(ic);
1347                 fdrop(fp, curthread);
1348                 return (EBUSY);
1349         }
1350
1351         ic->ic_socket = fp->f_data;
1352         fp->f_ops = &badfileops;
1353         fp->f_data = NULL;
1354         fdrop(fp, curthread);
1355         ICL_CONN_UNLOCK(ic);
1356
1357         error = icl_conn_start(ic);
1358
1359         return (error);
1360 }
1361
1362 void
1363 icl_soft_conn_close(struct icl_conn *ic)
1364 {
1365         struct icl_pdu *pdu;
1366         struct socket *so;
1367
1368         ICL_CONN_LOCK(ic);
1369
1370         /*
1371          * Wake up the threads, so they can properly terminate.
1372          */
1373         ic->ic_disconnecting = true;
1374         while (ic->ic_receive_running || ic->ic_send_running) {
1375                 cv_signal(&ic->ic_receive_cv);
1376                 cv_signal(&ic->ic_send_cv);
1377                 cv_wait(&ic->ic_send_cv, ic->ic_lock);
1378         }
1379
1380         /* Some other thread could close the connection same time. */
1381         so = ic->ic_socket;
1382         if (so == NULL) {
1383                 ICL_CONN_UNLOCK(ic);
1384                 return;
1385         }
1386         ic->ic_socket = NULL;
1387
1388         /*
1389          * Deregister socket upcalls.
1390          */
1391         ICL_CONN_UNLOCK(ic);
1392         SOCKBUF_LOCK(&so->so_snd);
1393         if (so->so_snd.sb_upcall != NULL)
1394                 soupcall_clear(so, SO_SND);
1395         SOCKBUF_UNLOCK(&so->so_snd);
1396         SOCKBUF_LOCK(&so->so_rcv);
1397         if (so->so_rcv.sb_upcall != NULL)
1398                 soupcall_clear(so, SO_RCV);
1399         SOCKBUF_UNLOCK(&so->so_rcv);
1400         soclose(so);
1401         ICL_CONN_LOCK(ic);
1402
1403         if (ic->ic_receive_pdu != NULL) {
1404                 //ICL_DEBUG("freeing partially received PDU");
1405                 icl_soft_conn_pdu_free(ic, ic->ic_receive_pdu);
1406                 ic->ic_receive_pdu = NULL;
1407         }
1408
1409         /*
1410          * Remove any outstanding PDUs from the send queue.
1411          */
1412         while (!STAILQ_EMPTY(&ic->ic_to_send)) {
1413                 pdu = STAILQ_FIRST(&ic->ic_to_send);
1414                 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
1415                 icl_soft_conn_pdu_free(ic, pdu);
1416         }
1417
1418         KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
1419             ("destroying session with non-empty send queue"));
1420         ICL_CONN_UNLOCK(ic);
1421 }
1422
1423 int
1424 icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
1425     struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
1426 {
1427
1428         return (0);
1429 }
1430
/*
 * Per-task teardown hook; intentionally empty in this implementation.
 */
void
icl_soft_conn_task_done(struct icl_conn *ic, void *prv)
{
}
1435
1436 int
1437 icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
1438     uint32_t *transfer_tag, void **prvp)
1439 {
1440
1441         return (0);
1442 }
1443
/*
 * Per-transfer teardown hook; intentionally empty in this implementation.
 */
void
icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv)
{
}
1448
1449 static int
1450 icl_soft_limits(struct icl_drv_limits *idl)
1451 {
1452
1453         idl->idl_max_recv_data_segment_length = 128 * 1024;
1454         idl->idl_max_send_data_segment_length = 128 * 1024;
1455         idl->idl_max_burst_length = 262144;
1456         idl->idl_first_burst_length = 65536;
1457
1458         return (0);
1459 }
1460
1461 #ifdef ICL_KERNEL_PROXY
/*
 * Kernel-proxy connect entry point; passes all arguments through to
 * icl_soft_proxy_connect() and returns its result.
 */
int
icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
	int error;

	error = icl_soft_proxy_connect(ic, domain, socktype, protocol,
	    from_sa, to_sa);

	return (error);
}
1470
1471 int
1472 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so)
1473 {
1474         int error;
1475
1476         ICL_CONN_LOCK_ASSERT_NOT(ic);
1477
1478         if (so->so_type != SOCK_STREAM)
1479                 return (EINVAL);
1480
1481         ICL_CONN_LOCK(ic);
1482         if (ic->ic_socket != NULL) {
1483                 ICL_CONN_UNLOCK(ic);
1484                 return (EBUSY);
1485         }
1486         ic->ic_socket = so;
1487         ICL_CONN_UNLOCK(ic);
1488
1489         error = icl_conn_start(ic);
1490
1491         return (error);
1492 }
1493 #endif /* ICL_KERNEL_PROXY */
1494
1495 static int
1496 icl_soft_load(void)
1497 {
1498         int error;
1499
1500         icl_pdu_zone = uma_zcreate("icl_pdu",
1501             sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
1502             UMA_ALIGN_PTR, 0);
1503         refcount_init(&icl_ncons, 0);
1504
1505         /*
1506          * The reason we call this "none" is that to the user,
1507          * it's known as "offload driver"; "offload driver: soft"
1508          * doesn't make much sense.
1509          */
1510         error = icl_register("none", false, 0,
1511             icl_soft_limits, icl_soft_new_conn);
1512         KASSERT(error == 0, ("failed to register"));
1513
1514 #if defined(ICL_KERNEL_PROXY) && 0
1515         /*
1516          * Debugging aid for kernel proxy functionality.
1517          */
1518         error = icl_register("proxytest", true, 0,
1519             icl_soft_limits, icl_soft_new_conn);
1520         KASSERT(error == 0, ("failed to register"));
1521 #endif
1522
1523         return (error);
1524 }
1525
1526 static int
1527 icl_soft_unload(void)
1528 {
1529
1530         if (icl_ncons != 0)
1531                 return (EBUSY);
1532
1533         icl_unregister("none", false);
1534 #if defined(ICL_KERNEL_PROXY) && 0
1535         icl_unregister("proxytest", true);
1536 #endif
1537
1538         uma_zdestroy(icl_pdu_zone);
1539
1540         return (0);
1541 }
1542
1543 static int
1544 icl_soft_modevent(module_t mod, int what, void *arg)
1545 {
1546
1547         switch (what) {
1548         case MOD_LOAD:
1549                 return (icl_soft_load());
1550         case MOD_UNLOAD:
1551                 return (icl_soft_unload());
1552         default:
1553                 return (EINVAL);
1554         }
1555 }
1556
1557 moduledata_t icl_soft_data = {
1558         "icl_soft",
1559         icl_soft_modevent,
1560         0
1561 };
1562
1563 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
1564 MODULE_DEPEND(icl_soft, icl, 1, 1, 1);
1565 MODULE_VERSION(icl_soft, 1);