]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/iscsi/icl_soft.c
Upgrade to Unbound 1.5.7.
[FreeBSD/FreeBSD.git] / sys / dev / iscsi / icl_soft.c
1 /*-
2  * Copyright (c) 2012 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  */
30
31 /*
32  * Software implementation of iSCSI Common Layer kobj(9) interface.
33  */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include <sys/param.h>
39 #include <sys/capsicum.h>
40 #include <sys/condvar.h>
41 #include <sys/conf.h>
42 #include <sys/file.h>
43 #include <sys/kernel.h>
44 #include <sys/kthread.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/mutex.h>
48 #include <sys/module.h>
49 #include <sys/protosw.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #include <sys/sx.h>
55 #include <sys/uio.h>
56 #include <vm/uma.h>
57 #include <netinet/in.h>
58 #include <netinet/tcp.h>
59
60 #include <dev/iscsi/icl.h>
61 #include <dev/iscsi/iscsi_proto.h>
62 #include <icl_conn_if.h>
63
64 static int coalesce = 1;
65 SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
66     &coalesce, 0, "Try to coalesce PDUs before sending");
67 static int partial_receive_len = 128 * 1024;
68 SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
69     &partial_receive_len, 0, "Minimum read size for partially received "
70     "data segment");
71 static int sendspace = 1048576;
72 SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
73     &sendspace, 0, "Default send socket buffer size");
74 static int recvspace = 1048576;
75 SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
76     &recvspace, 0, "Default receive socket buffer size");
77
78 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend");
79 static uma_zone_t icl_pdu_zone;
80
81 static volatile u_int   icl_ncons;
82
/*
 * Helpers for the per-connection mutex reached through ic_lock.
 *
 * The argument is parenthesized in every expansion so that any pointer
 * expression (e.g. "ic + 1" or "&conns[i]") may be passed, not just a
 * plain identifier.
 */
#define	ICL_CONN_LOCK(X)		mtx_lock((X)->ic_lock)
#define	ICL_CONN_UNLOCK(X)		mtx_unlock((X)->ic_lock)
#define	ICL_CONN_LOCK_ASSERT(X)		mtx_assert((X)->ic_lock, MA_OWNED)
#define	ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert((X)->ic_lock, MA_NOTOWNED)
87
88 STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
89
90 static icl_conn_new_pdu_t       icl_soft_conn_new_pdu;
91 static icl_conn_pdu_free_t      icl_soft_conn_pdu_free;
92 static icl_conn_pdu_data_segment_length_t
93                                     icl_soft_conn_pdu_data_segment_length;
94 static icl_conn_pdu_append_data_t       icl_soft_conn_pdu_append_data;
95 static icl_conn_pdu_get_data_t  icl_soft_conn_pdu_get_data;
96 static icl_conn_pdu_queue_t     icl_soft_conn_pdu_queue;
97 static icl_conn_handoff_t       icl_soft_conn_handoff;
98 static icl_conn_free_t          icl_soft_conn_free;
99 static icl_conn_close_t         icl_soft_conn_close;
100 static icl_conn_task_setup_t    icl_soft_conn_task_setup;
101 static icl_conn_task_done_t     icl_soft_conn_task_done;
102 static icl_conn_transfer_setup_t        icl_soft_conn_transfer_setup;
103 static icl_conn_transfer_done_t icl_soft_conn_transfer_done;
104
105 static kobj_method_t icl_soft_methods[] = {
106         KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
107         KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
108         KOBJMETHOD(icl_conn_pdu_data_segment_length,
109             icl_soft_conn_pdu_data_segment_length),
110         KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
111         KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
112         KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
113         KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
114         KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
115         KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
116         KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup),
117         KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done),
118         KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup),
119         KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done),
120         { 0, 0 }
121 };
122
123 DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn));
124
125 static void
126 icl_conn_fail(struct icl_conn *ic)
127 {
128         if (ic->ic_socket == NULL)
129                 return;
130
131         /*
132          * XXX
133          */
134         ic->ic_socket->so_error = EDOOFUS;
135         (ic->ic_error)(ic);
136 }
137
138 static struct mbuf *
139 icl_conn_receive(struct icl_conn *ic, size_t len)
140 {
141         struct uio uio;
142         struct socket *so;
143         struct mbuf *m;
144         int error, flags;
145
146         so = ic->ic_socket;
147
148         memset(&uio, 0, sizeof(uio));
149         uio.uio_resid = len;
150
151         flags = MSG_DONTWAIT;
152         error = soreceive(so, NULL, &uio, &m, NULL, &flags);
153         if (error != 0) {
154                 ICL_DEBUG("soreceive error %d", error);
155                 return (NULL);
156         }
157         if (uio.uio_resid != 0) {
158                 m_freem(m);
159                 ICL_DEBUG("short read");
160                 return (NULL);
161         }
162
163         return (m);
164 }
165
166 static struct icl_pdu *
167 icl_pdu_new_empty(struct icl_conn *ic, int flags)
168 {
169         struct icl_pdu *ip;
170
171 #ifdef DIAGNOSTIC
172         refcount_acquire(&ic->ic_outstanding_pdus);
173 #endif
174         ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
175         if (ip == NULL) {
176                 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
177 #ifdef DIAGNOSTIC
178                 refcount_release(&ic->ic_outstanding_pdus);
179 #endif
180                 return (NULL);
181         }
182
183         ip->ip_conn = ic;
184
185         return (ip);
186 }
187
188 static void
189 icl_pdu_free(struct icl_pdu *ip)
190 {
191         struct icl_conn *ic;
192
193         ic = ip->ip_conn;
194
195         m_freem(ip->ip_bhs_mbuf);
196         m_freem(ip->ip_ahs_mbuf);
197         m_freem(ip->ip_data_mbuf);
198         uma_zfree(icl_pdu_zone, ip);
199 #ifdef DIAGNOSTIC
200         refcount_release(&ic->ic_outstanding_pdus);
201 #endif
202 }
203
/*
 * icl_conn_pdu_free method: hand the PDU to icl_pdu_free().  The
 * connection argument is not consulted.
 */
void
icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	icl_pdu_free(ip);
}
210
211 /*
212  * Allocate icl_pdu with empty BHS to fill up by the caller.
213  */
214 struct icl_pdu *
215 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags)
216 {
217         struct icl_pdu *ip;
218
219         ip = icl_pdu_new_empty(ic, flags);
220         if (ip == NULL)
221                 return (NULL);
222
223         ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs),
224             flags, MT_DATA, M_PKTHDR);
225         if (ip->ip_bhs_mbuf == NULL) {
226                 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
227                 icl_pdu_free(ip);
228                 return (NULL);
229         }
230         ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
231         memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
232         ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
233
234         return (ip);
235 }
236
237 static int
238 icl_pdu_ahs_length(const struct icl_pdu *request)
239 {
240
241         return (request->ip_bhs->bhs_total_ahs_len * 4);
242 }
243
244 static size_t
245 icl_pdu_data_segment_length(const struct icl_pdu *request)
246 {
247         uint32_t len = 0;
248
249         len += request->ip_bhs->bhs_data_segment_len[0];
250         len <<= 8;
251         len += request->ip_bhs->bhs_data_segment_len[1];
252         len <<= 8;
253         len += request->ip_bhs->bhs_data_segment_len[2];
254
255         return (len);
256 }
257
/*
 * icl_conn_pdu_data_segment_length method: report the data segment
 * length recorded in the PDU's BHS.  The connection is not consulted.
 */
size_t
icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{
	size_t dsl;

	dsl = icl_pdu_data_segment_length(request);
	return (dsl);
}
265
266 static void
267 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
268 {
269
270         response->ip_bhs->bhs_data_segment_len[2] = len;
271         response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
272         response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
273 }
274
275 static size_t
276 icl_pdu_padding(const struct icl_pdu *ip)
277 {
278
279         if ((ip->ip_data_len % 4) != 0)
280                 return (4 - (ip->ip_data_len % 4));
281
282         return (0);
283 }
284
285 static size_t
286 icl_pdu_size(const struct icl_pdu *response)
287 {
288         size_t len;
289
290         KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
291
292         len = sizeof(struct iscsi_bhs) + response->ip_data_len +
293             icl_pdu_padding(response);
294         if (response->ip_conn->ic_header_crc32c)
295                 len += ISCSI_HEADER_DIGEST_SIZE;
296         if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
297                 len += ISCSI_DATA_DIGEST_SIZE;
298
299         return (len);
300 }
301
302 static int
303 icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
304 {
305         struct mbuf *m;
306
307         m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs));
308         if (m == NULL) {
309                 ICL_DEBUG("failed to receive BHS");
310                 return (-1);
311         }
312
313         request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs));
314         if (request->ip_bhs_mbuf == NULL) {
315                 ICL_WARN("m_pullup failed");
316                 return (-1);
317         }
318         request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *);
319
320         /*
321          * XXX: For architectures with strict alignment requirements
322          *      we may need to allocate ip_bhs and copy the data into it.
323          *      For some reason, though, not doing this doesn't seem
324          *      to cause problems; tested on sparc64.
325          */
326
327         *availablep -= sizeof(struct iscsi_bhs);
328         return (0);
329 }
330
331 static int
332 icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
333 {
334
335         request->ip_ahs_len = icl_pdu_ahs_length(request);
336         if (request->ip_ahs_len == 0)
337                 return (0);
338
339         request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
340             request->ip_ahs_len);
341         if (request->ip_ahs_mbuf == NULL) {
342                 ICL_DEBUG("failed to receive AHS");
343                 return (-1);
344         }
345
346         *availablep -= request->ip_ahs_len;
347         return (0);
348 }
349
350 static uint32_t
351 icl_mbuf_to_crc32c(const struct mbuf *m0)
352 {
353         uint32_t digest = 0xffffffff;
354         const struct mbuf *m;
355
356         for (m = m0; m != NULL; m = m->m_next)
357                 digest = calculate_crc32c(digest,
358                     mtod(m, const void *), m->m_len);
359
360         digest = digest ^ 0xffffffff;
361
362         return (digest);
363 }
364
365 static int
366 icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
367 {
368         struct mbuf *m;
369         uint32_t received_digest, valid_digest;
370
371         if (request->ip_conn->ic_header_crc32c == false)
372                 return (0);
373
374         m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE);
375         if (m == NULL) {
376                 ICL_DEBUG("failed to receive header digest");
377                 return (-1);
378         }
379
380         CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
381         m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest);
382         m_freem(m);
383
384         *availablep -= ISCSI_HEADER_DIGEST_SIZE;
385
386         /*
387          * XXX: Handle AHS.
388          */
389         valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
390         if (received_digest != valid_digest) {
391                 ICL_WARN("header digest check failed; got 0x%x, "
392                     "should be 0x%x", received_digest, valid_digest);
393                 return (-1);
394         }
395
396         return (0);
397 }
398
399 /*
400  * Return the number of bytes that should be waiting in the receive socket
401  * before icl_pdu_receive_data_segment() gets called.
402  */
403 static size_t
404 icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
405 {
406         size_t len;
407
408         len = icl_pdu_data_segment_length(request);
409         if (len == 0)
410                 return (0);
411
412         /*
413          * Account for the parts of data segment already read from
414          * the socket buffer.
415          */
416         KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
417         len -= request->ip_data_len;
418
419         /*
420          * Don't always wait for the full data segment to be delivered
421          * to the socket; this might badly affect performance due to
422          * TCP window scaling.
423          */
424         if (len > partial_receive_len) {
425 #if 0
426                 ICL_DEBUG("need %zd bytes of data, limiting to %zd",
427                     len, partial_receive_len));
428 #endif
429                 len = partial_receive_len;
430
431                 return (len);
432         }
433
434         /*
435          * Account for padding.  Note that due to the way code is written,
436          * the icl_pdu_receive_data_segment() must always receive padding
437          * along with the last part of data segment, because it would be
438          * impossible to tell whether we've already received the full data
439          * segment including padding, or without it.
440          */
441         if ((len % 4) != 0)
442                 len += 4 - (len % 4);
443
444 #if 0
445         ICL_DEBUG("need %zd bytes of data", len));
446 #endif
447
448         return (len);
449 }
450
451 static int
452 icl_pdu_receive_data_segment(struct icl_pdu *request,
453     size_t *availablep, bool *more_neededp)
454 {
455         struct icl_conn *ic;
456         size_t len, padding = 0;
457         struct mbuf *m;
458
459         ic = request->ip_conn;
460
461         *more_neededp = false;
462         ic->ic_receive_len = 0;
463
464         len = icl_pdu_data_segment_length(request);
465         if (len == 0)
466                 return (0);
467
468         if ((len % 4) != 0)
469                 padding = 4 - (len % 4);
470
471         /*
472          * Account for already received parts of data segment.
473          */
474         KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
475         len -= request->ip_data_len;
476
477         if (len + padding > *availablep) {
478                 /*
479                  * Not enough data in the socket buffer.  Receive as much
480                  * as we can.  Don't receive padding, since, obviously, it's
481                  * not the end of data segment yet.
482                  */
483 #if 0
484                 ICL_DEBUG("limited from %zd to %zd",
485                     len + padding, *availablep - padding));
486 #endif
487                 len = *availablep - padding;
488                 *more_neededp = true;
489                 padding = 0;
490         }
491
492         /*
493          * Must not try to receive padding without at least one byte
494          * of actual data segment.
495          */
496         if (len > 0) {
497                 m = icl_conn_receive(request->ip_conn, len + padding);
498                 if (m == NULL) {
499                         ICL_DEBUG("failed to receive data segment");
500                         return (-1);
501                 }
502
503                 if (request->ip_data_mbuf == NULL)
504                         request->ip_data_mbuf = m;
505                 else
506                         m_cat(request->ip_data_mbuf, m);
507
508                 request->ip_data_len += len;
509                 *availablep -= len + padding;
510         } else
511                 ICL_DEBUG("len 0");
512
513         if (*more_neededp)
514                 ic->ic_receive_len =
515                     icl_pdu_data_segment_receive_len(request);
516
517         return (0);
518 }
519
520 static int
521 icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
522 {
523         struct mbuf *m;
524         uint32_t received_digest, valid_digest;
525
526         if (request->ip_conn->ic_data_crc32c == false)
527                 return (0);
528
529         if (request->ip_data_len == 0)
530                 return (0);
531
532         m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE);
533         if (m == NULL) {
534                 ICL_DEBUG("failed to receive data digest");
535                 return (-1);
536         }
537
538         CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
539         m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest);
540         m_freem(m);
541
542         *availablep -= ISCSI_DATA_DIGEST_SIZE;
543
544         /*
545          * Note that ip_data_mbuf also contains padding; since digest
546          * calculation is supposed to include that, we iterate over
547          * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
548          */
549         valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
550         if (received_digest != valid_digest) {
551                 ICL_WARN("data digest check failed; got 0x%x, "
552                     "should be 0x%x", received_digest, valid_digest);
553                 return (-1);
554         }
555
556         return (0);
557 }
558
559 /*
560  * Somewhat contrary to the name, this attempts to receive only one
561  * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
562  */
563 static struct icl_pdu *
564 icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
565 {
566         struct icl_pdu *request;
567         struct socket *so;
568         size_t len;
569         int error;
570         bool more_needed;
571
572         so = ic->ic_socket;
573
574         if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
575                 KASSERT(ic->ic_receive_pdu == NULL,
576                     ("ic->ic_receive_pdu != NULL"));
577                 request = icl_pdu_new_empty(ic, M_NOWAIT);
578                 if (request == NULL) {
579                         ICL_DEBUG("failed to allocate PDU; "
580                             "dropping connection");
581                         icl_conn_fail(ic);
582                         return (NULL);
583                 }
584                 ic->ic_receive_pdu = request;
585         } else {
586                 KASSERT(ic->ic_receive_pdu != NULL,
587                     ("ic->ic_receive_pdu == NULL"));
588                 request = ic->ic_receive_pdu;
589         }
590
591         if (*availablep < ic->ic_receive_len) {
592 #if 0
593                 ICL_DEBUG("not enough data; need %zd, "
594                     "have %zd", ic->ic_receive_len, *availablep);
595 #endif
596                 return (NULL);
597         }
598
599         switch (ic->ic_receive_state) {
600         case ICL_CONN_STATE_BHS:
601                 //ICL_DEBUG("receiving BHS");
602                 error = icl_pdu_receive_bhs(request, availablep);
603                 if (error != 0) {
604                         ICL_DEBUG("failed to receive BHS; "
605                             "dropping connection");
606                         break;
607                 }
608
609                 /*
610                  * We don't enforce any limit for AHS length;
611                  * its length is stored in 8 bit field.
612                  */
613
614                 len = icl_pdu_data_segment_length(request);
615                 if (len > ic->ic_max_data_segment_length) {
616                         ICL_WARN("received data segment "
617                             "length %zd is larger than negotiated "
618                             "MaxDataSegmentLength %zd; "
619                             "dropping connection",
620                             len, ic->ic_max_data_segment_length);
621                         error = EINVAL;
622                         break;
623                 }
624
625                 ic->ic_receive_state = ICL_CONN_STATE_AHS;
626                 ic->ic_receive_len = icl_pdu_ahs_length(request);
627                 break;
628
629         case ICL_CONN_STATE_AHS:
630                 //ICL_DEBUG("receiving AHS");
631                 error = icl_pdu_receive_ahs(request, availablep);
632                 if (error != 0) {
633                         ICL_DEBUG("failed to receive AHS; "
634                             "dropping connection");
635                         break;
636                 }
637                 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
638                 if (ic->ic_header_crc32c == false)
639                         ic->ic_receive_len = 0;
640                 else
641                         ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
642                 break;
643
644         case ICL_CONN_STATE_HEADER_DIGEST:
645                 //ICL_DEBUG("receiving header digest");
646                 error = icl_pdu_check_header_digest(request, availablep);
647                 if (error != 0) {
648                         ICL_DEBUG("header digest failed; "
649                             "dropping connection");
650                         break;
651                 }
652
653                 ic->ic_receive_state = ICL_CONN_STATE_DATA;
654                 ic->ic_receive_len =
655                     icl_pdu_data_segment_receive_len(request);
656                 break;
657
658         case ICL_CONN_STATE_DATA:
659                 //ICL_DEBUG("receiving data segment");
660                 error = icl_pdu_receive_data_segment(request, availablep,
661                     &more_needed);
662                 if (error != 0) {
663                         ICL_DEBUG("failed to receive data segment;"
664                             "dropping connection");
665                         break;
666                 }
667
668                 if (more_needed)
669                         break;
670
671                 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
672                 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
673                         ic->ic_receive_len = 0;
674                 else
675                         ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
676                 break;
677
678         case ICL_CONN_STATE_DATA_DIGEST:
679                 //ICL_DEBUG("receiving data digest");
680                 error = icl_pdu_check_data_digest(request, availablep);
681                 if (error != 0) {
682                         ICL_DEBUG("data digest failed; "
683                             "dropping connection");
684                         break;
685                 }
686
687                 /*
688                  * We've received complete PDU; reset the receive state machine
689                  * and return the PDU.
690                  */
691                 ic->ic_receive_state = ICL_CONN_STATE_BHS;
692                 ic->ic_receive_len = sizeof(struct iscsi_bhs);
693                 ic->ic_receive_pdu = NULL;
694                 return (request);
695
696         default:
697                 panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
698         }
699
700         if (error != 0) {
701                 /*
702                  * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
703                  * and will get freed in icl_soft_conn_close().
704                  */
705                 icl_conn_fail(ic);
706         }
707
708         return (NULL);
709 }
710
711 static void
712 icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
713 {
714         struct icl_pdu *response;
715         struct socket *so;
716
717         so = ic->ic_socket;
718
719         /*
720          * This can never happen; we're careful to only mess with ic->ic_socket
721          * pointer when the send/receive threads are not running.
722          */
723         KASSERT(so != NULL, ("NULL socket"));
724
725         for (;;) {
726                 if (ic->ic_disconnecting)
727                         return;
728
729                 if (so->so_error != 0) {
730                         ICL_DEBUG("connection error %d; "
731                             "dropping connection", so->so_error);
732                         icl_conn_fail(ic);
733                         return;
734                 }
735
736                 /*
737                  * Loop until we have a complete PDU or there is not enough
738                  * data in the socket buffer.
739                  */
740                 if (available < ic->ic_receive_len) {
741 #if 0
742                         ICL_DEBUG("not enough data; have %zd, "
743                             "need %zd", available,
744                             ic->ic_receive_len);
745 #endif
746                         return;
747                 }
748
749                 response = icl_conn_receive_pdu(ic, &available);
750                 if (response == NULL)
751                         continue;
752
753                 if (response->ip_ahs_len > 0) {
754                         ICL_WARN("received PDU with unsupported "
755                             "AHS; opcode 0x%x; dropping connection",
756                             response->ip_bhs->bhs_opcode);
757                         icl_pdu_free(response);
758                         icl_conn_fail(ic);
759                         return;
760                 }
761
762                 (ic->ic_receive)(response);
763         }
764 }
765
766 static void
767 icl_receive_thread(void *arg)
768 {
769         struct icl_conn *ic;
770         size_t available;
771         struct socket *so;
772
773         ic = arg;
774         so = ic->ic_socket;
775
776         for (;;) {
777                 if (ic->ic_disconnecting) {
778                         //ICL_DEBUG("terminating");
779                         break;
780                 }
781
782                 /*
783                  * Set the low watermark, to be checked by
784                  * soreadable() in icl_soupcall_receive()
785                  * to avoid unneccessary wakeups until there
786                  * is enough data received to read the PDU.
787                  */
788                 SOCKBUF_LOCK(&so->so_rcv);
789                 available = sbavail(&so->so_rcv);
790                 if (available < ic->ic_receive_len) {
791                         so->so_rcv.sb_lowat = ic->ic_receive_len;
792                         cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
793                 } else
794                         so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
795                 SOCKBUF_UNLOCK(&so->so_rcv);
796
797                 icl_conn_receive_pdus(ic, available);
798         }
799
800         ICL_CONN_LOCK(ic);
801         ic->ic_receive_running = false;
802         cv_signal(&ic->ic_send_cv);
803         ICL_CONN_UNLOCK(ic);
804         kthread_exit();
805 }
806
807 static int
808 icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
809 {
810         struct icl_conn *ic;
811
812         if (!soreadable(so))
813                 return (SU_OK);
814
815         ic = arg;
816         cv_signal(&ic->ic_receive_cv);
817         return (SU_OK);
818 }
819
820 static int
821 icl_pdu_finalize(struct icl_pdu *request)
822 {
823         size_t padding, pdu_len;
824         uint32_t digest, zero = 0;
825         int ok;
826         struct icl_conn *ic;
827
828         ic = request->ip_conn;
829
830         icl_pdu_set_data_segment_length(request, request->ip_data_len);
831
832         pdu_len = icl_pdu_size(request);
833
834         if (ic->ic_header_crc32c) {
835                 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
836                 ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
837                     (void *)&digest);
838                 if (ok != 1) {
839                         ICL_WARN("failed to append header digest");
840                         return (1);
841                 }
842         }
843
844         if (request->ip_data_len != 0) {
845                 padding = icl_pdu_padding(request);
846                 if (padding > 0) {
847                         ok = m_append(request->ip_data_mbuf, padding,
848                             (void *)&zero);
849                         if (ok != 1) {
850                                 ICL_WARN("failed to append padding");
851                                 return (1);
852                         }
853                 }
854
855                 if (ic->ic_data_crc32c) {
856                         digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
857
858                         ok = m_append(request->ip_data_mbuf, sizeof(digest),
859                             (void *)&digest);
860                         if (ok != 1) {
861                                 ICL_WARN("failed to append data digest");
862                                 return (1);
863                         }
864                 }
865
866                 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
867                 request->ip_data_mbuf = NULL;
868         }
869
870         request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
871
872         return (0);
873 }
874
875 static void
876 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
877 {
878         struct icl_pdu *request, *request2;
879         struct socket *so;
880         size_t available, size, size2;
881         int coalesced, error;
882
883         ICL_CONN_LOCK_ASSERT_NOT(ic);
884
885         so = ic->ic_socket;
886
887         SOCKBUF_LOCK(&so->so_snd);
888         /*
889          * Check how much space do we have for transmit.  We can't just
890          * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
891          * as it always frees the mbuf chain passed to it, even in case
892          * of error.
893          */
894         available = sbspace(&so->so_snd);
895
896         /*
897          * Notify the socket upcall that we don't need wakeups
898          * for the time being.
899          */
900         so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
901         SOCKBUF_UNLOCK(&so->so_snd);
902
903         while (!STAILQ_EMPTY(queue)) {
904                 request = STAILQ_FIRST(queue);
905                 size = icl_pdu_size(request);
906                 if (available < size) {
907
908                         /*
909                          * Set the low watermark, to be checked by
910                          * sowriteable() in icl_soupcall_send()
911                          * to avoid unneccessary wakeups until there
912                          * is enough space for the PDU to fit.
913                          */
914                         SOCKBUF_LOCK(&so->so_snd);
915                         available = sbspace(&so->so_snd);
916                         if (available < size) {
917 #if 1
918                                 ICL_DEBUG("no space to send; "
919                                     "have %zd, need %zd",
920                                     available, size);
921 #endif
922                                 so->so_snd.sb_lowat = size;
923                                 SOCKBUF_UNLOCK(&so->so_snd);
924                                 return;
925                         }
926                         SOCKBUF_UNLOCK(&so->so_snd);
927                 }
928                 STAILQ_REMOVE_HEAD(queue, ip_next);
929                 error = icl_pdu_finalize(request);
930                 if (error != 0) {
931                         ICL_DEBUG("failed to finalize PDU; "
932                             "dropping connection");
933                         icl_conn_fail(ic);
934                         icl_pdu_free(request);
935                         return;
936                 }
937                 if (coalesce) {
938                         coalesced = 1;
939                         for (;;) {
940                                 request2 = STAILQ_FIRST(queue);
941                                 if (request2 == NULL)
942                                         break;
943                                 size2 = icl_pdu_size(request2);
944                                 if (available < size + size2)
945                                         break;
946                                 STAILQ_REMOVE_HEAD(queue, ip_next);
947                                 error = icl_pdu_finalize(request2);
948                                 if (error != 0) {
949                                         ICL_DEBUG("failed to finalize PDU; "
950                                             "dropping connection");
951                                         icl_conn_fail(ic);
952                                         icl_pdu_free(request);
953                                         icl_pdu_free(request2);
954                                         return;
955                                 }
956                                 m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf);
957                                 request2->ip_bhs_mbuf = NULL;
958                                 request->ip_bhs_mbuf->m_pkthdr.len += size2;
959                                 size += size2;
960                                 STAILQ_REMOVE_AFTER(queue, request, ip_next);
961                                 icl_pdu_free(request2);
962                                 coalesced++;
963                         }
964 #if 0
965                         if (coalesced > 1) {
966                                 ICL_DEBUG("coalesced %d PDUs into %zd bytes",
967                                     coalesced, size);
968                         }
969 #endif
970                 }
971                 available -= size;
972                 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
973                     NULL, MSG_DONTWAIT, curthread);
974                 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
975                 if (error != 0) {
976                         ICL_DEBUG("failed to send PDU, error %d; "
977                             "dropping connection", error);
978                         icl_conn_fail(ic);
979                         icl_pdu_free(request);
980                         return;
981                 }
982                 icl_pdu_free(request);
983         }
984 }
985
986 static void
987 icl_send_thread(void *arg)
988 {
989         struct icl_conn *ic;
990         struct icl_pdu_stailq queue;
991
992         ic = arg;
993
994         STAILQ_INIT(&queue);
995
996         ICL_CONN_LOCK(ic);
997         for (;;) {
998                 for (;;) {
999                         /*
1000                          * If the local queue is empty, populate it from
1001                          * the main one.  This way the icl_conn_send_pdus()
1002                          * can go through all the queued PDUs without holding
1003                          * any locks.
1004                          */
1005                         if (STAILQ_EMPTY(&queue))
1006                                 STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);
1007
1008                         ic->ic_check_send_space = false;
1009                         ICL_CONN_UNLOCK(ic);
1010                         icl_conn_send_pdus(ic, &queue);
1011                         ICL_CONN_LOCK(ic);
1012
1013                         /*
1014                          * The icl_soupcall_send() was called since the last
1015                          * call to sbspace(); go around;
1016                          */
1017                         if (ic->ic_check_send_space)
1018                                 continue;
1019
1020                         /*
1021                          * Local queue is empty, but we still have PDUs
1022                          * in the main one; go around.
1023                          */
1024                         if (STAILQ_EMPTY(&queue) &&
1025                             !STAILQ_EMPTY(&ic->ic_to_send))
1026                                 continue;
1027
1028                         /*
1029                          * There might be some stuff in the local queue,
1030                          * which didn't get sent due to not having enough send
1031                          * space.  Wait for socket upcall.
1032                          */
1033                         break;
1034                 }
1035
1036                 if (ic->ic_disconnecting) {
1037                         //ICL_DEBUG("terminating");
1038                         break;
1039                 }
1040
1041                 cv_wait(&ic->ic_send_cv, ic->ic_lock);
1042         }
1043
1044         /*
1045          * We're exiting; move PDUs back to the main queue, so they can
1046          * get freed properly.  At this point ordering doesn't matter.
1047          */
1048         STAILQ_CONCAT(&ic->ic_to_send, &queue);
1049
1050         ic->ic_send_running = false;
1051         cv_signal(&ic->ic_send_cv);
1052         ICL_CONN_UNLOCK(ic);
1053         kthread_exit();
1054 }
1055
1056 static int
1057 icl_soupcall_send(struct socket *so, void *arg, int waitflag)
1058 {
1059         struct icl_conn *ic;
1060
1061         if (!sowriteable(so))
1062                 return (SU_OK);
1063
1064         ic = arg;
1065
1066         ICL_CONN_LOCK(ic);
1067         ic->ic_check_send_space = true;
1068         ICL_CONN_UNLOCK(ic);
1069
1070         cv_signal(&ic->ic_send_cv);
1071
1072         return (SU_OK);
1073 }
1074
1075 static int
1076 icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len,
1077     int flags)
1078 {
1079         struct mbuf *mb, *newmb;
1080         size_t copylen, off = 0;
1081
1082         KASSERT(len > 0, ("len == 0"));
1083
1084         newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR);
1085         if (newmb == NULL) {
1086                 ICL_WARN("failed to allocate mbuf for %zd bytes", len);
1087                 return (ENOMEM);
1088         }
1089
1090         for (mb = newmb; mb != NULL; mb = mb->m_next) {
1091                 copylen = min(M_TRAILINGSPACE(mb), len - off);
1092                 memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
1093                 mb->m_len = copylen;
1094                 off += copylen;
1095         }
1096         KASSERT(off == len, ("%s: off != len", __func__));
1097
1098         if (request->ip_data_mbuf == NULL) {
1099                 request->ip_data_mbuf = newmb;
1100                 request->ip_data_len = len;
1101         } else {
1102                 m_cat(request->ip_data_mbuf, newmb);
1103                 request->ip_data_len += len;
1104         }
1105
1106         return (0);
1107 }
1108
/*
 * Public entry point for appending data to a PDU; forwards to
 * icl_pdu_append_data() and returns its result.  "ic" is not used.
 */
int
icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{
        int error;

        error = icl_pdu_append_data(request, addr, len, flags);

        return (error);
}
1116
1117 static void
1118 icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len)
1119 {
1120
1121         m_copydata(ip->ip_data_mbuf, off, len, addr);
1122 }
1123
/*
 * Public entry point for reading data out of a PDU; forwards to
 * icl_pdu_get_data().  "ic" is not used.
 */
void
icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{

        /*
         * Plain call rather than "return (...)": a return statement
         * with an expression is not allowed in a void function.
         */
        icl_pdu_get_data(ip, off, addr, len);
}
1131
1132 static void
1133 icl_pdu_queue(struct icl_pdu *ip)
1134 {
1135         struct icl_conn *ic;
1136
1137         ic = ip->ip_conn;
1138
1139         ICL_CONN_LOCK_ASSERT(ic);
1140
1141         if (ic->ic_disconnecting || ic->ic_socket == NULL) {
1142                 ICL_DEBUG("icl_pdu_queue on closed connection");
1143                 icl_pdu_free(ip);
1144                 return;
1145         }
1146
1147         if (!STAILQ_EMPTY(&ic->ic_to_send)) {
1148                 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1149                 /*
1150                  * If the queue is not empty, someone else had already
1151                  * signaled the send thread; no need to do that again,
1152                  * just return.
1153                  */
1154                 return;
1155         }
1156
1157         STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1158         cv_signal(&ic->ic_send_cv);
1159 }
1160
/*
 * Public entry point for queueing a PDU for transmission; forwards to
 * icl_pdu_queue(), which takes the connection from the PDU itself.
 * "ic" is not used.
 */
void
icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
        icl_pdu_queue(ip);
}
1167
1168 static struct icl_conn *
1169 icl_soft_new_conn(const char *name, struct mtx *lock)
1170 {
1171         struct icl_conn *ic;
1172
1173         refcount_acquire(&icl_ncons);
1174
1175         ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO);
1176
1177         STAILQ_INIT(&ic->ic_to_send);
1178         ic->ic_lock = lock;
1179         cv_init(&ic->ic_send_cv, "icl_tx");
1180         cv_init(&ic->ic_receive_cv, "icl_rx");
1181 #ifdef DIAGNOSTIC
1182         refcount_init(&ic->ic_outstanding_pdus, 0);
1183 #endif
1184         ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
1185         ic->ic_name = name;
1186         ic->ic_offload = "None";
1187
1188         return (ic);
1189 }
1190
1191 void
1192 icl_soft_conn_free(struct icl_conn *ic)
1193 {
1194
1195         cv_destroy(&ic->ic_send_cv);
1196         cv_destroy(&ic->ic_receive_cv);
1197         kobj_delete((struct kobj *)ic, M_ICL_SOFT);
1198         refcount_release(&icl_ncons);
1199 }
1200
1201 static int
1202 icl_conn_start(struct icl_conn *ic)
1203 {
1204         size_t minspace;
1205         struct sockopt opt;
1206         int error, one = 1;
1207
1208         ICL_CONN_LOCK(ic);
1209
1210         /*
1211          * XXX: Ugly hack.
1212          */
1213         if (ic->ic_socket == NULL) {
1214                 ICL_CONN_UNLOCK(ic);
1215                 return (EINVAL);
1216         }
1217
1218         ic->ic_receive_state = ICL_CONN_STATE_BHS;
1219         ic->ic_receive_len = sizeof(struct iscsi_bhs);
1220         ic->ic_disconnecting = false;
1221
1222         ICL_CONN_UNLOCK(ic);
1223
1224         /*
1225          * For sendspace, this is required because the current code cannot
1226          * send a PDU in pieces; thus, the minimum buffer size is equal
1227          * to the maximum PDU size.  "+4" is to account for possible padding.
1228          *
1229          * What we should actually do here is to use autoscaling, but set
1230          * some minimal buffer size to "minspace".  I don't know a way to do
1231          * that, though.
1232          */
1233         minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
1234             ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
1235         if (sendspace < minspace) {
1236                 ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
1237                     minspace);
1238                 sendspace = minspace;
1239         }
1240         if (recvspace < minspace) {
1241                 ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
1242                     minspace);
1243                 recvspace = minspace;
1244         }
1245
1246         error = soreserve(ic->ic_socket, sendspace, recvspace);
1247         if (error != 0) {
1248                 ICL_WARN("soreserve failed with error %d", error);
1249                 icl_soft_conn_close(ic);
1250                 return (error);
1251         }
1252         ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
1253         ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;
1254
1255         /*
1256          * Disable Nagle.
1257          */
1258         bzero(&opt, sizeof(opt));
1259         opt.sopt_dir = SOPT_SET;
1260         opt.sopt_level = IPPROTO_TCP;
1261         opt.sopt_name = TCP_NODELAY;
1262         opt.sopt_val = &one;
1263         opt.sopt_valsize = sizeof(one);
1264         error = sosetopt(ic->ic_socket, &opt);
1265         if (error != 0) {
1266                 ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
1267                 icl_soft_conn_close(ic);
1268                 return (error);
1269         }
1270
1271         /*
1272          * Register socket upcall, to get notified about incoming PDUs
1273          * and free space to send outgoing ones.
1274          */
1275         SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1276         soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
1277         SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1278         SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1279         soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
1280         SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1281
1282         /*
1283          * Start threads.
1284          */
1285         ICL_CONN_LOCK(ic);
1286         ic->ic_send_running = ic->ic_receive_running = true;
1287         ICL_CONN_UNLOCK(ic);
1288         error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
1289             ic->ic_name);
1290         if (error != 0) {
1291                 ICL_WARN("kthread_add(9) failed with error %d", error);
1292                 ICL_CONN_LOCK(ic);
1293                 ic->ic_send_running = ic->ic_receive_running = false;
1294                 cv_signal(&ic->ic_send_cv);
1295                 ICL_CONN_UNLOCK(ic);
1296                 icl_soft_conn_close(ic);
1297                 return (error);
1298         }
1299         error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
1300             ic->ic_name);
1301         if (error != 0) {
1302                 ICL_WARN("kthread_add(9) failed with error %d", error);
1303                 ICL_CONN_LOCK(ic);
1304                 ic->ic_receive_running = false;
1305                 cv_signal(&ic->ic_send_cv);
1306                 ICL_CONN_UNLOCK(ic);
1307                 icl_soft_conn_close(ic);
1308                 return (error);
1309         }
1310
1311         return (0);
1312 }
1313
1314 int
1315 icl_soft_conn_handoff(struct icl_conn *ic, int fd)
1316 {
1317         struct file *fp;
1318         struct socket *so;
1319         cap_rights_t rights;
1320         int error;
1321
1322         ICL_CONN_LOCK_ASSERT_NOT(ic);
1323
1324         /*
1325          * Steal the socket from userland.
1326          */
1327         error = fget(curthread, fd,
1328             cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1329         if (error != 0)
1330                 return (error);
1331         if (fp->f_type != DTYPE_SOCKET) {
1332                 fdrop(fp, curthread);
1333                 return (EINVAL);
1334         }
1335         so = fp->f_data;
1336         if (so->so_type != SOCK_STREAM) {
1337                 fdrop(fp, curthread);
1338                 return (EINVAL);
1339         }
1340
1341         ICL_CONN_LOCK(ic);
1342
1343         if (ic->ic_socket != NULL) {
1344                 ICL_CONN_UNLOCK(ic);
1345                 fdrop(fp, curthread);
1346                 return (EBUSY);
1347         }
1348
1349         ic->ic_socket = fp->f_data;
1350         fp->f_ops = &badfileops;
1351         fp->f_data = NULL;
1352         fdrop(fp, curthread);
1353         ICL_CONN_UNLOCK(ic);
1354
1355         error = icl_conn_start(ic);
1356
1357         return (error);
1358 }
1359
1360 void
1361 icl_soft_conn_close(struct icl_conn *ic)
1362 {
1363         struct icl_pdu *pdu;
1364         struct socket *so;
1365
1366         ICL_CONN_LOCK(ic);
1367
1368         /*
1369          * Wake up the threads, so they can properly terminate.
1370          */
1371         ic->ic_disconnecting = true;
1372         while (ic->ic_receive_running || ic->ic_send_running) {
1373                 cv_signal(&ic->ic_receive_cv);
1374                 cv_signal(&ic->ic_send_cv);
1375                 cv_wait(&ic->ic_send_cv, ic->ic_lock);
1376         }
1377
1378         /* Some other thread could close the connection same time. */
1379         so = ic->ic_socket;
1380         if (so == NULL) {
1381                 ICL_CONN_UNLOCK(ic);
1382                 return;
1383         }
1384         ic->ic_socket = NULL;
1385
1386         /*
1387          * Deregister socket upcalls.
1388          */
1389         ICL_CONN_UNLOCK(ic);
1390         SOCKBUF_LOCK(&so->so_snd);
1391         if (so->so_snd.sb_upcall != NULL)
1392                 soupcall_clear(so, SO_SND);
1393         SOCKBUF_UNLOCK(&so->so_snd);
1394         SOCKBUF_LOCK(&so->so_rcv);
1395         if (so->so_rcv.sb_upcall != NULL)
1396                 soupcall_clear(so, SO_RCV);
1397         SOCKBUF_UNLOCK(&so->so_rcv);
1398         soclose(so);
1399         ICL_CONN_LOCK(ic);
1400
1401         if (ic->ic_receive_pdu != NULL) {
1402                 //ICL_DEBUG("freeing partially received PDU");
1403                 icl_pdu_free(ic->ic_receive_pdu);
1404                 ic->ic_receive_pdu = NULL;
1405         }
1406
1407         /*
1408          * Remove any outstanding PDUs from the send queue.
1409          */
1410         while (!STAILQ_EMPTY(&ic->ic_to_send)) {
1411                 pdu = STAILQ_FIRST(&ic->ic_to_send);
1412                 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
1413                 icl_pdu_free(pdu);
1414         }
1415
1416         KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
1417             ("destroying session with non-empty send queue"));
1418 #ifdef DIAGNOSTIC
1419         KASSERT(ic->ic_outstanding_pdus == 0,
1420             ("destroying session with %d outstanding PDUs",
1421              ic->ic_outstanding_pdus));
1422 #endif
1423         ICL_CONN_UNLOCK(ic);
1424 }
1425
/*
 * Per-task setup hook.  This implementation does nothing: none of the
 * arguments are read or written, and it always returns 0.
 */
int
icl_soft_conn_task_setup(struct icl_conn *ic, struct ccb_scsiio *csio,
    uint32_t *task_tagp, void **prvp)
{
        /* Nothing to set up. */
        return (0);
}
1433
/*
 * Per-task teardown hook.  This implementation does nothing.
 */
void
icl_soft_conn_task_done(struct icl_conn *ic, void *prv)
{
        /* Nothing to release. */
}
1438
/*
 * Per-transfer setup hook.  This implementation does nothing: none of
 * the arguments are read or written, and it always returns 0.
 */
int
icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
    uint32_t *transfer_tag, void **prvp)
{
        /* Nothing to set up. */
        return (0);
}
1446
/*
 * Per-transfer teardown hook.  This implementation does nothing.
 */
void
icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv)
{
        /* Nothing to release. */
}
1451
/*
 * Store this module's limit, 128 * 1024, in *limitp.  Always returns 0.
 */
static int
icl_soft_limits(size_t *limitp)
{
        const size_t limit = 128 * 1024;

        *limitp = limit;

        return (0);
}
1460
1461 #ifdef ICL_KERNEL_PROXY
1462 int
1463 icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so)
1464 {
1465         int error;
1466
1467         ICL_CONN_LOCK_ASSERT_NOT(ic);
1468
1469         if (so->so_type != SOCK_STREAM)
1470                 return (EINVAL);
1471
1472         ICL_CONN_LOCK(ic);
1473         if (ic->ic_socket != NULL) {
1474                 ICL_CONN_UNLOCK(ic);
1475                 return (EBUSY);
1476         }
1477         ic->ic_socket = so;
1478         ICL_CONN_UNLOCK(ic);
1479
1480         error = icl_conn_start(ic);
1481
1482         return (error);
1483 }
1484 #endif /* ICL_KERNEL_PROXY */
1485
1486 static int
1487 icl_soft_load(void)
1488 {
1489         int error;
1490
1491         icl_pdu_zone = uma_zcreate("icl_pdu",
1492             sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
1493             UMA_ALIGN_PTR, 0);
1494         refcount_init(&icl_ncons, 0);
1495
1496         /*
1497          * The reason we call this "none" is that to the user,
1498          * it's known as "offload driver"; "offload driver: soft"
1499          * doesn't make much sense.
1500          */
1501         error = icl_register("none", 0, icl_soft_limits, icl_soft_new_conn);
1502         KASSERT(error == 0, ("failed to register"));
1503
1504         return (error);
1505 }
1506
1507 static int
1508 icl_soft_unload(void)
1509 {
1510
1511         if (icl_ncons != 0)
1512                 return (EBUSY);
1513
1514         icl_unregister("none");
1515
1516         uma_zdestroy(icl_pdu_zone);
1517
1518         return (0);
1519 }
1520
1521 static int
1522 icl_soft_modevent(module_t mod, int what, void *arg)
1523 {
1524
1525         switch (what) {
1526         case MOD_LOAD:
1527                 return (icl_soft_load());
1528         case MOD_UNLOAD:
1529                 return (icl_soft_unload());
1530         default:
1531                 return (EINVAL);
1532         }
1533 }
1534
1535 moduledata_t icl_soft_data = {
1536         "icl_soft",
1537         icl_soft_modevent,
1538         0
1539 };
1540
1541 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
1542 MODULE_DEPEND(icl_soft, icl, 1, 1, 1);
1543 MODULE_VERSION(icl_soft, 1);