]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/iscsi/icl_soft.c
Make output of "iscsictl -v" and "ctladm islist -v" a little prettier
[FreeBSD/FreeBSD.git] / sys / dev / iscsi / icl_soft.c
1 /*-
2  * Copyright (c) 2012 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  */
30
31 /*
32  * Software implementation of iSCSI Common Layer kobj(9) interface.
33  */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include <sys/param.h>
39 #include <sys/capsicum.h>
40 #include <sys/condvar.h>
41 #include <sys/conf.h>
42 #include <sys/file.h>
43 #include <sys/kernel.h>
44 #include <sys/kthread.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/mutex.h>
48 #include <sys/module.h>
49 #include <sys/protosw.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #include <sys/sx.h>
55 #include <sys/uio.h>
56 #include <vm/uma.h>
57 #include <netinet/in.h>
58 #include <netinet/tcp.h>
59
60 #include <dev/iscsi/icl.h>
61 #include <dev/iscsi/iscsi_proto.h>
62 #include <icl_conn_if.h>
63
64 static int coalesce = 1;
65 SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
66     &coalesce, 0, "Try to coalesce PDUs before sending");
67 static int partial_receive_len = 128 * 1024;
68 SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
69     &partial_receive_len, 0, "Minimum read size for partially received "
70     "data segment");
71 static int sendspace = 1048576;
72 SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
73     &sendspace, 0, "Default send socket buffer size");
74 static int recvspace = 1048576;
75 SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
76     &recvspace, 0, "Default receive socket buffer size");
77
78 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend");
79 static uma_zone_t icl_pdu_zone;
80
81 static volatile u_int   icl_ncons;
82
/*
 * Connection lock helpers.  X is a pointer to struct icl_conn; its ic_lock
 * member is handed directly to mtx_lock()/mtx_unlock()/mtx_assert().  The
 * argument is parenthesised so that any pointer expression expands safely.
 */
#define ICL_CONN_LOCK(X)                mtx_lock((X)->ic_lock)
#define ICL_CONN_UNLOCK(X)              mtx_unlock((X)->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)         mtx_assert((X)->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)     mtx_assert((X)->ic_lock, MA_NOTOWNED)
87
88 STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
89
90 static icl_conn_new_pdu_t       icl_soft_conn_new_pdu;
91 static icl_conn_pdu_free_t      icl_soft_conn_pdu_free;
92 static icl_conn_pdu_data_segment_length_t
93                                     icl_soft_conn_pdu_data_segment_length;
94 static icl_conn_pdu_append_data_t       icl_soft_conn_pdu_append_data;
95 static icl_conn_pdu_get_data_t  icl_soft_conn_pdu_get_data;
96 static icl_conn_pdu_queue_t     icl_soft_conn_pdu_queue;
97 static icl_conn_handoff_t       icl_soft_conn_handoff;
98 static icl_conn_free_t          icl_soft_conn_free;
99 static icl_conn_close_t         icl_soft_conn_close;
100 static icl_conn_connected_t     icl_soft_conn_connected;
101
102 static kobj_method_t icl_soft_methods[] = {
103         KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
104         KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
105         KOBJMETHOD(icl_conn_pdu_data_segment_length,
106             icl_soft_conn_pdu_data_segment_length),
107         KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
108         KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
109         KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
110         KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
111         KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
112         KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
113         KOBJMETHOD(icl_conn_connected, icl_soft_conn_connected),
114         { 0, 0 }
115 };
116
117 DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn));
118
119 static void
120 icl_conn_fail(struct icl_conn *ic)
121 {
122         if (ic->ic_socket == NULL)
123                 return;
124
125         /*
126          * XXX
127          */
128         ic->ic_socket->so_error = EDOOFUS;
129         (ic->ic_error)(ic);
130 }
131
132 static struct mbuf *
133 icl_conn_receive(struct icl_conn *ic, size_t len)
134 {
135         struct uio uio;
136         struct socket *so;
137         struct mbuf *m;
138         int error, flags;
139
140         so = ic->ic_socket;
141
142         memset(&uio, 0, sizeof(uio));
143         uio.uio_resid = len;
144
145         flags = MSG_DONTWAIT;
146         error = soreceive(so, NULL, &uio, &m, NULL, &flags);
147         if (error != 0) {
148                 ICL_DEBUG("soreceive error %d", error);
149                 return (NULL);
150         }
151         if (uio.uio_resid != 0) {
152                 m_freem(m);
153                 ICL_DEBUG("short read");
154                 return (NULL);
155         }
156
157         return (m);
158 }
159
160 static struct icl_pdu *
161 icl_pdu_new_empty(struct icl_conn *ic, int flags)
162 {
163         struct icl_pdu *ip;
164
165 #ifdef DIAGNOSTIC
166         refcount_acquire(&ic->ic_outstanding_pdus);
167 #endif
168         ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
169         if (ip == NULL) {
170                 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
171 #ifdef DIAGNOSTIC
172                 refcount_release(&ic->ic_outstanding_pdus);
173 #endif
174                 return (NULL);
175         }
176
177         ip->ip_conn = ic;
178
179         return (ip);
180 }
181
182 static void
183 icl_pdu_free(struct icl_pdu *ip)
184 {
185         struct icl_conn *ic;
186
187         ic = ip->ip_conn;
188
189         m_freem(ip->ip_bhs_mbuf);
190         m_freem(ip->ip_ahs_mbuf);
191         m_freem(ip->ip_data_mbuf);
192         uma_zfree(icl_pdu_zone, ip);
193 #ifdef DIAGNOSTIC
194         refcount_release(&ic->ic_outstanding_pdus);
195 #endif
196 }
197
/*
 * icl_conn_pdu_free method: free PDU ip.  The connection argument is not
 * used; the work is done by icl_pdu_free().
 */
void
icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{

        icl_pdu_free(ip);
}
204
205 /*
206  * Allocate icl_pdu with empty BHS to fill up by the caller.
207  */
208 struct icl_pdu *
209 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags)
210 {
211         struct icl_pdu *ip;
212
213         ip = icl_pdu_new_empty(ic, flags);
214         if (ip == NULL)
215                 return (NULL);
216
217         ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs),
218             flags, MT_DATA, M_PKTHDR);
219         if (ip->ip_bhs_mbuf == NULL) {
220                 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
221                 icl_pdu_free(ip);
222                 return (NULL);
223         }
224         ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
225         memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
226         ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
227
228         return (ip);
229 }
230
231 static int
232 icl_pdu_ahs_length(const struct icl_pdu *request)
233 {
234
235         return (request->ip_bhs->bhs_total_ahs_len * 4);
236 }
237
238 static size_t
239 icl_pdu_data_segment_length(const struct icl_pdu *request)
240 {
241         uint32_t len = 0;
242
243         len += request->ip_bhs->bhs_data_segment_len[0];
244         len <<= 8;
245         len += request->ip_bhs->bhs_data_segment_len[1];
246         len <<= 8;
247         len += request->ip_bhs->bhs_data_segment_len[2];
248
249         return (len);
250 }
251
/*
 * icl_conn_pdu_data_segment_length method: return the data segment length
 * encoded in the PDU's BHS.  The connection argument is not used.
 */
size_t
icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{
        size_t len;

        len = icl_pdu_data_segment_length(request);
        return (len);
}
259
260 static void
261 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
262 {
263
264         response->ip_bhs->bhs_data_segment_len[2] = len;
265         response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
266         response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
267 }
268
269 static size_t
270 icl_pdu_padding(const struct icl_pdu *ip)
271 {
272
273         if ((ip->ip_data_len % 4) != 0)
274                 return (4 - (ip->ip_data_len % 4));
275
276         return (0);
277 }
278
279 static size_t
280 icl_pdu_size(const struct icl_pdu *response)
281 {
282         size_t len;
283
284         KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
285
286         len = sizeof(struct iscsi_bhs) + response->ip_data_len +
287             icl_pdu_padding(response);
288         if (response->ip_conn->ic_header_crc32c)
289                 len += ISCSI_HEADER_DIGEST_SIZE;
290         if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
291                 len += ISCSI_DATA_DIGEST_SIZE;
292
293         return (len);
294 }
295
296 static int
297 icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
298 {
299         struct mbuf *m;
300
301         m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs));
302         if (m == NULL) {
303                 ICL_DEBUG("failed to receive BHS");
304                 return (-1);
305         }
306
307         request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs));
308         if (request->ip_bhs_mbuf == NULL) {
309                 ICL_WARN("m_pullup failed");
310                 return (-1);
311         }
312         request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *);
313
314         /*
315          * XXX: For architectures with strict alignment requirements
316          *      we may need to allocate ip_bhs and copy the data into it.
317          *      For some reason, though, not doing this doesn't seem
318          *      to cause problems; tested on sparc64.
319          */
320
321         *availablep -= sizeof(struct iscsi_bhs);
322         return (0);
323 }
324
325 static int
326 icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
327 {
328
329         request->ip_ahs_len = icl_pdu_ahs_length(request);
330         if (request->ip_ahs_len == 0)
331                 return (0);
332
333         request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
334             request->ip_ahs_len);
335         if (request->ip_ahs_mbuf == NULL) {
336                 ICL_DEBUG("failed to receive AHS");
337                 return (-1);
338         }
339
340         *availablep -= request->ip_ahs_len;
341         return (0);
342 }
343
344 static uint32_t
345 icl_mbuf_to_crc32c(const struct mbuf *m0)
346 {
347         uint32_t digest = 0xffffffff;
348         const struct mbuf *m;
349
350         for (m = m0; m != NULL; m = m->m_next)
351                 digest = calculate_crc32c(digest,
352                     mtod(m, const void *), m->m_len);
353
354         digest = digest ^ 0xffffffff;
355
356         return (digest);
357 }
358
359 static int
360 icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
361 {
362         struct mbuf *m;
363         uint32_t received_digest, valid_digest;
364
365         if (request->ip_conn->ic_header_crc32c == false)
366                 return (0);
367
368         m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE);
369         if (m == NULL) {
370                 ICL_DEBUG("failed to receive header digest");
371                 return (-1);
372         }
373
374         CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
375         m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest);
376         m_freem(m);
377
378         *availablep -= ISCSI_HEADER_DIGEST_SIZE;
379
380         /*
381          * XXX: Handle AHS.
382          */
383         valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
384         if (received_digest != valid_digest) {
385                 ICL_WARN("header digest check failed; got 0x%x, "
386                     "should be 0x%x", received_digest, valid_digest);
387                 return (-1);
388         }
389
390         return (0);
391 }
392
393 /*
394  * Return the number of bytes that should be waiting in the receive socket
395  * before icl_pdu_receive_data_segment() gets called.
396  */
397 static size_t
398 icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
399 {
400         size_t len;
401
402         len = icl_pdu_data_segment_length(request);
403         if (len == 0)
404                 return (0);
405
406         /*
407          * Account for the parts of data segment already read from
408          * the socket buffer.
409          */
410         KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
411         len -= request->ip_data_len;
412
413         /*
414          * Don't always wait for the full data segment to be delivered
415          * to the socket; this might badly affect performance due to
416          * TCP window scaling.
417          */
418         if (len > partial_receive_len) {
419 #if 0
420                 ICL_DEBUG("need %zd bytes of data, limiting to %zd",
421                     len, partial_receive_len));
422 #endif
423                 len = partial_receive_len;
424
425                 return (len);
426         }
427
428         /*
429          * Account for padding.  Note that due to the way code is written,
430          * the icl_pdu_receive_data_segment() must always receive padding
431          * along with the last part of data segment, because it would be
432          * impossible to tell whether we've already received the full data
433          * segment including padding, or without it.
434          */
435         if ((len % 4) != 0)
436                 len += 4 - (len % 4);
437
438 #if 0
439         ICL_DEBUG("need %zd bytes of data", len));
440 #endif
441
442         return (len);
443 }
444
445 static int
446 icl_pdu_receive_data_segment(struct icl_pdu *request,
447     size_t *availablep, bool *more_neededp)
448 {
449         struct icl_conn *ic;
450         size_t len, padding = 0;
451         struct mbuf *m;
452
453         ic = request->ip_conn;
454
455         *more_neededp = false;
456         ic->ic_receive_len = 0;
457
458         len = icl_pdu_data_segment_length(request);
459         if (len == 0)
460                 return (0);
461
462         if ((len % 4) != 0)
463                 padding = 4 - (len % 4);
464
465         /*
466          * Account for already received parts of data segment.
467          */
468         KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
469         len -= request->ip_data_len;
470
471         if (len + padding > *availablep) {
472                 /*
473                  * Not enough data in the socket buffer.  Receive as much
474                  * as we can.  Don't receive padding, since, obviously, it's
475                  * not the end of data segment yet.
476                  */
477 #if 0
478                 ICL_DEBUG("limited from %zd to %zd",
479                     len + padding, *availablep - padding));
480 #endif
481                 len = *availablep - padding;
482                 *more_neededp = true;
483                 padding = 0;
484         }
485
486         /*
487          * Must not try to receive padding without at least one byte
488          * of actual data segment.
489          */
490         if (len > 0) {
491                 m = icl_conn_receive(request->ip_conn, len + padding);
492                 if (m == NULL) {
493                         ICL_DEBUG("failed to receive data segment");
494                         return (-1);
495                 }
496
497                 if (request->ip_data_mbuf == NULL)
498                         request->ip_data_mbuf = m;
499                 else
500                         m_cat(request->ip_data_mbuf, m);
501
502                 request->ip_data_len += len;
503                 *availablep -= len + padding;
504         } else
505                 ICL_DEBUG("len 0");
506
507         if (*more_neededp)
508                 ic->ic_receive_len =
509                     icl_pdu_data_segment_receive_len(request);
510
511         return (0);
512 }
513
514 static int
515 icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
516 {
517         struct mbuf *m;
518         uint32_t received_digest, valid_digest;
519
520         if (request->ip_conn->ic_data_crc32c == false)
521                 return (0);
522
523         if (request->ip_data_len == 0)
524                 return (0);
525
526         m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE);
527         if (m == NULL) {
528                 ICL_DEBUG("failed to receive data digest");
529                 return (-1);
530         }
531
532         CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
533         m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest);
534         m_freem(m);
535
536         *availablep -= ISCSI_DATA_DIGEST_SIZE;
537
538         /*
539          * Note that ip_data_mbuf also contains padding; since digest
540          * calculation is supposed to include that, we iterate over
541          * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
542          */
543         valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
544         if (received_digest != valid_digest) {
545                 ICL_WARN("data digest check failed; got 0x%x, "
546                     "should be 0x%x", received_digest, valid_digest);
547                 return (-1);
548         }
549
550         return (0);
551 }
552
553 /*
554  * Somewhat contrary to the name, this attempts to receive only one
555  * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
556  */
557 static struct icl_pdu *
558 icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
559 {
560         struct icl_pdu *request;
561         struct socket *so;
562         size_t len;
563         int error;
564         bool more_needed;
565
566         so = ic->ic_socket;
567
568         if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
569                 KASSERT(ic->ic_receive_pdu == NULL,
570                     ("ic->ic_receive_pdu != NULL"));
571                 request = icl_pdu_new_empty(ic, M_NOWAIT);
572                 if (request == NULL) {
573                         ICL_DEBUG("failed to allocate PDU; "
574                             "dropping connection");
575                         icl_conn_fail(ic);
576                         return (NULL);
577                 }
578                 ic->ic_receive_pdu = request;
579         } else {
580                 KASSERT(ic->ic_receive_pdu != NULL,
581                     ("ic->ic_receive_pdu == NULL"));
582                 request = ic->ic_receive_pdu;
583         }
584
585         if (*availablep < ic->ic_receive_len) {
586 #if 0
587                 ICL_DEBUG("not enough data; need %zd, "
588                     "have %zd", ic->ic_receive_len, *availablep);
589 #endif
590                 return (NULL);
591         }
592
593         switch (ic->ic_receive_state) {
594         case ICL_CONN_STATE_BHS:
595                 //ICL_DEBUG("receiving BHS");
596                 error = icl_pdu_receive_bhs(request, availablep);
597                 if (error != 0) {
598                         ICL_DEBUG("failed to receive BHS; "
599                             "dropping connection");
600                         break;
601                 }
602
603                 /*
604                  * We don't enforce any limit for AHS length;
605                  * its length is stored in 8 bit field.
606                  */
607
608                 len = icl_pdu_data_segment_length(request);
609                 if (len > ic->ic_max_data_segment_length) {
610                         ICL_WARN("received data segment "
611                             "length %zd is larger than negotiated "
612                             "MaxDataSegmentLength %zd; "
613                             "dropping connection",
614                             len, ic->ic_max_data_segment_length);
615                         error = EINVAL;
616                         break;
617                 }
618
619                 ic->ic_receive_state = ICL_CONN_STATE_AHS;
620                 ic->ic_receive_len = icl_pdu_ahs_length(request);
621                 break;
622
623         case ICL_CONN_STATE_AHS:
624                 //ICL_DEBUG("receiving AHS");
625                 error = icl_pdu_receive_ahs(request, availablep);
626                 if (error != 0) {
627                         ICL_DEBUG("failed to receive AHS; "
628                             "dropping connection");
629                         break;
630                 }
631                 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
632                 if (ic->ic_header_crc32c == false)
633                         ic->ic_receive_len = 0;
634                 else
635                         ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
636                 break;
637
638         case ICL_CONN_STATE_HEADER_DIGEST:
639                 //ICL_DEBUG("receiving header digest");
640                 error = icl_pdu_check_header_digest(request, availablep);
641                 if (error != 0) {
642                         ICL_DEBUG("header digest failed; "
643                             "dropping connection");
644                         break;
645                 }
646
647                 ic->ic_receive_state = ICL_CONN_STATE_DATA;
648                 ic->ic_receive_len =
649                     icl_pdu_data_segment_receive_len(request);
650                 break;
651
652         case ICL_CONN_STATE_DATA:
653                 //ICL_DEBUG("receiving data segment");
654                 error = icl_pdu_receive_data_segment(request, availablep,
655                     &more_needed);
656                 if (error != 0) {
657                         ICL_DEBUG("failed to receive data segment;"
658                             "dropping connection");
659                         break;
660                 }
661
662                 if (more_needed)
663                         break;
664
665                 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
666                 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
667                         ic->ic_receive_len = 0;
668                 else
669                         ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
670                 break;
671
672         case ICL_CONN_STATE_DATA_DIGEST:
673                 //ICL_DEBUG("receiving data digest");
674                 error = icl_pdu_check_data_digest(request, availablep);
675                 if (error != 0) {
676                         ICL_DEBUG("data digest failed; "
677                             "dropping connection");
678                         break;
679                 }
680
681                 /*
682                  * We've received complete PDU; reset the receive state machine
683                  * and return the PDU.
684                  */
685                 ic->ic_receive_state = ICL_CONN_STATE_BHS;
686                 ic->ic_receive_len = sizeof(struct iscsi_bhs);
687                 ic->ic_receive_pdu = NULL;
688                 return (request);
689
690         default:
691                 panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
692         }
693
694         if (error != 0) {
695                 /*
696                  * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
697                  * and will get freed in icl_soft_conn_close().
698                  */
699                 icl_conn_fail(ic);
700         }
701
702         return (NULL);
703 }
704
705 static void
706 icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
707 {
708         struct icl_pdu *response;
709         struct socket *so;
710
711         so = ic->ic_socket;
712
713         /*
714          * This can never happen; we're careful to only mess with ic->ic_socket
715          * pointer when the send/receive threads are not running.
716          */
717         KASSERT(so != NULL, ("NULL socket"));
718
719         for (;;) {
720                 if (ic->ic_disconnecting)
721                         return;
722
723                 if (so->so_error != 0) {
724                         ICL_DEBUG("connection error %d; "
725                             "dropping connection", so->so_error);
726                         icl_conn_fail(ic);
727                         return;
728                 }
729
730                 /*
731                  * Loop until we have a complete PDU or there is not enough
732                  * data in the socket buffer.
733                  */
734                 if (available < ic->ic_receive_len) {
735 #if 0
736                         ICL_DEBUG("not enough data; have %zd, "
737                             "need %zd", available,
738                             ic->ic_receive_len);
739 #endif
740                         return;
741                 }
742
743                 response = icl_conn_receive_pdu(ic, &available);
744                 if (response == NULL)
745                         continue;
746
747                 if (response->ip_ahs_len > 0) {
748                         ICL_WARN("received PDU with unsupported "
749                             "AHS; opcode 0x%x; dropping connection",
750                             response->ip_bhs->bhs_opcode);
751                         icl_pdu_free(response);
752                         icl_conn_fail(ic);
753                         return;
754                 }
755
756                 (ic->ic_receive)(response);
757         }
758 }
759
760 static void
761 icl_receive_thread(void *arg)
762 {
763         struct icl_conn *ic;
764         size_t available;
765         struct socket *so;
766
767         ic = arg;
768         so = ic->ic_socket;
769
770         ICL_CONN_LOCK(ic);
771         ic->ic_receive_running = true;
772         ICL_CONN_UNLOCK(ic);
773
774         for (;;) {
775                 if (ic->ic_disconnecting) {
776                         //ICL_DEBUG("terminating");
777                         break;
778                 }
779
780                 /*
781                  * Set the low watermark, to be checked by
782                  * soreadable() in icl_soupcall_receive()
783                  * to avoid unneccessary wakeups until there
784                  * is enough data received to read the PDU.
785                  */
786                 SOCKBUF_LOCK(&so->so_rcv);
787                 available = sbavail(&so->so_rcv);
788                 if (available < ic->ic_receive_len) {
789                         so->so_rcv.sb_lowat = ic->ic_receive_len;
790                         cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
791                 } else
792                         so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
793                 SOCKBUF_UNLOCK(&so->so_rcv);
794
795                 icl_conn_receive_pdus(ic, available);
796         }
797
798         ICL_CONN_LOCK(ic);
799         ic->ic_receive_running = false;
800         cv_signal(&ic->ic_send_cv);
801         ICL_CONN_UNLOCK(ic);
802         kthread_exit();
803 }
804
805 static int
806 icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
807 {
808         struct icl_conn *ic;
809
810         if (!soreadable(so))
811                 return (SU_OK);
812
813         ic = arg;
814         cv_signal(&ic->ic_receive_cv);
815         return (SU_OK);
816 }
817
818 static int
819 icl_pdu_finalize(struct icl_pdu *request)
820 {
821         size_t padding, pdu_len;
822         uint32_t digest, zero = 0;
823         int ok;
824         struct icl_conn *ic;
825
826         ic = request->ip_conn;
827
828         icl_pdu_set_data_segment_length(request, request->ip_data_len);
829
830         pdu_len = icl_pdu_size(request);
831
832         if (ic->ic_header_crc32c) {
833                 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
834                 ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
835                     (void *)&digest);
836                 if (ok != 1) {
837                         ICL_WARN("failed to append header digest");
838                         return (1);
839                 }
840         }
841
842         if (request->ip_data_len != 0) {
843                 padding = icl_pdu_padding(request);
844                 if (padding > 0) {
845                         ok = m_append(request->ip_data_mbuf, padding,
846                             (void *)&zero);
847                         if (ok != 1) {
848                                 ICL_WARN("failed to append padding");
849                                 return (1);
850                         }
851                 }
852
853                 if (ic->ic_data_crc32c) {
854                         digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
855
856                         ok = m_append(request->ip_data_mbuf, sizeof(digest),
857                             (void *)&digest);
858                         if (ok != 1) {
859                                 ICL_WARN("failed to append data digest");
860                                 return (1);
861                         }
862                 }
863
864                 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
865                 request->ip_data_mbuf = NULL;
866         }
867
868         request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
869
870         return (0);
871 }
872
873 static void
874 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
875 {
876         struct icl_pdu *request, *request2;
877         struct socket *so;
878         size_t available, size, size2;
879         int coalesced, error;
880
881         ICL_CONN_LOCK_ASSERT_NOT(ic);
882
883         so = ic->ic_socket;
884
885         SOCKBUF_LOCK(&so->so_snd);
886         /*
887          * Check how much space do we have for transmit.  We can't just
888          * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
889          * as it always frees the mbuf chain passed to it, even in case
890          * of error.
891          */
892         available = sbspace(&so->so_snd);
893
894         /*
895          * Notify the socket upcall that we don't need wakeups
896          * for the time being.
897          */
898         so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
899         SOCKBUF_UNLOCK(&so->so_snd);
900
901         while (!STAILQ_EMPTY(queue)) {
902                 request = STAILQ_FIRST(queue);
903                 size = icl_pdu_size(request);
904                 if (available < size) {
905
906                         /*
907                          * Set the low watermark, to be checked by
908                          * sowriteable() in icl_soupcall_send()
909                          * to avoid unneccessary wakeups until there
910                          * is enough space for the PDU to fit.
911                          */
912                         SOCKBUF_LOCK(&so->so_snd);
913                         available = sbspace(&so->so_snd);
914                         if (available < size) {
915 #if 1
916                                 ICL_DEBUG("no space to send; "
917                                     "have %zd, need %zd",
918                                     available, size);
919 #endif
920                                 so->so_snd.sb_lowat = size;
921                                 SOCKBUF_UNLOCK(&so->so_snd);
922                                 return;
923                         }
924                         SOCKBUF_UNLOCK(&so->so_snd);
925                 }
926                 STAILQ_REMOVE_HEAD(queue, ip_next);
927                 error = icl_pdu_finalize(request);
928                 if (error != 0) {
929                         ICL_DEBUG("failed to finalize PDU; "
930                             "dropping connection");
931                         icl_conn_fail(ic);
932                         icl_pdu_free(request);
933                         return;
934                 }
935                 if (coalesce) {
936                         coalesced = 1;
937                         for (;;) {
938                                 request2 = STAILQ_FIRST(queue);
939                                 if (request2 == NULL)
940                                         break;
941                                 size2 = icl_pdu_size(request2);
942                                 if (available < size + size2)
943                                         break;
944                                 STAILQ_REMOVE_HEAD(queue, ip_next);
945                                 error = icl_pdu_finalize(request2);
946                                 if (error != 0) {
947                                         ICL_DEBUG("failed to finalize PDU; "
948                                             "dropping connection");
949                                         icl_conn_fail(ic);
950                                         icl_pdu_free(request);
951                                         icl_pdu_free(request2);
952                                         return;
953                                 }
954                                 m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf);
955                                 request2->ip_bhs_mbuf = NULL;
956                                 request->ip_bhs_mbuf->m_pkthdr.len += size2;
957                                 size += size2;
958                                 STAILQ_REMOVE_AFTER(queue, request, ip_next);
959                                 icl_pdu_free(request2);
960                                 coalesced++;
961                         }
962 #if 0
963                         if (coalesced > 1) {
964                                 ICL_DEBUG("coalesced %d PDUs into %zd bytes",
965                                     coalesced, size);
966                         }
967 #endif
968                 }
969                 available -= size;
970                 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
971                     NULL, MSG_DONTWAIT, curthread);
972                 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
973                 if (error != 0) {
974                         ICL_DEBUG("failed to send PDU, error %d; "
975                             "dropping connection", error);
976                         icl_conn_fail(ic);
977                         icl_pdu_free(request);
978                         return;
979                 }
980                 icl_pdu_free(request);
981         }
982 }
983
984 static void
985 icl_send_thread(void *arg)
986 {
987         struct icl_conn *ic;
988         struct icl_pdu_stailq queue;
989
990         ic = arg;
991
992         STAILQ_INIT(&queue);
993
994         ICL_CONN_LOCK(ic);
995         ic->ic_send_running = true;
996
997         for (;;) {
998                 for (;;) {
999                         /*
1000                          * If the local queue is empty, populate it from
1001                          * the main one.  This way the icl_conn_send_pdus()
1002                          * can go through all the queued PDUs without holding
1003                          * any locks.
1004                          */
1005                         if (STAILQ_EMPTY(&queue))
1006                                 STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);
1007
1008                         ic->ic_check_send_space = false;
1009                         ICL_CONN_UNLOCK(ic);
1010                         icl_conn_send_pdus(ic, &queue);
1011                         ICL_CONN_LOCK(ic);
1012
1013                         /*
1014                          * The icl_soupcall_send() was called since the last
1015                          * call to sbspace(); go around;
1016                          */
1017                         if (ic->ic_check_send_space)
1018                                 continue;
1019
1020                         /*
1021                          * Local queue is empty, but we still have PDUs
1022                          * in the main one; go around.
1023                          */
1024                         if (STAILQ_EMPTY(&queue) &&
1025                             !STAILQ_EMPTY(&ic->ic_to_send))
1026                                 continue;
1027
1028                         /*
1029                          * There might be some stuff in the local queue,
1030                          * which didn't get sent due to not having enough send
1031                          * space.  Wait for socket upcall.
1032                          */
1033                         break;
1034                 }
1035
1036                 if (ic->ic_disconnecting) {
1037                         //ICL_DEBUG("terminating");
1038                         break;
1039                 }
1040
1041                 cv_wait(&ic->ic_send_cv, ic->ic_lock);
1042         }
1043
1044         /*
1045          * We're exiting; move PDUs back to the main queue, so they can
1046          * get freed properly.  At this point ordering doesn't matter.
1047          */
1048         STAILQ_CONCAT(&ic->ic_to_send, &queue);
1049
1050         ic->ic_send_running = false;
1051         cv_signal(&ic->ic_send_cv);
1052         ICL_CONN_UNLOCK(ic);
1053         kthread_exit();
1054 }
1055
1056 static int
1057 icl_soupcall_send(struct socket *so, void *arg, int waitflag)
1058 {
1059         struct icl_conn *ic;
1060
1061         if (!sowriteable(so))
1062                 return (SU_OK);
1063
1064         ic = arg;
1065
1066         ICL_CONN_LOCK(ic);
1067         ic->ic_check_send_space = true;
1068         ICL_CONN_UNLOCK(ic);
1069
1070         cv_signal(&ic->ic_send_cv);
1071
1072         return (SU_OK);
1073 }
1074
1075 static int
1076 icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len,
1077     int flags)
1078 {
1079         struct mbuf *mb, *newmb;
1080         size_t copylen, off = 0;
1081
1082         KASSERT(len > 0, ("len == 0"));
1083
1084         newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR);
1085         if (newmb == NULL) {
1086                 ICL_WARN("failed to allocate mbuf for %zd bytes", len);
1087                 return (ENOMEM);
1088         }
1089
1090         for (mb = newmb; mb != NULL; mb = mb->m_next) {
1091                 copylen = min(M_TRAILINGSPACE(mb), len - off);
1092                 memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
1093                 mb->m_len = copylen;
1094                 off += copylen;
1095         }
1096         KASSERT(off == len, ("%s: off != len", __func__));
1097
1098         if (request->ip_data_mbuf == NULL) {
1099                 request->ip_data_mbuf = newmb;
1100                 request->ip_data_len = len;
1101         } else {
1102                 m_cat(request->ip_data_mbuf, newmb);
1103                 request->ip_data_len += len;
1104         }
1105
1106         return (0);
1107 }
1108
1109 int
1110 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
1111     const void *addr, size_t len, int flags)
1112 {
1113
1114         return (icl_pdu_append_data(request, addr, len, flags));
1115 }
1116
1117 static void
1118 icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len)
1119 {
1120
1121         m_copydata(ip->ip_data_mbuf, off, len, addr);
1122 }
1123
1124 void
1125 icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
1126     size_t off, void *addr, size_t len)
1127 {
1128
1129         return (icl_pdu_get_data(ip, off, addr, len));
1130 }
1131
1132 static void
1133 icl_pdu_queue(struct icl_pdu *ip)
1134 {
1135         struct icl_conn *ic;
1136
1137         ic = ip->ip_conn;
1138
1139         ICL_CONN_LOCK_ASSERT(ic);
1140
1141         if (ic->ic_disconnecting || ic->ic_socket == NULL) {
1142                 ICL_DEBUG("icl_pdu_queue on closed connection");
1143                 icl_pdu_free(ip);
1144                 return;
1145         }
1146
1147         if (!STAILQ_EMPTY(&ic->ic_to_send)) {
1148                 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1149                 /*
1150                  * If the queue is not empty, someone else had already
1151                  * signaled the send thread; no need to do that again,
1152                  * just return.
1153                  */
1154                 return;
1155         }
1156
1157         STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1158         cv_signal(&ic->ic_send_cv);
1159 }
1160
/*
 * Connection-level entry point for queueing a PDU for transmission;
 * forwards to icl_pdu_queue(), which finds the connection through the
 * PDU itself.
 */
void
icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{

	(void)ic;
	icl_pdu_queue(ip);
}
1167
1168 static struct icl_conn *
1169 icl_soft_new_conn(const char *name, struct mtx *lock)
1170 {
1171         struct icl_conn *ic;
1172
1173         refcount_acquire(&icl_ncons);
1174
1175         ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO);
1176
1177         STAILQ_INIT(&ic->ic_to_send);
1178         ic->ic_lock = lock;
1179         cv_init(&ic->ic_send_cv, "icl_tx");
1180         cv_init(&ic->ic_receive_cv, "icl_rx");
1181 #ifdef DIAGNOSTIC
1182         refcount_init(&ic->ic_outstanding_pdus, 0);
1183 #endif
1184         ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
1185         ic->ic_name = name;
1186         ic->ic_offload = "None";
1187
1188         return (ic);
1189 }
1190
1191 void
1192 icl_soft_conn_free(struct icl_conn *ic)
1193 {
1194
1195         cv_destroy(&ic->ic_send_cv);
1196         cv_destroy(&ic->ic_receive_cv);
1197         kobj_delete((struct kobj *)ic, M_ICL_SOFT);
1198         refcount_release(&icl_ncons);
1199 }
1200
1201 static int
1202 icl_conn_start(struct icl_conn *ic)
1203 {
1204         size_t minspace;
1205         struct sockopt opt;
1206         int error, one = 1;
1207
1208         ICL_CONN_LOCK(ic);
1209
1210         /*
1211          * XXX: Ugly hack.
1212          */
1213         if (ic->ic_socket == NULL) {
1214                 ICL_CONN_UNLOCK(ic);
1215                 return (EINVAL);
1216         }
1217
1218         ic->ic_receive_state = ICL_CONN_STATE_BHS;
1219         ic->ic_receive_len = sizeof(struct iscsi_bhs);
1220         ic->ic_disconnecting = false;
1221
1222         ICL_CONN_UNLOCK(ic);
1223
1224         /*
1225          * For sendspace, this is required because the current code cannot
1226          * send a PDU in pieces; thus, the minimum buffer size is equal
1227          * to the maximum PDU size.  "+4" is to account for possible padding.
1228          *
1229          * What we should actually do here is to use autoscaling, but set
1230          * some minimal buffer size to "minspace".  I don't know a way to do
1231          * that, though.
1232          */
1233         minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
1234             ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
1235         if (sendspace < minspace) {
1236                 ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
1237                     minspace);
1238                 sendspace = minspace;
1239         }
1240         if (recvspace < minspace) {
1241                 ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
1242                     minspace);
1243                 recvspace = minspace;
1244         }
1245
1246         error = soreserve(ic->ic_socket, sendspace, recvspace);
1247         if (error != 0) {
1248                 ICL_WARN("soreserve failed with error %d", error);
1249                 icl_soft_conn_close(ic);
1250                 return (error);
1251         }
1252         ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
1253         ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;
1254
1255         /*
1256          * Disable Nagle.
1257          */
1258         bzero(&opt, sizeof(opt));
1259         opt.sopt_dir = SOPT_SET;
1260         opt.sopt_level = IPPROTO_TCP;
1261         opt.sopt_name = TCP_NODELAY;
1262         opt.sopt_val = &one;
1263         opt.sopt_valsize = sizeof(one);
1264         error = sosetopt(ic->ic_socket, &opt);
1265         if (error != 0) {
1266                 ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
1267                 icl_soft_conn_close(ic);
1268                 return (error);
1269         }
1270
1271         /*
1272          * Start threads.
1273          */
1274         error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
1275             ic->ic_name);
1276         if (error != 0) {
1277                 ICL_WARN("kthread_add(9) failed with error %d", error);
1278                 icl_soft_conn_close(ic);
1279                 return (error);
1280         }
1281
1282         error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
1283             ic->ic_name);
1284         if (error != 0) {
1285                 ICL_WARN("kthread_add(9) failed with error %d", error);
1286                 icl_soft_conn_close(ic);
1287                 return (error);
1288         }
1289
1290         /*
1291          * Register socket upcall, to get notified about incoming PDUs
1292          * and free space to send outgoing ones.
1293          */
1294         SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1295         soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
1296         SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1297         SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1298         soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
1299         SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1300
1301         return (0);
1302 }
1303
1304 int
1305 icl_soft_conn_handoff(struct icl_conn *ic, int fd)
1306 {
1307         struct file *fp;
1308         struct socket *so;
1309         cap_rights_t rights;
1310         int error;
1311
1312         ICL_CONN_LOCK_ASSERT_NOT(ic);
1313
1314         /*
1315          * Steal the socket from userland.
1316          */
1317         error = fget(curthread, fd,
1318             cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1319         if (error != 0)
1320                 return (error);
1321         if (fp->f_type != DTYPE_SOCKET) {
1322                 fdrop(fp, curthread);
1323                 return (EINVAL);
1324         }
1325         so = fp->f_data;
1326         if (so->so_type != SOCK_STREAM) {
1327                 fdrop(fp, curthread);
1328                 return (EINVAL);
1329         }
1330
1331         ICL_CONN_LOCK(ic);
1332
1333         if (ic->ic_socket != NULL) {
1334                 ICL_CONN_UNLOCK(ic);
1335                 fdrop(fp, curthread);
1336                 return (EBUSY);
1337         }
1338
1339         ic->ic_socket = fp->f_data;
1340         fp->f_ops = &badfileops;
1341         fp->f_data = NULL;
1342         fdrop(fp, curthread);
1343         ICL_CONN_UNLOCK(ic);
1344
1345         error = icl_conn_start(ic);
1346
1347         return (error);
1348 }
1349
1350 void
1351 icl_soft_conn_close(struct icl_conn *ic)
1352 {
1353         struct icl_pdu *pdu;
1354
1355         ICL_CONN_LOCK_ASSERT_NOT(ic);
1356
1357         ICL_CONN_LOCK(ic);
1358         if (ic->ic_socket == NULL) {
1359                 ICL_CONN_UNLOCK(ic);
1360                 return;
1361         }
1362
1363         /*
1364          * Deregister socket upcalls.
1365          */
1366         ICL_CONN_UNLOCK(ic);
1367         SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1368         if (ic->ic_socket->so_snd.sb_upcall != NULL)
1369                 soupcall_clear(ic->ic_socket, SO_SND);
1370         SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1371         SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1372         if (ic->ic_socket->so_rcv.sb_upcall != NULL)
1373                 soupcall_clear(ic->ic_socket, SO_RCV);
1374         SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1375         ICL_CONN_LOCK(ic);
1376
1377         ic->ic_disconnecting = true;
1378
1379         /*
1380          * Wake up the threads, so they can properly terminate.
1381          */
1382         while (ic->ic_receive_running || ic->ic_send_running) {
1383                 //ICL_DEBUG("waiting for send/receive threads to terminate");
1384                 cv_signal(&ic->ic_receive_cv);
1385                 cv_signal(&ic->ic_send_cv);
1386                 cv_wait(&ic->ic_send_cv, ic->ic_lock);
1387         }
1388         //ICL_DEBUG("send/receive threads terminated");
1389
1390         ICL_CONN_UNLOCK(ic);
1391         soclose(ic->ic_socket);
1392         ICL_CONN_LOCK(ic);
1393         ic->ic_socket = NULL;
1394
1395         if (ic->ic_receive_pdu != NULL) {
1396                 //ICL_DEBUG("freeing partially received PDU");
1397                 icl_pdu_free(ic->ic_receive_pdu);
1398                 ic->ic_receive_pdu = NULL;
1399         }
1400
1401         /*
1402          * Remove any outstanding PDUs from the send queue.
1403          */
1404         while (!STAILQ_EMPTY(&ic->ic_to_send)) {
1405                 pdu = STAILQ_FIRST(&ic->ic_to_send);
1406                 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
1407                 icl_pdu_free(pdu);
1408         }
1409
1410         KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
1411             ("destroying session with non-empty send queue"));
1412 #ifdef DIAGNOSTIC
1413         KASSERT(ic->ic_outstanding_pdus == 0,
1414             ("destroying session with %d outstanding PDUs",
1415              ic->ic_outstanding_pdus));
1416 #endif
1417         ICL_CONN_UNLOCK(ic);
1418 }
1419
1420 bool
1421 icl_soft_conn_connected(struct icl_conn *ic)
1422 {
1423         ICL_CONN_LOCK_ASSERT_NOT(ic);
1424
1425         ICL_CONN_LOCK(ic);
1426         if (ic->ic_socket == NULL) {
1427                 ICL_CONN_UNLOCK(ic);
1428                 return (false);
1429         }
1430         if (ic->ic_socket->so_error != 0) {
1431                 ICL_CONN_UNLOCK(ic);
1432                 return (false);
1433         }
1434         ICL_CONN_UNLOCK(ic);
1435         return (true);
1436 }
1437
/*
 * Limits callback handed to icl_register(): stores 128 KiB in "*limitp"
 * and returns 0.
 */
static int
icl_soft_limits(size_t *limitp)
{
	const size_t kib = 1024;

	*limitp = 128 * kib;

	return (0);
}
1446
1447 #ifdef ICL_KERNEL_PROXY
1448 int
1449 icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so)
1450 {
1451         int error;
1452
1453         ICL_CONN_LOCK_ASSERT_NOT(ic);
1454
1455         if (so->so_type != SOCK_STREAM)
1456                 return (EINVAL);
1457
1458         ICL_CONN_LOCK(ic);
1459         if (ic->ic_socket != NULL) {
1460                 ICL_CONN_UNLOCK(ic);
1461                 return (EBUSY);
1462         }
1463         ic->ic_socket = so;
1464         ICL_CONN_UNLOCK(ic);
1465
1466         error = icl_conn_start(ic);
1467
1468         return (error);
1469 }
1470 #endif /* ICL_KERNEL_PROXY */
1471
1472 static int
1473 icl_soft_load(void)
1474 {
1475         int error;
1476
1477         icl_pdu_zone = uma_zcreate("icl_pdu",
1478             sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
1479             UMA_ALIGN_PTR, 0);
1480         refcount_init(&icl_ncons, 0);
1481
1482         /*
1483          * The reason we call this "none" is that to the user,
1484          * it's known as "offload driver"; "offload driver: soft"
1485          * doesn't make much sense.
1486          */
1487         error = icl_register("none", 0, icl_soft_limits, icl_soft_new_conn);
1488         KASSERT(error == 0, ("failed to register"));
1489
1490         return (error);
1491 }
1492
1493 static int
1494 icl_soft_unload(void)
1495 {
1496
1497         if (icl_ncons != 0)
1498                 return (EBUSY);
1499
1500         icl_unregister("none");
1501
1502         uma_zdestroy(icl_pdu_zone);
1503
1504         return (0);
1505 }
1506
1507 static int
1508 icl_soft_modevent(module_t mod, int what, void *arg)
1509 {
1510
1511         switch (what) {
1512         case MOD_LOAD:
1513                 return (icl_soft_load());
1514         case MOD_UNLOAD:
1515                 return (icl_soft_unload());
1516         default:
1517                 return (EINVAL);
1518         }
1519 }
1520
1521 moduledata_t icl_soft_data = {
1522         "icl_soft",
1523         icl_soft_modevent,
1524         0
1525 };
1526
1527 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
1528 MODULE_DEPEND(icl_soft, icl, 1, 1, 1);
1529 MODULE_VERSION(icl_soft, 1);