]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/iscsi/icl.c
Fix LUN discovery for targets that don't support REPORT_LUNS, broken
[FreeBSD/FreeBSD.git] / sys / dev / iscsi / icl.c
1 /*-
2  * Copyright (c) 2012 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  */
30
31 /*
32  * iSCSI Common Layer.  It's used by both the initiator and target to send
33  * and receive iSCSI PDUs.
34  */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 #include <sys/param.h>
40 #include <sys/capsicum.h>
41 #include <sys/condvar.h>
42 #include <sys/conf.h>
43 #include <sys/file.h>
44 #include <sys/kernel.h>
45 #include <sys/kthread.h>
46 #include <sys/lock.h>
47 #include <sys/mbuf.h>
48 #include <sys/mutex.h>
49 #include <sys/module.h>
50 #include <sys/protosw.h>
51 #include <sys/socket.h>
52 #include <sys/socketvar.h>
53 #include <sys/sysctl.h>
54 #include <sys/systm.h>
55 #include <sys/sx.h>
56 #include <sys/uio.h>
57 #include <vm/uma.h>
58 #include <netinet/in.h>
59 #include <netinet/tcp.h>
60
61 #include <dev/iscsi/icl.h>
62 #include <dev/iscsi/iscsi_proto.h>
63
/*
 * Run-time knobs, exported below the kern.icl sysctl node.
 */
SYSCTL_NODE(_kern, OID_AUTO, icl, CTLFLAG_RD, 0, "iSCSI Common Layer");
/* Message verbosity: ICL_WARN() prints when > 0, ICL_DEBUG() when > 1. */
static int debug = 1;
SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RWTUN,
    &debug, 0, "Enable debug messages");
/* When non-zero, icl_conn_send_pdus() tries to merge queued PDUs. */
static int coalesce = 1;
SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &coalesce, 0, "Try to coalesce PDUs before sending");
/*
 * Upper bound on how many data segment bytes the receive path waits for
 * at once; see icl_pdu_data_segment_receive_len().
 */
static int partial_receive_len = 128 * 1024;
SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
    &partial_receive_len, 0, "Minimum read size for partially received "
    "data segment");
/* Socket buffer sizes; not referenced in this part of the file. */
static int sendspace = 1048576;
SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1048576;
SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");

/* UMA zones; icl_pdu_zone backs icl_pdu_new() and icl_pdu_free(). */
static uma_zone_t icl_conn_zone;
static uma_zone_t icl_pdu_zone;

/* NOTE(review): presumably a count of live connections -- confirm at users. */
static volatile u_int   icl_ncons;
86
/*
 * Debug-level message.  Printed only when the "debug" knob is above 1;
 * the calling function's name is prepended and a newline is appended.
 */
#define ICL_DEBUG(X, ...)                                               \
        do {                                                            \
                if (debug > 1)                                          \
                        printf("%s: " X "\n", __func__, ## __VA_ARGS__);\
        } while (0)

/*
 * Warning-level message.  Printed whenever the "debug" knob is above 0.
 */
#define ICL_WARN(X, ...)                                                \
        do {                                                            \
                if (debug > 0) {                                        \
                        printf("WARNING: %s: " X "\n",                  \
                            __func__, ## __VA_ARGS__);                  \
                }                                                       \
        } while (0)

/*
 * Connection lock helpers.  The argument is parenthesized so that any
 * pointer-valued expression (e.g. a cast or a conditional), not just
 * a plain identifier, expands correctly.
 */
#define ICL_CONN_LOCK(X)                mtx_lock((X)->ic_lock)
#define ICL_CONN_UNLOCK(X)              mtx_unlock((X)->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)         mtx_assert((X)->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)     mtx_assert((X)->ic_lock, MA_NOTOWNED)

/* Singly-linked tail queue of PDUs, linked through their ip_next field. */
STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
107
108 static void
109 icl_conn_fail(struct icl_conn *ic)
110 {
111         if (ic->ic_socket == NULL)
112                 return;
113
114         /*
115          * XXX
116          */
117         ic->ic_socket->so_error = EDOOFUS;
118         (ic->ic_error)(ic);
119 }
120
121 static struct mbuf *
122 icl_conn_receive(struct icl_conn *ic, size_t len)
123 {
124         struct uio uio;
125         struct socket *so;
126         struct mbuf *m;
127         int error, flags;
128
129         so = ic->ic_socket;
130
131         memset(&uio, 0, sizeof(uio));
132         uio.uio_resid = len;
133
134         flags = MSG_DONTWAIT;
135         error = soreceive(so, NULL, &uio, &m, NULL, &flags);
136         if (error != 0) {
137                 ICL_DEBUG("soreceive error %d", error);
138                 return (NULL);
139         }
140         if (uio.uio_resid != 0) {
141                 m_freem(m);
142                 ICL_DEBUG("short read");
143                 return (NULL);
144         }
145
146         return (m);
147 }
148
149 static struct icl_pdu *
150 icl_pdu_new(struct icl_conn *ic, int flags)
151 {
152         struct icl_pdu *ip;
153
154 #ifdef DIAGNOSTIC
155         refcount_acquire(&ic->ic_outstanding_pdus);
156 #endif
157         ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
158         if (ip == NULL) {
159                 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
160 #ifdef DIAGNOSTIC
161                 refcount_release(&ic->ic_outstanding_pdus);
162 #endif
163                 return (NULL);
164         }
165
166         ip->ip_conn = ic;
167
168         return (ip);
169 }
170
171 void
172 icl_pdu_free(struct icl_pdu *ip)
173 {
174         struct icl_conn *ic;
175
176         ic = ip->ip_conn;
177
178         m_freem(ip->ip_bhs_mbuf);
179         m_freem(ip->ip_ahs_mbuf);
180         m_freem(ip->ip_data_mbuf);
181         uma_zfree(icl_pdu_zone, ip);
182 #ifdef DIAGNOSTIC
183         refcount_release(&ic->ic_outstanding_pdus);
184 #endif
185 }
186
187 /*
188  * Allocate icl_pdu with empty BHS to fill up by the caller.
189  */
190 struct icl_pdu *
191 icl_pdu_new_bhs(struct icl_conn *ic, int flags)
192 {
193         struct icl_pdu *ip;
194
195         ip = icl_pdu_new(ic, flags);
196         if (ip == NULL)
197                 return (NULL);
198
199         ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs),
200             flags, MT_DATA, M_PKTHDR);
201         if (ip->ip_bhs_mbuf == NULL) {
202                 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
203                 icl_pdu_free(ip);
204                 return (NULL);
205         }
206         ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
207         memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
208         ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
209
210         return (ip);
211 }
212
213 static int
214 icl_pdu_ahs_length(const struct icl_pdu *request)
215 {
216
217         return (request->ip_bhs->bhs_total_ahs_len * 4);
218 }
219
220 size_t
221 icl_pdu_data_segment_length(const struct icl_pdu *request)
222 {
223         uint32_t len = 0;
224
225         len += request->ip_bhs->bhs_data_segment_len[0];
226         len <<= 8;
227         len += request->ip_bhs->bhs_data_segment_len[1];
228         len <<= 8;
229         len += request->ip_bhs->bhs_data_segment_len[2];
230
231         return (len);
232 }
233
234 static void
235 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
236 {
237
238         response->ip_bhs->bhs_data_segment_len[2] = len;
239         response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
240         response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
241 }
242
243 static size_t
244 icl_pdu_padding(const struct icl_pdu *ip)
245 {
246
247         if ((ip->ip_data_len % 4) != 0)
248                 return (4 - (ip->ip_data_len % 4));
249
250         return (0);
251 }
252
253 static size_t
254 icl_pdu_size(const struct icl_pdu *response)
255 {
256         size_t len;
257
258         KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
259
260         len = sizeof(struct iscsi_bhs) + response->ip_data_len +
261             icl_pdu_padding(response);
262         if (response->ip_conn->ic_header_crc32c)
263                 len += ISCSI_HEADER_DIGEST_SIZE;
264         if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
265                 len += ISCSI_DATA_DIGEST_SIZE;
266
267         return (len);
268 }
269
270 static int
271 icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
272 {
273         struct mbuf *m;
274
275         m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs));
276         if (m == NULL) {
277                 ICL_DEBUG("failed to receive BHS");
278                 return (-1);
279         }
280
281         request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs));
282         if (request->ip_bhs_mbuf == NULL) {
283                 ICL_WARN("m_pullup failed");
284                 return (-1);
285         }
286         request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *);
287
288         /*
289          * XXX: For architectures with strict alignment requirements
290          *      we may need to allocate ip_bhs and copy the data into it.
291          *      For some reason, though, not doing this doesn't seem
292          *      to cause problems; tested on sparc64.
293          */
294
295         *availablep -= sizeof(struct iscsi_bhs);
296         return (0);
297 }
298
299 static int
300 icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
301 {
302
303         request->ip_ahs_len = icl_pdu_ahs_length(request);
304         if (request->ip_ahs_len == 0)
305                 return (0);
306
307         request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
308             request->ip_ahs_len);
309         if (request->ip_ahs_mbuf == NULL) {
310                 ICL_DEBUG("failed to receive AHS");
311                 return (-1);
312         }
313
314         *availablep -= request->ip_ahs_len;
315         return (0);
316 }
317
318 static uint32_t
319 icl_mbuf_to_crc32c(const struct mbuf *m0)
320 {
321         uint32_t digest = 0xffffffff;
322         const struct mbuf *m;
323
324         for (m = m0; m != NULL; m = m->m_next)
325                 digest = calculate_crc32c(digest,
326                     mtod(m, const void *), m->m_len);
327
328         digest = digest ^ 0xffffffff;
329
330         return (digest);
331 }
332
333 static int
334 icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
335 {
336         struct mbuf *m;
337         uint32_t received_digest, valid_digest;
338
339         if (request->ip_conn->ic_header_crc32c == false)
340                 return (0);
341
342         m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE);
343         if (m == NULL) {
344                 ICL_DEBUG("failed to receive header digest");
345                 return (-1);
346         }
347
348         CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
349         m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest);
350         m_freem(m);
351
352         *availablep -= ISCSI_HEADER_DIGEST_SIZE;
353
354         /*
355          * XXX: Handle AHS.
356          */
357         valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
358         if (received_digest != valid_digest) {
359                 ICL_WARN("header digest check failed; got 0x%x, "
360                     "should be 0x%x", received_digest, valid_digest);
361                 return (-1);
362         }
363
364         return (0);
365 }
366
367 /*
368  * Return the number of bytes that should be waiting in the receive socket
369  * before icl_pdu_receive_data_segment() gets called.
370  */
371 static size_t
372 icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
373 {
374         size_t len;
375
376         len = icl_pdu_data_segment_length(request);
377         if (len == 0)
378                 return (0);
379
380         /*
381          * Account for the parts of data segment already read from
382          * the socket buffer.
383          */
384         KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
385         len -= request->ip_data_len;
386
387         /*
388          * Don't always wait for the full data segment to be delivered
389          * to the socket; this might badly affect performance due to
390          * TCP window scaling.
391          */
392         if (len > partial_receive_len) {
393 #if 0
394                 ICL_DEBUG("need %zd bytes of data, limiting to %zd",
395                     len, partial_receive_len));
396 #endif
397                 len = partial_receive_len;
398
399                 return (len);
400         }
401
402         /*
403          * Account for padding.  Note that due to the way code is written,
404          * the icl_pdu_receive_data_segment() must always receive padding
405          * along with the last part of data segment, because it would be
406          * impossible to tell whether we've already received the full data
407          * segment including padding, or without it.
408          */
409         if ((len % 4) != 0)
410                 len += 4 - (len % 4);
411
412 #if 0
413         ICL_DEBUG("need %zd bytes of data", len));
414 #endif
415
416         return (len);
417 }
418
419 static int
420 icl_pdu_receive_data_segment(struct icl_pdu *request,
421     size_t *availablep, bool *more_neededp)
422 {
423         struct icl_conn *ic;
424         size_t len, padding = 0;
425         struct mbuf *m;
426
427         ic = request->ip_conn;
428
429         *more_neededp = false;
430         ic->ic_receive_len = 0;
431
432         len = icl_pdu_data_segment_length(request);
433         if (len == 0)
434                 return (0);
435
436         if ((len % 4) != 0)
437                 padding = 4 - (len % 4);
438
439         /*
440          * Account for already received parts of data segment.
441          */
442         KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
443         len -= request->ip_data_len;
444
445         if (len + padding > *availablep) {
446                 /*
447                  * Not enough data in the socket buffer.  Receive as much
448                  * as we can.  Don't receive padding, since, obviously, it's
449                  * not the end of data segment yet.
450                  */
451 #if 0
452                 ICL_DEBUG("limited from %zd to %zd",
453                     len + padding, *availablep - padding));
454 #endif
455                 len = *availablep - padding;
456                 *more_neededp = true;
457                 padding = 0;
458         }
459
460         /*
461          * Must not try to receive padding without at least one byte
462          * of actual data segment.
463          */
464         if (len > 0) {
465                 m = icl_conn_receive(request->ip_conn, len + padding);
466                 if (m == NULL) {
467                         ICL_DEBUG("failed to receive data segment");
468                         return (-1);
469                 }
470
471                 if (request->ip_data_mbuf == NULL)
472                         request->ip_data_mbuf = m;
473                 else
474                         m_cat(request->ip_data_mbuf, m);
475
476                 request->ip_data_len += len;
477                 *availablep -= len + padding;
478         } else
479                 ICL_DEBUG("len 0");
480
481         if (*more_neededp)
482                 ic->ic_receive_len =
483                     icl_pdu_data_segment_receive_len(request);
484
485         return (0);
486 }
487
488 static int
489 icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
490 {
491         struct mbuf *m;
492         uint32_t received_digest, valid_digest;
493
494         if (request->ip_conn->ic_data_crc32c == false)
495                 return (0);
496
497         if (request->ip_data_len == 0)
498                 return (0);
499
500         m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE);
501         if (m == NULL) {
502                 ICL_DEBUG("failed to receive data digest");
503                 return (-1);
504         }
505
506         CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
507         m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest);
508         m_freem(m);
509
510         *availablep -= ISCSI_DATA_DIGEST_SIZE;
511
512         /*
513          * Note that ip_data_mbuf also contains padding; since digest
514          * calculation is supposed to include that, we iterate over
515          * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
516          */
517         valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
518         if (received_digest != valid_digest) {
519                 ICL_WARN("data digest check failed; got 0x%x, "
520                     "should be 0x%x", received_digest, valid_digest);
521                 return (-1);
522         }
523
524         return (0);
525 }
526
527 /*
528  * Somewhat contrary to the name, this attempts to receive only one
529  * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
530  */
531 static struct icl_pdu *
532 icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
533 {
534         struct icl_pdu *request;
535         struct socket *so;
536         size_t len;
537         int error;
538         bool more_needed;
539
540         so = ic->ic_socket;
541
542         if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
543                 KASSERT(ic->ic_receive_pdu == NULL,
544                     ("ic->ic_receive_pdu != NULL"));
545                 request = icl_pdu_new(ic, M_NOWAIT);
546                 if (request == NULL) {
547                         ICL_DEBUG("failed to allocate PDU; "
548                             "dropping connection");
549                         icl_conn_fail(ic);
550                         return (NULL);
551                 }
552                 ic->ic_receive_pdu = request;
553         } else {
554                 KASSERT(ic->ic_receive_pdu != NULL,
555                     ("ic->ic_receive_pdu == NULL"));
556                 request = ic->ic_receive_pdu;
557         }
558
559         if (*availablep < ic->ic_receive_len) {
560 #if 0
561                 ICL_DEBUG("not enough data; need %zd, "
562                     "have %zd", ic->ic_receive_len, *availablep);
563 #endif
564                 return (NULL);
565         }
566
567         switch (ic->ic_receive_state) {
568         case ICL_CONN_STATE_BHS:
569                 //ICL_DEBUG("receiving BHS");
570                 error = icl_pdu_receive_bhs(request, availablep);
571                 if (error != 0) {
572                         ICL_DEBUG("failed to receive BHS; "
573                             "dropping connection");
574                         break;
575                 }
576
577                 /*
578                  * We don't enforce any limit for AHS length;
579                  * its length is stored in 8 bit field.
580                  */
581
582                 len = icl_pdu_data_segment_length(request);
583                 if (len > ic->ic_max_data_segment_length) {
584                         ICL_WARN("received data segment "
585                             "length %zd is larger than negotiated "
586                             "MaxDataSegmentLength %zd; "
587                             "dropping connection",
588                             len, ic->ic_max_data_segment_length);
589                         error = EINVAL;
590                         break;
591                 }
592
593                 ic->ic_receive_state = ICL_CONN_STATE_AHS;
594                 ic->ic_receive_len = icl_pdu_ahs_length(request);
595                 break;
596
597         case ICL_CONN_STATE_AHS:
598                 //ICL_DEBUG("receiving AHS");
599                 error = icl_pdu_receive_ahs(request, availablep);
600                 if (error != 0) {
601                         ICL_DEBUG("failed to receive AHS; "
602                             "dropping connection");
603                         break;
604                 }
605                 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
606                 if (ic->ic_header_crc32c == false)
607                         ic->ic_receive_len = 0;
608                 else
609                         ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
610                 break;
611
612         case ICL_CONN_STATE_HEADER_DIGEST:
613                 //ICL_DEBUG("receiving header digest");
614                 error = icl_pdu_check_header_digest(request, availablep);
615                 if (error != 0) {
616                         ICL_DEBUG("header digest failed; "
617                             "dropping connection");
618                         break;
619                 }
620
621                 ic->ic_receive_state = ICL_CONN_STATE_DATA;
622                 ic->ic_receive_len =
623                     icl_pdu_data_segment_receive_len(request);
624                 break;
625
626         case ICL_CONN_STATE_DATA:
627                 //ICL_DEBUG("receiving data segment");
628                 error = icl_pdu_receive_data_segment(request, availablep,
629                     &more_needed);
630                 if (error != 0) {
631                         ICL_DEBUG("failed to receive data segment;"
632                             "dropping connection");
633                         break;
634                 }
635
636                 if (more_needed)
637                         break;
638
639                 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
640                 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
641                         ic->ic_receive_len = 0;
642                 else
643                         ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
644                 break;
645
646         case ICL_CONN_STATE_DATA_DIGEST:
647                 //ICL_DEBUG("receiving data digest");
648                 error = icl_pdu_check_data_digest(request, availablep);
649                 if (error != 0) {
650                         ICL_DEBUG("data digest failed; "
651                             "dropping connection");
652                         break;
653                 }
654
655                 /*
656                  * We've received complete PDU; reset the receive state machine
657                  * and return the PDU.
658                  */
659                 ic->ic_receive_state = ICL_CONN_STATE_BHS;
660                 ic->ic_receive_len = sizeof(struct iscsi_bhs);
661                 ic->ic_receive_pdu = NULL;
662                 return (request);
663
664         default:
665                 panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
666         }
667
668         if (error != 0) {
669                 /*
670                  * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
671                  * and will get freed in icl_conn_close().
672                  */
673                 icl_conn_fail(ic);
674         }
675
676         return (NULL);
677 }
678
679 static void
680 icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
681 {
682         struct icl_pdu *response;
683         struct socket *so;
684
685         so = ic->ic_socket;
686
687         /*
688          * This can never happen; we're careful to only mess with ic->ic_socket
689          * pointer when the send/receive threads are not running.
690          */
691         KASSERT(so != NULL, ("NULL socket"));
692
693         for (;;) {
694                 if (ic->ic_disconnecting)
695                         return;
696
697                 if (so->so_error != 0) {
698                         ICL_DEBUG("connection error %d; "
699                             "dropping connection", so->so_error);
700                         icl_conn_fail(ic);
701                         return;
702                 }
703
704                 /*
705                  * Loop until we have a complete PDU or there is not enough
706                  * data in the socket buffer.
707                  */
708                 if (available < ic->ic_receive_len) {
709 #if 0
710                         ICL_DEBUG("not enough data; have %zd, "
711                             "need %zd", available,
712                             ic->ic_receive_len);
713 #endif
714                         return;
715                 }
716
717                 response = icl_conn_receive_pdu(ic, &available);
718                 if (response == NULL)
719                         continue;
720
721                 if (response->ip_ahs_len > 0) {
722                         ICL_WARN("received PDU with unsupported "
723                             "AHS; opcode 0x%x; dropping connection",
724                             response->ip_bhs->bhs_opcode);
725                         icl_pdu_free(response);
726                         icl_conn_fail(ic);
727                         return;
728                 }
729
730                 (ic->ic_receive)(response);
731         }
732 }
733
734 static void
735 icl_receive_thread(void *arg)
736 {
737         struct icl_conn *ic;
738         size_t available;
739         struct socket *so;
740
741         ic = arg;
742         so = ic->ic_socket;
743
744         ICL_CONN_LOCK(ic);
745         ic->ic_receive_running = true;
746         ICL_CONN_UNLOCK(ic);
747
748         for (;;) {
749                 if (ic->ic_disconnecting) {
750                         //ICL_DEBUG("terminating");
751                         break;
752                 }
753
754                 /*
755                  * Set the low watermark, to be checked by
756                  * soreadable() in icl_soupcall_receive()
757                  * to avoid unneccessary wakeups until there
758                  * is enough data received to read the PDU.
759                  */
760                 SOCKBUF_LOCK(&so->so_rcv);
761                 available = so->so_rcv.sb_cc;
762                 if (available < ic->ic_receive_len) {
763                         so->so_rcv.sb_lowat = ic->ic_receive_len;
764                         cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
765                 } else
766                         so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
767                 SOCKBUF_UNLOCK(&so->so_rcv);
768
769                 icl_conn_receive_pdus(ic, available);
770         }
771
772         ICL_CONN_LOCK(ic);
773         ic->ic_receive_running = false;
774         ICL_CONN_UNLOCK(ic);
775         kthread_exit();
776 }
777
778 static int
779 icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
780 {
781         struct icl_conn *ic;
782
783         if (!soreadable(so))
784                 return (SU_OK);
785
786         ic = arg;
787         cv_signal(&ic->ic_receive_cv);
788         return (SU_OK);
789 }
790
791 static int
792 icl_pdu_finalize(struct icl_pdu *request)
793 {
794         size_t padding, pdu_len;
795         uint32_t digest, zero = 0;
796         int ok;
797         struct icl_conn *ic;
798
799         ic = request->ip_conn;
800
801         icl_pdu_set_data_segment_length(request, request->ip_data_len);
802
803         pdu_len = icl_pdu_size(request);
804
805         if (ic->ic_header_crc32c) {
806                 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
807                 ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
808                     (void *)&digest);
809                 if (ok != 1) {
810                         ICL_WARN("failed to append header digest");
811                         return (1);
812                 }
813         }
814
815         if (request->ip_data_len != 0) {
816                 padding = icl_pdu_padding(request);
817                 if (padding > 0) {
818                         ok = m_append(request->ip_data_mbuf, padding,
819                             (void *)&zero);
820                         if (ok != 1) {
821                                 ICL_WARN("failed to append padding");
822                                 return (1);
823                         }
824                 }
825
826                 if (ic->ic_data_crc32c) {
827                         digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
828
829                         ok = m_append(request->ip_data_mbuf, sizeof(digest),
830                             (void *)&digest);
831                         if (ok != 1) {
832                                 ICL_WARN("failed to append data digest");
833                                 return (1);
834                         }
835                 }
836
837                 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
838                 request->ip_data_mbuf = NULL;
839         }
840
841         request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
842
843         return (0);
844 }
845
846 static void
847 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
848 {
849         struct icl_pdu *request, *request2;
850         struct socket *so;
851         size_t available, size, size2;
852         int coalesced, error;
853
854         ICL_CONN_LOCK_ASSERT_NOT(ic);
855
856         so = ic->ic_socket;
857
858         SOCKBUF_LOCK(&so->so_snd);
859         /*
860          * Check how much space do we have for transmit.  We can't just
861          * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
862          * as it always frees the mbuf chain passed to it, even in case
863          * of error.
864          */
865         available = sbspace(&so->so_snd);
866
867         /*
868          * Notify the socket upcall that we don't need wakeups
869          * for the time being.
870          */
871         so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
872         SOCKBUF_UNLOCK(&so->so_snd);
873
874         while (!STAILQ_EMPTY(queue)) {
875                 if (ic->ic_disconnecting)
876                         return;
877                 request = STAILQ_FIRST(queue);
878                 size = icl_pdu_size(request);
879                 if (available < size) {
880
881                         /*
882                          * Set the low watermark, to be checked by
883                          * sowriteable() in icl_soupcall_send()
884                          * to avoid unneccessary wakeups until there
885                          * is enough space for the PDU to fit.
886                          */
887                         SOCKBUF_LOCK(&so->so_snd);
888                         available = sbspace(&so->so_snd);
889                         if (available < size) {
890 #if 1
891                                 ICL_DEBUG("no space to send; "
892                                     "have %zd, need %zd",
893                                     available, size);
894 #endif
895                                 so->so_snd.sb_lowat = size;
896                                 SOCKBUF_UNLOCK(&so->so_snd);
897                                 return;
898                         }
899                         SOCKBUF_UNLOCK(&so->so_snd);
900                 }
901                 STAILQ_REMOVE_HEAD(queue, ip_next);
902                 error = icl_pdu_finalize(request);
903                 if (error != 0) {
904                         ICL_DEBUG("failed to finalize PDU; "
905                             "dropping connection");
906                         icl_conn_fail(ic);
907                         icl_pdu_free(request);
908                         return;
909                 }
910                 if (coalesce) {
911                         coalesced = 1;
912                         for (;;) {
913                                 request2 = STAILQ_FIRST(queue);
914                                 if (request2 == NULL)
915                                         break;
916                                 size2 = icl_pdu_size(request2);
917                                 if (available < size + size2)
918                                         break;
919                                 STAILQ_REMOVE_HEAD(queue, ip_next);
920                                 error = icl_pdu_finalize(request2);
921                                 if (error != 0) {
922                                         ICL_DEBUG("failed to finalize PDU; "
923                                             "dropping connection");
924                                         icl_conn_fail(ic);
925                                         icl_pdu_free(request);
926                                         icl_pdu_free(request2);
927                                         return;
928                                 }
929                                 m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf);
930                                 request2->ip_bhs_mbuf = NULL;
931                                 request->ip_bhs_mbuf->m_pkthdr.len += size2;
932                                 size += size2;
933                                 STAILQ_REMOVE_AFTER(queue, request, ip_next);
934                                 icl_pdu_free(request2);
935                                 coalesced++;
936                         }
937 #if 0
938                         if (coalesced > 1) {
939                                 ICL_DEBUG("coalesced %d PDUs into %zd bytes",
940                                     coalesced, size);
941                         }
942 #endif
943                 }
944                 available -= size;
945                 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
946                     NULL, MSG_DONTWAIT, curthread);
947                 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
948                 if (error != 0) {
949                         ICL_DEBUG("failed to send PDU, error %d; "
950                             "dropping connection", error);
951                         icl_conn_fail(ic);
952                         icl_pdu_free(request);
953                         return;
954                 }
955                 icl_pdu_free(request);
956         }
957 }
958
959 static void
960 icl_send_thread(void *arg)
961 {
962         struct icl_conn *ic;
963         struct icl_pdu_stailq queue;
964
965         ic = arg;
966
967         STAILQ_INIT(&queue);
968
969         ICL_CONN_LOCK(ic);
970         ic->ic_send_running = true;
971
972         for (;;) {
973                 if (ic->ic_disconnecting) {
974                         //ICL_DEBUG("terminating");
975                         break;
976                 }
977
978                 for (;;) {
979                         /*
980                          * If the local queue is empty, populate it from
981                          * the main one.  This way the icl_conn_send_pdus()
982                          * can go through all the queued PDUs without holding
983                          * any locks.
984                          */
985                         if (STAILQ_EMPTY(&queue))
986                                 STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);
987
988                         ic->ic_check_send_space = false;
989                         ICL_CONN_UNLOCK(ic);
990                         icl_conn_send_pdus(ic, &queue);
991                         ICL_CONN_LOCK(ic);
992
993                         /*
994                          * The icl_soupcall_send() was called since the last
995                          * call to sbspace(); go around;
996                          */
997                         if (ic->ic_check_send_space)
998                                 continue;
999
1000                         /*
1001                          * Local queue is empty, but we still have PDUs
1002                          * in the main one; go around.
1003                          */
1004                         if (STAILQ_EMPTY(&queue) &&
1005                             !STAILQ_EMPTY(&ic->ic_to_send))
1006                                 continue;
1007
1008                         /*
1009                          * There might be some stuff in the local queue,
1010                          * which didn't get sent due to not having enough send
1011                          * space.  Wait for socket upcall.
1012                          */
1013                         break;
1014                 }
1015
1016                 cv_wait(&ic->ic_send_cv, ic->ic_lock);
1017         }
1018
1019         /*
1020          * We're exiting; move PDUs back to the main queue, so they can
1021          * get freed properly.  At this point ordering doesn't matter.
1022          */
1023         STAILQ_CONCAT(&ic->ic_to_send, &queue);
1024
1025         ic->ic_send_running = false;
1026         ICL_CONN_UNLOCK(ic);
1027         kthread_exit();
1028 }
1029
1030 static int
1031 icl_soupcall_send(struct socket *so, void *arg, int waitflag)
1032 {
1033         struct icl_conn *ic;
1034
1035         if (!sowriteable(so))
1036                 return (SU_OK);
1037
1038         ic = arg;
1039
1040         ICL_CONN_LOCK(ic);
1041         ic->ic_check_send_space = true;
1042         ICL_CONN_UNLOCK(ic);
1043
1044         cv_signal(&ic->ic_send_cv);
1045
1046         return (SU_OK);
1047 }
1048
1049 int
1050 icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len,
1051     int flags)
1052 {
1053         struct mbuf *mb, *newmb;
1054         size_t copylen, off = 0;
1055
1056         KASSERT(len > 0, ("len == 0"));
1057
1058         newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR);
1059         if (newmb == NULL) {
1060                 ICL_WARN("failed to allocate mbuf for %zd bytes", len);
1061                 return (ENOMEM);
1062         }
1063
1064         for (mb = newmb; mb != NULL; mb = mb->m_next) {
1065                 copylen = min(M_TRAILINGSPACE(mb), len - off);
1066                 memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
1067                 mb->m_len = copylen;
1068                 off += copylen;
1069         }
1070         KASSERT(off == len, ("%s: off != len", __func__));
1071
1072         if (request->ip_data_mbuf == NULL) {
1073                 request->ip_data_mbuf = newmb;
1074                 request->ip_data_len = len;
1075         } else {
1076                 m_cat(request->ip_data_mbuf, newmb);
1077                 request->ip_data_len += len;
1078         }
1079
1080         return (0);
1081 }
1082
1083 void
1084 icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len)
1085 {
1086
1087         m_copydata(ip->ip_data_mbuf, off, len, addr);
1088 }
1089
1090 void
1091 icl_pdu_queue(struct icl_pdu *ip)
1092 {
1093         struct icl_conn *ic;
1094
1095         ic = ip->ip_conn;
1096
1097         ICL_CONN_LOCK_ASSERT(ic);
1098
1099         if (ic->ic_disconnecting || ic->ic_socket == NULL) {
1100                 ICL_DEBUG("icl_pdu_queue on closed connection");
1101                 icl_pdu_free(ip);
1102                 return;
1103         }
1104
1105         if (!STAILQ_EMPTY(&ic->ic_to_send)) {
1106                 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1107                 /*
1108                  * If the queue is not empty, someone else had already
1109                  * signaled the send thread; no need to do that again,
1110                  * just return.
1111                  */
1112                 return;
1113         }
1114
1115         STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1116         cv_signal(&ic->ic_send_cv);
1117 }
1118
1119 struct icl_conn *
1120 icl_conn_new(const char *name, struct mtx *lock)
1121 {
1122         struct icl_conn *ic;
1123
1124         refcount_acquire(&icl_ncons);
1125
1126         ic = uma_zalloc(icl_conn_zone, M_WAITOK | M_ZERO);
1127
1128         STAILQ_INIT(&ic->ic_to_send);
1129         ic->ic_lock = lock;
1130         cv_init(&ic->ic_send_cv, "icl_tx");
1131         cv_init(&ic->ic_receive_cv, "icl_rx");
1132 #ifdef DIAGNOSTIC
1133         refcount_init(&ic->ic_outstanding_pdus, 0);
1134 #endif
1135         ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
1136         ic->ic_name = name;
1137
1138         return (ic);
1139 }
1140
1141 void
1142 icl_conn_free(struct icl_conn *ic)
1143 {
1144
1145         cv_destroy(&ic->ic_send_cv);
1146         cv_destroy(&ic->ic_receive_cv);
1147         uma_zfree(icl_conn_zone, ic);
1148         refcount_release(&icl_ncons);
1149 }
1150
1151 static int
1152 icl_conn_start(struct icl_conn *ic)
1153 {
1154         size_t minspace;
1155         struct sockopt opt;
1156         int error, one = 1;
1157
1158         ICL_CONN_LOCK(ic);
1159
1160         /*
1161          * XXX: Ugly hack.
1162          */
1163         if (ic->ic_socket == NULL) {
1164                 ICL_CONN_UNLOCK(ic);
1165                 return (EINVAL);
1166         }
1167
1168         ic->ic_receive_state = ICL_CONN_STATE_BHS;
1169         ic->ic_receive_len = sizeof(struct iscsi_bhs);
1170         ic->ic_disconnecting = false;
1171
1172         ICL_CONN_UNLOCK(ic);
1173
1174         /*
1175          * For sendspace, this is required because the current code cannot
1176          * send a PDU in pieces; thus, the minimum buffer size is equal
1177          * to the maximum PDU size.  "+4" is to account for possible padding.
1178          *
1179          * What we should actually do here is to use autoscaling, but set
1180          * some minimal buffer size to "minspace".  I don't know a way to do
1181          * that, though.
1182          */
1183         minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
1184             ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
1185         if (sendspace < minspace) {
1186                 ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
1187                     minspace);
1188                 sendspace = minspace;
1189         }
1190         if (recvspace < minspace) {
1191                 ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
1192                     minspace);
1193                 recvspace = minspace;
1194         }
1195
1196         error = soreserve(ic->ic_socket, sendspace, recvspace);
1197         if (error != 0) {
1198                 ICL_WARN("soreserve failed with error %d", error);
1199                 icl_conn_close(ic);
1200                 return (error);
1201         }
1202
1203         /*
1204          * Disable Nagle.
1205          */
1206         bzero(&opt, sizeof(opt));
1207         opt.sopt_dir = SOPT_SET;
1208         opt.sopt_level = IPPROTO_TCP;
1209         opt.sopt_name = TCP_NODELAY;
1210         opt.sopt_val = &one;
1211         opt.sopt_valsize = sizeof(one);
1212         error = sosetopt(ic->ic_socket, &opt);
1213         if (error != 0) {
1214                 ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
1215                 icl_conn_close(ic);
1216                 return (error);
1217         }
1218
1219         /*
1220          * Start threads.
1221          */
1222         error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
1223             ic->ic_name);
1224         if (error != 0) {
1225                 ICL_WARN("kthread_add(9) failed with error %d", error);
1226                 icl_conn_close(ic);
1227                 return (error);
1228         }
1229
1230         error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
1231             ic->ic_name);
1232         if (error != 0) {
1233                 ICL_WARN("kthread_add(9) failed with error %d", error);
1234                 icl_conn_close(ic);
1235                 return (error);
1236         }
1237
1238         /*
1239          * Register socket upcall, to get notified about incoming PDUs
1240          * and free space to send outgoing ones.
1241          */
1242         SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1243         soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
1244         SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1245         SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1246         soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
1247         SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1248
1249         return (0);
1250 }
1251
1252 int
1253 icl_conn_handoff(struct icl_conn *ic, int fd)
1254 {
1255         struct file *fp;
1256         struct socket *so;
1257         cap_rights_t rights;
1258         int error;
1259
1260         ICL_CONN_LOCK_ASSERT_NOT(ic);
1261
1262         /*
1263          * Steal the socket from userland.
1264          */
1265         error = fget(curthread, fd,
1266             cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1267         if (error != 0)
1268                 return (error);
1269         if (fp->f_type != DTYPE_SOCKET) {
1270                 fdrop(fp, curthread);
1271                 return (EINVAL);
1272         }
1273         so = fp->f_data;
1274         if (so->so_type != SOCK_STREAM) {
1275                 fdrop(fp, curthread);
1276                 return (EINVAL);
1277         }
1278
1279         ICL_CONN_LOCK(ic);
1280
1281         if (ic->ic_socket != NULL) {
1282                 ICL_CONN_UNLOCK(ic);
1283                 fdrop(fp, curthread);
1284                 return (EBUSY);
1285         }
1286
1287         ic->ic_socket = fp->f_data;
1288         fp->f_ops = &badfileops;
1289         fp->f_data = NULL;
1290         fdrop(fp, curthread);
1291         ICL_CONN_UNLOCK(ic);
1292
1293         error = icl_conn_start(ic);
1294
1295         return (error);
1296 }
1297
1298 void
1299 icl_conn_shutdown(struct icl_conn *ic)
1300 {
1301         ICL_CONN_LOCK_ASSERT_NOT(ic);
1302
1303         ICL_CONN_LOCK(ic);
1304         if (ic->ic_socket == NULL) {
1305                 ICL_CONN_UNLOCK(ic);
1306                 return;
1307         }
1308         ICL_CONN_UNLOCK(ic);
1309
1310         soshutdown(ic->ic_socket, SHUT_RDWR);
1311 }
1312
1313 void
1314 icl_conn_close(struct icl_conn *ic)
1315 {
1316         struct icl_pdu *pdu;
1317
1318         ICL_CONN_LOCK_ASSERT_NOT(ic);
1319
1320         ICL_CONN_LOCK(ic);
1321         if (ic->ic_socket == NULL) {
1322                 ICL_CONN_UNLOCK(ic);
1323                 return;
1324         }
1325
1326         /*
1327          * Deregister socket upcalls.
1328          */
1329         ICL_CONN_UNLOCK(ic);
1330         SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1331         if (ic->ic_socket->so_snd.sb_upcall != NULL)
1332                 soupcall_clear(ic->ic_socket, SO_SND);
1333         SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1334         SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1335         if (ic->ic_socket->so_rcv.sb_upcall != NULL)
1336                 soupcall_clear(ic->ic_socket, SO_RCV);
1337         SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1338         ICL_CONN_LOCK(ic);
1339
1340         ic->ic_disconnecting = true;
1341
1342         /*
1343          * Wake up the threads, so they can properly terminate.
1344          */
1345         cv_signal(&ic->ic_receive_cv);
1346         cv_signal(&ic->ic_send_cv);
1347         while (ic->ic_receive_running || ic->ic_send_running) {
1348                 //ICL_DEBUG("waiting for send/receive threads to terminate");
1349                 ICL_CONN_UNLOCK(ic);
1350                 cv_signal(&ic->ic_receive_cv);
1351                 cv_signal(&ic->ic_send_cv);
1352                 pause("icl_close", 1 * hz);
1353                 ICL_CONN_LOCK(ic);
1354         }
1355         //ICL_DEBUG("send/receive threads terminated");
1356
1357         ICL_CONN_UNLOCK(ic);
1358         soclose(ic->ic_socket);
1359         ICL_CONN_LOCK(ic);
1360         ic->ic_socket = NULL;
1361
1362         if (ic->ic_receive_pdu != NULL) {
1363                 //ICL_DEBUG("freeing partially received PDU");
1364                 icl_pdu_free(ic->ic_receive_pdu);
1365                 ic->ic_receive_pdu = NULL;
1366         }
1367
1368         /*
1369          * Remove any outstanding PDUs from the send queue.
1370          */
1371         while (!STAILQ_EMPTY(&ic->ic_to_send)) {
1372                 pdu = STAILQ_FIRST(&ic->ic_to_send);
1373                 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
1374                 icl_pdu_free(pdu);
1375         }
1376
1377         KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
1378             ("destroying session with non-empty send queue"));
1379 #ifdef DIAGNOSTIC
1380         KASSERT(ic->ic_outstanding_pdus == 0,
1381             ("destroying session with %d outstanding PDUs",
1382              ic->ic_outstanding_pdus));
1383 #endif
1384         ICL_CONN_UNLOCK(ic);
1385 }
1386
1387 bool
1388 icl_conn_connected(struct icl_conn *ic)
1389 {
1390         ICL_CONN_LOCK_ASSERT_NOT(ic);
1391
1392         ICL_CONN_LOCK(ic);
1393         if (ic->ic_socket == NULL) {
1394                 ICL_CONN_UNLOCK(ic);
1395                 return (false);
1396         }
1397         if (ic->ic_socket->so_error != 0) {
1398                 ICL_CONN_UNLOCK(ic);
1399                 return (false);
1400         }
1401         ICL_CONN_UNLOCK(ic);
1402         return (true);
1403 }
1404
#ifdef ICL_KERNEL_PROXY
/*
 * Attach an in-kernel socket to the connection and start running the
 * connection on it.
 *
 * Must be called without the connection lock held.
 *
 * Returns EINVAL if "so" is not a SOCK_STREAM socket, EBUSY if the
 * connection already has a socket, and otherwise whatever
 * icl_conn_start() returns.
 */
int
icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so)
{
	bool busy;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	if (so->so_type != SOCK_STREAM)
		return (EINVAL);

	ICL_CONN_LOCK(ic);
	busy = (ic->ic_socket != NULL);
	if (!busy)
		ic->ic_socket = so;
	ICL_CONN_UNLOCK(ic);

	if (busy)
		return (EBUSY);

	return (icl_conn_start(ic));
}
#endif /* ICL_KERNEL_PROXY */
1429
1430 static int
1431 icl_unload(void)
1432 {
1433
1434         if (icl_ncons != 0)
1435                 return (EBUSY);
1436
1437         uma_zdestroy(icl_conn_zone);
1438         uma_zdestroy(icl_pdu_zone);
1439
1440         return (0);
1441 }
1442
1443 static void
1444 icl_load(void)
1445 {
1446
1447         icl_conn_zone = uma_zcreate("icl_conn",
1448             sizeof(struct icl_conn), NULL, NULL, NULL, NULL,
1449             UMA_ALIGN_PTR, 0);
1450         icl_pdu_zone = uma_zcreate("icl_pdu",
1451             sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
1452             UMA_ALIGN_PTR, 0);
1453
1454         refcount_init(&icl_ncons, 0);
1455 }
1456
1457 static int
1458 icl_modevent(module_t mod, int what, void *arg)
1459 {
1460
1461         switch (what) {
1462         case MOD_LOAD:
1463                 icl_load();
1464                 return (0);
1465         case MOD_UNLOAD:
1466                 return (icl_unload());
1467         default:
1468                 return (EINVAL);
1469         }
1470 }
1471
1472 moduledata_t icl_data = {
1473         "icl",
1474         icl_modevent,
1475         0
1476 };
1477
1478 DECLARE_MODULE(icl, icl_data, SI_SUB_DRIVERS, SI_ORDER_FIRST);
1479 MODULE_VERSION(icl, 1);