]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/iscsi/icl.c
Merge llvm 3.5.0 release from ^/vendor/llvm/dist, resolve conflicts, and
[FreeBSD/FreeBSD.git] / sys / dev / iscsi / icl.c
1 /*-
2  * Copyright (c) 2012 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  */
30
31 /*
32  * iSCSI Common Layer.  It's used by both the initiator and target to send
33  * and receive iSCSI PDUs.
34  */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 #include <sys/param.h>
40 #include <sys/capsicum.h>
41 #include <sys/condvar.h>
42 #include <sys/conf.h>
43 #include <sys/file.h>
44 #include <sys/kernel.h>
45 #include <sys/kthread.h>
46 #include <sys/lock.h>
47 #include <sys/mbuf.h>
48 #include <sys/mutex.h>
49 #include <sys/module.h>
50 #include <sys/protosw.h>
51 #include <sys/socket.h>
52 #include <sys/socketvar.h>
53 #include <sys/sysctl.h>
54 #include <sys/systm.h>
55 #include <sys/sx.h>
56 #include <sys/uio.h>
57 #include <vm/uma.h>
58 #include <netinet/in.h>
59 #include <netinet/tcp.h>
60
61 #include <dev/iscsi/icl.h>
62 #include <dev/iscsi/iscsi_proto.h>
63
64 SYSCTL_NODE(_kern, OID_AUTO, icl, CTLFLAG_RD, 0, "iSCSI Common Layer");
65 static int debug = 1;
66 SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RWTUN,
67     &debug, 0, "Enable debug messages");
68 static int coalesce = 1;
69 SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
70     &coalesce, 0, "Try to coalesce PDUs before sending");
71 static int partial_receive_len = 128 * 1024;
72 SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
73     &partial_receive_len, 0, "Minimum read size for partially received "
74     "data segment");
75 static int sendspace = 1048576;
76 SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
77     &sendspace, 0, "Default send socket buffer size");
78 static int recvspace = 1048576;
79 SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
80     &recvspace, 0, "Default receive socket buffer size");
81
82 static uma_zone_t icl_conn_zone;
83 static uma_zone_t icl_pdu_zone;
84
85 static volatile u_int   icl_ncons;
86
/*
 * Debug-level message, prefixed with the name of the calling function
 * and terminated with a newline.  Printed only when debug > 1.
 */
#define ICL_DEBUG(X, ...)                                               \
        do {                                                            \
                if (debug > 1) {                                        \
                        printf("%s: " X "\n",                           \
                            __func__, ## __VA_ARGS__);                  \
                }                                                       \
        } while (0)

/*
 * Warning-level message; same layout as ICL_DEBUG() with a "WARNING: "
 * prefix.  Printed only when debug > 0.
 */
#define ICL_WARN(X, ...)                                                \
        do {                                                            \
                if (debug > 0) {                                        \
                        printf("WARNING: %s: " X "\n",                  \
                            __func__, ## __VA_ARGS__);                  \
                }                                                       \
        } while (0)
100
/*
 * Wrappers around the per-connection mutex (ic_lock).  The argument is
 * parenthesized so that any pointer-valued expression can be passed, not
 * only a plain identifier.
 */
#define ICL_CONN_LOCK(X)                mtx_lock((X)->ic_lock)
#define ICL_CONN_UNLOCK(X)              mtx_unlock((X)->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)         mtx_assert((X)->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)     mtx_assert((X)->ic_lock, MA_NOTOWNED)

/* Singly-linked tail queue of PDUs, linked through icl_pdu's ip_next. */
STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
107
108 static void
109 icl_conn_fail(struct icl_conn *ic)
110 {
111         if (ic->ic_socket == NULL)
112                 return;
113
114         /*
115          * XXX
116          */
117         ic->ic_socket->so_error = EDOOFUS;
118         (ic->ic_error)(ic);
119 }
120
121 static struct mbuf *
122 icl_conn_receive(struct icl_conn *ic, size_t len)
123 {
124         struct uio uio;
125         struct socket *so;
126         struct mbuf *m;
127         int error, flags;
128
129         so = ic->ic_socket;
130
131         memset(&uio, 0, sizeof(uio));
132         uio.uio_resid = len;
133
134         flags = MSG_DONTWAIT;
135         error = soreceive(so, NULL, &uio, &m, NULL, &flags);
136         if (error != 0) {
137                 ICL_DEBUG("soreceive error %d", error);
138                 return (NULL);
139         }
140         if (uio.uio_resid != 0) {
141                 m_freem(m);
142                 ICL_DEBUG("short read");
143                 return (NULL);
144         }
145
146         return (m);
147 }
148
149 static struct icl_pdu *
150 icl_pdu_new_empty(struct icl_conn *ic, int flags)
151 {
152         struct icl_pdu *ip;
153
154 #ifdef DIAGNOSTIC
155         refcount_acquire(&ic->ic_outstanding_pdus);
156 #endif
157         ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
158         if (ip == NULL) {
159                 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
160 #ifdef DIAGNOSTIC
161                 refcount_release(&ic->ic_outstanding_pdus);
162 #endif
163                 return (NULL);
164         }
165
166         ip->ip_conn = ic;
167
168         return (ip);
169 }
170
171 void
172 icl_pdu_free(struct icl_pdu *ip)
173 {
174         struct icl_conn *ic;
175
176         ic = ip->ip_conn;
177
178         m_freem(ip->ip_bhs_mbuf);
179         m_freem(ip->ip_ahs_mbuf);
180         m_freem(ip->ip_data_mbuf);
181         uma_zfree(icl_pdu_zone, ip);
182 #ifdef DIAGNOSTIC
183         refcount_release(&ic->ic_outstanding_pdus);
184 #endif
185 }
186
187 /*
188  * Allocate icl_pdu with empty BHS to fill up by the caller.
189  */
190 struct icl_pdu *
191 icl_pdu_new(struct icl_conn *ic, int flags)
192 {
193         struct icl_pdu *ip;
194
195         ip = icl_pdu_new_empty(ic, flags);
196         if (ip == NULL)
197                 return (NULL);
198
199         ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs),
200             flags, MT_DATA, M_PKTHDR);
201         if (ip->ip_bhs_mbuf == NULL) {
202                 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
203                 icl_pdu_free(ip);
204                 return (NULL);
205         }
206         ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
207         memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
208         ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
209
210         return (ip);
211 }
212
213 static int
214 icl_pdu_ahs_length(const struct icl_pdu *request)
215 {
216
217         return (request->ip_bhs->bhs_total_ahs_len * 4);
218 }
219
220 size_t
221 icl_pdu_data_segment_length(const struct icl_pdu *request)
222 {
223         uint32_t len = 0;
224
225         len += request->ip_bhs->bhs_data_segment_len[0];
226         len <<= 8;
227         len += request->ip_bhs->bhs_data_segment_len[1];
228         len <<= 8;
229         len += request->ip_bhs->bhs_data_segment_len[2];
230
231         return (len);
232 }
233
234 static void
235 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
236 {
237
238         response->ip_bhs->bhs_data_segment_len[2] = len;
239         response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
240         response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
241 }
242
243 static size_t
244 icl_pdu_padding(const struct icl_pdu *ip)
245 {
246
247         if ((ip->ip_data_len % 4) != 0)
248                 return (4 - (ip->ip_data_len % 4));
249
250         return (0);
251 }
252
253 static size_t
254 icl_pdu_size(const struct icl_pdu *response)
255 {
256         size_t len;
257
258         KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
259
260         len = sizeof(struct iscsi_bhs) + response->ip_data_len +
261             icl_pdu_padding(response);
262         if (response->ip_conn->ic_header_crc32c)
263                 len += ISCSI_HEADER_DIGEST_SIZE;
264         if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
265                 len += ISCSI_DATA_DIGEST_SIZE;
266
267         return (len);
268 }
269
270 static int
271 icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
272 {
273         struct mbuf *m;
274
275         m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs));
276         if (m == NULL) {
277                 ICL_DEBUG("failed to receive BHS");
278                 return (-1);
279         }
280
281         request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs));
282         if (request->ip_bhs_mbuf == NULL) {
283                 ICL_WARN("m_pullup failed");
284                 return (-1);
285         }
286         request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *);
287
288         /*
289          * XXX: For architectures with strict alignment requirements
290          *      we may need to allocate ip_bhs and copy the data into it.
291          *      For some reason, though, not doing this doesn't seem
292          *      to cause problems; tested on sparc64.
293          */
294
295         *availablep -= sizeof(struct iscsi_bhs);
296         return (0);
297 }
298
299 static int
300 icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
301 {
302
303         request->ip_ahs_len = icl_pdu_ahs_length(request);
304         if (request->ip_ahs_len == 0)
305                 return (0);
306
307         request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
308             request->ip_ahs_len);
309         if (request->ip_ahs_mbuf == NULL) {
310                 ICL_DEBUG("failed to receive AHS");
311                 return (-1);
312         }
313
314         *availablep -= request->ip_ahs_len;
315         return (0);
316 }
317
318 static uint32_t
319 icl_mbuf_to_crc32c(const struct mbuf *m0)
320 {
321         uint32_t digest = 0xffffffff;
322         const struct mbuf *m;
323
324         for (m = m0; m != NULL; m = m->m_next)
325                 digest = calculate_crc32c(digest,
326                     mtod(m, const void *), m->m_len);
327
328         digest = digest ^ 0xffffffff;
329
330         return (digest);
331 }
332
333 static int
334 icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
335 {
336         struct mbuf *m;
337         uint32_t received_digest, valid_digest;
338
339         if (request->ip_conn->ic_header_crc32c == false)
340                 return (0);
341
342         m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE);
343         if (m == NULL) {
344                 ICL_DEBUG("failed to receive header digest");
345                 return (-1);
346         }
347
348         CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
349         m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest);
350         m_freem(m);
351
352         *availablep -= ISCSI_HEADER_DIGEST_SIZE;
353
354         /*
355          * XXX: Handle AHS.
356          */
357         valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
358         if (received_digest != valid_digest) {
359                 ICL_WARN("header digest check failed; got 0x%x, "
360                     "should be 0x%x", received_digest, valid_digest);
361                 return (-1);
362         }
363
364         return (0);
365 }
366
367 /*
368  * Return the number of bytes that should be waiting in the receive socket
369  * before icl_pdu_receive_data_segment() gets called.
370  */
371 static size_t
372 icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
373 {
374         size_t len;
375
376         len = icl_pdu_data_segment_length(request);
377         if (len == 0)
378                 return (0);
379
380         /*
381          * Account for the parts of data segment already read from
382          * the socket buffer.
383          */
384         KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
385         len -= request->ip_data_len;
386
387         /*
388          * Don't always wait for the full data segment to be delivered
389          * to the socket; this might badly affect performance due to
390          * TCP window scaling.
391          */
392         if (len > partial_receive_len) {
393 #if 0
394                 ICL_DEBUG("need %zd bytes of data, limiting to %zd",
395                     len, partial_receive_len));
396 #endif
397                 len = partial_receive_len;
398
399                 return (len);
400         }
401
402         /*
403          * Account for padding.  Note that due to the way code is written,
404          * the icl_pdu_receive_data_segment() must always receive padding
405          * along with the last part of data segment, because it would be
406          * impossible to tell whether we've already received the full data
407          * segment including padding, or without it.
408          */
409         if ((len % 4) != 0)
410                 len += 4 - (len % 4);
411
412 #if 0
413         ICL_DEBUG("need %zd bytes of data", len));
414 #endif
415
416         return (len);
417 }
418
419 static int
420 icl_pdu_receive_data_segment(struct icl_pdu *request,
421     size_t *availablep, bool *more_neededp)
422 {
423         struct icl_conn *ic;
424         size_t len, padding = 0;
425         struct mbuf *m;
426
427         ic = request->ip_conn;
428
429         *more_neededp = false;
430         ic->ic_receive_len = 0;
431
432         len = icl_pdu_data_segment_length(request);
433         if (len == 0)
434                 return (0);
435
436         if ((len % 4) != 0)
437                 padding = 4 - (len % 4);
438
439         /*
440          * Account for already received parts of data segment.
441          */
442         KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
443         len -= request->ip_data_len;
444
445         if (len + padding > *availablep) {
446                 /*
447                  * Not enough data in the socket buffer.  Receive as much
448                  * as we can.  Don't receive padding, since, obviously, it's
449                  * not the end of data segment yet.
450                  */
451 #if 0
452                 ICL_DEBUG("limited from %zd to %zd",
453                     len + padding, *availablep - padding));
454 #endif
455                 len = *availablep - padding;
456                 *more_neededp = true;
457                 padding = 0;
458         }
459
460         /*
461          * Must not try to receive padding without at least one byte
462          * of actual data segment.
463          */
464         if (len > 0) {
465                 m = icl_conn_receive(request->ip_conn, len + padding);
466                 if (m == NULL) {
467                         ICL_DEBUG("failed to receive data segment");
468                         return (-1);
469                 }
470
471                 if (request->ip_data_mbuf == NULL)
472                         request->ip_data_mbuf = m;
473                 else
474                         m_cat(request->ip_data_mbuf, m);
475
476                 request->ip_data_len += len;
477                 *availablep -= len + padding;
478         } else
479                 ICL_DEBUG("len 0");
480
481         if (*more_neededp)
482                 ic->ic_receive_len =
483                     icl_pdu_data_segment_receive_len(request);
484
485         return (0);
486 }
487
488 static int
489 icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
490 {
491         struct mbuf *m;
492         uint32_t received_digest, valid_digest;
493
494         if (request->ip_conn->ic_data_crc32c == false)
495                 return (0);
496
497         if (request->ip_data_len == 0)
498                 return (0);
499
500         m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE);
501         if (m == NULL) {
502                 ICL_DEBUG("failed to receive data digest");
503                 return (-1);
504         }
505
506         CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
507         m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest);
508         m_freem(m);
509
510         *availablep -= ISCSI_DATA_DIGEST_SIZE;
511
512         /*
513          * Note that ip_data_mbuf also contains padding; since digest
514          * calculation is supposed to include that, we iterate over
515          * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
516          */
517         valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
518         if (received_digest != valid_digest) {
519                 ICL_WARN("data digest check failed; got 0x%x, "
520                     "should be 0x%x", received_digest, valid_digest);
521                 return (-1);
522         }
523
524         return (0);
525 }
526
527 /*
528  * Somewhat contrary to the name, this attempts to receive only one
529  * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
530  */
531 static struct icl_pdu *
532 icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
533 {
534         struct icl_pdu *request;
535         struct socket *so;
536         size_t len;
537         int error;
538         bool more_needed;
539
540         so = ic->ic_socket;
541
542         if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
543                 KASSERT(ic->ic_receive_pdu == NULL,
544                     ("ic->ic_receive_pdu != NULL"));
545                 request = icl_pdu_new_empty(ic, M_NOWAIT);
546                 if (request == NULL) {
547                         ICL_DEBUG("failed to allocate PDU; "
548                             "dropping connection");
549                         icl_conn_fail(ic);
550                         return (NULL);
551                 }
552                 ic->ic_receive_pdu = request;
553         } else {
554                 KASSERT(ic->ic_receive_pdu != NULL,
555                     ("ic->ic_receive_pdu == NULL"));
556                 request = ic->ic_receive_pdu;
557         }
558
559         if (*availablep < ic->ic_receive_len) {
560 #if 0
561                 ICL_DEBUG("not enough data; need %zd, "
562                     "have %zd", ic->ic_receive_len, *availablep);
563 #endif
564                 return (NULL);
565         }
566
567         switch (ic->ic_receive_state) {
568         case ICL_CONN_STATE_BHS:
569                 //ICL_DEBUG("receiving BHS");
570                 error = icl_pdu_receive_bhs(request, availablep);
571                 if (error != 0) {
572                         ICL_DEBUG("failed to receive BHS; "
573                             "dropping connection");
574                         break;
575                 }
576
577                 /*
578                  * We don't enforce any limit for AHS length;
579                  * its length is stored in 8 bit field.
580                  */
581
582                 len = icl_pdu_data_segment_length(request);
583                 if (len > ic->ic_max_data_segment_length) {
584                         ICL_WARN("received data segment "
585                             "length %zd is larger than negotiated "
586                             "MaxDataSegmentLength %zd; "
587                             "dropping connection",
588                             len, ic->ic_max_data_segment_length);
589                         error = EINVAL;
590                         break;
591                 }
592
593                 ic->ic_receive_state = ICL_CONN_STATE_AHS;
594                 ic->ic_receive_len = icl_pdu_ahs_length(request);
595                 break;
596
597         case ICL_CONN_STATE_AHS:
598                 //ICL_DEBUG("receiving AHS");
599                 error = icl_pdu_receive_ahs(request, availablep);
600                 if (error != 0) {
601                         ICL_DEBUG("failed to receive AHS; "
602                             "dropping connection");
603                         break;
604                 }
605                 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
606                 if (ic->ic_header_crc32c == false)
607                         ic->ic_receive_len = 0;
608                 else
609                         ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
610                 break;
611
612         case ICL_CONN_STATE_HEADER_DIGEST:
613                 //ICL_DEBUG("receiving header digest");
614                 error = icl_pdu_check_header_digest(request, availablep);
615                 if (error != 0) {
616                         ICL_DEBUG("header digest failed; "
617                             "dropping connection");
618                         break;
619                 }
620
621                 ic->ic_receive_state = ICL_CONN_STATE_DATA;
622                 ic->ic_receive_len =
623                     icl_pdu_data_segment_receive_len(request);
624                 break;
625
626         case ICL_CONN_STATE_DATA:
627                 //ICL_DEBUG("receiving data segment");
628                 error = icl_pdu_receive_data_segment(request, availablep,
629                     &more_needed);
630                 if (error != 0) {
631                         ICL_DEBUG("failed to receive data segment;"
632                             "dropping connection");
633                         break;
634                 }
635
636                 if (more_needed)
637                         break;
638
639                 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
640                 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
641                         ic->ic_receive_len = 0;
642                 else
643                         ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
644                 break;
645
646         case ICL_CONN_STATE_DATA_DIGEST:
647                 //ICL_DEBUG("receiving data digest");
648                 error = icl_pdu_check_data_digest(request, availablep);
649                 if (error != 0) {
650                         ICL_DEBUG("data digest failed; "
651                             "dropping connection");
652                         break;
653                 }
654
655                 /*
656                  * We've received complete PDU; reset the receive state machine
657                  * and return the PDU.
658                  */
659                 ic->ic_receive_state = ICL_CONN_STATE_BHS;
660                 ic->ic_receive_len = sizeof(struct iscsi_bhs);
661                 ic->ic_receive_pdu = NULL;
662                 return (request);
663
664         default:
665                 panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
666         }
667
668         if (error != 0) {
669                 /*
670                  * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
671                  * and will get freed in icl_conn_close().
672                  */
673                 icl_conn_fail(ic);
674         }
675
676         return (NULL);
677 }
678
679 static void
680 icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
681 {
682         struct icl_pdu *response;
683         struct socket *so;
684
685         so = ic->ic_socket;
686
687         /*
688          * This can never happen; we're careful to only mess with ic->ic_socket
689          * pointer when the send/receive threads are not running.
690          */
691         KASSERT(so != NULL, ("NULL socket"));
692
693         for (;;) {
694                 if (ic->ic_disconnecting)
695                         return;
696
697                 if (so->so_error != 0) {
698                         ICL_DEBUG("connection error %d; "
699                             "dropping connection", so->so_error);
700                         icl_conn_fail(ic);
701                         return;
702                 }
703
704                 /*
705                  * Loop until we have a complete PDU or there is not enough
706                  * data in the socket buffer.
707                  */
708                 if (available < ic->ic_receive_len) {
709 #if 0
710                         ICL_DEBUG("not enough data; have %zd, "
711                             "need %zd", available,
712                             ic->ic_receive_len);
713 #endif
714                         return;
715                 }
716
717                 response = icl_conn_receive_pdu(ic, &available);
718                 if (response == NULL)
719                         continue;
720
721                 if (response->ip_ahs_len > 0) {
722                         ICL_WARN("received PDU with unsupported "
723                             "AHS; opcode 0x%x; dropping connection",
724                             response->ip_bhs->bhs_opcode);
725                         icl_pdu_free(response);
726                         icl_conn_fail(ic);
727                         return;
728                 }
729
730                 (ic->ic_receive)(response);
731         }
732 }
733
734 static void
735 icl_receive_thread(void *arg)
736 {
737         struct icl_conn *ic;
738         size_t available;
739         struct socket *so;
740
741         ic = arg;
742         so = ic->ic_socket;
743
744         ICL_CONN_LOCK(ic);
745         ic->ic_receive_running = true;
746         ICL_CONN_UNLOCK(ic);
747
748         for (;;) {
749                 if (ic->ic_disconnecting) {
750                         //ICL_DEBUG("terminating");
751                         break;
752                 }
753
754                 /*
755                  * Set the low watermark, to be checked by
756                  * soreadable() in icl_soupcall_receive()
757                  * to avoid unneccessary wakeups until there
758                  * is enough data received to read the PDU.
759                  */
760                 SOCKBUF_LOCK(&so->so_rcv);
761                 available = sbavail(&so->so_rcv);
762                 if (available < ic->ic_receive_len) {
763                         so->so_rcv.sb_lowat = ic->ic_receive_len;
764                         cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
765                 } else
766                         so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
767                 SOCKBUF_UNLOCK(&so->so_rcv);
768
769                 icl_conn_receive_pdus(ic, available);
770         }
771
772         ICL_CONN_LOCK(ic);
773         ic->ic_receive_running = false;
774         cv_signal(&ic->ic_send_cv);
775         ICL_CONN_UNLOCK(ic);
776         kthread_exit();
777 }
778
779 static int
780 icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
781 {
782         struct icl_conn *ic;
783
784         if (!soreadable(so))
785                 return (SU_OK);
786
787         ic = arg;
788         cv_signal(&ic->ic_receive_cv);
789         return (SU_OK);
790 }
791
792 static int
793 icl_pdu_finalize(struct icl_pdu *request)
794 {
795         size_t padding, pdu_len;
796         uint32_t digest, zero = 0;
797         int ok;
798         struct icl_conn *ic;
799
800         ic = request->ip_conn;
801
802         icl_pdu_set_data_segment_length(request, request->ip_data_len);
803
804         pdu_len = icl_pdu_size(request);
805
806         if (ic->ic_header_crc32c) {
807                 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
808                 ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
809                     (void *)&digest);
810                 if (ok != 1) {
811                         ICL_WARN("failed to append header digest");
812                         return (1);
813                 }
814         }
815
816         if (request->ip_data_len != 0) {
817                 padding = icl_pdu_padding(request);
818                 if (padding > 0) {
819                         ok = m_append(request->ip_data_mbuf, padding,
820                             (void *)&zero);
821                         if (ok != 1) {
822                                 ICL_WARN("failed to append padding");
823                                 return (1);
824                         }
825                 }
826
827                 if (ic->ic_data_crc32c) {
828                         digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
829
830                         ok = m_append(request->ip_data_mbuf, sizeof(digest),
831                             (void *)&digest);
832                         if (ok != 1) {
833                                 ICL_WARN("failed to append data digest");
834                                 return (1);
835                         }
836                 }
837
838                 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
839                 request->ip_data_mbuf = NULL;
840         }
841
842         request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
843
844         return (0);
845 }
846
847 static void
848 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
849 {
850         struct icl_pdu *request, *request2;
851         struct socket *so;
852         size_t available, size, size2;
853         int coalesced, error;
854
855         ICL_CONN_LOCK_ASSERT_NOT(ic);
856
857         so = ic->ic_socket;
858
859         SOCKBUF_LOCK(&so->so_snd);
860         /*
861          * Check how much space do we have for transmit.  We can't just
862          * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
863          * as it always frees the mbuf chain passed to it, even in case
864          * of error.
865          */
866         available = sbspace(&so->so_snd);
867
868         /*
869          * Notify the socket upcall that we don't need wakeups
870          * for the time being.
871          */
872         so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
873         SOCKBUF_UNLOCK(&so->so_snd);
874
875         while (!STAILQ_EMPTY(queue)) {
876                 request = STAILQ_FIRST(queue);
877                 size = icl_pdu_size(request);
878                 if (available < size) {
879
880                         /*
881                          * Set the low watermark, to be checked by
882                          * sowriteable() in icl_soupcall_send()
883                          * to avoid unneccessary wakeups until there
884                          * is enough space for the PDU to fit.
885                          */
886                         SOCKBUF_LOCK(&so->so_snd);
887                         available = sbspace(&so->so_snd);
888                         if (available < size) {
889 #if 1
890                                 ICL_DEBUG("no space to send; "
891                                     "have %zd, need %zd",
892                                     available, size);
893 #endif
894                                 so->so_snd.sb_lowat = size;
895                                 SOCKBUF_UNLOCK(&so->so_snd);
896                                 return;
897                         }
898                         SOCKBUF_UNLOCK(&so->so_snd);
899                 }
900                 STAILQ_REMOVE_HEAD(queue, ip_next);
901                 error = icl_pdu_finalize(request);
902                 if (error != 0) {
903                         ICL_DEBUG("failed to finalize PDU; "
904                             "dropping connection");
905                         icl_conn_fail(ic);
906                         icl_pdu_free(request);
907                         return;
908                 }
909                 if (coalesce) {
910                         coalesced = 1;
911                         for (;;) {
912                                 request2 = STAILQ_FIRST(queue);
913                                 if (request2 == NULL)
914                                         break;
915                                 size2 = icl_pdu_size(request2);
916                                 if (available < size + size2)
917                                         break;
918                                 STAILQ_REMOVE_HEAD(queue, ip_next);
919                                 error = icl_pdu_finalize(request2);
920                                 if (error != 0) {
921                                         ICL_DEBUG("failed to finalize PDU; "
922                                             "dropping connection");
923                                         icl_conn_fail(ic);
924                                         icl_pdu_free(request);
925                                         icl_pdu_free(request2);
926                                         return;
927                                 }
928                                 m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf);
929                                 request2->ip_bhs_mbuf = NULL;
930                                 request->ip_bhs_mbuf->m_pkthdr.len += size2;
931                                 size += size2;
932                                 STAILQ_REMOVE_AFTER(queue, request, ip_next);
933                                 icl_pdu_free(request2);
934                                 coalesced++;
935                         }
936 #if 0
937                         if (coalesced > 1) {
938                                 ICL_DEBUG("coalesced %d PDUs into %zd bytes",
939                                     coalesced, size);
940                         }
941 #endif
942                 }
943                 available -= size;
944                 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
945                     NULL, MSG_DONTWAIT, curthread);
946                 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
947                 if (error != 0) {
948                         ICL_DEBUG("failed to send PDU, error %d; "
949                             "dropping connection", error);
950                         icl_conn_fail(ic);
951                         icl_pdu_free(request);
952                         return;
953                 }
954                 icl_pdu_free(request);
955         }
956 }
957
958 static void
959 icl_send_thread(void *arg)
960 {
961         struct icl_conn *ic;
962         struct icl_pdu_stailq queue;
963
964         ic = arg;
965
966         STAILQ_INIT(&queue);
967
968         ICL_CONN_LOCK(ic);
969         ic->ic_send_running = true;
970
971         for (;;) {
972                 for (;;) {
973                         /*
974                          * If the local queue is empty, populate it from
975                          * the main one.  This way the icl_conn_send_pdus()
976                          * can go through all the queued PDUs without holding
977                          * any locks.
978                          */
979                         if (STAILQ_EMPTY(&queue))
980                                 STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);
981
982                         ic->ic_check_send_space = false;
983                         ICL_CONN_UNLOCK(ic);
984                         icl_conn_send_pdus(ic, &queue);
985                         ICL_CONN_LOCK(ic);
986
987                         /*
988                          * The icl_soupcall_send() was called since the last
989                          * call to sbspace(); go around;
990                          */
991                         if (ic->ic_check_send_space)
992                                 continue;
993
994                         /*
995                          * Local queue is empty, but we still have PDUs
996                          * in the main one; go around.
997                          */
998                         if (STAILQ_EMPTY(&queue) &&
999                             !STAILQ_EMPTY(&ic->ic_to_send))
1000                                 continue;
1001
1002                         /*
1003                          * There might be some stuff in the local queue,
1004                          * which didn't get sent due to not having enough send
1005                          * space.  Wait for socket upcall.
1006                          */
1007                         break;
1008                 }
1009
1010                 if (ic->ic_disconnecting) {
1011                         //ICL_DEBUG("terminating");
1012                         break;
1013                 }
1014
1015                 cv_wait(&ic->ic_send_cv, ic->ic_lock);
1016         }
1017
1018         /*
1019          * We're exiting; move PDUs back to the main queue, so they can
1020          * get freed properly.  At this point ordering doesn't matter.
1021          */
1022         STAILQ_CONCAT(&ic->ic_to_send, &queue);
1023
1024         ic->ic_send_running = false;
1025         cv_signal(&ic->ic_send_cv);
1026         ICL_CONN_UNLOCK(ic);
1027         kthread_exit();
1028 }
1029
1030 static int
1031 icl_soupcall_send(struct socket *so, void *arg, int waitflag)
1032 {
1033         struct icl_conn *ic;
1034
1035         if (!sowriteable(so))
1036                 return (SU_OK);
1037
1038         ic = arg;
1039
1040         ICL_CONN_LOCK(ic);
1041         ic->ic_check_send_space = true;
1042         ICL_CONN_UNLOCK(ic);
1043
1044         cv_signal(&ic->ic_send_cv);
1045
1046         return (SU_OK);
1047 }
1048
1049 int
1050 icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len,
1051     int flags)
1052 {
1053         struct mbuf *mb, *newmb;
1054         size_t copylen, off = 0;
1055
1056         KASSERT(len > 0, ("len == 0"));
1057
1058         newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR);
1059         if (newmb == NULL) {
1060                 ICL_WARN("failed to allocate mbuf for %zd bytes", len);
1061                 return (ENOMEM);
1062         }
1063
1064         for (mb = newmb; mb != NULL; mb = mb->m_next) {
1065                 copylen = min(M_TRAILINGSPACE(mb), len - off);
1066                 memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
1067                 mb->m_len = copylen;
1068                 off += copylen;
1069         }
1070         KASSERT(off == len, ("%s: off != len", __func__));
1071
1072         if (request->ip_data_mbuf == NULL) {
1073                 request->ip_data_mbuf = newmb;
1074                 request->ip_data_len = len;
1075         } else {
1076                 m_cat(request->ip_data_mbuf, newmb);
1077                 request->ip_data_len += len;
1078         }
1079
1080         return (0);
1081 }
1082
1083 void
1084 icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len)
1085 {
1086
1087         m_copydata(ip->ip_data_mbuf, off, len, addr);
1088 }
1089
1090 void
1091 icl_pdu_queue(struct icl_pdu *ip)
1092 {
1093         struct icl_conn *ic;
1094
1095         ic = ip->ip_conn;
1096
1097         ICL_CONN_LOCK_ASSERT(ic);
1098
1099         if (ic->ic_disconnecting || ic->ic_socket == NULL) {
1100                 ICL_DEBUG("icl_pdu_queue on closed connection");
1101                 icl_pdu_free(ip);
1102                 return;
1103         }
1104
1105         if (!STAILQ_EMPTY(&ic->ic_to_send)) {
1106                 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1107                 /*
1108                  * If the queue is not empty, someone else had already
1109                  * signaled the send thread; no need to do that again,
1110                  * just return.
1111                  */
1112                 return;
1113         }
1114
1115         STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1116         cv_signal(&ic->ic_send_cv);
1117 }
1118
1119 struct icl_conn *
1120 icl_conn_new(const char *name, struct mtx *lock)
1121 {
1122         struct icl_conn *ic;
1123
1124         refcount_acquire(&icl_ncons);
1125
1126         ic = uma_zalloc(icl_conn_zone, M_WAITOK | M_ZERO);
1127
1128         STAILQ_INIT(&ic->ic_to_send);
1129         ic->ic_lock = lock;
1130         cv_init(&ic->ic_send_cv, "icl_tx");
1131         cv_init(&ic->ic_receive_cv, "icl_rx");
1132 #ifdef DIAGNOSTIC
1133         refcount_init(&ic->ic_outstanding_pdus, 0);
1134 #endif
1135         ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
1136         ic->ic_name = name;
1137
1138         return (ic);
1139 }
1140
1141 void
1142 icl_conn_free(struct icl_conn *ic)
1143 {
1144
1145         cv_destroy(&ic->ic_send_cv);
1146         cv_destroy(&ic->ic_receive_cv);
1147         uma_zfree(icl_conn_zone, ic);
1148         refcount_release(&icl_ncons);
1149 }
1150
1151 static int
1152 icl_conn_start(struct icl_conn *ic)
1153 {
1154         size_t minspace;
1155         struct sockopt opt;
1156         int error, one = 1;
1157
1158         ICL_CONN_LOCK(ic);
1159
1160         /*
1161          * XXX: Ugly hack.
1162          */
1163         if (ic->ic_socket == NULL) {
1164                 ICL_CONN_UNLOCK(ic);
1165                 return (EINVAL);
1166         }
1167
1168         ic->ic_receive_state = ICL_CONN_STATE_BHS;
1169         ic->ic_receive_len = sizeof(struct iscsi_bhs);
1170         ic->ic_disconnecting = false;
1171
1172         ICL_CONN_UNLOCK(ic);
1173
1174         /*
1175          * For sendspace, this is required because the current code cannot
1176          * send a PDU in pieces; thus, the minimum buffer size is equal
1177          * to the maximum PDU size.  "+4" is to account for possible padding.
1178          *
1179          * What we should actually do here is to use autoscaling, but set
1180          * some minimal buffer size to "minspace".  I don't know a way to do
1181          * that, though.
1182          */
1183         minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
1184             ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
1185         if (sendspace < minspace) {
1186                 ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
1187                     minspace);
1188                 sendspace = minspace;
1189         }
1190         if (recvspace < minspace) {
1191                 ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
1192                     minspace);
1193                 recvspace = minspace;
1194         }
1195
1196         error = soreserve(ic->ic_socket, sendspace, recvspace);
1197         if (error != 0) {
1198                 ICL_WARN("soreserve failed with error %d", error);
1199                 icl_conn_close(ic);
1200                 return (error);
1201         }
1202         ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
1203         ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;
1204
1205         /*
1206          * Disable Nagle.
1207          */
1208         bzero(&opt, sizeof(opt));
1209         opt.sopt_dir = SOPT_SET;
1210         opt.sopt_level = IPPROTO_TCP;
1211         opt.sopt_name = TCP_NODELAY;
1212         opt.sopt_val = &one;
1213         opt.sopt_valsize = sizeof(one);
1214         error = sosetopt(ic->ic_socket, &opt);
1215         if (error != 0) {
1216                 ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
1217                 icl_conn_close(ic);
1218                 return (error);
1219         }
1220
1221         /*
1222          * Start threads.
1223          */
1224         error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
1225             ic->ic_name);
1226         if (error != 0) {
1227                 ICL_WARN("kthread_add(9) failed with error %d", error);
1228                 icl_conn_close(ic);
1229                 return (error);
1230         }
1231
1232         error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
1233             ic->ic_name);
1234         if (error != 0) {
1235                 ICL_WARN("kthread_add(9) failed with error %d", error);
1236                 icl_conn_close(ic);
1237                 return (error);
1238         }
1239
1240         /*
1241          * Register socket upcall, to get notified about incoming PDUs
1242          * and free space to send outgoing ones.
1243          */
1244         SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1245         soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
1246         SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1247         SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1248         soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
1249         SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1250
1251         return (0);
1252 }
1253
1254 int
1255 icl_conn_handoff(struct icl_conn *ic, int fd)
1256 {
1257         struct file *fp;
1258         struct socket *so;
1259         cap_rights_t rights;
1260         int error;
1261
1262         ICL_CONN_LOCK_ASSERT_NOT(ic);
1263
1264         /*
1265          * Steal the socket from userland.
1266          */
1267         error = fget(curthread, fd,
1268             cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1269         if (error != 0)
1270                 return (error);
1271         if (fp->f_type != DTYPE_SOCKET) {
1272                 fdrop(fp, curthread);
1273                 return (EINVAL);
1274         }
1275         so = fp->f_data;
1276         if (so->so_type != SOCK_STREAM) {
1277                 fdrop(fp, curthread);
1278                 return (EINVAL);
1279         }
1280
1281         ICL_CONN_LOCK(ic);
1282
1283         if (ic->ic_socket != NULL) {
1284                 ICL_CONN_UNLOCK(ic);
1285                 fdrop(fp, curthread);
1286                 return (EBUSY);
1287         }
1288
1289         ic->ic_socket = fp->f_data;
1290         fp->f_ops = &badfileops;
1291         fp->f_data = NULL;
1292         fdrop(fp, curthread);
1293         ICL_CONN_UNLOCK(ic);
1294
1295         error = icl_conn_start(ic);
1296
1297         return (error);
1298 }
1299
1300 void
1301 icl_conn_close(struct icl_conn *ic)
1302 {
1303         struct icl_pdu *pdu;
1304
1305         ICL_CONN_LOCK_ASSERT_NOT(ic);
1306
1307         ICL_CONN_LOCK(ic);
1308         if (ic->ic_socket == NULL) {
1309                 ICL_CONN_UNLOCK(ic);
1310                 return;
1311         }
1312
1313         /*
1314          * Deregister socket upcalls.
1315          */
1316         ICL_CONN_UNLOCK(ic);
1317         SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1318         if (ic->ic_socket->so_snd.sb_upcall != NULL)
1319                 soupcall_clear(ic->ic_socket, SO_SND);
1320         SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1321         SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1322         if (ic->ic_socket->so_rcv.sb_upcall != NULL)
1323                 soupcall_clear(ic->ic_socket, SO_RCV);
1324         SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1325         ICL_CONN_LOCK(ic);
1326
1327         ic->ic_disconnecting = true;
1328
1329         /*
1330          * Wake up the threads, so they can properly terminate.
1331          */
1332         while (ic->ic_receive_running || ic->ic_send_running) {
1333                 //ICL_DEBUG("waiting for send/receive threads to terminate");
1334                 cv_signal(&ic->ic_receive_cv);
1335                 cv_signal(&ic->ic_send_cv);
1336                 cv_wait(&ic->ic_send_cv, ic->ic_lock);
1337         }
1338         //ICL_DEBUG("send/receive threads terminated");
1339
1340         ICL_CONN_UNLOCK(ic);
1341         soclose(ic->ic_socket);
1342         ICL_CONN_LOCK(ic);
1343         ic->ic_socket = NULL;
1344
1345         if (ic->ic_receive_pdu != NULL) {
1346                 //ICL_DEBUG("freeing partially received PDU");
1347                 icl_pdu_free(ic->ic_receive_pdu);
1348                 ic->ic_receive_pdu = NULL;
1349         }
1350
1351         /*
1352          * Remove any outstanding PDUs from the send queue.
1353          */
1354         while (!STAILQ_EMPTY(&ic->ic_to_send)) {
1355                 pdu = STAILQ_FIRST(&ic->ic_to_send);
1356                 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
1357                 icl_pdu_free(pdu);
1358         }
1359
1360         KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
1361             ("destroying session with non-empty send queue"));
1362 #ifdef DIAGNOSTIC
1363         KASSERT(ic->ic_outstanding_pdus == 0,
1364             ("destroying session with %d outstanding PDUs",
1365              ic->ic_outstanding_pdus));
1366 #endif
1367         ICL_CONN_UNLOCK(ic);
1368 }
1369
1370 bool
1371 icl_conn_connected(struct icl_conn *ic)
1372 {
1373         ICL_CONN_LOCK_ASSERT_NOT(ic);
1374
1375         ICL_CONN_LOCK(ic);
1376         if (ic->ic_socket == NULL) {
1377                 ICL_CONN_UNLOCK(ic);
1378                 return (false);
1379         }
1380         if (ic->ic_socket->so_error != 0) {
1381                 ICL_CONN_UNLOCK(ic);
1382                 return (false);
1383         }
1384         ICL_CONN_UNLOCK(ic);
1385         return (true);
1386 }
1387
#ifdef ICL_KERNEL_PROXY
/*
 * Variant of the socket handoff that takes a kernel socket pointer
 * directly instead of a file descriptor.
 *
 * Returns EINVAL if "so" is not a SOCK_STREAM socket, EBUSY if the
 * connection already has a socket, and otherwise whatever
 * icl_conn_start() returns.
 */
int
icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so)
{
	bool busy;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	if (so->so_type != SOCK_STREAM)
		return (EINVAL);

	/* Claim the socket slot only if it is still free. */
	ICL_CONN_LOCK(ic);
	busy = (ic->ic_socket != NULL);
	if (!busy)
		ic->ic_socket = so;
	ICL_CONN_UNLOCK(ic);

	if (busy)
		return (EBUSY);

	return (icl_conn_start(ic));
}
#endif /* ICL_KERNEL_PROXY */
1412
1413 static int
1414 icl_unload(void)
1415 {
1416
1417         if (icl_ncons != 0)
1418                 return (EBUSY);
1419
1420         uma_zdestroy(icl_conn_zone);
1421         uma_zdestroy(icl_pdu_zone);
1422
1423         return (0);
1424 }
1425
1426 static void
1427 icl_load(void)
1428 {
1429
1430         icl_conn_zone = uma_zcreate("icl_conn",
1431             sizeof(struct icl_conn), NULL, NULL, NULL, NULL,
1432             UMA_ALIGN_PTR, 0);
1433         icl_pdu_zone = uma_zcreate("icl_pdu",
1434             sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
1435             UMA_ALIGN_PTR, 0);
1436
1437         refcount_init(&icl_ncons, 0);
1438 }
1439
1440 static int
1441 icl_modevent(module_t mod, int what, void *arg)
1442 {
1443
1444         switch (what) {
1445         case MOD_LOAD:
1446                 icl_load();
1447                 return (0);
1448         case MOD_UNLOAD:
1449                 return (icl_unload());
1450         default:
1451                 return (EINVAL);
1452         }
1453 }
1454
1455 moduledata_t icl_data = {
1456         "icl",
1457         icl_modevent,
1458         0
1459 };
1460
1461 DECLARE_MODULE(icl, icl_data, SI_SUB_DRIVERS, SI_ORDER_FIRST);
1462 MODULE_VERSION(icl, 1);