2 * Copyright (c) 2012 The FreeBSD Foundation
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * iSCSI Common Layer. It's used by both the initiator and target to send
34 * and receive iSCSI PDUs.
37 #include <sys/param.h>
38 #include <sys/capsicum.h>
39 #include <sys/condvar.h>
42 #include <sys/kernel.h>
43 #include <sys/kthread.h>
46 #include <sys/mutex.h>
47 #include <sys/module.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/sysctl.h>
51 #include <sys/systm.h>
55 #include <netinet/in.h>
56 #include <netinet/tcp.h>
59 #include "iscsi_proto.h"
/*
 * kern.icl sysctl tree, its tunables, and file-scope state.
 * NOTE(review): the declaration of `debug` and the tail of the
 * partial_receive_len description string are on lines missing from this
 * listing.
 */
61 SYSCTL_NODE(_kern, OID_AUTO, icl, CTLFLAG_RD, 0, "iSCSI Common Layer");
/* kern.icl.debug: exposed both as a tunable and as a read-write sysctl. */
63 TUNABLE_INT("kern.icl.debug", &debug);
64 SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RW,
65 &debug, 1, "Enable debug messages");
/*
 * kern.icl.partial_receive_len: cap, in bytes, applied by
 * icl_pdu_data_segment_receive_len() to how much of a data segment is
 * waited for at once.
 */
66 static int partial_receive_len = 1 * 1024; /* XXX: More? */
67 TUNABLE_INT("kern.icl.partial_receive_len", &partial_receive_len);
68 SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RW,
69 &partial_receive_len, 1 * 1024, "Minimum read size for partially received "
/* UMA zones used to allocate struct icl_conn and struct icl_pdu. */
72 static uma_zone_t icl_conn_zone;
73 static uma_zone_t icl_pdu_zone;
/* Connection counter: acquired in icl_conn_new(), released in icl_conn_free(). */
75 static volatile u_int icl_ncons;
/*
 * ICL_DEBUG / ICL_WARN: printf-style logging helpers.  Both prefix the
 * message with the calling function's name (__func__) and append a newline;
 * ICL_WARN additionally prefixes "WARNING: ".
 * NOTE(review): only the printf lines of the macro bodies are visible in this
 * listing; any wrapper around them (presumably a check of `debug`) is on
 * missing lines — confirm against the complete file.
 */
77 #define ICL_DEBUG(X, ...) \
80 printf("%s: " X "\n", __func__, ## __VA_ARGS__);\
83 #define ICL_WARN(X, ...) \
86 printf("WARNING: %s: " X "\n", \
87 __func__, ## __VA_ARGS__); \
/*
 * Connection lock helpers.  X is a pointer to an object whose ic_lock member
 * is the mutex pointer handed to mtx_lock()/mtx_unlock()/mtx_assert().
 *
 * The argument is parenthesized so that expressions such as `&conn` or
 * `base + 1` expand to the intended member access instead of binding `->`
 * to the last operand only.
 */
#define ICL_CONN_LOCK(X)		mtx_lock((X)->ic_lock)
#define ICL_CONN_UNLOCK(X)		mtx_unlock((X)->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)		mtx_assert((X)->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert((X)->ic_lock, MA_NOTOWNED)
/*
 * icl_conn_fail: flag the connection as broken.  When the connection still
 * has a socket, its so_error is set to EDOOFUS; icl_conn_receive_pdus() and
 * icl_conn_connected() in this file test so_error.
 * NOTE(review): this listing omits several lines of the function (the body of
 * the NULL-socket branch and anything done around the so_error store) —
 * confirm against the complete file.
 */
97 icl_conn_fail(struct icl_conn *ic)
/* No socket attached: the branch body is not visible in this listing. */
99 if (ic->ic_socket == NULL)
105 ic->ic_socket->so_error = EDOOFUS;
/*
 * icl_conn_receive: fetch data from the connection's socket as an mbuf
 * chain, without blocking.
 *
 * soreceive() is called with a zeroed uio and MSG_DONTWAIT and hands the
 * data back through `m`.  A soreceive() error is logged, and so is a
 * "short read" when uio_resid is non-zero afterwards.
 * NOTE(review): the lines that load `so`, set uio_resid (presumably to
 * `len`) and return are missing from this listing; callers in this file
 * treat a NULL result as failure.
 */
110 icl_conn_receive(struct icl_conn *ic, size_t len)
119 memset(&uio, 0, sizeof(uio));
122 flags = MSG_DONTWAIT;
123 error = soreceive(so, NULL, &uio, &m, NULL, &flags);
125 ICL_DEBUG("soreceive error %d", error);
/* Bytes left in the uio mean the socket held less than was asked for. */
128 if (uio.uio_resid != 0) {
130 ICL_DEBUG("short read");
/*
 * icl_pdu_new: allocate a zero-filled PDU from icl_pdu_zone.
 *
 * `flags` is passed to uma_zalloc() with M_ZERO or'ed in.  The connection's
 * ic_outstanding_pdus count is raised before the allocation and dropped
 * again after the allocation-failure warning.
 * NOTE(review): the failure test, any link from the PDU back to `ic`, and the
 * return statements are not visible in this listing.
 */
137 static struct icl_pdu *
138 icl_pdu_new(struct icl_conn *ic, int flags)
143 refcount_acquire(&ic->ic_outstanding_pdus);
145 ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
147 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
/* Undo the count taken above (presumably only on the failure path). */
149 refcount_release(&ic->ic_outstanding_pdus);
/*
 * icl_pdu_free: release a PDU and everything attached to it.
 *
 * Frees the BHS, AHS and data mbuf chains, returns the structure to
 * icl_pdu_zone, then drops the ic_outstanding_pdus count taken in
 * icl_pdu_new().
 * NOTE(review): the assignment of `ic` is not visible; presumably it is read
 * from the PDU before uma_zfree(), since it is used afterwards.
 */
160 icl_pdu_free(struct icl_pdu *ip)
166 m_freem(ip->ip_bhs_mbuf);
167 m_freem(ip->ip_ahs_mbuf);
168 m_freem(ip->ip_data_mbuf);
169 uma_zfree(icl_pdu_zone, ip);
171 refcount_release(&ic->ic_outstanding_pdus);
176 * Allocate an icl_pdu with an empty BHS, to be filled in by the caller.
/*
 * icl_pdu_new_bhs: allocate a PDU together with a zeroed Basic Header
 * Segment (BHS) for the caller to fill in.
 *
 * ic    - connection the PDU is allocated for.
 * flags - allocation flags, handed to both icl_pdu_new() and m_getm2().
 *
 * The BHS lives in a packet-header mbuf (M_PKTHDR); ip_bhs points at its
 * data and m_len is set to sizeof(struct iscsi_bhs).
 * NOTE(review): the error returns and the final return are on lines missing
 * from this listing.
 */
179 icl_pdu_new_bhs(struct icl_conn *ic, int flags)
183 ip = icl_pdu_new(ic, flags);
187 ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs),
188 flags, MT_DATA, M_PKTHDR);
189 if (ip->ip_bhs_mbuf == NULL) {
/* Report the size that actually failed: the BHS mbuf, not the icl_pdu. */
190 ICL_WARN("failed to allocate %zd bytes", sizeof(struct iscsi_bhs));
194 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
195 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
196 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
/*
 * icl_pdu_ahs_length: length in bytes of the Additional Header Segments
 * announced by the PDU's BHS.  The bhs_total_ahs_len field is multiplied by
 * 4 to obtain bytes.
 */
202 icl_pdu_ahs_length(const struct icl_pdu *request)
205 return (request->ip_bhs->bhs_total_ahs_len * 4);
/*
 * icl_pdu_data_segment_length: decode the data segment length stored in the
 * three bhs_data_segment_len[] elements of the BHS.
 * NOTE(review): only the three additions are visible.  Presumably shifts on
 * the missing lines separate them (icl_pdu_set_data_segment_length() stores
 * element 0 as the most significant byte) — confirm against the full file.
 */
209 icl_pdu_data_segment_length(const struct icl_pdu *request)
213 len += request->ip_bhs->bhs_data_segment_len[0];
215 len += request->ip_bhs->bhs_data_segment_len[1];
217 len += request->ip_bhs->bhs_data_segment_len[2];
/*
 * icl_pdu_set_data_segment_length: encode `len` into the BHS as three
 * elements, most significant first: [0] receives len >> 16, [1] receives
 * len >> 8 and [2] receives len itself.
 * (assumes the bhs_data_segment_len[] elements are single bytes, so each
 * assignment keeps only the low 8 bits — TODO confirm in iscsi_proto.h)
 */
223 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
226 response->ip_bhs->bhs_data_segment_len[2] = len;
227 response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
228 response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
/*
 * icl_pdu_padding: number of pad bytes needed to bring ip_data_len up to a
 * multiple of 4.
 * NOTE(review): the return for the already-aligned case is on a line missing
 * from this listing (presumably 0).
 */
232 icl_pdu_padding(const struct icl_pdu *ip)
235 if ((ip->ip_data_len % 4) != 0)
236 return (4 - (ip->ip_data_len % 4));
/*
 * icl_pdu_size: total number of bytes an outgoing PDU occupies when sent.
 *
 * That is the BHS plus the data and its padding, plus the header digest when
 * ic_header_crc32c is set, plus the data digest when there is data and
 * ic_data_crc32c is set.  AHS is not accounted for; the KASSERT requires
 * ip_ahs_len == 0.
 * NOTE(review): the return statement is not visible in this listing.
 */
242 icl_pdu_size(const struct icl_pdu *response)
246 KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
248 len = sizeof(struct iscsi_bhs) + response->ip_data_len +
249 icl_pdu_padding(response);
250 if (response->ip_conn->ic_header_crc32c)
251 len += ISCSI_HEADER_DIGEST_SIZE;
/* A data digest is only counted when a data segment is present. */
252 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
253 len += ISCSI_DATA_DIGEST_SIZE;
/*
 * icl_pdu_receive_bhs: read the Basic Header Segment of `request`.
 *
 * Receives sizeof(struct iscsi_bhs) bytes from the PDU's connection, runs
 * the chain through m_pullup() for that many bytes so that ip_bhs can point
 * directly into the mbuf data, and subtracts the BHS size from *availablep.
 * Failure to receive and failure of m_pullup() are logged.
 * NOTE(review): the return statements are on lines missing from this
 * listing; the caller stores the result in `error`.
 */
259 icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
263 m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs));
265 ICL_DEBUG("failed to receive BHS");
269 request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs));
270 if (request->ip_bhs_mbuf == NULL) {
271 ICL_WARN("m_pullup failed");
274 request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *);
277 * XXX: For architectures with strict alignment requirements
278 * we may need to allocate ip_bhs and copy the data into it.
279 * For some reason, though, not doing this doesn't seem
280 * to cause problems; tested on sparc64.
283 *availablep -= sizeof(struct iscsi_bhs);
/*
 * icl_pdu_receive_ahs: read the Additional Header Segments of `request`.
 *
 * Records the AHS length announced by the BHS in ip_ahs_len.  When it is
 * non-zero, that many bytes are received into ip_ahs_mbuf and subtracted
 * from *availablep; a failed receive is logged.
 * NOTE(review): the body of the zero-length branch and the return statements
 * are not visible in this listing.
 */
288 icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
291 request->ip_ahs_len = icl_pdu_ahs_length(request);
292 if (request->ip_ahs_len == 0)
295 request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
296 request->ip_ahs_len);
297 if (request->ip_ahs_mbuf == NULL) {
298 ICL_DEBUG("failed to receive AHS");
302 *availablep -= request->ip_ahs_len;
/*
 * icl_mbuf_to_crc32c: CRC32C digest over an entire mbuf chain.
 *
 * Starts from 0xffffffff, feeds the m_len bytes of every mbuf in the chain
 * through calculate_crc32c(), and XORs the result with 0xffffffff at the end.
 * NOTE(review): the return statement is not visible in this listing.
 */
307 icl_mbuf_to_crc32c(const struct mbuf *m0)
309 uint32_t digest = 0xffffffff;
310 const struct mbuf *m;
312 for (m = m0; m != NULL; m = m->m_next)
313 digest = calculate_crc32c(digest,
314 mtod(m, const void *), m->m_len);
316 digest = digest ^ 0xffffffff;
/*
 * icl_pdu_check_header_digest: receive and verify the header digest.
 *
 * When ic_header_crc32c is false the check is bypassed (branch body not
 * visible).  Otherwise ISCSI_HEADER_DIGEST_SIZE bytes are received, copied
 * into received_digest and subtracted from *availablep; the value is then
 * compared with the CRC32C of ip_bhs_mbuf and a mismatch is logged.
 * NOTE(review): the visible code checksums ip_bhs_mbuf only (no AHS); the
 * return statements and the release of `m` are on lines missing from this
 * listing.
 */
322 icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
325 uint32_t received_digest, valid_digest;
327 if (request->ip_conn->ic_header_crc32c == false)
330 m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE);
332 ICL_DEBUG("failed to receive header digest");
/* Compile-time guarantee that the digest fits received_digest exactly. */
336 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
337 m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest);
340 *availablep -= ISCSI_HEADER_DIGEST_SIZE;
345 valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
346 if (received_digest != valid_digest) {
347 ICL_WARN("header digest check failed; got 0x%x, "
348 "should be 0x%x", received_digest, valid_digest);
356 * Return the number of bytes that should be waiting in the receive socket
357 * before icl_pdu_receive_data_segment() gets called.
/*
 * icl_pdu_data_segment_receive_len: how many bytes should be waiting in the
 * receive socket before icl_pdu_receive_data_segment() is called next.
 *
 * Starts from the data segment length in the BHS, subtracts what has already
 * been read (ip_data_len), caps the result at partial_receive_len, and
 * otherwise adds padding to a multiple of 4.
 *
 * Fixed here: both ICL_DEBUG calls carried an extra closing parenthesis, and
 * the first one printed the `int` partial_receive_len with %zd.
 * NOTE(review): declarations, the conditions guarding some of the visible
 * statements and the return statements are on lines missing from this
 * listing.
 */
360 icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
364 len = icl_pdu_data_segment_length(request);
369 * Account for the parts of data segment already read from
372 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
373 len -= request->ip_data_len;
376 * Don't always wait for the full data segment to be delivered
377 * to the socket; this might badly affect performance due to
378 * TCP window scaling.
380 if (len > partial_receive_len) {
382 ICL_DEBUG("need %zd bytes of data, limiting to %d",
383 len, partial_receive_len);
385 len = partial_receive_len;
391 * Account for padding. Note that due to the way code is written,
392 * the icl_pdu_receive_data_segment() must always receive padding
393 * along with the last part of data segment, because it would be
394 * impossible to tell whether we've already received the full data
395 * segment including padding, or without it.
398 len += 4 - (len % 4);
401 ICL_DEBUG("need %zd bytes of data", len);
/*
 * icl_pdu_receive_data_segment: receive the next part of the data segment.
 *
 * request      - PDU being assembled; ip_data_mbuf/ip_data_len accumulate
 *                what has been read so far.
 * availablep   - in/out: bytes available in the socket buffer; reduced by
 *                the data and padding consumed.
 * more_neededp - out: set to true when the socket did not yet hold the whole
 *                remainder, i.e. the function must be called again.
 *
 * ic_receive_len is reset to 0 on entry.  When the remaining data plus
 * padding exceeds *availablep, only *availablep - padding bytes are taken.
 * The received chain becomes ip_data_mbuf or is appended to it with m_cat().
 *
 * Fixed here: the "limited from" ICL_DEBUG call carried an extra closing
 * parenthesis.
 * NOTE(review): many lines are missing from this listing (declarations, the
 * conditions guarding the padding computation and the partial-read
 * adjustments, the returns).  In particular, confirm that
 * `*availablep - padding` cannot wrap when *availablep < padding.
 */
408 icl_pdu_receive_data_segment(struct icl_pdu *request,
409 size_t *availablep, bool *more_neededp)
412 size_t len, padding = 0;
415 ic = request->ip_conn;
417 *more_neededp = false;
418 ic->ic_receive_len = 0;
420 len = icl_pdu_data_segment_length(request);
425 padding = 4 - (len % 4);
428 * Account for already received parts of data segment.
430 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
431 len -= request->ip_data_len;
433 if (len + padding > *availablep) {
435 * Not enough data in the socket buffer. Receive as much
436 * as we can. Don't receive padding, since, obviously, it's
437 * not the end of data segment yet.
440 ICL_DEBUG("limited from %zd to %zd",
441 len + padding, *availablep - padding);
443 len = *availablep - padding;
444 *more_neededp = true;
449 * Must not try to receive padding without at least one byte
450 * of actual data segment.
453 m = icl_conn_receive(request->ip_conn, len + padding);
455 ICL_DEBUG("failed to receive data segment");
459 if (request->ip_data_mbuf == NULL)
460 request->ip_data_mbuf = m;
462 m_cat(request->ip_data_mbuf, m);
464 request->ip_data_len += len;
465 *availablep -= len + padding;
471 icl_pdu_data_segment_receive_len(request);
/*
 * icl_pdu_check_data_digest: receive and verify the data digest.
 *
 * The check is bypassed when ic_data_crc32c is false or the PDU has no data
 * (branch bodies not visible).  Otherwise ISCSI_DATA_DIGEST_SIZE bytes are
 * received, copied into received_digest and subtracted from *availablep;
 * the value is compared with the CRC32C of the whole ip_data_mbuf chain and
 * a mismatch is logged.
 * NOTE(review): the return statements and the release of `m` are on lines
 * missing from this listing.
 */
477 icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
480 uint32_t received_digest, valid_digest;
482 if (request->ip_conn->ic_data_crc32c == false)
485 if (request->ip_data_len == 0)
488 m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE);
490 ICL_DEBUG("failed to receive data digest");
494 CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
495 m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest);
498 *availablep -= ISCSI_DATA_DIGEST_SIZE;
501 * Note that ip_data_mbuf also contains padding; since digest
502 * calculation is supposed to include that, we iterate over
503 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
505 valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
506 if (received_digest != valid_digest) {
507 ICL_WARN("data digest check failed; got 0x%x, "
508 "should be 0x%x", received_digest, valid_digest);
516 * Somewhat contrary to the name, this attempts to receive only one
517 * "part" of a PDU at a time; call it repeatedly until it returns non-NULL.
/*
 * icl_conn_receive_pdu: advance the per-connection receive state machine by
 * one step.
 *
 * ic         - connection; ic_receive_state selects the part to read,
 *              ic_receive_len is the number of bytes that part needs and
 *              ic_receive_pdu holds the PDU being assembled.
 * availablep - in/out: bytes available in the socket buffer.
 *
 * States run BHS -> AHS -> HEADER_DIGEST -> DATA -> DATA_DIGEST.  A new PDU
 * is allocated (M_NOWAIT) on entering the BHS state; after the data digest
 * step the state is reset to BHS and ic_receive_pdu is cleared.  A data
 * segment longer than ic_max_data_segment_length is rejected with a warning.
 *
 * Fixed here: the data segment failure message lacked the space after the
 * semicolon, so it printed "segment;dropping connection" unlike every other
 * message in this function.
 * NOTE(review): many lines are missing from this listing (declarations,
 * error tests, breaks/returns, the else keywords), so the exact control flow
 * between the visible statements must be confirmed against the full file.
 */
519 static struct icl_pdu *
520 icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
522 struct icl_pdu *request;
530 if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
531 KASSERT(ic->ic_receive_pdu == NULL,
532 ("ic->ic_receive_pdu != NULL"));
533 request = icl_pdu_new(ic, M_NOWAIT);
534 if (request == NULL) {
535 ICL_DEBUG("failed to allocate PDU; "
536 "dropping connection");
540 ic->ic_receive_pdu = request;
542 KASSERT(ic->ic_receive_pdu != NULL,
543 ("ic->ic_receive_pdu == NULL"));
544 request = ic->ic_receive_pdu;
547 if (*availablep < ic->ic_receive_len) {
549 ICL_DEBUG("not enough data; need %zd, "
550 "have %zd", ic->ic_receive_len, *availablep);
555 switch (ic->ic_receive_state) {
556 case ICL_CONN_STATE_BHS:
557 //ICL_DEBUG("receiving BHS");
558 error = icl_pdu_receive_bhs(request, availablep);
560 ICL_DEBUG("failed to receive BHS; "
561 "dropping connection");
566 * We don't enforce any limit for AHS length;
567 * its length is stored in 8 bit field.
570 len = icl_pdu_data_segment_length(request);
571 if (len > ic->ic_max_data_segment_length) {
572 ICL_WARN("received data segment "
573 "length %zd is larger than negotiated "
574 "MaxDataSegmentLength %zd; "
575 "dropping connection",
576 len, ic->ic_max_data_segment_length);
581 ic->ic_receive_state = ICL_CONN_STATE_AHS;
582 ic->ic_receive_len = icl_pdu_ahs_length(request);
585 case ICL_CONN_STATE_AHS:
586 //ICL_DEBUG("receiving AHS");
587 error = icl_pdu_receive_ahs(request, availablep);
589 ICL_DEBUG("failed to receive AHS; "
590 "dropping connection");
593 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
594 if (ic->ic_header_crc32c == false)
595 ic->ic_receive_len = 0;
597 ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
600 case ICL_CONN_STATE_HEADER_DIGEST:
601 //ICL_DEBUG("receiving header digest");
602 error = icl_pdu_check_header_digest(request, availablep);
604 ICL_DEBUG("header digest failed; "
605 "dropping connection");
609 ic->ic_receive_state = ICL_CONN_STATE_DATA;
611 icl_pdu_data_segment_receive_len(request);
614 case ICL_CONN_STATE_DATA:
615 //ICL_DEBUG("receiving data segment");
616 error = icl_pdu_receive_data_segment(request, availablep,
619 ICL_DEBUG("failed to receive data segment; "
620 "dropping connection");
627 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
628 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
629 ic->ic_receive_len = 0;
631 ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
634 case ICL_CONN_STATE_DATA_DIGEST:
635 //ICL_DEBUG("receiving data digest");
636 error = icl_pdu_check_data_digest(request, availablep);
638 ICL_DEBUG("data digest failed; "
639 "dropping connection");
644 * We've received complete PDU; reset the receive state machine
645 * and return the PDU.
647 ic->ic_receive_state = ICL_CONN_STATE_BHS;
648 ic->ic_receive_len = sizeof(struct iscsi_bhs);
649 ic->ic_receive_pdu = NULL;
653 panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
657 icl_pdu_free(request);
/*
 * icl_conn_receive_pdus: pull PDUs off the socket and hand each complete one
 * to the connection's ic_receive callback.
 *
 * `available` is the number of bytes the caller found in the receive buffer;
 * icl_conn_receive_pdu() decrements it as parts are consumed.  Visible
 * checks: ic_disconnecting, a pending so_error (logged), and
 * available < ic_receive_len.  A PDU that carries AHS is logged as
 * unsupported and freed instead of being delivered.
 * NOTE(review): the loop construct, the load of `so` and the bodies of most
 * branches are on lines missing from this listing.
 */
665 icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
667 struct icl_pdu *response;
673 * This can never happen; we're careful to only mess with ic->ic_socket
674 * pointer when the send/receive threads are not running.
676 KASSERT(so != NULL, ("NULL socket"));
679 if (ic->ic_disconnecting)
682 if (so->so_error != 0) {
683 ICL_DEBUG("connection error %d; "
684 "dropping connection", so->so_error);
690 * Loop until we have a complete PDU or there is not enough
691 * data in the socket buffer.
693 if (available < ic->ic_receive_len) {
695 ICL_DEBUG("not enough data; have %zd, "
696 "need %zd", available,
702 response = icl_conn_receive_pdu(ic, &available);
703 if (response == NULL)
706 if (response->ip_ahs_len > 0) {
707 ICL_WARN("received PDU with unsupported "
708 "AHS; opcode 0x%x; dropping connection",
709 response->ip_bhs->bhs_opcode);
710 icl_pdu_free(response);
715 (ic->ic_receive)(response);
/*
 * icl_receive_thread: receive-side kernel thread, created by
 * icl_conn_start() with the connection as its argument.
 *
 * ic_receive_running is true while the thread is active;
 * icl_conn_close() polls that flag.  Each pass checks ic_disconnecting,
 * samples so_rcv.sb_cc under the sockbuf lock and, when fewer than
 * ic_receive_len bytes are buffered, sets sb_lowat to that amount and sleeps
 * on ic_receive_cv.  It then calls icl_conn_receive_pdus() with the sampled
 * byte count.
 * NOTE(review): the loop construct and thread exit are on missing lines.  In
 * the visible lines `available` is not re-read after cv_wait() — check
 * the complete source.
 */
720 icl_receive_thread(void *arg)
730 ic->ic_receive_running = true;
734 if (ic->ic_disconnecting) {
735 //ICL_DEBUG("terminating");
739 SOCKBUF_LOCK(&so->so_rcv);
740 available = so->so_rcv.sb_cc;
741 if (available < ic->ic_receive_len) {
742 so->so_rcv.sb_lowat = ic->ic_receive_len;
743 cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
745 SOCKBUF_UNLOCK(&so->so_rcv);
747 icl_conn_receive_pdus(ic, available);
751 ic->ic_receive_running = false;
/*
 * icl_soupcall_receive: socket upcall registered for SO_RCV in
 * icl_conn_start(); wakes the receive thread through ic_receive_cv.
 * NOTE(review): the derivation of `ic` (presumably from `arg`) and the
 * return value are on lines missing from this listing.
 */
757 icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
762 cv_signal(&ic->ic_receive_cv);
/*
 * icl_pdu_send: serialize one PDU and hand it to the socket.
 *
 * Must be called with the connection lock held.  Steps: write the data
 * segment length into the BHS; append the header digest to the BHS mbuf when
 * ic_header_crc32c is set; when there is data, append zero padding and, when
 * ic_data_crc32c is set, the data digest (computed over data plus padding),
 * then chain the data onto the BHS mbuf; set the packet header length to
 * icl_pdu_size() and call sosend() with MSG_DONTWAIT.  sosend() consumes the
 * mbuf, so ip_bhs_mbuf and ip_data_mbuf are cleared.
 *
 * Fixed here: the failure to append the *data* digest was reported as
 * "failed to append header digest".
 * NOTE(review): declarations, the `ok`/`error` tests and the return
 * statements are on lines missing from this listing; the caller stores the
 * result in `error`.
 */
767 icl_pdu_send(struct icl_pdu *request)
769 size_t padding, pdu_len;
770 uint32_t digest, zero = 0;
775 ic = request->ip_conn;
776 so = request->ip_conn->ic_socket;
778 ICL_CONN_LOCK_ASSERT(ic);
780 icl_pdu_set_data_segment_length(request, request->ip_data_len);
782 pdu_len = icl_pdu_size(request);
784 if (ic->ic_header_crc32c) {
785 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
786 ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
789 ICL_WARN("failed to append header digest");
794 if (request->ip_data_len != 0) {
795 padding = icl_pdu_padding(request);
797 ok = m_append(request->ip_data_mbuf, padding,
800 ICL_WARN("failed to append padding");
805 if (ic->ic_data_crc32c) {
806 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
808 ok = m_append(request->ip_data_mbuf, sizeof(digest),
811 ICL_WARN("failed to append data digest");
816 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
817 request->ip_data_mbuf = NULL;
820 request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
822 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
823 NULL, MSG_DONTWAIT, curthread);
824 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
826 ICL_DEBUG("sosend error %d", error);
/*
 * icl_conn_send_pdus: transmit PDUs from the head of ic_to_send while the
 * socket send buffer has room.  Requires the connection lock.
 *
 * Free space is sampled with sbspace() under the send sockbuf lock.  For the
 * PDU at the head of the queue: check ic_disconnecting; if the PDU does not
 * fit, set so_snd.sb_lowat to its size and log; otherwise dequeue it, send
 * it with icl_pdu_send() and free it.
 * NOTE(review): lines are missing from this listing — the load of `so`, how
 * `available` is adjusted after a send, and the bodies of the exit branches.
 * The PDU is removed from the queue before icl_pdu_send(); confirm that the
 * failure branch does not leak it.
 */
834 icl_conn_send_pdus(struct icl_conn *ic)
836 struct icl_pdu *request;
838 size_t available, size;
841 ICL_CONN_LOCK_ASSERT(ic);
845 SOCKBUF_LOCK(&so->so_snd);
846 available = sbspace(&so->so_snd);
847 SOCKBUF_UNLOCK(&so->so_snd);
849 while (!TAILQ_EMPTY(&ic->ic_to_send)) {
850 if (ic->ic_disconnecting)
853 request = TAILQ_FIRST(&ic->ic_to_send);
854 size = icl_pdu_size(request);
855 if (available < size) {
857 * Set the low watermark on the socket,
858 * to avoid waking up until there is enough
861 SOCKBUF_LOCK(&so->so_snd);
862 so->so_snd.sb_lowat = size;
863 SOCKBUF_UNLOCK(&so->so_snd);
865 ICL_DEBUG("no space to send; "
866 "have %zd, need %zd",
872 TAILQ_REMOVE(&ic->ic_to_send, request, ip_next);
873 error = icl_pdu_send(request);
875 ICL_DEBUG("failed to send PDU; "
876 "dropping connection");
880 icl_pdu_free(request);
/*
 * icl_send_thread: send-side kernel thread, created by icl_conn_start()
 * with the connection as its argument.
 *
 * ic_send_running is true while the thread is active; icl_conn_close()
 * polls that flag.  Each pass checks ic_disconnecting, flushes the queue
 * with icl_conn_send_pdus() and then sleeps on ic_send_cv using the
 * connection lock as the interlock.
 * NOTE(review): the loop construct, lock acquisition and thread exit are on
 * lines missing from this listing.
 */
885 icl_send_thread(void *arg)
892 ic->ic_send_running = true;
895 if (ic->ic_disconnecting) {
896 //ICL_DEBUG("terminating");
899 icl_conn_send_pdus(ic);
900 cv_wait(&ic->ic_send_cv, ic->ic_lock);
903 ic->ic_send_running = false;
/*
 * icl_soupcall_send: socket upcall registered for SO_SND in
 * icl_conn_start(); wakes the send thread through ic_send_cv.
 * NOTE(review): the derivation of `ic` (presumably from `arg`) and the
 * return value are on lines missing from this listing.
 */
909 icl_soupcall_send(struct socket *so, void *arg, int waitflag)
914 cv_signal(&ic->ic_send_cv);
/*
 * icl_pdu_append_data: copy `len` bytes from `addr` into a freshly allocated
 * mbuf chain and attach it to the PDU's data segment.
 *
 * request - PDU to extend.
 * addr    - source buffer.
 * len     - number of bytes; must be non-zero (KASSERT).
 * flags   - allocation flags passed to m_getm2().
 *
 * The new chain becomes ip_data_mbuf when the PDU had no data yet, otherwise
 * it is appended with m_cat(); ip_data_len grows by `len` either way.
 * NOTE(review): the lines that set each mbuf's m_len, advance `off`, test
 * the allocation result and return are missing from this listing.
 */
919 icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, int flags)
921 struct mbuf *mb, *newmb;
922 size_t copylen, off = 0;
924 KASSERT(len > 0, ("len == 0"));
926 newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR);
928 ICL_WARN("failed to allocate mbuf for %zd bytes", len);
/* Fill each mbuf with as much as it can hold of what is left to copy. */
932 for (mb = newmb; mb != NULL; mb = mb->m_next) {
933 copylen = min(M_TRAILINGSPACE(mb), len - off);
934 memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
938 KASSERT(off == len, ("%s: off != len", __func__));
940 if (request->ip_data_mbuf == NULL) {
941 request->ip_data_mbuf = newmb;
942 request->ip_data_len = len;
944 m_cat(request->ip_data_mbuf, newmb);
945 request->ip_data_len += len;
/*
 * icl_pdu_get_data: copy `len` bytes of the PDU's data segment, starting at
 * byte offset `off`, into the buffer at `addr` (m_copydata() on
 * ip_data_mbuf).  No bounds check is visible here.
 */
952 icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len)
955 m_copydata(ip->ip_data_mbuf, off, len, addr);
/*
 * icl_pdu_queue: queue a PDU for transmission.  Requires the connection
 * lock.
 *
 * The PDU is appended to ic_to_send and the send thread is woken through
 * ic_send_cv.  On a connection that is disconnecting or has no socket a
 * debug message is logged instead.
 * NOTE(review): the load of `ic` and the rest of the closed-connection
 * branch (presumably freeing the PDU and returning) are on lines missing
 * from this listing.
 */
959 icl_pdu_queue(struct icl_pdu *ip)
965 ICL_CONN_LOCK_ASSERT(ic);
967 if (ic->ic_disconnecting || ic->ic_socket == NULL) {
968 ICL_DEBUG("icl_pdu_queue on closed connection");
972 TAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
973 cv_signal(&ic->ic_send_cv);
/*
 * icl_conn_new: allocate and initialise a connection object.
 *
 * Bumps the icl_ncons counter, takes a zeroed struct from icl_conn_zone
 * (M_WAITOK), and initialises the send queue, both condition variables, the
 * outstanding-PDU count (0) and the default maximum data segment length.
 * NOTE(review): the use of `lock` (presumably stored in ic_lock) and the
 * return statement are on lines missing from this listing.
 */
977 icl_conn_new(struct mtx *lock)
981 refcount_acquire(&icl_ncons);
983 ic = uma_zalloc(icl_conn_zone, M_WAITOK | M_ZERO);
985 TAILQ_INIT(&ic->ic_to_send);
987 cv_init(&ic->ic_send_cv, "icl_tx");
988 cv_init(&ic->ic_receive_cv, "icl_rx");
990 refcount_init(&ic->ic_outstanding_pdus, 0);
992 ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
/*
 * icl_conn_free: destroy a connection object created by icl_conn_new().
 * Destroys both condition variables, returns the structure to icl_conn_zone
 * and drops the icl_ncons count.  No socket teardown is visible here; see
 * icl_conn_close().
 */
998 icl_conn_free(struct icl_conn *ic)
1001 cv_destroy(&ic->ic_send_cv);
1002 cv_destroy(&ic->ic_receive_cv);
1003 uma_zfree(icl_conn_zone, ic);
1004 refcount_release(&icl_ncons);
/*
 * icl_conn_start: bring up a connection whose ic_socket has been set.
 *
 * Resets the receive state machine (BHS state, BHS-sized receive length) and
 * clears ic_disconnecting, reserves socket buffer space for eight maximum
 * size PDUs with soreserve(), sets the TCP_NODELAY option, starts the
 * "icltx" and "iclrx" kernel threads and finally registers the send and
 * receive socket upcalls under the respective sockbuf locks.
 *
 * Fixed here: the warning for a failed sosetopt() said "disabling
 * TCP_NODELAY" although the call sets that option (SOPT_SET with `one`).
 * NOTE(review): declarations (including the value of `one`), lock
 * acquisition, the error tests and the return statements are on lines
 * missing from this listing.
 */
1008 icl_conn_start(struct icl_conn *ic)
1019 if (ic->ic_socket == NULL) {
1020 ICL_CONN_UNLOCK(ic);
1024 ic->ic_receive_state = ICL_CONN_STATE_BHS;
1025 ic->ic_receive_len = sizeof(struct iscsi_bhs);
1026 ic->ic_disconnecting = false;
1028 ICL_CONN_UNLOCK(ic);
1031 * Use max available sockbuf size for sending. Do it manually
1032 * instead of sbreserve(9) to work around resource limits.
1034 * XXX: This kind of sucks. On one hand, we don't currently support
1035 * sending a part of data segment; we always do it in one piece,
1036 * so we have to make sure it can fit in the socket buffer.
1037 * Once I've implemented partial send, we'll get rid of this
1038 * and use autoscaling.
1040 bufsize = (sizeof(struct iscsi_bhs) +
1041 ic->ic_max_data_segment_length) * 8;
1042 error = soreserve(ic->ic_socket, bufsize, bufsize);
1044 ICL_WARN("soreserve failed with error %d", error);
1052 bzero(&opt, sizeof(opt));
1053 opt.sopt_dir = SOPT_SET;
1054 opt.sopt_level = IPPROTO_TCP;
1055 opt.sopt_name = TCP_NODELAY;
1056 opt.sopt_val = &one;
1057 opt.sopt_valsize = sizeof(one);
1058 error = sosetopt(ic->ic_socket, &opt);
1060 ICL_WARN("setting TCP_NODELAY failed with error %d", error);
1068 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "icltx");
1070 ICL_WARN("kthread_add(9) failed with error %d", error);
1075 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "iclrx");
1077 ICL_WARN("kthread_add(9) failed with error %d", error);
1083 * Register socket upcall, to get notified about incoming PDUs
1084 * and free space to send outgoing ones.
1086 SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1087 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
1088 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1089 SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1090 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
1091 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
/*
 * icl_conn_handoff: take over the socket behind file descriptor `fd` and
 * start the connection on it.  Must be called without the connection lock.
 *
 * The descriptor is looked up with fget() requiring CAP_SOCK_CLIENT rights,
 * then checked to be a socket (DTYPE_SOCKET) of type SOCK_STREAM.  If the
 * connection has no socket yet, f_data is stored in ic_socket and the file's
 * f_ops is replaced with badfileops; the file reference is dropped and
 * icl_conn_start() is called.
 * NOTE(review): the error tests, the assignment of `so`, lock acquisition and
 * the return values of the failure branches are on lines missing from this
 * listing.
 */
1097 icl_conn_handoff(struct icl_conn *ic, int fd)
1101 cap_rights_t rights;
1104 ICL_CONN_LOCK_ASSERT_NOT(ic);
1107 * Steal the socket from userland.
1109 error = fget(curthread, fd,
1110 cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1113 if (fp->f_type != DTYPE_SOCKET) {
1114 fdrop(fp, curthread);
1118 if (so->so_type != SOCK_STREAM) {
1119 fdrop(fp, curthread);
/* Refuse a second handoff while the connection already owns a socket. */
1125 if (ic->ic_socket != NULL) {
1126 ICL_CONN_UNLOCK(ic);
1127 fdrop(fp, curthread);
1131 ic->ic_socket = fp->f_data;
1132 fp->f_ops = &badfileops;
1134 fdrop(fp, curthread);
1135 ICL_CONN_UNLOCK(ic);
1137 error = icl_conn_start(ic);
/*
 * icl_conn_shutdown: shut down both directions of the connection's socket
 * (soshutdown() with SHUT_RDWR).  Must be called without the connection
 * lock.  A connection without a socket is handled by an early branch.
 * NOTE(review): lock acquisition and the body of the NULL-socket branch
 * (presumably a return) are on lines missing from this listing.
 */
1143 icl_conn_shutdown(struct icl_conn *ic)
1145 ICL_CONN_LOCK_ASSERT_NOT(ic);
1148 if (ic->ic_socket == NULL) {
1149 ICL_CONN_UNLOCK(ic);
1152 ICL_CONN_UNLOCK(ic);
1154 soshutdown(ic->ic_socket, SHUT_RDWR);
/*
 * icl_conn_close: stop the worker threads and release the socket.  Must be
 * called without the connection lock.
 *
 * Sets ic_disconnecting, signals both condition variables and then polls
 * once per second (pause() for 1 * hz) until ic_receive_running and
 * ic_send_running are both false, re-signalling on every pass.  Afterwards
 * the socket is closed and cleared, a partially received PDU is freed, and
 * the send queue is emptied.  The KASSERTs check that the queue is empty and
 * that no PDUs remain outstanding.
 * NOTE(review): lock acquisition/re-acquisition, the NULL-socket return and
 * what is done with each PDU removed from the send queue (presumably
 * icl_pdu_free()) are on lines missing from this listing.
 */
1158 icl_conn_close(struct icl_conn *ic)
1160 struct icl_pdu *pdu;
1162 ICL_CONN_LOCK_ASSERT_NOT(ic);
1165 if (ic->ic_socket == NULL) {
1166 ICL_CONN_UNLOCK(ic);
1170 ic->ic_disconnecting = true;
1173 * Wake up the threads, so they can properly terminate.
1175 cv_signal(&ic->ic_receive_cv);
1176 cv_signal(&ic->ic_send_cv);
1177 while (ic->ic_receive_running || ic->ic_send_running) {
1178 //ICL_DEBUG("waiting for send/receive threads to terminate");
1179 ICL_CONN_UNLOCK(ic);
1180 cv_signal(&ic->ic_receive_cv);
1181 cv_signal(&ic->ic_send_cv);
1182 pause("icl_close", 1 * hz);
1185 //ICL_DEBUG("send/receive threads terminated");
1187 soclose(ic->ic_socket);
1188 ic->ic_socket = NULL;
1190 if (ic->ic_receive_pdu != NULL) {
1191 //ICL_DEBUG("freeing partially received PDU");
1192 icl_pdu_free(ic->ic_receive_pdu);
1193 ic->ic_receive_pdu = NULL;
1197 * Remove any outstanding PDUs from the send queue.
1199 while (!TAILQ_EMPTY(&ic->ic_to_send)) {
1200 pdu = TAILQ_FIRST(&ic->ic_to_send);
1201 TAILQ_REMOVE(&ic->ic_to_send, pdu, ip_next);
1205 KASSERT(TAILQ_EMPTY(&ic->ic_to_send),
1206 ("destroying session with non-empty send queue"));
1211 KASSERT(ic->ic_outstanding_pdus == 0,
1212 ("destroying session with %d outstanding PDUs",
1213 ic->ic_outstanding_pdus));
1215 ICL_CONN_UNLOCK(ic);
/*
 * icl_conn_connected: report whether the connection is usable.  Must be
 * called without the connection lock.
 *
 * Two conditions are tested under the lock: the connection has a socket, and
 * that socket has no pending so_error.
 * NOTE(review): the return statements are on lines missing from this listing
 * (presumably false for either failed test, true otherwise).
 */
1219 icl_conn_connected(struct icl_conn *ic)
1221 ICL_CONN_LOCK_ASSERT_NOT(ic);
1224 if (ic->ic_socket == NULL) {
1225 ICL_CONN_UNLOCK(ic);
1228 if (ic->ic_socket->so_error != 0) {
1229 ICL_CONN_UNLOCK(ic);
1232 ICL_CONN_UNLOCK(ic);
/*
 * icl_conn_handoff_sock: variant of icl_conn_handoff() that is given the
 * socket directly; only built when ICL_KERNEL_PROXY is defined.  Must be
 * called without the connection lock.
 *
 * Checks that the socket is SOCK_STREAM and that the connection has no
 * socket yet, then calls icl_conn_start().
 * NOTE(review): the assignment of `so` to ic_socket, lock acquisition and the
 * return statements are on lines missing from this listing.
 */
1236 #ifdef ICL_KERNEL_PROXY
1238 icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so)
1242 ICL_CONN_LOCK_ASSERT_NOT(ic);
1244 if (so->so_type != SOCK_STREAM)
1248 if (ic->ic_socket != NULL) {
1249 ICL_CONN_UNLOCK(ic);
1253 ICL_CONN_UNLOCK(ic);
1255 error = icl_conn_start(ic);
1259 #endif /* ICL_KERNEL_PROXY */
/*
 * Destroy both UMA zones created at load time.
 * NOTE(review): the enclosing function header is not visible in this
 * listing; presumably this is the body of icl_unload(), which
 * icl_modevent() calls — confirm.
 */
1268 uma_zdestroy(icl_conn_zone);
1269 uma_zdestroy(icl_pdu_zone);
/*
 * Create the UMA zones for struct icl_conn and struct icl_pdu and reset the
 * icl_ncons connection counter to 0.
 * NOTE(review): the enclosing function header and the trailing uma_zcreate()
 * arguments are not visible in this listing; presumably this is the
 * module's load-time initialisation — confirm.
 */
1278 icl_conn_zone = uma_zcreate("icl_conn",
1279 sizeof(struct icl_conn), NULL, NULL, NULL, NULL,
1281 icl_pdu_zone = uma_zcreate("icl_pdu",
1282 sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
1285 refcount_init(&icl_ncons, 0);
/*
 * icl_modevent: module event handler.  The only visible action returns the
 * result of icl_unload().
 * NOTE(review): the dispatch on `what` and the other event cases are on
 * lines missing from this listing.
 */
1289 icl_modevent(module_t mod, int what, void *arg)
1297 return (icl_unload());
/*
 * Module registration: declares the "icl" module at SI_SUB_DRIVERS /
 * SI_ORDER_FIRST with module version 1.
 * NOTE(review): the initialiser of icl_data is on lines missing from this
 * listing.
 */
1303 moduledata_t icl_data = {
1309 DECLARE_MODULE(icl, icl_data, SI_SUB_DRIVERS, SI_ORDER_FIRST);
1310 MODULE_VERSION(icl, 1);