2 * Copyright (c) 2012 The FreeBSD Foundation
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * iSCSI Common Layer. It's used by both the initiator and target to send
34 * and receive iSCSI PDUs.
37 #include <sys/param.h>
38 #include <sys/capability.h>
39 #include <sys/condvar.h>
42 #include <sys/kernel.h>
43 #include <sys/kthread.h>
46 #include <sys/mutex.h>
47 #include <sys/module.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/sysctl.h>
51 #include <sys/systm.h>
55 #include <netinet/in.h>
56 #include <netinet/tcp.h>
59 #include "iscsi_proto.h"
/*
 * kern.icl sysctl tree and loader tunables, followed by the file-scope
 * allocator zones and the connection counter.
 * NOTE(review): the definition of the debug variable and the end of the
 * partial_receive_len description string are not visible in this listing.
 */
61 SYSCTL_NODE(_kern, OID_AUTO, icl, CTLFLAG_RD, 0, "iSCSI Common Layer");
63 TUNABLE_INT("kern.icl.debug", &debug);
64 SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RWTUN,
65 &debug, 1, "Enable debug messages");
/*
 * Upper bound applied by icl_pdu_data_segment_receive_len() to the number
 * of data segment bytes it asks for in one step.
 */
66 static int partial_receive_len = 1 * 1024; /* XXX: More? */
67 TUNABLE_INT("kern.icl.partial_receive_len", &partial_receive_len);
68 SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
69 &partial_receive_len, 1 * 1024, "Minimum read size for partially received "
/*
 * Socket buffer sizes handed to soreserve() in icl_conn_start(), which
 * raises them to its computed minimum when they are set too low.
 */
71 static int sendspace = 1048576;
72 TUNABLE_INT("kern.icl.sendspace", &sendspace);
73 SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN,
74 &sendspace, 1048576, "Default send socket buffer size");
75 static int recvspace = 1048576;
76 TUNABLE_INT("kern.icl.recvspace", &recvspace);
77 SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN,
78 &recvspace, 1048576, "Default receive socket buffer size");
/* UMA zones used to allocate struct icl_conn and struct icl_pdu. */
80 static uma_zone_t icl_conn_zone;
81 static uma_zone_t icl_pdu_zone;
/* Reference count acquired in icl_conn_new() and released in icl_conn_free(). */
83 static volatile u_int icl_ncons;
/*
 * Debug logging helper: prints the name of the calling function, the
 * formatted message X and a trailing newline via printf().
 * NOTE(review): the lines that wrap this printf (and any check of the
 * debug level) are not visible in this listing.
 */
85 #define ICL_DEBUG(X, ...) \
87 printf("%s: " X "\n", __func__, ## __VA_ARGS__);\
/*
 * Warning logging helper: same shape as ICL_DEBUG, but the output is
 * prefixed with WARNING: before the calling function name.
 * NOTE(review): the lines that wrap this printf are not visible in this
 * listing.
 */
90 #define ICL_WARN(X, ...) \
92 printf("WARNING: %s: " X "\n", \
93 __func__, ## __VA_ARGS__); \
/*
 * Wrappers around mtx_lock(), mtx_unlock() and mtx_assert() operating on
 * the mutex pointed to by the ic_lock member of connection X.
 * The argument is parenthesized so that an expression argument (for
 * example a cast or pointer arithmetic) binds before the -> operator.
 */
96 #define ICL_CONN_LOCK(X) mtx_lock((X)->ic_lock)
97 #define ICL_CONN_UNLOCK(X) mtx_unlock((X)->ic_lock)
98 #define ICL_CONN_LOCK_ASSERT(X) mtx_assert((X)->ic_lock, MA_OWNED)
99 #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert((X)->ic_lock, MA_NOTOWNED)
/*
 * Flag the connection as failed by storing EDOOFUS in so_error of the
 * connection socket.
 * NOTE(review): the action taken when ic_socket is NULL is on a line not
 * visible in this listing (presumably an early return).
 */
102 icl_conn_fail(struct icl_conn *ic)
104 if (ic->ic_socket == NULL)
110 ic->ic_socket->so_error = EDOOFUS;
/*
 * Receive data from the connection socket into an mbuf chain using a
 * non-blocking soreceive() call (MSG_DONTWAIT).  A soreceive() error and
 * a non-zero residual count (short read) are both logged via ICL_DEBUG.
 * NOTE(review): the uio setup that uses len and the return statements are
 * not visible in this listing; callers in this file treat a NULL result
 * as a failure.
 */
115 icl_conn_receive(struct icl_conn *ic, size_t len)
124 memset(&uio, 0, sizeof(uio));
127 flags = MSG_DONTWAIT;
128 error = soreceive(so, NULL, &uio, &m, NULL, &flags);
130 ICL_DEBUG("soreceive error %d", error);
133 if (uio.uio_resid != 0) {
135 ICL_DEBUG("short read");
/*
 * Allocate a zero-filled icl_pdu from icl_pdu_zone.  The caller-supplied
 * flags are passed to uma_zalloc() together with M_ZERO.
 * ic_outstanding_pdus is incremented before the allocation; the visible
 * warning path releases that reference again.
 * NOTE(review): the NULL check, the ip_conn assignment and the return
 * statements are not visible in this listing.
 */
142 static struct icl_pdu *
143 icl_pdu_new(struct icl_conn *ic, int flags)
148 refcount_acquire(&ic->ic_outstanding_pdus);
150 ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
152 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
154 refcount_release(&ic->ic_outstanding_pdus);
/*
 * Release a PDU: free its BHS, AHS and data mbuf chains, return the
 * structure to icl_pdu_zone and drop one ic_outstanding_pdus reference.
 * NOTE(review): the line that assigns ic is not visible in this listing;
 * it must happen before uma_zfree() since ic is used afterwards.
 */
165 icl_pdu_free(struct icl_pdu *ip)
171 m_freem(ip->ip_bhs_mbuf);
172 m_freem(ip->ip_ahs_mbuf);
173 m_freem(ip->ip_data_mbuf);
174 uma_zfree(icl_pdu_zone, ip);
/* Balances the refcount_acquire() done in icl_pdu_new(). */
176 refcount_release(&ic->ic_outstanding_pdus);
181 * Allocate icl_pdu with empty BHS to fill up by the caller.
/*
 * Allocate a PDU via icl_pdu_new() and attach a packet-header mbuf holding
 * a zero-filled Basic Header Segment for the caller to fill in.  ip_bhs
 * points into the mbuf data and m_len is set to the BHS size.
 * flags is forwarded to both icl_pdu_new() and m_getm2().
 * The allocation warning reports the size that was actually requested
 * from m_getm2(), i.e. the BHS size rather than the size of the icl_pdu.
 * NOTE(review): the NULL checks after icl_pdu_new(), the cleanup on mbuf
 * allocation failure and the return statements are not visible in this
 * listing.
 */
184 icl_pdu_new_bhs(struct icl_conn *ic, int flags)
188 ip = icl_pdu_new(ic, flags);
192 ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs),
193 flags, MT_DATA, M_PKTHDR);
194 if (ip->ip_bhs_mbuf == NULL) {
195 ICL_WARN("failed to allocate %zd bytes", sizeof(struct iscsi_bhs));
199 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
200 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
201 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
/*
 * Return the AHS length of a PDU in bytes: the bhs_total_ahs_len header
 * field multiplied by 4 (the field is expressed in four-byte words per
 * RFC 3720).
 */
207 icl_pdu_ahs_length(const struct icl_pdu *request)
210 return (request->ip_bhs->bhs_total_ahs_len * 4);
/*
 * Decode the data segment length from the three bhs_data_segment_len
 * elements of the BHS, reading element 0 first and element 2 last.
 * NOTE(review): the initialisation of len, the shift steps between the
 * additions and the return statement are not visible in this listing;
 * presumably len is shifted left by 8 between the additions, mirroring
 * icl_pdu_set_data_segment_length().
 */
214 icl_pdu_data_segment_length(const struct icl_pdu *request)
218 len += request->ip_bhs->bhs_data_segment_len[0];
220 len += request->ip_bhs->bhs_data_segment_len[1];
222 len += request->ip_bhs->bhs_data_segment_len[2];
/*
 * Encode len into the three-element bhs_data_segment_len field of the BHS
 * in big-endian order: element 0 receives len >> 16, element 1 receives
 * len >> 8 and element 2 receives len itself.
 * NOTE(review): assumes each element is one byte wide so that the stores
 * truncate to the intended 8 bits - confirm against iscsi_proto.h.
 */
228 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
231 response->ip_bhs->bhs_data_segment_len[2] = len;
232 response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
233 response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
/*
 * Return the number of pad bytes needed to round ip_data_len up to a
 * multiple of four.
 * NOTE(review): the return for the already-aligned case is not visible in
 * this listing (presumably 0).
 */
237 icl_pdu_padding(const struct icl_pdu *ip)
240 if ((ip->ip_data_len % 4) != 0)
241 return (4 - (ip->ip_data_len % 4));
/*
 * Compute the size in bytes of an outgoing PDU: BHS plus data plus
 * padding, plus the header digest when ic_header_crc32c is set, plus the
 * data digest when the PDU carries data and ic_data_crc32c is set.
 * Asserts that the PDU has no AHS, so AHS is never part of the sum.
 * NOTE(review): the return statement is not visible in this listing.
 */
247 icl_pdu_size(const struct icl_pdu *response)
251 KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
253 len = sizeof(struct iscsi_bhs) + response->ip_data_len +
254 icl_pdu_padding(response);
255 if (response->ip_conn->ic_header_crc32c)
256 len += ISCSI_HEADER_DIGEST_SIZE;
/* No data digest is sent for a PDU with an empty data segment. */
257 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
258 len += ISCSI_DATA_DIGEST_SIZE;
/*
 * Receive the Basic Header Segment of a PDU.  Reads sizeof(struct
 * iscsi_bhs) bytes from the connection, runs m_pullup() so the header is
 * contiguous in the first mbuf, points ip_bhs at it and subtracts the BHS
 * size from *availablep.  Receive and m_pullup() failures are logged.
 * NOTE(review): the NULL check after icl_conn_receive() and the return
 * statements are not visible in this listing.
 */
264 icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep)
268 m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs));
270 ICL_DEBUG("failed to receive BHS");
274 request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs));
275 if (request->ip_bhs_mbuf == NULL) {
276 ICL_WARN("m_pullup failed");
279 request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *);
282 * XXX: For architectures with strict alignment requirements
283 * we may need to allocate ip_bhs and copy the data into it.
284 * For some reason, though, not doing this doesn't seem
285 * to cause problems; tested on sparc64.
288 *availablep -= sizeof(struct iscsi_bhs);
/*
 * Receive the Additional Header Segment of a PDU, if it has one.
 * ip_ahs_len is taken from the BHS via icl_pdu_ahs_length(); when it is
 * non-zero that many bytes are read into ip_ahs_mbuf and subtracted from
 * *availablep.  A failed receive is logged.
 * NOTE(review): the action for a zero AHS length (presumably an early
 * return) and the return statements are not visible in this listing.
 */
293 icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep)
296 request->ip_ahs_len = icl_pdu_ahs_length(request);
297 if (request->ip_ahs_len == 0)
300 request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn,
301 request->ip_ahs_len);
302 if (request->ip_ahs_mbuf == NULL) {
303 ICL_DEBUG("failed to receive AHS");
307 *availablep -= request->ip_ahs_len;
/*
 * Compute a CRC32C digest over the payload of every mbuf in the chain
 * starting at m0.  The running value starts at 0xffffffff, is fed each
 * mbuf in turn through calculate_crc32c(), and is inverted (XOR with
 * 0xffffffff) at the end.
 * NOTE(review): the return statement is not visible in this listing.
 */
312 icl_mbuf_to_crc32c(const struct mbuf *m0)
314 uint32_t digest = 0xffffffff;
315 const struct mbuf *m;
317 for (m = m0; m != NULL; m = m->m_next)
318 digest = calculate_crc32c(digest,
319 mtod(m, const void *), m->m_len);
321 digest = digest ^ 0xffffffff;
/*
 * Receive and verify the header digest of an incoming PDU.  Reads
 * ISCSI_HEADER_DIGEST_SIZE bytes from the connection, copies them into
 * received_digest, subtracts that size from *availablep and compares the
 * value with the CRC32C of ip_bhs_mbuf.  A mismatch is reported with
 * ICL_WARN.
 * Only the BHS mbuf is checksummed here; the AHS mbuf is not included.
 * icl_conn_receive_pdus() drops PDUs that carry an AHS.
 * NOTE(review): the action taken when ic_header_crc32c is false
 * (presumably an early return), the NULL check on m, the release of m and
 * the return statements are not visible in this listing.
 */
327 icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep)
330 uint32_t received_digest, valid_digest;
332 if (request->ip_conn->ic_header_crc32c == false)
335 m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE);
337 ICL_DEBUG("failed to receive header digest");
/* Compile-time check that the digest fits received_digest exactly. */
341 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
342 m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest);
345 *availablep -= ISCSI_HEADER_DIGEST_SIZE;
350 valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
351 if (received_digest != valid_digest) {
352 ICL_WARN("header digest check failed; got 0x%x, "
353 "should be 0x%x", received_digest, valid_digest);
361 * Return the number of bytes that should be waiting in the receive socket
362 * before icl_pdu_receive_data_segment() gets called.
/*
 * Compute how many bytes the receive path should wait for before the next
 * call to icl_pdu_receive_data_segment().  Starts from the data segment
 * length in the BHS, subtracts the ip_data_len bytes already received,
 * caps the result at partial_receive_len, and otherwise adjusts it for
 * padding to a four-byte boundary.
 * NOTE(review): the handling of a zero-length data segment, the condition
 * guarding the padding adjustment, the preprocessor lines around the
 * ICL_DEBUG calls and the return statements are not visible in this
 * listing.
 */
365 icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
369 len = icl_pdu_data_segment_length(request);
374 * Account for the parts of data segment already read from
377 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
378 len -= request->ip_data_len;
381 * Don't always wait for the full data segment to be delivered
382 * to the socket; this might badly affect performance due to
383 * TCP window scaling.
385 if (len > partial_receive_len) {
387 ICL_DEBUG("need %zd bytes of data, limiting to %zd",
388 len, partial_receive_len));
390 len = partial_receive_len;
396 * Account for padding. Note that due to the way code is written,
397 * the icl_pdu_receive_data_segment() must always receive padding
398 * along with the last part of data segment, because it would be
399 * impossible to tell whether we've already received the full data
400 * segment including padding, or without it.
403 len += 4 - (len % 4);
406 ICL_DEBUG("need %zd bytes of data", len));
/*
 * Receive the next part of the data segment of an incoming PDU.
 * On entry *more_neededp is cleared and ic_receive_len is reset to 0.
 * The remaining length is the BHS data segment length minus ip_data_len.
 * When the socket does not hold the remaining data plus padding, the
 * read is limited to what is available and *more_neededp is set to true.
 * The received mbufs become ip_data_mbuf or are appended to it with
 * m_cat(); ip_data_len grows by the number of data bytes and *availablep
 * shrinks by the amount consumed.
 * NOTE(review): the conditions around the padding computation, the
 * zero-length case, the failure handling after icl_conn_receive(), the
 * use made of the final icl_pdu_data_segment_receive_len() result and the
 * return statements are not visible in this listing.
 */
413 icl_pdu_receive_data_segment(struct icl_pdu *request,
414 size_t *availablep, bool *more_neededp)
417 size_t len, padding = 0;
420 ic = request->ip_conn;
422 *more_neededp = false;
423 ic->ic_receive_len = 0;
425 len = icl_pdu_data_segment_length(request);
430 padding = 4 - (len % 4);
433 * Account for already received parts of data segment.
435 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
436 len -= request->ip_data_len;
438 if (len + padding > *availablep) {
440 * Not enough data in the socket buffer. Receive as much
441 * as we can. Don't receive padding, since, obviously, it's
442 * not the end of data segment yet.
445 ICL_DEBUG("limited from %zd to %zd",
446 len + padding, *availablep - padding));
448 len = *availablep - padding;
449 *more_neededp = true;
454 * Must not try to receive padding without at least one byte
455 * of actual data segment.
458 m = icl_conn_receive(request->ip_conn, len + padding);
460 ICL_DEBUG("failed to receive data segment");
464 if (request->ip_data_mbuf == NULL)
465 request->ip_data_mbuf = m;
467 m_cat(request->ip_data_mbuf, m);
469 request->ip_data_len += len;
470 *availablep -= len + padding;
476 icl_pdu_data_segment_receive_len(request);
/*
 * Receive and verify the data digest of an incoming PDU.  Reads
 * ISCSI_DATA_DIGEST_SIZE bytes, copies them into received_digest,
 * subtracts that size from *availablep and compares the value with the
 * CRC32C of the whole ip_data_mbuf chain.  A mismatch is reported with
 * ICL_WARN.
 * NOTE(review): the actions taken when ic_data_crc32c is false or
 * ip_data_len is 0 (presumably early returns), the NULL check on m, the
 * release of m and the return statements are not visible in this listing.
 */
482 icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep)
485 uint32_t received_digest, valid_digest;
487 if (request->ip_conn->ic_data_crc32c == false)
490 if (request->ip_data_len == 0)
493 m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE);
495 ICL_DEBUG("failed to receive data digest");
/* Compile-time check that the digest fits received_digest exactly. */
499 CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
500 m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest);
503 *availablep -= ISCSI_DATA_DIGEST_SIZE;
506 * Note that ip_data_mbuf also contains padding; since digest
507 * calculation is supposed to include that, we iterate over
508 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
510 valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
511 if (received_digest != valid_digest) {
512 ICL_WARN("data digest check failed; got 0x%x, "
513 "should be 0x%x", received_digest, valid_digest);
521 * Somewhat contrary to the name, this attempts to receive only one
522 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
/*
 * Advance the per-connection receive state machine by one step.
 * The states run BHS, AHS, HEADER_DIGEST, DATA, DATA_DIGEST and then back
 * to BHS.  In the BHS state a new PDU is allocated with M_NOWAIT and
 * remembered in ic_receive_pdu; in every other state the PDU in progress
 * is picked up from there.  Each state sets ic_receive_state and
 * ic_receive_len for the step that follows.  After the data digest step
 * the state is reset for the next BHS and ic_receive_pdu is cleared.
 * A data segment length above ic_max_data_segment_length is rejected
 * right after the BHS has been read.
 * *availablep is the number of bytes known to be in the socket buffer and
 * is decremented by the helpers as data is consumed.
 * The data segment failure message now has a space after the semicolon;
 * the two adjacent literals used to concatenate to segment;dropping.
 * NOTE(review): the error checks after each helper call, the break and
 * return statements and the failure handling around the final
 * icl_pdu_free() are not visible in this listing.
 */
524 static struct icl_pdu *
525 icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep)
527 struct icl_pdu *request;
535 if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
536 KASSERT(ic->ic_receive_pdu == NULL,
537 ("ic->ic_receive_pdu != NULL"));
538 request = icl_pdu_new(ic, M_NOWAIT);
539 if (request == NULL) {
540 ICL_DEBUG("failed to allocate PDU; "
541 "dropping connection");
545 ic->ic_receive_pdu = request;
547 KASSERT(ic->ic_receive_pdu != NULL,
548 ("ic->ic_receive_pdu == NULL"));
549 request = ic->ic_receive_pdu;
552 if (*availablep < ic->ic_receive_len) {
554 ICL_DEBUG("not enough data; need %zd, "
555 "have %zd", ic->ic_receive_len, *availablep);
560 switch (ic->ic_receive_state) {
561 case ICL_CONN_STATE_BHS:
562 //ICL_DEBUG("receiving BHS");
563 error = icl_pdu_receive_bhs(request, availablep);
565 ICL_DEBUG("failed to receive BHS; "
566 "dropping connection");
571 * We don't enforce any limit for AHS length;
572 * its length is stored in 8 bit field.
575 len = icl_pdu_data_segment_length(request);
576 if (len > ic->ic_max_data_segment_length) {
577 ICL_WARN("received data segment "
578 "length %zd is larger than negotiated "
579 "MaxDataSegmentLength %zd; "
580 "dropping connection",
581 len, ic->ic_max_data_segment_length);
586 ic->ic_receive_state = ICL_CONN_STATE_AHS;
587 ic->ic_receive_len = icl_pdu_ahs_length(request);
590 case ICL_CONN_STATE_AHS:
591 //ICL_DEBUG("receiving AHS");
592 error = icl_pdu_receive_ahs(request, availablep);
594 ICL_DEBUG("failed to receive AHS; "
595 "dropping connection");
598 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
599 if (ic->ic_header_crc32c == false)
600 ic->ic_receive_len = 0;
602 ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
605 case ICL_CONN_STATE_HEADER_DIGEST:
606 //ICL_DEBUG("receiving header digest");
607 error = icl_pdu_check_header_digest(request, availablep);
609 ICL_DEBUG("header digest failed; "
610 "dropping connection");
614 ic->ic_receive_state = ICL_CONN_STATE_DATA;
616 icl_pdu_data_segment_receive_len(request);
619 case ICL_CONN_STATE_DATA:
620 //ICL_DEBUG("receiving data segment");
621 error = icl_pdu_receive_data_segment(request, availablep,
624 ICL_DEBUG("failed to receive data segment; "
625 "dropping connection");
632 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
633 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
634 ic->ic_receive_len = 0;
636 ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
639 case ICL_CONN_STATE_DATA_DIGEST:
640 //ICL_DEBUG("receiving data digest");
641 error = icl_pdu_check_data_digest(request, availablep);
643 ICL_DEBUG("data digest failed; "
644 "dropping connection");
649 * We've received complete PDU; reset the receive state machine
650 * and return the PDU.
652 ic->ic_receive_state = ICL_CONN_STATE_BHS;
653 ic->ic_receive_len = sizeof(struct iscsi_bhs);
654 ic->ic_receive_pdu = NULL;
658 panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
662 icl_pdu_free(request);
/*
 * Drain the socket: repeatedly call icl_conn_receive_pdu() and hand every
 * completed PDU to the ic_receive callback of the connection.
 * The visible checks cover a disconnecting connection, a non-zero
 * so_error, fewer than ic_receive_len bytes available, and a NULL result
 * from icl_conn_receive_pdu().  A PDU that carries an AHS is logged as
 * unsupported and freed instead of being delivered.
 * NOTE(review): the loop construct, the actions attached to each check
 * and the return statements are not visible in this listing.
 */
670 icl_conn_receive_pdus(struct icl_conn *ic, size_t available)
672 struct icl_pdu *response;
678 * This can never happen; we're careful to only mess with ic->ic_socket
679 * pointer when the send/receive threads are not running.
681 KASSERT(so != NULL, ("NULL socket"));
684 if (ic->ic_disconnecting)
687 if (so->so_error != 0) {
688 ICL_DEBUG("connection error %d; "
689 "dropping connection", so->so_error);
695 * Loop until we have a complete PDU or there is not enough
696 * data in the socket buffer.
698 if (available < ic->ic_receive_len) {
700 ICL_DEBUG("not enough data; have %zd, "
701 "need %zd", available,
707 response = icl_conn_receive_pdu(ic, &available);
708 if (response == NULL)
711 if (response->ip_ahs_len > 0) {
712 ICL_WARN("received PDU with unsupported "
713 "AHS; opcode 0x%x; dropping connection",
714 response->ip_bhs->bhs_opcode);
715 icl_pdu_free(response);
720 (ic->ic_receive)(response);
/*
 * Body of the receive kernel thread.  ic_receive_running is true while
 * the thread is active and false once it is done.  Each pass checks
 * ic_disconnecting, samples the byte count of the receive socket buffer
 * under the sockbuf lock and, when fewer than ic_receive_len bytes are
 * present, raises the low watermark to ic_receive_len and sleeps on
 * ic_receive_cv using the sockbuf mutex.  It then calls
 * icl_conn_receive_pdus() with the sampled count.
 * NOTE(review): in the visible code available is sampled before the wait
 * and not re-read after it.  The loop construct, the else branch of the
 * watermark test, the assignment of ic and so and the thread exit are not
 * visible in this listing.
 */
725 icl_receive_thread(void *arg)
735 ic->ic_receive_running = true;
739 if (ic->ic_disconnecting) {
740 //ICL_DEBUG("terminating");
744 SOCKBUF_LOCK(&so->so_rcv);
745 available = so->so_rcv.sb_cc;
746 if (available < ic->ic_receive_len) {
747 so->so_rcv.sb_lowat = ic->ic_receive_len;
748 cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
750 SOCKBUF_UNLOCK(&so->so_rcv);
752 icl_conn_receive_pdus(ic, available);
756 ic->ic_receive_running = false;
/*
 * Socket upcall registered for the receive buffer in icl_conn_start():
 * wakes the receive thread by signalling ic_receive_cv.
 * NOTE(review): the assignment of ic (presumably from arg) and the return
 * value are not visible in this listing.
 */
762 icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
767 cv_signal(&ic->ic_receive_cv);
/*
 * Serialise one PDU and hand it to the socket.  Must be called with the
 * connection lock held (asserted).
 * Steps: write ip_data_len into the BHS length field, compute the total
 * PDU size, append the header digest to the BHS mbuf when
 * ic_header_crc32c is set, and for a non-empty data segment append the
 * padding and, when ic_data_crc32c is set, the data digest, then chain
 * the data mbufs behind the BHS.  The packet header length is set to the
 * computed size and the chain is passed to sosend() with MSG_DONTWAIT.
 * The mbuf pointers in the PDU are cleared once ownership has moved on.
 * The warning for a failed data digest append now names the data digest;
 * it used to repeat the header digest message.
 * NOTE(review): the remaining m_append() arguments, the checks on ok and
 * error and the return statements are not visible in this listing.
 */
772 icl_pdu_send(struct icl_pdu *request)
774 size_t padding, pdu_len;
775 uint32_t digest, zero = 0;
780 ic = request->ip_conn;
781 so = request->ip_conn->ic_socket;
783 ICL_CONN_LOCK_ASSERT(ic);
785 icl_pdu_set_data_segment_length(request, request->ip_data_len);
787 pdu_len = icl_pdu_size(request);
789 if (ic->ic_header_crc32c) {
790 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
791 ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
794 ICL_WARN("failed to append header digest");
799 if (request->ip_data_len != 0) {
800 padding = icl_pdu_padding(request);
802 ok = m_append(request->ip_data_mbuf, padding,
805 ICL_WARN("failed to append padding");
810 if (ic->ic_data_crc32c) {
811 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
813 ok = m_append(request->ip_data_mbuf, sizeof(digest),
816 ICL_WARN("failed to append data digest");
821 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
822 request->ip_data_mbuf = NULL;
825 request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
827 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
828 NULL, MSG_DONTWAIT, curthread);
829 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
831 ICL_DEBUG("sosend error %d", error);
/*
 * Send queued PDUs for as long as the send socket buffer has room.  Must
 * be called with the connection lock held (asserted).
 * The free space is sampled once with sbspace() under the sockbuf lock.
 * For each PDU at the head of ic_to_send: when its size exceeds the
 * available space the send low watermark is set to that size; otherwise
 * the PDU is removed from the queue, passed to icl_pdu_send() and freed.
 * NOTE(review): the actions for a disconnecting connection and for a
 * failed send, the update of available after each send and the return
 * statements are not visible in this listing.
 */
839 icl_conn_send_pdus(struct icl_conn *ic)
841 struct icl_pdu *request;
843 size_t available, size;
846 ICL_CONN_LOCK_ASSERT(ic);
850 SOCKBUF_LOCK(&so->so_snd);
851 available = sbspace(&so->so_snd);
852 SOCKBUF_UNLOCK(&so->so_snd);
854 while (!STAILQ_EMPTY(&ic->ic_to_send)) {
855 if (ic->ic_disconnecting)
858 request = STAILQ_FIRST(&ic->ic_to_send);
859 size = icl_pdu_size(request);
860 if (available < size) {
862 * Set the low watermark on the socket,
863 * to avoid waking up until there is enough
866 SOCKBUF_LOCK(&so->so_snd);
867 so->so_snd.sb_lowat = size;
868 SOCKBUF_UNLOCK(&so->so_snd);
870 ICL_DEBUG("no space to send; "
871 "have %zd, need %zd",
877 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
878 error = icl_pdu_send(request);
880 ICL_DEBUG("failed to send PDU; "
881 "dropping connection");
885 icl_pdu_free(request);
/*
 * Body of the send kernel thread.  ic_send_running is true while the
 * thread is active and false once it is done.  Each pass checks
 * ic_disconnecting, flushes the queue with icl_conn_send_pdus() and then
 * sleeps on ic_send_cv using the connection lock.
 * NOTE(review): the loop construct, the locking around it, the assignment
 * of ic and the thread exit are not visible in this listing.
 */
890 icl_send_thread(void *arg)
897 ic->ic_send_running = true;
900 if (ic->ic_disconnecting) {
901 //ICL_DEBUG("terminating");
904 icl_conn_send_pdus(ic);
905 cv_wait(&ic->ic_send_cv, ic->ic_lock);
908 ic->ic_send_running = false;
/*
 * Socket upcall registered for the send buffer in icl_conn_start(): wakes
 * the send thread by signalling ic_send_cv.
 * NOTE(review): the assignment of ic (presumably from arg) and the return
 * value are not visible in this listing.
 */
914 icl_soupcall_send(struct socket *so, void *arg, int waitflag)
919 cv_signal(&ic->ic_send_cv);
/*
 * Append len bytes starting at addr to the data segment of a PDU.
 * A new mbuf chain large enough for len bytes is allocated with m_getm2()
 * using the caller-supplied flags, and the source is copied into it one
 * mbuf at a time, each copy limited to the trailing space of that mbuf.
 * The chain becomes ip_data_mbuf when the PDU has no data yet, otherwise
 * it is joined to the existing chain with m_cat(); ip_data_len ends up
 * increased by len in both cases.  len must be greater than zero
 * (asserted).
 * NOTE(review): the allocation failure check, the updates of m_len and
 * off inside the loop and the return statements are not visible in this
 * listing.
 */
924 icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, int flags)
926 struct mbuf *mb, *newmb;
927 size_t copylen, off = 0;
929 KASSERT(len > 0, ("len == 0"));
931 newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR);
933 ICL_WARN("failed to allocate mbuf for %zd bytes", len);
937 for (mb = newmb; mb != NULL; mb = mb->m_next) {
938 copylen = min(M_TRAILINGSPACE(mb), len - off);
939 memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
/* Every byte of the source must have been placed in the chain. */
943 KASSERT(off == len, ("%s: off != len", __func__));
945 if (request->ip_data_mbuf == NULL) {
946 request->ip_data_mbuf = newmb;
947 request->ip_data_len = len;
949 m_cat(request->ip_data_mbuf, newmb);
950 request->ip_data_len += len;
/*
 * Copy len bytes, starting off bytes into the data segment mbuf chain of
 * the PDU, to the buffer at addr using m_copydata().  No bounds check is
 * made in the visible code.
 */
957 icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len)
960 m_copydata(ip->ip_data_mbuf, off, len, addr);
/*
 * Queue a PDU for transmission: append it to ic_to_send and signal the
 * send thread through ic_send_cv.  Must be called with the connection
 * lock held (asserted).  A connection that is disconnecting or has no
 * socket is logged as closed.
 * NOTE(review): the assignment of ic and what happens to the PDU on the
 * closed-connection path are not visible in this listing.
 */
964 icl_pdu_queue(struct icl_pdu *ip)
970 ICL_CONN_LOCK_ASSERT(ic);
972 if (ic->ic_disconnecting || ic->ic_socket == NULL) {
973 ICL_DEBUG("icl_pdu_queue on closed connection");
977 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
978 cv_signal(&ic->ic_send_cv);
/*
 * Create a connection object.  Takes a reference on icl_ncons, allocates
 * a zero-filled icl_conn with M_WAITOK, initialises the send queue, both
 * condition variables and the outstanding PDU count, and sets the maximum
 * data segment length to ICL_MAX_DATA_SEGMENT_LENGTH.
 * NOTE(review): the lines that use name and lock and the return statement
 * are not visible in this listing; presumably both are stored in the new
 * connection.
 */
982 icl_conn_new(const char *name, struct mtx *lock)
986 refcount_acquire(&icl_ncons);
988 ic = uma_zalloc(icl_conn_zone, M_WAITOK | M_ZERO);
990 STAILQ_INIT(&ic->ic_to_send);
992 cv_init(&ic->ic_send_cv, "icl_tx");
993 cv_init(&ic->ic_receive_cv, "icl_rx");
995 refcount_init(&ic->ic_outstanding_pdus, 0);
997 ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
/*
 * Destroy a connection object: tear down both condition variables, return
 * the structure to icl_conn_zone and drop the icl_ncons reference taken
 * in icl_conn_new().
 */
1004 icl_conn_free(struct icl_conn *ic)
1007 cv_destroy(&ic->ic_send_cv);
1008 cv_destroy(&ic->ic_receive_cv);
1009 uma_zfree(icl_conn_zone, ic);
1010 refcount_release(&icl_ncons);
/*
 * Bring a connection with an attached socket into service.
 * Under the connection lock the receive state machine is reset to expect
 * a BHS and ic_disconnecting is cleared.  The function then makes sure
 * the kern.icl.sendspace and kern.icl.recvspace values are at least the
 * size of one maximum PDU (raising them with a warning otherwise),
 * reserves socket buffer space, sets the TCP_NODELAY option, starts the
 * send and receive kernel threads and finally registers the send and
 * receive socket upcalls.
 * The warning for a failed sosetopt() now says setting rather than
 * disabling: the request is a SOPT_SET of TCP_NODELAY, so the old wording
 * described the opposite operation.
 * NOTE(review): the error checks after each call, the cleanup on failure,
 * the remaining arguments of the warnings and kthread_add() calls and the
 * return statements are not visible in this listing.
 */
1014 icl_conn_start(struct icl_conn *ic)
1025 if (ic->ic_socket == NULL) {
1026 ICL_CONN_UNLOCK(ic);
1030 ic->ic_receive_state = ICL_CONN_STATE_BHS;
1031 ic->ic_receive_len = sizeof(struct iscsi_bhs);
1032 ic->ic_disconnecting = false;
1034 ICL_CONN_UNLOCK(ic);
1037 * For sendspace, this is required because the current code cannot
1038 * send a PDU in pieces; thus, the minimum buffer size is equal
1039 * to the maximum PDU size. "+4" is to account for possible padding.
1041 * What we should actually do here is to use autoscaling, but set
1042 * some minimal buffer size to "minspace". I don't know a way to do
1045 minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
1046 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
1047 if (sendspace < minspace) {
1048 ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
1050 sendspace = minspace;
1052 if (recvspace < minspace) {
1053 ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
1055 recvspace = minspace;
1058 error = soreserve(ic->ic_socket, sendspace, recvspace);
1060 ICL_WARN("soreserve failed with error %d", error);
1068 bzero(&opt, sizeof(opt));
1069 opt.sopt_dir = SOPT_SET;
1070 opt.sopt_level = IPPROTO_TCP;
1071 opt.sopt_name = TCP_NODELAY;
1072 opt.sopt_val = &one;
1073 opt.sopt_valsize = sizeof(one);
1074 error = sosetopt(ic->ic_socket, &opt);
1076 ICL_WARN("setting TCP_NODELAY failed with error %d", error);
1084 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
1087 ICL_WARN("kthread_add(9) failed with error %d", error);
1092 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
1095 ICL_WARN("kthread_add(9) failed with error %d", error);
1101 * Register socket upcall, to get notified about incoming PDUs
1102 * and free space to send outgoing ones.
1104 SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1105 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
1106 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1107 SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1108 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
1109 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
/*
 * Take over the socket behind file descriptor fd of the current thread
 * and start the connection on it.  Must be called without the connection
 * lock held (asserted).
 * The file is looked up with CAP_SOCK_CLIENT rights.  Files that are not
 * sockets, sockets that are not SOCK_STREAM and connections that already
 * have a socket are rejected, dropping the file reference each time.
 * Otherwise the socket pointer is moved into ic_socket, the file ops are
 * replaced with badfileops, the file reference is dropped and
 * icl_conn_start() is called.
 * NOTE(review): the assignment of so, the error values returned on each
 * rejection path, the locking call before the ic_socket test and the
 * handling of the icl_conn_start() result are not visible in this
 * listing.
 */
1115 icl_conn_handoff(struct icl_conn *ic, int fd)
1119 cap_rights_t rights;
1122 ICL_CONN_LOCK_ASSERT_NOT(ic);
1125 * Steal the socket from userland.
1127 error = fget(curthread, fd,
1128 cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1131 if (fp->f_type != DTYPE_SOCKET) {
1132 fdrop(fp, curthread);
1136 if (so->so_type != SOCK_STREAM) {
1137 fdrop(fp, curthread);
1143 if (ic->ic_socket != NULL) {
1144 ICL_CONN_UNLOCK(ic);
1145 fdrop(fp, curthread);
1149 ic->ic_socket = fp->f_data;
1150 fp->f_ops = &badfileops;
1152 fdrop(fp, curthread);
1153 ICL_CONN_UNLOCK(ic);
1155 error = icl_conn_start(ic);
/*
 * Shut down both directions of the connection socket with
 * soshutdown(SHUT_RDWR).  Must be called without the connection lock held
 * (asserted).  The socket pointer is tested for NULL under the lock, and
 * the lock is released before soshutdown() is called.
 * NOTE(review): the locking call and the early return for a NULL socket
 * are not visible in this listing.
 */
1161 icl_conn_shutdown(struct icl_conn *ic)
1163 ICL_CONN_LOCK_ASSERT_NOT(ic);
1166 if (ic->ic_socket == NULL) {
1167 ICL_CONN_UNLOCK(ic);
1170 ICL_CONN_UNLOCK(ic);
1172 soshutdown(ic->ic_socket, SHUT_RDWR);
/*
 * Close a connection.  Must be called without the connection lock held
 * (asserted).
 * Sets ic_disconnecting, signals both condition variables and then polls,
 * pausing up to one second per round and signalling again, until neither
 * the send nor the receive thread is marked as running.  After that the
 * socket is closed and cleared, a partially received PDU is freed and the
 * send queue is emptied.  Assertions check that the queue is empty and
 * that no PDUs remain outstanding before the lock is released.
 * NOTE(review): the locking calls, the early return for a NULL socket,
 * the relock inside the wait loop and what is done with each PDU removed
 * from the send queue are not visible in this listing.
 */
1176 icl_conn_close(struct icl_conn *ic)
1178 struct icl_pdu *pdu;
1180 ICL_CONN_LOCK_ASSERT_NOT(ic);
1183 if (ic->ic_socket == NULL) {
1184 ICL_CONN_UNLOCK(ic);
1188 ic->ic_disconnecting = true;
1191 * Wake up the threads, so they can properly terminate.
1193 cv_signal(&ic->ic_receive_cv);
1194 cv_signal(&ic->ic_send_cv);
1195 while (ic->ic_receive_running || ic->ic_send_running) {
1196 //ICL_DEBUG("waiting for send/receive threads to terminate");
1197 ICL_CONN_UNLOCK(ic);
1198 cv_signal(&ic->ic_receive_cv);
1199 cv_signal(&ic->ic_send_cv);
1200 pause("icl_close", 1 * hz);
1203 //ICL_DEBUG("send/receive threads terminated");
1205 soclose(ic->ic_socket);
1206 ic->ic_socket = NULL;
1208 if (ic->ic_receive_pdu != NULL) {
1209 //ICL_DEBUG("freeing partially received PDU");
1210 icl_pdu_free(ic->ic_receive_pdu);
1211 ic->ic_receive_pdu = NULL;
1215 * Remove any outstanding PDUs from the send queue.
1217 while (!STAILQ_EMPTY(&ic->ic_to_send)) {
1218 pdu = STAILQ_FIRST(&ic->ic_to_send);
1219 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
1223 KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
1224 ("destroying session with non-empty send queue"));
1226 KASSERT(ic->ic_outstanding_pdus == 0,
1227 ("destroying session with %d outstanding PDUs",
1228 ic->ic_outstanding_pdus));
1230 ICL_CONN_UNLOCK(ic);
/*
 * Report whether the connection is usable.  Must be called without the
 * connection lock held (asserted).  The visible code distinguishes three
 * cases under the lock: no socket, a socket with a non-zero so_error, and
 * a socket without a pending error.
 * NOTE(review): the locking call and the value returned in each case are
 * not visible in this listing; presumably only the last case reports the
 * connection as connected.
 */
1234 icl_conn_connected(struct icl_conn *ic)
1236 ICL_CONN_LOCK_ASSERT_NOT(ic);
1239 if (ic->ic_socket == NULL) {
1240 ICL_CONN_UNLOCK(ic);
1243 if (ic->ic_socket->so_error != 0) {
1244 ICL_CONN_UNLOCK(ic);
1247 ICL_CONN_UNLOCK(ic);
/*
 * Variant of icl_conn_handoff() that is given a kernel socket directly
 * instead of a file descriptor; only built when ICL_KERNEL_PROXY is
 * defined.  Must be called without the connection lock held (asserted).
 * Sockets that are not SOCK_STREAM and connections that already have a
 * socket are rejected; otherwise icl_conn_start() is called.
 * NOTE(review): the locking call, the assignment of so to ic_socket, the
 * error values and the return statements are not visible in this listing.
 */
1251 #ifdef ICL_KERNEL_PROXY
1253 icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so)
1257 ICL_CONN_LOCK_ASSERT_NOT(ic);
1259 if (so->so_type != SOCK_STREAM)
1263 if (ic->ic_socket != NULL) {
1264 ICL_CONN_UNLOCK(ic);
1268 ICL_CONN_UNLOCK(ic);
1270 error = icl_conn_start(ic);
1274 #endif /* ICL_KERNEL_PROXY */
/*
 * Destroy both UMA zones created at load time.
 * NOTE(review): the enclosing function header is not visible in this
 * listing; presumably this is the body of icl_unload(), which
 * icl_modevent() calls.  Any check that runs before the zones are
 * destroyed is likewise not visible.
 */
1283 uma_zdestroy(icl_conn_zone);
1284 uma_zdestroy(icl_pdu_zone);
/*
 * Create the UMA zones for struct icl_conn and struct icl_pdu and reset
 * the connection reference count to zero.
 * NOTE(review): the enclosing function header and the trailing arguments
 * of both uma_zcreate() calls are not visible in this listing; presumably
 * this is the module load handler.
 */
1293 icl_conn_zone = uma_zcreate("icl_conn",
1294 sizeof(struct icl_conn), NULL, NULL, NULL, NULL,
1296 icl_pdu_zone = uma_zcreate("icl_pdu",
1297 sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
1300 refcount_init(&icl_ncons, 0);
/*
 * Module event handler referenced by the module declaration below.  The
 * only visible action is returning the result of icl_unload().
 * NOTE(review): the dispatch on what and the handling of the other module
 * events are not visible in this listing.
 */
1304 icl_modevent(module_t mod, int what, void *arg)
1312 return (icl_unload());
/*
 * Module glue: registers the icl module with the kernel at the
 * SI_SUB_DRIVERS stage, SI_ORDER_FIRST, and declares module version 1.
 * NOTE(review): the initialiser of icl_data is not visible in this
 * listing.
 */
1318 moduledata_t icl_data = {
1324 DECLARE_MODULE(icl, icl_data, SI_SUB_DRIVERS, SI_ORDER_FIRST);
1325 MODULE_VERSION(icl, 1);