/**************************************************************************

Copyright (c) 2007, Chelsio Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.

2. Neither the name of the Chelsio Corporation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/pciio.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/ioccom.h>
#include <sys/rwlock.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <net/route.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>

#include <contrib/rdma/ib_verbs.h>

#include <cxgb_include.h>
#include <ulp/tom/cxgb_tom.h>
#include <ulp/tom/cxgb_t3_ddp.h>
#include <ulp/tom/cxgb_defs.h>
#include <ulp/tom/cxgb_toepcb.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>
static char *states[] = {
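        /*
         * NOTE: the table body below is a reconstruction; it assumes the
         * conventional iwch_ep_state ordering (idle .. dead) used by this
         * driver.
         */
        "idle",
        "listen",
        "connecting",
        "mpa_req_wait",
        "mpa_req_sent",
        "mpa_req_rcvd",
        "mpa_rep_sent",
        "fpdu_mode",
        "aborting",
        "closing",
        "moribund",
        "dead",
        NULL,
};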
SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");

static int ep_timeout_secs = 10;
TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RDTUN, &ep_timeout_secs, 0,
    "CM Endpoint operation timeout in seconds (default=10)");

static int mpa_rev = 1;
TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RDTUN, &mpa_rev, 0,
    "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)");

static int markers_enabled = 0;
TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RDTUN, &markers_enabled, 0,
    "Enable MPA MARKERS (default(0)=disabled)");

static int crc_enabled = 1;
TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RDTUN, &crc_enabled, 0,
    "Enable MPA CRC (default(1)=enabled)");

static int rcv_win = 256 * 1024;
TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RDTUN, &rcv_win, 0,
    "TCP receive window in bytes (default=256KB)");

static int snd_win = 32 * 1024;
TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RDTUN, &snd_win, 0,
    "TCP send window in bytes (default=32KB)");

static unsigned int nocong = 0;
TUNABLE_INT("hw.iw_cxgb.nocong", &nocong);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RDTUN, &nocong, 0,
    "Turn off congestion control (default=0)");

static unsigned int cong_flavor = 1;
TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RDTUN, &cong_flavor, 0,
    "TCP Congestion control flavor (default=1)");
static void ep_timeout(void *arg);
static void connect_reply_upcall(struct iwch_ep *ep, int status);
static int iwch_so_upcall(struct socket *so, void *arg, int waitflag);
/*
 * Cruft to offload socket upcalls onto thread.
 */
static struct mtx req_lock;
static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list;
static struct task iw_cxgb_task;
static struct taskqueue *iw_cxgb_taskq;
static void process_req(void *ctx, int pending);
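/*
 * Socket upcalls run in the network stack's context with socket buffer
 * locks held, so iwch_so_upcall() only queues the endpoint on req_list and
 * kicks the taskqueue; process_req() then handles the actual event from a
 * sleepable thread context.
 */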
static void
start_ep_timer(struct iwch_ep *ep)
{
        CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
        if (callout_pending(&ep->timer)) {
                CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep);
                callout_deactivate(&ep->timer);
                callout_drain(&ep->timer);
        } else {
                /*
                 * XXX this looks racy
                 */
                get_ep(&ep->com);
                callout_init(&ep->timer, TRUE);
        }
        callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep);
}
static void
stop_ep_timer(struct iwch_ep *ep)
{
        CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
        callout_drain(&ep->timer);
        put_ep(&ep->com);
}
static int
set_tcpinfo(struct iwch_ep *ep)
{
        struct tcp_info ti;
        struct sockopt sopt;
        int err;

        sopt.sopt_dir = SOPT_GET;
        sopt.sopt_level = IPPROTO_TCP;
        sopt.sopt_name = TCP_INFO;
        sopt.sopt_val = (caddr_t)&ti;
        sopt.sopt_valsize = sizeof ti;
        sopt.sopt_td = NULL;

        err = sogetopt(ep->com.so, &sopt);
        if (err) {
                printf("%s can't get tcpinfo\n", __FUNCTION__);
                return (-err);
        }
        if (!(ti.tcpi_options & TCPI_OPT_TOE)) {
                printf("%s connection NOT OFFLOADED!\n", __FUNCTION__);
                return (-EINVAL);
        }
        ep->snd_seq = ti.tcpi_snd_nxt;
        ep->rcv_seq = ti.tcpi_rcv_nxt;
        ep->emss = ti.tcpi_snd_mss - sizeof(struct tcpiphdr);
        ep->hwtid = TOEPCB(ep->com.so)->tp_tid; /* XXX */
        if (ti.tcpi_options & TCPI_OPT_TIMESTAMPS)
                ep->emss -= 12;         /* assumed: TCP timestamp option overhead */
        return (0);
}
static enum iwch_ep_state
state_read(struct iwch_ep_common *epc)
{
        enum iwch_ep_state state;

        mtx_lock(&epc->lock);
        state = epc->state;
        mtx_unlock(&epc->lock);
        return (state);
}
static void
__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
        epc->state = new;
}

static void
state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
        mtx_lock(&epc->lock);
        CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]);
        __state_set(epc, new);
        mtx_unlock(&epc->lock);
}
static void *
alloc_ep(int size, int flags)
{
        struct iwch_ep_common *epc;

        epc = malloc(size, M_DEVBUF, flags);
        if (epc) {
                memset(epc, 0, size);
                refcount_init(&epc->refcount, 1);
                mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK);
                cv_init(&epc->waitq, "iwch_epc cv");
        }
        CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc);
        return (epc);
}
void __free_ep(struct iwch_ep_common *epc)
{
        CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
        KASSERT(!epc->so, ("%s warning ep->so %p\n", __FUNCTION__, epc->so));
        KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
        free(epc, M_DEVBUF);
}
int
iwch_quiesce_tid(struct iwch_ep *ep)
{
        struct cpl_set_tcb_field *req;
        struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);

        if (m == NULL)
                return (-ENOMEM);
        req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
        req->reply = 0;
        req->cpu_idx = 0;
        req->word = htons(W_TCB_RX_QUIESCE);
        req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
        req->val = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);

        m_set_priority(m, CPL_PRIORITY_DATA);
        cxgb_ofld_send(ep->com.tdev, m);
        return (0);
}
int
iwch_resume_tid(struct iwch_ep *ep)
{
        struct cpl_set_tcb_field *req;
        struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);

        if (m == NULL)
                return (-ENOMEM);
        req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
        req->reply = 0;
        req->cpu_idx = 0;
        req->word = htons(W_TCB_RX_QUIESCE);
        req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
        req->val = 0;

        m_set_priority(m, CPL_PRIORITY_DATA);
        cxgb_ofld_send(ep->com.tdev, m);
        return (0);
}
static struct rtentry *
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
           __be16 peer_port, u8 tos)
{
        struct route iproute;
        struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;

        bzero(&iproute, sizeof iproute);
        dst->sin_family = AF_INET;
        dst->sin_len = sizeof *dst;
        dst->sin_addr.s_addr = peer_ip;

        rtalloc(&iproute);
        return (iproute.ro_rt);
}
static void
close_socket(struct iwch_ep_common *epc)
{
        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
        SOCK_LOCK(epc->so);
        soupcall_clear(epc->so, SO_RCV);
        SOCK_UNLOCK(epc->so);
        soshutdown(epc->so, SHUT_WR|SHUT_RD);
        epc->so = NULL;
}
static void
shutdown_socket(struct iwch_ep_common *epc)
{
        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
        soshutdown(epc->so, SHUT_WR);
}
static void
abort_socket(struct iwch_ep *ep)
{
        struct sockopt sopt;
        struct linger l;
        int err;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
        l.l_onoff = 1;
        l.l_linger = 0;

        /* linger_time of 0 forces RST to be sent */
        sopt.sopt_dir = SOPT_SET;
        sopt.sopt_level = SOL_SOCKET;
        sopt.sopt_name = SO_LINGER;
        sopt.sopt_val = (caddr_t)&l;
        sopt.sopt_valsize = sizeof l;
        sopt.sopt_td = NULL;
        err = sosetopt(ep->com.so, &sopt);
        if (err)
                printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err);
}
static void
send_mpa_req(struct iwch_ep *ep)
{
        struct mpa_message *mpa;
        struct mbuf *m;
        int mpalen;
        int err;

        CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen);

        mpalen = sizeof(*mpa) + ep->plen;
        m = m_gethdr(mpalen, M_NOWAIT);
        if (m == NULL) {
                connect_reply_upcall(ep, -ENOMEM);
                return;
        }
        mpa = mtod(m, struct mpa_message *);
        m->m_len = mpalen;
        m->m_pkthdr.len = mpalen;
        memset(mpa, 0, sizeof(*mpa));
        memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
        mpa->flags = (crc_enabled ? MPA_CRC : 0) |
                     (markers_enabled ? MPA_MARKERS : 0);
        mpa->private_data_size = htons(ep->plen);
        mpa->revision = mpa_rev;
        if (ep->plen)
                memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);

        err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
        if (err) {
                connect_reply_upcall(ep, -ENOMEM);
                return;
        }
        start_ep_timer(ep);
        state_set(&ep->com, MPA_REQ_SENT);
}
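/*
 * For reference (per the MPA spec, RFC 5044): an MPA req/rep frame begins
 * with a 16-byte key ("MPA ID Req Frame" / "MPA ID Rep Frame"), followed
 * by one flags octet (marker, CRC and reject bits), one revision octet, a
 * 2-byte private data length in network order, and then the private data
 * itself.  struct mpa_message mirrors this layout.
 */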
static int
send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
{
        struct mpa_message *mpa;
        struct mbuf *m;
        int mpalen;
        int err;

        CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen);

        mpalen = sizeof(*mpa) + plen;

        m = m_gethdr(mpalen, M_NOWAIT);
        if (m == NULL) {
                printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
                return (-ENOMEM);
        }
        mpa = mtod(m, struct mpa_message *);
        m->m_len = mpalen;
        m->m_pkthdr.len = mpalen;
        memset(mpa, 0, sizeof(*mpa));
        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
        mpa->flags = MPA_REJECT;
        mpa->revision = mpa_rev;
        mpa->private_data_size = htons(plen);
        if (plen)
                memcpy(mpa->private_data, pdata, plen);
        err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
        return (err);
}
static int
send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
{
        struct mpa_message *mpa;
        struct mbuf *m;
        int mpalen;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen);

        mpalen = sizeof(*mpa) + plen;

        m = m_gethdr(mpalen, M_NOWAIT);
        if (m == NULL) {
                printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
                return (-ENOMEM);
        }
        mpa = mtod(m, struct mpa_message *);
        m->m_len = mpalen;
        m->m_pkthdr.len = mpalen;
        memset(mpa, 0, sizeof(*mpa));
        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
        mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
                     (markers_enabled ? MPA_MARKERS : 0);
        mpa->revision = mpa_rev;
        mpa->private_data_size = htons(plen);
        if (plen)
                memcpy(mpa->private_data, pdata, plen);

        state_set(&ep->com, MPA_REP_SENT);
        return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
            ep->com.thread);
}
static void
close_complete_upcall(struct iwch_ep *ep)
{
        struct iw_cm_event event;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CLOSE;
        if (ep->com.cm_id) {
                CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d",
                    ep, ep->com.cm_id, ep->hwtid);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
                ep->com.cm_id->rem_ref(ep->com.cm_id);
                ep->com.cm_id = NULL;
                ep->com.qp = NULL;
        }
}
static void
abort_connection(struct iwch_ep *ep)
{
        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
        state_set(&ep->com, ABORTING);
        abort_socket(ep);
        close_socket(&ep->com);
        close_complete_upcall(ep);
        state_set(&ep->com, DEAD);
}
static void
peer_close_upcall(struct iwch_ep *ep)
{
        struct iw_cm_event event;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_DISCONNECT;
        if (ep->com.cm_id) {
                CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d",
                    ep, ep->com.cm_id, ep->hwtid);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
        }
}
static void
peer_abort_upcall(struct iwch_ep *ep)
{
        struct iw_cm_event event;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CLOSE;
        event.status = ECONNRESET;
        if (ep->com.cm_id) {
                CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep,
                    ep->com.cm_id, ep->hwtid);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
                ep->com.cm_id->rem_ref(ep->com.cm_id);
                ep->com.cm_id = NULL;
                ep->com.qp = NULL;
        }
}
static void
connect_reply_upcall(struct iwch_ep *ep, int status)
{
        struct iw_cm_event event;

        CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CONNECT_REPLY;
        event.status = status;
        event.local_addr = ep->com.local_addr;
        event.remote_addr = ep->com.remote_addr;

        if ((status == 0) || (status == ECONNREFUSED)) {
                event.private_data_len = ep->plen;
                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
        }
        if (ep->com.cm_id) {
                CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep,
                    ep->hwtid, status);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
        }
        if (status < 0) {
                ep->com.cm_id->rem_ref(ep->com.cm_id);
                ep->com.cm_id = NULL;
                ep->com.qp = NULL;
        }
}
static void
connect_request_upcall(struct iwch_ep *ep)
{
        struct iw_cm_event event;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CONNECT_REQUEST;
        event.local_addr = ep->com.local_addr;
        event.remote_addr = ep->com.remote_addr;
        event.private_data_len = ep->plen;
        event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
        event.provider_data = ep;
        event.so = ep->com.so;
        if (state_read(&ep->parent_ep->com) != DEAD)
                ep->parent_ep->com.cm_id->event_handler(
                    ep->parent_ep->com.cm_id,
                    &event);
        put_ep(&ep->parent_ep->com);
        ep->parent_ep = NULL;
}
static void
established_upcall(struct iwch_ep *ep)
{
        struct iw_cm_event event;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_ESTABLISHED;
        if (ep->com.cm_id) {
                CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
        }
}
static void
process_mpa_reply(struct iwch_ep *ep)
{
        struct mpa_message *mpa;
        struct iwch_qp_attributes attrs;
        enum iwch_qp_attr_mask mask;
        struct mbuf *top, *m;
        int flags = MSG_DONTWAIT;
        struct uio uio;
        int len;
        int err;
        u16 plen;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

        /*
         * Stop mpa timer.  If it expired, then the state has
         * changed and we bail since ep_timeout already aborted
         * the connection.
         */
        stop_ep_timer(ep);
        if (state_read(&ep->com) != MPA_REQ_SENT)
                return;

        uio.uio_resid = len = 1000000;
        uio.uio_td = ep->com.thread;
        err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
        if (err) {
                if (err == EWOULDBLOCK) {
                        start_ep_timer(ep);
                        return;
                }
                err = -err;
                goto err;
        }

        if (ep->com.so->so_rcv.sb_mb) {
                printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
                    __FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
        }

        m = top;
        do {
                /*
                 * If we get more than the supported amount of private data
                 * then we must fail this connection.
                 */
                if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
                        err = -EINVAL;          /* assumed error code */
                        goto err;
                }

                /*
                 * Copy the new data into our accumulation buffer.
                 */
                m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
                ep->mpa_pkt_len += m->m_len;
                m = m->m_next;
        } while (m);

        m_freem(top);

        /*
         * If we don't even have the mpa message, then bail.
         */
        if (ep->mpa_pkt_len < sizeof(*mpa))
                return;
        mpa = (struct mpa_message *)ep->mpa_pkt;

        /* Validate MPA header. */
        if (mpa->revision != mpa_rev) {
                CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
                err = -EPROTO;
                goto err;
        }
        if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
                CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
                err = -EPROTO;
                goto err;
        }

        plen = ntohs(mpa->private_data_size);

        /*
         * Fail if there's too much private data.
         */
        if (plen > MPA_MAX_PRIVATE_DATA) {
                CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
                err = -EPROTO;
                goto err;
        }

        /*
         * If plen does not account for pkt size
         */
        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
                CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len);
                err = -EPROTO;
                goto err;
        }

        ep->plen = (u8) plen;

        /*
         * If we don't have all the pdata yet, then bail.
         * We'll continue processing when more data arrives.
         */
        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
                return;

        if (mpa->flags & MPA_REJECT) {
                err = -ECONNREFUSED;
                goto err;
        }

        /*
         * If we get here we have accumulated the entire mpa
         * start reply message including private data. And
         * the MPA header is valid.
         */
        CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
        state_set(&ep->com, FPDU_MODE);
        ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) || crc_enabled) ? 1 : 0;
        ep->mpa_attr.recv_marker_enabled = markers_enabled;
        ep->mpa_attr.xmit_marker_enabled = (mpa->flags & MPA_MARKERS) ? 1 : 0;
        ep->mpa_attr.version = mpa_rev;
        if (set_tcpinfo(ep)) {
                printf("%s set_tcpinfo error\n", __FUNCTION__);
                err = -ECONNRESET;
                goto err;
        }
        CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
            "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
            ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
            ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

        attrs.mpa_attr = ep->mpa_attr;
        attrs.max_ird = ep->ird;
        attrs.max_ord = ep->ord;
        attrs.llp_stream_handle = ep;
        attrs.next_state = IWCH_QP_STATE_RTS;

        mask = IWCH_QP_ATTR_NEXT_STATE |
            IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
            IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;

        /* bind QP and TID with INIT_WR */
        err = iwch_modify_qp(ep->com.qp->rhp,
            ep->com.qp, mask, &attrs, 1);
        if (err)
                goto err;
        goto out;
err:
        abort_connection(ep);
out:
        connect_reply_upcall(ep, err);
        return;
}
static void
process_mpa_request(struct iwch_ep *ep)
{
        struct mpa_message *mpa;
        u16 plen;
        int flags = MSG_DONTWAIT;
        struct mbuf *top, *m;
        int err;
        struct uio uio;
        int len;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

        /*
         * Stop mpa timer.  If it expired, then the state has
         * changed and we bail since ep_timeout already aborted
         * the connection.
         */
        stop_ep_timer(ep);
        if (state_read(&ep->com) != MPA_REQ_WAIT)
                return;

        uio.uio_resid = len = 1000000;
        uio.uio_td = ep->com.thread;
        err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
        if (err) {
                if (err == EWOULDBLOCK) {
                        start_ep_timer(ep);
                        return;
                }
                goto abort;
        }

        m = top;
        do {
                /*
                 * If we get more than the supported amount of private data
                 * then we must fail this connection.
                 */
                if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
                        CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__,
                            ep->mpa_pkt_len + m->m_len);
                        goto abort;
                }

                /*
                 * Copy the new data into our accumulation buffer.
                 */
                m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
                ep->mpa_pkt_len += m->m_len;
                m = m->m_next;
        } while (m);

        m_freem(top);

        /*
         * If we don't even have the mpa message, then bail.
         * We'll continue processing when more data arrives.
         */
        if (ep->mpa_pkt_len < sizeof(*mpa)) {
                start_ep_timer(ep);
                CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__,
                    ep->mpa_pkt_len);
                return;
        }
        mpa = (struct mpa_message *) ep->mpa_pkt;

        /*
         * Validate MPA Header.
         */
        if (mpa->revision != mpa_rev) {
                CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
                goto abort;
        }
        if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
                CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
                goto abort;
        }

        plen = ntohs(mpa->private_data_size);

        /*
         * Fail if there's too much private data.
         */
        if (plen > MPA_MAX_PRIVATE_DATA) {
                CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
                goto abort;
        }

        /*
         * If plen does not account for pkt size
         */
        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
                CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__,
                    ep->mpa_pkt_len);
                goto abort;
        }
        ep->plen = (u8) plen;

        /*
         * If we don't have all the pdata yet, then bail.
         */
        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
                start_ep_timer(ep);
                CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__,
                    ep->mpa_pkt_len);
                return;
        }

        /*
         * If we get here we have accumulated the entire mpa
         * start request message including private data.
         */
        ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) || crc_enabled) ? 1 : 0;
        ep->mpa_attr.recv_marker_enabled = markers_enabled;
        ep->mpa_attr.xmit_marker_enabled = (mpa->flags & MPA_MARKERS) ? 1 : 0;
        ep->mpa_attr.version = mpa_rev;
        if (set_tcpinfo(ep)) {
                printf("%s set_tcpinfo error\n", __FUNCTION__);
                goto abort;
        }
        CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
            "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
            ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
            ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

        state_set(&ep->com, MPA_REQ_RCVD);

        /* drive upcall */
        connect_request_upcall(ep);
        return;
abort:
        abort_connection(ep);
        return;
}
static void
process_peer_close(struct iwch_ep *ep)
{
        struct iwch_qp_attributes attrs;
        int disconnect = 1;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

        mtx_lock(&ep->com.lock);
        switch (ep->com.state) {
        case MPA_REQ_WAIT:
                __state_set(&ep->com, CLOSING);
                break;
        case MPA_REQ_SENT:
                __state_set(&ep->com, CLOSING);
                connect_reply_upcall(ep, -ECONNRESET);
                break;
        case MPA_REQ_RCVD:
                /*
                 * We're gonna mark this puppy DEAD, but keep
                 * the reference on it until the ULP accepts or
                 * rejects the CR.
                 */
                __state_set(&ep->com, CLOSING);
                break;
        case MPA_REP_SENT:
                __state_set(&ep->com, CLOSING);
                break;
        case FPDU_MODE:
                __state_set(&ep->com, CLOSING);
                attrs.next_state = IWCH_QP_STATE_CLOSING;
                iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
                    IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
                peer_close_upcall(ep);
                break;
        case CLOSING:
                __state_set(&ep->com, MORIBUND);
                disconnect = 0;
                break;
        case MORIBUND:
                if (ep->com.cm_id && ep->com.qp) {
                        attrs.next_state = IWCH_QP_STATE_IDLE;
                        iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
                            IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
                }
                close_socket(&ep->com);
                close_complete_upcall(ep);
                __state_set(&ep->com, DEAD);
                disconnect = 0;
                break;
        default:
                disconnect = 0;
                break;
        }
        mtx_unlock(&ep->com.lock);
        if (disconnect)
                iwch_ep_disconnect(ep, 0, M_NOWAIT);
}
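/*
 * Note on the close path above: the first half-close moves the endpoint to
 * CLOSING, the second moves it to MORIBUND, and once the QP has been idled
 * the endpoint is marked DEAD and its socket released.  Abort events bypass
 * this progression via abort_connection().
 */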
static void
process_conn_error(struct iwch_ep *ep)
{
        struct iwch_qp_attributes attrs;
        enum iwch_ep_state state;
        int ret;

        state = state_read(&ep->com);
        CTR5(KTR_IW_CXGB, "%s ep %p so %p so->so_error %u state %s", __FUNCTION__, ep, ep->com.so, ep->com.so->so_error, states[ep->com.state]);

        switch (state) {
        case MPA_REQ_SENT:
                connect_reply_upcall(ep, -ECONNRESET);
                break;
        case MPA_REP_SENT:
                ep->com.rpl_err = ECONNRESET;
                CTR1(KTR_IW_CXGB, "waking up ep %p", ep);
                break;
        case MPA_REQ_RCVD:
                /*
                 * We're gonna mark this puppy DEAD, but keep
                 * the reference on it until the ULP accepts or
                 * rejects the CR.
                 */
                break;
        case MORIBUND:
        case CLOSING:
        case FPDU_MODE:
                if (ep->com.cm_id && ep->com.qp) {
                        attrs.next_state = IWCH_QP_STATE_ERROR;
                        ret = iwch_modify_qp(ep->com.qp->rhp,
                            ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
                            &attrs, 1);
                        if (ret)
                                log(LOG_ERR,
                                    "%s - qp <- error failed!\n",
                                    __FUNCTION__);
                }
                peer_abort_upcall(ep);
                break;
        case DEAD:
                CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__,
                    ep->com.so->so_error);
                return;
        default:
                break;
        }

        if (state != ABORTING) {
                close_socket(&ep->com);
                state_set(&ep->com, DEAD);
        }
}
static void
process_close_complete(struct iwch_ep *ep)
{
        struct iwch_qp_attributes attrs;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

        /* The cm_id may be null if we failed to connect */
        mtx_lock(&ep->com.lock);
        switch (ep->com.state) {
        case CLOSING:
                __state_set(&ep->com, MORIBUND);
                break;
        case MORIBUND:
                if ((ep->com.cm_id) && (ep->com.qp)) {
                        attrs.next_state = IWCH_QP_STATE_IDLE;
                        iwch_modify_qp(ep->com.qp->rhp,
                            ep->com.qp,
                            IWCH_QP_ATTR_NEXT_STATE,
                            &attrs, 1);
                }
                close_socket(&ep->com);
                close_complete_upcall(ep);
                __state_set(&ep->com, DEAD);
                break;
        default:
                break;
        }
        mtx_unlock(&ep->com.lock);
}
/*
 * T3A does 3 things when a TERM is received:
 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
 * 2) generate an async event on the QP with the TERMINATE opcode
 * 3) post a TERMINATE opcode cqe into the associated CQ.
 *
 * For (1), we save the message in the qp for later consumption by the
 * consumer.
 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
 * For (3), we toss the CQE in cxio_poll_cq().
 *
 * terminate() handles case (1)...
 */
static int
terminate(struct t3cdev *tdev, struct mbuf *m, void *ctx)
{
        struct toepcb *toep = (struct toepcb *)ctx;
        struct socket *so = toeptoso(toep);
        struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

        CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
        m_adj(m, sizeof(struct cpl_rdma_terminate));
        CTR2(KTR_IW_CXGB, "%s saving %d bytes of term msg", __FUNCTION__, m->m_len);
        m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
        ep->com.qp->attr.terminate_msg_len = m->m_len;
        ep->com.qp->attr.is_terminate_local = 0;
        return CPL_RET_BUF_DONE;
}
static int
ec_status(struct t3cdev *tdev, struct mbuf *m, void *ctx)
{
        struct toepcb *toep = (struct toepcb *)ctx;
        struct socket *so = toeptoso(toep);
        struct cpl_rdma_ec_status *rep = cplhdr(m);
        struct iwch_ep *ep;
        struct iwch_qp_attributes attrs;

        ep = so->so_rcv.sb_upcallarg;
        if (!ep || !ep->com.so)
                panic("bogosity ep %p state %d, so %p state %x\n", ep, ep ? ep->com.state : -1, so, so ? so->so_state : -1);
        CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s ec_status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], rep->status);

        mtx_lock(&ep->com.lock);
        switch (ep->com.state) {
        case CLOSING:
                if (!rep->status)
                        __state_set(&ep->com, MORIBUND);
                else
                        __state_set(&ep->com, ABORTING);
                break;
        case MORIBUND:
                if ((ep->com.cm_id) && (ep->com.qp)) {
                        attrs.next_state = IWCH_QP_STATE_IDLE;
                        iwch_modify_qp(ep->com.qp->rhp,
                            ep->com.qp,
                            IWCH_QP_ATTR_NEXT_STATE,
                            &attrs, 1);
                }
                close_socket(&ep->com);
                close_complete_upcall(ep);
                __state_set(&ep->com, DEAD);
                break;
        default:
                panic("unknown state: %d\n", ep->com.state);
        }
        mtx_unlock(&ep->com.lock);
        if (rep->status) {
                log(LOG_ERR, "%s BAD CLOSE - Aborting tid %u\n",
                    __FUNCTION__, ep->hwtid);
                attrs.next_state = IWCH_QP_STATE_ERROR;
                iwch_modify_qp(ep->com.qp->rhp,
                    ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
                    &attrs, 1);
        }
        return CPL_RET_BUF_DONE;
}
static void
ep_timeout(void *arg)
{
        struct iwch_ep *ep = (struct iwch_ep *)arg;
        struct iwch_qp_attributes attrs;
        int err = 0;

        mtx_lock(&ep->com.lock);
        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
        switch (ep->com.state) {
        case MPA_REQ_SENT:
                connect_reply_upcall(ep, -ETIMEDOUT);
                break;
        case MPA_REQ_WAIT:
                break;
        case CLOSING:
        case MORIBUND:
                if (ep->com.cm_id && ep->com.qp)
                        err = 1;
                break;
        default:
                panic("unknown state: %d\n", ep->com.state);
        }
        __state_set(&ep->com, ABORTING);
        mtx_unlock(&ep->com.lock);
        if (err) {
                attrs.next_state = IWCH_QP_STATE_ERROR;
                iwch_modify_qp(ep->com.qp->rhp,
                    ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
                    &attrs, 1);
        }
        abort_connection(ep);
}
int
iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
        int err;
        struct iwch_ep *ep = to_ep(cm_id);

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

        if (state_read(&ep->com) == DEAD) {
                put_ep(&ep->com);
                return (-ECONNRESET);
        }
        PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
        if (mpa_rev == 0)
                abort_connection(ep);
        else {
                err = send_mpa_reject(ep, pdata, pdata_len);
                err = soshutdown(ep->com.so, SHUT_RDWR);
        }
        return (0);
}
int
iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
        int err;
        struct iwch_qp_attributes attrs;
        enum iwch_qp_attr_mask mask;
        struct iwch_ep *ep = to_ep(cm_id);
        struct iwch_dev *h = to_iwch_dev(cm_id->device);
        struct iwch_qp *qp = get_qhp(h, conn_param->qpn);

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
        if (state_read(&ep->com) == DEAD)
                return (-ECONNRESET);

        PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
        PANIC_IF(!qp);

        if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
            (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
                abort_connection(ep);
                return (-EINVAL);
        }

        cm_id->add_ref(cm_id);
        ep->com.cm_id = cm_id;
        ep->com.qp = qp;

        ep->com.rpl_err = 0;
        ep->com.rpl_done = 0;
        ep->ird = conn_param->ird;
        ep->ord = conn_param->ord;
        CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord);

        /* bind QP to EP and move to RTS */
        attrs.mpa_attr = ep->mpa_attr;
        attrs.max_ird = ep->ird;
        attrs.max_ord = ep->ord;
        attrs.llp_stream_handle = ep;
        attrs.next_state = IWCH_QP_STATE_RTS;

        /* bind QP and TID with INIT_WR */
        mask = IWCH_QP_ATTR_NEXT_STATE |
            IWCH_QP_ATTR_LLP_STREAM_HANDLE |
            IWCH_QP_ATTR_MPA_ATTR |
            IWCH_QP_ATTR_MAX_IRD |
            IWCH_QP_ATTR_MAX_ORD;

        err = iwch_modify_qp(ep->com.qp->rhp,
            ep->com.qp, mask, &attrs, 1);
        if (err)
                goto err;

        err = send_mpa_reply(ep, conn_param->private_data,
            conn_param->private_data_len);
        if (err)
                goto err;

        state_set(&ep->com, FPDU_MODE);
        established_upcall(ep);
        return (0);
err:
        ep->com.cm_id = NULL;
        ep->com.qp = NULL;
        cm_id->rem_ref(cm_id);
        return (err);
}
static int
init_sock(struct iwch_ep_common *epc)
{
        int err;
        struct sockopt sopt;
        int on = 1;

        SOCK_LOCK(epc->so);
        soupcall_set(epc->so, SO_RCV, iwch_so_upcall, epc);
        epc->so->so_state |= SS_NBIO;
        SOCK_UNLOCK(epc->so);
        sopt.sopt_dir = SOPT_SET;
        sopt.sopt_level = SOL_SOCKET;
        sopt.sopt_name = SO_NO_DDP;
        sopt.sopt_val = (caddr_t)&on;
        sopt.sopt_valsize = sizeof on;
        sopt.sopt_td = NULL;
        err = sosetopt(epc->so, &sopt);
        if (err)
                printf("%s can't set SO_NO_DDP err %d\n", __FUNCTION__, err);
        sopt.sopt_dir = SOPT_SET;
        sopt.sopt_level = IPPROTO_TCP;
        sopt.sopt_name = TCP_NODELAY;
        sopt.sopt_val = (caddr_t)&on;
        sopt.sopt_valsize = sizeof on;
        sopt.sopt_td = NULL;
        err = sosetopt(epc->so, &sopt);
        if (err)
                printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err);

        return (0);
}
static int
is_loopback_dst(struct iw_cm_id *cm_id)
{
        uint16_t port = cm_id->remote_addr.sin_port;
        int ifa_present;

        cm_id->remote_addr.sin_port = 0;
        ifa_present = ifa_ifwithaddr_check(
            (struct sockaddr *)&cm_id->remote_addr);
        cm_id->remote_addr.sin_port = port;
        return (ifa_present);
}
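/*
 * A destination address owned by the local host cannot be offloaded by the
 * TOE (loopback traffic never reaches the adapter), so iwch_connect()
 * refuses such destinations up front.
 */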
int
iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
        int err = 0;
        struct iwch_dev *h = to_iwch_dev(cm_id->device);
        struct iwch_ep *ep;
        struct rtentry *rt;
        struct toedev *tdev;

        if (is_loopback_dst(cm_id)) {
                err = -ENOSYS;
                goto out;
        }

        ep = alloc_ep(sizeof(*ep), M_NOWAIT);
        if (!ep) {
                printf("%s - cannot alloc ep.\n", __FUNCTION__);
                err = (-ENOMEM);
                goto out;
        }
        callout_init(&ep->timer, TRUE);
        ep->plen = conn_param->private_data_len;
        if (ep->plen)
                memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
                    conn_param->private_data, ep->plen);
        ep->ird = conn_param->ird;
        ep->ord = conn_param->ord;

        cm_id->add_ref(cm_id);
        ep->com.cm_id = cm_id;
        ep->com.qp = get_qhp(h, conn_param->qpn);
        ep->com.thread = curthread;
        PANIC_IF(!ep->com.qp);
        CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn,
            ep->com.qp, cm_id);

        ep->com.so = cm_id->so;
        err = init_sock(&ep->com);
        if (err)
                goto fail2;

        /* find a route */
        rt = find_route(cm_id->local_addr.sin_addr.s_addr,
            cm_id->remote_addr.sin_addr.s_addr,
            cm_id->local_addr.sin_port,
            cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
        if (!rt) {
                printf("%s - cannot find route.\n", __FUNCTION__);
                err = EHOSTUNREACH;
                goto fail2;
        }

        if (!(rt->rt_ifp->if_capenable & IFCAP_TOE)) {
                printf("%s - interface not TOE capable.\n", __FUNCTION__);
                err = (-ENETUNREACH);   /* assumed error code */
                goto fail3;
        }
        tdev = TOEDEV(rt->rt_ifp);
        if (tdev == NULL) {
                printf("%s - No toedev for interface.\n", __FUNCTION__);
                err = (-ENETUNREACH);
                goto fail3;
        }
        if (!tdev->tod_can_offload(tdev, ep->com.so)) {
                printf("%s - interface cannot offload!\n", __FUNCTION__);
                err = (-ENETUNREACH);
                goto fail3;
        }
        RTFREE(rt);

        state_set(&ep->com, CONNECTING);
        ep->com.local_addr = cm_id->local_addr;
        ep->com.remote_addr = cm_id->remote_addr;
        err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
            ep->com.thread);
        if (!err)
                goto out;
        close_socket(&ep->com);
        goto fail2;
fail3:
        RTFREE(rt);
fail2:
        cm_id->rem_ref(cm_id);
        put_ep(&ep->com);
out:
        return (err);
}
int
iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
{
        int err = 0;
        struct iwch_listen_ep *ep;

        ep = alloc_ep(sizeof(*ep), M_NOWAIT);
        if (!ep) {
                printf("%s - cannot alloc ep.\n", __FUNCTION__);
                err = (-ENOMEM);
                goto out;
        }
        CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
        cm_id->add_ref(cm_id);
        ep->com.cm_id = cm_id;
        ep->backlog = backlog;
        ep->com.local_addr = cm_id->local_addr;
        ep->com.thread = curthread;
        state_set(&ep->com, LISTEN);

        ep->com.so = cm_id->so;
        err = init_sock(&ep->com);
        if (err)
                goto fail;

        err = solisten(ep->com.so, ep->backlog, ep->com.thread);
        if (!err) {
                cm_id->provider_data = ep;
                goto out;
        }
        close_socket(&ep->com);
fail:
        cm_id->rem_ref(cm_id);
        put_ep(&ep->com);
out:
        return (err);
}
int
iwch_destroy_listen(struct iw_cm_id *cm_id)
{
        struct iwch_listen_ep *ep = to_listen_ep(cm_id);

        CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
        state_set(&ep->com, DEAD);
        close_socket(&ep->com);
        cm_id->rem_ref(cm_id);
        put_ep(&ep->com);
        return (0);
}
int
iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags)
{
        int close = 0;

        mtx_lock(&ep->com.lock);

        PANIC_IF(!ep);
        PANIC_IF(!ep->com.so);

        CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep,
            ep->com.so, states[ep->com.state], abrupt);

        if (ep->com.state == DEAD) {
                CTR2(KTR_IW_CXGB, "%s already dead ep %p", __FUNCTION__, ep);
                goto out;
        }

        if (abrupt) {
                if (ep->com.state != ABORTING) {
                        ep->com.state = ABORTING;
                        close = 1;
                }
                goto out;
        }

        switch (ep->com.state) {
        case MPA_REQ_WAIT:
        case MPA_REQ_SENT:
        case MPA_REQ_RCVD:
        case MPA_REP_SENT:
        case FPDU_MODE:
                ep->com.state = CLOSING;
                close = 1;
                break;
        case CLOSING:
                ep->com.state = MORIBUND;
                close = 1;
                break;
        case MORIBUND:
        case ABORTING:
                break;
        default:
                panic("unknown state: %d\n", ep->com.state);
                break;
        }
out:
        mtx_unlock(&ep->com.lock);
        if (close) {
                if (abrupt)
                        abort_connection(ep);
                else
                        shutdown_socket(&ep->com);
        }
        return (0);
}
static void
process_data(struct iwch_ep *ep)
{
        struct sockaddr_in *local, *remote;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

        switch (state_read(&ep->com)) {
        case MPA_REQ_SENT:
                process_mpa_reply(ep);
                break;
        case MPA_REQ_WAIT:

                /*
                 * Set local and remote addrs here because when we
                 * dequeue the newly accepted socket, they aren't set
                 * yet.
                 */
                in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
                in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
                CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__,
                    inet_ntoa(local->sin_addr),
                    inet_ntoa(remote->sin_addr));
                ep->com.local_addr = *local;
                ep->com.remote_addr = *remote;
                free(local, M_SONAME);
                free(remote, M_SONAME);
                process_mpa_request(ep);
                break;
        default:
                if (ep->com.so->so_rcv.sb_cc)
                        printf("%s Unexpected streaming data."
                            " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n",
                            __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state,
                            ep->com.so->so_rcv.sb_cc, ep->com.so->so_rcv.sb_mb);
                break;
        }
}
static void
process_connected(struct iwch_ep *ep)
{
        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
        if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) {
                send_mpa_req(ep);
        } else {
                connect_reply_upcall(ep, -ep->com.so->so_error);
                close_socket(&ep->com);
                state_set(&ep->com, DEAD);
        }
}
static struct socket *
dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep)
{
        struct socket *so;

        ACCEPT_LOCK();
        so = TAILQ_FIRST(&head->so_comp);
        if (!so) {
                ACCEPT_UNLOCK();
                return (NULL);
        }
        TAILQ_REMOVE(&head->so_comp, so, so_list);
        head->so_qlen--;
        SOCK_LOCK(so);
        so->so_qstate &= ~SQ_COMP;
        so->so_head = NULL;
        soref(so);
        soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep);
        so->so_state |= SS_NBIO;
        PANIC_IF(!(so->so_state & SS_ISCONNECTED));
        PANIC_IF(so->so_error);
        SOCK_UNLOCK(so);
        ACCEPT_UNLOCK();
        soaccept(so, (struct sockaddr **)remote);
        return (so);
}
static void
process_newconn(struct iwch_ep *parent_ep)
{
        struct socket *child_so;
        struct iwch_ep *child_ep;
        struct sockaddr_in *remote;

        CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so);
        child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
        if (!child_ep) {
                log(LOG_ERR, "%s - failed to allocate ep entry!\n",
                    __FUNCTION__);
                return;
        }
        child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
        if (!child_so) {
                log(LOG_ERR, "%s - failed to dequeue child socket!\n",
                    __FUNCTION__);
                __free_ep(&child_ep->com);
                return;
        }
        CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__,
            inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
        child_ep->com.so = child_so;
        child_ep->com.cm_id = NULL;
        child_ep->com.thread = parent_ep->com.thread;
        child_ep->parent_ep = parent_ep;
        free(remote, M_SONAME);
        get_ep(&parent_ep->com);
        callout_init(&child_ep->timer, TRUE);
        state_set(&child_ep->com, MPA_REQ_WAIT);
        start_ep_timer(child_ep);

        /* maybe the request has already been queued up on the socket... */
        process_mpa_request(child_ep);
}
static int
iwch_so_upcall(struct socket *so, void *arg, int waitflag)
{
        struct iwch_ep *ep = arg;

        CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
        mtx_lock(&req_lock);
        if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
                get_ep(&ep->com);
                TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
                taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task);
        }
        mtx_unlock(&req_lock);
        return (SU_OK);
}
static void
process_socket_event(struct iwch_ep *ep)
{
        int state = state_read(&ep->com);
        struct socket *so = ep->com.so;

        CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
        if (state == CONNECTING) {
                process_connected(ep);
                return;
        }
        if (state == LISTEN) {
                process_newconn(ep);
                return;
        }

        /* connection error */
        if (so->so_error) {
                process_conn_error(ep);
                return;
        }

        /* peer close */
        if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) {
                process_peer_close(ep);
                return;
        }

        /* close complete */
        if (so->so_state & (SS_ISDISCONNECTED)) {
                process_close_complete(ep);
                return;
        }

        /* rx data */
        process_data(ep);
}
static void
process_req(void *ctx, int pending)
{
        struct iwch_ep_common *epc;

        CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__);
        mtx_lock(&req_lock);
        while (!TAILQ_EMPTY(&req_list)) {
                epc = TAILQ_FIRST(&req_list);
                TAILQ_REMOVE(&req_list, epc, entry);
                epc->entry.tqe_prev = NULL;
                mtx_unlock(&req_lock);
                if (epc->so)
                        process_socket_event((struct iwch_ep *)epc);
                put_ep(epc);
                mtx_lock(&req_lock);
        }
        mtx_unlock(&req_lock);
}
int
iwch_cm_init(void)
{
        TAILQ_INIT(&req_list);
        mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF);
        iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT,
            taskqueue_thread_enqueue, &iw_cxgb_taskq);
        if (iw_cxgb_taskq == NULL) {
                printf("failed to allocate iw_cxgb taskqueue\n");
                return (-ENOMEM);
        }
        taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq");
        TASK_INIT(&iw_cxgb_task, 0, process_req, NULL);
        t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, terminate);
        t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, ec_status);
        return (0);
}
void
iwch_cm_term(void)
{
        t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, NULL);
        t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, NULL);
        taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task);
        taskqueue_free(iw_cxgb_taskq);
}