2 * Copyright (c) 2012 Chelsio Communications, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/kernel.h>
37 #include <sys/limits.h>
41 #include <sys/mutex.h>
42 #include <sys/sockstate.h>
43 #include <sys/sockopt.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/sockbuf.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/protosw.h>
51 #include <sys/sglist.h>
52 #include <sys/taskqueue.h>
55 #include <net/ethernet.h>
56 #include <net/route.h>
58 #include <netinet/in.h>
59 #include <netinet/in_pcb.h>
60 #include <netinet/in_systm.h>
61 #include <netinet/in_var.h>
63 #include <netinet/ip.h>
64 #include <netinet/tcp_var.h>
66 #include <netinet/tcp_fsm.h>
67 #include <netinet/toecore.h>
68 #include <netinet/tcp_seq.h>
69 #include <netinet/tcp_timer.h>
70 #include <net/route.h>
72 #include "cxgb_include.h"
73 #include "ulp/tom/cxgb_l2t.h"
74 #include "ulp/tom/cxgb_tom.h"
75 #include "ulp/tom/cxgb_toepcb.h"
77 VNET_DECLARE(int, tcp_do_autosndbuf);
78 #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
79 VNET_DECLARE(int, tcp_autosndbuf_inc);
80 #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
81 VNET_DECLARE(int, tcp_autosndbuf_max);
82 #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
83 VNET_DECLARE(int, tcp_do_autorcvbuf);
84 #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
85 VNET_DECLARE(int, tcp_autorcvbuf_inc);
86 #define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
87 VNET_DECLARE(int, tcp_autorcvbuf_max);
88 #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
89 extern int always_keepalive;
92 * For ULP connections HW may add headers, e.g., for digests, that aren't part
93 * of the messages sent by the host but that are part of the TCP payload and
94 * therefore consume TCP sequence space. Tx connection parameters that
95 * operate in TCP sequence space are affected by the HW additions and need to
96 * compensate for them to accurately track TCP sequence numbers. This array
97 * contains the compensating extra lengths for ULP packets. It is indexed by
98 * a packet's ULP submode.
100 const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8};
103 * Max receive window supported by HW in bytes. Only a small part of it can
104 * be set through option0, the rest needs to be set through RX_DATA_ACK.
106 #define MAX_RCV_WND ((1U << 27) - 1)
109 * Min receive window. We want it to be large enough to accommodate receive
110 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance.
112 #define MIN_RCV_WND (24 * 1024U)
113 #define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS)
115 static void t3_release_offload_resources(struct toepcb *);
116 static void send_reset(struct toepcb *toep);
119 * Called after the last CPL for the toepcb has been received.
121 * The inp must be wlocked on entry and is unlocked (or maybe destroyed) by the
122 * time this function exits.
125 toepcb_release(struct toepcb *toep)
127 struct inpcb *inp = toep->tp_inp;
128 struct toedev *tod = toep->tp_tod;
129 struct tom_data *td = t3_tomdata(tod);
132 INP_WLOCK_ASSERT(inp);
133 KASSERT(!(toep->tp_flags & TP_CPL_DONE),
134 ("%s: double release?", __func__));
136 CTR2(KTR_CXGB, "%s: tid %d", __func__, toep->tp_tid);
138 toep->tp_flags |= TP_CPL_DONE;
141 mtx_lock(&td->toep_list_lock);
142 TAILQ_REMOVE(&td->toep_list, toep, link);
143 mtx_unlock(&td->toep_list_lock);
145 if (!(toep->tp_flags & TP_ATTACHED))
146 t3_release_offload_resources(toep);
148 rc = in_pcbrele_wlocked(inp);
155 * One sided detach. The tcpcb is going away and we need to unhook the toepcb
156 * hanging off it. If the TOE driver is also done with the toepcb we'll release
157 * all offload resources.
160 toepcb_detach(struct inpcb *inp)
165 KASSERT(inp, ("%s: inp is NULL", __func__));
166 INP_WLOCK_ASSERT(inp);
171 KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
172 KASSERT(toep->tp_flags & TP_ATTACHED, ("%s: not attached", __func__));
174 CTR6(KTR_CXGB, "%s: %s %u, toep %p, inp %p, tp %p", __func__,
175 tp->t_state == TCPS_SYN_SENT ? "atid" : "tid", toep->tp_tid,
179 tp->t_flags &= ~TF_TOE;
180 toep->tp_flags &= ~TP_ATTACHED;
182 if (toep->tp_flags & TP_CPL_DONE)
183 t3_release_offload_resources(toep);
187 t3_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
190 toepcb_detach(tp->t_inpcb);
194 alloc_atid(struct tid_info *t, void *ctx)
198 mtx_lock(&t->atid_lock);
200 union active_open_entry *p = t->afree;
202 atid = (p - t->atid_tab) + t->atid_base;
207 mtx_unlock(&t->atid_lock);
213 free_atid(struct tid_info *t, int atid)
215 union active_open_entry *p = atid2entry(t, atid);
217 mtx_lock(&t->atid_lock);
221 mtx_unlock(&t->atid_lock);
225 insert_tid(struct tom_data *td, void *ctx, unsigned int tid)
227 struct tid_info *t = &td->tid_maps;
229 t->tid_tab[tid] = ctx;
230 atomic_add_int(&t->tids_in_use, 1);
234 update_tid(struct tom_data *td, void *ctx, unsigned int tid)
236 struct tid_info *t = &td->tid_maps;
238 t->tid_tab[tid] = ctx;
242 remove_tid(struct tom_data *td, unsigned int tid)
244 struct tid_info *t = &td->tid_maps;
246 t->tid_tab[tid] = NULL;
247 atomic_add_int(&t->tids_in_use, -1);
250 /* use ctx as a next pointer in the tid release list */
252 queue_tid_release(struct toedev *tod, unsigned int tid)
254 struct tom_data *td = t3_tomdata(tod);
255 void **p = &td->tid_maps.tid_tab[tid];
256 struct adapter *sc = tod->tod_softc;
258 mtx_lock(&td->tid_release_lock);
259 *p = td->tid_release_list;
260 td->tid_release_list = p;
262 taskqueue_enqueue(sc->tq, &td->tid_release_task);
263 mtx_unlock(&td->tid_release_lock);
267 * Populate a TID_RELEASE WR.
270 mk_tid_release(struct cpl_tid_release *cpl, unsigned int tid)
273 cpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
274 OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
278 release_tid(struct toedev *tod, unsigned int tid, int qset)
280 struct tom_data *td = t3_tomdata(tod);
281 struct adapter *sc = tod->tod_softc;
283 struct cpl_tid_release *cpl;
285 struct tid_info *t = &td->tid_maps;
288 KASSERT(tid >= 0 && tid < t->ntids,
289 ("%s: tid=%d, ntids=%d", __func__, tid, t->ntids));
291 m = M_GETHDR_OFLD(qset, CPL_PRIORITY_CONTROL, cpl);
293 mk_tid_release(cpl, tid);
294 t3_offload_tx(sc, m);
297 queue_tid_release(tod, tid);
302 t3_process_tid_release_list(void *data, int pending)
305 struct tom_data *td = data;
306 struct adapter *sc = td->tod.tod_softc;
308 mtx_lock(&td->tid_release_lock);
309 while (td->tid_release_list) {
310 void **p = td->tid_release_list;
311 unsigned int tid = p - td->tid_maps.tid_tab;
312 struct cpl_tid_release *cpl;
314 td->tid_release_list = (void **)*p;
315 m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, cpl); /* qs 0 here */
317 break; /* XXX: who reschedules the release task? */
318 mtx_unlock(&td->tid_release_lock);
319 mk_tid_release(cpl, tid);
320 t3_offload_tx(sc, m);
322 mtx_lock(&td->tid_release_lock);
324 mtx_unlock(&td->tid_release_lock);
328 close_conn(struct adapter *sc, struct toepcb *toep)
331 struct cpl_close_con_req *req;
333 if (toep->tp_flags & TP_FIN_SENT)
336 m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, req);
338 CXGB_UNIMPLEMENTED();
340 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
341 req->wr.wrh_lo = htonl(V_WR_TID(toep->tp_tid));
342 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, toep->tp_tid));
345 toep->tp_flags |= TP_FIN_SENT;
346 t3_offload_tx(sc, m);
350 make_tx_data_wr(struct socket *so, struct tx_data_wr *req, int len,
353 struct tcpcb *tp = so_sototcpcb(so);
354 struct toepcb *toep = tp->t_toe;
357 inp_lock_assert(tp->t_inpcb);
358 snd = so_sockbuf_snd(so);
360 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
361 req->wr.wrh_lo = htonl(V_WR_TID(toep->tp_tid));
362 /* len includes the length of any HW ULP additions */
363 req->len = htonl(len);
364 req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
365 /* V_TX_ULP_SUBMODE sets both the mode and submode */
366 req->flags = htonl(V_TX_ULP_SUBMODE(toep->tp_ulp_mode) | V_TX_URG(0) |
367 V_TX_SHOVE(!(tp->t_flags & TF_MORETOCOME) && (tail ? 0 : 1)));
368 req->sndseq = htonl(tp->snd_nxt);
369 if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
370 struct adapter *sc = toep->tp_tod->tod_softc;
371 int cpu_idx = sc->rrss_map[toep->tp_qset];
373 req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
374 V_TX_CPU_IDX(cpu_idx));
376 /* Sendbuffer is in units of 32KB. */
377 if (V_tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE)
378 req->param |= htonl(V_TX_SNDBUF(VNET(tcp_autosndbuf_max) >> 15));
380 req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
382 toep->tp_flags |= TP_DATASENT;
387 * TOM_XXX_DUPLICATION sgl_len, calc_tx_descs, calc_tx_descs_ofld, mbuf_wrs, etc.
388 * TOM_XXX_MOVE to some common header file.
391 * IMM_LEN: # of bytes that can be tx'd as immediate data. There are 16 flits
392 * in a tx desc; subtract 3 for tx_data_wr (including the WR header), and 1 more
393 * for the second gen bit flit. This leaves us with 12 flits.
395 * descs_to_sgllen: # of SGL entries that can fit into the given # of tx descs.
396 * The first desc has a tx_data_wr (which includes the WR header), the rest have
397 * the WR header only. All descs have the second gen bit flit.
399 * sgllen_to_descs: # of tx descs used up by an sgl of given length. The first
400 * desc has a tx_data_wr (which includes the WR header), the rest have the WR
401 * header only. All descs have the second gen bit flit.
403 * flits_to_sgllen: # of SGL entries that can be fit in the given # of flits.
407 static int descs_to_sgllen[TX_MAX_DESC + 1] = {0, 8, 17, 26, 35};
408 static int sgllen_to_descs[TX_MAX_SEGS] = {
409 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, /* 0 - 9 */
410 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, /* 10 - 19 */
411 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, /* 20 - 29 */
412 4, 4, 4, 4, 4, 4 /* 30 - 35 */
415 static int flits_to_sgllen[TX_DESC_FLITS + 1] = {
416 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10
419 #if SGE_NUM_GENBITS != 2
420 #error "SGE_NUM_GENBITS really must be 2"
424 t3_push_frames(struct socket *so, int req_completion)
426 struct tcpcb *tp = so_sototcpcb(so);
427 struct toepcb *toep = tp->t_toe;
428 struct mbuf *m0, *sndptr, *m;
429 struct toedev *tod = toep->tp_tod;
430 struct adapter *sc = tod->tod_softc;
431 int bytes, ndesc, total_bytes = 0, mlen;
436 struct tx_data_wr *wr;
438 inp_lock_assert(tp->t_inpcb);
440 snd = so_sockbuf_snd(so);
444 * Autosize the send buffer.
446 if (snd->sb_flags & SB_AUTOSIZE && VNET(tcp_do_autosndbuf)) {
447 if (snd->sb_cc >= (snd->sb_hiwat / 8 * 7) &&
448 snd->sb_cc < VNET(tcp_autosndbuf_max)) {
449 if (!sbreserve_locked(snd, min(snd->sb_hiwat +
450 VNET(tcp_autosndbuf_inc), VNET(tcp_autosndbuf_max)),
452 snd->sb_flags &= ~SB_AUTOSIZE;
456 if (toep->tp_m_last && toep->tp_m_last == snd->sb_sndptr)
457 sndptr = toep->tp_m_last->m_next;
459 sndptr = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;
461 /* Nothing to send or no WRs available for sending data */
462 if (toep->tp_wr_avail == 0 || sndptr == NULL)
465 /* Something to send and at least 1 WR available */
466 while (toep->tp_wr_avail && sndptr != NULL) {
468 m0 = m_gethdr(M_NOWAIT, MT_DATA);
471 oh = mtod(m0, struct ofld_hdr *);
472 wr = (void *)(oh + 1);
473 dst = (void *)(wr + 1);
475 m0->m_pkthdr.len = m0->m_len = sizeof(*oh) + sizeof(*wr);
476 oh->flags = V_HDR_CTRL(CPL_PRIORITY_DATA) | F_HDR_DF |
477 V_HDR_QSET(toep->tp_qset);
480 * Try to construct an immediate data WR if possible. Stuff as
481 * much data into it as possible, one whole mbuf at a time.
483 mlen = sndptr->m_len;
485 while (mlen <= IMM_LEN - bytes) {
486 bcopy(sndptr->m_data, dst, mlen);
490 if (!(sndptr = sndptr->m_next))
492 mlen = sndptr->m_len;
497 /* Was able to fit 'bytes' bytes in an immediate WR */
500 make_tx_data_wr(so, wr, bytes, sndptr);
503 m0->m_pkthdr.len = m0->m_len;
506 int wr_avail = min(toep->tp_wr_avail, TX_MAX_DESC);
508 /* Need to make an SGL */
510 sgl = sglist_alloc(descs_to_sgllen[wr_avail], M_NOWAIT);
514 for (m = sndptr; m != NULL; m = m->m_next) {
515 if ((mlen = m->m_len) > 0) {
516 if (sglist_append(sgl, m->m_data, mlen))
526 ndesc = sgllen_to_descs[sgl->sg_nseg];
527 oh->flags |= F_HDR_SGL;
529 make_tx_data_wr(so, wr, bytes, sndptr);
532 oh->flags |= V_HDR_NDESC(ndesc);
535 snd->sb_sndptr = sndptr;
536 snd->sb_sndptroff += bytes;
537 if (sndptr == NULL) {
538 snd->sb_sndptr = snd->sb_mbtail;
539 snd->sb_sndptroff -= snd->sb_mbtail->m_len;
540 toep->tp_m_last = snd->sb_mbtail;
542 toep->tp_m_last = NULL;
544 total_bytes += bytes;
546 toep->tp_wr_avail -= ndesc;
547 toep->tp_wr_unacked += ndesc;
549 if ((req_completion && toep->tp_wr_unacked == ndesc) ||
550 toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
551 wr->wr.wrh_hi |= htonl(F_WR_COMPL);
552 toep->tp_wr_unacked = 0;
555 enqueue_wr(toep, m0);
556 l2t_send(sc, m0, toep->tp_l2t);
561 if (sndptr == NULL && (toep->tp_flags & TP_SEND_FIN))
562 close_conn(sc, toep);
564 return (total_bytes);
568 send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
571 struct cpl_rx_data_ack *req;
572 uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
574 m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_CONTROL, req);
578 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
580 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
581 req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
582 t3_offload_tx(sc, m);
587 t3_rcvd(struct toedev *tod, struct tcpcb *tp)
589 struct adapter *sc = tod->tod_softc;
590 struct inpcb *inp = tp->t_inpcb;
591 struct socket *so = inp->inp_socket;
592 struct sockbuf *so_rcv = &so->so_rcv;
593 struct toepcb *toep = tp->t_toe;
596 INP_WLOCK_ASSERT(inp);
598 SOCKBUF_LOCK(so_rcv);
599 KASSERT(toep->tp_enqueued >= so_rcv->sb_cc,
600 ("%s: so_rcv->sb_cc > enqueued", __func__));
601 toep->tp_rx_credits += toep->tp_enqueued - so_rcv->sb_cc;
602 toep->tp_enqueued = so_rcv->sb_cc;
603 SOCKBUF_UNLOCK(so_rcv);
605 must_send = toep->tp_rx_credits + 16384 >= tp->rcv_wnd;
606 if (must_send || toep->tp_rx_credits >= 15 * 1024) {
609 credits = send_rx_credits(sc, toep, toep->tp_rx_credits);
610 toep->tp_rx_credits -= credits;
611 tp->rcv_wnd += credits;
612 tp->rcv_adv += credits;
617 do_rx_urg_notify(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
619 struct adapter *sc = qs->adap;
620 struct tom_data *td = sc->tom_softc;
621 struct cpl_rx_urg_notify *hdr = mtod(m, void *);
622 unsigned int tid = GET_TID(hdr);
623 struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
625 log(LOG_ERR, "%s: tid %u inp %p", __func__, tid, toep->tp_inp);
632 t3_send_fin(struct toedev *tod, struct tcpcb *tp)
634 struct toepcb *toep = tp->t_toe;
635 struct inpcb *inp = tp->t_inpcb;
636 struct socket *so = inp_inpcbtosocket(inp);
638 unsigned int tid = toep->tp_tid;
641 INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
642 INP_WLOCK_ASSERT(inp);
644 CTR4(KTR_CXGB, "%s: tid %d, toep %p, flags %x", __func__, tid, toep,
647 toep->tp_flags |= TP_SEND_FIN;
648 t3_push_frames(so, 1);
654 t3_tod_output(struct toedev *tod, struct tcpcb *tp)
656 struct inpcb *inp = tp->t_inpcb;
657 struct socket *so = inp->inp_socket;
659 t3_push_frames(so, 1);
663 /* What mtu_idx to use, given a 4-tuple and/or an MSS cap */
665 find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
667 unsigned short *mtus = &sc->params.mtus[0];
670 KASSERT(inc != NULL || pmss > 0,
671 ("%s: at least one of inc/pmss must be specified", __func__));
673 mss = inc ? tcp_mssopt(inc) : pmss;
674 if (pmss > 0 && mss > pmss)
677 while (i < NMTUS - 1 && mtus[i + 1] <= mss + 40)
684 purge_wr_queue(struct toepcb *toep)
689 while ((m = mbufq_dequeue(&toep->wr_list)) != NULL) {
690 oh = mtod(m, struct ofld_hdr *);
691 if (oh->flags & F_HDR_SGL)
692 sglist_free(oh->sgl);
698 * Release cxgb(4) and T3 resources held by an offload connection (TID, L2T
702 t3_release_offload_resources(struct toepcb *toep)
704 struct toedev *tod = toep->tp_tod;
705 struct tom_data *td = t3_tomdata(tod);
708 * The TOM explicitly detaches its toepcb from the system's inp before
709 * it releases the offload resources.
712 panic("%s: inp %p still attached to toepcb %p",
713 __func__, toep->tp_inp, toep);
716 if (toep->tp_wr_avail != toep->tp_wr_max)
717 purge_wr_queue(toep);
720 l2t_release(td->l2t, toep->tp_l2t);
724 if (toep->tp_tid >= 0)
725 release_tid(tod, toep->tp_tid, toep->tp_qset);
731 * Determine the receive window size for a socket.
734 select_rcv_wnd(struct socket *so)
738 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
740 wnd = sbspace(&so->so_rcv);
741 if (wnd < MIN_RCV_WND)
744 return min(wnd, MAX_RCV_WND);
748 select_rcv_wscale(void)
751 unsigned long space = sb_max;
753 if (space > MAX_RCV_WND)
756 while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space)
764 * Set up the socket for TCP offload.
767 offload_socket(struct socket *so, struct toepcb *toep)
769 struct toedev *tod = toep->tp_tod;
770 struct tom_data *td = t3_tomdata(tod);
771 struct inpcb *inp = sotoinpcb(so);
772 struct tcpcb *tp = intotcpcb(inp);
774 INP_WLOCK_ASSERT(inp);
777 SOCKBUF_LOCK(&so->so_snd);
778 so_sockbuf_snd(so)->sb_flags |= SB_NOCOALESCE;
779 SOCKBUF_UNLOCK(&so->so_snd);
780 SOCKBUF_LOCK(&so->so_rcv);
781 so_sockbuf_rcv(so)->sb_flags |= SB_NOCOALESCE;
782 SOCKBUF_UNLOCK(&so->so_rcv);
785 tp->tod = toep->tp_tod;
787 tp->t_flags |= TF_TOE;
789 /* Install an extra hold on inp */
791 toep->tp_flags |= TP_ATTACHED;
794 /* Add the TOE PCB to the active list */
795 mtx_lock(&td->toep_list_lock);
796 TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
797 mtx_unlock(&td->toep_list_lock);
800 /* This is _not_ the normal way to "unoffload" a socket. */
802 undo_offload_socket(struct socket *so)
804 struct inpcb *inp = sotoinpcb(so);
805 struct tcpcb *tp = intotcpcb(inp);
806 struct toepcb *toep = tp->t_toe;
807 struct toedev *tod = toep->tp_tod;
808 struct tom_data *td = t3_tomdata(tod);
810 INP_WLOCK_ASSERT(inp);
812 so_sockbuf_snd(so)->sb_flags &= ~SB_NOCOALESCE;
813 so_sockbuf_rcv(so)->sb_flags &= ~SB_NOCOALESCE;
817 tp->t_flags &= ~TF_TOE;
820 toep->tp_flags &= ~TP_ATTACHED;
821 if (in_pcbrele_wlocked(inp))
822 panic("%s: inp freed.", __func__);
824 mtx_lock(&td->toep_list_lock);
825 TAILQ_REMOVE(&td->toep_list, toep, link);
826 mtx_unlock(&td->toep_list_lock);
830 * Socket could be a listening socket, and we may not have a toepcb at all at
834 calc_opt0h(struct socket *so, int mtu_idx, int rscale, struct l2t_entry *e)
836 uint32_t opt0h = F_TCAM_BYPASS | V_WND_SCALE(rscale) |
840 struct inpcb *inp = sotoinpcb(so);
841 struct tcpcb *tp = intotcpcb(inp);
842 int keepalive = always_keepalive ||
843 so_options_get(so) & SO_KEEPALIVE;
845 opt0h |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0);
846 opt0h |= V_KEEP_ALIVE(keepalive != 0);
850 opt0h |= V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx);
852 return (htobe32(opt0h));
856 calc_opt0l(struct socket *so, int rcv_bufsize)
858 uint32_t opt0l = V_ULP_MODE(ULP_MODE_NONE) | V_RCV_BUFSIZ(rcv_bufsize);
860 KASSERT(rcv_bufsize <= M_RCV_BUFSIZ,
861 ("%s: rcv_bufsize (%d) is too high", __func__, rcv_bufsize));
863 if (so != NULL) /* optional because noone cares about IP TOS */
864 opt0l |= V_TOS(INP_TOS(sotoinpcb(so)));
866 return (htobe32(opt0l));
870 * Convert an ACT_OPEN_RPL status to an errno.
873 act_open_rpl_status_to_errno(int status)
876 case CPL_ERR_CONN_RESET:
877 return (ECONNREFUSED);
878 case CPL_ERR_ARP_MISS:
879 return (EHOSTUNREACH);
880 case CPL_ERR_CONN_TIMEDOUT:
882 case CPL_ERR_TCAM_FULL:
884 case CPL_ERR_CONN_EXIST:
885 log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
893 * Return whether a failed active open has allocated a TID
896 act_open_has_tid(int status)
898 return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
899 status != CPL_ERR_ARP_MISS;
903 * Active open failed.
906 do_act_open_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
908 struct adapter *sc = qs->adap;
909 struct tom_data *td = sc->tom_softc;
910 struct toedev *tod = &td->tod;
911 struct cpl_act_open_rpl *rpl = mtod(m, void *);
912 unsigned int atid = G_TID(ntohl(rpl->atid));
913 struct toepcb *toep = lookup_atid(&td->tid_maps, atid);
914 struct inpcb *inp = toep->tp_inp;
915 int s = rpl->status, rc;
917 CTR3(KTR_CXGB, "%s: atid %u, status %u ", __func__, atid, s);
919 free_atid(&td->tid_maps, atid);
922 if (act_open_has_tid(s))
923 queue_tid_release(tod, GET_TID(rpl));
925 rc = act_open_rpl_status_to_errno(s);
927 INP_INFO_WLOCK(&V_tcbinfo);
929 toe_connect_failed(tod, inp, rc);
930 toepcb_release(toep); /* unlocks inp */
932 INP_INFO_WUNLOCK(&V_tcbinfo);
939 * Send an active open request.
941 * State of affairs on entry:
942 * soisconnecting (so_state |= SS_ISCONNECTING)
943 * tcbinfo not locked (this has changed - used to be WLOCKed)
945 * tp->t_state = TCPS_SYN_SENT
946 * rtalloc1, RT_UNLOCK on rt.
949 t3_connect(struct toedev *tod, struct socket *so,
950 struct rtentry *rt, struct sockaddr *nam)
952 struct mbuf *m = NULL;
953 struct l2t_entry *e = NULL;
954 struct tom_data *td = t3_tomdata(tod);
955 struct adapter *sc = tod->tod_softc;
956 struct cpl_act_open_req *cpl;
957 struct inpcb *inp = sotoinpcb(so);
958 struct tcpcb *tp = intotcpcb(inp);
960 int atid = -1, mtu_idx, rscale, cpu_idx, qset;
962 struct ifnet *ifp = rt->rt_ifp;
963 struct port_info *pi = ifp->if_softc; /* XXX wrong for VLAN etc. */
965 INP_WLOCK_ASSERT(inp);
967 toep = toepcb_alloc(tod);
971 atid = alloc_atid(&td->tid_maps, toep);
975 qset = pi->first_qset + (arc4random() % pi->nqsets);
977 m = M_GETHDR_OFLD(qset, CPL_PRIORITY_CONTROL, cpl);
981 gw = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam;
982 e = t3_l2t_get(pi, ifp, gw);
987 toep->tp_tid = atid; /* used to double check response */
988 toep->tp_qset = qset;
990 SOCKBUF_LOCK(&so->so_rcv);
991 /* opt0 rcv_bufsiz initially, assumes its normal meaning later */
992 toep->tp_rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
993 SOCKBUF_UNLOCK(&so->so_rcv);
995 offload_socket(so, toep);
998 * The kernel sets request_r_scale based on sb_max whereas we need to
999 * take hardware's MAX_RCV_WND into account too. This is normally a
1000 * no-op as MAX_RCV_WND is much larger than the default sb_max.
1002 if (tp->t_flags & TF_REQ_SCALE)
1003 rscale = tp->request_r_scale = select_rcv_wscale();
1006 mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0);
1007 cpu_idx = sc->rrss_map[qset];
1009 cpl->wr.wrh_hi = htobe32(V_WR_OP(FW_WROPCODE_FORWARD));
1011 OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
1012 inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip,
1014 cpl->opt0h = calc_opt0h(so, mtu_idx, rscale, e);
1015 cpl->opt0l = calc_opt0l(so, toep->tp_rx_credits);
1017 cpl->opt2 = calc_opt2(cpu_idx);
1019 CTR5(KTR_CXGB, "%s: atid %u (%s), toep %p, inp %p", __func__,
1020 toep->tp_tid, tcpstates[tp->t_state], toep, inp);
1022 if (l2t_send(sc, m, e) == 0)
1025 undo_offload_socket(so);
1028 CTR5(KTR_CXGB, "%s: FAILED, atid %d, toep %p, l2te %p, mbuf %p",
1029 __func__, atid, toep, e, m);
1032 free_atid(&td->tid_maps, atid);
1035 l2t_release(td->l2t, e);
1046 * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do not
1047 * send multiple ABORT_REQs for the same connection and also that we do not try
1048 * to send a message after the connection has closed.
1051 send_reset(struct toepcb *toep)
1054 struct cpl_abort_req *req;
1055 unsigned int tid = toep->tp_tid;
1056 struct inpcb *inp = toep->tp_inp;
1057 struct socket *so = inp->inp_socket;
1058 struct tcpcb *tp = intotcpcb(inp);
1059 struct toedev *tod = toep->tp_tod;
1060 struct adapter *sc = tod->tod_softc;
1063 INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
1064 INP_WLOCK_ASSERT(inp);
1066 CTR4(KTR_CXGB, "%s: tid %d, toep %p (%x)", __func__, tid, toep,
1069 if (toep->tp_flags & TP_ABORT_SHUTDOWN)
1072 toep->tp_flags |= (TP_ABORT_RPL_PENDING | TP_ABORT_SHUTDOWN);
1074 /* Purge the send queue */
1075 sbflush(so_sockbuf_snd(so));
1076 purge_wr_queue(toep);
1078 m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, req);
1080 CXGB_UNIMPLEMENTED();
1082 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
1083 req->wr.wrh_lo = htonl(V_WR_TID(tid));
1084 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
1085 req->rsvd0 = htonl(tp->snd_nxt);
1086 req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
1087 req->cmd = CPL_ABORT_SEND_RST;
1089 if (tp->t_state == TCPS_SYN_SENT)
1090 mbufq_tail(&toep->out_of_order_queue, m); /* defer */
1092 l2t_send(sc, m, toep->tp_l2t);
1096 t3_send_rst(struct toedev *tod __unused, struct tcpcb *tp)
1099 send_reset(tp->t_toe);
1104 * Handler for RX_DATA CPL messages.
1107 do_rx_data(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1109 struct adapter *sc = qs->adap;
1110 struct tom_data *td = sc->tom_softc;
1111 struct cpl_rx_data *hdr = mtod(m, void *);
1112 unsigned int tid = GET_TID(hdr);
1113 struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
1114 struct inpcb *inp = toep->tp_inp;
1117 struct sockbuf *so_rcv;
1119 /* Advance over CPL */
1120 m_adj(m, sizeof(*hdr));
1122 /* XXX: revisit. This comes from the T4 TOM */
1123 if (__predict_false(inp == NULL)) {
1125 * do_pass_establish failed and must be attempting to abort the
1126 * connection. Meanwhile, the T4 has sent us data for such a
1130 KASSERT(toepcb_flag(toep, TPF_ABORT_SHUTDOWN),
1131 ("%s: inp NULL and tid isn't being aborted", __func__));
1138 if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
1139 CTR4(KTR_CXGB, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
1140 __func__, tid, m->m_pkthdr.len, inp->inp_flags);
1146 if (__predict_false(hdr->dack_mode != toep->tp_delack_mode))
1147 toep->tp_delack_mode = hdr->dack_mode;
1149 tp = intotcpcb(inp);
1152 if (__predict_false(tp->rcv_nxt != be32toh(hdr->seq))) {
1154 "%s: unexpected seq# %x for TID %u, rcv_nxt %x\n",
1155 __func__, be32toh(hdr->seq), toep->tp_tid, tp->rcv_nxt);
1158 tp->rcv_nxt += m->m_pkthdr.len;
1159 KASSERT(tp->rcv_wnd >= m->m_pkthdr.len,
1160 ("%s: negative window size", __func__));
1161 tp->rcv_wnd -= m->m_pkthdr.len;
1162 tp->t_rcvtime = ticks;
1164 so = inp->inp_socket;
1165 so_rcv = &so->so_rcv;
1166 SOCKBUF_LOCK(so_rcv);
1168 if (__predict_false(so_rcv->sb_state & SBS_CANTRCVMORE)) {
1169 CTR3(KTR_CXGB, "%s: tid %u, excess rx (%d bytes)",
1170 __func__, tid, m->m_pkthdr.len);
1171 SOCKBUF_UNLOCK(so_rcv);
1174 INP_INFO_WLOCK(&V_tcbinfo);
1176 tp = tcp_drop(tp, ECONNRESET);
1179 INP_INFO_WUNLOCK(&V_tcbinfo);
1185 /* receive buffer autosize */
1186 if (so_rcv->sb_flags & SB_AUTOSIZE &&
1187 V_tcp_do_autorcvbuf &&
1188 so_rcv->sb_hiwat < V_tcp_autorcvbuf_max &&
1189 (m->m_pkthdr.len > (sbspace(so_rcv) / 8 * 7) || tp->rcv_wnd < 32768)) {
1190 unsigned int hiwat = so_rcv->sb_hiwat;
1191 unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
1192 V_tcp_autorcvbuf_max);
1194 if (!sbreserve_locked(so_rcv, newsize, so, NULL))
1195 so_rcv->sb_flags &= ~SB_AUTOSIZE;
1197 toep->tp_rx_credits += newsize - hiwat;
1200 toep->tp_enqueued += m->m_pkthdr.len;
1201 sbappendstream_locked(so_rcv, m);
1202 sorwakeup_locked(so);
1203 SOCKBUF_UNLOCK_ASSERT(so_rcv);
1210 * Handler for PEER_CLOSE CPL messages.
1213 do_peer_close(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1215 struct adapter *sc = qs->adap;
1216 struct tom_data *td = sc->tom_softc;
1217 const struct cpl_peer_close *hdr = mtod(m, void *);
1218 unsigned int tid = GET_TID(hdr);
1219 struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
1220 struct inpcb *inp = toep->tp_inp;
1224 INP_INFO_WLOCK(&V_tcbinfo);
1226 tp = intotcpcb(inp);
1228 CTR5(KTR_CXGB, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
1229 tid, tp ? tcpstates[tp->t_state] : "no tp" , toep->tp_flags, inp);
1231 if (toep->tp_flags & TP_ABORT_RPL_PENDING)
1234 so = inp_inpcbtosocket(inp);
1239 switch (tp->t_state) {
1240 case TCPS_SYN_RECEIVED:
1241 tp->t_starttime = ticks;
1243 case TCPS_ESTABLISHED:
1244 tp->t_state = TCPS_CLOSE_WAIT;
1246 case TCPS_FIN_WAIT_1:
1247 tp->t_state = TCPS_CLOSING;
1249 case TCPS_FIN_WAIT_2:
1251 INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
1252 INP_INFO_WUNLOCK(&V_tcbinfo);
1255 toepcb_release(toep); /* no more CPLs expected */
1260 log(LOG_ERR, "%s: TID %u received PEER_CLOSE in bad state %d\n",
1261 __func__, toep->tp_tid, tp->t_state);
1266 INP_INFO_WUNLOCK(&V_tcbinfo);
1273 * Handler for CLOSE_CON_RPL CPL messages. peer ACK to our FIN received.
1276 do_close_con_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1278 struct adapter *sc = qs->adap;
1279 struct tom_data *td = sc->tom_softc;
1280 const struct cpl_close_con_rpl *rpl = mtod(m, void *);
1281 unsigned int tid = GET_TID(rpl);
1282 struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
1283 struct inpcb *inp = toep->tp_inp;
1287 INP_INFO_WLOCK(&V_tcbinfo);
1289 tp = intotcpcb(inp);
1291 CTR4(KTR_CXGB, "%s: tid %u (%s), toep_flags 0x%x", __func__, tid,
1292 tp ? tcpstates[tp->t_state] : "no tp", toep->tp_flags);
1294 if ((toep->tp_flags & TP_ABORT_RPL_PENDING))
1297 so = inp_inpcbtosocket(inp);
1298 tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */
1300 switch (tp->t_state) {
1304 INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
1305 INP_INFO_WUNLOCK(&V_tcbinfo);
1308 toepcb_release(toep); /* no more CPLs expected */
1317 case TCPS_FIN_WAIT_1:
1318 if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
1319 soisdisconnected(so);
1320 tp->t_state = TCPS_FIN_WAIT_2;
1324 "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
1325 __func__, toep->tp_tid, tp->t_state);
1330 INP_INFO_WUNLOCK(&V_tcbinfo);
1337 do_smt_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1339 struct cpl_smt_write_rpl *rpl = mtod(m, void *);
1341 if (rpl->status != CPL_ERR_NONE) {
1343 "Unexpected SMT_WRITE_RPL status %u for entry %u\n",
1344 rpl->status, GET_TID(rpl));
1352 do_set_tcb_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1354 struct cpl_set_tcb_rpl *rpl = mtod(m, void *);
1356 if (rpl->status != CPL_ERR_NONE) {
1357 log(LOG_ERR, "Unexpected SET_TCB_RPL status %u for tid %u\n",
1358 rpl->status, GET_TID(rpl));
1366 * Handle an ABORT_RPL_RSS CPL message.
/*
 * Handler for an ABORT_RPL_RSS CPL message (reply to an ABORT_REQ this
 * driver sent).  The reply arrives in two halves; TP_ABORT_RPL_RCVD marks
 * receipt of the first half, and the toepcb reference is released only
 * when the second half arrives.
 */
1369 do_abort_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1371 struct adapter *sc = qs->adap;
1372 struct tom_data *td = sc->tom_softc;
1373 const struct cpl_abort_rpl_rss *rpl = mtod(m, void *);
1374 unsigned int tid = GET_TID(rpl);
1375 struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
1379 * Ignore replies to post-close aborts indicating that the abort was
1380 * requested too late. These connections are terminated when we get
1381 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
1382 * arrives the TID is either no longer used or it has been recycled.
1384 if (rpl->status == CPL_ERR_ABORT_FAILED) {
/* Aborts for embryonic (syncache) connections take a separate path. */
1389 if (toep->tp_flags & TP_IS_A_SYNQ_ENTRY)
1390 return (do_abort_rpl_synqe(qs, r, m));
1392 CTR4(KTR_CXGB, "%s: tid %d, toep %p, status %d", __func__, tid, toep,
1398 if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
/* First half of the reply: just record that it was seen. */
1399 if (!(toep->tp_flags & TP_ABORT_RPL_RCVD)) {
1400 toep->tp_flags |= TP_ABORT_RPL_RCVD;
/*
 * Second half: clear both abort-reply flags and drop our reference.
 * Fix: clearing a bit requires "&= ~flag"; the previous
 * "&= TP_ABORT_RPL_PENDING" instead kept ONLY that bit and destroyed
 * every other flag on the toepcb (mirror of the ~TP_ABORT_RPL_RCVD
 * line directly above).
 */
1403 toep->tp_flags &= ~TP_ABORT_RPL_RCVD;
1404 toep->tp_flags &= ~TP_ABORT_RPL_PENDING;
1405 toepcb_release(toep); /* no more CPLs expected */
1414 * Convert the status code of an ABORT_REQ into a FreeBSD error code.
/*
 * Convert the status code of an ABORT_REQ into a FreeBSD errno.
 * BAD_SYN/CONN_RESET map to EPIPE when the peer already half-closed
 * (CLOSE_WAIT), otherwise ECONNRESET.  The timeout statuses fall through
 * together; their shared return value is on a line not visible here —
 * presumably ETIMEDOUT, confirm against the full file.
 */
1417 abort_status_to_errno(struct tcpcb *tp, int abort_reason)
1419 switch (abort_reason) {
1420 case CPL_ERR_BAD_SYN:
1421 case CPL_ERR_CONN_RESET:
1422 return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
1423 case CPL_ERR_XMIT_TIMEDOUT:
1424 case CPL_ERR_PERSIST_TIMEDOUT:
1425 case CPL_ERR_FINWAIT2_TIMEDOUT:
1426 case CPL_ERR_KEEPALIVE_TIMEDOUT:
1434 * Returns whether an ABORT_REQ_RSS message is a negative advice.
/*
 * Returns non-zero when an ABORT_REQ_RSS status is only "negative advice"
 * (retransmit or persist advice) rather than a request to tear the
 * connection down.
 */
1437 is_neg_adv_abort(unsigned int status)
1439 return status == CPL_ERR_RTX_NEG_ADVICE ||
1440 status == CPL_ERR_PERSIST_NEG_ADVICE;
/*
 * Build and transmit a CPL_ABORT_RPL for the given tid on the given qset.
 * CPL_ABORT_NO_RST tells the hardware not to send a RST to the peer.
 */
1444 send_abort_rpl(struct toedev *tod, int tid, int qset)
1447 struct cpl_abort_rpl *rpl;
1448 struct adapter *sc = tod->tod_softc;
/* Allocate an offload mbuf with room for the CPL (rpl points into it). */
1450 reply = M_GETHDR_OFLD(qset, CPL_PRIORITY_DATA, rpl);
/* NOTE(review): appears to be the allocation-failure branch — confirm. */
1452 CXGB_UNIMPLEMENTED();
1454 rpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
1455 rpl->wr.wrh_lo = htonl(V_WR_TID(tid));
1456 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
1457 rpl->cmd = CPL_ABORT_NO_RST;
1459 t3_offload_tx(sc, reply);
1463 * Handle an ABORT_REQ_RSS CPL message. If we're waiting for an ABORT_RPL we
1464 * ignore this request except that we need to reply to it.
/*
 * Handle an ABORT_REQ_RSS CPL message (hardware-initiated abort).  If we
 * are already waiting for our own ABORT_RPL we ignore the request except
 * for sending the mandatory reply; otherwise the socket error is set from
 * the abort status and the connection is torn down.
 */
1467 do_abort_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1469 struct adapter *sc = qs->adap;
1470 struct tom_data *td = sc->tom_softc;
1471 struct toedev *tod = &td->tod;
1472 const struct cpl_abort_req_rss *req = mtod(m, void *);
1473 unsigned int tid = GET_TID(req);
1474 struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
1478 int qset = toep->tp_qset;
/* Negative advice indicates a transient condition; do not abort. */
1480 if (is_neg_adv_abort(req->status)) {
1481 CTR4(KTR_CXGB, "%s: negative advice %d for tid %u (%x)",
1482 __func__, req->status, tid, toep->tp_flags);
/* Embryonic (syncache) connections are aborted via the synqe path. */
1487 if (toep->tp_flags & TP_IS_A_SYNQ_ENTRY)
1488 return (do_abort_req_synqe(qs, r, m));
1491 INP_INFO_WLOCK(&V_tcbinfo); /* for tcp_close */
1494 tp = intotcpcb(inp);
1495 so = inp->inp_socket;
1497 CTR6(KTR_CXGB, "%s: tid %u (%s), toep %p (%x), status %d",
1498 __func__, tid, tcpstates[tp->t_state], toep, toep->tp_flags,
/*
 * First of the two halves of the abort request: mark it received and
 * shut further transmission down; the rest happens on the second half.
 */
1501 if (!(toep->tp_flags & TP_ABORT_REQ_RCVD)) {
1502 toep->tp_flags |= TP_ABORT_REQ_RCVD;
1503 toep->tp_flags |= TP_ABORT_SHUTDOWN;
1505 INP_INFO_WUNLOCK(&V_tcbinfo);
/* Second half: clear the marker and proceed with the teardown below. */
1509 toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
1512 * If we'd sent a reset on this toep, we'll ignore this and clean up in
1513 * the T3's reply to our reset instead.
1515 if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
1516 toep->tp_flags |= TP_ABORT_RPL_SENT;
/* Hardware abort: surface the reason to the socket as an errno. */
1519 so_error_set(so, abort_status_to_errno(tp, req->status));
1522 INP_WLOCK(inp); /* re-acquire */
1523 toepcb_release(toep); /* no more CPLs expected */
1525 INP_INFO_WUNLOCK(&V_tcbinfo);
/* The hardware always expects a reply to an ABORT_REQ. */
1527 send_abort_rpl(tod, tid, qset);
/*
 * Apply the TCP options negotiated by the hardware (packed in 'tcpopt',
 * host byte order) to the tcpcb: MSS, timestamps, SACK, window scaling.
 */
1533 assign_rxopt(struct tcpcb *tp, uint16_t tcpopt)
1535 struct toepcb *toep = tp->t_toe;
1536 struct adapter *sc = toep->tp_tod->tod_softc;
/* MSS = negotiated MTU minus 40 (fixed IP + TCP header bytes). */
1538 tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(tcpopt)] - 40;
1540 if (G_TCPOPT_TSTAMP(tcpopt)) {
1541 tp->t_flags |= TF_RCVD_TSTMP;
1542 tp->t_flags |= TF_REQ_TSTMP; /* forcibly set */
1543 tp->ts_recent = 0; /* XXX */
1544 tp->ts_recent_age = tcp_ts_getticks();
/* Timestamps consume option space in every segment; shrink the MSS. */
1545 tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
1548 if (G_TCPOPT_SACK(tcpopt))
1549 tp->t_flags |= TF_SACK_PERMIT;
1551 tp->t_flags &= ~TF_SACK_PERMIT;
1553 if (G_TCPOPT_WSCALE_OK(tcpopt))
1554 tp->t_flags |= TF_RCVD_SCALE;
/* Window scaling is active only when both sides asked for it. */
1556 if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
1557 (TF_RCVD_SCALE | TF_REQ_SCALE)) {
1558 tp->rcv_scale = tp->request_r_scale;
1559 tp->snd_scale = G_TCPOPT_SND_WSCALE(tcpopt);
1565 * The ISS and IRS are from after the exchange of SYNs and are off by 1.
/*
 * Move a connection to ESTABLISHED once the hardware reports the handshake
 * complete.  The ISS and IRS in the CPL are from after the exchange of
 * SYNs and are off by 1; the true values are recovered below.
 */
1568 make_established(struct socket *so, uint32_t cpl_iss, uint32_t cpl_irs,
1569 uint16_t cpl_tcpopt)
1571 struct inpcb *inp = sotoinpcb(so);
1572 struct tcpcb *tp = intotcpcb(inp);
1573 struct toepcb *toep = tp->t_toe;
1575 uint32_t iss = be32toh(cpl_iss) - 1; /* true ISS */
1576 uint32_t irs = be32toh(cpl_irs) - 1; /* true IRS */
1577 uint16_t tcpopt = be16toh(cpl_tcpopt);
1579 INP_WLOCK_ASSERT(inp);
1581 tp->t_state = TCPS_ESTABLISHED;
1582 tp->t_starttime = ticks;
1583 TCPSTAT_INC(tcps_connects);
1585 CTR4(KTR_CXGB, "%s tid %u, toep %p, inp %p", tcpstates[tp->t_state],
1586 toep->tp_tid, toep, inp);
/* tp_rx_credits is in units of 1KB (hence the << 10). */
1590 tp->rcv_wnd = toep->tp_rx_credits << 10;
1591 tp->rcv_adv += tp->rcv_wnd;
1592 tp->last_ack_sent = tp->rcv_nxt;
1595 * If we were unable to send all rx credits via opt0, save the remainder
1596 * in rx_credits so that they can be handed over with the next credit
1599 SOCKBUF_LOCK(&so->so_rcv);
1600 bufsize = select_rcv_wnd(so);
1601 SOCKBUF_UNLOCK(&so->so_rcv);
1602 toep->tp_rx_credits = bufsize - tp->rcv_wnd;
/* Initialize the send sequence space from the true ISS. */
1605 tcp_sendseqinit(tp);
1606 tp->snd_una = iss + 1;
1607 tp->snd_nxt = iss + 1;
1608 tp->snd_max = iss + 1;
1610 assign_rxopt(tp, tcpopt);
1615 * Fill in the right TID for CPL messages waiting in the out-of-order queue
1616 * and send them to the TOE.
/*
 * Fill in the real TID for CPL messages that were queued out-of-order
 * (built before the hardware assigned a TID) and push them to the TOE.
 */
1619 fixup_and_send_ofo(struct toepcb *toep)
1622 struct toedev *tod = toep->tp_tod;
1623 struct adapter *sc = tod->tod_softc;
1624 struct inpcb *inp = toep->tp_inp;
1625 unsigned int tid = toep->tp_tid;
1627 inp_lock_assert(inp);
1629 while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
1630 struct ofld_hdr *oh = mtod(m, void *);
1632 * A variety of messages can be waiting but the fields we'll
1633 * be touching are common to all so any message type will do.
1635 struct cpl_close_con_req *p = (void *)(oh + 1);
/* Patch the TID into both the WR header and the CPL opcode/tid word. */
1637 p->wr.wrh_lo = htonl(V_WR_TID(tid));
1638 OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
1639 t3_offload_tx(sc, m);
1644 * Process a CPL_ACT_ESTABLISH message.
/*
 * Process a CPL_ACT_ESTABLISH message: an active-open connection has been
 * established by the hardware.  Swap the provisional atid for the real
 * tid, mark the tcpcb established, and flush any deferred CPLs.
 */
1647 do_act_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1649 struct adapter *sc = qs->adap;
1650 struct tom_data *td = sc->tom_softc;
1651 struct cpl_act_establish *req = mtod(m, void *);
1652 unsigned int tid = GET_TID(req);
1653 unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
1654 struct toepcb *toep = lookup_atid(&td->tid_maps, atid);
1655 struct inpcb *inp = toep->tp_inp;
1659 CTR3(KTR_CXGB, "%s: atid %u, tid %u", __func__, atid, tid);
/* The provisional atid is no longer needed once the real tid exists. */
1661 free_atid(&td->tid_maps, atid);
1664 tp = intotcpcb(inp);
1666 KASSERT(toep->tp_qset == qs->idx,
1667 ("%s qset mismatch %d %d", __func__, toep->tp_qset, qs->idx));
1668 KASSERT(toep->tp_tid == atid,
1669 ("%s atid mismatch %d %d", __func__, toep->tp_tid, atid));
1672 insert_tid(td, toep, tid);
1674 if (inp->inp_flags & INP_DROPPED) {
1675 /* socket closed by the kernel before hw told us it connected */
1680 KASSERT(tp->t_state == TCPS_SYN_SENT,
1681 ("TID %u expected TCPS_SYN_SENT, found %d.", tid, tp->t_state));
1683 so = inp->inp_socket;
1684 make_established(so, req->snd_isn, req->rcv_isn, req->tcp_opt);
1687 * Now that we finally have a TID send any CPL messages that we had to
1688 * defer for lack of a TID.
1690 if (mbufq_len(&toep->out_of_order_queue))
1691 fixup_and_send_ofo(toep);
1700 * Process an acknowledgment of WR completion. Advance snd_una and send the
1701 * next batch of work requests from the write queue.
/*
 * Process an acknowledgment of work-request completion: return 'credits'
 * to the WR pool, free the acked WR mbufs, advance snd_una, drop acked
 * bytes from the send buffer, and push more data if any is queued.
 */
1704 wr_ack(struct toepcb *toep, struct mbuf *m)
1706 struct inpcb *inp = toep->tp_inp;
1708 struct cpl_wr_ack *hdr = mtod(m, void *);
1710 unsigned int credits = ntohs(hdr->credits);
1711 u32 snd_una = ntohl(hdr->snd_una);
1713 struct sockbuf *snd;
1715 struct ofld_hdr *oh;
1718 tp = intotcpcb(inp);
1719 so = inp->inp_socket;
1720 toep->tp_wr_avail += credits;
/* Clamp unacked so avail + unacked never exceeds the WR maximum. */
1721 if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
1722 toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;
/* Credits arrived with no WR pending on the queue: log and bail. */
1727 if (__predict_false(!p)) {
1728 CTR5(KTR_CXGB, "%s: %u extra WR_ACK credits, "
1729 "tid %u, state %u, wr_avail %u", __func__, credits,
1730 toep->tp_tid, tp->t_state, toep->tp_wr_avail);
1732 log(LOG_ERR, "%u WR_ACK credits for TID %u with "
1733 "nothing pending, state %u wr_avail=%u\n",
1734 credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail);
1738 oh = mtod(p, struct ofld_hdr *);
/* The hardware acks whole WRs only; partial credit is a bug. */
1740 KASSERT(credits >= G_HDR_NDESC(oh->flags),
1741 ("%s: partial credits? %d %d", __func__, credits,
1742 G_HDR_NDESC(oh->flags)));
1745 credits -= G_HDR_NDESC(oh->flags);
1748 if (oh->flags & F_HDR_SGL)
1749 sglist_free(oh->sgl);
/* Stale/old ack: snd_una in the CPL is behind what we already have. */
1753 if (__predict_false(SEQ_LT(snd_una, tp->snd_una)))
1756 if (tp->snd_una != snd_una) {
1757 tp->snd_una = snd_una;
1758 tp->ts_recent_age = tcp_ts_getticks();
/* Everything sent has been acked; the tx path is idle again. */
1759 if (tp->snd_una == tp->snd_nxt)
1760 toep->tp_flags &= ~TP_TX_WAIT_IDLE;
1763 snd = so_sockbuf_snd(so);
/* Release acked bytes and wake any writer blocked on buffer space. */
1766 sbdrop_locked(snd, bytes);
1767 so_sowwakeup_locked(so);
/* More unsent data remains in the sockbuf: keep transmitting. */
1770 if (snd->sb_sndptroff < snd->sb_cc)
1771 t3_push_frames(so, 0);
1774 inp_wunlock(tp->t_inpcb);
1779 * Handler for TX_DATA_ACK CPL messages.
/*
 * Handler for TX_DATA_ACK CPL messages: look up the toepcb for the tid
 * and hand the mbuf to wr_ack() for credit/snd_una processing.
 */
1782 do_wr_ack(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1784 struct adapter *sc = qs->adap;
1785 struct tom_data *td = sc->tom_softc;
1786 struct cpl_wr_ack *hdr = mtod(m, void *);
1787 unsigned int tid = GET_TID(hdr);
1788 struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
1798 t3_init_cpl_io(struct adapter *sc)
1800 t3_register_cpl_handler(sc, CPL_ACT_ESTABLISH, do_act_establish);
1801 t3_register_cpl_handler(sc, CPL_ACT_OPEN_RPL, do_act_open_rpl);
1802 t3_register_cpl_handler(sc, CPL_RX_URG_NOTIFY, do_rx_urg_notify);
1803 t3_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data);
1804 t3_register_cpl_handler(sc, CPL_TX_DMA_ACK, do_wr_ack);
1805 t3_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close);
1806 t3_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req);
1807 t3_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl);
1808 t3_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl);
1809 t3_register_cpl_handler(sc, CPL_SMT_WRITE_RPL, do_smt_write_rpl);
1810 t3_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl);