1 /**************************************************************************
3 Copyright (c) 2007-2008, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/kernel.h>
37 #include <sys/limits.h>
41 #include <sys/mutex.h>
42 #include <sys/sockstate.h>
43 #include <sys/sockopt.h>
44 #include <sys/socket.h>
45 #include <sys/sockbuf.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48 #include <sys/protosw.h>
51 #if __FreeBSD_version < 800044
52 #define V_tcp_do_autosndbuf tcp_do_autosndbuf
53 #define V_tcp_autosndbuf_max tcp_autosndbuf_max
54 #define V_tcp_do_rfc1323 tcp_do_rfc1323
55 #define V_tcp_do_autorcvbuf tcp_do_autorcvbuf
56 #define V_tcp_autorcvbuf_max tcp_autorcvbuf_max
57 #define V_tcpstat tcpstat
61 #include <net/route.h>
63 #include <netinet/in.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in_var.h>
69 #include <cxgb_osdep.h>
70 #include <sys/mbufq.h>
72 #include <netinet/ip.h>
73 #include <netinet/tcp_var.h>
74 #include <netinet/tcp_fsm.h>
75 #include <netinet/tcp_offload.h>
76 #include <netinet/tcp_seq.h>
77 #include <netinet/tcp_syncache.h>
78 #include <netinet/tcp_timer.h>
79 #include <net/route.h>
82 #include <common/cxgb_firmware_exports.h>
83 #include <common/cxgb_t3_cpl.h>
84 #include <common/cxgb_tcb.h>
85 #include <common/cxgb_ctl_defs.h>
86 #include <cxgb_offload.h>
89 #include <machine/bus.h>
91 #include <ulp/toecore/cxgb_toedev.h>
92 #include <ulp/tom/cxgb_l2t.h>
93 #include <ulp/tom/cxgb_defs.h>
94 #include <ulp/tom/cxgb_tom.h>
95 #include <ulp/tom/cxgb_t3_ddp.h>
96 #include <ulp/tom/cxgb_toepcb.h>
97 #include <ulp/tom/cxgb_tcp.h>
98 #include <ulp/tom/cxgb_tcp_offload.h>
101 * For ULP connections HW may add headers, e.g., for digests, that aren't part
102 * of the messages sent by the host but that are part of the TCP payload and
103 * therefore consume TCP sequence space. Tx connection parameters that
104 * operate in TCP sequence space are affected by the HW additions and need to
105 * compensate for them to accurately track TCP sequence numbers. This array
106 * contains the compensating extra lengths for ULP packets. It is indexed by
107 * a packet's ULP submode.
109 const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8};
113 * This sk_buff holds a fake header-only TCP segment that we use whenever we
114 * need to exploit SW TCP functionality that expects TCP headers, such as
115 * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple
116 * CPUs without locking.
118 static struct mbuf *tcphdr_mbuf __read_mostly;
122 * Size of WRs in bytes. Note that we assume all devices we are handling have
/* NOTE(review): rest of the comment is elided here; wrlen is set elsewhere. */
125 static unsigned int wrlen __read_mostly;
128 * The number of WRs needed for an skb depends on the number of page fragments
129 * in the skb and whether it has any payload in its main body. This maps the
130 * length of the gather list represented by an skb into the # of necessary WRs.
132 static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly;
135 * Max receive window supported by HW in bytes. Only a small part of it can
136 * be set through option0, the rest needs to be set through RX_DATA_ACK.
138 #define MAX_RCV_WND ((1U << 27) - 1)
141 * Min receive window. We want it to be large enough to accommodate receive
142 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance.
144 #define MIN_RCV_WND (24 * 1024U)
/* Extract the 6-bit DSCP/TOS field from an inpcb's IP TOS byte. */
145 #define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS)
/* Debug/compat knobs -- validation disabled in production builds. */
147 #define VALIDATE_SEQ 0
148 #define VALIDATE_SOCK(so)
151 #define TCP_TIMEWAIT 1
/* Forward declarations of handlers defined later in this file. */
155 static void t3_send_reset(struct toepcb *toep);
156 static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status);
157 static inline void free_atid(struct t3cdev *cdev, unsigned int tid);
158 static void handle_syncache_event(int event, void *arg);
/*
 * Debug-checked wrapper around sbappendstream_locked(): asserts mbuf-chain
 * sanity (only EXT_EXTREF external storage, no poisoned m_next pointers)
 * before and after appending 'n' to the socket buffer 'sb'.
 * NOTE(review): the loops that walk 'm' over the chains are elided in this
 * extraction; 'm' presumably iterates 'n' and then sb's chain -- confirm
 * against the full source.
 */
161 SBAPPEND(struct sockbuf *sb, struct mbuf *n)
167 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
168 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
169 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
170 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
171 m->m_next, m->m_nextpkt, m->m_flags));
176 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
177 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
178 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
179 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
180 m->m_next, m->m_nextpkt, m->m_flags));
/* The TOE requires per-mbuf boundaries to be preserved in the sockbuf. */
183 KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set"));
184 sbappendstream_locked(sb, n);
/* Re-check the chain after the append to catch corruption early. */
188 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
189 m->m_next, m->m_nextpkt, m->m_flags));
/* True iff the TOE device is a T3 rev-A part (needs its errata handling). */
195 is_t3a(const struct toedev *dev)
197 return (dev->tod_ttid == TOE_ID_CHELSIO_T3);
/* Debug helper: print the interesting fields of a toepcb via DPRINTF. */
201 dump_toepcb(struct toepcb *toep)
203 DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n",
204 toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode,
205 toep->tp_mtu_idx, toep->tp_tid);
207 DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n",
208 toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked,
209 toep->tp_mss_clamp, toep->tp_flags);
/*
 * Compat shim: look up a route for 'dst' via rtalloc1(), provided only when
 * the platform does not already define rtalloc2().
 * NOTE(review): the body past the rtalloc1() call is elided in this
 * extraction (presumably unlocks/returns rt); confirm against full source.
 */
212 #ifndef RTALLOC2_DEFINED
213 static struct rtentry *
214 rtalloc2(struct sockaddr *dst, int report, u_long ignflags)
216 struct rtentry *rt = NULL;
218 if ((rt = rtalloc1(dst, report, ignflags)) != NULL)
226 * Determine whether to send a CPL message now or defer it. A message is
227 * deferred if the connection is in SYN_SENT since we don't know the TID yet.
228 * For connections in other states the message is sent immediately.
229 * If through_l2t is set the message is subject to ARP processing, otherwise
230 * it is sent directly.
233 send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t)
235 struct tcpcb *tp = toep->tp_tp;
/* No TID yet in SYN_SENT: queue under the inpcb lock for later replay. */
237 if (__predict_false(tp->t_state == TCPS_SYN_SENT)) {
238 inp_wlock(tp->t_inpcb);
239 mbufq_tail(&toep->out_of_order_queue, m); // defer
240 inp_wunlock(tp->t_inpcb);
241 } else if (through_l2t)
242 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T
244 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly
/*
 * Build the scheduling priority for a CPL message from the control class
 * and the connection's queue set (body elided in this extraction).
 */
247 static inline unsigned int
248 mkprio(unsigned int cntrl, const struct toepcb *toep)
254 * Populate a TID_RELEASE WR. The mbuf must be already properly sized.
257 mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid)
259 struct cpl_tid_release *req;
261 m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep));
262 m->m_pkthdr.len = m->m_len = sizeof(*req);
263 req = mtod(m, struct cpl_tid_release *);
264 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
266 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
/*
 * Fill in a TX_DATA work request header at the front of mbuf 'm' for 'len'
 * payload bytes of connection 'so'.  On the first WR of a connection
 * (TP_DATASENT clear) also programs the initial flags, CPU index and the
 * send-buffer size hint (in 32KB units).
 */
270 make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail)
272 struct tcpcb *tp = so_sototcpcb(so);
273 struct toepcb *toep = tp->t_toe;
274 struct tx_data_wr *req;
277 inp_lock_assert(tp->t_inpcb);
278 snd = so_sockbuf_snd(so);
280 req = mtod(m, struct tx_data_wr *);
281 m->m_len = sizeof(*req);
282 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
283 req->wr_lo = htonl(V_WR_TID(toep->tp_tid));
284 /* len includes the length of any HW ULP additions */
285 req->len = htonl(len);
286 req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
287 /* V_TX_ULP_SUBMODE sets both the mode and submode */
288 req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) |
289 V_TX_URG(/* skb_urgent(skb) */ 0 ) |
290 V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) &&
292 req->sndseq = htonl(tp->snd_nxt);
/* First data WR on this tid: one-time initialization flags. */
293 if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
294 req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
295 V_TX_CPU_IDX(toep->tp_qset));
297 /* Sendbuffer is in units of 32KB.
299 if (V_tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE)
300 req->param |= htonl(V_TX_SNDBUF(V_tcp_autosndbuf_max >> 15));
302 req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
305 toep->tp_flags |= TP_DATASENT;
309 #define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */
/*
 * Push as much pending send-socket-buffer data as possible to the HW as
 * TX_DATA work requests, bounded by available WR credits (tp_wr_avail).
 * Small mbufs (<= IMM_LEN) are copied inline into the WR; larger chains are
 * described by a gather list of up to TX_MAX_SEGS segments.  Returns the
 * total number of payload bytes handed to the HW.
 * NOTE(review): many lines (braces, early returns, loop bodies) are elided
 * in this extraction; comments below describe only what is visible.
 */
312 t3_push_frames(struct socket *so, int req_completion)
314 struct tcpcb *tp = so_sototcpcb(so);
315 struct toepcb *toep = tp->t_toe;
317 struct mbuf *tail, *m0, *last;
320 int state, bytes, count, total_bytes;
321 bus_dma_segment_t segs[TX_MAX_SEGS], *segp;
/* Nothing can be pushed before the connection is established. */
324 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) {
325 DPRINTF("tcp state=%d\n", tp->t_state);
329 state = so_state_get(so);
331 if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) {
332 DPRINTF("disconnecting\n");
337 inp_lock_assert(tp->t_inpcb);
339 snd = so_sockbuf_snd(so);
342 d = TOM_DATA(toep->tp_toedev);
/* Resume from the sockbuf send pointer, or start at the head. */
345 last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;
348 DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n",
349 toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last);
/* Skip an mbuf already fully handed to the HW on a previous call. */
351 if (last && toep->tp_m_last == last && snd->sb_sndptroff != 0) {
352 KASSERT(tail, ("sbdrop error"));
353 last = tail = tail->m_next;
356 if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) {
357 DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail);
363 toep->tp_m_last = NULL;
/* Main loop: one WR per iteration while credits and data remain. */
364 while (toep->tp_wr_avail && (tail != NULL)) {
367 if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) {
372 * If the data in tail fits as in-line, then
373 * make an immediate data wr.
375 if (tail->m_len <= IMM_LEN) {
382 make_tx_data_wr(so, m0, bytes, tail);
383 m_append(m0, bytes, mtod(last, caddr_t));
384 KASSERT(!m0->m_next, ("bad append"));
/* Gather-list path: accumulate segments while credits allow. */
386 while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail)
387 && (tail != NULL) && (count < TX_MAX_SEGS-1)) {
388 bytes += tail->m_len;
392 * technically an abuse to be using this for a VA
393 * but less gross than defining my own structure
394 * or calling pmap_kextract from here :-|
396 segp->ds_addr = (bus_addr_t)tail->m_data;
397 segp->ds_len = tail->m_len;
398 DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n",
399 count, mbuf_wrs[count], tail->m_data, tail->m_len);
403 DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n",
404 toep->tp_wr_avail, count, mbuf_wrs[count], tail);
407 m_set_sgllen(m0, count);
408 make_tx_data_wr(so, m0, bytes, tail);
410 m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep));
/* Record where the next push should resume in the sockbuf. */
413 snd->sb_sndptr = tail;
414 toep->tp_m_last = NULL;
416 toep->tp_m_last = snd->sb_sndptr = last;
419 DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last);
421 snd->sb_sndptroff += bytes;
422 total_bytes += bytes;
423 toep->tp_write_seq += bytes;
424 CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d"
425 " tail=%p sndptr=%p sndptroff=%d",
426 toep->tp_wr_avail, count, mbuf_wrs[count],
427 tail, snd->sb_sndptr, snd->sb_sndptroff);
429 CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d"
430 " tp_m_last=%p tailbuf=%p snd_una=0x%08x",
431 total_bytes, toep->tp_m_last, tail->m_data,
434 CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d"
435 " tp_m_last=%p snd_una=0x%08x",
436 total_bytes, toep->tp_m_last, tp->snd_una);
/* Trace the gather list three segments per CTR record. */
444 while (i < count && m_get_sgllen(m0)) {
445 if ((count - i) >= 3) {
447 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
448 " len=%d pa=0x%zx len=%d",
449 segs[i].ds_addr, segs[i].ds_len,
450 segs[i + 1].ds_addr, segs[i + 1].ds_len,
451 segs[i + 2].ds_addr, segs[i + 2].ds_len);
453 } else if ((count - i) == 2) {
455 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
457 segs[i].ds_addr, segs[i].ds_len,
458 segs[i + 1].ds_addr, segs[i + 1].ds_len);
461 CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d",
462 segs[i].ds_addr, segs[i].ds_len);
470 * remember credits used
/* csum_data is repurposed to carry the WR credit count for this mbuf. */
472 m0->m_pkthdr.csum_data = mbuf_wrs[count];
473 m0->m_pkthdr.len = bytes;
474 toep->tp_wr_avail -= mbuf_wrs[count];
475 toep->tp_wr_unacked += mbuf_wrs[count];
/* Request a completion when asked, or when half the credits are out. */
477 if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) ||
478 toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
479 struct work_request_hdr *wr = cplhdr(m0);
481 wr->wr_hi |= htonl(F_WR_COMPL);
482 toep->tp_wr_unacked = 0;
484 KASSERT((m0->m_pkthdr.csum_data > 0) &&
485 (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d",
486 m0->m_pkthdr.csum_data));
/* The WR stays on the unacked list until the HW acks its credits. */
487 m0->m_type = MT_DONTFREE;
488 enqueue_wr(toep, m0);
489 DPRINTF("sending offload tx with %d bytes in %d segments\n",
491 l2t_send(cdev, m0, toep->tp_l2t);
494 return (total_bytes);
498 * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail
499 * under any circumstances. We take the easy way out and always queue the
500 * message to the write_queue. We can optimize the case where the queue is
501 * already empty though the optimization is probably not worth it.
504 close_conn(struct socket *so)
507 struct cpl_close_con_req *req;
509 struct inpcb *inp = so_sotoinpcb(so);
516 tp = so_sototcpcb(so);
/* Flush pending data first so the FIN sequences after it. */
519 if (tp->t_state != TCPS_SYN_SENT)
520 t3_push_frames(so, 1);
/* Only ever send one FIN per connection. */
522 if (toep->tp_flags & TP_FIN_SENT) {
529 d = TOM_DATA(toep->tp_toedev);
531 m = m_gethdr_nofail(sizeof(*req));
532 m_set_priority(m, CPL_PRIORITY_DATA);
536 toep->tp_flags |= TP_FIN_SENT;
537 req = mtod(m, struct cpl_close_con_req *);
539 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
540 req->wr.wr_lo = htonl(V_WR_TID(tid));
541 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
545 * XXX - need to defer shutdown while there is still data in the queue
548 CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid);
549 cxgb_ofld_send(d->cdev, m);
554 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
/* (the peer is unreachable, so a RST would never be delivered anyway). */
558 abort_arp_failure(struct t3cdev *cdev, struct mbuf *m)
560 struct cpl_abort_req *req = cplhdr(m);
562 req->cmd = CPL_ABORT_NO_RST;
563 cxgb_ofld_send(cdev, m);
567 * Send RX credits through an RX_DATA_ACK CPL message. If nofail is 0 we are
568 * permitted to return without sending the message in case we cannot allocate
569 * an sk_buff. Returns the number of credits sent.
572 t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail)
575 struct cpl_rx_data_ack *req;
576 struct toepcb *toep = tp->t_toe;
577 struct toedev *tdev = toep->tp_toedev;
/* NOTE(review): only the nofail allocation path is visible here. */
579 m = m_gethdr_nofail(sizeof(*req));
581 DPRINTF("returning %u credits to HW\n", credits);
583 req = mtod(m, struct cpl_rx_data_ack *);
584 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
586 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
587 req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
588 m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep));
589 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
594 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled.
595 * This is only used in DDP mode, so we take the opportunity to also set the
596 * DACK mode and flush any Rx credits.
599 t3_send_rx_modulate(struct toepcb *toep)
602 struct cpl_rx_data_ack *req;
604 m = m_gethdr_nofail(sizeof(*req));
606 req = mtod(m, struct cpl_rx_data_ack *);
607 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
609 m->m_pkthdr.len = m->m_len = sizeof(*req);
611 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
612 req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
614 V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup));
615 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
616 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
/* All outstanding credits were just returned with this message. */
617 toep->tp_rcv_wup = toep->tp_copied_seq;
621 * Handle receipt of an urgent pointer.
/*
 * NOTE(review): the body is Linux-heritage code (sk_buff, sock_flag) and is
 * compiled out unless URGENT_DATA_SUPPORTED is defined -- effectively a stub
 * on FreeBSD.  Do not assume urgent data is handled.
 */
624 handle_urg_ptr(struct socket *so, uint32_t urg_seq)
626 #ifdef URGENT_DATA_SUPPORTED
627 struct tcpcb *tp = so_sototcpcb(so);
629 urg_seq--; /* initially points past the urgent data, per BSD */
631 if (tp->urg_data && !after(urg_seq, tp->urg_seq))
632 return; /* duplicate pointer */
634 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
635 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
636 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
639 if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len)
640 tom_eat_skb(sk, skb, 0);
642 tp->urg_data = TCP_URG_NOTYET;
643 tp->urg_seq = urg_seq;
648 * Returns true if a socket cannot accept new Rx data.
651 so_no_receive(const struct socket *so)
653 return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING));
657 * Process an urgent data notification.
660 rx_urg_notify(struct toepcb *toep, struct mbuf *m)
662 struct cpl_rx_urg_notify *hdr = cplhdr(m);
663 struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
/* Ignore the notification if the socket can no longer receive. */
667 if (!so_no_receive(so))
668 handle_urg_ptr(so, ntohl(hdr->seq));
674 * Handler for RX_URG_NOTIFY CPL messages.
/* CPL dispatch entry point; 'ctx' is the connection's toepcb. */
677 do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx)
679 struct toepcb *toep = (struct toepcb *)ctx;
681 rx_urg_notify(toep, m);
/*
 * Decide whether the delayed-ACK tunable may be applied to this connection.
 * NOTE(review): with '||' the second clause appears dead -- when tp_ulp_mode
 * is non-zero the first operand already decides the result, and when it is
 * zero the '== ULP_MODE_TCPDDP' test cannot match (assuming ULP_MODE_TCPDDP
 * is non-zero).  The intent may have been '!toep->tp_ulp_mode || ...';
 * confirm against the hardware requirements before changing.
 */
686 is_delack_mode_valid(struct toedev *dev, struct toepcb *toep)
688 return (toep->tp_ulp_mode ||
689 (toep->tp_ulp_mode == ULP_MODE_TCPDDP &&
690 dev->tod_ttid >= TOE_ID_CHELSIO_T3));
694 * Set of states for which we should return RX credits.
696 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)
699 * Called after some received data has been read. It returns RX credits
700 * to the HW for the amount of data processed.
/*
 * NOTE(review): several lines (braces, early returns) are elided in this
 * extraction; the multiple 'tp_copied_seq += ...' updates below sit on
 * different control-flow paths in the full source.
 */
703 t3_cleanup_rbuf(struct tcpcb *tp, int copied)
705 struct toepcb *toep = tp->t_toe;
708 int dack_mode, must_send, read;
709 u32 thres, credits, dack = 0;
712 so = inp_inpcbtosocket(tp->t_inpcb);
713 rcv = so_sockbuf_rcv(so);
/* Only return credits in ESTABLISHED / FIN_WAIT_1 / FIN_WAIT_2. */
715 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
716 (tp->t_state == TCPS_FIN_WAIT_2))) {
719 toep->tp_copied_seq += copied;
726 inp_lock_assert(tp->t_inpcb);
730 toep->tp_copied_seq += copied;
/* Account bytes drained from the receive sockbuf since last call. */
732 read = toep->tp_enqueued_bytes - rcv->sb_cc;
733 toep->tp_copied_seq += read;
735 credits = toep->tp_copied_seq - toep->tp_rcv_wup;
736 toep->tp_enqueued_bytes = rcv->sb_cc;
/* Clamp a clearly-bogus credit count and log it. */
739 if (credits > rcv->sb_mbmax) {
740 log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n",
741 toep->tp_copied_seq, toep->tp_rcv_wup, credits);
742 credits = rcv->sb_mbmax;
747 * XXX this won't accurately reflect credit return - we need
748 * to look at the difference between the amount that has been
749 * put in the recv sockbuf and what is there now
752 if (__predict_false(!credits))
755 dev = toep->tp_toedev;
756 thres = TOM_TUNABLE(dev, rx_credit_thres);
758 if (__predict_false(thres == 0))
/* Piggy-back a delayed-ACK mode change onto the credit return. */
761 if (is_delack_mode_valid(dev, toep)) {
762 dack_mode = TOM_TUNABLE(dev, delack);
763 if (__predict_false(dack_mode != toep->tp_delack_mode)) {
764 u32 r = tp->rcv_nxt - toep->tp_delack_seq;
766 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp)
767 dack = F_RX_DACK_CHANGE |
768 V_RX_DACK_MODE(dack_mode);
771 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
774 * For coalescing to work effectively ensure the receive window has
775 * at least 16KB left.
777 must_send = credits + 16384 >= tp->rcv_wnd;
779 if (must_send || credits >= thres)
780 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send);
/* tu_disconnect: flush pending data and send the FIN via close_conn(). */
784 cxgb_toe_disconnect(struct tcpcb *tp)
788 DPRINTF("cxgb_toe_disconnect\n");
790 so = inp_inpcbtosocket(tp->t_inpcb);
/* tu_reset: abort the offloaded connection and detach it from the TOE. */
796 cxgb_toe_reset(struct tcpcb *tp)
798 struct toepcb *toep = tp->t_toe;
805 tp->t_flags &= ~TF_TOE;
/* tu_send: push newly queued send-buffer data to the HW. */
812 cxgb_toe_send(struct tcpcb *tp)
816 DPRINTF("cxgb_toe_send\n");
817 dump_toepcb(tp->t_toe);
819 so = inp_inpcbtosocket(tp->t_inpcb);
820 t3_push_frames(so, 1);
/* tu_rcvd: the application read data; return RX credits to the HW. */
825 cxgb_toe_rcvd(struct tcpcb *tp)
828 inp_lock_assert(tp->t_inpcb);
830 t3_cleanup_rbuf(tp, 0);
/* tu_detach: tear down offload state and hand the tcpcb back to the stack. */
836 cxgb_toe_detach(struct tcpcb *tp)
841 * XXX how do we handle teardown in the SYN_SENT state?
844 inp_lock_assert(tp->t_inpcb);
851 tp->t_flags &= ~TF_TOE;
/*
 * TOE user-request vector installed on offloaded connections by
 * install_offload_ops().  Each member routes a stack-level TCP operation
 * to the corresponding Chelsio handler above.
 * Fix: the original listed the .tu_detach designated initializer twice
 * (identical value, so harmless at runtime -- the last initializer wins
 * per C11 6.7.9 -- but it draws a compiler warning and is clearly a
 * copy/paste slip).  The duplicate is removed.
 */
856 static struct toe_usrreqs cxgb_toe_usrreqs = {
857 .tu_disconnect = cxgb_toe_disconnect,
858 .tu_reset = cxgb_toe_reset,
859 .tu_send = cxgb_toe_send,
860 .tu_rcvd = cxgb_toe_rcvd,
861 .tu_detach = cxgb_toe_detach,
863 .tu_syncache_event = handle_syncache_event,
/*
 * Build a CPL_SET_TCB_FIELD request in 'm' that sets (TCB word 'word' & mask)
 * to 'val' for this connection, then send it (or defer it if still in
 * SYN_SENT) via send_or_defer().  'no_reply' suppresses the HW reply CPL.
 */
868 __set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word,
869 uint64_t mask, uint64_t val, int no_reply)
871 struct cpl_set_tcb_field *req;
873 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
874 toep->tp_tid, word, mask, val);
876 req = mtod(m, struct cpl_set_tcb_field *);
877 m->m_pkthdr.len = m->m_len = sizeof(*req);
878 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
880 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid));
881 req->reply = V_NO_REPLY(no_reply);
883 req->word = htons(word);
884 req->mask = htobe64(mask);
885 req->val = htobe64(val);
887 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
888 send_or_defer(toep, m, 0);
/*
 * Convenience wrapper: allocate the mbuf and issue a no-reply
 * SET_TCB_FIELD, skipping closed/aborting connections.
 */
892 t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val)
895 struct tcpcb *tp = toep->tp_tp;
900 if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) {
/* sic: "seting" -- runtime string deliberately left untouched here. */
901 printf("not seting field\n");
905 m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field));
907 __set_tcb_field(toep, m, word, mask, val, 1);
911 * Set one of the t_flags bits in the TCB.
914 set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val)
917 t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos);
921 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting.
924 t3_set_nagle(struct toepcb *toep)
926 struct tcpcb *tp = toep->tp_tp;
928 set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY));
932 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting.
935 t3_set_keepalive(struct toepcb *toep, int on_off)
938 set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off);
/* Toggle HW receive coalescing for this connection. */
942 t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off)
944 set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off);
/* Toggle MSS-based delayed-ACK behavior for this connection. */
948 t3_set_dack_mss(struct toepcb *toep, int on_off)
951 set_tcb_tflag(toep, S_TF_DACK_MSS, on_off);
955 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting.
958 t3_set_tos(struct toepcb *toep)
960 int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb);
962 t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS),
968 * In DDP mode, TP fails to schedule a timer to push RX data to the host when
969 * DDP is disabled (data is delivered to freelist). [Note that, the peer should
970 * set the PSH bit in the last segment, which would trigger delivery.]
971 * We work around the issue by setting a DDP buffer in a partial placed state,
972 * which guarantees that TP will schedule a timer.
974 #define TP_DDP_TIMER_WORKAROUND_MASK\
975 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\
976 ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\
977 V_TCB_RX_DDP_BUF0_LEN(3)) << 32))
978 #define TP_DDP_TIMER_WORKAROUND_VAL\
979 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\
980 ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\
/* Enable or disable DDP; on disable, also apply the timer workaround above. */
984 t3_enable_ddp(struct toepcb *toep, int on)
988 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1),
991 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS,
993 TP_DDP_TIMER_WORKAROUND_MASK,
995 TP_DDP_TIMER_WORKAROUND_VAL);
/* Program the DDP tag/color for buffer 'buf_idx' (0 or 1) in the TCB. */
1000 t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color)
1002 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx,
1003 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
/* Program offset/length for DDP buffer 'buf_idx'; buf 1 lives 32 bits up. */
1008 t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
1012 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET,
1013 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
1014 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
1015 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) |
1016 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
/*
 * NOTE(review): 'M_TCB_RX_DDP_BUF1_LEN << 32' is shifted without a 64-bit
 * cast, unlike the value expressions below; if the mask macro expands to a
 * 32-bit constant this shift is undefined.  Confirm the macro's type.
 */
1018 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET,
1019 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
1020 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32),
1021 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) |
1022 V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32));
/*
 * Select a HW congestion-control flavor by name.  Compiled out unless
 * CONGESTION_CONTROL_SUPPORTED is defined -- effectively a stub otherwise.
 */
1026 t3_set_cong_control(struct socket *so, const char *name)
1028 #ifdef CONGESTION_CONTROL_SUPPORTED
1031 for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++)
1032 if (!strcmp(name, t3_cong_ops[cong_algo].name))
1035 if (cong_algo >= ARRAY_SIZE(t3_cong_ops))
/*
 * Request a copy of this connection's TCB from the HW via CPL_GET_TCB.
 * The request is deferred onto out_of_order_queue while still in SYN_SENT
 * (no TID yet), mirroring send_or_defer().
 */
1042 t3_get_tcb(struct toepcb *toep)
1044 struct cpl_get_tcb *req;
1045 struct tcpcb *tp = toep->tp_tp;
1046 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
1051 inp_lock_assert(tp->t_inpcb);
1052 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
1053 req = mtod(m, struct cpl_get_tcb *);
1054 m->m_pkthdr.len = m->m_len = sizeof(*req);
1055 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1057 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid));
1058 req->cpuno = htons(toep->tp_qset);
1060 if (tp->t_state == TCPS_SYN_SENT)
1061 mbufq_tail(&toep->out_of_order_queue, m); // defer
1063 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
/* Register the toepcb under its HW TID in the driver's TID table. */
1068 so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid)
1073 cxgb_insert_tid(d->cdev, d->client, toep, tid);
1077 * find_best_mtu - find the entry in the MTU table closest to an MTU
1079 * @mtu: the target MTU
1081 * Returns the index of the value in the MTU table that is closest to but
1082 * does not exceed the target MTU.
1085 find_best_mtu(const struct t3c_data *d, unsigned short mtu)
/* Linear scan; d->mtus[] is sorted ascending. */
1089 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
/*
 * Pick the MTU-table index (and clamp tp->t_maxseg) for a path MTU.
 * The constant 40 is the IPv4 + TCP fixed header size subtracted to get
 * the MSS from an MTU.  NOTE(review): the branch structure around the two
 * find_best_mtu() calls is elided in this extraction.
 */
1095 select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu)
1100 struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt;
1103 tp->t_maxseg = pmtu - 40;
1104 if (tp->t_maxseg < td->mtus[0] - 40)
1105 tp->t_maxseg = td->mtus[0] - 40;
1106 idx = find_best_mtu(td, tp->t_maxseg + 40);
1108 tp->t_maxseg = td->mtus[idx] - 40;
1110 idx = find_best_mtu(td, pmtu);
/* Release an active-open TID and drop the toepcb reference it held. */
1116 free_atid(struct t3cdev *cdev, unsigned int tid)
1118 struct toepcb *toep = cxgb_free_atid(cdev, tid);
1121 toepcb_release(toep);
1125 * Release resources held by an offload connection (TID, L2T entry, etc.)
/*
 * NOTE(review): interior braces/conditions are elided in this extraction;
 * in particular, the L2T release and the TID-vs-ATID branch are guarded in
 * the full source.
 */
1128 t3_release_offload_resources(struct toepcb *toep)
1130 struct tcpcb *tp = toep->tp_tp;
1131 struct toedev *tdev = toep->tp_toedev;
1132 struct t3cdev *cdev;
1134 unsigned int tid = toep->tp_tid;
1135 struct sockbuf *rcv;
1137 CTR0(KTR_TOM, "t3_release_offload_resources");
1142 cdev = TOEP_T3C_DEV(toep);
1147 t3_release_ddp_resources(toep);
1149 #ifdef CTRL_SKB_CACHE
1150 kfree_skb(CTRL_SKB_CACHE(tp));
1151 CTRL_SKB_CACHE(tp) = NULL;
/* Drop any in-flight work requests that will never be acked now. */
1154 if (toep->tp_wr_avail != toep->tp_wr_max) {
1155 purge_wr_queue(toep);
1156 reset_wr_list(toep);
1160 l2t_release(L2DATA(cdev), toep->tp_l2t);
1161 toep->tp_l2t = NULL;
1165 inp_lock_assert(tp->t_inpcb);
1166 so = inp_inpcbtosocket(tp->t_inpcb);
1167 rcv = so_sockbuf_rcv(so);
1169 * cancel any offloaded reads
1174 tp->t_flags &= ~TF_TOE;
1175 if (toep->tp_ddp_state.user_ddp_pending) {
1176 t3_cancel_ubuf(toep, rcv);
1177 toep->tp_ddp_state.user_ddp_pending = 0;
1179 so_sorwakeup_locked(so);
/* Active opens that never completed still hold an ATID, not a TID. */
1183 if (toep->tp_state == TCPS_SYN_SENT) {
1184 free_atid(cdev, tid);
1186 __skb_queue_purge(&tp->out_of_order_queue);
1188 } else { // we have TID
1189 cxgb_remove_tid(cdev, toep, tid);
1190 toepcb_release(toep);
1193 log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
/* Switch an established socket over to the TOE user-request handlers. */
1198 install_offload_ops(struct socket *so)
1200 struct tcpcb *tp = so_sototcpcb(so);
1202 KASSERT(tp->t_toe != NULL, ("toepcb not set"));
1204 t3_install_socket_ops(so);
1205 tp->t_flags |= TF_TOE;
1206 tp->t_tu = &cxgb_toe_usrreqs;
1210 * Determine the receive window scaling factor given a target max
/* (receive window); returns the wscale per RFC 1323 if enabled in 'vnet'. */
1214 select_rcv_wscale(int space, struct vnet *vnet)
1218 if (space > MAX_RCV_WND)
1219 space = MAX_RCV_WND;
/* Smallest wscale (max 14) such that the window fits in 16 bits. */
1221 if (V_tcp_do_rfc1323)
1222 for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;
1228 * Determine the receive window size for a socket.
1230 static unsigned long
1231 select_rcv_wnd(struct toedev *dev, struct socket *so)
1233 struct tom_data *d = TOM_DATA(dev)
1235 unsigned int max_rcv_wnd;
1236 struct sockbuf *rcv;
1238 rcv = so_sockbuf_rcv(so);
/* Auto-sizing sockets may grow to the autorcvbuf cap; others use hiwat. */
1240 if (V_tcp_do_autorcvbuf)
1241 wnd = V_tcp_autorcvbuf_max;
1243 wnd = rcv->sb_hiwat;
1248 * For receive coalescing to work effectively we need a receive window
1249 * that can accommodate a coalesced segment.
1251 if (wnd < MIN_RCV_WND)
/* Pre-T3C parts can only address 23 RX pages worth of window. */
1255 max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
1256 (uint32_t)d->rx_page_size * 23 :
1259 return min(wnd, max_rcv_wnd);
1263 * Assign offload parameters to some socket fields. This code is used by
1264 * both active and passive opens.
1267 init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
1268 struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
1270 struct tcpcb *tp = so_sototcpcb(so);
1271 struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);
1272 struct sockbuf *snd, *rcv;
1275 SOCK_LOCK_ASSERT(so);
1278 snd = so_sockbuf_snd(so);
1279 rcv = so_sockbuf_rcv(so);
1281 log(LOG_INFO, "initializing offload socket\n");
1283 * We either need to fix push frames to work with sbcompress
1284 * or we need to add this
1286 snd->sb_flags |= SB_NOCOALESCE;
1287 rcv->sb_flags |= SB_NOCOALESCE;
1291 toep->tp_toedev = dev;
/* Start with the full WR credit budget for this device. */
1295 toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
1296 toep->tp_wr_unacked = 0;
1297 toep->tp_delack_mode = 0;
1299 toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
1304 tp->rcv_wnd = select_rcv_wnd(dev, so);
/* DDP only if the tunable allows it, SO_NO_DDP is clear, and the
 * window is big enough for direct placement to pay off. */
1306 toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
1307 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
1308 toep->tp_qset_idx = 0;
1310 reset_wr_list(toep);
1311 DPRINTF("initialization done\n");
1315 * The next two functions calculate the option 0 value for a socket.
1317 static inline unsigned int
1318 calc_opt0h(struct socket *so, int mtu_idx)
1320 struct tcpcb *tp = so_sototcpcb(so);
1321 int wscale = select_rcv_wscale(tp->rcv_wnd, so->so_vnet);
1323 return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
1324 V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
1325 V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
/* Low 32 bits of option 0: TOS, ULP mode, and receive buffer size (KB). */
1328 static inline unsigned int
1329 calc_opt0l(struct socket *so, int ulp_mode)
1331 struct tcpcb *tp = so_sototcpcb(so);
1334 val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) |
1335 V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));
1337 DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val);
/* Option 2: congestion-control flavor, when one is configured (!= -1). */
1341 static inline unsigned int
1342 calc_opt2(const struct socket *so, struct toedev *dev)
1346 flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);
1348 return (V_FLAVORS_VALID(flv_valid) |
1349 V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
/*
 * Sum the WR credits (stashed in m_pkthdr.csum_data, see t3_push_frames)
 * of every work request still on the unacked list.
 */
1354 count_pending_wrs(const struct toepcb *toep)
1356 const struct mbuf *m;
1359 wr_queue_walk(toep, m)
1360 n += m->m_pkthdr.csum_data;
/* NOTE(review): macro tail; the #define line itself is elided above. */
1366 (((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
/*
 * Build a CPL_ACT_OPEN_REQ (active-open) message for atid over L2T entry 'e'.
 * NOTE(review): both inp_4tuple_get() and the manual 4-tuple copies appear
 * below; in the full source these are alternative #if branches for
 * different FreeBSD versions (the preprocessor lines are elided here).
 */
1370 mk_act_open_req(struct socket *so, struct mbuf *m,
1371 unsigned int atid, const struct l2t_entry *e)
1373 struct cpl_act_open_req *req;
1374 struct inpcb *inp = so_sotoinpcb(so);
1375 struct tcpcb *tp = inp_inpcbtotcpcb(inp);
1376 struct toepcb *toep = tp->t_toe;
1377 struct toedev *tdev = toep->tp_toedev;
1379 m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));
1381 req = mtod(m, struct cpl_act_open_req *);
1382 m->m_pkthdr.len = m->m_len = sizeof(*req);
1384 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1386 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
1387 inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port);
1389 req->local_port = inp->inp_lport;
1390 req->peer_port = inp->inp_fport;
1391 memcpy(&req->local_ip, &inp->inp_laddr, 4);
1392 memcpy(&req->peer_ip, &inp->inp_faddr, 4);
1394 req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
1395 V_TX_CHANNEL(e->smt_idx));
1396 req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
1398 req->opt2 = htonl(calc_opt2(so, tdev));
1403  * Convert an ACT_OPEN_RPL status to an errno.
/*
 * Maps the hardware's active-open failure codes onto standard errnos;
 * the 4-tuple-in-use case is additionally logged since it indicates a
 * TID/CAM collision worth noticing.
 */
1406 act_open_rpl_status_to_errno(int status)
1409 	case CPL_ERR_CONN_RESET:
1410 		return (ECONNREFUSED);
1411 	case CPL_ERR_ARP_MISS:
1412 		return (EHOSTUNREACH);
1413 	case CPL_ERR_CONN_TIMEDOUT:
1415 	case CPL_ERR_TCAM_FULL:
1417 	case CPL_ERR_CONN_EXIST:
1418 		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
1419 		return (EADDRINUSE);
/*
 * fail_act_open - tear down a failed active open: release offload
 * resources, then drop the connection with the given errno.
 * Note: drops the inpcb lock (inp_wunlock) before returning.
 */
1426 fail_act_open(struct toepcb *toep, int errno)
1428 	struct tcpcb *tp = toep->tp_tp;
1430 	t3_release_offload_resources(toep);
1432 	inp_wunlock(tp->t_inpcb);
	/* tcp_offload_drop() aborts the connection with the mapped errno */
1433 	tcp_offload_drop(tp, errno);
	/* NOTE(review): TCP_INC_STATS_BH is a Linux statistics macro; it is
	 * presumably inside a disabled region elided from this listing. */
1437 	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1442  * Handle active open failures.
/*
 * Translates the CPL_ACT_OPEN_RPL status into an errno and fails the
 * open via fail_act_open() (which drops the inpcb lock).  Bails out if
 * the tcpcb has already gone away.
 */
1445 active_open_failed(struct toepcb *toep, struct mbuf *m)
1447 	struct cpl_act_open_rpl *rpl = cplhdr(m);
1450 	if (toep->tp_tp == NULL)
1453 	inp = toep->tp_tp->t_inpcb;
1456 	 * Don't handle connection retry for now
	/* NOTE(review): the inet_connection_sock/sk_reset_timer block below
	 * is Linux code; it is presumably inside an #if 0 / disabled region
	 * whose guards are elided from this listing - do not enable as-is. */
1459 	struct inet_connection_sock *icsk = inet_csk(sk);
1461 	if (rpl->status == CPL_ERR_CONN_EXIST &&
1462 	    icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
1463 		icsk->icsk_retransmit_timer.function = act_open_retry_timer;
1464 		sk_reset_timer(so, &icsk->icsk_retransmit_timer,
1471 	 * drops the inpcb lock
1473 	fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
1481  * Return whether a failed active open has allocated a TID
/*
 * True unless the failure happened before a TID could be assigned
 * (TCAM full, duplicate connection, or ARP miss).
 */
1484 act_open_has_tid(int status)
1486 	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
1487 	       status != CPL_ERR_ARP_MISS;
1491  * Process an ACT_OPEN_RPL CPL message.
/*
 * CPL handler: if the failed open holds a TID (and the adapter is not a
 * T3A, which handles this itself), queue the TID for release, then run
 * the common active-open failure path.
 */
1494 do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
1496 	struct toepcb *toep = (struct toepcb *)ctx;
1497 	struct cpl_act_open_rpl *rpl = cplhdr(m);
1499 	if (cdev->type != T3A && act_open_has_tid(rpl->status))
1500 		cxgb_queue_tid_release(cdev, GET_TID(rpl));
1502 	active_open_failed(toep, m);
1507  * Handle an ARP failure for an active open. XXX purge ofo queue
1509  * XXX badly broken for crossed SYNs as the ATID is no longer valid.
1510  * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should
1511  * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't
1512  * free the atid. Hmm.
/*
 * ARP-failure callback for the ACT_OPEN_REQ mbuf: if the connection is
 * still in a SYN state, fail the open with EHOSTUNREACH.
 */
1516 act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m)
1518 	struct toepcb *toep = m_get_toep(m);
1519 	struct tcpcb *tp = toep->tp_tp;
1520 	struct inpcb *inp = tp->t_inpcb;
1524 	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
1526 		 * drops the inpcb lock
	/* NOTE(review): fail_act_open() is declared (line 1426) to take a
	 * struct toepcb *, but 'so' is passed here while 'toep' is in
	 * scope.  Looks like a type-mismatch bug - verify against the
	 * full source before changing (interior lines are elided). */
1528 		fail_act_open(so, EHOSTUNREACH);
1529 		printf("freeing %p\n", m);
1537  * Send an active open request.
/*
 * t3_connect - start an offloaded active open: allocate a toepcb and an
 * ATID, resolve the L2T entry for the route, build and send the
 * CPL_ACT_OPEN_REQ, and (if DDP is selected) enable DDP.  Error paths
 * release the ATID; the elided tail presumably returns ENOMEM.
 */
1540 t3_connect(struct toedev *tdev, struct socket *so,
1541     struct rtentry *rt, struct sockaddr *nam)
1544 	struct l2t_entry *e;
1545 	struct tom_data *d = TOM_DATA(tdev);
1546 	struct inpcb *inp = so_sotoinpcb(so);
1547 	struct tcpcb *tp = intotcpcb(inp);
1548 	struct toepcb *toep; /* allocated by init_offload_socket */
1552 	toep = toepcb_alloc();
1556 	if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0)
1559 	e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam);
1563 	inp_lock_assert(inp);
1564 	m = m_gethdr(MT_DATA, M_WAITOK);
1567 	m->m_toe.mt_toepcb = tp->t_toe;
	/* route ARP failures on this mbuf to the active-open handler above */
1568 	set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure);
1572 	init_offload_socket(so, tdev, atid, e, rt, toep);
1574 	install_offload_ops(so);
1576 	mk_act_open_req(so, m, atid, e);
1581 	m_set_toep(m, tp->t_toe);
1583 	toep->tp_state = TCPS_SYN_SENT;
1584 	l2t_send(d->cdev, (struct mbuf *)m, e);
1586 	if (toep->tp_ulp_mode)
1587 		t3_enable_ddp(toep, 0);
	/* error path: undo the ATID allocation */
1591 	printf("failing connect - free atid\n");
1593 	free_atid(d->cdev, atid);
1595 	printf("return ENOMEM\n");
1600  * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do
1601  * not send multiple ABORT_REQs for the same connection and also that we do
1602  * not try to send a message after the connection has closed. Returns 1 if
1603  * an ABORT_REQ wasn't generated after all, 0 otherwise.
/*
 * Marks the connection abort-pending/shutdown, purges the send buffer,
 * and emits a CPL_ABORT_REQ.  For T3A with a CLOSE_CON already
 * requested, the POST_CLOSE variant is used.  An abort issued while
 * still in SYN_SENT is deferred on the out-of-order queue rather than
 * sent immediately.
 */
1606 t3_send_reset(struct toepcb *toep)
1609 	struct cpl_abort_req *req;
1610 	unsigned int tid = toep->tp_tid;
1611 	int mode = CPL_ABORT_SEND_RST;
1612 	struct tcpcb *tp = toep->tp_tp;
1613 	struct toedev *tdev = toep->tp_toedev;
1614 	struct socket *so = NULL;
1616 	struct sockbuf *snd;
1619 	inp_lock_assert(tp->t_inpcb);
1620 	so = inp_inpcbtosocket(tp->t_inpcb);
	/* bail if an abort is already in flight (guard partially elided) */
1623 	if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) ||
1626 	toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN);
1628 	snd = so_sockbuf_snd(so);
1629 	/* Purge the send queue so we don't send anything after an abort. */
1632 	if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev))
1633 		mode |= CPL_ABORT_POST_CLOSE_REQ;
1635 	m = m_gethdr_nofail(sizeof(*req));
1636 	m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep));
1637 	set_arp_failure_handler(m, abort_arp_failure);
1639 	req = mtod(m, struct cpl_abort_req *);
1640 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
1641 	req->wr.wr_lo = htonl(V_WR_TID(tid));
1642 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
1643 	req->rsvd0 = tp ? htonl(tp->snd_nxt) : 0;
1644 	req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
1646 	if (tp && (tp->t_state == TCPS_SYN_SENT))
1647 		mbufq_tail(&toep->out_of_order_queue, m); // defer
1649 		l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);
/*
 * t3_ip_ctloutput - IP-level socket option handling for offloaded
 * connections.  Only IP_TOS is supported (IP_OPTIONS is explicitly
 * refused); the TOS value is range-checked, stored in the inpcb, and
 * pushed to the hardware via t3_set_tos().
 */
1653 t3_ip_ctloutput(struct socket *so, struct sockopt *sopt)
1658 	if (sopt->sopt_name == IP_OPTIONS)
1659 		return (ENOPROTOOPT);
1661 	if (sopt->sopt_name != IP_TOS)
1662 		return (EOPNOTSUPP);
1664 	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
1669 	if (optval > IPTOS_PREC_CRITIC_ECP)
1672 	inp = so_sotoinpcb(so);
	/* NOTE(review): both inp_ip_tos_set() and a direct field store
	 * appear; presumably separated by version #ifdefs elided here. */
1674 	inp_ip_tos_set(inp, optval);
1676 	inp->inp_ip_tos = optval;
1678 	t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe);
/*
 * t3_tcp_ctloutput - TCP-level socket options for offloaded connections.
 * Supports TCP_CONGESTION (set only: copies the algorithm name in and
 * hands it to t3_set_cong_control) and TCP_NODELAY (toggles TF_NODELAY
 * and, if the flag actually changed, reprograms Nagle on the card).
 * GET directions are not implemented.
 */
1685 t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt)
1690 	if (sopt->sopt_name != TCP_CONGESTION &&
1691 	    sopt->sopt_name != TCP_NODELAY)
1692 		return (EOPNOTSUPP);
1694 	if (sopt->sopt_name == TCP_CONGESTION) {
1695 		char name[TCP_CA_NAME_MAX];
1696 		int optlen = sopt->sopt_valsize;
1699 		if (sopt->sopt_dir == SOPT_GET) {
1700 			KASSERT(0, ("unimplemented"));
1701 			return (EOPNOTSUPP);
		/* bounded copy of the user-supplied algorithm name */
1707 		err = copyinstr(sopt->sopt_val, name,
1708 		    min(TCP_CA_NAME_MAX - 1, optlen), &copied);
1714 		tp = so_sototcpcb(so);
1716 		 * XXX I need to revisit this
1718 		if ((err = t3_set_cong_control(so, name)) == 0) {
1719 #ifdef CONGESTION_CONTROL_SUPPORTED
1720 			tp->t_cong_control = strdup(name, M_CXGB);
	/* TCP_NODELAY path */
1729 		if (sopt->sopt_dir == SOPT_GET)
1730 			return (EOPNOTSUPP);
1732 		err = sooptcopyin(sopt, &optval, sizeof optval,
1738 		inp = so_sotoinpcb(so);
1740 		tp = inp_inpcbtotcpcb(inp);
1742 		oldval = tp->t_flags;
1744 			tp->t_flags |= TF_NODELAY;
1746 			tp->t_flags &= ~TF_NODELAY;
	/* only touch the hardware if the flag actually flipped */
1750 		if (oldval != tp->t_flags && (tp->t_toe != NULL))
1751 			t3_set_nagle(tp->t_toe);
/*
 * t3_ctloutput - socket-option entry point: dispatch to the IP or TCP
 * helper based on level, and fall back to the stock tcp_ctloutput() for
 * anything the offload path does not support.
 */
1759 t3_ctloutput(struct socket *so, struct sockopt *sopt)
1763 	if (sopt->sopt_level != IPPROTO_TCP)
1764 		err = t3_ip_ctloutput(so, sopt);
1766 		err = t3_tcp_ctloutput(so, sopt);
1768 	if (err != EOPNOTSUPP)
1771 	return (tcp_ctloutput(so, sopt));
1775  * Returns true if we need to explicitly request RST when we receive new data
1776  * on an RX-closed connection.
/* (body elided from this listing) */
1779 need_rst_on_excess_rx(const struct toepcb *toep)
1785  * Handles Rx data that arrives in a state where the socket isn't accepting
/*
 * If new data arrives after the receive side is closed, send an abort
 * (RST) unless one is already in progress.
 */
1789 handle_excess_rx(struct toepcb *toep, struct mbuf *m)
1792 	if (need_rst_on_excess_rx(toep) &&
1793 	    !(toep->tp_flags & TP_ABORT_SHUTDOWN))
1794 		t3_send_reset(toep);
1799  * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE)
1800  * by getting the DDP offset from the TCB.
/*
 * Reads the DDP offset for the current buffer out of the returned TCB
 * snapshot, synthesizes an mbuf describing the data DMA'd since the last
 * completion, updates rcv_nxt/buffer state, and wakes the receiver.
 * Expects the inpcb lock and operates under the receive sockbuf lock.
 */
1803 tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
1805 	struct ddp_state *q = &toep->tp_ddp_state;
1806 	struct ddp_buf_state *bsp;
1807 	struct cpl_get_tcb_rpl *hdr;
1808 	unsigned int ddp_offset;
1811 	struct sockbuf *rcv;
1818 	so = inp_inpcbtosocket(tp->t_inpcb);
1820 	inp_lock_assert(tp->t_inpcb);
1821 	rcv = so_sockbuf_rcv(so);
1824 	/* Note that we only accout for CPL_GET_TCB issued by the DDP code.
1825 	 * We really need a cookie in order to dispatch the RPLs.
1829 	/* It is a possible that a previous CPL already invalidated UBUF DDP
1830 	 * and moved the cur_buf idx and hence no further processing of this
1831 	 * skb is required. However, the app might be sleeping on
1832 	 * !q->get_tcb_count and we need to wake it up.
1834 	if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
1835 		int state = so_state_get(so);
1838 		if (__predict_true((state & SS_NOFDREF) == 0))
1839 			so_sorwakeup_locked(so);
1841 			sockbuf_unlock(rcv);
1846 	bsp = &q->buf_state[q->cur_buf];
	/* the TCB words follow the CPL header; extract the per-buffer
	 * DDP offset for whichever HW buffer is current */
1848 	tcb = (__be64 *)(hdr + 1);
1849 	if (q->cur_buf == 0) {
1850 		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
1851 		ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
1853 		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
1854 		ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
1856 	ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
	/* the delta between the old and new offsets is the data placed */
1857 	m->m_cur_offset = bsp->cur_offset;
1858 	bsp->cur_offset = ddp_offset;
1859 	m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;
1862 	    "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
1863 	    q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
1864 	KASSERT(ddp_offset >= m->m_cur_offset,
1865 	    ("ddp_offset=%u less than cur_offset=%u",
1866 		ddp_offset, m->m_cur_offset));
	/* debug/trace-only decoding of additional TCB fields follows */
1870 	unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;
1872 	t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
1873 	ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;
1875 	t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
1876 	rcv_nxt = t >> S_TCB_RCV_NXT;
1877 	rcv_nxt &= M_TCB_RCV_NXT;
1879 	t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
1880 	rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
1881 	rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;
1883 	T3_TRACE2(TIDTB(sk),
1884 	    "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
1885 	    ddp_flags, rcv_nxt - rx_hdr_offset);
1887 	    "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
1888 	    tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
1890 	    "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
1891 	    rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
1893 	    "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
1894 	    q->buf_state[0].flags, q->buf_state[1].flags);
1898 	if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
1899 		handle_excess_rx(toep, m);
1904 	if ((int)m->m_pkthdr.len < 0) {
1905 		t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
1908 	if (bsp->flags & DDP_BF_NOCOPY) {
1911 		    "tcb_rpl_as_ddp_complete: CANCEL UBUF");
	/* NOTE(review): 'sk', 'printk' and RCV_SHUTDOWN below are Linux
	 * remnants; presumably inside a disabled debug region whose
	 * guards are elided from this listing. */
1913 		if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1914 			printk("!cancel_ubuf");
1915 			t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
1918 		m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
1919 		bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
1921 	} else if (bsp->flags & DDP_BF_NOFLIP) {
1923 		m->m_ddp_flags = 1;    /* always a kernel buffer */
1925 		/* now HW buffer carries a user buffer */
1926 		bsp->flags &= ~DDP_BF_NOFLIP;
1927 		bsp->flags |= DDP_BF_NOCOPY;
1929 	/* It is possible that the CPL_GET_TCB_RPL doesn't indicate
1930 	 * any new data in which case we're done. If in addition the
1931 	 * offset is 0, then there wasn't a completion for the kbuf
1932 	 * and we need to decrement the posted count.
1934 	if (m->m_pkthdr.len == 0) {
1935 		if (ddp_offset == 0) {
1937 			bsp->flags |= DDP_BF_NODATA;
1939 		sockbuf_unlock(rcv);
1944 		sockbuf_unlock(rcv);
1946 		/* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
1947 		 * but it got here way late and nobody cares anymore.
	/* deliver: attach the DDP gather list and advance the sequence space */
1953 	m->m_ddp_gl = (unsigned char *)bsp->gl;
1954 	m->m_flags |= M_DDP;
1955 	m->m_seq = tp->rcv_nxt;
1956 	tp->rcv_nxt += m->m_pkthdr.len;
1957 	tp->t_rcvtime = ticks;
1958 	CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
1959 	    m->m_seq, q->cur_buf, m->m_pkthdr.len);
1960 	if (m->m_pkthdr.len == 0) {
1961 		q->user_ddp_pending = 0;
1966 	state = so_state_get(so);
1967 	if (__predict_true((state & SS_NOFDREF) == 0))
1968 		so_sorwakeup_locked(so);
1970 		sockbuf_unlock(rcv);
1974  * Process a CPL_GET_TCB_RPL. These can also be generated by the DDP code,
1975  * in that case they are similar to DDP completions.
/*
 * CPL handler: tolerate a missing toepcb (socket already gone), else
 * run the DDP-completion interpretation under the inpcb write lock.
 */
1978 do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
1980 	struct toepcb *toep = (struct toepcb *)ctx;
1982 	/* OK if socket doesn't exist */
1984 		printf("null toep in do_get_tcb_rpl\n");
1985 		return (CPL_RET_BUF_DONE);
1988 	inp_wlock(toep->tp_tp->t_inpcb);
1989 	tcb_rpl_as_ddp_complete(toep, m);
1990 	inp_wunlock(toep->tp_tp->t_inpcb);
/*
 * handle_ddp_data - a CPL_RX_DATA arrived on a DDP connection, meaning
 * some data was placed directly into the current DDP buffer before the
 * connection fell back to non-DDP delivery.  Synthesize the DDP portion
 * into this mbuf (seq/len/gather list/flags), advance rcv_nxt, and mark
 * the connection so DDP is not re-enabled afterwards.
 */
1996 handle_ddp_data(struct toepcb *toep, struct mbuf *m)
1998 	struct tcpcb *tp = toep->tp_tp;
2000 	struct ddp_state *q;
2001 	struct ddp_buf_state *bsp;
2002 	struct cpl_rx_data *hdr = cplhdr(m);
2003 	unsigned int rcv_nxt = ntohl(hdr->seq);
2004 	struct sockbuf *rcv;
	/* nothing was DDP'd if the sequence numbers already match */
2006 	if (tp->rcv_nxt == rcv_nxt)
2009 	inp_lock_assert(tp->t_inpcb);
2010 	so  = inp_inpcbtosocket(tp->t_inpcb);
2011 	rcv = so_sockbuf_rcv(so);
2014 	q = &toep->tp_ddp_state;
2015 	bsp = &q->buf_state[q->cur_buf];
2016 	KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
2017 		rcv_nxt, tp->rcv_nxt));
2018 	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
2019 	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2020 	CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
2021 	    rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);
2024 	if ((int)m->m_pkthdr.len < 0) {
2025 		t3_ddp_error(so, "handle_ddp_data: neg len");
2028 	m->m_ddp_gl = (unsigned char *)bsp->gl;
2029 	m->m_flags |= M_DDP;
2030 	m->m_cur_offset = bsp->cur_offset;
2031 	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	/* NOCOPY is a one-shot attribute on the buffer */
2032 	if (bsp->flags & DDP_BF_NOCOPY)
2033 		bsp->flags &= ~DDP_BF_NOCOPY;
2035 	m->m_seq = tp->rcv_nxt;
2036 	tp->rcv_nxt = rcv_nxt;
2037 	bsp->cur_offset += m->m_pkthdr.len;
2038 	if (!(bsp->flags & DDP_BF_NOFLIP))
2041 	 * For now, don't re-enable DDP after a connection fell out of  DDP
2044 	q->ubuf_ddp_ready = 0;
2045 	sockbuf_unlock(rcv);
2049  * Process new data received for a connection.
/*
 * new_rx_data - deliver a CPL_RX_DATA payload to the socket: validate
 * the sequence number, strip the CPL header, fold in any DDP residue,
 * track delayed-ACK mode changes, append to the receive buffer, and
 * wake the reader.  Takes and releases the inpcb write lock.
 */
2052 new_rx_data(struct toepcb *toep, struct mbuf *m)
2054 	struct cpl_rx_data *hdr = cplhdr(m);
2055 	struct tcpcb *tp = toep->tp_tp;
2057 	struct sockbuf *rcv;
2059 	int len = be16toh(hdr->len);
2061 	inp_wlock(tp->t_inpcb);
2063 	so  = inp_inpcbtosocket(tp->t_inpcb);
2065 	if (__predict_false(so_no_receive(so))) {
2066 		handle_excess_rx(toep, m);
2067 		inp_wunlock(tp->t_inpcb);
	/* account for any data the HW placed via DDP before this CPL */
2072 	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
2073 		handle_ddp_data(toep, m);
2075 	m->m_seq = ntohl(hdr->seq);
2076 	m->m_ulp_mode = 0;                    /* for iSCSI */
	/* out-of-sequence data indicates a HW/SW disagreement; drop it */
2079 	if (__predict_false(m->m_seq != tp->rcv_nxt)) {
2081 		    "%s: TID %u: Bad sequence number %u, expected %u\n",
2082 		    toep->tp_toedev->name, toep->tp_tid, m->m_seq,
2085 		inp_wunlock(tp->t_inpcb);
2089 	m_adj(m, sizeof(*hdr));
2091 #ifdef URGENT_DATA_SUPPORTED
2093 	 * We don't handle urgent data yet
2095 	if (__predict_false(hdr->urg))
2096 		handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
2097 	if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
2098 		tp->urg_seq - tp->rcv_nxt < skb->len))
2099 		tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
	/* remember the card's current delayed-ACK mode */
2102 	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
2103 		toep->tp_delack_mode = hdr->dack_mode;
2104 		toep->tp_delack_seq = tp->rcv_nxt;
2106 	CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
2107 	    m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);
	/* trust the CPL's length field over the mbuf's if it is shorter */
2109 	if (len < m->m_pkthdr.len)
2110 		m->m_pkthdr.len = m->m_len = len;
2112 	tp->rcv_nxt += m->m_pkthdr.len;
2113 	tp->t_rcvtime = ticks;
2114 	toep->tp_enqueued_bytes += m->m_pkthdr.len;
2116 	    "new_rx_data: seq 0x%x len %u",
2117 	    m->m_seq, m->m_pkthdr.len);
2118 	inp_wunlock(tp->t_inpcb);
2119 	rcv = so_sockbuf_rcv(so);
2123 	DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len);
2129 	 * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
2132 	KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1),
2134 	    ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
2135 		so, rcv->sb_cc, rcv->sb_mbmax));
2139 	CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
2140 	    rcv->sb_cc, rcv->sb_mbcnt);
2142 	state = so_state_get(so);
2143 	if (__predict_true((state & SS_NOFDREF) == 0))
2144 		so_sorwakeup_locked(so);
2146 		sockbuf_unlock(rcv);
2150  * Handler for RX_DATA CPL messages.
/* Thin CPL dispatch wrapper around new_rx_data(). */
2153 do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2155 	struct toepcb *toep = (struct toepcb *)ctx;
2157 	DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);
2159 	new_rx_data(toep, m);
/*
 * new_rx_data_ddp - handle a CPL_RX_DATA_DDP: data was placed directly
 * into a posted DDP buffer.  Decode the ddp_report to find which buffer
 * and how much, repurpose m_len to carry the placed byte count, update
 * sequence/offset bookkeeping, flip buffers on completion, and wake the
 * reader when the data is consumable.
 */
2165 new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
2168 	struct ddp_state *q;
2169 	struct ddp_buf_state *bsp;
2170 	struct cpl_rx_data_ddp *hdr;
2172 	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
2174 	unsigned int delack_mode;
2175 	struct sockbuf *rcv;
2178 	inp_wlock(tp->t_inpcb);
2179 	so = inp_inpcbtosocket(tp->t_inpcb);
2181 	if (__predict_false(so_no_receive(so))) {
2183 		handle_excess_rx(toep, m);
2184 		inp_wunlock(tp->t_inpcb);
2188 	q = &toep->tp_ddp_state;
2190 	ddp_report = ntohl(hdr->u.ddp_report);
2191 	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
2192 	bsp = &q->buf_state[buf_idx];
2195 	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
2196 	    "hdr seq 0x%x len %u",
2197 	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
2200 	    "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
2201 	    G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);
2203 	ddp_len = ntohs(hdr->len);
2204 	rcv_nxt = ntohl(hdr->seq) + ddp_len;
2206 	delack_mode = G_DDP_DACK_MODE(ddp_report);
2207 	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
2208 		toep->tp_delack_mode = delack_mode;
2209 		toep->tp_delack_seq = tp->rcv_nxt;
2212 	m->m_seq = tp->rcv_nxt;
2213 	tp->rcv_nxt = rcv_nxt;
2215 	tp->t_rcvtime = ticks;
2217 	 * Store the length in m->m_len.  We are changing the meaning of
2218 	 * m->m_len here, we need to be very careful that nothing from now on
2219 	 * interprets ->len of this packet the usual way.
2221 	m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
2222 	inp_wunlock(tp->t_inpcb);
2224 	    "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
2225 	    m->m_len, rcv_nxt, m->m_seq);
2227 	 * Figure out where the new data was placed in the buffer and store it
2228 	 * in when.  Assumes the buffer offset starts at 0, consumer needs to
2229 	 * account for page pod's pg_offset.
2231 	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
2232 	m->m_cur_offset = end_offset - m->m_pkthdr.len;
2234 	rcv = so_sockbuf_rcv(so);
2237 	m->m_ddp_gl = (unsigned char *)bsp->gl;
2238 	m->m_flags |= M_DDP;
2239 	bsp->cur_offset = end_offset;
2240 	toep->tp_enqueued_bytes += m->m_pkthdr.len;
2243 	 * Length is only meaningful for kbuf
2245 	if (!(bsp->flags & DDP_BF_NOCOPY))
2246 		KASSERT(m->m_len <= bsp->gl->dgl_length,
2247 		    ("length received exceeds ddp pages: len=%d dgl_length=%d",
2248 			m->m_len, bsp->gl->dgl_length));
2250 	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2251 	KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));
2253          * Bit 0 of flags stores whether the DDP buffer is completed.
2254          * Note that other parts of the code depend on this being in bit 0.
2256 	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
2257 		panic("spurious ddp completion");
2259 		m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
2260 		if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
2261 			q->cur_buf ^= 1;                     /* flip buffers */
	/* NOCOPY (user buffer) is a one-shot attribute */
2264 	if (bsp->flags & DDP_BF_NOCOPY) {
2265 		m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
2266 		bsp->flags &= ~DDP_BF_NOCOPY;
2269 	if (ddp_report & F_DDP_PSH)
2270 		m->m_ddp_flags |= DDP_BF_PSH;
2272 		m->m_ddp_flags |= DDP_BF_NODATA;
	/* NOTE(review): the skb lines below are Linux remnants, presumably
	 * inside a disabled region whose guards are elided here. */
2275 	skb_reset_transport_header(skb);
2276 	tcp_hdr(skb)->fin = 0;          /* changes original hdr->ddp_report */
	/* wake the reader only when the data is actually consumable */
2280 	if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) ||
2281 		   (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1))
2282 		       || !(m->m_ddp_flags & DDP_BF_NOCOPY))))
2283 		so_sorwakeup_locked(so);
2285 		sockbuf_unlock(rcv);
2288 #define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
2289 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
2290 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
2294  * Handler for RX_DATA_DDP CPL messages.
/*
 * CPL dispatch: reject messages whose ddpvld_status carries any DDP
 * error bit (logged, buffer returned), else hand off to
 * new_rx_data_ddp().
 */
2297 do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2299 	struct toepcb *toep = ctx;
2300 	const struct cpl_rx_data_ddp *hdr = cplhdr(m);
2304 	if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
2305 		log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
2306 		    GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
2307 		return (CPL_RET_BUF_DONE);
	/* NOTE(review): Linux skb remnant; presumably in a disabled region. */
2310 	skb->h.th = tcphdr_skb->h.th;
2312 	new_rx_data_ddp(toep, m);
/*
 * process_ddp_complete - handle a CPL_RX_DDP_COMPLETE: the HW finished
 * filling a DDP buffer.  Compute the number of bytes placed since the
 * last completion, advance rcv_nxt and buffer offsets, flip buffers
 * unless pinned, and wake the reader.
 */
2317 process_ddp_complete(struct toepcb *toep, struct mbuf *m)
2319 	struct tcpcb *tp = toep->tp_tp;
2321 	struct ddp_state *q;
2322 	struct ddp_buf_state *bsp;
2323 	struct cpl_rx_ddp_complete *hdr;
2324 	unsigned int ddp_report, buf_idx, when, delack_mode;
2326 	struct sockbuf *rcv;
2328 	inp_wlock(tp->t_inpcb);
2329 	so = inp_inpcbtosocket(tp->t_inpcb);
2331 	if (__predict_false(so_no_receive(so))) {
2332 		struct inpcb *inp = so_sotoinpcb(so);
2334 		handle_excess_rx(toep, m);
2338 	q = &toep->tp_ddp_state;
2340 	ddp_report = ntohl(hdr->ddp_report);
2341 	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
2342 	m->m_pkthdr.csum_data = tp->rcv_nxt;
2344 	rcv = so_sockbuf_rcv(so);
2347 	bsp = &q->buf_state[buf_idx];
2348 	when = bsp->cur_offset;
	/* bytes placed since the last completion on this buffer */
2349 	m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
2350 	tp->rcv_nxt += m->m_len;
2351 	tp->t_rcvtime = ticks;
2353 	delack_mode = G_DDP_DACK_MODE(ddp_report);
2354 	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
2355 		toep->tp_delack_mode = delack_mode;
2356 		toep->tp_delack_seq = tp->rcv_nxt;
	/* NOTE(review): Linux skb remnants; presumably in a disabled region. */
2359 	skb_reset_transport_header(skb);
2360 	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
2362 	inp_wunlock(tp->t_inpcb);
2364 	KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2366 	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
2367 	    "ddp_report 0x%x offset %u, len %u",
2368 	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
2369 	    G_DDP_OFFSET(ddp_report), m->m_len);
2371 	m->m_cur_offset = bsp->cur_offset;
2372 	bsp->cur_offset += m->m_len;
2374 	if (!(bsp->flags & DDP_BF_NOFLIP)) {
2375 		q->cur_buf ^= 1;                     /* flip buffers */
2376 		if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
2381 	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
2382 	    "ddp_report %u offset %u",
2383 	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
2384 	    G_DDP_OFFSET(ddp_report));
2386 	m->m_ddp_gl = (unsigned char *)bsp->gl;
2387 	m->m_flags |= M_DDP;
2388 	m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
2389 	if (bsp->flags & DDP_BF_NOCOPY)
2390 		bsp->flags &= ~DDP_BF_NOCOPY;
2392 		m->m_ddp_flags |= DDP_BF_NODATA;
2395 	if ((so_state_get(so) & SS_NOFDREF) == 0)
2396 		so_sorwakeup_locked(so);
2398 		sockbuf_unlock(rcv);
2402  * Handler for RX_DDP_COMPLETE CPL messages.
/* Thin CPL dispatch wrapper around process_ddp_complete(). */
2405 do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2407 	struct toepcb *toep = ctx;
	/* NOTE(review): Linux skb remnant; presumably in a disabled region. */
2411 	skb->h.th = tcphdr_skb->h.th;
2413 	process_ddp_complete(toep, m);
2418  * Move a socket to TIME_WAIT state.  We need to make some adjustments to the
2419  * socket state before calling tcp_time_wait to comply with its expectations.
/*
 * Zeroes ts_recent_age and t_srtt so tcp_time_wait()'s recycling and
 * metrics-update logic are both defeated, then enters TIME_WAIT via
 * tcp_offload_twstart().
 */
2422 enter_timewait(struct tcpcb *tp)
2425 	 * Bump rcv_nxt for the peer FIN.  We don't do this at the time we
2426 	 * process peer_close because we don't want to carry the peer FIN in
2427 	 * the socket's receive queue and if we increment rcv_nxt without
2428 	 * having the FIN in the receive queue we'll confuse facilities such
2431 	inp_wlock(tp->t_inpcb);
2434 	tp->ts_recent_age = 0;	     /* defeat recycling */
2435 	tp->t_srtt = 0;                        /* defeat tcp_update_metrics */
2436 	inp_wunlock(tp->t_inpcb);
2437 	tcp_offload_twstart(tp);
2441  * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE.  This
2442  * function deals with the data that may be reported along with the FIN.
2443  * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
2444  * perform normal FIN-related processing.  In the latter case 1 indicates that
2445  * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
/*
 * Turns the data implicitly completed by a PEER_CLOSE into a synthetic
 * DDP delivery mbuf (the FIN byte itself is excluded from rcv_nxt), then
 * wakes the reader.  Callers interpret the return code as documented in
 * the block comment above.
 */
2449 handle_peer_close_data(struct socket *so, struct mbuf *m)
2451 	struct tcpcb *tp = so_sototcpcb(so);
2452 	struct toepcb *toep = tp->t_toe;
2453 	struct ddp_state *q;
2454 	struct ddp_buf_state *bsp;
2455 	struct cpl_peer_close *req = cplhdr(m);
2456 	unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */
2457 	struct sockbuf *rcv;
2459 	if (tp->rcv_nxt == rcv_nxt)			/* no data */
2462 	CTR0(KTR_TOM, "handle_peer_close_data");
2463 	if (__predict_false(so_no_receive(so))) {
2464 		handle_excess_rx(toep, m);
2467 		 * Although we discard the data we want to process the FIN so
2468 		 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
2469 		 * PEER_CLOSE without data.  In particular this PEER_CLOSE
2470 		 * may be what will close the connection.  We return 1 because
2471 		 * handle_excess_rx() already freed the packet.
2476 	inp_lock_assert(tp->t_inpcb);
2477 	q = &toep->tp_ddp_state;
2478 	rcv = so_sockbuf_rcv(so);
2481 	bsp = &q->buf_state[q->cur_buf];
2482 	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
2483 	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2484 	m->m_ddp_gl = (unsigned char *)bsp->gl;
2485 	m->m_flags |= M_DDP;
2486 	m->m_cur_offset = bsp->cur_offset;
2488 	    DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
2489 	m->m_seq = tp->rcv_nxt;
2490 	tp->rcv_nxt = rcv_nxt;
2491 	bsp->cur_offset += m->m_pkthdr.len;
2492 	if (!(bsp->flags & DDP_BF_NOFLIP))
	/* NOTE(review): Linux skb remnants; presumably in a disabled region. */
2495 	skb_reset_transport_header(skb);
2496 	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
2498 	tp->t_rcvtime = ticks;
2500 	if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
2501 		so_sorwakeup_locked(so);
2503 		sockbuf_unlock(rcv);
2509  * Handle a peer FIN.
/*
 * do_peer_fin - state-machine transition for a received PEER_CLOSE:
 * ESTABLISHED -> CLOSE_WAIT, FIN_WAIT_1 -> CLOSING, FIN_WAIT_2 ->
 * TIME_WAIT (or treat as abort_rpl if an abort is pending).  The chosen
 * action is carried out only after the inpcb lock is released.
 */
2512 do_peer_fin(struct toepcb *toep, struct mbuf *m)
2515 	struct tcpcb *tp = toep->tp_tp;
2519 	CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state);
	/* a pending abort supersedes the FIN on non-T3A parts */
2520 	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
2521 		printf("abort_pending set\n");
2525 	inp_wlock(tp->t_inpcb);
2526 	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
2527 	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
2528 		keep = handle_peer_close_data(so, m);
2530 			inp_wunlock(tp->t_inpcb);
2534 	if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2536 		    "waking up waiters for cantrcvmore on %p ", so);
2540 		 * If connection is half-synchronized
2541 		 * (ie NEEDSYN flag on) then delay ACK,
2542 		 * so it may be piggybacked when SYN is sent.
2543 		 * Otherwise, since we received a FIN then no
2544 		 * more input can be expected, send ACK now.
2546 		if (tp->t_flags & TF_NEEDSYN)
2547 			tp->t_flags |= TF_DELACK;
2549 			tp->t_flags |= TF_ACKNOW;
2553 	switch (tp->t_state) {
2554 	case TCPS_SYN_RECEIVED:
2555 	    tp->t_starttime = ticks;
2557 	case TCPS_ESTABLISHED:
2558 		tp->t_state = TCPS_CLOSE_WAIT;
2560 	case TCPS_FIN_WAIT_1:
2561 		tp->t_state = TCPS_CLOSING;
2563 	case TCPS_FIN_WAIT_2:
2565 		 * If we've sent an abort_req we must have sent it too late,
2566 		 * HW will send us a reply telling us so, and this peer_close
2567 		 * is really the last message for this connection and needs to
2568 		 * be treated as an abort_rpl, i.e., transition the connection
2569 		 * to TCP_CLOSE (note that the host stack does this at the
2570 		 * time of generating the RST but we must wait for HW).
2571 		 * Otherwise we enter TIME_WAIT.
2573 		t3_release_offload_resources(toep);
2574 		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
2577 			action = TCP_TIMEWAIT;
2582 		    "%s: TID %u received PEER_CLOSE in bad state %d\n",
2583 		    toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state);
2585 	inp_wunlock(tp->t_inpcb);
	/* perform the deferred action now that the lock is dropped */
2587 	if (action == TCP_TIMEWAIT) {
2589 	} else if (action == TCP_DROP) {
2590 		tcp_offload_drop(tp, 0);
2591 	} else if (action == TCP_CLOSE) {
2592 		tcp_offload_close(tp);
	/* NOTE(review): the sk_wake_async/POLL_HUP lines below are Linux
	 * remnants; presumably inside a disabled region elided here. */
2596 	/* Do not send POLL_HUP for half duplex close. */
2597 	if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
2598 	    sk->sk_state == TCP_CLOSE)
2599 		sk_wake_async(so, 1, POLL_HUP);
2601 		sk_wake_async(so, 1, POLL_IN);
2610  * Handler for PEER_CLOSE CPL messages.
/* Thin CPL dispatch wrapper around do_peer_fin(). */
2613 do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2615 	struct toepcb *toep = (struct toepcb *)ctx;
2619 	do_peer_fin(toep, m);
/*
 * process_close_con_rpl - our FIN has been ACKed by the peer (the HW
 * reports it via CLOSE_CON_RPL).  Advance snd_una past the FIN and run
 * the state machine: CLOSING -> TIME_WAIT (or abort-pending handling),
 * FIN_WAIT_1 -> FIN_WAIT_2 (starting a recycle timer if the receive
 * side is already closed).  Deferred actions run after unlocking.
 */
2624 process_close_con_rpl(struct toepcb *toep, struct mbuf *m)
2626 	struct cpl_close_con_rpl *rpl = cplhdr(m);
2627 	struct tcpcb *tp = toep->tp_tp;
2630 	struct sockbuf *rcv;
2632 	inp_wlock(tp->t_inpcb);
2633 	so = inp_inpcbtosocket(tp->t_inpcb);
2635 	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */
2637 	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
2638 		inp_wunlock(tp->t_inpcb);
2642 	CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep,
2643 	    tp->t_state, !!(so_state_get(so) & SS_NOFDREF));
2645 	switch (tp->t_state) {
2646 	case TCPS_CLOSING:              /* see FIN_WAIT2 case in do_peer_fin */
2647 		t3_release_offload_resources(toep);
2648 		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
2652 			action = TCP_TIMEWAIT;
2657 		 * In this state we don't care about pending abort_rpl.
2658 		 * If we've sent abort_req it was post-close and was sent too
2659 		 * late, this close_con_rpl is the actual last message.
2661 		t3_release_offload_resources(toep);
2664 	case TCPS_FIN_WAIT_1:
2666 		 * If we can't receive any more
2667 		 * data, then closing user can proceed.
2668 		 * Starting the timer is contrary to the
2669 		 * specification, but if we don't get a FIN
2670 		 * we'll hang forever.
2673 		 * we should release the tp also, and use a
2677 		rcv = so_sockbuf_rcv(so);
2681 		if (rcv->sb_state & SBS_CANTRCVMORE) {
2685 			soisdisconnected(so);
2686 			timeout = (tcp_fast_finwait2_recycle) ?
2687 			    tcp_finwait2_timeout : tcp_maxidle;
2688 			tcp_timer_activate(tp, TT_2MSL, timeout);
2690 		tp->t_state = TCPS_FIN_WAIT_2;
2691 		if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 &&
2692 		    (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
2699 		    "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
2700 		    toep->tp_toedev->tod_name, toep->tp_tid,
2703 	inp_wunlock(tp->t_inpcb);
	/* perform the deferred action now that the lock is dropped */
2706 	if (action == TCP_TIMEWAIT) {
2708 	} else if (action == TCP_DROP) {
2709 		tcp_offload_drop(tp, 0);
2710 	} else if (action == TCP_CLOSE) {
2711 		tcp_offload_close(tp);
2718  * Handler for CLOSE_CON_RPL CPL messages.
/* Thin CPL dispatch wrapper around process_close_con_rpl(). */
2721 do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
2724 	struct toepcb *toep = (struct toepcb *)ctx;
2726 	process_close_con_rpl(toep, m);
2731  * Process abort replies.  We only process these messages if we anticipate
2732  * them as the coordination between SW and HW in this area is somewhat lacking
2733  * and sometimes we get ABORT_RPLs after we are done with the connection that
2734  * originated the ABORT_REQ.
/*
 * Clears the abort-pending flags and, when this is the reply we were
 * waiting for (and not a racing T3A duplicate), releases offload
 * resources and closes the connection.
 */
2737 process_abort_rpl(struct toepcb *toep, struct mbuf *m)
2739 	struct tcpcb *tp = toep->tp_tp;
	/* NOTE(review): T3_TRACE below references the Linux 'sk'; presumably
	 * inside a disabled trace region whose guards are elided here. */
2744 	T3_TRACE1(TIDTB(sk),
2745 	    "process_abort_rpl: GTS rpl pending %d",
2746 	    sock_flag(sk, ABORT_RPL_PENDING));
2749 	inp_wlock(tp->t_inpcb);
2750 	so = inp_inpcbtosocket(tp->t_inpcb);
2752 	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
2754 		 * XXX panic on tcpdrop
	/* non-T3A parts send two RPLs; remember we saw the first */
2756 		if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev))
2757 			toep->tp_flags |= TP_ABORT_RPL_RCVD;
2759 			toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING);
2760 			if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) ||
2761 			    !is_t3a(toep->tp_toedev)) {
2762 				if (toep->tp_flags & TP_ABORT_REQ_RCVD)
2763 					panic("TP_ABORT_REQ_RCVD set");
2764 				t3_release_offload_resources(toep);
2769 	inp_wunlock(tp->t_inpcb);
2772 		tcp_offload_close(tp);
2778 * Handle an ABORT_RPL_RSS CPL message.
/*
 * CPL handler for ABORT_RPL_RSS.  Drops replies for aborts that failed
 * because they arrived post-close, cleans up orphaned toepcbs whose tcpcb is
 * already gone (TID removal, L2T release), and otherwise forwards to
 * process_abort_rpl().  Each path drops a toepcb reference.
 */
2781 do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2783 struct cpl_abort_rpl_rss *rpl = cplhdr(m);
2784 struct toepcb *toep;
2787 * Ignore replies to post-close aborts indicating that the abort was
2788 * requested too late. These connections are terminated when we get
2789 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
2790 * arrives the TID is either no longer used or it has been recycled.
2792 if (rpl->status == CPL_ERR_ABORT_FAILED) {
2798 toep = (struct toepcb *)ctx;
2801 * Sometimes we've already closed the socket, e.g., a post-close
2802 * abort races with ABORT_REQ_RSS, the latter frees the socket
2803 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED,
2804 * but FW turns the ABORT_REQ into a regular one and so we get
2805 * ABORT_RPL_RSS with status 0 and no socket. Only on T3A.
2810 if (toep->tp_tp == NULL) {
2811 log(LOG_NOTICE, "removing tid for abort\n");
2812 cxgb_remove_tid(cdev, toep, toep->tp_tid);
2814 l2t_release(L2DATA(cdev), toep->tp_l2t);
2816 toepcb_release(toep);
2820 log(LOG_NOTICE, "toep=%p\n", toep);
2821 log(LOG_NOTICE, "tp=%p\n", toep->tp_tp);
2824 process_abort_rpl(toep, m);
2825 toepcb_release(toep);
2830 * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also
2831 * indicate whether RST should be sent in response.
/*
 * Map a CPL abort reason code to a FreeBSD errno for so_error; *need_rst is
 * presumably set to indicate whether a RST should be generated (the lines
 * that write it are elided in this excerpt — confirm in full source).
 * Connection resets map to EPIPE in CLOSE_WAIT, ECONNRESET otherwise.
 */
2834 abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst)
2836 struct tcpcb *tp = so_sototcpcb(so);
2838 switch (abort_reason) {
2839 case CPL_ERR_BAD_SYN:
2841 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through
2843 case CPL_ERR_CONN_RESET:
2844 // XXX need to handle SYN_RECV due to crossed SYNs
2845 return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
2846 case CPL_ERR_XMIT_TIMEDOUT:
2847 case CPL_ERR_PERSIST_TIMEDOUT:
2848 case CPL_ERR_FINWAIT2_TIMEDOUT:
2849 case CPL_ERR_KEEPALIVE_TIMEDOUT:
2851 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
/* NOTE(review): the return for the timeout cases and the default case are elided here. */
/*
 * Fill mbuf 'm' with a CPL_ABORT_RPL work request for 'tid' and size the
 * mbuf to exactly one reply.  'cmd' is presumably written into the reply's
 * command/status field on an elided line — confirm in full source.
 */
2860 set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd)
2862 struct cpl_abort_rpl *rpl = cplhdr(m);
2864 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
2865 rpl->wr.wr_lo = htonl(V_WR_TID(tid));
2866 m->m_len = m->m_pkthdr.len = sizeof(*rpl);
2868 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
/*
 * Deferred-work callback that sends an ABORT_RPL for request 'm' whose reply
 * could not be sent inline (see send_abort_rpl()).  The rst_status stashed in
 * req->status by the deferring path is used as the reply command.
 *
 * Fix: the priority and length were being set on 'm' (the deferred request
 * mbuf, which is not transmitted) instead of 'reply_mbuf' (the mbuf actually
 * handed to cxgb_ofld_send()).  The non-deferred path in send_abort_rpl()
 * sets the priority on reply_mbuf; do the same here.  The explicit length
 * store matches what set_abort_rpl_wr() also writes, so it is redundant but
 * harmless.
 */
2873 send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m)
2875 struct mbuf *reply_mbuf;
2876 struct cpl_abort_req_rss *req = cplhdr(m);
2878 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl));
2879 m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
2880 reply_mbuf->m_len = reply_mbuf->m_pkthdr.len = sizeof(struct cpl_abort_rpl);
2881 set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status);
2882 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
2887 * Returns whether an ABORT_REQ_RSS message is a negative advice.
/*
 * Return non-zero if the ABORT_REQ_RSS status is merely negative advice
 * (retransmission or persist-timer advice) rather than a real abort.
 */
2890 is_neg_adv_abort(unsigned int status)
2892 return status == CPL_ERR_RTX_NEG_ADVICE ||
2893 status == CPL_ERR_PERSIST_NEG_ADVICE;
/*
 * Send an ABORT_RPL in response to ABORT_REQ mbuf 'm'.  If no reply mbuf can
 * be allocated (M_NOWAIT), the reply is deferred: rst_status is stashed in
 * req->status and send_deferred_abort_rpl() will run later from process
 * context.  NOTE(review): the NULL-check branch structure around the defer
 * path is partially elided in this excerpt.
 */
2897 send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status)
2899 struct mbuf *reply_mbuf;
2900 struct cpl_abort_req_rss *req = cplhdr(m);
2902 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
2905 /* Defer the reply. Stick rst_status into req->cmd. */
2906 req->status = rst_status;
2907 t3_defer_reply(m, tdev, send_deferred_abort_rpl);
2911 m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
2912 set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status);
2916 * XXX need to sync with ARP as for SYN_RECV connections we can send
2917 * these messages while ARP is pending. For other connection states
2918 * it's not a problem.
2920 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
/*
 * Remove a SYN_RECV child connection from its parent listener's SYN queue.
 * Currently unimplemented on FreeBSD (CXGB_UNIMPLEMENTED panics/asserts);
 * the lines below it are leftover Linux code and are unreachable.
 */
2925 cleanup_syn_rcv_conn(struct socket *child, struct socket *parent)
2927 CXGB_UNIMPLEMENTED();
2929 struct request_sock *req = child->sk_user_data;
2931 inet_csk_reqsk_queue_removed(parent, req);
2932 synq_remove(tcp_sk(child));
2934 child->sk_user_data = NULL;
2940 * Performs the actual work to abort a SYN_RECV connection.
/*
 * Tear down an aborted SYN_RECV child connection.  If the parent listener is
 * still in LISTEN, the child is detached from the SYN queue and its offload
 * resources released under the child's inpcb lock; otherwise the listener's
 * own shutdown already cleaned it up.
 */
2943 do_abort_syn_rcv(struct socket *child, struct socket *parent)
2945 struct tcpcb *parenttp = so_sototcpcb(parent);
2946 struct tcpcb *childtp = so_sototcpcb(child);
2949 * If the server is still open we clean up the child connection,
2950 * otherwise the server already did the clean up as it was purging
2951 * its SYN queue and the skb was just sitting in its backlog.
2953 if (__predict_false(parenttp->t_state == TCPS_LISTEN)) {
2954 cleanup_syn_rcv_conn(child, parent);
2955 inp_wlock(childtp->t_inpcb);
2956 t3_release_offload_resources(childtp->t_toe);
2957 inp_wunlock(childtp->t_inpcb);
2958 tcp_offload_close(childtp);
2964 * Handle abort requests for a SYN_RECV connection. These need extra work
2965 * because the socket is on its parent's SYN queue.
/*
 * Handle an abort for a connection still on its parent's SYN queue: look up
 * the parent listener via the STID, tear the child down and acknowledge the
 * abort with a no-RST reply.  Currently unimplemented on FreeBSD
 * (CXGB_UNIMPLEMENTED); the remainder is leftover Linux-derived code.
 */
2968 abort_syn_rcv(struct socket *so, struct mbuf *m)
2970 CXGB_UNIMPLEMENTED();
2972 struct socket *parent;
2973 struct toedev *tdev = toep->tp_toedev;
2974 struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
2975 struct socket *oreq = so->so_incomp;
2976 struct t3c_tid_entry *t3c_stid;
2980 return -1; /* somehow we are not on the SYN queue */
2982 t = &(T3C_DATA(cdev))->tid_maps;
2983 t3c_stid = lookup_stid(t, oreq->ts_recent);
2984 parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
2987 do_abort_syn_rcv(so, parent);
2988 send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
2995 * Process abort requests. If we are waiting for an ABORT_RPL we ignore this
2996 * request except that we need to reply to it.
/*
 * Process an ABORT_REQ for an established connection.  Unless we are already
 * waiting on our own ABORT_RPL (case c below), translate the abort reason to
 * an errno, release offload resources and close the connection; in all cases
 * an ABORT_RPL is sent back.  Runs under the inpcb write lock.
 */
2999 process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev)
3001 int rst_status = CPL_ABORT_NO_RST;
3002 const struct cpl_abort_req_rss *req = cplhdr(m);
3003 struct tcpcb *tp = toep->tp_tp;
3007 inp_wlock(tp->t_inpcb);
3008 so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
3009 if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
3010 toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
3015 toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
3017 * Three cases to consider:
3018 * a) We haven't sent an abort_req; close the connection.
3019 * b) We have sent a post-close abort_req that will get to TP too late
3020 * and will generate a CPL_ERR_ABORT_FAILED reply. The reply will
3021 * be ignored and the connection should be closed now.
3022 * c) We have sent a regular abort_req that will get to TP too late.
3023 * That will generate an abort_rpl with status 0, wait for it.
3025 if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
3026 (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
3029 error = abort_status_to_errno(so, req->status,
3031 so_error_set(so, error);
3033 if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
3036 * SYN_RECV needs special processing. If abort_syn_rcv()
3037 * returns 0 is has taken care of the abort.
3039 if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
3042 t3_release_offload_resources(toep);
3045 inp_wunlock(tp->t_inpcb);
3048 tcp_offload_close(tp);
3050 send_abort_rpl(m, tdev, rst_status);
/* NOTE(review): this unlock belongs to an elided branch; it is not a double unlock of the path above. */
3053 inp_wunlock(tp->t_inpcb);
3057 * Handle an ABORT_REQ_RSS CPL message.
/*
 * CPL handler for ABORT_REQ_RSS.  Ignores negative-advice aborts, handles
 * aborts of embryonic (SYN_RCVD) connections by tearing down the TID/L2T
 * state directly, cleans up orphaned toepcbs, and forwards everything else
 * to process_abort_req().
 */
3060 do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3062 const struct cpl_abort_req_rss *req = cplhdr(m);
3063 struct toepcb *toep = (struct toepcb *)ctx;
3065 if (is_neg_adv_abort(req->status)) {
3070 log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid);
3072 if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
3073 cxgb_remove_tid(cdev, toep, toep->tp_tid);
3074 toep->tp_flags |= TP_ABORT_REQ_RCVD;
3076 send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
3078 l2t_release(L2DATA(cdev), toep->tp_l2t);
/* Detach the embryonic connection's tcpcb from offload before dropping the toepcb. */
3083 toep->tp_tp->t_toe = NULL;
3084 toep->tp_tp->t_flags &= ~TF_TOE;
3087 * XXX need to call syncache_chkrst - but we don't
3088 * have a way of doing that yet
3090 toepcb_release(toep);
3091 log(LOG_ERR, "abort for unestablished connection :-(\n");
3094 if (toep->tp_tp == NULL) {
3095 log(LOG_NOTICE, "disconnected toepcb\n");
3096 /* should be freed momentarily */
3102 process_abort_req(toep, m, toep->tp_toedev);
3103 toepcb_release(toep);
/*
 * Abort a passive-open child: tear down the SYN_RECV connection and, on T3
 * hardware, reuse mbuf 'm' to send a PASS_ACCEPT_RPL reject to the chip.
 */
3108 pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
3110 struct toedev *tdev = TOE_DEV(parent);
3112 do_abort_syn_rcv(child, parent);
3113 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
3114 struct cpl_pass_accept_rpl *rpl = cplhdr(m);
3116 rpl->opt0h = htonl(F_TCAM_BYPASS);
3117 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
3118 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
/*
 * ARP-failure cleanup for a passive open: locate the parent listener via the
 * STID and abort the embryonic connection.  Currently unimplemented on
 * FreeBSD (CXGB_UNIMPLEMENTED); the code below is leftover Linux-derived
 * logic.  If an abort is already pending, the ABORT_REQ will do the close.
 */
3124 handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
3126 CXGB_UNIMPLEMENTED();
3129 struct t3cdev *cdev;
3130 struct socket *parent;
3131 struct socket *oreq;
3132 struct t3c_tid_entry *t3c_stid;
3134 struct tcpcb *otp, *tp = so_sototcpcb(so);
3135 struct toepcb *toep = tp->t_toe;
3138 * If the connection is being aborted due to the parent listening
3139 * socket going away there's nothing to do, the ABORT_REQ will close
3142 if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
3147 oreq = so->so_incomp;
3148 otp = so_sototcpcb(oreq);
3151 t = &(T3C_DATA(cdev))->tid_maps;
3152 t3c_stid = lookup_stid(t, otp->ts_recent);
3153 parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
3156 pass_open_abort(so, parent, m);
3162 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL. This is treated similarly
3163 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
/*
 * ARP-failure handler installed on CPL_PASS_ACCEPT_RPL mbufs; funnels into
 * handle_pass_open_arp_failure() to tear down the SYN_RECV connection.
 * The skb-based lines are leftover Linux code.
 */
3167 pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m)
3171 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
3172 BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk);
3174 handle_pass_open_arp_failure(m_get_socket(m), m);
3178 * Populate a reject CPL_PASS_ACCEPT_RPL WR.
/*
 * Build a rejecting CPL_PASS_ACCEPT_RPL in reply_mbuf for the accept request
 * in req_mbuf (TID and peer IP are copied from the request).
 */
3181 mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf)
3183 struct cpl_pass_accept_req *req = cplhdr(req_mbuf);
3184 struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf);
3185 unsigned int tid = GET_TID(req);
3187 m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP);
3188 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3189 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
3190 rpl->peer_ip = req->peer_ip; // req->peer_ip not overwritten yet
3191 rpl->opt0h = htonl(F_TCAM_BYPASS);
3192 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
3194 rpl->rsvd = rpl->opt2; /* workaround for HW bug */
3198 * Send a deferred reject to an accept request.
/*
 * Deferred-work callback: allocate a reply mbuf (cannot fail — nofail
 * variant) and send a reject PASS_ACCEPT_RPL for accept request 'm'.
 */
3201 reject_pass_request(struct toedev *tdev, struct mbuf *m)
3203 struct mbuf *reply_mbuf;
3205 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl));
3206 mk_pass_accept_rpl(reply_mbuf, m);
3207 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
/*
 * Callback invoked by the TCP syncache for offload events on a toepcb.
 * Both the "entry already present" and "entry dropped" cases release the
 * extra toepcb reference taken before syncache insertion; unknown events
 * are logged.  NOTE(review): the case label for the dropped event is elided
 * in this excerpt.
 */
3212 handle_syncache_event(int event, void *arg)
3214 struct toepcb *toep = arg;
3217 case TOE_SC_ENTRY_PRESENT:
3219 * entry already exists - free toepcb
3222 printf("syncache entry present\n");
3223 toepcb_release(toep);
3227 * The syncache has given up on this entry
3228 * either it timed out, or it was evicted
3229 * we need to explicitly release the tid
3231 printf("syncache entry dropped\n");
3232 toepcb_release(toep);
3235 log(LOG_ERR, "unknown syncache event %d\n", event);
/*
 * Enter a hardware-reported passive SYN into the FreeBSD syncache.  Builds a
 * synthetic TCP header and in_conninfo from the CPL_PASS_ACCEPT_REQ fields,
 * converts the HW-decoded TCP options into a toeopt, seeds the toepcb's
 * sequence-tracking fields from rcv_isn+1, and calls
 * tcp_offload_syncache_add() with the cxgb usrreqs.
 */
3241 syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
3243 struct in_conninfo inc;
3247 int mss, wsf, sack, ts;
3248 uint32_t rcv_isn = ntohl(req->rcv_isn);
3250 bzero(&toeo, sizeof(struct toeopt));
3251 inp = so_sotoinpcb(lso);
3254 * Fill out information for entering us into the syncache
3256 bzero(&inc, sizeof(inc));
/* CPL fields are already network byte order, matching th_/inc_ expectations. */
3257 inc.inc_fport = th.th_sport = req->peer_port;
3258 inc.inc_lport = th.th_dport = req->local_port;
3259 th.th_seq = req->rcv_isn;
3260 th.th_flags = TH_SYN;
3262 toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;
3265 inc.inc_faddr.s_addr = req->peer_ip;
3266 inc.inc_laddr.s_addr = req->local_ip;
3268 DPRINTF("syncache add of %d:%d %d:%d\n",
3269 ntohl(req->local_ip), ntohs(req->local_port),
3270 ntohl(req->peer_ip), ntohs(req->peer_port));
3272 mss = req->tcp_options.mss;
3273 wsf = req->tcp_options.wsf;
3274 ts = req->tcp_options.tstamp;
3275 sack = req->tcp_options.sack;
3277 toeo.to_wscale = wsf;
3278 toeo.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
3279 tcp_offload_syncache_add(&inc, &toeo, &th, inp, &lso, &cxgb_toe_usrreqs,
3285 * Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket
3286 * lock held. Note that the sock here is a listening socket that is not owned
/*
 * Core of passive-open handling for a CPL_PASS_ACCEPT_REQ, called with the
 * listening socket 'so'.  Allocates a reply mbuf (deferring a reject on T3
 * if allocation fails), validates the listener state and ingress interface,
 * allocates and initializes a toepcb for the embryonic connection (TID, L2T
 * entry, DDP ULP mode), inserts it into the syncache and the listener's SYN
 * queue, and finally sends an accepting CPL_PASS_ACCEPT_RPL via the L2T
 * entry.  The tail paths build a reject reply / TID release instead.
 * NOTE(review): many error-path branches, gotos and #ifdef'ed Linux route
 * code are elided in this excerpt.
 */
3290 process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev,
3291 struct listen_ctx *lctx)
3294 struct l2t_entry *e;
3296 struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
3297 struct cpl_pass_accept_rpl *rpl;
3298 struct cpl_pass_accept_req *req = cplhdr(m);
3299 unsigned int tid = GET_TID(req);
3300 struct tom_data *d = TOM_DATA(tdev);
3301 struct t3cdev *cdev = d->cdev;
3302 struct tcpcb *tp = so_sototcpcb(so);
3303 struct toepcb *newtoep;
3304 struct rtentry *dst;
3305 struct sockaddr_in nam;
3306 struct t3c_data *td = T3C_DATA(cdev);
3308 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
3309 if (__predict_false(reply_mbuf == NULL)) {
/* On T3 defer a reject from process context; otherwise just queue the TID for release. */
3310 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
3311 t3_defer_reply(m, tdev, reject_pass_request);
3313 cxgb_queue_tid_release(cdev, tid);
3316 DPRINTF("failed to get reply_mbuf\n");
3321 if (tp->t_state != TCPS_LISTEN) {
3322 DPRINTF("socket not in listen state\n");
3327 tim.mac_addr = req->dst_mac;
3328 tim.vlan_tag = ntohs(req->vlan_tag);
3329 if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
3330 DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n");
3336 * XXX do route lookup to confirm that we're still listening on this
/* The ip_route_input/rtable block below is Linux-derived; FreeBSD forces RTF_LOCAL at 3353. */
3339 if (ip_route_input(skb, req->local_ip, req->peer_ip,
3340 G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev))
3342 rt_flags = ((struct rtable *)skb->dst)->rt_flags &
3343 (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL);
3344 dst_release(skb->dst); // done with the input route, release it
3347 if ((rt_flags & RTF_LOCAL) == 0)
3353 rt_flags = RTF_LOCAL;
3354 if ((rt_flags & RTF_LOCAL) == 0)
3358 * Calculate values and add to syncache
3361 newtoep = toepcb_alloc();
3362 if (newtoep == NULL)
3365 bzero(&nam, sizeof(struct sockaddr_in));
3367 nam.sin_len = sizeof(struct sockaddr_in);
3368 nam.sin_family = AF_INET;
3369 nam.sin_addr.s_addr =req->peer_ip;
3370 dst = rtalloc2((struct sockaddr *)&nam, 1, 0);
3373 printf("failed to find route\n");
3376 e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev,
3377 (struct sockaddr *)&nam);
3379 DPRINTF("failed to get l2t\n");
3382 * Point to our listen socket until accept
3384 newtoep->tp_tp = tp;
3385 newtoep->tp_flags = TP_SYN_RCVD;
3386 newtoep->tp_tid = tid;
3387 newtoep->tp_toedev = tdev;
3388 tp->rcv_wnd = select_rcv_wnd(tdev, so);
3390 cxgb_insert_tid(cdev, d->client, newtoep, tid);
3392 LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
/* Enable DDP only if tunable allows it, the socket did not opt out, and the window is large enough. */
3395 newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
3396 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
3398 if (newtoep->tp_ulp_mode) {
3399 ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
3401 if (ddp_mbuf == NULL)
3402 newtoep->tp_ulp_mode = 0;
3405 CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d",
3406 TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
3407 set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
3409 * XXX workaround for lack of syncache drop
3411 toepcb_hold(newtoep);
3412 syncache_add_accept_req(req, so, newtoep);
3414 rpl = cplhdr(reply_mbuf);
3415 reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
3416 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3418 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
3419 rpl->opt2 = htonl(calc_opt2(so, tdev));
3420 rpl->rsvd = rpl->opt2; /* workaround for HW bug */
3421 rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten
3423 rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) |
3424 V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx));
3425 rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) |
3426 CPL_PASS_OPEN_ACCEPT);
3428 DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status);
3430 m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep));
3432 l2t_send(cdev, reply_mbuf, e);
3434 if (newtoep->tp_ulp_mode) {
3435 __set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
3437 TP_DDP_TIMER_WORKAROUND_MASK,
3439 TP_DDP_TIMER_WORKAROUND_VAL, 1);
3441 DPRINTF("no DDP\n");
/* Reject tail: build a reject reply on T3, or release the TID, then send it. */
3445 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
3446 mk_pass_accept_rpl(reply_mbuf, m);
3448 mk_tid_release(reply_mbuf, newtoep, tid);
3449 cxgb_ofld_send(cdev, reply_mbuf);
3453 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
3460 * Handle a CPL_PASS_ACCEPT_REQ message.
/*
 * CPL handler for PASS_ACCEPT_REQ.  Validates the listen context and the
 * new TID (range check; reuse is tolerated on T3A because a prior user's
 * last messages may still be backlogged), then hands off to
 * process_pass_accept_req().  The printk/unlikely checks are leftover
 * Linux-style validation code.
 */
3463 do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3465 struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
3466 struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */
3467 struct tom_data *d = listen_ctx->tom_data;
3470 struct cpl_pass_accept_req *req = cplhdr(m);
3471 unsigned int tid = GET_TID(req);
3472 struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
3474 if (unlikely(!lsk)) {
3475 printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
3477 (unsigned long)((union listen_entry *)ctx -
3479 return CPL_RET_BUF_DONE;
3481 if (unlikely(tid >= t->ntids)) {
3482 printk(KERN_ERR "%s: passive open TID %u too large\n",
3484 return CPL_RET_BUF_DONE;
3487 * For T3A the current user of the TID may have closed but its last
3488 * message(s) may have been backlogged so the TID appears to be still
3489 * in use. Just take the TID away, the connection can close at its
3490 * own leisure. For T3B this situation is a bug.
3492 if (!valid_new_tid(t, tid) &&
3493 cdev->type != T3A) {
3494 printk(KERN_ERR "%s: passive open uses existing TID %u\n",
3496 return CPL_RET_BUF_DONE;
3500 process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
3505 * Called when a connection is established to translate the TCP options
3506 * reported by HW to FreeBSD's native format.
/*
 * Translate the hardware-reported TCP options word into native tcpcb flags:
 * MSS clamp from the MTU table (minus 40 bytes of IP+TCP headers), and
 * received timestamp/SACK/window-scale bits; applies the negotiated receive
 * window scale when both sides agreed.  Caller holds the inpcb lock.
 */
3509 assign_rxopt(struct socket *so, unsigned int opt)
3511 struct tcpcb *tp = so_sototcpcb(so);
3512 struct toepcb *toep = tp->t_toe;
3513 const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep));
3515 inp_lock_assert(tp->t_inpcb);
3517 toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
3518 tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
3519 tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
3520 tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
3521 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
3522 (TF_RCVD_SCALE|TF_REQ_SCALE))
3523 tp->rcv_scale = tp->request_r_scale;
3527 * Completes some final bits of initialization for just established connections
3528 * and changes their state to TCP_ESTABLISHED.
3530 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
/*
 * Finish initializing a just-established connection: seed all send sequence
 * numbers from snd_isn (true ISN + 1), apply HW-reported TCP options, hook
 * t3_ctloutput, pre-charge rcv credits beyond what opt0 could carry, and
 * move the tcpcb to ESTABLISHED.  The sk/jiffies/dst_confirm lines are
 * leftover Linux code.
 */
3533 make_established(struct socket *so, u32 snd_isn, unsigned int opt)
3535 struct tcpcb *tp = so_sototcpcb(so);
3536 struct toepcb *toep = tp->t_toe;
3538 toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
3539 assign_rxopt(so, opt);
3546 so->so_proto->pr_ctloutput = t3_ctloutput;
3550 inet_sk(sk)->id = tp->write_seq ^ jiffies;
3553 * XXX not clear what rcv_wup maps to
3556 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
3557 * pass through opt0.
3559 if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
3560 toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);
3566 * no clean interface for marking ARP up to date
3568 dst_confirm(sk->sk_dst_cache);
3570 tp->t_starttime = ticks;
3571 tp->t_state = TCPS_ESTABLISHED;
/*
 * Complete a passive open by expanding the syncache entry that
 * syncache_add_accept_req() created: reconstruct the in_conninfo, synthetic
 * ACK header and toeopt from the CPL_PASS_ESTABLISH fields, then call
 * tcp_offload_syncache_expand() to materialize the socket in *so.
 * Returns the value of tcp_offload_syncache_expand().
 */
3576 syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
3579 struct in_conninfo inc;
3582 int mss, wsf, sack, ts;
3583 struct mbuf *m = NULL;
3584 const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
3588 #error "no MAC support"
3591 opt = ntohs(req->tcp_opt);
3593 bzero(&toeo, sizeof(struct toeopt));
3596 * Fill out information for entering us into the syncache
3598 bzero(&inc, sizeof(inc));
3599 inc.inc_fport = th.th_sport = req->peer_port;
3600 inc.inc_lport = th.th_dport = req->local_port;
3601 th.th_seq = req->rcv_isn;
3602 th.th_flags = TH_ACK;
3605 inc.inc_faddr.s_addr = req->peer_ip;
3606 inc.inc_laddr.s_addr = req->local_ip;
3608 mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
3609 wsf = G_TCPOPT_WSCALE_OK(opt);
3610 ts = G_TCPOPT_TSTAMP(opt);
3611 sack = G_TCPOPT_SACK(opt);
3614 toeo.to_wscale = G_TCPOPT_SND_WSCALE(opt);
3615 toeo.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
3617 DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
3618 ntohl(req->local_ip), ntohs(req->local_port),
3619 ntohl(req->peer_ip), ntohs(req->peer_port),
3620 mss, wsf, ts, sack);
3621 return tcp_offload_syncache_expand(&inc, &toeo, &th, so, m);
3626 * Process a CPL_PASS_ESTABLISH message. XXX a lot of the locking doesn't work
3627 * if we are in TCP_SYN_RECV due to crossed SYNs
/*
 * CPL handler for PASS_ESTABLISH: the passive open has completed on the
 * chip.  Removes the toepcb from the listener's SYN queue, expands the
 * syncache entry into a real socket, disables sockbuf coalescing, installs
 * the offload ops and WR bookkeeping, and finishes via make_established().
 * The trailing sk_* / forward_skb_hint code is leftover Linux logic.
 * NOTE(review): error-path branches and several braces are elided in this
 * excerpt.
 */
3630 do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3632 struct cpl_pass_establish *req = cplhdr(m);
3633 struct toepcb *toep = (struct toepcb *)ctx;
3634 struct tcpcb *tp = toep->tp_tp;
3635 struct socket *so, *lso;
3636 struct t3c_data *td = T3C_DATA(cdev);
3637 struct sockbuf *snd, *rcv;
3639 // Complete socket initialization now that we have the SND_ISN
3641 struct toedev *tdev;
3644 tdev = toep->tp_toedev;
3646 inp_wlock(tp->t_inpcb);
3650 * XXX need to add reference while we're manipulating
3652 so = lso = inp_inpcbtosocket(tp->t_inpcb);
3654 inp_wunlock(tp->t_inpcb);
3657 LIST_REMOVE(toep, synq_entry);
3660 if (!syncache_expand_establish_req(req, &so, toep)) {
3664 CXGB_UNIMPLEMENTED();
3668 * Couldn't create the socket
3670 CXGB_UNIMPLEMENTED();
/* 'so' now refers to the newly created child socket; relock its inpcb. */
3673 tp = so_sototcpcb(so);
3674 inp_wlock(tp->t_inpcb);
3676 snd = so_sockbuf_snd(so);
3677 rcv = so_sockbuf_rcv(so);
3679 snd->sb_flags |= SB_NOCOALESCE;
3680 rcv->sb_flags |= SB_NOCOALESCE;
3685 reset_wr_list(toep);
3686 tp->rcv_wnd = select_rcv_wnd(tdev, so);
3687 tp->rcv_nxt = toep->tp_copied_seq;
3688 install_offload_ops(so);
3690 toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
3691 toep->tp_wr_unacked = 0;
3692 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
3693 toep->tp_qset_idx = 0;
3694 toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);
3697 * XXX Cancel any keep alive timer
3700 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
3703 * XXX workaround for lack of syncache drop
3705 toepcb_release(toep);
3706 inp_wunlock(tp->t_inpcb);
3708 CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
3709 cxgb_log_tcb(cdev->adapter, toep->tp_tid);
3712 * XXX not sure how these checks map to us
3714 if (unlikely(sk->sk_socket)) { // simultaneous opens only
3715 sk->sk_state_change(sk);
3716 sk_wake_async(so, 0, POLL_OUT);
3719 * The state for the new connection is now up to date.
3720 * Next check if we should add the connection to the parent's
3721 * accept queue. When the parent closes it resets connections
3722 * on its SYN queue, so check if we are being reset. If so we
3723 * don't need to do anything more, the coming ABORT_RPL will
3724 * destroy this socket. Otherwise move the connection to the
3727 * Note that we reset the synq before closing the server so if
3728 * we are not being reset the stid is still open.
3730 if (unlikely(!tp->forward_skb_hint)) { // removed from synq
3741 * Fill in the right TID for CPL messages waiting in the out-of-order queue
3742 * and send them to the TOE.
/*
 * Drain the toepcb's out-of-order queue of CPL messages that were deferred
 * while the connection had no TID: patch the now-known TID into each WR
 * header and opcode field, then send them to the chip.  Caller holds the
 * inpcb lock.
 */
3745 fixup_and_send_ofo(struct toepcb *toep)
3748 struct toedev *tdev = toep->tp_toedev;
3749 struct tcpcb *tp = toep->tp_tp;
3750 unsigned int tid = toep->tp_tid;
3752 log(LOG_NOTICE, "fixup_and_send_ofo\n");
3754 inp_lock_assert(tp->t_inpcb);
3755 while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
3757 * A variety of messages can be waiting but the fields we'll
3758 * be touching are common to all so any message type will do.
3760 struct cpl_close_con_req *p = cplhdr(m);
3762 p->wr.wr_lo = htonl(V_WR_TID(tid));
3763 OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
3764 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
3769 * Updates socket state from an active establish CPL message. Runs with the
/*
 * Update socket state from a CPL_ACT_ESTABLISH (active open completed):
 * seed receive sequence state from rcv_isn, finish via make_established(),
 * flush any CPLs deferred for lack of a TID, and record the new state.
 * The fixup_pending_writeq_buffers/sk_write_space lines are leftover Linux
 * code.  Runs with the inpcb lock held (per the comment above the function
 * in the full source).
 */
3773 socket_act_establish(struct socket *so, struct mbuf *m)
3775 struct cpl_act_establish *req = cplhdr(m);
3776 u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */
3777 struct tcpcb *tp = so_sototcpcb(so);
3778 struct toepcb *toep = tp->t_toe;
3780 if (__predict_false(tp->t_state != TCPS_SYN_SENT))
3781 log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
3782 toep->tp_tid, tp->t_state);
3784 tp->ts_recent_age = ticks;
3785 tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
3786 toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;
3788 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
3791 * Now that we finally have a TID send any CPL messages that we had to
3792 * defer for lack of a TID.
3794 if (mbufq_len(&toep->out_of_order_queue))
3795 fixup_and_send_ofo(toep);
3797 if (__predict_false(so_state_get(so) & SS_NOFDREF)) {
3799 * XXX does this even make sense?
3806 * XXX assume no write requests permitted while socket connection is
3810 * Currently the send queue must be empty at this point because the
3811 * socket layer does not send anything before a connection is
3812 * established. To be future proof though we handle the possibility
3813 * that there are pending buffers to send (either TX_DATA or
3814 * CLOSE_CON_REQ). First we need to adjust the sequence number of the
3815 * buffers according to the just learned write_seq, and then we send
3816 * them on their way.
3818 fixup_pending_writeq_buffers(sk);
3819 if (t3_push_frames(so, 1))
3820 sk->sk_write_space(sk);
3823 toep->tp_state = tp->t_state;
3824 KMOD_TCPSTAT_INC(tcps_connects);
3829 * Process a CPL_ACT_ESTABLISH message.
/*
 * CPL handler for ACT_ESTABLISH.  Frees the active-open TID (atid), binds
 * the permanent TID to the toepcb, records the RSS queue set, and completes
 * establishment via socket_act_establish() under the inpcb lock.
 * NOTE(review): early-exit branches (e.g. NULL tcpcb) are elided in this
 * excerpt, which is why free_atid appears twice.
 */
3832 do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3834 struct cpl_act_establish *req = cplhdr(m);
3835 unsigned int tid = GET_TID(req);
3836 unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
3837 struct toepcb *toep = (struct toepcb *)ctx;
3838 struct tcpcb *tp = toep->tp_tp;
3840 struct toedev *tdev;
3844 free_atid(cdev, atid);
3847 inp_wlock(tp->t_inpcb);
3852 so = inp_inpcbtosocket(tp->t_inpcb);
3853 tdev = toep->tp_toedev; /* blow up here if link was down */
3857 * It's OK if the TID is currently in use, the owning socket may have
3858 * backlogged its last CPL message(s). Just take it away.
3862 so_insert_tid(d, toep, tid);
3863 free_atid(cdev, atid);
3864 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
3866 socket_act_establish(so, m);
3867 inp_wunlock(tp->t_inpcb);
3868 CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
3869 cxgb_log_tcb(cdev->adapter, toep->tp_tid);
3875 * Process an acknowledgment of WR completion. Advance snd_una and send the
3876 * next batch of work requests from the write queue.
/*
 * Process a CPL_WR_ACK: return 'credits' work-request credits to the toepcb,
 * pop completed WRs off the pending list (each mbuf's csum_data holds its
 * credit count, its pkthdr.len the payload bytes it covered), advance
 * snd_una, drop acknowledged bytes from the send sockbuf, wake writers, and
 * push more frames if data remains.  Runs under the inpcb write lock.
 *
 * Fix: the unexpected-sequence log() call was missing the comma between the
 * LOG_ERR priority and the format string (a syntax error; the earlier log()
 * in this function uses the correct form).
 *
 * NOTE(review): interior lines (loop braces, #ifdef DEBUG sections, the
 * sowwakeup branch structure) are elided in this excerpt; the duplicated
 * "snd = so_sockbuf_snd(so);" lines likely sit in different elided branches.
 */
3879 wr_ack(struct toepcb *toep, struct mbuf *m)
3881 struct tcpcb *tp = toep->tp_tp;
3882 struct cpl_wr_ack *hdr = cplhdr(m);
3884 unsigned int credits = ntohs(hdr->credits);
3885 u32 snd_una = ntohl(hdr->snd_una);
3887 struct sockbuf *snd;
3889 CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits);
3891 inp_wlock(tp->t_inpcb);
3892 so = inp_inpcbtosocket(tp->t_inpcb);
3893 toep->tp_wr_avail += credits;
3894 if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
3895 toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;
3898 struct mbuf *p = peek_wr(toep);
3900 if (__predict_false(!p)) {
3901 log(LOG_ERR, "%u WR_ACK credits for TID %u with "
3902 "nothing pending, state %u wr_avail=%u\n",
3903 credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail);
3907 "wr_ack: p->credits=%d p->bytes=%d",
3908 p->m_pkthdr.csum_data, p->m_pkthdr.len);
3909 KASSERT(p->m_pkthdr.csum_data != 0,
3910 ("empty request still on list"));
3912 if (__predict_false(credits < p->m_pkthdr.csum_data)) {
3915 struct tx_data_wr *w = cplhdr(p);
3917 "TID %u got %u WR credits, need %u, len %u, "
3918 "main body %u, frags %u, seq # %u, ACK una %u,"
3919 " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n",
3920 toep->tp_tid, credits, p->csum, p->len,
3921 p->len - p->data_len, skb_shinfo(p)->nr_frags,
3922 ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt),
3923 toep->tp_wr_avail, count_pending_wrs(tp) - credits);
3925 p->m_pkthdr.csum_data -= credits;
3929 credits -= p->m_pkthdr.csum_data;
3930 bytes += p->m_pkthdr.len;
3932 "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d",
3933 p->m_pkthdr.len, credits, p->m_pkthdr.csum_data);
3940 check_wr_invariants(tp);
3943 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
3945 struct tom_data *d = TOM_DATA(TOE_DEV(so));
3947 log(LOG_ERR, "%s: unexpected sequence # %u in WR_ACK "
3948 "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una,
3949 toep->tp_tid, tp->snd_una);
3954 if (tp->snd_una != snd_una) {
3955 tp->snd_una = snd_una;
3956 tp->ts_recent_age = ticks;
3959 * Keep ARP entry "minty fresh"
3961 dst_confirm(sk->sk_dst_cache);
3963 if (tp->snd_una == tp->snd_nxt)
3964 toep->tp_flags &= ~TP_TX_WAIT_IDLE;
3967 snd = so_sockbuf_snd(so);
3969 CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes);
3970 snd = so_sockbuf_snd(so);
3972 sbdrop_locked(snd, bytes);
3973 so_sowwakeup_locked(so);
3976 if (snd->sb_sndptroff < snd->sb_cc)
3977 t3_push_frames(so, 0);
3980 inp_wunlock(tp->t_inpcb);
3985 * Handler for TX_DATA_ACK CPL messages.
/*
 * CPL handler entry point for TX_DATA_ACK/WR_ACK; recovers the toepcb from
 * the handler context.  The call into wr_ack() is elided in this excerpt.
 */
3988 do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx)
3990 struct toepcb *toep = (struct toepcb *)ctx;
3999 * Handler for TRACE_PKT CPL messages. Just sink these packets.
/* CPL handler for TRACE_PKT messages; per the comment above, it just sinks them (body elided). */
4002 do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx)
4009 * Reset a connection that is on a listener's SYN queue or accept queue,
4010 * i.e., one that has not had a struct socket associated with it.
4011 * Must be called from process context.
4013 * Modeled after code in inet_csk_listen_stop().
/*
 * Reset an un-accepted child connection (still on a listener's SYN/accept
 * queue) by sending an ABORT via t3_send_reset() on its toepcb.
 */
4016 t3_reset_listen_child(struct socket *child)
4018 struct tcpcb *tp = so_sototcpcb(child);
4020 t3_send_reset(tp->t_toe);
/*
 * Per-socket callback (see so_listeners_apply_all in t3_disconnect_acceptq):
 * if the child connection is offloaded (TF_TOE), reset it under its inpcb
 * write lock.  'arg' is unused.
 */
4025 t3_child_disconnect(struct socket *so, void *arg)
4027 struct tcpcb *tp = so_sototcpcb(so);
4029 if (tp->t_flags & TF_TOE) {
4030 inp_wlock(tp->t_inpcb);
4031 t3_reset_listen_child(so);
4032 inp_wunlock(tp->t_inpcb);
4037 * Disconnect offloaded established but not yet accepted connections sitting
4038 * on a server's accept_queue. We just send an ABORT_REQ at this point and
4039 * finish off the disconnect later as we may need to wait for the ABORT_RPL.
/*
 * Send ABORT_REQs to all offloaded, established-but-not-accepted children on
 * a listener's accept queue; final teardown happens when ABORT_RPL arrives.
 * NOTE(review): the matching so_lock() is elided in this excerpt.
 */
4042 t3_disconnect_acceptq(struct socket *listen_so)
4046 so_listeners_apply_all(listen_so, t3_child_disconnect, NULL);
4047 so_unlock(listen_so);
4051 * Reset offloaded connections sitting on a server's syn queue. As above
4052 * we send ABORT_REQ and finish off when we get ABORT_RPL.
/*
 * Reset every embryonic connection on a listener's SYN queue: unlink each
 * toepcb, send an ABORT, remove its TID and drop the queue's reference.
 * NOTE(review): the matching so_lock() on lctx->lso is elided in this
 * excerpt.
 */
4056 t3_reset_synq(struct listen_ctx *lctx)
4058 struct toepcb *toep;
4061 while (!LIST_EMPTY(&lctx->synq_head)) {
4062 toep = LIST_FIRST(&lctx->synq_head);
4063 LIST_REMOVE(toep, synq_entry);
4065 t3_send_reset(toep);
4066 cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid);
4067 toepcb_release(toep);
4069 so_unlock(lctx->lso);
/*
 * Program 'nppods' DDP page pods into adapter memory for the pages in
 * gather list 'gl'.  Each iteration builds one ULP_MEM_WRITE work request
 * carrying a single page pod: real pods get the TID/tag/color, the max
 * offset and up to 5 page addresses (4 data + 1 overlap slot, hence
 * pidx = 4*i); the trailing NUM_SENTINEL_PPODS pods are marked invalid.
 * Pods are written at consecutive PPOD_SIZE offsets from the DDP region
 * base (ddp_llimit) indexed by 'tag'.
 */
4074 t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
4075 unsigned int nppods, unsigned int tag, unsigned int maxoff,
4076 unsigned int pg_off, unsigned int color)
4078 unsigned int i, j, pidx;
4081 struct ulp_mem_io *req;
4082 unsigned int tid = toep->tp_tid;
4083 const struct tom_data *td = TOM_DATA(toep->tp_toedev);
4084 unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;
4086 CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)",
4087 gl, nppods, tag, maxoff, pg_off, color);
4089 for (i = 0; i < nppods; ++i) {
4090 m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
4091 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4092 req = mtod(m, struct ulp_mem_io *);
4093 m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE;
4094 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
4096 req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) |
4097 V_ULPTX_CMD(ULP_MEM_WRITE));
4098 req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) |
4099 V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1));
4101 p = (struct pagepod *)(req + 1);
/* NOTE(review): condition reads as "is a real (non-sentinel) pod" — the __predict_false hint looks inverted; confirm against full source. */
4102 if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) {
4103 p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
4104 p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) |
4105 V_PPOD_COLOR(color));
4106 p->pp_max_offset = htonl(maxoff);
4107 p->pp_page_offset = htonl(pg_off);
4109 for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx)
4110 p->pp_addr[j] = pidx < gl->dgl_nelem ?
4111 htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0;
4113 p->pp_vld_tid = 0; /* mark sentinel page pods invalid */
4114 send_or_defer(toep, m, 0);
4115 ppod_addr += PPOD_SIZE;
4121 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command.
4124 mk_cpl_barrier_ulp(struct cpl_barrier *b)
/* The ULP_TXPKT header is overlaid on the start of the CPL message. */
4126 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b;
4128 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
/* Length is expressed in 8-byte flits. */
4129 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8));
4130 b->opcode = CPL_BARRIER;
4134 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command.
4137 mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno)
4139 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
/* NOTE(review): redundant -- txpkt was already initialized to the same
 * value in its declaration above; harmless but could be dropped. */
4141 txpkt = (struct ulp_txpkt *)req;
4142 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
4143 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
4144 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid));
/* Direct the GET_TCB reply to the given queue set/CPU. */
4145 req->cpuno = htons(cpuno);
4149 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command.
4152 mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
4153 unsigned int word, uint64_t mask, uint64_t val)
4155 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
4157 CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
4158 tid, word, mask, val);
4160 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
4161 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
4162 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
/* Suppress the SET_TCB_FIELD completion; callers that need acknowledgment
 * pair this with a GET_TCB in the same compound WR. */
4163 req->reply = V_NO_REPLY(1);
/* Update (req->val & mask) bits of the given TCB word. */
4165 req->word = htons(word);
4166 req->mask = htobe64(mask);
4167 req->val = htobe64(val);
4171 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
4174 mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack,
4175 unsigned int tid, unsigned int credits)
4177 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;
4179 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
4180 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
4181 OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
/* Return 'credits' bytes of RX window and program the delayed-ACK mode
 * from the per-device 'delack' tunable; F_RX_MODULATE asks HW to pace. */
4182 ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
4183 V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) |
4184 V_RX_CREDITS(credits));
/*
 * Cancel HW DDP buffer 'bufidx' for the connection.  Builds one compound
 * BYPASS work request containing: barrier, two SET_TCB_FIELD updates of
 * W_TCB_RX_DDP_FLAGS (make buffer 0 active/valid, invalidate buffer 1),
 * a GET_TCB to learn how much data landed in the buffer, and a trailing
 * barrier.  Caller must hold the receive sockbuf lock.
 */
4188 t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
4192 struct work_request_hdr *wr;
4193 struct cpl_barrier *lock;
4194 struct cpl_set_tcb_field *req;
4195 struct cpl_get_tcb *getreq;
/* NOTE(review): 'p' has no visible use in this fragment -- presumably
 * updated (e.g. get_tcb_count) in elided lines; confirm. */
4196 struct ddp_state *p = &toep->tp_ddp_state;
4199 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
4201 wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
4203 m = m_gethdr_nofail(wrlen);
4204 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4205 wr = mtod(m, struct work_request_hdr *);
4208 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
4209 m->m_pkthdr.len = m->m_len = wrlen;
/* Leading barrier keeps the following CPLs ordered. */
4211 lock = (struct cpl_barrier *)(wr + 1);
4212 mk_cpl_barrier_ulp(lock);
4214 req = (struct cpl_set_tcb_field *)(lock + 1);
4216 CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);
4218 /* Hmmm, not sure if this actually a good thing: reactivating
4219 * the other buffer might be an issue if it has been completed
4220 * already. However, that is unlikely, since the fact that the UBUF
4221 * is not completed indicates that there is no outstanding data.
/* Make buffer 0 the active, valid buffer... */
4224 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4225 V_TF_DDP_ACTIVE_BUF(1) |
4226 V_TF_DDP_BUF0_VALID(1),
4227 V_TF_DDP_ACTIVE_BUF(1));
/* ...and clear buffer 1's valid bit. */
4229 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4230 V_TF_DDP_ACTIVE_BUF(1) |
4231 V_TF_DDP_BUF1_VALID(1), 0);
/* Read the TCB back on this connection's queue set to see how much data
 * was placed before the cancel took effect. */
4233 getreq = (struct cpl_get_tcb *)(req + 1);
4234 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
4236 mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));
4238 /* Keep track of the number of outstanding CPL_GET_TCB requests
/* NOTE(review): T3_TRACE1 references 'so', which is not a visible local
 * in this fragment -- likely a leftover from the Linux original; this
 * only compiles if T3_TRACE1 expands to nothing. */
4243 T3_TRACE1(TIDTB(so),
4244 "t3_cancel_ddpbuf: bufidx %u", bufidx);
4246 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
4250 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
4251 * @sk: the socket associated with the buffers
4252 * @bufidx: index of HW DDP buffer (0 or 1)
4253 * @tag0: new tag for HW buffer 0
4254 * @tag1: new tag for HW buffer 1
4255 * @len: new length for HW buf @bufidx
4257 * Sends a compound WR to overlay a new DDP buffer on top of an existing
4258 * buffer by changing the buffer tag and length and setting the valid and
4259 * active flag accordingly. The caller must ensure the new buffer is at
4260 * least as big as the existing one. Since we typically reprogram both HW
4261 * buffers this function sets both tags for convenience. Read the TCB to
4262 * determine how much data was written into the buffer before the overlay
4266 t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0,
4267 unsigned int tag1, unsigned int len)
4271 struct work_request_hdr *wr;
4272 struct cpl_get_tcb *getreq;
4273 struct cpl_set_tcb_field *req;
/* NOTE(review): 'p' has no visible use in this fragment -- presumably
 * updated (e.g. get_tcb_count) in elided lines; confirm. */
4274 struct ddp_state *p = &toep->tp_ddp_state;
/* NOTE(review): trace label says "t3_setup_ppods" but this is
 * t3_overlay_ddpbuf -- copy/paste slip in the format string (cannot be
 * corrected in a comments-only pass). */
4276 CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)",
4277 bufidx, tag0, tag1, len);
4279 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
4281 wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
4282 m = m_gethdr_nofail(wrlen);
4283 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4284 wr = mtod(m, struct work_request_hdr *);
4285 m->m_pkthdr.len = m->m_len = wrlen;
4289 /* Set the ATOMIC flag to make sure that TP processes the following
4290 * CPLs in an atomic manner and no wire segments can be interleaved.
4292 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
/* Write both buffer tags in one 64-bit TCB update (BUF1 tag sits in the
 * upper 32 bits of the BUF0_TAG word pair). */
4293 req = (struct cpl_set_tcb_field *)(wr + 1);
4294 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG,
4295 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) |
4296 V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32,
4297 V_TCB_RX_DDP_BUF0_TAG(tag0) |
4298 V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32);
/* Buffer 0 path: set its length, then validate it and clear its
 * push-disable bit. */
4301 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN,
4302 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
4303 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
4305 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4306 V_TF_DDP_PUSH_DISABLE_0(1) |
4307 V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
4308 V_TF_DDP_PUSH_DISABLE_0(0) |
4309 V_TF_DDP_BUF0_VALID(1));
/* Buffer 1 path: set its length, validate it and make it the active
 * buffer. */
4311 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN,
4312 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN),
4313 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len));
4315 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4316 V_TF_DDP_PUSH_DISABLE_1(1) |
4317 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
4318 V_TF_DDP_PUSH_DISABLE_1(0) |
4319 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1));
/* Finally read the TCB back to see how much data preceded the overlay. */
4322 getreq = (struct cpl_get_tcb *)(req + 1);
4323 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
4325 /* Keep track of the number of outstanding CPL_GET_TCB requests
/* NOTE(review): T3_TRACE4 references 'sk', which is not a local here --
 * leftover from the Linux original; only compiles if T3_TRACE4 expands
 * to nothing. */
4330 T3_TRACE4(TIDTB(sk),
4331 "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u "
4333 bufidx, tag0, tag1, len);
4335 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
4339 * Sends a compound WR containing all the CPL messages needed to program the
4340 * two HW DDP buffers, namely optionally setting up the length and offset of
4341 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK.
4344 t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0,
4345 unsigned int len1, unsigned int offset1,
4346 uint64_t ddp_flags, uint64_t flag_mask, int modulate)
4350 struct work_request_hdr *wr;
4351 struct cpl_set_tcb_field *req;
/* NOTE(review): 'ddp_flags >> 32' and the masked low half are uint64_t
 * but printed with %08x -- varargs type mismatch; should be cast to
 * unsigned int (or use %jx).  Flagged only; comments-only pass. */
4353 CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ",
4354 len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff);
4357 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
/* Size the WR for exactly the optional pieces we will emit below. */
4359 wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
4360 (len1 ? sizeof(*req) : 0) +
4361 (modulate ? sizeof(struct cpl_rx_data_ack) : 0);
4362 m = m_gethdr_nofail(wrlen);
4363 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4364 wr = mtod(m, struct work_request_hdr *);
4367 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
4368 m->m_pkthdr.len = m->m_len = wrlen;
4370 req = (struct cpl_set_tcb_field *)(wr + 1);
4371 if (len0) { /* program buffer 0 offset and length */
4372 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
4373 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
4374 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
4375 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) |
4376 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0));
4379 if (len1) { /* program buffer 1 offset and length */
4380 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET,
4381 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
4382 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32,
4383 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) |
4384 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32);
/* Apply the caller-supplied DDP flag update (flag_mask selects bits). */
4388 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask,
/* Optionally piggy-back an RX_DATA_ACK returning the credits the host has
 * consumed since the last window update. */
4392 mk_rx_data_ack_ulp(toep,
4393 (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid,
4394 toep->tp_copied_seq - toep->tp_rcv_wup);
4395 toep->tp_rcv_wup = toep->tp_copied_seq;
/* NOTE(review): T3_TRACE5 references 'sk', not a local here -- leftover
 * from the Linux original; only compiles if T3_TRACE5 expands to
 * nothing. */
4399 T3_TRACE5(TIDTB(sk),
4400 "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x "
4402 len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff,
4406 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
/*
 * One-time initialization of mbuf_wrs[]: for a transmit of i scatter/gather
 * elements, mbuf_wrs[i] is the number of work-request descriptors needed,
 * given that each descriptor holds 'wr_len' flits.  Idempotent: a non-zero
 * mbuf_wrs[1] means the table was already filled.
 */
4410 t3_init_wr_tab(unsigned int wr_len)
4414 if (mbuf_wrs[1]) /* already initialized */
4417 for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) {
/* SGL size in flits: each pair of entries packs into 3 flits, plus one
 * flit for an odd trailing entry. */
4418 int sgl_len = (3 * i) / 2 + (i & 1);
/* 1 descriptor if it fits, otherwise continuation descriptors each
 * carrying (wr_len - 1) further flits. */
4421 mbuf_wrs[i] = sgl_len <= wr_len ?
4422 1 : 1 + (sgl_len - 2) / (wr_len - 1);
4429 t3_init_cpl_io(void)
4432 tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
4435 "Chelsio TCP offload: can't allocate sk_buff\n");
4438 skb_put(tcphdr_skb, sizeof(struct tcphdr));
4439 tcphdr_skb->h.raw = tcphdr_skb->data;
4440 memset(tcphdr_skb->data, 0, tcphdr_skb->len);
4443 t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
4444 t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
4445 t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack);
4446 t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data);
4447 t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
4448 t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
4449 t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
4450 t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
4451 t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
4452 t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
4453 t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
4454 t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
4455 t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
4456 t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
4457 t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);