1 /**************************************************************************
3 Copyright (c) 2007-2008, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/kernel.h>
37 #include <sys/limits.h>
41 #include <sys/mutex.h>
42 #include <sys/sockstate.h>
43 #include <sys/sockopt.h>
44 #include <sys/socket.h>
45 #include <sys/sockbuf.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48 #include <sys/protosw.h>
50 #include <sys/vimage.h>
53 #include <net/route.h>
55 #include <netinet/in.h>
56 #include <netinet/in_pcb.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/in_var.h>
61 #include <dev/cxgb/cxgb_osdep.h>
62 #include <dev/cxgb/sys/mbufq.h>
64 #include <netinet/ip.h>
65 #include <netinet/tcp_var.h>
66 #include <netinet/tcp_fsm.h>
67 #include <netinet/tcp_offload.h>
68 #include <netinet/tcp_seq.h>
69 #include <netinet/tcp_syncache.h>
70 #include <netinet/tcp_timer.h>
71 #include <net/route.h>
73 #include <dev/cxgb/t3cdev.h>
74 #include <dev/cxgb/common/cxgb_firmware_exports.h>
75 #include <dev/cxgb/common/cxgb_t3_cpl.h>
76 #include <dev/cxgb/common/cxgb_tcb.h>
77 #include <dev/cxgb/common/cxgb_ctl_defs.h>
78 #include <dev/cxgb/cxgb_offload.h>
81 #include <machine/bus.h>
82 #include <dev/cxgb/sys/mvec.h>
83 #include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
84 #include <dev/cxgb/ulp/tom/cxgb_defs.h>
85 #include <dev/cxgb/ulp/tom/cxgb_tom.h>
86 #include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
87 #include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
88 #include <dev/cxgb/ulp/tom/cxgb_tcp.h>
90 #include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h>
93 * For ULP connections HW may add headers, e.g., for digests, that aren't part
94 * of the messages sent by the host but that are part of the TCP payload and
95 * therefore consume TCP sequence space. Tx connection parameters that
96 * operate in TCP sequence space are affected by the HW additions and need to
97 * compensate for them to accurately track TCP sequence numbers. This array
98 * contains the compensating extra lengths for ULP packets. It is indexed by
99 * a packet's ULP submode.
101 const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8};
105 * This sk_buff holds a fake header-only TCP segment that we use whenever we
106 * need to exploit SW TCP functionality that expects TCP headers, such as
107 * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple
108 * CPUs without locking.
110 static struct mbuf *tcphdr_mbuf __read_mostly;
114 * Size of WRs in bytes. Note that we assume all devices we are handling have
117 static unsigned int wrlen __read_mostly;
120 * The number of WRs needed for an skb depends on the number of page fragments
121 * in the skb and whether it has any payload in its main body. This maps the
122 * length of the gather list represented by an skb into the # of necessary WRs.
124 static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly;
127 * Max receive window supported by HW in bytes. Only a small part of it can
128 * be set through option0, the rest needs to be set through RX_DATA_ACK.
130 #define MAX_RCV_WND ((1U << 27) - 1)
133 * Min receive window. We want it to be large enough to accommodate receive
134 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance.
136 #define MIN_RCV_WND (24 * 1024U)
137 #define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS)
139 #define VALIDATE_SEQ 0
140 #define VALIDATE_SOCK(so)
143 #define TCP_TIMEWAIT 1
147 extern int tcp_do_autorcvbuf;
148 extern int tcp_do_autosndbuf;
149 extern int tcp_autorcvbuf_max;
150 extern int tcp_autosndbuf_max;
152 static void t3_send_reset(struct toepcb *toep);
153 static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status);
154 static inline void free_atid(struct t3cdev *cdev, unsigned int tid);
155 static void handle_syncache_event(int event, void *arg);
158 SBAPPEND(struct sockbuf *sb, struct mbuf *n)
164 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
165 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
166 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
167 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
168 m->m_next, m->m_nextpkt, m->m_flags));
173 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
174 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
175 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
176 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
177 m->m_next, m->m_nextpkt, m->m_flags));
180 KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set"));
181 sbappendstream_locked(sb, n);
185 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
186 m->m_next, m->m_nextpkt, m->m_flags));
192 is_t3a(const struct toedev *dev)
194 return (dev->tod_ttid == TOE_ID_CHELSIO_T3);
198 dump_toepcb(struct toepcb *toep)
200 DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n",
201 toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode,
202 toep->tp_mtu_idx, toep->tp_tid);
204 DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n",
205 toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked,
206 toep->tp_mss_clamp, toep->tp_flags);
209 #ifndef RTALLOC2_DEFINED
210 static struct rtentry *
211 rtalloc2(struct sockaddr *dst, int report, u_long ignflags)
213 struct rtentry *rt = NULL;
215 if ((rt = rtalloc1(dst, report, ignflags)) != NULL)
223 * Determine whether to send a CPL message now or defer it. A message is
224 * deferred if the connection is in SYN_SENT since we don't know the TID yet.
225 * For connections in other states the message is sent immediately.
226 * If through_l2t is set the message is subject to ARP processing, otherwise
227 * it is sent directly.
230 send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t)
232 struct tcpcb *tp = toep->tp_tp;
234 if (__predict_false(tp->t_state == TCPS_SYN_SENT)) {
235 inp_wlock(tp->t_inpcb);
236 mbufq_tail(&toep->out_of_order_queue, m); // defer
237 inp_wunlock(tp->t_inpcb);
238 } else if (through_l2t)
239 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T
241 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly
244 static inline unsigned int
245 mkprio(unsigned int cntrl, const struct toepcb *toep)
251 * Populate a TID_RELEASE WR.  The skb must already be properly sized.
254 mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid)
256 struct cpl_tid_release *req;
258 m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep));
259 m->m_pkthdr.len = m->m_len = sizeof(*req);
260 req = mtod(m, struct cpl_tid_release *);
261 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
263 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
267 make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail)
269 struct tcpcb *tp = so_sototcpcb(so);
270 struct toepcb *toep = tp->t_toe;
271 struct tx_data_wr *req;
274 inp_lock_assert(tp->t_inpcb);
275 snd = so_sockbuf_snd(so);
277 req = mtod(m, struct tx_data_wr *);
278 m->m_len = sizeof(*req);
279 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
280 req->wr_lo = htonl(V_WR_TID(toep->tp_tid));
281 /* len includes the length of any HW ULP additions */
282 req->len = htonl(len);
283 req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
284 /* V_TX_ULP_SUBMODE sets both the mode and submode */
285 req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) |
286 V_TX_URG(/* skb_urgent(skb) */ 0 ) |
287 V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) &&
289 req->sndseq = htonl(tp->snd_nxt);
290 if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
291 req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
292 V_TX_CPU_IDX(toep->tp_qset));
294 /* Sendbuffer is in units of 32KB.
296 if (V_tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE)
297 req->param |= htonl(V_TX_SNDBUF(V_tcp_autosndbuf_max >> 15));
299 req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
302 toep->tp_flags |= TP_DATASENT;
306 #define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */
309 t3_push_frames(struct socket *so, int req_completion)
311 struct tcpcb *tp = so_sototcpcb(so);
312 struct toepcb *toep = tp->t_toe;
314 struct mbuf *tail, *m0, *last;
317 int state, bytes, count, total_bytes;
318 bus_dma_segment_t segs[TX_MAX_SEGS], *segp;
321 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) {
322 DPRINTF("tcp state=%d\n", tp->t_state);
326 state = so_state_get(so);
328 if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) {
329 DPRINTF("disconnecting\n");
334 inp_lock_assert(tp->t_inpcb);
336 snd = so_sockbuf_snd(so);
339 d = TOM_DATA(toep->tp_toedev);
342 last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;
345 DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n",
346 toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last);
348 if (last && toep->tp_m_last == last && snd->sb_sndptroff != 0) {
349 KASSERT(tail, ("sbdrop error"));
350 last = tail = tail->m_next;
353 if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) {
354 DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail);
360 toep->tp_m_last = NULL;
361 while (toep->tp_wr_avail && (tail != NULL)) {
364 if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) {
369 * If the data in tail fits as in-line, then
370 * make an immediate data wr.
372 if (tail->m_len <= IMM_LEN) {
379 make_tx_data_wr(so, m0, bytes, tail);
380 m_append(m0, bytes, mtod(last, caddr_t));
381 KASSERT(!m0->m_next, ("bad append"));
383 while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail)
384 && (tail != NULL) && (count < TX_MAX_SEGS-1)) {
385 bytes += tail->m_len;
389 * technically an abuse to be using this for a VA
390 * but less gross than defining my own structure
391 * or calling pmap_kextract from here :-|
393 segp->ds_addr = (bus_addr_t)tail->m_data;
394 segp->ds_len = tail->m_len;
395 DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n",
396 count, mbuf_wrs[count], tail->m_data, tail->m_len);
400 DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n",
401 toep->tp_wr_avail, count, mbuf_wrs[count], tail);
404 m_set_sgllen(m0, count);
405 make_tx_data_wr(so, m0, bytes, tail);
407 m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep));
410 snd->sb_sndptr = tail;
411 toep->tp_m_last = NULL;
413 toep->tp_m_last = snd->sb_sndptr = last;
416 DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last);
418 snd->sb_sndptroff += bytes;
419 total_bytes += bytes;
420 toep->tp_write_seq += bytes;
421 CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d"
422 " tail=%p sndptr=%p sndptroff=%d",
423 toep->tp_wr_avail, count, mbuf_wrs[count],
424 tail, snd->sb_sndptr, snd->sb_sndptroff);
426 CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d"
427 " tp_m_last=%p tailbuf=%p snd_una=0x%08x",
428 total_bytes, toep->tp_m_last, tail->m_data,
431 CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d"
432 " tp_m_last=%p snd_una=0x%08x",
433 total_bytes, toep->tp_m_last, tp->snd_una);
441 while (i < count && m_get_sgllen(m0)) {
442 if ((count - i) >= 3) {
444 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
445 " len=%d pa=0x%zx len=%d",
446 segs[i].ds_addr, segs[i].ds_len,
447 segs[i + 1].ds_addr, segs[i + 1].ds_len,
448 segs[i + 2].ds_addr, segs[i + 2].ds_len);
450 } else if ((count - i) == 2) {
452 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
454 segs[i].ds_addr, segs[i].ds_len,
455 segs[i + 1].ds_addr, segs[i + 1].ds_len);
458 CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d",
459 segs[i].ds_addr, segs[i].ds_len);
467 * remember credits used
469 m0->m_pkthdr.csum_data = mbuf_wrs[count];
470 m0->m_pkthdr.len = bytes;
471 toep->tp_wr_avail -= mbuf_wrs[count];
472 toep->tp_wr_unacked += mbuf_wrs[count];
474 if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) ||
475 toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
476 struct work_request_hdr *wr = cplhdr(m0);
478 wr->wr_hi |= htonl(F_WR_COMPL);
479 toep->tp_wr_unacked = 0;
481 KASSERT((m0->m_pkthdr.csum_data > 0) &&
482 (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d",
483 m0->m_pkthdr.csum_data));
484 m0->m_type = MT_DONTFREE;
485 enqueue_wr(toep, m0);
486 DPRINTF("sending offload tx with %d bytes in %d segments\n",
488 l2t_send(cdev, m0, toep->tp_l2t);
491 return (total_bytes);
495 * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail
496 * under any circumstances. We take the easy way out and always queue the
497 * message to the write_queue. We can optimize the case where the queue is
498 * already empty though the optimization is probably not worth it.
501 close_conn(struct socket *so)
504 struct cpl_close_con_req *req;
506 struct inpcb *inp = so_sotoinpcb(so);
513 tp = so_sototcpcb(so);
516 if (tp->t_state != TCPS_SYN_SENT)
517 t3_push_frames(so, 1);
519 if (toep->tp_flags & TP_FIN_SENT) {
526 d = TOM_DATA(toep->tp_toedev);
528 m = m_gethdr_nofail(sizeof(*req));
529 m_set_priority(m, CPL_PRIORITY_DATA);
533 toep->tp_flags |= TP_FIN_SENT;
534 req = mtod(m, struct cpl_close_con_req *);
536 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
537 req->wr.wr_lo = htonl(V_WR_TID(tid));
538 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
542 * XXX - need to defer shutdown while there is still data in the queue
545 CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid);
546 cxgb_ofld_send(d->cdev, m);
551 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
555 abort_arp_failure(struct t3cdev *cdev, struct mbuf *m)
557 struct cpl_abort_req *req = cplhdr(m);
559 req->cmd = CPL_ABORT_NO_RST;
560 cxgb_ofld_send(cdev, m);
564 * Send RX credits through an RX_DATA_ACK CPL message. If nofail is 0 we are
565 * permitted to return without sending the message in case we cannot allocate
566 * an sk_buff. Returns the number of credits sent.
569 t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail)
572 struct cpl_rx_data_ack *req;
573 struct toepcb *toep = tp->t_toe;
574 struct toedev *tdev = toep->tp_toedev;
576 m = m_gethdr_nofail(sizeof(*req));
578 DPRINTF("returning %u credits to HW\n", credits);
580 req = mtod(m, struct cpl_rx_data_ack *);
581 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
583 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
584 req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
585 m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep));
586 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
591 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled.
592 * This is only used in DDP mode, so we take the opportunity to also set the
593 * DACK mode and flush any Rx credits.
596 t3_send_rx_modulate(struct toepcb *toep)
599 struct cpl_rx_data_ack *req;
601 m = m_gethdr_nofail(sizeof(*req));
603 req = mtod(m, struct cpl_rx_data_ack *);
604 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
606 m->m_pkthdr.len = m->m_len = sizeof(*req);
608 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
609 req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
611 V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup));
612 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
613 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
614 toep->tp_rcv_wup = toep->tp_copied_seq;
618 * Handle receipt of an urgent pointer.
621 handle_urg_ptr(struct socket *so, uint32_t urg_seq)
623 #ifdef URGENT_DATA_SUPPORTED
624 struct tcpcb *tp = so_sototcpcb(so);
626 urg_seq--; /* initially points past the urgent data, per BSD */
628 if (tp->urg_data && !after(urg_seq, tp->urg_seq))
629 return; /* duplicate pointer */
631 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
632 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
633 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
636 if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len)
637 tom_eat_skb(sk, skb, 0);
639 tp->urg_data = TCP_URG_NOTYET;
640 tp->urg_seq = urg_seq;
645 * Returns true if a socket cannot accept new Rx data.
648 so_no_receive(const struct socket *so)
650 return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING));
654 * Process an urgent data notification.
657 rx_urg_notify(struct toepcb *toep, struct mbuf *m)
659 struct cpl_rx_urg_notify *hdr = cplhdr(m);
660 struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
664 if (!so_no_receive(so))
665 handle_urg_ptr(so, ntohl(hdr->seq));
671 * Handler for RX_URG_NOTIFY CPL messages.
674 do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx)
676 struct toepcb *toep = (struct toepcb *)ctx;
678 rx_urg_notify(toep, m);
683 is_delack_mode_valid(struct toedev *dev, struct toepcb *toep)
685 return (toep->tp_ulp_mode ||
686 (toep->tp_ulp_mode == ULP_MODE_TCPDDP &&
687 dev->tod_ttid >= TOE_ID_CHELSIO_T3));
691 * Set of states for which we should return RX credits.
693 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)
696 * Called after some received data has been read. It returns RX credits
697 * to the HW for the amount of data processed.
700 t3_cleanup_rbuf(struct tcpcb *tp, int copied)
702 struct toepcb *toep = tp->t_toe;
705 int dack_mode, must_send, read;
706 u32 thres, credits, dack = 0;
709 so = inp_inpcbtosocket(tp->t_inpcb);
710 rcv = so_sockbuf_rcv(so);
712 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
713 (tp->t_state == TCPS_FIN_WAIT_2))) {
716 toep->tp_copied_seq += copied;
723 inp_lock_assert(tp->t_inpcb);
727 toep->tp_copied_seq += copied;
729 read = toep->tp_enqueued_bytes - rcv->sb_cc;
730 toep->tp_copied_seq += read;
732 credits = toep->tp_copied_seq - toep->tp_rcv_wup;
733 toep->tp_enqueued_bytes = rcv->sb_cc;
736 if (credits > rcv->sb_mbmax) {
737 log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n",
738 toep->tp_copied_seq, toep->tp_rcv_wup, credits);
739 credits = rcv->sb_mbmax;
744 * XXX this won't accurately reflect credit return - we need
745 * to look at the difference between the amount that has been
746 * put in the recv sockbuf and what is there now
749 if (__predict_false(!credits))
752 dev = toep->tp_toedev;
753 thres = TOM_TUNABLE(dev, rx_credit_thres);
755 if (__predict_false(thres == 0))
758 if (is_delack_mode_valid(dev, toep)) {
759 dack_mode = TOM_TUNABLE(dev, delack);
760 if (__predict_false(dack_mode != toep->tp_delack_mode)) {
761 u32 r = tp->rcv_nxt - toep->tp_delack_seq;
763 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp)
764 dack = F_RX_DACK_CHANGE |
765 V_RX_DACK_MODE(dack_mode);
768 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
771 * For coalescing to work effectively ensure the receive window has
772 * at least 16KB left.
774 must_send = credits + 16384 >= tp->rcv_wnd;
776 if (must_send || credits >= thres)
777 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send);
781 cxgb_toe_disconnect(struct tcpcb *tp)
785 DPRINTF("cxgb_toe_disconnect\n");
787 so = inp_inpcbtosocket(tp->t_inpcb);
793 cxgb_toe_reset(struct tcpcb *tp)
795 struct toepcb *toep = tp->t_toe;
802 tp->t_flags &= ~TF_TOE;
809 cxgb_toe_send(struct tcpcb *tp)
813 DPRINTF("cxgb_toe_send\n");
814 dump_toepcb(tp->t_toe);
816 so = inp_inpcbtosocket(tp->t_inpcb);
817 t3_push_frames(so, 1);
822 cxgb_toe_rcvd(struct tcpcb *tp)
825 inp_lock_assert(tp->t_inpcb);
827 t3_cleanup_rbuf(tp, 0);
833 cxgb_toe_detach(struct tcpcb *tp)
838 * XXX how do we handle teardown in the SYN_SENT state?
841 inp_lock_assert(tp->t_inpcb);
848 tp->t_flags &= ~TF_TOE;
853 static struct toe_usrreqs cxgb_toe_usrreqs = {
854 	.tu_disconnect = cxgb_toe_disconnect,
855 	.tu_reset = cxgb_toe_reset,
856 	.tu_send = cxgb_toe_send,
857 	.tu_rcvd = cxgb_toe_rcvd,
	/* .tu_detach was initialized twice; in C the later duplicate designated
	 * initializer silently overrides the earlier one (C99 6.7.8), so the
	 * repeat was redundant — keep a single entry. */
858 	.tu_detach = cxgb_toe_detach,
860 	.tu_syncache_event = handle_syncache_event,
865 __set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word,
866 uint64_t mask, uint64_t val, int no_reply)
868 struct cpl_set_tcb_field *req;
870 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
871 toep->tp_tid, word, mask, val);
873 req = mtod(m, struct cpl_set_tcb_field *);
874 m->m_pkthdr.len = m->m_len = sizeof(*req);
875 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
877 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid));
878 req->reply = V_NO_REPLY(no_reply);
880 req->word = htons(word);
881 req->mask = htobe64(mask);
882 req->val = htobe64(val);
884 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
885 send_or_defer(toep, m, 0);
889 t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val)
892 	struct tcpcb *tp = toep->tp_tp;
	/* Skip the update if the connection is already closed or being torn
	 * down — the TCB is (or soon will be) invalid in that case. */
897 	if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) {
898 		printf("not setting field\n");
902 	m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field));
904 	__set_tcb_field(toep, m, word, mask, val, 1);
908 * Set one of the t_flags bits in the TCB.
911 set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val)
914 t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos);
918 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting.
921 t3_set_nagle(struct toepcb *toep)
923 struct tcpcb *tp = toep->tp_tp;
925 set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY));
929 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting.
932 t3_set_keepalive(struct toepcb *toep, int on_off)
935 set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off);
939 t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off)
941 set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off);
945 t3_set_dack_mss(struct toepcb *toep, int on_off)
948 set_tcb_tflag(toep, S_TF_DACK_MSS, on_off);
952 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting.
955 t3_set_tos(struct toepcb *toep)
957 int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb);
959 t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS),
965 * In DDP mode, TP fails to schedule a timer to push RX data to the host when
966 * DDP is disabled (data is delivered to freelist). [Note that, the peer should
967 * set the PSH bit in the last segment, which would trigger delivery.]
968 * We work around the issue by setting a DDP buffer in a partial placed state,
969 * which guarantees that TP will schedule a timer.
971 #define TP_DDP_TIMER_WORKAROUND_MASK\
972 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\
973 ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\
974 V_TCB_RX_DDP_BUF0_LEN(3)) << 32))
975 #define TP_DDP_TIMER_WORKAROUND_VAL\
976 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\
977 ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\
981 t3_enable_ddp(struct toepcb *toep, int on)
985 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1),
988 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS,
990 TP_DDP_TIMER_WORKAROUND_MASK,
992 TP_DDP_TIMER_WORKAROUND_VAL);
997 t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color)
999 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx,
1000 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
1005 t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
1009 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET,
1010 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
1011 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
1012 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) |
1013 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
1015 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET,
1016 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
1017 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32),
1018 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) |
1019 V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32));
1023 t3_set_cong_control(struct socket *so, const char *name)
1025 #ifdef CONGESTION_CONTROL_SUPPORTED
1028 for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++)
1029 if (!strcmp(name, t3_cong_ops[cong_algo].name))
1032 if (cong_algo >= ARRAY_SIZE(t3_cong_ops))
1039 t3_get_tcb(struct toepcb *toep)
1041 struct cpl_get_tcb *req;
1042 struct tcpcb *tp = toep->tp_tp;
1043 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
1048 inp_lock_assert(tp->t_inpcb);
1049 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
1050 req = mtod(m, struct cpl_get_tcb *);
1051 m->m_pkthdr.len = m->m_len = sizeof(*req);
1052 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1054 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid));
1055 req->cpuno = htons(toep->tp_qset);
1057 if (tp->t_state == TCPS_SYN_SENT)
1058 mbufq_tail(&toep->out_of_order_queue, m); // defer
1060 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
1065 so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid)
1070 cxgb_insert_tid(d->cdev, d->client, toep, tid);
1074 * find_best_mtu - find the entry in the MTU table closest to an MTU
1076 * @mtu: the target MTU
1078 * Returns the index of the value in the MTU table that is closest to but
1079 * does not exceed the target MTU.
1082 find_best_mtu(const struct t3c_data *d, unsigned short mtu)
1086 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
1092 select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu)
1097 struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt;
1100 tp->t_maxseg = pmtu - 40;
1101 if (tp->t_maxseg < td->mtus[0] - 40)
1102 tp->t_maxseg = td->mtus[0] - 40;
1103 idx = find_best_mtu(td, tp->t_maxseg + 40);
1105 tp->t_maxseg = td->mtus[idx] - 40;
1107 idx = find_best_mtu(td, pmtu);
1113 free_atid(struct t3cdev *cdev, unsigned int tid)
1115 struct toepcb *toep = cxgb_free_atid(cdev, tid);
1118 toepcb_release(toep);
1122 * Release resources held by an offload connection (TID, L2T entry, etc.)
1125 t3_release_offload_resources(struct toepcb *toep)
1127 struct tcpcb *tp = toep->tp_tp;
1128 struct toedev *tdev = toep->tp_toedev;
1129 struct t3cdev *cdev;
1131 unsigned int tid = toep->tp_tid;
1132 struct sockbuf *rcv;
1134 CTR0(KTR_TOM, "t3_release_offload_resources");
1139 cdev = TOEP_T3C_DEV(toep);
1144 t3_release_ddp_resources(toep);
1146 #ifdef CTRL_SKB_CACHE
1147 kfree_skb(CTRL_SKB_CACHE(tp));
1148 CTRL_SKB_CACHE(tp) = NULL;
1151 if (toep->tp_wr_avail != toep->tp_wr_max) {
1152 purge_wr_queue(toep);
1153 reset_wr_list(toep);
1157 l2t_release(L2DATA(cdev), toep->tp_l2t);
1158 toep->tp_l2t = NULL;
1162 inp_lock_assert(tp->t_inpcb);
1163 so = inp_inpcbtosocket(tp->t_inpcb);
1164 rcv = so_sockbuf_rcv(so);
1166 * cancel any offloaded reads
1171 tp->t_flags &= ~TF_TOE;
1172 if (toep->tp_ddp_state.user_ddp_pending) {
1173 t3_cancel_ubuf(toep, rcv);
1174 toep->tp_ddp_state.user_ddp_pending = 0;
1176 so_sorwakeup_locked(so);
1180 if (toep->tp_state == TCPS_SYN_SENT) {
1181 free_atid(cdev, tid);
1183 __skb_queue_purge(&tp->out_of_order_queue);
1185 } else { // we have TID
1186 cxgb_remove_tid(cdev, toep, tid);
1187 toepcb_release(toep);
1190 log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
1195 install_offload_ops(struct socket *so)
1197 struct tcpcb *tp = so_sototcpcb(so);
1199 KASSERT(tp->t_toe != NULL, ("toepcb not set"));
1201 t3_install_socket_ops(so);
1202 tp->t_flags |= TF_TOE;
1203 tp->t_tu = &cxgb_toe_usrreqs;
1207 * Determine the receive window scaling factor given a target max
1211 select_rcv_wscale(int space)
1215 if (space > MAX_RCV_WND)
1216 space = MAX_RCV_WND;
1218 if (V_tcp_do_rfc1323)
1219 for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;
1225 * Determine the receive window size for a socket.
1227 static unsigned long
1228 select_rcv_wnd(struct toedev *dev, struct socket *so)
1230 struct tom_data *d = TOM_DATA(dev);
1232 unsigned int max_rcv_wnd;
1233 struct sockbuf *rcv;
1235 rcv = so_sockbuf_rcv(so);
1237 if (V_tcp_do_autorcvbuf)
1238 wnd = V_tcp_autorcvbuf_max;
1240 wnd = rcv->sb_hiwat;
1245 * For receive coalescing to work effectively we need a receive window
1246 * that can accommodate a coalesced segment.
1248 if (wnd < MIN_RCV_WND)
1252 max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
1253 (uint32_t)d->rx_page_size * 23 :
1256 return min(wnd, max_rcv_wnd);
1260 * Assign offload parameters to some socket fields. This code is used by
1261 * both active and passive opens.
1264 init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
1265 struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
1267 struct tcpcb *tp = so_sototcpcb(so);
1268 struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);
1269 struct sockbuf *snd, *rcv;
1272 SOCK_LOCK_ASSERT(so);
1275 snd = so_sockbuf_snd(so);
1276 rcv = so_sockbuf_rcv(so);
1278 log(LOG_INFO, "initializing offload socket\n");
1280 * We either need to fix push frames to work with sbcompress
1281 * or we need to add this
1283 snd->sb_flags |= SB_NOCOALESCE;
1284 rcv->sb_flags |= SB_NOCOALESCE;
1288 toep->tp_toedev = dev;
1292 toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
1293 toep->tp_wr_unacked = 0;
1294 toep->tp_delack_mode = 0;
1296 toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
1301 tp->rcv_wnd = select_rcv_wnd(dev, so);
1303 toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
1304 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
1305 toep->tp_qset_idx = 0;
1307 reset_wr_list(toep);
1308 DPRINTF("initialization done\n");
1312 * The next two functions calculate the option 0 value for a socket.
1314 static inline unsigned int
1315 calc_opt0h(struct socket *so, int mtu_idx)
1317 struct tcpcb *tp = so_sototcpcb(so);
1318 int wscale = select_rcv_wscale(tp->rcv_wnd);
1320 return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
1321 V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
1322 V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
1325 static inline unsigned int
1326 calc_opt0l(struct socket *so, int ulp_mode)
1328 struct tcpcb *tp = so_sototcpcb(so);
1331 val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) |
1332 V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));
1334 DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val);
1338 static inline unsigned int
1339 calc_opt2(const struct socket *so, struct toedev *dev)
1343 flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);
1345 return (V_FLAVORS_VALID(flv_valid) |
1346 V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
1351 count_pending_wrs(const struct toepcb *toep)
1353 const struct mbuf *m;
1356 wr_queue_walk(toep, m)
1357 n += m->m_pkthdr.csum_data;
1363 (((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
/*
 * mk_act_open_req - fill an mbuf with a CPL_ACT_OPEN_REQ for the given
 * ATID/L2T entry: sets CPL priority, work-request header, the connection
 * 4-tuple and the option0/option2 words computed above.
 * NOTE(review): both inp_4tuple_get() and the manual lport/fport/laddr/faddr
 * copies appear below — presumably alternate branches of an elided #if;
 * confirm against the unelided source.
 */
1367 mk_act_open_req(struct socket *so, struct mbuf *m,
1368     unsigned int atid, const struct l2t_entry *e)
1370 	struct cpl_act_open_req *req;
1371 	struct inpcb *inp = so_sotoinpcb(so);
1372 	struct tcpcb *tp = inp_inpcbtotcpcb(inp);
1373 	struct toepcb *toep = tp->t_toe;
1374 	struct toedev *tdev = toep->tp_toedev;
1376 	m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));
1378 	req = mtod(m, struct cpl_act_open_req *);
1379 	m->m_pkthdr.len = m->m_len = sizeof(*req);
1381 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1383 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
1384 	inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port);
1386 	req->local_port = inp->inp_lport;
1387 	req->peer_port = inp->inp_fport;
1388 	memcpy(&req->local_ip, &inp->inp_laddr, 4);
1389 	memcpy(&req->peer_ip, &inp->inp_faddr, 4);
1391 	req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
1392 	    V_TX_CHANNEL(e->smt_idx));
1393 	req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
1395 	req->opt2 = htonl(calc_opt2(so, tdev));
/*
 * act_open_rpl_status_to_errno - map a hardware ACT_OPEN_RPL status code
 * to a FreeBSD errno.  The switch header, some fall-through returns and
 * the default case are elided from this view.
 */
1400  * Convert an ACT_OPEN_RPL status to an errno.
1403 act_open_rpl_status_to_errno(int status)
1406 	case CPL_ERR_CONN_RESET:
1407 		return (ECONNREFUSED);
1408 	case CPL_ERR_ARP_MISS:
1409 		return (EHOSTUNREACH);
1410 	case CPL_ERR_CONN_TIMEDOUT:
1412 	case CPL_ERR_TCAM_FULL:
1414 	case CPL_ERR_CONN_EXIST:
1415 		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
1416 		return (EADDRINUSE);
/*
 * fail_act_open - tear down a failed active open: release offload
 * resources, then drop the connection with the given errno.
 * Caller holds the inpcb write lock; this routine drops it
 * (see inp_wunlock below) before returning.
 */
1423 fail_act_open(struct toepcb *toep, int errno)
1425 	struct tcpcb *tp = toep->tp_tp;
1427 	t3_release_offload_resources(toep);
1429 	inp_wunlock(tp->t_inpcb);
1430 	tcp_offload_drop(tp, errno);
1434 	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
/*
 * active_open_failed - handle an ACT_OPEN_RPL failure by converting the
 * CPL status to an errno and failing the open (which drops the inpcb lock).
 * NOTE(review): the inet_csk/icsk retry code below is Linux-only residue —
 * presumably inside an elided #if 0/#ifdef block; confirm before touching.
 */
1439  * Handle active open failures.
1442 active_open_failed(struct toepcb *toep, struct mbuf *m)
1444 	struct cpl_act_open_rpl *rpl = cplhdr(m);
1447 	if (toep->tp_tp == NULL)
1450 	inp = toep->tp_tp->t_inpcb;
1453 	 * Don't handle connection retry for now
1456 	struct inet_connection_sock *icsk = inet_csk(sk);
1458 	if (rpl->status == CPL_ERR_CONN_EXIST &&
1459 	    icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
1460 		icsk->icsk_retransmit_timer.function = act_open_retry_timer;
1461 		sk_reset_timer(so, &icsk->icsk_retransmit_timer,
1468 	 * drops the inpcb lock
1470 	fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
/*
 * act_open_has_tid - true when a failed active open still holds a TID that
 * must be released (TCAM-full, already-exists and ARP-miss failures never
 * allocated one).
 */
1478  * Return whether a failed active open has allocated a TID
1481 act_open_has_tid(int status)
1483 	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
1484 	    status != CPL_ERR_ARP_MISS;
/*
 * do_act_open_rpl - CPL handler for ACT_OPEN_RPL.  On non-T3A parts,
 * queue the TID for release if one was allocated, then run the common
 * failure path.  (Success handling, if any, is elided from this view.)
 */
1488  * Process an ACT_OPEN_RPL CPL message.
1491 do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
1493 	struct toepcb *toep = (struct toepcb *)ctx;
1494 	struct cpl_act_open_rpl *rpl = cplhdr(m);
1496 	if (cdev->type != T3A && act_open_has_tid(rpl->status))
1497 		cxgb_queue_tid_release(cdev, GET_TID(rpl));
1499 	active_open_failed(toep, m);
/*
 * act_open_req_arp_failure - ARP-resolution failure callback installed on
 * the ACT_OPEN_REQ mbuf: if the connection is still mid-handshake, fail
 * the open with EHOSTUNREACH.
 * NOTE(review): fail_act_open() visibly takes a (struct toepcb *, int),
 * but the call below passes `so` — looks like a bug (should be `toep`);
 * verify against the unelided source / upstream before relying on it.
 */
1504  * Handle an ARP failure for an active open.   XXX purge ofo queue
1506  * XXX badly broken for crossed SYNs as the ATID is no longer valid.
1507  * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should
1508  * check SOCK_DEAD or sk->sk_sock.  Or maybe generate the error here but don't
1509  * free the atid.  Hmm.
1513 act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m)
1515 	struct toepcb *toep = m_get_toep(m);
1516 	struct tcpcb *tp = toep->tp_tp;
1517 	struct inpcb *inp = tp->t_inpcb;
1521 	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
1523 		 * drops the inpcb lock
1525 		fail_act_open(so, EHOSTUNREACH);
1526 		printf("freeing %p\n", m);
/*
 * t3_connect - initiate an offloaded active open: allocate a toepcb and
 * ATID, resolve the L2T entry, build and send the ACT_OPEN_REQ (with an
 * ARP-failure handler attached), install offload ops on the socket, and
 * enable DDP when the chosen ULP mode requires it.  Error unwinding
 * (free_atid / ENOMEM) is at the tail; several gotos/labels are elided.
 */
1534  * Send an active open request.
1537 t3_connect(struct toedev *tdev, struct socket *so,
1538     struct rtentry *rt, struct sockaddr *nam)
1541 	struct l2t_entry *e;
1542 	struct tom_data *d = TOM_DATA(tdev);
1543 	struct inpcb *inp = so_sotoinpcb(so);
1544 	struct tcpcb *tp = intotcpcb(inp);
1545 	struct toepcb *toep; /* allocated by init_offload_socket */
1549 	toep = toepcb_alloc();
1553 	if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0)
1556 	e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam);
1560 	inp_lock_assert(inp);
1561 	m = m_gethdr(MT_DATA, M_WAITOK);
1564 	m->m_toe.mt_toepcb = tp->t_toe;
1565 	set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure);
1569 	init_offload_socket(so, tdev, atid, e, rt, toep);
1571 	install_offload_ops(so);
1573 	mk_act_open_req(so, m, atid, e);
1578 	m_set_toep(m, tp->t_toe);
1580 	toep->tp_state = TCPS_SYN_SENT;
1581 	l2t_send(d->cdev, (struct mbuf *)m, e);
1583 	if (toep->tp_ulp_mode)
1584 		t3_enable_ddp(toep, 0);
1588 	printf("failing connect - free atid\n");
1590 	free_atid(d->cdev, atid);
1592 	printf("return ENOMEM\n");
/*
 * t3_send_reset - send a CPL_ABORT_REQ for the connection, guarding
 * against duplicate aborts (TP_ABORT_SHUTDOWN) and post-close sends.
 * Purges the socket send buffer, and on T3A after a CLOSE_CON_REQ uses
 * the POST_CLOSE abort mode.  Requests issued while still in SYN_SENT
 * are deferred on the out-of-order queue instead of being sent.
 * Caller must hold the inpcb lock (asserted below).
 */
1597  * Send an ABORT_REQ message.  Cannot fail.  This routine makes sure we do
1598  * not send multiple ABORT_REQs for the same connection and also that we do
1599  * not try to send a message after the connection has closed.  Returns 1 if
1600  * an ABORT_REQ wasn't generated after all, 0 otherwise.
1603 t3_send_reset(struct toepcb *toep)
1606 	struct cpl_abort_req *req;
1607 	unsigned int tid = toep->tp_tid;
1608 	int mode = CPL_ABORT_SEND_RST;
1609 	struct tcpcb *tp = toep->tp_tp;
1610 	struct toedev *tdev = toep->tp_toedev;
1611 	struct socket *so = NULL;
1613 	struct sockbuf *snd;
1616 	inp_lock_assert(tp->t_inpcb);
1617 	so = inp_inpcbtosocket(tp->t_inpcb);
1620 	if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) ||
1623 	toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN);
1625 	snd = so_sockbuf_snd(so);
1626 	/* Purge the send queue so we don't send anything after an abort. */
1629 	if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev))
1630 		mode |= CPL_ABORT_POST_CLOSE_REQ;
1632 	m = m_gethdr_nofail(sizeof(*req));
1633 	m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep));
1634 	set_arp_failure_handler(m, abort_arp_failure);
1636 	req = mtod(m, struct cpl_abort_req *);
1637 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
1638 	req->wr.wr_lo = htonl(V_WR_TID(tid));
1639 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
1640 	req->rsvd0 = tp ? htonl(tp->snd_nxt) : 0;
1641 	req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
1643 	if (tp && (tp->t_state == TCPS_SYN_SENT))
1644 		mbufq_tail(&toep->out_of_order_queue, m);	// defer
1646 		l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);
/*
 * t3_ip_ctloutput - IP-level socket-option handler for offloaded sockets.
 * Only IP_TOS is supported (IP_OPTIONS is explicitly rejected); validates
 * the TOS value, stores it in the inpcb and pushes it to the hardware TCB
 * via t3_set_tos().
 * NOTE(review): both inp_ip_tos_set() and the direct inp_ip_tos assignment
 * appear — presumably alternate elided #if branches; confirm.
 */
1650 t3_ip_ctloutput(struct socket *so, struct sockopt *sopt)
1655 	if (sopt->sopt_name == IP_OPTIONS)
1656 		return (ENOPROTOOPT);
1658 	if (sopt->sopt_name != IP_TOS)
1659 		return (EOPNOTSUPP);
1661 	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
1666 	if (optval > IPTOS_PREC_CRITIC_ECP)
1669 	inp = so_sotoinpcb(so);
1671 	inp_ip_tos_set(inp, optval);
1673 	inp->inp_ip_tos = optval;
1675 	t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe);
/*
 * t3_tcp_ctloutput - TCP-level socket-option handler for offloaded
 * sockets.  Supports TCP_CONGESTION (set only; copies the algorithm name
 * from userland and hands it to t3_set_cong_control) and TCP_NODELAY
 * (updates TF_NODELAY and re-programs Nagle on the hardware when the
 * flag actually changed).  GET direction is unimplemented for both.
 */
1682 t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt)
1687 	if (sopt->sopt_name != TCP_CONGESTION &&
1688 	    sopt->sopt_name != TCP_NODELAY)
1689 		return (EOPNOTSUPP);
1691 	if (sopt->sopt_name == TCP_CONGESTION) {
1692 		char name[TCP_CA_NAME_MAX];
1693 		int optlen = sopt->sopt_valsize;
1696 		if (sopt->sopt_dir == SOPT_GET) {
1697 			KASSERT(0, ("unimplemented"));
1698 			return (EOPNOTSUPP);
1704 		err = copyinstr(sopt->sopt_val, name,
1705 		    min(TCP_CA_NAME_MAX - 1, optlen), &copied);
1711 		tp = so_sototcpcb(so);
1713 		 * XXX I need to revisit this
1715 		if ((err = t3_set_cong_control(so, name)) == 0) {
1716 #ifdef CONGESTION_CONTROL_SUPPORTED
1717 			tp->t_cong_control = strdup(name, M_CXGB);
1726 		if (sopt->sopt_dir == SOPT_GET)
1727 			return (EOPNOTSUPP);
1729 		err = sooptcopyin(sopt, &optval, sizeof optval,
1735 		inp = so_sotoinpcb(so);
1736 		tp = inp_inpcbtotcpcb(inp);
1740 		oldval = tp->t_flags;
1742 			tp->t_flags |= TF_NODELAY;
1744 			tp->t_flags &= ~TF_NODELAY;
1748 		if (oldval != tp->t_flags && (tp->t_toe != NULL))
1749 			t3_set_nagle(tp->t_toe);
/*
 * t3_ctloutput - top-level ctloutput for offloaded sockets: dispatch to
 * the IP- or TCP-level handler, and fall back to the stock tcp_ctloutput
 * for anything they report as EOPNOTSUPP.
 */
1757 t3_ctloutput(struct socket *so, struct sockopt *sopt)
1761 	if (sopt->sopt_level != IPPROTO_TCP)
1762 		err = t3_ip_ctloutput(so, sopt);
1764 		err = t3_tcp_ctloutput(so, sopt);
1766 	if (err != EOPNOTSUPP)
1769 	return (tcp_ctloutput(so, sopt));
/*
 * need_rst_on_excess_rx - predicate used by handle_excess_rx(); the body
 * is entirely elided from this view, only the contract comment remains.
 */
1773  * Returns true if we need to explicitly request RST when we receive new data
1774  * on an RX-closed connection.
1777 need_rst_on_excess_rx(const struct toepcb *toep)
/*
 * handle_excess_rx - data arrived on a socket that can no longer accept
 * it: send an abort/RST unless one is already in flight.  (Disposal of
 * the mbuf is elided from this view.)
 */
1783  * Handles Rx data that arrives in a state where the socket isn't accepting
1787 handle_excess_rx(struct toepcb *toep, struct mbuf *m)
1790 	if (need_rst_on_excess_rx(toep) &&
1791 	    !(toep->tp_flags & TP_ABORT_SHUTDOWN))
1792 		t3_send_reset(toep);
/*
 * tcb_rpl_as_ddp_complete - treat a CPL_GET_TCB_RPL as a DDP completion:
 * extract the DDP buffer offset (buf 0 or buf 1) from the raw TCB words,
 * compute how many bytes were placed since the last completion, update
 * rcv_nxt and the ddp_buf_state, tag the mbuf with DDP metadata and wake
 * the receiver.  Handles the "UBUF cancel raced with completion" cases.
 * NOTE(review): `sk`/`printk` references below are Linux residue,
 * presumably inside elided #ifdef regions — confirm before editing.
 */
1797  * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE)
1798  * by getting the DDP offset from the TCB.
1801 tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
1803 	struct ddp_state *q = &toep->tp_ddp_state;
1804 	struct ddp_buf_state *bsp;
1805 	struct cpl_get_tcb_rpl *hdr;
1806 	unsigned int ddp_offset;
1809 	struct sockbuf *rcv;
1816 	so = inp_inpcbtosocket(tp->t_inpcb);
1818 	inp_lock_assert(tp->t_inpcb);
1819 	rcv = so_sockbuf_rcv(so);
1822 	/* Note that we only accout for CPL_GET_TCB issued by the DDP code.
1823 	 * We really need a cookie in order to dispatch the RPLs.
1827 	/* It is a possible that a previous CPL already invalidated UBUF DDP
1828 	 * and moved the cur_buf idx and hence no further processing of this
1829 	 * skb is required. However, the app might be sleeping on
1830 	 * !q->get_tcb_count and we need to wake it up.
1832 	if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
1833 		int state = so_state_get(so);
1836 		if (__predict_true((state & SS_NOFDREF) == 0))
1837 			so_sorwakeup_locked(so);
1839 			sockbuf_unlock(rcv);
1844 	bsp = &q->buf_state[q->cur_buf];
1846 	tcb = (__be64 *)(hdr + 1);
1847 	if (q->cur_buf == 0) {
1848 		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
1849 		ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
1851 		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
1852 		ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
1854 	ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
1855 	m->m_cur_offset = bsp->cur_offset;
1856 	bsp->cur_offset = ddp_offset;
1857 	m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;
1860 	    "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
1861 	    q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
1862 	KASSERT(ddp_offset >= m->m_cur_offset,
1863 	    ("ddp_offset=%u less than cur_offset=%u",
1864 		ddp_offset, m->m_cur_offset));
1868 	unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;
1870 	t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
1871 	ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;
1873 	t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
1874 	rcv_nxt = t >> S_TCB_RCV_NXT;
1875 	rcv_nxt &= M_TCB_RCV_NXT;
1877 	t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
1878 	rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
1879 	rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;
1881 	T3_TRACE2(TIDTB(sk),
1882 	    "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
1883 	    ddp_flags, rcv_nxt - rx_hdr_offset);
1885 	    "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
1886 	    tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
1888 	    "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
1889 	    rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
1891 	    "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
1892 	    q->buf_state[0].flags, q->buf_state[1].flags);
1896 	if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
1897 		handle_excess_rx(toep, m);
1902 	if ((int)m->m_pkthdr.len < 0) {
1903 		t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
1906 	if (bsp->flags & DDP_BF_NOCOPY) {
1909 		    "tcb_rpl_as_ddp_complete: CANCEL UBUF");
1911 		if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1912 			printk("!cancel_ubuf");
1913 			t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
1916 		m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
1917 		bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
1919 	} else if (bsp->flags & DDP_BF_NOFLIP) {
1921 		m->m_ddp_flags = 1;    /* always a kernel buffer */
1923 		/* now HW buffer carries a user buffer */
1924 		bsp->flags &= ~DDP_BF_NOFLIP;
1925 		bsp->flags |= DDP_BF_NOCOPY;
1927 	/* It is possible that the CPL_GET_TCB_RPL doesn't indicate
1928 	 * any new data in which case we're done. If in addition the
1929 	 * offset is 0, then there wasn't a completion for the kbuf
1930 	 * and we need to decrement the posted count.
1932 	if (m->m_pkthdr.len == 0) {
1933 		if (ddp_offset == 0) {
1935 			bsp->flags |= DDP_BF_NODATA;
1937 		sockbuf_unlock(rcv);
1942 		sockbuf_unlock(rcv);
1944 		/* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
1945 		 * but it got here way late and nobody cares anymore.
1951 	m->m_ddp_gl = (unsigned char *)bsp->gl;
1952 	m->m_flags |= M_DDP;
1953 	m->m_seq = tp->rcv_nxt;
1954 	tp->rcv_nxt += m->m_pkthdr.len;
1955 	tp->t_rcvtime = ticks;
1956 	CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
1957 		  m->m_seq, q->cur_buf, m->m_pkthdr.len);
1958 	if (m->m_pkthdr.len == 0) {
1959 		q->user_ddp_pending = 0;
1964 	state = so_state_get(so);
1965 	if (__predict_true((state & SS_NOFDREF) == 0))
1966 		so_sorwakeup_locked(so);
1968 		sockbuf_unlock(rcv);
/*
 * do_get_tcb_rpl - CPL handler for GET_TCB_RPL.  A missing toepcb is
 * tolerated (socket may already be gone); otherwise process the reply as
 * a DDP completion under the inpcb write lock.
 */
1972  * Process a CPL_GET_TCB_RPL.  These can also be generated by the DDP code,
1973  * in that case they are similar to DDP completions.
1976 do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
1978 	struct toepcb *toep = (struct toepcb *)ctx;
1980 	/* OK if socket doesn't exist */
1982 		printf("null toep in do_get_tcb_rpl\n");
1983 		return (CPL_RET_BUF_DONE);
1986 	inp_wlock(toep->tp_tp->t_inpcb);
1987 	tcb_rpl_as_ddp_complete(toep, m);
1988 	inp_wunlock(toep->tp_tp->t_inpcb);
/*
 * handle_ddp_data - a plain RX_DATA arrived while in DDP mode, meaning
 * the HW flushed data that bypassed DDP placement.  Account the bytes
 * already placed in the current DDP buffer (hdr->seq - rcv_nxt), tag the
 * mbuf with DDP metadata, advance rcv_nxt/cur_offset, and disable
 * re-arming of user DDP for this connection.
 * NOTE(review): KASSERT format below says "0x08%x" — looks like a typo
 * for "0x%08x"; it is a runtime string so it is left untouched here.
 */
1994 handle_ddp_data(struct toepcb *toep, struct mbuf *m)
1996 	struct tcpcb *tp = toep->tp_tp;
1998 	struct ddp_state *q;
1999 	struct ddp_buf_state *bsp;
2000 	struct cpl_rx_data *hdr = cplhdr(m);
2001 	unsigned int rcv_nxt = ntohl(hdr->seq);
2002 	struct sockbuf *rcv;
2004 	if (tp->rcv_nxt == rcv_nxt)
2007 	inp_lock_assert(tp->t_inpcb);
2008 	so  = inp_inpcbtosocket(tp->t_inpcb);
2009 	rcv = so_sockbuf_rcv(so);
2012 	q = &toep->tp_ddp_state;
2013 	bsp = &q->buf_state[q->cur_buf];
2014 	KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
2015 		rcv_nxt, tp->rcv_nxt));
2016 	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
2017 	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2018 	CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
2019 	    rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);
2022 	if ((int)m->m_pkthdr.len < 0) {
2023 		t3_ddp_error(so, "handle_ddp_data: neg len");
2026 	m->m_ddp_gl = (unsigned char *)bsp->gl;
2027 	m->m_flags |= M_DDP;
2028 	m->m_cur_offset = bsp->cur_offset;
2029 	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
2030 	if (bsp->flags & DDP_BF_NOCOPY)
2031 		bsp->flags &= ~DDP_BF_NOCOPY;
2033 	m->m_seq = tp->rcv_nxt;
2034 	tp->rcv_nxt = rcv_nxt;
2035 	bsp->cur_offset += m->m_pkthdr.len;
2036 	if (!(bsp->flags & DDP_BF_NOFLIP))
2039 	 * For now, don't re-enable DDP after a connection fell out of  DDP
2042 	q->ubuf_ddp_ready = 0;
2043 	sockbuf_unlock(rcv);
/*
 * new_rx_data - process a CPL_RX_DATA for a connection: reject data on a
 * no-longer-receiving socket (handle_excess_rx), optionally run the DDP
 * fallback path, validate the sequence number, strip the CPL header,
 * track delayed-ACK mode changes, advance rcv_nxt, append the mbuf to
 * the receive sockbuf and wake the reader.  Urgent-data handling is
 * compiled out (#ifdef URGENT_DATA_SUPPORTED, Linux-flavored remnants).
 */
2047  * Process new data received for a connection.
2050 new_rx_data(struct toepcb *toep, struct mbuf *m)
2052 	struct cpl_rx_data *hdr = cplhdr(m);
2053 	struct tcpcb *tp = toep->tp_tp;
2055 	struct sockbuf *rcv;
2057 	int len = be16toh(hdr->len);
2059 	inp_wlock(tp->t_inpcb);
2061 	so  = inp_inpcbtosocket(tp->t_inpcb);
2063 	if (__predict_false(so_no_receive(so))) {
2064 		handle_excess_rx(toep, m);
2065 		inp_wunlock(tp->t_inpcb);
2070 	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
2071 		handle_ddp_data(toep, m);
2073 	m->m_seq = ntohl(hdr->seq);
2074 	m->m_ulp_mode = 0;                    /* for iSCSI */
2077 	if (__predict_false(m->m_seq != tp->rcv_nxt)) {
2079 		    "%s: TID %u: Bad sequence number %u, expected %u\n",
2080 		    toep->tp_toedev->name, toep->tp_tid, m->m_seq,
2083 		inp_wunlock(tp->t_inpcb);
2087 	m_adj(m, sizeof(*hdr));
2089 #ifdef URGENT_DATA_SUPPORTED
2091 	 * We don't handle urgent data yet
2093 	if (__predict_false(hdr->urg))
2094 		handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
2095 	if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
2096 		tp->urg_seq - tp->rcv_nxt < skb->len))
2097 		tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
2100 	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
2101 		toep->tp_delack_mode = hdr->dack_mode;
2102 		toep->tp_delack_seq = tp->rcv_nxt;
2104 	CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
2105 	    m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);
2107 	if (len < m->m_pkthdr.len)
2108 		m->m_pkthdr.len = m->m_len = len;
2110 	tp->rcv_nxt += m->m_pkthdr.len;
2111 	tp->t_rcvtime = ticks;
2112 	toep->tp_enqueued_bytes += m->m_pkthdr.len;
2114 	    "new_rx_data: seq 0x%x len %u",
2115 	    m->m_seq, m->m_pkthdr.len);
2116 	inp_wunlock(tp->t_inpcb);
2117 	rcv = so_sockbuf_rcv(so);
2121 	DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len);
2127 	 * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
2130 	KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1),
2132 	    ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
2133 		so, rcv->sb_cc, rcv->sb_mbmax));
2137 	CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
2138 	    rcv->sb_cc, rcv->sb_mbcnt);
2140 	state = so_state_get(so);
2141 	if (__predict_true((state & SS_NOFDREF) == 0))
2142 		so_sorwakeup_locked(so);
2144 		sockbuf_unlock(rcv);
/*
 * do_rx_data - CPL dispatch entry for RX_DATA: recover the toepcb from
 * the handler context and hand the mbuf to new_rx_data().
 */
2148  * Handler for RX_DATA CPL messages.
2151 do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2153 	struct toepcb *toep = (struct toepcb *)ctx;
2155 	DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);
2157 	new_rx_data(toep, m);
/*
 * new_rx_data_ddp - process a CPL_RX_DATA_DDP: compute how much data the
 * HW placed directly into the current DDP buffer (hdr->seq + ddp_len),
 * update rcv_nxt/delack state, record the placement offset in the mbuf,
 * flip HW buffers on completion (unless NOFLIP), set the DDP flag bits
 * (bit 0 = buffer complete — other code depends on that), and wake the
 * reader when PSH or a completed user buffer warrants it.
 * NOTE(review): skb_reset_transport_header()/tcp_hdr() lines are Linux
 * residue, presumably inside an elided #ifdef block.
 */
2163 new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
2166 	struct ddp_state *q;
2167 	struct ddp_buf_state *bsp;
2168 	struct cpl_rx_data_ddp *hdr;
2170 	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
2172 	unsigned int delack_mode;
2173 	struct sockbuf *rcv;
2176 	inp_wlock(tp->t_inpcb);
2177 	so = inp_inpcbtosocket(tp->t_inpcb);
2179 	if (__predict_false(so_no_receive(so))) {
2181 		handle_excess_rx(toep, m);
2182 		inp_wunlock(tp->t_inpcb);
2186 	q = &toep->tp_ddp_state;
2188 	ddp_report = ntohl(hdr->u.ddp_report);
2189 	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
2190 	bsp = &q->buf_state[buf_idx];
2193 	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
2194 	    "hdr seq 0x%x len %u",
2195 	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
2198 	    "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
2199 	    G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);
2201 	ddp_len = ntohs(hdr->len);
2202 	rcv_nxt = ntohl(hdr->seq) + ddp_len;
2204 	delack_mode = G_DDP_DACK_MODE(ddp_report);
2205 	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
2206 		toep->tp_delack_mode = delack_mode;
2207 		toep->tp_delack_seq = tp->rcv_nxt;
2210 	m->m_seq = tp->rcv_nxt;
2211 	tp->rcv_nxt = rcv_nxt;
2213 	tp->t_rcvtime = ticks;
2215 	 * Store the length in m->m_len.  We are changing the meaning of
2216 	 * m->m_len here, we need to be very careful that nothing from now on
2217 	 * interprets ->len of this packet the usual way.
2219 	m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
2220 	inp_wunlock(tp->t_inpcb);
2222 	    "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
2223 	    m->m_len, rcv_nxt, m->m_seq);
2225 	 * Figure out where the new data was placed in the buffer and store it
2226 	 * in when.  Assumes the buffer offset starts at 0, consumer needs to
2227 	 * account for page pod's pg_offset.
2229 	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
2230 	m->m_cur_offset = end_offset - m->m_pkthdr.len;
2232 	rcv = so_sockbuf_rcv(so);
2235 	m->m_ddp_gl = (unsigned char *)bsp->gl;
2236 	m->m_flags |= M_DDP;
2237 	bsp->cur_offset = end_offset;
2238 	toep->tp_enqueued_bytes += m->m_pkthdr.len;
2241 	 * Length is only meaningful for kbuf
2243 	if (!(bsp->flags & DDP_BF_NOCOPY))
2244 		KASSERT(m->m_len <= bsp->gl->dgl_length,
2245 		    ("length received exceeds ddp pages: len=%d dgl_length=%d",
2246 			m->m_len, bsp->gl->dgl_length));
2248 	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2249 	KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));
2251 	 * Bit 0 of flags stores whether the DDP buffer is completed.
2252 	 * Note that other parts of the code depend on this being in bit 0.
2254 	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
2255 		panic("spurious ddp completion");
2257 		m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
2258 		if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
2259 			q->cur_buf ^= 1;                     /* flip buffers */
2262 	if (bsp->flags & DDP_BF_NOCOPY) {
2263 		m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
2264 		bsp->flags &= ~DDP_BF_NOCOPY;
2267 	if (ddp_report & F_DDP_PSH)
2268 		m->m_ddp_flags |= DDP_BF_PSH;
2270 		m->m_ddp_flags |= DDP_BF_NODATA;
2273 	skb_reset_transport_header(skb);
2274 	tcp_hdr(skb)->fin = 0;          /* changes original hdr->ddp_report */
2278 	if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) ||
2279 		   (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1))
2280 		       || !(m->m_ddp_flags & DDP_BF_NOCOPY))))
2281 		so_sorwakeup_locked(so);
2283 		sockbuf_unlock(rcv);
2286 #define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
2287 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
2288 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
/*
 * do_rx_data_ddp - CPL handler for RX_DATA_DDP: log and drop replies
 * whose ddpvld_status carries any DDP_ERR bit, otherwise hand off to
 * new_rx_data_ddp().  The skb/tcphdr_skb line is Linux residue,
 * presumably inside an elided #ifdef.
 */
2292  * Handler for RX_DATA_DDP CPL messages.
2295 do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2297 	struct toepcb *toep = ctx;
2298 	const struct cpl_rx_data_ddp *hdr = cplhdr(m);
2302 	if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
2303 		log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
2304 		    GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
2305 		return (CPL_RET_BUF_DONE);
2308 	skb->h.th = tcphdr_skb->h.th;
2310 	new_rx_data_ddp(toep, m);
/*
 * process_ddp_complete - process a CPL_RX_DDP_COMPLETE: the HW filled a
 * DDP buffer to G_DDP_OFFSET(ddp_report).  Account the newly placed bytes
 * against the buffer's cur_offset, advance rcv_nxt, track delayed-ACK
 * mode, flip HW buffers unless NOFLIP, tag the mbuf with the ddp gl and
 * completion bit, and wake the reader.
 * NOTE(review): skb_reset_transport_header()/tcp_hdr() lines are Linux
 * residue, presumably inside an elided #ifdef block.
 */
2315 process_ddp_complete(struct toepcb *toep, struct mbuf *m)
2317 	struct tcpcb *tp = toep->tp_tp;
2319 	struct ddp_state *q;
2320 	struct ddp_buf_state *bsp;
2321 	struct cpl_rx_ddp_complete *hdr;
2322 	unsigned int ddp_report, buf_idx, when, delack_mode;
2324 	struct sockbuf *rcv;
2326 	inp_wlock(tp->t_inpcb);
2327 	so = inp_inpcbtosocket(tp->t_inpcb);
2329 	if (__predict_false(so_no_receive(so))) {
2330 		struct inpcb *inp = so_sotoinpcb(so);
2332 		handle_excess_rx(toep, m);
2336 	q = &toep->tp_ddp_state;
2338 	ddp_report = ntohl(hdr->ddp_report);
2339 	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
2340 	m->m_pkthdr.csum_data = tp->rcv_nxt;
2342 	rcv = so_sockbuf_rcv(so);
2345 	bsp = &q->buf_state[buf_idx];
2346 	when = bsp->cur_offset;
2347 	m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
2348 	tp->rcv_nxt += m->m_len;
2349 	tp->t_rcvtime = ticks;
2351 	delack_mode = G_DDP_DACK_MODE(ddp_report);
2352 	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
2353 		toep->tp_delack_mode = delack_mode;
2354 		toep->tp_delack_seq = tp->rcv_nxt;
2357 	skb_reset_transport_header(skb);
2358 	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
2360 	inp_wunlock(tp->t_inpcb);
2362 	KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2364 	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
2365 	    "ddp_report 0x%x offset %u, len %u",
2366 	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
2367 	     G_DDP_OFFSET(ddp_report), m->m_len);
2369 	m->m_cur_offset = bsp->cur_offset;
2370 	bsp->cur_offset += m->m_len;
2372 	if (!(bsp->flags & DDP_BF_NOFLIP)) {
2373 		q->cur_buf ^= 1;                     /* flip buffers */
2374 		if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
2379 	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
2380 	    "ddp_report %u offset %u",
2381 	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
2382 	    G_DDP_OFFSET(ddp_report));
2384 	m->m_ddp_gl = (unsigned char *)bsp->gl;
2385 	m->m_flags |= M_DDP;
2386 	m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
2387 	if (bsp->flags & DDP_BF_NOCOPY)
2388 		bsp->flags &= ~DDP_BF_NOCOPY;
2390 		m->m_ddp_flags |= DDP_BF_NODATA;
2393 	if ((so_state_get(so) & SS_NOFDREF) == 0)
2394 		so_sorwakeup_locked(so);
2396 		sockbuf_unlock(rcv);
/*
 * do_rx_ddp_complete - CPL dispatch entry for RX_DDP_COMPLETE; forwards
 * to process_ddp_complete().  The skb line is Linux residue, presumably
 * inside an elided #ifdef.
 */
2400  * Handler for RX_DDP_COMPLETE CPL messages.
2403 do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2405 	struct toepcb *toep = ctx;
2409 	skb->h.th = tcphdr_skb->h.th;
2411 	process_ddp_complete(toep, m);
/*
 * enter_timewait - transition an offloaded connection to TIME_WAIT:
 * under the inpcb lock, neutralize fields that would otherwise trigger
 * recycling/metrics updates (ts_recent_age, t_srtt), then hand the tcpcb
 * to tcp_offload_twstart().  The rcv_nxt bump described in the comment
 * is in an elided line.
 */
2416  * Move a socket to TIME_WAIT state.  We need to make some adjustments to the
2417  * socket state before calling tcp_time_wait to comply with its expectations.
2420 enter_timewait(struct tcpcb *tp)
2423 	 * Bump rcv_nxt for the peer FIN.  We don't do this at the time we
2424 	 * process peer_close because we don't want to carry the peer FIN in
2425 	 * the socket's receive queue and if we increment rcv_nxt without
2426 	 * having the FIN in the receive queue we'll confuse facilities such
2429 	inp_wlock(tp->t_inpcb);
2432 	tp->ts_recent_age = 0;	     /* defeat recycling */
2433 	tp->t_srtt = 0;                        /* defeat tcp_update_metrics */
2434 	inp_wunlock(tp->t_inpcb);
2435 	tcp_offload_twstart(tp);
2439 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE. This
2440 * function deals with the data that may be reported along with the FIN.
2441 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
2442 * perform normal FIN-related processing. In the latter case 1 indicates that
2443 * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
2447 handle_peer_close_data(struct socket *so, struct mbuf *m)
2449 struct tcpcb *tp = so_sototcpcb(so);
2450 struct toepcb *toep = tp->t_toe;
2451 struct ddp_state *q;
2452 struct ddp_buf_state *bsp;
2453 struct cpl_peer_close *req = cplhdr(m);
2454 unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */
2455 struct sockbuf *rcv;
2457 if (tp->rcv_nxt == rcv_nxt) /* no data */
2460 CTR0(KTR_TOM, "handle_peer_close_data");
2461 if (__predict_false(so_no_receive(so))) {
2462 handle_excess_rx(toep, m);
2465 * Although we discard the data we want to process the FIN so
2466 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
2467 * PEER_CLOSE without data. In particular this PEER_CLOSE
2468 * may be what will close the connection. We return 1 because
2469 * handle_excess_rx() already freed the packet.
2474 inp_lock_assert(tp->t_inpcb);
2475 q = &toep->tp_ddp_state;
2476 rcv = so_sockbuf_rcv(so);
2479 bsp = &q->buf_state[q->cur_buf];
2480 m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
2481 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2482 m->m_ddp_gl = (unsigned char *)bsp->gl;
2483 m->m_flags |= M_DDP;
2484 m->m_cur_offset = bsp->cur_offset;
2486 DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
2487 m->m_seq = tp->rcv_nxt;
2488 tp->rcv_nxt = rcv_nxt;
2489 bsp->cur_offset += m->m_pkthdr.len;
2490 if (!(bsp->flags & DDP_BF_NOFLIP))
2493 skb_reset_transport_header(skb);
2494 tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */
2496 tp->t_rcvtime = ticks;
2498 if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
2499 so_sorwakeup_locked(so);
2501 sockbuf_unlock(rcv);
/*
 * do_peer_fin - handle a peer FIN (PEER_CLOSE): optionally consume the
 * implicit DDP completion, set cantrcvmore/ACK policy, then run the
 * usual TCP FIN state machine (SYN_RECEIVED/ESTABLISHED -> CLOSE_WAIT,
 * FIN_WAIT_1 -> CLOSING, FIN_WAIT_2 -> TIME_WAIT or close depending on a
 * pending abort).  The action is applied after dropping the inpcb lock.
 * NOTE(review): the sk_wake_async/POLL_HUP tail is Linux residue,
 * presumably inside an elided #ifdef block.
 */
2507  * Handle a peer FIN.
2510 do_peer_fin(struct toepcb *toep, struct mbuf *m)
2513 	struct tcpcb *tp = toep->tp_tp;
2517 	CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state);
2518 	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
2519 		printf("abort_pending set\n");
2523 	inp_wlock(tp->t_inpcb);
2524 	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
2525 	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
2526 		keep = handle_peer_close_data(so, m);
2528 			inp_wunlock(tp->t_inpcb);
2532 	if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2534 		    "waking up waiters for cantrcvmore on %p ", so);
2538 		 * If connection is half-synchronized
2539 		 * (ie NEEDSYN flag on) then delay ACK,
2540 		 * so it may be piggybacked when SYN is sent.
2541 		 * Otherwise, since we received a FIN then no
2542 		 * more input can be expected, send ACK now.
2544 		if (tp->t_flags & TF_NEEDSYN)
2545 			tp->t_flags |= TF_DELACK;
2547 			tp->t_flags |= TF_ACKNOW;
2551 	switch (tp->t_state) {
2552 	case TCPS_SYN_RECEIVED:
2553 	    tp->t_starttime = ticks;
2555 	case TCPS_ESTABLISHED:
2556 		tp->t_state = TCPS_CLOSE_WAIT;
2558 	case TCPS_FIN_WAIT_1:
2559 		tp->t_state = TCPS_CLOSING;
2561 	case TCPS_FIN_WAIT_2:
2563 		 * If we've sent an abort_req we must have sent it too late,
2564 		 * HW will send us a reply telling us so, and this peer_close
2565 		 * is really the last message for this connection and needs to
2566 		 * be treated as an abort_rpl, i.e., transition the connection
2567 		 * to TCP_CLOSE (note that the host stack does this at the
2568 		 * time of generating the RST but we must wait for HW).
2569 		 * Otherwise we enter TIME_WAIT.
2571 		t3_release_offload_resources(toep);
2572 		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
2575 			action = TCP_TIMEWAIT;
2580 		    "%s: TID %u received PEER_CLOSE in bad state %d\n",
2581 		    toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state);
2583 	inp_wunlock(tp->t_inpcb);
2585 	if (action == TCP_TIMEWAIT) {
2587 	} else if (action == TCP_DROP) {
2588 		tcp_offload_drop(tp, 0);
2589 	} else if (action == TCP_CLOSE) {
2590 		tcp_offload_close(tp);
2594 	/* Do not send POLL_HUP for half duplex close. */
2595 	if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
2596 	    sk->sk_state == TCP_CLOSE)
2597 		sk_wake_async(so, 1, POLL_HUP);
2599 		sk_wake_async(so, 1, POLL_IN);
/*
 * do_peer_close - CPL dispatch entry for PEER_CLOSE; forwards to
 * do_peer_fin().
 */
2608  * Handler for PEER_CLOSE CPL messages.
2611 do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2613 	struct toepcb *toep = (struct toepcb *)ctx;
2617 	do_peer_fin(toep, m);
/*
 * process_close_con_rpl - handle CLOSE_CON_RPL (our FIN was ACKed):
 * record snd_una (excluding the FIN), then run the close state machine:
 * CLOSING -> TIME_WAIT or close (depending on a pending abort);
 * FIN_WAIT_1 -> FIN_WAIT_2, possibly starting the finwait2 timer when
 * the receive side is already shut, or aborting on zero-linger.
 * The chosen action is applied after dropping the inpcb lock.
 */
2622 process_close_con_rpl(struct toepcb *toep, struct mbuf *m)
2624 	struct cpl_close_con_rpl *rpl = cplhdr(m);
2625 	struct tcpcb *tp = toep->tp_tp;
2628 	struct sockbuf *rcv;
2630 	inp_wlock(tp->t_inpcb);
2631 	so = inp_inpcbtosocket(tp->t_inpcb);
2633 	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */
2635 	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
2636 		inp_wunlock(tp->t_inpcb);
2640 	CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep,
2641 	    tp->t_state, !!(so_state_get(so) & SS_NOFDREF));
2643 	switch (tp->t_state) {
2644 	case TCPS_CLOSING:              /* see FIN_WAIT2 case in do_peer_fin */
2645 		t3_release_offload_resources(toep);
2646 		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
2650 			action = TCP_TIMEWAIT;
2655 		 * In this state we don't care about pending abort_rpl.
2656 		 * If we've sent abort_req it was post-close and was sent too
2657 		 * late, this close_con_rpl is the actual last message.
2659 		t3_release_offload_resources(toep);
2662 	case TCPS_FIN_WAIT_1:
2664 		 * If we can't receive any more
2665 		 * data, then closing user can proceed.
2666 		 * Starting the timer is contrary to the
2667 		 * specification, but if we don't get a FIN
2668 		 * we'll hang forever.
2671 		 * we should release the tp also, and use a
2675 		rcv = so_sockbuf_rcv(so);
2679 		if (rcv->sb_state & SBS_CANTRCVMORE) {
2683 			soisdisconnected(so);
2684 			timeout = (tcp_fast_finwait2_recycle) ?
2685 			    tcp_finwait2_timeout : tcp_maxidle;
2686 			tcp_timer_activate(tp, TT_2MSL, timeout);
2688 		tp->t_state = TCPS_FIN_WAIT_2;
2689 		if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 &&
2690 		    (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
2697 		    "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
2698 		    toep->tp_toedev->tod_name, toep->tp_tid,
2701 	inp_wunlock(tp->t_inpcb);
2704 	if (action == TCP_TIMEWAIT) {
2706 	} else if (action == TCP_DROP) {
2707 		tcp_offload_drop(tp, 0);
2708 	} else if (action == TCP_CLOSE) {
2709 		tcp_offload_close(tp);
/*
 * do_close_con_rpl - CPL dispatch entry for CLOSE_CON_RPL; forwards to
 * process_close_con_rpl().
 */
2716  * Handler for CLOSE_CON_RPL CPL messages.
2719 do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
2722 	struct toepcb *toep = (struct toepcb *)ctx;
2724 	process_close_con_rpl(toep, m);
/*
 * process_abort_rpl - consume an ABORT_RPL only if we are expecting one
 * (TP_ABORT_RPL_PENDING).  On non-T3A the first of the two expected
 * replies just sets TP_ABORT_RPL_RCVD; the final one clears the pending
 * state and releases offload resources unless an incoming abort already
 * did.  Closes the connection after dropping the inpcb lock.
 */
2729  * Process abort replies.  We only process these messages if we anticipate
2730  * them as the coordination between SW and HW in this area is somewhat lacking
2731  * and sometimes we get ABORT_RPLs after we are done with the connection that
2732  * originated the ABORT_REQ.
2735 process_abort_rpl(struct toepcb *toep, struct mbuf *m)
2737 	struct tcpcb *tp = toep->tp_tp;
2742 	T3_TRACE1(TIDTB(sk),
2743 	    "process_abort_rpl: GTS rpl pending %d",
2744 	    sock_flag(sk, ABORT_RPL_PENDING));
2747 	inp_wlock(tp->t_inpcb);
2748 	so = inp_inpcbtosocket(tp->t_inpcb);
2750 	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
2752 		 * XXX panic on tcpdrop
2754 		if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev))
2755 			toep->tp_flags |= TP_ABORT_RPL_RCVD;
2757 			toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING);
2758 			if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) ||
2759 			    !is_t3a(toep->tp_toedev)) {
2760 				if (toep->tp_flags & TP_ABORT_REQ_RCVD)
2761 					panic("TP_ABORT_REQ_RCVD set");
2762 				t3_release_offload_resources(toep);
2767 	inp_wunlock(tp->t_inpcb);
2770 		tcp_offload_close(tp);
/* (fragment: some original lines elided) */
2776 * Handle an ABORT_RPL_RSS CPL message.
2779 do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2781 struct cpl_abort_rpl_rss *rpl = cplhdr(m);
2782 struct toepcb *toep;
2785 * Ignore replies to post-close aborts indicating that the abort was
2786 * requested too late. These connections are terminated when we get
2787 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
2788 * arrives the TID is either no longer used or it has been recycled.
2790 if (rpl->status == CPL_ERR_ABORT_FAILED) {
2796 toep = (struct toepcb *)ctx;
2799 * Sometimes we've already closed the socket, e.g., a post-close
2800 * abort races with ABORT_REQ_RSS, the latter frees the socket
2801 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED,
2802 * but FW turns the ABORT_REQ into a regular one and so we get
2803 * ABORT_RPL_RSS with status 0 and no socket. Only on T3A.
/* Orphaned toepcb (tcpcb already gone): free HW state directly. */
2808 if (toep->tp_tp == NULL) {
2809 log(LOG_NOTICE, "removing tid for abort\n");
2810 cxgb_remove_tid(cdev, toep, toep->tp_tid);
2812 l2t_release(L2DATA(cdev), toep->tp_l2t);
2814 toepcb_release(toep);
/* Normal path: hand off to process_abort_rpl, then drop our reference. */
2818 log(LOG_NOTICE, "toep=%p\n", toep);
2819 log(LOG_NOTICE, "tp=%p\n", toep->tp_tp);
2822 process_abort_rpl(toep, m);
2823 toepcb_release(toep);
/* (fragment: default case / return paths elided) */
2828 * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also
2829 * indicate whether RST should be sent in response.
2832 abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst)
2834 struct tcpcb *tp = so_sototcpcb(so);
2836 switch (abort_reason) {
2837 case CPL_ERR_BAD_SYN:
/* Leftover Linux statistics macro; presumably compiled out here. */
2839 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through
2841 case CPL_ERR_CONN_RESET:
2842 // XXX need to handle SYN_RECV due to crossed SYNs
/* Reset after the peer half-closed maps to EPIPE, else ECONNRESET. */
2843 return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
2844 case CPL_ERR_XMIT_TIMEDOUT:
2845 case CPL_ERR_PERSIST_TIMEDOUT:
2846 case CPL_ERR_FINWAIT2_TIMEDOUT:
2847 case CPL_ERR_KEEPALIVE_TIMEDOUT:
2849 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
/*
 * Populate an ABORT_RPL work request in-place in mbuf m for the given TID.
 * (fragment: some original lines elided)
 */
2858 set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd)
2860 struct cpl_abort_rpl *rpl = cplhdr(m);
2862 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
2863 rpl->wr.wr_lo = htonl(V_WR_TID(tid));
/* Exactly one cpl_abort_rpl in this mbuf. */
2864 m->m_len = m->m_pkthdr.len = sizeof(*rpl);
2866 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
/*
 * Deferred-work callback: build and send the ABORT_RPL that could not be
 * allocated earlier (see send_abort_rpl). req->status carries rst_status.
 * (fragment: some original lines elided)
 */
2871 send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m)
2873 struct mbuf *reply_mbuf;
2874 struct cpl_abort_req_rss *req = cplhdr(m);
/* nofail variant: sleeps rather than returning NULL. */
2876 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl));
/*
 * NOTE(review): priority/length are set on the request mbuf 'm' while the
 * reply is sent via 'reply_mbuf' — looks like these two lines should
 * operate on reply_mbuf; confirm against the full source before changing.
 */
2877 m_set_priority(m, CPL_PRIORITY_DATA);
2878 m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl);
2879 set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status);
2880 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
/* (fragment: some original lines elided) */
2885 * Returns whether an ABORT_REQ_RSS message is a negative advice.
2888 is_neg_adv_abort(unsigned int status)
/* Negative advice = HW hint (retransmit/persist trouble), not a real abort. */
2890 return status == CPL_ERR_RTX_NEG_ADVICE ||
2891 status == CPL_ERR_PERSIST_NEG_ADVICE;
/*
 * Send an ABORT_RPL for the ABORT_REQ in m. If no mbuf is available the
 * reply is deferred via t3_defer_reply, stashing rst_status in req->status.
 * (fragment: some original lines elided)
 */
2895 send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status)
2897 struct mbuf *reply_mbuf;
2898 struct cpl_abort_req_rss *req = cplhdr(m);
2900 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
2903 /* Defer the reply. Stick rst_status into req->cmd. */
2904 req->status = rst_status;
2905 t3_defer_reply(m, tdev, send_deferred_abort_rpl);
2909 m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
2910 set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status);
2914 * XXX need to sync with ARP as for SYN_RECV connections we can send
2915 * these messages while ARP is pending. For other connection states
2916 * it's not a problem.
2918 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
/*
 * Remove a SYN_RECV child from its parent's SYN queue. Body below the
 * CXGB_UNIMPLEMENTED() call is unported Linux code (request_sock etc.)
 * and never executes on FreeBSD.
 */
2923 cleanup_syn_rcv_conn(struct socket *child, struct socket *parent)
2925 CXGB_UNIMPLEMENTED();
2927 struct request_sock *req = child->sk_user_data;
2929 inet_csk_reqsk_queue_removed(parent, req);
2930 synq_remove(tcp_sk(child));
2932 child->sk_user_data = NULL;
/* (fragment: some original lines elided) */
2938 * Performs the actual work to abort a SYN_RECV connection.
2941 do_abort_syn_rcv(struct socket *child, struct socket *parent)
2943 struct tcpcb *parenttp = so_sototcpcb(parent);
2944 struct tcpcb *childtp = so_sototcpcb(child);
2947 * If the server is still open we clean up the child connection,
2948 * otherwise the server already did the clean up as it was purging
2949 * its SYN queue and the skb was just sitting in its backlog.
2951 if (__predict_false(parenttp->t_state == TCPS_LISTEN)) {
2952 cleanup_syn_rcv_conn(child, parent);
/* Tear down the child's offload state under its inpcb lock. */
2953 inp_wlock(childtp->t_inpcb);
2954 t3_release_offload_resources(childtp->t_toe);
2955 inp_wunlock(childtp->t_inpcb);
2956 tcp_offload_close(childtp);
/* (fragment: some original lines elided) */
2962 * Handle abort requests for a SYN_RECV connection. These need extra work
2963 * because the socket is on its parent's SYN queue.
2966 abort_syn_rcv(struct socket *so, struct mbuf *m)
/* Body below is unported Linux-derived code; never runs on FreeBSD. */
2968 CXGB_UNIMPLEMENTED();
2970 struct socket *parent;
2971 struct toedev *tdev = toep->tp_toedev;
2972 struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
2973 struct socket *oreq = so->so_incomp;
2974 struct t3c_tid_entry *t3c_stid;
2978 return -1; /* somehow we are not on the SYN queue */
/* Look up the parent listener via the server TID (stid) map. */
2980 t = &(T3C_DATA(cdev))->tid_maps;
2981 t3c_stid = lookup_stid(t, oreq->ts_recent);
2982 parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
2985 do_abort_syn_rcv(so, parent);
2986 send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
/* (fragment: some original lines elided) */
2993 * Process abort requests. If we are waiting for an ABORT_RPL we ignore this
2994 * request except that we need to reply to it.
2997 process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev)
2999 int rst_status = CPL_ABORT_NO_RST;
3000 const struct cpl_abort_req_rss *req = cplhdr(m);
3001 struct tcpcb *tp = toep->tp_tp;
3005 inp_wlock(tp->t_inpcb);
3006 so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
/* First ABORT_REQ for this connection: mark it and (elided) wait. */
3007 if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
3008 toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
3013 toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
3015 * Three cases to consider:
3016 * a) We haven't sent an abort_req; close the connection.
3017 * b) We have sent a post-close abort_req that will get to TP too late
3018 * and will generate a CPL_ERR_ABORT_FAILED reply. The reply will
3019 * be ignored and the connection should be closed now.
3020 * c) We have sent a regular abort_req that will get to TP too late.
3021 * That will generate an abort_rpl with status 0, wait for it.
3023 if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
3024 (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
/* Translate the HW abort reason into so_error for the application. */
3027 error = abort_status_to_errno(so, req->status,
3029 so_error_set(so, error);
3031 if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
3034 * SYN_RECV needs special processing. If abort_syn_rcv()
3035 * returns 0 is has taken care of the abort.
3037 if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
3040 t3_release_offload_resources(toep);
3043 inp_wunlock(tp->t_inpcb);
3046 tcp_offload_close(tp);
/* Always acknowledge the ABORT_REQ, possibly requesting an RST. */
3048 send_abort_rpl(m, tdev, rst_status);
3051 inp_wunlock(tp->t_inpcb);
/* (fragment: some original lines elided) */
3055 * Handle an ABORT_REQ_RSS CPL message.
3058 do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3060 const struct cpl_abort_req_rss *req = cplhdr(m);
3061 struct toepcb *toep = (struct toepcb *)ctx;
/* Negative advice is only a HW hint — do not abort the connection. */
3063 if (is_neg_adv_abort(req->status)) {
3068 log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid);
/* Connection still in SYN_RCVD and not previously aborted: tear down
 * embryonic state directly rather than via process_abort_req(). */
3070 if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
3071 cxgb_remove_tid(cdev, toep, toep->tp_tid);
3072 toep->tp_flags |= TP_ABORT_REQ_RCVD;
3074 send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
3076 l2t_release(L2DATA(cdev), toep->tp_l2t);
/* Detach the toepcb from the tcpcb before releasing it. */
3081 toep->tp_tp->t_toe = NULL;
3082 toep->tp_tp->t_flags &= ~TF_TOE;
3085 * XXX need to call syncache_chkrst - but we don't
3086 * have a way of doing that yet
3088 toepcb_release(toep);
3089 log(LOG_ERR, "abort for unestablished connection :-(\n");
/* toepcb already disconnected from its tcpcb: nothing to process. */
3092 if (toep->tp_tp == NULL) {
3093 log(LOG_NOTICE, "disconnected toepcb\n");
3094 /* should be freed momentarily */
3100 process_abort_req(toep, m, toep->tp_toedev);
3101 toepcb_release(toep);
/*
 * Abort a passive-open (SYN_RECV) child and, on T3, reuse the mbuf to send
 * a reject CPL_PASS_ACCEPT_RPL back to the hardware.
 * (fragment: some original lines elided)
 */
3106 pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
3108 struct toedev *tdev = TOE_DEV(parent);
3110 do_abort_syn_rcv(child, parent);
3111 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
3112 struct cpl_pass_accept_rpl *rpl = cplhdr(m);
/* Reject reply: bypass TCAM and report CPL_PASS_OPEN_REJECT status. */
3114 rpl->opt0h = htonl(F_TCAM_BYPASS);
3115 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
3116 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
/*
 * ARP failure while answering a passive open. Body below
 * CXGB_UNIMPLEMENTED() is unported Linux-derived code; never executes.
 */
3122 handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
3124 CXGB_UNIMPLEMENTED();
3127 struct t3cdev *cdev;
3128 struct socket *parent;
3129 struct socket *oreq;
3130 struct t3c_tid_entry *t3c_stid;
3132 struct tcpcb *otp, *tp = so_sototcpcb(so);
3133 struct toepcb *toep = tp->t_toe;
3136 * If the connection is being aborted due to the parent listening
3137 * socket going away there's nothing to do, the ABORT_REQ will close
3140 if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
3145 oreq = so->so_incomp;
3146 otp = so_sototcpcb(oreq);
/* Find the parent listener through the stid map, then abort the child. */
3149 t = &(T3C_DATA(cdev))->tid_maps;
3150 t3c_stid = lookup_stid(t, otp->ts_recent);
3151 parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
3154 pass_open_abort(so, parent, m);
/* (fragment: some original lines elided) */
3160 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL. This is treated similarly
3161 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
3165 pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m)
/* Leftover Linux statistics/skb code; presumably compiled out here. */
3169 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
3170 BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk);
3172 handle_pass_open_arp_failure(m_get_socket(m), m);
/* (fragment: some original lines elided) */
3176 * Populate a reject CPL_PASS_ACCEPT_RPL WR.
3179 mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf)
3181 struct cpl_pass_accept_req *req = cplhdr(req_mbuf);
3182 struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf);
3183 unsigned int tid = GET_TID(req);
3185 m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP);
3186 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3187 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
3188 rpl->peer_ip = req->peer_ip; // req->peer_ip not overwritten yet
/* Reject: TCAM bypass + CPL_PASS_OPEN_REJECT status. */
3189 rpl->opt0h = htonl(F_TCAM_BYPASS);
3190 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
3192 rpl->rsvd = rpl->opt2; /* workaround for HW bug */
/* (fragment: some original lines elided) */
3196 * Send a deferred reject to an accept request.
3199 reject_pass_request(struct toedev *tdev, struct mbuf *m)
3201 struct mbuf *reply_mbuf;
/* nofail allocation — runs from deferred (process) context. */
3203 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl));
3204 mk_pass_accept_rpl(reply_mbuf, m);
3205 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
/*
 * Syncache callback: drop the extra toepcb reference taken when the entry
 * was added, regardless of whether the entry already existed or was dropped.
 * (fragment: some original lines elided)
 */
3210 handle_syncache_event(int event, void *arg)
3212 struct toepcb *toep = arg;
3215 case TOE_SC_ENTRY_PRESENT:
3217 * entry already exists - free toepcb
3220 printf("syncache entry present\n");
3221 toepcb_release(toep);
3225 * The syncache has given up on this entry
3226 * either it timed out, or it was evicted
3227 * we need to explicitly release the tid
3229 printf("syncache entry dropped\n");
3230 toepcb_release(toep);
3233 log(LOG_ERR, "unknown syncache event %d\n", event);
/*
 * Build a synthetic SYN (tcphdr + in_conninfo + tcpopt) from the
 * CPL_PASS_ACCEPT_REQ and enter it into the host TCP syncache.
 * (fragment: some original lines elided)
 */
3239 syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
3241 struct in_conninfo inc;
3245 int mss, wsf, sack, ts;
3246 uint32_t rcv_isn = ntohl(req->rcv_isn);
3248 bzero(&to, sizeof(struct tcpopt));
3249 inp = so_sotoinpcb(lso);
3252 * Fill out information for entering us into the syncache
/* Ports/ISN come straight from the CPL, already in network order. */
3254 inc.inc_fport = th.th_sport = req->peer_port;
3255 inc.inc_lport = th.th_dport = req->local_port;
3256 th.th_seq = req->rcv_isn;
3257 th.th_flags = TH_SYN;
/* Seed all receive-side sequence trackers with ISN+1. */
3259 toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;
3264 inc.inc_faddr.s_addr = req->peer_ip;
3265 inc.inc_laddr.s_addr = req->local_ip;
3267 DPRINTF("syncache add of %d:%d %d:%d\n",
3268 ntohl(req->local_ip), ntohs(req->local_port),
3269 ntohl(req->peer_ip), ntohs(req->peer_port));
/* TCP options as decoded by the hardware from the SYN. */
3271 mss = req->tcp_options.mss;
3272 wsf = req->tcp_options.wsf;
3273 ts = req->tcp_options.tstamp;
3274 sack = req->tcp_options.sack;
3277 to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
3278 tcp_offload_syncache_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep);
/* (fragment: many original lines elided, including error-exit labels) */
3283 * Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket
3284 * lock held. Note that the sock here is a listening socket that is not owned
3288 process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev,
3289 struct listen_ctx *lctx)
3292 struct l2t_entry *e;
3294 struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
3295 struct cpl_pass_accept_rpl *rpl;
3296 struct cpl_pass_accept_req *req = cplhdr(m);
3297 unsigned int tid = GET_TID(req);
3298 struct tom_data *d = TOM_DATA(tdev);
3299 struct t3cdev *cdev = d->cdev;
3300 struct tcpcb *tp = so_sototcpcb(so);
3301 struct toepcb *newtoep;
3302 struct rtentry *dst;
3303 struct sockaddr_in nam;
3304 struct t3c_data *td = T3C_DATA(cdev);
/* Allocate the reply first; on failure defer the reject (T3) or just
 * schedule TID release. */
3306 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
3307 if (__predict_false(reply_mbuf == NULL)) {
3308 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
3309 t3_defer_reply(m, tdev, reject_pass_request);
3311 cxgb_queue_tid_release(cdev, tid);
3314 DPRINTF("failed to get reply_mbuf\n");
/* Listener may have closed between HW and host processing. */
3319 if (tp->t_state != TCPS_LISTEN) {
3320 DPRINTF("socket not in listen state\n");
/* Map the destination MAC/VLAN back to the receiving interface. */
3325 tim.mac_addr = req->dst_mac;
3326 tim.vlan_tag = ntohs(req->vlan_tag);
3327 if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
3328 DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n");
3334 * XXX do route lookup to confirm that we're still listening on this
/* Lines 3337-3342 are unported Linux route-validation code. */
3337 if (ip_route_input(skb, req->local_ip, req->peer_ip,
3338 G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev))
3340 rt_flags = ((struct rtable *)skb->dst)->rt_flags &
3341 (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL);
3342 dst_release(skb->dst); // done with the input route, release it
3345 if ((rt_flags & RTF_LOCAL) == 0)
/* FreeBSD path: force the local-route check to pass for now. */
3351 rt_flags = RTF_LOCAL;
3352 if ((rt_flags & RTF_LOCAL) == 0)
3356 * Calculate values and add to syncache
3359 newtoep = toepcb_alloc();
3360 if (newtoep == NULL)
3363 bzero(&nam, sizeof(struct sockaddr_in));
3365 nam.sin_len = sizeof(struct sockaddr_in);
3366 nam.sin_family = AF_INET;
3367 nam.sin_addr.s_addr =req->peer_ip;
3368 dst = rtalloc2((struct sockaddr *)&nam, 1, 0);
3371 printf("failed to find route\n");
/* Resolve the L2 (ARP) entry for the peer. */
3374 e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev,
3375 (struct sockaddr *)&nam);
3377 DPRINTF("failed to get l2t\n");
3380 * Point to our listen socket until accept
3382 newtoep->tp_tp = tp;
3383 newtoep->tp_flags = TP_SYN_RCVD;
3384 newtoep->tp_tid = tid;
3385 newtoep->tp_toedev = tdev;
3386 tp->rcv_wnd = select_rcv_wnd(tdev, so);
3388 cxgb_insert_tid(cdev, d->client, newtoep, tid);
/* Track the embryonic connection on the listener's SYN queue. */
3390 LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
/* DDP only when enabled, not opted out, and the window is big enough. */
3393 newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
3394 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
3396 if (newtoep->tp_ulp_mode) {
3397 ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
/* No mbuf for the DDP setup: silently fall back to non-DDP mode. */
3399 if (ddp_mbuf == NULL)
3400 newtoep->tp_ulp_mode = 0;
3403 CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d",
3404 TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
3405 set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
3407 * XXX workaround for lack of syncache drop
/* Extra reference released later by handle_syncache_event(). */
3409 toepcb_hold(newtoep);
3410 syncache_add_accept_req(req, so, newtoep);
/* Build the accept reply. */
3412 rpl = cplhdr(reply_mbuf);
3413 reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
3414 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3416 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
3417 rpl->opt2 = htonl(calc_opt2(so, tdev));
3418 rpl->rsvd = rpl->opt2; /* workaround for HW bug */
3419 rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten
3421 rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) |
3422 V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx));
3423 rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) |
3424 CPL_PASS_OPEN_ACCEPT);
3426 DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status);
3428 m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep));
/* Send via the L2T entry so it is held until ARP resolves. */
3430 l2t_send(cdev, reply_mbuf, e);
/* Program the DDP timer workaround into the new connection's TCB. */
3432 if (newtoep->tp_ulp_mode) {
3433 __set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
3435 TP_DDP_TIMER_WORKAROUND_MASK,
3437 TP_DDP_TIMER_WORKAROUND_VAL, 1);
/* (elided error paths follow) */
3439 printf("not offloading\n");
3445 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
3446 mk_pass_accept_rpl(reply_mbuf, m);
3448 mk_tid_release(reply_mbuf, newtoep, tid);
3449 cxgb_ofld_send(cdev, reply_mbuf);
3453 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
/* (fragment: some original lines elided; several Linux printk branches
 * are unported leftovers) */
3460 * Handle a CPL_PASS_ACCEPT_REQ message.
3463 do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3465 struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
3466 struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */
3467 struct tom_data *d = listen_ctx->tom_data;
3470 struct cpl_pass_accept_req *req = cplhdr(m);
3471 unsigned int tid = GET_TID(req);
3472 struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
/* Unknown server TID — drop the message. */
3474 if (unlikely(!lsk)) {
3475 printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
3477 (unsigned long)((union listen_entry *)ctx -
3479 return CPL_RET_BUF_DONE;
/* Sanity-check the proposed connection TID. */
3481 if (unlikely(tid >= t->ntids)) {
3482 printk(KERN_ERR "%s: passive open TID %u too large\n",
3484 return CPL_RET_BUF_DONE;
3487 * For T3A the current user of the TID may have closed but its last
3488 * message(s) may have been backlogged so the TID appears to be still
3489 * in use. Just take the TID away, the connection can close at its
3490 * own leisure. For T3B this situation is a bug.
3492 if (!valid_new_tid(t, tid) &&
3493 cdev->type != T3A) {
3494 printk(KERN_ERR "%s: passive open uses existing TID %u\n",
3496 return CPL_RET_BUF_DONE;
3500 process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
/* (fragment: some original lines elided) */
3505 * Called when a connection is established to translate the TCP options
3506 * reported by HW to FreeBSD's native format.
3509 assign_rxopt(struct socket *so, unsigned int opt)
3511 struct tcpcb *tp = so_sototcpcb(so);
3512 struct toepcb *toep = tp->t_toe;
3513 const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep));
3515 inp_lock_assert(tp->t_inpcb);
/* HW reports an MTU-table index; 40 = IP + TCP header overhead. */
3517 toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
3518 tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
3519 tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
3520 tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
/* Window scaling takes effect only if both sides agreed. */
3521 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
3522 (TF_RCVD_SCALE|TF_REQ_SCALE))
3523 tp->rcv_scale = tp->request_r_scale;
/* (fragment: some original lines elided; a few Linux leftovers remain) */
3527 * Completes some final bits of initialization for just established connections
3528 * and changes their state to TCP_ESTABLISHED.
3530 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
3533 make_established(struct socket *so, u32 snd_isn, unsigned int opt)
3535 struct tcpcb *tp = so_sototcpcb(so);
3536 struct toepcb *toep = tp->t_toe;
/* Seed every send-side sequence tracker from the HW-supplied ISN. */
3538 toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
3539 assign_rxopt(so, opt);
/* Route socket-option handling through the TOE ctloutput hook. */
3546 so->so_proto->pr_ctloutput = t3_ctloutput;
/* Unported Linux leftover. */
3550 inet_sk(sk)->id = tp->write_seq ^ jiffies;
3553 * XXX not clear what rcv_wup maps to
3556 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
3557 * pass through opt0.
3559 if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
3560 toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);
3566 * no clean interface for marking ARP up to date
/* Unported Linux leftover. */
3568 dst_confirm(sk->sk_dst_cache);
3570 tp->t_starttime = ticks;
3571 tp->t_state = TCPS_ESTABLISHED;
/*
 * Build a synthetic final ACK from a CPL_PASS_ESTABLISH and expand the
 * matching syncache entry into a full socket (*so).
 * (fragment: some original lines elided)
 */
3576 syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
3579 struct in_conninfo inc;
3582 int mss, wsf, sack, ts;
3583 struct mbuf *m = NULL;
3584 const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
3588 #error "no MAC support"
3591 opt = ntohs(req->tcp_opt);
3593 bzero(&to, sizeof(struct tcpopt));
3596 * Fill out information for entering us into the syncache
3598 inc.inc_fport = th.th_sport = req->peer_port;
3599 inc.inc_lport = th.th_dport = req->local_port;
3600 th.th_seq = req->rcv_isn;
3601 th.th_flags = TH_ACK;
3605 inc.inc_faddr.s_addr = req->peer_ip;
3606 inc.inc_laddr.s_addr = req->local_ip;
/* Decode the HW-reported option summary; 40 = IP + TCP headers. */
3608 mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
3609 wsf = G_TCPOPT_WSCALE_OK(opt);
3610 ts = G_TCPOPT_TSTAMP(opt);
3611 sack = G_TCPOPT_SACK(opt);
3614 to.to_wscale = G_TCPOPT_SND_WSCALE(opt);
3615 to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
3617 DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
3618 ntohl(req->local_ip), ntohs(req->local_port),
3619 ntohl(req->peer_ip), ntohs(req->peer_port),
3620 mss, wsf, ts, sack);
3621 return tcp_offload_syncache_expand(&inc, &to, &th, so, m);
/* (fragment: many original lines elided; Linux leftovers at the tail) */
3626 * Process a CPL_PASS_ESTABLISH message. XXX a lot of the locking doesn't work
3627 * if we are in TCP_SYN_RECV due to crossed SYNs
3630 do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3632 struct cpl_pass_establish *req = cplhdr(m);
3633 struct toepcb *toep = (struct toepcb *)ctx;
3634 struct tcpcb *tp = toep->tp_tp;
3635 struct socket *so, *lso;
3636 struct t3c_data *td = T3C_DATA(cdev);
3637 struct sockbuf *snd, *rcv;
3639 // Complete socket initialization now that we have the SND_ISN
3641 struct toedev *tdev;
3644 tdev = toep->tp_toedev;
3646 inp_wlock(tp->t_inpcb);
3650 * XXX need to add reference while we're manipulating
/* tp still points at the listener at this stage. */
3652 so = lso = inp_inpcbtosocket(tp->t_inpcb);
3654 inp_wunlock(tp->t_inpcb);
/* Connection has left the embryonic state: off the SYN queue. */
3657 LIST_REMOVE(toep, synq_entry);
/* Turn the syncache entry into a real socket; failure paths elided. */
3660 if (!syncache_expand_establish_req(req, &so, toep)) {
3664 CXGB_UNIMPLEMENTED();
3668 * Couldn't create the socket
3670 CXGB_UNIMPLEMENTED();
/* From here on operate on the newly created child socket. */
3673 tp = so_sototcpcb(so);
3674 inp_wlock(tp->t_inpcb);
3676 snd = so_sockbuf_snd(so);
3677 rcv = so_sockbuf_rcv(so);
/* The TOE delivers complete buffers; coalescing would copy needlessly. */
3679 snd->sb_flags |= SB_NOCOALESCE;
3680 rcv->sb_flags |= SB_NOCOALESCE;
3685 reset_wr_list(toep);
3686 tp->rcv_wnd = select_rcv_wnd(tdev, so);
3687 tp->rcv_nxt = toep->tp_copied_seq;
3688 install_offload_ops(so);
/* Initialize the work-request credit accounting for this connection. */
3690 toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
3691 toep->tp_wr_unacked = 0;
3692 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
3693 toep->tp_qset_idx = 0;
3694 toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);
3697 * XXX Cancel any keep alive timer
3700 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
3703 * XXX workaround for lack of syncache drop
/* Drops the extra hold taken in process_pass_accept_req(). */
3705 toepcb_release(toep);
3706 inp_wunlock(tp->t_inpcb);
3708 CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
3709 cxgb_log_tcb(cdev->adapter, toep->tp_tid);
3712 * XXX not sure how these checks map to us
/* Lines below are unported Linux code (sk/forward_skb_hint). */
3714 if (unlikely(sk->sk_socket)) { // simultaneous opens only
3715 sk->sk_state_change(sk);
3716 sk_wake_async(so, 0, POLL_OUT);
3719 * The state for the new connection is now up to date.
3720 * Next check if we should add the connection to the parent's
3721 * accept queue. When the parent closes it resets connections
3722 * on its SYN queue, so check if we are being reset. If so we
3723 * don't need to do anything more, the coming ABORT_RPL will
3724 * destroy this socket. Otherwise move the connection to the
3727 * Note that we reset the synq before closing the server so if
3728 * we are not being reset the stid is still open.
3730 if (unlikely(!tp->forward_skb_hint)) { // removed from synq
/* (fragment: some original lines elided) */
3741 * Fill in the right TID for CPL messages waiting in the out-of-order queue
3742 * and send them to the TOE.
3745 fixup_and_send_ofo(struct toepcb *toep)
3748 struct toedev *tdev = toep->tp_toedev;
3749 struct tcpcb *tp = toep->tp_tp;
3750 unsigned int tid = toep->tp_tid;
3752 log(LOG_NOTICE, "fixup_and_send_ofo\n");
3754 inp_lock_assert(tp->t_inpcb);
/* Drain every queued message; each was built before a TID was known. */
3755 while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
3757 * A variety of messages can be waiting but the fields we'll
3758 * be touching are common to all so any message type will do.
3760 struct cpl_close_con_req *p = cplhdr(m);
/* Patch the now-valid TID into both the WR header and the opcode. */
3762 p->wr.wr_lo = htonl(V_WR_TID(tid));
3763 OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
3764 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
/* (fragment: some original lines elided; Linux leftovers near the tail) */
3769 * Updates socket state from an active establish CPL message. Runs with the
3773 socket_act_establish(struct socket *so, struct mbuf *m)
3775 struct cpl_act_establish *req = cplhdr(m);
3776 u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */
3777 struct tcpcb *tp = so_sototcpcb(so);
3778 struct toepcb *toep = tp->t_toe;
/* ACT_ESTABLISH should only arrive while we are in SYN_SENT. */
3780 if (__predict_false(tp->t_state != TCPS_SYN_SENT))
3781 log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
3782 toep->tp_tid, tp->t_state);
3784 tp->ts_recent_age = ticks;
/* Seed receive-side sequence state from the peer's ISN. */
3785 tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
3786 toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;
3788 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
3791 * Now that we finally have a TID send any CPL messages that we had to
3792 * defer for lack of a TID.
3794 if (mbufq_len(&toep->out_of_order_queue))
3795 fixup_and_send_ofo(toep);
3797 if (__predict_false(so_state_get(so) & SS_NOFDREF)) {
3799 * XXX does this even make sense?
3806 * XXX assume no write requests permitted while socket connection is
3810 * Currently the send queue must be empty at this point because the
3811 * socket layer does not send anything before a connection is
3812 * established. To be future proof though we handle the possibility
3813 * that there are pending buffers to send (either TX_DATA or
3814 * CLOSE_CON_REQ). First we need to adjust the sequence number of the
3815 * buffers according to the just learned write_seq, and then we send
3816 * them on their way.
/* Lines below are unported Linux code (sk, fixup_pending_writeq_buffers). */
3818 fixup_pending_writeq_buffers(sk);
3819 if (t3_push_frames(so, 1))
3820 sk->sk_write_space(sk);
3823 toep->tp_state = tp->t_state;
3824 V_tcpstat.tcps_connects++;
/* (fragment: some original lines elided, including error paths) */
3829 * Process a CPL_ACT_ESTABLISH message.
3832 do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3834 struct cpl_act_establish *req = cplhdr(m);
3835 unsigned int tid = GET_TID(req);
/* The active-open TID (atid) is now replaced by a real TID. */
3836 unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
3837 struct toepcb *toep = (struct toepcb *)ctx;
3838 struct tcpcb *tp = toep->tp_tp;
3840 struct toedev *tdev;
3844 free_atid(cdev, atid);
3847 inp_wlock(tp->t_inpcb);
3852 so = inp_inpcbtosocket(tp->t_inpcb);
3853 tdev = toep->tp_toedev; /* blow up here if link was down */
3857 * It's OK if the TID is currently in use, the owning socket may have
3858 * backlogged its last CPL message(s). Just take it away.
3862 so_insert_tid(d, toep, tid);
3863 free_atid(cdev, atid);
3864 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
3866 socket_act_establish(so, m);
3867 inp_wunlock(tp->t_inpcb);
3868 CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
3869 cxgb_log_tcb(cdev->adapter, toep->tp_tid);
/* (fragment: many original lines elided; Linux leftovers remain) */
3875 * Process an acknowledgment of WR completion. Advance snd_una and send the
3876 * next batch of work requests from the write queue.
3879 wr_ack(struct toepcb *toep, struct mbuf *m)
3881 struct tcpcb *tp = toep->tp_tp;
3882 struct cpl_wr_ack *hdr = cplhdr(m);
3884 unsigned int credits = ntohs(hdr->credits);
3885 u32 snd_una = ntohl(hdr->snd_una);
3887 struct sockbuf *snd;
3889 CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits);
3891 inp_wlock(tp->t_inpcb);
3892 so = inp_inpcbtosocket(tp->t_inpcb);
/* Return credits; unacked count can never exceed consumed credits. */
3893 toep->tp_wr_avail += credits;
3894 if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
3895 toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;
/* Walk the pending-WR list, retiring fully-acknowledged requests. */
3898 struct mbuf *p = peek_wr(toep);
3900 if (__predict_false(!p)) {
3901 log(LOG_ERR, "%u WR_ACK credits for TID %u with "
3902 "nothing pending, state %u wr_avail=%u\n",
3903 credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail);
3907 "wr_ack: p->credits=%d p->bytes=%d",
3908 p->m_pkthdr.csum_data, p->m_pkthdr.len);
/* csum_data is repurposed to carry the WR's credit count. */
3909 KASSERT(p->m_pkthdr.csum_data != 0,
3910 ("empty request still on list"));
/* Partial ack of the head WR: note the shortfall and stop. */
3912 if (__predict_false(credits < p->m_pkthdr.csum_data)) {
3915 struct tx_data_wr *w = cplhdr(p);
3917 "TID %u got %u WR credits, need %u, len %u, "
3918 "main body %u, frags %u, seq # %u, ACK una %u,"
3919 " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n",
3920 toep->tp_tid, credits, p->csum, p->len,
3921 p->len - p->data_len, skb_shinfo(p)->nr_frags,
3922 ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt),
3923 toep->tp_wr_avail, count_pending_wrs(tp) - credits);
3925 p->m_pkthdr.csum_data -= credits;
/* Head WR fully acked: consume its credits and count its bytes. */
3929 credits -= p->m_pkthdr.csum_data;
3930 bytes += p->m_pkthdr.len;
3932 "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d",
3933 p->m_pkthdr.len, credits, p->m_pkthdr.csum_data);
3940 check_wr_invariants(tp);
/* snd_una must never move backwards. */
3943 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
3945 struct tom_data *d = TOM_DATA(TOE_DEV(so));
/*
 * NOTE(review): missing comma after LOG_ERR below — as written this
 * string-concatenates the level into the format; confirm against the
 * full source (cannot be compile-checked in this fragment).
 */
3947 log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK "
3948 "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una,
3949 toep->tp_tid, tp->snd_una);
3954 if (tp->snd_una != snd_una) {
3955 tp->snd_una = snd_una;
3956 tp->ts_recent_age = ticks;
3959 * Keep ARP entry "minty fresh"
/* Unported Linux leftover. */
3961 dst_confirm(sk->sk_dst_cache);
/* Everything sent is acked: no longer waiting for TX idle. */
3963 if (tp->snd_una == tp->snd_nxt)
3964 toep->tp_flags &= ~TP_TX_WAIT_IDLE;
3967 snd = so_sockbuf_snd(so);
3969 CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes);
3970 snd = so_sockbuf_snd(so);
/* Release acked bytes from the send buffer and wake writers. */
3972 sbdrop_locked(snd, bytes);
3973 so_sowwakeup_locked(so);
/* More unsent data buffered: push the next batch of WRs. */
3976 if (snd->sb_sndptroff < snd->sb_cc)
3977 t3_push_frames(so, 0);
3980 inp_wunlock(tp->t_inpcb);
/* (fragment: body mostly elided — presumably delegates to wr_ack()) */
3985 * Handler for TX_DATA_ACK CPL messages.
3988 do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx)
3990 struct toepcb *toep = (struct toepcb *)ctx;
/* (fragment: body elided) */
3999 * Handler for TRACE_PKT CPL messages. Just sink these packets.
4002 do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx)
/* (fragment: some original lines elided) */
4009 * Reset a connection that is on a listener's SYN queue or accept queue,
4010 * i.e., one that has not had a struct socket associated with it.
4011 * Must be called from process context.
4013 * Modeled after code in inet_csk_listen_stop().
4016 t3_reset_listen_child(struct socket *child)
4018 struct tcpcb *tp = so_sototcpcb(child);
/* Send an ABORT_REQ for the child's offloaded connection. */
4020 t3_send_reset(tp->t_toe);
/*
 * Per-socket callback used by t3_disconnect_acceptq(): reset the child if
 * (and only if) it is an offloaded connection.
 */
4025 t3_child_disconnect(struct socket *so, void *arg)
4027 struct tcpcb *tp = so_sototcpcb(so);
4029 if (tp->t_flags & TF_TOE) {
4030 inp_wlock(tp->t_inpcb);
4031 t3_reset_listen_child(so);
4032 inp_wunlock(tp->t_inpcb);
/* (fragment: the matching so_lock call is elided) */
4037 * Disconnect offloaded established but not yet accepted connections sitting
4038 * on a server's accept_queue. We just send an ABORT_REQ at this point and
4039 * finish off the disconnect later as we may need to wait for the ABORT_RPL.
4042 t3_disconnect_acceptq(struct socket *listen_so)
/* Apply t3_child_disconnect to every socket queued on the listener. */
4046 so_listeners_apply_all(listen_so, t3_child_disconnect, NULL);
4047 so_unlock(listen_so);
/* (fragment: the matching so_lock call is elided) */
4051 * Reset offloaded connections sitting on a server's syn queue. As above
4052 * we send ABORT_REQ and finish off when we get ABORT_RPL.
4056 t3_reset_synq(struct listen_ctx *lctx)
4058 struct toepcb *toep;
/* Drain the SYN queue, resetting and releasing each embryonic conn. */
4061 while (!LIST_EMPTY(&lctx->synq_head)) {
4062 toep = LIST_FIRST(&lctx->synq_head);
4063 LIST_REMOVE(toep, synq_entry);
4065 t3_send_reset(toep);
4066 cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid);
4067 toepcb_release(toep);
4069 so_unlock(lctx->lso);
/*
 * Write a run of DDP page pods (PPODs) into adapter memory via ULP_MEM_WRITE
 * work requests, one pod per message. The last NUM_SENTINEL_PPODS pods are
 * written invalid as sentinels.
 * (fragment: some original lines elided)
 */
4074 t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
4075 unsigned int nppods, unsigned int tag, unsigned int maxoff,
4076 unsigned int pg_off, unsigned int color)
4078 unsigned int i, j, pidx;
4081 struct ulp_mem_io *req;
4082 unsigned int tid = toep->tp_tid;
4083 const struct tom_data *td = TOM_DATA(toep->tp_toedev);
/* Base address of this tag's pods in the adapter's DDP region. */
4084 unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;
4086 CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)",
4087 gl, nppods, tag, maxoff, pg_off, color);
4089 for (i = 0; i < nppods; ++i) {
4090 m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
4091 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4092 req = mtod(m, struct ulp_mem_io *);
4093 m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE;
4094 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
/* Memory addresses/lengths are in 32-byte units; NFLITS in 8-byte flits. */
4096 req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) |
4097 V_ULPTX_CMD(ULP_MEM_WRITE));
4098 req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) |
4099 V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1));
/* The page pod payload immediately follows the request header. */
4101 p = (struct pagepod *)(req + 1);
4102 if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) {
4103 p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
4104 p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) |
4105 V_PPOD_COLOR(color));
4106 p->pp_max_offset = htonl(maxoff);
4107 p->pp_page_offset = htonl(pg_off);
/* Each pod holds 5 page addresses; consecutive pods overlap by one
 * (pidx advances 4 per pod), pages past the gather list are zeroed. */
4109 for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx)
4110 p->pp_addr[j] = pidx < gl->dgl_nelem ?
4111 htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0;
4113 p->pp_vld_tid = 0; /* mark sentinel page pods invalid */
4114 send_or_defer(toep, m, 0);
4115 ppod_addr += PPOD_SIZE;
4121 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command.
4124 mk_cpl_barrier_ulp(struct cpl_barrier *b)
/* Overlay a ULP_TXPKT header on the same memory as the CPL message. */
4126 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b;
4128 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
/* Length is expressed in 8-byte flits. */
4129 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8));
4130 b->opcode = CPL_BARRIER;
4134 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command.
4137 mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno)
4139 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
/* NOTE(review): redundant — txpkt was already initialized to the same
 * value in its declaration above; harmless but could be removed. */
4141 txpkt = (struct ulp_txpkt *)req;
4142 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
/* Length in 8-byte flits. */
4143 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
4144 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid));
/* Queue set / CPU number that should receive the GET_TCB reply. */
4145 req->cpuno = htons(cpuno);
4149 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command.
4152 mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
4153 unsigned int word, uint64_t mask, uint64_t val)
/* Overlay a ULP_TXPKT header on the same memory as the CPL message. */
4155 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
4157 CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
4158 tid, word, mask, val);
4160 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
/* Length in 8-byte flits. */
4161 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
4162 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
/* Suppress the SET_TCB_FIELD completion reply. */
4163 req->reply = V_NO_REPLY(1);
/* Update only the bits in 'mask' of TCB word 'word' with 'val'. */
4165 req->word = htons(word);
4166 req->mask = htobe64(mask);
4167 req->val = htobe64(val);
4171 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
4174 mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack,
4175 unsigned int tid, unsigned int credits)
/* Overlay a ULP_TXPKT header on the same memory as the CPL message. */
4177 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;
4179 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
/* Length in 8-byte flits. */
4180 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
4181 OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
/* Return 'credits' bytes of RX window and select the delayed-ACK mode
 * from the per-device 'delack' tunable. */
4182 ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
4183 V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) |
4184 V_RX_CREDITS(credits));
/*
 * Cancel a posted DDP buffer.  Builds one compound BYPASS work request
 * containing: a CPL_BARRIER, two CPL_SET_TCB_FIELDs that flip the DDP
 * flags (invalidate the buffer, make the other buffer active), a
 * CPL_GET_TCB to read back the TCB, and a trailing CPL_BARRIER.
 * Caller must hold the receive sockbuf lock.
 */
4188 t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
4192 struct work_request_hdr *wr;
4193 struct cpl_barrier *lock;
4194 struct cpl_set_tcb_field *req;
4195 struct cpl_get_tcb *getreq;
4196 struct ddp_state *p = &toep->tp_ddp_state;
4199 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
4201 wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
4203 m = m_gethdr_nofail(wrlen);
4204 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4205 wr = mtod(m, struct work_request_hdr *);
4208 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
4209 m->m_pkthdr.len = m->m_len = wrlen;
/* Messages are laid out back to back inside the single work request. */
4211 lock = (struct cpl_barrier *)(wr + 1);
4212 mk_cpl_barrier_ulp(lock);
4214 req = (struct cpl_set_tcb_field *)(lock + 1);
4216 CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);
4218 /* Hmmm, not sure if this is actually a good thing: reactivating
4219 * the other buffer might be an issue if it has been completed
4220 * already. However, that is unlikely, since the fact that the UBUF
4221 * is not completed indicates that there is no outstanding data.
4224 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4225 V_TF_DDP_ACTIVE_BUF(1) |
4226 V_TF_DDP_BUF0_VALID(1),
4227 V_TF_DDP_ACTIVE_BUF(1));
4229 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4230 V_TF_DDP_ACTIVE_BUF(1) |
4231 V_TF_DDP_BUF1_VALID(1), 0);
4233 getreq = (struct cpl_get_tcb *)(req + 1);
4234 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
4236 mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));
4238 /* Keep track of the number of outstanding CPL_GET_TCB requests
/* NOTE(review): TIDTB(so) references 'so', which is not declared in this
 * function — T3_TRACE is presumably compiled out in this build; verify. */
4243 T3_TRACE1(TIDTB(so),
4244 "t3_cancel_ddpbuf: bufidx %u", bufidx);
4246 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
4250 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
4251 * @sk: the socket associated with the buffers
4252 * @bufidx: index of HW DDP buffer (0 or 1)
4253 * @tag0: new tag for HW buffer 0
4254 * @tag1: new tag for HW buffer 1
4255 * @len: new length for HW buf @bufidx
4257 * Sends a compound WR to overlay a new DDP buffer on top of an existing
4258 * buffer by changing the buffer tag and length and setting the valid and
4259 * active flag accordingly. The caller must ensure the new buffer is at
4260 * least as big as the existing one. Since we typically reprogram both HW
4261 * buffers this function sets both tags for convenience. Read the TCB to
4262 determine how much data was written into the buffer before the overlay
4266 t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0,
4267 unsigned int tag1, unsigned int len)
4271 struct work_request_hdr *wr;
4272 struct cpl_get_tcb *getreq;
4273 struct cpl_set_tcb_field *req;
4274 struct ddp_state *p = &toep->tp_ddp_state;
/* NOTE(review): trace label says "t3_setup_ppods" but this is
 * t3_overlay_ddpbuf — misleading string, should be corrected. */
4276 CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)",
4277 bufidx, tag0, tag1, len);
4279 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
/* Compound WR: three(+) SET_TCB_FIELDs followed by a GET_TCB readback. */
4281 wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
4282 m = m_gethdr_nofail(wrlen);
4283 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4284 wr = mtod(m, struct work_request_hdr *);
4285 m->m_pkthdr.len = m->m_len = wrlen;
4289 /* Set the ATOMIC flag to make sure that TP processes the following
4290 * CPLs in an atomic manner and no wire segments can be interleaved.
4292 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
4293 req = (struct cpl_set_tcb_field *)(wr + 1);
/* Both buffer tags live in the same 64-bit TCB word: buf0 tag in the low
 * half, buf1 tag in the high half. */
4294 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG,
4295 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) |
4296 V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32,
4297 V_TCB_RX_DDP_BUF0_TAG(tag0) |
4298 V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32);
/* Program buffer 0's length and mark it valid with pushes enabled. */
4301 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN,
4302 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
4303 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
4305 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4306 V_TF_DDP_PUSH_DISABLE_0(1) |
4307 V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
4308 V_TF_DDP_PUSH_DISABLE_0(0) |
4309 V_TF_DDP_BUF0_VALID(1));
/* Program buffer 1's length and mark it valid and active. */
4311 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN,
4312 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN),
4313 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len));
4315 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4316 V_TF_DDP_PUSH_DISABLE_1(1) |
4317 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
4318 V_TF_DDP_PUSH_DISABLE_1(0) |
4319 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1));
/* Read the TCB back so the caller can learn how much data had already
 * been placed before the overlay took effect. */
4322 getreq = (struct cpl_get_tcb *)(req + 1);
4323 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
4325 /* Keep track of the number of outstanding CPL_GET_TCB requests
/* NOTE(review): TIDTB(sk) references 'sk', which is not declared in this
 * function — T3_TRACE is presumably compiled out in this build; verify. */
4330 T3_TRACE4(TIDTB(sk),
4331 "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u "
4333 bufidx, tag0, tag1, len);
4335 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
4339 * Sends a compound WR containing all the CPL messages needed to program the
4340 * two HW DDP buffers, namely optionally setting up the length and offset of
4341 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK.
4344 t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0,
4345 unsigned int len1, unsigned int offset1,
4346 uint64_t ddp_flags, uint64_t flag_mask, int modulate)
4350 struct work_request_hdr *wr;
4351 struct cpl_set_tcb_field *req;
/* NOTE(review): %08x with a shifted 64-bit argument is a varargs
 * type/format mismatch on some ABIs — verify CTR6's argument handling. */
4353 CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ",
4354 len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff);
4357 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
/* Size the compound WR: one mandatory SET_TCB_FIELD for the DDP flags,
 * plus optional buffer-0 and buffer-1 programming and an optional
 * RX_DATA_ACK when 'modulate' is set. */
4359 wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
4360 (len1 ? sizeof(*req) : 0) +
4361 (modulate ? sizeof(struct cpl_rx_data_ack) : 0);
4362 m = m_gethdr_nofail(wrlen);
4363 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4364 wr = mtod(m, struct work_request_hdr *);
4367 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
4368 m->m_pkthdr.len = m->m_len = wrlen;
4370 req = (struct cpl_set_tcb_field *)(wr + 1);
4371 if (len0) { /* program buffer 0 offset and length */
4372 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
4373 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
4374 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
4375 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) |
4376 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0));
4379 if (len1) { /* program buffer 1 offset and length */
/* Buffer 1's length occupies the high half of the 64-bit TCB word. */
4380 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET,
4381 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
4382 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32,
4383 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) |
4384 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32);
/* Always update the DDP flag bits selected by 'flag_mask'. */
4388 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask,
/* Optionally piggyback an RX_DATA_ACK returning the RX credits that have
 * accumulated since the last window update. */
4392 mk_rx_data_ack_ulp(toep,
4393 (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid,
4394 toep->tp_copied_seq - toep->tp_rcv_wup);
4395 toep->tp_rcv_wup = toep->tp_copied_seq;
/* NOTE(review): TIDTB(sk) references 'sk', which is not declared in this
 * function — T3_TRACE is presumably compiled out in this build; verify. */
4399 T3_TRACE5(TIDTB(sk),
4400 "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x "
4402 len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff,
4406 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
/*
 * Populate mbuf_wrs[]: entry i is the number of work requests needed to
 * send an mbuf chain whose scatter-gather list has i entries, given a WR
 * capacity of 'wr_len' flits.  Idempotent: a non-zero mbuf_wrs[1] means
 * the table was already built.
 */
4410 t3_init_wr_tab(unsigned int wr_len)
4414 if (mbuf_wrs[1]) /* already initialized */
4417 for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) {
/* Each SGL entry pair packs into 3 flits; an odd count needs one extra. */
4418 int sgl_len = (3 * i) / 2 + (i & 1);
/* One WR if the SGL fits, otherwise the first WR holds wr_len - 1 SGL
 * flits (one flit is the header) and each follow-on holds wr_len - 1. */
4421 mbuf_wrs[i] = sgl_len <= wr_len ?
4422 1 : 1 + (sgl_len - 2) / (wr_len - 1);
4429 t3_init_cpl_io(void)
4432 tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
4435 "Chelsio TCP offload: can't allocate sk_buff\n");
4438 skb_put(tcphdr_skb, sizeof(struct tcphdr));
4439 tcphdr_skb->h.raw = tcphdr_skb->data;
4440 memset(tcphdr_skb->data, 0, tcphdr_skb->len);
4443 t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
4444 t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
4445 t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack);
4446 t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data);
4447 t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
4448 t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
4449 t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
4450 t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
4451 t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
4452 t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
4453 t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
4454 t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
4455 t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
4456 t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
4457 t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);