1 /**************************************************************************
3 Copyright (c) 2007-2008, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/kernel.h>
37 #include <sys/limits.h>
41 #include <sys/mutex.h>
42 #include <sys/socket.h>
43 #include <sys/sysctl.h>
44 #include <sys/syslog.h>
45 #include <sys/protosw.h>
49 #include <net/route.h>
51 #include <netinet/in.h>
52 #include <netinet/in_pcb.h>
53 #include <netinet/in_systm.h>
54 #include <netinet/in_var.h>
57 #include <dev/cxgb/cxgb_osdep.h>
58 #include <dev/cxgb/sys/mbufq.h>
60 #include <netinet/ip.h>
61 #include <netinet/tcp_var.h>
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/tcp_offload.h>
64 #include <netinet/tcp_seq.h>
65 #include <netinet/tcp_syncache.h>
66 #include <netinet/tcp_timer.h>
67 #include <net/route.h>
69 #include <dev/cxgb/t3cdev.h>
70 #include <dev/cxgb/common/cxgb_firmware_exports.h>
71 #include <dev/cxgb/common/cxgb_t3_cpl.h>
72 #include <dev/cxgb/common/cxgb_tcb.h>
73 #include <dev/cxgb/common/cxgb_ctl_defs.h>
74 #include <dev/cxgb/cxgb_offload.h>
77 #include <machine/bus.h>
78 #include <dev/cxgb/sys/mvec.h>
79 #include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
80 #include <dev/cxgb/ulp/tom/cxgb_defs.h>
81 #include <dev/cxgb/ulp/tom/cxgb_tom.h>
82 #include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
83 #include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
84 #include <dev/cxgb/ulp/tom/cxgb_tcp.h>
86 #include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h>
89 * For ULP connections HW may add headers, e.g., for digests, that aren't part
90 * of the messages sent by the host but that are part of the TCP payload and
91 * therefore consume TCP sequence space. Tx connection parameters that
92 * operate in TCP sequence space are affected by the HW additions and need to
93 * compensate for them to accurately track TCP sequence numbers. This array
94 * contains the compensating extra lengths for ULP packets. It is indexed by
95 * a packet's ULP submode.
97 const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8};
101 * This sk_buff holds a fake header-only TCP segment that we use whenever we
102 * need to exploit SW TCP functionality that expects TCP headers, such as
103 * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple
104 * CPUs without locking.
106 static struct mbuf *tcphdr_mbuf __read_mostly;
110 * Size of WRs in bytes. Note that we assume all devices we are handling have
113 static unsigned int wrlen __read_mostly;
116 * The number of WRs needed for an skb depends on the number of page fragments
117 * in the skb and whether it has any payload in its main body. This maps the
118 * length of the gather list represented by an skb into the # of necessary WRs.
120 static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly;
123 * Max receive window supported by HW in bytes. Only a small part of it can
124 * be set through option0, the rest needs to be set through RX_DATA_ACK.
126 #define MAX_RCV_WND ((1U << 27) - 1)
129 * Min receive window. We want it to be large enough to accommodate receive
130 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance.
132 #define MIN_RCV_WND (24 * 1024U)
133 #define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS)
135 #define VALIDATE_SEQ 0
136 #define VALIDATE_SOCK(so)
139 #define TCP_TIMEWAIT 1
143 extern int tcp_do_autorcvbuf;
144 extern int tcp_do_autosndbuf;
145 extern int tcp_autorcvbuf_max;
146 extern int tcp_autosndbuf_max;
148 static void t3_send_reset(struct toepcb *toep);
149 static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status);
150 static inline void free_atid(struct t3cdev *cdev, unsigned int tid);
151 static void handle_syncache_event(int event, void *arg);
/*
 * SBAPPEND() - debug wrapper used by the offload rx path to append mbuf
 * chain 'n' to socket buffer 'sb'.  Before and after the actual
 * sbappendstream_locked() call it walks the chains (loop headers elided
 * in this listing) asserting that every mbuf is either a plain mbuf or an
 * EXT_EXTREF external buffer, and that m_next has not been clobbered with
 * the 0xffffffff poison value.  Requires SB_NOCOALESCE on the sockbuf,
 * since the offload path must not have its mbufs coalesced.
 */
154 SBAPPEND(struct sockbuf *sb, struct mbuf *n)
160 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
161 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
162 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
163 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
164 m->m_next, m->m_nextpkt, m->m_flags));
/* Same validation repeated for the chain being appended. */
169 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
170 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
171 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
172 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
173 m->m_next, m->m_nextpkt, m->m_flags));
176 KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set"));
177 sbappendstream_locked(sb, n);	/* the real work; everything else is assertions */
/* Post-append re-validation of the merged chain. */
181 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
182 m->m_next, m->m_nextpkt, m->m_flags));
/* True iff the TOE device is the original T3 (rev A) part. */
188 is_t3a(const struct toedev *dev)
190 return (dev->tod_ttid == TOE_ID_CHELSIO_T3);
/* Debug helper: dump the interesting fields of a toepcb via DPRINTF. */
194 dump_toepcb(struct toepcb *toep)
196 DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n",
197 toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode,
198 toep->tp_mtu_idx, toep->tp_tid);
200 DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n",
201 toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked,
202 toep->tp_mss_clamp, toep->tp_flags);
205 #ifndef RTALLOC2_DEFINED
/*
 * Local fallback for platforms that do not provide rtalloc2(): a thin
 * wrapper around rtalloc1().  The tail of the function (reference-count
 * handling and return) is elided in this listing.
 */
206 static struct rtentry *
207 rtalloc2(struct sockaddr *dst, int report, u_long ignflags)
209 struct rtentry *rt = NULL;
211 if ((rt = rtalloc1(dst, report, ignflags)) != NULL)
219 * Determine whether to send a CPL message now or defer it. A message is
220 * deferred if the connection is in SYN_SENT since we don't know the TID yet.
221 * For connections in other states the message is sent immediately.
222 * If through_l2t is set the message is subject to ARP processing, otherwise
223 * it is sent directly.
/*
 * Send CPL message 'm' now, or defer it if the connection is still in
 * SYN_SENT (TID unknown): deferred messages are queued on
 * out_of_order_queue under the inpcb lock and flushed later.  Otherwise
 * the message goes out immediately, via the L2T/ARP path when
 * through_l2t is nonzero.  NOTE(review): the final cxgb_ofld_send() call
 * sits under an "else" line elided from this listing.
 */
226 send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t)
228 struct tcpcb *tp = toep->tp_tp;
230 if (__predict_false(tp->t_state == TCPS_SYN_SENT)) {
231 inp_wlock(tp->t_inpcb);
232 mbufq_tail(&toep->out_of_order_queue, m); // defer
233 inp_wunlock(tp->t_inpcb);
234 } else if (through_l2t)
235 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T
237 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly
/*
 * Build the mbuf priority value for a CPL from the control-class bits
 * and per-connection qset info.  Body elided in this listing.
 */
240 static inline unsigned int
241 mkprio(unsigned int cntrl, const struct toepcb *toep)
247 * Populate a TID_RELEASE WR. The skb must be already properly sized.
/*
 * Fill 'm' (already allocated and sized by the caller) with a
 * CPL_TID_RELEASE work request for 'tid' and tag it with setup priority.
 */
250 mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid)
252 struct cpl_tid_release *req;
254 m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep));
255 m->m_pkthdr.len = m->m_len = sizeof(*req);
256 req = mtod(m, struct cpl_tid_release *);
257 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
259 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
/*
 * Prepend a TX_DATA work-request header to 'm' describing 'len' bytes of
 * payload ('len' includes any HW ULP additions).  On the first data WR of
 * a connection (TP_DATASENT clear) also programs the init flags, the ACK
 * page count, the queue-set CPU index and the send-buffer size hint.
 * Caller must hold the inpcb lock.
 */
263 make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail)
265 struct tcpcb *tp = so_sototcpcb(so);
266 struct toepcb *toep = tp->t_toe;
267 struct tx_data_wr *req;
270 inp_lock_assert(tp->t_inpcb);
271 snd = so_sockbuf_snd(so);
273 req = mtod(m, struct tx_data_wr *);
274 m->m_len = sizeof(*req);
275 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
276 req->wr_lo = htonl(V_WR_TID(toep->tp_tid));
277 /* len includes the length of any HW ULP additions */
278 req->len = htonl(len);
279 req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
280 /* V_TX_ULP_SUBMODE sets both the mode and submode */
281 req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) |
282 V_TX_URG(/* skb_urgent(skb) */ 0 ) |
283 V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) &&
285 req->sndseq = htonl(tp->snd_nxt);
286 if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
/* First WR on this connection: one-time init flags. */
287 req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
288 V_TX_CPU_IDX(toep->tp_qset));
290 /* Sendbuffer is in units of 32KB.
292 if (tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE)
293 req->param |= htonl(V_TX_SNDBUF(tcp_autosndbuf_max >> 15));
295 req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
298 toep->tp_flags |= TP_DATASENT;
302 #define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */
/*
 * Push pending send-socket-buffer data to the HW as TX_DATA work
 * requests.  Walks the sockbuf from sb_sndptr (or sb_mb), emitting either
 * an immediate-data WR (payload copied in-line when <= IMM_LEN) or a
 * gather-list WR of up to TX_MAX_SEGS-1 segments, while WR credits
 * (tp_wr_avail) last.  Each sent mbuf records its credit cost in
 * m_pkthdr.csum_data (reused as credit counter, see the KASSERT below)
 * and is queued on the toep WR list for completion accounting.  Returns
 * total payload bytes handed to the HW.  Requires the inpcb lock.
 * NOTE(review): several structural lines (braces/else) are elided from
 * this listing, so branch grouping below is partly inferred.
 */
305 t3_push_frames(struct socket *so, int req_completion)
307 struct tcpcb *tp = so_sototcpcb(so);
308 struct toepcb *toep = tp->t_toe;
310 struct mbuf *tail, *m0, *last;
313 int state, bytes, count, total_bytes;
314 bus_dma_segment_t segs[TX_MAX_SEGS], *segp;
/* Nothing to push before the connection is established or after close. */
317 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) {
318 DPRINTF("tcp state=%d\n", tp->t_state);
322 state = so_state_get(so);
324 if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) {
325 DPRINTF("disconnecting\n");
330 inp_lock_assert(tp->t_inpcb);
332 snd = so_sockbuf_snd(so);
335 d = TOM_DATA(toep->tp_toedev);
338 last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;
341 DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n",
342 toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last);
/* Skip the partially-sent mbuf recorded in tp_m_last. */
344 if (last && toep->tp_m_last == last && snd->sb_sndptroff != 0) {
345 KASSERT(tail, ("sbdrop error"));
346 last = tail = tail->m_next;
349 if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) {
350 DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail);
356 toep->tp_m_last = NULL;
357 while (toep->tp_wr_avail && (tail != NULL)) {
360 if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) {
364 /*
365 * If the data in tail fits as in-line, then
366 * make an immediate data wr.
368 if (tail->m_len <= IMM_LEN) {
375 make_tx_data_wr(so, m0, bytes, tail);
376 m_append(m0, bytes, mtod(last, caddr_t));
377 KASSERT(!m0->m_next, ("bad append"));
/* Otherwise build a gather list of VAs in the segs array. */
379 while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail)
380 && (tail != NULL) && (count < TX_MAX_SEGS-1)) {
381 bytes += tail->m_len;
384 /*
385 * technically an abuse to be using this for a VA
386 * but less gross than defining my own structure
387 * or calling pmap_kextract from here :-|
389 segp->ds_addr = (bus_addr_t)tail->m_data;
390 segp->ds_len = tail->m_len;
391 DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n",
392 count, mbuf_wrs[count], tail->m_data, tail->m_len);
396 DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n",
397 toep->tp_wr_avail, count, mbuf_wrs[count], tail);
400 m_set_sgllen(m0, count);
401 make_tx_data_wr(so, m0, bytes, tail);
403 m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep));
/* Record how far we got, for the partial-mbuf skip above. */
406 snd->sb_sndptr = tail;
407 toep->tp_m_last = NULL;
409 toep->tp_m_last = snd->sb_sndptr = last;
412 DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last);
414 snd->sb_sndptroff += bytes;
415 total_bytes += bytes;
416 toep->tp_write_seq += bytes;
417 CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d tail=%p sndptr=%p sndptroff=%d",
418 toep->tp_wr_avail, count, mbuf_wrs[count], tail, snd->sb_sndptr, snd->sb_sndptroff);
420 CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p tailbuf=%p snd_una=0x%08x",
421 total_bytes, toep->tp_m_last, tail->m_data, tp->snd_una);
423 CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p snd_una=0x%08x",
424 total_bytes, toep->tp_m_last, tp->snd_una);
/* Trace up to three gather-list entries per CTR record. */
432 while (i < count && m_get_sgllen(m0)) {
433 if ((count - i) >= 3) {
435 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d pa=0x%zx len=%d",
436 segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len,
437 segs[i + 2].ds_addr, segs[i + 2].ds_len);
439 } else if ((count - i) == 2) {
441 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d",
442 segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len);
445 CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d",
446 segs[i].ds_addr, segs[i].ds_len);
453 /*
454 * remember credits used
456 m0->m_pkthdr.csum_data = mbuf_wrs[count];
457 m0->m_pkthdr.len = bytes;
458 toep->tp_wr_avail -= mbuf_wrs[count];
459 toep->tp_wr_unacked += mbuf_wrs[count];
/* Request a completion on the first WR, or when half the credits are out. */
461 if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) ||
462 toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
463 struct work_request_hdr *wr = cplhdr(m0);
465 wr->wr_hi |= htonl(F_WR_COMPL);
466 toep->tp_wr_unacked = 0;
468 KASSERT((m0->m_pkthdr.csum_data > 0) &&
469 (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d",
470 m0->m_pkthdr.csum_data));
471 m0->m_type = MT_DONTFREE;	/* WR list owns it until the HW acks */
472 enqueue_wr(toep, m0);
473 DPRINTF("sending offload tx with %d bytes in %d segments\n",
475 l2t_send(cdev, m0, toep->tp_l2t);
478 return (total_bytes);
482 * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail
483 * under any circumstances. We take the easy way out and always queue the
484 * message to the write_queue. We can optimize the case where the queue is
485 * already empty though the optimization is probably not worth it.
/*
 * Send a CPL_CLOSE_CON_REQ (offloaded FIN) for 'so'.  Any pending send
 * data is pushed first (unless still in SYN_SENT); the request is skipped
 * entirely if a FIN was already sent (TP_FIN_SENT).  Uses the nofail mbuf
 * allocator, so this cannot fail.
 */
488 close_conn(struct socket *so)
491 struct cpl_close_con_req *req;
493 struct inpcb *inp = so_sotoinpcb(so);
500 tp = so_sototcpcb(so);
503 if (tp->t_state != TCPS_SYN_SENT)
504 t3_push_frames(so, 1);	/* flush data ahead of the FIN */
506 if (toep->tp_flags & TP_FIN_SENT) {
513 d = TOM_DATA(toep->tp_toedev);
515 m = m_gethdr_nofail(sizeof(*req));
516 m_set_priority(m, CPL_PRIORITY_DATA);
520 toep->tp_flags |= TP_FIN_SENT;
521 req = mtod(m, struct cpl_close_con_req *);
523 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
524 req->wr.wr_lo = htonl(V_WR_TID(tid));
525 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
528 /*
529 * XXX - need to defer shutdown while there is still data in the queue
532 CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid);
533 cxgb_ofld_send(d->cdev, m);
538 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
/*
 * ARP-failure callback for a CPL_ABORT_REQ: downgrade the abort to the
 * no-RST variant (the peer is unreachable anyway) and send it directly,
 * bypassing the L2T path that just failed.
 */
542 abort_arp_failure(struct t3cdev *cdev, struct mbuf *m)
544 struct cpl_abort_req *req = cplhdr(m);
546 req->cmd = CPL_ABORT_NO_RST;
547 cxgb_ofld_send(cdev, m);
551 * Send RX credits through an RX_DATA_ACK CPL message. If nofail is 0 we are
552 * permitted to return without sending the message in case we cannot allocate
553 * an sk_buff. Returns the number of credits sent.
/*
 * Return 'credits' of RX window to the HW via CPL_RX_DATA_ACK, with the
 * delayed-ack bits in 'dack' folded in.  Uses the nofail allocator here,
 * so allocation cannot fail (the 'nofail' parameter is part of the
 * documented contract above).
 */
556 t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail)
559 struct cpl_rx_data_ack *req;
560 struct toepcb *toep = tp->t_toe;
561 struct toedev *tdev = toep->tp_toedev;
563 m = m_gethdr_nofail(sizeof(*req));
565 DPRINTF("returning %u credits to HW\n", credits);
567 req = mtod(m, struct cpl_rx_data_ack *);
568 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
570 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
571 req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
572 m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep));
573 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
578 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled.
579 * This is only used in DDP mode, so we take the opportunity to also set the
580 * DACK mode and flush any Rx credits.
/*
 * Request an RX modulation timer via CPL_RX_DATA_ACK (DDP mode only, per
 * the comment above), simultaneously switching DACK mode and flushing
 * any accumulated RX credits (copied_seq - rcv_wup).
 */
583 t3_send_rx_modulate(struct toepcb *toep)
586 struct cpl_rx_data_ack *req;
588 m = m_gethdr_nofail(sizeof(*req));
590 req = mtod(m, struct cpl_rx_data_ack *);
591 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
593 m->m_pkthdr.len = m->m_len = sizeof(*req);
595 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
596 req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
598 V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup));
599 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
600 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
601 toep->tp_rcv_wup = toep->tp_copied_seq;	/* credits now returned */
605 * Handle receipt of an urgent pointer.
/*
 * Record a received TCP urgent pointer.  The entire body is compiled out
 * unless URGENT_DATA_SUPPORTED is defined; the code inside is unported
 * Linux TOE logic (sk_buff/sock_flag/skb_peek), kept as a reference for a
 * future FreeBSD implementation.
 */
608 handle_urg_ptr(struct socket *so, uint32_t urg_seq)
610 #ifdef URGENT_DATA_SUPPORTED
611 struct tcpcb *tp = so_sototcpcb(so);
613 urg_seq--; /* initially points past the urgent data, per BSD */
615 if (tp->urg_data && !after(urg_seq, tp->urg_seq))
616 return; /* duplicate pointer */
618 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
619 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
620 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
623 if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len)
624 tom_eat_skb(sk, skb, 0);
626 tp->urg_data = TCP_URG_NOTYET;
627 tp->urg_seq = urg_seq;
632 * Returns true if a socket cannot accept new Rx data.
/* True if the socket is (being) disconnected and can take no more Rx data. */
635 so_no_receive(const struct socket *so)
637 return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING));
641 * Process an urgent data notification.
/*
 * Process an RX_URG_NOTIFY CPL: forward the urgent-pointer sequence
 * number to handle_urg_ptr() unless the socket can no longer receive.
 */
644 rx_urg_notify(struct toepcb *toep, struct mbuf *m)
646 struct cpl_rx_urg_notify *hdr = cplhdr(m);
647 struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
651 if (!so_no_receive(so))
652 handle_urg_ptr(so, ntohl(hdr->seq));
658 * Handler for RX_URG_NOTIFY CPL messages.
/* CPL dispatch entry for RX_URG_NOTIFY; 'ctx' is the connection's toepcb. */
661 do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx)
663 struct toepcb *toep = (struct toepcb *)ctx;
665 rx_urg_notify(toep, m);
/*
 * Whether the tunable delayed-ack mode may be applied to this connection.
 * NOTE(review): as written, the leading "toep->tp_ulp_mode ||" makes the
 * whole expression reduce to (tp_ulp_mode != 0), so the TCPDDP/T3-revision
 * sub-check is dead code.  Possibly the first clause was meant to test
 * tp_ulp_mode == 0 — confirm against the vendor driver before changing.
 */
670 is_delack_mode_valid(struct toedev *dev, struct toepcb *toep)
672 return (toep->tp_ulp_mode ||
673 (toep->tp_ulp_mode == ULP_MODE_TCPDDP &&
674 dev->tod_ttid >= TOE_ID_CHELSIO_T3));
678 * Set of states for which we should return RX credits.
680 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)
683 * Called after some received data has been read. It returns RX credits
684 * to the HW for the amount of data processed.
/*
 * Account for 'copied' bytes consumed from the receive buffer and return
 * RX credits to the HW when warranted.  Credits only flow back in
 * ESTABLISHED/FIN_WAIT_1/FIN_WAIT_2; they are sent once the threshold
 * tunable is reached, or unconditionally when less than 16KB of receive
 * window would remain (to keep coalescing effective).  Also re-programs
 * the HW delayed-ack mode when the tunable changed.
 * NOTE(review): tp_copied_seq is advanced by 'copied' on two visible
 * lines (703 and 714); these appear to belong to different branches whose
 * structural lines are elided from this listing — verify before editing.
 */
687 t3_cleanup_rbuf(struct tcpcb *tp, int copied)
689 struct toepcb *toep = tp->t_toe;
692 int dack_mode, must_send, read;
693 u32 thres, credits, dack = 0;
696 so = inp_inpcbtosocket(tp->t_inpcb);
697 rcv = so_sockbuf_rcv(so);
699 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
700 (tp->t_state == TCPS_FIN_WAIT_2))) {
703 toep->tp_copied_seq += copied;
710 inp_lock_assert(tp->t_inpcb);
714 toep->tp_copied_seq += copied;
/* Bytes the application drained since the last call. */
716 read = toep->tp_enqueued_bytes - rcv->sb_cc;
717 toep->tp_copied_seq += read;
719 credits = toep->tp_copied_seq - toep->tp_rcv_wup;
720 toep->tp_enqueued_bytes = rcv->sb_cc;
/* Clamp runaway credit counts and log the inconsistency. */
723 if (credits > rcv->sb_mbmax) {
724 log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n",
725 toep->tp_copied_seq, toep->tp_rcv_wup, credits);
726 credits = rcv->sb_mbmax;
730 /*
731 * XXX this won't accurately reflect credit return - we need
732 * to look at the difference between the amount that has been
733 * put in the recv sockbuf and what is there now
736 if (__predict_false(!credits))
739 dev = toep->tp_toedev;
740 thres = TOM_TUNABLE(dev, rx_credit_thres);
742 if (__predict_false(thres == 0))
/* Switch HW delayed-ack mode if the tunable changed since last set. */
745 if (is_delack_mode_valid(dev, toep)) {
746 dack_mode = TOM_TUNABLE(dev, delack);
747 if (__predict_false(dack_mode != toep->tp_delack_mode)) {
748 u32 r = tp->rcv_nxt - toep->tp_delack_seq;
750 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp)
751 dack = F_RX_DACK_CHANGE |
752 V_RX_DACK_MODE(dack_mode);
755 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
757 /*
758 * For coalescing to work effectively ensure the receive window has
759 * at least 16KB left.
761 must_send = credits + 16384 >= tp->rcv_wnd;
763 if (must_send || credits >= thres)
764 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send);
/* toe_usrreqs tu_disconnect hook: initiate an offloaded disconnect. */
768 cxgb_toe_disconnect(struct tcpcb *tp)
772 DPRINTF("cxgb_toe_disconnect\n");
774 so = inp_inpcbtosocket(tp->t_inpcb);
/* toe_usrreqs tu_reset hook: abort the connection and detach from TOE. */
780 cxgb_toe_reset(struct tcpcb *tp)
782 struct toepcb *toep = tp->t_toe;
789 tp->t_flags &= ~TF_TOE;	/* connection no longer offloaded */
/* toe_usrreqs tu_send hook: push queued send data to the HW. */
796 cxgb_toe_send(struct tcpcb *tp)
800 DPRINTF("cxgb_toe_send\n");
801 dump_toepcb(tp->t_toe);
803 so = inp_inpcbtosocket(tp->t_inpcb);
804 t3_push_frames(so, 1);
/* toe_usrreqs tu_rcvd hook: return RX credits after the app read data. */
809 cxgb_toe_rcvd(struct tcpcb *tp)
812 inp_lock_assert(tp->t_inpcb);
814 t3_cleanup_rbuf(tp, 0);
/* toe_usrreqs tu_detach hook: tear down offload state for 'tp'. */
820 cxgb_toe_detach(struct tcpcb *tp)
824 /*
825 * XXX how do we handle teardown in the SYN_SENT state?
828 inp_lock_assert(tp->t_inpcb);
835 tp->t_flags &= ~TF_TOE;	/* connection no longer offloaded */
/*
 * TOE user-request switch installed into tp->t_tu by
 * install_offload_ops(); maps the generic offload operations onto the
 * cxgb implementations above.  (The original listing initialized
 * .tu_detach twice; C designated-initializer rules make the duplicate a
 * harmless last-one-wins, but it trips -Woverride-init — removed.)
 */
840 static struct toe_usrreqs cxgb_toe_usrreqs = {
841 .tu_disconnect = cxgb_toe_disconnect,
842 .tu_reset = cxgb_toe_reset,
843 .tu_send = cxgb_toe_send,
844 .tu_rcvd = cxgb_toe_rcvd,
845 .tu_detach = cxgb_toe_detach,
847 .tu_syncache_event = handle_syncache_event,
/*
 * Fill 'm' with a CPL_SET_TCB_FIELD request updating TCB 'word' of this
 * connection: new value = (old & ~mask) | val.  'no_reply' suppresses the
 * HW completion.  Dispatched through send_or_defer() so it is queued if
 * the connection is still in SYN_SENT.
 */
852 __set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word,
853 uint64_t mask, uint64_t val, int no_reply)
855 struct cpl_set_tcb_field *req;
857 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
858 toep->tp_tid, word, mask, val);
860 req = mtod(m, struct cpl_set_tcb_field *);
861 m->m_pkthdr.len = m->m_len = sizeof(*req);
862 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
864 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid));
865 req->reply = V_NO_REPLY(no_reply);
867 req->word = htons(word);
868 req->mask = htobe64(mask);
869 req->val = htobe64(val);
871 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
872 send_or_defer(toep, m, 0);	/* control path: no L2T/ARP needed */
/*
 * Allocate an mbuf and issue a no-reply SET_TCB_FIELD update, unless the
 * connection is closed or already shutting down (in which case the
 * update would be pointless/racy).
 * NOTE(review): the log message below has a typo ("seting"); left as-is
 * here since this listing elides surrounding lines.
 */
876 t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val)
879 struct tcpcb *tp = toep->tp_tp;
884 if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) {
885 printf("not seting field\n");
889 m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field));
891 __set_tcb_field(toep, m, word, mask, val, 1);
895 * Set one of the t_flags bits in the TCB.
/* Set or clear a single bit in the TCB t_flags1 word. */
898 set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val)
901 t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos);
905 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting.
/* Sync the HW Nagle setting with the stack's TF_NODELAY flag. */
908 t3_set_nagle(struct toepcb *toep)
910 struct tcpcb *tp = toep->tp_tp;
912 set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY));
916 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting.
/* Enable/disable HW TCP keepalives for this connection. */
919 t3_set_keepalive(struct toepcb *toep, int on_off)
922 set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off);
/* Enable/disable HW receive coalescing for this connection. */
926 t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off)
928 set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off);
/* Enable/disable MSS-based delayed-ack in HW for this connection. */
932 t3_set_dack_mss(struct toepcb *toep, int on_off)
935 set_tcb_tflag(toep, S_TF_DACK_MSS, on_off);
939 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting.
/* Propagate the inpcb's IP TOS value into the connection's TCB. */
942 t3_set_tos(struct toepcb *toep)
944 int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb);
946 t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS),
952 * In DDP mode, TP fails to schedule a timer to push RX data to the host when
953 * DDP is disabled (data is delivered to freelist). [Note that, the peer should
954 * set the PSH bit in the last segment, which would trigger delivery.]
955 * We work around the issue by setting a DDP buffer in a partial placed state,
956 * which guarantees that TP will schedule a timer.
958 #define TP_DDP_TIMER_WORKAROUND_MASK\
959 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\
960 ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\
961 V_TCB_RX_DDP_BUF0_LEN(3)) << 32))
962 #define TP_DDP_TIMER_WORKAROUND_VAL\
963 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\
964 ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\
/*
 * Turn DDP on or off for this connection.  When disabling, also applies
 * the DDP timer workaround (see TP_DDP_TIMER_WORKAROUND_* above) so TP
 * still schedules a push timer.  NOTE(review): the branch structure
 * between the two t3_set_tcb_field() calls is elided from this listing.
 */
968 t3_enable_ddp(struct toepcb *toep, int on)
972 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1),
975 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS,
977 TP_DDP_TIMER_WORKAROUND_MASK,
979 TP_DDP_TIMER_WORKAROUND_VAL);
/* Program the DDP tag/color for buffer 'buf_idx' (0 or 1) in the TCB. */
984 t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color)
986 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx,
987 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
/*
 * Program offset and length of DDP buffer 'buf_idx' in the TCB.  Buffer 0
 * and buffer 1 live at different TCB words with different field layouts
 * (buffer 1's length field is shifted up 32 bits).
 * NOTE(review): in the buf-1 mask, "M_TCB_RX_DDP_BUF1_LEN << 32" lacks
 * the (uint64_t) cast used for the value on the line after it — if the M_
 * constant is a plain int this is a 32-overshift (UB); compare with the
 * value expression and confirm against the vendor driver.
 */
992 t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
996 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET,
997 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
998 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
999 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) |
1000 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
1002 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET,
1003 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
1004 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32),
1005 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) |
1006 V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32));
/*
 * Select a HW congestion-control flavor by name.  Compiled out unless
 * CONGESTION_CONTROL_SUPPORTED is defined; body is a linear search of
 * the t3_cong_ops table (remainder elided in this listing).
 */
1010 t3_set_cong_control(struct socket *so, const char *name)
1012 #ifdef CONGESTION_CONTROL_SUPPORTED
1015 for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++)
1016 if (!strcmp(name, t3_cong_ops[cong_algo].name))
1019 if (cong_algo >= ARRAY_SIZE(t3_cong_ops))
/*
 * Request a copy of this connection's TCB from the HW (CPL_GET_TCB).
 * Uses M_NOWAIT allocation; the NULL-check branch is elided from this
 * listing — presumably it returns an error before the lock assert.  Like
 * send_or_defer(), the request is queued while still in SYN_SENT since
 * the TID is not yet established.
 */
1026 t3_get_tcb(struct toepcb *toep)
1028 struct cpl_get_tcb *req;
1029 struct tcpcb *tp = toep->tp_tp;
1030 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
1035 inp_lock_assert(tp->t_inpcb);
1036 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
1037 req = mtod(m, struct cpl_get_tcb *);
1038 m->m_pkthdr.len = m->m_len = sizeof(*req);
1039 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1041 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid));
1042 req->cpuno = htons(toep->tp_qset);	/* reply routed to our qset */
1044 if (tp->t_state == TCPS_SYN_SENT)
1045 mbufq_tail(&toep->out_of_order_queue, m); // defer
1047 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
/* Register 'toep' under its HW TID in the t3cdev TID table. */
1052 so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid)
1057 cxgb_insert_tid(d->cdev, d->client, toep, tid);
1061 * find_best_mtu - find the entry in the MTU table closest to an MTU
1063 * @mtu: the target MTU
1065 * Returns the index of the value in the MTU table that is closest to but
1066 * does not exceed the target MTU.
/* See header comment above: largest MTU-table index not exceeding 'mtu'. */
1069 find_best_mtu(const struct t3c_data *d, unsigned short mtu)
1073 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
/*
 * Choose the HW MTU-table index for a connection given path MTU 'pmtu',
 * and set tp->t_maxseg accordingly (MTU minus 40 bytes of IP+TCP header).
 * The clamp against td->mtus[0] keeps t_maxseg at or above the smallest
 * table entry.  NOTE(review): branch structure around the two
 * find_best_mtu() calls is elided from this listing.
 */
1079 select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu)
1084 struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt;
1087 tp->t_maxseg = pmtu - 40;	/* 40 = IP (20) + TCP (20) headers */
1088 if (tp->t_maxseg < td->mtus[0] - 40)
1089 tp->t_maxseg = td->mtus[0] - 40;
1090 idx = find_best_mtu(td, tp->t_maxseg + 40);
1092 tp->t_maxseg = td->mtus[idx] - 40;
1094 idx = find_best_mtu(td, pmtu);
/* Release an active-open TID and drop the toepcb reference it held. */
1100 free_atid(struct t3cdev *cdev, unsigned int tid)
1102 struct toepcb *toep = cxgb_free_atid(cdev, tid);
1105 toepcb_release(toep);
1109 * Release resources held by an offload connection (TID, L2T entry, etc.)
/*
 * Release everything an offloaded connection holds: pending WR queue,
 * DDP state (including any user DDP in flight, which is cancelled with a
 * read wakeup), the L2T entry, and finally the TID itself — an atid if
 * the connection never left SYN_SENT, a full TID otherwise.  Clears
 * TF_TOE so the stack stops treating the connection as offloaded.
 */
1112 t3_release_offload_resources(struct toepcb *toep)
1114 struct tcpcb *tp = toep->tp_tp;
1115 struct toedev *tdev = toep->tp_toedev;
1116 struct t3cdev *cdev;
1118 unsigned int tid = toep->tp_tid;
1119 struct sockbuf *rcv;
1121 CTR0(KTR_TOM, "t3_release_offload_resources");
1126 cdev = TOEP_T3C_DEV(toep);
1131 t3_release_ddp_resources(toep);
1133 #ifdef CTRL_SKB_CACHE
1134 kfree_skb(CTRL_SKB_CACHE(tp));
1135 CTRL_SKB_CACHE(tp) = NULL;
/* Drop any WRs still awaiting HW acknowledgment. */
1138 if (toep->tp_wr_avail != toep->tp_wr_max) {
1139 purge_wr_queue(toep);
1140 reset_wr_list(toep);
1144 l2t_release(L2DATA(cdev), toep->tp_l2t);
1145 toep->tp_l2t = NULL;
1149 inp_lock_assert(tp->t_inpcb);
1150 so = inp_inpcbtosocket(tp->t_inpcb);
1151 rcv = so_sockbuf_rcv(so);
1152 /*
1153 * cancel any offloaded reads
1158 tp->t_flags &= ~TF_TOE;
1159 if (toep->tp_ddp_state.user_ddp_pending) {
1160 t3_cancel_ubuf(toep, rcv);
1161 toep->tp_ddp_state.user_ddp_pending = 0;
1163 so_sorwakeup_locked(so);	/* wake readers blocked on the DDP buffer */
/* TID cleanup: atid if we never completed the handshake. */
1167 if (toep->tp_state == TCPS_SYN_SENT) {
1168 free_atid(cdev, tid);
1170 __skb_queue_purge(&tp->out_of_order_queue);
1172 } else { // we have TID
1173 cxgb_remove_tid(cdev, toep, tid);
1174 toepcb_release(toep);
1177 log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
/*
 * Flip a socket over to offloaded operation: install the TOE socket ops,
 * mark the tcpcb TF_TOE and point its usrreqs at cxgb_toe_usrreqs.
 * Requires tp->t_toe to already be set.
 */
1182 install_offload_ops(struct socket *so)
1184 struct tcpcb *tp = so_sototcpcb(so);
1186 KASSERT(tp->t_toe != NULL, ("toepcb not set"));
1188 t3_install_socket_ops(so);
1189 tp->t_flags |= TF_TOE;
1190 tp->t_tu = &cxgb_toe_usrreqs;
1194 * Determine the receive window scaling factor given a target max
/*
 * Compute the receive window scale (RFC 1323) for a target window of
 * 'space' bytes: smallest wscale (<= 14) such that space >> wscale fits
 * in 16 bits, after clamping space to MAX_RCV_WND.
 */
1198 select_rcv_wscale(int space)
1202 if (space > MAX_RCV_WND)
1203 space = MAX_RCV_WND;
1206 for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;
1212 * Determine the receive window size for a socket.
1214 static unsigned long
1215 select_rcv_wnd(struct toedev *dev, struct socket *so)
1217 struct tom_data *d = TOM_DATA(dev);
1219 unsigned int max_rcv_wnd;
1220 struct sockbuf *rcv;
1222 rcv = so_sockbuf_rcv(so);
1224 if (tcp_do_autorcvbuf)
1225 wnd = tcp_autorcvbuf_max;
1227 wnd = rcv->sb_hiwat;
1232 * For receive coalescing to work effectively we need a receive window
1233 * that can accomodate a coalesced segment.
1235 if (wnd < MIN_RCV_WND)
1239 max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
1240 (uint32_t)d->rx_page_size * 23 :
1243 return min(wnd, max_rcv_wnd);
1247 * Assign offload parameters to some socket fields. This code is used by
1248 * both active and passive opens.
/*
 * Initialize offload state shared by active and passive opens: disable
 * sockbuf coalescing (offload tx path requires SB_NOCOALESCE), size the
 * WR credit pool from the max_wrs tunable, choose MSS index and receive
 * window, and enable DDP (ULP_MODE_TCPDDP) when the tunable allows it,
 * the socket doesn't opt out, and the window is large enough.
 */
1251 init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
1252 struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
1254 struct tcpcb *tp = so_sototcpcb(so);
1255 struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);
1256 struct sockbuf *snd, *rcv;
1259 SOCK_LOCK_ASSERT(so);
1262 snd = so_sockbuf_snd(so);
1263 rcv = so_sockbuf_rcv(so);
1265 log(LOG_INFO, "initializing offload socket\n");
1266 /*
1267 * We either need to fix push frames to work with sbcompress
1268 * or we need to add this
1270 snd->sb_flags |= SB_NOCOALESCE;
1271 rcv->sb_flags |= SB_NOCOALESCE;
1275 toep->tp_toedev = dev;
1279 toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
1280 toep->tp_wr_unacked = 0;
1281 toep->tp_delack_mode = 0;
1283 toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
1288 tp->rcv_wnd = select_rcv_wnd(dev, so);
1290 toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
1291 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
1292 toep->tp_qset_idx = 0;
1294 reset_wr_list(toep);
1295 DPRINTF("initialization done\n");
1299 * The next two functions calculate the option 0 value for a socket.
/*
 * Build the high word of option 0 for ACT_OPEN: Nagle, keepalive, TCAM
 * bypass, window scale and MSS-table index.
 */
1301 static inline unsigned int
1302 calc_opt0h(struct socket *so, int mtu_idx)
1304 struct tcpcb *tp = so_sototcpcb(so);
1305 int wscale = select_rcv_wscale(tp->rcv_wnd);
1307 return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
1308 V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
1309 V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
/*
 * Build the low word of option 0: TOS, ULP mode, and the receive buffer
 * size in 1KB units, clamped to the field maximum.
 */
1312 static inline unsigned int
1313 calc_opt0l(struct socket *so, int ulp_mode)
1315 struct tcpcb *tp = so_sototcpcb(so);
1318 val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) |
1319 V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));
1321 DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val);
/*
 * Build option 2: congestion-control flavor from the cong_alg tunable
 * (-1 means "use firmware default", leaving the flavor bits invalid).
 */
1325 static inline unsigned int
1326 calc_opt2(const struct socket *so, struct toedev *dev)
1330 flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);
1332 return (V_FLAVORS_VALID(flv_valid) |
1333 V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
/*
 * Sum the WR credits of everything on the pending-WR queue (each entry's
 * credit count lives in m_pkthdr.csum_data, see t3_push_frames()).
 */
1338 count_pending_wrs(const struct toepcb *toep)
1340 const struct mbuf *m;
1343 wr_queue_walk(toep, m)
1344 n += m->m_pkthdr.csum_data;
1350 (((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
/*
 * Build a CPL_ACT_OPEN_REQ in 'm' for an active open using atid 'atid'
 * and L2T entry 'e': fills the 4-tuple from the inpcb and the option
 * words from calc_opt0h/opt0l/opt2.
 * NOTE(review): both the inp_4tuple_get() call and the explicit
 * lport/fport/laddr/faddr copies appear below — presumably alternate
 * branches of an #if/#else elided from this listing; confirm before
 * assuming both run.
 */
1354 mk_act_open_req(struct socket *so, struct mbuf *m,
1355 unsigned int atid, const struct l2t_entry *e)
1357 struct cpl_act_open_req *req;
1358 struct inpcb *inp = so_sotoinpcb(so);
1359 struct tcpcb *tp = inp_inpcbtotcpcb(inp);
1360 struct toepcb *toep = tp->t_toe;
1361 struct toedev *tdev = toep->tp_toedev;
1363 m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));
1365 req = mtod(m, struct cpl_act_open_req *);
1366 m->m_pkthdr.len = m->m_len = sizeof(*req);
1368 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1370 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
1371 inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port);
1373 req->local_port = inp->inp_lport;
1374 req->peer_port = inp->inp_fport;
1375 memcpy(&req->local_ip, &inp->inp_laddr, 4);
1376 memcpy(&req->peer_ip, &inp->inp_faddr, 4);
1378 req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
1379 V_TX_CHANNEL(e->smt_idx));
1380 req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
1382 req->opt2 = htonl(calc_opt2(so, tdev));
/*
 * Map a hardware ACT_OPEN_RPL status code to a FreeBSD errno.
 * (switch header, remaining cases and default are elided in this
 * listing.)
 */
1387 * Convert an ACT_OPEN_RPL status to an errno.
1390 act_open_rpl_status_to_errno(int status)
1393 case CPL_ERR_CONN_RESET:
1394 return (ECONNREFUSED);
1395 case CPL_ERR_ARP_MISS:
1396 return (EHOSTUNREACH);
1397 case CPL_ERR_CONN_TIMEDOUT:
1399 case CPL_ERR_TCAM_FULL:
/* 4-tuple collision: the connection already exists in the TCAM. */
1401 case CPL_ERR_CONN_EXIST:
1402 log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
1403 return (EADDRINUSE);
/*
 * Tear down a failed active open: release offload resources and drop
 * the connection with the given errno.  Drops the inpcb write lock
 * (taken by the caller) via inp_wunlock.
 */
1410 fail_act_open(struct toepcb *toep, int errno)
1412 struct tcpcb *tp = toep->tp_tp;
1414 t3_release_offload_resources(toep);
1416 inp_wunlock(tp->t_inpcb);
1417 tcp_offload_drop(tp, errno);
/* Linux-origin statistics counter; presumably under an elided #ifdef. */
1421 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
/*
 * Handle an ACT_OPEN_RPL failure: convert the CPL status to an errno
 * and fail the open.  Connection retry (CPL_ERR_CONN_EXIST) is not
 * handled; the icsk/sk_reset_timer code below is Linux-origin and is
 * presumably inside an elided #if 0 / #ifdef block — confirm in the
 * full file.
 */
1426 * Handle active open failures.
1429 active_open_failed(struct toepcb *toep, struct mbuf *m)
1431 struct cpl_act_open_rpl *rpl = cplhdr(m);
1434 if (toep->tp_tp == NULL)
1437 inp = toep->tp_tp->t_inpcb;
1440 * Don't handle connection retry for now
1443 struct inet_connection_sock *icsk = inet_csk(sk);
1445 if (rpl->status == CPL_ERR_CONN_EXIST &&
1446 icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
1447 icsk->icsk_retransmit_timer.function = act_open_retry_timer;
1448 sk_reset_timer(so, &icsk->icsk_retransmit_timer,
1455 * drops the inpcb lock
1457 fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
/*
 * A failed active open allocated a TID unless it failed with TCAM
 * full, connection-exists, or ARP miss — those fail before a TID is
 * assigned.
 */
1465 * Return whether a failed active open has allocated a TID
1468 act_open_has_tid(int status)
1470 return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
1471 status != CPL_ERR_ARP_MISS;
/*
 * CPL handler for ACT_OPEN_RPL.  Queues the TID for release when one
 * was allocated (not needed on T3A), then runs the failure path.
 */
1475 * Process an ACT_OPEN_RPL CPL message.
1478 do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
1480 struct toepcb *toep = (struct toepcb *)ctx;
1481 struct cpl_act_open_rpl *rpl = cplhdr(m);
1483 if (cdev->type != T3A && act_open_has_tid(rpl->status))
1484 cxgb_queue_tid_release(cdev, GET_TID(rpl));
1486 active_open_failed(toep, m);
/*
 * ARP-resolution failure callback for an active open request: if the
 * connection is still in SYN_SENT/SYN_RECEIVED, fail it with
 * EHOSTUNREACH.
 * NOTE(review): fail_act_open() is declared above taking a toepcb,
 * but is called here with `so` — likely an inconsistency or an elided
 * intermediate line; verify against the full file.
 */
1491 * Handle an ARP failure for an active open. XXX purge ofo queue
1493 * XXX badly broken for crossed SYNs as the ATID is no longer valid.
1494 * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should
1495 * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't
1496 * free the atid. Hmm.
1500 act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m)
1502 struct toepcb *toep = m_get_toep(m);
1503 struct tcpcb *tp = toep->tp_tp;
1504 struct inpcb *inp = tp->t_inpcb;
1508 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
1510 * drops the inpcb lock
1512 fail_act_open(so, EHOSTUNREACH);
1513 printf("freeing %p\n", m);
/*
 * Initiate an offloaded active open: allocate a toepcb and an ATID,
 * resolve an L2T entry for the route, initialize the offload socket
 * state, build the ACT_OPEN_REQ and hand it to the L2 layer.  Enables
 * DDP afterwards when the socket was set up in DDP ULP mode.  The
 * error/cleanup labels are elided from this listing.
 */
1521 * Send an active open request.
1524 t3_connect(struct toedev *tdev, struct socket *so,
1525 struct rtentry *rt, struct sockaddr *nam)
1528 struct l2t_entry *e;
1529 struct tom_data *d = TOM_DATA(tdev);
1530 struct inpcb *inp = so_sotoinpcb(so);
1531 struct tcpcb *tp = intotcpcb(inp);
1532 struct toepcb *toep; /* allocated by init_offload_socket */
1536 toep = toepcb_alloc();
1540 if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0)
1543 e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam);
1547 inp_lock_assert(inp);
1548 m = m_gethdr(MT_DATA, M_WAITOK);
1551 m->m_toe.mt_toepcb = tp->t_toe;
/* If ARP resolution fails later, fail the open via this callback. */
1552 set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure);
1556 init_offload_socket(so, tdev, atid, e, rt, toep);
1558 install_offload_ops(so);
1560 mk_act_open_req(so, m, atid, e);
1565 m_set_toep(m, tp->t_toe);
1567 toep->tp_state = TCPS_SYN_SENT;
1568 l2t_send(d->cdev, (struct mbuf *)m, e);
1570 if (toep->tp_ulp_mode)
1571 t3_enable_ddp(toep, 0);
/* Error path: release the ATID and report out-of-memory. */
1575 printf("failing connect - free atid\n");
1577 free_atid(d->cdev, atid);
1579 printf("return ENOMEM\n");
/*
 * Send a CPL_ABORT_REQ for this connection.  Guards against duplicate
 * aborts via TP_ABORT_SHUTDOWN, purges the send queue so nothing is
 * transmitted after the abort, and defers the request on the
 * out-of-order queue while still in SYN_SENT.
 */
1584 * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do
1585 * not send multiple ABORT_REQs for the same connection and also that we do
1586 * not try to send a message after the connection has closed. Returns 1 if
1587 * an ABORT_REQ wasn't generated after all, 0 otherwise.
1590 t3_send_reset(struct toepcb *toep)
1593 struct cpl_abort_req *req;
1594 unsigned int tid = toep->tp_tid;
1595 int mode = CPL_ABORT_SEND_RST;
1596 struct tcpcb *tp = toep->tp_tp;
1597 struct toedev *tdev = toep->tp_toedev;
1598 struct socket *so = NULL;
1600 struct sockbuf *snd;
1603 inp_lock_assert(tp->t_inpcb);
1604 so = inp_inpcbtosocket(tp->t_inpcb);
1607 if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) ||
1610 toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN);
1612 snd = so_sockbuf_snd(so);
1613 /* Purge the send queue so we don't send anything after an abort. */
/* On T3A a post-close abort must carry CPL_ABORT_POST_CLOSE_REQ. */
1616 if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev))
1617 mode |= CPL_ABORT_POST_CLOSE_REQ;
1619 m = m_gethdr_nofail(sizeof(*req));
1620 m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep));
1621 set_arp_failure_handler(m, abort_arp_failure);
1623 req = mtod(m, struct cpl_abort_req *);
1624 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
1625 req->wr.wr_lo = htonl(V_WR_TID(tid));
1626 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
1627 req->rsvd0 = tp ? htonl(tp->snd_nxt) : 0;
1628 req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
/* While still in SYN_SENT, defer the abort until the open completes. */
1630 if (tp && (tp->t_state == TCPS_SYN_SENT))
1631 mbufq_tail(&toep->out_of_order_queue, m); // defer
1633 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);
/*
 * IPPROTO_IP socket-option handler for offloaded connections.  Only
 * IP_TOS is supported (IP_OPTIONS is rejected); high-precedence TOS
 * values require privilege.  Pushes the new TOS to the hardware via
 * t3_set_tos().
 */
1637 t3_ip_ctloutput(struct socket *so, struct sockopt *sopt)
1642 if (sopt->sopt_name == IP_OPTIONS)
1643 return (ENOPROTOOPT);
1645 if (sopt->sopt_name != IP_TOS)
1646 return (EOPNOTSUPP);
1648 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
1653 if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread))
1656 inp = so_sotoinpcb(so);
1658 inp_ip_tos_set(inp, optval);
/* The two stores look like alternate branches of an elided #if. */
1660 inp->inp_ip_tos = optval;
1662 t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe);
/*
 * IPPROTO_TCP socket-option handler for offloaded connections.
 * Supports TCP_CONGESTION (copies the algorithm name in and applies
 * it via t3_set_cong_control) and TCP_NODELAY (toggles TF_NODELAY and
 * pushes the Nagle setting to hardware when it changed).
 */
1669 t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt)
1674 if (sopt->sopt_name != TCP_CONGESTION &&
1675 sopt->sopt_name != TCP_NODELAY)
1676 return (EOPNOTSUPP);
1678 if (sopt->sopt_name == TCP_CONGESTION) {
1679 char name[TCP_CA_NAME_MAX];
1680 int optlen = sopt->sopt_valsize;
1686 err = copyinstr(sopt->sopt_val, name,
1687 min(TCP_CA_NAME_MAX - 1, optlen), &copied);
1693 tp = so_sototcpcb(so);
1695 * XXX I need to revisit this
1697 if ((err = t3_set_cong_control(so, name)) == 0) {
1698 #ifdef CONGESTION_CONTROL_SUPPORTED
1699 tp->t_cong_control = strdup(name, M_CXGB);
/* TCP_NODELAY path: read the flag and update tp->t_flags. */
1708 err = sooptcopyin(sopt, &optval, sizeof optval,
1714 inp = so_sotoinpcb(so);
1715 tp = inp_inpcbtotcpcb(inp);
1719 oldval = tp->t_flags;
1721 tp->t_flags |= TF_NODELAY;
1723 tp->t_flags &= ~TF_NODELAY;
/* Only touch hardware when the Nagle setting actually changed. */
1727 if (oldval != tp->t_flags && (tp->t_toe != NULL))
1728 t3_set_nagle(tp->t_toe);
/*
 * Top-level ctloutput for offloaded sockets: dispatch to the IP or
 * TCP handler by level, falling back to the stock tcp_ctloutput()
 * for anything they do not support.
 */
1736 t3_ctloutput(struct socket *so, struct sockopt *sopt)
1740 if (sopt->sopt_level != IPPROTO_TCP)
1741 err = t3_ip_ctloutput(so, sopt);
1743 err = t3_tcp_ctloutput(so, sopt);
1745 if (err != EOPNOTSUPP)
1748 return (tcp_ctloutput(so, sopt));
/*
 * Predicate used by handle_excess_rx(); body elided in this listing.
 */
1752 * Returns true if we need to explicitly request RST when we receive new data
1753 * on an RX-closed connection.
1756 need_rst_on_excess_rx(const struct toepcb *toep)
/*
 * Data arrived for a socket that can no longer receive: send a reset
 * if required and not already shutting down via abort.
 */
1762 * Handles Rx data that arrives in a state where the socket isn't accepting
1766 handle_excess_rx(struct toepcb *toep, struct mbuf *m)
1769 if (need_rst_on_excess_rx(toep) &&
1770 !(toep->tp_flags & TP_ABORT_SHUTDOWN))
1771 t3_send_reset(toep);
/*
 * Treat a CPL_GET_TCB_RPL as a DDP completion: extract the DDP offset
 * for the current buffer from the raw TCB image, synthesize an mbuf
 * describing the newly-placed bytes, update rcv_nxt and the DDP buffer
 * state, and wake the receiver.  Early exits handle the cases where a
 * prior CPL already invalidated the user buffer, the socket can no
 * longer receive, or the reply carries no new data.
 */
1776 * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE)
1777 * by getting the DDP offset from the TCB.
1780 tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
1782 struct ddp_state *q = &toep->tp_ddp_state;
1783 struct ddp_buf_state *bsp;
1784 struct cpl_get_tcb_rpl *hdr;
1785 unsigned int ddp_offset;
1788 struct sockbuf *rcv;
1795 so = inp_inpcbtosocket(tp->t_inpcb);
1797 inp_lock_assert(tp->t_inpcb);
1798 rcv = so_sockbuf_rcv(so);
1801 /* Note that we only account for CPL_GET_TCB issued by the DDP code.
1802 * We really need a cookie in order to dispatch the RPLs.
1806 /* It is a possible that a previous CPL already invalidated UBUF DDP
1807 * and moved the cur_buf idx and hence no further processing of this
1808 * skb is required. However, the app might be sleeping on
1809 * !q->get_tcb_count and we need to wake it up.
1811 if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
1812 int state = so_state_get(so);
1815 if (__predict_true((state & SS_NOFDREF) == 0))
1816 so_sorwakeup_locked(so);
1818 sockbuf_unlock(rcv);
/* Pull the DDP offset for the active buffer out of the TCB words.
 * Buffer 0's offset lives in the upper half of its 64-bit word. */
1823 bsp = &q->buf_state[q->cur_buf];
1825 tcb = (__be64 *)(hdr + 1);
1826 if (q->cur_buf == 0) {
1827 t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
1828 ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
1830 t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
1831 ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
1833 ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
/* m_len becomes the number of bytes DMAed since the last offset. */
1834 m->m_cur_offset = bsp->cur_offset;
1835 bsp->cur_offset = ddp_offset;
1836 m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;
1839 "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
1840 q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
1841 KASSERT(ddp_offset >= m->m_cur_offset,
1842 ("ddp_offset=%u less than cur_offset=%u",
1843 ddp_offset, m->m_cur_offset));
/* Debug/trace block (likely under an elided #if): decode DDP flags,
 * rcv_nxt and rx header offset from the TCB for logging. */
1847 unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;
1849 t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
1850 ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;
1852 t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
1853 rcv_nxt = t >> S_TCB_RCV_NXT;
1854 rcv_nxt &= M_TCB_RCV_NXT;
1856 t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
1857 rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
1858 rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;
1860 T3_TRACE2(TIDTB(sk),
1861 "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
1862 ddp_flags, rcv_nxt - rx_hdr_offset);
1864 "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
1865 tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
1867 "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
1868 rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
1870 "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
1871 q->buf_state[0].flags, q->buf_state[1].flags);
1875 if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
1876 handle_excess_rx(toep, m);
1881 if ((int)m->m_pkthdr.len < 0) {
1882 t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
/* UBUF (user buffer) completion path: clear NOCOPY/NODATA and mark
 * the mbuf as a pushed, completed user-buffer segment. */
1885 if (bsp->flags & DDP_BF_NOCOPY) {
1888 "tcb_rpl_as_ddp_complete: CANCEL UBUF");
1890 if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1891 printk("!cancel_ubuf");
1892 t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
1895 m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
1896 bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
1898 } else if (bsp->flags & DDP_BF_NOFLIP) {
1900 m->m_ddp_flags = 1; /* always a kernel buffer */
1902 /* now HW buffer carries a user buffer */
1903 bsp->flags &= ~DDP_BF_NOFLIP;
1904 bsp->flags |= DDP_BF_NOCOPY;
1906 /* It is possible that the CPL_GET_TCB_RPL doesn't indicate
1907 * any new data in which case we're done. If in addition the
1908 * offset is 0, then there wasn't a completion for the kbuf
1909 * and we need to decrement the posted count.
1911 if (m->m_pkthdr.len == 0) {
1912 if (ddp_offset == 0) {
1914 bsp->flags |= DDP_BF_NODATA;
1916 sockbuf_unlock(rcv);
1921 sockbuf_unlock(rcv);
1923 /* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
1924 * but it got here way late and nobody cares anymore.
/* Commit: attach the DDP gather list, advance rcv_nxt by the new
 * bytes and wake the receiver unless the fd is gone. */
1930 m->m_ddp_gl = (unsigned char *)bsp->gl;
1931 m->m_flags |= M_DDP;
1932 m->m_seq = tp->rcv_nxt;
1933 tp->rcv_nxt += m->m_pkthdr.len;
1934 tp->t_rcvtime = ticks;
1935 CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
1936 m->m_seq, q->cur_buf, m->m_pkthdr.len);
1937 if (m->m_pkthdr.len == 0) {
1938 q->user_ddp_pending = 0;
1943 state = so_state_get(so);
1944 if (__predict_true((state & SS_NOFDREF) == 0))
1945 so_sorwakeup_locked(so);
1947 sockbuf_unlock(rcv);
/*
 * CPL handler for GET_TCB_RPL: tolerate a missing toepcb, otherwise
 * process the reply as a DDP completion under the inpcb write lock.
 */
1951 * Process a CPL_GET_TCB_RPL. These can also be generated by the DDP code,
1952 * in that case they are similar to DDP completions.
1955 do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
1957 struct toepcb *toep = (struct toepcb *)ctx;
1959 /* OK if socket doesn't exist */
1961 printf("null toep in do_get_tcb_rpl\n");
1962 return (CPL_RET_BUF_DONE);
1965 inp_wlock(toep->tp_tp->t_inpcb);
1966 tcb_rpl_as_ddp_complete(toep, m);
1967 inp_wunlock(toep->tp_tp->t_inpcb);
/*
 * A CPL_RX_DATA arrived on a DDP connection whose sequence number is
 * ahead of tp->rcv_nxt: the gap was placed directly into the current
 * DDP buffer.  Convert the mbuf into a descriptor for those DDPed
 * bytes, advance rcv_nxt, and (visible below) stop re-enabling DDP
 * once the connection has fallen out of it.
 */
1973 handle_ddp_data(struct toepcb *toep, struct mbuf *m)
1975 struct tcpcb *tp = toep->tp_tp;
1977 struct ddp_state *q;
1978 struct ddp_buf_state *bsp;
1979 struct cpl_rx_data *hdr = cplhdr(m);
1980 unsigned int rcv_nxt = ntohl(hdr->seq);
1981 struct sockbuf *rcv;
/* No gap: nothing was DDPed ahead of this segment. */
1983 if (tp->rcv_nxt == rcv_nxt)
1986 inp_lock_assert(tp->t_inpcb);
1987 so = inp_inpcbtosocket(tp->t_inpcb);
1988 rcv = so_sockbuf_rcv(so);
1991 q = &toep->tp_ddp_state;
1992 bsp = &q->buf_state[q->cur_buf];
1993 KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
1994 rcv_nxt, tp->rcv_nxt));
1995 m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
1996 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
1997 CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
1998 rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);
2001 if ((int)m->m_pkthdr.len < 0) {
2002 t3_ddp_error(so, "handle_ddp_data: neg len");
2005 m->m_ddp_gl = (unsigned char *)bsp->gl;
2006 m->m_flags |= M_DDP;
2007 m->m_cur_offset = bsp->cur_offset;
2008 m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
2009 if (bsp->flags & DDP_BF_NOCOPY)
2010 bsp->flags &= ~DDP_BF_NOCOPY;
2012 m->m_seq = tp->rcv_nxt;
2013 tp->rcv_nxt = rcv_nxt;
2014 bsp->cur_offset += m->m_pkthdr.len;
2015 if (!(bsp->flags & DDP_BF_NOFLIP))
2018 * For now, don't re-enable DDP after a connection fell out of DDP
2021 q->ubuf_ddp_ready = 0;
2022 sockbuf_unlock(rcv);
/*
 * Deliver a CPL_RX_DATA payload to the socket: validate the sequence
 * number, strip the CPL header, track delayed-ACK mode changes,
 * advance rcv_nxt, append to the receive buffer and wake the reader.
 * Bails out through handle_excess_rx() when the socket can no longer
 * receive.
 */
2026 * Process new data received for a connection.
2029 new_rx_data(struct toepcb *toep, struct mbuf *m)
2031 struct cpl_rx_data *hdr = cplhdr(m);
2032 struct tcpcb *tp = toep->tp_tp;
2034 struct sockbuf *rcv;
2036 int len = be16toh(hdr->len);
2038 inp_wlock(tp->t_inpcb);
2040 so = inp_inpcbtosocket(tp->t_inpcb);
2042 if (__predict_false(so_no_receive(so))) {
2043 handle_excess_rx(toep, m);
2044 inp_wunlock(tp->t_inpcb);
/* On DDP connections, account for data that was placed directly. */
2049 if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
2050 handle_ddp_data(toep, m);
2052 m->m_seq = ntohl(hdr->seq);
2053 m->m_ulp_mode = 0; /* for iSCSI */
2056 if (__predict_false(m->m_seq != tp->rcv_nxt)) {
2058 "%s: TID %u: Bad sequence number %u, expected %u\n",
2059 toep->tp_toedev->name, toep->tp_tid, m->m_seq,
2062 inp_wunlock(tp->t_inpcb);
2066 m_adj(m, sizeof(*hdr));
2068 #ifdef URGENT_DATA_SUPPORTED
2070 * We don't handle urgent data yet
2072 if (__predict_false(hdr->urg))
2073 handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
2074 if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
2075 tp->urg_seq - tp->rcv_nxt < skb->len))
2076 tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
/* Track hardware delayed-ACK mode changes announced in the header. */
2079 if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
2080 toep->tp_delack_mode = hdr->dack_mode;
2081 toep->tp_delack_seq = tp->rcv_nxt;
2083 CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
2084 m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);
/* Trust the CPL header's length if the mbuf claims more. */
2086 if (len < m->m_pkthdr.len)
2087 m->m_pkthdr.len = m->m_len = len;
2089 tp->rcv_nxt += m->m_pkthdr.len;
2090 tp->t_rcvtime = ticks;
2091 toep->tp_enqueued_bytes += m->m_pkthdr.len;
2093 "new_rx_data: seq 0x%x len %u",
2094 m->m_seq, m->m_pkthdr.len);
2095 inp_wunlock(tp->t_inpcb);
2096 rcv = so_sockbuf_rcv(so);
2100 DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len);
2106 * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
2109 KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1),
2111 ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
2112 so, rcv->sb_cc, rcv->sb_mbmax));
2116 CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
2117 rcv->sb_cc, rcv->sb_mbcnt);
2119 state = so_state_get(so);
2120 if (__predict_true((state & SS_NOFDREF) == 0))
2121 so_sorwakeup_locked(so);
2123 sockbuf_unlock(rcv);
/*
 * CPL handler for RX_DATA: thin dispatch to new_rx_data().
 */
2127 * Handler for RX_DATA CPL messages.
2130 do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2132 struct toepcb *toep = (struct toepcb *)ctx;
2134 DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);
2136 new_rx_data(toep, m);
/*
 * Process a CPL_RX_DATA_DDP: data was placed directly into a DDP
 * buffer by hardware.  Decode the ddp_report to find which buffer and
 * at what offset, repurpose the mbuf as a descriptor for the placed
 * bytes (note m_len is redefined here), advance rcv_nxt, flip buffers
 * on completion, and wake the receiver under the documented PSH /
 * NOCOPY conditions.
 */
2142 new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
2145 struct ddp_state *q;
2146 struct ddp_buf_state *bsp;
2147 struct cpl_rx_data_ddp *hdr;
2149 unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
2151 unsigned int delack_mode;
2152 struct sockbuf *rcv;
2155 inp_wlock(tp->t_inpcb);
2156 so = inp_inpcbtosocket(tp->t_inpcb);
2158 if (__predict_false(so_no_receive(so))) {
2160 handle_excess_rx(toep, m);
2161 inp_wunlock(tp->t_inpcb);
2165 q = &toep->tp_ddp_state;
2167 ddp_report = ntohl(hdr->u.ddp_report);
2168 buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
2169 bsp = &q->buf_state[buf_idx];
2172 "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
2173 "hdr seq 0x%x len %u",
2174 tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
2177 "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
2178 G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);
2180 ddp_len = ntohs(hdr->len);
2181 rcv_nxt = ntohl(hdr->seq) + ddp_len;
2183 delack_mode = G_DDP_DACK_MODE(ddp_report);
2184 if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
2185 toep->tp_delack_mode = delack_mode;
2186 toep->tp_delack_seq = tp->rcv_nxt;
2189 m->m_seq = tp->rcv_nxt;
2190 tp->rcv_nxt = rcv_nxt;
2192 tp->t_rcvtime = ticks;
2194 * Store the length in m->m_len. We are changing the meaning of
2195 * m->m_len here, we need to be very careful that nothing from now on
2196 * interprets ->len of this packet the usual way.
2198 m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
2199 inp_wunlock(tp->t_inpcb);
2201 "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
2202 m->m_len, rcv_nxt, m->m_seq);
2204 * Figure out where the new data was placed in the buffer and store it
2205 * in when. Assumes the buffer offset starts at 0, consumer needs to
2206 * account for page pod's pg_offset.
2208 end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
2209 m->m_cur_offset = end_offset - m->m_pkthdr.len;
2211 rcv = so_sockbuf_rcv(so);
2214 m->m_ddp_gl = (unsigned char *)bsp->gl;
2215 m->m_flags |= M_DDP;
2216 bsp->cur_offset = end_offset;
2217 toep->tp_enqueued_bytes += m->m_pkthdr.len;
2220 * Length is only meaningful for kbuf
2222 if (!(bsp->flags & DDP_BF_NOCOPY))
2223 KASSERT(m->m_len <= bsp->gl->dgl_length,
2224 ("length received exceeds ddp pages: len=%d dgl_length=%d",
2225 m->m_len, bsp->gl->dgl_length));
2227 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2228 KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));
2230 * Bit 0 of flags stores whether the DDP buffer is completed.
2231 * Note that other parts of the code depend on this being in bit 0.
2233 if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
2234 panic("spurious ddp completion");
2236 m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
2237 if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
2238 q->cur_buf ^= 1; /* flip buffers */
2241 if (bsp->flags & DDP_BF_NOCOPY) {
2242 m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
2243 bsp->flags &= ~DDP_BF_NOCOPY;
2246 if (ddp_report & F_DDP_PSH)
2247 m->m_ddp_flags |= DDP_BF_PSH;
2249 m->m_ddp_flags |= DDP_BF_NODATA;
/* Linux-origin lines; presumably under an elided #ifdef. */
2252 skb_reset_transport_header(skb);
2253 tcp_hdr(skb)->fin = 0; /* changes original hdr->ddp_report */
/* Wake the reader on PSH, on a completed NOCOPY buffer, or for any
 * kernel-buffer (non-NOCOPY) data. */
2257 if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) ||
2258 (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1))
2259 || !(m->m_ddp_flags & DDP_BF_NOCOPY))))
2260 so_sorwakeup_locked(so);
2262 sockbuf_unlock(rcv);
/*
 * Mask of all DDP error bits checked against ddpvld_status in
 * do_rx_data_ddp() below (continuation line elided in this listing).
 */
2265 #define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
2266 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
2267 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
/*
 * CPL handler for RX_DATA_DDP: log and drop messages reporting DDP
 * errors, otherwise hand off to new_rx_data_ddp().
 */
2271 * Handler for RX_DATA_DDP CPL messages.
2274 do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2276 struct toepcb *toep = ctx;
2277 const struct cpl_rx_data_ddp *hdr = cplhdr(m);
2281 if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
2282 log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
2283 GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
2284 return (CPL_RET_BUF_DONE);
/* Linux-origin line; presumably under an elided #ifdef. */
2287 skb->h.th = tcphdr_skb->h.th;
2289 new_rx_data_ddp(toep, m);
/*
 * Process a CPL_RX_DDP_COMPLETE: a DDP buffer filled completely.
 * Compute how many bytes landed since the last known offset, advance
 * rcv_nxt, flip to the other buffer unless NOFLIP, attach the gather
 * list to the mbuf and wake the receiver.
 */
2294 process_ddp_complete(struct toepcb *toep, struct mbuf *m)
2296 struct tcpcb *tp = toep->tp_tp;
2298 struct ddp_state *q;
2299 struct ddp_buf_state *bsp;
2300 struct cpl_rx_ddp_complete *hdr;
2301 unsigned int ddp_report, buf_idx, when, delack_mode;
2303 struct sockbuf *rcv;
2305 inp_wlock(tp->t_inpcb);
2306 so = inp_inpcbtosocket(tp->t_inpcb);
2308 if (__predict_false(so_no_receive(so))) {
2309 struct inpcb *inp = so_sotoinpcb(so);
2311 handle_excess_rx(toep, m);
2315 q = &toep->tp_ddp_state;
2317 ddp_report = ntohl(hdr->ddp_report);
2318 buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
2319 m->m_pkthdr.csum_data = tp->rcv_nxt;
2321 rcv = so_sockbuf_rcv(so);
2324 bsp = &q->buf_state[buf_idx];
2325 when = bsp->cur_offset;
2326 m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
2327 tp->rcv_nxt += m->m_len;
2328 tp->t_rcvtime = ticks;
2330 delack_mode = G_DDP_DACK_MODE(ddp_report);
2331 if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
2332 toep->tp_delack_mode = delack_mode;
2333 toep->tp_delack_seq = tp->rcv_nxt;
/* Linux-origin lines; presumably under an elided #ifdef. */
2336 skb_reset_transport_header(skb);
2337 tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */
2339 inp_wunlock(tp->t_inpcb);
2341 KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2343 "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
2344 "ddp_report 0x%x offset %u, len %u",
2345 tp->rcv_nxt, bsp->cur_offset, ddp_report,
2346 G_DDP_OFFSET(ddp_report), m->m_len);
2348 m->m_cur_offset = bsp->cur_offset;
2349 bsp->cur_offset += m->m_len;
2351 if (!(bsp->flags & DDP_BF_NOFLIP)) {
2352 q->cur_buf ^= 1; /* flip buffers */
2353 if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
2358 "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
2359 "ddp_report %u offset %u",
2360 tp->rcv_nxt, bsp->cur_offset, ddp_report,
2361 G_DDP_OFFSET(ddp_report));
2363 m->m_ddp_gl = (unsigned char *)bsp->gl;
2364 m->m_flags |= M_DDP;
2365 m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
2366 if (bsp->flags & DDP_BF_NOCOPY)
2367 bsp->flags &= ~DDP_BF_NOCOPY;
2369 m->m_ddp_flags |= DDP_BF_NODATA;
2372 if ((so_state_get(so) & SS_NOFDREF) == 0)
2373 so_sorwakeup_locked(so);
2375 sockbuf_unlock(rcv);
/*
 * CPL handler for RX_DDP_COMPLETE: thin dispatch to
 * process_ddp_complete().
 */
2379 * Handler for RX_DDP_COMPLETE CPL messages.
2382 do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2384 struct toepcb *toep = ctx;
/* Linux-origin line; presumably under an elided #ifdef. */
2388 skb->h.th = tcphdr_skb->h.th;
2390 process_ddp_complete(toep, m);
/*
 * Transition an offloaded connection to TIME_WAIT: defeat timestamp
 * recycling and RTT metric updates, then hand the tcpcb to
 * tcp_offload_twstart().
 */
2395 * Move a socket to TIME_WAIT state. We need to make some adjustments to the
2396 * socket state before calling tcp_time_wait to comply with its expectations.
2399 enter_timewait(struct tcpcb *tp)
2402 * Bump rcv_nxt for the peer FIN. We don't do this at the time we
2403 * process peer_close because we don't want to carry the peer FIN in
2404 * the socket's receive queue and if we increment rcv_nxt without
2405 * having the FIN in the receive queue we'll confuse facilities such
2408 inp_wlock(tp->t_inpcb);
2411 tp->ts_recent_age = 0; /* defeat recycling */
2412 tp->t_srtt = 0; /* defeat tcp_update_metrics */
2413 inp_wunlock(tp->t_inpcb);
2414 tcp_offload_twstart(tp);
/*
 * Same as enter_timewait() but finishes with
 * tcp_offload_twstart_disconnect() for the disconnect path.
 */
2418 enter_timewait_disconnect(struct tcpcb *tp)
2421 * Bump rcv_nxt for the peer FIN. We don't do this at the time we
2422 * process peer_close because we don't want to carry the peer FIN in
2423 * the socket's receive queue and if we increment rcv_nxt without
2424 * having the FIN in the receive queue we'll confuse facilities such
2427 inp_wlock(tp->t_inpcb);
2430 tp->ts_recent_age = 0; /* defeat recycling */
2431 tp->t_srtt = 0; /* defeat tcp_update_metrics */
2432 inp_wunlock(tp->t_inpcb);
2433 tcp_offload_twstart_disconnect(tp);
/*
 * A PEER_CLOSE on a DDP connection may implicitly complete placed
 * data (the FIN's sequence number is ahead of rcv_nxt).  Build an
 * mbuf descriptor for those bytes, advance rcv_nxt (excluding the
 * FIN) and wake the receiver.
 */
2437 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE. This
2438 * function deals with the data that may be reported along with the FIN.
2439 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
2440 * perform normal FIN-related processing. In the latter case 1 indicates that
2441 * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
2445 handle_peer_close_data(struct socket *so, struct mbuf *m)
2447 struct tcpcb *tp = so_sototcpcb(so);
2448 struct toepcb *toep = tp->t_toe;
2449 struct ddp_state *q;
2450 struct ddp_buf_state *bsp;
2451 struct cpl_peer_close *req = cplhdr(m);
2452 unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */
2453 struct sockbuf *rcv;
2455 if (tp->rcv_nxt == rcv_nxt) /* no data */
2458 CTR0(KTR_TOM, "handle_peer_close_data");
2459 if (__predict_false(so_no_receive(so))) {
2460 handle_excess_rx(toep, m);
2463 * Although we discard the data we want to process the FIN so
2464 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
2465 * PEER_CLOSE without data. In particular this PEER_CLOSE
2466 * may be what will close the connection. We return 1 because
2467 * handle_excess_rx() already freed the packet.
2472 inp_lock_assert(tp->t_inpcb);
2473 q = &toep->tp_ddp_state;
2474 rcv = so_sockbuf_rcv(so);
2477 bsp = &q->buf_state[q->cur_buf];
2478 m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
2479 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2480 m->m_ddp_gl = (unsigned char *)bsp->gl;
2481 m->m_flags |= M_DDP;
2482 m->m_cur_offset = bsp->cur_offset;
2484 DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
2485 m->m_seq = tp->rcv_nxt;
2486 tp->rcv_nxt = rcv_nxt;
2487 bsp->cur_offset += m->m_pkthdr.len;
2488 if (!(bsp->flags & DDP_BF_NOFLIP))
/* Linux-origin lines; presumably under an elided #ifdef. */
2491 skb_reset_transport_header(skb);
2492 tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */
2494 tp->t_rcvtime = ticks;
2496 if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
2497 so_sorwakeup_locked(so);
2499 sockbuf_unlock(rcv);
/*
 * Process a peer FIN (PEER_CLOSE): handle any implicit DDP data, run
 * the FIN state-machine transitions (SYN_RECEIVED/ESTABLISHED ->
 * CLOSE_WAIT, FIN_WAIT_1 -> CLOSING, FIN_WAIT_2 -> TIME_WAIT or close
 * depending on a pending abort), then perform the chosen action after
 * dropping the inpcb lock.
 */
2505 * Handle a peer FIN.
2508 do_peer_fin(struct toepcb *toep, struct mbuf *m)
2511 struct tcpcb *tp = toep->tp_tp;
2515 CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state);
2516 if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
2517 printf("abort_pending set\n");
2521 inp_wlock(tp->t_inpcb);
2522 so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
2523 if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
2524 keep = handle_peer_close_data(so, m);
2526 inp_wunlock(tp->t_inpcb);
2530 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2532 "waking up waiters for cantrcvmore on %p ", so);
2536 * If connection is half-synchronized
2537 * (ie NEEDSYN flag on) then delay ACK,
2538 * so it may be piggybacked when SYN is sent.
2539 * Otherwise, since we received a FIN then no
2540 * more input can be expected, send ACK now.
2542 if (tp->t_flags & TF_NEEDSYN)
2543 tp->t_flags |= TF_DELACK;
2545 tp->t_flags |= TF_ACKNOW;
2549 switch (tp->t_state) {
2550 case TCPS_SYN_RECEIVED:
2551 tp->t_starttime = ticks;
/* FALLTHROUGH */
2553 case TCPS_ESTABLISHED:
2554 tp->t_state = TCPS_CLOSE_WAIT;
2556 case TCPS_FIN_WAIT_1:
2557 tp->t_state = TCPS_CLOSING;
2559 case TCPS_FIN_WAIT_2:
2561 * If we've sent an abort_req we must have sent it too late,
2562 * HW will send us a reply telling us so, and this peer_close
2563 * is really the last message for this connection and needs to
2564 * be treated as an abort_rpl, i.e., transition the connection
2565 * to TCP_CLOSE (note that the host stack does this at the
2566 * time of generating the RST but we must wait for HW).
2567 * Otherwise we enter TIME_WAIT.
2569 t3_release_offload_resources(toep);
2570 if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
2573 action = TCP_TIMEWAIT;
2578 "%s: TID %u received PEER_CLOSE in bad state %d\n",
2579 toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state);
2581 inp_wunlock(tp->t_inpcb);
/* Perform the deferred action now that the lock is dropped. */
2583 if (action == TCP_TIMEWAIT) {
2585 } else if (action == TCP_DROP) {
2586 tcp_offload_drop(tp, 0);
2587 } else if (action == TCP_CLOSE) {
2588 tcp_offload_close(tp);
/* Linux-origin async-wakeup block; presumably under an elided #if. */
2592 /* Do not send POLL_HUP for half duplex close. */
2593 if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
2594 sk->sk_state == TCP_CLOSE)
2595 sk_wake_async(so, 1, POLL_HUP);
2597 sk_wake_async(so, 1, POLL_IN);
/*
 * CPL handler for PEER_CLOSE: thin dispatch to do_peer_fin().
 */
2606 * Handler for PEER_CLOSE CPL messages.
2609 do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2611 struct toepcb *toep = (struct toepcb *)ctx;
2615 do_peer_fin(toep, m);
/*
 * Process a CLOSE_CON_RPL (our FIN was ACKed): record snd_una,
 * then run the close-side state machine — CLOSING -> TIME_WAIT or
 * abort handling, FIN_WAIT_1 -> FIN_WAIT_2 (arming the finwait2
 * timer when the peer can send no more).  Action is performed after
 * the inpcb lock is dropped.
 */
2620 process_close_con_rpl(struct toepcb *toep, struct mbuf *m)
2622 struct cpl_close_con_rpl *rpl = cplhdr(m);
2623 struct tcpcb *tp = toep->tp_tp;
2626 struct sockbuf *rcv;
2628 inp_wlock(tp->t_inpcb);
2629 so = inp_inpcbtosocket(tp->t_inpcb);
2631 tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */
2633 if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
2634 inp_wunlock(tp->t_inpcb);
2638 CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep,
2639 tp->t_state, !!(so_state_get(so) & SS_NOFDREF));
2641 switch (tp->t_state) {
2642 case TCPS_CLOSING: /* see FIN_WAIT2 case in do_peer_fin */
2643 t3_release_offload_resources(toep);
2644 if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
2648 action = TCP_TIMEWAIT;
2653 * In this state we don't care about pending abort_rpl.
2654 * If we've sent abort_req it was post-close and was sent too
2655 * late, this close_con_rpl is the actual last message.
2657 t3_release_offload_resources(toep);
2660 case TCPS_FIN_WAIT_1:
2662 * If we can't receive any more
2663 * data, then closing user can proceed.
2664 * Starting the timer is contrary to the
2665 * specification, but if we don't get a FIN
2666 * we'll hang forever.
2669 * we should release the tp also, and use a
2673 rcv = so_sockbuf_rcv(so);
2677 if (rcv->sb_state & SBS_CANTRCVMORE) {
2681 soisdisconnected(so);
2682 timeout = (tcp_fast_finwait2_recycle) ?
2683 tcp_finwait2_timeout : tcp_maxidle;
2684 tcp_timer_activate(tp, TT_2MSL, timeout);
2686 tp->t_state = TCPS_FIN_WAIT_2;
/* SO_LINGER with zero timeout: abort instead of lingering. */
2687 if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 &&
2688 (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
2695 "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
2696 toep->tp_toedev->tod_name, toep->tp_tid,
2699 inp_wunlock(tp->t_inpcb);
2702 if (action == TCP_TIMEWAIT) {
2703 enter_timewait_disconnect(tp);
2704 } else if (action == TCP_DROP) {
2705 tcp_offload_drop(tp, 0);
2706 } else if (action == TCP_CLOSE) {
2707 tcp_offload_close(tp);
/*
 * CPL handler for CLOSE_CON_RPL: thin dispatch to
 * process_close_con_rpl().
 */
2714 * Handler for CLOSE_CON_RPL CPL messages.
2717 do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
2720 struct toepcb *toep = (struct toepcb *)ctx;
2722 process_close_con_rpl(toep, m);
/*
 * Process an ABORT_RPL that we were expecting (TP_ABORT_RPL_PENDING
 * set).  On non-T3A hardware the RPL may arrive in two halves; only
 * when the pairing conditions are met are the offload resources
 * released and the connection closed.
 */
2727 * Process abort replies. We only process these messages if we anticipate
2728 * them as the coordination between SW and HW in this area is somewhat lacking
2729 * and sometimes we get ABORT_RPLs after we are done with the connection that
2730 * originated the ABORT_REQ.
2733 process_abort_rpl(struct toepcb *toep, struct mbuf *m)
2735 struct tcpcb *tp = toep->tp_tp;
2740 T3_TRACE1(TIDTB(sk),
2741 "process_abort_rpl: GTS rpl pending %d",
2742 sock_flag(sk, ABORT_RPL_PENDING));
2745 inp_wlock(tp->t_inpcb);
2746 so = inp_inpcbtosocket(tp->t_inpcb);
2748 if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
2750 * XXX panic on tcpdrop
2752 if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev))
2753 toep->tp_flags |= TP_ABORT_RPL_RCVD;
2755 toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING);
2756 if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) ||
2757 !is_t3a(toep->tp_toedev)) {
2758 if (toep->tp_flags & TP_ABORT_REQ_RCVD)
2759 panic("TP_ABORT_REQ_RCVD set");
2760 t3_release_offload_resources(toep);
2765 inp_wunlock(tp->t_inpcb);
2768 tcp_offload_close(tp);
/*
 * CPL dispatch entry point for ABORT_RPL_RSS.  Filters out replies to
 * post-close aborts (CPL_ERR_ABORT_FAILED) and the T3A race where the
 * socket is already gone, then forwards the rest to process_abort_rpl().
 * Drops a toepcb reference on every path that consumes the message.
 */
2774 * Handle an ABORT_RPL_RSS CPL message.
2777 do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2779 struct cpl_abort_rpl_rss *rpl = cplhdr(m);
2780 struct toepcb *toep;
2783 * Ignore replies to post-close aborts indicating that the abort was
2784 * requested too late. These connections are terminated when we get
2785 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
2786 * arrives the TID is either no longer used or it has been recycled.
2788 if (rpl->status == CPL_ERR_ABORT_FAILED) {
2794 toep = (struct toepcb *)ctx;
2797 * Sometimes we've already closed the socket, e.g., a post-close
2798 * abort races with ABORT_REQ_RSS, the latter frees the socket
2799 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED,
2800 * but FW turns the ABORT_REQ into a regular one and so we get
2801 * ABORT_RPL_RSS with status 0 and no socket. Only on T3A.
/* Orphaned toepcb: release the TID, its L2T entry, and our reference. */
2806 if (toep->tp_tp == NULL) {
2807 log(LOG_NOTICE, "removing tid for abort\n");
2808 cxgb_remove_tid(cdev, toep, toep->tp_tid);
2810 l2t_release(L2DATA(cdev), toep->tp_l2t);
2812 toepcb_release(toep);
2816 log(LOG_NOTICE, "toep=%p\n", toep);
2817 log(LOG_NOTICE, "tp=%p\n", toep->tp_tp);
2820 process_abort_rpl(toep, m);
2821 toepcb_release(toep);
/*
 * Map a CPL abort reason to the errno reported to the socket owner.
 * '*need_rst' (set by the caller-visible contract) indicates whether a
 * RST should be sent in response; the assignments are not visible in
 * this fragment.  Some case bodies/returns fall outside this view.
 */
2826 * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also
2827 * indicate whether RST should be sent in response.
2830 abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst)
2832 struct tcpcb *tp = so_sototcpcb(so);
2834 switch (abort_reason) {
2835 case CPL_ERR_BAD_SYN:
/* NOTE(review): Linux SNMP counter remnant; presumably a no-op macro here. */
2837 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through
2839 case CPL_ERR_CONN_RESET:
2840 // XXX need to handle SYN_RECV due to crossed SYNs
/* A reset after the peer half-closed maps to EPIPE, else ECONNRESET. */
2841 return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
2842 case CPL_ERR_XMIT_TIMEDOUT:
2843 case CPL_ERR_PERSIST_TIMEDOUT:
2844 case CPL_ERR_FINWAIT2_TIMEDOUT:
2845 case CPL_ERR_KEEPALIVE_TIMEDOUT:
2847 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
/*
 * Populate 'm' with a firmware ABORT_RPL work request for 'tid' and set
 * the mbuf length to exactly one cpl_abort_rpl.
 * NOTE(review): the 'cmd' parameter is not consumed in the lines visible
 * here — presumably stored into the reply's cmd field; confirm upstream.
 */
2856 set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd)
2858 struct cpl_abort_rpl *rpl = cplhdr(m);
2860 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
2861 rpl->wr.wr_lo = htonl(V_WR_TID(tid));
2862 m->m_len = m->m_pkthdr.len = sizeof(*rpl);
2864 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
2869 send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m)
2871 struct mbuf *reply_mbuf;
2872 struct cpl_abort_req_rss *req = cplhdr(m);
2874 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl));
2875 m_set_priority(m, CPL_PRIORITY_DATA);
2876 m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl);
2877 set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status);
2878 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
/*
 * Predicate: true when the ABORT_REQ_RSS status is one of the two
 * "negative advice" codes, which indicate congestion hints rather than
 * a real connection abort.
 */
2883 * Returns whether an ABORT_REQ_RSS message is a negative advice.
2886 is_neg_adv_abort(unsigned int status)
2888 return status == CPL_ERR_RTX_NEG_ADVICE ||
2889 status == CPL_ERR_PERSIST_NEG_ADVICE;
/*
 * Send an ABORT_RPL answering the ABORT_REQ_RSS in 'm' with disposition
 * 'rst_status'.  If no mbuf can be allocated right now, the reply is
 * deferred: rst_status is stashed in req->status and the request is
 * queued for send_deferred_abort_rpl().
 */
2893 send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status)
2895 struct mbuf *reply_mbuf;
2896 struct cpl_abort_req_rss *req = cplhdr(m);
2898 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
2901 /* Defer the reply. Stick rst_status into req->cmd. */
2902 req->status = rst_status;
2903 t3_defer_reply(m, tdev, send_deferred_abort_rpl);
2907 m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
2908 set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status);
2912 * XXX need to sync with ARP as for SYN_RECV connections we can send
2913 * these messages while ARP is pending. For other connection states
2914 * it's not a problem.
2916 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
/*
 * Remove a SYN_RECV child connection from its listener's SYN queue.
 * Currently a stub on FreeBSD (CXGB_UNIMPLEMENTED panics/asserts); the
 * trailing lines are dead Linux code kept for reference.
 */
2921 cleanup_syn_rcv_conn(struct socket *child, struct socket *parent)
2923 CXGB_UNIMPLEMENTED();
2925 struct request_sock *req = child->sk_user_data;
2927 inet_csk_reqsk_queue_removed(parent, req);
2928 synq_remove(tcp_sk(child));
2930 child->sk_user_data = NULL;
/*
 * Abort a SYN_RECV child connection.  If the listener is still in
 * TCPS_LISTEN we must do the cleanup ourselves: detach the child from
 * the SYN queue, release its offload resources under the inpcb lock,
 * and close the child tcpcb via the offload shim.
 */
2936 * Performs the actual work to abort a SYN_RECV connection.
2939 do_abort_syn_rcv(struct socket *child, struct socket *parent)
2941 struct tcpcb *parenttp = so_sototcpcb(parent);
2942 struct tcpcb *childtp = so_sototcpcb(child);
2945 * If the server is still open we clean up the child connection,
2946 * otherwise the server already did the clean up as it was purging
2947 * its SYN queue and the skb was just sitting in its backlog.
2949 if (__predict_false(parenttp->t_state == TCPS_LISTEN)) {
2950 cleanup_syn_rcv_conn(child, parent);
2951 inp_wlock(childtp->t_inpcb);
2952 t3_release_offload_resources(childtp->t_toe);
2953 inp_wunlock(childtp->t_inpcb);
2954 tcp_offload_close(childtp);
/*
 * Abort handling for a connection still on its parent's SYN queue.
 * Looks up the listener via the STID map, runs do_abort_syn_rcv(), and
 * answers the ABORT_REQ with a no-RST reply.  Currently stubbed out on
 * FreeBSD (CXGB_UNIMPLEMENTED); the remainder is legacy reference code.
 * Returns -1 if the connection is unexpectedly not on the SYN queue.
 */
2960 * Handle abort requests for a SYN_RECV connection. These need extra work
2961 * because the socket is on its parent's SYN queue.
2964 abort_syn_rcv(struct socket *so, struct mbuf *m)
2966 CXGB_UNIMPLEMENTED();
2968 struct socket *parent;
2969 struct toedev *tdev = toep->tp_toedev;
2970 struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
2971 struct socket *oreq = so->so_incomp;
2972 struct t3c_tid_entry *t3c_stid;
2976 return -1; /* somehow we are not on the SYN queue */
2978 t = &(T3C_DATA(cdev))->tid_maps;
2979 t3c_stid = lookup_stid(t, oreq->ts_recent);
2980 parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
2983 do_abort_syn_rcv(so, parent);
2984 send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
/*
 * Core ABORT_REQ processing for an established/closing connection.
 * Runs under the inpcb write lock.  The first request latches
 * TP_ABORT_REQ_RCVD and returns (logic in the elided branch); a repeat
 * request (T3A sends two) falls through to the three-case analysis
 * below, possibly tearing the connection down and always answering
 * with send_abort_rpl().
 */
2991 * Process abort requests. If we are waiting for an ABORT_RPL we ignore this
2992 * request except that we need to reply to it.
2995 process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev)
2997 int rst_status = CPL_ABORT_NO_RST;
2998 const struct cpl_abort_req_rss *req = cplhdr(m);
2999 struct tcpcb *tp = toep->tp_tp;
3003 inp_wlock(tp->t_inpcb);
3004 so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
3005 if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
3006 toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
3011 toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
3013 * Three cases to consider:
3014 * a) We haven't sent an abort_req; close the connection.
3015 * b) We have sent a post-close abort_req that will get to TP too late
3016 * and will generate a CPL_ERR_ABORT_FAILED reply. The reply will
3017 * be ignored and the connection should be closed now.
3018 * c) We have sent a regular abort_req that will get to TP too late.
3019 * That will generate an abort_rpl with status 0, wait for it.
3021 if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
3022 (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
/* Report the abort reason to the user as a socket error. */
3025 error = abort_status_to_errno(so, req->status,
3027 so_error_set(so, error);
3029 if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
3032 * SYN_RECV needs special processing. If abort_syn_rcv()
3033 * returns 0 is has taken care of the abort.
3035 if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
3038 t3_release_offload_resources(toep);
3041 inp_wunlock(tp->t_inpcb);
3044 tcp_offload_close(tp);
3046 send_abort_rpl(m, tdev, rst_status);
3049 inp_wunlock(tp->t_inpcb);
/*
 * CPL dispatch entry point for ABORT_REQ_RSS.  Ignores negative-advice
 * statuses, special-cases connections still in SYN_RCVD (tear down the
 * TID/L2T directly), and otherwise forwards to process_abort_req().
 */
3053 * Handle an ABORT_REQ_RSS CPL message.
3056 do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3058 const struct cpl_abort_req_rss *req = cplhdr(m);
3059 struct toepcb *toep = (struct toepcb *)ctx;
3061 if (is_neg_adv_abort(req->status)) {
3066 log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid);
/* Embryonic connection: no socket yet, release HW state directly. */
3068 if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
3069 cxgb_remove_tid(cdev, toep, toep->tp_tid);
3070 toep->tp_flags |= TP_ABORT_REQ_RCVD;
3072 send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
3074 l2t_release(L2DATA(cdev), toep->tp_l2t);
/* Detach the toepcb from its tcpcb before dropping our reference. */
3079 toep->tp_tp->t_toe = NULL;
3080 toep->tp_tp->t_flags &= ~TF_TOE;
3083 * XXX need to call syncache_chkrst - but we don't
3084 * have a way of doing that yet
3086 toepcb_release(toep);
3087 log(LOG_ERR, "abort for unestablished connection :-(\n");
3090 if (toep->tp_tp == NULL) {
3091 log(LOG_NOTICE, "disconnected toepcb\n");
3092 /* should be freed momentarily */
3098 process_abort_req(toep, m, toep->tp_toedev);
3099 toepcb_release(toep);
/*
 * Abort a passive-open child: clean up SYN_RECV state via
 * do_abort_syn_rcv(), and on T3 hardware reuse the incoming mbuf to
 * send a rejecting CPL_PASS_ACCEPT_RPL back to the adapter.
 */
3104 pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
3106 struct toedev *tdev = TOE_DEV(parent);
3108 do_abort_syn_rcv(child, parent);
3109 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
3110 struct cpl_pass_accept_rpl *rpl = cplhdr(m);
3112 rpl->opt0h = htonl(F_TCAM_BYPASS);
3113 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
3114 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
/*
 * ARP-failure recovery for a passive open: locate the parent listener
 * through the STID map and abort the embryonic connection with
 * pass_open_abort().  Currently stubbed on FreeBSD (CXGB_UNIMPLEMENTED);
 * the body below is retained reference logic.
 */
3120 handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
3122 CXGB_UNIMPLEMENTED();
3125 struct t3cdev *cdev;
3126 struct socket *parent;
3127 struct socket *oreq;
3128 struct t3c_tid_entry *t3c_stid;
3130 struct tcpcb *otp, *tp = so_sototcpcb(so);
3131 struct toepcb *toep = tp->t_toe;
3134 * If the connection is being aborted due to the parent listening
3135 * socket going away there's nothing to do, the ABORT_REQ will close
3138 if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
3143 oreq = so->so_incomp;
3144 otp = so_sototcpcb(oreq);
3147 t = &(T3C_DATA(cdev))->tid_maps;
3148 t3c_stid = lookup_stid(t, otp->ts_recent);
3149 parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
3152 pass_open_abort(so, parent, m);
/*
 * ARP-failure callback installed on CPL_PASS_ACCEPT_RPL mbufs; treated
 * like an ABORT_REQ_RSS in SYN_RECV and routed to
 * handle_pass_open_arp_failure().  The skb lines are dead Linux code.
 */
3158 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL. This is treated similarly
3159 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
3163 pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m)
3167 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
3168 BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk);
3170 handle_pass_open_arp_failure(m_get_socket(m), m);
/*
 * Build a rejecting CPL_PASS_ACCEPT_RPL in 'reply_mbuf' for the accept
 * request in 'req_mbuf': TCAM bypass + CPL_PASS_OPEN_REJECT status,
 * echoing the peer IP while it is still intact in the request.
 */
3174 * Populate a reject CPL_PASS_ACCEPT_RPL WR.
3177 mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf)
3179 struct cpl_pass_accept_req *req = cplhdr(req_mbuf);
3180 struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf);
3181 unsigned int tid = GET_TID(req);
3183 m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP);
3184 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3185 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
3186 rpl->peer_ip = req->peer_ip; // req->peer_ip not overwritten yet
3187 rpl->opt0h = htonl(F_TCAM_BYPASS);
3188 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
3190 rpl->rsvd = rpl->opt2; /* workaround for HW bug */
/*
 * Deferred handler: allocate a reply mbuf (cannot fail) and send the
 * reject built by mk_pass_accept_rpl() for the queued accept request.
 */
3194 * Send a deferred reject to an accept request.
3197 reject_pass_request(struct toedev *tdev, struct mbuf *m)
3199 struct mbuf *reply_mbuf;
3201 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl));
3202 mk_pass_accept_rpl(reply_mbuf, m);
3203 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
/*
 * Callback invoked by the syncache for offload events on 'arg' (a
 * toepcb).  Both visible events drop the extra toepcb reference taken
 * when the entry was added; unknown events are logged.
 */
3208 handle_syncache_event(int event, void *arg)
3210 struct toepcb *toep = arg;
3213 case TOE_SC_ENTRY_PRESENT:
3215 * entry already exists - free toepcb
3218 printf("syncache entry present\n");
3219 toepcb_release(toep);
3223 * The syncache has given up on this entry
3224 * either it timed out, or it was evicted
3225 * we need to explicitly release the tid
3227 printf("syncache entry dropped\n");
3228 toepcb_release(toep);
3231 log(LOG_ERR, "unknown syncache event %d\n", event);
/*
 * Enter a passive-open request into the FreeBSD syncache.  Synthesizes
 * a SYN tcphdr and tcpopt from the fields HW reported in the
 * CPL_PASS_ACCEPT_REQ, seeds the toepcb sequence-tracking fields from
 * rcv_isn + 1, and registers cxgb_toe_usrreqs plus 'toep' as the
 * offload context via syncache_offload_add().
 */
3237 syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
3239 struct in_conninfo inc;
3243 int mss, wsf, sack, ts;
3244 uint32_t rcv_isn = ntohl(req->rcv_isn);
3246 bzero(&to, sizeof(struct tcpopt));
3247 inp = so_sotoinpcb(lso);
3250 * Fill out information for entering us into the syncache
/* Ports/ISN are carried in network byte order straight from the CPL. */
3252 inc.inc_fport = th.th_sport = req->peer_port;
3253 inc.inc_lport = th.th_dport = req->local_port;
3254 th.th_seq = req->rcv_isn;
3255 th.th_flags = TH_SYN;
3257 toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;
3262 inc.inc_faddr.s_addr = req->peer_ip;
3263 inc.inc_laddr.s_addr = req->local_ip;
3265 DPRINTF("syncache add of %d:%d %d:%d\n",
3266 ntohl(req->local_ip), ntohs(req->local_port),
3267 ntohl(req->peer_ip), ntohs(req->peer_port));
/* TCP options as decoded by the adapter from the incoming SYN. */
3269 mss = req->tcp_options.mss;
3270 wsf = req->tcp_options.wsf;
3271 ts = req->tcp_options.tstamp;
3272 sack = req->tcp_options.sack;
3275 to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
3276 syncache_offload_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep);
/*
 * Heavy lifting for CPL_PASS_ACCEPT_REQ under the (unowned) listening
 * socket's lock: validate listener state and ingress interface, resolve
 * the route/L2T entry for the peer, allocate and initialize a toepcb,
 * insert the TID, add the embryonic connection to the syncache, and
 * send an accepting CPL_PASS_ACCEPT_RPL (with optional DDP ulp mode).
 * Failure paths (bottom) either defer a reject or release the TID.
 */
3281 * Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket
3282 * lock held. Note that the sock here is a listening socket that is not owned
3286 process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev,
3287 struct listen_ctx *lctx)
3290 struct l2t_entry *e;
3292 struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
3293 struct cpl_pass_accept_rpl *rpl;
3294 struct cpl_pass_accept_req *req = cplhdr(m);
3295 unsigned int tid = GET_TID(req);
3296 struct tom_data *d = TOM_DATA(tdev);
3297 struct t3cdev *cdev = d->cdev;
3298 struct tcpcb *tp = so_sototcpcb(so);
3299 struct toepcb *newtoep;
3300 struct rtentry *dst;
3301 struct sockaddr_in nam;
3302 struct t3c_data *td = T3C_DATA(cdev);
/* No reply mbuf: defer the reject (T3) or just queue a TID release. */
3304 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
3305 if (__predict_false(reply_mbuf == NULL)) {
3306 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
3307 t3_defer_reply(m, tdev, reject_pass_request);
3309 cxgb_queue_tid_release(cdev, tid);
3312 DPRINTF("failed to get reply_mbuf\n");
3317 if (tp->t_state != TCPS_LISTEN) {
3318 DPRINTF("socket not in listen state\n");
/* Map the destination MAC/VLAN back to the ingress interface. */
3323 tim.mac_addr = req->dst_mac;
3324 tim.vlan_tag = ntohs(req->vlan_tag);
3325 if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
3326 DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n");
3332 * XXX do route lookup to confirm that we're still listening on this
/* Dead Linux reference code (ip_route_input / rtable) follows. */
3335 if (ip_route_input(skb, req->local_ip, req->peer_ip,
3336 G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev))
3338 rt_flags = ((struct rtable *)skb->dst)->rt_flags &
3339 (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL);
3340 dst_release(skb->dst); // done with the input route, release it
3343 if ((rt_flags & RTF_LOCAL) == 0)
3349 rt_flags = RTF_LOCAL;
3350 if ((rt_flags & RTF_LOCAL) == 0)
3354 * Calculate values and add to syncache
3357 newtoep = toepcb_alloc();
3358 if (newtoep == NULL)
3361 bzero(&nam, sizeof(struct sockaddr_in));
3363 nam.sin_len = sizeof(struct sockaddr_in);
3364 nam.sin_family = AF_INET;
3365 nam.sin_addr.s_addr =req->peer_ip;
3366 dst = rtalloc2((struct sockaddr *)&nam, 1, 0);
3369 printf("failed to find route\n");
3372 e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev,
3373 (struct sockaddr *)&nam);
3375 DPRINTF("failed to get l2t\n");
3378 * Point to our listen socket until accept
3380 newtoep->tp_tp = tp;
3381 newtoep->tp_flags = TP_SYN_RCVD;
3382 newtoep->tp_tid = tid;
3383 newtoep->tp_toedev = tdev;
3384 tp->rcv_wnd = select_rcv_wnd(tdev, so);
3386 cxgb_insert_tid(cdev, d->client, newtoep, tid);
3388 LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
/* Enable DDP only if tunable allows it, socket permits it, and the
 * receive window is large enough. */
3391 newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
3392 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
3394 if (newtoep->tp_ulp_mode) {
3395 ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
3397 if (ddp_mbuf == NULL)
3398 newtoep->tp_ulp_mode = 0;
3401 CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d",
3402 TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
3403 set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
3405 * XXX workaround for lack of syncache drop
3407 toepcb_hold(newtoep);
3408 syncache_add_accept_req(req, so, newtoep);
/* Build the accepting PASS_ACCEPT_RPL. */
3410 rpl = cplhdr(reply_mbuf);
3411 reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
3412 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3414 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
3415 rpl->opt2 = htonl(calc_opt2(so, tdev));
3416 rpl->rsvd = rpl->opt2; /* workaround for HW bug */
3417 rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten
3419 rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) |
3420 V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx));
3421 rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) |
3422 CPL_PASS_OPEN_ACCEPT);
3424 DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status);
3426 m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep));
3428 l2t_send(cdev, reply_mbuf, e);
/* Program the DDP timer workaround bits into the TCB if DDP is on. */
3430 if (newtoep->tp_ulp_mode) {
3431 __set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
3433 TP_DDP_TIMER_WORKAROUND_MASK,
3435 TP_DDP_TIMER_WORKAROUND_VAL, 1);
3437 printf("not offloading\n");
/* Error path: reject (T3) or release the TID, then send it off. */
3443 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
3444 mk_pass_accept_rpl(reply_mbuf, m);
3446 mk_tid_release(reply_mbuf, newtoep, tid);
3447 cxgb_ofld_send(cdev, reply_mbuf);
3451 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
/*
 * CPL dispatch entry point for PASS_ACCEPT_REQ.  Validates the STID
 * lookup and TID range (the printk/unlikely bodies are legacy Linux
 * remnants), tolerates stale-TID reuse on T3A, and hands off to
 * process_pass_accept_req() with the listener's context.
 */
3458 * Handle a CPL_PASS_ACCEPT_REQ message.
3461 do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3463 struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
3464 struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */
3465 struct tom_data *d = listen_ctx->tom_data;
3468 struct cpl_pass_accept_req *req = cplhdr(m);
3469 unsigned int tid = GET_TID(req);
3470 struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
3472 if (unlikely(!lsk)) {
3473 printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
3475 (unsigned long)((union listen_entry *)ctx -
3477 return CPL_RET_BUF_DONE;
3479 if (unlikely(tid >= t->ntids)) {
3480 printk(KERN_ERR "%s: passive open TID %u too large\n",
3482 return CPL_RET_BUF_DONE;
3485 * For T3A the current user of the TID may have closed but its last
3486 * message(s) may have been backlogged so the TID appears to be still
3487 * in use. Just take the TID away, the connection can close at its
3488 * own leisure. For T3B this situation is a bug.
3490 if (!valid_new_tid(t, tid) &&
3491 cdev->type != T3A) {
3492 printk(KERN_ERR "%s: passive open uses existing TID %u\n",
3494 return CPL_RET_BUF_DONE;
3498 process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
/*
 * Translate the HW-encoded TCP option word 'opt' from an *_ESTABLISH
 * CPL into the native tcpcb flags: MSS clamp (MTU table entry minus 40
 * bytes of IP+TCP headers), timestamps, SACK, and window scaling.
 * Requires the inpcb lock.
 */
3507 assign_rxopt(struct socket *so, unsigned int opt)
3509 struct tcpcb *tp = so_sototcpcb(so);
3510 struct toepcb *toep = tp->t_toe;
3511 const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep));
3513 inp_lock_assert(tp->t_inpcb);
3515 toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
3516 tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
3517 tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
3518 tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
/* Window scaling takes effect only when both sides negotiated it. */
3519 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
3520 (TF_RCVD_SCALE|TF_REQ_SCALE))
3521 tp->rcv_scale = tp->request_r_scale;
/*
 * Finish bringing a connection up: seed all send-side sequence numbers
 * from snd_isn (true ISN + 1), decode HW TCP options, install the TOE
 * ctloutput hook, compensate tp_rcv_wup for receive window that could
 * not be expressed in opt0, and move the tcpcb to ESTABLISHED.
 */
3525 * Completes some final bits of initialization for just established connections
3526 * and changes their state to TCP_ESTABLISHED.
3528 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
3531 make_established(struct socket *so, u32 snd_isn, unsigned int opt)
3533 struct tcpcb *tp = so_sototcpcb(so);
3534 struct toepcb *toep = tp->t_toe;
3536 toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
3537 assign_rxopt(so, opt);
3544 so->so_proto->pr_ctloutput = t3_ctloutput;
/* Dead Linux reference line (inet_sk/jiffies) retained below. */
3548 inet_sk(sk)->id = tp->write_seq ^ jiffies;
3551 * XXX not clear what rcv_wup maps to
3554 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
3555 * pass through opt0.
3557 if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
3558 toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);
3564 * no clean interface for marking ARP up to date
3566 dst_confirm(sk->sk_dst_cache);
3568 tp->t_starttime = ticks;
3569 tp->t_state = TCPS_ESTABLISHED;
/*
 * Expand the syncache entry for a completed passive open.  Rebuilds the
 * ACK tcphdr/tcpopt that the stack would have seen from the fields the
 * adapter reported in the CPL_PASS_ESTABLISH (MSS via the MTU table,
 * wscale/timestamp/SACK decoded from tcp_opt) and calls
 * syncache_offload_expand() to create the child socket in *so.
 */
3574 syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
3577 struct in_conninfo inc;
3580 int mss, wsf, sack, ts;
3581 struct mbuf *m = NULL;
3582 const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
3586 #error "no MAC support"
3589 opt = ntohs(req->tcp_opt);
3591 bzero(&to, sizeof(struct tcpopt));
3594 * Fill out information for entering us into the syncache
3596 inc.inc_fport = th.th_sport = req->peer_port;
3597 inc.inc_lport = th.th_dport = req->local_port;
3598 th.th_seq = req->rcv_isn;
3599 th.th_flags = TH_ACK;
3603 inc.inc_faddr.s_addr = req->peer_ip;
3604 inc.inc_laddr.s_addr = req->local_ip;
/* MSS = negotiated MTU minus 40 bytes of IP+TCP headers. */
3606 mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
3607 wsf = G_TCPOPT_WSCALE_OK(opt);
3608 ts = G_TCPOPT_TSTAMP(opt);
3609 sack = G_TCPOPT_SACK(opt);
3612 to.to_wscale = G_TCPOPT_SND_WSCALE(opt);
3613 to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
3615 DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
3616 ntohl(req->local_ip), ntohs(req->local_port),
3617 ntohl(req->peer_ip), ntohs(req->peer_port),
3618 mss, wsf, ts, sack);
3619 return syncache_offload_expand(&inc, &to, &th, so, m);
/*
 * CPL dispatch entry point for PASS_ESTABLISH: the passive open has
 * completed in hardware.  Removes the toepcb from the listener's SYN
 * queue, expands the syncache entry into a real child socket, disables
 * mbuf coalescing on both sockbufs, sizes WR credits and the MTU index,
 * and finishes via make_established().  Trailing lines are legacy
 * Linux accept-queue logic kept for reference.
 */
3624 * Process a CPL_PASS_ESTABLISH message. XXX a lot of the locking doesn't work
3625 * if we are in TCP_SYN_RECV due to crossed SYNs
3628 do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3630 struct cpl_pass_establish *req = cplhdr(m);
3631 struct toepcb *toep = (struct toepcb *)ctx;
3632 struct tcpcb *tp = toep->tp_tp;
3633 struct socket *so, *lso;
3634 struct t3c_data *td = T3C_DATA(cdev);
3635 struct sockbuf *snd, *rcv;
3637 // Complete socket initialization now that we have the SND_ISN
3639 struct toedev *tdev;
3642 tdev = toep->tp_toedev;
3644 inp_wlock(tp->t_inpcb);
3648 * XXX need to add reference while we're manipulating
3650 so = lso = inp_inpcbtosocket(tp->t_inpcb);
3652 inp_wunlock(tp->t_inpcb);
/* Off the listener's SYN queue — the child now stands alone. */
3655 LIST_REMOVE(toep, synq_entry);
3658 if (!syncache_expand_establish_req(req, &so, toep)) {
3662 CXGB_UNIMPLEMENTED();
3666 * Couldn't create the socket
3668 CXGB_UNIMPLEMENTED();
/* 'so' now points at the newly created child; relock its inpcb. */
3671 tp = so_sototcpcb(so);
3672 inp_wlock(tp->t_inpcb);
3674 snd = so_sockbuf_snd(so);
3675 rcv = so_sockbuf_rcv(so);
3677 snd->sb_flags |= SB_NOCOALESCE;
3678 rcv->sb_flags |= SB_NOCOALESCE;
3683 reset_wr_list(toep);
3684 tp->rcv_wnd = select_rcv_wnd(tdev, so);
3685 tp->rcv_nxt = toep->tp_copied_seq;
3686 install_offload_ops(so);
/* Initialize work-request credit accounting and queue-set binding. */
3688 toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
3689 toep->tp_wr_unacked = 0;
3690 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
3691 toep->tp_qset_idx = 0;
3692 toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);
3695 * XXX Cancel any keep alive timer
3698 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
3701 * XXX workaround for lack of syncache drop
3703 toepcb_release(toep);
3704 inp_wunlock(tp->t_inpcb);
3706 CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
3707 cxgb_log_tcb(cdev->adapter, toep->tp_tid);
3710 * XXX not sure how these checks map to us
3712 if (unlikely(sk->sk_socket)) { // simultaneous opens only
3713 sk->sk_state_change(sk);
3714 sk_wake_async(so, 0, POLL_OUT);
3717 * The state for the new connection is now up to date.
3718 * Next check if we should add the connection to the parent's
3719 * accept queue. When the parent closes it resets connections
3720 * on its SYN queue, so check if we are being reset. If so we
3721 * don't need to do anything more, the coming ABORT_RPL will
3722 * destroy this socket. Otherwise move the connection to the
3725 * Note that we reset the synq before closing the server so if
3726 * we are not being reset the stid is still open.
3728 if (unlikely(!tp->forward_skb_hint)) { // removed from synq
/*
 * Drain the connection's out-of-order CPL queue now that a real TID is
 * known: patch each deferred message's WR TID and opcode/TID word in
 * place, then push it to the adapter.  Requires the inpcb lock.
 */
3739 * Fill in the right TID for CPL messages waiting in the out-of-order queue
3740 * and send them to the TOE.
3743 fixup_and_send_ofo(struct toepcb *toep)
3746 struct toedev *tdev = toep->tp_toedev;
3747 struct tcpcb *tp = toep->tp_tp;
3748 unsigned int tid = toep->tp_tid;
3750 log(LOG_NOTICE, "fixup_and_send_ofo\n");
3752 inp_lock_assert(tp->t_inpcb);
3753 while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
3755 * A variety of messages can be waiting but the fields we'll
3756 * be touching are common to all so any message type will do.
3758 struct cpl_close_con_req *p = cplhdr(m);
3760 p->wr.wr_lo = htonl(V_WR_TID(tid));
3761 OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
3762 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
/*
 * Active-open completion: seed receive-side state from the CPL's
 * rcv_isn (real RCV_ISN + 1), run make_established(), then flush any
 * CPLs that were queued while we had no TID.  Trailing lines are
 * legacy Linux write-queue fixup code kept for reference.
 */
3767 * Updates socket state from an active establish CPL message. Runs with the
3771 socket_act_establish(struct socket *so, struct mbuf *m)
3773 struct cpl_act_establish *req = cplhdr(m);
3774 u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */
3775 struct tcpcb *tp = so_sototcpcb(so);
3776 struct toepcb *toep = tp->t_toe;
3778 if (__predict_false(tp->t_state != TCPS_SYN_SENT))
3779 log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
3780 toep->tp_tid, tp->t_state);
3782 tp->ts_recent_age = ticks;
3783 tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
3784 toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;
3786 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
3789 * Now that we finally have a TID send any CPL messages that we had to
3790 * defer for lack of a TID.
3792 if (mbufq_len(&toep->out_of_order_queue))
3793 fixup_and_send_ofo(toep);
3795 if (__predict_false(so_state_get(so) & SS_NOFDREF)) {
3797 * XXX does this even make sense?
3804 * XXX assume no write requests permitted while socket connection is
3808 * Currently the send queue must be empty at this point because the
3809 * socket layer does not send anything before a connection is
3810 * established. To be future proof though we handle the possibility
3811 * that there are pending buffers to send (either TX_DATA or
3812 * CLOSE_CON_REQ). First we need to adjust the sequence number of the
3813 * buffers according to the just learned write_seq, and then we send
3814 * them on their way.
3816 fixup_pending_writeq_buffers(sk);
3817 if (t3_push_frames(so, 1))
3818 sk->sk_write_space(sk);
3821 toep->tp_state = tp->t_state;
3822 tcpstat.tcps_connects++;
/*
 * CPL dispatch entry point for ACT_ESTABLISH: an active open completed.
 * Frees the ATID, swaps the connection over to the real TID (stale-TID
 * reuse is tolerated), records the queue set, and finishes via
 * socket_act_establish() under the inpcb lock.
 */
3827 * Process a CPL_ACT_ESTABLISH message.
3830 do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3832 struct cpl_act_establish *req = cplhdr(m);
3833 unsigned int tid = GET_TID(req);
3834 unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
3835 struct toepcb *toep = (struct toepcb *)ctx;
3836 struct tcpcb *tp = toep->tp_tp;
3838 struct toedev *tdev;
3842 free_atid(cdev, atid);
3845 inp_wlock(tp->t_inpcb);
3850 so = inp_inpcbtosocket(tp->t_inpcb);
3851 tdev = toep->tp_toedev; /* blow up here if link was down */
3855 * It's OK if the TID is currently in use, the owning socket may have
3856 * backlogged its last CPL message(s). Just take it away.
3860 so_insert_tid(d, toep, tid);
3861 free_atid(cdev, atid);
3862 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
3864 socket_act_establish(so, m);
3865 inp_wunlock(tp->t_inpcb);
3866 CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
3867 cxgb_log_tcb(cdev->adapter, toep->tp_tid);
/*
 * Handle a WR_ACK from the adapter: return 'credits' work-request
 * credits, pop fully-acked WRs off the pending list (per-WR credit
 * counts live in m_pkthdr.csum_data), advance snd_una, drop the acked
 * bytes from the send buffer, wake writers, and push more frames if
 * the send pointer lags the buffer.  All under the inpcb write lock.
 */
3873 * Process an acknowledgment of WR completion. Advance snd_una and send the
3874 * next batch of work requests from the write queue.
3877 wr_ack(struct toepcb *toep, struct mbuf *m)
3879 struct tcpcb *tp = toep->tp_tp;
3880 struct cpl_wr_ack *hdr = cplhdr(m);
3882 unsigned int credits = ntohs(hdr->credits);
3883 u32 snd_una = ntohl(hdr->snd_una);
3885 struct sockbuf *snd;
3887 CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits);
3889 inp_wlock(tp->t_inpcb);
3890 so = inp_inpcbtosocket(tp->t_inpcb);
3891 toep->tp_wr_avail += credits;
/* Unacked count can never exceed outstanding (max - avail) WRs. */
3892 if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
3893 toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;
3896 struct mbuf *p = peek_wr(toep);
3898 if (__predict_false(!p)) {
3899 log(LOG_ERR, "%u WR_ACK credits for TID %u with "
3900 "nothing pending, state %u wr_avail=%u\n",
3901 credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail);
3905 "wr_ack: p->credits=%d p->bytes=%d",
3906 p->m_pkthdr.csum_data, p->m_pkthdr.len);
3907 KASSERT(p->m_pkthdr.csum_data != 0,
3908 ("empty request still on list"));
/* Partially acked WR: charge the credits and stop popping. */
3910 if (__predict_false(credits < p->m_pkthdr.csum_data)) {
/* NOTE(review): the debug block below uses Linux skb fields
 * (p->csum, p->data_len, skb_shinfo) — presumably under a
 * disabled DEBUG ifdef; verify in the full file. */
3913 struct tx_data_wr *w = cplhdr(p);
3915 "TID %u got %u WR credits, need %u, len %u, "
3916 "main body %u, frags %u, seq # %u, ACK una %u,"
3917 " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n",
3918 toep->tp_tid, credits, p->csum, p->len,
3919 p->len - p->data_len, skb_shinfo(p)->nr_frags,
3920 ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt),
3921 toep->tp_wr_avail, count_pending_wrs(tp) - credits);
3923 p->m_pkthdr.csum_data -= credits;
/* Fully acked WR: consume its credits and tally its payload bytes. */
3927 credits -= p->m_pkthdr.csum_data;
3928 bytes += p->m_pkthdr.len;
3930 "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d",
3931 p->m_pkthdr.len, credits, p->m_pkthdr.csum_data);
3938 check_wr_invariants(tp);
3941 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
3943 struct tom_data *d = TOM_DATA(TOE_DEV(so));
/* NOTE(review): missing ',' after LOG_ERR below — looks like a
 * transcription error; compare against upstream before building. */
3945 log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK "
3946 "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una,
3947 toep->tp_tid, tp->snd_una);
3952 if (tp->snd_una != snd_una) {
3953 tp->snd_una = snd_una;
3954 tp->ts_recent_age = ticks;
3957 * Keep ARP entry "minty fresh"
3959 dst_confirm(sk->sk_dst_cache);
/* Everything sent has been acked: no longer waiting for TX idle. */
3961 if (tp->snd_una == tp->snd_nxt)
3962 toep->tp_flags &= ~TP_TX_WAIT_IDLE;
3965 snd = so_sockbuf_snd(so);
3967 CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes);
3968 snd = so_sockbuf_snd(so);
3970 sbdrop_locked(snd, bytes);
3971 so_sowwakeup_locked(so);
3974 if (snd->sb_sndptroff < snd->sb_cc)
3975 t3_push_frames(so, 0);
3978 inp_wunlock(tp->t_inpcb);
/*
 * CPL dispatch entry point for TX_DATA_ACK: forwards to wr_ack() with
 * the toepcb recovered from the handler context.
 */
3983 * Handler for TX_DATA_ACK CPL messages.
3986 do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx)
3988 struct toepcb *toep = (struct toepcb *)ctx;
/*
 * CPL dispatch entry point for TRACE_PKT: trace packets are simply
 * discarded.
 */
3997 * Handler for TRACE_PKT CPL messages. Just sink these packets.
4000 do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx)
/*
 * Reset an embryonic child (still on a listener's SYN/accept queue,
 * no struct socket of its own) by sending an ABORT_REQ via its toepcb.
 */
4007 * Reset a connection that is on a listener's SYN queue or accept queue,
4008 * i.e., one that has not had a struct socket associated with it.
4009 * Must be called from process context.
4011 * Modeled after code in inet_csk_listen_stop().
4014 t3_reset_listen_child(struct socket *child)
4016 struct tcpcb *tp = so_sototcpcb(child);
4018 t3_send_reset(tp->t_toe);
/*
 * Per-socket callback for so_listeners_apply_all(): if the child is
 * offloaded (TF_TOE), reset it under its inpcb write lock.
 */
4023 t3_child_disconnect(struct socket *so, void *arg)
4025 struct tcpcb *tp = so_sototcpcb(so);
4027 if (tp->t_flags & TF_TOE) {
4028 inp_wlock(tp->t_inpcb);
4029 t3_reset_listen_child(so);
4030 inp_wunlock(tp->t_inpcb);
/*
 * Walk a listener's accept queue and send ABORT_REQ to every offloaded,
 * not-yet-accepted child; final teardown happens when the ABORT_RPLs
 * come back.
 */
4035 * Disconnect offloaded established but not yet accepted connections sitting
4036 * on a server's accept_queue. We just send an ABORT_REQ at this point and
4037 * finish off the disconnect later as we may need to wait for the ABORT_RPL.
4040 t3_disconnect_acceptq(struct socket *listen_so)
4044 so_listeners_apply_all(listen_so, t3_child_disconnect, NULL);
4045 so_unlock(listen_so);
/*
 * Drain a listener's SYN queue: for each embryonic toepcb send a reset,
 * release its TID, and drop the SYN-queue reference.  The listener
 * socket is unlocked on the way out.
 */
4049 * Reset offloaded connections sitting on a server's syn queue. As above
4050 * we send ABORT_REQ and finish off when we get ABORT_RPL.
4054 t3_reset_synq(struct listen_ctx *lctx)
4056 struct toepcb *toep;
4059 while (!LIST_EMPTY(&lctx->synq_head)) {
4060 toep = LIST_FIRST(&lctx->synq_head);
4061 LIST_REMOVE(toep, synq_entry);
4063 t3_send_reset(toep);
4064 cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid);
4065 toepcb_release(toep);
4067 so_unlock(lctx->lso);
/*
 * Write 'nppods' DDP page pods for gather list 'gl' into adapter memory
 * via ULP_MEM_WRITE work requests, one pod per request.  Real pods get
 * the TID/tag/color and page addresses (each pod covers 4 pages with a
 * 5th overlap slot); the trailing NUM_SENTINEL_PPODS pods are marked
 * invalid.  Pods land at ddp_llimit + tag * PPOD_SIZE.
 */
4072 t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
4073 unsigned int nppods, unsigned int tag, unsigned int maxoff,
4074 unsigned int pg_off, unsigned int color)
4076 unsigned int i, j, pidx;
4079 struct ulp_mem_io *req;
4080 unsigned int tid = toep->tp_tid;
4081 const struct tom_data *td = TOM_DATA(toep->tp_toedev);
4082 unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;
4084 CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)",
4085 gl, nppods, tag, maxoff, pg_off, color);
4087 for (i = 0; i < nppods; ++i) {
4088 m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
4089 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4090 req = mtod(m, struct ulp_mem_io *);
4091 m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE;
4092 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
/* Adapter memory is addressed in 32-byte units, hence the >> 5. */
4094 req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) |
4095 V_ULPTX_CMD(ULP_MEM_WRITE));
4096 req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) |
4097 V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1));
4099 p = (struct pagepod *)(req + 1);
4100 if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) {
4101 p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
4102 p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) |
4103 V_PPOD_COLOR(color));
4104 p->pp_max_offset = htonl(maxoff);
4105 p->pp_page_offset = htonl(pg_off);
/* 5 address slots per pod (4 pages + 1 overlap with the next pod). */
4107 for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx)
4108 p->pp_addr[j] = pidx < gl->dgl_nelem ?
4109 htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0;
4111 p->pp_vld_tid = 0; /* mark sentinel page pods invalid */
4112 send_or_defer(toep, m, 0);
4113 ppod_addr += PPOD_SIZE;
4119 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command.
4122 mk_cpl_barrier_ulp(struct cpl_barrier *b)
/* A ULP_TXPKT header overlays the start of the CPL message. */
4124 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b;
4126 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
/* Length is expressed in 8-byte flits. */
4127 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8));
4128 b->opcode = CPL_BARRIER;
4132 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command.
/*
 * Build a CPL_GET_TCB (read back the connection's TCB) as the payload
 * of a ULP_TX_PKT command.  The reply is steered to queue set 'cpuno'.
 */
4135 mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno)
/* A ULP_TXPKT header overlays the start of the CPL message; the
 * initializer below is sufficient — the old duplicate re-assignment
 * of txpkt immediately after the declaration was redundant and has
 * been removed. */
4137 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
4140 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
4141 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
4142 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid));
4143 req->cpuno = htons(cpuno);
4147 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command.
/*
 * Build a CPL_SET_TCB_FIELD (update 'word' of the TCB under 'mask')
 * as the payload of a ULP_TX_PKT command.
 */
4150 mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
4151 unsigned int word, uint64_t mask, uint64_t val)
4153 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
4155 CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
4156 tid, word, mask, val);
4158 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
4159 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
4160 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
/* No completion CPL is wanted for ULP-embedded TCB updates. */
4161 req->reply = V_NO_REPLY(1);
4163 req->word = htons(word);
4164 req->mask = htobe64(mask);
4165 req->val = htobe64(val);
4169 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
/*
 * Build a CPL_RX_DATA_ACK (as ULP_TX_PKT payload) returning 'credits'
 * bytes of RX credit and refreshing the delayed-ACK mode.
 */
4172 mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack,
4173 unsigned int tid, unsigned int credits)
4175 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;
4177 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
4178 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
4179 OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
/* Delayed-ACK mode comes from the per-device 'delack' tunable. */
4180 ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
4181 V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) |
4182 V_RX_CREDITS(credits));
/*
 * Cancel a posted hardware DDP buffer: atomically flip the DDP flags so
 * the other HW buffer (if valid) becomes active, then read back the TCB
 * to learn how much data landed in the cancelled buffer.  All CPLs are
 * bracketed by CPL_BARRIERs inside one BYPASS work request.
 */
4186 t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
4190 struct work_request_hdr *wr;
4191 struct cpl_barrier *lock;
4192 struct cpl_set_tcb_field *req;
4193 struct cpl_get_tcb *getreq;
4194 struct ddp_state *p = &toep->tp_ddp_state;
/* Caller must hold the receive sockbuf lock. */
4197 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
4199 wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
4201 m = m_gethdr_nofail(wrlen);
4202 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4203 wr = mtod(m, struct work_request_hdr *);
/* BYPASS work request: the payload is raw CPL/ULP messages. */
4206 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
4207 m->m_pkthdr.len = m->m_len = wrlen;
4209 lock = (struct cpl_barrier *)(wr + 1);
4210 mk_cpl_barrier_ulp(lock);
4212 req = (struct cpl_set_tcb_field *)(lock + 1);
4214 CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);
4216 /* Hmm, not sure if this is actually a good thing: reactivating
4217 * the other buffer might be an issue if it has been completed
4218 * already. However, that is unlikely, since the fact that the UBUF
4219 * is not completed indicates that there is no outstanding data.
4222 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4223 V_TF_DDP_ACTIVE_BUF(1) |
4224 V_TF_DDP_BUF0_VALID(1),
4225 V_TF_DDP_ACTIVE_BUF(1));
/* NOTE(review): this second request reuses 'req'; presumably an
 * increment of req was elided in this extract — confirm upstream. */
4227 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4228 V_TF_DDP_ACTIVE_BUF(1) |
4229 V_TF_DDP_BUF1_VALID(1), 0);
/* Read the TCB back on this connection's queue set. */
4231 getreq = (struct cpl_get_tcb *)(req + 1);
4232 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
4234 mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));
4236 /* Keep track of the number of outstanding CPL_GET_TCB requests
4241 T3_TRACE1(TIDTB(so),
4242 "t3_cancel_ddpbuf: bufidx %u", bufidx);
4244 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
4248 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
4249 * @toep: the toepcb associated with the buffers
4250 * @bufidx: index of HW DDP buffer (0 or 1)
4251 * @tag0: new tag for HW buffer 0
4252 * @tag1: new tag for HW buffer 1
4253 * @len: new length for HW buf @bufidx
4255 * Sends a compound WR to overlay a new DDP buffer on top of an existing
4256 * buffer by changing the buffer tag and length and setting the valid and
4257 * active flag accordingly. The caller must ensure the new buffer is at
4258 * least as big as the existing one. Since we typically reprogram both HW
4259 * buffers this function sets both tags for convenience. Read the TCB to
4260 determine how much data was written into the buffer before the overlay
/*
 * Overlay a new DDP buffer on top of an existing one (see the block
 * comment above).  All CPLs travel in one ATOMIC BYPASS work request so
 * TP processes them without interleaved wire segments.
 */
4264 t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0,
4265 unsigned int tag1, unsigned int len)
4269 struct work_request_hdr *wr;
4270 struct cpl_get_tcb *getreq;
4271 struct cpl_set_tcb_field *req;
4272 struct ddp_state *p = &toep->tp_ddp_state;
/* Bug fix: the trace previously mislabelled itself "t3_setup_ppods". */
4274 CTR4(KTR_TCB, "t3_overlay_ddpbuf(bufidx=%u tag0=%u tag1=%u len=%u)",
4275 bufidx, tag0, tag1, len);
/* Caller must hold the receive sockbuf lock. */
4277 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
4279 wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
4280 m = m_gethdr_nofail(wrlen);
4281 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4282 wr = mtod(m, struct work_request_hdr *);
4283 m->m_pkthdr.len = m->m_len = wrlen;
4287 /* Set the ATOMIC flag to make sure that TP processes the following
4288 * CPLs in an atomic manner and no wire segments can be interleaved.
4290 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
4291 req = (struct cpl_set_tcb_field *)(wr + 1);
/* Reprogram both buffer tags in a single 64-bit TCB write. */
4292 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG,
4293 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) |
4294 V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32,
4295 V_TCB_RX_DDP_BUF0_TAG(tag0) |
4296 V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32);
/* NOTE(review): the requests below reuse 'req'; increments of req (and
 * possibly a bufidx conditional) appear elided in this extract —
 * confirm against upstream before relying on this layout. */
4299 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN,
4300 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
4301 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
/* Validate buffer 0 and re-enable its PUSH handling. */
4303 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4304 V_TF_DDP_PUSH_DISABLE_0(1) |
4305 V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
4306 V_TF_DDP_PUSH_DISABLE_0(0) |
4307 V_TF_DDP_BUF0_VALID(1));
4309 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN,
4310 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN),
4311 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len));
/* Validate buffer 1 and mark it active. */
4313 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4314 V_TF_DDP_PUSH_DISABLE_1(1) |
4315 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
4316 V_TF_DDP_PUSH_DISABLE_1(0) |
4317 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1));
/* Read the TCB back on this connection's queue set. */
4320 getreq = (struct cpl_get_tcb *)(req + 1);
4321 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
4323 /* Keep track of the number of outstanding CPL_GET_TCB requests
4328 T3_TRACE4(TIDTB(sk),
4329 "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u "
4331 bufidx, tag0, tag1, len);
4333 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
4337 * Sends a compound WR containing all the CPL messages needed to program the
4338 * two HW DDP buffers, namely optionally setting up the length and offset of
4339 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK.
/*
 * Program the two HW DDP buffers with one compound BYPASS work request:
 * each buffer's offset/length (only if its len is non-zero), the DDP
 * flag word under flag_mask, and optionally an RX_DATA_ACK returning
 * accumulated RX credits.
 */
4342 t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0,
4343 unsigned int len1, unsigned int offset1,
4344 uint64_t ddp_flags, uint64_t flag_mask, int modulate)
4348 struct work_request_hdr *wr;
4349 struct cpl_set_tcb_field *req;
4351 CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ",
4352 len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff);
/* Caller must hold the receive sockbuf lock. */
4355 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
/* Size the WR to hold only the CPLs we will actually emit. */
4357 wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
4358 (len1 ? sizeof(*req) : 0) +
4359 (modulate ? sizeof(struct cpl_rx_data_ack) : 0);
4360 m = m_gethdr_nofail(wrlen);
4361 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4362 wr = mtod(m, struct work_request_hdr *);
/* BYPASS work request: the payload is raw CPL/ULP messages. */
4365 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
4366 m->m_pkthdr.len = m->m_len = wrlen;
4368 req = (struct cpl_set_tcb_field *)(wr + 1);
4369 if (len0) { /* program buffer 0 offset and length */
4370 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
4371 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
4372 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
4373 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) |
4374 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0));
4377 if (len1) { /* program buffer 1 offset and length */
4378 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET,
4379 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
4380 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32,
4381 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) |
4382 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32);
/* Apply the caller-supplied DDP flag updates under flag_mask. */
4386 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask,
/* Piggy-back an RX_DATA_ACK returning the not-yet-returned credits
 * (copied-but-unacked byte count), then advance the ack mark. */
4390 mk_rx_data_ack_ulp(toep,
4391 (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid,
4392 toep->tp_copied_seq - toep->tp_rcv_wup);
4393 toep->tp_rcv_wup = toep->tp_copied_seq;
4397 T3_TRACE5(TIDTB(sk),
4398 "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x "
4400 len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff,
4404 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
/*
 * Populate mbuf_wrs[]: for each possible SGL entry count i, the number
 * of work requests needed given a per-WR limit of wr_len flits.
 * Idempotent: returns early if the table is already filled in.
 */
4408 t3_init_wr_tab(unsigned int wr_len)
4412 if (mbuf_wrs[1]) /* already initialized */
4415 for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) {
/* Flits needed for an SGL of i entries (2 entries per 3 flits). */
4416 int sgl_len = (3 * i) / 2 + (i & 1);
4419 mbuf_wrs[i] = sgl_len <= wr_len ?
4420 1 : 1 + (sgl_len - 2) / (wr_len - 1);
4427 t3_init_cpl_io(void)
4430 tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
4433 "Chelsio TCP offload: can't allocate sk_buff\n");
4436 skb_put(tcphdr_skb, sizeof(struct tcphdr));
4437 tcphdr_skb->h.raw = tcphdr_skb->data;
4438 memset(tcphdr_skb->data, 0, tcphdr_skb->len);
4441 t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
4442 t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
4443 t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack);
4444 t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data);
4445 t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
4446 t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
4447 t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
4448 t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
4449 t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
4450 t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
4451 t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
4452 t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
4453 t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
4454 t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
4455 t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);