1 /**************************************************************************
3 Copyright (c) 2007-2008, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/kernel.h>
37 #include <sys/limits.h>
41 #include <sys/mutex.h>
42 #include <sys/socket.h>
43 #include <sys/sysctl.h>
44 #include <sys/syslog.h>
45 #include <sys/protosw.h>
49 #include <net/route.h>
51 #include <netinet/in.h>
52 #include <netinet/in_pcb.h>
53 #include <netinet/in_systm.h>
54 #include <netinet/in_var.h>
57 #include <dev/cxgb/cxgb_osdep.h>
58 #include <dev/cxgb/sys/mbufq.h>
60 #include <netinet/ip.h>
61 #include <netinet/tcp_var.h>
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/tcp_offload.h>
64 #include <netinet/tcp_seq.h>
65 #include <netinet/tcp_syncache.h>
66 #include <netinet/tcp_timer.h>
67 #include <net/route.h>
69 #include <dev/cxgb/t3cdev.h>
70 #include <dev/cxgb/common/cxgb_firmware_exports.h>
71 #include <dev/cxgb/common/cxgb_t3_cpl.h>
72 #include <dev/cxgb/common/cxgb_tcb.h>
73 #include <dev/cxgb/common/cxgb_ctl_defs.h>
74 #include <dev/cxgb/cxgb_offload.h>
77 #include <machine/bus.h>
78 #include <dev/cxgb/sys/mvec.h>
79 #include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
80 #include <dev/cxgb/ulp/tom/cxgb_defs.h>
81 #include <dev/cxgb/ulp/tom/cxgb_tom.h>
82 #include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
83 #include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
84 #include <dev/cxgb/ulp/tom/cxgb_tcp.h>
86 #include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h>
89 * For ULP connections HW may add headers, e.g., for digests, that aren't part
90 * of the messages sent by the host but that are part of the TCP payload and
91 * therefore consume TCP sequence space. Tx connection parameters that
92 * operate in TCP sequence space are affected by the HW additions and need to
93 * compensate for them to accurately track TCP sequence numbers. This array
94 * contains the compensating extra lengths for ULP packets. It is indexed by
95 * a packet's ULP submode.
97 const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8};
101 * This sk_buff holds a fake header-only TCP segment that we use whenever we
102 * need to exploit SW TCP functionality that expects TCP headers, such as
103 * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple
104 * CPUs without locking.
106 static struct mbuf *tcphdr_mbuf __read_mostly;
110 * Size of WRs in bytes. Note that we assume all devices we are handling have
113 static unsigned int wrlen __read_mostly;
116 * The number of WRs needed for an skb depends on the number of page fragments
117 * in the skb and whether it has any payload in its main body. This maps the
118 * length of the gather list represented by an skb into the # of necessary WRs.
120 static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly;
123 * Max receive window supported by HW in bytes. Only a small part of it can
124 * be set through option0, the rest needs to be set through RX_DATA_ACK.
126 #define MAX_RCV_WND ((1U << 27) - 1)
129 * Min receive window. We want it to be large enough to accommodate receive
130 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance.
132 #define MIN_RCV_WND (24 * 1024U)
133 #define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS)
135 #define VALIDATE_SEQ 0
136 #define VALIDATE_SOCK(so)
139 #define TCP_TIMEWAIT 1
143 extern int tcp_do_autorcvbuf;
144 extern int tcp_do_autosndbuf;
145 extern int tcp_autorcvbuf_max;
146 extern int tcp_autosndbuf_max;
148 static void t3_send_reset(struct toepcb *toep);
149 static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status);
150 static inline void free_atid(struct t3cdev *cdev, unsigned int tid);
151 static void handle_syncache_event(int event, void *arg);
/*
 * Append mbuf chain 'n' to socket buffer 'sb' with heavy debug validation:
 * every mbuf is asserted to be either plain or an EXT_EXTREF external buffer,
 * and m_next is checked against a known poison value before and after the
 * append.  Requires SB_NOCOALESCE on the sockbuf (offload path does not use
 * sbcompress).  NOTE(review): the loop headers iterating 'm' were lost in
 * this extraction; code preserved as-is.
 */
154 SBAPPEND(struct sockbuf *sb, struct mbuf *n)
160 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
161 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
162 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
163 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
164 m->m_next, m->m_nextpkt, m->m_flags));
169 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
170 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
171 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
172 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
173 m->m_next, m->m_nextpkt, m->m_flags));
176 KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set"));
177 sbappendstream_locked(sb, n);
181 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
182 m->m_next, m->m_nextpkt, m->m_flags));
/* True iff the TOE device is a T3A revision (TOE_ID_CHELSIO_T3). */
188 is_t3a(const struct toedev *dev)
190 return (dev->tod_ttid == TOE_ID_CHELSIO_T3);
/* Debug-print the interesting fields of a toepcb (DPRINTF only). */
194 dump_toepcb(struct toepcb *toep)
196 DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n",
197 toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode,
198 toep->tp_mtu_idx, toep->tp_tid);
200 DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n",
201 toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked,
202 toep->tp_mss_clamp, toep->tp_flags);
/*
 * Compatibility wrapper around rtalloc1() used when the kernel does not
 * provide rtalloc2().  NOTE(review): the tail of this function (unlock /
 * return) is missing from this extraction.
 */
205 #ifndef RTALLOC2_DEFINED
206 static struct rtentry *
207 rtalloc2(struct sockaddr *dst, int report, u_long ignflags)
209 struct rtentry *rt = NULL;
211 if ((rt = rtalloc1(dst, report, ignflags)) != NULL)
219 * Determine whether to send a CPL message now or defer it. A message is
220 * deferred if the connection is in SYN_SENT since we don't know the TID yet.
221 * For connections in other states the message is sent immediately.
222 * If through_l2t is set the message is subject to ARP processing, otherwise
223 * it is sent directly.
226 send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t)
228 struct tcpcb *tp = toep->tp_tp;
230 if (__predict_false(tp->t_state == TCPS_SYN_SENT)) {
/* No TID yet: park the message on the OOO queue under the inp lock. */
231 inp_wlock(tp->t_inpcb);
232 mbufq_tail(&toep->out_of_order_queue, m); // defer
233 inp_wunlock(tp->t_inpcb);
234 } else if (through_l2t)
235 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T
237 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly
/*
 * Build the mbuf priority word for a CPL message from the control value and
 * the connection.  NOTE(review): the function body is missing from this
 * extraction; only the signature survives.
 */
240 static inline unsigned int
241 mkprio(unsigned int cntrl, const struct toepcb *toep)
247 * Populate a TID_RELEASE WR. The skb must be already propely sized.
250 mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid)
252 struct cpl_tid_release *req;
254 m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep));
/* Single CPL header, no payload. */
255 m->m_pkthdr.len = m->m_len = sizeof(*req);
256 req = mtod(m, struct cpl_tid_release *);
257 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
259 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
/*
 * Fill in a TX_DATA work request header at the front of mbuf 'm' for 'len'
 * bytes of payload (len includes any HW ULP additions).  On the first WR of
 * a connection (TP_DATASENT clear) also programs ack-page count, CPU index
 * and the send-buffer size hint.  Caller holds the inpcb lock.
 */
263 make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail)
265 struct tcpcb *tp = so_sototcpcb(so);
266 struct toepcb *toep = tp->t_toe;
267 struct tx_data_wr *req;
270 inp_lock_assert(tp->t_inpcb);
271 snd = so_sockbuf_snd(so);
273 req = mtod(m, struct tx_data_wr *);
274 m->m_len = sizeof(*req);
275 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
276 req->wr_lo = htonl(V_WR_TID(toep->tp_tid));
277 /* len includes the length of any HW ULP additions */
278 req->len = htonl(len);
279 req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
280 /* V_TX_ULP_SUBMODE sets both the mode and submode */
281 req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) |
282 V_TX_URG(/* skb_urgent(skb) */ 0 ) |
283 V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) &&
285 req->sndseq = htonl(tp->snd_nxt);
286 if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
/* First data WR on this connection: one-time init flags. */
287 req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
288 V_TX_CPU_IDX(toep->tp_qset));
290 /* Sendbuffer is in units of 32KB.
292 if (tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE)
293 req->param |= htonl(V_TX_SNDBUF(tcp_autosndbuf_max >> 15));
295 req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
298 toep->tp_flags |= TP_DATASENT;
302 #define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */
/*
 * Push pending send-buffer data to the HW as TX_DATA work requests.
 * Walks the send sockbuf from sb_sndptr, building either immediate-data WRs
 * (payload copied inline when a single mbuf fits in IMM_LEN) or gather-list
 * WRs (up to TX_MAX_SEGS-1 segments, limited by available WR credits in
 * toep->tp_wr_avail).  Advances sb_sndptr/sb_sndptroff and tp_write_seq,
 * charges WR credits via m_pkthdr.csum_data, requests a completion when
 * asked or when half the credits are unacked, and hands the WR to the L2
 * table for transmit.  Returns the total payload bytes queued.  No-op when
 * the connection is in SYN_SENT/CLOSED or the socket is disconnecting.
 * Caller holds the inpcb lock.
 */
305 t3_push_frames(struct socket *so, int req_completion)
307 struct tcpcb *tp = so_sototcpcb(so);
308 struct toepcb *toep = tp->t_toe;
310 struct mbuf *tail, *m0, *last;
313 int state, bytes, count, total_bytes;
314 bus_dma_segment_t segs[TX_MAX_SEGS], *segp;
317 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) {
318 DPRINTF("tcp state=%d\n", tp->t_state);
322 state = so_state_get(so);
324 if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) {
325 DPRINTF("disconnecting\n");
330 inp_lock_assert(tp->t_inpcb);
332 snd = so_sockbuf_snd(so);
335 d = TOM_DATA(toep->tp_toedev);
338 last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;
341 DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n",
342 toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last);
/* Skip the last mbuf already fully queued by a previous call. */
344 if (last && toep->tp_m_last == last && snd->sb_sndptroff != 0) {
345 KASSERT(tail, ("sbdrop error"));
346 last = tail = tail->m_next;
349 if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) {
350 DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail);
356 toep->tp_m_last = NULL;
357 while (toep->tp_wr_avail && (tail != NULL)) {
360 if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) {
365 * If the data in tail fits as in-line, then
366 * make an immediate data wr.
368 if (tail->m_len <= IMM_LEN) {
375 make_tx_data_wr(so, m0, bytes, tail);
376 m_append(m0, bytes, mtod(last, caddr_t));
377 KASSERT(!m0->m_next, ("bad append"));
/* Otherwise build a gather list, credit-limited per WR count. */
379 while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail)
380 && (tail != NULL) && (count < TX_MAX_SEGS-1)) {
381 bytes += tail->m_len;
385 * technically an abuse to be using this for a VA
386 * but less gross than defining my own structure
387 * or calling pmap_kextract from here :-|
389 segp->ds_addr = (bus_addr_t)tail->m_data;
390 segp->ds_len = tail->m_len;
391 DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n",
392 count, mbuf_wrs[count], tail->m_data, tail->m_len);
396 DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n",
397 toep->tp_wr_avail, count, mbuf_wrs[count], tail);
400 m_set_sgllen(m0, count);
401 make_tx_data_wr(so, m0, bytes, tail);
403 m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep));
406 snd->sb_sndptr = tail;
407 toep->tp_m_last = NULL;
409 toep->tp_m_last = snd->sb_sndptr = last;
412 DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last);
414 snd->sb_sndptroff += bytes;
415 total_bytes += bytes;
416 toep->tp_write_seq += bytes;
417 CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d"
418 " tail=%p sndptr=%p sndptroff=%d",
419 toep->tp_wr_avail, count, mbuf_wrs[count],
420 tail, snd->sb_sndptr, snd->sb_sndptroff);
422 CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d"
423 " tp_m_last=%p tailbuf=%p snd_una=0x%08x",
424 total_bytes, toep->tp_m_last, tail->m_data,
427 CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d"
428 " tp_m_last=%p snd_una=0x%08x",
429 total_bytes, toep->tp_m_last, tp->snd_una);
/* Trace the gather list three segments at a time. */
437 while (i < count && m_get_sgllen(m0)) {
438 if ((count - i) >= 3) {
440 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
441 " len=%d pa=0x%zx len=%d",
442 segs[i].ds_addr, segs[i].ds_len,
443 segs[i + 1].ds_addr, segs[i + 1].ds_len,
444 segs[i + 2].ds_addr, segs[i + 2].ds_len);
446 } else if ((count - i) == 2) {
448 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
450 segs[i].ds_addr, segs[i].ds_len,
451 segs[i + 1].ds_addr, segs[i + 1].ds_len);
454 CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d",
455 segs[i].ds_addr, segs[i].ds_len);
463 * remember credits used
465 m0->m_pkthdr.csum_data = mbuf_wrs[count];
466 m0->m_pkthdr.len = bytes;
467 toep->tp_wr_avail -= mbuf_wrs[count];
468 toep->tp_wr_unacked += mbuf_wrs[count];
/* Ask for a WR completion when requested or half the credits are out. */
470 if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) ||
471 toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
472 struct work_request_hdr *wr = cplhdr(m0);
474 wr->wr_hi |= htonl(F_WR_COMPL);
475 toep->tp_wr_unacked = 0;
477 KASSERT((m0->m_pkthdr.csum_data > 0) &&
478 (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d",
479 m0->m_pkthdr.csum_data));
480 m0->m_type = MT_DONTFREE;
481 enqueue_wr(toep, m0);
482 DPRINTF("sending offload tx with %d bytes in %d segments\n",
484 l2t_send(cdev, m0, toep->tp_l2t);
487 return (total_bytes);
491 * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail
492 * under any circumstances. We take the easy way out and always queue the
493 * message to the write_queue. We can optimize the case where the queue is
494 * already empty though the optimization is probably not worth it.
497 close_conn(struct socket *so)
500 struct cpl_close_con_req *req;
502 struct inpcb *inp = so_sotoinpcb(so);
509 tp = so_sototcpcb(so);
/* Flush any pending data first; skip if the TID isn't assigned yet. */
512 if (tp->t_state != TCPS_SYN_SENT)
513 t3_push_frames(so, 1);
/* Only ever send one FIN. */
515 if (toep->tp_flags & TP_FIN_SENT) {
522 d = TOM_DATA(toep->tp_toedev);
524 m = m_gethdr_nofail(sizeof(*req));
525 m_set_priority(m, CPL_PRIORITY_DATA);
529 toep->tp_flags |= TP_FIN_SENT;
530 req = mtod(m, struct cpl_close_con_req *);
532 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
533 req->wr.wr_lo = htonl(V_WR_TID(tid));
534 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
538 * XXX - need to defer shutdown while there is still data in the queue
541 CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid);
542 cxgb_ofld_send(d->cdev, m);
547 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
551 abort_arp_failure(struct t3cdev *cdev, struct mbuf *m)
553 struct cpl_abort_req *req = cplhdr(m);
/* Peer is unreachable: don't try to send an RST, just tear down state. */
555 req->cmd = CPL_ABORT_NO_RST;
556 cxgb_ofld_send(cdev, m);
560 * Send RX credits through an RX_DATA_ACK CPL message. If nofail is 0 we are
561 * permitted to return without sending the message in case we cannot allocate
562 * an sk_buff. Returns the number of credits sent.
565 t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail)
568 struct cpl_rx_data_ack *req;
569 struct toepcb *toep = tp->t_toe;
570 struct toedev *tdev = toep->tp_toedev;
572 m = m_gethdr_nofail(sizeof(*req));
574 DPRINTF("returning %u credits to HW\n", credits);
576 req = mtod(m, struct cpl_rx_data_ack *);
577 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
579 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
/* Combine the delayed-ack mode bits with the credit return. */
580 req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
581 m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep));
582 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
587 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled.
588 * This is only used in DDP mode, so we take the opportunity to also set the
589 * DACK mode and flush any Rx credits.
592 t3_send_rx_modulate(struct toepcb *toep)
595 struct cpl_rx_data_ack *req;
597 m = m_gethdr_nofail(sizeof(*req));
599 req = mtod(m, struct cpl_rx_data_ack *);
600 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
602 m->m_pkthdr.len = m->m_len = sizeof(*req);
604 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
/* Flush outstanding credits (copied_seq - rcv_wup) while modulating. */
605 req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
607 V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup));
608 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
609 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
610 toep->tp_rcv_wup = toep->tp_copied_seq;
614 * Handle receipt of an urgent pointer.
617 handle_urg_ptr(struct socket *so, uint32_t urg_seq)
/*
 * NOTE(review): the body below is dead code compiled out behind
 * URGENT_DATA_SUPPORTED and still uses Linux sk_buff idioms (skb_peek,
 * sock_flag); it was never ported to FreeBSD.
 */
619 #ifdef URGENT_DATA_SUPPORTED
620 struct tcpcb *tp = so_sototcpcb(so);
622 urg_seq--; /* initially points past the urgent data, per BSD */
624 if (tp->urg_data && !after(urg_seq, tp->urg_seq))
625 return; /* duplicate pointer */
627 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
628 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
629 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
632 if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len)
633 tom_eat_skb(sk, skb, 0);
635 tp->urg_data = TCP_URG_NOTYET;
636 tp->urg_seq = urg_seq;
641 * Returns true if a socket cannot accept new Rx data.
644 so_no_receive(const struct socket *so)
646 return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING));
650 * Process an urgent data notification.
653 rx_urg_notify(struct toepcb *toep, struct mbuf *m)
655 struct cpl_rx_urg_notify *hdr = cplhdr(m);
656 struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
/* Ignore the urgent pointer if the socket can no longer receive. */
660 if (!so_no_receive(so))
661 handle_urg_ptr(so, ntohl(hdr->seq));
667 * Handler for RX_URG_NOTIFY CPL messages.
670 do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx)
672 struct toepcb *toep = (struct toepcb *)ctx;
674 rx_urg_notify(toep, m);
/*
 * Decide whether the tunable delayed-ack mode may be applied to this
 * connection.  NOTE(review): because '||' short-circuits, the second clause
 * is unreachable whenever tp_ulp_mode is non-zero, making the T3-revision
 * check dead code — presumably the intent was "plain mode, or DDP mode on
 * T3B+ parts"; confirm against the vendor driver before changing.
 */
679 is_delack_mode_valid(struct toedev *dev, struct toepcb *toep)
681 return (toep->tp_ulp_mode ||
682 (toep->tp_ulp_mode == ULP_MODE_TCPDDP &&
683 dev->tod_ttid >= TOE_ID_CHELSIO_T3));
687 * Set of states for which we should return RX credits.
689 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)
692 * Called after some received data has been read. It returns RX credits
693 * to the HW for the amount of data processed.
696 t3_cleanup_rbuf(struct tcpcb *tp, int copied)
698 struct toepcb *toep = tp->t_toe;
701 int dack_mode, must_send, read;
702 u32 thres, credits, dack = 0;
705 so = inp_inpcbtosocket(tp->t_inpcb);
706 rcv = so_sockbuf_rcv(so);
/* Outside ESTABLISHED/FIN_WAIT_1/FIN_WAIT_2 only account, don't ack. */
708 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
709 (tp->t_state == TCPS_FIN_WAIT_2))) {
712 toep->tp_copied_seq += copied;
719 inp_lock_assert(tp->t_inpcb);
723 toep->tp_copied_seq += copied;
/* Infer bytes drained from the sockbuf since the last call. */
725 read = toep->tp_enqueued_bytes - rcv->sb_cc;
726 toep->tp_copied_seq += read;
728 credits = toep->tp_copied_seq - toep->tp_rcv_wup;
729 toep->tp_enqueued_bytes = rcv->sb_cc;
/* Clamp a runaway credit count; indicates accounting went wrong. */
732 if (credits > rcv->sb_mbmax) {
733 log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n",
734 toep->tp_copied_seq, toep->tp_rcv_wup, credits);
735 credits = rcv->sb_mbmax;
740 * XXX this won't accurately reflect credit return - we need
741 * to look at the difference between the amount that has been
742 * put in the recv sockbuf and what is there now
745 if (__predict_false(!credits))
748 dev = toep->tp_toedev;
749 thres = TOM_TUNABLE(dev, rx_credit_thres);
751 if (__predict_false(thres == 0))
/* Update HW delayed-ack mode if the tunable changed since last time. */
754 if (is_delack_mode_valid(dev, toep)) {
755 dack_mode = TOM_TUNABLE(dev, delack);
756 if (__predict_false(dack_mode != toep->tp_delack_mode)) {
757 u32 r = tp->rcv_nxt - toep->tp_delack_seq;
759 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp)
760 dack = F_RX_DACK_CHANGE |
761 V_RX_DACK_MODE(dack_mode);
764 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
767 * For coalescing to work effectively ensure the receive window has
768 * at least 16KB left.
770 must_send = credits + 16384 >= tp->rcv_wnd;
772 if (must_send || credits >= thres)
773 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send);
/* toe_usrreqs disconnect hook: resolve the socket from the tcpcb. */
777 cxgb_toe_disconnect(struct tcpcb *tp)
781 DPRINTF("cxgb_toe_disconnect\n");
783 so = inp_inpcbtosocket(tp->t_inpcb);
/* toe_usrreqs reset hook: abort the offloaded connection, drop TF_TOE. */
789 cxgb_toe_reset(struct tcpcb *tp)
791 struct toepcb *toep = tp->t_toe;
798 tp->t_flags &= ~TF_TOE;
/* toe_usrreqs send hook: push queued send-buffer data to the HW. */
805 cxgb_toe_send(struct tcpcb *tp)
809 DPRINTF("cxgb_toe_send\n");
810 dump_toepcb(tp->t_toe);
812 so = inp_inpcbtosocket(tp->t_inpcb);
813 t3_push_frames(so, 1);
/* toe_usrreqs rcvd hook: return RX credits for consumed data. */
818 cxgb_toe_rcvd(struct tcpcb *tp)
821 inp_lock_assert(tp->t_inpcb);
823 t3_cleanup_rbuf(tp, 0);
/* toe_usrreqs detach hook: release offload state and clear TF_TOE. */
829 cxgb_toe_detach(struct tcpcb *tp)
834 * XXX how do we handle teardown in the SYN_SENT state?
837 inp_lock_assert(tp->t_inpcb);
844 tp->t_flags &= ~TF_TOE;
/*
 * TOE user-request vector handed to the host TCP stack (tp->t_tu).
 * Fix: the original listed ".tu_detach" twice; C99 allows duplicate
 * designated initializers (last one wins) but the duplicate was redundant
 * and a common source of copy/paste bugs, so it is removed.
 */
849 static struct toe_usrreqs cxgb_toe_usrreqs = {
850 .tu_disconnect = cxgb_toe_disconnect,
851 .tu_reset = cxgb_toe_reset,
852 .tu_send = cxgb_toe_send,
853 .tu_rcvd = cxgb_toe_rcvd,
854 .tu_detach = cxgb_toe_detach,
856 .tu_syncache_event = handle_syncache_event,
/*
 * Build a CPL_SET_TCB_FIELD request in 'm' to update 'word' of the
 * connection's TCB with (val & mask), optionally suppressing the HW reply,
 * and send it (deferred if the connection is still in SYN_SENT).
 */
861 __set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word,
862 uint64_t mask, uint64_t val, int no_reply)
864 struct cpl_set_tcb_field *req;
866 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
867 toep->tp_tid, word, mask, val);
869 req = mtod(m, struct cpl_set_tcb_field *);
870 m->m_pkthdr.len = m->m_len = sizeof(*req);
871 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
873 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid));
874 req->reply = V_NO_REPLY(no_reply);
876 req->word = htons(word);
877 req->mask = htobe64(mask);
878 req->val = htobe64(val);
880 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
/* Not subject to ARP processing (through_l2t == 0). */
881 send_or_defer(toep, m, 0);
/*
 * Public wrapper around __set_tcb_field(): allocate the request mbuf and
 * issue a no-reply TCB field update, unless the connection is already
 * closed or shutting down.  Fix: corrected the misspelled diagnostic
 * message ("not seting field" -> "not setting field").
 */
885 t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val)
888 struct tcpcb *tp = toep->tp_tp;
/* Don't touch the TCB of a dead/aborting connection. */
893 if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) {
894 printf("not setting field\n");
898 m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field));
900 __set_tcb_field(toep, m, word, mask, val, 1);
904 * Set one of the t_flags bits in the TCB.
907 set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val)
910 t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos);
914 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting.
917 t3_set_nagle(struct toepcb *toep)
919 struct tcpcb *tp = toep->tp_tp;
/* HW Nagle is enabled exactly when TF_NODELAY is clear. */
921 set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY));
925 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting.
928 t3_set_keepalive(struct toepcb *toep, int on_off)
931 set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off);
/* Toggle HW receive coalescing for the connection via its TCB t_flags. */
935 t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off)
937 set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off);
/* Toggle MSS-based delayed-ack in the connection's TCB t_flags. */
941 t3_set_dack_mss(struct toepcb *toep, int on_off)
944 set_tcb_tflag(toep, S_TF_DACK_MSS, on_off);
948 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting.
951 t3_set_tos(struct toepcb *toep)
953 int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb);
955 t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS),
961 * In DDP mode, TP fails to schedule a timer to push RX data to the host when
962 * DDP is disabled (data is delivered to freelist). [Note that, the peer should
963 * set the PSH bit in the last segment, which would trigger delivery.]
964 * We work around the issue by setting a DDP buffer in a partial placed state,
965 * which guarantees that TP will schedule a timer.
967 #define TP_DDP_TIMER_WORKAROUND_MASK\
968 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\
969 ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\
970 V_TCB_RX_DDP_BUF0_LEN(3)) << 32))
971 #define TP_DDP_TIMER_WORKAROUND_VAL\
972 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\
973 ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\
/*
 * Enable or disable DDP on the connection.  Disabling applies the
 * partial-placement workaround above so TP still schedules its push timer.
 */
977 t3_enable_ddp(struct toepcb *toep, int on)
981 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1),
984 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS,
986 TP_DDP_TIMER_WORKAROUND_MASK,
988 TP_DDP_TIMER_WORKAROUND_VAL);
/* Program the DDP tag/color for buffer 'buf_idx' (BUF0_TAG + index). */
993 t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color)
995 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx,
996 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
/*
 * Program offset and length of DDP buffer 0 or 1 in the connection's TCB.
 * NOTE(review): in the BUF1 branch the mask is built as
 * V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32) while BUF0 shifts the
 * *value*, not the mask argument — the asymmetry looks suspicious; verify
 * the TCB field layout in cxgb_tcb.h before relying on the BUF1 path.
 */
1001 t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
1005 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET,
1006 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
1007 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
1008 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) |
1009 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
1011 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET,
1012 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
1013 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32),
1014 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) |
1015 V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32));
/*
 * Select a congestion-control flavor by name.  The lookup body is compiled
 * out behind CONGESTION_CONTROL_SUPPORTED (unported Linux-era code).
 */
1019 t3_set_cong_control(struct socket *so, const char *name)
1021 #ifdef CONGESTION_CONTROL_SUPPORTED
1024 for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++)
1025 if (!strcmp(name, t3_cong_ops[cong_algo].name))
1028 if (cong_algo >= ARRAY_SIZE(t3_cong_ops))
/*
 * Request a snapshot of the connection's TCB from the HW via CPL_GET_TCB.
 * The reply is steered to the connection's queue set.  Deferred onto the
 * out-of-order queue while in SYN_SENT (no TID yet).  Uses M_NOWAIT, so
 * allocation may fail; caller holds the inpcb lock.
 */
1035 t3_get_tcb(struct toepcb *toep)
1037 struct cpl_get_tcb *req;
1038 struct tcpcb *tp = toep->tp_tp;
1039 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
1044 inp_lock_assert(tp->t_inpcb);
1045 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
1046 req = mtod(m, struct cpl_get_tcb *);
1047 m->m_pkthdr.len = m->m_len = sizeof(*req);
1048 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1050 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid));
1051 req->cpuno = htons(toep->tp_qset);
1053 if (tp->t_state == TCPS_SYN_SENT)
1054 mbufq_tail(&toep->out_of_order_queue, m); // defer
1056 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
/* Register the toepcb under 'tid' in the t3cdev TID table. */
1061 so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid)
1066 cxgb_insert_tid(d->cdev, d->client, toep, tid);
1070 * find_best_mtu - find the entry in the MTU table closest to an MTU
1072 * @mtu: the target MTU
1074 * Returns the index of the value in the MTU table that is closest to but
1075 * does not exceed the target MTU.
1078 find_best_mtu(const struct t3c_data *d, unsigned short mtu)
/* Linear scan; the MTU table is sorted ascending. */
1082 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
/*
 * Pick the HW MTU-table index for the connection given the path MTU and
 * set tp->t_maxseg accordingly (the 40 accounts for IPv4 + TCP headers).
 */
1088 select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu)
1093 struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt;
1096 tp->t_maxseg = pmtu - 40;
/* Never go below the smallest entry in the HW MTU table. */
1097 if (tp->t_maxseg < td->mtus[0] - 40)
1098 tp->t_maxseg = td->mtus[0] - 40;
1099 idx = find_best_mtu(td, tp->t_maxseg + 40);
1101 tp->t_maxseg = td->mtus[idx] - 40;
1103 idx = find_best_mtu(td, pmtu);
/* Release an active-open TID and drop the toepcb reference it held. */
1109 free_atid(struct t3cdev *cdev, unsigned int tid)
1111 struct toepcb *toep = cxgb_free_atid(cdev, tid);
1114 toepcb_release(toep);
1118 * Release resources held by an offload connection (TID, L2T entry, etc.)
1121 t3_release_offload_resources(struct toepcb *toep)
1123 struct tcpcb *tp = toep->tp_tp;
1124 struct toedev *tdev = toep->tp_toedev;
1125 struct t3cdev *cdev;
1127 unsigned int tid = toep->tp_tid;
1128 struct sockbuf *rcv;
1130 CTR0(KTR_TOM, "t3_release_offload_resources");
1135 cdev = TOEP_T3C_DEV(toep);
1140 t3_release_ddp_resources(toep);
1142 #ifdef CTRL_SKB_CACHE
1143 kfree_skb(CTRL_SKB_CACHE(tp));
1144 CTRL_SKB_CACHE(tp) = NULL;
/* Drop any WRs still outstanding against the HW. */
1147 if (toep->tp_wr_avail != toep->tp_wr_max) {
1148 purge_wr_queue(toep);
1149 reset_wr_list(toep);
1153 l2t_release(L2DATA(cdev), toep->tp_l2t);
1154 toep->tp_l2t = NULL;
1158 inp_lock_assert(tp->t_inpcb);
1159 so = inp_inpcbtosocket(tp->t_inpcb);
1160 rcv = so_sockbuf_rcv(so);
1162 * cancel any offloaded reads
1167 tp->t_flags &= ~TF_TOE;
/* Abort any in-flight user DDP posting and wake the reader. */
1168 if (toep->tp_ddp_state.user_ddp_pending) {
1169 t3_cancel_ubuf(toep, rcv);
1170 toep->tp_ddp_state.user_ddp_pending = 0;
1172 so_sorwakeup_locked(so);
/* SYN_SENT connections still hold an atid, not a real TID. */
1176 if (toep->tp_state == TCPS_SYN_SENT) {
1177 free_atid(cdev, tid);
1179 __skb_queue_purge(&tp->out_of_order_queue);
1181 } else { // we have TID
1182 cxgb_remove_tid(cdev, toep, tid);
1183 toepcb_release(toep);
1186 log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
/* Switch a socket onto the offload path: socket ops, TF_TOE, usrreqs. */
1191 install_offload_ops(struct socket *so)
1193 struct tcpcb *tp = so_sototcpcb(so);
1195 KASSERT(tp->t_toe != NULL, ("toepcb not set"));
1197 t3_install_socket_ops(so);
1198 tp->t_flags |= TF_TOE;
1199 tp->t_tu = &cxgb_toe_usrreqs;
1203 * Determine the receive window scaling factor given a target max
1207 select_rcv_wscale(int space)
/* Clamp to what the HW can express, then find the smallest wscale. */
1211 if (space > MAX_RCV_WND)
1212 space = MAX_RCV_WND;
1215 for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;
1221 * Determine the receive window size for a socket.
1223 static unsigned long
1224 select_rcv_wnd(struct toedev *dev, struct socket *so)
1226 struct tom_data *d = TOM_DATA(dev);
1228 unsigned int max_rcv_wnd;
1229 struct sockbuf *rcv;
1231 rcv = so_sockbuf_rcv(so);
/* Autotuned sockets get the autotuning ceiling, others their hiwat. */
1233 if (tcp_do_autorcvbuf)
1234 wnd = tcp_autorcvbuf_max;
1236 wnd = rcv->sb_hiwat;
1241 * For receive coalescing to work effectively we need a receive window
1242 * that can accomodate a coalesced segment.
1244 if (wnd < MIN_RCV_WND)
/* Pre-T3C parts are limited to 23 RX pages worth of window. */
1248 max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
1249 (uint32_t)d->rx_page_size * 23 :
1252 return min(wnd, max_rcv_wnd);
1256 * Assign offload parameters to some socket fields. This code is used by
1257 * both active and passive opens.
1260 init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
1261 struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
1263 struct tcpcb *tp = so_sototcpcb(so);
1264 struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);
1265 struct sockbuf *snd, *rcv;
1268 SOCK_LOCK_ASSERT(so);
1271 snd = so_sockbuf_snd(so);
1272 rcv = so_sockbuf_rcv(so);
1274 log(LOG_INFO, "initializing offload socket\n");
1276 * We either need to fix push frames to work with sbcompress
1277 * or we need to add this
1279 snd->sb_flags |= SB_NOCOALESCE;
1280 rcv->sb_flags |= SB_NOCOALESCE;
1284 toep->tp_toedev = dev;
/* Start with the full WR credit budget for this device. */
1288 toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
1289 toep->tp_wr_unacked = 0;
1290 toep->tp_delack_mode = 0;
1292 toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
1297 tp->rcv_wnd = select_rcv_wnd(dev, so);
/* Enable DDP only if tunable allows, socket permits, and window fits. */
1299 toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
1300 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
1301 toep->tp_qset_idx = 0;
1303 reset_wr_list(toep);
1304 DPRINTF("initialization done\n");
1308 * The next two functions calculate the option 0 value for a socket.
1310 static inline unsigned int
1311 calc_opt0h(struct socket *so, int mtu_idx)
1313 struct tcpcb *tp = so_sototcpcb(so);
1314 int wscale = select_rcv_wscale(tp->rcv_wnd);
1316 return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
1317 V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
1318 V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
/* Low half of option 0: TOS, ULP mode, and receive buffer size in KB. */
1321 static inline unsigned int
1322 calc_opt0l(struct socket *so, int ulp_mode)
1324 struct tcpcb *tp = so_sototcpcb(so);
1327 val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) |
1328 V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));
1330 DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val);
/* Option 2: congestion-control flavor, valid only when the tunable is set. */
1334 static inline unsigned int
1335 calc_opt2(const struct socket *so, struct toedev *dev)
1339 flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);
1341 return (V_FLAVORS_VALID(flv_valid) |
1342 V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
/*
 * Sum the WR credits (stashed in m_pkthdr.csum_data by t3_push_frames)
 * of all work requests still outstanding on the connection.
 */
1347 count_pending_wrs(const struct toepcb *toep)
1349 const struct mbuf *m;
1352 wr_queue_walk(toep, m)
1353 n += m->m_pkthdr.csum_data;
1359 (((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
/*
 * Build a CPL_ACT_OPEN_REQ (active open) in 'm': 4-tuple, L2T index,
 * option words derived from the socket and toepcb.
 * NOTE(review): the 4-tuple is filled twice below (inp_4tuple_get() and
 * then direct inpcb field copies) — these were almost certainly alternate
 * #if/#else branches for different kernel versions that this extraction
 * collapsed together; confirm against the pristine source.
 */
1363 mk_act_open_req(struct socket *so, struct mbuf *m,
1364 unsigned int atid, const struct l2t_entry *e)
1366 struct cpl_act_open_req *req;
1367 struct inpcb *inp = so_sotoinpcb(so);
1368 struct tcpcb *tp = inp_inpcbtotcpcb(inp);
1369 struct toepcb *toep = tp->t_toe;
1370 struct toedev *tdev = toep->tp_toedev;
1372 m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));
1374 req = mtod(m, struct cpl_act_open_req *);
1375 m->m_pkthdr.len = m->m_len = sizeof(*req);
1377 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1379 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
1380 inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port);
1382 req->local_port = inp->inp_lport;
1383 req->peer_port = inp->inp_fport;
1384 memcpy(&req->local_ip, &inp->inp_laddr, 4);
1385 memcpy(&req->peer_ip, &inp->inp_faddr, 4);
1387 req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
1388 V_TX_CHANNEL(e->smt_idx));
1389 req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
1391 req->opt2 = htonl(calc_opt2(so, tdev));
1396 * Convert an ACT_OPEN_RPL status to an errno.
/*
 * Map a CPL ACT_OPEN_RPL status code to a FreeBSD errno.  The 4-tuple
 * collision case is also logged.  (The switch header and default case
 * are on lines elided from this view.)
 */
1399 act_open_rpl_status_to_errno(int status)
1402 case CPL_ERR_CONN_RESET:
1403 return (ECONNREFUSED);
1404 case CPL_ERR_ARP_MISS:
1405 return (EHOSTUNREACH);
1406 case CPL_ERR_CONN_TIMEDOUT:
1408 case CPL_ERR_TCAM_FULL:
1410 case CPL_ERR_CONN_EXIST:
1411 log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
1412 return (EADDRINUSE);
/*
 * Tear down a failed active open: release the offload resources, then
 * drop the connection with the given errno.  Note this unlocks the
 * inpcb — callers must not touch it afterwards.
 */
1419 fail_act_open(struct toepcb *toep, int errno)
1421 struct tcpcb *tp = toep->tp_tp;
1423 t3_release_offload_resources(toep);
1425 inp_wunlock(tp->t_inpcb);
1426 tcp_offload_drop(tp, errno);
1430 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1435 * Handle active open failures.
/*
 * Handle an ACT_OPEN_RPL failure: convert the CPL status to an errno
 * and fail the open.  The icsk/sk_reset_timer retry code below is a
 * Linux leftover — presumably inside a disabled preprocessor block
 * whose #if lines are elided from this view; connection retry is not
 * handled on FreeBSD.
 */
1438 active_open_failed(struct toepcb *toep, struct mbuf *m)
1440 struct cpl_act_open_rpl *rpl = cplhdr(m);
1443 if (toep->tp_tp == NULL)
1446 inp = toep->tp_tp->t_inpcb;
1449 * Don't handle connection retry for now
1452 struct inet_connection_sock *icsk = inet_csk(sk);
1454 if (rpl->status == CPL_ERR_CONN_EXIST &&
1455 icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
1456 icsk->icsk_retransmit_timer.function = act_open_retry_timer;
1457 sk_reset_timer(so, &icsk->icsk_retransmit_timer,
1464 * drops the inpcb lock
1466 fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
1474 * Return whether a failed active open has allocated a TID
/*
 * A failed active open allocated a TID unless it failed before the TCAM
 * entry was created (TCAM full, 4-tuple in use, or ARP miss).
 */
1477 act_open_has_tid(int status)
1479 return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
1480 status != CPL_ERR_ARP_MISS;
1484 * Process an ACT_OPEN_RPL CPL message.
/*
 * CPL handler for ACT_OPEN_RPL.  On non-T3A parts, schedule release of
 * the TID if the failed open allocated one, then run the failure path.
 */
1487 do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
1489 struct toepcb *toep = (struct toepcb *)ctx;
1490 struct cpl_act_open_rpl *rpl = cplhdr(m);
1492 if (cdev->type != T3A && act_open_has_tid(rpl->status))
1493 cxgb_queue_tid_release(cdev, GET_TID(rpl));
1495 active_open_failed(toep, m);
1500 * Handle an ARP failure for an active open. XXX purge ofo queue
1502 * XXX badly broken for crossed SYNs as the ATID is no longer valid.
1503 * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should
1504 * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't
1505 * free the atid. Hmm.
/*
 * ARP-resolution failure callback for an active open request: if the
 * connection is still in a SYN state, fail it with EHOSTUNREACH.
 * NOTE(review): fail_act_open() takes a struct toepcb *, but `so` is
 * passed here (and is not declared in the visible lines) — verify this
 * call against the full source; it looks like it should be `toep`.
 */
1509 act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m)
1511 struct toepcb *toep = m_get_toep(m);
1512 struct tcpcb *tp = toep->tp_tp;
1513 struct inpcb *inp = tp->t_inpcb;
1517 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
1519 * drops the inpcb lock
1521 fail_act_open(so, EHOSTUNREACH);
1522 printf("freeing %p\n", m);
1530 * Send an active open request.
/*
 * Initiate an offloaded active open: allocate a toepcb and an atid,
 * resolve an L2T entry for the route, set up the offload socket state,
 * build the CPL_ACT_OPEN_REQ and hand it to the L2T layer for
 * transmission.  DDP is enabled afterwards if the chosen ULP mode
 * requires it.  Error paths (atid free / ENOMEM) are at the bottom;
 * their labels are on lines elided from this view.
 */
1533 t3_connect(struct toedev *tdev, struct socket *so,
1534 struct rtentry *rt, struct sockaddr *nam)
1537 struct l2t_entry *e;
1538 struct tom_data *d = TOM_DATA(tdev);
1539 struct inpcb *inp = so_sotoinpcb(so);
1540 struct tcpcb *tp = intotcpcb(inp);
1541 struct toepcb *toep; /* allocated by init_offload_socket */
1545 toep = toepcb_alloc();
1549 if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0)
1552 e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam)
1556 inp_lock_assert(inp);
1557 m = m_gethdr(MT_DATA, M_WAITOK);
1560 m->m_toe.mt_toepcb = tp->t_toe;
1561 set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure);
1565 init_offload_socket(so, tdev, atid, e, rt, toep);
1567 install_offload_ops(so);
1569 mk_act_open_req(so, m, atid, e);
1574 m_set_toep(m, tp->t_toe);
1576 toep->tp_state = TCPS_SYN_SENT;
1577 l2t_send(d->cdev, (struct mbuf *)m, e);
1579 if (toep->tp_ulp_mode)
1580 t3_enable_ddp(toep, 0);
1584 printf("failing connect - free atid\n");
1586 free_atid(d->cdev, atid);
1588 printf("return ENOMEM\n");
1593 * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do
1594 * not send multiple ABORT_REQs for the same connection and also that we do
1595 * not try to send a message after the connection has closed. Returns 1 if
1596 * an ABORT_REQ wasn't generated after all, 0 otherwise.
/*
 * Send a CPL_ABORT_REQ for this connection.  Marks the connection so
 * that only one abort is ever issued, purges the send queue so nothing
 * is transmitted after the abort, and on T3A adds the post-close flag
 * when a close has already been requested.  If the connection is still
 * in SYN_SENT the abort is deferred on the out-of-order queue instead
 * of being sent immediately.
 */
1599 t3_send_reset(struct toepcb *toep)
1602 struct cpl_abort_req *req;
1603 unsigned int tid = toep->tp_tid;
1604 int mode = CPL_ABORT_SEND_RST;
1605 struct tcpcb *tp = toep->tp_tp;
1606 struct toedev *tdev = toep->tp_toedev;
1607 struct socket *so = NULL;
1609 struct sockbuf *snd;
1612 inp_lock_assert(tp->t_inpcb);
1613 so = inp_inpcbtosocket(tp->t_inpcb);
1616 if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) ||
1619 toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN);
1621 snd = so_sockbuf_snd(so);
1622 /* Purge the send queue so we don't send anything after an abort. */
1625 if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev))
1626 mode |= CPL_ABORT_POST_CLOSE_REQ;
1628 m = m_gethdr_nofail(sizeof(*req));
1629 m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep));
1630 set_arp_failure_handler(m, abort_arp_failure);
1632 req = mtod(m, struct cpl_abort_req *);
1633 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
1634 req->wr.wr_lo = htonl(V_WR_TID(tid));
1635 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
1636 req->rsvd0 = tp ? htonl(tp->snd_nxt) : 0;
1637 req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
1639 if (tp && (tp->t_state == TCPS_SYN_SENT))
1640 mbufq_tail(&toep->out_of_order_queue, m); // defer
1642 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);
/*
 * IP-level socket option handling for offloaded sockets.  Only IP_TOS
 * is supported; IP_OPTIONS is explicitly rejected.  Setting a TOS above
 * IPTOS_PREC_CRITIC_ECP requires privilege.  The new TOS is stored in
 * the inpcb and pushed down to the hardware via t3_set_tos().
 * NOTE(review): both inp_ip_tos_set() and the direct inp_ip_tos
 * assignment appear — presumably alternative preprocessor branches
 * elided from this view.
 */
1646 t3_ip_ctloutput(struct socket *so, struct sockopt *sopt)
1651 if (sopt->sopt_name == IP_OPTIONS)
1652 return (ENOPROTOOPT);
1654 if (sopt->sopt_name != IP_TOS)
1655 return (EOPNOTSUPP);
1657 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
1662 if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread))
1665 inp = so_sotoinpcb(so);
1667 inp_ip_tos_set(inp, optval);
1669 inp->inp_ip_tos = optval;
1671 t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe);
/*
 * TCP-level socket option handling for offloaded sockets.  Supports
 * setting TCP_CONGESTION (congestion-control algorithm name, pushed to
 * the hardware via t3_set_cong_control) and TCP_NODELAY (toggles
 * TF_NODELAY and re-programs Nagle on the card via t3_set_nagle).
 * GET direction is not implemented for either option.
 */
1678 t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt)
1683 if (sopt->sopt_name != TCP_CONGESTION &&
1684 sopt->sopt_name != TCP_NODELAY)
1685 return (EOPNOTSUPP);
1687 if (sopt->sopt_name == TCP_CONGESTION) {
1688 char name[TCP_CA_NAME_MAX];
1689 int optlen = sopt->sopt_valsize;
1692 if (sopt->sopt_dir == SOPT_GET) {
1693 KASSERT(0, ("unimplemented"));
1694 return (EOPNOTSUPP);
1700 err = copyinstr(sopt->sopt_val, name,
1701 min(TCP_CA_NAME_MAX - 1, optlen), &copied);
1707 tp = so_sototcpcb(so);
1709 * XXX I need to revisit this
1711 if ((err = t3_set_cong_control(so, name)) == 0) {
1712 #ifdef CONGESTION_CONTROL_SUPPORTED
1713 tp->t_cong_control = strdup(name, M_CXGB);
1722 if (sopt->sopt_dir == SOPT_GET)
1723 return (EOPNOTSUPP);
1725 err = sooptcopyin(sopt, &optval, sizeof optval,
1731 inp = so_sotoinpcb(so);
1732 tp = inp_inpcbtotcpcb(inp);
1736 oldval = tp->t_flags;
1738 tp->t_flags |= TF_NODELAY;
1740 tp->t_flags &= ~TF_NODELAY;
1744 if (oldval != tp->t_flags && (tp->t_toe != NULL))
1745 t3_set_nagle(tp->t_toe);
/*
 * Top-level ctloutput for offloaded sockets: dispatch to the IP or TCP
 * handler, and fall back to the stock tcp_ctloutput() for any option
 * the offload path does not support.
 */
1753 t3_ctloutput(struct socket *so, struct sockopt *sopt)
1757 if (sopt->sopt_level != IPPROTO_TCP)
1758 err = t3_ip_ctloutput(so, sopt);
1760 err = t3_tcp_ctloutput(so, sopt);
1762 if (err != EOPNOTSUPP)
1765 return (tcp_ctloutput(so, sopt));
1769 * Returns true if we need to explicitly request RST when we receive new data
1770 * on an RX-closed connection.
/*
 * Predicate: should we explicitly request an RST when data arrives on
 * an RX-closed connection?  (The body is on lines elided from this
 * view — the criteria cannot be documented from here.)
 */
1773 need_rst_on_excess_rx(const struct toepcb *toep)
1779 * Handles Rx data that arrives in a state where the socket isn't accepting
/*
 * Handle RX data arriving while the socket can no longer accept it:
 * send an abort/RST unless one is already in flight.
 */
1783 handle_excess_rx(struct toepcb *toep, struct mbuf *m)
1786 if (need_rst_on_excess_rx(toep) &&
1787 !(toep->tp_flags & TP_ABORT_SHUTDOWN))
1788 t3_send_reset(toep);
1793 * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE)
1794 * by getting the DDP offset from the TCB.
/*
 * Treat a CPL_GET_TCB_RPL as a DDP completion: extract the current DDP
 * buffer offset (and, under debug, DDP flags/rcv_nxt/rx_hdr_offset)
 * from the returned TCB image, synthesize an mbuf describing the newly
 * placed data, advance rcv_nxt and wake the receiver.  Handles the
 * UBUF-cancel race where an earlier CPL already invalidated the user
 * buffer and only a wakeup is needed.
 */
1797 tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
1799 struct ddp_state *q = &toep->tp_ddp_state;
1800 struct ddp_buf_state *bsp;
1801 struct cpl_get_tcb_rpl *hdr;
1802 unsigned int ddp_offset;
1805 struct sockbuf *rcv;
1812 so = inp_inpcbtosocket(tp->t_inpcb);
1814 inp_lock_assert(tp->t_inpcb);
1815 rcv = so_sockbuf_rcv(so);
1818 /* Note that we only account for CPL_GET_TCB issued by the DDP code.
1819 * We really need a cookie in order to dispatch the RPLs.
1823 /* It is a possible that a previous CPL already invalidated UBUF DDP
1824 * and moved the cur_buf idx and hence no further processing of this
1825 * skb is required. However, the app might be sleeping on
1826 * !q->get_tcb_count and we need to wake it up.
1828 if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
1829 int state = so_state_get(so);
1832 if (__predict_true((state & SS_NOFDREF) == 0))
1833 so_sorwakeup_locked(so);
1835 sockbuf_unlock(rcv);
1840 bsp = &q->buf_state[q->cur_buf];
1842 tcb = (__be64 *)(hdr + 1);
/* The TCB is returned as big-endian 64-bit words; buffer 0's offset
 * lives in the high half of its word, buffer 1's in the low half. */
1843 if (q->cur_buf == 0) {
1844 t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
1845 ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
1847 t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
1848 ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
1850 ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
1851 m->m_cur_offset = bsp->cur_offset;
1852 bsp->cur_offset = ddp_offset;
/* Length of newly DMA'd data = new offset - previously seen offset. */
1853 m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;
1856 "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
1857 q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
1858 KASSERT(ddp_offset >= m->m_cur_offset,
1859 ("ddp_offset=%u less than cur_offset=%u",
1860 ddp_offset, m->m_cur_offset));
1864 unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;
1866 t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
1867 ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;
1869 t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
1870 rcv_nxt = t >> S_TCB_RCV_NXT;
1871 rcv_nxt &= M_TCB_RCV_NXT;
1873 t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
1874 rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
1875 rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;
1877 T3_TRACE2(TIDTB(sk),
1878 "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
1879 ddp_flags, rcv_nxt - rx_hdr_offset);
1881 "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
1882 tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
1884 "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
1885 rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
1887 "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
1888 q->buf_state[0].flags, q->buf_state[1].flags);
1892 if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
1893 handle_excess_rx(toep, m);
1898 if ((int)m->m_pkthdr.len < 0) {
1899 t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
1902 if (bsp->flags & DDP_BF_NOCOPY) {
1905 "tcb_rpl_as_ddp_complete: CANCEL UBUF");
/* NOTE(review): the sk/printk lines below are Linux leftovers,
 * presumably in a disabled preprocessor block elided from this view. */
1907 if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1908 printk("!cancel_ubuf");
1909 t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
1912 m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
1913 bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
1915 } else if (bsp->flags & DDP_BF_NOFLIP) {
1917 m->m_ddp_flags = 1; /* always a kernel buffer */
1919 /* now HW buffer carries a user buffer */
1920 bsp->flags &= ~DDP_BF_NOFLIP;
1921 bsp->flags |= DDP_BF_NOCOPY;
1923 /* It is possible that the CPL_GET_TCB_RPL doesn't indicate
1924 * any new data in which case we're done. If in addition the
1925 * offset is 0, then there wasn't a completion for the kbuf
1926 * and we need to decrement the posted count.
1928 if (m->m_pkthdr.len == 0) {
1929 if (ddp_offset == 0) {
1931 bsp->flags |= DDP_BF_NODATA;
1933 sockbuf_unlock(rcv);
1938 sockbuf_unlock(rcv);
1940 /* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
1941 * but it got here way late and nobody cares anymore.
1947 m->m_ddp_gl = (unsigned char *)bsp->gl;
1948 m->m_flags |= M_DDP;
1949 m->m_seq = tp->rcv_nxt;
1950 tp->rcv_nxt += m->m_pkthdr.len;
1951 tp->t_rcvtime = ticks;
1952 CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
1953 m->m_seq, q->cur_buf, m->m_pkthdr.len);
1954 if (m->m_pkthdr.len == 0) {
1955 q->user_ddp_pending = 0;
1960 state = so_state_get(so);
1961 if (__predict_true((state & SS_NOFDREF) == 0))
1962 so_sorwakeup_locked(so);
1964 sockbuf_unlock(rcv);
1968 * Process a CPL_GET_TCB_RPL. These can also be generated by the DDP code,
1969 * in that case they are similar to DDP completions.
/*
 * CPL handler for GET_TCB_RPL: forward to the DDP-completion path under
 * the inpcb write lock.  A NULL toep just means the socket is gone.
 */
1972 do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
1974 struct toepcb *toep = (struct toepcb *)ctx;
1976 /* OK if socket doesn't exist */
1978 printf("null toep in do_get_tcb_rpl\n");
1979 return (CPL_RET_BUF_DONE);
1982 inp_wlock(toep->tp_tp->t_inpcb);
1983 tcb_rpl_as_ddp_complete(toep, m);
1984 inp_wunlock(toep->tp_tp->t_inpcb);
/*
 * Account for data the hardware placed directly via DDP when a plain
 * RX_DATA arrives with a sequence number ahead of rcv_nxt: tag the mbuf
 * with the DDP scatter list and offsets, advance rcv_nxt, and disable
 * re-arming of user-buffer DDP for this connection.
 */
1990 handle_ddp_data(struct toepcb *toep, struct mbuf *m)
1992 struct tcpcb *tp = toep->tp_tp;
1994 struct ddp_state *q;
1995 struct ddp_buf_state *bsp;
1996 struct cpl_rx_data *hdr = cplhdr(m);
1997 unsigned int rcv_nxt = ntohl(hdr->seq);
1998 struct sockbuf *rcv;
/* Sequence number matches rcv_nxt: nothing was DDPed ahead of us. */
2000 if (tp->rcv_nxt == rcv_nxt)
2003 inp_lock_assert(tp->t_inpcb);
2004 so = inp_inpcbtosocket(tp->t_inpcb);
2005 rcv = so_sockbuf_rcv(so);
2008 q = &toep->tp_ddp_state;
2009 bsp = &q->buf_state[q->cur_buf];
2010 KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
2011 rcv_nxt, tp->rcv_nxt));
/* The DDPed byte count is the gap between the CPL's seq and rcv_nxt. */
2012 m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
2013 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2014 CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
2015 rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);
2018 if ((int)m->m_pkthdr.len < 0) {
2019 t3_ddp_error(so, "handle_ddp_data: neg len");
2022 m->m_ddp_gl = (unsigned char *)bsp->gl;
2023 m->m_flags |= M_DDP;
2024 m->m_cur_offset = bsp->cur_offset;
2025 m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
2026 if (bsp->flags & DDP_BF_NOCOPY)
2027 bsp->flags &= ~DDP_BF_NOCOPY;
2029 m->m_seq = tp->rcv_nxt;
2030 tp->rcv_nxt = rcv_nxt;
2031 bsp->cur_offset += m->m_pkthdr.len;
2032 if (!(bsp->flags & DDP_BF_NOFLIP))
2035 * For now, don't re-enable DDP after a connection fell out of DDP
2038 q->ubuf_ddp_ready = 0;
2039 sockbuf_unlock(rcv);
2043 * Process new data received for a connection.
/*
 * Process a CPL_RX_DATA message: validate the sequence number, strip
 * the CPL header, account for any DDP placement, advance rcv_nxt,
 * append the data to the socket's receive buffer and wake the reader.
 * Data arriving after the socket stopped receiving goes through
 * handle_excess_rx() instead.
 */
2046 new_rx_data(struct toepcb *toep, struct mbuf *m)
2048 struct cpl_rx_data *hdr = cplhdr(m);
2049 struct tcpcb *tp = toep->tp_tp;
2051 struct sockbuf *rcv;
2053 int len = be16toh(hdr->len);
2055 inp_wlock(tp->t_inpcb);
2057 so = inp_inpcbtosocket(tp->t_inpcb);
2059 if (__predict_false(so_no_receive(so))) {
2060 handle_excess_rx(toep, m);
2061 inp_wunlock(tp->t_inpcb);
2066 if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
2067 handle_ddp_data(toep, m);
2069 m->m_seq = ntohl(hdr->seq);
2070 m->m_ulp_mode = 0; /* for iSCSI */
2073 if (__predict_false(m->m_seq != tp->rcv_nxt)) {
2075 "%s: TID %u: Bad sequence number %u, expected %u\n",
2076 toep->tp_toedev->name, toep->tp_tid, m->m_seq,
2079 inp_wunlock(tp->t_inpcb);
/* Strip the CPL header so only payload remains in the mbuf. */
2083 m_adj(m, sizeof(*hdr));
2085 #ifdef URGENT_DATA_SUPPORTED
2087 * We don't handle urgent data yet
2089 if (__predict_false(hdr->urg))
2090 handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
2091 if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
2092 tp->urg_seq - tp->rcv_nxt < skb->len))
2093 tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
2096 if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
2097 toep->tp_delack_mode = hdr->dack_mode;
2098 toep->tp_delack_seq = tp->rcv_nxt;
2100 CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
2101 m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);
/* Trust the CPL's length field over the mbuf's accounting. */
2103 if (len < m->m_pkthdr.len)
2104 m->m_pkthdr.len = m->m_len = len;
2106 tp->rcv_nxt += m->m_pkthdr.len;
2107 tp->t_rcvtime = ticks;
2108 toep->tp_enqueued_bytes += m->m_pkthdr.len;
2110 "new_rx_data: seq 0x%x len %u",
2111 m->m_seq, m->m_pkthdr.len);
2112 inp_wunlock(tp->t_inpcb);
2113 rcv = so_sockbuf_rcv(so);
2117 DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len);
2123 * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
2126 KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1),
2128 ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
2129 so, rcv->sb_cc, rcv->sb_mbmax));
2133 CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
2134 rcv->sb_cc, rcv->sb_mbcnt);
2136 state = so_state_get(so);
2137 if (__predict_true((state & SS_NOFDREF) == 0))
2138 so_sorwakeup_locked(so);
2140 sockbuf_unlock(rcv);
2144 * Handler for RX_DATA CPL messages.
/*
 * CPL handler for RX_DATA: hand the mbuf to new_rx_data().
 */
2147 do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2149 struct toepcb *toep = (struct toepcb *)ctx;
2151 DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);
2153 new_rx_data(toep, m);
/*
 * Process a CPL_RX_DATA_DDP: data was placed directly into a posted DDP
 * buffer.  Decode the ddp_report to find which buffer and offset were
 * used, advance rcv_nxt, tag the mbuf with the scatter list, flip
 * buffers on completion when permitted, and wake the reader.
 */
2159 new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
2162 struct ddp_state *q;
2163 struct ddp_buf_state *bsp;
2164 struct cpl_rx_data_ddp *hdr;
2166 unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
2168 unsigned int delack_mode;
2169 struct sockbuf *rcv;
2172 inp_wlock(tp->t_inpcb);
2173 so = inp_inpcbtosocket(tp->t_inpcb);
2175 if (__predict_false(so_no_receive(so))) {
2177 handle_excess_rx(toep, m);
2178 inp_wunlock(tp->t_inpcb);
2182 q = &toep->tp_ddp_state;
2184 ddp_report = ntohl(hdr->u.ddp_report);
2185 buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
2186 bsp = &q->buf_state[buf_idx];
2189 "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
2190 "hdr seq 0x%x len %u",
2191 tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
2194 "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
2195 G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);
2197 ddp_len = ntohs(hdr->len);
2198 rcv_nxt = ntohl(hdr->seq) + ddp_len;
2200 delack_mode = G_DDP_DACK_MODE(ddp_report);
2201 if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
2202 toep->tp_delack_mode = delack_mode;
2203 toep->tp_delack_seq = tp->rcv_nxt;
2206 m->m_seq = tp->rcv_nxt;
2207 tp->rcv_nxt = rcv_nxt;
2209 tp->t_rcvtime = ticks;
2211 * Store the length in m->m_len. We are changing the meaning of
2212 * m->m_len here, we need to be very careful that nothing from now on
2213 * interprets ->len of this packet the usual way.
2215 m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
2216 inp_wunlock(tp->t_inpcb);
2218 "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
2219 m->m_len, rcv_nxt, m->m_seq);
2221 * Figure out where the new data was placed in the buffer and store it
2222 * in m_cur_offset. Assumes the buffer offset starts at 0, consumer
2223 * needs to account for page pod's pg_offset.
2225 end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
2226 m->m_cur_offset = end_offset - m->m_pkthdr.len;
2228 rcv = so_sockbuf_rcv(so);
2231 m->m_ddp_gl = (unsigned char *)bsp->gl;
2232 m->m_flags |= M_DDP;
2233 bsp->cur_offset = end_offset;
2234 toep->tp_enqueued_bytes += m->m_pkthdr.len;
2237 * Length is only meaningful for kbuf
2239 if (!(bsp->flags & DDP_BF_NOCOPY))
2240 KASSERT(m->m_len <= bsp->gl->dgl_length,
2241 ("length received exceeds ddp pages: len=%d dgl_length=%d",
2242 m->m_len, bsp->gl->dgl_length));
2244 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2245 KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));
2247 * Bit 0 of flags stores whether the DDP buffer is completed.
2248 * Note that other parts of the code depend on this being in bit 0.
2250 if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
2251 panic("spurious ddp completion");
2253 m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
2254 if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
2255 q->cur_buf ^= 1; /* flip buffers */
2258 if (bsp->flags & DDP_BF_NOCOPY) {
2259 m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
2260 bsp->flags &= ~DDP_BF_NOCOPY;
2263 if (ddp_report & F_DDP_PSH)
2264 m->m_ddp_flags |= DDP_BF_PSH;
2266 m->m_ddp_flags |= DDP_BF_NODATA;
/* NOTE(review): the skb lines below are Linux leftovers, presumably in
 * a disabled preprocessor block elided from this view. */
2269 skb_reset_transport_header(skb);
2270 tcp_hdr(skb)->fin = 0; /* changes original hdr->ddp_report */
2274 if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) ||
2275 (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1))
2276 || !(m->m_ddp_flags & DDP_BF_NOCOPY))))
2277 so_sorwakeup_locked(so);
2279 sockbuf_unlock(rcv);
2282 #define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
2283 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
2284 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
2288 * Handler for RX_DATA_DDP CPL messages.
/*
 * CPL handler for RX_DATA_DDP: log and drop messages reporting a DDP
 * error (DDP_ERR mask), otherwise process the placement.
 */
2291 do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2293 struct toepcb *toep = ctx;
2294 const struct cpl_rx_data_ddp *hdr = cplhdr(m);
2298 if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
2299 log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
2300 GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
2301 return (CPL_RET_BUF_DONE);
/* NOTE(review): Linux leftover, presumably in a disabled block. */
2304 skb->h.th = tcphdr_skb->h.th;
2306 new_rx_data_ddp(toep, m);
/*
 * Process a CPL_RX_DDP_COMPLETE: a posted DDP buffer has been filled.
 * Compute the number of bytes placed since the last report, advance
 * rcv_nxt, tag the mbuf with the scatter list, optionally flip to the
 * other buffer, and wake the reader.
 */
2311 process_ddp_complete(struct toepcb *toep, struct mbuf *m)
2313 struct tcpcb *tp = toep->tp_tp;
2315 struct ddp_state *q;
2316 struct ddp_buf_state *bsp;
2317 struct cpl_rx_ddp_complete *hdr;
2318 unsigned int ddp_report, buf_idx, when, delack_mode;
2320 struct sockbuf *rcv;
2322 inp_wlock(tp->t_inpcb);
2323 so = inp_inpcbtosocket(tp->t_inpcb);
2325 if (__predict_false(so_no_receive(so))) {
2326 struct inpcb *inp = so_sotoinpcb(so);
2328 handle_excess_rx(toep, m);
2332 q = &toep->tp_ddp_state;
2334 ddp_report = ntohl(hdr->ddp_report);
2335 buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
2336 m->m_pkthdr.csum_data = tp->rcv_nxt;
2338 rcv = so_sockbuf_rcv(so);
2341 bsp = &q->buf_state[buf_idx];
2342 when = bsp->cur_offset;
/* Bytes newly placed = reported offset - previous offset. */
2343 m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
2344 tp->rcv_nxt += m->m_len;
2345 tp->t_rcvtime = ticks;
2347 delack_mode = G_DDP_DACK_MODE(ddp_report);
2348 if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
2349 toep->tp_delack_mode = delack_mode;
2350 toep->tp_delack_seq = tp->rcv_nxt;
/* NOTE(review): Linux leftovers, presumably in a disabled block. */
2353 skb_reset_transport_header(skb);
2354 tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */
2356 inp_wunlock(tp->t_inpcb);
2358 KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2360 "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
2361 "ddp_report 0x%x offset %u, len %u",
2362 tp->rcv_nxt, bsp->cur_offset, ddp_report,
2363 G_DDP_OFFSET(ddp_report), m->m_len);
2365 m->m_cur_offset = bsp->cur_offset;
2366 bsp->cur_offset += m->m_len;
2368 if (!(bsp->flags & DDP_BF_NOFLIP)) {
2369 q->cur_buf ^= 1; /* flip buffers */
2370 if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
2375 "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
2376 "ddp_report %u offset %u",
2377 tp->rcv_nxt, bsp->cur_offset, ddp_report,
2378 G_DDP_OFFSET(ddp_report));
2380 m->m_ddp_gl = (unsigned char *)bsp->gl;
2381 m->m_flags |= M_DDP;
2382 m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
2383 if (bsp->flags & DDP_BF_NOCOPY)
2384 bsp->flags &= ~DDP_BF_NOCOPY;
2386 m->m_ddp_flags |= DDP_BF_NODATA;
2389 if ((so_state_get(so) & SS_NOFDREF) == 0)
2390 so_sorwakeup_locked(so);
2392 sockbuf_unlock(rcv);
2396 * Handler for RX_DDP_COMPLETE CPL messages.
/*
 * CPL handler for RX_DDP_COMPLETE: hand the mbuf to
 * process_ddp_complete().  (The skb line is a Linux leftover.)
 */
2399 do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2401 struct toepcb *toep = ctx;
2405 skb->h.th = tcphdr_skb->h.th;
2407 process_ddp_complete(toep, m);
2412 * Move a socket to TIME_WAIT state. We need to make some adjustments to the
2413 * socket state before calling tcp_time_wait to comply with its expectations.
/*
 * Move the connection to TIME_WAIT.  Adjusts tcpcb fields so that
 * tcp_offload_twstart()/tcp_time_wait behave as expected: defeats
 * timestamp-based recycling and metric updates.
 */
2416 enter_timewait(struct tcpcb *tp)
2419 * Bump rcv_nxt for the peer FIN. We don't do this at the time we
2420 * process peer_close because we don't want to carry the peer FIN in
2421 * the socket's receive queue and if we increment rcv_nxt without
2422 * having the FIN in the receive queue we'll confuse facilities such
2425 inp_wlock(tp->t_inpcb);
2428 tp->ts_recent_age = 0; /* defeat recycling */
2429 tp->t_srtt = 0; /* defeat tcp_update_metrics */
2430 inp_wunlock(tp->t_inpcb);
2431 tcp_offload_twstart(tp);
2435 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE. This
2436 * function deals with the data that may be reported along with the FIN.
2437 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
2438 * perform normal FIN-related processing. In the latter case 1 indicates that
2439 * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
/*
 * Handle data implicitly reported by a PEER_CLOSE when DDP is active
 * (an implicit RX_DDP_COMPLETE).  Accounts for the bytes placed before
 * the FIN, tags the mbuf with the DDP scatter list and wakes the
 * reader.  See the block comment above this function for the meaning
 * of the return values.
 */
2443 handle_peer_close_data(struct socket *so, struct mbuf *m)
2445 struct tcpcb *tp = so_sototcpcb(so);
2446 struct toepcb *toep = tp->t_toe;
2447 struct ddp_state *q;
2448 struct ddp_buf_state *bsp;
2449 struct cpl_peer_close *req = cplhdr(m);
2450 unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */
2451 struct sockbuf *rcv;
2453 if (tp->rcv_nxt == rcv_nxt) /* no data */
2456 CTR0(KTR_TOM, "handle_peer_close_data");
2457 if (__predict_false(so_no_receive(so))) {
2458 handle_excess_rx(toep, m);
2461 * Although we discard the data we want to process the FIN so
2462 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
2463 * PEER_CLOSE without data. In particular this PEER_CLOSE
2464 * may be what will close the connection. We return 1 because
2465 * handle_excess_rx() already freed the packet.
2470 inp_lock_assert(tp->t_inpcb);
2471 q = &toep->tp_ddp_state;
2472 rcv = so_sockbuf_rcv(so);
2475 bsp = &q->buf_state[q->cur_buf];
2476 m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
2477 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
2478 m->m_ddp_gl = (unsigned char *)bsp->gl;
2479 m->m_flags |= M_DDP;
2480 m->m_cur_offset = bsp->cur_offset;
2482 DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
2483 m->m_seq = tp->rcv_nxt;
2484 tp->rcv_nxt = rcv_nxt;
2485 bsp->cur_offset += m->m_pkthdr.len;
2486 if (!(bsp->flags & DDP_BF_NOFLIP))
/* NOTE(review): Linux leftovers, presumably in a disabled block. */
2489 skb_reset_transport_header(skb);
2490 tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */
2492 tp->t_rcvtime = ticks;
2494 if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
2495 so_sorwakeup_locked(so);
2497 sockbuf_unlock(rcv);
2503 * Handle a peer FIN.
/*
 * Handle a peer FIN (PEER_CLOSE).  Processes any implicit DDP data,
 * performs the usual FIN state transitions (SYN_RECEIVED/ESTABLISHED ->
 * CLOSE_WAIT, FIN_WAIT_1 -> CLOSING, FIN_WAIT_2 -> TIME_WAIT or close
 * depending on a pending abort), sets delayed/immediate ACK flags, and
 * finally performs the chosen action (timewait/drop/close) after
 * dropping the inpcb lock.  The sk_wake_async lines at the bottom are
 * Linux leftovers, presumably in a disabled preprocessor block.
 */
2506 do_peer_fin(struct toepcb *toep, struct mbuf *m)
2509 struct tcpcb *tp = toep->tp_tp;
2513 CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state);
2514 if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
2515 printf("abort_pending set\n");
2519 inp_wlock(tp->t_inpcb);
2520 so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
2521 if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
2522 keep = handle_peer_close_data(so, m);
2524 inp_wunlock(tp->t_inpcb);
2528 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2530 "waking up waiters for cantrcvmore on %p ", so);
2534 * If connection is half-synchronized
2535 * (ie NEEDSYN flag on) then delay ACK,
2536 * so it may be piggybacked when SYN is sent.
2537 * Otherwise, since we received a FIN then no
2538 * more input can be expected, send ACK now.
2540 if (tp->t_flags & TF_NEEDSYN)
2541 tp->t_flags |= TF_DELACK;
2543 tp->t_flags |= TF_ACKNOW;
2547 switch (tp->t_state) {
2548 case TCPS_SYN_RECEIVED:
2549 tp->t_starttime = ticks;
2551 case TCPS_ESTABLISHED:
2552 tp->t_state = TCPS_CLOSE_WAIT;
2554 case TCPS_FIN_WAIT_1:
2555 tp->t_state = TCPS_CLOSING;
2557 case TCPS_FIN_WAIT_2:
2559 * If we've sent an abort_req we must have sent it too late,
2560 * HW will send us a reply telling us so, and this peer_close
2561 * is really the last message for this connection and needs to
2562 * be treated as an abort_rpl, i.e., transition the connection
2563 * to TCP_CLOSE (note that the host stack does this at the
2564 * time of generating the RST but we must wait for HW).
2565 * Otherwise we enter TIME_WAIT.
2567 t3_release_offload_resources(toep);
2568 if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
2571 action = TCP_TIMEWAIT;
2576 "%s: TID %u received PEER_CLOSE in bad state %d\n",
2577 toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state);
2579 inp_wunlock(tp->t_inpcb);
2581 if (action == TCP_TIMEWAIT) {
2583 } else if (action == TCP_DROP) {
2584 tcp_offload_drop(tp, 0);
2585 } else if (action == TCP_CLOSE) {
2586 tcp_offload_close(tp);
2590 /* Do not send POLL_HUP for half duplex close. */
2591 if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
2592 sk->sk_state == TCP_CLOSE)
2593 sk_wake_async(so, 1, POLL_HUP);
2595 sk_wake_async(so, 1, POLL_IN);
2604 * Handler for PEER_CLOSE CPL messages.
/*
 * CPL handler for PEER_CLOSE: hand the mbuf to do_peer_fin().
 */
2607 do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2609 struct toepcb *toep = (struct toepcb *)ctx;
2613 do_peer_fin(toep, m);
/*
 * Process a CPL_CLOSE_CON_RPL: our FIN has been ACKed.  Updates snd_una
 * (excluding the FIN), then performs the state-specific transition:
 * CLOSING -> timewait or close depending on a pending abort; LAST_ACK
 * equivalent -> close; FIN_WAIT_1 -> FIN_WAIT_2, arming the FIN_WAIT_2
 * timer if the peer can no longer send.  The chosen action is carried
 * out after the inpcb lock is dropped.
 */
2618 process_close_con_rpl(struct toepcb *toep, struct mbuf *m)
2620 struct cpl_close_con_rpl *rpl = cplhdr(m);
2621 struct tcpcb *tp = toep->tp_tp;
2624 struct sockbuf *rcv;
2626 inp_wlock(tp->t_inpcb);
2627 so = inp_inpcbtosocket(tp->t_inpcb);
2629 tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */
2631 if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
2632 inp_wunlock(tp->t_inpcb);
2636 CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep,
2637 tp->t_state, !!(so_state_get(so) & SS_NOFDREF));
2639 switch (tp->t_state) {
2640 case TCPS_CLOSING: /* see FIN_WAIT2 case in do_peer_fin */
2641 t3_release_offload_resources(toep);
2642 if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
2646 action = TCP_TIMEWAIT;
2651 * In this state we don't care about pending abort_rpl.
2652 * If we've sent abort_req it was post-close and was sent too
2653 * late, this close_con_rpl is the actual last message.
2655 t3_release_offload_resources(toep);
2658 case TCPS_FIN_WAIT_1:
2660 * If we can't receive any more
2661 * data, then closing user can proceed.
2662 * Starting the timer is contrary to the
2663 * specification, but if we don't get a FIN
2664 * we'll hang forever.
2667 * we should release the tp also, and use a
2671 rcv = so_sockbuf_rcv(so);
2675 if (rcv->sb_state & SBS_CANTRCVMORE) {
2679 soisdisconnected(so);
2680 timeout = (tcp_fast_finwait2_recycle) ?
2681 tcp_finwait2_timeout : tcp_maxidle;
2682 tcp_timer_activate(tp, TT_2MSL, timeout);
2684 tp->t_state = TCPS_FIN_WAIT_2;
2685 if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 &&
2686 (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
2693 "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
2694 toep->tp_toedev->tod_name, toep->tp_tid,
2697 inp_wunlock(tp->t_inpcb);
2700 if (action == TCP_TIMEWAIT) {
2702 } else if (action == TCP_DROP) {
2703 tcp_offload_drop(tp, 0);
2704 } else if (action == TCP_CLOSE) {
2705 tcp_offload_close(tp);
2712 * Handler for CLOSE_CON_RPL CPL messages.
/*
 * CPL handler for CLOSE_CON_RPL: hand the mbuf to
 * process_close_con_rpl().
 */
2715 do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
2718 struct toepcb *toep = (struct toepcb *)ctx;
2720 process_close_con_rpl(toep, m);
2725 * Process abort replies. We only process these messages if we anticipate
2726 * them as the coordination between SW and HW in this area is somewhat lacking
2727 * and sometimes we get ABORT_RPLs after we are done with the connection that
2728 * originated the ABORT_REQ.
/*
 * Process an ABORT_RPL we were expecting (TP_ABORT_RPL_PENDING set).
 * On T3B+ the reply arrives in two halves; the first sighting only
 * records TP_ABORT_RPL_RCVD.  Once the abort handshake completes the
 * offload resources are released and the connection is closed.
 */
2731 process_abort_rpl(struct toepcb *toep, struct mbuf *m)
2733 struct tcpcb *tp = toep->tp_tp;
2738 T3_TRACE1(TIDTB(sk),
2739 "process_abort_rpl: GTS rpl pending %d",
2740 sock_flag(sk, ABORT_RPL_PENDING));
2743 inp_wlock(tp->t_inpcb);
2744 so = inp_inpcbtosocket(tp->t_inpcb);
2746 if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
2748 * XXX panic on tcpdrop
2750 if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev))
2751 toep->tp_flags |= TP_ABORT_RPL_RCVD;
2753 toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING);
2754 if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) ||
2755 !is_t3a(toep->tp_toedev)) {
2756 if (toep->tp_flags & TP_ABORT_REQ_RCVD)
2757 panic("TP_ABORT_REQ_RCVD set");
2758 t3_release_offload_resources(toep);
2763 inp_wunlock(tp->t_inpcb);
2766 tcp_offload_close(tp);
2772 * Handle an ABORT_RPL_RSS CPL message.
/*
 * CPL handler for ABORT_RPL_RSS.  Filters out late CPL_ERR_ABORT_FAILED
 * replies, handles the T3A race where the socket is already gone
 * (tp_tp == NULL), and otherwise delegates to process_abort_rpl().
 * Drops one toepcb reference on every handled path.
 */
2775 do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
2777 	struct cpl_abort_rpl_rss *rpl = cplhdr(m);
2778 	struct toepcb *toep;
2781 	 * Ignore replies to post-close aborts indicating that the abort was
2782 	 * requested too late.  These connections are terminated when we get
2783 	 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
2784 	 * arrives the TID is either no longer used or it has been recycled.
2786 	if (rpl->status == CPL_ERR_ABORT_FAILED) {
2792 	toep = (struct toepcb *)ctx;
2795 	 * Sometimes we've already closed the socket, e.g., a post-close
2796 	 * abort races with ABORT_REQ_RSS, the latter frees the socket
2797 	 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED,
2798 	 * but FW turns the ABORT_REQ into a regular one and so we get
2799 	 * ABORT_RPL_RSS with status 0 and no socket.  Only on T3A.
/* Orphaned toepcb: release the TID, L2 table entry, and our reference. */
2804 	if (toep->tp_tp == NULL) {
2805 		log(LOG_NOTICE, "removing tid for abort\n");
2806 		cxgb_remove_tid(cdev, toep, toep->tp_tid);
2808 			l2t_release(L2DATA(cdev), toep->tp_l2t);
2810 		toepcb_release(toep);
2814 	log(LOG_NOTICE, "toep=%p\n", toep);
2815 	log(LOG_NOTICE, "tp=%p\n", toep->tp_tp);
2818 	process_abort_rpl(toep, m);
2819 	toepcb_release(toep);
2824 * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also
2825 * indicate whether RST should be sent in response.
/*
 * Convert an ABORT_REQ status code into a FreeBSD errno.  *need_rst is used
 * to indicate whether an RST should be sent in response (setting code not
 * visible in this excerpt — confirm against full source).
 */
2828 abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst)
2830 	struct tcpcb *tp = so_sototcpcb(so);
2832 	switch (abort_reason) {
2833 	case CPL_ERR_BAD_SYN:
2835 		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);	// fall through
2837 	case CPL_ERR_CONN_RESET:
2838 		// XXX need to handle SYN_RECV due to crossed SYNs
/* A reset after the peer half-closed maps to EPIPE, otherwise ECONNRESET. */
2839 		return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
2840 	case CPL_ERR_XMIT_TIMEDOUT:
2841 	case CPL_ERR_PERSIST_TIMEDOUT:
2842 	case CPL_ERR_FINWAIT2_TIMEDOUT:
2843 	case CPL_ERR_KEEPALIVE_TIMEDOUT:
/* All HW-detected timeouts are counted together; return value for these
 * cases (presumably ETIMEDOUT) is outside this excerpt. */
2845 		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
/*
 * Fill an mbuf with an ABORT_RPL work request for the given TID and size the
 * mbuf to exactly one reply.  NOTE(review): the `cmd` parameter is not used
 * in the lines visible here — presumably stored into rpl->cmd on a line
 * elided from this excerpt; confirm against full source.
 */
2854 set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd)
2856 	struct cpl_abort_rpl *rpl = cplhdr(m);
2858 	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
2859 	rpl->wr.wr_lo = htonl(V_WR_TID(tid));
2860 	m->m_len = m->m_pkthdr.len = sizeof(*rpl);
2862 	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
/*
 * Send a deferred ABORT_RPL from process context.  Used when the reply mbuf
 * could not be allocated in the original (interrupt) context; by then
 * send_abort_rpl() has stashed the desired RST disposition in req->status.
 */
2867 send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m)
2869 	struct mbuf *reply_mbuf;
2870 	struct cpl_abort_req_rss *req = cplhdr(m);
2872 	reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl));
/* BUG FIX: the priority and length belong on the reply we transmit, not on
 * the original request mbuf `m` (cf. send_abort_rpl(), which sets the
 * priority on reply_mbuf). */
2873 	m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
2874 	reply_mbuf->m_len = reply_mbuf->m_pkthdr.len = sizeof(struct cpl_abort_rpl);
2875 	set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status);
2876 	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
2881 * Returns whether an ABORT_REQ_RSS message is a negative advice.
/*
 * Returns non-zero when the ABORT_REQ_RSS status is only negative advice
 * (retransmit/persist trouble) rather than a real connection abort.
 */
2884 is_neg_adv_abort(unsigned int status)
2886 	return status == CPL_ERR_RTX_NEG_ADVICE ||
2887 	    status == CPL_ERR_PERSIST_NEG_ADVICE;
/*
 * Reply to an ABORT_REQ.  Tries to allocate the reply inline; if that fails
 * the reply is deferred to process context via t3_defer_reply(), with
 * rst_status smuggled through the request's status field for
 * send_deferred_abort_rpl() to pick up.
 */
2891 send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status)
2893 	struct mbuf *reply_mbuf;
2894 	struct cpl_abort_req_rss *req = cplhdr(m);
2896 	reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
2899 		/* Defer the reply.  Stick rst_status into req->status. */
2900 		req->status = rst_status;
2901 		t3_defer_reply(m, tdev, send_deferred_abort_rpl);
2905 	m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
2906 	set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status);
2910 	 * XXX need to sync with ARP as for SYN_RECV connections we can send
2911 	 * these messages while ARP is pending.  For other connection states
2912 	 * it's not a problem.
2914 	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
/*
 * Remove a SYN_RECV child from its parent listener's SYN queue.
 * Currently unimplemented on FreeBSD (CXGB_UNIMPLEMENTED panics); the
 * lines below are Linux-derived reference code, not reachable.
 */
2919 cleanup_syn_rcv_conn(struct socket *child, struct socket *parent)
2921 	CXGB_UNIMPLEMENTED();
2923 	struct request_sock *req = child->sk_user_data;
2925 	inet_csk_reqsk_queue_removed(parent, req);
2926 	synq_remove(tcp_sk(child));
2928 	child->sk_user_data = NULL;
2934 * Performs the actual work to abort a SYN_RECV connection.
/*
 * Abort a SYN_RECV child connection.  If the parent listener is still in
 * LISTEN we must detach the child from its SYN queue and release its
 * offload resources here; otherwise the listener's teardown already did so.
 */
2937 do_abort_syn_rcv(struct socket *child, struct socket *parent)
2939 	struct tcpcb *parenttp = so_sototcpcb(parent);
2940 	struct tcpcb *childtp = so_sototcpcb(child);
2943 	 * If the server is still open we clean up the child connection,
2944 	 * otherwise the server already did the clean up as it was purging
2945 	 * its SYN queue and the skb was just sitting in its backlog.
2947 	if (__predict_false(parenttp->t_state == TCPS_LISTEN)) {
2948 		cleanup_syn_rcv_conn(child, parent);
2949 		inp_wlock(childtp->t_inpcb);
2950 		t3_release_offload_resources(childtp->t_toe);
2951 		inp_wunlock(childtp->t_inpcb);
2952 		tcp_offload_close(childtp);
2958 * Handle abort requests for a SYN_RECV connection. These need extra work
2959 * because the socket is on its parent's SYN queue.
/*
 * Handle an abort for a SYN_RECV connection, which needs extra work because
 * the socket sits on its parent's SYN queue.  Unimplemented on FreeBSD
 * (CXGB_UNIMPLEMENTED panics first); the remainder is Linux-derived
 * reference code showing the intended flow: find the parent listener via
 * the STID map, abort the child, then reply with no RST.
 */
2962 abort_syn_rcv(struct socket *so, struct mbuf *m)
2964 	CXGB_UNIMPLEMENTED();
2966 	struct socket *parent;
2967 	struct toedev *tdev = toep->tp_toedev;
2968 	struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
2969 	struct socket *oreq = so->so_incomp;
2970 	struct t3c_tid_entry *t3c_stid;
2974 		return -1;	/* somehow we are not on the SYN queue */
2976 	t = &(T3C_DATA(cdev))->tid_maps;
2977 	t3c_stid = lookup_stid(t, oreq->ts_recent);
2978 	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
2981 	do_abort_syn_rcv(so, parent);
2982 	send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
2989 * Process abort requests. If we are waiting for an ABORT_RPL we ignore this
2990 * request except that we need to reply to it.
/*
 * Process a peer ABORT_REQ.  The first ABORT_REQ for a connection is acted
 * upon (flags set, errno propagated, resources released); if we are already
 * waiting for our own ABORT_RPL the request is only acknowledged.  Always
 * answers the peer via send_abort_rpl().
 */
2993 process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev)
2995 	int rst_status = CPL_ABORT_NO_RST;
2996 	const struct cpl_abort_req_rss *req = cplhdr(m);
2997 	struct tcpcb *tp = toep->tp_tp;
3001 	inp_wlock(tp->t_inpcb);
3002 	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
3003 	if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
3004 		toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
3009 	toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
3011 	 * Three cases to consider:
3012 	 * a) We haven't sent an abort_req; close the connection.
3013 	 * b) We have sent a post-close abort_req that will get to TP too late
3014 	 *    and will generate a CPL_ERR_ABORT_FAILED reply.  The reply will
3015 	 *    be ignored and the connection should be closed now.
3016 	 * c) We have sent a regular abort_req that will get to TP too late.
3017 	 *    That will generate an abort_rpl with status 0, wait for it.
3019 	if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
3020 	    (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
3023 		error = abort_status_to_errno(so, req->status,
3025 		so_error_set(so, error);
3027 		if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
3030 		 * SYN_RECV needs special processing.  If abort_syn_rcv()
3031 		 * returns 0 it has taken care of the abort.
3033 		if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
3036 		t3_release_offload_resources(toep);
3039 		inp_wunlock(tp->t_inpcb);
3042 		tcp_offload_close(tp);
3044 	send_abort_rpl(m, tdev, rst_status);
3047 	inp_wunlock(tp->t_inpcb);
3051 * Handle an ABORT_REQ_RSS CPL message.
/*
 * CPL handler for ABORT_REQ_RSS.  Negative-advice statuses are ignored.
 * An abort that races an unestablished (SYN_RCVD) connection tears down the
 * TID/L2T/toepcb directly; a toepcb already detached from its tcpcb is left
 * to be freed; everything else goes through process_abort_req().
 */
3054 do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3056 	const struct cpl_abort_req_rss *req = cplhdr(m);
3057 	struct toepcb *toep = (struct toepcb *)ctx;
3059 	if (is_neg_adv_abort(req->status)) {
3064 	log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid);
/* Abort arrived before the connection was established: dismantle in place. */
3066 	if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
3067 		cxgb_remove_tid(cdev, toep, toep->tp_tid);
3068 		toep->tp_flags |= TP_ABORT_REQ_RCVD;
3070 		send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
3072 			l2t_release(L2DATA(cdev), toep->tp_l2t);
/* Detach the toepcb from its tcpcb and clear the offload flag. */
3077 		toep->tp_tp->t_toe = NULL;
3078 		toep->tp_tp->t_flags &= ~TF_TOE;
3081 		 * XXX need to call syncache_chkrst - but we don't
3082 		 * have a way of doing that yet
3084 		toepcb_release(toep);
3085 		log(LOG_ERR, "abort for unestablished connection :-(\n");
3088 	if (toep->tp_tp == NULL) {
3089 		log(LOG_NOTICE, "disconnected toepcb\n");
3090 		/* should be freed momentarily */
3096 	process_abort_req(toep, m, toep->tp_toedev);
3097 	toepcb_release(toep);
/*
 * Abort a passive-open child: tear down the SYN_RECV state and, on T3,
 * reuse the original mbuf to send a PASS_OPEN reject reply to the HW.
 */
3102 pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
3104 	struct toedev *tdev = TOE_DEV(parent);
3106 	do_abort_syn_rcv(child, parent);
3107 	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
3108 		struct cpl_pass_accept_rpl *rpl = cplhdr(m);
3110 		rpl->opt0h = htonl(F_TCAM_BYPASS);
3111 		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
3112 		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
/*
 * ARP failed while answering a passive open.  Unimplemented on FreeBSD
 * (CXGB_UNIMPLEMENTED panics first); the remainder is Linux-derived
 * reference code: skip if an abort is already pending, otherwise locate the
 * parent listener via the STID map and reject the open via pass_open_abort().
 */
3118 handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
3120 	CXGB_UNIMPLEMENTED();
3123 	struct t3cdev *cdev;
3124 	struct socket *parent;
3125 	struct socket *oreq;
3126 	struct t3c_tid_entry *t3c_stid;
3128 	struct tcpcb *otp, *tp = so_sototcpcb(so);
3129 	struct toepcb *toep = tp->t_toe;
3132 	 * If the connection is being aborted due to the parent listening
3133 	 * socket going away there's nothing to do, the ABORT_REQ will close
3136 	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
3141 	oreq = so->so_incomp;
3142 	otp = so_sototcpcb(oreq);
3145 	t = &(T3C_DATA(cdev))->tid_maps;
3146 	t3c_stid = lookup_stid(t, otp->ts_recent);
3147 	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
3150 	pass_open_abort(so, parent, m);
3156 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL. This is treated similarly
3157 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
/*
 * ARP-failure callback for a CPL_PASS_ACCEPT_RPL; funnels into
 * handle_pass_open_arp_failure() to tear down the SYN_RECV connection.
 * The skb-based lines are Linux-derived residue.
 */
3161 pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m)
3165 	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
3166 	BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk);
3168 	handle_pass_open_arp_failure(m_get_socket(m), m);
3172 * Populate a reject CPL_PASS_ACCEPT_RPL WR.
/*
 * Populate a reject CPL_PASS_ACCEPT_RPL in reply_mbuf, echoing the TID and
 * peer IP out of the original request.
 */
3175 mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf)
3177 	struct cpl_pass_accept_req *req = cplhdr(req_mbuf);
3178 	struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf);
3179 	unsigned int tid = GET_TID(req);
3181 	m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP);
3182 	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3183 	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
3184 	rpl->peer_ip = req->peer_ip;	// req->peer_ip not overwritten yet
3185 	rpl->opt0h = htonl(F_TCAM_BYPASS);
3186 	rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
/* NOTE(review): rpl->opt2 is read here but its assignment is not visible in
 * this excerpt — presumably zeroed just above; confirm against full source. */
3188 	rpl->rsvd = rpl->opt2;	/* workaround for HW bug */
3192 * Send a deferred reject to an accept request.
/*
 * Deferred (process-context) rejection of a passive-open request: allocate
 * a reply, fill it as a reject, and hand it to the HW.
 */
3195 reject_pass_request(struct toedev *tdev, struct mbuf *m)
3197 	struct mbuf *reply_mbuf;
3199 	reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl));
3200 	mk_pass_accept_rpl(reply_mbuf, m);
3201 	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
/*
 * Callback from the syncache.  Both visible events end up releasing the
 * toepcb reference taken when the entry was added; they differ only in why
 * (duplicate entry vs. timed-out/evicted entry).
 */
3206 handle_syncache_event(int event, void *arg)
3208 	struct toepcb *toep = arg;
3211 	case TOE_SC_ENTRY_PRESENT:
3213 		 * entry already exists - free toepcb
3216 		printf("syncache entry present\n");
3217 		toepcb_release(toep);
3221 		 * The syncache has given up on this entry
3222 		 * either it timed out, or it was evicted
3223 		 * we need to explicitly release the tid
3225 		printf("syncache entry dropped\n");
3226 		toepcb_release(toep);
3229 		log(LOG_ERR, "unknown syncache event %d\n", event);
/*
 * Build an in_conninfo/tcphdr/tcpopt triple from a CPL_PASS_ACCEPT_REQ and
 * enter the embryonic connection into the host TCP syncache, registering
 * the cxgb usrreqs and the toepcb as the syncache's TOE context.
 */
3235 syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
3237 	struct in_conninfo inc;
3241 	int mss, wsf, sack, ts;
3242 	uint32_t rcv_isn = ntohl(req->rcv_isn);
3244 	bzero(&to, sizeof(struct tcpopt));
3245 	inp = so_sotoinpcb(lso);
3248 	 * Fill out information for entering us into the syncache
3250 	inc.inc_fport = th.th_sport = req->peer_port;
3251 	inc.inc_lport = th.th_dport = req->local_port;
3252 	th.th_seq = req->rcv_isn;
3253 	th.th_flags = TH_SYN;
/* Receive-side sequence bookkeeping starts one past the peer's ISN. */
3255 	toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;
3260 	inc.inc_faddr.s_addr = req->peer_ip;
3261 	inc.inc_laddr.s_addr = req->local_ip;
3263 	DPRINTF("syncache add of %d:%d %d:%d\n",
3264 	    ntohl(req->local_ip), ntohs(req->local_port),
3265 	    ntohl(req->peer_ip), ntohs(req->peer_port));
3267 	mss = req->tcp_options.mss;
3268 	wsf = req->tcp_options.wsf;
3269 	ts = req->tcp_options.tstamp;
3270 	sack = req->tcp_options.sack;
3273 	to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
3274 	tcp_offload_syncache_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep);
3279 * Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket
3280 * lock held. Note that the sock here is a listening socket that is not owned
/*
 * Core of passive-open handling (socket lock held; `so` is the listener).
 * Validates the request (listener state, interface lookup by MAC/VLAN,
 * route), allocates and initializes a toepcb + L2T entry, inserts the TID,
 * queues the embryonic connection on the listener's synq and the host
 * syncache, then sends a CPL_PASS_ACCEPT_RPL accepting the connection
 * (optionally with DDP ulp mode).  Failure paths (mostly elided in this
 * excerpt) fall through to the reject/TID-release tail at the bottom.
 */
3284 process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev,
3285     struct listen_ctx *lctx)
3288 	struct l2t_entry *e;
3290 	struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
3291 	struct cpl_pass_accept_rpl *rpl;
3292 	struct cpl_pass_accept_req *req = cplhdr(m);
3293 	unsigned int tid = GET_TID(req);
3294 	struct tom_data *d = TOM_DATA(tdev);
3295 	struct t3cdev *cdev = d->cdev;
3296 	struct tcpcb *tp = so_sototcpcb(so);
3297 	struct toepcb *newtoep;
3298 	struct rtentry *dst;
3299 	struct sockaddr_in nam;
3300 	struct t3c_data *td = T3C_DATA(cdev);
/* No reply mbuf: defer the reject (T3) or just queue the TID for release. */
3302 	reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
3303 	if (__predict_false(reply_mbuf == NULL)) {
3304 		if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
3305 			t3_defer_reply(m, tdev, reject_pass_request);
3307 			cxgb_queue_tid_release(cdev, tid);
3310 		DPRINTF("failed to get reply_mbuf\n");
3315 	if (tp->t_state != TCPS_LISTEN) {
3316 		DPRINTF("socket not in listen state\n");
/* Resolve the receiving interface from the destination MAC + VLAN tag. */
3321 	tim.mac_addr = req->dst_mac;
3322 	tim.vlan_tag = ntohs(req->vlan_tag);
3323 	if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
3324 		DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n");
3330 	 * XXX do route lookup to confirm that we're still listening on this
/* Linux-derived reference code for the route-validation step. */
3333 	if (ip_route_input(skb, req->local_ip, req->peer_ip,
3334 	    G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev))
3336 	rt_flags = ((struct rtable *)skb->dst)->rt_flags &
3337 	    (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL);
3338 	dst_release(skb->dst);	// done with the input route, release it
3341 	if ((rt_flags & RTF_LOCAL) == 0)
3347 	rt_flags = RTF_LOCAL;
3348 	if ((rt_flags & RTF_LOCAL) == 0)
3352 	 * Calculate values and add to syncache
3355 	newtoep = toepcb_alloc();
3356 	if (newtoep == NULL)
3359 	bzero(&nam, sizeof(struct sockaddr_in));
3361 	nam.sin_len = sizeof(struct sockaddr_in);
3362 	nam.sin_family = AF_INET;
3363 	nam.sin_addr.s_addr =req->peer_ip;
3364 	dst = rtalloc2((struct sockaddr *)&nam, 1, 0);
3367 		printf("failed to find route\n");
3370 	e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev,
3371 	    (struct sockaddr *)&nam);
3373 		DPRINTF("failed to get l2t\n");
3376 	 * Point to our listen socket until accept
3378 	newtoep->tp_tp = tp;
3379 	newtoep->tp_flags = TP_SYN_RCVD;
3380 	newtoep->tp_tid = tid;
3381 	newtoep->tp_toedev = tdev;
3382 	tp->rcv_wnd = select_rcv_wnd(tdev, so);
3384 	cxgb_insert_tid(cdev, d->client, newtoep, tid);
3386 	LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
/* DDP only if enabled by tunable, not vetoed on the socket, and the window
 * is large enough to be worth it. */
3389 	newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
3390 	    tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
3392 	if (newtoep->tp_ulp_mode) {
3393 		ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
3395 		if (ddp_mbuf == NULL)
3396 			newtoep->tp_ulp_mode = 0;
3399 	CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d",
3400 	    TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
3401 	set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
3403 	 * XXX workaround for lack of syncache drop
3405 	toepcb_hold(newtoep);
3406 	syncache_add_accept_req(req, so, newtoep);
3408 	rpl = cplhdr(reply_mbuf);
3409 	reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
3410 	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3412 	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
3413 	rpl->opt2 = htonl(calc_opt2(so, tdev));
3414 	rpl->rsvd = rpl->opt2;	/* workaround for HW bug */
3415 	rpl->peer_ip = req->peer_ip;	// req->peer_ip is not overwritten
3417 	rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) |
3418 	    V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx));
3419 	rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) |
3420 	    CPL_PASS_OPEN_ACCEPT);
3422 	DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status);
3424 	m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep));
3426 	l2t_send(cdev, reply_mbuf, e);
/* Program the TCB DDP-timer workaround for DDP connections. */
3428 	if (newtoep->tp_ulp_mode) {
3429 		__set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
3431 		    TP_DDP_TIMER_WORKAROUND_MASK,
3433 		    TP_DDP_TIMER_WORKAROUND_VAL, 1);
3435 	printf("not offloading\n");
/* Failure tail: reject on T3, otherwise release the TID. */
3441 	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
3442 		mk_pass_accept_rpl(reply_mbuf, m);
3444 		mk_tid_release(reply_mbuf, newtoep, tid);
3445 	cxgb_ofld_send(cdev, reply_mbuf);
3456 * Handle a CPL_PASS_ACCEPT_REQ message.
/*
 * CPL handler for PASS_ACCEPT_REQ.  Validates the STID/TID (the printk/
 * unlikely lines are Linux-derived residue) and forwards valid requests to
 * process_pass_accept_req() under the listener context.
 */
3459 do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3461 	struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
3462 	struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */
3463 	struct tom_data *d = listen_ctx->tom_data;
3466 	struct cpl_pass_accept_req *req = cplhdr(m);
3467 	unsigned int tid = GET_TID(req);
3468 	struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
3470 	if (unlikely(!lsk)) {
3471 		printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
3473 		    (unsigned long)((union listen_entry *)ctx -
3475 		return CPL_RET_BUF_DONE;
3477 	if (unlikely(tid >= t->ntids)) {
3478 		printk(KERN_ERR "%s: passive open TID %u too large\n",
3480 		return CPL_RET_BUF_DONE;
3483 	 * For T3A the current user of the TID may have closed but its last
3484 	 * message(s) may have been backlogged so the TID appears to be still
3485 	 * in use.  Just take the TID away, the connection can close at its
3486 	 * own leisure.  For T3B this situation is a bug.
3488 	if (!valid_new_tid(t, tid) &&
3489 	    cdev->type != T3A) {
3490 		printk(KERN_ERR "%s: passive open uses existing TID %u\n",
3492 		return CPL_RET_BUF_DONE;
3496 	process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
3501 * Called when a connection is established to translate the TCP options
3502 * reported by HW to FreeBSD's native format.
/*
 * Translate the TCP options reported by HW (packed in `opt`) into the
 * native tcpcb flags, and derive the MSS clamp from the negotiated MTU
 * index (minus 40 bytes of fixed IP + TCP headers).
 */
3505 assign_rxopt(struct socket *so, unsigned int opt)
3507 	struct tcpcb *tp = so_sototcpcb(so);
3508 	struct toepcb *toep = tp->t_toe;
3509 	const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep));
3511 	inp_lock_assert(tp->t_inpcb);
3513 	toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
3514 	tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
3515 	tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
3516 	tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
/* Window scaling takes effect only if both sides agreed to it. */
3517 	if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
3518 	    (TF_RCVD_SCALE|TF_REQ_SCALE))
3519 		tp->rcv_scale = tp->request_r_scale;
3523 * Completes some final bits of initialization for just established connections
3524 * and changes their state to TCP_ESTABLISHED.
3526 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
/*
 * Final initialization of a just-established connection: seed all send-side
 * sequence numbers from snd_isn (true ISN + 1), apply the HW-reported TCP
 * options, hook the TOE ctloutput, and move to ESTABLISHED.  The sk-based
 * lines are Linux-derived residue.
 */
3529 make_established(struct socket *so, u32 snd_isn, unsigned int opt)
3531 	struct tcpcb *tp = so_sototcpcb(so);
3532 	struct toepcb *toep = tp->t_toe;
3534 	toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
3535 	assign_rxopt(so, opt);
3542 	so->so_proto->pr_ctloutput = t3_ctloutput;
3546 	inet_sk(sk)->id = tp->write_seq ^ jiffies;
3549 	 * XXX not clear what rcv_wup maps to
3552 	 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
3553 	 * pass through opt0.
3555 	if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
3556 		toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);
3562 	 * no clean interface for marking ARP up to date
3564 	dst_confirm(sk->sk_dst_cache);
3566 	tp->t_starttime = ticks;
3567 	tp->t_state = TCPS_ESTABLISHED;
/*
 * Rebuild the conninfo/ACK header/options from a CPL_PASS_ESTABLISH and ask
 * the host syncache to expand the entry into a full socket (*so is updated
 * on success).  Returns the syncache_expand result.
 */
3572 syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
3575 	struct in_conninfo inc;
3578 	int mss, wsf, sack, ts;
3579 	struct mbuf *m = NULL;
3580 	const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
3584 #error "no MAC support"
3587 	opt = ntohs(req->tcp_opt);
3589 	bzero(&to, sizeof(struct tcpopt));
3592 	 * Fill out information for entering us into the syncache
3594 	inc.inc_fport = th.th_sport = req->peer_port;
3595 	inc.inc_lport = th.th_dport = req->local_port;
3596 	th.th_seq = req->rcv_isn;
3597 	th.th_flags = TH_ACK;
3601 	inc.inc_faddr.s_addr = req->peer_ip;
3602 	inc.inc_laddr.s_addr = req->local_ip;
/* Options come from the HW-packed opt word; MSS again nets out the 40-byte
 * fixed IP + TCP headers. */
3604 	mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
3605 	wsf = G_TCPOPT_WSCALE_OK(opt);
3606 	ts = G_TCPOPT_TSTAMP(opt);
3607 	sack = G_TCPOPT_SACK(opt);
3610 	to.to_wscale = G_TCPOPT_SND_WSCALE(opt);
3611 	to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
3613 	DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
3614 	    ntohl(req->local_ip), ntohs(req->local_port),
3615 	    ntohl(req->peer_ip), ntohs(req->peer_port),
3616 	    mss, wsf, ts, sack);
3617 	return tcp_offload_syncache_expand(&inc, &to, &th, so, m);
3622 * Process a CPL_PASS_ESTABLISH message. XXX a lot of the locking doesn't work
3623 * if we are in TCP_SYN_RECV due to crossed SYNs
/*
 * CPL handler for PASS_ESTABLISH: the passive open completed.  Removes the
 * toepcb from the listener's synq, expands the syncache entry into a real
 * socket, finishes socket/toepcb initialization (WR credits, qset, MTU
 * index, offload ops) and transitions to ESTABLISHED.  The sk/skb lines
 * near the end are Linux-derived residue kept for reference.
 */
3626 do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3628 	struct cpl_pass_establish *req = cplhdr(m);
3629 	struct toepcb *toep = (struct toepcb *)ctx;
3630 	struct tcpcb *tp = toep->tp_tp;
3631 	struct socket *so, *lso;
3632 	struct t3c_data *td = T3C_DATA(cdev);
3633 	struct sockbuf *snd, *rcv;
3635 	// Complete socket initialization now that we have the SND_ISN
3637 	struct toedev *tdev;
3640 	tdev = toep->tp_toedev;
3642 	inp_wlock(tp->t_inpcb);
3646 	 * XXX need to add reference while we're manipulating
3648 	so = lso = inp_inpcbtosocket(tp->t_inpcb);
3650 	inp_wunlock(tp->t_inpcb);
/* Off the listener's SYN queue: this connection now stands alone. */
3653 	LIST_REMOVE(toep, synq_entry);
3656 	if (!syncache_expand_establish_req(req, &so, toep)) {
3660 		CXGB_UNIMPLEMENTED();
3664 	 * Couldn't create the socket
3666 		CXGB_UNIMPLEMENTED();
/* `so` is now the new child socket; relock under its inpcb. */
3669 	tp = so_sototcpcb(so);
3670 	inp_wlock(tp->t_inpcb);
3672 	snd = so_sockbuf_snd(so);
3673 	rcv = so_sockbuf_rcv(so);
3675 	snd->sb_flags |= SB_NOCOALESCE;
3676 	rcv->sb_flags |= SB_NOCOALESCE;
3681 	reset_wr_list(toep);
3682 	tp->rcv_wnd = select_rcv_wnd(tdev, so);
3683 	tp->rcv_nxt = toep->tp_copied_seq;
3684 	install_offload_ops(so);
3686 	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
3687 	toep->tp_wr_unacked = 0;
3688 	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
3689 	toep->tp_qset_idx = 0;
3690 	toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);
3693 	 * XXX Cancel any keep alive timer
3696 	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
3699 	 * XXX workaround for lack of syncache drop
3701 	toepcb_release(toep);
3702 	inp_wunlock(tp->t_inpcb);
3704 	CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
3705 	cxgb_log_tcb(cdev->adapter, toep->tp_tid);
3708 	 * XXX not sure how these checks map to us
3710 	if (unlikely(sk->sk_socket)) {   // simultaneous opens only
3711 		sk->sk_state_change(sk);
3712 		sk_wake_async(so, 0, POLL_OUT);
3715 	 * The state for the new connection is now up to date.
3716 	 * Next check if we should add the connection to the parent's
3717 	 * accept queue.  When the parent closes it resets connections
3718 	 * on its SYN queue, so check if we are being reset.  If so we
3719 	 * don't need to do anything more, the coming ABORT_RPL will
3720 	 * destroy this socket.  Otherwise move the connection to the
3723 	 * Note that we reset the synq before closing the server so if
3724 	 * we are not being reset the stid is still open.
3726 	if (unlikely(!tp->forward_skb_hint)) { // removed from synq
3737 * Fill in the right TID for CPL messages waiting in the out-of-order queue
3738 * and send them to the TOE.
/*
 * Drain the out-of-order queue of CPL messages that were generated before a
 * TID was assigned, patch the now-known TID into each WR header and opcode
 * field, and transmit them to the TOE.
 */
3741 fixup_and_send_ofo(struct toepcb *toep)
3744 	struct toedev *tdev = toep->tp_toedev;
3745 	struct tcpcb *tp = toep->tp_tp;
3746 	unsigned int tid = toep->tp_tid;
3748 	log(LOG_NOTICE, "fixup_and_send_ofo\n");
3750 	inp_lock_assert(tp->t_inpcb);
3751 	while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
3753 		 * A variety of messages can be waiting but the fields we'll
3754 		 * be touching are common to all so any message type will do.
3756 		struct cpl_close_con_req *p = cplhdr(m);
3758 		p->wr.wr_lo = htonl(V_WR_TID(tid));
3759 		OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
3760 		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
3765 * Updates socket state from an active establish CPL message. Runs with the
/*
 * Apply a CPL_ACT_ESTABLISH (active open completed) to the socket: seed the
 * receive-side sequence state from the peer's ISN, finish establishment via
 * make_established(), and flush any CPL messages that were queued while we
 * had no TID.  Runs with the inpcb locked.
 */
3769 socket_act_establish(struct socket *so, struct mbuf *m)
3771 	struct cpl_act_establish *req = cplhdr(m);
3772 	u32 rcv_isn = ntohl(req->rcv_isn);	/* real RCV_ISN + 1 */
3773 	struct tcpcb *tp = so_sototcpcb(so);
3774 	struct toepcb *toep = tp->t_toe;
3776 	if (__predict_false(tp->t_state != TCPS_SYN_SENT))
3777 		log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
3778 		    toep->tp_tid, tp->t_state);
3780 	tp->ts_recent_age = ticks;
/* NOTE(review): including rcv_wnd in this chain sets the receive window to
 * the ISN value, which looks suspect — confirm against full source. */
3781 	tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
3782 	toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;
3784 	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
3787 	 * Now that we finally have a TID send any CPL messages that we had to
3788 	 * defer for lack of a TID.
3790 	if (mbufq_len(&toep->out_of_order_queue))
3791 		fixup_and_send_ofo(toep);
3793 	if (__predict_false(so_state_get(so) & SS_NOFDREF)) {
3795 		 * XXX does this even make sense?
3802 	 * XXX assume no write requests permitted while socket connection is
3806 	 * Currently the send queue must be empty at this point because the
3807 	 * socket layer does not send anything before a connection is
3808 	 * established.  To be future proof though we handle the possibility
3809 	 * that there are pending buffers to send (either TX_DATA or
3810 	 * CLOSE_CON_REQ).  First we need to adjust the sequence number of the
3811 	 * buffers according to the just learned write_seq, and then we send
3812 	 * them on their way.
3814 	fixup_pending_writeq_buffers(sk);
3815 	if (t3_push_frames(so, 1))
3816 		sk->sk_write_space(sk);
3819 	toep->tp_state = tp->t_state;
3820 	tcpstat.tcps_connects++;
3825 * Process a CPL_ACT_ESTABLISH message.
/*
 * CPL handler for ACT_ESTABLISH: an active open succeeded.  Frees the ATID,
 * moves the toepcb to its permanent TID, records the response queue, then
 * finishes socket setup in socket_act_establish() under the inpcb lock.
 * NOTE(review): free_atid appears on two lines — presumably one is on an
 * early-error path hidden by elided lines; confirm against full source.
 */
3828 do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
3830 	struct cpl_act_establish *req = cplhdr(m);
3831 	unsigned int tid = GET_TID(req);
3832 	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
3833 	struct toepcb *toep = (struct toepcb *)ctx;
3834 	struct tcpcb *tp = toep->tp_tp;
3836 	struct toedev *tdev;
3840 		free_atid(cdev, atid);
3843 	inp_wlock(tp->t_inpcb);
3848 	so = inp_inpcbtosocket(tp->t_inpcb);
3849 	tdev = toep->tp_toedev; /* blow up here if link was down */
3853 	 * It's OK if the TID is currently in use, the owning socket may have
3854 	 * backlogged its last CPL message(s).  Just take it away.
3858 	so_insert_tid(d, toep, tid);
3859 	free_atid(cdev, atid);
3860 	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
3862 	socket_act_establish(so, m);
3863 	inp_wunlock(tp->t_inpcb);
3864 	CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
3865 	cxgb_log_tcb(cdev->adapter, toep->tp_tid);
3871 * Process an acknowledgment of WR completion. Advance snd_una and send the
3872 * next batch of work requests from the write queue.
/*
 * Process a WR_ACK (TX_DATA_ACK): return the acknowledged WR credits to the
 * connection, pop completed work requests off the pending-WR list, advance
 * snd_una, drop the ACKed bytes from the send buffer, wake up writers, and
 * push more frames if data remains.  The dst_confirm/sk lines are
 * Linux-derived residue.
 */
3875 wr_ack(struct toepcb *toep, struct mbuf *m)
3877 	struct tcpcb *tp = toep->tp_tp;
3878 	struct cpl_wr_ack *hdr = cplhdr(m);
3880 	unsigned int credits = ntohs(hdr->credits);
3881 	u32 snd_una = ntohl(hdr->snd_una);
3883 	struct sockbuf *snd;
3885 	CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits);
3887 	inp_wlock(tp->t_inpcb);
3888 	so = inp_inpcbtosocket(tp->t_inpcb);
3889 	toep->tp_wr_avail += credits;
3890 	if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
3891 		toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;
/* Consume credits against the oldest pending WR(s). */
3894 		struct mbuf *p = peek_wr(toep);
3896 		if (__predict_false(!p)) {
3897 			log(LOG_ERR, "%u WR_ACK credits for TID %u with "
3898 			    "nothing pending, state %u wr_avail=%u\n",
3899 			    credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail);
3903 		    "wr_ack: p->credits=%d p->bytes=%d",
3904 		    p->m_pkthdr.csum_data, p->m_pkthdr.len);
3905 		KASSERT(p->m_pkthdr.csum_data != 0,
3906 		    ("empty request still on list"));
/* Partial ack of the head WR: debit it and stop. */
3908 		if (__predict_false(credits < p->m_pkthdr.csum_data)) {
3911 			struct tx_data_wr *w = cplhdr(p);
3913 			    "TID %u got %u WR credits, need %u, len %u, "
3914 			    "main body %u, frags %u, seq # %u, ACK una %u,"
3915 			    " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n",
3916 			    toep->tp_tid, credits, p->csum, p->len,
3917 			    p->len - p->data_len, skb_shinfo(p)->nr_frags,
3918 			    ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt),
3919 			    toep->tp_wr_avail, count_pending_wrs(tp) - credits);
3921 			p->m_pkthdr.csum_data -= credits;
3925 		credits -= p->m_pkthdr.csum_data;
3926 		bytes += p->m_pkthdr.len;
3928 		    "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d",
3929 		    p->m_pkthdr.len, credits, p->m_pkthdr.csum_data);
3936 	check_wr_invariants(tp);
3939 	if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
3941 		struct tom_data *d = TOM_DATA(TOE_DEV(so));
/* BUG FIX: missing comma between LOG_ERR and the format string. */
3943 		log(LOG_ERR, "%s: unexpected sequence # %u in WR_ACK "
3944 		    "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una,
3945 		    toep->tp_tid, tp->snd_una);
3950 	if (tp->snd_una != snd_una) {
3951 		tp->snd_una = snd_una;
3952 		tp->ts_recent_age = ticks;
3955 		 * Keep ARP entry "minty fresh"
3957 		dst_confirm(sk->sk_dst_cache);
/* Everything sent is acknowledged: no longer waiting for TX idle. */
3959 		if (tp->snd_una == tp->snd_nxt)
3960 			toep->tp_flags &= ~TP_TX_WAIT_IDLE;
3963 	snd = so_sockbuf_snd(so);
3965 		CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes);
3966 		snd = so_sockbuf_snd(so);
3968 		sbdrop_locked(snd, bytes);
3969 		so_sowwakeup_locked(so);
3972 	if (snd->sb_sndptroff < snd->sb_cc)
3973 		t3_push_frames(so, 0);
3976 	inp_wunlock(tp->t_inpcb);
3981 * Handler for TX_DATA_ACK CPL messages.
/*
 * CPL handler entry point for TX_DATA_ACK; unwraps the toepcb from the
 * handler context (and, per the surrounding code, delegates to wr_ack()).
 */
3984 do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx)
3986 	struct toepcb *toep = (struct toepcb *)ctx;
3995 * Handler for TRACE_PKT CPL messages. Just sink these packets.
/* CPL handler for TRACE_PKT messages; these packets are simply sunk. */
3998 do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx)
4005 * Reset a connection that is on a listener's SYN queue or accept queue,
4006 * i.e., one that has not had a struct socket associated with it.
4007 * Must be called from process context.
4009 * Modeled after code in inet_csk_listen_stop().
/*
 * Reset a connection still on a listener's SYN/accept queue (no struct
 * socket bound to it yet) by sending an ABORT_REQ through its toepcb.
 * Must be called from process context.
 */
4012 t3_reset_listen_child(struct socket *child)
4014 	struct tcpcb *tp = so_sototcpcb(child);
4016 	t3_send_reset(tp->t_toe);
/*
 * Per-child callback used by t3_disconnect_acceptq(): reset offloaded
 * (TF_TOE) children under their inpcb lock; non-offloaded children are
 * left alone.
 */
4021 t3_child_disconnect(struct socket *so, void *arg)
4023 	struct tcpcb *tp = so_sototcpcb(so);
4025 	if (tp->t_flags & TF_TOE) {
4026 		inp_wlock(tp->t_inpcb);
4027 		t3_reset_listen_child(so);
4028 		inp_wunlock(tp->t_inpcb);
4033 * Disconnect offloaded established but not yet accepted connections sitting
4034 * on a server's accept_queue. We just send an ABORT_REQ at this point and
4035 * finish off the disconnect later as we may need to wait for the ABORT_RPL.
/*
 * Disconnect offloaded, established-but-unaccepted connections on a
 * listener's accept queue by applying t3_child_disconnect() to each; the
 * teardown completes later when the ABORT_RPLs arrive.
 */
4038 t3_disconnect_acceptq(struct socket *listen_so)
4042 	so_listeners_apply_all(listen_so, t3_child_disconnect, NULL);
4043 	so_unlock(listen_so);
4047 * Reset offloaded connections sitting on a server's syn queue. As above
4048 * we send ABORT_REQ and finish off when we get ABORT_RPL.
/*
 * Reset every offloaded connection on a listener's SYN queue: unlink each
 * toepcb, send an ABORT_REQ, release its TID, and drop the synq reference.
 * Completion is finished when the ABORT_RPLs come back.
 */
4052 t3_reset_synq(struct listen_ctx *lctx)
4054 	struct toepcb *toep;
4057 	while (!LIST_EMPTY(&lctx->synq_head)) {
4058 		toep = LIST_FIRST(&lctx->synq_head);
4059 		LIST_REMOVE(toep, synq_entry);
4061 		t3_send_reset(toep);
4062 		cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid);
4063 		toepcb_release(toep);
4065 	so_unlock(lctx->lso);
/*
 * Program `nppods` DDP page pods into adapter memory, one ULP_MEM_WRITE
 * work request per pod starting at the address derived from `tag`.  Each
 * non-sentinel pod carries the TID, tag/color, offsets, and up to five page
 * addresses (four pages per pod plus one overlap entry) from the gather
 * list; the trailing NUM_SENTINEL_PPODS pods are written invalid.
 */
4070 t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
4071     unsigned int nppods, unsigned int tag, unsigned int maxoff,
4072     unsigned int pg_off, unsigned int color)
4074 	unsigned int i, j, pidx;
4077 	struct ulp_mem_io *req;
4078 	unsigned int tid = toep->tp_tid;
4079 	const struct tom_data *td = TOM_DATA(toep->tp_toedev);
4080 	unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;
4082 	CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)",
4083 	    gl, nppods, tag, maxoff, pg_off, color);
4085 	for (i = 0; i < nppods; ++i) {
4086 		m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
4087 		m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4088 		req = mtod(m, struct ulp_mem_io *);
4089 		m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE;
4090 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
4092 		req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) |
4093 		    V_ULPTX_CMD(ULP_MEM_WRITE));
4094 		req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) |
4095 		    V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1));
4097 		p = (struct pagepod *)(req + 1);
/* NOTE(review): __predict_false marks the common (non-sentinel) branch —
 * looks inverted; confirm intent against full source. */
4098 		if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) {
4099 			p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
4100 			p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) |
4101 			    V_PPOD_COLOR(color));
4102 			p->pp_max_offset = htonl(maxoff);
4103 			p->pp_page_offset = htonl(pg_off);
4105 			for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx)
4106 				p->pp_addr[j] = pidx < gl->dgl_nelem ?
4107 				    htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0;
4109 			p->pp_vld_tid = 0;   /* mark sentinel page pods invalid */
4110 		send_or_defer(toep, m, 0);
4111 		ppod_addr += PPOD_SIZE;
4117 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command.
4120 mk_cpl_barrier_ulp(struct cpl_barrier *b)
4122 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b;
4124 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
4125 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8));
4126 b->opcode = CPL_BARRIER;
4130 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command.
4133 mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno)
4135 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
4137 txpkt = (struct ulp_txpkt *)req;
4138 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
4139 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
4140 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid));
4141 req->cpuno = htons(cpuno);
4145 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command.
/*
 * @req:  storage for the message (ULP_TX_PKT header overlaid on top)
 * @tid:  hardware TID whose TCB is modified
 * @word: TCB word index to update
 * @mask: bits of the word to change
 * @val:  new value for the masked bits
 */
4148 mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
4149 unsigned int word, uint64_t mask, uint64_t val)
4151 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
4153 CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
4154 tid, word, mask, val);
/* ULP header first, then the CPL_SET_TCB_FIELD body. */
4156 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
4157 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
4158 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
/* No completion wanted for these embedded TCB updates. */
4159 req->reply = V_NO_REPLY(1);
4161 req->word = htons(word);
4162 req->mask = htobe64(mask);
4163 req->val = htobe64(val);
4167 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
4170 mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack,
4171 unsigned int tid, unsigned int credits)
4173 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;
4175 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
4176 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
4177 OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
4178 ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
4179 V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) |
4180 V_RX_CREDITS(credits));
/*
 * Cancel a pending hardware DDP buffer (bufidx is 0 or 1).  Builds one
 * compound control work request — CPL_BARRIER, TCB flag update(s), a
 * CPL_GET_TCB, and a trailing CPL_BARRIER — and ships it to the
 * adapter.  Caller must hold the receive socket-buffer lock (asserted).
 */
4184 t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
4188 struct work_request_hdr *wr;
4189 struct cpl_barrier *lock;
4190 struct cpl_set_tcb_field *req;
4191 struct cpl_get_tcb *getreq;
4192 struct ddp_state *p = &toep->tp_ddp_state;
4195 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
/* Room for the WR header, the TCB update, and both barriers. */
4197 wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
4199 m = m_gethdr_nofail(wrlen);
4200 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4201 wr = mtod(m, struct work_request_hdr *);
/* BYPASS WR: the payload is raw CPL messages, not a firmware opcode. */
4204 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
4205 m->m_pkthdr.len = m->m_len = wrlen;
/* Leading barrier keeps the TCB update ordered against prior traffic. */
4207 lock = (struct cpl_barrier *)(wr + 1);
4208 mk_cpl_barrier_ulp(lock);
4210 req = (struct cpl_set_tcb_field *)(lock + 1);
4212 CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);
4214 /* Hmmm, not sure if this actually a good thing: reactivating
4215 * the other buffer might be an issue if it has been completed
4216 * already. However, that is unlikely, since the fact that the UBUF
4217 * is not completed indicates that there is no outstanding data.
/* Cancelling buffer 0: invalidate it and make buffer 1 active. */
4220 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4221 V_TF_DDP_ACTIVE_BUF(1) |
4222 V_TF_DDP_BUF0_VALID(1),
4223 V_TF_DDP_ACTIVE_BUF(1));
/* Cancelling buffer 1: invalidate it and make buffer 0 active. */
4225 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4226 V_TF_DDP_ACTIVE_BUF(1) |
4227 V_TF_DDP_BUF1_VALID(1), 0);
/* Read the TCB back so we learn how much data landed in the buffer. */
4229 getreq = (struct cpl_get_tcb *)(req + 1);
4230 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
4232 mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));
4234 /* Keep track of the number of outstanding CPL_GET_TCB requests
4239 T3_TRACE1(TIDTB(so),
4240 "t3_cancel_ddpbuf: bufidx %u", bufidx);
4242 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
4246 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
4247 * @sk: the socket associated with the buffers
4248 * @bufidx: index of HW DDP buffer (0 or 1)
4249 * @tag0: new tag for HW buffer 0
4250 * @tag1: new tag for HW buffer 1
4251 * @len: new length for HW buf @bufidx
4253 * Sends a compound WR to overlay a new DDP buffer on top of an existing
4254 * buffer by changing the buffer tag and length and setting the valid and
4255 * active flag accordingly. The caller must ensure the new buffer is at
4256 * least as big as the existing one. Since we typically reprogram both HW
4257 * buffers this function sets both tags for convenience. Read the TCB to
4258 * determine how much data was written into the buffer before the overlay
4262 t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0,
4263 unsigned int tag1, unsigned int len)
4267 struct work_request_hdr *wr;
4268 struct cpl_get_tcb *getreq;
4269 struct cpl_set_tcb_field *req;
4270 struct ddp_state *p = &toep->tp_ddp_state;
/* NOTE(review): trace label says "t3_setup_ppods" but this is
 * t3_overlay_ddpbuf — looks like a copy/paste slip in the CTR4 string. */
4272 CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)",
4273 bufidx, tag0, tag1, len);
4275 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
/* WR header + three TCB updates + one GET_TCB. */
4277 wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
4278 m = m_gethdr_nofail(wrlen);
4279 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4280 wr = mtod(m, struct work_request_hdr *);
4281 m->m_pkthdr.len = m->m_len = wrlen;
4285 /* Set the ATOMIC flag to make sure that TP processes the following
4286 * CPLs in an atomic manner and no wire segments can be interleaved.
4288 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
/* Update both buffer tags in one 64-bit masked TCB write. */
4289 req = (struct cpl_set_tcb_field *)(wr + 1);
4290 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG,
/* NOTE(review): the mask's BUF1 half lacks a (uint64_t) cast before
 * << 32 (the value half has one) — verify the V_ macro widens to 64
 * bits, otherwise this shift is undefined/truncated. */
4291 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) |
4292 V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32,
4293 V_TCB_RX_DDP_BUF0_TAG(tag0) |
4294 V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32);
/* Program the chosen buffer's length and its valid/active flags. */
4297 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN,
4298 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
4299 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
4301 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4302 V_TF_DDP_PUSH_DISABLE_0(1) |
4303 V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
4304 V_TF_DDP_PUSH_DISABLE_0(0) |
4305 V_TF_DDP_BUF0_VALID(1));
4307 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN,
4308 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN),
4309 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len));
4311 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
4312 V_TF_DDP_PUSH_DISABLE_1(1) |
4313 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
4314 V_TF_DDP_PUSH_DISABLE_1(0) |
4315 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1));
/* Read the TCB back to learn how much data preceded the overlay. */
4318 getreq = (struct cpl_get_tcb *)(req + 1);
4319 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
4321 /* Keep track of the number of outstanding CPL_GET_TCB requests
/* NOTE(review): T3_TRACE4 references `sk`, which is not a parameter
 * here — presumably compiled out unless T3_TRACE is enabled; verify. */
4326 T3_TRACE4(TIDTB(sk),
4327 "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u "
4329 bufidx, tag0, tag1, len);
4331 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
4335 * Sends a compound WR containing all the CPL messages needed to program the
4336 * two HW DDP buffers, namely optionally setting up the length and offset of
4337 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK.
4340 t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0,
4341 unsigned int len1, unsigned int offset1,
4342 uint64_t ddp_flags, uint64_t flag_mask, int modulate)
4346 struct work_request_hdr *wr;
4347 struct cpl_set_tcb_field *req;
/* NOTE(review): %08x is used for the 32-bit halves of a uint64_t —
 * confirm the CTR macro's argument types match these specifiers. */
4349 CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ",
4350 len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff);
4353 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
/* Size the WR for exactly the optional pieces that will be emitted. */
4355 wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
4356 (len1 ? sizeof(*req) : 0) +
4357 (modulate ? sizeof(struct cpl_rx_data_ack) : 0);
4358 m = m_gethdr_nofail(wrlen);
4359 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
4360 wr = mtod(m, struct work_request_hdr *);
/* BYPASS WR: the payload is raw CPL messages. */
4363 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
4364 m->m_pkthdr.len = m->m_len = wrlen;
4366 req = (struct cpl_set_tcb_field *)(wr + 1);
4367 if (len0) { /* program buffer 0 offset and length */
4368 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
4369 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
4370 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
4371 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) |
4372 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0));
4375 if (len1) { /* program buffer 1 offset and length */
4376 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET,
4377 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
4378 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32,
4379 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) |
4380 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32);
/* Finally update the DDP flags selected by flag_mask. */
4384 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask,
/* Optionally piggy-back an RX_DATA_ACK returning the receive credits
 * accumulated since the last window update. */
4388 mk_rx_data_ack_ulp(toep,
4389 (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid,
4390 toep->tp_copied_seq - toep->tp_rcv_wup);
4391 toep->tp_rcv_wup = toep->tp_copied_seq;
/* NOTE(review): T3_TRACE5 references `sk`, not a parameter here —
 * presumably compiled out unless T3_TRACE is enabled; verify. */
4395 T3_TRACE5(TIDTB(sk),
4396 "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x "
4398 len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff,
4402 cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
/*
 * Populate the mbuf_wrs[] lookup table: entry i is the number of work
 * requests needed to carry an i-buffer scatter/gather list when a
 * single WR holds at most wr_len (presumably in 8-byte flits — verify
 * against the caller).  Idempotent: a filled table is left alone.
 */
4406 t3_init_wr_tab(unsigned int wr_len)
/* A non-zero first entry means the table was already populated. */
4410 if (mbuf_wrs[1]) /* already initialized */
4413 for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) {
/* Each SGL entry occupies 1.5 flits: ceil(3*i/2) for i entries. */
4414 int sgl_len = (3 * i) / 2 + (i & 1);
/* One WR if the SGL fits; otherwise the remainder spills into extra
 * WRs of wr_len - 1 usable flits each.  NOTE(review): assumes
 * wr_len >= 2 (division by wr_len - 1) — confirm callers guarantee it. */
4417 mbuf_wrs[i] = sgl_len <= wr_len ?
4418 1 : 1 + (sgl_len - 2) / (wr_len - 1);
4425 t3_init_cpl_io(void)
4428 tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
4431 "Chelsio TCP offload: can't allocate sk_buff\n");
4434 skb_put(tcphdr_skb, sizeof(struct tcphdr));
4435 tcphdr_skb->h.raw = tcphdr_skb->data;
4436 memset(tcphdr_skb->data, 0, tcphdr_skb->len);
4439 t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
4440 t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
4441 t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack);
4442 t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data);
4443 t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
4444 t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
4445 t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
4446 t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
4447 t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
4448 t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
4449 t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
4450 t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
4451 t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
4452 t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
4453 t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);