2 * Copyright (c) 2012 Chelsio Communications, Inc.
4 * Written by: Navdeep Parhar <np@FreeBSD.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
32 #include "opt_inet6.h"
35 #include <sys/param.h>
36 #include <sys/types.h>
37 #include <sys/kernel.h>
39 #include <sys/module.h>
40 #include <sys/protosw.h>
41 #include <sys/refcount.h>
42 #include <sys/domain.h>
43 #include <sys/fnv_hash.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <net/ethernet.h>
48 #include <net/if_types.h>
49 #include <net/if_vlan_var.h>
50 #include <net/route.h>
51 #include <netinet/in.h>
52 #include <netinet/in_pcb.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/scope6_var.h>
56 #include <netinet/tcp_timer.h>
57 #include <netinet/tcp_var.h>
59 #include <netinet/tcp_fsm.h>
60 #include <netinet/toecore.h>
62 #include "common/common.h"
63 #include "common/t4_msg.h"
64 #include "common/t4_regs.h"
65 #include "tom/t4_tom_l2t.h"
66 #include "tom/t4_tom.h"
/*
 * Forward declarations of file-local helpers: stid allocation and lookup,
 * listen context (lctx) lifetime, the listen hash, and synq entry handling.
 */
69 static int alloc_stid(struct adapter *, struct listen_ctx *, int);
70 static struct listen_ctx *lookup_stid(struct adapter *, int);
71 static void free_stid(struct adapter *, struct listen_ctx *);
74 static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
76 static int free_lctx(struct adapter *, struct listen_ctx *);
77 static void hold_lctx(struct listen_ctx *);
78 static void listen_hash_add(struct adapter *, struct listen_ctx *);
79 static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *);
80 static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
81 static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);
83 static inline void save_qids_in_mbuf(struct mbuf *, struct port_info *);
84 static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *);
85 static void send_reset_synqe(struct toedev *, struct synq_entry *);
/*
 * Reserve stid(s) for a listen context and record lctx in the adapter's stid
 * table.
 *
 * sc:     adapter whose tid_info (sc->tids) is consulted and updated.
 * lctx:   listen context; its embedded stid_region is linked into t->stids.
 * isipv6: non-zero for an IPv6 listener (the caller passes
 *         inp_vflag & INP_IPV6); see the alignment note below.
 *
 * On success the return value is the table index offset by t->stid_base.
 * All bookkeeping is done with t->stid_lock held.
 */
88 alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6)
90 struct tid_info *t = &sc->tids;
91 u_int stid, n, f, mask;
92 struct stid_region *sr = &lctx->stid_region;
95 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in
96 * the TCAM. The start of the stid region is properly aligned (the chip
97 * requires each region to be 128-cell aligned).
101 KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0,
102 ("%s: stid region (%u, %u) not properly aligned. n = %u",
103 __func__, t->stid_base, t->nstids, n));
105 mtx_lock(&t->stid_lock);
/* Fewer than n stids are unused: the lock is dropped on this path. */
106 if (n > t->nstids - t->stids_in_use) {
107 mtx_unlock(&t->stid_lock);
/* First choice: carve the stids out of the free space at the head. */
111 if (t->nstids_free_head >= n) {
113 * This allocation will definitely succeed because the region
114 * starts at a good alignment and we just checked we have enough
117 f = t->nstids_free_head & mask;
118 t->nstids_free_head -= n + f;
119 stid = t->nstids_free_head;
120 TAILQ_INSERT_HEAD(&t->stids, sr, link);
/*
 * Otherwise walk the regions already on the list, tracking the running
 * stid position, and look for one with enough free space behind it.
 */
122 struct stid_region *s;
124 stid = t->nstids_free_head;
125 TAILQ_FOREACH(s, &t->stids, link) {
126 stid += s->used + s->free;
128 if (s->free >= n + f) {
131 TAILQ_INSERT_AFTER(&t->stids, s, sr, link);
/* A complete walk must end exactly at nstids, else the list is corrupt. */
136 if (__predict_false(stid != t->nstids)) {
137 panic("%s: stids TAILQ (%p) corrupt."
138 " At %d instead of %d at the end of the queue.",
139 __func__, &t->stids, stid, t->nstids);
142 mtx_unlock(&t->stid_lock);
/* Account for the new stids and make lctx reachable via lookup_stid(). */
149 t->stids_in_use += n;
150 t->stid_tab[stid] = lctx;
151 mtx_unlock(&t->stid_lock);
153 KASSERT(((stid + t->stid_base) & mask) == 0,
154 ("%s: EDOOFUS.", __func__));
155 return (stid + t->stid_base);
/*
 * Map a hardware stid to the listen_ctx that alloc_stid() stored for it.  The
 * stid is turned into a table index by subtracting t->stid_base; no range
 * check or locking is performed here.
 */
158 static struct listen_ctx *
159 lookup_stid(struct adapter *sc, int stid)
161 struct tid_info *t = &sc->tids;
163 return (t->stid_tab[stid - t->stid_base]);
/*
 * Give back the stids described by lctx's stid_region.  With t->stid_lock
 * held, the region's used + free count is credited to the preceding region's
 * free count or to t->nstids_free_head, t->stids_in_use is reduced by
 * sr->used, and the region is unlinked from t->stids.
 *
 * NOTE(review): the condition choosing between the two credit targets is not
 * visible here; presumably it is whether a preceding region exists -- confirm.
 */
167 free_stid(struct adapter *sc, struct listen_ctx *lctx)
169 struct tid_info *t = &sc->tids;
170 struct stid_region *sr = &lctx->stid_region;
171 struct stid_region *s;
173 KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used));
175 mtx_lock(&t->stid_lock);
/* s is the region that precedes sr on the list. */
176 s = TAILQ_PREV(sr, stid_head, link);
178 s->free += sr->used + sr->free;
180 t->nstids_free_head += sr->used + sr->free;
181 KASSERT(t->stids_in_use >= sr->used,
182 ("%s: stids_in_use (%u) < stids being freed (%u)", __func__,
183 t->stids_in_use, sr->used));
184 t->stids_in_use -= sr->used;
185 TAILQ_REMOVE(&t->stids, sr, link);
186 mtx_unlock(&t->stid_lock);
/*
 * Allocate and set up a listen_ctx for a listening inp, which the caller must
 * hold write-locked.
 *
 * The context is zero-filled and allocated with M_NOWAIT, so the allocation
 * can fail.  It is given an stid from alloc_stid().  For an IPv6 inp bound to
 * a specific (non-wildcard) local address, lctx->ce is obtained from
 * hold_lip() for that address.  The control queue and offload rx queue are
 * taken from pi (port_id and first_ofld_rxq), the reference count starts at 1
 * and the synq starts out empty.
 */
189 static struct listen_ctx *
190 alloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi)
192 struct listen_ctx *lctx;
194 INP_WLOCK_ASSERT(inp);
196 lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO);
200 lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6);
201 if (lctx->stid < 0) {
206 if (inp->inp_vflag & INP_IPV6 &&
207 !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) {
208 struct tom_data *td = sc->tom_softc;
210 lctx->ce = hold_lip(td, &inp->in6p_laddr);
211 if (lctx->ce == NULL) {
217 lctx->ctrlq = &sc->sge.ctrlq[pi->port_id];
218 lctx->ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq];
219 refcount_init(&lctx->refcount, 1);
220 TAILQ_INIT(&lctx->synq);
/*
 * Tear down a listen_ctx whose last reference is gone.  The inp write lock
 * must be held; a zero refcount, an empty synq and a non-negative stid are
 * asserted.  The return value is whatever in_pcbrele_wlocked() returns for the
 * inp; release_lctx() treats a non-zero value as "the inp was freed".
 */
228 /* Don't call this directly, use release_lctx instead */
230 free_lctx(struct adapter *sc, struct listen_ctx *lctx)
232 struct inpcb *inp = lctx->inp;
233 struct tom_data *td = sc->tom_softc;
235 INP_WLOCK_ASSERT(inp);
236 KASSERT(lctx->refcount == 0,
237 ("%s: refcount %d", __func__, lctx->refcount));
238 KASSERT(TAILQ_EMPTY(&lctx->synq),
239 ("%s: synq not empty.", __func__));
240 KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
242 CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
243 __func__, lctx->stid, lctx, lctx->inp);
/* Counterpart of the hold_lip() call made in alloc_lctx(). */
246 release_lip(td, lctx->ce);
250 return (in_pcbrele_wlocked(inp));
/* Take an additional reference on the listen context. */
254 hold_lctx(struct listen_ctx *lctx)
257 refcount_acquire(&lctx->refcount);
/*
 * Hash function for the listen hash.  The FNV hash is computed over the bytes
 * of the key pointer itself (note &key and sizeof(key)), not over the object
 * it points to, and the result is reduced with mask.
 */
260 static inline uint32_t
261 listen_hashfn(void *key, u_long mask)
264 return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask);
/*
 * The bucket is selected by hashing lctx->inp with td->listen_mask; the
 * insertion at the head of that bucket is done under td->lctx_hash_lock.
 */
268 * Add a listen_ctx entry to the listen hash table.
271 listen_hash_add(struct adapter *sc, struct listen_ctx *lctx)
273 struct tom_data *td = sc->tom_softc;
274 int bucket = listen_hashfn(lctx->inp, td->listen_mask);
276 mtx_lock(&td->lctx_hash_lock);
277 LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
279 mtx_unlock(&td->lctx_hash_lock);
/*
 * The bucket selected by hashing inp is walked under td->lctx_hash_lock and
 * entries are matched by comparing their inp pointer with the one given.
 */
283 * Look for the listening socket's context entry in the hash and return it.
285 static struct listen_ctx *
286 listen_hash_find(struct adapter *sc, struct inpcb *inp)
288 struct tom_data *td = sc->tom_softc;
289 int bucket = listen_hashfn(inp, td->listen_mask);
290 struct listen_ctx *lctx;
292 mtx_lock(&td->lctx_hash_lock);
293 LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
294 if (lctx->inp == inp)
297 mtx_unlock(&td->lctx_hash_lock);
/*
 * The bucket selected by hashing inp is walked under td->lctx_hash_lock.  The
 * _SAFE iterator is used because the matching entry is unlinked during the
 * walk.
 */
303 * Removes the listen_ctx structure for inp from the hash and returns it.
305 static struct listen_ctx *
306 listen_hash_del(struct adapter *sc, struct inpcb *inp)
308 struct tom_data *td = sc->tom_softc;
309 int bucket = listen_hashfn(inp, td->listen_mask);
310 struct listen_ctx *lctx, *l;
312 mtx_lock(&td->lctx_hash_lock);
313 LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
314 if (lctx->inp == inp) {
315 LIST_REMOVE(lctx, link);
320 mtx_unlock(&td->lctx_hash_lock);
/*
 * Drops one reference.  When refcount_release() reports that it was the last
 * one, free_lctx() is called and its result decides the return value: NULL if
 * the inp was freed, otherwise the (still valid) inp.
 */
326 * Releases a hold on the lctx. Must be called with the listening socket's inp
327 * locked. The inp may be freed by this function and it returns NULL to
330 static struct inpcb *
331 release_lctx(struct adapter *sc, struct listen_ctx *lctx)
333 struct inpcb *inp = lctx->inp;
336 INP_WLOCK_ASSERT(inp);
337 if (refcount_release(&lctx->refcount))
338 inp_freed = free_lctx(sc, lctx);
340 return (inp_freed ? NULL : inp);
/*
 * Abort an embryonic connection (synq entry) by sending the chip one work
 * request that holds a FLOWC WR followed by a CPL_ABORT_REQ asking for a RST.
 *
 * The listening socket's inp must be write-locked.  If an abort is already in
 * progress for this synqe (TPF_ABORT_SHUTDOWN set) nothing is sent; otherwise
 * the flag is set here.  Failure to allocate the work request is fatal
 * (panic).
 */
344 send_reset_synqe(struct toedev *tod, struct synq_entry *synqe)
346 struct adapter *sc = tod->tod_softc;
347 struct mbuf *m = synqe->syn;
348 struct ifnet *ifp = m->m_pkthdr.rcvif;
349 struct port_info *pi = ifp->if_softc;
350 struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];
352 struct fw_flowc_wr *flowc;
353 struct cpl_abort_req *req;
354 int txqid, rxqid, flowclen;
355 struct sge_wrq *ofld_txq;
356 struct sge_ofld_rxq *ofld_rxq;
357 const int nparams = 6;
358 unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN;
360 INP_WLOCK_ASSERT(synqe->lctx->inp);
362 CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
363 __func__, synqe, synqe->flags, synqe->tid,
364 synqe->flags & TPF_ABORT_SHUTDOWN ?
365 " (abort already in progress)" : "");
366 if (synqe->flags & TPF_ABORT_SHUTDOWN)
367 return; /* abort already in progress */
368 synqe->flags |= TPF_ABORT_SHUTDOWN;
/* Recover the offload tx/rx queue ids kept in the SYN mbuf's flowid. */
370 get_qids_from_mbuf(m, &txqid, &rxqid);
371 ofld_txq = &sc->sge.ofld_txq[txqid];
372 ofld_rxq = &sc->sge.ofld_rxq[rxqid];
374 /* The wrqe will have two WRs - a flowc followed by an abort_req */
375 flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
377 wr = alloc_wrqe(roundup2(flowclen, EQ_ESIZE) + sizeof(*req), ofld_txq);
380 panic("%s: allocation failure.", __func__);
/* The abort request starts after the flowc padded to a multiple of EQ_ESIZE. */
383 req = (void *)((caddr_t)flowc + roundup2(flowclen, EQ_ESIZE));
385 /* First the flowc ... */
386 memset(flowc, 0, wr->wr_len);
387 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
388 V_FW_FLOWC_WR_NPARAMS(nparams));
389 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
390 V_FW_WR_FLOWID(synqe->tid));
391 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
392 flowc->mnemval[0].val = htobe32(pfvf);
393 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
394 flowc->mnemval[1].val = htobe32(pi->tx_chan);
395 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
396 flowc->mnemval[2].val = htobe32(pi->tx_chan);
397 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
398 flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
399 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
400 flowc->mnemval[4].val = htobe32(512);
401 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
402 flowc->mnemval[5].val = htobe32(512);
403 synqe->flags |= TPF_FLOWC_WR_SENT;
405 /* ... then ABORT request */
406 INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid);
407 req->rsvd0 = 0; /* don't have a snd_nxt */
408 req->rsvd1 = 1; /* no data sent yet */
409 req->cmd = CPL_ABORT_SEND_RST;
/* Send using the L2 table entry recorded in the synqe (l2e_idx). */
411 t4_l2t_send(sc, wr, e);
/*
 * Build a CPL_PASS_OPEN_REQ (IPv4) for lctx's stid using the inp's local port
 * and local IPv4 address.  The work request is allocated against lctx->ctrlq;
 * an allocation failure is logged.  opt0 carries the control queue's tx
 * channel; opt1 selects CPL_CONN_POLICY_ASK and names lctx->ofld_rxq (by
 * iq.abs_id) as the SYN RSS queue.
 */
415 create_server(struct adapter *sc, struct listen_ctx *lctx)
418 struct cpl_pass_open_req *req;
419 struct inpcb *inp = lctx->inp;
421 wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
423 log(LOG_ERR, "%s: allocation failure", __func__);
429 OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
/* inp_lport and inp_laddr are copied as-is, without byte swapping. */
430 req->local_port = inp->inp_lport;
432 req->local_ip = inp->inp_laddr.s_addr;
434 req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
435 req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
436 F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
/*
 * IPv6 counterpart of create_server(): build a CPL_PASS_OPEN_REQ6 for lctx's
 * stid using the inp's local port and local IPv6 address.  An allocation
 * failure is logged.  opt0/opt1 are filled in exactly as for IPv4.
 */
443 create_server6(struct adapter *sc, struct listen_ctx *lctx)
446 struct cpl_pass_open_req6 *req;
447 struct inpcb *inp = lctx->inp;
449 wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
451 log(LOG_ERR, "%s: allocation failure", __func__);
457 OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid));
458 req->local_port = inp->inp_lport;
/* The 16-byte address is copied as two 64-bit halves, bytes 0-7 and 8-15. */
460 req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
461 req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
464 req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
465 req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
466 F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
/*
 * Build a CPL_CLOSE_LISTSRV_REQ for the hardware listener described by lctx.
 * The request names lctx->ofld_rxq (iq.abs_id) in reply_ctrl.  Unlike the
 * create_server paths, an allocation failure here is fatal (panic).
 */
473 destroy_server(struct adapter *sc, struct listen_ctx *lctx)
476 struct cpl_close_listsvr_req *req;
478 wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
481 panic("%s: allocation failure.", __func__);
486 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
488 req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id);
489 req->rsvd = htobe16(0);
/*
 * Summary of the visible flow (inp must be write-locked):
 *  - loopback local addresses (IPv4 and IPv6) never get a hardware listener;
 *  - nothing is done unless some port is both open and offload-enabled;
 *  - a port with IFCAP_TOE enabled is located and its queues are used;
 *  - if the inp is already in the listen hash there is nothing more to do;
 *  - a listen context is allocated, hashed, and a passive open is built with
 *    create_server6() or create_server() depending on the inp's address family;
 *  - if that fails the context is unhashed and released, otherwise
 *    LCTX_RPL_PENDING is set (do_pass_open_rpl() clears it).
 */
496 * Start a listening server by sending a passive open request to HW.
498 * Can't take adapter lock here and access to sc->flags, sc->open_device_map,
499 * sc->offload_map, if_capenable are all race prone.
502 t4_listen_start(struct toedev *tod, struct tcpcb *tp)
504 struct adapter *sc = tod->tod_softc;
505 struct port_info *pi;
506 struct inpcb *inp = tp->t_inpcb;
507 struct listen_ctx *lctx;
510 INP_WLOCK_ASSERT(inp);
512 /* Don't start a hardware listener for any loopback address. */
513 if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr))
515 if (!(inp->inp_vflag & INP_IPV6) &&
516 IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr)))
521 log(LOG_ERR, "%s: listen request ignored, %s is busy",
522 __func__, device_get_nameunit(sc->dev));
526 KASSERT(uld_active(sc, ULD_TOM),
527 ("%s: TOM not initialized", __func__));
530 if ((sc->open_device_map & sc->offload_map) == 0)
531 goto done; /* no port that's UP with IFCAP_TOE enabled */
534 * Find a running port with IFCAP_TOE (4 or 6). We'll use the first
535 * such port's queues to send the passive open and receive the reply to
538 * XXX: need a way to mark a port in use by offload. if_cxgbe should
539 * then reject any attempt to bring down such a port (and maybe reject
540 * attempts to disable IFCAP_TOE on that port too?).
542 for_each_port(sc, i) {
543 if (isset(&sc->open_device_map, i) &&
544 sc->port[i]->ifp->if_capenable & IFCAP_TOE)
547 KASSERT(i < sc->params.nports,
548 ("%s: no running port with TOE capability enabled.", __func__));
551 if (listen_hash_find(sc, inp) != NULL)
552 goto done; /* already setup */
554 lctx = alloc_lctx(sc, inp, pi);
557 "%s: listen request ignored, %s couldn't allocate lctx\n",
558 __func__, device_get_nameunit(sc->dev));
561 listen_hash_add(sc, lctx);
563 CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
564 __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
567 if (inp->inp_vflag & INP_IPV6)
568 rc = create_server6(sc, lctx);
570 rc = create_server(sc, lctx);
/* Failure path: undo the hash insertion and drop the context's reference. */
572 log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
573 __func__, device_get_nameunit(sc->dev), rc);
574 (void) listen_hash_del(sc, inp);
575 inp = release_lctx(sc, lctx);
576 /* can't be freed, host stack has a reference */
577 KASSERT(inp != NULL, ("%s: inp freed", __func__));
580 lctx->flags |= LCTX_RPL_PENDING;
/*
 * Stop the hardware listener for tp's inp (which must be write-locked).
 *
 * The inp's listen context is removed from the listen hash; ENOENT is returned
 * when there is none.  If the reply to the passive open is still outstanding
 * (LCTX_RPL_PENDING) EINPROGRESS is returned and cleanup is left to the reply
 * handler.  Otherwise every synq entry that has TPF_SYNQE_HAS_L2TE set is
 * reset and a close-listener request is issued via destroy_server().
 */
589 t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
591 struct listen_ctx *lctx;
592 struct adapter *sc = tod->tod_softc;
593 struct inpcb *inp = tp->t_inpcb;
594 struct synq_entry *synqe;
596 INP_WLOCK_ASSERT(inp);
598 lctx = listen_hash_del(sc, inp);
600 return (ENOENT); /* no hardware listener for this inp */
602 CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
606 * If the reply to the PASS_OPEN is still pending we'll wait for it to
607 * arrive and clean up when it does.
609 if (lctx->flags & LCTX_RPL_PENDING) {
610 KASSERT(TAILQ_EMPTY(&lctx->synq),
611 ("%s: synq not empty.", __func__));
612 return (EINPROGRESS);
616 * The host stack will abort all the connections on the listening
617 * socket's so_comp. It doesn't know about the connections on the synq
618 * so we need to take care of those.
620 TAILQ_FOREACH(synqe, &lctx->synq, link) {
621 if (synqe->flags & TPF_SYNQE_HAS_L2TE)
622 send_reset_synqe(tod, synqe);
625 destroy_server(sc, lctx);
/* Take an additional reference on the synq entry. */
630 hold_synqe(struct synq_entry *synqe)
633 refcount_acquire(&synqe->refcnt);
/*
 * Drop a reference on the synq entry.  When the last reference goes away the
 * TPF_SYNQE_NEEDFREE flag is sampled; mbuf_to_synqe() sets that flag only on
 * entries it obtained from malloc().
 *
 * NOTE(review): the free() below is presumably guarded by needfree; the guard
 * is not visible here -- confirm.
 */
637 release_synqe(struct synq_entry *synqe)
640 if (refcount_release(&synqe->refcnt)) {
641 int needfree = synqe->flags & TPF_SYNQE_NEEDFREE;
645 free(synqe, M_CXGBE);
/*
 * toedev callback; arg is the synq_entry associated with the syncache entry.
 * NOTE(review): presumably takes a reference on the synqe to pair with
 * t4_syncache_removed(); the statement doing so is not visible -- confirm.
 */
650 t4_syncache_added(struct toedev *tod __unused, void *arg)
652 struct synq_entry *synqe = arg;
/*
 * toedev callback; arg is the synq_entry associated with the syncache entry.
 * Drops one reference on it.
 */
658 t4_syncache_removed(struct toedev *tod __unused, void *arg)
660 struct synq_entry *synqe = arg;
662 release_synqe(synqe);
/*
 * toedev callback that sends the prepared CPL_PASS_ACCEPT_RPL for a synq
 * entry.  arg is the synq_entry and m holds an IP (v4 or v6) header followed
 * by a TCP header.
 *
 * The work request stored in synqe->wr is taken with an atomic
 * read-and-clear, so it can be consumed only once.  The TCP header is located
 * directly after a fixed-size IPv4 or IPv6 header, its options are parsed with
 * tcp_dooptions(), and the sequence number and timestamp value are saved in
 * the synqe.  The T5 form of the reply also gets the ISS.  The work request is
 * sent through the synqe's L2 table entry and m is freed.
 */
666 extern void tcp_dooptions(struct tcpopt *, u_char *, int, int);
669 t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
671 struct adapter *sc = tod->tod_softc;
672 struct synq_entry *synqe = arg;
676 struct ip *ip = mtod(m, struct ip *);
679 wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr);
/* th is taken to start right after the base header (ip + 1 / ip6_hdr + 1). */
685 if (ip->ip_v == IPVERSION)
686 th = (void *)(ip + 1);
688 th = (void *)((struct ip6_hdr *)ip + 1);
689 bzero(&to, sizeof(to));
690 tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
693 /* save these for later */
694 synqe->iss = be32toh(th->th_seq);
695 synqe->ts = to.to_tsval;
698 struct cpl_t5_pass_accept_rpl *rpl5 = wrtod(wr);
/* th_seq is stored without conversion, unlike synqe->iss above. */
700 rpl5->iss = th->th_seq;
703 e = &sc->l2t->l2tab[synqe->l2e_idx];
704 t4_l2t_send(sc, wr, e);
706 m_freem(m); /* don't need this any more */
/*
 * Handler for CPL_PASS_OPEN_RPL, the chip's reply to a passive open request.
 *
 * The listen context is found from the stid in the message and its
 * LCTX_RPL_PENDING flag is cleared.  A non-zero status is logged.  The rest of
 * the handler distinguishes, by INP_DROPPED and status:
 *  - inp dropped and open failed: release the listen context;
 *  - inp dropped but open succeeded: ask the chip to close the listener;
 *  - inp alive but open failed: unhash and release the listen context;
 *  - otherwise the hardware listener is up.
 */
711 do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
714 struct adapter *sc = iq->adapter;
715 const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1);
716 int stid = GET_TID(cpl);
717 unsigned int status = cpl->status;
718 struct listen_ctx *lctx = lookup_stid(sc, stid);
719 struct inpcb *inp = lctx->inp;
721 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
724 KASSERT(opcode == CPL_PASS_OPEN_RPL,
725 ("%s: unexpected opcode 0x%x", __func__, opcode));
726 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
727 KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
731 CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x",
732 __func__, stid, status, lctx->flags);
734 lctx->flags &= ~LCTX_RPL_PENDING;
736 if (status != CPL_ERR_NONE)
737 log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);
741 * If the inp has been dropped (listening socket closed) then
742 * listen_stop must have run and taken the inp out of the hash.
744 if (inp->inp_flags & INP_DROPPED) {
745 KASSERT(listen_hash_del(sc, inp) == NULL,
746 ("%s: inp %p still in listen hash", __func__, inp));
/* Socket closed and the open failed: only the context is left to release. */
750 if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) {
751 if (release_lctx(sc, lctx) != NULL)
757 * Listening socket stopped listening earlier and now the chip tells us
758 * it has started the hardware listener. Stop it; the lctx will be
759 * released in do_close_server_rpl.
761 if (inp->inp_flags & INP_DROPPED) {
762 destroy_server(sc, lctx);
768 * Failed to start hardware listener. Take inp out of the hash and
769 * release our reference on it. An error message has been logged
772 if (status != CPL_ERR_NONE) {
773 listen_hash_del(sc, inp);
774 if (release_lctx(sc, lctx) != NULL)
779 /* hardware listener open for business */
/*
 * Handler for CPL_CLOSE_LISTSRV_RPL, the chip's reply to the request built by
 * destroy_server().  The listen context is found from the stid in the message.
 * A non-zero status is logged; the listen context reference is dropped with
 * release_lctx().
 */
786 do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
789 struct adapter *sc = iq->adapter;
790 const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1);
791 int stid = GET_TID(cpl);
792 unsigned int status = cpl->status;
793 struct listen_ctx *lctx = lookup_stid(sc, stid);
794 struct inpcb *inp = lctx->inp;
796 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
799 KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL,
800 ("%s: unexpected opcode 0x%x", __func__, opcode));
801 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
802 KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
804 CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);
806 if (status != CPL_ERR_NONE) {
807 log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
808 __func__, status, stid);
813 inp = release_lctx(sc, lctx);
/*
 * Final cleanup for a synq entry: take it off its listen context's synq, drop
 * the reference on the listen context, remove and release the tid, and drop
 * the reference on the synqe.  The listening inp must be write-locked on
 * entry.
 *
 * NOTE(review): callers state that the inp lock is released by this function;
 * the unlock itself is not visible here -- confirm.
 */
821 done_with_synqe(struct adapter *sc, struct synq_entry *synqe)
823 struct listen_ctx *lctx = synqe->lctx;
824 struct inpcb *inp = lctx->inp;
825 struct port_info *pi = synqe->syn->m_pkthdr.rcvif->if_softc;
826 struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];
828 INP_WLOCK_ASSERT(inp);
830 TAILQ_REMOVE(&lctx->synq, synqe, link);
/* release_lctx() returns NULL if this freed the inp. */
831 inp = release_lctx(sc, lctx);
834 remove_tid(sc, synqe->tid);
835 release_tid(sc, synqe->tid, &sc->sge.ctrlq[pi->port_id]);
837 release_synqe(synqe); /* removed from synq list */
/*
 * Handler for CPL_ABORT_REQ_RSS on a tid that still refers to a synq entry.
 *
 * Negative advice is ignored (returns 0).  Otherwise the offload tx queue
 * recorded in the SYN mbuf is looked up and a CPL_ABORT_RPL with
 * CPL_ABORT_NO_RST is sent on it.  Whether the synqe is torn down here depends
 * on TPF_ABORT_SHUTDOWN, as described in the comment in the body.
 */
841 do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
844 struct adapter *sc = iq->adapter;
845 const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
846 unsigned int tid = GET_TID(cpl);
847 struct synq_entry *synqe = lookup_tid(sc, tid);
848 struct listen_ctx *lctx = synqe->lctx;
849 struct inpcb *inp = lctx->inp;
851 struct sge_wrq *ofld_txq;
853 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
856 KASSERT(opcode == CPL_ABORT_REQ_RSS,
857 ("%s: unexpected opcode 0x%x", __func__, opcode));
858 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
859 KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
861 CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
862 __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
864 if (negative_advice(cpl->status))
865 return (0); /* Ignore negative advice */
/* Only the tx queue id is needed here, hence NULL for the rx queue id. */
869 get_qids_from_mbuf(synqe->syn, &txqid, NULL);
870 ofld_txq = &sc->sge.ofld_txq[txqid];
873 * If we'd initiated an abort earlier the reply to it is responsible for
874 * cleaning up resources. Otherwise we tear everything down right here
875 * right now. We owe the T4 a CPL_ABORT_RPL no matter what.
877 if (synqe->flags & TPF_ABORT_SHUTDOWN) {
882 done_with_synqe(sc, synqe);
883 /* inp lock released by done_with_synqe */
885 send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
/*
 * Handler for CPL_ABORT_RPL_RSS on a tid that still refers to a synq entry:
 * the chip's reply to an abort that send_reset_synqe() started (asserted via
 * TPF_ABORT_SHUTDOWN).  The synqe is cleaned up with done_with_synqe().
 */
890 do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss,
893 struct adapter *sc = iq->adapter;
894 const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
895 unsigned int tid = GET_TID(cpl);
896 struct synq_entry *synqe = lookup_tid(sc, tid);
897 struct listen_ctx *lctx = synqe->lctx;
898 struct inpcb *inp = lctx->inp;
900 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
903 KASSERT(opcode == CPL_ABORT_RPL_RSS,
904 ("%s: unexpected opcode 0x%x", __func__, opcode));
905 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
906 KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
908 CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
909 __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
912 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
913 ("%s: wasn't expecting abort reply for synqe %p (0x%x)",
914 __func__, synqe, synqe->flags));
916 done_with_synqe(sc, synqe);
917 /* inp lock released by done_with_synqe */
/*
 * toedev callback that attaches a newly created socket to the offloaded
 * connection tracked by a synq entry (arg).
 *
 * The CPL_PASS_ESTABLISH message is read from synqe->syn, and the toepcb
 * pointer is read from the memory immediately following it.  The socket is
 * offloaded to that toepcb, the connection is marked established using the
 * sequence numbers and TCP options from the message, the tid is re-pointed
 * from the synqe to the toepcb, and the synqe is flagged TPF_SYNQE_EXPANDED.
 * The pcbinfo lock and the new socket's inp write lock must be held.
 */
923 t4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
925 struct adapter *sc = tod->tod_softc;
926 struct synq_entry *synqe = arg;
928 struct inpcb *inp = sotoinpcb(so);
930 struct cpl_pass_establish *cpl = mtod(synqe->syn, void *);
931 struct toepcb *toep = *(struct toepcb **)(cpl + 1);
933 INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
934 INP_WLOCK_ASSERT(inp);
935 KASSERT(synqe->flags & TPF_SYNQE,
936 ("%s: %p not a synq_entry?", __func__, arg));
938 offload_socket(so, toep);
939 make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
940 toep->flags |= TPF_CPL_PENDING;
941 update_tid(sc, synqe->tid, toep);
942 synqe->flags |= TPF_SYNQE_EXPANDED;
/*
 * Pick an offload tx queue and an offload rx queue at random from the port's
 * ranges and remember both in the mbuf's flowid: the tx queue id in the upper
 * 16 bits and the rx queue id in the lower 16 bits.
 */
946 save_qids_in_mbuf(struct mbuf *m, struct port_info *pi)
948 uint32_t txqid, rxqid;
950 txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq;
951 rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq;
953 m->m_pkthdr.flowid = (txqid << 16) | (rxqid & 0xffff);
/*
 * Inverse of save_qids_in_mbuf(): the tx queue id is the upper 16 bits of the
 * mbuf's flowid and the rx queue id is the lower 16 bits.
 *
 * NOTE(review): callers in this file pass NULL for an id they do not need, so
 * each store is presumably guarded by a NULL check not visible here -- confirm.
 */
957 get_qids_from_mbuf(struct mbuf *m, int *txqid, int *rxqid)
961 *txqid = m->m_pkthdr.flowid >> 16;
963 *rxqid = m->m_pkthdr.flowid & 0xffff;
/*
 * Two sources for the synq_entry are visible: a malloc(M_NOWAIT) allocation,
 * flagged TPF_SYNQE | TPF_SYNQE_NEEDFREE so it gets freed later, or the last
 * len bytes of the mbuf's trailing space, flagged TPF_SYNQE only.  len is
 * sizeof(struct synq_entry) rounded up to a multiple of 8.
 *
 * NOTE(review): the test choosing between the two (presumably whether the
 * trailing space is at least len bytes) is not visible here -- confirm.
 */
967 * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to
968 * store some state temporarily.
970 static struct synq_entry *
971 mbuf_to_synqe(struct mbuf *m)
973 int len = roundup2(sizeof (struct synq_entry), 8);
974 int tspace = M_TRAILINGSPACE(m);
975 struct synq_entry *synqe = NULL;
978 synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT);
981 synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE;
983 synqe = (void *)(m->m_data + m->m_len + tspace - len);
984 synqe->flags = TPF_SYNQE;
/*
 * Translate the chip's TCP option summary (struct tcp_options) into the host
 * stack's struct tcpopt.  The output is zeroed first; then TOF_MSS (with the
 * MSS converted from big-endian), TOF_SCALE (with the window scale factor),
 * TOF_TS and TOF_SACKPERM are set.
 *
 * NOTE(review): each flag is presumably set only when the corresponding t4opt
 * field is present; those conditions are not visible here -- confirm.
 */
991 t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
993 bzero(to, sizeof(*to));
996 to->to_flags |= TOF_MSS;
997 to->to_mss = be16toh(t4opt->mss);
1001 to->to_flags |= TOF_SCALE;
1002 to->to_wscale = t4opt->wsf;
1006 to->to_flags |= TOF_TS;
1009 to->to_flags |= TOF_SACKPERM;
/*
 * The value is assembled in host order and returned big-endian (htobe32).
 * It always names the tx modulation queue for the port's tx channel and the
 * given offload rx queue as the RSS queue.  Timestamps and window scaling are
 * considered only when V_tcp_do_rfc1323 is set; ECN is enabled when
 * V_tcp_do_ecn is set and the SYN carried ECE or CWR.  Rx coalescing follows
 * sc->tt.rx_coalesce.  DDP rx flow control is compiled in only with
 * USE_DDP_RX_FLOW_CONTROL.
 */
1013 * Options2 for passive open.
1016 calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid,
1017 const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode)
1019 struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid];
1022 opt2 = V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]) |
1023 F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id);
1025 if (V_tcp_do_rfc1323) {
1027 opt2 |= F_TSTAMPS_EN;
1030 if (tcpopt->wsf <= 14)
1031 opt2 |= F_WND_SCALE_EN;
1034 if (V_tcp_do_ecn && th->th_flags & (TH_ECE | TH_CWR))
1035 opt2 |= F_CCTRL_ECN;
1037 /* RX_COALESCE is always a valid value (0 or M_RX_COALESCE). */
1039 opt2 |= F_RX_COALESCE_VALID;
1041 opt2 |= F_T5_OPT_2_VALID;
1042 opt2 |= F_CONG_CNTRL_VALID; /* OPT_2_ISS really, for T5 */
1044 if (sc->tt.rx_coalesce)
1045 opt2 |= V_RX_COALESCE(M_RX_COALESCE);
1047 #ifdef USE_DDP_RX_FLOW_CONTROL
1048 if (ulp_mode == ULP_MODE_TCPDDP)
1049 opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
1052 return htobe32(opt2);
/*
 * Extract connection information from the packet headers that follow a
 * CPL_PASS_ACCEPT_REQ in mbuf m.
 *
 * The Ethernet header starts right after the CPL; the L3 and TCP headers are
 * found using the header lengths encoded in cpl->hdr_len.  inc is zeroed and
 * filled with the TCP ports and the IPv4 or IPv6 addresses (the packet's
 * source becomes the foreign side, its destination the local side).  The TCP
 * header is copied to th and converted to host order with
 * tcp_fields_to_host().
 */
1056 pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc,
1059 const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
1060 const struct ether_header *eh;
1061 unsigned int hlen = be32toh(cpl->hdr_len);
1063 const struct tcphdr *tcp;
1065 eh = (const void *)(cpl + 1);
1066 l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
1067 tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
1070 bzero(inc, sizeof(*inc));
1071 inc->inc_fport = tcp->th_sport;
1072 inc->inc_lport = tcp->th_dport;
/* The version field of the L3 header decides between IPv4 and IPv6. */
1073 if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
1074 const struct ip *ip = (const void *)l3hdr;
1076 inc->inc_faddr = ip->ip_src;
1077 inc->inc_laddr = ip->ip_dst;
1079 const struct ip6_hdr *ip6 = (const void *)l3hdr;
1081 inc->inc_flags |= INC_ISIPV6;
1082 inc->inc6_faddr = ip6->ip6_src;
1083 inc->inc6_laddr = ip6->ip6_dst;
1088 bcopy(tcp, th, sizeof(*th));
1089 tcp_fields_to_host(th); /* just like tcp_input */
/*
 * Check whether the IPv6 address *ip6 is configured on ifp.  A local copy of
 * the address has its scope cleared and then set for ifp before it is compared
 * against every AF_INET6 address on the interface's address list.
 *
 * NOTE(review): both operands of the || below call in6_clearscope() on the
 * same address, so the second call looks redundant -- confirm intent.
 */
1094 ifnet_has_ip6(struct ifnet *ifp, struct in6_addr *ip6)
1097 struct sockaddr_in6 *sin6;
1099 struct in6_addr in6 = *ip6;
1101 /* Just as in ip6_input */
1102 if (in6_clearscope(&in6) || in6_clearscope(&in6))
1104 in6_setscope(&in6, ifp, NULL);
1107 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1108 sin6 = (void *)ifa->ifa_addr;
1109 if (sin6->sin6_family != AF_INET6)
1112 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &in6)) {
1117 if_addr_runlock(ifp);
/*
 * Find the L2 table entry to use for reaching the peer described by inc.
 *
 * A sockaddr for the foreign address is built in a sockaddr_in6-sized buffer
 * (used for both address families).  For IPv6 with a link-local local address
 * the entry is requested for the peer directly, without a route lookup.
 * Otherwise a route is looked up with rtalloc1(); the route's interface is
 * compared with ifp, and for RTF_GATEWAY routes the gateway address is used as
 * the next hop passed to t4_l2t_get().
 */
1122 static struct l2t_entry *
1123 get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp,
1124 struct in_conninfo *inc)
1127 struct l2t_entry *e;
1128 struct sockaddr_in6 sin6;
1129 struct sockaddr *dst = (void *)&sin6;
1131 if (inc->inc_flags & INC_ISIPV6) {
1132 dst->sa_len = sizeof(struct sockaddr_in6);
1133 dst->sa_family = AF_INET6;
1134 ((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;
1136 if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
1137 /* no need for route lookup */
1138 e = t4_l2t_get(pi, ifp, dst);
1142 dst->sa_len = sizeof(struct sockaddr_in);
1143 dst->sa_family = AF_INET;
1144 ((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
1147 rt = rtalloc1(dst, 0, 0);
1151 struct sockaddr *nexthop;
1154 if (rt->rt_ifp != ifp)
1157 if (rt->rt_flags & RTF_GATEWAY)
1158 nexthop = rt->rt_gateway;
1161 e = t4_l2t_get(pi, ifp, nexthop);
/*
 * Check whether the IPv4 address in is configured on ifp by comparing it
 * against every AF_INET address on the interface's address list.
 */
1170 ifnet_has_ip(struct ifnet *ifp, struct in_addr in)
1173 struct sockaddr_in *sin;
1177 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1178 sin = (void *)ifa->ifa_addr;
1179 if (sin->sin_family != AF_INET)
1182 if (sin->sin_addr.s_addr == in.s_addr) {
1187 if_addr_runlock(ifp);
/*
 * Used by do_pass_accept_req() to decline offloading a SYN.  The source line
 * of the rejection is recorded in reject_reason, so the expanding function
 * must have a variable of that name in scope.
 */
1192 #define REJECT_PASS_ACCEPT() do { \
1193 reject_reason = __LINE__; \
1198 * The context associated with a tid entry via insert_tid could be a synq_entry
1199 * or a toepcb. The only way CPL handlers can tell is via a bit in these flags.
/* Compile-time check that 'flags' sits at the same offset in both structures. */
1201 CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));
1204 * Incoming SYN on a listening socket.
1206 * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
1210 do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
1213 struct adapter *sc = iq->adapter;
1215 const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
1216 struct cpl_pass_accept_rpl *rpl;
1218 unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
1219 unsigned int tid = GET_TID(cpl);
1220 struct listen_ctx *lctx = lookup_stid(sc, stid);
1223 struct in_conninfo inc;
1226 struct port_info *pi;
1227 struct ifnet *hw_ifp, *ifp;
1228 struct l2t_entry *e = NULL;
1229 int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
1230 struct synq_entry *synqe = NULL;
1234 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1237 KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
1238 ("%s: unexpected opcode 0x%x", __func__, opcode));
1239 KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
1241 CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
1244 pass_accept_req_to_protohdrs(m, &inc, &th);
1245 t4opt_to_tcpopt(&cpl->tcpopt, &to);
1247 pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
1248 hw_ifp = pi->ifp; /* the cxgbeX ifnet */
1249 m->m_pkthdr.rcvif = hw_ifp;
1250 tod = TOEDEV(hw_ifp);
1253 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
1254 * involved. Don't offload if the SYN had a VLAN tag and the vid
1255 * doesn't match anything on this interface.
1257 * XXX: lagg support, lagg + vlan support.
1259 vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
1261 ifp = VLAN_DEVAT(hw_ifp, vid);
1263 REJECT_PASS_ACCEPT();
1268 * Don't offload if the peer requested a TCP option that's not known to
1271 if (cpl->tcpopt.unknown)
1272 REJECT_PASS_ACCEPT();
1274 if (inc.inc_flags & INC_ISIPV6) {
1276 /* Don't offload if the ifcap isn't enabled */
1277 if ((ifp->if_capenable & IFCAP_TOE6) == 0)
1278 REJECT_PASS_ACCEPT();
1281 * SYN must be directed to an IP6 address on this ifnet. This
1282 * is more restrictive than in6_localip.
1284 if (!ifnet_has_ip6(ifp, &inc.inc6_laddr))
1285 REJECT_PASS_ACCEPT();
1288 /* Don't offload if the ifcap isn't enabled */
1289 if ((ifp->if_capenable & IFCAP_TOE4) == 0)
1290 REJECT_PASS_ACCEPT();
1293 * SYN must be directed to an IP address on this ifnet. This
1294 * is more restrictive than in_localip.
1296 if (!ifnet_has_ip(ifp, inc.inc_laddr))
1297 REJECT_PASS_ACCEPT();
1300 e = get_l2te_for_nexthop(pi, ifp, &inc);
1302 REJECT_PASS_ACCEPT();
1304 synqe = mbuf_to_synqe(m);
1306 REJECT_PASS_ACCEPT();
1308 wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) :
1309 sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[pi->port_id]);
1311 REJECT_PASS_ACCEPT();
1314 INP_INFO_WLOCK(&V_tcbinfo); /* for 4-tuple check, syncache_add */
1316 /* Don't offload if the 4-tuple is already in use */
1317 if (toe_4tuple_check(&inc, &th, ifp) != 0) {
1318 INP_INFO_WUNLOCK(&V_tcbinfo);
1320 REJECT_PASS_ACCEPT();
1323 inp = lctx->inp; /* listening socket, not owned by TOE */
1326 /* Don't offload if the listening socket has closed */
1327 if (__predict_false(inp->inp_flags & INP_DROPPED)) {
1329 * The listening socket has closed. The reply from the TOE to
1330 * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
1331 * resources tied to this listen context.
1334 INP_INFO_WUNLOCK(&V_tcbinfo);
1336 REJECT_PASS_ACCEPT();
1338 so = inp->inp_socket;
1340 mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
1341 rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
1342 SOCKBUF_LOCK(&so->so_rcv);
1343 /* opt0 rcv_bufsiz initially, assumes its normal meaning later */
1344 rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
1345 SOCKBUF_UNLOCK(&so->so_rcv);
1347 save_qids_in_mbuf(m, pi);
1348 get_qids_from_mbuf(m, NULL, &rxqid);
1351 INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
1353 struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;
1355 INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
1357 if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) {
1358 ulp_mode = ULP_MODE_TCPDDP;
1359 synqe->flags |= TPF_SYNQE_TCPDDP;
1361 ulp_mode = ULP_MODE_NONE;
1362 rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits, ulp_mode);
1363 rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode);
1369 refcount_init(&synqe->refcnt, 1); /* 1 means extra hold */
1370 synqe->l2e_idx = e->idx;
1371 synqe->rcv_bufsize = rx_credits;
1372 atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);
1374 insert_tid(sc, tid, synqe);
1375 TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
1376 hold_synqe(synqe); /* hold for the duration it's in the synq */
1377 hold_lctx(lctx); /* A synqe on the list has a ref on its lctx */
1380 * If all goes well t4_syncache_respond will get called during
1381 * syncache_add. Also note that syncache_add releases both pcbinfo and
1384 toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
1385 INP_UNLOCK_ASSERT(inp); /* ok to assert, we have a ref on the inp */
1386 INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
1389 * If we replied during syncache_add (synqe->wr has been consumed),
1390 * good. Otherwise, set it to 0 so that further syncache_respond
1391 * attempts by the kernel will be ignored.
1393 if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) {
1396 * syncache may or may not have a hold on the synqe, which may
1397 * or may not be stashed in the original SYN mbuf passed to us.
1398 * Just copy it over instead of dealing with all possibilities.
1400 m = m_dup(synqe->syn, M_NOWAIT);
1402 m->m_pkthdr.rcvif = hw_ifp;
1404 remove_tid(sc, synqe->tid);
1407 /* Yank the synqe out of the lctx synq. */
1409 TAILQ_REMOVE(&lctx->synq, synqe, link);
1410 release_synqe(synqe); /* removed from synq list */
1411 inp = release_lctx(sc, lctx);
1415 release_synqe(synqe); /* extra hold */
1416 REJECT_PASS_ACCEPT();
1419 CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK",
1420 __func__, stid, tid, lctx, synqe);
1423 synqe->flags |= TPF_SYNQE_HAS_L2TE;
1424 if (__predict_false(inp->inp_flags & INP_DROPPED)) {
1426 * Listening socket closed but tod_listen_stop did not abort
1427 * this tid because there was no L2T entry for the tid at that
1428 * time. Abort it now. The reply to the abort will clean up.
1431 "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT",
1432 __func__, stid, tid, lctx, synqe, synqe->flags);
1433 if (!(synqe->flags & TPF_SYNQE_EXPANDED))
1434 send_reset_synqe(tod, synqe);
1437 release_synqe(synqe); /* extra hold */
1442 release_synqe(synqe); /* extra hold */
1445 CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
1450 release_tid(sc, tid, lctx->ctrlq);
1452 if (__predict_true(m != NULL)) {
1453 m_adj(m, sizeof(*cpl));
1454 m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
1455 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1456 m->m_pkthdr.csum_data = 0xffff;
1457 hw_ifp->if_input(hw_ifp, m);
1460 return (reject_reason);
/*
 * Fill in inc, th and to so that they describe the ACK that completes the
 * handshake for the connection tracked by synqe.
 *
 * synqe: entry whose saved SYN mbuf (synqe->syn), iss and ts are read.
 * cpl:   CPL_PASS_ESTABLISH message; tcp_opt and rcv_isn are read from it
 *        and converted with be16toh/be32toh.
 * inc:   passed to pass_accept_req_to_protohdrs together with the saved SYN.
 * th:    passed to pass_accept_req_to_protohdrs, then its flags, ack and seq
 *        fields are overwritten here.
 * to:    zeroed here; TOF_TS and to_tsecr are set only when the timestamp
 *        bit is present in tcp_opt.
 */
1464 synqe_to_protohdrs(struct synq_entry *synqe,
1465 const struct cpl_pass_establish *cpl, struct in_conninfo *inc,
1466 struct tcphdr *th, struct tcpopt *to)
1468 uint16_t tcp_opt = be16toh(cpl->tcp_opt);
1470 /* start off with the original SYN */
1471 pass_accept_req_to_protohdrs(synqe->syn, inc, th);
1473 /* modify parts to make it look like the ACK to our SYN|ACK */
1474 th->th_flags = TH_ACK;
/* Acknowledge one past the iss recorded in the synqe. */
1475 th->th_ack = synqe->iss + 1;
/* Sequence number is taken from rcv_isn as reported in the CPL. */
1476 th->th_seq = be32toh(cpl->rcv_isn);
/* Start from an empty option set; only the timestamp option is rebuilt. */
1477 bzero(to, sizeof(*to));
1478 if (G_TCPOPT_TSTAMP(tcp_opt)) {
1479 to->to_flags |= TOF_TS;
/* Echo the timestamp value that was stored in the synqe. */
1480 to->to_tsecr = synqe->ts;
/*
 * Handler for CPL_PASS_ESTABLISH (registered in t4_init_listen_cpl_handlers).
 *
 * iq:  ingress queue the CPL arrived on; the adapter is taken from it.
 * rss: RSS header; the CPL message starts right after it.
 * m:   asserted to be NULL on entry (no payload is expected).
 *
 * Visible flow, all with the tcbinfo write lock held:
 *  - The synqe is looked up by the tid in the CPL, and its listen context
 *    and listening inp are derived from it.
 *  - If the listening inp has INP_DROPPED set, the lock is released.
 *  - A toepcb is allocated with M_NOWAIT.  The comment after the allocation
 *    states that send_reset_synqe is used on that path and that the reply
 *    to the abort performs the final cleanup.
 *  - l2te, the ULP mode and rx_credits are set up in the toepcb from values
 *    kept in the synqe.
 *  - synqe_to_protohdrs builds inc/th/to, the CPL and the toep pointer are
 *    copied into mbuf m, and toe_syncache_expand is called.
 *  - If TPF_SYNQE_EXPANDED is not set afterwards, the keepalive timer is
 *    activated on the new connection and t4_offload_socket is called here.
 *  - The synqe is removed from lctx->synq, release_lctx is called, the
 *    tcbinfo lock is dropped and release_synqe is called.
 *
 * NOTE(review): m is asserted NULL on entry but is dereferenced before the
 * bcopy, so it must be repointed in between (presumably at synqe->syn, the
 * mbuf that carried the CPL_PASS_ACCEPT_REQ).  Confirm against the full
 * source.  The return statements are not visible in this excerpt either.
 */
1485 do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
1488 struct adapter *sc = iq->adapter;
1489 struct port_info *pi;
/* The CPL message immediately follows the RSS header. */
1491 const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
1492 #if defined(KTR) || defined(INVARIANTS)
1493 unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
1495 unsigned int tid = GET_TID(cpl);
/* do_pass_accept_req stored the synqe under this tid via insert_tid. */
1496 struct synq_entry *synqe = lookup_tid(sc, tid);
1497 struct listen_ctx *lctx = synqe->lctx;
1498 struct inpcb *inp = lctx->inp;
1502 struct in_conninfo inc;
1503 struct toepcb *toep;
1506 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1509 KASSERT(opcode == CPL_PASS_ESTABLISH,
1510 ("%s: unexpected opcode 0x%x", __func__, opcode));
1511 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1512 KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
1513 KASSERT(synqe->flags & TPF_SYNQE,
1514 ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
1516 INP_INFO_WLOCK(&V_tcbinfo); /* for syncache_expand */
1520 "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
1521 __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);
/* Same INP_DROPPED test that do_pass_accept_req uses for a closed listener. */
1523 if (__predict_false(inp->inp_flags & INP_DROPPED)) {
1525 if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
1526 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
1527 ("%s: listen socket closed but tid %u not aborted.",
1532 INP_INFO_WUNLOCK(&V_tcbinfo);
/* do_pass_accept_req set rcvif to the cxgbe ifnet (presumably on this mbuf). */
1536 ifp = synqe->syn->m_pkthdr.rcvif;
1538 KASSERT(pi->adapter == sc,
1539 ("%s: pi %p, sc %p mismatch", __func__, pi, sc));
/* Queue ids are read back from the saved SYN mbuf (see save_qids_in_mbuf). */
1541 get_qids_from_mbuf(synqe->syn, &txqid, &rxqid);
1542 KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
1543 ("%s: CPL arrived on unexpected rxq. %d %d", __func__, rxqid,
1544 (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));
/* Allocation is requested with M_NOWAIT. */
1546 toep = alloc_toepcb(pi, txqid, rxqid, M_NOWAIT);
1550 * The reply to this abort will perform final cleanup. There is
1551 * no need to check for HAS_L2TE here. We can be here only if
1552 * we responded to the PASS_ACCEPT_REQ, and our response had the
1555 send_reset_synqe(TOEDEV(ifp), synqe);
1557 INP_INFO_WUNLOCK(&V_tcbinfo);
/* l2e_idx, TPF_SYNQE_TCPDDP and rcv_bufsize were recorded by do_pass_accept_req. */
1561 toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx];
1562 if (synqe->flags & TPF_SYNQE_TCPDDP)
1563 set_tcpddp_ulp_mode(toep);
1565 toep->ulp_mode = ULP_MODE_NONE;
1566 /* opt0 rcv_bufsiz initially, assumes its normal meaning later */
1567 toep->rx_credits = synqe->rcv_bufsize;
1569 so = inp->inp_socket;
1570 KASSERT(so != NULL, ("%s: socket is NULL", __func__));
1572 /* Come up with something that syncache_expand should be ok with. */
1573 synqe_to_protohdrs(synqe, cpl, &inc, &th, &to);
1576 * No more need for anything in the mbuf that carried the
1577 * CPL_PASS_ACCEPT_REQ. Drop the CPL_PASS_ESTABLISH and toep pointer
1578 * there. XXX: bad form but I don't want to increase the size of synqe.
1581 KASSERT(sizeof(*cpl) + sizeof(toep) <= m->m_len,
1582 ("%s: no room in mbuf %p (m_len %d)", __func__, m, m->m_len));
1583 bcopy(cpl, mtod(m, void *), sizeof(*cpl));
1584 *(struct toepcb **)(mtod(m, struct cpl_pass_establish *) + 1) = toep;
1586 if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
1592 * This is for the unlikely case where the syncache entry that we added
1593 * has been evicted from the syncache, but the syncache_expand above
1594 * works because of syncookies.
1596 * XXX: we've held the tcbinfo lock throughout so there's no risk of
1597 * anyone accept'ing a connection before we've installed our hooks, but
1598 * this somewhat defeats the purpose of having a tod_offload_socket :-(
1600 if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
1601 struct inpcb *new_inp = sotoinpcb(so);
1604 tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
1605 t4_offload_socket(TOEDEV(ifp), synqe, so);
1606 INP_WUNLOCK(new_inp);
1609 /* Done with the synqe */
1610 TAILQ_REMOVE(&lctx->synq, synqe, link);
1611 inp = release_lctx(sc, lctx);
1614 INP_INFO_WUNLOCK(&V_tcbinfo);
1615 release_synqe(synqe);
/*
 * Register the CPL handlers of the listen (passive open) path on adapter sc.
 * The visible registrations are:
 *   CPL_PASS_OPEN_RPL     -> do_pass_open_rpl
 *   CPL_CLOSE_LISTSRV_RPL -> do_close_server_rpl
 *   CPL_PASS_ACCEPT_REQ   -> do_pass_accept_req
 *   CPL_PASS_ESTABLISH    -> do_pass_establish
 */
1621 t4_init_listen_cpl_handlers(struct adapter *sc)
1624 t4_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl);
1625 t4_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
1626 t4_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
1627 t4_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish);