/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/refcount.h>
#include <sys/domain.h>
#include <sys/fnv_hash.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet6/scope6_var.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/toecore.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
/* stid services */
static int alloc_stid(struct adapter *, struct listen_ctx *, int);
static struct listen_ctx *lookup_stid(struct adapter *, int);
static void free_stid(struct adapter *, struct listen_ctx *);

/* lctx services */
static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
    struct vi_info *);
static int free_lctx(struct adapter *, struct listen_ctx *);
static void hold_lctx(struct listen_ctx *);
static void listen_hash_add(struct adapter *, struct listen_ctx *);
static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *);
static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);

static inline void save_qids_in_mbuf(struct mbuf *, struct vi_info *);
static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *);
static void send_reset_synqe(struct toedev *, struct synq_entry *);
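
/*
 * Note on the stid range bookkeeping used below: t->stids is a TAILQ of
 * stid_region structures kept in stid order.  Each region accounts for the
 * stids it occupies (used) plus the free stids that immediately follow it
 * (free), while nstids_free_head counts the free stids before the first
 * region.  A full walk of the TAILQ therefore accounts for every stid up
 * to t->nstids, which is what the corruption check in alloc_stid relies on.
 */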
static int
alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6)
{
	struct tid_info *t = &sc->tids;
	u_int stid, n, f, mask;
	struct stid_region *sr = &lctx->stid_region;

	/*
	 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in
	 * the TCAM.  The start of the stid region is properly aligned (the chip
	 * requires each region to be 128-cell aligned).
	 */
	n = isipv6 ? 2 : 1;
	mask = n - 1;
	KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0,
	    ("%s: stid region (%u, %u) not properly aligned.  n = %u",
	    __func__, t->stid_base, t->nstids, n));

	mtx_lock(&t->stid_lock);
	if (n > t->nstids - t->stids_in_use) {
		mtx_unlock(&t->stid_lock);
		return (-1);
	}

	if (t->nstids_free_head >= n) {
		/*
		 * This allocation will definitely succeed because the region
		 * starts at a good alignment and we just checked we have
		 * enough stids free.
		 */
		f = t->nstids_free_head & mask;
		t->nstids_free_head -= n + f;
		stid = t->nstids_free_head;
		TAILQ_INSERT_HEAD(&t->stids, sr, link);
	} else {
		struct stid_region *s;

		stid = t->nstids_free_head;
		TAILQ_FOREACH(s, &t->stids, link) {
			stid += s->used + s->free;
			f = stid & mask;
			if (s->free >= n + f) {
				stid -= n + f;
				s->free -= n + f;
				TAILQ_INSERT_AFTER(&t->stids, s, sr, link);
				goto allocated;
			}
		}

		if (__predict_false(stid != t->nstids)) {
			panic("%s: stids TAILQ (%p) corrupt."
			    "  At %d instead of %d at the end of the queue.",
			    __func__, &t->stids, stid, t->nstids);
		}

		mtx_unlock(&t->stid_lock);
		return (-1);
	}

allocated:
	sr->used = n;
	sr->free = f;
	t->stids_in_use += n;
	t->stid_tab[stid] = lctx;
	mtx_unlock(&t->stid_lock);

	KASSERT(((stid + t->stid_base) & mask) == 0,
	    ("%s: EDOOFUS.", __func__));
	return (stid + t->stid_base);
}
static struct listen_ctx *
lookup_stid(struct adapter *sc, int stid)
{
	struct tid_info *t = &sc->tids;

	return (t->stid_tab[stid - t->stid_base]);
}
static void
free_stid(struct adapter *sc, struct listen_ctx *lctx)
{
	struct tid_info *t = &sc->tids;
	struct stid_region *sr = &lctx->stid_region;
	struct stid_region *s;

	KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used));

	mtx_lock(&t->stid_lock);
	s = TAILQ_PREV(sr, stid_head, link);
	if (s != NULL)
		s->free += sr->used + sr->free;
	else
		t->nstids_free_head += sr->used + sr->free;
	KASSERT(t->stids_in_use >= sr->used,
	    ("%s: stids_in_use (%u) < stids being freed (%u)", __func__,
	    t->stids_in_use, sr->used));
	t->stids_in_use -= sr->used;
	TAILQ_REMOVE(&t->stids, sr, link);
	mtx_unlock(&t->stid_lock);
}
static struct listen_ctx *
alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi)
{
	struct listen_ctx *lctx;

	INP_WLOCK_ASSERT(inp);

	lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO);
	if (lctx == NULL)
		return (NULL);

	lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6);
	if (lctx->stid < 0) {
		free(lctx, M_CXGBE);
		return (NULL);
	}

	if (inp->inp_vflag & INP_IPV6 &&
	    !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) {
		struct tom_data *td = sc->tom_softc;

		lctx->ce = hold_lip(td, &inp->in6p_laddr);
		if (lctx->ce == NULL) {
			free(lctx, M_CXGBE);
			return (NULL);
		}
	}

	lctx->ctrlq = &sc->sge.ctrlq[vi->pi->port_id];
	lctx->ofld_rxq = &sc->sge.ofld_rxq[vi->first_ofld_rxq];
	refcount_init(&lctx->refcount, 1);
	TAILQ_INIT(&lctx->synq);

	lctx->inp = inp;
	lctx->vnet = inp->inp_socket->so_vnet;
	in_pcbref(inp);

	return (lctx);
}
/* Don't call this directly, use release_lctx instead */
static int
free_lctx(struct adapter *sc, struct listen_ctx *lctx)
{
	struct inpcb *inp = lctx->inp;
	struct tom_data *td = sc->tom_softc;

	INP_WLOCK_ASSERT(inp);
	KASSERT(lctx->refcount == 0,
	    ("%s: refcount %d", __func__, lctx->refcount));
	KASSERT(TAILQ_EMPTY(&lctx->synq),
	    ("%s: synq not empty.", __func__));
	KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));

	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
	    __func__, lctx->stid, lctx, lctx->inp);

	if (lctx->ce)
		release_lip(td, lctx->ce);
	free_stid(sc, lctx);
	free(lctx, M_CXGBE);

	return (in_pcbrele_wlocked(inp));
}

static void
hold_lctx(struct listen_ctx *lctx)
{

	refcount_acquire(&lctx->refcount);
}

static inline uint32_t
listen_hashfn(void *key, u_long mask)
{

	return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask);
}
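
/*
 * Note that the listen hash keys on the inpcb pointer itself (fnv_32_buf
 * over the pointer value), not on the address/port tuple, so the find/del
 * routines below simply compare lctx->inp against the inp of interest.
 */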
/*
 * Add a listen_ctx entry to the listen hash table.
 */
static void
listen_hash_add(struct adapter *sc, struct listen_ctx *lctx)
{
	struct tom_data *td = sc->tom_softc;
	int bucket = listen_hashfn(lctx->inp, td->listen_mask);

	mtx_lock(&td->lctx_hash_lock);
	LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
	td->lctx_count++;
	mtx_unlock(&td->lctx_hash_lock);
}
/*
 * Look for the listening socket's context entry in the hash and return it.
 */
static struct listen_ctx *
listen_hash_find(struct adapter *sc, struct inpcb *inp)
{
	struct tom_data *td = sc->tom_softc;
	int bucket = listen_hashfn(inp, td->listen_mask);
	struct listen_ctx *lctx;

	mtx_lock(&td->lctx_hash_lock);
	LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
		if (lctx->inp == inp)
			break;
	}
	mtx_unlock(&td->lctx_hash_lock);

	return (lctx);
}
/*
 * Removes the listen_ctx structure for inp from the hash and returns it.
 */
static struct listen_ctx *
listen_hash_del(struct adapter *sc, struct inpcb *inp)
{
	struct tom_data *td = sc->tom_softc;
	int bucket = listen_hashfn(inp, td->listen_mask);
	struct listen_ctx *lctx, *l;

	mtx_lock(&td->lctx_hash_lock);
	LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
		if (lctx->inp == inp) {
			LIST_REMOVE(lctx, link);
			td->lctx_count--;
			break;
		}
	}
	mtx_unlock(&td->lctx_hash_lock);

	return (lctx);
}
/*
 * Releases a hold on the lctx.  Must be called with the listening socket's inp
 * locked.  The inp may be freed by this function and it returns NULL to
 * indicate this.
 */
static struct inpcb *
release_lctx(struct adapter *sc, struct listen_ctx *lctx)
{
	struct inpcb *inp = lctx->inp;
	int inp_freed = 0;

	INP_WLOCK_ASSERT(inp);
	if (refcount_release(&lctx->refcount))
		inp_freed = free_lctx(sc, lctx);

	return (inp_freed ? NULL : inp);
}
static void
send_reset_synqe(struct toedev *tod, struct synq_entry *synqe)
{
	struct adapter *sc = tod->tod_softc;
	struct mbuf *m = synqe->syn;
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];
	struct wrqe *wr;
	struct fw_flowc_wr *flowc;
	struct cpl_abort_req *req;
	int txqid, rxqid, flowclen;
	struct sge_wrq *ofld_txq;
	struct sge_ofld_rxq *ofld_rxq;
	const int nparams = 6;
	unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN;

	INP_WLOCK_ASSERT(synqe->lctx->inp);

	CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
	    __func__, synqe, synqe->flags, synqe->tid,
	    synqe->flags & TPF_ABORT_SHUTDOWN ?
	    " (abort already in progress)" : "");
	if (synqe->flags & TPF_ABORT_SHUTDOWN)
		return;	/* abort already in progress */
	synqe->flags |= TPF_ABORT_SHUTDOWN;

	get_qids_from_mbuf(m, &txqid, &rxqid);
	ofld_txq = &sc->sge.ofld_txq[txqid];
	ofld_rxq = &sc->sge.ofld_rxq[rxqid];

	/* The wrqe will have two WRs - a flowc followed by an abort_req */
	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);

	wr = alloc_wrqe(roundup2(flowclen, EQ_ESIZE) + sizeof(*req), ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	flowc = wrtod(wr);
	req = (void *)((caddr_t)flowc + roundup2(flowclen, EQ_ESIZE));

	/* First the flowc ... */
	memset(flowc, 0, wr->wr_len);
	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
	    V_FW_FLOWC_WR_NPARAMS(nparams));
	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
	    V_FW_WR_FLOWID(synqe->tid));
	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
	flowc->mnemval[0].val = htobe32(pfvf);
	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
	flowc->mnemval[1].val = htobe32(pi->tx_chan);
	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
	flowc->mnemval[2].val = htobe32(pi->tx_chan);
	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
	flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
	flowc->mnemval[4].val = htobe32(512);
	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
	flowc->mnemval[5].val = htobe32(512);
	synqe->flags |= TPF_FLOWC_WR_SENT;

	/* ... then ABORT request */
	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid);
	req->rsvd0 = 0;	/* don't have a snd_nxt */
	req->rsvd1 = 1;	/* no data sent yet */
	req->cmd = CPL_ABORT_SEND_RST;

	t4_l2t_send(sc, wr, e);
}
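
/*
 * Note: the firmware expects a FLOWC WR on a tid before any other WR on
 * that tid, which is why the abort above is carried in the same wrqe as a
 * leading flowc; queueing them as a single unit keeps them ordered.
 */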
static int
create_server(struct adapter *sc, struct listen_ctx *lctx)
{
	struct wrqe *wr;
	struct cpl_pass_open_req *req;
	struct inpcb *inp = lctx->inp;

	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
	if (wr == NULL) {
		log(LOG_ERR, "%s: allocation failure", __func__);
		return (ENOMEM);
	}
	req = wrtod(wr);

	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
	req->local_port = inp->inp_lport;
	req->peer_port = 0;
	req->local_ip = inp->inp_laddr.s_addr;
	req->peer_ip = 0;
	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));

	t4_wrq_tx(sc, wr);
	return (0);
}
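
/*
 * The peer_port/peer_ip fields of the PASS_OPEN request are left at 0 on
 * purpose: a zero wildcard makes the hardware entry match SYNs from any
 * remote address and port, much like a software listener.
 */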
static int
create_server6(struct adapter *sc, struct listen_ctx *lctx)
{
	struct wrqe *wr;
	struct cpl_pass_open_req6 *req;
	struct inpcb *inp = lctx->inp;

	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
	if (wr == NULL) {
		log(LOG_ERR, "%s: allocation failure", __func__);
		return (ENOMEM);
	}
	req = wrtod(wr);

	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid));
	req->local_port = inp->inp_lport;
	req->peer_port = 0;
	req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
	req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
	req->peer_ip_hi = 0;
	req->peer_ip_lo = 0;
	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));

	t4_wrq_tx(sc, wr);
	return (0);
}
static int
destroy_server(struct adapter *sc, struct listen_ctx *lctx)
{
	struct wrqe *wr;
	struct cpl_close_listsvr_req *req;

	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
	    lctx->stid));
	req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id);
	req->rsvd = htobe16(0);

	t4_wrq_tx(sc, wr);
	return (0);
}
/*
 * Start a listening server by sending a passive open request to HW.
 *
 * Can't take the adapter lock here, so access to sc->flags,
 * sc->offload_map, and if_capenable are all race prone.
 */
int
t4_listen_start(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
	struct vi_info *vi;
	struct port_info *pi;
	struct inpcb *inp = tp->t_inpcb;
	struct listen_ctx *lctx;
	int i, rc, v;

	INP_WLOCK_ASSERT(inp);

	/* Don't start a hardware listener for any loopback address. */
	if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr))
		return (0);
	if (!(inp->inp_vflag & INP_IPV6) &&
	    IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr)))
		return (0);
#if 0
	ADAPTER_LOCK(sc);
	if (IS_BUSY(sc)) {
		log(LOG_ERR, "%s: listen request ignored, %s is busy",
		    __func__, device_get_nameunit(sc->dev));
		goto done;
	}

	KASSERT(uld_active(sc, ULD_TOM),
	    ("%s: TOM not initialized", __func__));
#endif

	/*
	 * Find an initialized VI with IFCAP_TOE (4 or 6).  We'll use the first
	 * such VI's queues to send the passive open and receive the reply to
	 * it.
	 *
	 * XXX: need a way to mark a port in use by offload.  if_cxgbe should
	 * then reject any attempt to bring down such a port (and maybe reject
	 * attempts to disable IFCAP_TOE on that port too?).
	 */
	for_each_port(sc, i) {
		pi = sc->port[i];
		for_each_vi(pi, v, vi) {
			if (vi->flags & VI_INIT_DONE &&
			    vi->ifp->if_capenable & IFCAP_TOE)
				goto found;
		}
	}
	goto done;	/* no port that's UP with IFCAP_TOE enabled */
found:

	if (listen_hash_find(sc, inp) != NULL)
		goto done;	/* already setup */

	lctx = alloc_lctx(sc, inp, vi);
	if (lctx == NULL) {
		log(LOG_ERR,
		    "%s: listen request ignored, %s couldn't allocate lctx\n",
		    __func__, device_get_nameunit(sc->dev));
		goto done;
	}
	listen_hash_add(sc, lctx);

	CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
	    __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
	    inp->inp_vflag);

	if (inp->inp_vflag & INP_IPV6)
		rc = create_server6(sc, lctx);
	else
		rc = create_server(sc, lctx);
	if (rc != 0) {
		log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
		    __func__, device_get_nameunit(sc->dev), rc);
		(void) listen_hash_del(sc, inp);
		inp = release_lctx(sc, lctx);
		/* can't be freed, host stack has a reference */
		KASSERT(inp != NULL, ("%s: inp freed", __func__));
		goto done;
	}
	lctx->flags |= LCTX_RPL_PENDING;
done:
#if 0
	ADAPTER_UNLOCK(sc);
#endif
	INP_WLOCK_ASSERT(inp);
	return (0);
}
int
t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
{
	struct listen_ctx *lctx;
	struct adapter *sc = tod->tod_softc;
	struct inpcb *inp = tp->t_inpcb;
	struct synq_entry *synqe;

	INP_WLOCK_ASSERT(inp);

	lctx = listen_hash_del(sc, inp);
	if (lctx == NULL)
		return (ENOENT);	/* no hardware listener for this inp */

	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
	    lctx, lctx->flags);

	/*
	 * If the reply to the PASS_OPEN is still pending we'll wait for it to
	 * arrive and clean up when it does.
	 */
	if (lctx->flags & LCTX_RPL_PENDING) {
		KASSERT(TAILQ_EMPTY(&lctx->synq),
		    ("%s: synq not empty.", __func__));
		return (EINPROGRESS);
	}

	/*
	 * The host stack will abort all the connections on the listening
	 * socket's so_comp.  It doesn't know about the connections on the synq
	 * so we need to take care of those.
	 */
	TAILQ_FOREACH(synqe, &lctx->synq, link) {
		if (synqe->flags & TPF_SYNQE_HAS_L2TE)
			send_reset_synqe(tod, synqe);
	}

	destroy_server(sc, lctx);
	return (0);
}
static inline void
hold_synqe(struct synq_entry *synqe)
{

	refcount_acquire(&synqe->refcnt);
}

static inline void
release_synqe(struct synq_entry *synqe)
{

	if (refcount_release(&synqe->refcnt)) {
		int needfree = synqe->flags & TPF_SYNQE_NEEDFREE;

		m_freem(synqe->syn);
		if (needfree)
			free(synqe, M_CXGBE);
	}
}

void
t4_syncache_added(struct toedev *tod __unused, void *arg)
{
	struct synq_entry *synqe = arg;

	hold_synqe(synqe);
}

void
t4_syncache_removed(struct toedev *tod __unused, void *arg)
{
	struct synq_entry *synqe = arg;

	release_synqe(synqe);
}
extern void tcp_dooptions(struct tcpopt *, u_char *, int, int);

int
t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
{
	struct adapter *sc = tod->tod_softc;
	struct synq_entry *synqe = arg;
	struct wrqe *wr;
	struct l2t_entry *e;
	struct tcpopt to;
	struct ip *ip = mtod(m, struct ip *);
	struct tcphdr *th;

	wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr);
	if (wr == NULL) {
		m_freem(m);
		return (EALREADY);
	}

	if (ip->ip_v == IPVERSION)
		th = (void *)(ip + 1);
	else
		th = (void *)((struct ip6_hdr *)ip + 1);
	bzero(&to, sizeof(to));
	tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
	    TO_SYN);

	/* save these for later */
	synqe->iss = be32toh(th->th_seq);
	synqe->ts = to.to_tsval;

	if (chip_id(sc) >= CHELSIO_T5) {
		struct cpl_t5_pass_accept_rpl *rpl5 = wrtod(wr);

		rpl5->iss = th->th_seq;
	}

	e = &sc->l2t->l2tab[synqe->l2e_idx];
	t4_l2t_send(sc, wr, e);

	m_freem(m);	/* don't need this any more */
	return (0);
}
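
/*
 * Ownership of the pre-allocated reply WR is handed off through synqe->wr:
 * t4_syncache_respond atomically claims it above with
 * atomic_readandclear_ptr, and do_pass_accept_req's cmpset of the same
 * pointer later detects the case where no reply was ever sent.
 */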
static int
do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1);
	int stid = GET_TID(cpl);
	unsigned int status = cpl->status;
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp = lctx->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_OPEN_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	INP_WLOCK(inp);

	CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x",
	    __func__, stid, status, lctx->flags);

	lctx->flags &= ~LCTX_RPL_PENDING;

	if (status != CPL_ERR_NONE)
		log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);

#ifdef INVARIANTS
	/*
	 * If the inp has been dropped (listening socket closed) then
	 * listen_stop must have run and taken the inp out of the hash.
	 */
	if (inp->inp_flags & INP_DROPPED) {
		KASSERT(listen_hash_del(sc, inp) == NULL,
		    ("%s: inp %p still in listen hash", __func__, inp));
	}
#endif

	if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) {
		if (release_lctx(sc, lctx) != NULL)
			INP_WUNLOCK(inp);
		return (status);
	}

	/*
	 * Listening socket stopped listening earlier and now the chip tells us
	 * it has started the hardware listener.  Stop it; the lctx will be
	 * released in do_close_server_rpl.
	 */
	if (inp->inp_flags & INP_DROPPED) {
		destroy_server(sc, lctx);
		INP_WUNLOCK(inp);
		return (status);
	}

	/*
	 * Failed to start hardware listener.  Take inp out of the hash and
	 * release our reference on it.  An error message has been logged
	 * already.
	 */
	if (status != CPL_ERR_NONE) {
		listen_hash_del(sc, inp);
		if (release_lctx(sc, lctx) != NULL)
			INP_WUNLOCK(inp);
		return (status);
	}

	/* hardware listener open for business */
	INP_WUNLOCK(inp);
	return (status);
}
static int
do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1);
	int stid = GET_TID(cpl);
	unsigned int status = cpl->status;
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp = lctx->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);

	if (status != CPL_ERR_NONE) {
		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
		    __func__, status, stid);
		return (status);
	}

	INP_WLOCK(inp);
	inp = release_lctx(sc, lctx);
	if (inp != NULL)
		INP_WUNLOCK(inp);

	return (0);
}
static void
done_with_synqe(struct adapter *sc, struct synq_entry *synqe)
{
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
	struct vi_info *vi = synqe->syn->m_pkthdr.rcvif->if_softc;
	struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];
	int ntids;

	INP_WLOCK_ASSERT(inp);
	ntids = inp->inp_vflag & INP_IPV6 ? 2 : 1;

	TAILQ_REMOVE(&lctx->synq, synqe, link);
	inp = release_lctx(sc, lctx);
	if (inp)
		INP_WUNLOCK(inp);
	remove_tid(sc, synqe->tid, ntids);
	release_tid(sc, synqe->tid, &sc->sge.ctrlq[vi->pi->port_id]);
	t4_l2t_release(e);
	release_synqe(synqe);	/* removed from synq list */
}
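
/*
 * Note the teardown order above: the synqe first comes off the lctx synq
 * (dropping the reference it held on the lctx), then the tid is removed
 * from the lookup table and returned to the hardware, and the L2T entry
 * is released last.
 */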
int
do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
	int txqid;
	struct sge_wrq *ofld_txq;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_REQ_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));

	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);

	if (negative_advice(cpl->status))
		return (0);	/* Ignore negative advice */

	INP_WLOCK(inp);

	get_qids_from_mbuf(synqe->syn, &txqid, NULL);
	ofld_txq = &sc->sge.ofld_txq[txqid];

	/*
	 * If we'd initiated an abort earlier the reply to it is responsible for
	 * cleaning up resources.  Otherwise we tear everything down right here
	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
	 */
	if (synqe->flags & TPF_ABORT_SHUTDOWN) {
		INP_WUNLOCK(inp);
		goto done;
	}

	done_with_synqe(sc, synqe);
	/* inp lock released by done_with_synqe */
done:
	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
	return (0);
}
int
do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_RPL_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));

	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);

	INP_WLOCK(inp);
	KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
	    ("%s: wasn't expecting abort reply for synqe %p (0x%x)",
	    __func__, synqe, synqe->flags));

	done_with_synqe(sc, synqe);
	/* inp lock released by done_with_synqe */

	return (0);
}
void
t4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
{
	struct adapter *sc = tod->tod_softc;
	struct synq_entry *synqe = arg;
#ifdef INVARIANTS
	struct inpcb *inp = sotoinpcb(so);
#endif
	struct cpl_pass_establish *cpl = mtod(synqe->syn, void *);
	struct toepcb *toep = *(struct toepcb **)(cpl + 1);

	INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
	INP_WLOCK_ASSERT(inp);
	KASSERT(synqe->flags & TPF_SYNQE,
	    ("%s: %p not a synq_entry?", __func__, arg));

	offload_socket(so, toep);
	make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
	toep->flags |= TPF_CPL_PENDING;
	update_tid(sc, synqe->tid, toep);
	synqe->flags |= TPF_SYNQE_EXPANDED;
}
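
/*
 * The offload txq/rxq chosen for an embryonic connection are stashed in
 * the SYN mbuf's flowid (txq in the top 16 bits, rxq in the bottom 16) so
 * that the choice survives until the connection is either expanded into a
 * full toepcb or torn down.
 */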
static inline void
save_qids_in_mbuf(struct mbuf *m, struct vi_info *vi)
{
	uint32_t txqid, rxqid;

	txqid = (arc4random() % vi->nofldtxq) + vi->first_ofld_txq;
	rxqid = (arc4random() % vi->nofldrxq) + vi->first_ofld_rxq;

	m->m_pkthdr.flowid = (txqid << 16) | (rxqid & 0xffff);
}

static inline void
get_qids_from_mbuf(struct mbuf *m, int *txqid, int *rxqid)
{

	if (txqid)
		*txqid = m->m_pkthdr.flowid >> 16;
	if (rxqid)
		*rxqid = m->m_pkthdr.flowid & 0xffff;
}
/*
 * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to
 * store some state temporarily.
 */
static struct synq_entry *
mbuf_to_synqe(struct mbuf *m)
{
	int len = roundup2(sizeof (struct synq_entry), 8);
	int tspace = M_TRAILINGSPACE(m);
	struct synq_entry *synqe = NULL;

	if (tspace < len) {
		synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT);
		if (synqe == NULL)
			return (NULL);
		synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE;
	} else {
		synqe = (void *)(m->m_data + m->m_len + tspace - len);
		synqe->flags = TPF_SYNQE;
	}

	return (synqe);
}
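
/*
 * TPF_SYNQE_NEEDFREE marks the synqe as separately malloc'd (no room in
 * the mbuf), which is what tells release_synqe whether a free() is needed
 * once the refcount drops to zero.
 */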
static void
t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
{
	bzero(to, sizeof(*to));

	if (t4opt->mss) {
		to->to_flags |= TOF_MSS;
		to->to_mss = be16toh(t4opt->mss);
	}

	if (t4opt->wsf) {
		to->to_flags |= TOF_SCALE;
		to->to_wscale = t4opt->wsf;
	}

	if (t4opt->tstamp)
		to->to_flags |= TOF_TS;

	if (t4opt->sack)
		to->to_flags |= TOF_SACKPERM;
}
/*
 * Options2 for passive open.
 */
static uint32_t
calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid,
    const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode)
{
	struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid];
	uint32_t opt2;

	opt2 = V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]) |
	    F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id);

	if (V_tcp_do_rfc1323) {
		if (tcpopt->tstamp)
			opt2 |= F_TSTAMPS_EN;
		if (tcpopt->sack)
			opt2 |= F_SACK_EN;
		if (tcpopt->wsf <= 14)
			opt2 |= F_WND_SCALE_EN;
	}

	if (V_tcp_do_ecn && th->th_flags & (TH_ECE | TH_CWR))
		opt2 |= F_CCTRL_ECN;

	/* RX_COALESCE is always a valid value (0 or M_RX_COALESCE). */
	if (is_t4(sc))
		opt2 |= F_RX_COALESCE_VALID;
	else {
		opt2 |= F_T5_OPT_2_VALID;
		opt2 |= F_CONG_CNTRL_VALID; /* OPT_2_ISS really, for T5 */
	}
	if (sc->tt.rx_coalesce)
		opt2 |= V_RX_COALESCE(M_RX_COALESCE);

#ifdef USE_DDP_RX_FLOW_CONTROL
	if (ulp_mode == ULP_MODE_TCPDDP)
		opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
#endif

	return htobe32(opt2);
}
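
/*
 * Broadly, opt2 steers the connection's ingress traffic (RSS_QUEUE points
 * at the chosen offload rxq) and enables the negotiated TCP features
 * (timestamps, SACK, window scaling, ECN) plus chip-level rx coalescing.
 */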
static void
pass_accept_req_to_protohdrs(struct adapter *sc, const struct mbuf *m,
    struct in_conninfo *inc, struct tcphdr *th)
{
	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
	const struct ether_header *eh;
	unsigned int hlen = be32toh(cpl->hdr_len);
	uintptr_t l3hdr;
	const struct tcphdr *tcp;

	eh = (const void *)(cpl + 1);
	if (chip_id(sc) >= CHELSIO_T6) {
		l3hdr = ((uintptr_t)eh + G_T6_ETH_HDR_LEN(hlen));
		tcp = (const void *)(l3hdr + G_T6_IP_HDR_LEN(hlen));
	} else {
		l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
		tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
	}

	if (inc) {
		bzero(inc, sizeof(*inc));
		inc->inc_fport = tcp->th_sport;
		inc->inc_lport = tcp->th_dport;
		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
			const struct ip *ip = (const void *)l3hdr;

			inc->inc_faddr = ip->ip_src;
			inc->inc_laddr = ip->ip_dst;
		} else {
			const struct ip6_hdr *ip6 = (const void *)l3hdr;

			inc->inc_flags |= INC_ISIPV6;
			inc->inc6_faddr = ip6->ip6_src;
			inc->inc6_laddr = ip6->ip6_dst;
		}
	}

	if (th) {
		bcopy(tcp, th, sizeof(*th));
		tcp_fields_to_host(th);		/* just like tcp_input */
	}
}
static int
ifnet_has_ip6(struct ifnet *ifp, struct in6_addr *ip6)
{
	struct ifaddr *ifa;
	struct sockaddr_in6 *sin6;
	int found = 0;
	struct in6_addr in6 = *ip6;

	/* Just as in ip6_input */
	if (in6_clearscope(&in6) || in6_clearscope(&in6))
		return (0);
	in6_setscope(&in6, ifp, NULL);

	if_addr_rlock(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		sin6 = (void *)ifa->ifa_addr;
		if (sin6->sin6_family != AF_INET6)
			continue;

		if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &in6)) {
			found = 1;
			break;
		}
	}
	if_addr_runlock(ifp);

	return (found);
}
static struct l2t_entry *
get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp,
    struct in_conninfo *inc)
{
	struct rtentry *rt;
	struct l2t_entry *e;
	struct sockaddr_in6 sin6;
	struct sockaddr *dst = (void *)&sin6;

	if (inc->inc_flags & INC_ISIPV6) {
		dst->sa_len = sizeof(struct sockaddr_in6);
		dst->sa_family = AF_INET6;
		((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;

		if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
			/* no need for route lookup */
			e = t4_l2t_get(pi, ifp, dst);
			return (e);
		}
	} else {
		dst->sa_len = sizeof(struct sockaddr_in);
		dst->sa_family = AF_INET;
		((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
	}

	rt = rtalloc1(dst, 0, 0);
	if (rt == NULL)
		return (NULL);
	else {
		struct sockaddr *nexthop;

		RT_UNLOCK(rt);
		if (rt->rt_ifp != ifp)
			e = NULL;
		else {
			if (rt->rt_flags & RTF_GATEWAY)
				nexthop = rt->rt_gateway;
			else
				nexthop = dst;
			e = t4_l2t_get(pi, ifp, nexthop);
		}
		RTFREE(rt);
	}

	return (e);
}
static int
ifnet_has_ip(struct ifnet *ifp, struct in_addr in)
{
	struct ifaddr *ifa;
	struct sockaddr_in *sin;
	int found = 0;

	if_addr_rlock(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		sin = (void *)ifa->ifa_addr;
		if (sin->sin_family != AF_INET)
			continue;

		if (sin->sin_addr.s_addr == in.s_addr) {
			found = 1;
			break;
		}
	}
	if_addr_runlock(ifp);

	return (found);
}
#define REJECT_PASS_ACCEPT()	do { \
	reject_reason = __LINE__; \
	goto reject; \
} while (0)

/*
 * The context associated with a tid entry via insert_tid could be a synq_entry
 * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
 */
CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));
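
/*
 * Roughly, the handler below parses the SYN out of the CPL, locates the
 * VI/ifnet the SYN arrived on, applies a series of "can we offload this?"
 * checks, allocates an L2T entry, a synq_entry, and the reply WR, and then
 * feeds the SYN to the host syncache.  t4_syncache_respond is expected to
 * fire during toe_syncache_add and transmit the reply; if it doesn't,
 * everything is unwound and the SYN is handed to the host stack as a
 * plain packet.
 */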
/*
 * Incoming SYN on a listening socket.
 *
 * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
 * etc.
 */
static int
do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct toedev *tod;
	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
	struct cpl_pass_accept_rpl *rpl;
	struct wrqe *wr;
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
	unsigned int tid = GET_TID(cpl);
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp;
	struct socket *so;
	struct in_conninfo inc;
	struct tcphdr th;
	struct tcpopt to;
	struct port_info *pi;
	struct vi_info *vi;
	struct ifnet *hw_ifp, *ifp;
	struct l2t_entry *e = NULL;
	int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
	struct synq_entry *synqe = NULL;
	int reject_reason, v, ntids;
	uint16_t vid;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
	    lctx);
	pass_accept_req_to_protohdrs(sc, m, &inc, &th);
	t4opt_to_tcpopt(&cpl->tcpopt, &to);

	pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];

	CURVNET_SET(lctx->vnet);

	/*
	 * Use the MAC index to lookup the associated VI.  If this SYN
	 * didn't match a perfect MAC filter, punt.
	 */
	if (!(be16toh(cpl->l2info) & F_SYN_XACT_MATCH)) {
		m_freem(m);
		m = NULL;
		REJECT_PASS_ACCEPT();
	}
	for_each_vi(pi, v, vi) {
		if (vi->xact_addr_filt == G_SYN_MAC_IDX(be16toh(cpl->l2info)))
			goto found;
	}
	m_freem(m);
	m = NULL;
	REJECT_PASS_ACCEPT();
found:
	hw_ifp = vi->ifp;	/* the (v)cxgbeX ifnet */
	m->m_pkthdr.rcvif = hw_ifp;
	tod = TOEDEV(hw_ifp);

	/*
	 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
	 * involved.  Don't offload if the SYN had a VLAN tag and the vid
	 * doesn't match anything on this interface.
	 *
	 * XXX: lagg support, lagg + vlan support.
	 */
	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
	if (vid != 0xfff) {
		ifp = VLAN_DEVAT(hw_ifp, vid);
		if (ifp == NULL)
			REJECT_PASS_ACCEPT();
	} else
		ifp = hw_ifp;

	/*
	 * Don't offload if the peer requested a TCP option that's not known to
	 * the silicon.
	 */
	if (cpl->tcpopt.unknown)
		REJECT_PASS_ACCEPT();
	if (inc.inc_flags & INC_ISIPV6) {

		/* Don't offload if the ifcap isn't enabled */
		if ((ifp->if_capenable & IFCAP_TOE6) == 0)
			REJECT_PASS_ACCEPT();

		/*
		 * SYN must be directed to an IP6 address on this ifnet.  This
		 * is more restrictive than in6_localip.
		 */
		if (!ifnet_has_ip6(ifp, &inc.inc6_laddr))
			REJECT_PASS_ACCEPT();

		ntids = 2;
	} else {

		/* Don't offload if the ifcap isn't enabled */
		if ((ifp->if_capenable & IFCAP_TOE4) == 0)
			REJECT_PASS_ACCEPT();

		/*
		 * SYN must be directed to an IP address on this ifnet.  This
		 * is more restrictive than in_localip.
		 */
		if (!ifnet_has_ip(ifp, inc.inc_laddr))
			REJECT_PASS_ACCEPT();

		ntids = 1;
	}

	/*
	 * Don't offload if the ifnet that the SYN came in on is not in the same
	 * vnet as the listening socket.
	 */
	if (lctx->vnet != ifp->if_vnet)
		REJECT_PASS_ACCEPT();
	e = get_l2te_for_nexthop(pi, ifp, &inc);
	if (e == NULL)
		REJECT_PASS_ACCEPT();

	synqe = mbuf_to_synqe(m);
	if (synqe == NULL)
		REJECT_PASS_ACCEPT();

	wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) :
	    sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[pi->port_id]);
	if (wr == NULL)
		REJECT_PASS_ACCEPT();
	rpl = wrtod(wr);

	INP_INFO_RLOCK(&V_tcbinfo);	/* for 4-tuple check */

	/* Don't offload if the 4-tuple is already in use */
	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
		INP_INFO_RUNLOCK(&V_tcbinfo);
		free(wr, M_CXGBE);
		REJECT_PASS_ACCEPT();
	}
	INP_INFO_RUNLOCK(&V_tcbinfo);
	inp = lctx->inp;		/* listening socket, not owned by TOE */
	INP_WLOCK(inp);

	/* Don't offload if the listening socket has closed */
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		/*
		 * The listening socket has closed.  The reply from the TOE to
		 * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
		 * resources tied to this listen context.
		 */
		INP_WUNLOCK(inp);
		free(wr, M_CXGBE);
		REJECT_PASS_ACCEPT();
	}
	so = inp->inp_socket;

	mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
	rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
	SOCKBUF_LOCK(&so->so_rcv);
	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
	rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
	SOCKBUF_UNLOCK(&so->so_rcv);

	save_qids_in_mbuf(m, vi);
	get_qids_from_mbuf(m, NULL, &rxqid);

	if (is_t4(sc))
		INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
	else {
		struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;

		INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
	}
	if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) {
		ulp_mode = ULP_MODE_TCPDDP;
		synqe->flags |= TPF_SYNQE_TCPDDP;
	} else
		ulp_mode = ULP_MODE_NONE;
	rpl->opt0 = calc_opt0(so, vi, e, mtu_idx, rscale, rx_credits, ulp_mode);
	rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode);
	synqe->tid = tid;
	synqe->lctx = lctx;
	synqe->syn = m;
	m = NULL;
	refcount_init(&synqe->refcnt, 1);	/* 1 means extra hold */
	synqe->l2e_idx = e->idx;
	synqe->rcv_bufsize = rx_credits;
	atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);

	insert_tid(sc, tid, synqe, ntids);
	TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
	hold_synqe(synqe);	/* hold for the duration it's in the synq */
	hold_lctx(lctx);	/* A synqe on the list has a ref on its lctx */

	/*
	 * If all goes well t4_syncache_respond will get called during
	 * syncache_add.  Note that syncache_add releases the pcb lock.
	 */
	toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
	INP_UNLOCK_ASSERT(inp);	/* ok to assert, we have a ref on the inp */

	/*
	 * If we replied during syncache_add (synqe->wr has been consumed),
	 * good.  Otherwise, set it to 0 so that further syncache_respond
	 * attempts by the kernel will be ignored.
	 */
	if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) {

		/*
		 * syncache may or may not have a hold on the synqe, which may
		 * or may not be stashed in the original SYN mbuf passed to us.
		 * Just copy it over instead of dealing with all possibilities.
		 */
		m = m_dup(synqe->syn, M_NOWAIT);
		if (m)
			m->m_pkthdr.rcvif = hw_ifp;

		remove_tid(sc, synqe->tid, ntids);
		free(wr, M_CXGBE);

		/* Yank the synqe out of the lctx synq. */
		INP_WLOCK(inp);
		TAILQ_REMOVE(&lctx->synq, synqe, link);
		release_synqe(synqe);	/* removed from synq list */
		inp = release_lctx(sc, lctx);
		if (inp)
			INP_WUNLOCK(inp);

		release_synqe(synqe);	/* extra hold */
		REJECT_PASS_ACCEPT();
	}
	CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK",
	    __func__, stid, tid, lctx, synqe);

	INP_WLOCK(inp);
	synqe->flags |= TPF_SYNQE_HAS_L2TE;
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		/*
		 * Listening socket closed but tod_listen_stop did not abort
		 * this tid because there was no L2T entry for the tid at that
		 * time.  Abort it now.  The reply to the abort will clean up.
		 */
		CTR6(KTR_CXGBE,
		    "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT",
		    __func__, stid, tid, lctx, synqe, synqe->flags);
		if (!(synqe->flags & TPF_SYNQE_EXPANDED))
			send_reset_synqe(tod, synqe);
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();

		release_synqe(synqe);	/* extra hold */
		return (__LINE__);
	}
	INP_WUNLOCK(inp);
	CURVNET_RESTORE();

	release_synqe(synqe);	/* extra hold */
	return (0);
reject:
	CURVNET_RESTORE();
	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
	    reject_reason);

	if (e)
		t4_l2t_release(e);
	release_tid(sc, tid, lctx->ctrlq);

	/* Hand the SYN to the host stack as a plain packet instead. */
	if (__predict_true(m != NULL)) {
		m_adj(m, sizeof(*cpl));
		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
		hw_ifp->if_input(hw_ifp, m);
	}

	return (reject_reason);
}
static void
synqe_to_protohdrs(struct adapter *sc, struct synq_entry *synqe,
    const struct cpl_pass_establish *cpl, struct in_conninfo *inc,
    struct tcphdr *th, struct tcpopt *to)
{
	uint16_t tcp_opt = be16toh(cpl->tcp_opt);

	/* start off with the original SYN */
	pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th);

	/* modify parts to make it look like the ACK to our SYN|ACK */
	th->th_flags = TH_ACK;
	th->th_ack = synqe->iss + 1;
	th->th_seq = be32toh(cpl->rcv_isn);
	bzero(to, sizeof(*to));
	if (G_TCPOPT_TSTAMP(tcp_opt)) {
		to->to_flags |= TOF_TS;
		to->to_tsecr = synqe->ts;
	}
}
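
/*
 * The header synthesized above is what syncache_expand will validate: it
 * is made to look like the peer's ACK completing the 3-way handshake, with
 * th_ack acknowledging our ISS and th_seq taken from the rcv_isn reported
 * by the chip in the PASS_ESTABLISH.
 */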
static int
do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct vi_info *vi;
	struct ifnet *ifp;
	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
#if defined(KTR) || defined(INVARIANTS)
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
#endif
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp, *new_inp;
	struct socket *so;
	struct tcphdr th;
	struct tcpopt to;
	struct in_conninfo inc;
	struct toepcb *toep;
	u_int txqid, rxqid;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_ESTABLISH,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
	KASSERT(synqe->flags & TPF_SYNQE,
	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));

	CURVNET_SET(lctx->vnet);
	INP_INFO_RLOCK(&V_tcbinfo);	/* for syncache_expand */
	INP_WLOCK(inp);

	CTR6(KTR_CXGBE,
	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {

		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		}

		INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return (0);
	}

	ifp = synqe->syn->m_pkthdr.rcvif;
	vi = ifp->if_softc;
	KASSERT(vi->pi->adapter == sc,
	    ("%s: vi %p, sc %p mismatch", __func__, vi, sc));

	get_qids_from_mbuf(synqe->syn, &txqid, &rxqid);
	KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
	    ("%s: CPL arrived on unexpected rxq. %d %d", __func__, rxqid,
	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));

	toep = alloc_toepcb(vi, txqid, rxqid, M_NOWAIT);
	if (toep == NULL) {
reset:
		/*
		 * The reply to this abort will perform final cleanup.  There is
		 * no need to check for HAS_L2TE here.  We can be here only if
		 * we responded to the PASS_ACCEPT_REQ, and our response had the
		 * L2T idx.
		 */
		send_reset_synqe(TOEDEV(ifp), synqe);
		INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return (0);
	}
	toep->tid = tid;
	toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx];
	if (synqe->flags & TPF_SYNQE_TCPDDP)
		set_tcpddp_ulp_mode(toep);
	else
		toep->ulp_mode = ULP_MODE_NONE;
	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
	toep->rx_credits = synqe->rcv_bufsize;

	so = inp->inp_socket;
	KASSERT(so != NULL, ("%s: socket is NULL", __func__));

	/* Come up with something that syncache_expand should be ok with. */
	synqe_to_protohdrs(sc, synqe, cpl, &inc, &th, &to);

	/*
	 * No more need for anything in the mbuf that carried the
	 * CPL_PASS_ACCEPT_REQ.  Drop the CPL_PASS_ESTABLISH and toep pointer
	 * there.  XXX: bad form but I don't want to increase the size of synqe.
	 */
	m = synqe->syn;
	KASSERT(sizeof(*cpl) + sizeof(toep) <= m->m_len,
	    ("%s: no room in mbuf %p (m_len %d)", __func__, m, m->m_len));
	bcopy(cpl, mtod(m, void *), sizeof(*cpl));
	*(struct toepcb **)(mtod(m, struct cpl_pass_establish *) + 1) = toep;

	if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
		free_toepcb(toep);
		goto reset;
	}
	/* New connection inpcb is already locked by syncache_expand(). */
	new_inp = sotoinpcb(so);
	INP_WLOCK_ASSERT(new_inp);
	MPASS(so->so_vnet == lctx->vnet);
	toep->vnet = lctx->vnet;

	/*
	 * This is for the unlikely case where the syncache entry that we added
	 * has been evicted from the syncache, but the syncache_expand above
	 * works because of syncookies.
	 *
	 * XXX: we've held the tcbinfo lock throughout so there's no risk of
	 * anyone accept'ing a connection before we've installed our hooks, but
	 * this somewhat defeats the purpose of having a tod_offload_socket :-(
	 */
	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
		t4_offload_socket(TOEDEV(ifp), synqe, so);
	}

	INP_WUNLOCK(new_inp);

	/* Done with the synqe */
	TAILQ_REMOVE(&lctx->synq, synqe, link);
	inp = release_lctx(sc, lctx);
	if (inp != NULL)
		INP_WUNLOCK(inp);
	INP_INFO_RUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
	release_synqe(synqe);

	return (0);
}
void
t4_init_listen_cpl_handlers(void)
{

	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
	t4_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
}

void
t4_uninit_listen_cpl_handlers(void)
{

	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, NULL);
	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, NULL);
	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, NULL);
	t4_register_cpl_handler(CPL_PASS_ESTABLISH, NULL);
}
#endif