2 * Copyright (c) 2012 Chelsio Communications, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/refcount.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/sysctl.h>
39 #include <net/route.h>
40 #include <netinet/in.h>
41 #include <netinet/ip.h>
42 #include <netinet/in_pcb.h>
43 #include <netinet/in_var.h>
44 #include <netinet/tcp_timer.h>
45 #include <netinet/tcp_var.h>
47 #include <netinet/tcp_fsm.h>
48 #include <netinet/toecore.h>
50 #include "cxgb_include.h"
51 #include "ulp/tom/cxgb_tom.h"
52 #include "ulp/tom/cxgb_l2t.h"
53 #include "ulp/tom/cxgb_toepcb.h"
55 static void t3_send_reset_synqe(struct toedev *, struct synq_entry *);
/*
 * Allocate a server TID (stid) for a listening context.  Pops the head of
 * the stid free list under stid_lock; the stid value is the entry's offset
 * within stid_tab plus the device's stid_base.
 * NOTE(review): the empty-free-list path and the association of ctx with
 * the entry are not visible here -- confirm behavior when t->sfree is NULL.
 */
58 alloc_stid(struct tid_info *t, void *ctx)
62 mtx_lock(&t->stid_lock);
64 union listen_entry *p = t->sfree;
/* stid = index of the free entry in stid_tab, biased by stid_base. */
66 stid = (p - t->stid_tab) + t->stid_base;
71 mtx_unlock(&t->stid_lock);
/*
 * Return a server TID to the stid free list.  stid2entry maps the stid
 * back to its slot in stid_tab; the actual list insertion is performed
 * under stid_lock.
 */
76 free_stid(struct tid_info *t, int stid)
78 union listen_entry *p = stid2entry(t, stid);
80 mtx_lock(&t->stid_lock);
84 mtx_unlock(&t->stid_lock);
/*
 * Allocate and initialize a listen context for the given (write-locked)
 * listening inpcb.  Allocation is M_NOWAIT, so this can fail and callers
 * must check for NULL; an stid is reserved for the context and the
 * refcount starts at 1 (the caller's reference).
 * NOTE(review): the NULL checks after malloc/alloc_stid are elided here --
 * confirm the failure paths free partially-initialized state.
 */
87 static struct listen_ctx *
88 alloc_lctx(struct tom_data *td, struct inpcb *inp, int qset)
90 struct listen_ctx *lctx;
92 INP_WLOCK_ASSERT(inp);
94 lctx = malloc(sizeof(struct listen_ctx), M_CXGB, M_NOWAIT | M_ZERO);
/* Reserve a hardware server TID, with this lctx as its context. */
98 lctx->stid = alloc_stid(&td->tid_maps, lctx);
108 refcount_init(&lctx->refcnt, 1);
109 TAILQ_INIT(&lctx->synq);
114 /* Don't call this directly, use release_lctx instead */
/*
 * Final teardown of a listen context: requires refcnt == 0, an empty synq
 * and a valid stid.  Returns the stid to the free pool and drops the
 * lctx's reference on the (write-locked) inp via in_pcbrele_wlocked;
 * the return value indicates whether the inp itself was freed.
 */
116 free_lctx(struct tom_data *td, struct listen_ctx *lctx)
118 struct inpcb *inp = lctx->inp;
120 INP_WLOCK_ASSERT(inp);
121 KASSERT(lctx->refcnt == 0,
122 ("%s: refcnt %d", __func__, lctx->refcnt));
123 KASSERT(TAILQ_EMPTY(&lctx->synq),
124 ("%s: synq not empty.", __func__));
125 KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
127 CTR4(KTR_CXGB, "%s: stid %u, lctx %p, inp %p",
128 __func__, lctx->stid, lctx, lctx->inp);
130 free_stid(&td->tid_maps, lctx->stid);
/* Non-zero return means the inp was freed along with its lock. */
133 return in_pcbrele_wlocked(inp);
/* Take an additional reference on a listen context. */
137 hold_lctx(struct listen_ctx *lctx)
140 refcount_acquire(&lctx->refcnt);
/*
 * Hash bucket for a listen-hash key.  Note this hashes the pointer VALUE
 * itself (fnv_32_buf over &key), which is fine since lookups compare the
 * same inp pointer.
 */
143 static inline uint32_t
144 listen_hashfn(void *key, u_long mask)
147 return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask);
151 * Add a listen_ctx entry to the listen hash table.
/* Keyed by the listening inp pointer; protected by lctx_hash_lock. */
154 listen_hash_add(struct tom_data *td, struct listen_ctx *lctx)
156 int bucket = listen_hashfn(lctx->inp, td->listen_mask);
158 mtx_lock(&td->lctx_hash_lock);
159 LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
161 mtx_unlock(&td->lctx_hash_lock);
165 * Look for the listening socket's context entry in the hash and return it.
/*
 * Does not take a reference on the returned lctx; returns NULL (falls off
 * the list) when the inp has no hardware listener.
 */
167 static struct listen_ctx *
168 listen_hash_find(struct tom_data *td, struct inpcb *inp)
170 int bucket = listen_hashfn(inp, td->listen_mask);
171 struct listen_ctx *lctx;
173 mtx_lock(&td->lctx_hash_lock);
174 LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
175 if (lctx->inp == inp)
178 mtx_unlock(&td->lctx_hash_lock);
184 * Removes the listen_ctx structure for inp from the hash and returns it.
/* Returns NULL if no entry for inp was found; caller owns the reference. */
186 static struct listen_ctx *
187 listen_hash_del(struct tom_data *td, struct inpcb *inp)
189 int bucket = listen_hashfn(inp, td->listen_mask);
190 struct listen_ctx *lctx, *l;
192 mtx_lock(&td->lctx_hash_lock);
/* SAFE variant because the matching entry is unlinked inside the loop. */
193 LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
194 if (lctx->inp == inp) {
195 LIST_REMOVE(lctx, link);
200 mtx_unlock(&td->lctx_hash_lock);
206 * Releases a hold on the lctx. Must be called with the listening socket's inp
207 * locked. The inp may be freed by this function and it returns NULL to
/*
 * ... indicate that.  On refcount reaching zero, free_lctx() tears the
 * context down and may release the last inp reference; otherwise the
 * still-locked inp is returned to the caller.
 */
210 static struct inpcb *
211 release_lctx(struct tom_data *td, struct listen_ctx *lctx)
213 struct inpcb *inp = lctx->inp;
216 INP_WLOCK_ASSERT(inp);
217 if (refcount_release(&lctx->refcnt))
218 inp_freed = free_lctx(td, lctx);
220 return (inp_freed ? NULL : inp);
/*
 * Start a hardware listener: build a CPL_PASS_OPEN_REQ for the lctx's stid
 * using the listening inp's local port/address and send it to the chip on
 * the control queue.  The chip answers with CPL_PASS_OPEN_RPL
 * (see do_pass_open_rpl).
 * NOTE(review): the mbuf-allocation failure path is elided here -- confirm
 * a non-zero error is returned when M_GETHDR_OFLD fails.
 */
224 create_server(struct adapter *sc, struct listen_ctx *lctx)
227 struct cpl_pass_open_req *req;
228 struct inpcb *inp = lctx->inp;
230 m = M_GETHDR_OFLD(lctx->qset, CPL_PRIORITY_CONTROL, req);
234 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
235 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
/* inp_lport/inp_laddr are already in network byte order. */
236 req->local_port = inp->inp_lport;
237 memcpy(&req->local_ip, &inp->inp_laddr, 4);
240 req->peer_netmask = 0;
241 req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
242 req->opt0l = htonl(V_RCV_BUFSIZ(16));
/* Ask the host (CPL_PASS_ACCEPT_REQ) before accepting each connection. */
243 req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
245 t3_offload_tx(sc, m);
/*
 * Stop a hardware listener: send CPL_CLOSE_LISTSRV_REQ for the lctx's
 * stid.  Cleanup completes in do_close_server_rpl when the chip replies.
 */
251 destroy_server(struct adapter *sc, struct listen_ctx *lctx)
254 struct cpl_close_listserv_req *req;
256 m = M_GETHDR_OFLD(lctx->qset, CPL_PRIORITY_CONTROL, req);
260 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
261 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
265 t3_offload_tx(sc, m);
271 * Process a CPL_CLOSE_LISTSRV_RPL message. If the status is good we release
/*
 * ... the lctx (its final reference) and with it the hardware listener
 * state; on error we only log.  The inp must already be out of the listen
 * hash by the time this reply arrives.
 */
275 do_close_server_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
277 struct adapter *sc = qs->adap;
278 struct tom_data *td = sc->tom_softc;
279 struct cpl_close_listserv_rpl *rpl = mtod(m, void *);
280 unsigned int stid = GET_TID(rpl);
281 struct listen_ctx *lctx = lookup_stid(&td->tid_maps, stid);
282 struct inpcb *inp = lctx->inp;
284 CTR3(KTR_CXGB, "%s: stid %u, status %u", __func__, stid, rpl->status);
286 if (rpl->status != CPL_ERR_NONE) {
287 log(LOG_ERR, "%s: failed (%u) to close listener for stid %u",
288 __func__, rpl->status, stid);
/* listen_stop already removed the inp from the hash; just verify. */
291 KASSERT(listen_hash_del(td, lctx->inp) == NULL,
292 ("%s: inp %p still in listen hash", __func__, inp));
293 if (release_lctx(td, lctx) != NULL)
302 * Process a CPL_PASS_OPEN_RPL message. Remove the lctx from the listen hash
303 * table and free it if there was any error, otherwise nothing to do.
306 do_pass_open_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
308 struct adapter *sc = qs->adap;
309 struct tom_data *td = sc->tom_softc;
310 struct cpl_pass_open_rpl *rpl = mtod(m, void *);
311 int stid = GET_TID(rpl);
312 struct listen_ctx *lctx;
316 * We get these replies also when setting up HW filters. Just throw
/* Filter replies use tids beyond the stid range; ignore them here. */
319 if (stid >= td->tid_maps.stid_base + td->tid_maps.nstids)
322 lctx = lookup_stid(&td->tid_maps, stid);
327 CTR4(KTR_CXGB, "%s: stid %u, status %u, flags 0x%x",
328 __func__, stid, rpl->status, lctx->flags);
/* The reply has arrived; t3_listen_stop no longer needs to wait for it. */
330 lctx->flags &= ~LCTX_RPL_PENDING;
332 if (rpl->status != CPL_ERR_NONE) {
333 log(LOG_ERR, "%s: %s: hw listen (stid %d) failed: %d\n",
334 __func__, device_get_nameunit(sc->dev), stid, rpl->status);
339 * If the inp has been dropped (listening socket closed) then
340 * listen_stop must have run and taken the inp out of the hash.
342 if (inp->inp_flags & INP_DROPPED) {
343 KASSERT(listen_hash_del(td, inp) == NULL,
344 ("%s: inp %p still in listen hash", __func__, inp));
/* Dropped listener AND a failed open: just release our lctx reference. */
348 if (inp->inp_flags & INP_DROPPED && rpl->status != CPL_ERR_NONE) {
349 if (release_lctx(td, lctx) != NULL)
355 * Listening socket stopped listening earlier and now the chip tells us
356 * it has started the hardware listener. Stop it; the lctx will be
357 * released in do_close_server_rpl.
359 if (inp->inp_flags & INP_DROPPED) {
360 destroy_server(sc, lctx);
366 * Failed to start hardware listener. Take inp out of the hash and
367 * release our reference on it. An error message has been logged
370 if (rpl->status != CPL_ERR_NONE) {
371 listen_hash_del(td, inp);
372 if (release_lctx(td, lctx) != NULL)
377 /* hardware listener open for business */
/*
 * Synthesize the protocol structures (in_conninfo, tcphdr, tcpopt) for the
 * incoming SYN described by a CPL_PASS_ACCEPT_REQ, so it can be fed to the
 * host stack's syncache.  Ports/addresses stay in network byte order;
 * sequence numbers are converted to host order as tcp_fields_to_host would.
 */
386 pass_accept_req_to_protohdrs(const struct cpl_pass_accept_req *cpl,
387 struct in_conninfo *inc, struct tcphdr *th, struct tcpopt *to)
389 const struct tcp_options *t3opt = &cpl->tcp_options;
391 bzero(inc, sizeof(*inc));
392 inc->inc_faddr.s_addr = cpl->peer_ip;
393 inc->inc_laddr.s_addr = cpl->local_ip;
394 inc->inc_fport = cpl->peer_port;
395 inc->inc_lport = cpl->local_port;
397 bzero(th, sizeof(*th));
398 th->th_sport = cpl->peer_port;
399 th->th_dport = cpl->local_port;
400 th->th_seq = be32toh(cpl->rcv_isn); /* as in tcp_fields_to_host */
401 th->th_flags = TH_SYN;
/* Translate the TCP options the chip parsed out of the SYN. */
403 bzero(to, sizeof(*to));
405 to->to_flags |= TOF_MSS;
406 to->to_mss = be16toh(t3opt->mss);
409 to->to_flags |= TOF_SCALE;
410 to->to_wscale = t3opt->wsf;
413 to->to_flags |= TOF_TS;
415 to->to_flags |= TOF_SACKPERM;
/* Take an additional reference on a syn queue entry. */
419 hold_synqe(struct synq_entry *synqe)
422 refcount_acquire(&synqe->refcnt);
/*
 * Drop a reference on a syn queue entry; the final release frees the
 * backing storage (the entry lives in an mbuf, see mbuf_to_synq_entry).
 */
426 release_synqe(struct synq_entry *synqe)
429 if (refcount_release(&synqe->refcnt))
434 * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to
435 * store some state temporarily. There will be enough room in the mbuf's
436 * trailing space as the CPL is not that large.
/*
 * Returns a pointer to an 8-byte-aligned synq_entry carved out of the END
 * of the mbuf's data buffer (external storage or the pkthdr data area).
 * Panics if the trailing space cannot hold the entry.
 */
440 static struct synq_entry *
441 mbuf_to_synq_entry(struct mbuf *m)
443 int len = roundup(sizeof (struct synq_entry), 8);
447 if (__predict_false(M_TRAILINGSPACE(m) < len)) {
448 panic("%s: no room for synq_entry (%td, %d)\n", __func__,
449 M_TRAILINGSPACE(m), len);
/* Find the start and size of the buffer that backs this mbuf. */
452 if (m->m_flags & M_EXT) {
453 buf = m->m_ext.ext_buf;
454 buflen = m->m_ext.ext_size;
455 } else if (m->m_flags & M_PKTHDR) {
456 buf = &m->m_pktdat[0];
/* The entry occupies the last len bytes of the buffer. */
463 return ((void *)(buf + buflen - len));
/*
 * Bail out of do_pass_accept_req and refuse the connection.  The debug
 * variant records __LINE__ as the reject reason for the KTR trace.
 */
467 #define REJECT_PASS_ACCEPT() do { \
468 reject_reason = __LINE__; \
472 #define REJECT_PASS_ACCEPT() do { goto reject; } while (0)
476 * The context associated with a tid entry via insert_tid could be a synq_entry
477 * or a toepcb. The only way CPL handlers can tell is via a bit in these flags.
479 CTASSERT(offsetof(struct toepcb, tp_flags) == offsetof(struct synq_entry, flags));
482 * Handle a CPL_PASS_ACCEPT_REQ message.
/*
 * An incoming SYN landed on one of our hardware listeners.  Decide whether
 * to offload the connection: verify the ingress interface is TOE-capable,
 * the route back to the peer uses the same interface, and the 4-tuple is
 * free; then stash a synq_entry in the request mbuf and hand the SYN to
 * the host stack's syncache.  t3_syncache_respond (called from within
 * toe_syncache_add) sends the CPL_PASS_ACCEPT_RPL.  Any failure rejects
 * the connection via REJECT_PASS_ACCEPT().
 */
485 do_pass_accept_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
487 struct adapter *sc = qs->adap;
488 struct tom_data *td = sc->tom_softc;
489 struct toedev *tod = &td->tod;
490 const struct cpl_pass_accept_req *req = mtod(m, void *);
491 unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
492 unsigned int tid = GET_TID(req);
493 struct listen_ctx *lctx = lookup_stid(&td->tid_maps, stid);
494 struct l2t_entry *e = NULL;
495 struct sockaddr_in nam;
499 struct port_info *pi;
501 struct in_conninfo inc;
504 struct synq_entry *synqe = NULL;
510 CTR4(KTR_CXGB, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
513 pass_accept_req_to_protohdrs(req, &inc, &th, &to);
516 * Don't offload if the interface that received the SYN doesn't have
/* Match the destination MAC against each port to find the ingress port. */
520 for_each_port(sc, i) {
521 if (memcmp(sc->port[i].hw_addr, req->dst_mac, ETHER_ADDR_LEN))
527 REJECT_PASS_ACCEPT();
529 if ((ifp->if_capenable & IFCAP_TOE4) == 0)
530 REJECT_PASS_ACCEPT();
533 * Don't offload if the outgoing interface for the route back to the
534 * peer is not the same as the interface that received the SYN.
536 bzero(&nam, sizeof(nam));
537 nam.sin_len = sizeof(nam);
538 nam.sin_family = AF_INET;
539 nam.sin_addr = inc.inc_faddr;
540 rt = rtalloc1((struct sockaddr *)&nam, 0, 0);
542 REJECT_PASS_ACCEPT();
544 struct sockaddr *nexthop;
/* L2 resolution target: the gateway if routed, else the peer itself. */
547 nexthop = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway :
548 (struct sockaddr *)&nam;
549 if (rt->rt_ifp == ifp)
550 e = t3_l2t_get(pi, rt->rt_ifp, nexthop);
553 REJECT_PASS_ACCEPT(); /* no l2te, or ifp mismatch */
556 INP_INFO_WLOCK(&V_tcbinfo);
558 /* Don't offload if the 4-tuple is already in use */
559 if (toe_4tuple_check(&inc, &th, ifp) != 0) {
560 INP_INFO_WUNLOCK(&V_tcbinfo);
561 REJECT_PASS_ACCEPT();
564 inp = lctx->inp; /* listening socket (not owned by the TOE) */
566 if (__predict_false(inp->inp_flags & INP_DROPPED)) {
568 * The listening socket has closed. The reply from the TOE to
569 * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
570 * resources tied to this listen context.
573 INP_INFO_WUNLOCK(&V_tcbinfo);
574 REJECT_PASS_ACCEPT();
576 so = inp->inp_socket;
578 /* Reuse the mbuf that delivered the CPL to us */
579 synqe = mbuf_to_synq_entry(m);
580 synqe->flags = TP_IS_A_SYNQ_ENTRY;
585 synqe->opt0h = calc_opt0h(so, 0, 0, e);
/* Spread connections across the port's qsets. */
586 synqe->qset = pi->first_qset + (arc4random() % pi->nqsets);
587 SOCKBUF_LOCK(&so->so_rcv);
588 synqe->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
589 SOCKBUF_UNLOCK(&so->so_rcv);
590 refcount_init(&synqe->refcnt, 1);
591 atomic_store_rel_int(&synqe->reply, RPL_OK);
593 insert_tid(td, synqe, tid);
594 TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
598 /* syncache_add releases both pcbinfo and pcb locks */
599 toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
600 INP_UNLOCK_ASSERT(inp);
601 INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
604 * If we replied during syncache_add (reply is RPL_DONE), good.
605 * Otherwise (reply is unchanged - RPL_OK) it's no longer ok to reply.
606 * The mbuf will stick around as long as the entry is in the syncache.
607 * The kernel is free to retry syncache_respond but we'll ignore it due
/* Race with t3_syncache_respond: whoever swaps reply first wins. */
610 if (atomic_cmpset_int(&synqe->reply, RPL_OK, RPL_DONT)) {
613 if (__predict_false(inp->inp_flags & INP_DROPPED)) {
614 /* listener closed. synqe must have been aborted. */
615 KASSERT(synqe->flags & TP_ABORT_SHUTDOWN,
616 ("%s: listener %p closed but synqe %p not aborted",
617 __func__, inp, synqe));
620 "%s: stid %u, tid %u, lctx %p, synqe %p, ABORTED",
621 __func__, stid, tid, lctx, synqe);
623 release_synqe(synqe);
627 KASSERT(!(synqe->flags & TP_ABORT_SHUTDOWN),
628 ("%s: synqe %p aborted, but listener %p not dropped.",
629 __func__, synqe, inp));
631 TAILQ_REMOVE(&lctx->synq, synqe, link);
632 release_synqe(synqe); /* removed from synq list */
633 inp = release_lctx(td, lctx);
637 release_synqe(synqe); /* about to exit function */
638 REJECT_PASS_ACCEPT();
641 KASSERT(synqe->reply == RPL_DONE,
642 ("%s: reply %d", __func__, synqe->reply));
644 CTR3(KTR_CXGB, "%s: stid %u, tid %u, OK", __func__, stid, tid);
645 release_synqe(synqe);
/* Reject path: undo the L2/tid state and tell the chip to drop the SYN. */
649 CTR4(KTR_CXGB, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
655 l2t_release(td->l2t, e);
656 queue_tid_release(tod, tid);
/*
 * Synthesize in_conninfo/tcphdr/tcpopt for the peer's final ACK described
 * by a CPL_PASS_ESTABLISH, for use with syncache_expand.  Only the
 * timestamp option survives into tcp_opt at this stage.
 */
662 pass_establish_to_protohdrs(const struct cpl_pass_establish *cpl,
663 struct in_conninfo *inc, struct tcphdr *th, struct tcpopt *to)
665 uint16_t tcp_opt = be16toh(cpl->tcp_opt);
667 bzero(inc, sizeof(*inc));
668 inc->inc_faddr.s_addr = cpl->peer_ip;
669 inc->inc_laddr.s_addr = cpl->local_ip;
670 inc->inc_fport = cpl->peer_port;
671 inc->inc_lport = cpl->local_port;
673 bzero(th, sizeof(*th));
674 th->th_sport = cpl->peer_port;
675 th->th_dport = cpl->local_port;
676 th->th_flags = TH_ACK;
677 th->th_seq = be32toh(cpl->rcv_isn); /* as in tcp_fields_to_host */
678 th->th_ack = be32toh(cpl->snd_isn); /* ditto */
680 bzero(to, sizeof(*to));
681 if (G_TCPOPT_TSTAMP(tcp_opt))
682 to->to_flags |= TOF_TS;
686 * Process a CPL_PASS_ESTABLISH message. The T3 has already established a
687 * connection and we need to do the software side setup.
/*
 * Expands the syncache entry into a full socket (syncache_expand via
 * toe_syncache_expand), binds a freshly allocated toepcb to it, and
 * removes the synq entry from the listen context.
 */
690 do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
692 struct adapter *sc = qs->adap;
693 struct tom_data *td = sc->tom_softc;
694 struct cpl_pass_establish *cpl = mtod(m, void *);
695 struct toedev *tod = &td->tod;
696 unsigned int tid = GET_TID(cpl);
697 struct synq_entry *synqe = lookup_tid(&td->tid_maps, tid);
700 struct listen_ctx *lctx = synqe->lctx;
701 struct inpcb *inp = lctx->inp;
704 struct in_conninfo inc;
706 int stid = G_PASS_OPEN_TID(ntohl(cpl->tos_tid));
709 CTR5(KTR_CXGB, "%s: stid %u, tid %u, lctx %p, inp_flags 0x%x",
710 __func__, stid, tid, lctx, inp->inp_flags);
712 KASSERT(qs->idx == synqe->qset,
713 ("%s qset mismatch %d %d", __func__, qs->idx, synqe->qset));
715 INP_INFO_WLOCK(&V_tcbinfo); /* for syncache_expand */
718 if (__predict_false(inp->inp_flags & INP_DROPPED)) {
720 * The listening socket has closed. The TOM must have aborted
721 * all the embryonic connections (including this one) that were
722 * on the lctx's synq. do_abort_rpl for the tid is responsible
725 KASSERT(synqe->flags & TP_ABORT_SHUTDOWN,
726 ("%s: listen socket dropped but tid %u not aborted.",
729 INP_INFO_WUNLOCK(&V_tcbinfo);
734 pass_establish_to_protohdrs(cpl, &inc, &th, &to);
736 /* Lie in order to pass the checks in syncache_expand */
737 to.to_tsecr = synqe->ts;
738 th.th_ack = synqe->iss + 1;
740 toep = toepcb_alloc(tod);
/* No toepcb available: reset the embryonic connection. */
743 t3_send_reset_synqe(tod, synqe);
745 INP_INFO_WUNLOCK(&V_tcbinfo);
749 toep->tp_qset = qs->idx;
750 toep->tp_l2t = synqe->e;
752 toep->tp_rx_credits = synqe->rx_credits;
757 so = inp->inp_socket;
758 if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
/*
 * If t3_offload_socket didn't run from within syncache_expand, finish
 * the offload of the new socket here.
 */
763 if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
764 struct inpcb *new_inp = sotoinpcb(so);
767 tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
768 t3_offload_socket(tod, synqe, so);
769 INP_WUNLOCK(new_inp);
772 /* Remove the synq entry and release its reference on the lctx */
773 TAILQ_REMOVE(&lctx->synq, synqe, link);
774 inp = release_lctx(td, lctx);
777 INP_INFO_WUNLOCK(&V_tcbinfo);
778 release_synqe(synqe);
/* Register the listen-side CPL message handlers with the adapter. */
785 t3_init_listen_cpl_handlers(struct adapter *sc)
787 t3_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl);
788 t3_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
789 t3_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
790 t3_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish);
794 * Start a listening server by sending a passive open request to HW.
796 * Can't take adapter lock here and access to sc->flags, sc->open_device_map,
797 * sc->offload_map, if_capenable are all race prone.
/*
 * Called from the host stack (via the toedev) when a socket enters LISTEN.
 * Allocates a listen context, inserts it into the listen hash, and sends
 * CPL_PASS_OPEN_REQ; LCTX_RPL_PENDING stays set until the reply arrives.
 * IPv4 only (IFCAP_TOE4).
 */
800 t3_listen_start(struct toedev *tod, struct tcpcb *tp)
802 struct tom_data *td = t3_tomdata(tod);
803 struct adapter *sc = tod->tod_softc;
804 struct port_info *pi;
805 struct inpcb *inp = tp->t_inpcb;
806 struct listen_ctx *lctx;
809 INP_WLOCK_ASSERT(inp);
811 if ((inp->inp_vflag & INP_IPV4) == 0)
817 log(LOG_ERR, "%s: listen request ignored, %s is busy",
818 __func__, device_get_nameunit(sc->dev));
822 KASSERT(sc->flags & TOM_INIT_DONE,
823 ("%s: TOM not initialized", __func__));
826 if ((sc->open_device_map & sc->offload_map) == 0)
827 goto done; /* no port that's UP with IFCAP_TOE enabled */
830 * Find a running port with IFCAP_TOE4. We'll use the first such port's
831 * queues to send the passive open and receive the reply to it.
833 * XXX: need a way to mark an port in use by offload. if_cxgbe should
834 * then reject any attempt to bring down such a port (and maybe reject
835 * attempts to disable IFCAP_TOE on that port too?).
837 for_each_port(sc, i) {
838 if (isset(&sc->open_device_map, i) &&
839 sc->port[i].ifp->if_capenable & IFCAP_TOE4)
842 KASSERT(i < sc->params.nports,
843 ("%s: no running port with TOE capability enabled.", __func__));
846 if (listen_hash_find(td, inp) != NULL)
847 goto done; /* already setup */
849 lctx = alloc_lctx(td, inp, pi->first_qset);
852 "%s: listen request ignored, %s couldn't allocate lctx\n",
853 __func__, device_get_nameunit(sc->dev));
856 listen_hash_add(td, lctx);
858 CTR5(KTR_CXGB, "%s: stid %u (%s), lctx %p, inp %p", __func__,
859 lctx->stid, tcpstates[tp->t_state], lctx, inp);
861 if (create_server(sc, lctx) != 0) {
862 log(LOG_ERR, "%s: %s failed to create hw listener.\n", __func__,
863 device_get_nameunit(sc->dev));
864 (void) listen_hash_del(td, inp);
865 inp = release_lctx(td, lctx);
866 /* can't be freed, host stack has a reference */
867 KASSERT(inp != NULL, ("%s: inp freed", __func__));
/* Cleared by do_pass_open_rpl when the chip's reply comes back. */
870 lctx->flags |= LCTX_RPL_PENDING;
879 * Stop a listening server by sending a close_listsvr request to HW.
880 * The server TID is freed when we get the reply.
/*
 * Called from the host stack when the listening socket is closed.  Removes
 * the lctx from the hash, aborts any embryonic connections still on the
 * synq, and sends CPL_CLOSE_LISTSRV_REQ.  Returns ENOENT if the inp had no
 * hardware listener and EINPROGRESS if the PASS_OPEN reply is still
 * outstanding (cleanup then happens in do_pass_open_rpl).
 */
883 t3_listen_stop(struct toedev *tod, struct tcpcb *tp)
885 struct listen_ctx *lctx;
886 struct adapter *sc = tod->tod_softc;
887 struct tom_data *td = t3_tomdata(tod);
888 struct inpcb *inp = tp->t_inpcb;
889 struct synq_entry *synqe;
891 INP_WLOCK_ASSERT(inp);
893 lctx = listen_hash_del(td, inp);
895 return (ENOENT); /* no hardware listener for this inp */
897 CTR4(KTR_CXGB, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
901 * If the reply to the PASS_OPEN is still pending we'll wait for it to
902 * arrive and clean up when it does.
904 if (lctx->flags & LCTX_RPL_PENDING) {
905 KASSERT(TAILQ_EMPTY(&lctx->synq),
906 ("%s: synq not empty.", __func__));
907 return (EINPROGRESS);
911 * The host stack will abort all the connections on the listening
912 * socket's so_comp. It doesn't know about the connections on the synq
913 * so we need to take care of those.
915 TAILQ_FOREACH(synqe, &lctx->synq, link) {
916 KASSERT(synqe->lctx == lctx, ("%s: synq corrupt", __func__));
917 t3_send_reset_synqe(tod, synqe);
920 destroy_server(sc, lctx);
/*
 * toedev callback: a syncache entry now references our synq_entry.
 * NOTE(review): the body shown is truncated; presumably it takes a
 * reference on the synqe (paired with t3_syncache_removed) -- confirm.
 */
925 t3_syncache_added(struct toedev *tod __unused, void *arg)
927 struct synq_entry *synqe = arg;
/*
 * toedev callback: the syncache entry referencing this synq_entry is gone;
 * drop the reference it held.
 */
933 t3_syncache_removed(struct toedev *tod __unused, void *arg)
935 struct synq_entry *synqe = arg;
937 release_synqe(synqe);
941 extern void tcp_dooptions(struct tcpopt *, u_char *, int, int);
/*
 * toedev callback invoked by the syncache when it wants a SYN|ACK sent.
 * Only the very first call (from within syncache_add) is acted upon: we
 * atomically flip synqe->reply from RPL_OK to RPL_DONE and send a
 * CPL_PASS_ACCEPT_RPL telling the chip to accept the connection.  Later
 * syncache retransmit attempts find reply != RPL_OK and are ignored.
 * The mbuf m carries the IP and TCP headers the stack built.
 */
944 t3_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
946 struct adapter *sc = tod->tod_softc;
947 struct synq_entry *synqe = arg;
948 struct l2t_entry *e = synqe->e;
949 struct ip *ip = mtod(m, struct ip *);
950 struct tcphdr *th = (void *)(ip + 1);
951 struct cpl_pass_accept_rpl *rpl;
953 struct listen_ctx *lctx = synqe->lctx;
955 int mtu_idx, cpu_idx;
958 * The first time we run it's during the call to syncache_add. That's
959 * the only one we care about.
961 if (atomic_cmpset_int(&synqe->reply, RPL_OK, RPL_DONE) == 0)
962 goto done; /* reply to the CPL only if it's ok to do so */
964 r = M_GETHDR_OFLD(lctx->qset, CPL_PRIORITY_CONTROL, rpl);
969 * Use only the provided mbuf (with ip and tcp headers) and what's in
970 * synqe. Avoid looking at the listening socket (lctx->inp) here.
972 * XXX: if the incoming SYN had the TCP timestamp option but the kernel
973 * decides it doesn't want to use TCP timestamps we have no way of
974 * relaying this info to the chip on a per-tid basis (all we have is a
/* Re-parse the TCP options the stack put in its SYN|ACK. */
977 bzero(&to, sizeof(to));
978 tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
981 /* stash them for later */
982 synqe->iss = be32toh(th->th_seq);
983 synqe->ts = to.to_tsval;
985 mtu_idx = find_best_mtu_idx(sc, NULL, to.to_mss);
986 cpu_idx = sc->rrss_map[synqe->qset];
988 rpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
990 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, synqe->tid));
991 rpl->opt2 = calc_opt2(cpu_idx);
992 rpl->rsvd = rpl->opt2; /* workaround for HW bug */
993 rpl->peer_ip = ip->ip_dst.s_addr;
994 rpl->opt0h = synqe->opt0h |
995 calc_opt0h(NULL, mtu_idx, to.to_wscale, NULL);
996 rpl->opt0l_status = htobe32(CPL_PASS_OPEN_ACCEPT) |
997 calc_opt0l(NULL, synqe->rx_credits);
/*
 * Process a CPL_ABORT_REQ_RSS aimed at a synq entry (the peer or the chip
 * is aborting an embryonic connection).  T3 delivers the request twice:
 * the first copy is only recorded (TP_ABORT_REQ_RCVD); the second copy
 * performs the actual teardown, unless we had already sent our own reset
 * (TP_ABORT_RPL_PENDING), in which case cleanup is deferred to the reply
 * handler.  An abort reply is sent back to the chip for a genuine abort.
 */
1006 do_abort_req_synqe(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1008 struct adapter *sc = qs->adap;
1009 struct tom_data *td = sc->tom_softc;
1010 struct toedev *tod = &td->tod;
1011 const struct cpl_abort_req_rss *req = mtod(m, void *);
1012 unsigned int tid = GET_TID(req);
1013 struct synq_entry *synqe = lookup_tid(&td->tid_maps, tid);
1014 struct listen_ctx *lctx = synqe->lctx;
1015 struct inpcb *inp = lctx->inp;
1017 KASSERT(synqe->flags & TP_IS_A_SYNQ_ENTRY,
1018 ("%s: !SYNQ_ENTRY", __func__));
1020 CTR6(KTR_CXGB, "%s: tid %u, synqe %p (%x), lctx %p, status %d",
1021 __func__, tid, synqe, synqe->flags, synqe->lctx, req->status);
/* First copy of the request: note it and mark the synqe shut down. */
1025 if (!(synqe->flags & TP_ABORT_REQ_RCVD)) {
1026 synqe->flags |= TP_ABORT_REQ_RCVD;
1027 synqe->flags |= TP_ABORT_SHUTDOWN;
/* Second copy: clear the marker and proceed with the teardown below. */
1032 synqe->flags &= ~TP_ABORT_REQ_RCVD;
1035 * If we'd sent a reset on this synqe, we'll ignore this and clean up in
1036 * the T3's reply to our reset instead.
1038 if (synqe->flags & TP_ABORT_RPL_PENDING) {
1039 synqe->flags |= TP_ABORT_RPL_SENT;
1042 TAILQ_REMOVE(&lctx->synq, synqe, link);
1043 inp = release_lctx(td, lctx);
1046 release_tid(tod, tid, qs->idx);
1047 l2t_release(td->l2t, synqe->e);
1048 release_synqe(synqe);
/* Acknowledge the abort to the chip. */
1051 send_abort_rpl(tod, tid, qs->idx);
1057 do_abort_rpl_synqe(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1059 struct adapter *sc = qs->adap;
1060 struct tom_data *td = sc->tom_softc;
1061 struct toedev *tod = &td->tod;
1062 const struct cpl_abort_rpl_rss *rpl = mtod(m, void *);
1063 unsigned int tid = GET_TID(rpl);
1064 struct synq_entry *synqe = lookup_tid(&td->tid_maps, tid);
1065 struct listen_ctx *lctx = synqe->lctx;
1066 struct inpcb *inp = lctx->inp;
1068 CTR3(KTR_CXGB, "%s: tid %d, synqe %p, status %d", tid, synqe,
1073 if (synqe->flags & TP_ABORT_RPL_PENDING) {
1074 if (!(synqe->flags & TP_ABORT_RPL_RCVD)) {
1075 synqe->flags |= TP_ABORT_RPL_RCVD;
1078 synqe->flags &= ~TP_ABORT_RPL_RCVD;
1079 synqe->flags &= TP_ABORT_RPL_PENDING;
1081 TAILQ_REMOVE(&lctx->synq, synqe, link);
1082 inp = release_lctx(td, lctx);
1085 release_tid(tod, tid, qs->idx);
1086 l2t_release(td->l2t, synqe->e);
1087 release_synqe(synqe);
/*
 * Send an ABORT_REQ (RST) for an embryonic connection still on the synq.
 * Idempotent: returns early if the synqe is already being shut down
 * (TP_ABORT_SHUTDOWN); otherwise marks it RPL_PENDING|SHUTDOWN so the
 * eventual CPL_ABORT_RPL_RSS (do_abort_rpl_synqe) performs the cleanup.
 */
1096 t3_send_reset_synqe(struct toedev *tod, struct synq_entry *synqe)
1098 struct cpl_abort_req *req;
1099 unsigned int tid = synqe->tid;
1100 struct adapter *sc = tod->tod_softc;
1103 struct listen_ctx *lctx = synqe->lctx;
1104 struct inpcb *inp = lctx->inp;
1107 INP_WLOCK_ASSERT(inp);
1109 CTR4(KTR_CXGB, "%s: tid %d, synqe %p (%x)", __func__, tid, synqe,
/* Already aborting; don't send a second reset. */
1112 if (synqe->flags & TP_ABORT_SHUTDOWN)
1115 synqe->flags |= (TP_ABORT_RPL_PENDING | TP_ABORT_SHUTDOWN);
1117 m = M_GETHDR_OFLD(synqe->qset, CPL_PRIORITY_DATA, req);
1119 CXGB_UNIMPLEMENTED();
1121 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
1122 req->wr.wrh_lo = htonl(V_WR_TID(tid));
1123 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
1125 req->rsvd1 = !(synqe->flags & TP_DATASENT);
1126 req->cmd = CPL_ABORT_SEND_RST;
/* The abort must follow any data already queued via this L2 entry. */
1128 l2t_send(sc, m, synqe->e);
/*
 * toedev callback: attach the TOE state to a newly created socket that
 * syncache_expand produced for this synq entry.  Marks the synqe
 * TP_SYNQE_EXPANDED so do_pass_establish knows the offload already
 * happened.  Runs with both the pcbinfo lock (to serialize with accept())
 * and the new inp's write lock held.
 */
1132 t3_offload_socket(struct toedev *tod, void *arg, struct socket *so)
1134 struct adapter *sc = tod->tod_softc;
1135 struct tom_data *td = sc->tom_softc;
1136 struct synq_entry *synqe = arg;
1138 struct inpcb *inp = sotoinpcb(so);
1140 struct cpl_pass_establish *cpl = synqe->cpl;
1141 struct toepcb *toep = synqe->toep;
1143 INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
1144 INP_WLOCK_ASSERT(inp);
1146 offload_socket(so, toep);
1147 make_established(so, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
/* The tid's context switches from the synqe to the toepcb here. */
1148 update_tid(td, toep, synqe->tid);
1149 synqe->flags |= TP_SYNQE_EXPANDED;