From cf2bf7bb11ac9335c9200fa745be6a9aadd78c26 Mon Sep 17 00:00:00 2001 From: jch Date: Wed, 27 Jul 2016 07:51:58 +0000 Subject: [PATCH] MFC r271119, r272081: r271119: In tcp_input(), don't acquire the pcbinfo global write lock for SYN packets targeting a listening socket. Permit to reduce TCP input processing starvation in context of high SYN load (e.g. short-lived TCP connections or SYN flood). Submitted by: Julien Charbon Reviewed by: adrian, hiren, jhb, Mike Bentkofsky r272081: Catch up with r271119. git-svn-id: svn://svn.freebsd.org/base/stable/10@303371 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- sys/netinet/tcp_input.c | 28 ++++++++++++++++------------ sys/netinet/tcp_syncache.c | 19 +++++-------------- sys/netinet/toecore.c | 1 - 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 25762cb73..131ce4821 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -751,12 +751,12 @@ tcp_input(struct mbuf *m, int off0) /* * Locate pcb for segment; if we're likely to add or remove a - * connection then first acquire pcbinfo lock. There are two cases + * connection then first acquire pcbinfo lock. There are three cases * where we might discover later we need a write lock despite the - * flags: ACKs moving a connection out of the syncache, and ACKs for - * a connection in TIMEWAIT. + * flags: ACKs moving a connection out of the syncache, ACKs for a + * connection in TIMEWAIT and SYNs not targeting a listening socket. */ - if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) { + if ((thflags & (TH_FIN | TH_RST)) != 0) { INP_INFO_WLOCK(&V_tcbinfo); ti_locked = TI_WLOCKED; } else @@ -983,10 +983,12 @@ tcp_input(struct mbuf *m, int off0) * now be in TIMEWAIT. */ #ifdef INVARIANTS - if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) + if ((thflags & (TH_FIN | TH_RST)) != 0) INP_INFO_WLOCK_ASSERT(&V_tcbinfo); #endif - if (tp->t_state != TCPS_ESTABLISHED) { + if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) || + (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) && + !(tp->t_flags & TF_FASTOPEN)))) { if (ti_locked == TI_UNLOCKED) { if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) { in_pcbref(inp); @@ -1027,17 +1029,13 @@ tcp_input(struct mbuf *m, int off0) /* * When the socket is accepting connections (the INPCB is in LISTEN * state) we look into the SYN cache if this is a new connection - * attempt or the completion of a previous one. Because listen - * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be - * held in this case. + * attempt or the completion of a previous one. */ if (so->so_options & SO_ACCEPTCONN) { struct in_conninfo inc; KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but " "tp not listening", __func__)); - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); - bzero(&inc, sizeof(inc)); #ifdef INET6 if (isipv6) { @@ -1060,6 +1058,8 @@ tcp_input(struct mbuf *m, int off0) * socket appended to the listen queue in SYN_RECEIVED state. */ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { + + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* * Parse the TCP options here because * syncookies need access to the reflected @@ -1348,8 +1348,12 @@ tcp_input(struct mbuf *m, int off0) #endif /* * Entry added to syncache and mbuf consumed. - * Everything already unlocked by syncache_add(). + * Only the listen socket is unlocked by syncache_add(). */ + if (ti_locked == TI_WLOCKED) { + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + } INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return; } else if (tp->t_state == TCPS_LISTEN) { diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 03e8c72fb..f71d02894 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1107,7 +1107,7 @@ syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m, * Global TCP locks are held because we manipulate the PCB lists * and create a new socket. */ - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending; *lsop = syncache_socket(sc, *lsop, m); @@ -1175,7 +1175,6 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, int tfo_response_cookie_valid = 0; #endif - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); /* listen socket */ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN, ("%s: unexpected tcp flags", __func__)); @@ -1229,21 +1228,15 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, #ifdef MAC if (mac_syncache_init(&maclabel) != 0) { INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); goto done; } else mac_syncache_create(maclabel, inp); #endif #ifdef TCP_RFC7413 - if (!tfo_cookie_valid) { - INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); - } -#else - INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); + if (!tfo_cookie_valid) #endif - + INP_WUNLOCK(inp); + /* * Remember the IP options, if any. */ @@ -1272,10 +1265,8 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, SCH_LOCK_ASSERT(sch); if (sc != NULL) { #ifdef TCP_RFC7413 - if (tfo_cookie_valid) { + if (tfo_cookie_valid) INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); - } #endif TCPSTAT_INC(tcps_sc_dupsyn); if (ipopts) { diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c index 53d4778a6..1adaf4d35 100644 --- a/sys/netinet/toecore.c +++ b/sys/netinet/toecore.c @@ -328,7 +328,6 @@ toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, { struct socket *lso = inp->inp_socket; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx); -- 2.45.0