From fd765e545ce1c120c649b321ac4fce60f146ff3e Mon Sep 17 00:00:00 2001 From: melifaro Date: Tue, 27 Nov 2012 20:16:37 +0000 Subject: [PATCH] MFC r241406, r241502, r241884. Do not check if found IPv4 rte is dynamic if net.inet.icmp.drop_redirect is enabled. This eliminates one mtx_lock() per each routing lookup thus improving performance in several cases (routing to directly connected interface or routing to default gateway). Icmp redirects should not be used to provide routing direction nowadays, even for end hosts. Routers should not use them too (and this is explicitly restricted in IPv6, see RFC 4861, clause 8.2). Current commit changes rnh_machaddr function to 'stock' rn_match (and back) for every AF_INET routing table in given VNET instance on drop_redirect sysctl change. Eliminate code checking if found IPv6 rte is dynamic. IPv6 redirects are using (different) ND-based approach described in RFC 4861. This change is similar to r241406 which conditionally skips the same check in IPv4. Cleanup documentation: cloning route support has been removed in r186119. This change is part of bigger patch eliminating rte locking. Sponsored by: Yandex LLC git-svn-id: svn://svn.freebsd.org/base/stable/8@243629 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- sys/netinet/in_rmx.c | 24 +++--- sys/netinet/in_var.h | 2 + sys/netinet/ip_icmp.c | 39 ++++++++-- sys/netinet/ip_var.h | 2 + sys/netinet6/in6_rmx.c | 169 ----------------------------------------- 5 files changed, 48 insertions(+), 188 deletions(-) diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c index 1389873eb..d09805d58 100644 --- a/sys/netinet/in_rmx.c +++ b/sys/netinet/in_rmx.c @@ -27,19 +27,6 @@ * SUCH DAMAGE. */ -/* - * This code does two things necessary for the enhanced TCP metrics to - * function in a useful manner: - * 1) It marks all non-host routes as `cloning', thus ensuring that - * every actual reference to such a route actually gets turned - * into a reference to a host route to the specific destination - * requested. - * 2) When such routes lose all their references, it arranges for them - * to be deleted in some random collection of circumstances, so that - * a large quantity of stale routing data is not kept in kernel memory - * indefinitely. See in_rtqtimo() below for the exact mechanism. - */ - #include __FBSDID("$FreeBSD$"); @@ -58,6 +45,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include extern int in_inithead(void **head, int off); @@ -340,6 +329,13 @@ in_rtqdrain(void) VNET_LIST_RUNLOCK_NOSLEEP(); } +void +in_setmatchfunc(struct radix_node_head *rnh, int val) +{ + + rnh->rnh_matchaddr = (val != 0) ? rn_match : in_matroute; +} + static int _in_rt_was_here; /* * Initialize our routing tree. @@ -365,7 +361,7 @@ in_inithead(void **head, int off) rnh = *head; rnh->rnh_addaddr = in_addroute; - rnh->rnh_matchaddr = in_matroute; + in_setmatchfunc(rnh, V_drop_redirect); rnh->rnh_close = in_clsroute; if (_in_rt_was_here == 0 ) { callout_init(&V_rtq_timer, CALLOUT_MPSAFE); diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 88c1a16a1..c04d45b91 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -423,6 +423,7 @@ inm_acquire_locked(struct in_multi *inm) struct rtentry; struct route; struct ip_moptions; +struct radix_node_head; int imo_multi_filter(const struct ip_moptions *, const struct ifnet *, const struct sockaddr *, const struct sockaddr *); @@ -461,6 +462,7 @@ void in_rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, u_int); int in_rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); +void in_setmatchfunc(struct radix_node_head *, int); #if 0 int in_rt_getifa(struct rt_addrinfo *, u_int fibnum); diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index f9bdfaca3..2f0aa3d43 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -92,11 +92,7 @@ SYSCTL_VNET_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW, &VNET_NAME(icmpmaskfake), 0, "Fake reply to ICMP Address Mask Request packets."); -static VNET_DEFINE(int, drop_redirect) = 0; -#define V_drop_redirect VNET(drop_redirect) -SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, - &VNET_NAME(drop_redirect), 0, - "Ignore ICMP redirects"); +VNET_DEFINE(int, drop_redirect) = 0; static VNET_DEFINE(int, log_redirect) = 0; #define V_log_redirect VNET(log_redirect) @@ -153,6 +149,39 @@ static void icmp_send(struct mbuf *, struct mbuf *); extern struct protosw inetsw[]; +static int +sysctl_net_icmp_drop_redir(SYSCTL_HANDLER_ARGS) +{ + int error, new; + int i; + struct radix_node_head *rnh; + + new = V_drop_redirect; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + new = (new != 0) ? 1 : 0; + + if (new == V_drop_redirect) + return (0); + + for (i = 0; i < rt_numfibs; i++) { + if ((rnh = rt_tables_get_rnh(i, AF_INET)) == NULL) + continue; + RADIX_NODE_HEAD_LOCK(rnh); + in_setmatchfunc(rnh, new); + RADIX_NODE_HEAD_UNLOCK(rnh); + } + + V_drop_redirect = new; + } + + return (error); +} + +SYSCTL_VNET_PROC(_net_inet_icmp, OID_AUTO, drop_redirect, + CTLTYPE_INT|CTLFLAG_RW, 0, 0, + sysctl_net_icmp_drop_redir, "I", "Ignore ICMP redirects"); + /* * Kernel module interface for updating icmpstat. The argument is an index * into icmpstat treated as an array of u_long. While this encodes the diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 51d881ae1..d196fd04c 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -187,6 +187,7 @@ VNET_DECLARE(struct socket *, ip_mrouter); /* multicast routing daemon */ extern int (*legal_vif_num)(int); extern u_long (*ip_mcast_src)(int); VNET_DECLARE(int, rsvp_on); +VNET_DECLARE(int, drop_redirect); extern struct pr_usrreqs rip_usrreqs; #define V_ipstat VNET(ipstat) @@ -199,6 +200,7 @@ extern struct pr_usrreqs rip_usrreqs; #define V_ip_rsvpd VNET(ip_rsvpd) #define V_ip_mrouter VNET(ip_mrouter) #define V_rsvp_on VNET(rsvp_on) +#define V_drop_redirect VNET(drop_redirect) void inp_freemoptions(struct ip_moptions *); int inp_getmoptions(struct inpcb *, struct sockopt *); diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index b5260308d..88ecc146c 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -59,19 +59,6 @@ * */ -/* - * This code does two things necessary for the enhanced TCP metrics to - * function in a useful manner: - * 1) It marks all non-host routes as `cloning', thus ensuring that - * every actual reference to such a route actually gets turned - * into a reference to a host route to the specific destination - * requested. - * 2) When such routes lose all their references, it arranges for them - * to be deleted in some random collection of circumstances, so that - * a large quantity of stale routing data is not kept in kernel memory - * indefinitely. See in6_rtqtimo() below for the exact mechanism. - */ - #include __FBSDID("$FreeBSD$"); @@ -111,8 +98,6 @@ extern int in6_inithead(void **head, int off); extern int in6_detachhead(void **head, int off); #endif -#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ - /* * Do what we need to do when inserting a route. */ @@ -183,42 +168,8 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, return (ret); } -/* - * This code is the inverse of in6_clsroute: on first reference, if we - * were managing the route, stop doing so and set the expiration timer - * back off again. - */ -static struct radix_node * -in6_matroute(void *v_arg, struct radix_node_head *head) -{ - struct radix_node *rn = rn_match(v_arg, head); - struct rtentry *rt = (struct rtentry *)rn; - - if (rt) { - RT_LOCK(rt); - if (rt->rt_flags & RTPRF_OURS) { - rt->rt_flags &= ~RTPRF_OURS; - rt->rt_rmx.rmx_expire = 0; - } - RT_UNLOCK(rt); - } - return rn; -} - SYSCTL_DECL(_net_inet6_ip6); -static VNET_DEFINE(int, rtq_reallyold6) = 60*60; - /* one hour is ``really old'' */ -#define V_rtq_reallyold6 VNET(rtq_reallyold6) -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire, CTLFLAG_RW, - &VNET_NAME(rtq_reallyold6) , 0, ""); - -static VNET_DEFINE(int, rtq_minreallyold6) = 10; - /* never automatically crank down to less */ -#define V_rtq_minreallyold6 VNET(rtq_minreallyold6) -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, - &VNET_NAME(rtq_minreallyold6) , 0, ""); - static VNET_DEFINE(int, rtq_toomany6) = 128; /* 128 cached routes is ``too many'' */ #define V_rtq_toomany6 VNET(rtq_toomany6) @@ -235,122 +186,6 @@ struct rtqk_arg { time_t nextstop; }; -/* - * Get rid of old routes. When draining, this deletes everything, even when - * the timeout is not expired yet. When updating, this makes sure that - * nothing has a timeout longer than the current value of rtq_reallyold6. - */ -static int -in6_rtqkill(struct radix_node *rn, void *rock) -{ - struct rtqk_arg *ap = rock; - struct rtentry *rt = (struct rtentry *)rn; - int err; - - RADIX_NODE_HEAD_WLOCK_ASSERT(ap->rnh); - - if (rt->rt_flags & RTPRF_OURS) { - ap->found++; - - if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) { - if (rt->rt_refcnt > 0) - panic("rtqkill route really not free"); - - err = in6_rtrequest(RTM_DELETE, - (struct sockaddr *)rt_key(rt), - rt->rt_gateway, rt_mask(rt), - rt->rt_flags|RTF_RNH_LOCKED, 0, - rt->rt_fibnum); - if (err) { - log(LOG_WARNING, "in6_rtqkill: error %d", err); - } else { - ap->killed++; - } - } else { - if (ap->updating - && (rt->rt_rmx.rmx_expire - time_uptime - > V_rtq_reallyold6)) { - rt->rt_rmx.rmx_expire = time_uptime - + V_rtq_reallyold6; - } - ap->nextstop = lmin(ap->nextstop, - rt->rt_rmx.rmx_expire); - } - } - - return 0; -} - -#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ -static VNET_DEFINE(int, rtq_timeout6) = RTQ_TIMEOUT; -static VNET_DEFINE(struct callout, rtq_timer6); - -#define V_rtq_timeout6 VNET(rtq_timeout6) -#define V_rtq_timer6 VNET(rtq_timer6) - -static void -in6_rtqtimo_one(struct radix_node_head *rnh) -{ - struct rtqk_arg arg; - static time_t last_adjusted_timeout = 0; - - arg.found = arg.killed = 0; - arg.rnh = rnh; - arg.nextstop = time_uptime + V_rtq_timeout6; - arg.draining = arg.updating = 0; - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, in6_rtqkill, &arg); - RADIX_NODE_HEAD_UNLOCK(rnh); - - /* - * Attempt to be somewhat dynamic about this: - * If there are ``too many'' routes sitting around taking up space, - * then crank down the timeout, and see if we can't make some more - * go away. However, we make sure that we will never adjust more - * than once in rtq_timeout6 seconds, to keep from cranking down too - * hard. - */ - if ((arg.found - arg.killed > V_rtq_toomany6) - && (time_uptime - last_adjusted_timeout >= V_rtq_timeout6) - && V_rtq_reallyold6 > V_rtq_minreallyold6) { - V_rtq_reallyold6 = 2*V_rtq_reallyold6 / 3; - if (V_rtq_reallyold6 < V_rtq_minreallyold6) { - V_rtq_reallyold6 = V_rtq_minreallyold6; - } - - last_adjusted_timeout = time_uptime; -#ifdef DIAGNOSTIC - log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold6 to %d", - V_rtq_reallyold6); -#endif - arg.found = arg.killed = 0; - arg.updating = 1; - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, in6_rtqkill, &arg); - RADIX_NODE_HEAD_UNLOCK(rnh); - } -} - -static void -in6_rtqtimo(void *rock) -{ - CURVNET_SET_QUIET((struct vnet *) rock); - struct radix_node_head *rnh; - struct timeval atv; - u_int fibnum; - - for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { - rnh = rt_tables_get_rnh(fibnum, AF_INET6); - if (rnh != NULL) - in6_rtqtimo_one(rnh); - } - - atv.tv_usec = 0; - atv.tv_sec = V_rtq_timeout6; - callout_reset(&V_rtq_timer6, tvtohz(&atv), in6_rtqtimo, rock); - CURVNET_RESTORE(); -} - /* * Age old PMTUs. */ @@ -440,12 +275,9 @@ in6_inithead(void **head, int off) rnh = *head; rnh->rnh_addaddr = in6_addroute; - rnh->rnh_matchaddr = in6_matroute; if (V__in6_rt_was_here == 0) { - callout_init(&V_rtq_timer6, CALLOUT_MPSAFE); callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE); - in6_rtqtimo(curvnet); /* kick off timeout first time */ in6_mtutimo(curvnet); /* kick off timeout first time */ V__in6_rt_was_here = 1; } @@ -458,7 +290,6 @@ int in6_detachhead(void **head, int off) { - callout_drain(&V_rtq_timer6); callout_drain(&V_rtq_mtutimer); return (1); } -- 2.45.0