From 0a3397a7e5ac0e4d006e3fb8909fee807d705b15 Mon Sep 17 00:00:00 2001 From: bz Date: Mon, 5 Mar 2012 17:33:01 +0000 Subject: [PATCH] MFC r231852,232127: Merge multi-FIB IPv6 support. Extend the so far IPv4-only support for multiple routing tables (FIBs) introduced in r178888 to IPv6 providing feature parity. This includes an extended rtalloc(9) KPI for IPv6, the necessary adjustments to the network stack, and user land support as in netstat. Sponsored by: Cisco Systems, Inc. git-svn-id: svn://svn.freebsd.org/base/stable/8@232552 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- contrib/netcat/netcat.c | 13 +- etc/network.subr | 19 +- etc/rc.d/network_ipv6 | 1 + etc/rc.d/routing | 16 +- share/man/man4/faith.4 | 4 +- sys/contrib/pf/net/pf.c | 29 +- sys/contrib/pf/net/pf_ioctl.c | 4 +- sys/fs/nfsclient/nfs_clport.c | 6 +- sys/fs/nfsclient/nfs_clvfsops.c | 4 +- sys/kern/uipc_socket.c | 2 + sys/net/flowtable.c | 4 +- sys/net/if_faith.c | 2 +- sys/net/route.c | 187 ++++++---- sys/net/route.h | 3 + sys/netinet/in.c | 2 +- sys/netinet/ipfw/ip_fw2.c | 12 +- sys/netinet/sctp_os_bsd.h | 6 + sys/netinet/tcp_subr.c | 2 +- sys/netinet6/icmp6.c | 18 +- sys/netinet6/in6.c | 589 ++++++++++++++++---------------- sys/netinet6/in6_gif.c | 7 +- sys/netinet6/in6_ifattach.c | 24 +- sys/netinet6/in6_mcast.c | 9 +- sys/netinet6/in6_rmx.c | 124 +++++-- sys/netinet6/in6_src.c | 73 +++- sys/netinet6/in6_var.h | 11 + sys/netinet6/ip6_forward.c | 2 +- sys/netinet6/ip6_input.c | 2 +- sys/netinet6/ip6_output.c | 40 ++- sys/netinet6/ip6_var.h | 3 + sys/netinet6/nd6.c | 5 +- sys/netinet6/nd6_nbr.c | 45 ++- sys/netinet6/nd6_rtr.c | 188 ++++++---- sys/netinet6/raw_ip6.c | 12 +- sys/netipsec/ipsec_output.c | 2 +- sys/nfs/bootp_subr.c | 7 +- sys/nfsclient/nfs_vfsops.c | 4 +- usr.bin/netstat/route.c | 29 +- 38 files changed, 909 insertions(+), 601 deletions(-) diff --git a/contrib/netcat/netcat.c b/contrib/netcat/netcat.c index a874adb40..b1c53fb57 100644 --- a/contrib/netcat/netcat.c +++ b/contrib/netcat/netcat.c @@ -605,8 +605,10 @@ remote_connect(const char *host, const char *port, struct addrinfo hints) #endif if (rtableid) { - if (setfib(rtableid) == -1) - err(1, "setfib"); + if (setsockopt(s, SOL_SOCKET, SO_SETFIB, &rtableid, + sizeof(rtableid)) == -1) + err(1, "setsockopt(.., SO_SETFIB, %u, ..)", + rtableid); } /* Bind to a local port or source address if specified. */ @@ -678,8 +680,11 @@ local_listen(char *host, char *port, struct addrinfo hints) continue; if (rtableid) { - if (setfib(rtableid) == -1) - err(1, "setfib"); + ret = setsockopt(s, SOL_SOCKET, SO_SETFIB, &rtableid, + sizeof(rtableid)); + if (ret == -1) + err(1, "setsockopt(.., SO_SETFIB, %u, ..)", + rtableid); } ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &x, sizeof(x)); diff --git a/etc/network.subr b/etc/network.subr index 2638f8306..b8dbc44fe 100644 --- a/etc/network.subr +++ b/etc/network.subr @@ -1125,16 +1125,33 @@ network6_default_interface_setup() ;; esac + # Get the number of FIBs supported. + fibs=`sysctl -n net.fibs` + : ${fibs:=1} + # Disallow unicast packets without outgoing scope identifiers, # or route such packets to a "default" interface, if it is specified. route add -inet6 fe80:: -prefixlen 10 ::1 -reject case ${ipv6_default_interface} in [Nn][Oo] | '') - route add -inet6 ff02:: -prefixlen 16 ::1 -reject + i=0 + while test ${i} -lt ${fibs}; do + setfib -F ${i} \ + route add -inet6 ff02:: -prefixlen 16 ::1 -reject + i=$((i + 1)) + done ;; *) laddr=`network6_getladdr ${ipv6_default_interface}` + # Only add the laddr route to the default FIB and a reject + # route to all others. route add -inet6 ff02:: ${laddr} -prefixlen 16 -interface + i=1 + while test ${i} -lt ${fibs}; do + setfib -F ${i} \ + route add -inet6 ff02:: -prefixlen 16 ::1 -reject + i=$((i + 1)) + done # Disable installing the default interface with the # case net.inet6.ip6.forwarding=0 and diff --git a/etc/rc.d/network_ipv6 b/etc/rc.d/network_ipv6 index 0d4f06430..e13d94a74 100755 --- a/etc/rc.d/network_ipv6 +++ b/etc/rc.d/network_ipv6 @@ -41,6 +41,7 @@ start_cmd="network_ipv6_start" network_ipv6_start() { + case ${ipv6_network_interfaces} in [Aa][Uu][Tt][Oo]) # Get a list of network interfaces diff --git a/etc/rc.d/routing b/etc/rc.d/routing index 8c6830b83..b4759bc07 100755 --- a/etc/rc.d/routing +++ b/etc/rc.d/routing @@ -61,9 +61,21 @@ static_start() # Disallow "internal" addresses to appear on the wire if inet6 # is enabled. if afexists inet6; then + local fibs i + + # Get the number of FIBs supported. + fibs=`sysctl -n net.fibs` + : ${fibs:=1} + # disallow "internal" addresses to appear on the wire - route add -inet6 ::ffff:0.0.0.0 -prefixlen 96 ::1 -reject - route add -inet6 ::0.0.0.0 -prefixlen 96 ::1 -reject + i=0 + while test ${i} -lt ${fibs}; do + setfib -F ${i} route add -inet6 \ + ::ffff:0.0.0.0 -prefixlen 96 ::1 -reject + setfib -F ${i} route add -inet6 \ + ::0.0.0.0 -prefixlen 96 ::1 -reject + i=$((i + 1)) + done fi } diff --git a/share/man/man4/faith.4 b/share/man/man4/faith.4 index b98ba3945..f0a2df6f6 100644 --- a/share/man/man4/faith.4 +++ b/share/man/man4/faith.4 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd April 10, 1999 +.Dd January 23, 2012 .Dt FAITH 4 .Os .Sh NAME @@ -58,7 +58,7 @@ variable in .Xr rc.conf 5 . .Pp Special action will be taken when IPv6 TCP traffic is seen on a router, -and the routing table suggests to route it to the +and the default routing table suggests to route it to the .Nm interface. In this case, the packet will be accepted by the router, diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index 242834d80..329caf162 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -3197,11 +3197,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; #ifdef __FreeBSD__ -#ifdef RTF_PRCLONING - rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING)); -#else /* !RTF_PRCLONING */ - in_rtalloc_ign(&ro, 0, 0); -#endif + in_rtalloc_ign(&ro, 0, RT_DEFAULT_FIB); #else /* ! __FreeBSD__ */ rtalloc_noclone(&ro, NO_CLONING); #endif @@ -3217,12 +3213,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; #ifdef __FreeBSD__ -#ifdef RTF_PRCLONING - rtalloc_ign((struct route *)&ro6, - (RTF_CLONING | RTF_PRCLONING)); -#else /* !RTF_PRCLONING */ - rtalloc_ign((struct route *)&ro6, 0); -#endif + in6_rtalloc_ign(&ro6, 0, RT_DEFAULT_FIB); #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro6, NO_CLONING); #endif @@ -6134,9 +6125,11 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) #ifdef __FreeBSD__ /* XXX MRT not always INET */ /* stick with table 0 though */ if (af == AF_INET) - in_rtalloc_ign((struct route *)&ro, 0, 0); + in_rtalloc_ign((struct route *)&ro, 0, RT_DEFAULT_FIB); +#ifdef INET6 else - rtalloc_ign((struct route *)&ro, 0); + in6_rtalloc_ign(&ro, 0, RT_DEFAULT_FIB); +#endif #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro, NO_CLONING); #endif @@ -6212,14 +6205,12 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) } #ifdef __FreeBSD__ -# ifdef RTF_PRCLONING - rtalloc_ign((struct route *)&ro, (RTF_CLONING|RTF_PRCLONING)); -# else /* !RTF_PRCLONING */ if (af == AF_INET) - in_rtalloc_ign((struct route *)&ro, 0, 0); + in_rtalloc_ign((struct route *)&ro, 0, RT_DEFAULT_FIB); +#ifdef INET6 else - rtalloc_ign((struct route *)&ro, 0); -# endif + in6_rtalloc_ign(&ro, 0, RT_DEFAULT_FIB); +#endif #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro, NO_CLONING); #endif diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index 514b7c3db..2244b94ad 100644 --- a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -1531,7 +1531,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } #ifdef __FreeBSD__ /* ROUTING */ - if (rule->rtableid > 0 && rule->rtableid > rt_numfibs) + if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs) #else if (rule->rtableid > 0 && !rtable_exists(rule->rtableid)) #endif @@ -1794,7 +1794,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (newrule->rtableid > 0 && #ifdef __FreeBSD__ /* ROUTING */ - newrule->rtableid > rt_numfibs) + newrule->rtableid >= rt_numfibs) #else !rtable_exists(newrule->rtableid)) #endif diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index 371dad699..9eae27438 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -946,7 +946,8 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) sad.sin_len = sizeof (struct sockaddr_in); sad.sin_addr.s_addr = sin->sin_addr.s_addr; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); - rt = rtalloc1((struct sockaddr *)&sad, 0, 0UL); + rt = rtalloc1_fib((struct sockaddr *)&sad, 0, 0UL, + curthread->td_proc->p_fibnum); if (rt != NULL) { if (rt->rt_ifp != NULL && rt->rt_ifa != NULL && @@ -971,7 +972,8 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) sad6.sin6_len = sizeof (struct sockaddr_in6); sad6.sin6_addr = sin6->sin6_addr; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); - rt = rtalloc1((struct sockaddr *)&sad6, 0, 0UL); + rt = rtalloc1_fib((struct sockaddr *)&sad6, 0, 0UL, + curthread->td_proc->p_fibnum); if (rt != NULL) { if (rt->rt_ifp != NULL && rt->rt_ifa != NULL && diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index df34841d0..a74792d98 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -455,10 +455,10 @@ nfs_mountroot(struct mount *mp) sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ CURVNET_SET(TD_TO_VNET(td)); - error = rtrequest(RTM_ADD, (struct sockaddr *)&sin, + error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, - RTF_UP | RTF_GATEWAY, NULL); + RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); CURVNET_RESTORE(); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 146cf5249..63c1bfdfd 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -383,6 +383,7 @@ socreate(int dom, struct socket **aso, int type, int proto, so->so_type = type; so->so_cred = crhold(cred); if ((prp->pr_domain->dom_family == PF_INET) || + (prp->pr_domain->dom_family == PF_INET6) || (prp->pr_domain->dom_family == PF_ROUTE)) so->so_fibnum = td->td_proc->p_fibnum; else @@ -2498,6 +2499,7 @@ sosetopt(struct socket *so, struct sockopt *sopt) } if (so->so_proto != NULL && ((so->so_proto->pr_domain->dom_family == PF_INET) || + (so->so_proto->pr_domain->dom_family == PF_INET6) || (so->so_proto->pr_domain->dom_family == PF_ROUTE))) { so->so_fibnum = optval; /* Note: ignore error */ diff --git a/sys/net/flowtable.c b/sys/net/flowtable.c index fe7abee85..527a3f3a4 100644 --- a/sys/net/flowtable.c +++ b/sys/net/flowtable.c @@ -373,7 +373,7 @@ SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD #ifndef RADIX_MPATH static void -in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum) +rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum) { rtalloc_ign_fib(ro, 0, fibnum); @@ -1315,7 +1315,7 @@ flowtable_alloc(char *name, int nentry, int flags) #ifdef RADIX_MPATH ft->ft_rtalloc = rtalloc_mpath_fib; #else - ft->ft_rtalloc = in_rtalloc_ign_wrapper; + ft->ft_rtalloc = rtalloc_ign_wrapper; #endif if (flags & FL_PCPU) { ft->ft_lock = flowtable_pcpu_lock; diff --git a/sys/net/if_faith.c b/sys/net/if_faith.c index ca8088362..db4c1d99e 100644 --- a/sys/net/if_faith.c +++ b/sys/net/if_faith.c @@ -338,7 +338,7 @@ faithprefix(in6) sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *in6; - rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); + rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB); if (rt && rt->rt_ifp && rt->rt_ifp->if_type == IFT_FAITH && (rt->rt_ifp->if_flags & IFF_UP) != 0) ret = 1; diff --git a/sys/net/route.c b/sys/net/route.c index 2bbabcafd..d6d89f215 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -35,6 +35,7 @@ ***********************************************************************/ #include "opt_inet.h" +#include "opt_inet6.h" #include "opt_route.h" #include "opt_mrouting.h" #include "opt_mpath.h" @@ -72,7 +73,11 @@ SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, ""); /* * Allow the boot code to allow LESS than RT_MAXFIBS to be used. * We can't do more because storage is statically allocated for now. - * (for compatibility reasons.. this will change). + * (for compatibility reasons.. this will change. When this changes, code should + * be refactored to protocol independent parts and protocol dependent parts, + * probably hanging of domain(9) specific storage to not need the full + * fib * af RNH allocation etc. but allow tuning the number of tables per + * address family). */ TUNABLE_INT("net.fibs", &rt_numfibs); @@ -82,6 +87,9 @@ TUNABLE_INT("net.fibs", &rt_numfibs); * changes for the FIB of the caller when adding a new set of addresses * to an interface. XXX this is a shotgun aproach to a problem that needs * a more fine grained solution.. that will come. + * XXX also has the problems getting the FIB from curthread which will not + * always work given the fib can be overridden and prefixes can be added + * from the network stack context. */ u_int rt_add_addr_allfibs = 1; SYSCTL_INT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW, @@ -196,27 +204,23 @@ vnet_route_init(const void *unused __unused) V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); for (dom = domains; dom; dom = dom->dom_next) { - if (dom->dom_rtattach) { - for (table = 0; table < rt_numfibs; table++) { - if ( (fam = dom->dom_family) == AF_INET || - table == 0) { - /* for now only AF_INET has > 1 table */ - /* XXX MRT - * rtattach will be also called - * from vfs_export.c but the - * offset will be 0 - * (only for AF_INET and AF_INET6 - * which don't need it anyhow) - */ - rnh = rt_tables_get_rnh_ptr(table, fam); - if (rnh == NULL) - panic("%s: rnh NULL", __func__); - dom->dom_rtattach((void **)rnh, - dom->dom_rtoffset); - } else { - break; - } - } + if (dom->dom_rtattach == NULL) + continue; + + for (table = 0; table < rt_numfibs; table++) { + fam = dom->dom_family; + if (table != 0 && fam != AF_INET6 && fam != AF_INET) + break; + + /* + * XXX MRT rtattach will be also called from + * vfs_export.c but the offset will be 0 (only for + * AF_INET and AF_INET6 which don't need it anyhow). + */ + rnh = rt_tables_get_rnh_ptr(table, fam); + if (rnh == NULL) + panic("%s: rnh NULL", __func__); + dom->dom_rtattach((void **)rnh, dom->dom_rtoffset); } } } @@ -233,20 +237,19 @@ vnet_route_uninit(const void *unused __unused) struct radix_node_head **rnh; for (dom = domains; dom; dom = dom->dom_next) { - if (dom->dom_rtdetach) { - for (table = 0; table < rt_numfibs; table++) { - if ( (fam = dom->dom_family) == AF_INET || - table == 0) { - /* For now only AF_INET has > 1 tbl. */ - rnh = rt_tables_get_rnh_ptr(table, fam); - if (rnh == NULL) - panic("%s: rnh NULL", __func__); - dom->dom_rtdetach((void **)rnh, - dom->dom_rtoffset); - } else { - break; - } - } + if (dom->dom_rtdetach == NULL) + continue; + + for (table = 0; table < rt_numfibs; table++) { + fam = dom->dom_family; + + if (table != 0 && fam != AF_INET6 && fam != AF_INET) + break; + + rnh = rt_tables_get_rnh_ptr(table, fam); + if (rnh == NULL) + panic("%s: rnh NULL", __func__); + dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset); } } } @@ -274,7 +277,8 @@ setfib(struct thread *td, struct setfib_args *uap) void rtalloc(struct route *ro) { - rtalloc_ign_fib(ro, 0UL, 0); + + rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB); } void @@ -294,7 +298,7 @@ rtalloc_ign(struct route *ro, u_long ignore) RTFREE(rt); ro->ro_rt = NULL; } - ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0); + ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); } @@ -324,7 +328,8 @@ rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) struct rtentry * rtalloc1(struct sockaddr *dst, int report, u_long ignflags) { - return (rtalloc1_fib(dst, report, ignflags, 0)); + + return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB)); } struct rtentry * @@ -339,8 +344,15 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, int needlock; KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); - if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ - fibnum = 0; + switch (dst->sa_family) { + case AF_INET6: + case AF_INET: + /* We support multiple FIBs. */ + break; + default: + fibnum = RT_DEFAULT_FIB; + break; + } rnh = rt_tables_get_rnh(fibnum, dst->sa_family); newrt = NULL; if (rnh == NULL) @@ -486,7 +498,8 @@ rtredirect(struct sockaddr *dst, int flags, struct sockaddr *src) { - rtredirect_fib(dst, gateway, netmask, flags, src, 0); + + rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB); } void @@ -617,7 +630,8 @@ out: int rtioctl(u_long req, caddr_t data) { - return (rtioctl_fib(req, data, 0)); + + return (rtioctl_fib(req, data, RT_DEFAULT_FIB)); } /* @@ -647,7 +661,8 @@ rtioctl_fib(u_long req, caddr_t data, u_int fibnum) struct ifaddr * ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) { - return (ifa_ifwithroute_fib(flags, dst, gateway, 0)); + + return (ifa_ifwithroute_fib(flags, dst, gateway, RT_DEFAULT_FIB)); } struct ifaddr * @@ -732,7 +747,9 @@ rtrequest(int req, int flags, struct rtentry **ret_nrt) { - return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0)); + + return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, + RT_DEFAULT_FIB)); } int @@ -771,7 +788,8 @@ rtrequest_fib(int req, int rt_getifa(struct rt_addrinfo *info) { - return (rt_getifa_fib(info, 0)); + + return (rt_getifa_fib(info, RT_DEFAULT_FIB)); } /* @@ -1029,8 +1047,16 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, #define senderr(x) { error = x ; goto bad; } KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); - if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ - fibnum = 0; + switch (dst->sa_family) { + case AF_INET6: + case AF_INET: + /* We support multiple FIBs. */ + break; + default: + fibnum = RT_DEFAULT_FIB; + break; + } + /* * Find the correct routing tree to use for this Address Family */ @@ -1136,8 +1162,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, rt->rt_flags = RTF_UP | flags; rt->rt_fibnum = fibnum; /* - * Add the gateway. Possibly re-malloc-ing the storage for it - * + * Add the gateway. Possibly re-malloc-ing the storage for it. */ RT_LOCK(rt); if ((error = rt_setgate(rt, dst, gateway)) != 0) { @@ -1186,12 +1211,17 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, #ifdef FLOWTABLE rt0 = NULL; - /* XXX - * "flow-table" only support IPv4 at the moment. - * XXX-BZ as of r205066 it would support IPv6. - */ + /* "flow-table" only supports IPv6 and IPv4 at the moment. */ + switch (dst->sa_family) { +#ifdef notyet +#ifdef INET6 + case AF_INET6: +#endif +#endif #ifdef INET - if (dst->sa_family == AF_INET) { + case AF_INET: +#endif +#if defined(INET6) || defined(INET) rn = rnh->rnh_matchaddr(dst, rnh); if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { struct sockaddr *mask; @@ -1230,9 +1260,9 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, } } } +#endif/* INET6 || INET */ } -#endif -#endif +#endif /* FLOWTABLE */ /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); @@ -1254,9 +1284,20 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, } #ifdef FLOWTABLE else if (rt0 != NULL) { + switch (dst->sa_family) { +#ifdef notyet +#ifdef INET6 + case AF_INET6: + flowtable_route_flush(V_ip6_ft, rt0); + break; +#endif +#endif #ifdef INET - flowtable_route_flush(V_ip_ft, rt0); + case AF_INET: + flowtable_route_flush(V_ip_ft, rt0); + break; #endif + } RTFREE(rt0); } #endif @@ -1388,8 +1429,17 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) dst = ifa->ifa_addr; netmask = ifa->ifa_netmask; } - if ( dst->sa_family != AF_INET) - fibnum = 0; + if (dst->sa_len == 0) + return(EINVAL); + switch (dst->sa_family) { + case AF_INET6: + case AF_INET: + /* We support multiple FIBs. */ + break; + default: + fibnum = RT_DEFAULT_FIB; + break; + } if (fibnum == -1) { if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) { startfib = endfib = curthread->td_proc->p_fibnum; @@ -1402,8 +1452,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) startfib = fibnum; endfib = fibnum; } - if (dst->sa_len == 0) - return(EINVAL); /* * If it's a delete, check that if it exists, @@ -1427,9 +1475,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) * Now go through all the requested tables (fibs) and do the * requested action. Realistically, this will either be fib 0 * for protocols that don't do multiple tables or all the - * tables for those that do. XXX For this version only AF_INET. - * When that changes code should be refactored to protocol - * independent parts and protocol dependent parts. + * tables for those that do. */ for ( fibnum = startfib; fibnum <= endfib; fibnum++) { if (cmd == RTM_DELETE) { @@ -1569,12 +1615,14 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) return (error); } +#ifndef BURN_BRIDGES /* special one for inet internal use. may not use. */ int rtinit_fib(struct ifaddr *ifa, int cmd, int flags) { return (rtinit1(ifa, cmd, flags, -1)); } +#endif /* * Set up a routing table entry, normally @@ -1584,7 +1632,7 @@ int rtinit(struct ifaddr *ifa, int cmd, int flags) { struct sockaddr *dst; - int fib = 0; + int fib = RT_DEFAULT_FIB; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; @@ -1592,7 +1640,12 @@ rtinit(struct ifaddr *ifa, int cmd, int flags) dst = ifa->ifa_addr; } - if (dst->sa_family == AF_INET) + switch (dst->sa_family) { + case AF_INET6: + case AF_INET: + /* We do support multiple FIBs. */ fib = -1; + break; + } return (rtinit1(ifa, cmd, flags, fib)); } diff --git a/sys/net/route.h b/sys/net/route.h index a74dfe6c3..aa514b0e3 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -107,6 +107,7 @@ struct rt_metrics { #endif #endif +#define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */ extern u_int rt_numfibs; /* number fo usable routing tables */ /* * XXX kernel function pointer `rt_output' is visible to applications. @@ -401,8 +402,10 @@ void rtredirect(struct sockaddr *, struct sockaddr *, int rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **); +#ifndef BURN_BRIDGES /* defaults to "all" FIBs */ int rtinit_fib(struct ifaddr *, int, int); +#endif /* XXX MRT NEW VERSIONS THAT USE FIBs * For now the protocol indepedent versions are the same as the AF_INET ones diff --git a/sys/netinet/in.c b/sys/netinet/in.c index b6595005c..bb8e37307 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -957,7 +957,7 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin, bzero(&ia_ro, sizeof(ia_ro)); *((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr; - rtalloc_ign_fib(&ia_ro, 0, 0); + rtalloc_ign_fib(&ia_ro, 0, RT_DEFAULT_FIB); if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) && (ia_ro.ro_rt->rt_ifp == V_loif)) { RT_LOCK(ia_ro.ro_rt); diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c index 79c974db0..23853dd07 100644 --- a/sys/netinet/ipfw/ip_fw2.c +++ b/sys/netinet/ipfw/ip_fw2.c @@ -494,7 +494,7 @@ search_ip6_addr_net (struct in6_addr * ip6_addr) } static int -verify_path6(struct in6_addr *src, struct ifnet *ifp) +verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib) { struct route_in6 ro; struct sockaddr_in6 *dst; @@ -505,9 +505,8 @@ verify_path6(struct in6_addr *src, struct ifnet *ifp) dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(*dst); dst->sin6_addr = *src; - /* XXX MRT 0 for ipv6 at this time */ - rtalloc_ign((struct route *)&ro, 0); + in6_rtalloc_ign(&ro, 0, fib); if (ro.ro_rt == NULL) return 0; @@ -1682,7 +1681,7 @@ do { \ #ifdef INET6 is_ipv6 ? verify_path6(&(args->f_id.src_ip6), - m->m_pkthdr.rcvif) : + m->m_pkthdr.rcvif, args->f_id.fib) : #endif verify_path(src_ip, m->m_pkthdr.rcvif, args->f_id.fib))); @@ -1694,7 +1693,7 @@ do { \ #ifdef INET6 is_ipv6 ? verify_path6(&(args->f_id.src_ip6), - NULL) : + NULL, args->f_id.fib) : #endif verify_path(src_ip, NULL, args->f_id.fib))); break; @@ -1712,7 +1711,8 @@ do { \ #ifdef INET6 is_ipv6 ? verify_path6( &(args->f_id.src_ip6), - m->m_pkthdr.rcvif) : + m->m_pkthdr.rcvif, + args->f_id.fib) : #endif verify_path(src_ip, m->m_pkthdr.rcvif, diff --git a/sys/netinet/sctp_os_bsd.h b/sys/netinet/sctp_os_bsd.h index 4e0dcab53..215b93dbd 100644 --- a/sys/netinet/sctp_os_bsd.h +++ b/sys/netinet/sctp_os_bsd.h @@ -424,6 +424,12 @@ typedef struct callout sctp_os_timer_t; typedef struct route sctp_route_t; typedef struct rtentry sctp_rtentry_t; +/* + * XXX multi-FIB support was backed out in r179783 and it seems clear that the + * VRF support as currently in FreeBSD is not ready to support multi-FIB. + * It might be best to implement multi-FIB support for both v4 and v6 indepedent + * of VRFs and leave those to a real MPLS stack. + */ #define SCTP_RTALLOC(ro, vrf_id) rtalloc_ign((struct route *)ro, 0UL) /* Future zero copy wakeup/send function */ diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 9d1132d79..adb5742be 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1801,7 +1801,7 @@ tcp_maxmtu6(struct in_conninfo *inc, int *flags) sro6.ro_dst.sin6_family = AF_INET6; sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); sro6.ro_dst.sin6_addr = inc->inc6_faddr; - rtalloc_ign((struct route *)&sro6, 0); + in6_rtalloc_ign(&sro6, 0, inc->inc_fibnum); } if (sro6.ro_rt != NULL) { ifp = sro6.ro_rt->rt_ifp; diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 2c76f493d..e0e7cd6ec 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -359,7 +359,7 @@ icmp6_error(struct mbuf *m, int type, int code, int param) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); - M_PREPEND(m, preplen, M_DONTWAIT); + M_PREPEND(m, preplen, M_DONTWAIT); /* FIB is also copied over. */ if (m && m->m_len < preplen) m = m_pullup(m, preplen); if (m == NULL) { @@ -581,7 +581,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) MGETHDR(n, M_DONTWAIT, n0->m_type); n0len = n0->m_pkthdr.len; /* save for use below */ if (n) - M_MOVE_PKTHDR(n, n0); + M_MOVE_PKTHDR(n, n0); /* FIB copied. */ if (n && maxlen >= MHLEN) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { @@ -1419,7 +1419,7 @@ ni6_input(struct mbuf *m, int off) m_freem(m); return (NULL); } - M_MOVE_PKTHDR(n, m); /* just for recvif */ + M_MOVE_PKTHDR(n, m); /* just for recvif and FIB */ if (replylen > MHLEN) { if (replylen > MCLBYTES) { /* @@ -2332,7 +2332,7 @@ icmp6_redirect_input(struct mbuf *m, int off) sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6)); - rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); + rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB); if (rt) { if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6) { @@ -2421,6 +2421,7 @@ icmp6_redirect_input(struct mbuf *m, int off) struct sockaddr_in6 sdst; struct sockaddr_in6 sgw; struct sockaddr_in6 ssrc; + u_int fibnum; bzero(&sdst, sizeof(sdst)); bzero(&sgw, sizeof(sgw)); @@ -2431,9 +2432,11 @@ icmp6_redirect_input(struct mbuf *m, int off) bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); - rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw, - (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST, - (struct sockaddr *)&ssrc); + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) + in6_rtredirect((struct sockaddr *)&sdst, + (struct sockaddr *)&sgw, (struct sockaddr *)NULL, + RTF_GATEWAY | RTF_HOST, (struct sockaddr *)&ssrc, + fibnum); } /* finally update cached route in each socket via pfctlinput */ { @@ -2517,6 +2520,7 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) MCLGET(m, M_DONTWAIT); if (!m) goto fail; + M_SETFIB(m, rt->rt_fibnum); m->m_pkthdr.rcvif = NULL; m->m_len = 0; maxlen = M_TRAILINGSPACE(m); diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index ce01d220a..28fb434ff 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -198,6 +198,11 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, switch (cmd) { case SIOCGETSGCNT_IN6: case SIOCGETMIFCNT_IN6: + /* + * XXX mrt_ioctl has a 3rd, unused, FIB argument in route.c. + * We cannot see how that would be needed, so do not adjust the + * KPI blindly; more likely should clean up the IPv4 variant. + */ return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP); } @@ -695,6 +700,169 @@ out: return (error); } +/* + * Join necessary multicast groups. Factored out from in6_update_ifa(). + * This entire work should only be done once, for the default FIB. + */ +static int +in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, + struct in6_ifaddr *ia, int flags, struct in6_multi **in6m_sol) +{ + char ip6buf[INET6_ADDRSTRLEN]; + struct sockaddr_in6 mltaddr, mltmask; + struct in6_addr llsol; + struct in6_multi_mship *imm; + struct rtentry *rt; + int delay, error; + + KASSERT(in6m_sol != NULL, ("%s: in6m_sol is NULL", __func__)); + + /* Join solicited multicast addr for new host id. */ + bzero(&llsol, sizeof(struct in6_addr)); + llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; + llsol.s6_addr32[1] = 0; + llsol.s6_addr32[2] = htonl(1); + llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; + llsol.s6_addr8[12] = 0xff; + if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) { + /* XXX: should not happen */ + log(LOG_ERR, "%s: in6_setscope failed\n", __func__); + goto cleanup; + } + delay = 0; + if ((flags & IN6_IFAUPDATE_DADDELAY)) { + /* + * We need a random delay for DAD on the address being + * configured. It also means delaying transmission of the + * corresponding MLD report to avoid report collision. + * [RFC 4861, Section 6.3.7] + */ + delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); + } + imm = in6_joingroup(ifp, &llsol, &error, delay); + if (imm == NULL) { + nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &llsol), + if_name(ifp), error)); + goto cleanup; + } + LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); + *in6m_sol = imm->i6mm_maddr; + + bzero(&mltmask, sizeof(mltmask)); + mltmask.sin6_len = sizeof(struct sockaddr_in6); + mltmask.sin6_family = AF_INET6; + mltmask.sin6_addr = in6mask32; +#define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */ + + /* + * Join link-local all-nodes address. + */ + bzero(&mltaddr, sizeof(mltaddr)); + mltaddr.sin6_len = sizeof(struct sockaddr_in6); + mltaddr.sin6_family = AF_INET6; + mltaddr.sin6_addr = in6addr_linklocal_allnodes; + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + goto cleanup; /* XXX: should not fail */ + + /* + * XXX: do we really need this automatic routes? We should probably + * reconsider this stuff. Most applications actually do not need the + * routes, since they usually specify the outgoing interface. + */ + rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); + if (rt != NULL) { + /* XXX: only works in !SCOPEDROUTING case. */ + if (memcmp(&mltaddr.sin6_addr, + &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, + MLTMASK_LEN)) { + RTFREE_LOCKED(rt); + rt = NULL; + } + } + if (rt == NULL) { + error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0, RT_DEFAULT_FIB); + if (error) + goto cleanup; + } else + RTFREE_LOCKED(rt); + + imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); + if (imm == NULL) { + nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, + &mltaddr.sin6_addr), if_name(ifp), error)); + goto cleanup; + } + LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); + + /* + * Join node information group address. + */ + delay = 0; + if ((flags & IN6_IFAUPDATE_DADDELAY)) { + /* + * The spec does not say anything about delay for this group, + * but the same logic should apply. + */ + delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); + } + if (in6_nigroup(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) { + /* XXX jinmei */ + imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay); + if (imm == NULL) + nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, + &mltaddr.sin6_addr), if_name(ifp), error)); + /* XXX not very fatal, go on... */ + else + LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); + } + + /* + * Join interface-local all-nodes address. + * (ff01::1%ifN, and ff01::%ifN/32) + */ + mltaddr.sin6_addr = in6addr_nodelocal_allnodes; + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + goto cleanup; /* XXX: should not fail */ + /* XXX: again, do we really need the route? */ + rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); + if (rt != NULL) { + if (memcmp(&mltaddr.sin6_addr, + &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, + MLTMASK_LEN)) { + RTFREE_LOCKED(rt); + rt = NULL; + } + } + if (rt == NULL) { + error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0, RT_DEFAULT_FIB); + if (error) + goto cleanup; + } else + RTFREE_LOCKED(rt); + + imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); + if (imm == NULL) { + nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, + &mltaddr.sin6_addr), if_name(ifp), error)); + goto cleanup; + } + LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); +#undef MLTMASK_LEN + +cleanup: + return (error); +} + /* * Update parameters of an IPv6 interface address. * If necessary, a new entry is created and linked into address chains. @@ -708,9 +876,7 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, int error = 0, hostIsNew = 0, plen = -1; struct sockaddr_in6 dst6; struct in6_addrlifetime *lt; - struct in6_multi_mship *imm; struct in6_multi *in6m_sol; - struct rtentry *rt; int delay; char ip6buf[INET6_ADDRSTRLEN]; @@ -954,178 +1120,17 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, * not just go to unlink. */ - /* Join necessary multicast groups */ + /* Join necessary multicast groups. */ in6m_sol = NULL; if ((ifp->if_flags & IFF_MULTICAST) != 0) { - struct sockaddr_in6 mltaddr, mltmask; - struct in6_addr llsol; - - /* join solicited multicast addr for new host id */ - bzero(&llsol, sizeof(struct in6_addr)); - llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; - llsol.s6_addr32[1] = 0; - llsol.s6_addr32[2] = htonl(1); - llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; - llsol.s6_addr8[12] = 0xff; - if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) { - /* XXX: should not happen */ - log(LOG_ERR, "in6_update_ifa: " - "in6_setscope failed\n"); + error = in6_update_ifa_join_mc(ifp, ifra, ia, flags, &in6m_sol); + if (error) goto cleanup; - } - delay = 0; - if ((flags & IN6_IFAUPDATE_DADDELAY)) { - /* - * We need a random delay for DAD on the address - * being configured. It also means delaying - * transmission of the corresponding MLD report to - * avoid report collision. - * [draft-ietf-ipv6-rfc2462bis-02.txt] - */ - delay = arc4random() % - (MAX_RTR_SOLICITATION_DELAY * hz); - } - imm = in6_joingroup(ifp, &llsol, &error, delay); - if (imm == NULL) { - nd6log((LOG_WARNING, - "in6_update_ifa: addmulti failed for " - "%s on %s (errno=%d)\n", - ip6_sprintf(ip6buf, &llsol), if_name(ifp), - error)); - goto cleanup; - } - LIST_INSERT_HEAD(&ia->ia6_memberships, - imm, i6mm_chain); - in6m_sol = imm->i6mm_maddr; - - bzero(&mltmask, sizeof(mltmask)); - mltmask.sin6_len = sizeof(struct sockaddr_in6); - mltmask.sin6_family = AF_INET6; - mltmask.sin6_addr = in6mask32; -#define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */ - - /* - * join link-local all-nodes address - */ - bzero(&mltaddr, sizeof(mltaddr)); - mltaddr.sin6_len = sizeof(struct sockaddr_in6); - mltaddr.sin6_family = AF_INET6; - mltaddr.sin6_addr = in6addr_linklocal_allnodes; - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != - 0) - goto cleanup; /* XXX: should not fail */ - - /* - * XXX: do we really need this automatic routes? - * We should probably reconsider this stuff. Most applications - * actually do not need the routes, since they usually specify - * the outgoing interface. - */ - rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); - if (rt) { - /* XXX: only works in !SCOPEDROUTING case. */ - if (memcmp(&mltaddr.sin6_addr, - &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, - MLTMASK_LEN)) { - RTFREE_LOCKED(rt); - rt = NULL; - } - } - if (!rt) { - error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0); - if (error) - goto cleanup; - } else { - RTFREE_LOCKED(rt); - } - - imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); - if (!imm) { - nd6log((LOG_WARNING, - "in6_update_ifa: addmulti failed for " - "%s on %s (errno=%d)\n", - ip6_sprintf(ip6buf, &mltaddr.sin6_addr), - if_name(ifp), error)); - goto cleanup; - } - LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); - - /* - * join node information group address - */ - delay = 0; - if ((flags & IN6_IFAUPDATE_DADDELAY)) { - /* - * The spec doesn't say anything about delay for this - * group, but the same logic should apply. - */ - delay = arc4random() % - (MAX_RTR_SOLICITATION_DELAY * hz); - } - if (in6_nigroup(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) { - imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, - delay); /* XXX jinmei */ - if (!imm) { - nd6log((LOG_WARNING, "in6_update_ifa: " - "addmulti failed for %s on %s " - "(errno=%d)\n", - ip6_sprintf(ip6buf, &mltaddr.sin6_addr), - if_name(ifp), error)); - /* XXX not very fatal, go on... */ - } else { - LIST_INSERT_HEAD(&ia->ia6_memberships, - imm, i6mm_chain); - } - } - - /* - * join interface-local all-nodes address. - * (ff01::1%ifN, and ff01::%ifN/32) - */ - mltaddr.sin6_addr = in6addr_nodelocal_allnodes; - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) - != 0) - goto cleanup; /* XXX: should not fail */ - /* XXX: again, do we really need the route? */ - rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); - if (rt) { - if (memcmp(&mltaddr.sin6_addr, - &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, - MLTMASK_LEN)) { - RTFREE_LOCKED(rt); - rt = NULL; - } - } - if (!rt) { - error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0); - if (error) - goto cleanup; - } else - RTFREE_LOCKED(rt); - - imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); - if (!imm) { - nd6log((LOG_WARNING, "in6_update_ifa: " - "addmulti failed for %s on %s " - "(errno=%d)\n", - ip6_sprintf(ip6buf, &mltaddr.sin6_addr), - if_name(ifp), error)); - goto cleanup; - } - LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); -#undef MLTMASK_LEN } /* * Perform DAD, if needed. - * XXX It may be of use, if we can administratively - * disable DAD. + * XXX It may be of use, if we can administratively disable DAD. */ if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) && (ia->ia6_flags & IN6_IFF_TENTATIVE)) @@ -1183,79 +1188,20 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, return error; } -void -in6_purgeaddr(struct ifaddr *ifa) +/* + * Leave multicast groups. Factored out from in6_purgeaddr(). + * This entire work should only be done once, for the default FIB. + */ +static int +in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0) { - struct ifnet *ifp = ifa->ifa_ifp; - struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; - struct in6_multi_mship *imm; struct sockaddr_in6 mltaddr, mltmask; - struct rtentry rt0; - struct sockaddr_dl gateway; - struct sockaddr_in6 mask, addr; - int plen, error; + struct in6_multi_mship *imm; struct rtentry *rt; - struct ifaddr *ifa0; - - /* - * find another IPv6 address as the gateway for the - * link-local and node-local all-nodes multicast - * address routes - */ - IF_ADDR_LOCK(ifp); - TAILQ_FOREACH(ifa0, &ifp->if_addrhead, ifa_link) { - if ((ifa0->ifa_addr->sa_family != AF_INET6) || - memcmp(&satosin6(ifa0->ifa_addr)->sin6_addr, - &ia->ia_addr.sin6_addr, - sizeof(struct in6_addr)) == 0) - continue; - else - break; - } - if (ifa0 != NULL) - ifa_ref(ifa0); - IF_ADDR_UNLOCK(ifp); - - /* - * Remove the loopback route to the interface address. - * The check for the current setting of "nd6_useloopback" - * is not needed. - */ - if (ia->ia_flags & IFA_RTSELF) { - error = ifa_del_loopback_route((struct ifaddr *)ia, - (struct sockaddr *)&ia->ia_addr); - if (error == 0) - ia->ia_flags &= ~IFA_RTSELF; - } - - /* stop DAD processing */ - nd6_dad_stop(ifa); - - IF_AFDATA_LOCK(ifp); - lla_lookup(LLTABLE6(ifp), (LLE_DELETE | LLE_IFADDR), - (struct sockaddr *)&ia->ia_addr); - IF_AFDATA_UNLOCK(ifp); - - /* - * initialize for rtmsg generation - */ - bzero(&gateway, sizeof(gateway)); - gateway.sdl_len = sizeof(gateway); - gateway.sdl_family = AF_LINK; - gateway.sdl_nlen = 0; - gateway.sdl_alen = ifp->if_addrlen; - /* */ - bzero(&rt0, sizeof(rt0)); - rt0.rt_gateway = (struct sockaddr *)&gateway; - memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); - memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); - rt_mask(&rt0) = (struct sockaddr *)&mask; - rt_key(&rt0) = (struct sockaddr *)&addr; - rt0.rt_flags = RTF_HOST | RTF_STATIC; - rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0); + int error; /* - * leave from multicast groups we have joined for the interface + * Leave from multicast groups we have joined for the interface. */ while ((imm = ia->ia6_memberships.lh_first) != NULL) { LIST_REMOVE(imm, i6mm_chain); @@ -1263,7 +1209,7 @@ in6_purgeaddr(struct ifaddr *ifa) } /* - * remove the link-local all-nodes address + * Remove the link-local all-nodes address. */ bzero(&mltmask, sizeof(mltmask)); mltmask.sin6_len = sizeof(struct sockaddr_in6); @@ -1276,31 +1222,33 @@ in6_purgeaddr(struct ifaddr *ifa) mltaddr.sin6_addr = in6addr_linklocal_allnodes; if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) - goto cleanup; + return (error); - rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); if (rt != NULL && rt->rt_gateway != NULL && (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, &ia->ia_addr.sin6_addr, sizeof(ia->ia_addr.sin6_addr)) == 0)) { /* - * if no more IPv6 address exists on this interface - * then remove the multicast address route + * If no more IPv6 address exists on this interface then + * remove the multicast address route. */ if (ifa0 == NULL) { memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr, sizeof(mltaddr.sin6_addr)); RTFREE_LOCKED(rt); - error = rtrequest(RTM_DELETE, (struct sockaddr *)&mltaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0); + error = in6_rtrequest(RTM_DELETE, + (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0, RT_DEFAULT_FIB); if (error) - log(LOG_INFO, "in6_purgeaddr: link-local all-nodes" - "multicast address deletion error\n"); + log(LOG_INFO, "%s: link-local all-nodes " + "multicast address deletion error\n", + __func__); } else { /* - * replace the gateway of the route + * Replace the gateway of the route. */ struct sockaddr_in6 sa; @@ -1319,38 +1267,38 @@ in6_purgeaddr(struct ifaddr *ifa) } /* - * remove the node-local all-nodes address + * Remove the node-local all-nodes address. */ mltaddr.sin6_addr = in6addr_nodelocal_allnodes; - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != - 0) - goto cleanup; + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + return (error); - rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); if (rt != NULL && rt->rt_gateway != NULL && (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, &ia->ia_addr.sin6_addr, sizeof(ia->ia_addr.sin6_addr)) == 0)) { /* - * if no more IPv6 address exists on this interface - * then remove the multicast address route + * If no more IPv6 address exists on this interface then + * remove the multicast address route. */ if (ifa0 == NULL) { memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr, sizeof(mltaddr.sin6_addr)); RTFREE_LOCKED(rt); - error = rtrequest(RTM_DELETE, (struct sockaddr *)&mltaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0); - + error = in6_rtrequest(RTM_DELETE, + (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0, RT_DEFAULT_FIB); if (error) - log(LOG_INFO, "in6_purgeaddr: node-local all-nodes" - "multicast address deletion error\n"); + log(LOG_INFO, "%s: node-local all-nodes" + "multicast address deletion error\n", + __func__); } else { /* - * replace the gateway of the route + * Replace the gateway of the route. */ struct sockaddr_in6 sa; @@ -1368,31 +1316,91 @@ in6_purgeaddr(struct ifaddr *ifa) RTFREE_LOCKED(rt); } -cleanup: + return (0); +} + +void +in6_purgeaddr(struct ifaddr *ifa) +{ + struct ifnet *ifp = ifa->ifa_ifp; + struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; + struct sockaddr_dl gateway; + struct sockaddr_in6 mask, addr; + struct rtentry rt0; + int plen, error; + struct ifaddr *ifa0; + + /* + * find another IPv6 address as the gateway for the + * link-local and node-local all-nodes multicast + * address routes + */ + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa0, &ifp->if_addrhead, ifa_link) { + if ((ifa0->ifa_addr->sa_family != AF_INET6) || + memcmp(&satosin6(ifa0->ifa_addr)->sin6_addr, + &ia->ia_addr.sin6_addr, + sizeof(struct in6_addr)) == 0) + continue; + else + break; + } + if (ifa0 != NULL) + ifa_ref(ifa0); + IF_ADDR_UNLOCK(ifp); + + /* + * Remove the loopback route to the interface address. + * The check for the current setting of "nd6_useloopback" + * is not needed. + */ + if (ia->ia_flags & IFA_RTSELF) { + error = ifa_del_loopback_route((struct ifaddr *)ia, + (struct sockaddr *)&ia->ia_addr); + if (error == 0) + ia->ia_flags &= ~IFA_RTSELF; + } + + /* stop DAD processing */ + nd6_dad_stop(ifa); + + /* Remove local address entry from lltable. */ + IF_AFDATA_LOCK(ifp); + lla_lookup(LLTABLE6(ifp), (LLE_DELETE | LLE_IFADDR), + (struct sockaddr *)&ia->ia_addr); + IF_AFDATA_UNLOCK(ifp); + + /* + * initialize for rtmsg generation + */ + bzero(&gateway, sizeof(gateway)); + gateway.sdl_len = sizeof(gateway); + gateway.sdl_family = AF_LINK; + gateway.sdl_nlen = 0; + gateway.sdl_alen = ifp->if_addrlen; + /* */ + bzero(&rt0, sizeof(rt0)); + rt0.rt_gateway = (struct sockaddr *)&gateway; + memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); + memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); + rt_mask(&rt0) = (struct sockaddr *)&mask; + rt_key(&rt0) = (struct sockaddr *)&addr; + rt0.rt_flags = RTF_HOST | RTF_STATIC; + rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0); + + /* Leave multicast groups. */ + error = in6_purgeaddr_mc(ifp, ia, ifa0); + if (ifa0 != NULL) ifa_free(ifa0); plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ if ((ia->ia_flags & IFA_ROUTE) && plen == 128) { - int error; - struct sockaddr *dstaddr; - - /* - * use the interface address if configuring an - * interface address with a /128 prefix len - */ - if (ia->ia_dstaddr.sin6_family == AF_INET6) - dstaddr = (struct sockaddr *)&ia->ia_dstaddr; - else - dstaddr = (struct sockaddr *)&ia->ia_addr; - - error = rtrequest(RTM_DELETE, - (struct sockaddr *)dstaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&ia->ia_prefixmask, - ia->ia_flags | RTF_HOST, NULL); + error = rtinit(&(ia->ia_ifa), RTM_DELETE, ia->ia_flags | + (ia->ia_dstaddr.sin6_family == AF_INET6) ? RTF_HOST : 0); if (error != 0) - return; + log(LOG_INFO, "%s: err=%d, destination address delete " + "failed\n", __func__, error); ia->ia_flags &= ~IFA_ROUTE; } @@ -1724,8 +1732,7 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, } /* - * Initialize an interface's intetnet6 address - * and routing table entry. + * Initialize an interface's IPv6 address and routing table entry. */ static int in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, @@ -1775,13 +1782,8 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 && ia->ia_dstaddr.sin6_family == AF_INET6) { int rtflags = RTF_UP | RTF_HOST; - - error = rtrequest(RTM_ADD, - (struct sockaddr *)&ia->ia_dstaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&ia->ia_prefixmask, - ia->ia_flags | rtflags, NULL); - if (error != 0) + error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags); + if (error) return (error); ia->ia_flags |= IFA_ROUTE; /* @@ -1801,7 +1803,7 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, ia->ia_flags |= IFA_RTSELF; } - /* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). */ + /* Add local address to lltable, if necessary (ex. on p2p link). */ if (newhost) { struct llentry *ln; struct rtentry rt; @@ -1836,6 +1838,7 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, rt_mask(&rt) = (struct sockaddr *)&mask; rt_key(&rt) = (struct sockaddr *)&addr; rt.rt_flags = RTF_UP | RTF_HOST | RTF_STATIC; + /* Announce arrival of local address to all FIBs. */ rt_newaddrmsg(RTM_ADD, &ia->ia_ifa, 0, &rt); } @@ -2397,8 +2400,10 @@ in6_lltable_rtcheck(struct ifnet *ifp, KASSERT(l3addr->sa_family == AF_INET6, ("sin_family %d", l3addr->sa_family)); + /* Our local addresses are always only installed on the default FIB. */ /* XXX rtalloc1 should take a const param */ - rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0); + rt = in6_rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0, + RT_DEFAULT_FIB); if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) { struct ifaddr *ifa; /* diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c index e78683645..573287caa 100644 --- a/sys/netinet6/in6_gif.c +++ b/sys/netinet6/in6_gif.c @@ -228,6 +228,8 @@ in6_gif_output(struct ifnet *ifp, ip6->ip6_flow &= ~htonl(0xff << 20); ip6->ip6_flow |= htonl((u_int32_t)otos << 20); + M_SETFIB(m, sc->gif_fibnum); + if (dst->sin6_family != sin6_dst->sin6_family || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr)) { /* cache route doesn't match */ @@ -245,7 +247,7 @@ in6_gif_output(struct ifnet *ifp, } if (sc->gif_ro6.ro_rt == NULL) { - rtalloc((struct route *)&sc->gif_ro6); + in6_rtalloc(&sc->gif_ro6, sc->gif_fibnum); if (sc->gif_ro6.ro_rt == NULL) { m_freem(m); return ENETUNREACH; @@ -402,7 +404,8 @@ gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc, sin6.sin6_addr = ip6->ip6_src; sin6.sin6_scope_id = 0; /* XXX */ - rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); + rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, + sc->gif_fibnum); if (!rt || rt->rt_ifp != ifp) { #if 0 char ip6buf[INET6_ADDRSTRLEN]; diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c index d6b9566ed..c453b250c 100644 --- a/sys/netinet6/in6_ifattach.c +++ b/sys/netinet6/in6_ifattach.c @@ -784,7 +784,6 @@ in6_ifdetach(struct ifnet *ifp) struct ifaddr *ifa, *next; struct radix_node_head *rnh; struct rtentry *rt; - short rtflags; struct sockaddr_in6 sin6; struct in6_multi_mship *imm; @@ -815,16 +814,9 @@ in6_ifdetach(struct ifnet *ifp) in6_leavegroup(imm); } - /* remove from the routing table */ - if ((ia->ia_flags & IFA_ROUTE) && - (rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0UL))) { - rtflags = rt->rt_flags; - RTFREE_LOCKED(rt); - rtrequest(RTM_DELETE, (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&ia->ia_prefixmask, - rtflags, (struct rtentry **)0); - } + /* Remove link-local from the routing table. */ + if (ia->ia_flags & IFA_ROUTE) + (void)rtinit(&ia->ia_ifa, RTM_DELETE, ia->ia_flags); /* remove from the linked list */ IF_ADDR_LOCK(ifp); @@ -853,7 +845,10 @@ in6_ifdetach(struct ifnet *ifp) */ nd6_purge(ifp); - /* remove route to link-local allnodes multicast (ff02::1) */ + /* + * Remove route to link-local allnodes multicast (ff02::1). + * These only get automatically installed for the default FIB. + */ bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; @@ -862,10 +857,11 @@ in6_ifdetach(struct ifnet *ifp) /* XXX: should not fail */ return; /* XXX grab lock first to avoid LOR */ - rnh = rt_tables_get_rnh(0, AF_INET6); + rnh = rt_tables_get_rnh(RT_DEFAULT_FIB, AF_INET6); if (rnh != NULL) { RADIX_NODE_HEAD_LOCK(rnh); - rt = rtalloc1((struct sockaddr *)&sin6, 0, RTF_RNH_LOCKED); + rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, RTF_RNH_LOCKED, + RT_DEFAULT_FIB); if (rt) { if (rt->rt_ifp == ifp) rtexpunge(rt); diff --git a/sys/netinet6/in6_mcast.c b/sys/netinet6/in6_mcast.c index 456301fe7..be3979bea 100644 --- a/sys/netinet6/in6_mcast.c +++ b/sys/netinet6/in6_mcast.c @@ -1763,7 +1763,7 @@ ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt) * Returns NULL if no ifp could be found. */ static struct ifnet * -in6p_lookup_mcast_ifp(const struct inpcb *in6p __unused, +in6p_lookup_mcast_ifp(const struct inpcb *in6p, const struct sockaddr_in6 *gsin6) { struct route_in6 ro6; @@ -1779,11 +1779,8 @@ in6p_lookup_mcast_ifp(const struct inpcb *in6p __unused, ifp = NULL; memset(&ro6, 0, sizeof(struct route_in6)); memcpy(&ro6.ro_dst, gsin6, sizeof(struct sockaddr_in6)); -#ifdef notyet - rtalloc_ign_fib(&ro6, 0, inp ? inp->inp_inc.inc_fibnum : 0); -#else - rtalloc_ign((struct route *)&ro6, 0); -#endif + rtalloc_ign_fib((struct route *)&ro6, 0, + in6p ? in6p->inp_inc.inc_fibnum : RT_DEFAULT_FIB); if (ro6.ro_rt != NULL) { ifp = ro6.ro_rt->rt_ifp; KASSERT(ifp != NULL, ("%s: null ifp", __func__)); diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index 2a1364673..b5260308d 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -168,7 +168,8 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, * net route entry, 3ffe:0501:: -> if0. * This case should not raise an error. */ - rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED); + rt2 = in6_rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED, + rt->rt_fibnum); if (rt2) { if (((rt2->rt_flags & (RTF_HOST|RTF_GATEWAY)) == 0) && rt2->rt_gateway @@ -255,10 +256,11 @@ in6_rtqkill(struct radix_node *rn, void *rock) if (rt->rt_refcnt > 0) panic("rtqkill route really not free"); - err = rtrequest(RTM_DELETE, + err = in6_rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), - rt->rt_flags|RTF_RNH_LOCKED, 0); + rt->rt_flags|RTF_RNH_LOCKED, 0, + rt->rt_fibnum); if (err) { log(LOG_WARNING, "in6_rtqkill: error %d", err); } else { @@ -287,19 +289,11 @@ static VNET_DEFINE(struct callout, rtq_timer6); #define V_rtq_timer6 VNET(rtq_timer6) static void -in6_rtqtimo(void *rock) +in6_rtqtimo_one(struct radix_node_head *rnh) { - CURVNET_SET_QUIET((struct vnet *) rock); - struct radix_node_head *rnh; struct rtqk_arg arg; - struct timeval atv; static time_t last_adjusted_timeout = 0; - rnh = rt_tables_get_rnh(0, AF_INET6); - if (rnh == NULL) { - CURVNET_RESTORE(); - return; - } arg.found = arg.killed = 0; arg.rnh = rnh; arg.nextstop = time_uptime + V_rtq_timeout6; @@ -335,9 +329,24 @@ in6_rtqtimo(void *rock) rnh->rnh_walktree(rnh, in6_rtqkill, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); } +} + +static void +in6_rtqtimo(void *rock) +{ + CURVNET_SET_QUIET((struct vnet *) rock); + struct radix_node_head *rnh; + struct timeval atv; + u_int fibnum; + + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + rnh = rt_tables_get_rnh(fibnum, AF_INET6); + if (rnh != NULL) + in6_rtqtimo_one(rnh); + } atv.tv_usec = 0; - atv.tv_sec = arg.nextstop - time_uptime; + atv.tv_sec = V_rtq_timeout6; callout_reset(&V_rtq_timer6, tvtohz(&atv), in6_rtqtimo, rock); CURVNET_RESTORE(); } @@ -377,31 +386,33 @@ in6_mtuexpire(struct radix_node *rn, void *rock) #define MTUTIMO_DEFAULT (60*1) static void -in6_mtutimo(void *rock) +in6_mtutimo_one(struct radix_node_head *rnh) { - CURVNET_SET_QUIET((struct vnet *) rock); - struct radix_node_head *rnh; struct mtuex_arg arg; - struct timeval atv; - rnh = rt_tables_get_rnh(0, AF_INET6); - if (rnh == NULL) { - CURVNET_RESTORE(); - return; - } arg.rnh = rnh; arg.nextstop = time_uptime + MTUTIMO_DEFAULT; RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, in6_mtuexpire, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); +} - atv.tv_usec = 0; - atv.tv_sec = arg.nextstop - time_uptime; - if (atv.tv_sec < 0) { - printf("invalid mtu expiration time on routing table\n"); - arg.nextstop = time_uptime + 30; /* last resort */ - atv.tv_sec = 30; +static void +in6_mtutimo(void *rock) +{ + CURVNET_SET_QUIET((struct vnet *) rock); + struct radix_node_head *rnh; + struct timeval atv; + u_int fibnum; + + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + rnh = rt_tables_get_rnh(fibnum, AF_INET6); + if (rnh != NULL) + in6_mtutimo_one(rnh); } + + atv.tv_sec = MTUTIMO_DEFAULT; + atv.tv_usec = 0; callout_reset(&V_rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock); CURVNET_RESTORE(); } @@ -413,6 +424,9 @@ in6_mtutimo(void *rock) * value should be so just use that).. FIX AFTER RELENG_7 is MFC'd * see also comments in in_inithead() vfs_export.c and domain.h */ +static VNET_DEFINE(int, _in6_rt_was_here); +#define V__in6_rt_was_here VNET(_in6_rt_was_here) + int in6_inithead(void **head, int off) { @@ -425,13 +439,17 @@ in6_inithead(void **head, int off) return 1; /* only do the rest for the real thing */ rnh = *head; - KASSERT(rnh == rt_tables_get_rnh(0, AF_INET6), ("rnh?")); rnh->rnh_addaddr = in6_addroute; rnh->rnh_matchaddr = in6_matroute; - callout_init(&V_rtq_timer6, CALLOUT_MPSAFE); - callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE); - in6_rtqtimo(curvnet); /* kick off timeout first time */ - in6_mtutimo(curvnet); /* kick off timeout first time */ + + if (V__in6_rt_was_here == 0) { + callout_init(&V_rtq_timer6, CALLOUT_MPSAFE); + callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE); + in6_rtqtimo(curvnet); /* kick off timeout first time */ + in6_mtutimo(curvnet); /* kick off timeout first time */ + V__in6_rt_was_here = 1; + } + return 1; } @@ -445,3 +463,43 @@ in6_detachhead(void **head, int off) return (1); } #endif + +/* + * Extended API for IPv6 FIB support. + */ +void +in6_rtredirect(struct sockaddr *dst, struct sockaddr *gw, struct sockaddr *nm, + int flags, struct sockaddr *src, u_int fibnum) +{ + + rtredirect_fib(dst, gw, nm, flags, src, fibnum); +} + +int +in6_rtrequest(int req, struct sockaddr *dst, struct sockaddr *gw, + struct sockaddr *mask, int flags, struct rtentry **ret_nrt, u_int fibnum) +{ + + return (rtrequest_fib(req, dst, gw, mask, flags, ret_nrt, fibnum)); +} + +void +in6_rtalloc(struct route_in6 *ro, u_int fibnum) +{ + + rtalloc_ign_fib((struct route *)ro, 0ul, fibnum); +} + +void +in6_rtalloc_ign(struct route_in6 *ro, u_long ignflags, u_int fibnum) +{ + + rtalloc_ign_fib((struct route *)ro, ignflags, fibnum); +} + +struct rtentry * +in6_rtalloc1(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) +{ + + return (rtalloc1_fib(dst, report, ignflags, fibnum)); +} diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index b491e0e7a..d8e2f71d7 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -129,9 +129,10 @@ VNET_DEFINE(int, ip6_prefer_tempaddr) = 0; static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, - struct rtentry **, int)); + struct rtentry **, int, u_int)); static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, - struct ip6_moptions *, struct route_in6 *ro, struct ifnet **)); + struct ip6_moptions *, struct route_in6 *ro, struct ifnet **, + struct ifnet *, u_int)); static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); @@ -182,7 +183,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ifnet **ifpp, struct in6_addr *srcp) { struct in6_addr dst, tmp; - struct ifnet *ifp = NULL; + struct ifnet *ifp = NULL, *oifp = NULL; struct in6_ifaddr *ia = NULL, *ia_best = NULL; struct in6_pktinfo *pi = NULL; int dst_scope = -1, best_scope = -1, best_matchlen = -1; @@ -195,8 +196,18 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__)); dst = dstsock->sin6_addr; /* make a copy for local operation */ - if (ifpp) + if (ifpp) { + /* + * Save a possibly passed in ifp for in6_selectsrc. Only + * neighbor discovery code should use this feature, where + * we may know the interface but not the FIB number holding + * the connected subnet in case someone deleted it from the + * default FIB and we need to check the interface. + */ + if (*ifpp != NULL) + oifp = *ifpp; *ifpp = NULL; + } if (inp != NULL) { INP_LOCK_ASSERT(inp); @@ -217,7 +228,9 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct in6_ifaddr *ia6; /* get the outgoing interface */ - if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) + if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp, + (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) + != 0) return (error); /* @@ -281,7 +294,8 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * the outgoing interface and the destination address. */ /* get the outgoing interface */ - if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) + if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp, + (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0) return (error); #ifdef DIAGNOSTIC @@ -504,7 +518,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, static int selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, - struct ifnet **retifp, struct rtentry **retrt, int norouteok) + struct ifnet **retifp, struct rtentry **retrt, int norouteok, u_int fibnum) { int error = 0; struct ifnet *ifp = NULL; @@ -581,7 +595,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, if (ron->ro_rt == NULL) { - rtalloc((struct route *)ron); /* multi path case? */ + in6_rtalloc(ron, fibnum); /* multi path case? */ if (ron->ro_rt == NULL) { if (ron->ro_rt) { RTFREE(ron->ro_rt); @@ -616,7 +630,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, *satosin6(&ron->ro_dst) = *sin6_next; } if (ron->ro_rt == NULL) { - rtalloc((struct route *)ron); /* multi path case? */ + in6_rtalloc(ron, fibnum); /* multi path case? */ if (ron->ro_rt == NULL || !(ron->ro_rt->rt_flags & RTF_LLINFO)) { if (ron->ro_rt) { @@ -661,11 +675,11 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, sa6->sin6_scope_id = 0; #ifdef RADIX_MPATH - rtalloc_mpath((struct route *)ro, - ntohl(sa6->sin6_addr.s6_addr32[3])); + rtalloc_mpath_fib((struct route *)ro, + ntohl(sa6->sin6_addr.s6_addr32[3]), fibnum); #else - ro->ro_rt = rtalloc1(&((struct route *)ro) - ->ro_dst, 0, 0UL); + ro->ro_rt = in6_rtalloc1((struct sockaddr *) + &ro->ro_dst, 0, 0UL, fibnum); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); #endif @@ -746,21 +760,29 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, static int in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp) + struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, + struct ifnet *oifp, u_int fibnum) { int error; struct route_in6 sro; struct rtentry *rt = NULL; + KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__)); + if (ro == NULL) { bzero(&sro, sizeof(sro)); ro = &sro; } if ((error = selectroute(dstsock, opts, mopts, ro, retifp, - &rt, 1)) != 0) { + &rt, 1, fibnum)) != 0) { if (ro == &sro && rt && rt == sro.ro_rt) RTFREE(rt); + /* Help ND. See oifp comment in in6_selectsrc(). */ + if (oifp != NULL && fibnum == RT_DEFAULT_FIB) { + *retifp = oifp; + error = 0; + } return (error); } @@ -795,7 +817,10 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } /* - * clone - meaningful only for bsdi and freebsd + * Public wrapper function to selectroute(). + * + * XXX-BZ in6_selectroute() should and will grow the FIB argument. The + * in6_selectroute_fib() function is only there for backward compat on stable. */ int in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, @@ -804,9 +829,21 @@ in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, { return (selectroute(dstsock, opts, mopts, ro, retifp, - retrt, 0)); + retrt, 0, RT_DEFAULT_FIB)); } +#ifndef BURN_BRIDGES +int +in6_selectroute_fib(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, + struct ip6_moptions *mopts, struct route_in6 *ro, + struct ifnet **retifp, struct rtentry **retrt, u_int fibnum) +{ + + return (selectroute(dstsock, opts, mopts, ro, retifp, + retrt, 0, fibnum)); +} +#endif + /* * Default hop limit selection. The precedence is as follows: * 1. Hoplimit value specified via ioctl. @@ -830,7 +867,7 @@ in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp) ro6.ro_dst.sin6_family = AF_INET6; ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); ro6.ro_dst.sin6_addr = in6p->in6p_faddr; - rtalloc((struct route *)&ro6); + in6_rtalloc(&ro6, in6p->inp_inc.inc_fibnum); if (ro6.ro_rt) { lifp = ro6.ro_rt->rt_ifp; RTFREE(ro6.ro_rt); diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h index 756667035..d48773f1d 100644 --- a/sys/netinet6/in6_var.h +++ b/sys/netinet6/in6_var.h @@ -780,6 +780,17 @@ void in6_ifaddloop(struct ifaddr *); int in6_is_addr_deprecated __P((struct sockaddr_in6 *)); int in6_src_ioctl __P((u_long, caddr_t)); + +/* + * Extended API for IPv6 FIB support. + */ +void in6_rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, + int, struct sockaddr *, u_int); +int in6_rtrequest(int, struct sockaddr *, struct sockaddr *, + struct sockaddr *, int, struct rtentry **, u_int); +void in6_rtalloc(struct route_in6 *, u_int); +void in6_rtalloc_ign(struct route_in6 *, u_long, u_int); +struct rtentry *in6_rtalloc1(struct sockaddr *, int, u_long, u_int); #endif /* _KERNEL */ #endif /* _NETINET6_IN6_VAR_H_ */ diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c index cff29e19b..c71776759 100644 --- a/sys/netinet6/ip6_forward.c +++ b/sys/netinet6/ip6_forward.c @@ -352,7 +352,7 @@ skip_ipsec: dst->sin6_family = AF_INET6; dst->sin6_addr = ip6->ip6_dst; - rin6.ro_rt = rtalloc1((struct sockaddr *)dst, 0, 0); + rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); if (rin6.ro_rt != NULL) RT_UNLOCK(rin6.ro_rt); else { diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 089cee183..ff3f131ee 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -603,7 +603,7 @@ passin: dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = ip6->ip6_dst; - rin6.ro_rt = rtalloc1((struct sockaddr *)dst, 0, 0); + rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); if (rin6.ro_rt) RT_UNLOCK(rin6.ro_rt); diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 6565bf0df..ff98e957a 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -135,7 +135,7 @@ static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int, static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *, - struct ifnet *, struct in6_addr *, u_long *, int *)); + struct ifnet *, struct in6_addr *, u_long *, int *, u_int)); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); @@ -230,6 +230,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, goto bad; } + if (inp != NULL) + M_SETFIB(m, inp->inp_inc.inc_fibnum); + finaldst = ip6->ip6_dst; bzero(&exthdrs, sizeof(exthdrs)); @@ -576,8 +579,8 @@ again: dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; - if ((error = in6_selectroute(&dst_sa, opt, im6o, ro, - &ifp, &rt)) != 0) { + if ((error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, + &ifp, &rt, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m))) != 0) { switch (error) { case EHOSTUNREACH: V_ip6stat.ip6s_noroute++; @@ -745,7 +748,7 @@ again: /* Determine path MTU. */ if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu, - &alwaysfrag)) != 0) + &alwaysfrag, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m))) != 0) goto bad; /* @@ -1009,7 +1012,7 @@ passout: goto sendorfree; } m->m_pkthdr.rcvif = NULL; - m->m_flags = m0->m_flags & M_COPYFLAGS; + m->m_flags = m0->m_flags & M_COPYFLAGS; /* incl. FIB */ *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; @@ -1264,7 +1267,7 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, static int ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, struct ifnet *ifp, struct in6_addr *dst, u_long *mtup, - int *alwaysfragp) + int *alwaysfragp, u_int fibnum) { u_int32_t mtu = 0; int alwaysfrag = 0; @@ -1286,7 +1289,7 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; - rtalloc((struct route *)ro_pmtu); + in6_rtalloc(ro_pmtu, fibnum); } } if (ro_pmtu->ro_rt) { @@ -1364,7 +1367,23 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) optval = 0; uproto = (int)so->so_proto->pr_protocol; - if (level == IPPROTO_IPV6) { + if (level != IPPROTO_IPV6) { + error = EINVAL; + + if (sopt->sopt_level == SOL_SOCKET && + sopt->sopt_dir == SOPT_SET) { + switch (sopt->sopt_name) { + case SO_SETFIB: + INP_WLOCK(in6p); + in6p->inp_inc.inc_fibnum = so->so_fibnum; + INP_WUNLOCK(in6p); + error = 0; + break; + default: + break; + } + } + } else { switch (op) { case SOPT_SET: @@ -1887,7 +1906,8 @@ do { \ * the outgoing interface. */ error = ip6_getpmtu(&sro, NULL, NULL, - &in6p->in6p_faddr, &pmtu, NULL); + &in6p->in6p_faddr, &pmtu, NULL, + so->so_fibnum); if (sro.ro_rt) RTFREE(sro.ro_rt); if (error) @@ -1987,8 +2007,6 @@ do { \ } break; } - } else { /* level != IPPROTO_IPV6 */ - error = EINVAL; } return (error); } diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index 51ae1f749..9e210e2dd 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -437,6 +437,9 @@ int in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *, int in6_selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **)); +int in6_selectroute_fib(struct sockaddr_in6 *, struct ip6_pktopts *, + struct ip6_moptions *, struct route_in6 *, struct ifnet **, + struct rtentry **, u_int); u_int32_t ip6_randomid __P((void)); u_int32_t ip6_randomflowlabel __P((void)); #endif /* _KERNEL */ diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index f3d9e3196..88c8106ab 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -910,7 +910,10 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) { struct rtentry *rt; - rt = rtalloc1((struct sockaddr *)&pr->ndpr_prefix, 0, 0); + + /* Always use the default FIB here. */ + rt = in6_rtalloc1((struct sockaddr *)&pr->ndpr_prefix, + 0, 0, RT_DEFAULT_FIB); if (rt == NULL) continue; /* diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index f86aa2c25..2835f17f1 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -84,6 +84,8 @@ static void nd6_dad_timer(struct dadq *); static void nd6_dad_ns_output(struct dadq *, struct ifaddr *); static void nd6_dad_ns_input(struct ifaddr *); static void nd6_dad_na_input(struct ifaddr *); +static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *, + const struct in6_addr *, u_long, int, struct sockaddr *, u_int); VNET_DEFINE(int, dad_ignore_ns) = 0; /* ignore NS in DAD - specwise incorrect*/ VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to transmit DAD packet */ @@ -237,13 +239,16 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) tsin6.sin6_family = AF_INET6; tsin6.sin6_addr = taddr6; + /* Always use the default FIB. */ #ifdef RADIX_MPATH bzero(&ro, sizeof(ro)); ro.ro_dst = tsin6; - rtalloc_mpath((struct route *)&ro, RTF_ANNOUNCE); + rtalloc_mpath_fib((struct route *)&ro, RTF_ANNOUNCE, + RT_DEFAULT_FIB); rt = ro.ro_rt; #else - rt = rtalloc1((struct sockaddr *)&tsin6, 0, 0); + rt = in6_rtalloc1((struct sockaddr *)&tsin6, 0, 0, + RT_DEFAULT_FIB); #endif need_proxy = (rt && (rt->rt_flags & RTF_ANNOUNCE) != 0 && rt->rt_gateway->sa_family == AF_LINK); @@ -336,20 +341,21 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) in6_all = in6addr_linklocal_allnodes; if (in6_setscope(&in6_all, ifp, NULL) != 0) goto bad; - nd6_na_output(ifp, &in6_all, &taddr6, + nd6_na_output_fib(ifp, &in6_all, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | (V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), - tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL); + tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL, + M_GETFIB(m)); goto freeit; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_NEIGHBOR_SOLICIT, 0); - nd6_na_output(ifp, &saddr6, &taddr6, + nd6_na_output_fib(ifp, &saddr6, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | (V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0) | ND_NA_FLAG_SOLICITED, - tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL); + tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m)); freeit: if (ifa != NULL) ifa_free(ifa); @@ -500,14 +506,16 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, int error; struct sockaddr_in6 dst_sa; struct in6_addr src_in; + struct ifnet *oifp; bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; + oifp = ifp; error = in6_selectsrc(&dst_sa, NULL, - NULL, &ro, NULL, NULL, &src_in); + NULL, &ro, NULL, &oifp, &src_in); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, @@ -922,12 +930,13 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * tlladdr - 1 if include target link-layer address * sdl0 - sockaddr_dl (= proxy NA) or NULL */ -void -nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, +static void +nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, const struct in6_addr *taddr6, u_long flags, int tlladdr, - struct sockaddr *sdl0) + struct sockaddr *sdl0, u_int fibnum) { struct mbuf *m; + struct ifnet *oifp; struct ip6_hdr *ip6; struct nd_neighbor_advert *nd_na; struct ip6_moptions im6o; @@ -963,6 +972,7 @@ nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, if (m == NULL) return; m->m_pkthdr.rcvif = NULL; + M_SETFIB(m, fibnum); if (IN6_IS_ADDR_MULTICAST(&daddr6)) { m->m_flags |= M_MCAST; @@ -1004,7 +1014,8 @@ nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, * Select a source whose scope is the same as that of the dest. */ bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa)); - error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, NULL, &src); + oifp = ifp; + error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, &oifp, &src); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "nd6_na_output: source can't be " @@ -1084,6 +1095,18 @@ nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, return; } +#ifndef BURN_BRIDGES +void +nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, + const struct in6_addr *taddr6, u_long flags, int tlladdr, + struct sockaddr *sdl0) +{ + + nd6_na_output_fib(ifp, daddr6_0, taddr6, flags, tlladdr, sdl0, + RT_DEFAULT_FIB); +} +#endif + caddr_t nd6_ifptomac(struct ifnet *ifp) { diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index b58d47cb2..4468e2dd3 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -453,7 +453,7 @@ nd6_rtmsg(int cmd, struct rtentry *rt) } else ifa = NULL; - rt_missmsg(cmd, &info, rt->rt_flags, 0); + rt_missmsg_fib(cmd, &info, rt->rt_flags, 0, rt->rt_fibnum); if (ifa != NULL) ifa_free(ifa); } @@ -476,9 +476,9 @@ defrouter_addreq(struct nd_defrouter *new) gate.sin6_addr = new->rtaddr; s = splnet(); - error = rtrequest(RTM_ADD, (struct sockaddr *)&def, + error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, - RTF_GATEWAY, &newrt); + RTF_GATEWAY, &newrt, RT_DEFAULT_FIB); if (newrt) { nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ RTFREE(newrt); @@ -523,9 +523,9 @@ defrouter_delreq(struct nd_defrouter *dr) def.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = dr->rtaddr; - rtrequest(RTM_DELETE, (struct sockaddr *)&def, + in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def, (struct sockaddr *)&gate, - (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt); + (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, RT_DEFAULT_FIB); if (oldrt) { nd6_rtmsg(RTM_DELETE, oldrt); RTFREE(oldrt); @@ -1554,19 +1554,92 @@ pfxlist_onlink_check() } } +static int +nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa) +{ + static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; + struct radix_node_head *rnh; + struct rtentry *rt; + struct sockaddr_in6 mask6; + u_long rtflags; + int error, a_failure, fibnum; + + /* + * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs. + * ifa->ifa_rtrequest = nd6_rtrequest; + */ + bzero(&mask6, sizeof(mask6)); + mask6.sin6_len = sizeof(mask6); + mask6.sin6_addr = pr->ndpr_mask; + rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP; + + a_failure = 0; + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + + rt = NULL; + error = in6_rtrequest(RTM_ADD, + (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr, + (struct sockaddr *)&mask6, rtflags, &rt, fibnum); + if (error == 0) { + KASSERT(rt != NULL, ("%s: in6_rtrequest return no " + "error(%d) but rt is NULL, pr=%p, ifa=%p", __func__, + error, pr, ifa)); + + rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6); + /* XXX what if rhn == NULL? */ + RADIX_NODE_HEAD_LOCK(rnh); + RT_LOCK(rt); + if (rt_setgate(rt, rt_key(rt), + (struct sockaddr *)&null_sdl) == 0) { + struct sockaddr_dl *dl; + + dl = (struct sockaddr_dl *)rt->rt_gateway; + dl->sdl_type = rt->rt_ifp->if_type; + dl->sdl_index = rt->rt_ifp->if_index; + } + RADIX_NODE_HEAD_UNLOCK(rnh); + nd6_rtmsg(RTM_ADD, rt); + RT_UNLOCK(rt); + pr->ndpr_stateflags |= NDPRF_ONLINK; + } else { + char ip6buf[INET6_ADDRSTRLEN]; + char ip6bufg[INET6_ADDRSTRLEN]; + char ip6bufm[INET6_ADDRSTRLEN]; + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; + nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add " + "route for a prefix (%s/%d) on %s, gw=%s, mask=%s, " + "flags=%lx errno = %d\n", + ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen, if_name(pr->ndpr_ifp), + ip6_sprintf(ip6bufg, &sin6->sin6_addr), + ip6_sprintf(ip6bufm, &mask6.sin6_addr), + rtflags, error)); + + /* Save last error to return, see rtinit(). */ + a_failure = error; + } + + if (rt != NULL) { + RT_LOCK(rt); + RT_REMREF(rt); + RT_UNLOCK(rt); + } + } + + /* Return the last error we got. */ + return (a_failure); +} + static int nd6_prefix_onlink(struct nd_prefix *pr) { struct ifaddr *ifa; struct ifnet *ifp = pr->ndpr_ifp; - struct sockaddr_in6 mask6; struct nd_prefix *opr; - u_long rtflags; int error = 0; - struct radix_node_head *rnh; - struct rtentry *rt = NULL; char ip6buf[INET6_ADDRSTRLEN]; - struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; /* sanity check */ if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { @@ -1630,49 +1703,8 @@ nd6_prefix_onlink(struct nd_prefix *pr) return (0); } - /* - * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs. - * ifa->ifa_rtrequest = nd6_rtrequest; - */ - bzero(&mask6, sizeof(mask6)); - mask6.sin6_len = sizeof(mask6); - mask6.sin6_addr = pr->ndpr_mask; - rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP; - error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix, - ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt); - if (error == 0) { - if (rt != NULL) /* this should be non NULL, though */ { - rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6); - /* XXX what if rhn == NULL? */ - RADIX_NODE_HEAD_LOCK(rnh); - RT_LOCK(rt); - if (!rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl)) { - ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = - rt->rt_ifp->if_type; - ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = - rt->rt_ifp->if_index; - } - RADIX_NODE_HEAD_UNLOCK(rnh); - nd6_rtmsg(RTM_ADD, rt); - RT_UNLOCK(rt); - } - pr->ndpr_stateflags |= NDPRF_ONLINK; - } else { - char ip6bufg[INET6_ADDRSTRLEN], ip6bufm[INET6_ADDRSTRLEN]; - nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a" - " prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx " - "errno = %d\n", - ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), - pr->ndpr_plen, if_name(ifp), - ip6_sprintf(ip6bufg, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr), - ip6_sprintf(ip6bufm, &mask6.sin6_addr), rtflags, error)); - } + error = nd6_prefix_onlink_rtrequest(pr, ifa); - if (rt != NULL) { - RT_LOCK(rt); - RT_REMREF(rt); - RT_UNLOCK(rt); - } if (ifa != NULL) ifa_free(ifa); @@ -1686,8 +1718,9 @@ nd6_prefix_offlink(struct nd_prefix *pr) struct ifnet *ifp = pr->ndpr_ifp; struct nd_prefix *opr; struct sockaddr_in6 sa6, mask6; - struct rtentry *rt = NULL; + struct rtentry *rt; char ip6buf[INET6_ADDRSTRLEN]; + int fibnum, a_failure; /* sanity check */ if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { @@ -1707,15 +1740,28 @@ nd6_prefix_offlink(struct nd_prefix *pr) mask6.sin6_family = AF_INET6; mask6.sin6_len = sizeof(sa6); bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr)); - error = rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL, - (struct sockaddr *)&mask6, 0, &rt); + + a_failure = 0; + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + rt = NULL; + error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL, + (struct sockaddr *)&mask6, 0, &rt, fibnum); + if (error == 0) { + /* report the route deletion to the routing socket. */ + if (rt != NULL) + nd6_rtmsg(RTM_DELETE, rt); + } else { + /* Save last error to return, see rtinit(). */ + a_failure = error; + } + if (rt != NULL) { + RTFREE(rt); + } + } + error = a_failure; if (error == 0) { pr->ndpr_stateflags &= ~NDPRF_ONLINK; - /* report the route deletion to the routing socket. */ - if (rt != NULL) - nd6_rtmsg(RTM_DELETE, rt); - /* * There might be the same prefix on another interface, * the prefix which could not be on-link just because we have @@ -1763,10 +1809,6 @@ nd6_prefix_offlink(struct nd_prefix *pr) if_name(ifp), error)); } - if (rt != NULL) { - RTFREE(rt); - } - return (error); } @@ -2083,6 +2125,7 @@ void rt6_flush(struct in6_addr *gateway, struct ifnet *ifp) { struct radix_node_head *rnh; + u_int fibnum; int s = splnet(); /* We'll care only link-local addresses */ @@ -2091,13 +2134,16 @@ rt6_flush(struct in6_addr *gateway, struct ifnet *ifp) return; } - rnh = rt_tables_get_rnh(0, AF_INET6); - if (rnh == NULL) - return; + /* XXX Do we really need to walk any but the default FIB? */ + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + rnh = rt_tables_get_rnh(fibnum, AF_INET6); + if (rnh == NULL) + continue; - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway); - RADIX_NODE_HEAD_UNLOCK(rnh); + RADIX_NODE_HEAD_LOCK(rnh); + rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway); + RADIX_NODE_HEAD_UNLOCK(rnh); + } splx(s); } @@ -2130,8 +2176,8 @@ rt6_deleteroute(struct radix_node *rn, void *arg) if ((rt->rt_flags & RTF_HOST) == 0) return (0); - return (rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, - rt_mask(rt), rt->rt_flags, 0)); + return (in6_rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, + rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum)); #undef SIN6 } diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index 74d83567b..6bca19fd3 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -562,6 +562,7 @@ rip6_output(m, va_alist) int rip6_ctloutput(struct socket *so, struct sockopt *sopt) { + struct inpcb *inp; int error; if (sopt->sopt_level == IPPROTO_ICMPV6) @@ -570,8 +571,17 @@ rip6_ctloutput(struct socket *so, struct sockopt *sopt) * from protosw? */ return (icmp6_ctloutput(so, sopt)); - else if (sopt->sopt_level != IPPROTO_IPV6) + else if (sopt->sopt_level != IPPROTO_IPV6) { + if (sopt->sopt_level == SOL_SOCKET && + sopt->sopt_name == SO_SETFIB) { + inp = sotoinpcb(so); + INP_WLOCK(inp); + inp->inp_inc.inc_fibnum = so->so_fibnum; + INP_WUNLOCK(inp); + return (0); + } return (EINVAL); + } error = 0; diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 7c21d8372..60955a7fa 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -843,7 +843,7 @@ ipsec6_output_tunnel(struct ipsec_output_state *state, struct secpolicy *sp, int dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = ip6->ip6_dst; - rtalloc(state->ro); + rtalloc_ign_fib(state->ro, 0UL, M_GETFIB(m)); } if (state->ro->ro_rt == NULL) { V_ip6stat.ip6s_noroute++; diff --git a/sys/nfs/bootp_subr.c b/sys/nfs/bootp_subr.c index ca90089d0..f4759bc6f 100644 --- a/sys/nfs/bootp_subr.c +++ b/sys/nfs/bootp_subr.c @@ -1133,10 +1133,9 @@ bootpc_adjust_interface(struct bootpc_ifcontext *ifctx, clear_sinaddr(&defmask); /* XXX MRT just table 0 */ error = rtrequest_fib(RTM_ADD, - (struct sockaddr *) &defdst, - (struct sockaddr *) gw, - (struct sockaddr *) &defmask, - (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL, 0); + (struct sockaddr *) &defdst, (struct sockaddr *) gw, + (struct sockaddr *) &defmask, + (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL, RT_DEFAULT_FIB); if (error != 0) { printf("bootpc_adjust_interface: " "add net route, error=%d\n", error); diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c index 258f28c33..5b8761133 100644 --- a/sys/nfsclient/nfs_vfsops.c +++ b/sys/nfsclient/nfs_vfsops.c @@ -508,10 +508,10 @@ nfs_mountroot(struct mount *mp) sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ CURVNET_SET(TD_TO_VNET(td)); - error = rtrequest(RTM_ADD, (struct sockaddr *)&sin, + error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, - RTF_UP | RTF_GATEWAY, NULL); + RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); CURVNET_RESTORE(); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); diff --git a/usr.bin/netstat/route.c b/usr.bin/netstat/route.c index 42596460f..b347d7ea5 100644 --- a/usr.bin/netstat/route.c +++ b/usr.bin/netstat/route.c @@ -117,7 +117,6 @@ typedef union { static sa_u pt_u; -int fibnum; int do_rtent = 0; struct rtentry rtentry; struct radix_node rnode; @@ -152,8 +151,7 @@ routepr(u_long rtree) { struct radix_node_head **rnhp, *rnh, head; size_t intsize; - int i; - int numfibs; + int fam, fibnum, numfibs; intsize = sizeof(int); if (sysctlbyname("net.my_fibnum", &fibnum, &intsize, NULL, 0) == -1) @@ -185,15 +183,20 @@ routepr(u_long rtree) if (kread((u_long)(rtree), (char *)(rt_tables), (numfibs * (AF_MAX+1) * sizeof(struct radix_node_head *))) != 0) return; - for (i = 0; i <= AF_MAX; i++) { + for (fam = 0; fam <= AF_MAX; fam++) { int tmpfib; - if (i != AF_INET) - tmpfib = 0; - else + + switch (fam) { + case AF_INET6: + case AF_INET: tmpfib = fibnum; + break; + default: + tmpfib = 0; + } rnhp = (struct radix_node_head **)*rt_tables; /* Calculate the in-kernel address. */ - rnhp += tmpfib * (AF_MAX+1) + i; + rnhp += tmpfib * (AF_MAX+1) + fam; /* Read the in kernel rhn pointer. */ if (kget(rnhp, rnh) != 0) continue; @@ -202,16 +205,16 @@ routepr(u_long rtree) /* Read the rnh data. */ if (kget(rnh, head) != 0) continue; - if (i == AF_UNSPEC) { + if (fam == AF_UNSPEC) { if (Aflag && af == 0) { printf("Netmasks:\n"); p_tree(head.rnh_treetop); } - } else if (af == AF_UNSPEC || af == i) { - size_cols(i, head.rnh_treetop); - pr_family(i); + } else if (af == AF_UNSPEC || af == fam) { + size_cols(fam, head.rnh_treetop); + pr_family(fam); do_rtent = 1; - pr_rthdr(i); + pr_rthdr(fam); p_tree(head.rnh_treetop); } } -- 2.45.0