2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1980, 1986, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95
34 /************************************************************************
35 * Note: In this file a 'fib' is a "forwarding information base" *
36 * Which is the new name for an in kernel routing (next hop) table. *
37 ***********************************************************************/
40 #include "opt_inet6.h"
41 #include "opt_mrouting.h"
42 #include "opt_mpath.h"
43 #include "opt_route.h"
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/malloc.h>
49 #include <sys/socket.h>
50 #include <sys/sysctl.h>
51 #include <sys/syslog.h>
52 #include <sys/sysproto.h>
54 #include <sys/domain.h>
55 #include <sys/eventhandler.h>
56 #include <sys/kernel.h>
58 #include <sys/rmlock.h>
61 #include <net/if_var.h>
62 #include <net/if_dl.h>
63 #include <net/route.h>
64 #include <net/route/route_ctl.h>
65 #include <net/route/route_var.h>
66 #include <net/route/nhop.h>
67 #include <net/route/shared.h>
71 #include <net/radix_mpath.h>
74 #include <netinet/in.h>
75 #include <netinet/ip_mroute.h>
79 #define RT_MAXFIBS UINT16_MAX
81 /* Kernel config default option. */
84 #error "ROUTETABLES defined too low"
86 #if ROUTETABLES > RT_MAXFIBS
87 #error "ROUTETABLES defined too big"
89 #define RT_NUMFIBS ROUTETABLES
90 #endif /* ROUTETABLES */
91 /* Initialize to default if not otherwise set. */
96 /* This is read-only.. */
97 u_int rt_numfibs = RT_NUMFIBS;
98 SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, "");
101 * By default add routes to all fibs for new interfaces.
102 * Once this is set to 0 then only allocate routes on interface
103 * changes for the FIB of the caller when adding a new set of addresses
104 * to an interface. XXX this is a shotgun approach to a problem that needs
105 * a more fine grained solution.. that will come.
106 * XXX also has the problems getting the FIB from curthread which will not
107 * always work given the fib can be overridden and prefixes can be added
108 * from the network stack context.
110 VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1;
111 SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET,
112 &VNET_NAME(rt_add_addr_allfibs), 0, "");
114 VNET_PCPUSTAT_DEFINE(struct rtstat, rtstat);
116 VNET_PCPUSTAT_SYSINIT(rtstat);
118 VNET_PCPUSTAT_SYSUNINIT(rtstat);
121 VNET_DEFINE(struct rib_head *, rt_tables);
122 #define V_rt_tables VNET(rt_tables)
125 VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */
126 #define V_rtzone VNET(rtzone)
128 EVENTHANDLER_LIST_DEFINE(rt_addrmsg);
130 static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *,
132 static void destroy_rtentry_epoch(epoch_context_t ctx);
133 static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info,
137 * handler for net.my_fibnum
140 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
145 fibnum = curthread->td_proc->p_fibnum;
146 error = sysctl_handle_int(oidp, &fibnum, 0, req);
150 SYSCTL_PROC(_net, OID_AUTO, my_fibnum,
151 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
152 &sysctl_my_fibnum, "I",
153 "default FIB of caller");
155 static __inline struct rib_head **
156 rt_tables_get_rnh_ptr(int table, int fam)
158 struct rib_head **rnh;
160 KASSERT(table >= 0 && table < rt_numfibs,
161 ("%s: table out of bounds (0 <= %d < %d)", __func__, table,
163 KASSERT(fam >= 0 && fam < (AF_MAX + 1),
164 ("%s: fam out of bounds (0 <= %d < %d)", __func__, fam, AF_MAX+1));
166 /* rnh is [fib=0][af=0]. */
167 rnh = (struct rib_head **)V_rt_tables;
168 /* Get the offset to the requested table and fam. */
169 rnh += table * (AF_MAX+1) + fam;
175 rt_tables_get_rnh(int table, int fam)
178 return (*rt_tables_get_rnh_ptr(table, fam));
182 rt_tables_get_gen(int table, int fam)
184 struct rib_head *rnh;
186 rnh = *rt_tables_get_rnh_ptr(table, fam);
187 KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d fam %d",
188 __func__, table, fam));
189 return (rnh->rnh_gen);
194 * route initialization must occur before ip6_init2(), which happens at
201 /* whack the tunable ints into line. */
202 if (rt_numfibs > RT_MAXFIBS)
203 rt_numfibs = RT_MAXFIBS;
208 SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, NULL);
211 rtentry_zinit(void *mem, int size, int how)
213 struct rtentry *rt = mem;
221 rtentry_zfini(void *mem, int size)
223 struct rtentry *rt = mem;
229 rtentry_ctor(void *mem, int size, void *arg, int how)
231 struct rtentry *rt = mem;
233 bzero(rt, offsetof(struct rtentry, rt_endzero));
240 rtentry_dtor(void *mem, int size, void *arg)
242 struct rtentry *rt = mem;
248 vnet_route_init(const void *unused __unused)
251 struct rib_head **rnh;
255 V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
256 sizeof(struct rib_head *), M_RTABLE, M_WAITOK|M_ZERO);
258 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
259 rtentry_ctor, rtentry_dtor,
260 rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0);
261 for (dom = domains; dom; dom = dom->dom_next) {
262 if (dom->dom_rtattach == NULL)
265 for (table = 0; table < rt_numfibs; table++) {
266 fam = dom->dom_family;
267 if (table != 0 && fam != AF_INET6 && fam != AF_INET)
270 rnh = rt_tables_get_rnh_ptr(table, fam);
272 panic("%s: rnh NULL", __func__);
273 dom->dom_rtattach((void **)rnh, 0, table);
277 VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
282 vnet_route_uninit(const void *unused __unused)
287 struct rib_head **rnh;
289 for (dom = domains; dom; dom = dom->dom_next) {
290 if (dom->dom_rtdetach == NULL)
293 for (table = 0; table < rt_numfibs; table++) {
294 fam = dom->dom_family;
296 if (table != 0 && fam != AF_INET6 && fam != AF_INET)
299 rnh = rt_tables_get_rnh_ptr(table, fam);
301 panic("%s: rnh NULL", __func__);
302 dom->dom_rtdetach((void **)rnh, 0);
307 * dom_rtdetach calls rt_table_destroy(), which
308 * schedules deletion for all rtentries, nexthops and control
309 * structures. Wait for the destruction callbacks to fire.
310 * Note that this should result in freeing all rtentries, but
311 * nexthops deletions will be scheduled for the next epoch run
312 * and will be completed after vnet teardown.
314 epoch_drain_callbacks(net_epoch_preempt);
316 free(V_rt_tables, M_RTABLE);
317 uma_zdestroy(V_rtzone);
319 VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
320 vnet_route_uninit, 0);
324 rt_table_init(int offset, int family, u_int fibnum)
328 rh = malloc(sizeof(struct rib_head), M_RTABLE, M_WAITOK | M_ZERO);
330 /* TODO: These details should be hidden inside radix.c */
331 /* Init masks tree */
332 rn_inithead_internal(&rh->head, rh->rnh_nodes, offset);
333 rn_inithead_internal(&rh->rmhead.head, rh->rmhead.mask_nodes, 0);
334 rh->head.rnh_masks = &rh->rmhead;
336 /* Save metadata associated with this routing table. */
337 rh->rib_family = family;
338 rh->rib_fibnum = fibnum;
340 rh->rib_vnet = curvnet;
350 /* Init subscription system */
351 rib_init_subscriptions(rh);
353 /* Finally, set base callbacks */
354 rh->rnh_addaddr = rn_addroute;
355 rh->rnh_deladdr = rn_delete;
356 rh->rnh_matchaddr = rn_match;
357 rh->rnh_lookup = rn_lookup;
358 rh->rnh_walktree = rn_walktree;
359 rh->rnh_walktree_from = rn_walktree_from;
365 rt_freeentry(struct radix_node *rn, void *arg)
367 struct radix_head * const rnh = arg;
368 struct radix_node *x;
370 x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh);
377 rt_table_destroy(struct rib_head *rh)
380 tmproutes_destroy(rh);
382 rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head);
384 nhops_destroy_rib(rh);
386 rib_destroy_subscriptions(rh);
388 /* Assume table is already empty */
389 RIB_LOCK_DESTROY(rh);
394 #ifndef _SYS_SYSPROTO_H_
400 sys_setfib(struct thread *td, struct setfib_args *uap)
402 if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
404 td->td_proc->p_fibnum = uap->fibnum;
409 * Remove a reference count from an rtentry.
410 * If the count gets low enough, take it out of the routing table
413 rtfree(struct rtentry *rt)
416 KASSERT(rt != NULL,("%s: NULL rt", __func__));
421 epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
426 destroy_rtentry(struct rtentry *rt)
430 * At this moment rnh, nh_control may be already freed.
431 * nhop interface may have been migrated to a different vnet.
432 * Use vnet stored in the nexthop to delete the entry.
434 CURVNET_SET(nhop_get_vnet(rt->rt_nhop));
436 /* Unreference nexthop */
437 nhop_free(rt->rt_nhop);
439 uma_zfree(V_rtzone, rt);
445 * Epoch callback indicating rtentry is safe to destroy
448 destroy_rtentry_epoch(epoch_context_t ctx)
452 rt = __containerof(ctx, struct rtentry, rt_epoch_ctx);
458 * Adds a temporary redirect entry to the routing table.
459 * @fibnum: fib number
460 * @dst: destination to install redirect to
461 * @gateway: gateway to go via
462 * @author: sockaddr of originating router, can be NULL
463 * @ifp: interface to use for the redirected route
464 * @flags: set of flags to add. Allowed: RTF_GATEWAY
465 * @lifetime_sec: time in seconds to expire this redirect.
467 * Returns 0 on success, errno otherwise.
470 rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway,
471 struct sockaddr *author, struct ifnet *ifp, int flags, int lifetime_sec)
473 struct rib_cmd_info rc;
475 struct rt_addrinfo info;
476 struct rt_metrics rti_rmx;
481 if (rt_tables_get_rnh(fibnum, dst->sa_family) == NULL)
482 return (EAFNOSUPPORT);
484 /* Verify the allowed flag mask. */
485 KASSERT(((flags & ~(RTF_GATEWAY)) == 0),
486 ("invalid redirect flags: %x", flags));
488 /* Get the best ifa for the given interface and gateway. */
489 if ((ifa = ifaof_ifpforaddr(gateway, ifp)) == NULL)
490 return (ENETUNREACH);
493 bzero(&info, sizeof(info));
494 info.rti_info[RTAX_DST] = dst;
495 info.rti_info[RTAX_GATEWAY] = gateway;
498 info.rti_flags = flags | RTF_HOST | RTF_DYNAMIC;
500 /* Setup route metrics to define expire time. */
501 bzero(&rti_rmx, sizeof(rti_rmx));
502 /* Set expire time as absolute. */
503 rti_rmx.rmx_expire = lifetime_sec + time_second;
504 info.rti_mflags |= RTV_EXPIRE;
505 info.rti_rmx = &rti_rmx;
507 error = rib_action(fibnum, RTM_ADD, &info, &rc);
511 /* TODO: add per-fib redirect stats. */
516 flags = rc.rc_rt->rt_flags;
519 RTSTAT_INC(rts_dynamic);
521 /* Send notification of a route addition to userland. */
522 bzero(&info, sizeof(info));
523 info.rti_info[RTAX_DST] = dst;
524 info.rti_info[RTAX_GATEWAY] = gateway;
525 info.rti_info[RTAX_AUTHOR] = author;
526 rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum);
532 * Routing table ioctl interface.
535 rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
539 * If more ioctl commands are added here, make sure the proper
540 * super-user checks are being performed because it is possible for
541 * prison-root to make it this far if raw sockets have been enabled
545 /* Multicast goop, grrr... */
546 return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP;
553 ifa_ifwithroute(int flags, const struct sockaddr *dst,
554 const struct sockaddr *gateway, u_int fibnum)
559 if ((flags & RTF_GATEWAY) == 0) {
561 * If we are adding a route to an interface,
562 * and the interface is a pt to pt link
563 * we should search for the destination
564 * as our clue to the interface. Otherwise
565 * we can use the local address.
568 if (flags & RTF_HOST)
569 ifa = ifa_ifwithdstaddr(dst, fibnum);
571 ifa = ifa_ifwithaddr(gateway);
574 * If we are adding a route to a remote net
575 * or host, the gateway may still be on the
576 * other end of a pt to pt link.
578 ifa = ifa_ifwithdstaddr(gateway, fibnum);
581 ifa = ifa_ifwithnet(gateway, 0, fibnum);
583 struct nhop_object *nh;
585 nh = rib_lookup(fibnum, gateway, NHR_NONE, 0);
588 * dismiss a gateway that is reachable only
589 * through the default router
591 if ((nh == NULL) || (nh->nh_flags & NHF_DEFAULT))
595 if (ifa->ifa_addr->sa_family != dst->sa_family) {
596 struct ifaddr *oifa = ifa;
597 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
607 * Copy most of @rt data into @info.
609 * If @flags contains NHR_COPY, copies dst,netmask and gw to the
610 * pointers specified by @info structure. Assume such pointers
611 * are zeroed sockaddr-like structures with sa_len field initialized
612 * to reflect size of the provided buffer. If no NHR_COPY is specified,
613 * point dst,netmask and gw @info fields to appropriate @rt values.
615 * If @flags contains NHR_REF, do refcounting on rt_ifp and rt_ifa.
617 * Returns 0 on success.
620 rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags)
622 struct rt_metrics *rmx;
623 struct sockaddr *src, *dst;
624 struct nhop_object *nh;
627 if (flags & NHR_COPY) {
628 /* Copy destination if dst is non-zero */
630 dst = info->rti_info[RTAX_DST];
631 sa_len = src->sa_len;
633 if (src->sa_len > dst->sa_len)
635 memcpy(dst, src, src->sa_len);
636 info->rti_addrs |= RTA_DST;
639 /* Copy mask if set && dst is non-zero */
641 dst = info->rti_info[RTAX_NETMASK];
642 if (src != NULL && dst != NULL) {
645 * Radix stores different value in sa_len,
646 * assume rt_mask() to have the same length
649 if (sa_len > dst->sa_len)
651 memcpy(dst, src, src->sa_len);
652 info->rti_addrs |= RTA_NETMASK;
655 /* Copy gateway if set && dst is non-zero */
656 src = &rt->rt_nhop->gw_sa;
657 dst = info->rti_info[RTAX_GATEWAY];
658 if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL){
659 if (src->sa_len > dst->sa_len)
661 memcpy(dst, src, src->sa_len);
662 info->rti_addrs |= RTA_GATEWAY;
665 info->rti_info[RTAX_DST] = rt_key(rt);
666 info->rti_addrs |= RTA_DST;
667 if (rt_mask(rt) != NULL) {
668 info->rti_info[RTAX_NETMASK] = rt_mask(rt);
669 info->rti_addrs |= RTA_NETMASK;
671 if (rt->rt_flags & RTF_GATEWAY) {
672 info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
673 info->rti_addrs |= RTA_GATEWAY;
680 info->rti_mflags |= RTV_MTU;
681 rmx->rmx_mtu = nh->nh_mtu;
684 info->rti_flags = rt->rt_flags | nhop_get_rtflags(nh);
685 info->rti_ifp = nh->nh_ifp;
686 info->rti_ifa = nh->nh_ifa;
687 if (flags & NHR_REF) {
688 if_ref(info->rti_ifp);
689 ifa_ref(info->rti_ifa);
696 * Looks up route entry for @dst in RIB database for fib @fibnum.
697 * Exports entry data to @info using rt_exportinfo().
699 * If @flags contains NHR_REF, refcounting is performed on rt_ifp and rt_ifa.
700 * All references can be released later by calling rib_free_info().
702 * Returns 0 on success.
703 * Returns ENOENT for lookup failure, ENOMEM for export failure.
706 rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
707 uint32_t flowid, struct rt_addrinfo *info)
711 struct radix_node *rn;
715 KASSERT((fibnum < rt_numfibs), ("rib_lookup_rte: bad fibnum"));
716 rh = rt_tables_get_rnh(fibnum, dst->sa_family);
721 rn = rh->rnh_matchaddr(__DECONST(void *, dst), &rh->head);
722 if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
724 /* Ensure route & ifp is UP */
725 if (RT_LINK_IS_UP(rt->rt_nhop->nh_ifp)) {
726 flags = (flags & NHR_REF) | NHR_COPY;
727 error = rt_exportinfo(rt, info, flags);
739 * Releases all references acquired by rib_lookup_info() when
740 * called with NHR_REF flags.
743 rib_free_info(struct rt_addrinfo *info)
746 ifa_free(info->rti_ifa);
747 if_rele(info->rti_ifp);
751 * Iterates over all existing fibs in system calling
752 * @setwa_f function prior to traversing each fib.
753 * Calls @wa_f function for each element in current fib.
754 * If af is not AF_UNSPEC, iterates over fibs in particular
758 rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f,
761 struct rib_head *rnh;
765 for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
766 /* Do we want some specific family? */
767 if (af != AF_UNSPEC) {
768 rnh = rt_tables_get_rnh(fibnum, af);
772 setwa_f(rnh, fibnum, af, arg);
775 rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg);
780 for (i = 1; i <= AF_MAX; i++) {
781 rnh = rt_tables_get_rnh(fibnum, i);
785 setwa_f(rnh, fibnum, i, arg);
788 rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg);
795 * Iterates over all existing fibs in system and deletes each element
796 * for which @filter_f function returns non-zero value.
797 * If @family is not AF_UNSPEC, iterates over fibs in particular
801 rt_foreach_fib_walk_del(int family, rt_filter_f_t *filter_f, void *arg)
806 for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
807 /* Do we want some specific family? */
808 if (family != AF_UNSPEC) {
816 for (i = start; i <= end; i++) {
817 if (rt_tables_get_rnh(fibnum, i) == NULL)
820 rib_walk_del(fibnum, i, filter_f, arg, 0);
826 * Delete Routes for a Network Interface
828 * Called for each routing entry via the rnh->rnh_walktree() call above
829 * to delete all route entries referencing a detaching network interface.
832 * rt pointer to rtentry
834 * arg argument passed to rnh->rnh_walktree() - detaching interface
838 * errno failed - reason indicated
841 rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *nh, void *arg)
843 struct ifnet *ifp = arg;
845 if (nh->nh_ifp != ifp)
849 * Protect (sorta) against walktree recursion problems
852 if ((rt->rt_flags & RTF_UP) == 0)
859 * Delete all remaining routes using this interface
860 * Unfortunately the only way to do this is to slog through
861 * the entire routing table looking for routes which point
862 * to this interface...oh well...
865 rt_flushifroutes_af(struct ifnet *ifp, int af)
867 KASSERT((af >= 1 && af <= AF_MAX), ("%s: af %d not >= 1 and <= %d",
868 __func__, af, AF_MAX));
870 rt_foreach_fib_walk_del(af, rt_ifdelroute, ifp);
874 rt_flushifroutes(struct ifnet *ifp)
877 rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp);
881 * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined,
882 * it will be referenced so the caller must free it.
884 * Assume basic consistency checks are executed by callers:
885 * RTAX_DST exists, if RTF_GATEWAY is set, RTAX_GATEWAY exists as well.
888 rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
890 const struct sockaddr *dst, *gateway, *ifpaddr, *ifaaddr;
891 struct epoch_tracker et;
892 int needref, error, flags;
894 dst = info->rti_info[RTAX_DST];
895 gateway = info->rti_info[RTAX_GATEWAY];
896 ifpaddr = info->rti_info[RTAX_IFP];
897 ifaaddr = info->rti_info[RTAX_IFA];
898 flags = info->rti_flags;
901 * ifp may be specified by sockaddr_dl
902 * when protocol address is ambiguous.
905 needref = (info->rti_ifa == NULL);
908 /* If we have interface specified by the ifindex in the address, use it */
909 if (info->rti_ifp == NULL && ifpaddr != NULL &&
910 ifpaddr->sa_family == AF_LINK) {
911 const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)ifpaddr;
912 if (sdl->sdl_index != 0)
913 info->rti_ifp = ifnet_byindex(sdl->sdl_index);
916 * If we have source address specified, try to find it
917 * TODO: avoid enumerating all ifas on all interfaces.
919 if (info->rti_ifa == NULL && ifaaddr != NULL)
920 info->rti_ifa = ifa_ifwithaddr(ifaaddr);
921 if (info->rti_ifa == NULL) {
922 const struct sockaddr *sa;
925 * Most common use case for the userland-supplied routes.
927 * Choose sockaddr to select ifa.
928 * -- if ifp is set --
929 * Order of preference:
932 * Note: for interface routes link-level gateway address
933 * is specified to indicate the interface index without
934 * specifying RTF_GATEWAY. In this case, ignore gateway
935 * Note: gateway AF may be different from dst AF. In this case,
937 * 3) final destination.
938 * 4) if all of these fail, try to get at least link-level ifa.
940 * try to lookup gateway or dst in the routing table to get ifa
942 if (info->rti_info[RTAX_IFA] != NULL)
943 sa = info->rti_info[RTAX_IFA];
944 else if ((info->rti_flags & RTF_GATEWAY) != 0 &&
945 gateway->sa_family == dst->sa_family)
949 if (info->rti_ifp != NULL) {
950 info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
952 if (info->rti_ifa == NULL && gateway != NULL)
953 info->rti_ifa = ifaof_ifpforaddr(gateway, info->rti_ifp);
954 } else if (dst != NULL && gateway != NULL)
955 info->rti_ifa = ifa_ifwithroute(flags, dst, gateway,
958 info->rti_ifa = ifa_ifwithroute(flags, sa, sa,
961 if (needref && info->rti_ifa != NULL) {
962 if (info->rti_ifp == NULL)
963 info->rti_ifp = info->rti_ifa->ifa_ifp;
964 ifa_ref(info->rti_ifa);
972 rt_updatemtu(struct ifnet *ifp)
974 struct rib_head *rnh;
979 * Try to update rt_mtu for all routes using this interface
980 * Unfortunately the only way to do this is to traverse all
981 * routing tables in all fibs/domains.
983 for (i = 1; i <= AF_MAX; i++) {
984 mtu = if_getmtu_family(ifp, i);
985 for (j = 0; j < rt_numfibs; j++) {
986 rnh = rt_tables_get_rnh(j, i);
989 nhops_update_ifmtu(rnh, ifp, mtu);
996 int p_sockaddr(char *buf, int buflen, struct sockaddr *s);
997 int rt_print(char *buf, int buflen, struct rtentry *rt);
1000 p_sockaddr(char *buf, int buflen, struct sockaddr *s)
1004 switch (s->sa_family) {
1006 paddr = &((struct sockaddr_in *)s)->sin_addr;
1009 paddr = &((struct sockaddr_in6 *)s)->sin6_addr;
1016 if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL)
1019 return (strlen(buf));
1023 rt_print(char *buf, int buflen, struct rtentry *rt)
1025 struct sockaddr *addr, *mask;
1031 i = p_sockaddr(buf, buflen, addr);
1032 if (!(rt->rt_flags & RTF_HOST)) {
1034 i += p_sockaddr(buf + i, buflen - i, mask);
1037 if (rt->rt_flags & RTF_GATEWAY) {
1039 i += p_sockaddr(buf + i, buflen - i, &rt->rt_nhop->gw_sa);
1048 * Deletes key for single-path routes, unlinks rtentry with
1049 * gateway specified in @info from multi-path routes.
1051 * Returns unlinked entry. In case of failure, returns NULL
1052 * and sets @perror to ESRCH.
1055 rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info,
1056 struct rtentry *rto, int *perror)
1059 * if we got multipath routes, we require users to specify
1060 * a matching RTAX_GATEWAY.
1062 struct rtentry *rt; // *rto = NULL;
1063 struct radix_node *rn;
1064 struct sockaddr *gw;
1066 gw = info->rti_info[RTAX_GATEWAY];
1067 rt = rt_mpath_matchgate(rto, gw);
1074 * this is the first entry in the chain
1077 rn = rn_mpath_next((struct radix_node *)rt);
1079 * there is another entry, now it's active
1084 rto->rt_flags |= RTF_UP;
1086 } else if (rt->rt_flags & RTF_GATEWAY) {
1088 * For gateway routes, we need to
1089 * make sure that we are deleting
1090 * the correct gateway.
1091 * rt_mpath_matchgate() does not
1092 * check the case when there is only
1093 * one route in the chain.
1096 (rt->rt_nhop->gw_sa.sa_len != gw->sa_len ||
1097 memcmp(&rt->rt_nhop->gw_sa, gw, gw->sa_len))) {
1104 * use the normal delete code to remove
1107 rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST],
1108 info->rti_info[RTAX_NETMASK],
1115 * if the entry is 2nd and on up
1117 if (rt_mpath_deldup(rto, rt) == 0)
1118 panic ("rtrequest1: rt_mpath_deldup");
1120 rn = (struct radix_node *)rt;
1126 rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt)
1129 if (info->rti_mflags & RTV_WEIGHT)
1130 rt->rt_weight = info->rti_rmx->rmx_weight;
1131 /* Userland (time_second) -> kernel (time_uptime) timebase conversion. */
1132 if (info->rti_mflags & RTV_EXPIRE)
1133 rt->rt_expire = info->rti_rmx->rmx_expire ?
1134 info->rti_rmx->rmx_expire - time_second + time_uptime : 0;
1138 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
1140 u_char *cp1 = (u_char *)src;
1141 u_char *cp2 = (u_char *)dst;
1142 u_char *cp3 = (u_char *)netmask;
1143 u_char *cplim = cp2 + *cp3;
1144 u_char *cplim2 = cp2 + *cp1;
1146 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
1151 *cp2++ = *cp1++ & *cp3++;
1153 bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
1157 * Set up a routing table entry, normally
1160 #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
1162 rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
1165 struct epoch_tracker et;
1166 struct sockaddr *dst;
1167 struct sockaddr *netmask;
1168 struct rib_cmd_info rc;
1169 struct rt_addrinfo info;
1171 int startfib, endfib;
1172 char tempbuf[_SOCKADDR_TMPSIZE];
1175 struct sockaddr_dl_short *sdl = NULL;
1176 struct rib_head *rnh;
1178 if (flags & RTF_HOST) {
1179 dst = ifa->ifa_dstaddr;
1182 dst = ifa->ifa_addr;
1183 netmask = ifa->ifa_netmask;
1185 if (dst->sa_len == 0)
1187 switch (dst->sa_family) {
1190 /* We support multiple FIBs. */
1193 fibnum = RT_DEFAULT_FIB;
1196 if (fibnum == RT_ALL_FIBS) {
1197 if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD)
1198 startfib = endfib = ifa->ifa_ifp->if_fib;
1201 endfib = rt_numfibs - 1;
1204 KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum"));
1210 * If it's a delete, check that if it exists,
1211 * it's on the correct interface or we might scrub
1212 * a route to another ifa which would
1213 * be confusing at best and possibly worse.
1215 if (cmd == RTM_DELETE) {
1217 * It's a delete, so it should already exist..
1218 * If it's a net, mask off the host bits
1219 * (Assuming we have a mask)
1220 * XXX this is kinda inet specific..
1222 if (netmask != NULL) {
1223 rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
1224 dst = (struct sockaddr *)tempbuf;
1226 } else if (cmd == RTM_ADD) {
1227 sdl = (struct sockaddr_dl_short *)tempbuf;
1228 bzero(sdl, sizeof(struct sockaddr_dl_short));
1229 sdl->sdl_family = AF_LINK;
1230 sdl->sdl_len = sizeof(struct sockaddr_dl_short);
1231 sdl->sdl_type = ifa->ifa_ifp->if_type;
1232 sdl->sdl_index = ifa->ifa_ifp->if_index;
1235 * Now go through all the requested tables (fibs) and do the
1236 * requested action. Realistically, this will either be fib 0
1237 * for protocols that don't do multiple tables or all the
1238 * tables for those that do.
1240 for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
1241 if (cmd == RTM_DELETE) {
1242 struct radix_node *rn;
1244 * Look up an rtentry that is in the routing tree and
1245 * contains the correct info.
1247 rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
1249 /* this table doesn't exist but others might */
1252 rn = rnh->rnh_lookup(dst, netmask, &rnh->head);
1254 if (rt_mpath_capable(rnh)) {
1259 struct rtentry *rt = RNTORT(rn);
1261 * for interface route the
1262 * gateway is sockaddr_dl, so
1263 * rt_mpath_matchgate must use the
1266 rt = rt_mpath_matchgate(rt,
1273 error = (rn == NULL ||
1274 (rn->rn_flags & RNF_ROOT) ||
1275 RNTORT(rn)->rt_nhop->nh_ifa != ifa);
1278 /* this is only an error if bad on ALL tables */
1283 * Do the actual request
1285 bzero((caddr_t)&info, sizeof(info));
1287 info.rti_flags = flags |
1288 (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED;
1289 info.rti_info[RTAX_DST] = dst;
1291 * doing this for compatibility reasons
1294 info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sdl;
1296 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1297 info.rti_info[RTAX_NETMASK] = netmask;
1298 NET_EPOCH_ENTER(et);
1299 error = rib_action(fibnum, cmd, &info, &rc);
1300 if (error == 0 && rc.rc_rt != NULL) {
1302 * notify any listening routing agents of the change
1305 /* TODO: interface routes/aliases */
1306 rt_newaddrmsg_fib(cmd, ifa, rc.rc_rt, fibnum);
1313 if (cmd == RTM_DELETE) {
1317 /* we only give an error if it wasn't in any table */
1318 error = ((flags & RTF_HOST) ?
1319 EHOSTUNREACH : ENETUNREACH);
1323 /* return an error if any of them failed */
1331 * Set up a routing table entry, normally
1335 rtinit(struct ifaddr *ifa, int cmd, int flags)
1337 struct sockaddr *dst;
1338 int fib = RT_DEFAULT_FIB;
1340 if (flags & RTF_HOST) {
1341 dst = ifa->ifa_dstaddr;
1343 dst = ifa->ifa_addr;
1346 switch (dst->sa_family) {
1349 /* We do support multiple FIBs. */
1353 return (rtinit1(ifa, cmd, flags, fib));
1357 * Announce interface address arrival/withdrawal
1358 * Returns 0 on success.
1361 rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
1364 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
1365 ("unexpected cmd %d", cmd));
1366 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
1367 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
1369 EVENTHANDLER_DIRECT_INVOKE(rt_addrmsg, ifa, cmd);
1370 return (rtsock_addrmsg(cmd, ifa, fibnum));
1374 * Announce kernel-originated route addition/removal to rtsock based on @rt data.
1376 * @rt: valid rtentry
1377 * @ifp: target route interface
1378 * @fibnum: fib id or RT_ALL_FIBS
1380 * Returns 0 on success.
1383 rt_routemsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int rti_addrs,
1387 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
1388 ("unexpected cmd %d", cmd));
1390 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
1391 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
1393 KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__));
1395 return (rtsock_routemsg(cmd, rt, ifp, 0, fibnum));
1399 * Announce kernel-originated route addition/removal to rtsock based on @info data.
1401 * @info: addrinfo structure with valid data.
1402 * @fibnum: fib id or RT_ALL_FIBS
1404 * Returns 0 on success.
1407 rt_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum)
1410 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE || cmd == RTM_CHANGE,
1411 ("unexpected cmd %d", cmd));
1413 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
1414 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
1416 KASSERT(info->rti_info[RTAX_DST] != NULL, (":%s: RTAX_DST must be supplied", __func__));
1418 return (rtsock_routemsg_info(cmd, info, fibnum));
1423 * This is called to generate messages from the routing socket
1424 * indicating a network interface has had addresses associated with it.
1427 rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, struct rtentry *rt, int fibnum)
1430 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
1431 ("unexpected cmd %u", cmd));
1432 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
1433 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
1435 if (cmd == RTM_ADD) {
1436 rt_addrmsg(cmd, ifa, fibnum);
1438 rt_routemsg(cmd, rt, ifa->ifa_ifp, 0, fibnum);
1441 rt_routemsg(cmd, rt, ifa->ifa_ifp, 0, fibnum);
1442 rt_addrmsg(cmd, ifa, fibnum);