2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2020 Alexander V. Chernikov
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include "opt_inet6.h"
32 #include "opt_route.h"
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/syslog.h>
41 #include <sys/kernel.h>
43 #include <sys/rmlock.h>
46 #include <net/if_var.h>
47 #include <net/if_dl.h>
49 #include <net/route.h>
50 #include <net/route/route_ctl.h>
51 #include <net/route/route_var.h>
52 #include <net/route/nhop_utils.h>
53 #include <net/route/nhop.h>
54 #include <net/route/nhop_var.h>
55 #include <netinet/in.h>
56 #include <netinet6/scope6_var.h>
57 #include <netinet6/in6_var.h>
61 #define DEBUG_MOD_NAME route_ctl
62 #define DEBUG_MAX_LEVEL LOG_DEBUG
63 #include <net/route/route_debug.h>
64 _DECLARE_DEBUG(LOG_INFO);
 * This file contains control plane routing tables functions.
 *
 * All functions assume they are called in net epoch.
72 struct rib_subscription {
73 CK_STAILQ_ENTRY(rib_subscription) next;
74 rib_subscription_cb_t *func;
77 enum rib_subscription_type type;
78 struct epoch_context epoch_ctx;
81 union sockaddr_union {
83 struct sockaddr_in sin;
84 struct sockaddr_in6 sin6;
88 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
89 struct rib_cmd_info *rc);
90 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
91 struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
92 struct rib_cmd_info *rc);
94 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
95 struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
96 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
97 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
98 int op_flags, struct rib_cmd_info *rc);
100 static int add_route(struct rib_head *rnh, struct rtentry *rt,
101 struct route_nhop_data *rnd, struct rib_cmd_info *rc);
102 static int delete_route(struct rib_head *rnh, struct rtentry *rt,
103 struct rib_cmd_info *rc);
104 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
105 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);
107 static struct rtentry *create_rtentry(struct rib_head *rnh,
108 const struct sockaddr *dst, struct sockaddr *netmask);
110 static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
111 struct rib_cmd_info *rc);
113 static int get_prio_from_info(const struct rt_addrinfo *info);
114 static int nhop_get_prio(const struct nhop_object *nh);
116 static void destroy_subscription_epoch(epoch_context_t ctx);
118 static bool rib_can_multipath(struct rib_head *rh);
121 /* Per-vnet multipath routing configuration */
122 SYSCTL_DECL(_net_route);
123 #define V_rib_route_multipath VNET(rib_route_multipath)
125 #define _MP_FLAGS CTLFLAG_RW
127 #define _MP_FLAGS CTLFLAG_RD
129 VNET_DEFINE(u_int, rib_route_multipath) = 1;
130 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
131 &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
135 VNET_DEFINE(u_int, fib_hash_outbound) = 0;
136 SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
137 &VNET_NAME(fib_hash_outbound), 0,
138 "Compute flowid for locally-originated packets");
140 /* Default entropy to add to the hash calculation for the outbound connections*/
141 uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
142 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
143 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
144 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
145 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
146 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
/* RFC5549: allow IPv4 prefixes to resolve via IPv6 nexthops. */
#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define V_rib_route_ipv6_nexthop	VNET(rib_route_ipv6_nexthop)
VNET_DEFINE(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif
158 /* Routing table UMA zone */
159 VNET_DEFINE_STATIC(uma_zone_t, rtzone);
160 #define V_rtzone VNET(rtzone)
163 SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
166 vnet_rtzone_init(void)
169 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
170 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
175 vnet_rtzone_destroy(void)
178 uma_zdestroy(V_rtzone);
183 destroy_rtentry(struct rtentry *rt)
186 struct nhop_object *nh = rt->rt_nhop;
189 * At this moment rnh, nh_control may be already freed.
190 * nhop interface may have been migrated to a different vnet.
191 * Use vnet stored in the nexthop to delete the entry.
194 if (NH_IS_NHGRP(nh)) {
195 const struct weightened_nhop *wn;
197 wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
201 CURVNET_SET(nhop_get_vnet(nh));
204 /* Unreference nexthop */
205 nhop_free_any(rt->rt_nhop);
207 uma_zfree(V_rtzone, rt);
213 * Epoch callback indicating rtentry is safe to destroy
216 destroy_rtentry_epoch(epoch_context_t ctx)
220 rt = __containerof(ctx, struct rtentry, rt_epoch_ctx);
226 * Schedule rtentry deletion
229 rtfree(struct rtentry *rt)
232 KASSERT(rt != NULL, ("%s: NULL rt", __func__));
234 epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
238 static struct rib_head *
239 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
241 struct rib_head *rnh;
242 struct sockaddr *dst;
244 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
246 dst = info->rti_info[RTAX_DST];
247 rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
#if defined(INET) && defined(INET6)
/*
 * Reports whether IPv4 routes may use IPv6 nexthops (RFC5549) in the
 * vnet owning @rh.  Reads the per-vnet sysctl under CURVNET.
 */
static bool
rib_can_ipv6_nexthop_address(struct rib_head *rh)
{
	bool result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_ipv6_nexthop;
	CURVNET_RESTORE();

	return (result);
}
#endif
268 rib_can_multipath(struct rib_head *rh)
272 CURVNET_SET(rh->rib_vnet);
273 result = !!V_rib_route_multipath;
280 * Check is nhop is multipath-eligible.
281 * Avoid nhops without gateways and redirects.
283 * Returns 1 for multipath-eligible nexthop,
287 nhop_can_multipath(const struct nhop_object *nh)
290 if ((nh->nh_flags & NHF_MULTIPATH) != 0)
292 if ((nh->nh_flags & NHF_GATEWAY) == 0)
294 if ((nh->nh_flags & NHF_REDIRECT) != 0)
302 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
306 if (info->rti_mflags & RTV_WEIGHT)
307 weight = info->rti_rmx->rmx_weight;
309 weight = default_weight;
310 /* Keep upper 1 byte for adm distance purposes */
311 if (weight > RT_MAX_WEIGHT)
312 weight = RT_MAX_WEIGHT;
313 else if (weight == 0)
314 weight = default_weight;
320 rt_is_host(const struct rtentry *rt)
323 return (rt->rte_flags & RTF_HOST);
327 rt_get_family(const struct rtentry *rt)
329 const struct sockaddr *dst;
331 dst = (const struct sockaddr *)rt_key_const(rt);
333 return (dst->sa_family);
337 * Returns pointer to nexthop or nexthop group
338 * associated with @rt
341 rt_get_raw_nhop(const struct rtentry *rt)
344 return (rt->rt_nhop);
349 * Stores IPv4 address and prefix length of @rt inside
351 * @pscopeid is currently always set to 0.
354 rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr,
355 int *plen, uint32_t *pscopeid)
357 const struct sockaddr_in *dst;
359 dst = (const struct sockaddr_in *)rt_key_const(rt);
360 KASSERT((dst->sin_family == AF_INET),
361 ("rt family is %d, not inet", dst->sin_family));
362 *paddr = dst->sin_addr;
363 dst = (const struct sockaddr_in *)rt_mask_const(rt);
367 *plen = bitcount32(dst->sin_addr.s_addr);
372 * Stores IPv4 address and prefix mask of @rt inside
373 * @paddr and @pmask. Sets mask to INADDR_ANY for host routes.
374 * @pscopeid is currently always set to 0.
377 rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr,
378 struct in_addr *pmask, uint32_t *pscopeid)
380 const struct sockaddr_in *dst;
382 dst = (const struct sockaddr_in *)rt_key_const(rt);
383 KASSERT((dst->sin_family == AF_INET),
384 ("rt family is %d, not inet", dst->sin_family));
385 *paddr = dst->sin_addr;
386 dst = (const struct sockaddr_in *)rt_mask_const(rt);
388 pmask->s_addr = INADDR_BROADCAST;
390 *pmask = dst->sin_addr;
397 inet6_get_plen(const struct in6_addr *addr)
400 return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) +
401 bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3]));
405 * Stores IPv6 address and prefix length of @rt inside
406 * @paddr and @plen. Addresses are returned in de-embedded form.
407 * Scopeid is set to 0 for non-LL addresses.
410 rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr,
411 int *plen, uint32_t *pscopeid)
413 const struct sockaddr_in6 *dst;
415 dst = (const struct sockaddr_in6 *)rt_key_const(rt);
416 KASSERT((dst->sin6_family == AF_INET6),
417 ("rt family is %d, not inet6", dst->sin6_family));
418 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
419 in6_splitscope(&dst->sin6_addr, paddr, pscopeid);
421 *paddr = dst->sin6_addr;
422 dst = (const struct sockaddr_in6 *)rt_mask_const(rt);
426 *plen = inet6_get_plen(&dst->sin6_addr);
430 * Stores IPv6 address and prefix mask of @rt inside
431 * @paddr and @pmask. Addresses are returned in de-embedded form.
432 * Scopeid is set to 0 for non-LL addresses.
435 rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr,
436 struct in6_addr *pmask, uint32_t *pscopeid)
438 const struct sockaddr_in6 *dst;
440 dst = (const struct sockaddr_in6 *)rt_key_const(rt);
441 KASSERT((dst->sin6_family == AF_INET6),
442 ("rt family is %d, not inet", dst->sin6_family));
443 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
444 in6_splitscope(&dst->sin6_addr, paddr, pscopeid);
446 *paddr = dst->sin6_addr;
447 dst = (const struct sockaddr_in6 *)rt_mask_const(rt);
449 memset(pmask, 0xFF, sizeof(struct in6_addr));
451 *pmask = dst->sin6_addr;
456 * File-local concept for distingushing between the normal and
457 * RTF_PINNED routes tha can override the "normal" one.
459 #define NH_PRIORITY_HIGH 2
460 #define NH_PRIORITY_NORMAL 1
462 get_prio_from_info(const struct rt_addrinfo *info)
464 if (info->rti_flags & RTF_PINNED)
465 return (NH_PRIORITY_HIGH);
466 return (NH_PRIORITY_NORMAL);
470 nhop_get_prio(const struct nhop_object *nh)
472 if (NH_IS_PINNED(nh))
473 return (NH_PRIORITY_HIGH);
474 return (NH_PRIORITY_NORMAL);
478 * Check if specified @gw matches gw data in the nexthop @nh.
480 * Returns true if matches, false otherwise.
483 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
486 if (nh->gw_sa.sa_family != gw->sa_family)
489 switch (gw->sa_family) {
491 return (nh->gw4_sa.sin_addr.s_addr ==
492 ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
495 const struct sockaddr_in6 *gw6;
496 gw6 = (const struct sockaddr_in6 *)gw;
499 * Currently (2020-09) IPv6 gws in kernel have their
500 * scope embedded. Once this becomes false, this code
501 * has to be revisited.
503 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
510 const struct sockaddr_dl *sdl;
511 sdl = (const struct sockaddr_dl *)gw;
512 return (nh->gwl_sa.sdl_index == sdl->sdl_index);
515 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
/* State for gw_filter_func(): the gateway to match and a hit counter. */
struct gw_filter_data {
	const struct sockaddr *gw;
	int count;	/* number of matches seen so far */
};
528 gw_filter_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
530 struct gw_filter_data *gwd = (struct gw_filter_data *)_data;
532 /* Return only first match to make rtsock happy */
533 if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
539 * Checks if data in @info matches nexhop @nh.
541 * Returns 0 on success,
542 * ESRCH if not matched,
543 * ENOENT if filter function returned false
546 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
547 const struct nhop_object *nh)
549 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
551 if (info->rti_filter != NULL) {
552 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
557 if ((gw != NULL) && !match_nhop_gw(nh, gw))
564 * Runs exact prefix match based on @dst and @netmask.
565 * Returns matched @rtentry if found or NULL.
566 * If rtentry was found, saves nexthop / weight value into @rnd.
568 static struct rtentry *
569 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
570 const struct sockaddr *netmask, struct route_nhop_data *rnd)
574 RIB_LOCK_ASSERT(rnh);
576 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
578 rnd->rnd_nhop = rt->rt_nhop;
579 rnd->rnd_weight = rt->rt_weight;
581 rnd->rnd_nhop = NULL;
/*
 * Looks up the prefix matching @rt's own key/mask in @rnh.
 */
struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd));
}
596 * Runs exact prefix match based on dst/netmask from @info.
597 * Assumes RIB lock is held.
598 * Returns matched @rtentry if found or NULL.
599 * If rtentry was found, saves nexthop / weight value into @rnd.
602 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
603 struct route_nhop_data *rnd)
607 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
608 info->rti_info[RTAX_NETMASK], rnd);
614 fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
615 struct sockaddr **pmask)
621 struct sockaddr_in *mask = (struct sockaddr_in *)pmask;
622 struct sockaddr_in *dst= (struct sockaddr_in *)_dst;
624 memset(mask, 0, sizeof(*mask));
625 mask->sin_family = family;
626 mask->sin_len = sizeof(*mask);
627 if (plen == 32 || plen == -1)
629 else if (plen > 32 || plen < 0)
632 uint32_t daddr, maddr;
633 maddr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
634 mask->sin_addr.s_addr = maddr;
635 daddr = dst->sin_addr.s_addr;
636 daddr = htonl(ntohl(daddr) & ntohl(maddr));
637 dst->sin_addr.s_addr = daddr;
646 struct sockaddr_in6 *mask = (struct sockaddr_in6 *)pmask;
647 struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;
649 memset(mask, 0, sizeof(*mask));
650 mask->sin6_family = family;
651 mask->sin6_len = sizeof(*mask);
652 if (plen == 128 || plen == -1)
654 else if (plen > 128 || plen < 0)
657 ip6_writemask(&mask->sin6_addr, plen);
658 IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
669 * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd
670 * to the routing table.
672 * @fibnum: rtable id to insert route to
673 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
674 * @plen: prefix length (or -1 if host route or not applicable for AF)
675 * @op_flags: combination of RTM_F_ flags
676 * @rc: storage to report operation result
678 * Returns 0 on success.
681 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, unsigned int plen,
682 struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
684 union sockaddr_union mask_storage;
685 struct sockaddr *netmask = &mask_storage.sa;
690 bzero(rc, sizeof(struct rib_cmd_info));
691 rc->rc_cmd = RTM_ADD;
693 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
695 return (EAFNOSUPPORT);
697 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
698 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
702 if (op_flags & RTM_F_CREATE) {
703 if ((rt = create_rtentry(rnh, dst, netmask)) == NULL)
706 struct route_nhop_data rnd_tmp;
708 rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
713 return (add_route_flags(rnh, rt, rnd, op_flags, rc));
717 * Attempts to delete @dst/plen prefix matching gateway @gw from the
720 * @fibnum: rtable id to remove route from
721 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
722 * @plen: prefix length (or -1 if host route or not applicable for AF)
723 * @gw: gateway to match
724 * @op_flags: combination of RTM_F_ flags
725 * @rc: storage to report operation result
727 * Returns 0 on success.
730 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, unsigned int plen,
731 const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
733 struct gw_filter_data gwd = { .gw = gw };
735 return (rib_del_route_px(fibnum, dst, plen, gw_filter_func, &gwd, op_flags, rc));
739 * Attempts to delete @dst/plen prefix matching @filter_func from the
742 * @fibnum: rtable id to remove route from
743 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
744 * @plen: prefix length (or -1 if host route or not applicable for AF)
745 * @filter_func: func to be called for each nexthop of the prefix for matching
746 * @filter_arg: argument to pass to @filter_func
747 * @op_flags: combination of RTM_F_ flags
748 * @rc: storage to report operation result
750 * Returns 0 on success.
753 rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, unsigned int plen,
754 rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
755 struct rib_cmd_info *rc)
757 union sockaddr_union mask_storage;
758 struct sockaddr *netmask = &mask_storage.sa;
763 bzero(rc, sizeof(struct rib_cmd_info));
764 rc->rc_cmd = RTM_DELETE;
766 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
768 return (EAFNOSUPPORT);
770 if (dst->sa_len > sizeof(mask_storage)) {
771 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
775 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
776 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
780 int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;
783 struct route_nhop_data rnd;
784 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
786 error = rt_delete_conditional(rnh, rt, prio, filter_func,
795 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
797 if (rc->rc_cmd == RTM_DELETE)
802 * Deleting 1 path may result in RTM_CHANGE to
803 * a different mpath group/nhop.
804 * Free old mpath group.
806 nhop_free_any(rc->rc_nh_old);
814 * Adds route defined by @info into the kernel table specified by @fibnum and
815 * sa_family in @info->rti_info[RTAX_DST].
817 * Returns 0 on success and fills in operation metadata into @rc.
820 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
821 struct rib_cmd_info *rc)
823 struct rib_head *rnh;
828 rnh = get_rnh(fibnum, info);
830 return (EAFNOSUPPORT);
833 * Check consistency between RTF_HOST flag and netmask
836 if (info->rti_flags & RTF_HOST)
837 info->rti_info[RTAX_NETMASK] = NULL;
838 else if (info->rti_info[RTAX_NETMASK] == NULL) {
839 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
843 bzero(rc, sizeof(struct rib_cmd_info));
844 rc->rc_cmd = RTM_ADD;
846 error = add_route_byinfo(rnh, info, rc);
848 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
854 * Checks if @dst and @gateway is valid combination.
856 * Returns true if is valid, false otherwise.
859 check_gateway(struct rib_head *rnh, struct sockaddr *dst,
860 struct sockaddr *gateway)
862 if (dst->sa_family == gateway->sa_family)
864 else if (gateway->sa_family == AF_UNSPEC)
866 else if (gateway->sa_family == AF_LINK)
868 #if defined(INET) && defined(INET6)
869 else if (dst->sa_family == AF_INET && gateway->sa_family == AF_INET6 &&
870 rib_can_ipv6_nexthop_address(rnh))
878 * Creates rtentry and nexthop based on @info data.
879 * Return 0 and fills in rtentry into @prt on success,
880 * Note: rtentry mask will be set to RTAX_NETMASK, thus its pointer is required
881 * to be stable till the end of the operation (radix rt insertion/change/removal).
882 * return errno otherwise.
884 static struct rtentry *
885 create_rtentry(struct rib_head *rnh, const struct sockaddr *dst,
886 struct sockaddr *netmask)
888 MPASS(dst->sa_len <= sizeof(((struct rtentry *)NULL)->rt_dstb));
890 struct rtentry *rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
893 rt->rte_flags = RTF_UP | (netmask == NULL ? RTF_HOST : 0);
895 /* Fill in dst, ensuring it's masked if needed. */
896 if (netmask != NULL) {
897 rt_maskedcopy(dst, &rt->rt_dst, netmask);
899 bcopy(dst, &rt->rt_dst, dst->sa_len);
900 rt_key(rt) = &rt->rt_dst;
901 /* Set netmask to the storage from info. It will be updated upon insertion */
902 rt_mask(rt) = netmask;
908 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
909 struct rib_cmd_info *rc)
911 struct route_nhop_data rnd_add;
912 struct nhop_object *nh;
914 struct sockaddr *dst, *gateway, *netmask;
917 dst = info->rti_info[RTAX_DST];
918 gateway = info->rti_info[RTAX_GATEWAY];
919 netmask = info->rti_info[RTAX_NETMASK];
921 if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
922 FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
925 if (dst && gateway && !check_gateway(rnh, dst, gateway)) {
926 FIB_RH_LOG(LOG_DEBUG, rnh,
927 "error: invalid dst/gateway family combination (%d, %d)",
928 dst->sa_family, gateway->sa_family);
932 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
933 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
938 if (info->rti_ifa == NULL) {
939 error = rt_getifa_fib(info, rnh->rib_fibnum);
944 if ((rt = create_rtentry(rnh, dst, netmask)) == NULL)
947 error = nhop_create_from_info(rnh, info, &nh);
949 uma_zfree(V_rtzone, rt);
953 rnd_add.rnd_nhop = nh;
954 rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);
956 int op_flags = RTM_F_CREATE;
957 if (get_prio_from_info(info) == NH_PRIORITY_HIGH)
958 op_flags |= RTM_F_FORCE;
960 op_flags |= RTM_F_APPEND;
961 return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
/*
 * add_route_flags() -- inserts, appends or replaces the prefix entry @rt
 * in @rnh according to RTM_F_* bits in @op_flags, consuming the nexthop
 * reference carried in @rnd_add and, on failure with RTM_F_CREATE, the
 * rtentry itself.
 *
 * NOTE(review): this extraction is missing lines (return type, braces,
 * RIB_WLOCK/RIB_WUNLOCK pairs, several returns/gotos) -- reconcile with
 * the FreeBSD route_ctl.c original before compiling.
 */
966 add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
967 int op_flags, struct rib_cmd_info *rc)
969 struct route_nhop_data rnd_orig;
970 struct nhop_object *nh;
971 struct rtentry *rt_orig;
974 nh = rnd_add->rnd_nhop;
/* Look up an existing entry for the same prefix (under the RIB lock). */
978 rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);
980 if (rt_orig == NULL) {
981 if (op_flags & RTM_F_CREATE)
982 error = add_route(rnh, rt, rnd_add, rc);
984 error = ENOENT; // no entry but creation was not required
991 if (op_flags & RTM_F_EXCL) {
992 /* We have existing route in the RIB but not allowed to replace. */
998 /* Now either append or replace */
999 if (op_flags & RTM_F_REPLACE) {
1000 if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) {
1001 /* Old path is "better" (e.g. has PINNED flag set) */
1005 change_route(rnh, rt_orig, rnd_add, rc);
/*
 * Append path: only taken when multipath is enabled and both the new
 * and the existing nexthops are multipath-eligible.
 */
1014 if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
1015 nhop_can_multipath(rnd_add->rnd_nhop) &&
1016 nhop_can_multipath(rnd_orig.rnd_nhop)) {
/* Retry on EAGAIN: the original group may be freed concurrently. */
1018 for (int i = 0; i < RIB_MAX_RETRIES; i++) {
1019 error = add_route_flags_mpath(rnh, rt, rnd_add, &rnd_orig,
1021 if (error != EAGAIN)
1023 RTSTAT_INC(rts_add_retry);
1027 * Original nhop reference is unused in any case.
1029 nhop_free_any(rnd_add->rnd_nhop);
/* If we created @rt but it was not inserted as RTM_ADD, free it. */
1030 if (op_flags & RTM_F_CREATE) {
1031 if (error != 0 || rc->rc_cmd != RTM_ADD)
1032 uma_zfree(V_rtzone, rt);
1037 /* Out of options - free state and return error */
1040 if (op_flags & RTM_F_CREATE)
1041 uma_zfree(V_rtzone, rt);
/*
 * add_route_flags_mpath() -- merges @rnd_add into the existing
 * nexthop(-group) @rnd_orig for prefix @rt, producing a new nexthop
 * group and swapping it in via change_route_conditional().
 *
 * NOTE(review): lines are missing from this extraction (return type,
 * braces, error returns).  Line 1067 as captured compares the pointer
 * `rnd_orig` to NULL, which can never be true here -- upstream checks
 * `rnd_orig->rnd_nhop == NULL`; verify against the original.
 */
1048 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
1049 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
1050 int op_flags, struct rib_cmd_info *rc)
1053 struct route_nhop_data rnd_new;
1056 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
1058 if (error == EAGAIN) {
1060 * Group creation failed, most probably because
1061 * @rnd_orig data got scheduled for deletion.
1062 * Refresh @rnd_orig data and retry.
1065 lookup_prefix_rt(rnh, rt, rnd_orig);
1067 if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) {
1068 /* In this iteration route doesn't exist */
1074 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
/* First group installed: turn on outbound flowid hashing once. */
1078 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
1080 * First multipath route got installed. Enable local
1081 * outbound connections hashing.
1084 printf("FIB: enabled flowid calculation for locally-originated packets\n");
1085 V_fib_hash_outbound = 1;
1092 * Removes route defined by @info from the kernel table specified by @fibnum and
1093 * sa_family in @info->rti_info[RTAX_DST].
1095 * Returns 0 on success and fills in operation metadata into @rc.
/*
 * NOTE(review): this extraction is missing lines (return type, braces,
 * RIB_WLOCK/RIB_WUNLOCK, error returns) -- reconcile with the FreeBSD
 * route_ctl.c original before compiling.
 */
1098 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
1100 struct rib_head *rnh;
1101 struct sockaddr *dst, *netmask;
1102 struct sockaddr_storage mdst;
1107 rnh = get_rnh(fibnum, info);
1109 return (EAFNOSUPPORT);
1111 bzero(rc, sizeof(struct rib_cmd_info));
1112 rc->rc_cmd = RTM_DELETE;
1114 dst = info->rti_info[RTAX_DST];
1115 netmask = info->rti_info[RTAX_NETMASK];
1117 if (netmask != NULL) {
1118 /* Ensure @dst is always properly masked */
1119 if (dst->sa_len > sizeof(mdst)) {
1120 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
1123 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
1124 dst = (struct sockaddr *)&mdst;
/* Choose the path filter: explicit filter wins over gateway match. */
1127 rib_filter_f_t *filter_func = NULL;
1128 void *filter_arg = NULL;
1129 struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };
1131 if (info->rti_filter != NULL) {
1132 filter_func = info->rti_filter;
1133 filter_arg = info->rti_filterdata;
1134 } else if (gwd.gw != NULL) {
1135 filter_func = gw_filter_func;
/* RTF_PINNED in @info allows deletion of pinned routes. */
1139 int prio = get_prio_from_info(info);
1142 struct route_nhop_data rnd;
1143 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
1145 error = rt_delete_conditional(rnh, rt, prio, filter_func,
1154 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
/* Full delete schedules rtentry free; partial delete drops old group. */
1156 if (rc->rc_cmd == RTM_DELETE)
1161 * Deleting 1 path may result in RTM_CHANGE to
1162 * a different mpath group/nhop.
1163 * Free old mpath group.
1165 nhop_free_any(rc->rc_nh_old);
1173 * Conditionally unlinks rtentry paths from @rnh matching @cb.
1174 * Returns 0 on success with operation result stored in @rc.
1175 * On error, returns:
1176 * ESRCH - if prefix was not found or filter function failed to match
1177 * EADDRINUSE - if trying to delete higher priority route.
1180 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
1181 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
1183 struct nhop_object *nh = rt->rt_nhop;
1184 struct route_nhop_data rnd;
1187 if (NH_IS_NHGRP(nh)) {
1188 struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
1193 error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
1195 if (rnd.rnd_nhgrp == nhg) {
1196 /* No match, unreference new group and return. */
1197 nhop_free_any(rnd.rnd_nhop);
1200 error = change_route(rnh, rt, &rnd, rc);
1205 if (cb != NULL && !cb(rt, nh, cbdata))
1208 if (prio < nhop_get_prio(nh))
1209 return (EADDRINUSE);
1211 return (delete_route(rnh, rt, rc));
/*
 * rib_change_route() -- changes the route described by @info in table
 * @fibnum, reporting the result in @rc.  Retries up to RIB_MAX_RETRIES
 * times because the change is performed in multiple lock/unlock steps.
 *
 * NOTE(review): lines are missing from this extraction (return type,
 * braces, lock acquire/release, ESRCH return on lookup failure) --
 * reconcile with the FreeBSD route_ctl.c original before compiling.
 */
1215 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
1216 struct rib_cmd_info *rc)
1219 struct route_nhop_data rnd_orig;
1220 struct rib_head *rnh;
1226 rnh = get_rnh(fibnum, info);
1228 return (EAFNOSUPPORT);
1230 bzero(rc, sizeof(struct rib_cmd_info));
1231 rc->rc_cmd = RTM_CHANGE;
1233 /* Check if updated gateway exists */
1234 if ((info->rti_flags & RTF_GATEWAY) &&
1235 (info->rti_info[RTAX_GATEWAY] == NULL)) {
1238 * route(8) adds RTF_GATEWAY flag if -interface is not set.
1239 * Remove RTF_GATEWAY to enforce consistency and maintain
1242 info->rti_flags &= ~RTF_GATEWAY;
1246 * route change is done in multiple steps, with dropping and
1247 * reacquiring lock. In the situations with multiple processes
1248 * changes the same route in can lead to the case when route
1249 * is changed between the steps. Address it by retrying the operation
1250 * multiple times before failing.
/* Snapshot the current nhop/weight to detect concurrent changes. */
1254 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
1255 info->rti_info[RTAX_NETMASK], &rnh->head);
1262 rnd_orig.rnd_nhop = rt->rt_nhop;
1263 rnd_orig.rnd_weight = rt->rt_weight;
1267 for (int i = 0; i < RIB_MAX_RETRIES; i++) {
1268 error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
1269 if (error != EAGAIN)
1277 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
1278 struct nhop_object *nh_orig, struct nhop_object **nh_new)
1283 * New gateway could require new ifaddr, ifp;
1284 * flags may also be different; ifp may be specified
1285 * by ll sockaddr when protocol address is ambiguous
1287 if (((nh_orig->nh_flags & NHF_GATEWAY) &&
1288 info->rti_info[RTAX_GATEWAY] != NULL) ||
1289 info->rti_info[RTAX_IFP] != NULL ||
1290 (info->rti_info[RTAX_IFA] != NULL &&
1291 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
1292 error = rt_getifa_fib(info, rnh->rib_fibnum);
1295 info->rti_ifa = NULL;
1300 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
1301 info->rti_ifa = NULL;
1308 change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
1309 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1310 struct rib_cmd_info *rc)
1312 int error = 0, found_idx = 0;
1313 struct nhop_object *nh_orig = NULL, *nh_new;
1314 struct route_nhop_data rnd_new = {};
1315 const struct weightened_nhop *wn = NULL;
1316 struct weightened_nhop *wn_new;
1319 wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
1320 for (int i = 0; i < num_nhops; i++) {
1321 if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
1328 if (nh_orig == NULL)
1331 error = change_nhop(rnh, info, nh_orig, &nh_new);
1335 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
1336 M_TEMP, M_NOWAIT | M_ZERO);
1337 if (wn_new == NULL) {
1342 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
1343 wn_new[found_idx].nh = nh_new;
1344 wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);
1346 error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new.rnd_nhgrp);
1348 free(wn_new, M_TEMP);
1353 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1360 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
1361 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1362 struct rib_cmd_info *rc)
1365 struct nhop_object *nh_orig;
1366 struct route_nhop_data rnd_new;
1368 nh_orig = rnd_orig->rnd_nhop;
1369 if (nh_orig == NULL)
1373 if (NH_IS_NHGRP(nh_orig))
1374 return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
1377 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
1378 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
1381 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1387 * Insert @rt with nhop data from @rnd_new to @rnh.
1388 * Returns 0 on success and stores operation results in @rc.
/*
 * Inserts @rt with nexthop/weight from @rnd into the radix tree @rnh.
 * Requires the RIB write lock.  On success fills in @rc (RTM_ADD) and
 * sends an immediate subscriber notification.
 * NOTE(review): the success/failure branches and returns around the
 * rnh_addaddr() call are elided from this view.
 */
1391 add_route(struct rib_head *rnh, struct rtentry *rt,
1392 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1394 struct radix_node *rn;
1396 RIB_WLOCK_ASSERT(rnh);
/* Attach the nexthop data to the rtentry before linking it. */
1398 rt->rt_nhop = rnd->rnd_nhop;
1399 rt->rt_weight = rnd->rnd_weight;
1400 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);
/* Track expiring routes, but only for plain (non-group) nexthops. */
1403 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1404 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1406 /* Finalize notification */
1408 rnh->rnh_prefixes++;
1410 rc->rc_cmd = RTM_ADD;
1412 rc->rc_nh_old = NULL;
1413 rc->rc_nh_new = rnd->rnd_nhop;
1414 rc->rc_nh_weight = rnd->rnd_weight;
1416 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1420 /* Existing route or memory allocation failure. */
1425 * Unconditionally deletes @rt from @rnh.
/*
 * Unconditionally unlinks @rt from @rnh.  Requires the RIB write lock.
 * Fills in @rc (RTM_DELETE) and sends an immediate notification.
 * NOTE(review): the check of the rnh_deladdr() result and the returns
 * are elided from this view.
 */
1428 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
1430 RIB_WLOCK_ASSERT(rnh);
1432 /* Route deletion requested. */
1433 struct radix_node *rn;
1435 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
/* Mark the entry as no longer active. */
1439 rt->rte_flags &= ~RTF_UP;
1442 rnh->rnh_prefixes--;
/* Report the old nexthop/weight so subscribers can release references. */
1444 rc->rc_cmd = RTM_DELETE;
1446 rc->rc_nh_old = rt->rt_nhop;
1447 rc->rc_nh_new = NULL;
1448 rc->rc_nh_weight = rt->rt_weight;
1450 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1456 * Switch @rt nhop/weight to the ones specified in @rnd.
1457 * Returns 0 on success.
/*
 * Switches @rt nexthop/weight to the ones in @rnd, in place.
 * A NULL new nexthop means the route is being deleted.
 * Requires the RIB write lock; fills in @rc (RTM_CHANGE) and sends an
 * immediate notification.  (Final return elided from this view.)
 */
1460 change_route(struct rib_head *rnh, struct rtentry *rt,
1461 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1463 struct nhop_object *nh_orig;
1465 RIB_WLOCK_ASSERT(rnh);
/* Remember the old nexthop for the notification below. */
1467 nh_orig = rt->rt_nhop;
/* NULL nexthop data is interpreted as a delete request. */
1469 if (rnd->rnd_nhop == NULL)
1470 return (delete_route(rnh, rt, rc));
1472 /* Changing nexthop & weight to a new one */
1473 rt->rt_nhop = rnd->rnd_nhop;
1474 rt->rt_weight = rnd->rnd_weight;
/* Track expiring routes, but only for plain (non-group) nexthops. */
1475 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1476 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1478 /* Finalize notification */
1480 rc->rc_cmd = RTM_CHANGE;
1482 rc->rc_nh_old = nh_orig;
1483 rc->rc_nh_new = rnd->rnd_nhop;
1484 rc->rc_nh_weight = rnd->rnd_weight;
1486 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1492 * Conditionally update route nhop/weight IFF data in @rnd_orig is
1493 * consistent with the current route data.
1494 * Nexthop in @rnd_new is consumed.
/*
 * Compare-and-swap style route update: re-looks up the prefix under the
 * lock and commits @rnd_new only if the route still matches @rnd_orig.
 * On mismatch, refreshes @rnd_orig so the caller can retry.
 * Nexthop references in both @rnd_orig and @rnd_new are released here.
 * NOTE(review): lock acquisition/release, the retry error code and the
 * returns are elided from this view.
 */
1497 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
1498 struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
1499 struct rib_cmd_info *rc)
1501 struct rtentry *rt_new;
/* Optional debug trace of the attempted nexthop transition. */
1504 #if DEBUG_MAX_LEVEL >= LOG_DEBUG2
1506 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
1507 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
1508 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
1509 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
1510 "trying change %s -> %s", buf_old, buf_new);
/* Re-read the current route state under the lock. */
1515 struct route_nhop_data rnd;
1516 rt_new = lookup_prefix_rt(rnh, rt, &rnd);
1518 if (rt_new == NULL) {
/* Prefix absent: if the caller expected that, this is an add. */
1519 if (rnd_orig->rnd_nhop == NULL)
1520 error = add_route(rnh, rt, rnd_new, rc);
1523 * Prefix does not exist, which was not our assumption.
1524 * Update @rnd_orig with the new data and return
1526 rnd_orig->rnd_nhop = NULL;
1527 rnd_orig->rnd_weight = 0;
1531 /* Prefix exists, try to update */
1532 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
1534 * Nhop/mpath group hasn't changed. Flip
1535 * to the new precalculated one and return
1537 error = change_route(rnh, rt_new, rnd_new, rc);
1539 /* Update and retry */
1540 rnd_orig->rnd_nhop = rt_new->rt_nhop;
1541 rnd_orig->rnd_weight = rt_new->rt_weight;
/* Post-commit: delayed notification and reference cleanup. */
1549 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
1551 if (rnd_orig->rnd_nhop != NULL)
1552 nhop_free_any(rnd_orig->rnd_nhop);
1555 if (rnd_new->rnd_nhop != NULL)
1556 nhop_free_any(rnd_new->rnd_nhop);
1563 * Performs modification of routing table specified by @action.
1564 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
1565 * Needs to be run in network epoch.
1567 * Returns 0 on success and fills in @rc with action result.
/*
 * Dispatcher: routes the request to the add/delete/change handler
 * based on @action (RTM_ADD / RTM_DELETE / RTM_CHANGE).
 * NOTE(review): the switch statement's case labels, the default case
 * and the return are elided from this view.
 */
1570 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
1571 struct rib_cmd_info *rc)
1577 error = rib_add_route(fibnum, info, rc);
1580 error = rib_del_route(fibnum, info, rc);
1583 error = rib_change_route(fibnum, info, rc);
/*
 * Shared state between rib_walk_del() and its per-node callback
 * rt_checkdelroute().  NOTE(review): the opening of the struct
 * declaration and some members (prio, filter_arg) are elided from
 * this view.
 */
1594 struct rib_head *rnh;		/* table being walked */
1595 struct rtentry *head;		/* chain of unlinked rtentries to GC */
1596 rib_filter_f_t *filter_f;	/* predicate selecting entries to delete */
1599 struct rib_cmd_info rc;	/* per-operation result scratch space */
1603 * Conditionally unlinks rtentries or paths from radix tree based
1604 * on the callback data passed in @arg.
/*
 * Radix-tree walker callback: conditionally deletes (or downgrades)
 * the route @rn based on the filter in @arg (struct rt_delinfo).
 * Unlinked rtentries are chained onto di->head so they can be freed
 * and reported after the table lock is dropped.
 * NOTE(review): the early-return after rt_delete_conditional() and
 * the function's return statements are elided from this view.
 */
1607 rt_checkdelroute(struct radix_node *rn, void *arg)
1609 struct rt_delinfo *di = (struct rt_delinfo *)arg;
1610 struct rtentry *rt = (struct rtentry *)rn;
/* Ask the filter whether this entry (or one of its paths) goes away. */
1612 if (rt_delete_conditional(di->rnh, rt, di->prio,
1613 di->filter_f, di->filter_arg, &di->rc) != 0)
1617 * Add deleted rtentries to the list to GC them
1618 * after dropping the lock.
1620 * XXX: Delayed notifications not implemented
1621 * for nexthop updates.
1623 if (di->rc.rc_cmd == RTM_DELETE) {
1624 /* Add to the list and return */
1625 rt->rt_chain = di->head;
1630 * RTM_CHANGE to a different nexthop or nexthop group.
1631 * Free old multipath group.
1633 nhop_free_any(di->rc.rc_nh_old);
1641 * Iterates over a routing table specified by @fibnum and @family and
1642 * deletes elements marked by @filter_f.
1643 * @fibnum: rtable id
1644 * @family: AF_ address family
1645 * @filter_f: function returning non-zero value for items to delete
1646 * @arg: data to pass to the @filter_f function
1647 * @report: true if rtsock notification is needed.
/*
 * Walks table (@fibnum, @family) and deletes entries selected by
 * @filter_f/@filter_arg.  Deletion happens in two phases: entries are
 * unlinked and chained under the lock, then freed and reported here.
 * NOTE(review): part of the signature (presumably the "report" flag),
 * the locking around the walk, and the per-entry free calls are
 * elided from this view.
 */
1650 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
1653 struct rib_head *rnh;
1655 struct nhop_object *nh;
1656 struct epoch_tracker et;
1658 rnh = rt_tables_get_rnh(fibnum, family);
/* Walker state: normal-priority conditional delete with our filter. */
1662 struct rt_delinfo di = {
1664 .filter_f = filter_f,
1665 .filter_arg = filter_arg,
1666 .prio = NH_PRIORITY_NORMAL,
1669 NET_EPOCH_ENTER(et);
1672 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
1675 /* We might have something to reclaim. */
1676 bzero(&di.rc, sizeof(di.rc));
1677 di.rc.rc_cmd = RTM_DELETE;
/* Drain the chain of unlinked rtentries built by the walker. */
1678 while (di.head != NULL) {
1680 di.head = rt->rt_chain;
1681 rt->rt_chain = NULL;
/* Deliver the delayed per-route delete notification. */
1685 di.rc.rc_nh_old = nh;
1686 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
1690 struct nhgrp_object *nhg;
1691 const struct weightened_nhop *wn;
/* rtsock reporting: expand nexthop groups into one message per path. */
1693 if (NH_IS_NHGRP(nh)) {
1694 nhg = (struct nhgrp_object *)nh;
1695 wn = nhgrp_get_nhops(nhg, &num_nhops);
1696 for (int i = 0; i < num_nhops; i++)
1697 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
1700 rt_routemsg(RTM_DELETE, rt, nh, fibnum);
/*
 * Walker callback that removes every route from the tree without
 * filtering or notifications; used by rib_flush_routes().
 * NOTE(review): the freeing of @rt after a successful unlink and the
 * return are elided from this view.
 */
1709 rt_delete_unconditional(struct radix_node *rn, void *arg)
1711 struct rtentry *rt = RNTORT(rn);
1712 struct rib_head *rnh = (struct rib_head *)arg;
1714 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
/* Sanity: the node removed must be the one we were handed. */
1715 if (RNTORT(rn) == rt)
1722 * Removes all routes from the routing table without executing notifications.
1723 * rtentries will be removed after the end of the current epoch.
/*
 * Removes all routes from @rnh without notifications; the rtentries
 * are reclaimed after the current epoch ends (see comment above).
 * NOTE(review): locking around the walk is elided from this view.
 */
1726 rib_flush_routes(struct rib_head *rnh)
1729 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
/*
 * Flushes all routes of address family @family in every FIB.
 */
1734 rib_flush_routes_family(int family)
1736 struct rib_head *rnh;
1738 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
/* Skip fibs where this family has no table. */
1739 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
1740 rib_flush_routes(rnh);
/*
 * Maps an address-family code to a value used elsewhere in this file.
 * NOTE(review): the entire body and return type are elided from this
 * view — consult the full source before relying on its semantics.
 */
1745 rib_print_family(int family)
/*
 * Delivers event @rc to every subscriber of @rnh registered for
 * notification type @type (immediate or delayed).
 */
1759 rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
1760 struct rib_cmd_info *rc)
1762 struct rib_subscription *rs;
/* CK list traversal: safe under epoch without the table lock. */
1764 CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) {
1765 if (rs->type == type)
1766 rs->func(rnh, rc, rs->arg);
/*
 * Allocates and zeroes a subscription structure; @waitok selects
 * sleepable (M_WAITOK) vs. non-sleeping (M_NOWAIT) allocation.
 * NOTE(review): field initialization from @f/@arg/@type and the
 * return are elided from this view.
 */
1770 static struct rib_subscription *
1771 allocate_subscription(rib_subscription_cb_t *f, void *arg,
1772 enum rib_subscription_type type, bool waitok)
1774 struct rib_subscription *rs;
1775 int flags = M_ZERO | (waitok ? M_WAITOK : M_NOWAIT);
1777 rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags);
1789 * Subscribe for the changes in the routing table specified by @fibnum and
1792 * Returns pointer to the subscription structure on success.
/*
 * Public entry point: resolves (@fibnum, @family) to a rib head and
 * registers callback @f for events of @type on it.
 * NOTE(review): the NULL-table error path and the epoch exit are
 * elided from this view.
 */
1794 struct rib_subscription *
1795 rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg,
1796 enum rib_subscription_type type, bool waitok)
1798 struct rib_head *rnh;
1799 struct epoch_tracker et;
1801 NET_EPOCH_ENTER(et);
1802 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
1803 rnh = rt_tables_get_rnh(fibnum, family);
1806 return (rib_subscribe_internal(rnh, f, arg, type, waitok));
/*
 * Registers a new subscription on @rnh.  The insertion happens inside
 * a network epoch section.
 * NOTE(review): the allocation-failure return value, the locking
 * around the insert, and the final return are elided from this view.
 */
1809 struct rib_subscription *
1810 rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg,
1811 enum rib_subscription_type type, bool waitok)
1813 struct rib_subscription *rs;
1814 struct epoch_tracker et;
1816 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL)
1820 NET_EPOCH_ENTER(et);
1822 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next);
/*
 * Variant for callers already holding the RIB write lock; therefore
 * the allocation is always non-sleeping (waitok=false).
 * NOTE(review): the allocation-failure path and the final return are
 * elided from this view.
 */
1829 struct rib_subscription *
1830 rib_subscribe_locked(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg,
1831 enum rib_subscription_type type)
1833 struct rib_subscription *rs;
1836 RIB_WLOCK_ASSERT(rnh);
1838 if ((rs = allocate_subscription(f, arg, type, false)) == NULL)
1842 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next);
1848 * Remove rtable subscription @rs from the routing table.
1849 * Needs to be run in network epoch.
/*
 * Unlinks subscription @rs from its table; the structure itself is
 * freed via an epoch callback once no readers can still see it.
 * NOTE(review): locking around the removal is elided from this view.
 */
1852 rib_unsubscribe(struct rib_subscription *rs)
1854 struct rib_head *rnh = rs->rnh;
1859 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next);
/* Defer the free until concurrent epoch readers have drained. */
1862 epoch_call(net_epoch_preempt, destroy_subscription_epoch,
/*
 * Same as rib_unsubscribe(), but for callers that already hold the
 * RIB write lock.
 */
1867 rib_unsubscribe_locked(struct rib_subscription *rs)
1869 struct rib_head *rnh = rs->rnh;
1872 RIB_WLOCK_ASSERT(rnh);
1874 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next);
/* Defer the free until concurrent epoch readers have drained. */
1876 epoch_call(net_epoch_preempt, destroy_subscription_epoch,
1881 * Epoch callback indicating subscription is safe to destroy
/*
 * Epoch callback: recovers the subscription from its embedded epoch
 * context.  (The actual free call is elided from this view.)
 */
1884 destroy_subscription_epoch(epoch_context_t ctx)
1886 struct rib_subscription *rs;
1888 rs = __containerof(ctx, struct rib_subscription, epoch_ctx);
/*
 * Initializes the (empty) subscriber list of a freshly created table.
 */
1894 rib_init_subscriptions(struct rib_head *rnh)
1897 CK_STAILQ_INIT(&rnh->rnh_subscribers);
1901 rib_destroy_subscriptions(struct rib_head *rnh)
1903 struct rib_subscription *rs;
1904 struct epoch_tracker et;
1906 NET_EPOCH_ENTER(et);
1908 while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) {
1909 CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next);
1910 epoch_call(net_epoch_preempt, destroy_subscription_epoch,