2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2020 Alexander V. Chernikov
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include "opt_inet6.h"
32 #include "opt_route.h"
34 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
39 #include <sys/socket.h>
40 #include <sys/sysctl.h>
41 #include <sys/syslog.h>
42 #include <sys/sysproto.h>
44 #include <sys/domain.h>
45 #include <sys/kernel.h>
47 #include <sys/rmlock.h>
50 #include <net/if_var.h>
51 #include <net/if_dl.h>
52 #include <net/route.h>
53 #include <net/route/route_ctl.h>
54 #include <net/route/route_var.h>
55 #include <net/route/nhop_utils.h>
56 #include <net/route/nhop.h>
57 #include <net/route/nhop_var.h>
59 #include <netinet/in_fib.h>
62 #include <netinet6/in6_fib.h>
67 * RIB helper functions.
71 * Calls @wa_f with @arg for each entry in the table specified by
74 * Table is traversed under read lock.
/*
 * Table walker entry point: resolves the rib head for (fibnum, af) and
 * runs the table's own walk method over it with the caller's callback.
 *
 * NOTE(review): this listing omits lines (the embedded numbering jumps
 * 77 -> 82 -> 86), so the return type, the local declarations and any
 * locking or return statements are not visible here.
 */
77 rib_walk(int af, u_int fibnum, rt_walktree_f_t *wa_f, void *arg)
/*
 * Fetch the rib head for this fib/address family. The statement executed
 * when the lookup yields NULL is not shown in this listing.
 */
82 if ((rnh = rt_tables_get_rnh(fibnum, af)) == NULL)
/*
 * Walk the table through its rnh_walktree method, handing it wa_f (cast
 * to the generic walktree_f_t type) and arg unchanged.
 */
86 rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f, arg);
91 * Wrapper for the control plane functions for performing af-agnostic
93 * @fibnum: fib to perform the lookup.
94 * @dst: sockaddr with family and addr filled in. IPv6 addresses need to be in
96 * @flags: fib(9) flags.
97 * @flowid: flow id for path selection in multipath use case.
99 * Returns nhop_object or NULL.
101 * Requires NET_EPOCH.
/*
 * Dispatches a nexthop lookup on dst->sa_family: the generic sockaddr is
 * cast to sockaddr_in for fib4_lookup() or to sockaddr_in6 for
 * fib6_lookup(), and the result is stored in nh.
 *
 * NOTE(review): this listing omits lines (the embedded numbering jumps
 * 105 -> 108 -> 112 -> 116 ...), so the remaining parameters, the case
 * labels, the trailing fib6_lookup() arguments and the return statement
 * are not visible here.
 */
105 rib_lookup(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
108 struct nhop_object *nh;
/* Pick the address-family specific lookup routine. */
112 switch (dst->sa_family) {
/*
 * sockaddr_in path: the address is passed by value and the third
 * fib4_lookup() argument is a constant 0.
 */
116 const struct sockaddr_in *a = (const struct sockaddr_in *)dst;
117 nh = fib4_lookup(fibnum, a->sin_addr, 0, flags, flowid);
/*
 * sockaddr_in6 path: the address is passed by pointer together with the
 * scope id taken from the sockaddr.
 */
124 const struct sockaddr_in6 *a = (const struct sockaddr_in6*)dst;
125 nh = fib6_lookup(fibnum, &a->sin6_addr, a->sin6_scope_id,
/*
 * Compares the old and new nexthop sets referenced by @rc and fills two
 * per-nexthop command structures, rc_del (RTM_DELETE) and rc_add
 * (RTM_ADD), both bound to rc->rc_rt, with the entries that differ.
 *
 * NOTE(review): this listing omits lines (the embedded numbering has
 * gaps throughout). The statements that presumably hand rc_del/rc_add to
 * @cb and advance idx_old/idx_new are not visible -- confirm against the
 * full file before changing any of the logic below.
 */
137 decompose_change_notification(struct rib_cmd_info *rc, route_notification_t *cb,
140 uint32_t num_old, num_new;
141 uint32_t nh_idx_old, nh_idx_new;
142 struct weightened_nhop *wn_old, *wn_new;
143 struct weightened_nhop tmp = { NULL, 0 };
144 uint32_t idx_old = 0, idx_new = 0;
146 struct rib_cmd_info rc_del = { .rc_cmd = RTM_DELETE, .rc_rt = rc->rc_rt };
147 struct rib_cmd_info rc_add = { .rc_cmd = RTM_ADD, .rc_rt = rc->rc_rt };
/*
 * Old side: a nexthop group is expanded into its weighted-nexthop array
 * and element count; otherwise the single nexthop and rc_nh_weight are
 * stored in tmp (how tmp is then wired to wn_old is not shown here).
 */
149 if (NH_IS_NHGRP(rc->rc_nh_old)) {
150 wn_old = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_old);
152 tmp.nh = rc->rc_nh_old;
153 tmp.weight = rc->rc_nh_weight;
/*
 * New side, handled the same way.
 * NOTE(review): the fallback writes the same tmp as the old side above,
 * so this only works when at most one side is a plain nexthop.
 */
157 if (NH_IS_NHGRP(rc->rc_nh_new)) {
158 wn_new = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_new);
160 tmp.nh = rc->rc_nh_new;
161 tmp.weight = rc->rc_nh_weight;
166 /* Use the fact that each @wn array is sorted */
168 * Want to convert into set of add and delete operations
169 * [1] -> [1, 2] = A{2}
170 * [2] -> [1, 2] = A{1}
/*
 * NOTE(review): the next example looks inverted. Going from [1, 2, 4] to
 * [1, 3, 4] removes 2 and adds 3, i.e. D{2}, A{3}, which is also how the
 * neighbouring examples use A{} and D{}.
 */
171 * [1, 2, 4]->[1, 3, 4] = A{2}, D{3}
172 * [1, 2, 4]->[1, 4] = D{2}
173 * [1, 2, 4] -> [3, 4] = D{1}, C{2,3} OR C{1,3}, D{2} OR D{1},D{2},A{3}
/*
 * Merge-style pass: compare the current entry of each array by nh_idx
 * for as long as both arrays still have entries left.
 */
178 while ((idx_old < num_old) && (idx_new < num_new)) {
179 nh_idx_old = wn_old[idx_old].nh->nh_priv->nh_idx;
180 nh_idx_new = wn_new[idx_new].nh->nh_priv->nh_idx;
/* Same nexthop on both sides: only a weight difference needs reporting. */
182 if (nh_idx_old == nh_idx_new) {
183 if (wn_old[idx_old].weight != wn_new[idx_new].weight) {
184 /* Update weight by providing del/add notifications */
185 rc_del.rc_nh_old = wn_old[idx_old].nh;
186 rc_del.rc_nh_weight = wn_old[idx_old].weight;
189 rc_add.rc_nh_new = wn_new[idx_new].nh;
190 rc_add.rc_nh_weight = wn_new[idx_new].weight;
/* Old index is smaller: the current old entry has no match on the new side. */
195 } else if (nh_idx_old < nh_idx_new) {
197 * [1, ~2~, 4], [1, ~3~, 4]
198 * [1, ~2~, 5], [1, ~3~, 4]
199 * [1, ~2~], [1, ~3~, 4]
201 if ((idx_old + 1 >= num_old) ||
202 (wn_old[idx_old + 1].nh->nh_priv->nh_idx > nh_idx_new)) {
203 /* Add new unless the next old item is still <= new */
204 rc_add.rc_nh_new = wn_new[idx_new].nh;
205 rc_add.rc_nh_weight = wn_new[idx_new].weight;
209 /* In any case, delete current old */
210 rc_del.rc_nh_old = wn_old[idx_old].nh;
211 rc_del.rc_nh_weight = wn_old[idx_old].weight;
216 * nh_idx_old > nh_idx_new
218 * [1, ~3~, 4], [1, ~2~, 4]
219 * [1, ~3~, 5], [1, ~2~, 4]
220 * [1, ~3~, 4], [1, ~2~]
222 if ((idx_new + 1 >= num_new) ||
223 (wn_new[idx_new + 1].nh->nh_priv->nh_idx > nh_idx_old)) {
224 /* No next item or next item is > current one */
225 rc_add.rc_nh_new = wn_new[idx_new].nh;
226 rc_add.rc_nh_weight = wn_new[idx_new].weight;
/*
 * NOTE(review): a delete of the current old entry is prepared here even
 * though that entry may still match a later new entry (e.g. [2] ->
 * [1, 2]). Whether this produces a redundant delete/add pair depends on
 * the index updates that are not shown in this listing -- confirm.
 */
230 /* In any case, delete current old */
231 rc_del.rc_nh_old = wn_old[idx_old].nh;
232 rc_del.rc_nh_weight = wn_old[idx_old].weight;
/* Leftover old entries have no counterpart on the new side: delete each. */
238 while (idx_old < num_old) {
239 rc_del.rc_nh_old = wn_old[idx_old].nh;
240 rc_del.rc_nh_weight = wn_old[idx_old].weight;
/* Leftover new entries have no counterpart on the old side: add each. */
245 while (idx_new < num_new) {
246 rc_add.rc_nh_new = wn_new[idx_new].nh;
247 rc_add.rc_nh_weight = wn_new[idx_new].weight;
254 * Decompose multipath cmd info @rc into a list of add/del/change
255 * single-path operations, calling @cb callback for each operation.
256 * Assumes at least one of the nexthops in @rc is multipath.
/*
 * Fans a command out per nexthop, switching on rc->rc_cmd. When the
 * relevant nexthop is a group, each member and its weight are copied
 * into rc_new one at a time; the branch that inspects both old and new
 * nexthops delegates to decompose_change_notification().
 *
 * NOTE(review): this listing omits lines and ends before the function
 * does. The case labels (presumably RTM_ADD / RTM_DELETE / RTM_CHANGE),
 * the initialisation of rc_new, the bodies of the non-group checks and
 * the @cb invocations are not visible here.
 */
259 rib_decompose_notification(struct rib_cmd_info *rc, route_notification_t *cb,
262 struct weightened_nhop *wn;
264 struct rib_cmd_info rc_new;
/*
 * NOTE(review): the DPRINTF arguments use rc->cmd, rc->nh_old and
 * rc->nh_new, while every other access in this function uses the
 * rc_-prefixed names (rc_cmd, rc_nh_old, rc_nh_new). If DPRINTF ever
 * expands its arguments this would presumably fail to compile -- confirm
 * against the definition of struct rib_cmd_info.
 */
267 DPRINTF("cb=%p cmd=%d nh_old=%p nh_new=%p",
268 cb, rc->cmd, rc->nh_old, rc->nh_new);
269 switch (rc->rc_cmd) {
/* New nexthop is a group: emit one rc_new per member as rc_nh_new. */
271 if (!NH_IS_NHGRP(rc->rc_nh_new))
273 wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_nhops);
274 for (uint32_t i = 0; i < num_nhops; i++) {
275 rc_new.rc_nh_new = wn[i].nh;
276 rc_new.rc_nh_weight = wn[i].weight;
/* Old nexthop is a group: emit one rc_new per member as rc_nh_old. */
281 if (!NH_IS_NHGRP(rc->rc_nh_old))
283 wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_nhops);
284 for (uint32_t i = 0; i < num_nhops; i++) {
285 rc_new.rc_nh_old = wn[i].nh;
286 rc_new.rc_nh_weight = wn[i].weight;
/*
 * Neither side is a group: handled by a statement not shown here.
 * Otherwise the old/new sets are diffed by the helper.
 */
291 if (!NH_IS_NHGRP(rc->rc_nh_old) && !NH_IS_NHGRP(rc->rc_nh_new))
293 decompose_change_notification(rc, cb, cbdata);