2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2020 Alexander V. Chernikov
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include "opt_route.h"
33 #include <sys/cdefs.h>
34 #include <sys/param.h>
35 #include <sys/systm.h>
37 #include <sys/rmlock.h>
38 #include <sys/rwlock.h>
39 #include <sys/malloc.h>
41 #include <sys/refcount.h>
42 #include <sys/socket.h>
43 #include <sys/sysctl.h>
44 #include <sys/kernel.h>
47 #include <net/if_var.h>
48 #include <net/if_dl.h>
49 #include <net/route.h>
50 #include <net/route/route_ctl.h>
51 #include <net/route/route_var.h>
54 #include <netinet/in.h>
55 #include <netinet/in_var.h>
56 #include <netinet/in_fib.h>
58 #include <net/route/nhop_utils.h>
59 #include <net/route/nhop.h>
60 #include <net/route/nhop_var.h>
61 #include <net/route/nhgrp_var.h>
64 * This file contains data structures management logic for the nexthop
65 * groups ("nhgrp") route subsystem.
 * Nexthop groups are used to store multiple routes available for a specific
 * prefix. Nexthop groups are immutable and can be shared across multiple
 * prefixes.
71 * Each group consists of a control plane part and a dataplane part.
72 * Control plane is basically a collection of nexthop objects with
73 * weights and refcount.
 * Datapath consists of an array of nexthop pointers, compiled from control
 * plane data to support O(1) nexthop selection.
78 * For example, consider the following group:
79 * [(nh1, weight=100), (nh2, weight=200)]
80 * It will compile to the following array:
/* Forward declarations of file-local helpers defined further below. */
static unsigned int hash_nhgrp(const struct nhgrp_priv *obj);
static int cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b);

static void consider_resize(struct nh_control *ctl,
    uint32_t new_nh_buckets, uint32_t new_idx_items);
/*
 * Hashes the first @len bytes of @h.
 *
 * Starting from 0, every byte is folded in as
 * "accumulator = (accumulator * 33) XOR byte".
 * A @len that is zero or negative yields 0.
 */
static unsigned int
djb_hash(const unsigned char *h, const int len)
{
	unsigned int acc;
	int pos;

	acc = 0;
	pos = 0;
	while (pos < len) {
		acc = (acc * 33) ^ h[pos];
		pos++;
	}

	return (acc);
}
104 cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b)
108 * In case of consistent hashing, there can be multiple nexthop groups
109 * with the same "control plane" list of nexthops with weights and a
110 * different set of "data plane" nexthops.
111 * For now, ignore the data plane and focus on the control plane list.
113 if (a->nhg_nh_count != b->nhg_nh_count)
115 return !memcmp(a->nhg_nh_weights, b->nhg_nh_weights,
116 sizeof(struct weightened_nhop) * a->nhg_nh_count);
120 * Hash callback: calculate hash of an object
123 hash_nhgrp(const struct nhgrp_priv *obj)
125 const unsigned char *key;
127 key = (const unsigned char *)obj->nhg_nh_weights;
129 return (djb_hash(key, sizeof(struct weightened_nhop) * obj->nhg_nh_count));
133 * Returns object referenced and unlocked
136 find_nhgrp(struct nh_control *ctl, const struct nhgrp_priv *key)
138 struct nhgrp_priv *priv_ret;
141 CHT_SLIST_FIND_BYOBJ(&ctl->gr_head, mpath, key, priv_ret);
142 if (priv_ret != NULL) {
143 if (refcount_acquire_if_not_zero(&priv_ret->nhg_refcount) == 0) {
144 /* refcount is 0 -> group is being deleted */
154 link_nhgrp(struct nh_control *ctl, struct nhgrp_priv *grp_priv)
157 uint32_t new_num_buckets, new_num_items;
160 /* Check if we need to resize hash and index */
161 new_num_buckets = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->gr_head);
162 new_num_items = bitmask_get_resize_items(&ctl->gr_idx_head);
164 if (bitmask_alloc_idx(&ctl->gr_idx_head, &idx) != 0) {
166 DPRINTF("Unable to allocate mpath index");
167 consider_resize(ctl, new_num_buckets, new_num_items);
171 grp_priv->nhg_idx = idx;
172 grp_priv->nh_control = ctl;
173 CHT_SLIST_INSERT_HEAD(&ctl->gr_head, mpath, grp_priv);
177 consider_resize(ctl, new_num_buckets, new_num_items);
183 unlink_nhgrp(struct nh_control *ctl, struct nhgrp_priv *key)
185 struct nhgrp_priv *nhg_priv_ret;
190 CHT_SLIST_REMOVE_BYOBJ(&ctl->gr_head, mpath, key, nhg_priv_ret);
192 if (nhg_priv_ret == NULL) {
193 DPRINTF("Unable to find nhop group!");
198 idx = nhg_priv_ret->nhg_idx;
199 ret = bitmask_free_idx(&ctl->gr_idx_head, idx);
200 nhg_priv_ret->nhg_idx = 0;
201 nhg_priv_ret->nh_control = NULL;
205 return (nhg_priv_ret);
209 * Checks if hash needs resizing and performs this resize if necessary
212 __noinline static void
213 consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets, uint32_t new_idx_items)
215 void *nh_ptr, *nh_idx_ptr;
220 if (new_nh_buckets != 0) {
221 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_nh_buckets);
222 nh_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
226 if (new_idx_items != 0) {
227 alloc_size = bitmask_get_size(new_idx_items);
228 nh_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
231 if (nh_ptr == NULL && nh_idx_ptr == NULL) {
232 /* Either resize is not required or allocations have failed. */
236 DPRINTF("mp: going to resize: nh:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]",
237 nh_ptr, new_nh_buckets, nh_idx_ptr, new_idx_items);
242 if (nh_ptr != NULL) {
243 CHT_SLIST_RESIZE(&ctl->gr_head, mpath, nh_ptr, new_nh_buckets);
245 if (nh_idx_ptr != NULL) {
246 if (bitmask_copy(&ctl->gr_idx_head, nh_idx_ptr, new_idx_items))
247 bitmask_swap(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items, &old_idx_ptr);
252 free(nh_ptr, M_NHOP);
253 if (old_idx_ptr != NULL)
254 free(old_idx_ptr, M_NHOP);
258 * Function allocating the necessary group data structures.
261 nhgrp_ctl_alloc_default(struct nh_control *ctl, int malloc_flags)
264 uint32_t num_buckets, num_items;
265 void *cht_ptr, *mask_ptr;
267 malloc_flags = (malloc_flags & (M_NOWAIT | M_WAITOK)) | M_ZERO;
270 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
271 cht_ptr = malloc(alloc_size, M_NHOP, malloc_flags);
273 if (cht_ptr == NULL) {
274 DPRINTF("mpath init failed");
279 * Allocate nexthop index bitmask.
282 mask_ptr = malloc(bitmask_get_size(num_items), M_NHOP, malloc_flags);
283 if (mask_ptr == NULL) {
284 DPRINTF("mpath bitmask init failed");
285 free(cht_ptr, M_NHOP);
291 if (ctl->gr_head.hash_size == 0) {
292 /* Init hash and bitmask */
293 CHT_SLIST_INIT(&ctl->gr_head, cht_ptr, num_buckets);
294 bitmask_init(&ctl->gr_idx_head, mask_ptr, num_items);
297 /* Other thread has already initiliazed hash/bitmask */
299 free(cht_ptr, M_NHOP);
300 free(mask_ptr, M_NHOP);
303 DPRINTF("mpath init done for fib/af %d/%d", ctl->rh->rib_fibnum,
304 ctl->rh->rib_family);
310 nhgrp_ctl_init(struct nh_control *ctl)
314 * By default, do not allocate datastructures as multipath
315 * routes will not be necessarily used.
317 CHT_SLIST_INIT(&ctl->gr_head, NULL, 0);
318 bitmask_init(&ctl->gr_idx_head, NULL, 0);
323 nhgrp_ctl_free(struct nh_control *ctl)
326 if (ctl->gr_head.ptr != NULL)
327 free(ctl->gr_head.ptr, M_NHOP);
328 if (ctl->gr_idx_head.idx != NULL)
329 free(ctl->gr_idx_head.idx, M_NHOP);
333 nhgrp_ctl_unlink_all(struct nh_control *ctl)
335 struct nhgrp_priv *nhg_priv;
337 NHOPS_WLOCK_ASSERT(ctl);
339 CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) {
340 DPRINTF("Marking nhgrp %u unlinked", nhg_priv->nhg_idx);
341 refcount_release(&nhg_priv->nhg_linked);
342 } CHT_SLIST_FOREACH_END;