2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include "opt_inet6.h"
32 #include "opt_route.h"
33 #include <sys/types.h>
35 #include <sys/epoch.h>
36 #include <sys/kernel.h>
37 #include <sys/malloc.h>
38 #include <sys/rmlock.h>
39 #include <sys/socket.h>
42 #include <net/route.h>
43 #include <net/route/nhop.h>
44 #include <net/route/nhop_utils.h>
46 #include <net/route/route_ctl.h>
47 #include <net/route/route_var.h>
48 #include <netinet6/scope6_var.h>
49 #include <netlink/netlink.h>
50 #include <netlink/netlink_ctl.h>
51 #include <netlink/netlink_route.h>
52 #include <netlink/route/route_var.h>
54 #define DEBUG_MOD_NAME nl_nhop
55 #define DEBUG_MAX_LEVEL LOG_DEBUG3
56 #include <netlink/netlink_debug.h>
57 _DECLARE_DEBUG(LOG_DEBUG3);
60 * This file contains the logic to maintain kernel nexthops and
61 * nexthop groups based on the data provided by the user.
63 * Kernel stores (nearly) all of the routing data in the nexthops,
64 * including the prefix-specific flags (NHF_HOST and NHF_DEFAULT).
66 * Netlink API provides higher-level abstraction for the user. Each
67 * user-created nexthop may map to multiple kernel nexthops.
69 * The following variations require separate kernel nexthop to be
71 * * prefix flags (NHF_HOST, NHF_DEFAULT)
72 * * using IPv6 gateway for IPv4 routes
75 * These kernel nexthops have the lifetime bound to the lifetime of
76 * the user_nhop object. They are not collected until user requests
77 * to delete the created user_nhop.
81 uint32_t un_idx; /* Userland-provided index */
82 uint32_t un_fibfam; /* fibnum+af(as highest byte) */
83 uint8_t un_protocol; /* protocol that install the record */
84 struct nhop_object *un_nhop; /* "production" nexthop */
85 struct nhop_object *un_nhop_src; /* nexthop to copy from */
86 struct weightened_nhop *un_nhgrp_src; /* nexthops for nhg */
87 uint32_t un_nhgrp_count; /* number of nexthops */
88 struct user_nhop *un_next; /* next item in hash chain */
89 struct user_nhop *un_nextchild; /* master -> children */
90 struct epoch_context un_epoch_ctx; /* epoch ctl helper */
93 /* hash hook: produce the hash value for an object (forwards to hash_unhop()) */
94 #define unhop_hash_obj(_obj) (hash_unhop(_obj))
95 /* equality hook: compare two objects (forwards to cmp_unhop()) */
96 #define unhop_cmp(_one, _two) (cmp_unhop(_one, _two))
97 /* accessor for the next object in the same hash chain (the un_next member) */
98 #define unhop_next(_obj) (_obj)->un_next
/*
 * Instantiate the "unhop" hash table over struct user_nhop.
 * NOTE(review): presumably CHT_SLIST_DEFINE picks up the unhop_* hooks above
 * by name and declares struct unhop_head -- confirm against its definition.
 */
100 CHT_SLIST_DEFINE(unhop, struct user_nhop);
103 struct unhop_head un_head;
104 struct rmlock un_lock;
106 #define UN_LOCK_INIT(_ctl) rm_init(&(_ctl)->un_lock, "unhop_ctl")
107 #define UN_TRACKER struct rm_priotracker un_tracker
108 #define UN_RLOCK(_ctl) rm_rlock(&((_ctl)->un_lock), &un_tracker)
109 #define UN_RUNLOCK(_ctl) rm_runlock(&((_ctl)->un_lock), &un_tracker)
111 #define UN_WLOCK(_ctl) rm_wlock(&(_ctl)->un_lock);
112 #define UN_WUNLOCK(_ctl) rm_wunlock(&(_ctl)->un_lock);
/*
 * Per-VNET pointer to the unhop control block.  It starts out as NULL and is
 * installed by vnet_init_unhops().
 */
114 VNET_DEFINE_STATIC(struct unhop_ctl *, un_ctl) = NULL;
115 #define V_un_ctl VNET(un_ctl)
/* Forward declarations of helpers that are referenced before their definitions. */
117 static void consider_resize(struct unhop_ctl *ctl, uint32_t new_size);
118 static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b);
119 static unsigned int hash_unhop(const struct user_nhop *obj);
121 static void destroy_unhop(struct user_nhop *unhop);
122 static struct nhop_object *clone_unhop(const struct user_nhop *unhop,
123 uint32_t fibnum, int family, int nh_flags);
126 cmp_unhop(const struct user_nhop *a, const struct user_nhop *b)
128 return (a->un_idx == b->un_idx && a->un_fibfam == b->un_fibfam);
132 * Hash callback: calculate hash of an object
135 hash_unhop(const struct user_nhop *obj)
137 return (obj->un_idx ^ obj->un_fibfam);
/* True for a template ("master") record, i.e. one stored with a zero fib/family key. */
140 #define UNHOP_IS_MASTER(_unhop) ((_unhop)->un_fibfam == 0)
143 * Factory interface for creating matching kernel nexthops/nexthop groups
145 * @uidx: userland nexthop index used to create the nexthop
146 * @fibnum: fibnum nexthop will be used in
147 * @family: upper family nexthop will be used in
148 * @nh_flags: desired nexthop prefix flags
149 * @perror: pointer to store error to
151 * Returns referenced nexthop linked to @fibnum/@family rib on success.
154 nl_find_nhop(uint32_t fibnum, int family, uint32_t uidx,
155 int nh_flags, int *perror)
/*
 * NOTE(review): the return statements, error codes and any lock calls of
 * this function are not visible in this listing; the comments below only
 * describe the visible statements.
 */
/* Snapshot the per-VNET control block; it is NULL until vnet_init_unhops() ran. */
157 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl);
160 if (__predict_false(ctl == NULL))
163 struct user_nhop key= {
/* fib number OR-ed with the address family shifted into the highest byte */
165 .un_fibfam = fibnum | ((uint32_t)family) << 24,
167 struct user_nhop *unhop;
/* Keep only the prefix-type flags; any other bit passed by the caller is dropped. */
169 nh_flags = nh_flags & (NHF_HOST | NHF_DEFAULT);
171 if (__predict_false(family == 0))
/* Look for a record matching the key built above. */
175 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
177 struct nhop_object *nh = unhop->un_nhop;
185 * Exact nexthop not found. Search for template nexthop to clone from.
/*
 * NOTE(review): presumably the key is retargeted at the template
 * (un_fibfam == 0) before this lookup -- that statement is not visible here.
 */
188 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
197 /* Create entry to insert first */
198 struct user_nhop *un_new, *un_tmp;
/* M_ZERO leaves every member that is not assigned below NULL/0. */
199 un_new = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO);
200 if (un_new == NULL) {
204 un_new->un_idx = uidx;
205 un_new->un_fibfam = fibnum | ((uint32_t)family) << 24;
207 /* Relying on epoch to protect unhop here */
208 un_new->un_nhop = clone_unhop(unhop, fibnum, family, nh_flags);
209 if (un_new->un_nhop == NULL) {
/* Nothing but the record itself is owned at this point, so a plain free() suffices. */
210 free(un_new, M_NETLINK);
215 /* Insert back and report */
218 /* First, find template record once again */
219 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
221 /* Someone deleted the nexthop during the call */
/* destroy_unhop() also drops the cloned nexthop held in un_new->un_nhop. */
224 destroy_unhop(un_new);
228 /* Second, check the direct match */
229 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, un_new, un_tmp);
230 struct nhop_object *nh;
231 if (un_tmp != NULL) {
232 /* Another thread already created the desired nexthop, use it */
233 nh = un_tmp->un_nhop;
235 /* Finally, insert the new nexthop and link it to the primary */
236 nh = un_new->un_nhop;
237 CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, un_new);
/* Push the clone onto the front of the template's child list. */
238 un_new->un_nextchild = unhop->un_nextchild;
239 unhop->un_nextchild = un_new;
241 NL_LOG(LOG_DEBUG2, "linked cloned nexthop %p", nh);
247 destroy_unhop(un_new);
/*
 * Looks up the template record for userland index @uidx in @ctl's hash table.
 * NOTE(review): the locking around the lookup and the return statement are
 * not visible in this listing.
 */
254 static struct user_nhop *
255 nl_find_base_unhop(struct unhop_ctl *ctl, uint32_t uidx)
/* Only un_idx is set, so un_fibfam stays 0 and the key denotes the template record. */
257 struct user_nhop key= { .un_idx = uidx };
258 struct user_nhop *unhop = NULL;
262 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
268 #define MAX_STACK_NHOPS 4
269 static struct nhop_object *
270 clone_unhop(const struct user_nhop *unhop, uint32_t fibnum, int family, int nh_flags)
273 const struct weightened_nhop *wn;
274 struct weightened_nhop *wn_new, wn_base[MAX_STACK_NHOPS];
277 struct nhop_object *nh = NULL;
280 if (unhop->un_nhop_src != NULL) {
281 IF_DEBUG_LEVEL(LOG_DEBUG2) {
282 char nhbuf[NHOP_PRINT_BUFSIZE];
283 nhop_print_buf_any(unhop->un_nhop_src, nhbuf, sizeof(nhbuf));
284 FIB_NH_LOG(LOG_DEBUG2, unhop->un_nhop_src,
285 "cloning nhop %s -> %u.%u flags 0x%X", nhbuf, fibnum,
288 struct nhop_object *nh;
289 nh = nhop_alloc(fibnum, AF_UNSPEC);
292 nhop_copy(nh, unhop->un_nhop_src);
293 /* Check that nexthop gateway is compatible with the new family */
294 if (!nhop_set_upper_family(nh, family)) {
298 nhop_set_uidx(nh, unhop->un_idx);
299 nhop_set_pxtype_flag(nh, nh_flags);
300 return (nhop_get_nhop(nh, &error));
303 wn = unhop->un_nhgrp_src;
304 num_nhops = unhop->un_nhgrp_count;
306 if (num_nhops > MAX_STACK_NHOPS) {
307 wn_new = malloc(num_nhops * sizeof(struct weightened_nhop), M_TEMP, M_NOWAIT);
313 for (int i = 0; i < num_nhops; i++) {
314 uint32_t uidx = nhop_get_uidx(wn[i].nh);
316 wn_new[i].nh = nl_find_nhop(fibnum, family, uidx, nh_flags, &error);
319 wn_new[i].weight = wn[i].weight;
323 struct rib_head *rh = nhop_get_rh(wn_new[0].nh);
324 struct nhgrp_object *nhg;
326 error = nhgrp_get_group(rh, wn_new, num_nhops, unhop->un_idx, &nhg);
327 nh = (struct nhop_object *)nhg;
330 if (wn_new != wn_base)
331 free(wn_new, M_TEMP);
337 destroy_unhop(struct user_nhop *unhop)
339 if (unhop->un_nhop != NULL)
340 nhop_free_any(unhop->un_nhop);
341 if (unhop->un_nhop_src != NULL)
342 nhop_free_any(unhop->un_nhop_src);
343 free(unhop, M_NETLINK);
347 destroy_unhop_epoch(epoch_context_t ctx)
349 struct user_nhop *unhop;
351 unhop = __containerof(ctx, struct user_nhop, un_epoch_ctx);
353 destroy_unhop(unhop);
/*
 * Probes randomly chosen userland indexes against @ctl's hash table.
 * NOTE(review): the handling of the lookup result, the locking and the
 * return statement are not visible in this listing; rtnl_handle_newnhop()
 * treats a return value of 0 as "no spare index found".
 */
357 find_spare_uidx(struct unhop_ctl *ctl)
/* key is zero-initialized, so un_fibfam == 0 and lookups target template records */
359 struct user_nhop *unhop, key = {};
364 /* This should return spare uid with 75% of 65k used in ~99/100 cases */
/* At most 16 attempts, each drawing a candidate from [65536 * 4, 65536 * 5). */
365 for (int i = 0; i < 16; i++) {
366 key.un_idx = (arc4random() % 65536) + 65536 * 4;
367 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
380 * Actual netlink code
/*
 * Argument bundle used by the handlers below; they access its nw, hdr and
 * error members.  NOTE(review): only the nw member is visible in this listing.
 */
382 struct netlink_walkargs {
383 struct nl_writer *nw;
/* Jumps to the "enomem" label of the enclosing function when the pointer is NULL. */
391 #define ENOMEM_IF_NULL(_v) if ((_v) == NULL) goto enomem
/*
 * Writes the netlink description of the nexthop group @unhop into @nw:
 * an nhmsg header followed by NHA_ID, NHA_GROUP_TYPE and an NHA_GROUP
 * attribute listing every member.
 * NOTE(review): the return statements and the error label are not visible
 * in this listing.
 */
394 dump_nhgrp(const struct user_nhop *unhop, struct nlmsghdr *hdr,
395 struct nl_writer *nw)
398 if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg)))
401 struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg);
/* A group is reported with no address family of its own. */
402 nhm->nh_family = AF_UNSPEC;
404 nhm->nh_protocol = unhop->un_protocol;
407 nlattr_add_u32(nw, NHA_ID, unhop->un_idx);
408 nlattr_add_u16(nw, NHA_GROUP_TYPE, NEXTHOP_GRP_TYPE_MPATH);
410 struct weightened_nhop *wn = unhop->un_nhgrp_src;
411 uint32_t num_nhops = unhop->un_nhgrp_count;
412 /* TODO: a better API? */
/*
 * NHA_GROUP is assembled by hand: attribute header plus one
 * struct nexthop_grp per member, with the payload size aligned.
 * NOTE(review): nla_len is stored into nla->nla_len below; confirm that a
 * large group cannot exceed what that field can hold.
 */
413 int nla_len = sizeof(struct nlattr);
414 nla_len += NETLINK_ALIGN(num_nhops * sizeof(struct nexthop_grp));
415 struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
418 nla->nla_type = NHA_GROUP;
419 nla->nla_len = nla_len;
420 for (int i = 0; i < num_nhops; i++) {
/* (nla + 1) is the first byte after the attribute header, i.e. the payload. */
421 struct nexthop_grp *grp = &((struct nexthop_grp *)(nla + 1))[i];
/* Members are reported by their userland index, not by kernel pointer. */
422 grp->id = nhop_get_uidx(wn[i].nh);
423 grp->weight = wn[i].weight;
431 NL_LOG(LOG_DEBUG, "error: unable to allocate attribute memory");
/*
 * Writes the netlink description of the plain nexthop @unhop into @nw,
 * based on its template nexthop (un_nhop_src): an nhmsg header, NHA_ID and
 * then NHA_BLACKHOLE, NHA_OIF and NHA_GATEWAY attributes.
 * NOTE(review): the case labels, else branches, return statements and the
 * error label are not visible in this listing.
 */
437 dump_nhop(const struct user_nhop *unhop, struct nlmsghdr *hdr,
438 struct nl_writer *nw)
440 struct nhop_object *nh = unhop->un_nhop_src;
442 if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg)))
445 struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg);
447 nhm->nh_family = nhop_get_neigh_family(nh);
448 nhm->nh_scope = 0; // XXX: what's that?
449 nhm->nh_protocol = unhop->un_protocol;
452 nlattr_add_u32(nw, NHA_ID, unhop->un_idx);
453 if (nh->nh_flags & NHF_BLACKHOLE) {
454 nlattr_add_flag(nw, NHA_BLACKHOLE);
457 nlattr_add_u32(nw, NHA_OIF, nh->nh_ifp->if_index);
/* The gateway encoding depends on the address family of the stored gateway. */
459 switch (nh->gw_sa.sa_family) {
/* 4-byte gateway taken from the sockaddr_in view of the gateway */
462 nlattr_add(nw, NHA_GATEWAY, 4, &nh->gw4_sa.sin_addr);
/* Work on a copy so that in6_clearscope() does not modify the nexthop itself. */
468 struct in6_addr addr = nh->gw6_sa.sin6_addr;
469 in6_clearscope(&addr);
470 nlattr_add(nw, NHA_GATEWAY, 16, &addr);
485 dump_unhop(const struct user_nhop *unhop, struct nlmsghdr *hdr,
486 struct nl_writer *nw)
488 if (unhop->un_nhop_src != NULL)
489 dump_nhop(unhop, hdr, nw);
491 dump_nhgrp(unhop, hdr, nw);
/*
 * Removes the template record for @uidx and all of its clones from @ctl's
 * hash table, sends an NL_RTM_DELNEXTHOP notification to the
 * RTNLGRP_NEXTHOP group and schedules the records for destruction.
 * NOTE(review): the lock calls, return statements and error codes are not
 * visible in this listing.
 */
495 delete_unhop(struct unhop_ctl *ctl, struct nlmsghdr *hdr, uint32_t uidx)
497 struct user_nhop *unhop_ret, *unhop_base, *unhop_chain;
/* un_fibfam stays 0, so the key denotes the template record. */
499 struct user_nhop key = { .un_idx = uidx };
503 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop_base);
505 if (unhop_base != NULL) {
506 CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_base, unhop_ret);
/*
 * NOTE(review): this block is gated on LOG_DEBUG2 but logs at LOG_DEBUG3.
 * It also prints unhop_base->un_nhop, while the visible creation path
 * (newnhop()) only sets un_nhop_src on a template -- confirm that un_nhop
 * cannot be NULL here.
 */
507 IF_DEBUG_LEVEL(LOG_DEBUG2) {
508 char nhbuf[NHOP_PRINT_BUFSIZE];
509 nhop_print_buf_any(unhop_base->un_nhop, nhbuf, sizeof(nhbuf));
510 FIB_NH_LOG(LOG_DEBUG3, unhop_base->un_nhop,
511 "removed base nhop %u: %s", uidx, nhbuf);
513 /* Unlink all child nexthops as well, keeping the chain intact */
514 unhop_chain = unhop_base->un_nextchild;
515 while (unhop_chain != NULL) {
516 CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_chain,
518 MPASS(unhop_chain == unhop_ret);
519 IF_DEBUG_LEVEL(LOG_DEBUG3) {
520 char nhbuf[NHOP_PRINT_BUFSIZE];
521 nhop_print_buf_any(unhop_chain->un_nhop,
522 nhbuf, sizeof(nhbuf));
523 FIB_NH_LOG(LOG_DEBUG3, unhop_chain->un_nhop,
524 "removed child nhop %u: %s", uidx, nhbuf);
526 unhop_chain = unhop_chain->un_nextchild;
532 if (unhop_base == NULL) {
533 NL_LOG(LOG_DEBUG, "unable to find unhop %u", uidx);
537 /* Report nexthop deletion */
/* The notification header reuses pid/seq/flags of the request. */
538 struct netlink_walkargs wa = {
539 .hdr.nlmsg_pid = hdr->nlmsg_pid,
540 .hdr.nlmsg_seq = hdr->nlmsg_seq,
541 .hdr.nlmsg_flags = hdr->nlmsg_flags,
542 .hdr.nlmsg_type = NL_RTM_DELNEXTHOP,
545 struct nl_writer nw = {};
546 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) {
547 NL_LOG(LOG_DEBUG, "error allocating message writer");
551 dump_unhop(unhop_base, &wa.hdr, &nw);
/*
 * Walk template -> clones and hand each record to NET_EPOCH_CALL() for
 * destruction.  The next pointer is fetched before the record is queued.
 */
554 while (unhop_base != NULL) {
555 unhop_chain = unhop_base->un_nextchild;
556 NET_EPOCH_CALL(destroy_unhop_epoch, &unhop_base->un_epoch_ctx);
557 unhop_base = unhop_chain;
/*
 * Resizes @ctl's hash table to @new_size buckets.
 * NOTE(review): the early-exit condition, the lock calls and the code that
 * decides which pointer is freed at the end are not visible in this listing.
 */
564 consider_resize(struct unhop_ctl *ctl, uint32_t new_size)
566 void *new_ptr = NULL;
/* Allocate the new bucket array with M_NOWAIT, so the allocation may fail. */
573 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_size);
574 new_ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO);
579 NL_LOG(LOG_DEBUG, "resizing hash: %u -> %u", ctl->un_head.hash_size, new_size);
581 if (new_ptr != NULL) {
/* NOTE(review): presumably CHT_SLIST_RESIZE swaps the old array into new_ptr -- confirm. */
582 CHT_SLIST_RESIZE(&ctl->un_head, unhop, new_ptr, new_size);
588 free(new_ptr, M_NETLINK);
591 static bool __noinline
592 vnet_init_unhops(void)
594 uint32_t num_buckets = 16;
595 size_t alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
597 struct unhop_ctl *ctl = malloc(sizeof(struct unhop_ctl), M_NETLINK,
602 void *ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO);
604 free(ctl, M_NETLINK);
607 CHT_SLIST_INIT(&ctl->un_head, ptr, num_buckets);
610 if (!atomic_cmpset_ptr((uintptr_t *)&V_un_ctl, (uintptr_t)NULL, (uintptr_t)ctl)) {
611 free(ptr, M_NETLINK);
612 free(ctl, M_NETLINK);
615 if (atomic_load_ptr(&V_un_ctl) == NULL)
618 NL_LOG(LOG_NOTICE, "UNHOPS init done");
/*
 * VNET teardown hook (registered through VNET_SYSUNINIT below): destroys
 * every record left in the hash table and frees the bucket array and the
 * control block.
 * NOTE(review): the NULL check, the statement implementing the wait and any
 * lock calls are not visible in this listing.  If the lock set up by
 * UN_LOCK_INIT() is not destroyed in one of the hidden lines, rm_destroy()
 * should be called before free(ctl).
 */
624 vnet_destroy_unhops(const void *unused __unused)
626 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl);
627 struct user_nhop *unhop, *tmp;
633 /* Wait till all unhop users finish their reads */
/* The _SAFE iterator is required because destroy_unhop() frees the current record. */
637 CHT_SLIST_FOREACH_SAFE(&ctl->un_head, unhop, unhop, tmp) {
638 destroy_unhop(unhop);
639 } CHT_SLIST_FOREACH_SAFE_END;
642 free(ctl->un_head.ptr, M_NETLINK);
643 free(ctl, M_NETLINK);
645 VNET_SYSUNINIT(vnet_destroy_unhops, SI_SUB_PROTO_IF, SI_ORDER_ANY,
646 vnet_destroy_unhops, NULL);
649 nlattr_get_nhg(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target)
653 /* Verify attribute correctness */
654 struct nexthop_grp *grp = NLA_DATA(nla);
655 int data_len = NLA_DATA_LEN(nla);
657 int count = data_len / sizeof(*grp);
658 if (count == 0 || (count * sizeof(*grp) != data_len)) {
659 NL_LOG(LOG_DEBUG, "Invalid length for RTA_GROUP: %d", data_len);
663 *((struct nlattr **)target) = nla;
/*
 * Parsed form of a nexthop netlink request, filled by nhmsg_parser.
 * NOTE(review): the parser tables and handlers also use the members nha_id,
 * nha_groups, nh_family and nh_protocol, which are not visible in this
 * listing.
 */
667 struct nl_parsed_nhop {
/* set from NHA_BLACKHOLE (flag attribute) */
669 uint8_t nha_blackhole;
/* set from NHA_OIF */
671 struct ifnet *nha_oif;
/* set from NHA_GATEWAY */
672 struct sockaddr *nha_gw;
/* raw NHA_GROUP attribute, validated by nlattr_get_nhg() */
673 struct nlattr *nha_group;
/* Shorthands for member offsets in the wire header and in the parsed structure. */
678 #define _IN(_field) offsetof(struct nhmsg, _field)
679 #define _OUT(_field) offsetof(struct nl_parsed_nhop, _field)
/* Fixed header fields: copied from struct nhmsg into struct nl_parsed_nhop. */
680 static const struct nlfield_parser nlf_p_nh[] = {
681 { .off_in = _IN(nh_family), .off_out = _OUT(nh_family), .cb = nlf_get_u8 },
682 { .off_in = _IN(nh_protocol), .off_out = _OUT(nh_protocol), .cb = nlf_get_u8 },
/*
 * Attributes: each entry names the attribute type, the destination member
 * and the callback that decodes it.
 * NOTE(review): the entries are listed in ascending NHA_* name order as
 * written; confirm whether the parser requires sorting by numeric type.
 */
685 static const struct nlattr_parser nla_p_nh[] = {
686 { .type = NHA_ID, .off = _OUT(nha_id), .cb = nlattr_get_uint32 },
687 { .type = NHA_GROUP, .off = _OUT(nha_group), .cb = nlattr_get_nhg },
688 { .type = NHA_BLACKHOLE, .off = _OUT(nha_blackhole), .cb = nlattr_get_flag },
689 { .type = NHA_OIF, .off = _OUT(nha_oif), .cb = nlattr_get_ifp },
690 { .type = NHA_GATEWAY, .off = _OUT(nha_gw), .cb = nlattr_get_ip },
691 { .type = NHA_GROUPS, .off = _OUT(nha_groups), .cb = nlattr_get_flag },
/* Ties both tables together as "nhmsg_parser", the parser used by all handlers below. */
695 NL_DECLARE_PARSER(nhmsg_parser, struct nhmsg, nlf_p_nh, nla_p_nh);
698 eligible_nhg(const struct nhop_object *nh)
700 return (nh->nh_flags & NHF_GATEWAY);
/*
 * Fills @unhop as a nexthop group from the NHA_GROUP attribute in @attrs:
 * every listed id is resolved to its template record and the template
 * nexthops are collected, with their weights, into a member array.
 * NOTE(review): the error returns and the cleanup of wn on failure are not
 * visible in this listing.
 */
704 newnhg(struct unhop_ctl *ctl, struct nl_parsed_nhop *attrs, struct user_nhop *unhop)
706 struct nexthop_grp *grp = NLA_DATA(attrs->nha_group);
707 int count = NLA_DATA_LEN(attrs->nha_group) / sizeof(*grp);
708 struct weightened_nhop *wn;
/* The array is handed over to @unhop at the end of the function (un_nhgrp_src). */
710 wn = malloc(sizeof(*wn) * count, M_NETLINK, M_NOWAIT | M_ZERO);
714 for (int i = 0; i < count; i++) {
/* NOTE(review): this local shadows the @unhop parameter inside the loop body. */
715 struct user_nhop *unhop;
716 unhop = nl_find_base_unhop(ctl, grp[i].id);
718 NL_LOG(LOG_DEBUG, "unable to find uidx %u", grp[i].id);
/* A record without a template nexthop is itself a group. */
721 } else if (unhop->un_nhop_src == NULL) {
722 NL_LOG(LOG_DEBUG, "uidx %u is a group, nested group unsupported",
726 } else if (!eligible_nhg(unhop->un_nhop_src)) {
727 NL_LOG(LOG_DEBUG, "uidx %u nhop is not mpath-eligible",
733 * TODO: consider more rigid eligibility checks:
734 * restrict nexthops with the same gateway
/*
 * NOTE(review): the member pointer is stored without a visible reference
 * being taken -- confirm the lifetime of the member's template nexthop.
 */
736 wn[i].nh = unhop->un_nhop_src;
737 wn[i].weight = grp[i].weight;
/* Outside the loop "unhop" is the parameter again: publish the member list. */
739 unhop->un_nhgrp_src = wn;
740 unhop->un_nhgrp_count = count;
/*
 * Fills @unhop as a plain nexthop from @attrs: validates the attributes,
 * builds a template nexthop in RT_DEFAULT_FIB and stores it in
 * @unhop->un_nhop_src.
 * NOTE(review): the error returns and the cleanup on failure are not
 * visible in this listing.
 */
745 newnhop(struct nl_parsed_nhop *attrs, struct user_nhop *unhop)
747 struct ifaddr *ifa = NULL;
748 struct nhop_object *nh;
/* Everything except a blackhole nexthop needs both a gateway and an interface. */
751 if (!attrs->nha_blackhole) {
752 if (attrs->nha_gw == NULL) {
753 NL_LOG(LOG_DEBUG, "missing NHA_GATEWAY");
756 if (attrs->nha_oif == NULL) {
757 NL_LOG(LOG_DEBUG, "missing NHA_OIF");
/* Interface address for this gateway; passed to nhop_set_src() below. */
761 ifa = ifaof_ifpforaddr(attrs->nha_gw, attrs->nha_oif);
763 NL_LOG(LOG_DEBUG, "Unable to determine default source IP");
/* Without a gateway the family comes from the request header. */
768 int family = attrs->nha_gw != NULL ? attrs->nha_gw->sa_family : attrs->nh_family;
770 nh = nhop_alloc(RT_DEFAULT_FIB, family);
772 NL_LOG(LOG_DEBUG, "Unable to allocate nexthop");
775 nhop_set_uidx(nh, attrs->nha_id);
777 if (attrs->nha_blackhole)
778 nhop_set_blackhole(nh, NHF_BLACKHOLE);
780 nhop_set_gw(nh, attrs->nha_gw, true);
781 nhop_set_transmit_ifp(nh, attrs->nha_oif);
782 nhop_set_src(nh, ifa);
785 error = nhop_get_unlinked(nh);
787 NL_LOG(LOG_DEBUG, "unable to finalize nexthop");
791 IF_DEBUG_LEVEL(LOG_DEBUG2) {
792 char nhbuf[NHOP_PRINT_BUFSIZE];
793 nhop_print_buf(nh, nhbuf, sizeof(nhbuf));
794 NL_LOG(LOG_DEBUG2, "Adding unhop %u: %s", attrs->nha_id, nhbuf);
/* The record owns the nexthop from here on; destroy_unhop() releases it. */
797 unhop->un_nhop_src = nh;
/*
 * Handler for NL_RTM_NEWNEXTHOP: parses the request, builds a template
 * record (plain nexthop or group), inserts it into the hash table unless
 * the index already exists, and notifies the RTNLGRP_NEXTHOP group.
 * NOTE(review): the lock calls, return statements and error codes are not
 * visible in this listing.
 */
802 rtnl_handle_newnhop(struct nlmsghdr *hdr, struct nlpcb *nlp,
803 struct nl_pstate *npt)
805 struct user_nhop *unhop;
/*
 * Create the control block on first use.
 * NOTE(review): V_un_ctl is read directly here and below, while the other
 * handlers use atomic_load_ptr().
 */
808 if ((__predict_false(V_un_ctl == NULL)) && (!vnet_init_unhops()))
810 struct unhop_ctl *ctl = V_un_ctl;
812 struct nl_parsed_nhop attrs = {};
813 error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs);
818 * Get valid nha_id. Treat nha_id == 0 (auto-assignment) as a second-class
821 if (attrs.nha_id == 0) {
822 attrs.nha_id = find_spare_uidx(ctl);
/* find_spare_uidx() returning 0 means no free index was found. */
823 if (attrs.nha_id == 0) {
824 NL_LOG(LOG_DEBUG, "Unable to get spare uidx");
829 NL_LOG(LOG_DEBUG, "IFINDEX %d", attrs.nha_oif ? attrs.nha_oif->if_index : 0);
/* M_ZERO leaves un_fibfam at 0, which marks the record as a template. */
831 unhop = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO);
833 NL_LOG(LOG_DEBUG, "Unable to allocate user_nhop");
836 unhop->un_idx = attrs.nha_id;
837 unhop->un_protocol = attrs.nh_protocol;
/*
 * NOTE(review): the condition selecting between the group and the
 * plain-nexthop constructor is not visible in this listing.
 */
840 error = newnhg(ctl, &attrs, unhop);
842 error = newnhop(&attrs, unhop);
845 free(unhop, M_NETLINK);
850 /* Check if uidx already exists */
851 struct user_nhop *tmp = NULL;
852 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, unhop, tmp);
855 NL_LOG(LOG_DEBUG, "nhop idx %u already exists", attrs.nha_id);
856 destroy_unhop(unhop);
859 CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, unhop);
/* Remember the suggested bucket count; the resize itself happens at the end. */
860 uint32_t num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->un_head);
863 /* Report addition of the new nexthop */
864 struct netlink_walkargs wa = {
865 .hdr.nlmsg_pid = hdr->nlmsg_pid,
866 .hdr.nlmsg_seq = hdr->nlmsg_seq,
867 .hdr.nlmsg_flags = hdr->nlmsg_flags,
868 .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP,
871 struct nl_writer nw = {};
872 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) {
873 NL_LOG(LOG_DEBUG, "error allocating message writer");
877 dump_unhop(unhop, &wa.hdr, &nw);
880 consider_resize(ctl, num_buckets_new);
/*
 * Handler for NL_RTM_DELNEXTHOP: parses the request and deletes the record
 * named by NHA_ID through delete_unhop().
 * NOTE(review): the return statements and error codes are not visible in
 * this listing.
 */
886 rtnl_handle_delnhop(struct nlmsghdr *hdr, struct nlpcb *nlp,
887 struct nl_pstate *npt)
889 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl);
/* A NULL control block means vnet_init_unhops() has not installed one yet. */
892 if (__predict_false(ctl == NULL))
895 struct nl_parsed_nhop attrs = {};
896 error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs);
/* Deletion needs an explicit index. */
900 if (attrs.nha_id == 0) {
901 NL_LOG(LOG_DEBUG, "NHA_ID not set");
905 error = delete_unhop(ctl, hdr, attrs.nha_id);
911 match_unhop(const struct nl_parsed_nhop *attrs, struct user_nhop *unhop)
913 if (attrs->nha_id != 0 && unhop->un_idx != attrs->nha_id)
915 if (attrs->nha_groups != 0 && unhop->un_nhgrp_src == NULL)
917 if (attrs->nha_oif != NULL &&
918 (unhop->un_nhop_src == NULL || unhop->un_nhop_src->nh_ifp != attrs->nha_oif))
/*
 * Handler for NL_RTM_GETNEXTHOP: replies either with the single record
 * named by NHA_ID or with a multi-part dump of all template records that
 * pass match_unhop().
 * NOTE(review): the lock calls, return statements, error codes and part of
 * the walkargs initializer are not visible in this listing.
 */
925 rtnl_handle_getnhop(struct nlmsghdr *hdr, struct nlpcb *nlp,
926 struct nl_pstate *npt)
928 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl);
929 struct user_nhop *unhop;
933 if (__predict_false(ctl == NULL))
936 struct nl_parsed_nhop attrs = {};
937 error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs);
/* Replies are sent as NL_RTM_NEWNEXTHOP messages echoing the request's pid/seq/flags. */
941 struct netlink_walkargs wa = {
943 .hdr.nlmsg_pid = hdr->nlmsg_pid,
944 .hdr.nlmsg_seq = hdr->nlmsg_seq,
945 .hdr.nlmsg_flags = hdr->nlmsg_flags,
946 .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP,
/* Single-record request. */
949 if (attrs.nha_id != 0) {
950 NL_LOG(LOG_DEBUG2, "searching for uidx %u", attrs.nha_id);
951 struct user_nhop key= { .un_idx = attrs.nha_id };
953 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
958 dump_unhop(unhop, &wa.hdr, wa.nw);
/* Full dump: mark the replies as multi-part and skip the per-fib clones. */
963 wa.hdr.nlmsg_flags |= NLM_F_MULTI;
964 CHT_SLIST_FOREACH(&ctl->un_head, unhop, unhop) {
965 if (UNHOP_IS_MASTER(unhop) && match_unhop(&attrs, unhop))
966 dump_unhop(unhop, &wa.hdr, wa.nw);
967 } CHT_SLIST_FOREACH_END;
971 if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr))
/*
 * Netlink command table: maps each nexthop message type to its handler.
 * The NEW and DEL entries carry PRIV_NET_ROUTE in .priv; no .priv line is
 * visible for the GET entry in this listing.
 */
977 static const struct rtnl_cmd_handler cmd_handlers[] = {
979 .cmd = NL_RTM_NEWNEXTHOP,
980 .name = "RTM_NEWNEXTHOP",
981 .cb = &rtnl_handle_newnhop,
982 .priv = PRIV_NET_ROUTE,
985 .cmd = NL_RTM_DELNEXTHOP,
986 .name = "RTM_DELNEXTHOP",
987 .cb = &rtnl_handle_delnhop,
988 .priv = PRIV_NET_ROUTE,
991 .cmd = NL_RTM_GETNEXTHOP,
992 .name = "RTM_GETNEXTHOP",
993 .cb = &rtnl_handle_getnhop,
/* Parsers handed to NL_VERIFY_PARSERS() in rtnl_nexthops_init(). */
997 static const struct nlhdr_parser *all_parsers[] = { &nhmsg_parser };
/*
 * Initialization entry point for the nexthop netlink code: runs
 * NL_VERIFY_PARSERS() over the parser list and registers the handlers from
 * cmd_handlers.
 */
1000 rtnl_nexthops_init(void)
1002 NL_VERIFY_PARSERS(all_parsers);
1003 rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));