From 9c25814dc86d0ec45e3c2b54b7c50ff16e10a375 Mon Sep 17 00:00:00 2001 From: melifaro Date: Fri, 28 Aug 2020 21:59:10 +0000 Subject: [PATCH] Further split nhop creation and rtable operations. As nexthops are immutable, some operations such as route attribute changes require nexthop fetching, forking, modification and route switching. These operations are not atomic, so they may need to be retried multiple times in presence of multiple speakers changing the same route. This change introduces "synchronisation" primitive: route_update_conditional(), simplifying logic for route changes and upcoming multipath operations. Differential Revision: https://reviews.freebsd.org/D26216 --- sys/net/route/route_ctl.c | 367 +++++++++++++++++++++++++------------- sys/net/route/route_var.h | 8 + 2 files changed, 251 insertions(+), 124 deletions(-) diff --git a/sys/net/route/route_ctl.c b/sys/net/route/route_ctl.c index b2b7fa4842b..573c8a43b3c 100644 --- a/sys/net/route/route_ctl.c +++ b/sys/net/route/route_ctl.c @@ -78,9 +78,15 @@ struct rib_subscription { static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc); +static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt, + struct rt_addrinfo *info, struct route_nhop_data *rnd, + struct rib_cmd_info *rc); static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc); -static int change_route(struct rib_head *, struct rt_addrinfo *, +static int change_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc); +static int change_route_nhop(struct rib_head *rnh, struct rtentry *rt, + struct rt_addrinfo *info, struct route_nhop_data *rnd, struct rib_cmd_info *rc); static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type, struct rib_cmd_info *rc); @@ -202,14 +208,18 @@ rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, return (add_route(rnh, info, rc)); } +/* + * Creates rtentry and nexthop based on @info data. + * Return 0 and fills in rtentry into @prt on success, + * return errno otherwise. + */ static int -add_route(struct rib_head *rnh, struct rt_addrinfo *info, - struct rib_cmd_info *rc) +create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **prt) { struct sockaddr *dst, *ndst, *gateway, *netmask; - struct rtentry *rt, *rt_old; + struct rtentry *rt; struct nhop_object *nh; - struct radix_node *rn; struct ifaddr *ifa; int error, flags; @@ -276,7 +286,28 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info, rt->rt_weight = 1; rt_setmetrics(info, rt); - rt_old = NULL; + + *prt = rt; + return (0); +} + +static int +add_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct rib_cmd_info *rc) +{ + struct sockaddr *ndst, *netmask; + struct route_nhop_data rnd; + struct nhop_object *nh; + struct rtentry *rt; + int error; + + error = create_rtentry(rnh, info, &rt); + if (error != 0) + return (error); + + rnd.rnd_nhop = rt->rt_nhop; + rnd.rnd_weight = rt->rt_weight; + nh = rt->rt_nhop; RIB_WLOCK(rnh); #ifdef RADIX_MPATH @@ -290,76 +321,42 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info, return (EEXIST); } #endif - - rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); - - if (rn != NULL) { - /* Most common usecase */ - if (rt->rt_expire > 0) - tmproutes_update(rnh, rt); - - /* Finalize notification */ - rnh->rnh_gen++; - - rc->rc_rt = rt; - rc->rc_nh_new = nh; - rc->rc_nh_weight = rt->rt_weight; - - rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); - } else if ((info->rti_flags & RTF_PINNED) != 0) { - - /* - * Force removal and re-try addition - * TODO: better multipath&pinned support - */ - struct sockaddr *info_dst = info->rti_info[RTAX_DST]; - info->rti_info[RTAX_DST] = ndst; - /* Do not delete existing PINNED(interface) routes */ - info->rti_flags &= ~RTF_PINNED; - rt_old = rt_unlinkrte(rnh, info, &error); - info->rti_flags |= RTF_PINNED; - info->rti_info[RTAX_DST] = info_dst; - if (rt_old != NULL) { - rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, - rt->rt_nodes); - - /* Finalize notification */ - rnh->rnh_gen++; - - if (rn != NULL) { - rc->rc_cmd = RTM_CHANGE; - rc->rc_rt = rt; - rc->rc_nh_old = rt_old->rt_nhop; - rc->rc_nh_new = nh; - rc->rc_nh_weight = rt->rt_weight; - } else { - rc->rc_cmd = RTM_DELETE; - rc->rc_rt = rt_old; - rc->rc_nh_old = rt_old->rt_nhop; - rc->rc_nh_weight = rt_old->rt_weight; + error = add_route_nhop(rnh, rt, info, &rnd, rc); + if (error == 0) { + rt = NULL; + nh = NULL; + } else if ((error == EEXIST) && ((info->rti_flags & RTF_PINNED) != 0)) { + struct rtentry *rt_orig; + struct nhop_object *nh_orig; + struct radix_node *rn; + + ndst = (struct sockaddr *)rt_key(rt); + netmask = info->rti_info[RTAX_NETMASK]; + rn = rnh->rnh_lookup(ndst, netmask, &rnh->head); + rt_orig = (struct rtentry *)rn; + if (rt_orig != NULL) { + nh_orig = rt_orig->rt_nhop; + if ((nhop_get_rtflags(nh_orig) & RTF_PINNED) == 0) { + /* Current nexhop is not PINNED, can update */ + error = change_route_nhop(rnh, rt_orig, + info, &rnd, rc); + if (error == 0) + nh = NULL; } - rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); - } + } else + error = ENOBUFS; } RIB_WUNLOCK(rnh); - if ((rn != NULL) || (rt_old != NULL)) + if (error == 0) rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); - if (rt_old != NULL) - rtfree(rt_old); - - /* - * If it still failed to go into the tree, - * then un-make it (this should be a function) - */ - if (rn == NULL) { + if (nh != NULL) nhop_free(nh); + if (rt != NULL) uma_zfree(V_rtzone, rt); - return (EEXIST); - } - return (0); + return (error); } @@ -508,7 +505,11 @@ int rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) { + RIB_RLOCK_TRACKER; + struct route_nhop_data rnd_orig; struct rib_head *rnh; + struct rtentry *rt; + int error; NET_EPOCH_ASSERT(); @@ -519,18 +520,18 @@ rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, bzero(rc, sizeof(struct rib_cmd_info)); rc->rc_cmd = RTM_CHANGE; - return (change_route(rnh, info, rc)); -} + /* Check if updated gateway exists */ + if ((info->rti_flags & RTF_GATEWAY) && + (info->rti_info[RTAX_GATEWAY] == NULL)) + return (EINVAL); -static int -change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, - struct rib_cmd_info *rc) -{ - RIB_RLOCK_TRACKER; - struct rtentry *rt = NULL; - int error = 0; - int free_ifa = 0; - struct nhop_object *nh, *nh_orig; + /* + * route change is done in multiple steps, with dropping and + * reacquiring lock. In the situations with multiple processes + * changes the same route in can lead to the case when route + * is changed between the steps. Address it by retrying the operation + * multiple times before failing. + */ RIB_RLOCK(rnh); rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], @@ -554,12 +555,33 @@ change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, } } #endif - nh_orig = rt->rt_nhop; + rnd_orig.rnd_nhop = rt->rt_nhop; + rnd_orig.rnd_weight = rt->rt_weight; RIB_RUNLOCK(rnh); - rt = NULL; + for (int i = 0; i < RIB_MAX_RETRIES; i++) { + error = change_route(rnh, info, &rnd_orig, rc); + if (error != EAGAIN) + break; + } + + return (error); +} + +static int +change_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) +{ + int error = 0; + int free_ifa = 0; + struct nhop_object *nh, *nh_orig; + struct route_nhop_data rnd_new; + nh = NULL; + nh_orig = rnd_orig->rnd_nhop; + if (nh_orig == NULL) + return (ESRCH); /* * New gateway could require new ifaddr, ifp; @@ -593,71 +615,168 @@ change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, if (error != 0) return (error); - RIB_WLOCK(rnh); + rnd_new.rnd_nhop = nh; + if (info->rti_mflags & RTV_WEIGHT) + rnd_new.rnd_weight = info->rti_rmx->rmx_weight; + else + rnd_new.rnd_weight = rnd_orig->rnd_weight; - /* Lookup rtentry once again and check if nexthop is still the same */ - rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], - info->rti_info[RTAX_NETMASK], &rnh->head); + error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); - if (rt == NULL) { - RIB_WUNLOCK(rnh); - nhop_free(nh); - return (ESRCH); - } + return (error); +} - if (rt->rt_nhop != nh_orig) { - RIB_WUNLOCK(rnh); - nhop_free(nh); - return (EAGAIN); +/* + * Insert @rt with nhop data from @rnd_new to @rnh. + * Returns 0 on success. + */ +static int +add_route_nhop(struct rib_head *rnh, struct rtentry *rt, + struct rt_addrinfo *info, struct route_nhop_data *rnd, + struct rib_cmd_info *rc) +{ + struct sockaddr *ndst, *netmask; + struct radix_node *rn; + int error = 0; + + RIB_WLOCK_ASSERT(rnh); + + ndst = (struct sockaddr *)rt_key(rt); + netmask = info->rti_info[RTAX_NETMASK]; + + rt->rt_nhop = rnd->rnd_nhop; + rt->rt_weight = rnd->rnd_weight; + rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); + + if (rn != NULL) { + if (rt->rt_expire > 0) + tmproutes_update(rnh, rt); + + /* Finalize notification */ + rnh->rnh_gen++; + + rc->rc_cmd = RTM_ADD; + rc->rc_rt = rt; + rc->rc_nh_old = NULL; + rc->rc_nh_new = rnd->rnd_nhop; + rc->rc_nh_weight = rnd->rnd_weight; + + rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); + } else { + /* Existing route or memory allocation failure */ + error = EEXIST; } - /* Proceed with the update */ + return (error); +} - /* Provide notification to the protocols.*/ - rt->rt_nhop = nh; - rt_setmetrics(info, rt); +/* + * Switch @rt nhop/weigh to the ones specified in @rnd. + * Conditionally set rt_expire if set in @info. + * Returns 0 on success. + */ +static int +change_route_nhop(struct rib_head *rnh, struct rtentry *rt, + struct rt_addrinfo *info, struct route_nhop_data *rnd, + struct rib_cmd_info *rc) +{ + struct nhop_object *nh_orig; + + RIB_WLOCK_ASSERT(rnh); + + nh_orig = rt->rt_nhop; + + if (rnd->rnd_nhop != NULL) { + /* Changing expiration & nexthop & weight to a new one */ + rt_setmetrics(info, rt); + rt->rt_nhop = rnd->rnd_nhop; + rt->rt_weight = rnd->rnd_weight; + if (rt->rt_expire > 0) + tmproutes_update(rnh, rt); + } else { + /* Route deletion requested. */ + struct sockaddr *ndst, *netmask; + struct radix_node *rn; + + ndst = (struct sockaddr *)rt_key(rt); + netmask = info->rti_info[RTAX_NETMASK]; + rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head); + if (rn == NULL) + return (ESRCH); + } /* Finalize notification */ rnh->rnh_gen++; + rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE; rc->rc_rt = rt; rc->rc_nh_old = nh_orig; - rc->rc_nh_new = rt->rt_nhop; - rc->rc_nh_weight = rt->rt_weight; + rc->rc_nh_new = rnd->rnd_nhop; + rc->rc_nh_weight = rnd->rnd_weight; rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); - RIB_WUNLOCK(rnh); - - rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); - - nhop_free(nh_orig); - return (0); } -static int -change_route(struct rib_head *rnh, struct rt_addrinfo *info, - struct rib_cmd_info *rc) +/* + * Conditionally update route nhop/weight IFF data in @nhd_orig is + * consistent with the current route data. + * Nexthop in @nhd_new is consumed. + */ +int +change_route_conditional(struct rib_head *rnh, struct rtentry *rt, + struct rt_addrinfo *info, struct route_nhop_data *rnd_orig, + struct route_nhop_data *rnd_new, struct rib_cmd_info *rc) { - int error; + struct rtentry *rt_new; + int error = 0; - /* Check if updated gateway exists */ - if ((info->rti_flags & RTF_GATEWAY) && - (info->rti_info[RTAX_GATEWAY] == NULL)) - return (EINVAL); + RIB_WLOCK(rnh); - /* - * route change is done in multiple steps, with dropping and - * reacquiring lock. In the situations with multiple processes - * changes the same route in can lead to the case when route - * is changed between the steps. Address it by retrying the operation - * multiple times before failing. - */ - for (int i = 0; i < RIB_MAX_RETRIES; i++) { - error = change_route_one(rnh, info, rc); - if (error != EAGAIN) - break; + rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], + info->rti_info[RTAX_NETMASK], &rnh->head); + + if (rt_new == NULL) { + if (rnd_orig->rnd_nhop == NULL) + error = add_route_nhop(rnh, rt, info, rnd_new, rc); + else { + /* + * Prefix does not exist, which was not our assumption. + * Update @rnd_orig with the new data and return + */ + rnd_orig->rnd_nhop = NULL; + rnd_orig->rnd_weight = 0; + error = EAGAIN; + } + } else { + /* Prefix exists, try to update */ + if (rnd_orig->rnd_nhop == rt_new->rt_nhop) { + + /* + * Nhop/mpath group hasn't changed. Flip + * to the new precalculated one and return + */ + error = change_route_nhop(rnh, rt_new, info, rnd_new, rc); + } else { + /* Update and retry */ + rnd_orig->rnd_nhop = rt_new->rt_nhop; + rnd_orig->rnd_weight = rt_new->rt_weight; + error = EAGAIN; + } + } + + RIB_WUNLOCK(rnh); + + if (error == 0) { + rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); + + if (rnd_orig->rnd_nhop != NULL) + nhop_free_any(rnd_orig->rnd_nhop); + + } else { + if (rnd_new->rnd_nhop != NULL) + nhop_free_any(rnd_new->rnd_nhop); } return (error); diff --git a/sys/net/route/route_var.h b/sys/net/route/route_var.h index 89b5dfcda69..8b44f8705d7 100644 --- a/sys/net/route/route_var.h +++ b/sys/net/route/route_var.h @@ -226,6 +226,14 @@ void tmproutes_init(struct rib_head *rh); void tmproutes_destroy(struct rib_head *rh); /* route_ctl.c */ +struct route_nhop_data { + struct nhop_object *rnd_nhop; + uint32_t rnd_weight; +}; +int change_route_conditional(struct rib_head *rnh, struct rtentry *rt, + struct rt_addrinfo *info, struct route_nhop_data *nhd_orig, + struct route_nhop_data *nhd_new, struct rib_cmd_info *rc); + void vnet_rtzone_init(void); void vnet_rtzone_destroy(void); -- 2.45.0