2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2020 Alexander V. Chernikov
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet6.h"
32 #include <sys/param.h>
33 #include <sys/kernel.h>
35 #include <sys/rmlock.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/kernel.h>
39 #include <sys/socket.h>
40 #include <sys/sysctl.h>
41 #include <sys/syslog.h>
45 #include <net/if_var.h>
47 #include <netinet/in.h>
48 #include <netinet/ip.h>
49 #include <netinet/ip6.h>
50 #include <netinet6/ip6_var.h>
51 #include <netinet6/in6_fib.h>
53 #include <net/route.h>
54 #include <net/route/nhop.h>
55 #include <net/route/route_ctl.h>
56 #include <net/route/fib_algo.h>
/*
 * Bounds on the number of tbl8 groups requested from the LPM6 engine.
 * init_table() starts a fresh instance at LPM6_MIN_TBL8.
 *
 * The LPM6_MAX_TBL8 expansion is parenthesized so that the macro behaves
 * as a single operand when used inside a larger expression
 * (e.g. "x / LPM6_MAX_TBL8" or "LPM6_MAX_TBL8 % y").
 */
#define LPM6_MIN_TBL8	8		/* 2 pages of memory */
#define LPM6_MAX_TBL8	(65536 * 16)	/* 256M */
/*
 * NOTE(review): none of the code in this file appears to reference
 * struct fib_algo_calldata -- confirm whether it is still needed.
 */
64 struct fib_algo_calldata {
/*
 * State of one DPDK LPM6 lookup instance; passed around as the opaque
 * "_data" pointer of the fib_lookup_module callbacks.
 */
69 struct dpdk_lpm6_data {
/* LPM6 engine handle; released by destroy_dd() via rte_lpm6_free(). */
70 struct rte_lpm6 *lpm6;
/* Counters reported by check_dump_success() when the RIB dump ends. */
71 uint64_t routes_added;
72 uint64_t routes_failed;
/* tbl8 count chosen by estimate_scale() and passed to rte_lpm6_create(). */
73 uint32_t number_tbl8s;
/*
 * Slow-path lookup used by lookup_ptr() for destinations that do not go
 * through the LPM6 table: the request is forwarded to
 * fib6_radix_lookup_nh() for the FIB number stored in the
 * rte_lpm6_external view of the engine.
 *
 * Returns whatever fib6_radix_lookup_nh() returns for (fibnum, dst6,
 * scopeid).
 */
79 static struct nhop_object *
80 lookup_ptr_ll(const struct rte_lpm6 *lpm6, const struct in6_addr *dst6,
83 const struct rte_lpm6_external *rte_ext;
/* The engine pointer is reinterpreted as its "external" layout. */
85 rte_ext = (const struct rte_lpm6_external *)lpm6;
87 return (fib6_radix_lookup_nh(rte_ext->fibnum, dst6, scopeid));
91 * Main datapath routing
/*
 * Datapath lookup entry point.
 *
 * algo_data is the rte_lpm6 engine pointer.  Non-link-local destinations
 * are resolved with rte_lpm6_lookup(); the resulting index is translated
 * into a nexthop through rte_ext->nh_idx[].  If the engine has no match,
 * the default route is used when default_idx is non-zero.  Everything
 * else is handed to lookup_ptr_ll().
 */
93 static struct nhop_object *
94 lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
96 const struct rte_lpm6 *lpm6;
97 const struct rte_lpm6_external *rte_ext;
98 const struct in6_addr *addr6;
102 lpm6 = (const struct rte_lpm6 *)algo_data;
/* Same object, viewed through the externally visible layout. */
104 rte_ext = (const struct rte_lpm6_external *)lpm6;
106 if (!IN6_IS_SCOPE_LINKLOCAL(addr6)) {
107 ret = rte_lpm6_lookup(lpm6, (const uint8_t *)addr6, &nhidx);
/* nhidx indexes the nexthop pointer array shared with the framework. */
110 return (rte_ext->nh_idx[nhidx]);
112 /* Not found. Check default route */
/* default_idx == 0 is the "no default route" marker. */
113 if (rte_ext->default_idx > 0)
114 return (rte_ext->nh_idx[rte_ext->default_idx]);
120 return (lookup_ptr_ll(lpm6, addr6, scopeid));
/*
 * Preference callback (.flm_get_pref of dpdk_lpm6): derives a preference
 * value from the number of prefixes in the routing table.
 *
 * With 10..999 prefixes the value is num_prefixes / 10; with
 * 1000..99999 prefixes it is 100 + num_prefixes / 667.
 */
125 rte6_get_pref(const struct rib_rtable_info *rinfo)
128 if (rinfo->num_prefixes < 10)
130 else if (rinfo->num_prefixes < 1000)
131 return (rinfo->num_prefixes / 10);
132 else if (rinfo->num_prefixes < 100000)
133 return (100 + rinfo->num_prefixes / 667);
/*
 * Applies a change of the default route.
 *
 * The default route is not stored in the LPM6 table; it is kept as a
 * nexthop index in rte_ext->default_idx, which lookup_ptr() consults
 * after a table miss.  For any command other than RTM_DELETE the index of
 * rc->rc_nh_new is stored; otherwise the index is reset to 0.
 *
 * Returns FLM_SUCCESS, or FLM_REBUILD on the failure path of the nexthop
 * index lookup.
 */
138 static enum flm_op_result
139 handle_default_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc)
141 struct rte_lpm6_external *rte_ext;
142 rte_ext = (struct rte_lpm6_external *)dd->lpm6;
144 if (rc->rc_cmd != RTM_DELETE) {
146 uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
/* NOTE(review): presumably taken when no index could be obtained. */
149 return (FLM_REBUILD);
150 rte_ext->default_idx = nhidx;
152 /* No default route */
153 rte_ext->default_idx = 0;
156 return (FLM_SUCCESS);
/*
 * Handler for changes to link-local prefixes, called from
 * handle_any_change().  Reports FLM_SUCCESS.
 *
 * NOTE(review): addr6 is passed by value here, whereas
 * handle_gu_change() takes a pointer -- confirm this is intentional.
 */
159 static enum flm_op_result
160 handle_ll_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc,
161 const struct in6_addr addr6, int plen, uint32_t scopeid)
164 return (FLM_SUCCESS);
/*
 * Builds the LPM6 rule describing the parent (covering) route of
 * addr6/plen, for use by the delete path of handle_gu_change().
 *
 * The parent is looked up with rt_get_inet6_parent() in FIB dd->fibnum.
 * When a rule is produced it is written into the caller-supplied buffer
 * by fill_rule6() and the parent's nexthop index is stored in *pnhop_idx.
 * lsp_rule starts out as NULL.
 *
 * The caller in this file passes a buffer of RTE_LPM6_RULE_SIZE bytes.
 */
167 static struct rte_lpm6_rule *
168 pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6, int plen,
169 int *pplen, uint32_t *pnhop_idx, char *buffer)
171 struct rte_lpm6_rule *lsp_rule = NULL;
177 rt = rt_get_inet6_parent(dd->fibnum, addr6, plen);
178 /* plen = 0 means default route and it's out of scope */
180 uint32_t nhop_idx, scopeid;
181 struct in6_addr new_addr6;
/* From here on plen holds the parent's prefix length. */
182 rt_get_inet6_prefix_plen(rt, &new_addr6, &plen, &scopeid);
184 nhop_idx = fib_get_nhop_idx(dd->fd, rt_get_raw_nhop(rt));
185 lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, plen, nhop_idx);
186 *pnhop_idx = nhop_idx;
/*
 * Applies a RIB change for a prefix that lives in the LPM6 table
 * (called from handle_any_change()).
 *
 * Add/change: the index of rc->rc_nh_new is obtained with
 * fib_get_nhop_idx() and the prefix is installed with rte_lpm6_add();
 * the last argument is 1 for RTM_ADD and 0 otherwise.
 *
 * Delete: the covering parent rule is packed with pack_parent_rule() and
 * passed to rte_lpm6_delete() together with the prefix being removed.
 *
 * Returns FLM_SUCCESS on the normal path and FLM_REBUILD when the table
 * has to be rebuilt (for instance when the nhop limit is reached).
 */
194 static enum flm_op_result
195 handle_gu_change(struct dpdk_lpm6_data *dd, const struct rib_cmd_info *rc,
196 const struct in6_addr *addr6, int plen)
/* Printable form of the prefix, used only by the log messages below. */
199 char abuf[INET6_ADDRSTRLEN];
200 inet_ntop(AF_INET6, addr6, abuf, sizeof(abuf));
202 /* So we get sin6, plen and nhidx */
203 if (rc->rc_cmd != RTM_DELETE) {
205 * Addition or change. Save nhop in the internal table
208 uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
210 FIB_PRINTF(LOG_INFO, dd->fd, "nhop limit reached, need rebuild");
211 return (FLM_REBUILD);
214 ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)addr6,
215 plen, nhidx, (rc->rc_cmd == RTM_ADD) ? 1 : 0);
216 FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u ret: %d",
217 (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
219 rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0,
223 * Need to lookup parent. Assume deletion happened already
/* Scratch storage that pack_parent_rule() fills with the parent rule. */
225 char buffer[RTE_LPM6_RULE_SIZE];
226 struct rte_lpm6_rule *lsp_rule = NULL;
228 uint32_t parent_nhop_idx;
229 lsp_rule = pack_parent_rule(dd, addr6, plen, &parent_plen,
230 &parent_nhop_idx, buffer);
232 ret = rte_lpm6_delete(dd->lpm6, (const uint8_t *)addr6, plen, lsp_rule);
233 FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d -> /%d nhop %u -> %u ret: %d",
234 "DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, rc->rc_nh_old),
235 parent_nhop_idx, ret);
239 FIB_PRINTF(LOG_INFO, dd->fd, "error: %d", ret);
241 return (FLM_REBUILD);
244 return (FLM_SUCCESS);
/*
 * Dispatches one RIB change to the matching handler.
 *
 * The prefix, prefix length and scope id are extracted from rc->rc_rt.
 * Link-local prefixes go to handle_ll_change(); the remaining changes go
 * to either handle_default_change() or handle_gu_change()
 * (NOTE(review): presumably selected by plen == 0 -- confirm).
 * A failure is logged as "error handling route".
 */
247 static enum flm_op_result
248 handle_any_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc)
250 enum flm_op_result ret;
251 struct in6_addr addr6;
255 rt_get_inet6_prefix_plen(rc->rc_rt, &addr6, &plen, &scopeid);
257 if (IN6_IS_SCOPE_LINKLOCAL(&addr6))
258 ret = handle_ll_change(dd, rc, addr6, plen, scopeid);
260 ret = handle_default_change(dd, rc);
262 ret = handle_gu_change(dd, rc, &addr6, plen);
265 FIB_PRINTF(LOG_INFO, dd->fd, "error handling route");
/*
 * Route-change callback (.flm_change_rib_item_cb of dpdk_lpm6).
 * Recovers the instance from the opaque _data pointer and forwards the
 * change to handle_any_change(), returning its result.
 */
269 static enum flm_op_result
270 handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
273 struct dpdk_lpm6_data *dd;
275 dd = (struct dpdk_lpm6_data *)_data;
277 return (handle_any_change(dd, rc));
/*
 * Tears down an instance: logs the pointer being destroyed and releases
 * the LPM6 engine with rte_lpm6_free() if one is attached.
 */
281 destroy_dd(struct dpdk_lpm6_data *dd)
284 FIB_PRINTF(LOG_INFO, dd->fd, "destroy dd %p", dd);
/* lpm6 is NULL when the instance has no engine attached. */
285 if (dd->lpm6 != NULL)
286 rte_lpm6_free(dd->lpm6);
/*
 * Destroy callback (.flm_destroy_cb of dpdk_lpm6): casts the opaque
 * pointer back to struct dpdk_lpm6_data and calls destroy_dd().
 */
291 destroy_table(void *_data)
294 destroy_dd((struct dpdk_lpm6_data *)_data);
/*
 * RIB dump callback (.flm_dump_rib_item_cb of dpdk_lpm6): installs one
 * existing route into a freshly built instance.
 *
 * Link-local prefixes are not put into the LPM6 table; only their
 * nexthop is referenced via fib_get_nhop_idx().  The default route is
 * routed through handle_default_change() using a locally built
 * rib_cmd_info.  All other prefixes are added with rte_lpm6_add().
 *
 * Returns FLM_SUCCESS on the normal path and FLM_REBUILD when a nexthop
 * index cannot be obtained or rte_lpm6_add() reports -ENOSPC.
 */
297 static enum flm_op_result
298 add_route_cb(struct rtentry *rt, void *_data)
300 struct dpdk_lpm6_data *dd = (struct dpdk_lpm6_data *)_data;
301 struct in6_addr addr6;
302 struct nhop_object *nh;
307 rt_get_inet6_prefix_plen(rt, &addr6, &plen, &scopeid);
308 nh = rt_get_raw_nhop(rt);
310 if (IN6_IS_SCOPE_LINKLOCAL(&addr6)) {
313 * We don't operate on LL directly, however
314 * reference them to maintain guarantee on
315 * ability to refcount nhops in epoch.
317 fib_get_nhop_idx(dd->fd, nh);
318 return (FLM_SUCCESS);
/* Printable form of the prefix for the debug messages below. */
321 char abuf[INET6_ADDRSTRLEN];
322 inet_ntop(AF_INET6, &addr6, abuf, sizeof(abuf));
323 FIB_PRINTF(LOG_DEBUG, dd->fd, "Operating on %s/%d", abuf, plen);
326 struct rib_cmd_info rc = {
331 FIB_PRINTF(LOG_DEBUG, dd->fd, "Adding default route");
332 return (handle_default_change(dd, &rc));
335 uint32_t nhidx = fib_get_nhop_idx(dd->fd, nh);
337 FIB_PRINTF(LOG_INFO, dd->fd, "unable to get nhop index");
338 return (FLM_REBUILD);
/* Final argument 1 matches what handle_gu_change() passes for RTM_ADD. */
340 ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)&addr6, plen, nhidx, 1);
341 FIB_PRINTF(LOG_DEBUG, dd->fd, "ADD %p %s/%d nh %u = %d",
342 dd->lpm6, abuf, plen, nhidx, ret);
345 FIB_PRINTF(LOG_INFO, dd->fd, "rte_lpm6_add() returned %d", ret);
346 if (ret == -ENOSPC) {
348 return (FLM_REBUILD);
355 return (FLM_SUCCESS);
/*
 * End-of-dump callback (.flm_dump_end_cb of dpdk_lpm6).
 *
 * Logs how many routes were added and how many failed.  If hit_tables is
 * set or any route failed, FLM_REBUILD is returned so that the table is
 * built again; otherwise the sync is logged and FLM_SUCCESS is returned.
 *
 * NOTE(review): routes_added and routes_failed are uint64_t but are
 * printed with "%zu" (size_t).  The two types differ on 32-bit
 * platforms; consider "%ju" with a (uintmax_t) cast.
 */
358 static enum flm_op_result
359 check_dump_success(void *_data, struct fib_dp *dp)
361 struct dpdk_lpm6_data *dd;
363 dd = (struct dpdk_lpm6_data *)_data;
365 FIB_PRINTF(LOG_INFO, dd->fd, "scan completed. added: %zu failed: %zu",
366 dd->routes_added, dd->routes_failed);
367 if (dd->hit_tables || dd->routes_failed > 0)
368 return (FLM_REBUILD);
370 FIB_PRINTF(LOG_INFO, dd->fd,
371 "DPDK lookup engine synced with IPv6 RIB id %u, %zu routes",
372 dd->fibnum, dd->routes_added);
377 return (FLM_SUCCESS);
/*
 * Chooses the tbl8 count for the new instance dd based on the previous
 * instance dd_src: the count is doubled when dd_src has hit_tables set
 * and carried over unchanged otherwise.
 *
 * NOTE(review): LPM6_MAX_TBL8 is not consulted on these lines -- confirm
 * that the doubling is bounded somewhere.
 */
381 estimate_scale(const struct dpdk_lpm6_data *dd_src, struct dpdk_lpm6_data *dd)
384 /* XXX: update at 75% capacity */
385 if (dd_src->hit_tables)
386 dd->number_tbl8s = dd_src->number_tbl8s * 2;
388 dd->number_tbl8s = dd_src->number_tbl8s;
390 /* TODO: look into the appropriate RIB to adjust */
/*
 * Allocates and sets up a new instance modelled on dd_prev.
 *
 * The instance is allocated zeroed with M_NOWAIT, inherits fibnum from
 * dd_prev, is sized by estimate_scale() and gets a new LPM6 engine from
 * rte_lpm6_create().  The engine's nh_idx is pointed at the nexthop array
 * returned by fib_get_nhop_array(), which lookup_ptr() indexes.
 *
 * Allocation and engine creation failures are logged.
 *
 * NOTE(review): the engine name passed to rte_lpm6_create() is the
 * hard-coded string "test" -- confirm that names need not be unique.
 */
393 static struct dpdk_lpm6_data *
394 build_table(struct dpdk_lpm6_data *dd_prev, struct fib_data *fd)
396 struct dpdk_lpm6_data *dd;
397 struct rte_lpm6 *lpm6;
/* M_NOWAIT: the allocation may fail instead of sleeping. */
399 dd = malloc(sizeof(struct dpdk_lpm6_data), M_TEMP, M_NOWAIT | M_ZERO);
401 FIB_PRINTF(LOG_INFO, fd, "Unable to allocate base datastructure");
404 dd->fibnum = dd_prev->fibnum;
407 estimate_scale(dd_prev, dd);
409 struct rte_lpm6_config cfg = {.number_tbl8s = dd->number_tbl8s};
410 lpm6 = rte_lpm6_create("test", 0, &cfg);
412 FIB_PRINTF(LOG_INFO, fd, "unable to create lpm6");
417 struct rte_lpm6_external *ext = (struct rte_lpm6_external *)lpm6;
418 ext->nh_idx = fib_get_nhop_array(dd->fd);
420 FIB_PRINTF(LOG_INFO, fd, "allocated %u tbl8s", dd->number_tbl8s);
/*
 * Init callback (.flm_init_cb of dpdk_lpm6): creates a lookup instance
 * for the given FIB.
 *
 * Without previous data a zeroed template carrying fibnum and the
 * minimum tbl8 count (LPM6_MIN_TBL8) is used as the model; otherwise the
 * previous instance in _old_data is the model.  The new instance is
 * produced by build_table().
 *
 * Returns FLM_SUCCESS, or FLM_REBUILD when table creation failed.
 */
425 static enum flm_op_result
426 init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data)
428 struct dpdk_lpm6_data *dd, dd_base;
430 if (_old_data == NULL) {
/* First build: start from an on-stack template. */
431 bzero(&dd_base, sizeof(struct dpdk_lpm6_data));
432 dd_base.fibnum = fibnum;
433 /* TODO: get rib statistics */
434 dd_base.number_tbl8s = LPM6_MIN_TBL8;
437 FIB_PRINTF(LOG_INFO, fd, "Starting with old data");
438 dd = (struct dpdk_lpm6_data *)_old_data;
441 /* Guaranteed to be in epoch */
/* dd is the model on input and the newly built instance on output. */
442 dd = build_table(dd, fd);
444 FIB_PRINTF(LOG_INFO, fd, "table creation failed");
445 return (FLM_REBUILD);
449 return (FLM_SUCCESS);
/*
 * Lookup module descriptor for the "dpdk_lpm6" algorithm (AF_INET6).
 * It wires the callbacks defined in this file and is passed to
 * fib_module_register() / fib_module_unregister() by lpm6_modevent().
 */
452 static struct fib_lookup_module dpdk_lpm6 = {
453 .flm_name = "dpdk_lpm6",
454 .flm_family = AF_INET6,
455 .flm_init_cb = init_table,
456 .flm_destroy_cb = destroy_table,
/* Called for every existing route while a new instance is populated. */
457 .flm_dump_rib_item_cb = add_route_cb,
458 .flm_dump_end_cb = check_dump_success,
/* Called for individual route changes. */
459 .flm_change_rib_item_cb = handle_rtable_change_cb,
460 .flm_get_pref = rte6_get_pref,
/*
 * Module event handler: registers the dpdk_lpm6 lookup module with
 * fib_module_register() and removes it with fib_module_unregister(),
 * whose result is kept in error.
 *
 * NOTE(review): presumably register on MOD_LOAD and unregister on
 * MOD_UNLOAD -- confirm against the event switch.
 */
464 lpm6_modevent(module_t mod, int type, void *unused)
470 fib_module_register(&dpdk_lpm6);
473 error = fib_module_unregister(&dpdk_lpm6);
/*
 * Kernel module glue: lpm6mod is declared at SI_SUB_PSEUDO with
 * SI_ORDER_ANY and carries module version 1.
 */
482 static moduledata_t lpm6mod = {
488 DECLARE_MODULE(lpm6mod, lpm6mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
489 MODULE_VERSION(lpm6mod, 1);