2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2020 Alexander V. Chernikov
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/kernel.h>
35 #include <sys/rmlock.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/kernel.h>
39 #include <sys/socket.h>
40 #include <sys/sysctl.h>
41 #include <sys/syslog.h>
45 #include <net/if_var.h>
47 #include <netinet/in.h>
48 #include <netinet/in_fib.h>
49 #include <netinet/ip.h>
51 #include <net/route.h>
52 #include <net/route/nhop.h>
53 #include <net/route/route_ctl.h>
54 #include <net/route/fib_algo.h>
/*
 * tbl8 sizing constants.  LPM_MIN_TBL8 is the number_tbl8s value that
 * init_table() starts from when there is no previous table.
 *
 * The LPM_MAX_TBL8 expansion is parenthesized so that the macro behaves as
 * a single operand in any expression (for example x / LPM_MAX_TBL8).
 */
#define	LPM_MIN_TBL8	8		/* 2 pages of memory */
#define	LPM_MAX_TBL8	(65536 * 16)	/* 256M */
/*
 * Malloc type passed to malloc(9) in build_table().  MALLOC_DECLARE only
 * declares the type; its definition is expected to live elsewhere.
 */
62 MALLOC_DECLARE(M_RTABLE);
/*
 * Per-table state of the DPDK LPM lookup module.
 *
 * NOTE(review): this text shows only three members and no closing brace,
 * yet the functions in this file also access dd->lpm, dd->fd, dd->fibnum
 * and dd->hit_tables.  Those members appear to be missing here -- confirm
 * against the complete file.
 */
64 struct dpdk_lpm_data {
/* Logged as "added" by check_dump_success(). */
66 uint64_t routes_added;
/* Logged as "failed"; a non-zero value makes check_dump_success() rebuild. */
67 uint64_t routes_failed;
/* Handed to rte_lpm_create() through struct rte_lpm_config. */
68 uint32_t number_tbl8s;
76 * Main datapath routing
/*
 * Datapath lookup: resolve an IPv4 destination to a nexthop object.
 *
 * algo_data is cast to the struct rte_lpm that is searched.  key.addr4 is
 * converted with ntohl() before it is passed to rte_lpm_lookup().  scopeid
 * is not used in the visible lines.
 *
 * Returns the nh_idx[] entry selected by the lookup, or the entry at
 * default_idx on the "Not found" path.
 *
 * NOTE(review): the braces, the declarations of lpm, nhidx and ret, and the
 * test on ret that chooses between the two returns are absent from this
 * text -- confirm against the complete file.
 */
78 static struct nhop_object *
79 lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
82 const struct rte_lpm_external *rte_ext;
86 lpm = (struct rte_lpm *)algo_data;
/* Same pointer, viewed as the external layout to reach nh_idx[]/default_idx. */
87 rte_ext = (const struct rte_lpm_external *)lpm;
89 ret = rte_lpm_lookup(lpm, ntohl(key.addr4.s_addr), &nhidx);
92 return (rte_ext->nh_idx[nhidx]);
94 /* Not found. Check default route */
95 return (rte_ext->nh_idx[rte_ext->default_idx]);
/*
 * Preference callback (installed as .flm_get_pref): derives a value from
 * the number of prefixes reported in rinfo.
 *
 *   10 <= num_prefixes < 1000       -> num_prefixes / 10
 *   1000 <= num_prefixes < 500000   -> 100 + num_prefixes / 3334
 *
 * NOTE(review): the return type, the braces and the return statements for
 * fewer than 10 prefixes and for 500000 or more prefixes are not present in
 * this text -- confirm against the complete file.
 */
102 rte_get_pref(const struct rib_rtable_info *rinfo)
105 if (rinfo->num_prefixes < 10)
107 else if (rinfo->num_prefixes < 1000)
108 return (rinfo->num_prefixes / 10);
109 else if (rinfo->num_prefixes < 500000)
110 return (100 + rinfo->num_prefixes / 3334);
/*
 * Apply a change of the default route to the table's default_idx.
 *
 * For any command other than RTM_DELETE the index of rc->rc_nh_new is
 * obtained with fib_get_nhop_idx() and stored in default_idx.  The
 * "No default route" path stores 0 instead.
 *
 * Returns FLM_SUCCESS, or FLM_REBUILD on the early exit that follows the
 * index lookup.
 *
 * NOTE(review): the braces, the else and the condition guarding the
 * FLM_REBUILD return are absent from this text; presumably the rebuild is
 * requested when no index could be obtained -- TODO confirm.
 */
115 static enum flm_op_result
116 handle_default_change(struct dpdk_lpm_data *dd, struct rib_cmd_info *rc)
118 struct rte_lpm_external *rte_ext;
/* default_idx lives in the external view of the LPM table. */
119 rte_ext = (struct rte_lpm_external *)dd->lpm;
121 if (rc->rc_cmd != RTM_DELETE) {
123 uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
126 return (FLM_REBUILD);
127 rte_ext->default_idx = nhidx;
129 /* No default route */
130 rte_ext->default_idx = 0;
133 return (FLM_SUCCESS);
/*
 * Look up the parent route of addr/plen in FIB dd->fibnum with
 * rt_get_inet_parent() and report it through the output pointers.
 *
 * When the parent's prefix length is greater than 0, *pplen receives that
 * length and *nhop_idx the index of the parent's raw nexthop.
 *
 * NOTE(review): the return type, the braces, the declarations of rt,
 * parent_plen and scopeid, any check of rt, and whatever is stored in the
 * outputs when no usable parent exists are absent from this text --
 * confirm against the complete file.
 */
137 get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, int plen,
138 uint8_t *pplen, uint32_t *nhop_idx)
142 rt = rt_get_inet_parent(dd->fibnum, addr, plen);
144 struct in_addr addr4;
148 rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
149 if (parent_plen > 0) {
150 *pplen = parent_plen;
151 *nhop_idx = fib_get_nhop_idx(dd->fd, rt_get_raw_nhop(rt));
/*
 * Apply a RIB change for a single IPv4 prefix to the LPM table.
 *
 * dd   - table state; dd->lpm is modified, dd->fd is used for logging and
 *        for nexthop index lookups
 * rc   - the change; rc_cmd selects the add/update path or the delete path
 * addr - prefix address, converted with ntohl() for the rte_lpm calls
 * plen - prefix length
 *
 * Add/update: the index of rc->rc_nh_new is fetched and the prefix is
 * installed with rte_lpm_add().
 * Delete: the parent rule is fetched with get_parent_rule() and passed to
 * rte_lpm_delete() together with the prefix.
 *
 * Returns FLM_SUCCESS, or FLM_REBUILD on the "nhop limit reached" path and
 * on the path that logs "error: %d".
 *
 * NOTE(review): the braces, the else, the declarations of ip, nhidx, ret
 * and parent_plen, and the conditions guarding both FLM_REBUILD returns are
 * absent from this text -- confirm against the complete file.
 */
160 static enum flm_op_result
161 handle_gu_change(struct dpdk_lpm_data *dd, const struct rib_cmd_info *rc,
162 const struct in_addr addr, int plen)
166 char abuf[INET_ADDRSTRLEN];
169 ip = ntohl(addr.s_addr);
/* Printable form of the prefix address for the log messages in this function. */
170 inet_ntop(AF_INET, &addr, abuf, sizeof(abuf));
172 /* So we get sin, plen and nhidx */
173 if (rc->rc_cmd != RTM_DELETE) {
175 * Addition or change. Save nhop in the internal table
178 nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
180 FIB_PRINTF(LOG_INFO, dd->fd, "nhop limit reached, need rebuild");
181 return (FLM_REBUILD);
184 ret = rte_lpm_add(dd->lpm, ip, plen, nhidx);
185 FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u ret: %d",
186 (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
188 rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0,
192 * Need to lookup parent. Assume deletion happened already
195 uint32_t parent_nhop_idx;
196 get_parent_rule(dd, addr, plen, &parent_plen, &parent_nhop_idx);
198 ret = rte_lpm_delete(dd->lpm, ip, plen, parent_plen, parent_nhop_idx);
199 FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d -> /%d nhop %u -> %u ret: %d",
200 "DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, rc->rc_nh_old),
201 parent_nhop_idx, ret);
205 FIB_PRINTF(LOG_INFO, dd->fd, "error: %d", ret);
207 return (FLM_REBUILD);
210 return (FLM_SUCCESS);
/*
 * RIB change callback (installed as .flm_change_rib_item_cb).
 *
 * Extracts the IPv4 prefix and prefix length of rc->rc_rt and hands the
 * change to handle_gu_change() or handle_default_change().  It logs
 * "error handling route" on a failure path.
 *
 * NOTE(review): the end of the parameter list (the body uses _data, which
 * is not declared in the visible lines), the braces, the declarations of
 * plen and scopeid, the condition choosing between the two handlers, the
 * condition guarding the log message and the final return are absent from
 * this text -- confirm against the complete file.
 */
213 static enum flm_op_result
214 handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
217 struct dpdk_lpm_data *dd;
218 enum flm_op_result ret;
219 struct in_addr addr4;
223 dd = (struct dpdk_lpm_data *)_data;
224 rt_get_inet_prefix_plen(rc->rc_rt, &addr4, &plen, &scopeid);
227 ret = handle_gu_change(dd, rc, addr4, plen);
229 ret = handle_default_change(dd, rc);
232 FIB_PRINTF(LOG_INFO, dd->fd, "error handling route");
/*
 * Destroy callback (installed as .flm_destroy_cb): releases the LPM table
 * referenced by the dpdk_lpm_data passed in _data with rte_lpm_free().
 *
 * NOTE(review): the return type and braces are missing from this text, and
 * no statement releasing dd itself is visible.  build_table() allocates it
 * with malloc(9), so confirm that the complete file frees it here.
 */
237 destroy_table(void *_data)
239 struct dpdk_lpm_data *dd = (struct dpdk_lpm_data *)_data;
242 rte_lpm_free(dd->lpm);
/*
 * RIB dump callback (installed as .flm_dump_rib_item_cb): adds one route to
 * the LPM table that is being built.
 *
 * rt    - route being dumped
 * _data - the struct dpdk_lpm_data of the table under construction
 *
 * The route's raw nexthop and IPv4 prefix/length are extracted and logged.
 * On the "Adding default route" path a local rib_cmd_info is passed to
 * handle_default_change().  Otherwise the nexthop index is fetched and the
 * prefix is installed with rte_lpm_add().
 *
 * Returns FLM_SUCCESS, the result of handle_default_change(), or
 * FLM_REBUILD on the "unable to get nhop index" and -ENOSPC paths.
 *
 * NOTE(review): the braces, the declarations of plen, scopeid and ret, the
 * condition selecting the default-route path, the initializer of rc, the
 * conditions guarding the failure paths, and any handling of rte_lpm_add()
 * errors other than -ENOSPC are absent from this text -- confirm against
 * the complete file.
 */
246 static enum flm_op_result
247 add_route_cb(struct rtentry *rt, void *_data)
249 struct dpdk_lpm_data *dd = (struct dpdk_lpm_data *)_data;
250 struct nhop_object *nh;
252 struct in_addr addr4;
255 nh = rt_get_raw_nhop(rt);
256 rt_get_inet_prefix_plen(rt, &addr4, &plen, &scopeid);
/* Printable form of the prefix address for the log messages in this function. */
258 char abuf[INET_ADDRSTRLEN];
259 inet_ntop(AF_INET, &addr4, abuf, sizeof(abuf));
261 FIB_PRINTF(LOG_DEBUG, dd->fd, "Operating on %s/%d", abuf, plen);
264 struct rib_cmd_info rc = {
269 FIB_PRINTF(LOG_DEBUG, dd->fd, "Adding default route");
270 return (handle_default_change(dd, &rc));
273 uint32_t nhidx = fib_get_nhop_idx(dd->fd, nh);
275 FIB_PRINTF(LOG_INFO, dd->fd, "unable to get nhop index");
276 return (FLM_REBUILD);
278 ret = rte_lpm_add(dd->lpm, ntohl(addr4.s_addr), plen, nhidx);
279 FIB_PRINTF(LOG_DEBUG, dd->fd, "ADD %p %s/%d nh %u = %d",
280 dd->lpm, abuf, plen, nhidx, ret);
283 FIB_PRINTF(LOG_INFO, dd->fd, "rte_lpm_add() returned %d", ret);
284 if (ret == -ENOSPC) {
286 return (FLM_REBUILD);
293 return (FLM_SUCCESS);
/*
 * Dump-end callback (installed as .flm_dump_end_cb).
 *
 * Logs the routes_added/routes_failed counters and returns FLM_REBUILD when
 * hit_tables is set or at least one route failed.  Otherwise it logs that
 * the engine is synced with the RIB and returns FLM_SUCCESS.
 *
 * NOTE(review): the braces are missing, and dp is never touched in the
 * visible lines; presumably the complete file publishes the lookup function
 * and its argument through dp before the final return -- TODO confirm.
 *
 * NOTE(review): routes_added and routes_failed are declared uint64_t but
 * are printed with %zu, which is the size_t conversion.  The two types
 * differ on 32-bit platforms; %ju with a (uintmax_t) cast would be
 * portable.
 */
296 static enum flm_op_result
297 check_dump_success(void *_data, struct fib_dp *dp)
299 struct dpdk_lpm_data *dd;
301 dd = (struct dpdk_lpm_data *)_data;
303 FIB_PRINTF(LOG_INFO, dd->fd, "scan completed. added: %zu failed: %zu",
304 dd->routes_added, dd->routes_failed);
305 if (dd->hit_tables || dd->routes_failed > 0)
306 return (FLM_REBUILD);
308 FIB_PRINTF(LOG_INFO, dd->fd,
309 "DPDK lookup engine synced with IPv4 RIB id %u, %zu routes",
310 dd->fibnum, dd->routes_added);
315 return (FLM_SUCCESS);
/*
 * Choose number_tbl8s for the new table dd from the previous table dd_src:
 * twice the previous value when dd_src->hit_tables is set, the previous
 * value otherwise.
 *
 * NOTE(review): the return type, the braces and the else are missing from
 * this text.  No upper bound (LPM_MAX_TBL8 is defined in this file) is
 * applied to the doubled value in the visible lines -- confirm whether the
 * complete file clamps it.
 */
319 estimate_scale(const struct dpdk_lpm_data *dd_src, struct dpdk_lpm_data *dd)
322 /* XXX: update at 75% capacity */
323 if (dd_src->hit_tables)
324 dd->number_tbl8s = dd_src->number_tbl8s * 2;
326 dd->number_tbl8s = dd_src->number_tbl8s;
328 /* TODO: look into the appropriate RIB to adjust */
/*
 * Allocate and set up a new dpdk_lpm_data for fd, sized from dd_prev.
 *
 * The structure is allocated zeroed (M_ZERO) without sleeping (M_NOWAIT)
 * and inherits fibnum from dd_prev.  estimate_scale() picks number_tbl8s
 * and rte_lpm_create() builds the LPM table with that many tbl8 groups.
 * The table's nh_idx is pointed at the array returned by
 * fib_get_nhop_array().
 *
 * NOTE(review): the braces, the declaration of lpm, the NULL checks and
 * returns on both failure paths, any release of dd when rte_lpm_create()
 * fails, the assignments of dd->fd and dd->lpm (dd->fd is read below but
 * never assigned in the visible lines) and the final return are absent
 * from this text -- confirm against the complete file.
 */
331 static struct dpdk_lpm_data *
332 build_table(struct dpdk_lpm_data *dd_prev, struct fib_data *fd)
334 struct dpdk_lpm_data *dd;
337 dd = malloc(sizeof(struct dpdk_lpm_data), M_RTABLE, M_NOWAIT | M_ZERO);
339 FIB_PRINTF(LOG_INFO, fd, "Unable to allocate base datastructure");
342 dd->fibnum = dd_prev->fibnum;
345 estimate_scale(dd_prev, dd);
347 struct rte_lpm_config cfg = {.number_tbl8s = dd->number_tbl8s};
348 lpm = rte_lpm_create("test", 0, &cfg);
350 FIB_PRINTF(LOG_INFO, fd, "unable to create lpm");
/* nh_idx is the array that lookup_ptr() indexes with the lookup result. */
355 struct rte_lpm_external *ext = (struct rte_lpm_external *)lpm;
356 ext->nh_idx = fib_get_nhop_array(dd->fd);
358 FIB_PRINTF(LOG_INFO, fd, "allocated %u tbl8s", dd->number_tbl8s);
/*
 * Init callback (installed as .flm_init_cb): builds a new table for fibnum.
 *
 * Without old data, a zeroed template (dd_base) carrying fibnum and
 * LPM_MIN_TBL8 tbl8 groups is prepared.  With old data, the previous
 * instance is used as the template.  The template is then passed to
 * build_table().
 *
 * Returns FLM_SUCCESS, or FLM_REBUILD after logging
 * "table creation failed".
 *
 * NOTE(review): the braces, the else, the statement that makes dd refer to
 * dd_base, the check of build_table()'s result and the store of the new
 * table through data are absent from this text -- confirm against the
 * complete file.
 */
363 static enum flm_op_result
364 init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data)
366 struct dpdk_lpm_data *dd, dd_base;
368 if (_old_data == NULL) {
369 bzero(&dd_base, sizeof(struct dpdk_lpm_data));
370 dd_base.fibnum = fibnum;
371 /* TODO: get rib statistics */
372 dd_base.number_tbl8s = LPM_MIN_TBL8;
375 FIB_PRINTF(LOG_DEBUG, fd, "Starting with old data");
376 dd = (struct dpdk_lpm_data *)_old_data;
379 /* Guaranteed to be in epoch */
380 dd = build_table(dd, fd);
382 FIB_PRINTF(LOG_NOTICE, fd, "table creation failed");
383 return (FLM_REBUILD);
387 return (FLM_SUCCESS);
/*
 * Lookup-module descriptor passed to fib_module_register() and
 * fib_module_unregister() by lpm4_modevent().  It names the algorithm,
 * binds it to AF_INET and wires in the callbacks defined in this file.
 *
 * NOTE(review): the closing "};" of the initializer is not present in this
 * text.
 */
390 static struct fib_lookup_module dpdk_lpm4 = {
391 .flm_name = "dpdk_lpm4",
392 .flm_family = AF_INET,
393 .flm_init_cb = init_table,
394 .flm_destroy_cb = destroy_table,
395 .flm_dump_rib_item_cb = add_route_cb,
396 .flm_dump_end_cb = check_dump_success,
397 .flm_change_rib_item_cb = handle_rtable_change_cb,
398 .flm_get_pref = rte_get_pref,
/*
 * Module event handler: registers dpdk_lpm4 with fib_module_register() and
 * unregisters it with fib_module_unregister(), keeping the latter's result
 * in error.
 *
 * NOTE(review): the return type, the braces, the declaration of error, the
 * dispatch on type (presumably load vs. unload -- TODO confirm) and the
 * return statement are absent from this text.
 */
402 lpm4_modevent(module_t mod, int type, void *unused)
408 fib_module_register(&dpdk_lpm4);
411 error = fib_module_unregister(&dpdk_lpm4);
/*
 * Kernel module glue.
 *
 * NOTE(review): the initializer of lpm4mod is cut off after the opening
 * brace in this text; presumably it names the module and points at
 * lpm4_modevent() -- TODO confirm against the complete file.
 */
420 static moduledata_t lpm4mod = {
/* Declared at SI_SUB_PSEUDO / SI_ORDER_ANY; module version is 1. */
426 DECLARE_MODULE(lpm4mod, lpm4mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
427 MODULE_VERSION(lpm4mod, 1);