2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2020 Alexander V. Chernikov
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * This header file contains public definitions for the nexthop routing subsystem.
34 #ifndef _NET_ROUTE_NHOP_H_
35 #define _NET_ROUTE_NHOP_H_
37 #include <netinet/in.h> /* sockaddr_in && sockaddr_in6 */
39 #include <sys/counter.h>
42 NH_TYPE_IPV4_ETHER_RSLV = 1, /* IPv4 ethernet without GW */
43 NH_TYPE_IPV4_ETHER_NHOP = 2, /* IPv4 with pre-calculated ethernet encap */
44 NH_TYPE_IPV6_ETHER_RSLV = 3, /* IPv6 ethernet, without GW */
45 NH_TYPE_IPV6_ETHER_NHOP = 4 /* IPv6 with pre-calculated ethernet encap*/
51 * Define shorter version of AF_LINK sockaddr.
53 * Currently the only use case of AF_LINK gateway is storing
54 * interface index of the interface of the source IPv6 address.
55 * This is used by the IPv6 code for the connections over loopback
58 * The structure below copies 'struct sockaddr_dl', reducing the
59 * size of sdl_data buffer, as it is not used. This change
60 * allows storing AF_LINK gateways in the nhop gateway itself,
61 * simplifying control plane handling.
63 struct sockaddr_dl_short {
64 u_char sdl_len; /* Total length of sockaddr */
65 u_char sdl_family; /* AF_LINK */
66 u_short sdl_index; /* if != 0, system given index for interface */
67 u_char sdl_type; /* interface type */
68 u_char sdl_nlen; /* interface name length, no trailing 0 reqd. */
69 u_char sdl_alen; /* link level address length */
70 u_char sdl_slen; /* link layer selector length */
71 char sdl_data[8]; /* unused */
/*
 * Bitmask formed by OR-ing the RTF_* route flags listed below.
 * NOTE(review): judging by the macro name, these are the route flags that
 * are relevant to nexthop objects; the consumers are not visible in this
 * header -- confirm against the callers.
 */
74 #define NHOP_RELATED_FLAGS \
75 (RTF_GATEWAY | RTF_HOST | RTF_REJECT | RTF_BLACKHOLE | \
76 RTF_FIXEDMTU | RTF_LOCAL | RTF_BROADCAST | RTF_MULTICAST)
82 * Struct 'nhop_object' field description:
84 * nh_flags: NHF_ flags used in the dataplane code. NHF_GATEWAY or NHF_BLACKHOLE
85 * can be examples of such flags.
86 * nh_mtu: ready-to-use nexthop mtu. Already accounts for the link-level header,
87 * interface MTU and protocol-specific limitations.
88 * nh_prepend_len: link-level prepend length. Currently unused.
89 * nh_ifp: logical transmit interface. The one from which if_transmit() will be
90 * called. Guaranteed to be non-NULL.
91 * nh_aifp: ifnet of the source address. Same as nh_ifp except IPv6 loopback
92 * routes. See the example below.
93 * nh_ifa: interface address to use. Guaranteed to be non-NULL.
94 * nh_pksent: counter(9) reflecting the number of packets transmitted.
96 * gw_: storage suitable to hold AF_INET, AF_INET6 or AF_LINK gateway. More
97 * details are available in the examples below.
101 * Direct routes (routes w/o gateway):
102 * NHF_GATEWAY is NOT set.
103 * nh_ifp denotes the logical transmit interface.
104 * nh_aifp is the same as nh_ifp
105 * gw_sa contains AF_LINK sa with nh_aifp ifindex (compat)
107 * NHF_GATEWAY is NOT set.
108 * nh_ifp points to the loopback interface (lo0).
109 * nh_aifp points to the interface where the destination address belongs to.
110 * This is useful in IPv6 link-local-over-loopback communications.
111 * gw_sa contains AF_LINK sa with nh_aifp ifindex (compat)
113 * NHF_GATEWAY is set.
114 * nh_ifp denotes the logical transmit interface.
115 * nh_aifp is the same as nh_ifp
116 * gw_sa contains L3 address (either AF_INET or AF_INET6).
119 * Note: struct nhop_object fields are ordered in a way that
120 * supports memcmp-based comparisons.
/*
 * Byte offset of the nh_pksent field within struct nhop_object, i.e. the
 * size of the part of the structure that precedes that field.
 * NOTE(review): presumably used as the length for the memcmp-based
 * comparisons of nexthop objects -- confirm against the callers.
 */
123 #define NHOP_END_CMP (__offsetof(struct nhop_object, nh_pksent))
126 uint16_t nh_flags; /* nhop flags */
127 uint16_t nh_mtu; /* nexthop mtu */
129 struct sockaddr_in gw4_sa; /* GW accessor as IPv4 */
130 struct sockaddr_in6 gw6_sa; /* GW accessor as IPv6 */
131 struct sockaddr gw_sa;
132 struct sockaddr_dl_short gwl_sa; /* AF_LINK gw (compat) */
135 struct ifnet *nh_ifp; /* Logical egress interface. Always != NULL */
136 struct ifaddr *nh_ifa; /* interface address to use. Always != NULL */
137 struct ifnet *nh_aifp; /* ifnet of the source address. Always != NULL */
138 counter_u64_t nh_pksent; /* packets sent using this nhop */
139 /* 32 bytes + 4xPTR == 64(amd64) / 48(i386) */
140 uint8_t nh_prepend_len; /* length of prepend data */
142 uint32_t spare1; /* alignment */
143 char nh_prepend[48]; /* L2 prepend */
144 struct nhop_priv *nh_priv; /* control plane data */
145 /* -- 128 bytes -- */
151 * Currently we verify whether link is up or not on every packet, which can be
153 * TODO: subscribe to interface notifications and update the nexthops
154 * with NHF_INVALID flag.
/* Expands to RT_LINK_IS_UP() applied to the nexthop's transmit interface (nh_ifp). */
157 #define NH_IS_VALID(_nh) RT_LINK_IS_UP((_nh)->nh_ifp)
/*
 * Non-zero when the NHF_MULTIPATH bit is set in nh_flags.
 * NOTE(review): per the macro name this marks a nexthop group rather than a
 * single nexthop -- confirm.
 */
158 #define NH_IS_NHGRP(_nh) ((_nh)->nh_flags & NHF_MULTIPATH)
160 #define NH_FREE(_nh) do { \
162 /* guard against invalid refs */ \
166 struct weightened_nhop {
167 struct nhop_object *nh;
/*
 * Takes a (non-const) nexthop object pointer and returns nothing.
 * NOTE(review): per the name this releases the nexthop; the exact
 * reference/ownership semantics are not visible in this header -- confirm.
 */
171 void nhop_free(struct nhop_object *nh);
/*
 * Accessors below take a const nexthop pointer and return, respectively, a
 * 32-bit index, an enum nhop_type value, an int of route flags and a
 * struct vnet pointer.
 */
177 uint32_t nhop_get_idx(const struct nhop_object *nh);
178 enum nhop_type nhop_get_type(const struct nhop_object *nh);
179 int nhop_get_rtflags(const struct nhop_object *nh);
180 struct vnet *nhop_get_vnet(const struct nhop_object *nh);
/*
 * Takes a nexthop pointer and a 32-bit flow id and returns a nexthop pointer.
 * NOTE(review): presumably selects one member nexthop based on flowid when
 * nh is a nexthop group -- confirm against the implementation.
 */
181 struct nhop_object *nhop_select_func(struct nhop_object *nh, uint32_t flowid);
185 /* Kernel <> userland structures */
187 /* Structure usage and layout are described in dump_nhop_entry() */
188 struct nhop_external {
189 uint32_t nh_len; /* length of the data structure */
190 uint32_t nh_idx; /* Nexthop index */
191 uint32_t nh_fib; /* FIB the nexthop is attached to */
192 uint32_t ifindex; /* transmit interface ifindex */
193 uint32_t aifindex; /* address ifindex */
194 uint8_t prepend_len; /* length of the prepend */
195 uint8_t nh_family; /* address family */
196 uint16_t nh_type; /* nexthop type */
197 uint16_t nh_mtu; /* nexthop mtu */
199 uint16_t nh_flags; /* nhop flags */
200 struct in_addr nh_addr; /* GW/DST IPv4 address */
201 struct in_addr nh_src; /* default source IPv4 address */
204 /* lookup key: address, family, type */
205 char nh_prepend[64]; /* L2 prepend */
206 uint64_t nh_refcount; /* number of references */
210 uint32_t na_len; /* length of the datastructure */
211 uint16_t gw_sa_off; /* offset of gateway SA */
212 uint16_t src_sa_off; /* offset of src address SA */
/*
 * Container type identifiers.
 * NOTE(review): presumably the values stored in nhgrp_container.nhgc_type --
 * confirm against the code that fills the container.
 */
215 #define NHG_C_TYPE_CNHOPS 0x1 /* Control plane nhops list */
216 #define NHG_C_TYPE_DNHOPS 0x2 /* Dataplane nhops list */
217 struct nhgrp_container {
218 uint32_t nhgc_len; /* container length */
219 uint16_t nhgc_count; /* number of items */
220 uint8_t nhgc_type; /* container type */
221 uint8_t nhgc_subtype; /* container subtype */
224 struct nhgrp_nhop_external {
232 * - nhgrp_container (control plane nhops list)
233 * - nhgrp_nhop_external
234 * - nhgrp_nhop_external
236 * - nhgrp_container (dataplane nhops list)
237 * - nhgrp_nhop_external
238 * - nhgrp_nhop_external
240 struct nhgrp_external {
241 uint32_t nhg_idx; /* Nexthop group index */
242 uint32_t nhg_refcount; /* number of references */