2 * Copyright (c) 1980, 1986, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * @(#)route.h 8.4 (Berkeley) 1/9/95
36 #include <sys/counter.h>
40 * Kernel resident routing tables.
42 * The routing tables are initialized when interface addresses
43 * are set by making entries for all directly connected interfaces.
47 * Struct route consists of a destination address,
48 * a route entry pointer, link-layer prepend data pointer along
52 struct rtentry *ro_rt;
53 struct llentry *ro_lle;
55 * ro_prepend and ro_plen are only used for bpf to pass in a
56 * preformed header. They are not cacheable.
61 uint16_t ro_mtu; /* saved ro_rt mtu */
63 struct sockaddr ro_dst;
66 #define RT_L2_ME_BIT 2 /* dst L2 addr is our address */
67 #define RT_MAY_LOOP_BIT 3 /* dst may require loop copy */
68 #define RT_HAS_HEADER_BIT 4 /* mbuf already has its header prepended */
70 #define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */
71 #define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */
72 #define RT_L2_ME (1 << RT_L2_ME_BIT) /* 0x0004 */
73 #define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT) /* 0x0008 */
74 #define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT) /* 0x0010 */
76 #define RT_REJECT 0x0020 /* Destination is reject; set from RTF_REJECT by rt_update_ro_flags() */
77 #define RT_BLACKHOLE 0x0040 /* Destination is blackhole; set from RTF_BLACKHOLE by rt_update_ro_flags() */
78 #define RT_HAS_GW 0x0080 /* Destination has GW; set from RTF_GATEWAY by rt_update_ro_flags() */
79 #define RT_LLE_CACHE 0x0100 /* Cache link layer */
82 u_long rmx_locks; /* Kernel must leave these values alone */
83 u_long rmx_mtu; /* MTU for this path */
84 u_long rmx_hopcount; /* max hops expected */
85 u_long rmx_expire; /* lifetime for route, e.g. redirect */
86 u_long rmx_recvpipe; /* inbound delay-bandwidth product */
87 u_long rmx_sendpipe; /* outbound delay-bandwidth product */
88 u_long rmx_ssthresh; /* outbound gateway buffer limit */
89 u_long rmx_rtt; /* estimated round trip time */
90 u_long rmx_rttvar; /* estimated rtt variance */
91 u_long rmx_pksent; /* packets sent using this route */
92 u_long rmx_weight; /* route weight */
93 u_long rmx_filler[3]; /* will be used for T/TCP later */
97 * rmx_rtt and rmx_rttvar are stored as microseconds;
98 * RTTTOPRHZ(rtt) converts to a value suitable for use
99 * by a protocol slowtimo counter.
101 #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */
102 #define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ))
104 /* lle state is exported in rmx_state rt_metrics field */
105 #define rmx_state rmx_weight
108 * Keep a generation count of routing table, incremented on route addition,
109 * so we can invalidate caches. This is accessed without a lock, as precision
112 typedef volatile u_int rt_gen_t; /* tree generation (for adds) */
113 #define RT_GEN(fibnum, af) rt_tables_get_gen(fibnum, af)
115 #define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */
116 #define RT_ALL_FIBS -1 /* Announce event for every fib */
118 extern u_int rt_numfibs; /* number of usable routing tables */
119 VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */
120 #define V_rt_add_addr_allfibs VNET(rt_add_addr_allfibs)
124 * We distinguish between routes to hosts and routes to networks,
125 * preferring the former if available. For each route we infer
126 * the interface to use from the gateway address supplied when
127 * the route was entered. Routes that forward packets through
128 * gateways are marked so that the output routines know to address the
129 * gateway rather than the ultimate destination.
132 #include <net/radix.h>
134 #include <net/radix_mpath.h>
140 struct radix_node rt_nodes[2]; /* tree glue, and other values */
142 * XXX struct rtentry must begin with a struct radix_node (or two!)
143 * because the code does some casts of a 'struct radix_node *'
144 * to a 'struct rtentry *'
146 #define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key)))
147 #define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask)))
148 struct sockaddr *rt_gateway; /* value */
149 struct ifnet *rt_ifp; /* the answer: interface to use */
150 struct ifaddr *rt_ifa; /* the answer: interface address to use */
151 int rt_flags; /* up/down?, host/net */
152 int rt_refcnt; /* # held references */
153 u_int rt_fibnum; /* which FIB */
154 u_long rt_mtu; /* MTU for this path */
155 u_long rt_weight; /* absolute weight */
156 u_long rt_expire; /* lifetime for route, e.g. redirect */
157 #define rt_endzero rt_pksent
158 counter_u64_t rt_pksent; /* packets sent using this route */
159 struct mtx rt_mtx; /* mutex for routing entry */
160 struct rtentry *rt_chain; /* pointer to next rtentry to delete */
164 #define RTF_UP 0x1 /* route usable */
165 #define RTF_GATEWAY 0x2 /* destination is a gateway */
166 #define RTF_HOST 0x4 /* host entry (net otherwise) */
167 #define RTF_REJECT 0x8 /* host or net unreachable */
168 #define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */
169 #define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */
170 #define RTF_DONE 0x40 /* message confirmed */
171 /* 0x80 unused, was RTF_DELCLONE */
172 /* 0x100 unused, was RTF_CLONING */
173 #define RTF_XRESOLVE 0x200 /* external daemon resolves name */
174 #define RTF_LLINFO 0x400 /* DEPRECATED - exists ONLY for backward compatibility */
176 #define RTF_LLDATA 0x400 /* used by apps to add/del L2 entries */
177 #define RTF_STATIC 0x800 /* manually added */
178 #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */
179 #define RTF_PROTO2 0x4000 /* protocol specific routing flag */
180 #define RTF_PROTO1 0x8000 /* protocol specific routing flag */
181 /* 0x10000 unused, was RTF_PRCLONING */
182 /* 0x20000 unused, was RTF_WASCLONED */
183 #define RTF_PROTO3 0x40000 /* protocol specific routing flag */
184 #define RTF_FIXEDMTU 0x80000 /* MTU was explicitly specified */
185 #define RTF_PINNED 0x100000 /* route is immutable */
186 #define RTF_LOCAL 0x200000 /* route represents a local address */
187 #define RTF_BROADCAST 0x400000 /* route represents a bcast address */
188 #define RTF_MULTICAST 0x800000 /* route represents a mcast address */
189 /* 0x8000000 and up unassigned */
190 #define RTF_STICKY 0x10000000 /* always route dst->src */
192 #define RTF_RNH_LOCKED 0x40000000 /* unused */
194 #define RTF_GWFLAG_COMPAT 0x80000000 /* a compatibility bit for interacting
195 with existing routing apps */
197 /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */
199 (RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \
200 RTF_REJECT | RTF_STATIC | RTF_STICKY)
203 * fib_ nexthop API flags.
206 /* Consumer-visible nexthop info flags */
207 #define NHF_REJECT 0x0010 /* RTF_REJECT */
208 #define NHF_BLACKHOLE 0x0020 /* RTF_BLACKHOLE */
209 #define NHF_REDIRECT 0x0040 /* RTF_DYNAMIC|RTF_MODIFIED */
210 #define NHF_DEFAULT 0x0080 /* Default route */
211 #define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */
212 #define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */
214 /* Nexthop request flags */
215 #define NHR_IFAIF 0x01 /* Return ifa_ifp interface */
216 #define NHR_REF 0x02 /* For future use */
218 /* Control plane route request flags */
219 #define NHR_COPY 0x100 /* Copy rte data */
222 /* rte<>ro_flags translation: mirror the reject, blackhole and gateway state of ro->ro_rt->rt_flags into ro->ro_flags */
224 rt_update_ro_flags(struct route *ro)
226 int rt_flags = ro->ro_rt->rt_flags; /* ro_rt is dereferenced without a NULL check */
228 ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW); /* clear the three translated bits before recomputing them */
230 ro->ro_flags |= (rt_flags & RTF_REJECT) ? RT_REJECT : 0; /* RTF_REJECT -> RT_REJECT */
231 ro->ro_flags |= (rt_flags & RTF_BLACKHOLE) ? RT_BLACKHOLE : 0; /* RTF_BLACKHOLE -> RT_BLACKHOLE */
232 ro->ro_flags |= (rt_flags & RTF_GATEWAY) ? RT_HAS_GW : 0; /* RTF_GATEWAY -> RT_HAS_GW */
237 * Routing statistics.
240 short rts_badredirect; /* bogus redirect calls */
241 short rts_dynamic; /* routes created by redirects */
242 short rts_newgateway; /* routes modified by redirects */
243 short rts_unreach; /* lookups which failed */
244 short rts_wildcard; /* lookups satisfied by a wildcard */
247 * Structures for routing messages.
250 u_short rtm_msglen; /* to skip over non-understood messages */
251 u_char rtm_version; /* future binary compatibility */
252 u_char rtm_type; /* message type */
253 u_short rtm_index; /* index for associated ifp */
254 int rtm_flags; /* flags, incl. kern & message, e.g. DONE */
255 int rtm_addrs; /* bitmask identifying sockaddrs in msg */
256 pid_t rtm_pid; /* identify sender */
257 int rtm_seq; /* for sender to identify action */
258 int rtm_errno; /* why failed */
259 int rtm_fmask; /* bitmask used in RTM_CHANGE message */
260 u_long rtm_inits; /* which metrics we are initializing */
261 struct rt_metrics rtm_rmx; /* metrics themselves */
264 #define RTM_VERSION 5 /* Up the ante and ignore older versions */
269 #define RTM_ADD 0x1 /* Add Route */
270 #define RTM_DELETE 0x2 /* Delete Route */
271 #define RTM_CHANGE 0x3 /* Change Metrics or flags */
272 #define RTM_GET 0x4 /* Report Metrics */
273 #define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */
274 #define RTM_REDIRECT 0x6 /* Told to use different route */
275 #define RTM_MISS 0x7 /* Lookup failed on this address */
276 #define RTM_LOCK 0x8 /* fix specified metrics */
279 #define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */
280 #define RTM_NEWADDR 0xc /* address being added to iface */
281 #define RTM_DELADDR 0xd /* address being removed from iface */
282 #define RTM_IFINFO 0xe /* iface going up/down etc. */
283 #define RTM_NEWMADDR 0xf /* mcast group membership being added to if */
284 #define RTM_DELMADDR 0x10 /* mcast group membership being deleted */
285 #define RTM_IFANNOUNCE 0x11 /* iface arrival/departure */
286 #define RTM_IEEE80211 0x12 /* IEEE80211 wireless event */
289 * Bitmask values for rtm_inits and rmx_locks.
291 #define RTV_MTU 0x1 /* init or lock _mtu */
292 #define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */
293 #define RTV_EXPIRE 0x4 /* init or lock _expire */
294 #define RTV_RPIPE 0x8 /* init or lock _recvpipe */
295 #define RTV_SPIPE 0x10 /* init or lock _sendpipe */
296 #define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */
297 #define RTV_RTT 0x40 /* init or lock _rtt */
298 #define RTV_RTTVAR 0x80 /* init or lock _rttvar */
299 #define RTV_WEIGHT 0x100 /* init or lock _weight */
302 * Bitmask values for rtm_addrs.
304 #define RTA_DST 0x1 /* destination sockaddr present */
305 #define RTA_GATEWAY 0x2 /* gateway sockaddr present */
306 #define RTA_NETMASK 0x4 /* netmask sockaddr present */
307 #define RTA_GENMASK 0x8 /* cloning mask sockaddr present */
308 #define RTA_IFP 0x10 /* interface name sockaddr present */
309 #define RTA_IFA 0x20 /* interface addr sockaddr present */
310 #define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */
311 #define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */
314 * Index offsets for sockaddr array for alternate internal encoding.
316 #define RTAX_DST 0 /* destination sockaddr present */
317 #define RTAX_GATEWAY 1 /* gateway sockaddr present */
318 #define RTAX_NETMASK 2 /* netmask sockaddr present */
319 #define RTAX_GENMASK 3 /* cloning mask sockaddr present */
320 #define RTAX_IFP 4 /* interface name sockaddr present */
321 #define RTAX_IFA 5 /* interface addr sockaddr present */
322 #define RTAX_AUTHOR 6 /* sockaddr for author of redirect */
323 #define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */
324 #define RTAX_MAX 8 /* size of array to allocate */
326 typedef int rt_filter_f_t(const struct rtentry *, void *);
329 int rti_addrs; /* presumably the RTA_ bitmask of sockaddrs in rti_info; comment previously said RTF_ (verify) */
330 int rti_flags; /* Route RTF_ flags */
331 struct sockaddr *rti_info[RTAX_MAX]; /* Sockaddr data */
332 struct ifaddr *rti_ifa; /* value of rt_ifa addr */
333 struct ifnet *rti_ifp; /* route interface */
334 rt_filter_f_t *rti_filter; /* filter function */
335 void *rti_filterdata; /* filter parameters */
336 u_long rti_mflags; /* metrics RTV_ flags */
337 u_long rti_spare; /* Will be used for fib */
338 struct rt_metrics *rti_rmx; /* Pointer to route metrics */
342 * This macro returns the size of a struct sockaddr when passed
343 * through a routing socket. Basically we round up sa_len to
344 * a multiple of sizeof(long), with a minimum of sizeof(long).
345 * The check for a NULL pointer is just a convenience, probably never used.
346 * The case sa_len == 0 should only apply to empty structures.
348 #define SA_SIZE(sa) \
349 ( (!(sa) || ((struct sockaddr *)(sa))->sa_len == 0) ? \
351 1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) )
/*
 * Evaluates true when both sockaddrs carry the same sa_len and bcmp()
 * over that many bytes reports no difference.  Both arguments are
 * expanded more than once, so they must be free of side effects.
 */
353 #define sa_equal(a, b) ( \
354 (((const struct sockaddr *)(a))->sa_len == ((const struct sockaddr *)(b))->sa_len) && \
355 (bcmp((a), (b), ((const struct sockaddr *)(b))->sa_len) == 0))
/*
 * Evaluates true when the interface does not have IFCAP_LINKSTATE set in
 * if_capabilities, or when its if_link_state equals LINK_STATE_UP.
 */
359 #define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \
360 || (ifp)->if_link_state == LINK_STATE_UP)
/*
 * Thin wrappers around the mtx(9) calls for the rt_mtx mutex embedded in
 * an rtentry: initialize (named "rtentry"), lock, unlock, destroy, and
 * assert that the mutex is owned (MA_OWNED).
 */
362 #define RT_LOCK_INIT(_rt) \
363 mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW)
364 #define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx)
365 #define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx)
366 #define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx)
367 #define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED)
368 #define RT_UNLOCK_COND(_rt) do { \
369 if (mtx_owned(&(_rt)->rt_mtx)) \
370 mtx_unlock(&(_rt)->rt_mtx); \
373 #define RT_ADDREF(_rt) do { \
374 RT_LOCK_ASSERT(_rt); \
375 KASSERT((_rt)->rt_refcnt >= 0, \
376 ("negative refcnt %d", (_rt)->rt_refcnt)); \
377 (_rt)->rt_refcnt++; \
380 #define RT_REMREF(_rt) do { \
381 RT_LOCK_ASSERT(_rt); \
382 KASSERT((_rt)->rt_refcnt > 0, \
383 ("bogus refcnt %d", (_rt)->rt_refcnt)); \
384 (_rt)->rt_refcnt--; \
387 #define RTFREE_LOCKED(_rt) do { \
388 if ((_rt)->rt_refcnt <= 1) \
394 /* guard against invalid refs */ \
398 #define RTFREE(_rt) do { \
400 RTFREE_LOCKED(_rt); \
403 #define RO_RTFREE(_ro) do { \
404 if ((_ro)->ro_rt) { \
405 if ((_ro)->ro_flags & RT_NORTREF) { \
406 (_ro)->ro_flags &= ~RT_NORTREF; \
407 (_ro)->ro_rt = NULL; \
408 (_ro)->ro_lle = NULL; \
410 RT_LOCK((_ro)->ro_rt); \
411 RTFREE_LOCKED((_ro)->ro_rt); \
417 * Validate a cached route based on a supplied cookie. If there is an
418 * out-of-date cache, simply free it. Update the generation number
419 * for the new allocation
421 #define RT_VALIDATE(ro, cookiep, fibnum) do { \
422 rt_gen_t cookie = RT_GEN(fibnum, (ro)->ro_dst.sa_family); \
423 if (*(cookiep) != cookie) { \
424 if ((ro)->ro_rt != NULL) { \
425 RTFREE((ro)->ro_rt); \
426 (ro)->ro_rt = NULL; \
428 *(cookiep) = cookie; \
435 void rt_ieee80211msg(struct ifnet *, int, void *, size_t);
436 void rt_ifannouncemsg(struct ifnet *, int);
437 void rt_ifmsg(struct ifnet *);
438 void rt_missmsg(int, struct rt_addrinfo *, int, int);
439 void rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int);
440 void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
441 void rt_newaddrmsg_fib(int, struct ifaddr *, int, struct rtentry *, int);
442 int rt_addrmsg(int, struct ifaddr *, int);
443 int rt_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
444 void rt_newmaddrmsg(int, struct ifmultiaddr *);
445 int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
446 void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
447 struct rib_head *rt_table_init(int);
448 void rt_table_destroy(struct rib_head *);
449 u_int rt_tables_get_gen(int table, int fam);
451 int rtsock_addrmsg(int, struct ifaddr *, int);
452 int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
455 * Note the following locking behavior:
457 * rtalloc1() returns a locked rtentry
459 * rtfree() and RTFREE_LOCKED() require a locked rtentry
461 * RTFREE() uses an unlocked entry.
464 void rtfree(struct rtentry *);
465 void rt_updatemtu(struct ifnet *);
467 typedef int rt_walktree_f_t(struct rtentry *, void *);
468 typedef void rt_setwarg_t(struct rib_head *, uint32_t, int, void *);
469 void rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *);
470 void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg);
471 void rt_flushifroutes_af(struct ifnet *, int);
472 void rt_flushifroutes(struct ifnet *ifp);
474 /* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */
475 /* These are used by old code not yet converted to use multiple FIBS */
476 struct rtentry *rtalloc1(struct sockaddr *, int, u_long);
477 int rtinit(struct ifaddr *, int, int);
479 /* XXX MRT NEW VERSIONS THAT USE FIBs
480 * For now the protocol independent versions are the same as the AF_INET ones
481 * but this will change..
483 int rt_getifa_fib(struct rt_addrinfo *, u_int fibnum);
484 void rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum);
485 struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int);
486 int rtioctl_fib(u_long, caddr_t, u_int);
487 void rtredirect_fib(struct sockaddr *, struct sockaddr *,
488 struct sockaddr *, int, struct sockaddr *, u_int);
489 int rtrequest_fib(int, struct sockaddr *,
490 struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int);
491 int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int);
492 int rib_lookup_info(uint32_t, const struct sockaddr *, uint32_t, uint32_t,
493 struct rt_addrinfo *);
494 void rib_free_info(struct rt_addrinfo *info);