2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1982, 1986, 1988, 1993
5 * The Regents of the University of California.
6 * Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
37 #include "opt_ipstealth.h"
39 #include <sys/param.h>
40 #include <sys/systm.h>
42 #include <sys/domain.h>
43 #include <sys/protosw.h>
44 #include <sys/socket.h>
46 #include <sys/kernel.h>
47 #include <sys/syslog.h>
48 #include <sys/sysctl.h>
51 #include <net/if_types.h>
52 #include <net/if_var.h>
53 #include <net/if_dl.h>
54 #include <net/route.h>
55 #include <net/netisr.h>
58 #include <netinet/in.h>
59 #include <netinet/in_fib.h>
60 #include <netinet/in_systm.h>
61 #include <netinet/in_var.h>
62 #include <netinet/ip.h>
63 #include <netinet/in_pcb.h>
64 #include <netinet/ip_var.h>
65 #include <netinet/ip_options.h>
66 #include <netinet/ip_icmp.h>
67 #include <machine/in_cksum.h>
69 #include <sys/socketvar.h>
/*
 * Per-VNET integer exported read/write under _net_inet_ip as sourceroute.
 * ip_dooptions() tests it before a source-routed packet is sent onwards.
 */
71 VNET_DEFINE_STATIC(int, ip_dosourceroute);
72 SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute,
73 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_dosourceroute), 0,
74 "Enable forwarding source routed IP packets");
75 #define V_ip_dosourceroute VNET(ip_dosourceroute)
/*
 * Per-VNET integer exported read/write under _net_inet_ip as
 * accept_sourceroute.  ip_dooptions() tests it when a source route has
 * reached its end, before the route is saved with save_rte().
 */
77 VNET_DEFINE_STATIC(int, ip_acceptsourceroute);
78 SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
79 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_acceptsourceroute), 0,
80 "Enable accepting source routed IP packets");
81 #define V_ip_acceptsourceroute VNET(ip_acceptsourceroute)
/*
 * Global (non-static) per-VNET policy for option handling, initialised to 1
 * and exported read/write as process_options.  ip_dooptions() compares it
 * against 0 and 2 before parsing anything.
 */
83 VNET_DEFINE(int, ip_doopts) = 1; /* 0 = ignore, 1 = process, 2 = reject */
84 SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_VNET | CTLFLAG_RW,
85 &VNET_NAME(ip_doopts), 0, "Enable IP options processing ([LS]SRR, RR, TS)");
/* Forward declaration; the definition appears further down in this file. */
87 static void save_rte(struct mbuf *m, u_char *, struct in_addr);
90 * Do option processing on a datagram, possibly discarding it if bad options
91 * are encountered, or forwarding it if source-routed.
93 * The pass argument is used when operating in the IPSTEALTH mode to tell
94 * what options to process: [LS]SRR (pass 0) or the others (pass 1). The
95 * reason for as many as two passes is that when doing IPSTEALTH, non-routing
96 * options should be processed only if the packet is for us.
98 * Returns 1 if packet has been forwarded/freed, 0 if the packet should be
/*
 * ip_dooptions: walk the IPv4 options that follow the fixed header of the
 * packet in m and act on source-route, address-recording and timestamp
 * options.
 *
 *   m    - mbuf whose data starts with the IP header (read through mtod()).
 *   pass - only consulted together with V_ipstealth: the source-route
 *          branch is guarded by (pass > 0), the address-recording and
 *          timestamp branches by (pass == 0).
 *
 * V_ip_doopts is checked against 0 and 2 before any parsing; the value 2
 * selects ICMP_UNREACH_FILTER_PROHIB as the error code.
 *
 * When a field is malformed, code is set to the byte offset of that field
 * from the start of the IP header.  code and type (initially
 * ICMP_PARAMPROB) are the values handed to icmp_error().
 *
 * NOTE(review): the return statements are not visible in this view; the
 * description preceding the function says 1 means forwarded/freed.  Confirm
 * the remaining return value against the complete file.
 */
102 ip_dooptions(struct mbuf *m, int pass)
104 struct ip *ip = mtod(m, struct ip *);
106 struct in_ifaddr *ia;
107 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
108 struct in_addr *sin, dst;
110 struct nhop4_extended nh_ext;
/*
 * Scratch sockaddr_in for the address lookups below; its first two members
 * are initialised to its own size and AF_INET.
 */
111 struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
112 struct epoch_tracker et;
114 /* Ignore or reject packets with IP options. */
115 if (V_ip_doopts == 0)
117 else if (V_ip_doopts == 2) {
119 code = ICMP_UNREACH_FILTER_PROHIB;
/*
 * Options begin right after the fixed header; cnt is the number of option
 * bytes (header length in bytes minus sizeof(struct ip)).  Each iteration
 * consumes optlen bytes.
 */
125 cp = (u_char *)(ip + 1);
126 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
127 for (; cnt > 0; cnt -= optlen, cp += optlen) {
128 opt = cp[IPOPT_OPTVAL];
129 if (opt == IPOPT_EOL)
131 if (opt == IPOPT_NOP)
/*
 * Any other option needs a length byte.  If the remaining bytes cannot hold
 * it, code points at the position of the length byte.
 */
134 if (cnt < IPOPT_OLEN + sizeof(*cp)) {
135 code = &cp[IPOPT_OLEN] - (u_char *)ip;
/*
 * The declared length must cover the type and length bytes and must not
 * exceed what is left of the header.
 */
138 optlen = cp[IPOPT_OLEN];
139 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
140 code = &cp[IPOPT_OLEN] - (u_char *)ip;
150 * Source routing with record. Find interface with current
151 * destination address. If none on this machine then drop if
152 * strictly routed, or do nothing if loosely routed. Record
153 * interface address and bring up next address component. If
154 * strictly routed make sure next address is on directly
160 if (V_ipstealth && pass > 0)
/* The option must be long enough to contain the pointer byte. */
163 if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
164 code = &cp[IPOPT_OLEN] - (u_char *)ip;
/* The pointer byte may not be smaller than IPOPT_MINOFF. */
167 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
168 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
/* Look up the current destination among the local addresses. */
171 ipaddr.sin_addr = ip->ip_dst;
172 if (ifa_ifwithaddr_check((struct sockaddr *)&ipaddr)
174 if (opt == IPOPT_SSRR) {
176 code = ICMP_UNREACH_SRCFAIL;
179 if (!V_ip_dosourceroute)
180 goto nosourcerouting;
182 * Loose routing, and not at next destination
183 * yet; nothing to do except forward.
187 off--; /* 0 origin */
188 if (off > optlen - (int)sizeof(struct in_addr)) {
190 * End of source route. Should be for us.
192 if (!V_ip_acceptsourceroute)
193 goto nosourcerouting;
/* Keep the option and the packet source in an mbuf tag via save_rte(). */
194 save_rte(m, cp, ip->ip_src);
201 if (!V_ip_dosourceroute) {
202 if (V_ipforwarding) {
203 char srcbuf[INET_ADDRSTRLEN];
204 char dstbuf[INET_ADDRSTRLEN];
207 * Acting as a router, so generate
212 "attempted source route from %s "
214 inet_ntoa_r(ip->ip_src, srcbuf),
215 inet_ntoa_r(ip->ip_dst, dstbuf));
217 code = ICMP_UNREACH_SRCFAIL;
221 * Not acting as a router, so
227 IPSTAT_INC(ips_cantforward);
235 * locate outgoing interface
/* The next hop is the address stored in the option at the current offset. */
237 (void)memcpy(&ipaddr.sin_addr, cp + off,
238 sizeof(ipaddr.sin_addr));
241 code = ICMP_UNREACH_SRCFAIL;
243 if (opt == IPOPT_SSRR) {
244 #define INA struct in_ifaddr *
245 #define SA struct sockaddr *
246 ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr,
249 ia = (INA)ifa_ifwithnet((SA)&ipaddr, 0,
/* Replace the consumed slot with the address of the interface found. */
254 memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
255 sizeof(struct in_addr));
257 /* XXX MRT 0 for routing */
258 if (fib4_lookup_nh_ext(M_GETFIB(m),
259 ipaddr.sin_addr, 0, 0, &nh_ext) != 0)
/* Replace the consumed slot with the source address from the FIB lookup. */
262 memcpy(cp + off, &nh_ext.nh_src,
263 sizeof(struct in_addr));
/*
 * Point the packet at the next hop and move the option pointer forward by
 * one address.
 */
266 ip->ip_dst = ipaddr.sin_addr;
267 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
269 * Let ip_intr's mcast routing check handle mcast pkts
271 forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
276 if (V_ipstealth && pass == 0)
279 if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
280 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
283 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
284 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
288 * If no space remains, ignore.
290 off--; /* 0 origin */
291 if (off > optlen - (int)sizeof(struct in_addr))
293 (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
294 sizeof(ipaddr.sin_addr));
296 * Locate outgoing interface; if we're the
297 * destination, use the incoming interface (should be
/*
 * Record the address of a local interface that owns the destination if one
 * exists; otherwise record the source address returned by a successful FIB
 * lookup.  If neither is available, code becomes ICMP_UNREACH_HOST.
 */
300 if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) != NULL) {
301 memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
302 sizeof(struct in_addr));
303 } else if (fib4_lookup_nh_ext(M_GETFIB(m),
304 ipaddr.sin_addr, 0, 0, &nh_ext) == 0) {
305 memcpy(cp + off, &nh_ext.nh_src,
306 sizeof(struct in_addr));
309 code = ICMP_UNREACH_HOST;
/* One address was written: advance the pointer byte accordingly. */
312 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
317 if (V_ipstealth && pass == 0)
/*
 * Timestamp handling (the format is chosen by the IPOPT_TS_* cases below).
 * code starts at the offset of the option itself; the length must lie in
 * the range 4..40 and the pointer byte must be at least 5.
 */
320 code = cp - (u_char *)ip;
321 if (optlen < 4 || optlen > 40) {
322 code = &cp[IPOPT_OLEN] - (u_char *)ip;
325 if ((off = cp[IPOPT_OFFSET]) < 5) {
326 code = &cp[IPOPT_OLEN] - (u_char *)ip;
/*
 * No room left for another 32-bit value: increment the counter kept in the
 * upper four bits of the byte after the pointer.  A result of zero in those
 * bits means the counter wrapped, which is treated as an error.
 */
329 if (off > optlen - (int)sizeof(int32_t)) {
330 cp[IPOPT_OFFSET + 1] += (1 << 4);
331 if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
332 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
337 off--; /* 0 origin */
/* sin addresses the current slot; the low four bits select the format. */
338 sin = (struct in_addr *)(cp + off);
339 switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
341 case IPOPT_TS_TSONLY:
344 case IPOPT_TS_TSANDADDR:
/* The slot must have room for both an address and a timestamp. */
345 if (off + sizeof(uint32_t) +
346 sizeof(struct in_addr) > optlen) {
347 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
350 ipaddr.sin_addr = dst;
351 ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
/* Write the interface address, then step past it to the timestamp slot. */
355 (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
356 sizeof(struct in_addr));
357 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
358 off += sizeof(struct in_addr);
361 case IPOPT_TS_PRESPEC:
362 if (off + sizeof(uint32_t) +
363 sizeof(struct in_addr) > optlen) {
364 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
/* The address is already in the slot; test whether it is a local one. */
367 (void)memcpy(&ipaddr.sin_addr, sin,
368 sizeof(struct in_addr));
369 if (ifa_ifwithaddr_check((SA)&ipaddr) == 0)
371 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
372 off += sizeof(struct in_addr);
/* Unrecognised format value: point code at the byte holding it. */
376 code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
/* Store ntime in the current slot and advance the pointer past it. */
380 (void)memcpy(cp + off, &ntime, sizeof(uint32_t));
381 cp[IPOPT_OFFSET] += sizeof(uint32_t);
/* Forwarding requires both the flag set above and V_ipforwarding. */
385 if (forward && V_ipforwarding) {
/*
 * Report the problem through ICMP using the accumulated type and code, and
 * count it in the ips_badoptions statistic.
 */
393 icmp_error(m, type, code, 0, 0);
394 IPSTAT_INC(ips_badoptions);
399 * Save incoming source route for use in replies, to be picked up later by
400 * ip_srcroute if the receiver is interested.
/*
 * save_rte: copy a source-route option into a PACKET_TAG_IPOPTIONS mbuf tag
 * and attach that tag to m.
 *
 *   m      - packet that receives the tag (via m_tag_prepend()).
 *   option - start of the option; option[IPOPT_OLEN] is the number of bytes
 *            copied into the tag.
 *   dst    - address stored in the ip_srcrt.dst member of the tag.
 *
 * The tag is requested with M_NOWAIT.
 * NOTE(review): the handling of a failed allocation is not visible in this
 * view; confirm against the complete file.
 */
403 save_rte(struct mbuf *m, u_char *option, struct in_addr dst)
406 struct ipopt_tag *opts;
408 opts = (struct ipopt_tag *)m_tag_get(PACKET_TAG_IPOPTIONS,
409 sizeof(struct ipopt_tag), M_NOWAIT);
/*
 * An option that would not fit in ip_srcrt once 1 + sizeof(dst) bytes are
 * set aside causes the new tag to be released.
 */
413 olen = option[IPOPT_OLEN];
414 if (olen > sizeof(opts->ip_srcrt) - (1 + sizeof(dst))) {
415 m_tag_free((struct m_tag *)opts);
418 bcopy(option, opts->ip_srcrt.srcopt, olen);
/*
 * Hop count: the bytes that follow the first IPOPT_OFFSET + 1 bytes of the
 * option, divided by the size of one address.
 */
419 opts->ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
420 opts->ip_srcrt.dst = dst;
421 m_tag_prepend(m, (struct m_tag *)opts);
425 * Retrieve incoming source route for use in replies, in the same form used
426 * by setsockopt. The first hop is placed before the options, will be
/*
 * ip_srcroute: build an mbuf describing the return route for the source
 * route previously stored on m0 as a PACKET_TAG_IPOPTIONS tag.
 *
 *   m0 - packet carrying the tag; the tag is deleted from it at the end.
 *
 * Layout written into the new mbuf, in order: one struct in_addr holding
 * the first hop, OPTSIZ bytes copied from the tag starting at its nop
 * member, then the address list that q walks over, ending with
 * ip_srcrt.dst.
 *
 * The mbuf is requested with M_NOWAIT.
 * NOTE(review): the return statements and the allocation-failure handling
 * are not visible in this view; confirm against the complete file.
 */
430 ip_srcroute(struct mbuf *m0)
432 struct in_addr *p, *q;
434 struct ipopt_tag *opts;
436 opts = (struct ipopt_tag *)m_tag_find(m0, PACKET_TAG_IPOPTIONS, NULL);
/* A tag with no recorded hops gets special treatment. */
440 if (opts->ip_nhops == 0)
442 m = m_get(M_NOWAIT, MT_DATA);
/* Combined size of the nop and srcopt members of the saved route. */
446 #define OPTSIZ (sizeof(opts->ip_srcrt.nop) + sizeof(opts->ip_srcrt.srcopt))
448 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
449 m->m_len = opts->ip_nhops * sizeof(struct in_addr) +
450 sizeof(struct in_addr) + OPTSIZ;
453 * First, save first hop for return route.
/*
 * p starts at the last recorded hop; that hop becomes the first address in
 * the mbuf and p then steps back by one entry.
 */
455 p = &(opts->ip_srcrt.route[opts->ip_nhops - 1]);
456 *(mtod(m, struct in_addr *)) = *p--;
459 * Copy option fields and padding (nop) to mbuf.
/* Reset the pointer byte of the saved option to IPOPT_MINOFF before copying. */
461 opts->ip_srcrt.nop = IPOPT_NOP;
462 opts->ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
463 (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
464 &(opts->ip_srcrt.nop), OPTSIZ);
/* q addresses the first address slot after the copied option bytes. */
465 q = (struct in_addr *)(mtod(m, caddr_t) +
466 sizeof(struct in_addr) + OPTSIZ);
469 * Record return path as an IP source route, reversing the path
470 * (pointers are now aligned).
/* Runs while p has not moved before the first element of route. */
472 while (p >= opts->ip_srcrt.route) {
476 * Last hop goes to final destination.
478 *q = opts->ip_srcrt.dst;
/* The saved route has been consumed; drop the tag from m0. */
479 m_tag_delete(m0, (struct m_tag *)opts);
484 * Strip out IP options, at higher level protocol in the kernel.
/*
 * ip_stripoptions: remove the options from the IP header at the start of m.
 *
 *   m - mbuf whose data begins with the IP header; modified in place.
 *
 * The header length field is reset to the size of the fixed header, ip_len
 * shrinks by the number of option bytes, and the packet header length is
 * reduced as well when M_PKTHDR is set.
 */
487 ip_stripoptions(struct mbuf *m)
489 struct ip *ip = mtod(m, struct ip *);
/* olen: number of option bytes currently present in the header. */
492 olen = (ip->ip_hl << 2) - sizeof(struct ip);
494 if (m->m_flags & M_PKTHDR)
495 m->m_pkthdr.len -= olen;
496 ip->ip_len = htons(ntohs(ip->ip_len) - olen);
497 ip->ip_hl = sizeof(struct ip) >> 2;
/*
 * Slide the data that followed the options down so that it sits directly
 * after the fixed header.
 */
499 bcopy((char *)ip + sizeof(struct ip) + olen, (ip + 1),
500 (size_t )(m->m_len - sizeof(struct ip)));
504 * Insert IP options into preformed packet. Adjust IP destination as
505 * required for IP source routing, as indicated by a non-zero in_addr at the
506 * start of the options.
508 * XXX This routine assumes that the packet has no options in place.
/*
 * ip_insertoptions: place the options carried in opt behind the fixed IP
 * header of packet m.
 *
 *   m     - packet whose data begins with an IP header.
 *   opt   - mbuf read as struct ipoption (members ipopt_dst and ipopt_list).
 *   phlen - out: receives sizeof(struct ip) + optlen once the options are in
 *           place.
 *
 * optlen is the length of opt minus the size of ipopt_dst.  If adding it to
 * ip_len would exceed IP_MAXPACKET, m is returned without the options.  A
 * non-zero ipopt_dst replaces the destination address of the packet.
 *
 * NOTE(review): some statements of the mbuf handling are not visible in this
 * view; the comments below describe only the lines shown.
 */
511 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
513 struct ipoption *p = mtod(opt, struct ipoption *);
515 struct ip *ip = mtod(m, struct ip *);
518 optlen = opt->m_len - sizeof(p->ipopt_dst);
519 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) {
521 return (m); /* XXX should fail */
523 if (p->ipopt_dst.s_addr)
524 ip->ip_dst = p->ipopt_dst;
/*
 * If m cannot be written or lacks optlen bytes of leading space, a fresh
 * header mbuf is requested with M_NOWAIT to hold the new IP header.
 */
525 if (!M_WRITABLE(m) || M_LEADINGSPACE(m) < optlen) {
526 n = m_gethdr(M_NOWAIT, MT_DATA);
532 n->m_pkthdr.rcvif = NULL;
533 n->m_pkthdr.len += optlen;
/* Detach the old fixed header from the data of m. */
534 m->m_len -= sizeof(struct ip);
535 m->m_data += sizeof(struct ip);
/* Size the header mbuf for header plus options, leaving max_linkhdr in front. */
538 m->m_len = optlen + sizeof(struct ip);
539 m->m_data += max_linkhdr;
540 bcopy(ip, mtod(m, void *), sizeof(struct ip));
/* Other path: account for the options in the packet length and copy the header. */
544 m->m_pkthdr.len += optlen;
545 bcopy(ip, mtod(m, void *), sizeof(struct ip));
/* Reload ip, append the option bytes and fix up the header fields. */
547 ip = mtod(m, struct ip *);
548 bcopy(p->ipopt_list, ip + 1, optlen);
549 *phlen = sizeof(struct ip) + optlen;
550 ip->ip_v = IPVERSION;
551 ip->ip_hl = *phlen >> 2;
552 ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
557 * Copy options from ip to jp, omitting those not copied during
/*
 * ip_optcopy: copy options from the header at ip to the header at jp.
 *
 *   ip - source header; its options start at ip + 1.
 *   jp - destination header; options are written starting at jp + 1.
 *
 * Options for which IPOPT_COPIED(opt) is true are copied with bcopy().
 * Option lengths are only checked through KASSERT here.
 *
 * NOTE(review): the return value is not visible in this view.
 */
561 ip_optcopy(struct ip *ip, struct ip *jp)
564 int opt, optlen, cnt;
566 cp = (u_char *)(ip + 1);
567 dp = (u_char *)(jp + 1);
/* cnt: number of option bytes in the source header. */
568 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
569 for (; cnt > 0; cnt -= optlen, cp += optlen) {
571 if (opt == IPOPT_EOL)
573 if (opt == IPOPT_NOP) {
574 /* Preserve for IP mcast tunnel's LSRR alignment. */
/* The remaining bytes must be able to hold a length byte. */
580 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
581 ("ip_optcopy: malformed ipv4 option"));
582 optlen = cp[IPOPT_OLEN];
583 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
584 ("ip_optcopy: malformed ipv4 option"));
586 /* Bogus lengths should have been caught by ip_dooptions. */
589 if (IPOPT_COPIED(opt)) {
590 bcopy(cp, dp, optlen);
/*
 * optlen restarts at the number of bytes written to jp and is stepped up
 * until it is a multiple of four.
 */
594 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
600 * Set up IP options in pcb for insertion in output packets. Store in mbuf
601 * with pointer in pcbopt, adding pseudo-option with destination address if
/*
 * ip_pcbopts: install the IP options held in m as the options of inp.
 *
 *   inp     - protocol control block; INP_WLOCK_ASSERT() requires its write
 *             lock to be held by the caller.
 *   optname - option name supplied by the caller (not used in the lines
 *             visible here).
 *   m       - mbuf with the raw options; NULL or zero length means that any
 *             previous options are only being turned off.
 *
 * The previous options mbuf in inp->inp_options is released with m_free().
 * The new options are shifted back by sizeof(struct in_addr) so that a
 * first-hop address can be kept in front of them, and every option is
 * length-checked.
 *
 * NOTE(review): the error returns are not visible in this view; confirm the
 * exact values against the complete file.
 */
605 ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
609 struct mbuf **pcbopt;
612 INP_WLOCK_ASSERT(inp);
614 pcbopt = &inp->inp_options;
616 /* turn off any old options */
618 (void)m_free(*pcbopt);
620 if (m == NULL || m->m_len == 0) {
622 * Only turning off any previous options.
/* The option bytes must come in multiples of four. */
629 if (m->m_len % sizeof(int32_t))
632 * IP first-hop destination address will be stored before actual
633 * options; move other options back and clear it when none present.
/* Check that growing the data by one address stays inside m_dat[MLEN]. */
635 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
/* Open a zeroed in_addr slot at the front; cp then addresses the options. */
638 m->m_len += sizeof(struct in_addr);
639 cp = mtod(m, u_char *) + sizeof(struct in_addr);
640 bcopy(mtod(m, void *), cp, (unsigned)cnt);
641 bzero(mtod(m, void *), sizeof(struct in_addr));
/* Walk the options with the same length checks that ip_dooptions() applies. */
643 for (; cnt > 0; cnt -= optlen, cp += optlen) {
644 opt = cp[IPOPT_OPTVAL];
645 if (opt == IPOPT_EOL)
647 if (opt == IPOPT_NOP)
650 if (cnt < IPOPT_OLEN + sizeof(*cp))
652 optlen = cp[IPOPT_OLEN];
653 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
664 * User process specifies route as:
668 * D must be our final destination (but we can't
669 * check that since we may not have connected yet).
670 * A is first hop destination, which doesn't appear
671 * in actual IP option, but is stored before the
674 /* XXX-BZ PRIV_NETINET_SETHDROPTS? */
/* A source route must contain at least one address after its header bytes. */
675 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
/* Removing the first hop shortens the mbuf, the count and the option by one address. */
677 m->m_len -= sizeof(struct in_addr);
678 cnt -= sizeof(struct in_addr);
679 optlen -= sizeof(struct in_addr);
680 cp[IPOPT_OLEN] = optlen;
682 * Move first hop before start of options.
684 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
685 sizeof(struct in_addr));
687 * Then copy rest of options back
688 * to close up the deleted entry.
690 bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
692 (unsigned)cnt - (IPOPT_MINOFF - 1));
/* The final length may not exceed MAX_IPOPTLEN plus the first-hop slot. */
696 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
707 * Check for the presence of the IP Router Alert option [RFC2113]
708 * in the header of an IPv4 datagram.
710 * This call is not intended for use from the forwarding path; it is here
711 * so that protocol domains may check for the presence of the option.
712 * Given how FreeBSD's IPv4 stack is currently structured, the Router Alert
713 * option does not have much relevance to the implementation, though this
714 * may change in future.
715 * Router alert options SHOULD be passed if running in IPSTEALTH mode and
716 * we are not the endpoint.
717 * Length checks on individual options should already have been performed
718 * by ip_dooptions() therefore they are folded under INVARIANTS here.
720 * Return zero if not present or options are invalid, non-zero if present.
723 ip_checkrouteralert(struct mbuf *m)
725 struct ip *ip = mtod(m, struct ip *);
727 int opt, optlen, cnt, found_ra;
730 cp = (u_char *)(ip + 1);
731 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
732 for (; cnt > 0; cnt -= optlen, cp += optlen) {
733 opt = cp[IPOPT_OPTVAL];
734 if (opt == IPOPT_EOL)
736 if (opt == IPOPT_NOP)
740 if (cnt < IPOPT_OLEN + sizeof(*cp))
743 optlen = cp[IPOPT_OLEN];
745 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
752 if (optlen != IPOPT_OFFSET + sizeof(uint16_t) ||
753 (*((uint16_t *)&cp[IPOPT_OFFSET]) != 0))