2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California.
4 * Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include "opt_ipstealth.h"
37 #include <sys/param.h>
38 #include <sys/systm.h>
40 #include <sys/domain.h>
41 #include <sys/protosw.h>
42 #include <sys/socket.h>
44 #include <sys/kernel.h>
45 #include <sys/syslog.h>
46 #include <sys/sysctl.h>
49 #include <net/if_types.h>
50 #include <net/if_var.h>
51 #include <net/if_dl.h>
52 #include <net/route.h>
53 #include <net/netisr.h>
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip.h>
60 #include <netinet/in_pcb.h>
61 #include <netinet/ip_var.h>
62 #include <netinet/ip_options.h>
63 #include <netinet/ip_icmp.h>
64 #include <machine/in_cksum.h>
66 #include <sys/socketvar.h>
68 #include <security/mac/mac_framework.h>
/*
 * Run-time switch tested by ip_dooptions() before a packet is moved along
 * an [LS]SRR source route.  Defaults to 0 and is exported read/write
 * (CTLFLAG_RW) as sourceroute under _net_inet_ip.
 */
70 static int ip_dosourceroute = 0;
71 SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
72 &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
/*
 * Run-time switch tested by ip_dooptions() once a source route has reached
 * its end; only when it is non-zero does the visible code go on to call
 * save_rte().  Defaults to 0 and is exported read/write as
 * accept_sourceroute under _net_inet_ip.
 */
74 static int ip_acceptsourceroute = 0;
75 SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
76 CTLFLAG_RW, &ip_acceptsourceroute, 0,
77 "Enable accepting source routed IP packets");
/*
 * Overall options policy.  Not static, so it has external linkage.
 * Defaults to 1; the meaning of each value is listed in the trailing
 * comment.  Exported read/write as process_options under _net_inet_ip.
 */
79 int ip_doopts = 1; /* 0 = ignore, 1 = process, 2 = reject */
80 SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
81 &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");
/* Forward declaration of the file-local helper defined further down. */
83 static void save_rte(struct mbuf *m, u_char *, struct in_addr);
86 * Do option processing on a datagram, possibly discarding it if bad options
87 * are encountered, or forwarding it if source-routed.
89 * The pass argument is used when operating in the IPSTEALTH mode to tell
90 * what options to process: [LS]SRR (pass 0) or the others (pass 1). The
91 * reason for as many as two passes is that when doing IPSTEALTH, non-routing
92 * options should be processed only if the packet is for us.
94 * Returns 1 if packet has been forwarded/freed, 0 if the packet should be
/*
 * ip_dooptions: walk the IPv4 options that follow the fixed header of the
 * packet held in m and act on the source-route, record-route and timestamp
 * options found there.
 *
 * m     packet whose data begins with the IP header (obtained with mtod()).
 * pass  tested together with V_ipstealth to select which group of options
 *       is handled on this call: the source-route section is guarded by a
 *       test for pass greater than 0, the two later sections by a test for
 *       pass equal to 0.
 *
 * For a malformed option, code is set to the offset of the offending octet
 * measured from the start of the IP header.  type is initialised to
 * ICMP_PARAMPROB.  Both are handed to icmp_error() at the end of the
 * function.
 *
 * NOTE(review): several short lines (braces, labels, break, goto and return
 * statements) are not visible in this copy of the function, so branch
 * targets and return values must be confirmed against the complete file.
 */
98 ip_dooptions(struct mbuf *m, int pass)
100 struct ip *ip = mtod(m, struct ip *);
102 struct in_ifaddr *ia;
103 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
104 struct in_addr *sin, dst;
/* Scratch AF_INET sockaddr reused for the address lookups below. */
106 struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
108 /* Ignore or reject packets with IP options. */
/* Policy value 2: the ICMP code selected is ICMP_UNREACH_FILTER_PROHIB. */
111 else if (ip_doopts == 2) {
113 code = ICMP_UNREACH_FILTER_PROHIB;
/*
 * Options start right after the fixed header.  cnt is the number of option
 * octets: the header length field counts 32-bit words, hence the shift by 2.
 */
118 cp = (u_char *)(ip + 1);
119 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
/* Each iteration consumes optlen octets of the option area. */
120 for (; cnt > 0; cnt -= optlen, cp += optlen) {
121 opt = cp[IPOPT_OPTVAL];
122 if (opt == IPOPT_EOL)
124 if (opt == IPOPT_NOP)
/* Any other option needs room for at least its type and length octets. */
127 if (cnt < IPOPT_OLEN + sizeof(*cp)) {
128 code = &cp[IPOPT_OLEN] - (u_char *)ip;
/*
 * The stated length must cover the type and length octets and must not
 * run past the option octets that remain.
 */
131 optlen = cp[IPOPT_OLEN];
132 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
133 code = &cp[IPOPT_OLEN] - (u_char *)ip;
143 * Source routing with record. Find interface with current
144 * destination address. If none on this machine then drop if
145 * strictly routed, or do nothing if loosely routed. Record
146 * interface address and bring up next address component. If
147 * strictly routed make sure next address is on directly
/* In IPSTEALTH mode this section is guarded by a test for pass above 0. */
153 if (V_ipstealth && pass > 0)
/* The option must be long enough to contain its offset (pointer) octet. */
156 if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
157 code = &cp[IPOPT_OLEN] - (u_char *)ip;
/* A pointer below IPOPT_MINOFF is rejected. */
160 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
161 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
/*
 * Look the current destination up among the local addresses.
 * NOTE(review): the value the result is compared with is on a line that
 * is not visible here.
 */
164 ipaddr.sin_addr = ip->ip_dst;
165 if (ifa_ifwithaddr_check((struct sockaddr *)&ipaddr)
167 if (opt == IPOPT_SSRR) {
169 code = ICMP_UNREACH_SRCFAIL;
172 if (!ip_dosourceroute)
173 goto nosourcerouting;
175 * Loose routing, and not at next destination
176 * yet; nothing to do except forward.
180 off--; /* 0 origin */
/* True when no complete address slot is left at off. */
181 if (off > optlen - (int)sizeof(struct in_addr)) {
183 * End of source route. Should be for us.
/*
 * Unless accepting source routes is disabled, hand the option and the
 * packet source address to save_rte().
 */
185 if (!ip_acceptsourceroute)
186 goto nosourcerouting;
187 save_rte(m, cp, ip->ip_src);
/* Forwarding along source routes is switched off. */
194 if (!ip_dosourceroute) {
195 if (V_ipforwarding) {
196 char buf[16]; /* aaa.bbb.ccc.ddd\0 */
198 * Acting as a router, so generate
/*
 * The destination text is copied into buf before inet_ntoa() is called
 * again for the source address, presumably because inet_ntoa() reuses one
 * result buffer (TODO confirm).
 */
202 strcpy(buf, inet_ntoa(ip->ip_dst));
204 "attempted source route from %s to %s\n",
205 inet_ntoa(ip->ip_src), buf);
207 code = ICMP_UNREACH_SRCFAIL;
211 * Not acting as a router, so
217 IPSTAT_INC(ips_cantforward);
224 * locate outgoing interface
/* Fetch the next hop from the slot at cp + off. */
226 (void)memcpy(&ipaddr.sin_addr, cp + off,
227 sizeof(ipaddr.sin_addr));
/*
 * Strict routes resolve the next hop with ifa_ifwithdstaddr() and, failing
 * that, ifa_ifwithnet().  The other lookup visible below goes through
 * ip_rtaddr() using the FIB of the mbuf.
 */
229 if (opt == IPOPT_SSRR) {
230 #define INA struct in_ifaddr *
231 #define SA struct sockaddr *
232 if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL)
233 ia = (INA)ifa_ifwithnet((SA)&ipaddr, 0);
235 /* XXX MRT 0 for routing */
236 ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m));
239 code = ICMP_UNREACH_SRCFAIL;
/*
 * Make the next hop the new destination, overwrite the slot just read
 * with the address taken from ia, release the ifaddr reference and move
 * the option pointer on by one address.
 */
242 ip->ip_dst = ipaddr.sin_addr;
243 (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
244 sizeof(struct in_addr));
245 ifa_free(&ia->ia_ifa);
246 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
/* forward is set only when the new destination is not multicast. */
248 * Let ip_intr's mcast routing check handle mcast pkts
250 forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
255 if (V_ipstealth && pass == 0)
258 if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
259 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
262 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
263 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
267 * If no space remains, ignore.
269 off--; /* 0 origin */
270 if (off > optlen - (int)sizeof(struct in_addr))
272 (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
273 sizeof(ipaddr.sin_addr));
275 * Locate outgoing interface; if we're the
276 * destination, use the incoming interface (should be
/*
 * Record-route section: the checks above mirror the source-route ones
 * (offset octet present, pointer at least IPOPT_MINOFF, one free slot).
 * The destination is looked up with ifa_ifwithaddr() first and ip_rtaddr()
 * second; when both return NULL the code is ICMP_UNREACH_HOST.
 */
279 if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL &&
280 (ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m))) == NULL) {
282 code = ICMP_UNREACH_HOST;
/* Store the address from ia in the free slot and advance the pointer. */
285 (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
286 sizeof(struct in_addr));
287 ifa_free(&ia->ia_ifa);
288 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
/* Timestamp section, guarded in IPSTEALTH mode by a test for pass 0. */
293 if (V_ipstealth && pass == 0)
296 code = cp - (u_char *)ip;
/* Option length outside 4..40 octets is rejected. */
297 if (optlen < 4 || optlen > 40) {
298 code = &cp[IPOPT_OLEN] - (u_char *)ip;
/* A pointer below 5 is rejected. */
301 if ((off = cp[IPOPT_OFFSET]) < 5) {
302 code = &cp[IPOPT_OLEN] - (u_char *)ip;
/*
 * No room for another 32-bit value: increment the counter kept in the
 * upper four bits of the octet after the pointer (the overflow count of
 * RFC 791).  If that counter wraps to zero the pointer octet is reported.
 */
305 if (off > optlen - (int)sizeof(int32_t)) {
306 cp[IPOPT_OFFSET + 1] += (1 << 4);
307 if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
308 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
313 off--; /* 0 origin */
314 sin = (struct in_addr *)(cp + off);
/* The low four bits of the same octet select the timestamp format. */
315 switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
317 case IPOPT_TS_TSONLY:
/*
 * Address plus timestamp: both must fit in the option.  The address
 * written comes from ifaof_ifpforaddr() looked up for dst.
 * NOTE(review): the assignment to dst and the second argument of the
 * lookup are on lines not visible here.
 */
320 case IPOPT_TS_TSANDADDR:
321 if (off + sizeof(uint32_t) +
322 sizeof(struct in_addr) > optlen) {
323 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
326 ipaddr.sin_addr = dst;
327 ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
331 (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
332 sizeof(struct in_addr));
333 ifa_free(&ia->ia_ifa);
334 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
335 off += sizeof(struct in_addr);
/*
 * Prespecified address: the address already present in the slot is
 * checked with ifa_ifwithaddr_check().  NOTE(review): the statement run
 * when that check returns 0 is not visible here.
 */
338 case IPOPT_TS_PRESPEC:
339 if (off + sizeof(uint32_t) +
340 sizeof(struct in_addr) > optlen) {
341 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
344 (void)memcpy(&ipaddr.sin_addr, sin,
345 sizeof(struct in_addr));
346 if (ifa_ifwithaddr_check((SA)&ipaddr) == 0)
348 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
349 off += sizeof(struct in_addr);
/* Here code points at the flags octet itself. */
353 code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
/*
 * Write the 32-bit value ntime at the current slot and advance the
 * pointer.  ntime is declared and set on lines not visible here.
 */
357 (void)memcpy(cp + off, &ntime, sizeof(uint32_t));
358 cp[IPOPT_OFFSET] += sizeof(uint32_t);
/* A source route was advanced above and forwarding is enabled. */
361 if (forward && V_ipforwarding) {
/* Error exit: send the ICMP error built from type and code, then count it. */
367 icmp_error(m, type, code, 0, 0);
368 IPSTAT_INC(ips_badoptions);
373 * Save incoming source route for use in replies, to be picked up later by
374 * ip_srcroute if the receiver is interested.
/*
 * save_rte: copy a source-route option into a newly allocated
 * PACKET_TAG_IPOPTIONS mbuf tag and attach that tag to m.
 *
 * m       packet that receives the tag.
 * option  start of the option; its length is read from option[IPOPT_OLEN].
 * dst     address stored in the tag as ip_srcrt.dst.  The call visible in
 *         ip_dooptions() passes the source address of the packet.
 */
377 save_rte(struct mbuf *m, u_char *option, struct in_addr dst)
380 struct ipopt_tag *opts;
/*
 * M_NOWAIT allocation.  NOTE(review): the check of the result is on lines
 * not visible in this copy.
 */
382 opts = (struct ipopt_tag *)m_tag_get(PACKET_TAG_IPOPTIONS,
383 sizeof(struct ipopt_tag), M_NOWAIT);
387 olen = option[IPOPT_OLEN];
/* An option too large for ip_srcrt is not stored; the tag is released. */
388 if (olen > sizeof(opts->ip_srcrt) - (1 + sizeof(dst))) {
389 m_tag_free((struct m_tag *)opts);
392 bcopy(option, opts->ip_srcrt.srcopt, olen);
/*
 * Number of addresses carried: the option length minus the octets up to
 * and including the offset octet, divided by the size of one address.
 */
393 opts->ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
394 opts->ip_srcrt.dst = dst;
395 m_tag_prepend(m, (struct m_tag *)opts);
399 * Retrieve incoming source route for use in replies, in the same form used
400 * by setsockopt. The first hop is placed before the options, will be
/*
 * ip_srcroute: build, in a fresh mbuf, the reversed form of the source
 * route stored in the PACKET_TAG_IPOPTIONS tag of m0, then delete the tag.
 *
 * Layout written into the new mbuf: one address (the first hop of the
 * return route), then the nop octet and the source-route option header,
 * then the remaining hops in reverse order, ending with ip_srcrt.dst.
 *
 * NOTE(review): the handling of a missing tag, of a zero hop count and of
 * a failed m_get(), as well as the return statements, are on lines not
 * visible in this copy.
 */
404 ip_srcroute(struct mbuf *m0)
406 struct in_addr *p, *q;
408 struct ipopt_tag *opts;
410 opts = (struct ipopt_tag *)m_tag_find(m0, PACKET_TAG_IPOPTIONS, NULL);
414 if (opts->ip_nhops == 0)
416 m = m_get(M_DONTWAIT, MT_DATA);
/* Size of the nop octet plus the stored option header. */
420 #define OPTSIZ (sizeof(opts->ip_srcrt.nop) + sizeof(opts->ip_srcrt.srcopt))
422 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
423 m->m_len = opts->ip_nhops * sizeof(struct in_addr) +
424 sizeof(struct in_addr) + OPTSIZ;
427 * First, save first hop for return route.
/* p starts at the last recorded hop and steps backwards from there. */
429 p = &(opts->ip_srcrt.route[opts->ip_nhops - 1]);
430 *(mtod(m, struct in_addr *)) = *p--;
433 * Copy option fields and padding (nop) to mbuf.
/* The pointer octet of the copied option is reset to IPOPT_MINOFF. */
435 opts->ip_srcrt.nop = IPOPT_NOP;
436 opts->ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
437 (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
438 &(opts->ip_srcrt.nop), OPTSIZ);
/* q addresses the first route slot in the new mbuf. */
439 q = (struct in_addr *)(mtod(m, caddr_t) +
440 sizeof(struct in_addr) + OPTSIZ);
443 * Record return path as an IP source route, reversing the path
444 * (pointers are now aligned).
/* Runs while p has not moved below the first stored hop. */
446 while (p >= opts->ip_srcrt.route) {
450 * Last hop goes to final destination.
452 *q = opts->ip_srcrt.dst;
/* The tag has been consumed; remove it from m0. */
453 m_tag_delete(m0, (struct m_tag *)opts);
458 * Strip out IP options, at higher level protocol in the kernel. Second
459 * argument is buffer to which options will be moved, and return value is
462 * XXX should be deleted; last arg currently ignored.
/*
 * ip_stripoptions: remove the IP options from the header at the front of m
 * by sliding the data that follows them down over the option area.
 *
 * m     packet whose data begins with the IP header.
 * mopt  not referenced in any visible line of this function.
 *
 * NOTE(review): the declarations of i, opts and olen and any adjustment of
 * m->m_len are on lines not visible in this copy.
 */
465 ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
468 struct ip *ip = mtod(m, struct ip *);
/* olen: option octets, i.e. header length in octets minus the fixed part. */
472 olen = (ip->ip_hl << 2) - sizeof (struct ip);
473 opts = (caddr_t)(ip + 1);
/* i: octets in this mbuf that follow the full header (fixed + options). */
474 i = m->m_len - (sizeof (struct ip) + olen);
475 bcopy(opts + olen, opts, (unsigned)i);
/* Keep the packet header length in step when one is present. */
477 if (m->m_flags & M_PKTHDR)
478 m->m_pkthdr.len -= olen;
/* The header is now the fixed size; ip_hl counts 32-bit words. */
479 ip->ip_v = IPVERSION;
480 ip->ip_hl = sizeof(struct ip) >> 2;
484 * Insert IP options into preformed packet. Adjust IP destination as
485 * required for IP source routing, as indicated by a non-zero in_addr at the
486 * start of the options.
488 * XXX This routine assumes that the packet has no options in place.
/*
 * ip_insertoptions: place the options held in opt directly after the fixed
 * IP header at the front of m.
 *
 * m      packet whose data begins with an IP header.
 * opt    mbuf laid out as struct ipoption: an address (ipopt_dst) followed
 *        by the option octets (ipopt_list).
 * phlen  receives the new header length in octets.
 *
 * NOTE(review): the final return statement and parts of both branches are
 * on lines not visible in this copy.
 */
491 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
493 struct ipoption *p = mtod(opt, struct ipoption *);
495 struct ip *ip = mtod(m, struct ip *);
/* Option octets only: the leading address is not part of the header. */
498 optlen = opt->m_len - sizeof(p->ipopt_dst);
/* Adding the options would exceed IP_MAXPACKET: return m unchanged. */
499 if (optlen + ip->ip_len > IP_MAXPACKET) {
501 return (m); /* XXX should fail */
/* A non-zero leading address replaces the packet destination. */
503 if (p->ipopt_dst.s_addr)
504 ip->ip_dst = p->ipopt_dst;
/*
 * Taken when m uses external storage or has fewer than optlen octets of
 * space between m_pktdat and m_data: a new packet-header mbuf is obtained
 * with MGETHDR, the fixed header is taken off the old mbuf and copied into
 * the new one after max_linkhdr octets of leading space.
 */
505 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
506 MGETHDR(n, M_DONTWAIT, MT_DATA);
512 n->m_pkthdr.rcvif = NULL;
513 n->m_pkthdr.len += optlen;
514 m->m_len -= sizeof(struct ip);
515 m->m_data += sizeof(struct ip);
518 m->m_len = optlen + sizeof(struct ip);
519 m->m_data += max_linkhdr;
520 bcopy(ip, mtod(m, void *), sizeof(struct ip));
/*
 * Otherwise the existing mbuf is reused and the fixed header is copied to
 * the current start of data.  NOTE(review): the adjustment of m_data and
 * m_len for this case is not visible here.
 */
524 m->m_pkthdr.len += optlen;
525 bcopy(ip, mtod(m, void *), sizeof(struct ip));
/* Re-read the header pointer, append the options and fix the lengths. */
527 ip = mtod(m, struct ip *);
528 bcopy(p->ipopt_list, ip + 1, optlen);
529 *phlen = sizeof(struct ip) + optlen;
530 ip->ip_v = IPVERSION;
531 ip->ip_hl = *phlen >> 2;
532 ip->ip_len += optlen;
537 * Copy options from ip to jp, omitting those not copied during
/*
 * ip_optcopy: copy options from the header at ip to the header at jp,
 * taking only those for which IPOPT_COPIED(opt) is true.
 *
 * ip  source header; its ip_hl field gives the number of option octets.
 * jp  destination header; options are written directly after it.
 *
 * NOTE(review): the assignment to opt, the NOP and EOL handling, the
 * padding loop body and the return statement are on lines not visible in
 * this copy.
 */
541 ip_optcopy(struct ip *ip, struct ip *jp)
544 int opt, optlen, cnt;
546 cp = (u_char *)(ip + 1);
547 dp = (u_char *)(jp + 1);
548 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
/* Each iteration consumes optlen octets of the source option area. */
549 for (; cnt > 0; cnt -= optlen, cp += optlen) {
551 if (opt == IPOPT_EOL)
553 if (opt == IPOPT_NOP) {
554 /* Preserve for IP mcast tunnel's LSRR alignment. */
/* Malformed lengths are only asserted here, not handled. */
560 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
561 ("ip_optcopy: malformed ipv4 option"));
562 optlen = cp[IPOPT_OLEN];
563 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
564 ("ip_optcopy: malformed ipv4 option"));
566 /* Bogus lengths should have been caught by ip_dooptions. */
569 if (IPOPT_COPIED(opt)) {
570 bcopy(cp, dp, optlen);
/*
 * optlen is reused as the count of octets written so far and is stepped
 * up until it is a multiple of four.
 */
574 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
580 * Set up IP options in pcb for insertion in output packets. Store in mbuf
581 * with pointer in pcbopt, adding pseudo-option with destination address if
/*
 * ip_pcbopts: validate the options in m and rearrange them into the stored
 * form, with room for a first-hop address in front of the option octets.
 * Any options mbuf previously held in inp->inp_options is freed first.
 *
 * inp      protocol control block; INP_WLOCK_ASSERT requires that the
 *          caller holds its write lock.
 * optname  not referenced in any visible line of this function.
 * m        mbuf holding the raw options supplied by the caller.
 *
 * NOTE(review): the assignment to cnt, the error label, the switch on the
 * option type and the return statements are on lines not visible in this
 * copy.
 */
585 ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
589 struct mbuf **pcbopt;
592 INP_WLOCK_ASSERT(inp);
594 pcbopt = &inp->inp_options;
596 /* turn off any old options */
598 (void)m_free(*pcbopt);
600 if (m == NULL || m->m_len == 0) {
602 * Only turning off any previous options.
/* The option area must be a whole number of 32-bit words. */
609 if (m->m_len % sizeof(int32_t))
612 * IP first-hop destination address will be stored before actual
613 * options; move other options back and clear it when none present.
/* Checks that shifting by one address stays inside the m_dat area. */
615 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
/* Shift the options up by one address and zero the gap at the front. */
618 m->m_len += sizeof(struct in_addr);
619 cp = mtod(m, u_char *) + sizeof(struct in_addr);
620 bcopy(mtod(m, void *), cp, (unsigned)cnt);
621 bzero(mtod(m, void *), sizeof(struct in_addr));
/* Walk the shifted options, applying the usual length checks to each. */
623 for (; cnt > 0; cnt -= optlen, cp += optlen) {
624 opt = cp[IPOPT_OPTVAL];
625 if (opt == IPOPT_EOL)
627 if (opt == IPOPT_NOP)
630 if (cnt < IPOPT_OLEN + sizeof(*cp))
632 optlen = cp[IPOPT_OLEN];
633 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
644 * User process specifies route as:
648 * D must be our final destination (but we can't
649 * check that since we may not have connected yet).
650 * A is first hop destination, which doesn't appear
651 * in actual IP option, but is stored before the
654 /* XXX-BZ PRIV_NETINET_SETHDROPTS? */
/* The route must contain at least one address after its header. */
655 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
/* One address leaves the option, so every length shrinks by its size. */
657 m->m_len -= sizeof(struct in_addr);
658 cnt -= sizeof(struct in_addr);
659 optlen -= sizeof(struct in_addr);
660 cp[IPOPT_OLEN] = optlen;
662 * Move first hop before start of options.
664 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
665 sizeof(struct in_addr));
667 * Then copy rest of options back
668 * to close up the deleted entry.
670 bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
672 (unsigned)cnt - (IPOPT_MINOFF - 1));
/* Upper bound on the result: MAX_IPOPTLEN plus the leading address. */
676 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
687 * Check for the presence of the IP Router Alert option [RFC2113]
688 * in the header of an IPv4 datagram.
690 * This call is not intended for use from the forwarding path; it is here
691 * so that protocol domains may check for the presence of the option.
692 * Given how FreeBSD's IPv4 stack is currently structured, the Router Alert
693 * option does not have much relevance to the implementation, though this
694 * may change in future.
695 * Router alert options SHOULD be passed if running in IPSTEALTH mode and
696 * we are not the endpoint.
697 * Length checks on individual options should already have been performed
698 * by ip_dooptions(); therefore they are folded under INVARIANTS here.
700 * Return zero if not present or options are invalid, non-zero if present.
703 ip_checkrouteralert(struct mbuf *m)
705 struct ip *ip = mtod(m, struct ip *);
707 int opt, optlen, cnt, found_ra;
710 cp = (u_char *)(ip + 1);
711 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
712 for (; cnt > 0; cnt -= optlen, cp += optlen) {
713 opt = cp[IPOPT_OPTVAL];
714 if (opt == IPOPT_EOL)
716 if (opt == IPOPT_NOP)
720 if (cnt < IPOPT_OLEN + sizeof(*cp))
723 optlen = cp[IPOPT_OLEN];
725 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
732 if (optlen != IPOPT_OFFSET + sizeof(uint16_t) ||
733 (*((uint16_t *)&cp[IPOPT_OFFSET]) != 0))