2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2015-2019 Yandex LLC
5 * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include "opt_ipstealth.h"
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/counter.h>
37 #include <sys/errno.h>
38 #include <sys/kernel.h>
41 #include <sys/module.h>
42 #include <sys/rmlock.h>
43 #include <sys/rwlock.h>
44 #include <sys/socket.h>
45 #include <sys/queue.h>
48 #include <net/if_var.h>
49 #include <net/if_pflog.h>
51 #include <net/netisr.h>
52 #include <net/route.h>
53 #include <net/route/nhop.h>
55 #include <netinet/in.h>
56 #include <netinet/in_fib.h>
57 #include <netinet/in_var.h>
58 #include <netinet/ip.h>
59 #include <netinet/ip_var.h>
60 #include <netinet/ip_fw.h>
61 #include <netinet/ip6.h>
62 #include <netinet/icmp6.h>
63 #include <netinet/ip_icmp.h>
64 #include <netinet/tcp.h>
65 #include <netinet/udp.h>
66 #include <netinet6/in6_var.h>
67 #include <netinet6/in6_fib.h>
68 #include <netinet6/ip6_var.h>
69 #include <netinet6/ip_fw_nat64.h>
71 #include <netpfil/pf/pf.h>
72 #include <netpfil/ipfw/ip_fw_private.h>
73 #include <machine/in_cksum.h>
75 #include "ip_fw_nat64.h"
76 #include "nat64_translate.h"
79 typedef int (*nat64_output_t)(struct ifnet *, struct mbuf *,
80 struct sockaddr *, struct nat64_counters *, void *);
81 typedef int (*nat64_output_one_t)(struct mbuf *, struct nat64_counters *,
84 static struct nhop_object *nat64_find_route4(struct sockaddr_in *,
86 static struct nhop_object *nat64_find_route6(struct sockaddr_in6 *,
88 static int nat64_output_one(struct mbuf *, struct nat64_counters *, void *);
89 static int nat64_output(struct ifnet *, struct mbuf *, struct sockaddr *,
90 struct nat64_counters *, void *);
91 static int nat64_direct_output_one(struct mbuf *, struct nat64_counters *,
93 static int nat64_direct_output(struct ifnet *, struct mbuf *,
94 struct sockaddr *, struct nat64_counters *, void *);
96 struct nat64_methods {
97 nat64_output_t output;
98 nat64_output_one_t output_one;
100 static const struct nat64_methods nat64_netisr = {
101 .output = nat64_output,
102 .output_one = nat64_output_one
104 static const struct nat64_methods nat64_direct = {
105 .output = nat64_direct_output,
106 .output_one = nat64_direct_output_one
109 /* These variables should be initialized explicitly on module loading */
110 VNET_DEFINE_STATIC(const struct nat64_methods *, nat64out);
111 VNET_DEFINE_STATIC(const int *, nat64ipstealth);
112 VNET_DEFINE_STATIC(const int *, nat64ip6stealth);
113 #define V_nat64out VNET(nat64out)
114 #define V_nat64ipstealth VNET(nat64ipstealth)
115 #define V_nat64ip6stealth VNET(nat64ip6stealth)
117 static const int stealth_on = 1;
119 static const int stealth_off = 0;
123 nat64_set_output_method(int direct)
127 V_nat64out = &nat64_direct;
129 /* Honor corresponding variables, if IPSTEALTH is defined */
130 V_nat64ipstealth = &V_ipstealth;
131 V_nat64ip6stealth = &V_ip6stealth;
133 /* otherwise we need to decrement HLIM/TTL for direct case */
134 V_nat64ipstealth = V_nat64ip6stealth = &stealth_off;
137 V_nat64out = &nat64_netisr;
138 /* Leave TTL/HLIM decrementing to forwarding code */
139 V_nat64ipstealth = V_nat64ip6stealth = &stealth_on;
144 nat64_get_output_method(void)
147 return (V_nat64out == &nat64_direct ? 1: 0);
151 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)
154 logdata->dir = PF_OUT;
155 logdata->af = family;
156 ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);
160 nat64_direct_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
161 struct nat64_counters *stats, void *logdata)
166 nat64_log(logdata, m, dst->sa_family);
167 error = (*ifp->if_output)(ifp, m, dst, NULL);
169 NAT64STAT_INC(stats, oerrors);
174 nat64_direct_output_one(struct mbuf *m, struct nat64_counters *stats,
177 struct nhop_object *nh4 = NULL;
178 struct nhop_object *nh6 = NULL;
179 struct sockaddr_in6 dst6;
180 struct sockaddr_in dst4;
181 struct sockaddr *dst;
187 ip4 = mtod(m, struct ip *);
191 dst4.sin_addr = ip4->ip_dst;
192 nh4 = nat64_find_route4(&dst4, m);
194 NAT64STAT_INC(stats, noroute4);
195 error = EHOSTUNREACH;
198 dst = (struct sockaddr *)&dst4;
201 case (IPV6_VERSION >> 4):
202 ip6 = mtod(m, struct ip6_hdr *);
203 dst6.sin6_addr = ip6->ip6_dst;
204 nh6 = nat64_find_route6(&dst6, m);
206 NAT64STAT_INC(stats, noroute6);
207 error = EHOSTUNREACH;
210 dst = (struct sockaddr *)&dst6;
215 NAT64STAT_INC(stats, dropped);
216 DPRINTF(DP_DROPS, "dropped due to unknown IP version");
217 return (EAFNOSUPPORT);
221 return (EHOSTUNREACH);
224 nat64_log(logdata, m, dst->sa_family);
225 error = (*ifp->if_output)(ifp, m, dst, NULL);
227 NAT64STAT_INC(stats, oerrors);
232 nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
233 struct nat64_counters *stats, void *logdata)
238 ip4 = mtod(m, struct ip *);
244 case (IPV6_VERSION >> 4):
250 NAT64STAT_INC(stats, dropped);
251 DPRINTF(DP_DROPS, "unknown IP version");
252 return (EAFNOSUPPORT);
255 nat64_log(logdata, m, af);
256 if (m->m_pkthdr.rcvif == NULL)
257 m->m_pkthdr.rcvif = V_loif;
258 ret = netisr_queue(ret, m);
260 NAT64STAT_INC(stats, oerrors);
265 nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata)
268 return (nat64_output(NULL, m, NULL, stats, logdata));
/*
 * Check the given IPv6 prefix length according to RFC6052:
 *   The prefixes can only have one of the following lengths:
 *   32, 40, 48, 56, 64, or 96 (The Well-Known Prefix is 96 bits long).
 * Returns zero on success, otherwise EINVAL.
 */
int
nat64_check_prefixlen(int length)
{

	switch (length) {
	case 32:
	case 40:
	case 48:
	case 56:
	case 64:
	case 96:
		return (0);
	}
	return (EINVAL);
}
294 nat64_check_prefix6(const struct in6_addr *prefix, int length)
297 if (nat64_check_prefixlen(length) != 0)
300 /* Well-known prefix has 96 prefix length */
301 if (IN6_IS_ADDR_WKPFX(prefix) && length != 96)
304 /* Bits 64 to 71 must be set to zero */
305 if (prefix->__u6_addr.__u6_addr8[8] != 0)
308 /* Some extra checks */
309 if (IN6_IS_ADDR_MULTICAST(prefix) ||
310 IN6_IS_ADDR_UNSPECIFIED(prefix) ||
311 IN6_IS_ADDR_LOOPBACK(prefix))
317 nat64_check_private_ip4(const struct nat64_config *cfg, in_addr_t ia)
320 if (cfg->flags & NAT64_ALLOW_PRIVATE)
323 /* WKPFX must not be used to represent non-global IPv4 addresses */
324 if (cfg->flags & NAT64_WKPFX) {
326 if ((ia & htonl(0xff000000)) == htonl(0x0a000000) ||
327 (ia & htonl(0xfff00000)) == htonl(0xac100000) ||
328 (ia & htonl(0xffff0000)) == htonl(0xc0a80000))
332 * 192.0.0.0/24 - reserved for IETF protocol assignments
333 * 192.88.99.0/24 - for use as 6to4 relay anycast addresses
334 * 198.18.0.0/15 - for use in benchmark tests
335 * 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use
336 * in documentation and example code
338 if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) ||
339 (ia & htonl(0xffffff00)) == htonl(0xc0586300) ||
340 (ia & htonl(0xfffffe00)) == htonl(0xc6120000) ||
341 (ia & htonl(0xffffff00)) == htonl(0xc0000200) ||
342 (ia & htonl(0xfffffe00)) == htonl(0xc6336400) ||
343 (ia & htonl(0xffffff00)) == htonl(0xcb007100))
350 * Embed @ia IPv4 address into @ip6 IPv6 address.
351 * Place to embedding determined from prefix length @plen.
354 nat64_embed_ip4(struct in6_addr *ip6, int plen, in_addr_t ia)
360 ip6->s6_addr32[plen / 32] = ia;
366 * Preserve prefix bits.
367 * Since suffix bits should be zero and reserved for future
368 * use, we just overwrite the whole word, where they are.
370 ip6->s6_addr32[1] &= 0xffffffff << (32 - plen % 32);
371 #if BYTE_ORDER == BIG_ENDIAN
372 ip6->s6_addr32[1] |= ia >> (plen % 32);
373 ip6->s6_addr32[2] = ia << (24 - plen % 32);
374 #elif BYTE_ORDER == LITTLE_ENDIAN
375 ip6->s6_addr32[1] |= ia << (plen % 32);
376 ip6->s6_addr32[2] = ia >> (24 - plen % 32);
380 #if BYTE_ORDER == BIG_ENDIAN
381 ip6->s6_addr32[2] = ia >> 8;
382 ip6->s6_addr32[3] = ia << 24;
383 #elif BYTE_ORDER == LITTLE_ENDIAN
384 ip6->s6_addr32[2] = ia << 8;
385 ip6->s6_addr32[3] = ia >> 24;
389 panic("Wrong plen: %d", plen);
392 * Bits 64 to 71 of the address are reserved for compatibility
393 * with the host identifier format defined in the IPv6 addressing
394 * architecture [RFC4291]. These bits MUST be set to zero.
396 ip6->s6_addr8[8] = 0;
400 nat64_extract_ip4(const struct in6_addr *ip6, int plen)
405 * According to RFC 6052 p2.2:
406 * IPv4-embedded IPv6 addresses are composed of a variable-length
407 * prefix, the embedded IPv4 address, and a variable length suffix.
408 * The suffix bits are reserved for future extensions and SHOULD
413 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)
417 if (ip6->s6_addr32[3] != 0 ||
418 (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)
422 if (ip6->s6_addr32[3] != 0 ||
423 (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)
427 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)
431 if (ip6->s6_addr8[8] != 0 ||
432 (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)
438 ia = ip6->s6_addr32[plen / 32];
443 #if BYTE_ORDER == BIG_ENDIAN
444 ia = (ip6->s6_addr32[1] << (plen % 32)) |
445 (ip6->s6_addr32[2] >> (24 - plen % 32));
446 #elif BYTE_ORDER == LITTLE_ENDIAN
447 ia = (ip6->s6_addr32[1] >> (plen % 32)) |
448 (ip6->s6_addr32[2] << (24 - plen % 32));
452 #if BYTE_ORDER == BIG_ENDIAN
453 ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);
454 #elif BYTE_ORDER == LITTLE_ENDIAN
455 ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);
461 if (nat64_check_ip4(ia) == 0)
464 DPRINTF(DP_GENERIC | DP_DROPS,
465 "invalid destination address: %08x", ia);
468 DPRINTF(DP_GENERIC | DP_DROPS, "invalid IPv4-embedded IPv6 address");
473 * According to RFC 1624 the equation for incremental checksum update is:
474 * HC' = ~(~HC + ~m + m') -- [Eqn. 3]
475 * HC' = HC - ~m - m' -- [Eqn. 4]
476 * So, when we are replacing IPv4 addresses to IPv6, we
477 * can assume, that new bytes previously were zeros, and vise versa -
478 * when we replacing IPv6 addresses to IPv4, now unused bytes become
479 * zeros. The payload length in pseudo header has bigger size, but one
480 * half of it should be zero. Using the equation 4 we get:
481 * HC' = HC - (~m0 + m0') -- m0 is first changed word
482 * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word
483 * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... =
484 * = HC - sum(~m[i] + m'[i])
486 * The function result should be used as follows:
487 * IPv6 to IPv4: HC' = cksum_add(HC, result)
488 * IPv4 to IPv6: HC' = cksum_add(HC, ~result)
491 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)
496 sum = ~ip->ip_src.s_addr >> 16;
497 sum += ~ip->ip_src.s_addr & 0xffff;
498 sum += ~ip->ip_dst.s_addr >> 16;
499 sum += ~ip->ip_dst.s_addr & 0xffff;
501 for (p = (uint16_t *)&ip6->ip6_src;
502 p < (uint16_t *)(&ip6->ip6_src + 2); p++)
506 sum = (sum & 0xffff) + (sum >> 16);
511 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,
512 uint16_t plen, uint8_t proto, struct ip *ip)
515 /* assume addresses are already initialized */
516 ip->ip_v = IPVERSION;
517 ip->ip_hl = sizeof(*ip) >> 2;
518 ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
519 ip->ip_len = htons(sizeof(*ip) + plen);
520 ip->ip_ttl = ip6->ip6_hlim;
521 if (*V_nat64ip6stealth == 0)
522 ip->ip_ttl -= IPV6_HLIMDEC;
524 ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;
527 ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);
528 if (frag->ip6f_offlg & IP6F_MORE_FRAG)
529 ip->ip_off |= htons(IP_MF);
531 ip->ip_off = htons(IP_DF);
533 ip->ip_sum = in_cksum_hdr(ip);
536 #define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))
537 static NAT64NOINLINE int
538 nat64_fragment6(struct nat64_counters *stats, struct ip6_hdr *ip6,
539 struct mbufq *mq, struct mbuf *m, uint32_t mtu, uint16_t ip_id,
542 struct ip6_frag ip6f;
544 uint16_t hlen, len, offset;
547 plen = ntohs(ip6->ip6_plen);
548 hlen = sizeof(struct ip6_hdr);
550 /* Fragmentation isn't needed */
551 if (ip_off == 0 && plen <= mtu - hlen) {
552 M_PREPEND(m, hlen, M_NOWAIT);
554 NAT64STAT_INC(stats, nomem);
557 bcopy(ip6, mtod(m, void *), hlen);
558 if (mbufq_enqueue(mq, m) != 0) {
560 NAT64STAT_INC(stats, dropped);
561 DPRINTF(DP_DROPS, "dropped due to mbufq overflow");
567 hlen += sizeof(struct ip6_frag);
568 ip6f.ip6f_reserved = 0;
569 ip6f.ip6f_nxt = ip6->ip6_nxt;
570 ip6->ip6_nxt = IPPROTO_FRAGMENT;
573 * We have got an IPv4 fragment.
574 * Use offset value and ip_id from original fragment.
576 ip6f.ip6f_ident = htonl(ntohs(ip_id));
577 offset = (ntohs(ip_off) & IP_OFFMASK) << 3;
578 NAT64STAT_INC(stats, ifrags);
580 /* The packet size exceeds interface MTU */
581 ip6f.ip6f_ident = htonl(ip6_randomid());
582 offset = 0; /* First fragment*/
584 while (plen > 0 && m != NULL) {
586 len = FRAGSZ(mtu) & ~7;
589 ip6->ip6_plen = htons(len + sizeof(ip6f));
590 ip6f.ip6f_offlg = ntohs(offset);
591 if (len < plen || (ip_off & htons(IP_MF)) != 0)
592 ip6f.ip6f_offlg |= IP6F_MORE_FRAG;
596 n = m_split(m, len, M_NOWAIT);
600 M_PREPEND(m, hlen, M_NOWAIT);
603 bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));
604 bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),
605 sizeof(struct ip6_frag));
606 if (mbufq_enqueue(mq, m) != 0)
610 NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));
618 NAT64STAT_INC(stats, nomem);
622 static struct nhop_object *
623 nat64_find_route6(struct sockaddr_in6 *dst, struct mbuf *m)
625 struct nhop_object *nh;
627 nh = fib6_lookup(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0);
630 if (nh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT))
633 * XXX: we need to use destination address with embedded scope
634 * zone id, because LLTABLE uses such form of addresses for lookup.
636 dst->sin6_family = AF_INET6;
637 dst->sin6_len = sizeof(*dst);
638 dst->sin6_addr = ifatoia6(nh->nh_ifa)->ia_addr.sin6_addr;
639 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
640 dst->sin6_addr.s6_addr16[1] =
641 htons(nh->nh_ifp->if_index & 0xffff);
643 dst->sin6_scope_id = 0;
644 dst->sin6_flowinfo = 0;
649 #define NAT64_ICMP6_PLEN 64
650 static NAT64NOINLINE void
651 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,
652 struct nat64_counters *stats, void *logdata)
654 struct icmp6_hdr *icmp6;
655 struct ip6_hdr *ip6, *oip6;
657 int len, plen, proto;
660 proto = nat64_getlasthdr(m, &len);
662 DPRINTF(DP_DROPS, "mbuf isn't contigious");
666 * Do not send ICMPv6 in reply to ICMPv6 errors.
668 if (proto == IPPROTO_ICMPV6) {
669 if (m->m_len < len + sizeof(*icmp6)) {
670 DPRINTF(DP_DROPS, "mbuf isn't contigious");
673 icmp6 = mtodo(m, len);
674 if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST ||
675 icmp6->icmp6_type == ND_REDIRECT) {
676 DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to "
681 * If there are extra headers between IPv6 and ICMPv6,
684 if (len > sizeof(struct ip6_hdr)) {
686 * NOTE: ipfw_chk already did m_pullup() and it is
687 * expected that data is contigious from the start
688 * of IPv6 header up to the end of ICMPv6 header.
690 bcopy(mtod(m, caddr_t),
691 mtodo(m, len - sizeof(struct ip6_hdr)),
692 sizeof(struct ip6_hdr));
693 m_adj(m, len - sizeof(struct ip6_hdr));
697 if (icmp6_ratelimit(&ip6->ip6_src, type, code))
700 ip6 = mtod(m, struct ip6_hdr *);
702 case ICMP6_DST_UNREACH:
703 case ICMP6_PACKET_TOO_BIG:
704 case ICMP6_TIME_EXCEEDED:
705 case ICMP6_PARAM_PROB:
710 /* Calculate length of ICMPv6 payload */
711 len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:
714 /* Create new ICMPv6 datagram */
715 plen = len + sizeof(struct icmp6_hdr);
716 n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,
717 MT_HEADER, M_PKTHDR);
719 NAT64STAT_INC(stats, nomem);
724 * Move pkthdr from original mbuf. We should have initialized some
725 * fields, because we can reinject this mbuf to netisr and it will
726 * go trough input path (it requires at least rcvif should be set).
727 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf
728 * in the chain, when we will do M_PREPEND() or make some type of
732 M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);
734 n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
735 oip6 = mtod(n, struct ip6_hdr *);
737 * Make IPv6 source address selection for reflected datagram.
738 * nat64_check_ip6() doesn't allow scoped addresses, therefore
739 * we use zero scopeid.
741 if (in6_selectsrc_addr(M_GETFIB(n), &ip6->ip6_src, 0,
742 n->m_pkthdr.rcvif, &oip6->ip6_src, NULL) != 0) {
744 * Failed to find proper source address, drop the packet.
749 oip6->ip6_dst = ip6->ip6_src;
750 oip6->ip6_nxt = IPPROTO_ICMPV6;
752 oip6->ip6_vfc |= IPV6_VERSION;
753 oip6->ip6_hlim = V_ip6_defhlim;
754 oip6->ip6_plen = htons(plen);
756 icmp6 = mtodo(n, sizeof(struct ip6_hdr));
757 icmp6->icmp6_cksum = 0;
758 icmp6->icmp6_type = type;
759 icmp6->icmp6_code = code;
760 icmp6->icmp6_mtu = htonl(mtu);
762 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +
763 sizeof(struct icmp6_hdr)));
764 icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,
765 sizeof(struct ip6_hdr), plen);
767 V_nat64out->output_one(n, stats, logdata);
770 NAT64STAT_INC(stats, dropped);
774 static struct nhop_object *
775 nat64_find_route4(struct sockaddr_in *dst, struct mbuf *m)
777 struct nhop_object *nh;
780 nh = fib4_lookup(M_GETFIB(m), dst->sin_addr, 0, 0, 0);
783 if (nh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT))
786 dst->sin_family = AF_INET;
787 dst->sin_len = sizeof(*dst);
788 dst->sin_addr = IA_SIN(nh->nh_ifa)->sin_addr;
793 #define NAT64_ICMP_PLEN 64
794 static NAT64NOINLINE void
795 nat64_icmp_reflect(struct mbuf *m, uint8_t type,
796 uint8_t code, uint16_t mtu, struct nat64_counters *stats, void *logdata)
803 ip = mtod(m, struct ip *);
804 /* Do not send ICMP error if packet is not the first fragment */
805 if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) {
806 DPRINTF(DP_DROPS, "not first fragment");
809 /* Do not send ICMP in reply to ICMP errors */
810 if (ip->ip_p == IPPROTO_ICMP) {
811 if (m->m_len < (ip->ip_hl << 2)) {
812 DPRINTF(DP_DROPS, "mbuf isn't contigious");
815 icmp = mtodo(m, ip->ip_hl << 2);
816 if (!ICMP_INFOTYPE(icmp->icmp_type)) {
817 DPRINTF(DP_DROPS, "do not send ICMP in reply to "
830 /* Calculate length of ICMP payload */
831 len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:
834 /* Create new ICMPv4 datagram */
835 plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);
836 n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,
837 MT_HEADER, M_PKTHDR);
839 NAT64STAT_INC(stats, nomem);
844 M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);
846 n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;
847 oip = mtod(n, struct ip *);
848 oip->ip_v = IPVERSION;
849 oip->ip_hl = sizeof(struct ip) >> 2;
851 oip->ip_len = htons(n->m_pkthdr.len);
852 oip->ip_ttl = V_ip_defttl;
853 oip->ip_p = IPPROTO_ICMP;
855 oip->ip_off = htons(IP_DF);
856 oip->ip_src = ip->ip_dst;
857 oip->ip_dst = ip->ip_src;
859 oip->ip_sum = in_cksum_hdr(oip);
861 icmp = mtodo(n, sizeof(struct ip));
862 icmp->icmp_type = type;
863 icmp->icmp_code = code;
864 icmp->icmp_cksum = 0;
865 icmp->icmp_pmvoid = 0;
866 icmp->icmp_nextmtu = htons(mtu);
867 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +
868 sizeof(struct icmphdr) + sizeof(uint32_t)));
869 icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,
872 V_nat64out->output_one(n, stats, logdata);
875 NAT64STAT_INC(stats, dropped);
879 /* Translate ICMP echo request/reply into ICMPv6 */
881 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,
882 uint16_t id, uint8_t type)
886 old = *(uint16_t *)icmp6; /* save type+code in one word */
887 icmp6->icmp6_type = type;
888 /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */
889 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
890 old, *(uint16_t *)icmp6);
892 old = icmp6->icmp6_id;
893 icmp6->icmp6_id = id;
894 /* Reflect ICMP id translation in the cksum */
895 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
898 /* Reflect IPv6 pseudo header in the cksum */
899 icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
900 IPPROTO_ICMPV6, ~icmp6->icmp6_cksum);
903 static NAT64NOINLINE struct mbuf *
904 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,
905 int offset, struct nat64_config *cfg)
911 struct ip6_hdr *eip6;
917 if (m->m_len < offset + ICMP_MINLEN)
918 m = m_pullup(m, offset + ICMP_MINLEN);
920 NAT64STAT_INC(&cfg->stats, nomem);
924 icmp = mtodo(m, offset);
926 switch (icmp->icmp_type) {
928 type = ICMP6_ECHO_REPLY;
932 type = ICMP6_DST_UNREACH;
933 switch (icmp->icmp_code) {
934 case ICMP_UNREACH_NET:
935 case ICMP_UNREACH_HOST:
936 case ICMP_UNREACH_SRCFAIL:
937 case ICMP_UNREACH_NET_UNKNOWN:
938 case ICMP_UNREACH_HOST_UNKNOWN:
939 case ICMP_UNREACH_TOSNET:
940 case ICMP_UNREACH_TOSHOST:
941 code = ICMP6_DST_UNREACH_NOROUTE;
943 case ICMP_UNREACH_PROTOCOL:
944 type = ICMP6_PARAM_PROB;
945 code = ICMP6_PARAMPROB_NEXTHEADER;
947 case ICMP_UNREACH_PORT:
948 code = ICMP6_DST_UNREACH_NOPORT;
950 case ICMP_UNREACH_NEEDFRAG:
951 type = ICMP6_PACKET_TOO_BIG;
953 /* XXX: needs an additional look */
954 mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20);
956 case ICMP_UNREACH_NET_PROHIB:
957 case ICMP_UNREACH_HOST_PROHIB:
958 case ICMP_UNREACH_FILTER_PROHIB:
959 case ICMP_UNREACH_PRECEDENCE_CUTOFF:
960 code = ICMP6_DST_UNREACH_ADMIN;
963 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
964 icmp->icmp_type, icmp->icmp_code);
969 type = ICMP6_TIME_EXCEEDED;
970 code = icmp->icmp_code;
973 type = ICMP6_ECHO_REQUEST;
977 type = ICMP6_PARAM_PROB;
978 switch (icmp->icmp_code) {
979 case ICMP_PARAMPROB_ERRATPTR:
980 case ICMP_PARAMPROB_LENGTH:
981 code = ICMP6_PARAMPROB_HEADER;
982 switch (icmp->icmp_pptr) {
983 case 0: /* Version/IHL */
984 case 1: /* Type Of Service */
985 mtu = icmp->icmp_pptr;
987 case 2: /* Total Length */
988 case 3: mtu = 4; /* Payload Length */
990 case 8: /* Time to Live */
991 mtu = 7; /* Hop Limit */
993 case 9: /* Protocol */
994 mtu = 6; /* Next Header */
996 case 12: /* Source address */
1002 case 16: /* Destination address */
1008 default: /* Silently drop */
1009 DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
1010 " code %d, pptr %d", icmp->icmp_type,
1011 icmp->icmp_code, icmp->icmp_pptr);
1016 DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
1017 " code %d, pptr %d", icmp->icmp_type,
1018 icmp->icmp_code, icmp->icmp_pptr);
1023 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
1024 icmp->icmp_type, icmp->icmp_code);
1028 * For echo request/reply we can use original payload,
1029 * but we need adjust icmp_cksum, because ICMPv6 cksum covers
1030 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4.
1032 if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {
1033 nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);
1037 * For other types of ICMP messages we need to translate inner
1038 * IPv4 header to IPv6 header.
1039 * Assume ICMP src is the same as payload dst
1040 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header
1041 * and ( NATIP1, Hostdst1 ) in ICMP copy header.
1042 * In that case, we already have map for NATIP1 and GWsrc1.
1043 * The only thing we need is to copy IPv6 map prefix to
1046 hlen = offset + ICMP_MINLEN;
1047 if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {
1048 DPRINTF(DP_DROPS, "Message is too short %d",
1052 m_copydata(m, hlen, sizeof(struct ip), (char *)&ip);
1053 if (ip.ip_v != IPVERSION) {
1054 DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v);
1057 hlen += ip.ip_hl << 2; /* Skip inner IP header */
1058 if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||
1059 nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||
1060 nat64_check_private_ip4(cfg, ip.ip_src.s_addr) != 0 ||
1061 nat64_check_private_ip4(cfg, ip.ip_dst.s_addr) != 0) {
1062 DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x",
1063 ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr));
1066 if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
1067 DPRINTF(DP_DROPS, "Message is too short %d",
1073 * Check that inner source matches the outer destination.
1074 * XXX: We need some method to convert IPv4 into IPv6 address here,
1075 * and compare IPv6 addresses.
1077 if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {
1078 DPRINTF(DP_GENERIC, "Inner source doesn't match destination ",
1079 "%04x vs %04x", ip.ip_src.s_addr,
1080 nat64_get_ip4(&ip6->ip6_dst));
1085 * Create new mbuf for ICMPv6 datagram.
1086 * NOTE: len is data length just after inner IP header.
1088 len = m->m_pkthdr.len - hlen;
1089 if (sizeof(struct ip6_hdr) +
1090 sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)
1091 len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -
1092 sizeof(struct ip6_hdr);
1093 plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;
1094 n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);
1096 NAT64STAT_INC(&cfg->stats, nomem);
1100 m_move_pkthdr(n, m);
1101 M_ALIGN(n, offset + plen + max_hdr);
1102 n->m_len = n->m_pkthdr.len = offset + plen;
1103 /* Adjust ip6_plen in outer header */
1104 ip6->ip6_plen = htons(plen);
1105 /* Construct new inner IPv6 header */
1106 eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));
1107 eip6->ip6_src = ip6->ip6_dst;
1109 /* Use the same prefix that we have in outer header */
1110 eip6->ip6_dst = ip6->ip6_src;
1111 MPASS(cfg->flags & NAT64_PLATPFX);
1112 nat64_embed_ip4(&eip6->ip6_dst, cfg->plat_plen, ip.ip_dst.s_addr);
1114 eip6->ip6_flow = htonl(ip.ip_tos << 20);
1115 eip6->ip6_vfc |= IPV6_VERSION;
1116 eip6->ip6_hlim = ip.ip_ttl;
1117 eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));
1118 eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;
1119 m_copydata(m, hlen, len, (char *)(eip6 + 1));
1121 * We need to translate source port in the inner ULP header,
1122 * and adjust ULP checksum.
1126 if (len < offsetof(struct tcphdr, th_sum))
1128 tcp = TCP(eip6 + 1);
1130 tcp->th_sum = cksum_adjust(tcp->th_sum,
1131 tcp->th_sport, icmpid);
1132 tcp->th_sport = icmpid;
1134 tcp->th_sum = cksum_add(tcp->th_sum,
1135 ~nat64_cksum_convert(eip6, &ip));
1138 if (len < offsetof(struct udphdr, uh_sum))
1140 udp = UDP(eip6 + 1);
1142 udp->uh_sum = cksum_adjust(udp->uh_sum,
1143 udp->uh_sport, icmpid);
1144 udp->uh_sport = icmpid;
1146 udp->uh_sum = cksum_add(udp->uh_sum,
1147 ~nat64_cksum_convert(eip6, &ip));
1151 * Check if this is an ICMP error message for echo request
1152 * that we sent. I.e. ULP in the data containing invoking
1153 * packet is IPPROTO_ICMP and its type is ICMP_ECHO.
1155 icmp = (struct icmp *)(eip6 + 1);
1156 if (icmp->icmp_type != ICMP_ECHO) {
1161 * For our client this original datagram should looks
1162 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
1163 * Thus we need adjust icmp_cksum and convert type from
1164 * ICMP_ECHO to ICMP6_ECHO_REQUEST.
1166 nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
1167 ICMP6_ECHO_REQUEST);
1170 /* Convert ICMPv4 into ICMPv6 header */
1171 icmp = mtodo(n, offset);
1172 ICMP6(icmp)->icmp6_type = type;
1173 ICMP6(icmp)->icmp6_code = code;
1174 ICMP6(icmp)->icmp6_mtu = htonl(mtu);
1175 ICMP6(icmp)->icmp6_cksum = 0;
1176 ICMP6(icmp)->icmp6_cksum = cksum_add(
1177 ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
1178 in_cksum_skip(n, n->m_pkthdr.len, offset));
1182 NAT64STAT_INC(&cfg->stats, dropped);
1187 nat64_getlasthdr(struct mbuf *m, int *offset)
1189 struct ip6_hdr *ip6;
1190 struct ip6_hbh *hbh;
1198 if (m->m_len < hlen + sizeof(*ip6))
1201 ip6 = mtodo(m, hlen);
1202 hlen += sizeof(*ip6);
1203 proto = ip6->ip6_nxt;
1204 /* Skip extension headers */
1205 while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
1206 proto == IPPROTO_DSTOPTS) {
1207 hbh = mtodo(m, hlen);
1209 * We expect mbuf has contigious data up to
1210 * upper level header.
1212 if (m->m_len < hlen)
1215 * We doesn't support Jumbo payload option,
1218 if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)
1220 proto = hbh->ip6h_nxt;
1221 hlen += (hbh->ip6h_len + 1) << 3;
1229 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
1230 struct in6_addr *daddr, uint16_t lport, struct nat64_config *cfg,
1233 struct nhop_object *nh;
1235 struct sockaddr_in6 dst;
1238 uint16_t ip_id, ip_off;
1243 ip = mtod(m, struct ip*);
1245 if (*V_nat64ipstealth == 0 && ip->ip_ttl <= IPTTLDEC) {
1246 nat64_icmp_reflect(m, ICMP_TIMXCEED,
1247 ICMP_TIMXCEED_INTRANS, 0, &cfg->stats, logdata);
1248 return (NAT64RETURN);
1251 ip6.ip6_dst = *daddr;
1252 ip6.ip6_src = *saddr;
1254 hlen = ip->ip_hl << 2;
1255 plen = ntohs(ip->ip_len) - hlen;
1258 /* Save ip_id and ip_off, both are in network byte order */
1260 ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
1262 /* Fragment length must be multiple of 8 octets */
1263 if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
1264 nat64_icmp_reflect(m, ICMP_PARAMPROB,
1265 ICMP_PARAMPROB_LENGTH, 0, &cfg->stats, logdata);
1266 return (NAT64RETURN);
1268 /* Fragmented ICMP is unsupported */
1269 if (proto == IPPROTO_ICMP && ip_off != 0) {
1270 DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");
1271 NAT64STAT_INC(&cfg->stats, dropped);
1272 return (NAT64MFREE);
1275 dst.sin6_addr = ip6.ip6_dst;
1276 nh = nat64_find_route6(&dst, m);
1278 NAT64STAT_INC(&cfg->stats, noroute6);
1279 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
1280 &cfg->stats, logdata);
1281 return (NAT64RETURN);
1283 if (nh->nh_mtu < plen + sizeof(ip6) &&
1284 (ip->ip_off & htons(IP_DF)) != 0) {
1285 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
1286 FRAGSZ(nh->nh_mtu) + sizeof(struct ip), &cfg->stats, logdata);
1287 return (NAT64RETURN);
1290 ip6.ip6_flow = htonl(ip->ip_tos << 20);
1291 ip6.ip6_vfc |= IPV6_VERSION;
1292 ip6.ip6_hlim = ip->ip_ttl;
1293 if (*V_nat64ipstealth == 0)
1294 ip6.ip6_hlim -= IPTTLDEC;
1295 ip6.ip6_plen = htons(plen);
1296 ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;
1298 /* Handle delayed checksums if needed. */
1299 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1300 in_delayed_cksum(m);
1301 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1303 /* Convert checksums. */
1306 csum = &TCP(mtodo(m, hlen))->th_sum;
1308 struct tcphdr *tcp = TCP(mtodo(m, hlen));
1309 *csum = cksum_adjust(*csum, tcp->th_dport, lport);
1310 tcp->th_dport = lport;
1312 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1315 csum = &UDP(mtodo(m, hlen))->uh_sum;
1317 struct udphdr *udp = UDP(mtodo(m, hlen));
1318 *csum = cksum_adjust(*csum, udp->uh_dport, lport);
1319 udp->uh_dport = lport;
1321 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1324 m = nat64_icmp_translate(m, &ip6, lport, hlen, cfg);
1325 if (m == NULL) /* stats already accounted */
1326 return (NAT64RETURN);
1330 mbufq_init(&mq, 255);
1331 nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh->nh_mtu, ip_id, ip_off);
1332 while ((m = mbufq_dequeue(&mq)) != NULL) {
1333 if (V_nat64out->output(nh->nh_ifp, m, (struct sockaddr *)&dst,
1334 &cfg->stats, logdata) != 0)
1336 NAT64STAT_INC(&cfg->stats, opcnt46);
1339 return (NAT64RETURN);
/*
 * nat64_handle_icmp6: convert an ICMPv6 error message into an ICMPv4 one.
 *
 * The ICMPv6 type and code are mapped as follows:
 *   ICMP6_DST_UNREACH    -> ICMP_UNREACH with code HOST (no route, beyond
 *                           scope, address), HOST_PROHIB (admin) or
 *                           PORT (no port);
 *   ICMP6_PACKET_TOO_BIG -> ICMP_UNREACH / ICMP_UNREACH_NEEDFRAG, with the
 *                           reported MTU reduced by the IPv6/IPv4 header
 *                           size difference;
 *   ICMP6_TIME_EXCEEDED  -> ICMP_TIMXCEED, code copied unchanged;
 *   ICMP6_PARAM_PROB     -> ICMP_PARAMPROB / ICMP_PARAMPROB_ERRATPTR for
 *                           ICMP6_PARAMPROB_HEADER, or ICMP_UNREACH /
 *                           ICMP_UNREACH_PROTOCOL for
 *                           ICMP6_PARAMPROB_NEXTHEADER.
 * Unsupported combinations are counted in the "dropped" statistic.
 *
 * The IPv6 datagram embedded in the message is then replaced by a fake
 * IPv4 header built with nat64_init_ip4hdr(), and the result is handed to
 * nat64_icmp_reflect() together with the translated type and code.
 *
 * m       - mbuf holding the outer IPv6 packet.
 * hlen    - passed by address to nat64_getlasthdr() and afterwards used as
 *           the offset of the ICMPv6 header inside m.
 * aaddr   - stored as the destination address of the fake IPv4 header.
 * aport   - not referenced in the statements visible here.
 * cfg     - supplies the statistics counters and plat_plen.
 * logdata - NOTE(review): presumably forwarded to nat64_icmp_reflect();
 *           the argument line is not visible here - confirm.
 *
 * Returns NAT64MFREE or NAT64RETURN.
 * NOTE(review): presumably NAT64MFREE asks the caller to free m while
 * NAT64RETURN means m has already been consumed - confirm against the
 * definitions of these constants.
 */
1343 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
1344 struct nat64_config *cfg, void *logdata)
1347 struct icmp6_hdr *icmp6;
1348 struct ip6_frag *ip6f;
1349 struct ip6_hdr *ip6, *ip6i;
/* Both addresses of the outer IPv6 header are validated first. */
1355 ip6 = mtod(m, struct ip6_hdr *);
1356 if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1357 nat64_check_ip6(&ip6->ip6_dst) != 0)
/*
 * nat64_getlasthdr() receives hlen by address; only a packet whose last
 * header is ICMPv6 is processed further.
 */
1360 proto = nat64_getlasthdr(m, &hlen);
1361 if (proto != IPPROTO_ICMPV6) {
1363 "dropped due to mbuf isn't contigious");
1364 NAT64STAT_INC(&cfg->stats, dropped);
1365 return (NAT64MFREE);
1370 * Translate ICMPv6 type and code to ICMPv4 (RFC7915).
1371 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6().
1373 icmp6 = mtodo(m, hlen);
1375 switch (icmp6->icmp6_type) {
1376 case ICMP6_DST_UNREACH:
1377 type = ICMP_UNREACH;
1378 switch (icmp6->icmp6_code) {
1379 case ICMP6_DST_UNREACH_NOROUTE:
1380 case ICMP6_DST_UNREACH_BEYONDSCOPE:
1381 case ICMP6_DST_UNREACH_ADDR:
1382 code = ICMP_UNREACH_HOST;
1384 case ICMP6_DST_UNREACH_ADMIN:
1385 code = ICMP_UNREACH_HOST_PROHIB;
1387 case ICMP6_DST_UNREACH_NOPORT:
1388 code = ICMP_UNREACH_PORT;
1391 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1392 " code %d", icmp6->icmp6_type,
1394 NAT64STAT_INC(&cfg->stats, dropped);
1395 return (NAT64MFREE);
/*
 * Packet Too Big becomes "fragmentation needed". The advertised MTU is
 * converted from network byte order and rejected when it is smaller than
 * IPV6_MMTU.
 */
1398 case ICMP6_PACKET_TOO_BIG:
1399 type = ICMP_UNREACH;
1400 code = ICMP_UNREACH_NEEDFRAG;
1401 mtu = ntohl(icmp6->icmp6_mtu);
1402 if (mtu < IPV6_MMTU) {
1403 DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d,"
1404 " code %d", mtu, icmp6->icmp6_type,
1406 NAT64STAT_INC(&cfg->stats, dropped);
1407 return (NAT64MFREE);
1410 * Adjust MTU to reflect difference between
1411 * IPv6 an IPv4 headers.
1413 mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);
1415 case ICMP6_TIME_EXCEEDED:
1416 type = ICMP_TIMXCEED;
1417 code = icmp6->icmp6_code;
1419 case ICMP6_PARAM_PROB:
1420 switch (icmp6->icmp6_code) {
1421 case ICMP6_PARAMPROB_HEADER:
1422 type = ICMP_PARAMPROB;
1423 code = ICMP_PARAMPROB_ERRATPTR;
/*
 * From here on "mtu" is reused to hold the parameter-problem pointer; the
 * same variable is later passed to nat64_icmp_reflect().
 */
1424 mtu = ntohl(icmp6->icmp6_pptr);
1426 case 0: /* Version/Traffic Class */
1427 case 1: /* Traffic Class/Flow Label */
1429 case 4: /* Payload Length */
1433 case 6: /* Next Header */
1436 case 7: /* Hop Limit */
/*
 * Pointers falling inside the IPv6 source address (8..23) or destination
 * address (24..39) are replaced by the fixed values 12 and 16.
 */
1440 if (mtu >= 8 && mtu <= 23) {
1441 mtu = 12; /* Source address */
1444 if (mtu >= 24 && mtu <= 39) {
1445 mtu = 16; /* Destination address */
1448 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1449 " code %d, pptr %d", icmp6->icmp6_type,
1450 icmp6->icmp6_code, mtu);
1451 NAT64STAT_INC(&cfg->stats, dropped);
1452 return (NAT64MFREE);
1454 case ICMP6_PARAMPROB_NEXTHEADER:
1455 type = ICMP_UNREACH;
1456 code = ICMP_UNREACH_PROTOCOL;
1459 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1460 " code %d, pptr %d", icmp6->icmp6_type,
1461 icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));
1462 NAT64STAT_INC(&cfg->stats, dropped);
1463 return (NAT64MFREE);
1467 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",
1468 icmp6->icmp6_type, icmp6->icmp6_code);
1469 NAT64STAT_INC(&cfg->stats, dropped);
1470 return (NAT64MFREE);
/*
 * Step over the ICMPv6 header: hlen becomes the offset of the embedded
 * (inner) IPv6 header. The packet has to contain that header plus
 * ICMP_MINLEN bytes of its payload.
 */
1473 hlen += sizeof(struct icmp6_hdr);
1474 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
1475 NAT64STAT_INC(&cfg->stats, dropped);
1476 DPRINTF(DP_DROPS, "Message is too short %d",
1478 return (NAT64MFREE);
1481 * We need at least ICMP_MINLEN bytes of original datagram payload
1482 * to generate ICMP message. It is nice that ICMP_MINLEN is equal
1483 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment
1484 * header we will not have to do m_pullup() again.
1486 * What we have here:
1487 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
1488 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
1489 * We need to translate it to:
1491 * Outer header: (alias_host, v4exthost)
1492 * Inner header: (v4exthost, alias_host) [sport, alias_port]
1494 * Assume caller function has checked if v4mapPRefix+v4host
1495 * matches configured prefix.
1496 * The only two things we should be provided with are mapping between
1497 * IPv6iHost <> alias_host and between dport and alias_port.
1499 if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
1500 m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
/*
 * NOTE(review): presumably reached only when m_pullup() failed; the
 * guarding test is not visible here - confirm.
 */
1502 NAT64STAT_INC(&cfg->stats, nomem);
1503 return (NAT64RETURN);
/*
 * Header pointers are taken again because m may have been replaced by
 * m_pullup(): ip6 is the outer header, ip6i the embedded one at hlen.
 */
1505 ip6 = mtod(m, struct ip6_hdr *);
1506 ip6i = mtodo(m, hlen);
1508 proto = ip6i->ip6_nxt;
1509 plen = ntohs(ip6i->ip6_plen);
1510 hlen += sizeof(struct ip6_hdr);
/*
 * An embedded fragment header is skipped: the upper-layer protocol is
 * taken from ip6f_nxt and both plen and hlen account for its size.
 */
1511 if (proto == IPPROTO_FRAGMENT) {
1512 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
1515 ip6f = mtodo(m, hlen);
1516 proto = ip6f->ip6f_nxt;
1517 plen -= sizeof(struct ip6_frag);
1518 hlen += sizeof(struct ip6_frag);
1519 /* Adjust MTU to reflect frag header size */
1520 if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)
1521 mtu -= sizeof(struct ip6_frag);
/* Only TCP and UDP are accepted as the embedded upper-layer protocol. */
1523 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1524 DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",
1528 if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
1529 nat64_check_ip6(&ip6i->ip6_dst) != 0) {
1530 DPRINTF(DP_DROPS, "Inner addresses do not passes the check");
1533 /* Check if outer dst is the same as inner src */
1534 if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {
1535 DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");
1539 /* Now we need to make a fake IPv4 packet to generate ICMP message */
/*
 * The destination is the caller-supplied aaddr; the source is extracted
 * from the inner IPv6 source address using cfg->plat_plen. A zero result
 * is tested for right after (the action taken is not visible here).
 */
1540 ip.ip_dst.s_addr = aaddr;
1541 ip.ip_src.s_addr = nat64_extract_ip4(&ip6i->ip6_src, cfg->plat_plen);
1542 if (ip.ip_src.s_addr == 0)
1544 /* XXX: Make fake ulp header */
1545 if (V_nat64out == &nat64_direct) /* init_ip4hdr will decrement it */
1546 ip6i->ip6_hlim += IPV6_HLIMDEC;
1547 nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
/*
 * Trim the front of the mbuf so that only sizeof(struct ip) bytes remain
 * ahead of the embedded payload, then overwrite them with the fake IPv4
 * header built above.
 */
1548 m_adj(m, hlen - sizeof(struct ip));
1549 bcopy(&ip, mtod(m, void *), sizeof(ip));
1550 nat64_icmp_reflect(m, type, code, (uint16_t)mtu, &cfg->stats,
1552 return (NAT64RETURN);
1555 * We must call m_freem() because mbuf pointer could be
1556 * changed with m_pullup().
1559 NAT64STAT_INC(&cfg->stats, dropped);
1560 return (NAT64RETURN);
/*
 * nat64_do_handle_ip6: translate an IPv6 packet into IPv4 and send it out.
 *
 * The IPv4 source address is aaddr; the IPv4 destination is extracted from
 * the IPv6 destination address with nat64_extract_ip4() and cfg->plat_plen.
 * ICMPv6 messages other than echo request/reply are passed on to
 * nat64_handle_icmp6(). For the remaining packets the upper-layer checksum
 * is converted, the IPv6 header(s) are replaced by an IPv4 header built
 * with nat64_init_ip4hdr(), and the packet is given to V_nat64out->output().
 *
 * m       - mbuf holding the IPv6 packet.
 * aaddr   - IPv4 address used as the translated source address.
 * aport   - value written into the TCP/UDP source port and the ICMP echo
 *           identifier (NOTE(review): the conditions guarding these
 *           rewrites are not visible here - confirm).
 * cfg     - supplies the statistics counters and plat_plen.
 * logdata - passed through to the reflect and output routines.
 *
 * Returns NAT64MFREE on the drop paths shown below and NAT64RETURN once a
 * reply was reflected or the packet was handed to the output routine
 * (whether or not output succeeded).
 * NOTE(review): the value returned when the initial address check fails is
 * not visible here.
 */
1564 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
1565 struct nat64_config *cfg, void *logdata)
1568 struct nhop_object *nh;
1569 struct sockaddr_in dst;
1570 struct ip6_frag *frag;
1571 struct ip6_hdr *ip6;
1572 struct icmp6_hdr *icmp6;
1574 int plen, hlen, proto;
1577 * XXX: we expect ipfw_chk() did m_pullup() up to upper level
1578 * protocol's headers. Also we skip some checks, that ip6_input(),
1579 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
1581 ip6 = mtod(m, struct ip6_hdr *);
1582 if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1583 nat64_check_ip6(&ip6->ip6_dst) != 0) {
1587 /* Starting from this point we must not return zero */
/* aaddr becomes the IPv4 source and has to pass nat64_check_ip4(). */
1588 ip.ip_src.s_addr = aaddr;
1589 if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
1590 DPRINTF(DP_GENERIC | DP_DROPS, "invalid source address: %08x",
1592 NAT64STAT_INC(&cfg->stats, dropped);
1593 return (NAT64MFREE);
/*
 * The IPv4 destination is extracted from the IPv6 destination; a zero
 * result causes the packet to be dropped.
 */
1596 ip.ip_dst.s_addr = nat64_extract_ip4(&ip6->ip6_dst, cfg->plat_plen);
1597 if (ip.ip_dst.s_addr == 0) {
1598 NAT64STAT_INC(&cfg->stats, dropped);
1599 return (NAT64MFREE);
/*
 * Unless the ip6stealth knob is set, a hop limit of IPV6_HLIMDEC or less
 * is answered with an ICMPv6 Time Exceeded (transit) message.
 */
1602 if (*V_nat64ip6stealth == 0 && ip6->ip6_hlim <= IPV6_HLIMDEC) {
1603 nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
1604 ICMP6_TIME_EXCEED_TRANSIT, 0, &cfg->stats, logdata);
1605 return (NAT64RETURN);
/*
 * plen starts out as the IPv6 payload length; hlen is filled in by
 * nat64_getlasthdr() through the pointer.
 */
1609 plen = ntohs(ip6->ip6_plen);
1610 proto = nat64_getlasthdr(m, &hlen);
1612 DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
1613 NAT64STAT_INC(&cfg->stats, dropped);
1614 return (NAT64MFREE);
1617 if (proto == IPPROTO_FRAGMENT) {
1618 /* ipfw_chk should m_pullup up to frag header */
1619 if (m->m_len < hlen + sizeof(*frag)) {
1621 "dropped due to mbuf isn't contigious");
1622 NAT64STAT_INC(&cfg->stats, dropped);
1623 return (NAT64MFREE);
/* The real upper-layer protocol is the one named by the fragment header. */
1625 frag = mtodo(m, hlen);
1626 proto = frag->ip6f_nxt;
1627 hlen += sizeof(*frag);
1628 /* Fragmented ICMPv6 is unsupported */
1629 if (proto == IPPROTO_ICMPV6) {
1630 DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");
1631 NAT64STAT_INC(&cfg->stats, dropped);
1632 return (NAT64MFREE);
1634 /* Fragment length must be multiple of 8 octets */
1635 if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
1636 ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
1637 nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
1638 ICMP6_PARAMPROB_HEADER,
1639 offsetof(struct ip6_hdr, ip6_plen), &cfg->stats,
1641 return (NAT64RETURN);
/*
 * Exclude everything between the fixed IPv6 header and offset hlen: plen
 * is now the length of the data that follows hlen. It must be
 * non-negative and fit inside the packet.
 */
1644 plen -= hlen - sizeof(struct ip6_hdr);
1645 if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
1646 DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",
1647 plen, m->m_pkthdr.len, hlen);
1648 NAT64STAT_INC(&cfg->stats, dropped);
1649 return (NAT64MFREE);
1652 icmp6 = NULL; /* Make gcc happy */
/*
 * Only echo request and echo reply are translated in this function; every
 * other ICMPv6 type is delegated to nat64_handle_icmp6().
 */
1653 if (proto == IPPROTO_ICMPV6) {
1654 icmp6 = mtodo(m, hlen);
1655 if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
1656 icmp6->icmp6_type != ICMP6_ECHO_REPLY)
1657 return (nat64_handle_icmp6(m, hlen, aaddr, aport,
/* Look up an IPv4 route towards the translated destination. */
1660 dst.sin_addr.s_addr = ip.ip_dst.s_addr;
1661 nh = nat64_find_route4(&dst, m);
1663 NAT64STAT_INC(&cfg->stats, noroute4);
1664 nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
1665 ICMP6_DST_UNREACH_NOROUTE, 0, &cfg->stats, logdata);
1666 return (NAT64RETURN);
/*
 * When the payload plus an IPv4 header does not fit the route MTU, the
 * sender is told so with ICMPv6 Packet Too Big carrying that MTU.
 */
1668 if (nh->nh_mtu < plen + sizeof(ip)) {
1669 nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh->nh_mtu,
1670 &cfg->stats, logdata);
1671 return (NAT64RETURN);
/* Build the IPv4 header in "ip" from ip6, frag, plen and proto. */
1673 nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
1675 /* Handle delayed checksums if needed. */
1676 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
1677 in6_delayed_cksum(m, plen, hlen);
1678 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
1680 /* Convert checksums. */
/*
 * TCP: the source port is replaced by aport with an incremental checksum
 * update, then the value from nat64_cksum_convert() is folded in
 * (NOTE(review): presumably the pseudo-header difference between the IPv6
 * and IPv4 addresses - confirm).
 */
1683 csum = &TCP(mtodo(m, hlen))->th_sum;
1685 struct tcphdr *tcp = TCP(mtodo(m, hlen));
1686 *csum = cksum_adjust(*csum, tcp->th_sport, aport);
1687 tcp->th_sport = aport;
1689 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
/* UDP: same treatment as TCP, applied to uh_sum and uh_sport. */
1692 csum = &UDP(mtodo(m, hlen))->uh_sum;
1694 struct udphdr *udp = UDP(mtodo(m, hlen));
1695 *csum = cksum_adjust(*csum, udp->uh_sport, aport);
1696 udp->uh_sport = aport;
1698 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1700 case IPPROTO_ICMPV6:
1701 /* Checksum in ICMPv6 covers pseudo header */
1702 csum = &icmp6->icmp6_cksum;
/*
 * NOTE(review): presumably cancels the IPv6 pseudo-header contribution
 * from the stored checksum - confirm cksum_add() semantics.
 */
1703 *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,
1704 IPPROTO_ICMPV6, 0));
1705 /* Convert ICMPv6 types to ICMP */
1706 proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */
1707 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)
1708 icmp6->icmp6_type = ICMP_ECHO;
1709 else /* ICMP6_ECHO_REPLY */
1710 icmp6->icmp6_type = ICMP_ECHOREPLY;
1711 *csum = cksum_adjust(*csum, (uint16_t)proto,
1712 *(uint16_t *)icmp6);
/* The echo identifier is replaced by aport, checksum updated to match. */
1714 uint16_t old_id = icmp6->icmp6_id;
1715 icmp6->icmp6_id = aport;
1716 *csum = cksum_adjust(*csum, old_id, aport);
/*
 * Trim the front of the mbuf so that sizeof(ip) bytes remain ahead of the
 * upper-layer data, copy the new IPv4 header there and pass the packet to
 * the output routine; opcnt64 is counted only when output returns 0.
 */
1721 m_adj(m, hlen - sizeof(ip));
1722 bcopy(&ip, mtod(m, void *), sizeof(ip));
1723 if (V_nat64out->output(nh->nh_ifp, m, (struct sockaddr *)&dst,
1724 &cfg->stats, logdata) == 0)
1725 NAT64STAT_INC(&cfg->stats, opcnt64);
1726 return (NAT64RETURN);