2 * Copyright (c) 2015-2018 Yandex LLC
3 * Copyright (c) 2015-2018 Andrey V. Elsukov <ae@FreeBSD.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/counter.h>
34 #include <sys/errno.h>
35 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/rmlock.h>
40 #include <sys/rwlock.h>
41 #include <sys/socket.h>
42 #include <sys/queue.h>
45 #include <net/if_var.h>
46 #include <net/if_pflog.h>
48 #include <net/netisr.h>
49 #include <net/route.h>
51 #include <netinet/in.h>
52 #include <netinet/in_fib.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip_var.h>
55 #include <netinet/ip_fw.h>
56 #include <netinet/ip6.h>
57 #include <netinet/icmp6.h>
58 #include <netinet/ip_icmp.h>
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61 #include <netinet6/in6_var.h>
62 #include <netinet6/in6_fib.h>
63 #include <netinet6/ip6_var.h>
64 #include <netinet6/ip_fw_nat64.h>
66 #include <netpfil/pf/pf.h>
67 #include <netpfil/ipfw/ip_fw_private.h>
68 #include <machine/in_cksum.h>
70 #include "ip_fw_nat64.h"
71 #include "nat64_translate.h"
74 typedef int (*nat64_output_t)(struct ifnet *, struct mbuf *,
75 struct sockaddr *, struct nat64_counters *, void *);
76 typedef int (*nat64_output_one_t)(struct mbuf *, struct nat64_counters *,
79 static int nat64_find_route4(struct nhop4_basic *, struct sockaddr_in *,
81 static int nat64_find_route6(struct nhop6_basic *, struct sockaddr_in6 *,
83 static int nat64_output_one(struct mbuf *, struct nat64_counters *, void *);
84 static int nat64_output(struct ifnet *, struct mbuf *, struct sockaddr *,
85 struct nat64_counters *, void *);
86 static int nat64_direct_output_one(struct mbuf *, struct nat64_counters *,
88 static int nat64_direct_output(struct ifnet *, struct mbuf *,
89 struct sockaddr *, struct nat64_counters *, void *);
91 struct nat64_methods {
92 nat64_output_t output;
93 nat64_output_one_t output_one;
95 static const struct nat64_methods nat64_netisr = {
96 .output = nat64_output,
97 .output_one = nat64_output_one
99 static const struct nat64_methods nat64_direct = {
100 .output = nat64_direct_output,
101 .output_one = nat64_direct_output_one
103 static VNET_DEFINE(const struct nat64_methods *, nat64out) = &nat64_netisr;
104 #define V_nat64out VNET(nat64out)
107 nat64_set_output_method(int direct)
110 V_nat64out = direct != 0 ? &nat64_direct: &nat64_netisr;
114 nat64_get_output_method(void)
117 return (V_nat64out == &nat64_direct ? 1: 0);
121 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)
124 logdata->dir = PF_OUT;
125 logdata->af = family;
126 ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);
130 nat64_direct_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
131 struct nat64_counters *stats, void *logdata)
136 nat64_log(logdata, m, dst->sa_family);
137 error = (*ifp->if_output)(ifp, m, dst, NULL);
139 NAT64STAT_INC(stats, oerrors);
144 nat64_direct_output_one(struct mbuf *m, struct nat64_counters *stats,
147 struct nhop6_basic nh6;
148 struct nhop4_basic nh4;
149 struct sockaddr_in6 dst6;
150 struct sockaddr_in dst4;
151 struct sockaddr *dst;
157 ip4 = mtod(m, struct ip *);
160 dst4.sin_addr = ip4->ip_dst;
161 error = nat64_find_route4(&nh4, &dst4, m);
163 NAT64STAT_INC(stats, noroute4);
166 dst = (struct sockaddr *)&dst4;
169 case (IPV6_VERSION >> 4):
170 ip6 = mtod(m, struct ip6_hdr *);
171 dst6.sin6_addr = ip6->ip6_dst;
172 error = nat64_find_route6(&nh6, &dst6, m);
174 NAT64STAT_INC(stats, noroute6);
177 dst = (struct sockaddr *)&dst6;
182 NAT64STAT_INC(stats, dropped);
183 DPRINTF(DP_DROPS, "dropped due to unknown IP version");
184 return (EAFNOSUPPORT);
188 return (EHOSTUNREACH);
191 nat64_log(logdata, m, dst->sa_family);
192 error = (*ifp->if_output)(ifp, m, dst, NULL);
194 NAT64STAT_INC(stats, oerrors);
199 nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
200 struct nat64_counters *stats, void *logdata)
205 ip4 = mtod(m, struct ip *);
211 case (IPV6_VERSION >> 4):
217 NAT64STAT_INC(stats, dropped);
218 DPRINTF(DP_DROPS, "unknown IP version");
219 return (EAFNOSUPPORT);
222 nat64_log(logdata, m, af);
223 if (m->m_pkthdr.rcvif == NULL)
224 m->m_pkthdr.rcvif = V_loif;
225 ret = netisr_queue(ret, m);
227 NAT64STAT_INC(stats, oerrors);
232 nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata)
235 return (nat64_output(NULL, m, NULL, stats, logdata));
239 * Check the given IPv6 prefix and length according to RFC6052:
240 * The prefixes can only have one of the following lengths:
241 * 32, 40, 48, 56, 64, or 96 (The Well-Known Prefix is 96 bits long).
242 * Returns zero on success, otherwise EINVAL.
245 nat64_check_prefixlen(int length)
261 nat64_check_prefix6(const struct in6_addr *prefix, int length)
264 if (nat64_check_prefixlen(length) != 0)
267 /* Well-known prefix has 96 prefix length */
268 if (IN6_IS_ADDR_WKPFX(prefix) && length != 96)
271 /* Bits 64 to 71 must be set to zero */
272 if (prefix->__u6_addr.__u6_addr8[8] != 0)
275 /* Some extra checks */
276 if (IN6_IS_ADDR_MULTICAST(prefix) ||
277 IN6_IS_ADDR_UNSPECIFIED(prefix) ||
278 IN6_IS_ADDR_LOOPBACK(prefix))
284 nat64_check_private_ip4(const struct nat64_config *cfg, in_addr_t ia)
287 if (cfg->flags & NAT64_ALLOW_PRIVATE)
290 /* WKPFX must not be used to represent non-global IPv4 addresses */
291 if (cfg->flags & NAT64_WKPFX) {
293 if ((ia & htonl(0xff000000)) == htonl(0x0a000000) ||
294 (ia & htonl(0xfff00000)) == htonl(0xac100000) ||
295 (ia & htonl(0xffff0000)) == htonl(0xc0a80000))
299 * 192.0.0.0/24 - reserved for IETF protocol assignments
300 * 192.88.99.0/24 - for use as 6to4 relay anycast addresses
301 * 198.18.0.0/15 - for use in benchmark tests
302 * 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use
303 * in documentation and example code
305 if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) ||
306 (ia & htonl(0xffffff00)) == htonl(0xc0586300) ||
307 (ia & htonl(0xfffffe00)) == htonl(0xc6120000) ||
308 (ia & htonl(0xffffff00)) == htonl(0xc0000200) ||
309 (ia & htonl(0xfffffe00)) == htonl(0xc6336400) ||
310 (ia & htonl(0xffffff00)) == htonl(0xcb007100))
317 * Embed @ia IPv4 address into @ip6 IPv6 address.
318 * The embedding position is determined by the prefix length @plen.
321 nat64_embed_ip4(struct in6_addr *ip6, int plen, in_addr_t ia)
327 ip6->s6_addr32[plen / 32] = ia;
333 * Preserve prefix bits.
334 * Since suffix bits should be zero and reserved for future
335 * use, we just overwrite the whole word, where they are.
337 ip6->s6_addr32[1] &= 0xffffffff << (32 - plen % 32);
338 #if BYTE_ORDER == BIG_ENDIAN
339 ip6->s6_addr32[1] |= ia >> (plen % 32);
340 ip6->s6_addr32[2] = ia << (24 - plen % 32);
341 #elif BYTE_ORDER == LITTLE_ENDIAN
342 ip6->s6_addr32[1] |= ia << (plen % 32);
343 ip6->s6_addr32[2] = ia >> (24 - plen % 32);
347 #if BYTE_ORDER == BIG_ENDIAN
348 ip6->s6_addr32[2] = ia >> 8;
349 ip6->s6_addr32[3] = ia << 24;
350 #elif BYTE_ORDER == LITTLE_ENDIAN
351 ip6->s6_addr32[2] = ia << 8;
352 ip6->s6_addr32[3] = ia >> 24;
356 panic("Wrong plen: %d", plen);
359 * Bits 64 to 71 of the address are reserved for compatibility
360 * with the host identifier format defined in the IPv6 addressing
361 * architecture [RFC4291]. These bits MUST be set to zero.
363 ip6->s6_addr8[8] = 0;
367 nat64_extract_ip4(const struct in6_addr *ip6, int plen)
372 * According to RFC 6052 p2.2:
373 * IPv4-embedded IPv6 addresses are composed of a variable-length
374 * prefix, the embedded IPv4 address, and a variable length suffix.
375 * The suffix bits are reserved for future extensions and SHOULD
380 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)
384 if (ip6->s6_addr32[3] != 0 ||
385 (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)
389 if (ip6->s6_addr32[3] != 0 ||
390 (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)
394 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)
398 if (ip6->s6_addr8[8] != 0 ||
399 (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)
405 ia = ip6->s6_addr32[plen / 32];
410 #if BYTE_ORDER == BIG_ENDIAN
411 ia = (ip6->s6_addr32[1] << (plen % 32)) |
412 (ip6->s6_addr32[2] >> (24 - plen % 32));
413 #elif BYTE_ORDER == LITTLE_ENDIAN
414 ia = (ip6->s6_addr32[1] >> (plen % 32)) |
415 (ip6->s6_addr32[2] << (24 - plen % 32));
419 #if BYTE_ORDER == BIG_ENDIAN
420 ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);
421 #elif BYTE_ORDER == LITTLE_ENDIAN
422 ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);
428 if (nat64_check_ip4(ia) == 0)
431 DPRINTF(DP_GENERIC | DP_DROPS,
432 "invalid destination address: %08x", ia);
435 DPRINTF(DP_GENERIC | DP_DROPS, "invalid IPv4-embedded IPv6 address");
440 * According to RFC 1624 the equation for incremental checksum update is:
441 * HC' = ~(~HC + ~m + m') -- [Eqn. 3]
442 * HC' = HC - ~m - m' -- [Eqn. 4]
443 * So, when we are replacing IPv4 addresses with IPv6 ones, we
444 * can assume that the new bytes previously were zeros, and vice versa -
445 * when we are replacing IPv6 addresses with IPv4, now unused bytes become
446 * zeros. The payload length in pseudo header has bigger size, but one
447 * half of it should be zero. Using the equation 4 we get:
448 * HC' = HC - (~m0 + m0') -- m0 is first changed word
449 * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word
450 * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... =
451 * = HC - sum(~m[i] + m'[i])
453 * The function result should be used as follows:
454 * IPv6 to IPv4: HC' = cksum_add(HC, result)
455 * IPv4 to IPv6: HC' = cksum_add(HC, ~result)
458 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)
463 sum = ~ip->ip_src.s_addr >> 16;
464 sum += ~ip->ip_src.s_addr & 0xffff;
465 sum += ~ip->ip_dst.s_addr >> 16;
466 sum += ~ip->ip_dst.s_addr & 0xffff;
468 for (p = (uint16_t *)&ip6->ip6_src;
469 p < (uint16_t *)(&ip6->ip6_src + 2); p++)
473 sum = (sum & 0xffff) + (sum >> 16);
478 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,
479 uint16_t plen, uint8_t proto, struct ip *ip)
482 /* assume addresses are already initialized */
483 ip->ip_v = IPVERSION;
484 ip->ip_hl = sizeof(*ip) >> 2;
485 ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
486 ip->ip_len = htons(sizeof(*ip) + plen);
487 ip->ip_ttl = ip6->ip6_hlim;
488 /* Forwarding code will decrement TTL for netisr based output. */
489 if (V_nat64out == &nat64_direct)
490 ip->ip_ttl -= IPV6_HLIMDEC;
492 ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;
495 ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);
496 if (frag->ip6f_offlg & IP6F_MORE_FRAG)
497 ip->ip_off |= htons(IP_MF);
499 ip->ip_off = htons(IP_DF);
501 ip->ip_sum = in_cksum_hdr(ip);
504 #define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))
505 static NAT64NOINLINE int
506 nat64_fragment6(struct nat64_counters *stats, struct ip6_hdr *ip6,
507 struct mbufq *mq, struct mbuf *m, uint32_t mtu, uint16_t ip_id,
510 struct ip6_frag ip6f;
512 uint16_t hlen, len, offset;
515 plen = ntohs(ip6->ip6_plen);
516 hlen = sizeof(struct ip6_hdr);
518 /* Fragmentation isn't needed */
519 if (ip_off == 0 && plen <= mtu - hlen) {
520 M_PREPEND(m, hlen, M_NOWAIT);
522 NAT64STAT_INC(stats, nomem);
525 bcopy(ip6, mtod(m, void *), hlen);
526 if (mbufq_enqueue(mq, m) != 0) {
528 NAT64STAT_INC(stats, dropped);
529 DPRINTF(DP_DROPS, "dropped due to mbufq overflow");
535 hlen += sizeof(struct ip6_frag);
536 ip6f.ip6f_reserved = 0;
537 ip6f.ip6f_nxt = ip6->ip6_nxt;
538 ip6->ip6_nxt = IPPROTO_FRAGMENT;
541 * We have got an IPv4 fragment.
542 * Use offset value and ip_id from original fragment.
544 ip6f.ip6f_ident = htonl(ntohs(ip_id));
545 offset = (ntohs(ip_off) & IP_OFFMASK) << 3;
546 NAT64STAT_INC(stats, ifrags);
548 /* The packet size exceeds interface MTU */
549 ip6f.ip6f_ident = htonl(ip6_randomid());
550 offset = 0; /* First fragment*/
552 while (plen > 0 && m != NULL) {
554 len = FRAGSZ(mtu) & ~7;
557 ip6->ip6_plen = htons(len + sizeof(ip6f));
558 ip6f.ip6f_offlg = ntohs(offset);
559 if (len < plen || (ip_off & htons(IP_MF)) != 0)
560 ip6f.ip6f_offlg |= IP6F_MORE_FRAG;
564 n = m_split(m, len, M_NOWAIT);
568 M_PREPEND(m, hlen, M_NOWAIT);
571 bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));
572 bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),
573 sizeof(struct ip6_frag));
574 if (mbufq_enqueue(mq, m) != 0)
578 NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));
586 NAT64STAT_INC(stats, nomem);
590 static NAT64NOINLINE int
591 nat64_find_route6(struct nhop6_basic *pnh, struct sockaddr_in6 *dst,
595 if (fib6_lookup_nh_basic(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0,
597 return (EHOSTUNREACH);
598 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT))
599 return (EHOSTUNREACH);
601 * XXX: we need to use destination address with embedded scope
602 * zone id, because LLTABLE uses such form of addresses for lookup.
604 dst->sin6_family = AF_INET6;
605 dst->sin6_len = sizeof(*dst);
606 dst->sin6_addr = pnh->nh_addr;
607 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
608 dst->sin6_addr.s6_addr16[1] =
609 htons(pnh->nh_ifp->if_index & 0xffff);
611 dst->sin6_scope_id = 0;
612 dst->sin6_flowinfo = 0;
617 #define NAT64_ICMP6_PLEN 64
618 static NAT64NOINLINE void
619 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,
620 struct nat64_counters *stats, void *logdata)
622 struct icmp6_hdr *icmp6;
623 struct ip6_hdr *ip6, *oip6;
628 plen = nat64_getlasthdr(m, &len);
630 DPRINTF(DP_DROPS, "mbuf isn't contigious");
634 * Do not send ICMPv6 in reply to ICMPv6 errors.
636 if (plen == IPPROTO_ICMPV6) {
637 if (m->m_len < len + sizeof(*icmp6)) {
638 DPRINTF(DP_DROPS, "mbuf isn't contigious");
641 icmp6 = mtodo(m, len);
642 if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST ||
643 icmp6->icmp6_type == ND_REDIRECT) {
644 DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to "
650 if (icmp6_ratelimit(&ip6->ip6_src, type, code))
653 ip6 = mtod(m, struct ip6_hdr *);
655 case ICMP6_DST_UNREACH:
656 case ICMP6_PACKET_TOO_BIG:
657 case ICMP6_TIME_EXCEEDED:
658 case ICMP6_PARAM_PROB:
663 /* Calculate length of ICMPv6 payload */
664 len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:
667 /* Create new ICMPv6 datagram */
668 plen = len + sizeof(struct icmp6_hdr);
669 n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,
670 MT_HEADER, M_PKTHDR);
672 NAT64STAT_INC(stats, nomem);
677 * Move pkthdr from the original mbuf. We should have initialized some
678 * fields, because we can reinject this mbuf to netisr and it will
679 * go through the input path (which requires at least rcvif to be set).
680 * Also do M_ALIGN() to reduce the chance of needing to allocate a new
681 * mbuf in the chain, when we will do M_PREPEND() or make some type of
685 M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);
687 n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
688 oip6 = mtod(n, struct ip6_hdr *);
689 oip6->ip6_src = ip6->ip6_dst;
690 oip6->ip6_dst = ip6->ip6_src;
691 oip6->ip6_nxt = IPPROTO_ICMPV6;
693 oip6->ip6_vfc |= IPV6_VERSION;
694 oip6->ip6_hlim = V_ip6_defhlim;
695 oip6->ip6_plen = htons(plen);
697 icmp6 = mtodo(n, sizeof(struct ip6_hdr));
698 icmp6->icmp6_cksum = 0;
699 icmp6->icmp6_type = type;
700 icmp6->icmp6_code = code;
701 icmp6->icmp6_mtu = htonl(mtu);
703 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +
704 sizeof(struct icmp6_hdr)));
705 icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,
706 sizeof(struct ip6_hdr), plen);
708 V_nat64out->output_one(n, stats, logdata);
711 NAT64STAT_INC(stats, dropped);
715 static NAT64NOINLINE int
716 nat64_find_route4(struct nhop4_basic *pnh, struct sockaddr_in *dst,
720 if (fib4_lookup_nh_basic(M_GETFIB(m), dst->sin_addr, 0, 0, pnh) != 0)
721 return (EHOSTUNREACH);
722 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT))
723 return (EHOSTUNREACH);
725 dst->sin_family = AF_INET;
726 dst->sin_len = sizeof(*dst);
727 dst->sin_addr = pnh->nh_addr;
732 #define NAT64_ICMP_PLEN 64
733 static NAT64NOINLINE void
734 nat64_icmp_reflect(struct mbuf *m, uint8_t type,
735 uint8_t code, uint16_t mtu, struct nat64_counters *stats, void *logdata)
742 ip = mtod(m, struct ip *);
743 /* Do not send ICMP error if packet is not the first fragment */
744 if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) {
745 DPRINTF(DP_DROPS, "not first fragment");
748 /* Do not send ICMP in reply to ICMP errors */
749 if (ip->ip_p == IPPROTO_ICMP) {
750 if (m->m_len < (ip->ip_hl << 2)) {
751 DPRINTF(DP_DROPS, "mbuf isn't contigious");
754 icmp = mtodo(m, ip->ip_hl << 2);
755 if (!ICMP_INFOTYPE(icmp->icmp_type)) {
756 DPRINTF(DP_DROPS, "do not send ICMP in reply to "
769 /* Calculate length of ICMP payload */
770 len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:
773 /* Create new ICMPv4 datagram */
774 plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);
775 n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,
776 MT_HEADER, M_PKTHDR);
778 NAT64STAT_INC(stats, nomem);
783 M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);
785 n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;
786 oip = mtod(n, struct ip *);
787 oip->ip_v = IPVERSION;
788 oip->ip_hl = sizeof(struct ip) >> 2;
790 oip->ip_len = htons(n->m_pkthdr.len);
791 oip->ip_ttl = V_ip_defttl;
792 oip->ip_p = IPPROTO_ICMP;
794 oip->ip_off = htons(IP_DF);
795 oip->ip_src = ip->ip_dst;
796 oip->ip_dst = ip->ip_src;
798 oip->ip_sum = in_cksum_hdr(oip);
800 icmp = mtodo(n, sizeof(struct ip));
801 icmp->icmp_type = type;
802 icmp->icmp_code = code;
803 icmp->icmp_cksum = 0;
804 icmp->icmp_pmvoid = 0;
805 icmp->icmp_nextmtu = htons(mtu);
806 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +
807 sizeof(struct icmphdr) + sizeof(uint32_t)));
808 icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,
811 V_nat64out->output_one(n, stats, logdata);
814 NAT64STAT_INC(stats, dropped);
818 /* Translate ICMP echo request/reply into ICMPv6 */
820 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,
821 uint16_t id, uint8_t type)
825 old = *(uint16_t *)icmp6; /* save type+code in one word */
826 icmp6->icmp6_type = type;
827 /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */
828 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
829 old, *(uint16_t *)icmp6);
831 old = icmp6->icmp6_id;
832 icmp6->icmp6_id = id;
833 /* Reflect ICMP id translation in the cksum */
834 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
837 /* Reflect IPv6 pseudo header in the cksum */
838 icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
839 IPPROTO_ICMPV6, ~icmp6->icmp6_cksum);
842 static NAT64NOINLINE struct mbuf *
843 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,
844 int offset, struct nat64_config *cfg)
850 struct ip6_hdr *eip6;
856 if (m->m_len < offset + ICMP_MINLEN)
857 m = m_pullup(m, offset + ICMP_MINLEN);
859 NAT64STAT_INC(&cfg->stats, nomem);
863 icmp = mtodo(m, offset);
865 switch (icmp->icmp_type) {
867 type = ICMP6_ECHO_REPLY;
871 type = ICMP6_DST_UNREACH;
872 switch (icmp->icmp_code) {
873 case ICMP_UNREACH_NET:
874 case ICMP_UNREACH_HOST:
875 case ICMP_UNREACH_SRCFAIL:
876 case ICMP_UNREACH_NET_UNKNOWN:
877 case ICMP_UNREACH_HOST_UNKNOWN:
878 case ICMP_UNREACH_TOSNET:
879 case ICMP_UNREACH_TOSHOST:
880 code = ICMP6_DST_UNREACH_NOROUTE;
882 case ICMP_UNREACH_PROTOCOL:
883 type = ICMP6_PARAM_PROB;
884 code = ICMP6_PARAMPROB_NEXTHEADER;
886 case ICMP_UNREACH_PORT:
887 code = ICMP6_DST_UNREACH_NOPORT;
889 case ICMP_UNREACH_NEEDFRAG:
890 type = ICMP6_PACKET_TOO_BIG;
892 /* XXX: needs an additional look */
893 mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20);
895 case ICMP_UNREACH_NET_PROHIB:
896 case ICMP_UNREACH_HOST_PROHIB:
897 case ICMP_UNREACH_FILTER_PROHIB:
898 case ICMP_UNREACH_PRECEDENCE_CUTOFF:
899 code = ICMP6_DST_UNREACH_ADMIN;
902 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
903 icmp->icmp_type, icmp->icmp_code);
908 type = ICMP6_TIME_EXCEEDED;
909 code = icmp->icmp_code;
912 type = ICMP6_ECHO_REQUEST;
916 type = ICMP6_PARAM_PROB;
917 switch (icmp->icmp_code) {
918 case ICMP_PARAMPROB_ERRATPTR:
919 case ICMP_PARAMPROB_LENGTH:
920 code = ICMP6_PARAMPROB_HEADER;
921 switch (icmp->icmp_pptr) {
922 case 0: /* Version/IHL */
923 case 1: /* Type Of Service */
924 mtu = icmp->icmp_pptr;
926 case 2: /* Total Length */
927 case 3: mtu = 4; /* Payload Length */
929 case 8: /* Time to Live */
930 mtu = 7; /* Hop Limit */
932 case 9: /* Protocol */
933 mtu = 6; /* Next Header */
935 case 12: /* Source address */
941 case 16: /* Destination address */
947 default: /* Silently drop */
948 DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
949 " code %d, pptr %d", icmp->icmp_type,
950 icmp->icmp_code, icmp->icmp_pptr);
955 DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
956 " code %d, pptr %d", icmp->icmp_type,
957 icmp->icmp_code, icmp->icmp_pptr);
962 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
963 icmp->icmp_type, icmp->icmp_code);
967 * For echo request/reply we can use the original payload,
968 * but we need to adjust icmp_cksum, because the ICMPv6 cksum covers
969 * the IPv6 pseudo header and ICMPv6 types differ from ICMPv4.
971 if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {
972 nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);
976 * For other types of ICMP messages we need to translate inner
977 * IPv4 header to IPv6 header.
978 * Assume ICMP src is the same as payload dst
979 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header
980 * and ( NATIP1, Hostdst1 ) in ICMP copy header.
981 * In that case, we already have map for NATIP1 and GWsrc1.
982 * The only thing we need is to copy IPv6 map prefix to
985 hlen = offset + ICMP_MINLEN;
986 if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {
987 DPRINTF(DP_DROPS, "Message is too short %d",
991 m_copydata(m, hlen, sizeof(struct ip), (char *)&ip);
992 if (ip.ip_v != IPVERSION) {
993 DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v);
996 hlen += ip.ip_hl << 2; /* Skip inner IP header */
997 if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||
998 nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||
999 nat64_check_private_ip4(cfg, ip.ip_src.s_addr) != 0 ||
1000 nat64_check_private_ip4(cfg, ip.ip_dst.s_addr) != 0) {
1001 DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x",
1002 ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr));
1005 if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
1006 DPRINTF(DP_DROPS, "Message is too short %d",
1012 * Check that inner source matches the outer destination.
1013 * XXX: We need some method to convert IPv4 into IPv6 address here,
1014 * and compare IPv6 addresses.
1016 if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {
1017 DPRINTF(DP_GENERIC, "Inner source doesn't match destination ",
1018 "%04x vs %04x", ip.ip_src.s_addr,
1019 nat64_get_ip4(&ip6->ip6_dst));
1024 * Create new mbuf for ICMPv6 datagram.
1025 * NOTE: len is data length just after inner IP header.
1027 len = m->m_pkthdr.len - hlen;
1028 if (sizeof(struct ip6_hdr) +
1029 sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)
1030 len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -
1031 sizeof(struct ip6_hdr);
1032 plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;
1033 n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);
1035 NAT64STAT_INC(&cfg->stats, nomem);
1039 m_move_pkthdr(n, m);
1040 M_ALIGN(n, offset + plen + max_hdr);
1041 n->m_len = n->m_pkthdr.len = offset + plen;
1042 /* Adjust ip6_plen in outer header */
1043 ip6->ip6_plen = htons(plen);
1044 /* Construct new inner IPv6 header */
1045 eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));
1046 eip6->ip6_src = ip6->ip6_dst;
1048 /* Use the same prefix that we have in outer header */
1049 eip6->ip6_dst = ip6->ip6_src;
1050 MPASS(cfg->flags & NAT64_PLATPFX);
1051 nat64_embed_ip4(&eip6->ip6_dst, cfg->plat_plen, ip.ip_dst.s_addr);
1053 eip6->ip6_flow = htonl(ip.ip_tos << 20);
1054 eip6->ip6_vfc |= IPV6_VERSION;
1055 eip6->ip6_hlim = ip.ip_ttl;
1056 eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));
1057 eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;
1058 m_copydata(m, hlen, len, (char *)(eip6 + 1));
1060 * We need to translate source port in the inner ULP header,
1061 * and adjust ULP checksum.
1065 if (len < offsetof(struct tcphdr, th_sum))
1067 tcp = TCP(eip6 + 1);
1069 tcp->th_sum = cksum_adjust(tcp->th_sum,
1070 tcp->th_sport, icmpid);
1071 tcp->th_sport = icmpid;
1073 tcp->th_sum = cksum_add(tcp->th_sum,
1074 ~nat64_cksum_convert(eip6, &ip));
1077 if (len < offsetof(struct udphdr, uh_sum))
1079 udp = UDP(eip6 + 1);
1081 udp->uh_sum = cksum_adjust(udp->uh_sum,
1082 udp->uh_sport, icmpid);
1083 udp->uh_sport = icmpid;
1085 udp->uh_sum = cksum_add(udp->uh_sum,
1086 ~nat64_cksum_convert(eip6, &ip));
1090 * Check if this is an ICMP error message for echo request
1091 * that we sent. I.e. ULP in the data containing invoking
1092 * packet is IPPROTO_ICMP and its type is ICMP_ECHO.
1094 icmp = (struct icmp *)(eip6 + 1);
1095 if (icmp->icmp_type != ICMP_ECHO) {
1100 * For our client this original datagram should look
1101 * like it was an ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
1102 * Thus we need to adjust icmp_cksum and convert the type from
1103 * ICMP_ECHO to ICMP6_ECHO_REQUEST.
1105 nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
1106 ICMP6_ECHO_REQUEST);
1109 /* Convert ICMPv4 into ICMPv6 header */
1110 icmp = mtodo(n, offset);
1111 ICMP6(icmp)->icmp6_type = type;
1112 ICMP6(icmp)->icmp6_code = code;
1113 ICMP6(icmp)->icmp6_mtu = htonl(mtu);
1114 ICMP6(icmp)->icmp6_cksum = 0;
1115 ICMP6(icmp)->icmp6_cksum = cksum_add(
1116 ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
1117 in_cksum_skip(n, n->m_pkthdr.len, offset));
1121 NAT64STAT_INC(&cfg->stats, dropped);
1126 nat64_getlasthdr(struct mbuf *m, int *offset)
1128 struct ip6_hdr *ip6;
1129 struct ip6_hbh *hbh;
1137 if (m->m_len < hlen + sizeof(*ip6))
1140 ip6 = mtodo(m, hlen);
1141 hlen += sizeof(*ip6);
1142 proto = ip6->ip6_nxt;
1143 /* Skip extension headers */
1144 while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
1145 proto == IPPROTO_DSTOPTS) {
1146 hbh = mtodo(m, hlen);
1148 * We expect the mbuf to have contiguous data up to
1149 * the upper level header.
1151 if (m->m_len < hlen)
1154 * We don't support the Jumbo payload option,
1157 if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)
1159 proto = hbh->ip6h_nxt;
1160 hlen += (hbh->ip6h_len + 1) << 3;
1168 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
1169 struct in6_addr *daddr, uint16_t lport, struct nat64_config *cfg,
1172 struct nhop6_basic nh;
1174 struct sockaddr_in6 dst;
1177 uint16_t ip_id, ip_off;
1182 ip = mtod(m, struct ip*);
1184 if (ip->ip_ttl <= IPTTLDEC) {
1185 nat64_icmp_reflect(m, ICMP_TIMXCEED,
1186 ICMP_TIMXCEED_INTRANS, 0, &cfg->stats, logdata);
1187 return (NAT64RETURN);
1190 ip6.ip6_dst = *daddr;
1191 ip6.ip6_src = *saddr;
1193 hlen = ip->ip_hl << 2;
1194 plen = ntohs(ip->ip_len) - hlen;
1197 /* Save ip_id and ip_off, both are in network byte order */
1199 ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
1201 /* Fragment length must be multiple of 8 octets */
1202 if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
1203 nat64_icmp_reflect(m, ICMP_PARAMPROB,
1204 ICMP_PARAMPROB_LENGTH, 0, &cfg->stats, logdata);
1205 return (NAT64RETURN);
1207 /* Fragmented ICMP is unsupported */
1208 if (proto == IPPROTO_ICMP && ip_off != 0) {
1209 DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");
1210 NAT64STAT_INC(&cfg->stats, dropped);
1211 return (NAT64MFREE);
1214 dst.sin6_addr = ip6.ip6_dst;
1215 if (nat64_find_route6(&nh, &dst, m) != 0) {
1216 NAT64STAT_INC(&cfg->stats, noroute6);
1217 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
1218 &cfg->stats, logdata);
1219 return (NAT64RETURN);
1221 if (nh.nh_mtu < plen + sizeof(ip6) &&
1222 (ip->ip_off & htons(IP_DF)) != 0) {
1223 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
1224 FRAGSZ(nh.nh_mtu) + sizeof(struct ip), &cfg->stats, logdata);
1225 return (NAT64RETURN);
1228 ip6.ip6_flow = htonl(ip->ip_tos << 20);
1229 ip6.ip6_vfc |= IPV6_VERSION;
1230 ip6.ip6_hlim = ip->ip_ttl;
1231 /* Forwarding code will decrement TTL for netisr based output. */
1232 if (V_nat64out == &nat64_direct)
1233 ip6.ip6_hlim -= IPTTLDEC;
1234 ip6.ip6_plen = htons(plen);
1235 ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;
1236 /* Convert checksums. */
1239 csum = &TCP(mtodo(m, hlen))->th_sum;
1241 struct tcphdr *tcp = TCP(mtodo(m, hlen));
1242 *csum = cksum_adjust(*csum, tcp->th_dport, lport);
1243 tcp->th_dport = lport;
1245 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1248 csum = &UDP(mtodo(m, hlen))->uh_sum;
1250 struct udphdr *udp = UDP(mtodo(m, hlen));
1251 *csum = cksum_adjust(*csum, udp->uh_dport, lport);
1252 udp->uh_dport = lport;
1254 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1257 m = nat64_icmp_translate(m, &ip6, lport, hlen, cfg);
1258 if (m == NULL) /* stats already accounted */
1259 return (NAT64RETURN);
1263 mbufq_init(&mq, 255);
1264 nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh.nh_mtu, ip_id, ip_off);
1265 while ((m = mbufq_dequeue(&mq)) != NULL) {
1266 if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst,
1267 &cfg->stats, logdata) != 0)
1269 NAT64STAT_INC(&cfg->stats, opcnt46);
1272 return (NAT64RETURN);
1276 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
1277 struct nat64_config *cfg, void *logdata)
1280 struct icmp6_hdr *icmp6;
1281 struct ip6_frag *ip6f;
1282 struct ip6_hdr *ip6, *ip6i;
1288 ip6 = mtod(m, struct ip6_hdr *);
1289 if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1290 nat64_check_ip6(&ip6->ip6_dst) != 0)
1293 proto = nat64_getlasthdr(m, &hlen);
1294 if (proto != IPPROTO_ICMPV6) {
1296 "dropped due to mbuf isn't contigious");
1297 NAT64STAT_INC(&cfg->stats, dropped);
1298 return (NAT64MFREE);
1303 * Translate ICMPv6 type and code to ICMPv4 (RFC7915).
1304 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6().
1306 icmp6 = mtodo(m, hlen);
1308 switch (icmp6->icmp6_type) {
1309 case ICMP6_DST_UNREACH:
1310 type = ICMP_UNREACH;
1311 switch (icmp6->icmp6_code) {
1312 case ICMP6_DST_UNREACH_NOROUTE:
1313 case ICMP6_DST_UNREACH_BEYONDSCOPE:
1314 case ICMP6_DST_UNREACH_ADDR:
1315 code = ICMP_UNREACH_HOST;
1317 case ICMP6_DST_UNREACH_ADMIN:
1318 code = ICMP_UNREACH_HOST_PROHIB;
1320 case ICMP6_DST_UNREACH_NOPORT:
1321 code = ICMP_UNREACH_PORT;
1324 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1325 " code %d", icmp6->icmp6_type,
1327 NAT64STAT_INC(&cfg->stats, dropped);
1328 return (NAT64MFREE);
1331 case ICMP6_PACKET_TOO_BIG:
1332 type = ICMP_UNREACH;
1333 code = ICMP_UNREACH_NEEDFRAG;
1334 mtu = ntohl(icmp6->icmp6_mtu);
1335 if (mtu < IPV6_MMTU) {
1336 DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d,"
1337 " code %d", mtu, icmp6->icmp6_type,
1339 NAT64STAT_INC(&cfg->stats, dropped);
1340 return (NAT64MFREE);
1343 * Adjust MTU to reflect the difference between
1344 * IPv6 and IPv4 headers.
1346 mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);
1348 case ICMP6_TIME_EXCEEDED:
1349 type = ICMP_TIMXCEED;
1350 code = icmp6->icmp6_code;
1352 case ICMP6_PARAM_PROB:
1353 switch (icmp6->icmp6_code) {
1354 case ICMP6_PARAMPROB_HEADER:
1355 type = ICMP_PARAMPROB;
1356 code = ICMP_PARAMPROB_ERRATPTR;
1357 mtu = ntohl(icmp6->icmp6_pptr);
1359 case 0: /* Version/Traffic Class */
1360 case 1: /* Traffic Class/Flow Label */
1362 case 4: /* Payload Length */
1366 case 6: /* Next Header */
1369 case 7: /* Hop Limit */
1373 if (mtu >= 8 && mtu <= 23) {
1374 mtu = 12; /* Source address */
1377 if (mtu >= 24 && mtu <= 39) {
1378 mtu = 16; /* Destination address */
1381 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1382 " code %d, pptr %d", icmp6->icmp6_type,
1383 icmp6->icmp6_code, mtu);
1384 NAT64STAT_INC(&cfg->stats, dropped);
1385 return (NAT64MFREE);
1387 case ICMP6_PARAMPROB_NEXTHEADER:
1388 type = ICMP_UNREACH;
1389 code = ICMP_UNREACH_PROTOCOL;
1392 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1393 " code %d, pptr %d", icmp6->icmp6_type,
1394 icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));
1395 NAT64STAT_INC(&cfg->stats, dropped);
1396 return (NAT64MFREE);
1400 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",
1401 icmp6->icmp6_type, icmp6->icmp6_code);
1402 NAT64STAT_INC(&cfg->stats, dropped);
1403 return (NAT64MFREE);
1406 hlen += sizeof(struct icmp6_hdr);
1407 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
1408 NAT64STAT_INC(&cfg->stats, dropped);
1409 DPRINTF(DP_DROPS, "Message is too short %d",
1411 return (NAT64MFREE);
1414 * We need at least ICMP_MINLEN bytes of original datagram payload
1415 * to generate ICMP message. It is nice that ICMP_MINLEN is equal
1416 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment
1417 * header we will not have to do m_pullup() again.
1419 * What we have here:
1420 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
1421 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
1422 * We need to translate it to:
1424 * Outer header: (alias_host, v4exthost)
1425 * Inner header: (v4exthost, alias_host) [sport, alias_port]
1427 * Assume caller function has checked if v4mapPRefix+v4host
1428 * matches configured prefix.
1429 * The only two things we should be provided with are mapping between
1430 * IPv6iHost <> alias_host and between dport and alias_port.
1432 if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
1433 m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
1435 NAT64STAT_INC(&cfg->stats, nomem);
1436 return (NAT64RETURN);
1438 ip6 = mtod(m, struct ip6_hdr *);
1439 ip6i = mtodo(m, hlen);
1441 proto = ip6i->ip6_nxt;
1442 plen = ntohs(ip6i->ip6_plen);
1443 hlen += sizeof(struct ip6_hdr);
1444 if (proto == IPPROTO_FRAGMENT) {
1445 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
1448 ip6f = mtodo(m, hlen);
1449 proto = ip6f->ip6f_nxt;
1450 plen -= sizeof(struct ip6_frag);
1451 hlen += sizeof(struct ip6_frag);
1452 /* Ajust MTU to reflect frag header size */
1453 if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)
1454 mtu -= sizeof(struct ip6_frag);
1456 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1457 DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",
1461 if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
1462 nat64_check_ip6(&ip6i->ip6_dst) != 0) {
1463 DPRINTF(DP_DROPS, "Inner addresses do not passes the check");
1466 /* Check if outer dst is the same as inner src */
1467 if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {
1468 DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");
1472 /* Now we need to make a fake IPv4 packet to generate ICMP message */
1473 ip.ip_dst.s_addr = aaddr;
1474 ip.ip_src.s_addr = nat64_extract_ip4(&ip6i->ip6_src, cfg->plat_plen);
1475 if (ip.ip_src.s_addr == 0)
1477 /* XXX: Make fake ulp header */
1478 if (V_nat64out == &nat64_direct) /* init_ip4hdr will decrement it */
1479 ip6i->ip6_hlim += IPV6_HLIMDEC;
1480 nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
1481 m_adj(m, hlen - sizeof(struct ip));
1482 bcopy(&ip, mtod(m, void *), sizeof(ip));
1483 nat64_icmp_reflect(m, type, code, (uint16_t)mtu, &cfg->stats,
1485 return (NAT64RETURN);
1488 * We must call m_freem() because mbuf pointer could be
1489 * changed with m_pullup().
1492 NAT64STAT_INC(&cfg->stats, dropped);
1493 return (NAT64RETURN);
/*
 * nat64_do_handle_ip6: translate an IPv6 packet into IPv4 and send it
 * with V_nat64out->output().
 *
 * NOTE(review): this listing omits a number of source lines (braces,
 * switch labels, some declarations and guarding conditions), so this
 * description covers only the statements that are visible.
 *
 * Parameters:
 *   m       - mbuf with the IPv6 packet; according to the comment in the
 *             body, ipfw_chk() is expected to have pulled up the headers.
 *   aaddr   - becomes the IPv4 source address (ip.ip_src) and must pass
 *             nat64_check_ip4().
 *   aport   - written into the TCP or UDP source port, or the ICMP echo
 *             id, with the checksum adjusted through cksum_adjust(); the
 *             conditions guarding these rewrites are not visible here.
 *   cfg     - statistics counters (cfg->stats) and plat_plen, which is
 *             used to extract the IPv4 destination from ip6_dst.
 *   logdata - forwarded to nat64_icmp6_reflect() and to the output
 *             routine.
 *
 * Visible processing order:
 *   1. nat64_check_ip6() on both addresses (the action on failure is not
 *      shown).
 *   2. aaddr is validated and the IPv4 destination is extracted; either
 *      failure counts a drop and returns NAT64MFREE.
 *   3. A hop limit not above IPV6_HLIMDEC is answered with ICMPv6 Time
 *      Exceeded (transit) and NAT64RETURN.
 *   4. nat64_getlasthdr() finds the last header.  For fragments the
 *      fragment header is stepped over, fragmented ICMPv6 is dropped, and
 *      a fragment with IP6F_MORE_FRAG set whose payload length is not a
 *      multiple of 8 is answered with ICMPv6 Parameter Problem pointing
 *      at ip6_plen.
 *   5. plen is reduced by the bytes between the IPv6 header and offset
 *      hlen, then checked against m_pkthdr.len; a bad length is dropped.
 *   6. ICMPv6 other than echo request or echo reply is delegated to
 *      nat64_handle_icmp6().
 *   7. nat64_find_route4() failure counts noroute4 and reflects ICMPv6
 *      Destination Unreachable (no route); an nh_mtu smaller than
 *      plen + sizeof(ip) reflects Packet Too Big carrying nh_mtu.
 *   8. nat64_init_ip4hdr() fills the IPv4 header and the upper-layer
 *      checksum is converted: TCP and UDP add nat64_cksum_convert(),
 *      ICMPv6 folds in in6_cksum_pseudo() and is adjusted for the type
 *      change to ICMP_ECHO or ICMP_ECHOREPLY.
 *   9. m_adj() trims the front so sizeof(ip) bytes remain ahead of offset
 *      hlen, the IPv4 header is copied there and the packet is output;
 *      opcnt64 is incremented when output returns 0, and NAT64RETURN is
 *      returned either way.
 */
1497 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
1498 struct nat64_config *cfg, void *logdata)
1501 struct nhop4_basic nh;
1502 struct sockaddr_in dst;
1503 struct ip6_frag *frag;
1504 struct ip6_hdr *ip6;
1505 struct icmp6_hdr *icmp6;
1507 int plen, hlen, proto;
1510 * XXX: we expect ipfw_chk() did m_pullup() up to upper level
1511 * protocol's headers. Also we skip some checks, that ip6_input(),
1512 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
1514 ip6 = mtod(m, struct ip6_hdr *);
1515 if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1516 nat64_check_ip6(&ip6->ip6_dst) != 0) {
1520 /* Starting from this point we must not return zero */
1521 ip.ip_src.s_addr = aaddr;
1522 if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
1523 DPRINTF(DP_GENERIC | DP_DROPS, "invalid source address: %08x",
1525 NAT64STAT_INC(&cfg->stats, dropped);
1526 return (NAT64MFREE);
1529 ip.ip_dst.s_addr = nat64_extract_ip4(&ip6->ip6_dst, cfg->plat_plen);
1530 if (ip.ip_dst.s_addr == 0) {
1531 NAT64STAT_INC(&cfg->stats, dropped);
1532 return (NAT64MFREE);
1535 if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
1536 nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
1537 ICMP6_TIME_EXCEED_TRANSIT, 0, &cfg->stats, logdata);
1538 return (NAT64RETURN);
1542 plen = ntohs(ip6->ip6_plen);
1543 proto = nat64_getlasthdr(m, &hlen);
1545 DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
1546 NAT64STAT_INC(&cfg->stats, dropped);
1547 return (NAT64MFREE);
1550 if (proto == IPPROTO_FRAGMENT) {
1551 /* ipfw_chk should m_pullup up to frag header */
1552 if (m->m_len < hlen + sizeof(*frag)) {
1554 "dropped due to mbuf isn't contigious");
1555 NAT64STAT_INC(&cfg->stats, dropped);
1556 return (NAT64MFREE);
1558 frag = mtodo(m, hlen);
1559 proto = frag->ip6f_nxt;
1560 hlen += sizeof(*frag);
1561 /* Fragmented ICMPv6 is unsupported */
1562 if (proto == IPPROTO_ICMPV6) {
1563 DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");
1564 NAT64STAT_INC(&cfg->stats, dropped);
1565 return (NAT64MFREE);
1567 /* Fragment length must be multiple of 8 octets */
1568 if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
1569 ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
1570 nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
1571 ICMP6_PARAMPROB_HEADER,
1572 offsetof(struct ip6_hdr, ip6_plen), &cfg->stats,
1574 return (NAT64RETURN);
1577 plen -= hlen - sizeof(struct ip6_hdr);
1578 if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
1579 DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",
1580 plen, m->m_pkthdr.len, hlen);
1581 NAT64STAT_INC(&cfg->stats, dropped);
1582 return (NAT64MFREE);
1585 icmp6 = NULL; /* Make gcc happy */
1586 if (proto == IPPROTO_ICMPV6) {
1587 icmp6 = mtodo(m, hlen);
1588 if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
1589 icmp6->icmp6_type != ICMP6_ECHO_REPLY)
1590 return (nat64_handle_icmp6(m, hlen, aaddr, aport,
1593 dst.sin_addr.s_addr = ip.ip_dst.s_addr;
1594 if (nat64_find_route4(&nh, &dst, m) != 0) {
1595 NAT64STAT_INC(&cfg->stats, noroute4);
1596 nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
1597 ICMP6_DST_UNREACH_NOROUTE, 0, &cfg->stats, logdata);
1598 return (NAT64RETURN);
1600 if (nh.nh_mtu < plen + sizeof(ip)) {
1601 nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu,
1602 &cfg->stats, logdata);
1603 return (NAT64RETURN);
1605 nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
1606 /* Convert checksums. */
1609 csum = &TCP(mtodo(m, hlen))->th_sum;
1611 struct tcphdr *tcp = TCP(mtodo(m, hlen));
1612 *csum = cksum_adjust(*csum, tcp->th_sport, aport);
1613 tcp->th_sport = aport;
1615 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1618 csum = &UDP(mtodo(m, hlen))->uh_sum;
1620 struct udphdr *udp = UDP(mtodo(m, hlen));
1621 *csum = cksum_adjust(*csum, udp->uh_sport, aport);
1622 udp->uh_sport = aport;
1624 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1626 case IPPROTO_ICMPV6:
1627 /* Checksum in ICMPv6 covers pseudo header */
1628 csum = &icmp6->icmp6_cksum;
1629 *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,
1630 IPPROTO_ICMPV6, 0));
1631 /* Convert ICMPv6 types to ICMP */
1632 proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */
1633 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)
1634 icmp6->icmp6_type = ICMP_ECHO;
1635 else /* ICMP6_ECHO_REPLY */
1636 icmp6->icmp6_type = ICMP_ECHOREPLY;
1637 *csum = cksum_adjust(*csum, (uint16_t)proto,
1638 *(uint16_t *)icmp6);
1640 uint16_t old_id = icmp6->icmp6_id;
1641 icmp6->icmp6_id = aport;
1642 *csum = cksum_adjust(*csum, old_id, aport);
1647 m_adj(m, hlen - sizeof(ip));
1648 bcopy(&ip, mtod(m, void *), sizeof(ip));
1649 if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst,
1650 &cfg->stats, logdata) == 0)
1651 NAT64STAT_INC(&cfg->stats, opcnt64);
1652 return (NAT64RETURN);