2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2015-2019 Yandex LLC
5 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6 * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/counter.h>
37 #include <sys/epoch.h>
38 #include <sys/errno.h>
40 #include <sys/kernel.h>
42 #include <sys/malloc.h>
44 #include <sys/module.h>
45 #include <sys/rmlock.h>
46 #include <sys/socket.h>
47 #include <sys/syslog.h>
48 #include <sys/sysctl.h>
51 #include <net/if_var.h>
52 #include <net/if_pflog.h>
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/ip_fw.h>
59 #include <netinet/ip6.h>
60 #include <netinet/icmp6.h>
61 #include <netinet/ip_icmp.h>
62 #include <netinet/tcp.h>
63 #include <netinet/udp.h>
64 #include <netinet6/in6_var.h>
65 #include <netinet6/ip6_var.h>
66 #include <netinet6/ip_fw_nat64.h>
68 #include <netpfil/ipfw/ip_fw_private.h>
69 #include <netpfil/pf/pf.h>
/* malloc(9) type tag passed to this file's malloc()/free() calls. */
73 MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
/* Module-local aliases for the NET_EPOCH_* primitives. */
75 #define NAT64LSN_EPOCH_ENTER(et) NET_EPOCH_ENTER(et)
76 #define NAT64LSN_EPOCH_EXIT(et) NET_EPOCH_EXIT(et)
77 #define NAT64LSN_EPOCH_ASSERT() NET_EPOCH_ASSERT()
/* Takes (context, callback) and forwards them as (callback, context). */
78 #define NAT64LSN_EPOCH_CALL(c, f) NET_EPOCH_CALL((f), (c))
/* UMA zones used by the uma_zalloc()/uma_zfree() calls in this file. */
80 static uma_zone_t nat64lsn_host_zone;
81 static uma_zone_t nat64lsn_pgchunk_zone;
82 static uma_zone_t nat64lsn_pg_zone;
83 static uma_zone_t nat64lsn_aliaslink_zone;
84 static uma_zone_t nat64lsn_state_zone;
85 static uma_zone_t nat64lsn_job_zone;
/* Periodic maintenance handler; it reschedules itself via callout_schedule(). */
87 static void nat64lsn_periodic(void *data);
/* Callout period; multiplied by hz where the callout is rescheduled. */
88 #define PERIODIC_DELAY 4
/* Cast the SRV_OBJECT() result for (cmd)->arg1 to a nat64lsn_cfg pointer. */
89 #define NAT64_LOOKUP(chain, cmd) \
90 (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
92 * Delayed job queue, used to create new hosts
/*
 * One queued work item.
 * NOTE(review): several lines of this definition are missing from the
 * extract (wrappers around the two member groups, further members, closing
 * braces); only the visible members are annotated.
 */
101 struct nat64lsn_job_item {
102 STAILQ_ENTRY(nat64lsn_job_item) entries; /* linkage in a job STAILQ */
103 enum nat64lsn_jtype jtype; /* job type, selects the member group used */
106 struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
108 struct nat64lsn_host *host; /* set by nat64lsn_alloc_host() */
109 struct nat64lsn_state *state; /* set by nat64lsn_alloc_pg() */
112 struct ipfw_flow_id f_id; /* flow id consulted by the allocators */
118 struct { /* used by JTYPE_DESTROY */
119 struct nat64lsn_hosts_slist hosts; /* hosts to destroy */
120 struct nat64lsn_pg_slist portgroups; /* portgroups to destroy */
121 struct nat64lsn_pgchunk *pgchunk; /* freed in nat64lsn_job_destroy() */
122 struct epoch_context epoch_ctx; /* passed to NAT64LSN_EPOCH_CALL() */
/*
 * File-wide mutex and its wrappers.
 * NOTE(review): the JQUEUE_LOCK()/JQUEUE_UNLOCK() call sites are not visible
 * in this extract; presumably they bracket accesses to cfg->jhead -- confirm.
 */
127 static struct mtx jmtx;
128 #define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
129 #define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx)
130 #define JQUEUE_LOCK() mtx_lock(&jmtx)
131 #define JQUEUE_UNLOCK() mtx_unlock(&jmtx)
/* Forward declarations: allocation helpers used by the job handler. */
133 static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
134 struct nat64lsn_job_item *ji);
135 static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
136 struct nat64lsn_job_item *ji);
/* Forward declarations: job creation, queueing and deferred destruction. */
137 static struct nat64lsn_job_item *nat64lsn_create_job(
138 struct nat64lsn_cfg *cfg, int jtype);
139 static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
140 struct nat64lsn_job_item *ji);
141 static void nat64lsn_job_destroy(epoch_context_t ctx);
142 static void nat64lsn_destroy_host(struct nat64lsn_host *host);
143 static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);
/* Forward declarations: translation entry points. */
145 static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
146 const struct ipfw_flow_id *f_id, struct mbuf **mp);
147 static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
148 struct ipfw_flow_id *f_id, struct mbuf **mp);
149 static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
150 struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
/* Bit positions within a state's flags word. */
152 #define NAT64_BIT_TCP_FIN 0 /* FIN was seen */
153 #define NAT64_BIT_TCP_SYN 1 /* First syn in->out */
154 #define NAT64_BIT_TCP_ESTAB 2 /* Packet with Ack */
155 #define NAT64_BIT_READY_IPV4 6 /* state is ready for translate4 */
156 #define NAT64_BIT_STALE 7 /* state is going to be expired */
/* Masks built from the bit positions above. */
158 #define NAT64_FLAG_FIN (1 << NAT64_BIT_TCP_FIN)
159 #define NAT64_FLAG_SYN (1 << NAT64_BIT_TCP_SYN)
160 #define NAT64_FLAG_ESTAB (1 << NAT64_BIT_TCP_ESTAB)
/* Union of the three TCP tracking masks. */
161 #define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
163 #define NAT64_FLAG_READY (1 << NAT64_BIT_READY_IPV4)
164 #define NAT64_FLAG_STALE (1 << NAT64_BIT_STALE)
/*
 * Fold TCP header flags into the state flag layout: the FIN and SYN bits are
 * taken as-is, while the RST and ACK bits are each shifted right by two and
 * OR-ed in (per the inline comments, RST then counts as FIN and ACK as
 * "established").
 * NOTE(review): the declaration of `result`, the braces and the return
 * statement are missing from this extract.
 */
166 static inline uint8_t
167 convert_tcp_flags(uint8_t flags)
171 result = flags & (TH_FIN|TH_SYN);
172 result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
173 result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */
/*
 * Fill @plog with a pflog header describing @state and hand it, together
 * with mbuf @m, to ipfw_bpf_mtap2().
 * NOTE(review): lines are missing from this extract (return type, braces,
 * and whatever consumes @family are not visible).
 */
179 nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
180 struct nat64lsn_state *state)
183 memset(plog, 0, sizeof(*plog));
184 plog->length = PFLOG_REAL_HDRLEN;
186 plog->action = PF_NAT;
/*
 * The rule number fields carry state data instead of rule numbers:
 * rulenr holds ip_src; subrulenr packs aport, proto and the low byte of
 * ip_dst.
 */
188 plog->rulenr = htonl(state->ip_src);
189 plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
190 (state->proto << 8) | (state->ip_dst & 0xff));
191 plog->ruleset[0] = '\0';
192 strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
193 ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
/* Hash @n 32-bit words starting at @p with seed @s. */
196 #define HVAL(p, n, s) jenkins_hash32((const uint32_t *)(p), (n), (s))
/* Hash one struct in6_addr at @a with the instance's hash_seed. */
197 #define HOST_HVAL(c, a) HVAL((a),\
198 sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
/* Hosts hash bucket for @v; the mask presumably relies on a power-of-two hosts_hashsize. */
199 #define HOSTS(c, v) ((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])
/* Hash two struct in6_addr worth of words starting at (f)->dst_ip6. */
201 #define ALIASLINK_HVAL(c, f) HVAL(&(f)->dst_ip6,\
202 sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
/* Alias entry for @v, masked to the low (32 - plen4) bits. */
203 #define ALIAS_BYHASH(c, v) \
204 ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
/*
 * Pick the alias link to use for @host.  @cfg and @f_id are marked __unused;
 * the visible implementation returns the first entry of host->aliases.
 * NOTE(review): braces and comment delimiters are missing from this extract.
 */
205 static struct nat64lsn_aliaslink*
206 nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
207 struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
211 * We can implement some different algorithms how
212 * select an alias address.
213 * XXX: for now we use first available.
215 return (CK_SLIST_FIRST(&host->aliases));
/* Hash two 32-bit words at @d with the instance's hash_seed. */
218 #define STATE_HVAL(c, d) HVAL((d), 2, (c)->hash_seed)
/* Bucket of host @h's states hash selected by @v. */
219 #define STATE_HASH(h, v) \
220 ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
/* States chunk of PG @p: the single chunk, or the one chosen by CHUNK_BY_FADDR(). */
221 #define STATES_CHUNK(p, v) \
222 ((p)->chunks_count == 1 ? (p)->states : \
223 ((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
/*
 * Free-state bitmask helpers that treat the mask as one 64-bit word.
 * NOTE(review): the preprocessor conditional selecting this variant is not
 * visible in this extract; the "#endif !__LP64__" that closes the 32-bit
 * variant further down suggests this is the __LP64__ branch -- confirm.
 */
/* 1-based index of the first set bit (ffsll semantics), 0 if none. */
226 #define FREEMASK_FFSLL(pg, faddr) \
227 ffsll(*FREEMASK_CHUNK((pg), (faddr)))
/* Atomic bit-test-and-reset / bit-test-and-set on the mask. */
228 #define FREEMASK_BTR(pg, faddr, bit) \
229 ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
230 #define FREEMASK_BTS(pg, faddr, bit) \
231 ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
/* Non-atomic test of a single bit. */
232 #define FREEMASK_ISSET(pg, faddr, bit) \
233 ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
/* Atomic snapshot of the whole mask into @out. */
234 #define FREEMASK_COPY(pg, n, out) \
235 (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
/*
 * Find-first-set over a free mask stored as two 32-bit words: word 0 is
 * probed first, then word 1.
 * NOTE(review): the return type, the declaration of `i` and every return
 * statement are missing from this extract, so the exact return values
 * (including any offset applied for word 1) cannot be confirmed here.
 */
238 freemask_ffsll(uint32_t *freemask)
242 if ((i = ffsl(freemask[0])) != 0)
244 if ((i = ffsl(freemask[1])) != 0)
/*
 * Free-state bitmask helpers that treat the mask as two 32-bit words
 * (the variant closed by the "#endif !__LP64__" below).
 */
248 #define FREEMASK_FFSLL(pg, faddr) \
249 freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
/* bit / 32 selects the word, bit % 32 the bit inside it. */
250 #define FREEMASK_BTR(pg, faddr, bit) \
251 ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
252 #define FREEMASK_BTS(pg, faddr, bit) \
253 ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
254 #define FREEMASK_ISSET(pg, faddr, bit) \
255 ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
/* Two separate 32-bit loads combined into a 64-bit value; word 1 forms the high half. */
256 #define FREEMASK_COPY(pg, n, out) \
257 (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
258 ((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
259 #endif /* !__LP64__ */
/* Upper bound on iterations of the PG search loop in nat64lsn_get_pg(). */
261 #define NAT64LSN_TRY_PGCNT 32
/*
 * Look for a portgroup (PG) with free states for @faddr.
 *
 * @chunkmask, @pgmask and @chunks describe which pgchunks/PGs exist;
 * @pgptr and @pgidx cache the last used PG and its index and are read and
 * updated with ck_pr atomics.  The search loop runs at most
 * NAT64LSN_TRY_PGCNT times.
 * NOTE(review): many lines are missing from this extract (braces, the `cnt`
 * declaration, the loop head, return statements), so the precise control
 * flow and return value are not visible.
 */
262 static struct nat64lsn_pg*
263 nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
264 struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
265 uint32_t *pgidx, in_addr_t faddr)
267 struct nat64lsn_pg *pg, *oldpg;
268 uint32_t idx, oldidx;
272 /* First try last used PG */
273 oldpg = pg = ck_pr_load_ptr(pgptr);
274 idx = oldidx = ck_pr_load_32(pgidx);
275 /* If pgidx is out of range, reset it to the first pgchunk */
276 if (!ISSET32(*chunkmask, idx / 32))
280 if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
282 * If last used PG has not free states,
283 * try to update pointer.
284 * NOTE: it can be already updated by jobs handler,
285 * thus we use CAS operation.
288 ck_pr_cas_ptr(pgptr, oldpg, pg);
291 /* Stop if idx is out of range */
292 if (!ISSET32(*chunkmask, idx / 32))
/* idx / 32 selects the pgchunk, idx % 32 the PG slot inside it. */
295 if (ISSET32(pgmask[idx / 32], idx % 32))
297 &chunks[idx / 32]->pgptr[idx % 32]);
302 } while (++cnt < NAT64LSN_TRY_PGCNT);
304 /* If pgidx is out of range, reset it to the first pgchunk */
305 if (!ISSET32(*chunkmask, idx / 32))
/* Publish the new index only if nobody changed it since it was loaded. */
307 ck_pr_cas_32(pgidx, oldidx, idx);
/*
 * Find or create the state for an IPv6->IPv4 flow of @host.
 *
 * First scans the host's hash bucket @hval for a state matching @proto,
 * @faddr, @port and f_id->dst_port.  Otherwise it picks an alias link, asks
 * nat64lsn_get_pg() for a PG of the right protocol, atomically claims a free
 * bit in that PG's free mask, initialises the state, links it into the
 * host's hash and finally sets the READY_IPV4 bit.  An unexpected @proto
 * panics.  Asserts that the caller is inside the network epoch.
 * NOTE(review): many lines are missing from this extract (switch/case
 * labels, braces, NULL checks, locking, return statements).
 */
311 static struct nat64lsn_state*
312 nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
313 const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
314 uint16_t port, uint8_t proto)
316 struct nat64lsn_aliaslink *link;
317 struct nat64lsn_state *state;
318 struct nat64lsn_pg *pg;
321 NAT64LSN_EPOCH_ASSERT();
323 /* Check that we already have state for given arguments */
324 CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
325 if (state->proto == proto && state->ip_dst == faddr &&
326 state->sport == port && state->dport == f_id->dst_port)
330 link = nat64lsn_get_aliaslink(cfg, host, f_id);
/* Same lookup for each protocol, against that protocol's masks and cache. */
336 pg = nat64lsn_get_pg(
337 &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
338 link->alias->tcp, &link->alias->tcp_pg,
339 &link->alias->tcp_pgidx, faddr);
342 pg = nat64lsn_get_pg(
343 &link->alias->udp_chunkmask, link->alias->udp_pgmask,
344 link->alias->udp, &link->alias->udp_pg,
345 &link->alias->udp_pgidx, faddr);
348 pg = nat64lsn_get_pg(
349 &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
350 link->alias->icmp, &link->alias->icmp_pg,
351 &link->alias->icmp_pgidx, faddr);
354 panic("%s: wrong proto %d", __func__, proto);
359 /* Check that PG has some free states */
361 i = FREEMASK_BITCOUNT(pg, faddr);
363 offset = FREEMASK_FFSLL(pg, faddr);
367 * No more free states in this PG.
372 /* Let's try to atomically grab the state */
/* offset is 1-based (ffs-style), hence the "- 1" when used as a bit index. */
373 if (FREEMASK_BTR(pg, faddr, offset - 1)) {
374 state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
376 state->flags = proto != IPPROTO_TCP ? 0 :
377 convert_tcp_flags(f_id->_flags);
378 state->proto = proto;
379 state->aport = pg->base_port + offset - 1;
380 state->dport = f_id->dst_port;
382 state->ip6_dst = f_id->dst_ip6;
383 state->ip_dst = faddr;
384 state->ip_src = link->alias->addr;
387 SET_AGE(state->timestamp);
389 /* Insert new state into host's hash table */
391 CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
393 host->states_count++;
395 * XXX: In case if host is going to be expired,
396 * reset NAT64LSN_DEADHOST flag.
398 host->flags &= ~NAT64LSN_DEADHOST;
400 NAT64STAT_INC(&cfg->base.stats, screated);
401 /* Mark the state as ready for translate4 */
403 ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
411 * Inspects ICMP packets to see whether the message embeds a different
412 * packet header, in which case @addr and @port must be altered.
/*
 * Extract the lookup key from an ICMP packet held in *@mp.
 *
 * For ICMP types that carry an id, @port is set from icmp_id.  For error
 * messages embedding an inner IP header, @addr is taken from the inner
 * source address and @port/@proto from the inner transport header (the
 * first 16 bits after the inner IP header, or icmp_id of an inner
 * ICMP_ECHO).  *@mp may be replaced by m_pullup().
 * NOTE(review): many lines are missing from this extract (return type, the
 * rest of the parameter list, case labels, the checks after m_pullup(),
 * return statements), so the return values are not visible.
 */
415 inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
423 ip = mtod(*mp, struct ip *); /* Outer IP header */
424 off = (ip->ip_hl << 2) + ICMP_MINLEN;
425 if ((*mp)->m_len < off)
426 *mp = m_pullup(*mp, off);
/* Re-read the header pointer: m_pullup() may have replaced the mbuf. */
430 ip = mtod(*mp, struct ip *); /* Outer IP header */
431 icmp = L3HDR(ip, struct icmp *);
432 switch (icmp->icmp_type) {
435 /* Use icmp ID as distinguisher */
436 *port = ntohs(icmp->icmp_id);
445 * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
448 if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
450 if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
451 *mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
454 ip = mtodo(*mp, off); /* Inner IP header */
455 inner_proto = ip->ip_p;
456 off += ip->ip_hl << 2; /* Skip inner IP header */
457 *addr = ntohl(ip->ip_src.s_addr);
458 if ((*mp)->m_len < off + ICMP_MINLEN)
459 *mp = m_pullup(*mp, off + ICMP_MINLEN);
462 switch (inner_proto) {
465 /* Copy source port from the header */
466 *port = ntohs(*((uint16_t *)mtodo(*mp, off)));
467 *proto = inner_proto;
471 * We will translate only ICMP errors for our ICMP
474 icmp = mtodo(*mp, off);
475 if (icmp->icmp_type != ICMP_ECHO)
477 *port = ntohs(icmp->icmp_id);
/*
 * Look up the state behind alias port @port for an IPv4->IPv6 packet.
 *
 * The port (relative to NAT64_MIN_PORT) is decomposed into a pgchunk index
 * (2048 ports per chunk) and a state index (port % 64).  The per-protocol
 * chunk and PG masks are checked before the PG pointer is dereferenced; the
 * state is then consulted only if its free-mask bit test and READY flag
 * test pass.  An unexpected @proto panics.  Asserts that the caller is
 * inside the network epoch.
 * NOTE(review): many lines are missing from this extract (the pg_idx
 * computation, case labels, braces, return statements), so the outcome of
 * each test is not visible.
 */
483 static struct nat64lsn_state*
484 nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
485 in_addr_t faddr, uint16_t port, uint8_t proto)
487 struct nat64lsn_state *state;
488 struct nat64lsn_pg *pg;
489 int chunk_idx, pg_idx, state_idx;
491 NAT64LSN_EPOCH_ASSERT();
493 if (port < NAT64_MIN_PORT)
496 * Alias keeps 32 pgchunks for each protocol.
497 * Each pgchunk has 32 pointers to portgroup.
498 * Each portgroup has 64 states for ports.
500 port -= NAT64_MIN_PORT;
501 chunk_idx = port / 2048;
503 port -= chunk_idx * 2048;
505 state_idx = port % 64;
508 * First check in proto_chunkmask that we have allocated PG chunk.
509 * Then check in proto_pgmask that we have valid PG pointer.
514 if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
515 ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
516 pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
521 if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
522 ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
523 pg = alias->udp[chunk_idx]->pgptr[pg_idx];
528 if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
529 ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
530 pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
535 panic("%s: wrong proto %d", __func__, proto);
/* A set bit in the free mask means the slot is free, i.e. holds no state. */
540 if (FREEMASK_ISSET(pg, faddr, state_idx))
543 state = &STATES_CHUNK(pg, faddr)->state[state_idx];
545 if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
551 * Reassemble IPv4 fragments, make PULLUP if needed, get some ULP fields
552 * that might be unknown until reassembling is completed.
/*
 * Make the IPv4 header plus the minimal upper-layer header of @m contiguous
 * and read the destination port from the TCP or UDP header.
 *
 * Updates the `noproto` and `nomem` counters on the corresponding failure
 * paths.
 * NOTE(review): many lines are missing from this extract (return type, the
 * rest of the parameter list, the reassembly call itself, case labels,
 * return statements), so the return value is not visible.
 */
555 nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
564 /* IP header must be contiguous after ip_reass() */
565 ip = mtod(m, struct ip *);
566 len = ip->ip_hl << 2;
569 len += ICMP_MINLEN; /* Enough to get icmp_id */
572 len += sizeof(struct tcphdr);
575 len += sizeof(struct udphdr);
579 NAT64STAT_INC(&cfg->base.stats, noproto);
582 if (m->m_len < len) {
583 m = m_pullup(m, len);
585 NAT64STAT_INC(&cfg->base.stats, nomem);
/* Re-read the header pointer: m_pullup() may have replaced the mbuf. */
588 ip = mtod(m, struct ip *);
592 *port = ntohs(L3HDR(ip, struct tcphdr *)->th_dport);
595 *port = ntohs(L3HDR(ip, struct udphdr *)->uh_dport);
/*
 * Translate an IPv4 packet towards IPv6 using an existing state.
 *
 * Visible steps: reject destinations outside [prefix4, pmask4]; reassemble
 * fragments; for ICMP, let inspect_icmp_mbuf() adjust proto/addr/port and
 * re-check the address range; locate the alias and its state via
 * nat64lsn_get_state4to6(); refresh the state's timestamp and flags;
 * optionally log; embed f_id->src_ip into the IPv6 source and call
 * nat64_do_handle_ip4().  Non-matching packets return cfg->nomatch_verdict
 * and bump the matching statistics counter.
 * NOTE(review): many lines are missing from this extract (return type,
 * several declarations, the initial assignment of `addr`, case labels,
 * braces, final return statements).
 */
602 nat64lsn_translate4(struct nat64lsn_cfg *cfg,
603 const struct ipfw_flow_id *f_id, struct mbuf **mp)
605 struct pfloghdr loghdr, *logdata;
606 struct in6_addr src6;
607 struct nat64lsn_state *state;
608 struct nat64lsn_alias *alias;
609 uint32_t addr, flags;
615 port = f_id->dst_port;
617 if (addr < cfg->prefix4 || addr > cfg->pmask4) {
618 NAT64STAT_INC(&cfg->base.stats, nomatch4);
619 return (cfg->nomatch_verdict);
622 /* Reassemble fragments if needed */
623 ret = ntohs(mtod(*mp, struct ip *)->ip_off);
624 if ((ret & (IP_MF | IP_OFFMASK)) != 0) {
625 *mp = nat64lsn_reassemble4(cfg, *mp, &port);
630 /* Check if protocol is supported */
633 ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
636 NAT64STAT_INC(&cfg->base.stats, nomem);
639 NAT64STAT_INC(&cfg->base.stats, noproto);
640 return (cfg->nomatch_verdict);
/* addr may have been replaced by the inner header's address; re-validate. */
642 if (addr < cfg->prefix4 || addr > cfg->pmask4) {
643 NAT64STAT_INC(&cfg->base.stats, nomatch4);
644 return (cfg->nomatch_verdict);
651 NAT64STAT_INC(&cfg->base.stats, noproto);
652 return (cfg->nomatch_verdict);
655 alias = &ALIAS_BYHASH(cfg, addr);
656 MPASS(addr == alias->addr);
658 /* Check that we have state for this port */
659 state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
662 NAT64STAT_INC(&cfg->base.stats, nomatch4);
663 return (cfg->nomatch_verdict);
666 /* TODO: Check flags to see if we need to do some static mapping */
668 /* Update some state fields if needed */
670 if (f_id->proto == IPPROTO_TCP)
671 flags = convert_tcp_flags(f_id->_flags);
/* Fields are written only when the value actually changes. */
674 if (state->timestamp != ts)
675 state->timestamp = ts;
676 if ((state->flags & flags) != flags)
677 state->flags |= flags;
679 port = htons(state->sport);
680 src6 = state->ip6_dst;
682 if (cfg->base.flags & NAT64_LOG) {
684 nat64lsn_log(logdata, *mp, AF_INET, state);
689 * We already have src6 with embedded address, but it is possible,
690 * that src_ip is different than state->ip_dst, this is why we
691 * do embedding again.
693 nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
694 ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
695 &cfg->base, logdata);
696 if (ret == NAT64SKIP)
697 return (cfg->nomatch_verdict);
698 if (ret == NAT64RETURN)
704 * Check if particular state is stale and should be deleted.
705 * Return 1 if true, 0 otherwise.
/*
 * Decide whether @state has outlived its time-to-live.
 *
 * States already flagged STALE, and states not yet READY, are handled before
 * the age check.  For TCP the TTL is chosen from the FIN / ESTAB / SYN flags
 * (close, established or syn TTL, with the syn TTL as fallback); UDP and
 * ICMP compare the age against their own TTL settings.
 * NOTE(review): the return type, declarations, case labels and every return
 * statement are missing from this extract; the surviving header comment
 * above says 1 means stale and 0 otherwise.
 */
708 nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
712 /* State was marked as stale in previous pass. */
713 if (ISSET32(state->flags, NAT64_BIT_STALE))
716 /* State is not yet initialized, it is going to be READY */
717 if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
720 age = GET_AGE(state->timestamp);
721 switch (state->proto) {
723 if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
724 ttl = cfg->st_close_ttl;
725 else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
726 ttl = cfg->st_estab_ttl;
727 else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
728 ttl = cfg->st_syn_ttl;
730 ttl = cfg->st_syn_ttl;
735 if (age > cfg->st_udp_ttl)
739 if (age > cfg->st_icmp_ttl)
/*
 * Expire states of portgroup @pg in two passes.
 *
 * Every in-use slot (clear bit in a snapshot of the free mask) is checked
 * with nat64lsn_check_state().  A state found expired for the first time is
 * unlinked from its host's hash, loses READY and gains STALE; a state that
 * is already STALE gets its free-mask bit set again and `sdeleted` is
 * counted.  The PG timestamp is refreshed while live states remain, and the
 * PG's age is compared with cfg->pg_delete_delay at the end.
 * NOTE(review): many lines are missing from this extract (return type,
 * declarations, braces, continue/return statements, host locking), so the
 * return value is not visible.
 */
747 nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
749 struct nat64lsn_state *state;
750 struct nat64lsn_host *host;
752 int c, i, update_age;
755 for (c = 0; c < pg->chunks_count; c++) {
756 FREEMASK_COPY(pg, c, freemask);
757 for (i = 0; i < 64; i++) {
758 if (ISSET64(freemask, i))
760 state = &STATES_CHUNK(pg, c)->state[i];
761 if (nat64lsn_check_state(cfg, state) == 0) {
767 * 1. Mark as STALE and unlink from host's hash.
768 * 2. Set bit in freemask.
770 if (ISSET32(state->flags, NAT64_BIT_STALE)) {
772 * State was marked as STALE in previous
773 * pass. Now it is safe to release it.
777 FREEMASK_BTS(pg, c, i);
778 NAT64STAT_INC(&cfg->base.stats, sdeleted);
781 MPASS(state->flags & NAT64_FLAG_READY);
785 CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
786 state, nat64lsn_state, entries);
787 host->states_count--;
790 /* Reset READY flag */
791 ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
792 /* And set STALE flag */
793 ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
796 * Now translate6 will not use this state, wait
797 * until it become safe for translate4, then mark
804 * We have some alive states, update timestamp.
807 SET_AGE(pg->timestamp);
809 if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
/*
 * Collect expired portgroups of every alias onto @portgroups.
 *
 * Iterates all 2^(32 - plen4) aliases and runs nat64lsn_maintain_pg() on
 * each PG.  A PG selected for removal (never the one based at
 * NAT64_MIN_PORT) has its pgmask bit cleared, the protocol's cached
 * "last used PG" pointer/index reset if they refer to it, and is moved from
 * the alias list to @portgroups for deferred destruction.
 * NOTE(review): many lines are missing from this extract (return type,
 * declarations, switch/case labels, continue statements, alias locking).
 */
816 nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
817 struct nat64lsn_pg_slist *portgroups)
819 struct nat64lsn_alias *alias;
820 struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
821 uint32_t *pgmask, *pgidx;
824 for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
825 alias = &cfg->aliases[i];
826 CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
827 if (nat64lsn_maintain_pg(cfg, pg) == 0)
829 /* Always keep first PG */
830 if (pg->base_port == NAT64_MIN_PORT)
833 * PG is expired, unlink it and schedule for
834 * deferred destroying.
/* Each PG covers 64 ports, so this is the PG's index within the alias. */
836 idx = (pg->base_port - NAT64_MIN_PORT) / 64;
839 pgmask = alias->tcp_pgmask;
840 pgptr = &alias->tcp_pg;
841 pgidx = &alias->tcp_pgidx;
842 firstpg = alias->tcp[0]->pgptr[0];
845 pgmask = alias->udp_pgmask;
846 pgptr = &alias->udp_pg;
847 pgidx = &alias->udp_pgidx;
848 firstpg = alias->udp[0]->pgptr[0];
851 pgmask = alias->icmp_pgmask;
852 pgptr = &alias->icmp_pg;
853 pgidx = &alias->icmp_pgidx;
854 firstpg = alias->icmp[0]->pgptr[0];
857 /* Reset the corresponding bit in pgmask array. */
858 ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
860 /* If last used PG points to this PG, reset it. */
861 ck_pr_cas_ptr(pgptr, pg, firstpg);
862 ck_pr_cas_32(pgidx, idx, 0);
863 /* Unlink PG from alias's chain */
865 CK_SLIST_REMOVE(&alias->portgroups, pg,
866 nat64lsn_pg, entries);
867 alias->portgroups_count--;
869 /* And link to job's chain for deferred destroying */
870 NAT64STAT_INC(&cfg->base.stats, spgdeleted);
871 CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
/*
 * Two-pass expiry of idle hosts.
 *
 * A host that is old enough (host_delete_delay) and has no states is first
 * flagged NAT64LSN_DEADHOST.  On a later call a still-flagged host is
 * unlinked from the hosts hash and pushed onto @hosts for deferred
 * destruction, unless it has gained states in the meantime, in which case
 * the flag is cleared.
 * NOTE(review): many lines are missing from this extract (return type,
 * declarations, braces, continue statements, counters/locking).
 */
877 nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
878 struct nat64lsn_hosts_slist *hosts)
880 struct nat64lsn_host *host, *tmp;
883 for (i = 0; i < cfg->hosts_hashsize; i++) {
884 CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
886 /* Was the host marked in a previous call? */
887 if (host->flags & NAT64LSN_DEADHOST) {
888 if (host->states_count > 0) {
889 host->flags &= ~NAT64LSN_DEADHOST;
893 * Unlink host from hash table and schedule
894 * it for deferred destroying.
897 CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
898 nat64lsn_host, entries);
901 CK_SLIST_INSERT_HEAD(hosts, host, entries);
904 if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
906 if (host->states_count > 0)
908 /* Mark host as going to be expired in next pass */
909 host->flags |= NAT64LSN_DEADHOST;
/*
 * Search aliases older than cfg->pgchunk_delete_delay for a TCP pgchunk
 * (index 1..31; index 0 is always kept) whose pgmask word is zero, and clear
 * its bit in tcp_chunkmask.  The pgmask is loaded again after the bit is
 * cleared.
 * NOTE(review): many lines are missing from this extract (declarations,
 * braces, continue/return statements), so which pointer is returned, and
 * what the second pgmask check triggers, is not visible.  Only the TCP
 * arrays are referenced in the visible lines.
 */
915 static struct nat64lsn_pgchunk*
916 nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
919 struct nat64lsn_alias *alias;
920 struct nat64lsn_pgchunk *chunk;
924 for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
925 alias = &cfg->aliases[i];
926 if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
928 /* Always keep single chunk allocated */
929 for (c = 1; c < 32; c++) {
930 if ((alias->tcp_chunkmask & (1 << c)) == 0)
932 chunk = ck_pr_load_ptr(&alias->tcp[c]);
933 if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
935 ck_pr_btr_32(&alias->tcp_chunkmask, c);
937 if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
/*
 * Prepare a larger states hash for hosts whose table has become crowded.
 *
 * For every host whose states_count is at least twice states_hashsize (and
 * whose table is still below NAT64LSN_MAX_HSIZE) a table of double size is
 * allocated, each of its buckets is initialised, and the host is flagged
 * NAT64LSN_GROWHASH.
 *
 * Fixes relative to the previous revision:
 *  - the bucket-initialisation loop iterates `j` but indexed `hash[i]`,
 *    so it re-initialised a single bucket (selected by the outer hosts-hash
 *    index, possibly out of range) and left the rest uninitialised;
 *  - the malloc() call lacked the malloc-type argument; M_NAT64LSN is used,
 *    as for the other allocations in this file.
 *
 * NOTE(review): lines are missing from this extract (return type, the
 * declarations of i/j/hsize, braces, continue statements, the NULL check
 * after malloc()).  In the visible lines the new table is neither attached
 * to the host nor freed -- confirm how it is consumed before enabling this.
 */
947 nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
949 struct nat64lsn_host *h;
950 struct nat64lsn_states_slist *hash;
953 for (i = 0; i < cfg->hosts_hashsize; i++) {
954 CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
955 if (h->states_count / 2 < h->states_hashsize ||
956 h->states_hashsize >= NAT64LSN_MAX_HSIZE)
958 hsize = h->states_hashsize * 2;
959 hash = malloc(sizeof(*hash) * hsize, M_NAT64LSN, M_NOWAIT);
962 for (j = 0; j < hsize; j++)
963 CK_SLIST_INIT(&hash[j]);
965 ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
972 * This procedure is used to perform various maintenance
973 * on dynamic hash list. Currently it is called every 4 seconds.
/*
 * Periodic callout handler; @data is the struct nat64lsn_cfg instance.
 *
 * When the instance has hosts, a JTYPE_DESTROY job is allocated, filled with
 * the expired hosts, portgroups and (at most one) pgchunk, and handed to
 * NAT64LSN_EPOCH_CALL() so that nat64lsn_job_destroy() frees them later.  A
 * failed job allocation is counted in `jnomem`.  The callout is then
 * rescheduled hz * PERIODIC_DELAY ticks ahead.
 * NOTE(review): lines are missing from this extract (return type, braces,
 * the NULL check on `ji`, the matching CURVNET_RESTORE()).
 */
976 nat64lsn_periodic(void *data)
978 struct nat64lsn_job_item *ji;
979 struct nat64lsn_cfg *cfg;
981 cfg = (struct nat64lsn_cfg *) data;
982 CURVNET_SET(cfg->vp);
983 if (cfg->hosts_count > 0) {
984 ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
986 ji->jtype = JTYPE_DESTROY;
987 CK_SLIST_INIT(&ji->hosts);
988 CK_SLIST_INIT(&ji->portgroups);
989 nat64lsn_expire_hosts(cfg, &ji->hosts);
990 nat64lsn_expire_portgroups(cfg, &ji->portgroups);
991 ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
/* Destruction is deferred through the epoch callback mechanism. */
992 NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
993 nat64lsn_job_destroy);
995 NAT64STAT_INC(&cfg->base.stats, jnomem);
997 callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
/*
 * Encode an allocation failure as 10 * type + stage; stage 0 means success
 * and always yields 0.  Type 1 is used for hosts, type 2 for portgroups.
 */
1001 #define ALLOC_ERROR(stage, type) ((stage) ? 10 * (type) + (stage): 0)
1002 #define HOST_ERROR(stage) ALLOC_ERROR(stage, 1)
1003 #define PG_ERROR(stage) ALLOC_ERROR(stage, 2)
/*
 * Job handler: create the host described by @ji and obtain its first state.
 *
 * Looks for an existing host with the same IPv6 source address first.
 * Otherwise allocates the host, its states hash and an alias link
 * (HOST_ERROR(1..3) on allocation failure, releasing what was already
 * allocated), initialises the host, picks the alias from the flow hash,
 * links host and alias together and inserts the host into the hosts hash.
 * A state is then requested via nat64lsn_get_state6to4(); if that yields
 * NULL a new PG is allocated, and HOST_ERROR(4) is returned when that fails.
 * Returns HOST_ERROR(0) on success.  Asserts that the caller is inside the
 * network epoch.
 * NOTE(review): lines are missing from this extract (return type, braces,
 * the NULL check on `link`, what happens when the host already exists,
 * and the assignments that record the obtained state).
 */
1005 nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1007 char a[INET6_ADDRSTRLEN];
1008 struct nat64lsn_aliaslink *link;
1009 struct nat64lsn_host *host;
1010 struct nat64lsn_state *state;
1011 uint32_t hval, data[2];
1014 /* Check that host was not yet added. */
1015 NAT64LSN_EPOCH_ASSERT();
1016 CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
1017 if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
1018 /* The host was allocated in previous call. */
1024 host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
1025 if (ji->host == NULL)
1026 return (HOST_ERROR(1));
1028 host->states_hashsize = NAT64LSN_HSIZE;
1029 host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
1030 host->states_hashsize, M_NAT64LSN, M_NOWAIT);
1031 if (host->states_hash == NULL) {
1032 uma_zfree(nat64lsn_host_zone, host);
1033 return (HOST_ERROR(2));
1036 link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
1038 free(host->states_hash, M_NAT64LSN);
1039 uma_zfree(nat64lsn_host_zone, host);
1040 return (HOST_ERROR(3));
1044 HOST_LOCK_INIT(host);
1045 SET_AGE(host->timestamp);
1046 host->addr = ji->f_id.src_ip6;
1047 host->hval = ji->src6_hval;
1049 host->states_count = 0;
1050 host->states_hashsize = NAT64LSN_HSIZE;
1051 CK_SLIST_INIT(&host->aliases);
1052 for (i = 0; i < host->states_hashsize; i++)
1053 CK_SLIST_INIT(&host->states_hash[i]);
1055 /* Determine alias from flow hash. */
1056 hval = ALIASLINK_HVAL(cfg, &ji->f_id);
1057 link->alias = &ALIAS_BYHASH(cfg, hval);
1058 CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
1060 ALIAS_LOCK(link->alias);
1061 CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
1062 link->alias->hosts_count++;
1063 ALIAS_UNLOCK(link->alias);
1066 CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
/* State hash key: foreign address, then destination port and source port. */
1071 data[0] = ji->faddr;
1072 data[1] = (ji->f_id.dst_port << 16) | ji->port;
1073 ji->state_hval = hval = STATE_HVAL(cfg, data);
1074 state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
1075 ji->faddr, ji->port, ji->proto);
1077 * We failed to obtain new state, used alias needs new PG.
1078 * XXX: or another alias should be used.
1080 if (state == NULL) {
1081 /* Try to allocate new PG */
1082 if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1083 return (HOST_ERROR(4));
1084 /* We assume that nat64lsn_alloc_pg() got state */
1089 DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
1090 inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
1091 return (HOST_ERROR(0));
/*
 * Scan the 32 words of the PG mask @data and return the global index
 * (word * 32 + bit) of the first clear bit in the word being examined.
 * NOTE(review): the return type, the declaration of `i`, the condition that
 * guards the return (presumably skipping fully set words) and the fallback
 * return value are missing from this extract; the caller treats a negative
 * result as "no more PGs".
 */
1095 nat64lsn_find_pg_place(uint32_t *data)
1099 for (i = 0; i < 32; i++) {
1102 return (i * 32 + ffs(~data[i]) - 1);
/*
 * Allocate a new portgroup for one protocol of @alias and publish it.
 *
 * @chunkmask, @pgmask, @chunks and @pgptr are the protocol-specific masks,
 * pgchunk array and "last used PG" cache of the alias.
 *
 * Steps: find a free PG slot (PG_ERROR(1) if none); allocate the pgchunk
 * for that slot if its chunkmask bit is clear (PG_ERROR(2)); allocate the PG
 * (PG_ERROR(3)); allocate either one states chunk (PG_ERROR(6)) or, when
 * cfg->states_chunks > 1, the free-mask and states-chunk arrays plus every
 * chunk (PG_ERROR(4), (5), (7)); mark all states free; store the PG pointer
 * before setting its pgmask bit; link the PG into the alias list.
 * Returns PG_ERROR(0) on success.
 * NOTE(review): lines are missing from this extract (return type, allocation
 * flags on two calls, the NULL check on `pg`, the goto/label of the cleanup
 * path, the ALIAS_LOCK() matching the visible ALIAS_UNLOCK(), and any use
 * of @proto).
 */
1108 nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
1109 struct nat64lsn_alias *alias, uint32_t *chunkmask,
1110 uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
1111 struct nat64lsn_pg **pgptr, uint8_t proto)
1113 struct nat64lsn_pg *pg;
1114 int i, pg_idx, chunk_idx;
1116 /* Find place in pgchunk where PG can be added */
1117 pg_idx = nat64lsn_find_pg_place(pgmask);
1118 if (pg_idx < 0) /* no more PGs */
1119 return (PG_ERROR(1));
1120 /* Check that we have allocated pgchunk for given PG index */
1121 chunk_idx = pg_idx / 32;
1122 if (!ISSET32(*chunkmask, chunk_idx)) {
1123 chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
1125 if (chunks[chunk_idx] == NULL)
1126 return (PG_ERROR(2));
1127 ck_pr_bts_32(chunkmask, chunk_idx);
1128 ck_pr_fence_store();
1130 /* Allocate PG and states chunks */
1131 pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
1133 return (PG_ERROR(3));
1134 pg->chunks_count = cfg->states_chunks;
1135 if (pg->chunks_count > 1) {
1136 pg->freemask_chunk = malloc(pg->chunks_count *
1137 sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
1138 if (pg->freemask_chunk == NULL) {
1139 uma_zfree(nat64lsn_pg_zone, pg);
1140 return (PG_ERROR(4));
1142 pg->states_chunk = malloc(pg->chunks_count *
1143 sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
1145 if (pg->states_chunk == NULL) {
1146 free(pg->freemask_chunk, M_NAT64LSN);
1147 uma_zfree(nat64lsn_pg_zone, pg);
1148 return (PG_ERROR(5));
1150 for (i = 0; i < pg->chunks_count; i++) {
1151 pg->states_chunk[i] = uma_zalloc(
1152 nat64lsn_state_zone, M_NOWAIT);
1153 if (pg->states_chunk[i] == NULL)
/* All bits set: every state in every chunk starts out free. */
1156 memset(pg->freemask_chunk, 0xff,
1157 sizeof(uint64_t) * pg->chunks_count);
1159 pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
1160 if (pg->states == NULL) {
1161 uma_zfree(nat64lsn_pg_zone, pg);
1162 return (PG_ERROR(6));
1164 memset(&pg->freemask64, 0xff, sizeof(uint64_t));
1167 /* Initialize PG and hook it to pgchunk */
1168 SET_AGE(pg->timestamp);
1170 pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
/* Store the pointer, fence, then set the mask bit that advertises it. */
1171 ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
1172 ck_pr_fence_store();
1173 ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
1174 ck_pr_store_ptr(pgptr, pg);
1177 CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
1178 SET_AGE(alias->timestamp);
1179 alias->portgroups_count++;
1180 ALIAS_UNLOCK(alias);
1181 NAT64STAT_INC(&cfg->base.stats, spgcreated);
1182 return (PG_ERROR(0));
/* Cleanup for a failed multi-chunk allocation. */
1185 for (i = 0; i < pg->chunks_count; i++)
1186 uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1187 free(pg->freemask_chunk, M_NAT64LSN);
1188 free(pg->states_chunk, M_NAT64LSN);
1189 uma_zfree(nat64lsn_pg_zone, pg);
1190 return (PG_ERROR(7));
/*
 * Job handler: allocate a new portgroup for the alias linked to ji->host
 * and retry the state lookup.
 *
 * Dispatches on ji->proto to nat64lsn_alloc_proto_pg() with the matching
 * TCP/UDP/ICMP fields of the alias; an unexpected protocol panics.  When the
 * PG was created (PG_ERROR(0)), ji->state is filled from
 * nat64lsn_get_state6to4().  PG_ERROR(1) from the helper (no free PG slot)
 * is reported with printf().
 * NOTE(review): lines are missing from this extract (return type, the `ret`
 * declaration, the NULL check on `link`, case labels, the printf argument,
 * the handling of a NULL ji->state, final return statements).
 */
1194 nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1196 struct nat64lsn_aliaslink *link;
1197 struct nat64lsn_alias *alias;
1200 link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
1202 return (PG_ERROR(1));
1205 * TODO: check that we did not already allocated PG in
1210 alias = link->alias;
1211 /* Find place in pgchunk where PG can be added */
1212 switch (ji->proto) {
1214 ret = nat64lsn_alloc_proto_pg(cfg, alias,
1215 &alias->tcp_chunkmask, alias->tcp_pgmask,
1216 alias->tcp, &alias->tcp_pg, ji->proto);
1219 ret = nat64lsn_alloc_proto_pg(cfg, alias,
1220 &alias->udp_chunkmask, alias->udp_pgmask,
1221 alias->udp, &alias->udp_pg, ji->proto);
1224 ret = nat64lsn_alloc_proto_pg(cfg, alias,
1225 &alias->icmp_chunkmask, alias->icmp_pgmask,
1226 alias->icmp, &alias->icmp_pg, ji->proto);
1229 panic("%s: wrong proto %d", __func__, ji->proto);
1231 if (ret == PG_ERROR(1)) {
1233 * PG_ERROR(1) means that alias lacks free PGs
1234 * XXX: try next alias.
1236 printf("NAT64LSN: %s: failed to obtain PG\n",
1240 if (ret == PG_ERROR(0)) {
1241 ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
1242 ji->state_hval, ji->faddr, ji->port, ji->proto);
1243 if (ji->state == NULL)
/*
 * Job-queue callout handler; @data is the struct nat64lsn_cfg instance.
 *
 * Swaps the instance's pending job list into a local list, then, inside the
 * network epoch, runs nat64lsn_alloc_host() or nat64lsn_alloc_pg() per job
 * (counting failures in `jhostfails` / `jportfails`).  Jobs marked done are
 * re-injected through nat64lsn_translate6_internal() and counted in
 * `jreinjected`.  Finally every job item is returned to the job zone.
 * NOTE(review): lines are missing from this extract (return type, several
 * declarations, queue locking around the swap, case labels, where ji->done
 * is set, the mbuf release, loop advance, CURVNET_RESTORE()).
 */
1252 nat64lsn_do_request(void *data)
1254 struct epoch_tracker et;
1255 struct nat64lsn_job_head jhead;
1256 struct nat64lsn_job_item *ji, *ji2;
1257 struct nat64lsn_cfg *cfg;
1261 cfg = (struct nat64lsn_cfg *)data;
1265 CURVNET_SET(cfg->vp);
1266 STAILQ_INIT(&jhead);
/* Take over the whole pending list in one step. */
1270 STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
1275 /* TODO: check if we need to resize hash */
1277 NAT64STAT_INC(&cfg->base.stats, jcalls);
1278 DPRINTF(DP_JQUEUE, "count=%d", jcount);
1282 * What we should do here is to build a hash
1283 * to ensure we don't have lots of duplicate requests.
1284 * Skip this for now.
1286 * TODO: Limit per-call number of items
1289 NAT64LSN_EPOCH_ENTER(et);
1290 STAILQ_FOREACH(ji, &jhead, entries) {
1291 switch (ji->jtype) {
1293 if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
1294 NAT64STAT_INC(&cfg->base.stats, jhostfails);
1296 case JTYPE_NEWPORTGROUP:
1297 if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1298 NAT64STAT_INC(&cfg->base.stats, jportfails);
1303 if (ji->done != 0) {
1304 flags = ji->proto != IPPROTO_TCP ? 0 :
1305 convert_tcp_flags(ji->f_id._flags);
1306 nat64lsn_translate6_internal(cfg, &ji->m,
1308 NAT64STAT_INC(&cfg->base.stats, jreinjected);
1311 NAT64LSN_EPOCH_EXIT(et);
/* Second pass: release the job items; the successor is fetched before freeing. */
1313 ji = STAILQ_FIRST(&jhead);
1314 while (ji != NULL) {
1315 ji2 = STAILQ_NEXT(ji, entries);
1317 * In any case we must free mbuf if
1318 * translator did not consumed it.
1321 uma_zfree(nat64lsn_job_zone, ji);
/*
 * Allocate a job item of type @jtype from the job zone.
 *
 * The queue length is checked against cfg->jmaxlen first (counted in
 * `jmaxlen` when the limit is reached); a failed allocation is counted in
 * `jnomem`.  On failure the packet is accounted as `dropped`.
 * NOTE(review): lines are missing from this extract (braces, else branches,
 * NULL checks, initialisation of the new item, the return statement), so
 * the exact failure path and the returned value are not visible.
 */
1327 static struct nat64lsn_job_item *
1328 nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
1330 struct nat64lsn_job_item *ji;
1333 * Do not try to lock possibly contested mutex if we're near the
1334 * limit. Drop packet instead.
1337 if (cfg->jlen >= cfg->jmaxlen)
1338 NAT64STAT_INC(&cfg->base.stats, jmaxlen);
1340 ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
1342 NAT64STAT_INC(&cfg->base.stats, jnomem);
1345 NAT64STAT_INC(&cfg->base.stats, dropped);
1346 DPRINTF(DP_DROPS, "failed to create job");
/*
 * Append `ji` to the tail of the instance job queue (cfg->jhead), count the
 * request (jrequests) and make sure the job callout is armed.
 */
1355 nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1359 STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
1360 NAT64STAT_INC(&cfg->base.stats, jrequests);
/*
 * Arm the callout only when it is not already pending: it fires after one
 * tick and runs nat64lsn_do_request() with cfg as its argument.
 */
1363 if (callout_pending(&cfg->jcallout) == 0)
1364 callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
/*
 * Epoch callback for a JTYPE_DESTROY job. The job item is recovered from
 * its embedded epoch_ctx member, then everything attached to it is
 * released: the hosts on ji->hosts, the portgroups on ji->portgroups,
 * ji->pgchunk and finally the job item itself.
 *
 * A host with states_count > 0, or a PG chunk whose free-mask bit count is
 * not 64, is reported through printf(); see the XXX notes below.
 */
1369 nat64lsn_job_destroy(epoch_context_t ctx)
1371 struct nat64lsn_job_item *ji;
1372 struct nat64lsn_host *host;
1373 struct nat64lsn_pg *pg;
1376 ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
1377 MPASS(ji->jtype == JTYPE_DESTROY);
/* Pop hosts one at a time from the head until the list is empty. */
1378 while (!CK_SLIST_EMPTY(&ji->hosts)) {
1379 host = CK_SLIST_FIRST(&ji->hosts);
1380 CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
1381 if (host->states_count > 0) {
1383 * XXX: The state has been created
1384 * during host deletion.
1386 printf("NAT64LSN: %s: destroying host with %d "
1387 "states\n", __func__, host->states_count);
1389 nat64lsn_destroy_host(host);
/* Same head-popping pattern for the portgroups. */
1391 while (!CK_SLIST_EMPTY(&ji->portgroups)) {
1392 pg = CK_SLIST_FIRST(&ji->portgroups);
1393 CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
1394 for (i = 0; i < pg->chunks_count; i++) {
1395 if (FREEMASK_BITCOUNT(pg, i) != 64) {
1397 * XXX: The state has been created during
1400 printf("NAT64LSN: %s: destroying PG %p "
1401 "with non-empty chunk %d\n", __func__,
1405 nat64lsn_destroy_pg(pg);
1407 uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
1408 uma_zfree(nat64lsn_job_zone, ji);
/*
 * Queue a JTYPE_NEWHOST job; nat64lsn_translate6() calls this after its
 * host lookup. `hval` is stored in the job as src6_hval, and the request
 * is counted as jhostsreq once the job has been enqueued.
 *
 * The visible return path yields IP_FW_DENY.
 * NOTE(review): the handling of a failed nat64lsn_create_job(), the other
 * job fields and the mbuf hand-off are on lines not visible here.
 */
1412 nat64lsn_request_host(struct nat64lsn_cfg *cfg,
1413 const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1414 in_addr_t faddr, uint16_t port, uint8_t proto)
1416 struct nat64lsn_job_item *ji;
1418 ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
1425 ji->src6_hval = hval;
1427 nat64lsn_enqueue_job(cfg, ji);
1428 NAT64STAT_INC(&cfg->base.stats, jhostsreq);
1431 return (IP_FW_DENY);
/*
 * Queue a JTYPE_NEWPORTGROUP job; nat64lsn_translate6() calls this after
 * its state lookup. `hval` is stored in the job as state_hval, and the
 * request is counted as jportreq once the job has been enqueued.
 *
 * The visible return path yields IP_FW_DENY.
 * NOTE(review): the handling of a failed nat64lsn_create_job(), the other
 * job fields and the mbuf hand-off are on lines not visible here.
 */
1435 nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
1436 const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1437 in_addr_t faddr, uint16_t port, uint8_t proto)
1439 struct nat64lsn_job_item *ji;
1441 ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
1448 ji->state_hval = hval;
1451 nat64lsn_enqueue_job(cfg, ji);
1452 NAT64STAT_INC(&cfg->base.stats, jportreq);
1455 return (IP_FW_DENY);
/*
 * Translate one IPv6 packet through an existing state.
 *
 * The state timestamp is refreshed when it differs from `ts` (set on a
 * line not visible here) and the flags carried by this packet are recorded
 * in state->flags. When NAT64_LOG is set in cfg->base.flags the log header
 * is filled by nat64lsn_log() and handed to nat64_do_handle_ip6() together
 * with the state address and port converted to network byte order.
 *
 * Returns cfg->nomatch_verdict when the translator answers NAT64SKIP;
 * the visible final return yields IP_FW_DENY.
 */
1459 nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
1460 struct nat64lsn_state *state, uint8_t flags)
1462 struct pfloghdr loghdr, *logdata;
1466 /* Update timestamp and flags if needed */
1468 if (state->timestamp != ts)
1469 state->timestamp = ts;
/*
 * Write only when at least one requested bit is still missing. Comparing
 * the intersection against zero skipped the update whenever none of the
 * new bits was set yet, so a flag seen for the first time was never
 * recorded.
 */
1470 if ((state->flags & flags) != flags)
1471 state->flags |= flags;
1473 if (cfg->base.flags & NAT64_LOG) {
1475 nat64lsn_log(logdata, *mp, AF_INET6, state);
1479 ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
1480 htons(state->aport), &cfg->base, logdata);
1481 if (ret == NAT64SKIP)
1482 return (cfg->nomatch_verdict);
1483 if (ret == NAT64RETURN)
1485 return (IP_FW_DENY);
/*
 * IPv6 -> IPv4 path for one packet described by `f_id`.
 *
 * Steps visible here:
 *  - choose the port/protocol pair used for the state lookup;
 *  - extract the IPv4 address embedded in the IPv6 destination and drop
 *    the packet (IP_FW_DENY, `dropped` counter) when it is zero or is
 *    rejected by nat64_check_private_ip4();
 *  - search the hosts hash bucket for the IPv6 source address;
 *  - look up the state with nat64lsn_get_state6to4() and translate through
 *    nat64lsn_translate6_internal().
 * nat64lsn_request_host() / nat64lsn_request_pg() are the fallbacks,
 * presumably taken when the host / state lookup fails -- the conditions
 * are on lines not visible here.
 */
1489 nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
1492 struct nat64lsn_state *state;
1493 struct nat64lsn_host *host;
1494 struct icmp6_hdr *icmp6;
1495 uint32_t addr, hval, data[2];
1500 /* Check if protocol is supported */
1501 port = f_id->src_port;
1502 proto = f_id->proto;
1503 switch (f_id->proto) {
1504 case IPPROTO_ICMPV6:
1506 * For ICMPv6 echo reply/request we use icmp6_id as
1510 proto = nat64_getlasthdr(*mp, &offset);
1512 NAT64STAT_INC(&cfg->base.stats, dropped);
1513 DPRINTF(DP_DROPS, "mbuf isn't contigious");
1514 return (IP_FW_DENY);
1516 if (proto == IPPROTO_ICMPV6) {
1517 icmp6 = mtodo(*mp, offset);
1518 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
1519 icmp6->icmp6_type == ICMP6_ECHO_REPLY)
1520 port = ntohs(icmp6->icmp6_id);
/* From here on the lookup uses the IPv4 protocol number. */
1522 proto = IPPROTO_ICMP;
1528 NAT64STAT_INC(&cfg->base.stats, noproto);
1529 return (cfg->nomatch_verdict);
1532 /* Extract IPv4 from destination IPv6 address */
1533 addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
1534 if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
1535 char a[INET_ADDRSTRLEN];
1537 NAT64STAT_INC(&cfg->base.stats, dropped);
1538 DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
1539 inet_ntop(AF_INET, &addr, a, sizeof(a)));
1540 return (IP_FW_DENY); /* XXX: add extra stats? */
1543 /* Try to find host */
1544 hval = HOST_HVAL(cfg, &f_id->src_ip6);
1545 CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
1546 if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
1549 /* We use IPv4 address in host byte order */
1552 return (nat64lsn_request_host(cfg, f_id, mp,
1553 hval, addr, port, proto));
/* Only TCP packets contribute flags to the state. */
1555 flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
/* Second hash word: destination port in the upper 16 bits, port below. */
1558 data[1] = (f_id->dst_port << 16) | port;
1559 hval = STATE_HVAL(cfg, data);
1560 state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
1563 return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
1565 return (nat64lsn_translate6_internal(cfg, mp, state, flags));
1569 * Main dataplane entry point.
/*
 * ipfw external action handler for NAT64LSN.
 *
 * *done is cleared first, so the rule search continues when the opcode
 * pair does not resolve to an instance (IP_FW_DENY is returned in that
 * case). Once an instance is found *done is set to 1 and the packet is
 * dispatched on args->f_id.addr_type to nat64lsn_translate4() or
 * nat64lsn_translate6(); cfg->nomatch_verdict is used when an IPv6
 * destination does not match the configured prefix.
 *
 * NOTE(review): the case labels, the setup of icmd and the body of the
 * final `ret != IP_FW_PASS` branch are on lines not visible here.
 */
1572 ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
1573 ipfw_insn *cmd, int *done)
1575 struct nat64lsn_cfg *cfg;
1579 IPFW_RLOCK_ASSERT(ch);
1581 *done = 0; /* continue the search in case of failure */
/* Both opcodes must match and the instance lookup must succeed. */
1583 if (cmd->opcode != O_EXTERNAL_ACTION ||
1584 cmd->arg1 != V_nat64lsn_eid ||
1585 icmd->opcode != O_EXTERNAL_INSTANCE ||
1586 (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
1587 return (IP_FW_DENY);
1589 *done = 1; /* terminate the search */
1591 switch (args->f_id.addr_type) {
1593 ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
1597 * Check that destination IPv6 address matches our prefix6.
/*
 * The comparison covers whole bytes only (plat_plen / 8) and is skipped
 * entirely when NAT64LSN_ANYPREFIX is set.
 */
1599 if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
1600 memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
1601 cfg->base.plat_plen / 8) != 0) {
1602 ret = cfg->nomatch_verdict;
1605 ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
1608 ret = cfg->nomatch_verdict;
1611 if (ret != IP_FW_PASS && args->m != NULL) {
/*
 * UMA constructor registered for nat64lsn_state_zone: clears the flags
 * field of each of the 64 states held in a freshly allocated chunk.
 * `size`, `arg` and `flags` are not used by the visible code.
 */
1619 nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
1621 struct nat64lsn_states_chunk *chunk;
1624 chunk = (struct nat64lsn_states_chunk *)mem;
1625 for (i = 0; i < 64; i++)
1626 chunk->state[i].flags = 0;
/*
 * Create the UMA zones shared by all NAT64LSN instances: hosts, portgroup
 * chunks, portgroups, alias links, state chunks and jobs.
 * Only the state zone gets a constructor (nat64lsn_state_ctor).
 */
1631 nat64lsn_init_internal(void)
1634 nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
1635 sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
1637 nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
1638 sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
1640 nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
1641 sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
1643 nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
1644 sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
/* One zone item is a whole states chunk, not a single state. */
1646 nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
1647 sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
1648 NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1649 nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
1650 sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
/*
 * Module-wide teardown: destroys the job queue lock and the six UMA zones
 * (hosts, portgroup chunks, portgroups, alias links, states, jobs).
 * The XXX below records that pending epoch tasks are not drained here.
 */
1656 nat64lsn_uninit_internal(void)
1659 /* XXX: epoch_task drain */
1660 JQUEUE_LOCK_DESTROY();
1661 uma_zdestroy(nat64lsn_host_zone);
1662 uma_zdestroy(nat64lsn_pgchunk_zone);
1663 uma_zdestroy(nat64lsn_pg_zone);
1664 uma_zdestroy(nat64lsn_aliaslink_zone);
1665 uma_zdestroy(nat64lsn_state_zone);
1666 uma_zdestroy(nat64lsn_job_zone);
/*
 * Start the periodic callout of an instance: nat64lsn_periodic() is
 * scheduled hz * PERIODIC_DELAY ticks from now with `cfg` as its argument.
 * NOTE(review): the matching lock acquisition is on a line not visible here.
 */
1670 nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
1674 callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
1675 nat64lsn_periodic, cfg);
1676 CALLOUT_UNLOCK(cfg);
/*
 * Allocate and initialise a NAT64LSN instance for the IPv4 pool
 * `prefix`/`plen` (prefix is used in host byte order, see below).
 *
 * Visible setup: the cfg structure, its callout lock and job queue, the
 * statistics counters, a randomly seeded hosts hash of
 * NAT64LSN_HOSTS_HSIZE buckets, one alias entry per pool address, and the
 * two callouts (periodic, bound to periodic_lock, and jcallout).
 *
 * NOTE(review): the return statement and several field assignments are on
 * lines not visible here.
 */
1679 struct nat64lsn_cfg *
1680 nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
1682 struct nat64lsn_cfg *cfg;
1683 struct nat64lsn_alias *alias;
1686 cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
1690 CALLOUT_LOCK_INIT(cfg);
1691 STAILQ_INIT(&cfg->jhead);
1693 COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
1695 cfg->hash_seed = arc4random();
1696 cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
1697 cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
1698 cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
1699 for (i = 0; i < cfg->hosts_hashsize; i++)
1700 CK_SLIST_INIT(&cfg->hosts_hash[i]);
/*
 * Number of addresses covered by the pool prefix.
 * NOTE(review): the shift is only well defined for a limited range of
 * plen; presumably the caller validates plen -- confirm.
 */
1702 naddr = 1 << (32 - plen);
1703 cfg->prefix4 = prefix;
/* pmask4 holds the prefix with all host bits set. */
1704 cfg->pmask4 = prefix | (naddr - 1);
1706 cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
1707 M_NAT64LSN, M_WAITOK | M_ZERO);
1708 for (i = 0; i < naddr; i++) {
1709 alias = &cfg->aliases[i];
1710 alias->addr = prefix + i; /* host byte order */
1711 CK_SLIST_INIT(&alias->hosts);
1712 ALIAS_LOCK_INIT(alias);
1715 callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
1716 callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
/*
 * Release a portgroup and its state storage.
 * A PG with a single chunk keeps that chunk in pg->states; otherwise the
 * chunks hang off the pg->states_chunk array, which is freed together
 * with the pg->freemask_chunk array. The PG itself goes back to
 * nat64lsn_pg_zone.
 */
1722 nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
1726 if (pg->chunks_count == 1) {
1727 uma_zfree(nat64lsn_state_zone, pg->states);
1729 for (i = 0; i < pg->chunks_count; i++)
1730 uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
/* The two arrays come from malloc(9), not from a UMA zone. */
1731 free(pg->states_chunk, M_NAT64LSN);
1732 free(pg->freemask_chunk, M_NAT64LSN);
1734 uma_zfree(nat64lsn_pg_zone, pg);
/*
 * Release everything owned by one alias (pool address): all portgroups on
 * alias->portgroups, every PG chunk marked in the per-protocol chunk
 * masks, and the alias lock. `cfg` is not used by the visible code.
 */
1738 nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
1739 struct nat64lsn_alias *alias)
1741 struct nat64lsn_pg *pg;
1744 while (!CK_SLIST_EMPTY(&alias->portgroups)) {
1745 pg = CK_SLIST_FIRST(&alias->portgroups);
1746 CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
1747 nat64lsn_destroy_pg(pg);
/* Each of the 32 mask bits tells whether the matching chunk slot is in use. */
1749 for (i = 0; i < 32; i++) {
1750 if (ISSET32(alias->tcp_chunkmask, i))
1751 uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
1752 if (ISSET32(alias->udp_chunkmask, i))
1753 uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
1754 if (ISSET32(alias->icmp_chunkmask, i))
1755 uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
1757 ALIAS_LOCK_DESTROY(alias);
/*
 * Release a host entry: detach and free every alias link on
 * host->aliases, then destroy the host lock, free the states hash and
 * return the host to nat64lsn_host_zone.
 */
1761 nat64lsn_destroy_host(struct nat64lsn_host *host)
1763 struct nat64lsn_aliaslink *link;
1765 while (!CK_SLIST_EMPTY(&host->aliases)) {
1766 link = CK_SLIST_FIRST(&host->aliases);
1767 CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
/*
 * The link is also on the alias side list; remove it there and adjust
 * hosts_count while holding the alias lock.
 */
1769 ALIAS_LOCK(link->alias);
1770 CK_SLIST_REMOVE(&link->alias->hosts, link,
1771 nat64lsn_aliaslink, alias_entries);
1772 link->alias->hosts_count--;
1773 ALIAS_UNLOCK(link->alias);
1775 uma_zfree(nat64lsn_aliaslink_zone, link);
1777 HOST_LOCK_DESTROY(host);
1778 free(host->states_hash, M_NAT64LSN);
1779 uma_zfree(nat64lsn_host_zone, host);
/*
 * Tear down an instance: drain both callouts, destroy every host in the
 * hosts hash and every alias of the pool, then release the locks, the
 * statistics counters, the hash and alias arrays and cfg itself.
 *
 * NOTE(review): the alias loop bound uses cfg->plen4, whose assignment is
 * not visible in this listing -- confirm it is set at init time.
 */
1783 nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
1785 struct nat64lsn_host *host;
/* Stop the periodic callout first, then the job callout. */
1789 callout_drain(&cfg->periodic);
1790 CALLOUT_UNLOCK(cfg);
1791 callout_drain(&cfg->jcallout);
1793 for (i = 0; i < cfg->hosts_hashsize; i++) {
1794 while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
1795 host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
1796 CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
1797 nat64lsn_destroy_host(host);
/* One alias per address of the IPv4 pool. */
1801 for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
1802 nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
1804 CALLOUT_LOCK_DESTROY(cfg);
1805 CFG_LOCK_DESTROY(cfg);
1806 COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
1807 free(cfg->hosts_hash, M_NAT64LSN);
1808 free(cfg->aliases, M_NAT64LSN);
1809 free(cfg, M_NAT64LSN);