2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2015-2019 Yandex LLC
5 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6 * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/counter.h>
37 #include <sys/epoch.h>
38 #include <sys/errno.h>
40 #include <sys/kernel.h>
42 #include <sys/malloc.h>
44 #include <sys/module.h>
45 #include <sys/rmlock.h>
46 #include <sys/socket.h>
47 #include <sys/syslog.h>
48 #include <sys/sysctl.h>
51 #include <net/if_var.h>
52 #include <net/if_pflog.h>
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/ip_fw.h>
59 #include <netinet/ip6.h>
60 #include <netinet/icmp6.h>
61 #include <netinet/ip_icmp.h>
62 #include <netinet/tcp.h>
63 #include <netinet/udp.h>
64 #include <netinet6/in6_var.h>
65 #include <netinet6/ip6_var.h>
66 #include <netinet6/ip_fw_nat64.h>
68 #include <netpfil/ipfw/ip_fw_private.h>
69 #include <netpfil/pf/pf.h>
/*
 * NOTE(review): this file appears to be a corrupted extraction of the
 * FreeBSD ipfw NAT64LSN module -- original line numbers are fused into
 * each line and many lines are missing.  Comments below annotate intent
 * only; the code bytes are left untouched pending recovery of the
 * pristine source.
 */
/* Allocation tag for all NAT64LSN kernel memory. */
73 MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
/*
 * Data-path readers run inside a preemptible epoch(9) section instead of
 * taking locks; these wrappers centralize the epoch used by the module.
 */
75 static epoch_t nat64lsn_epoch;
76 #define NAT64LSN_EPOCH_ENTER(et) epoch_enter_preempt(nat64lsn_epoch, &(et))
77 #define NAT64LSN_EPOCH_EXIT(et) epoch_exit_preempt(nat64lsn_epoch, &(et))
78 #define NAT64LSN_EPOCH_WAIT() epoch_wait_preempt(nat64lsn_epoch)
79 #define NAT64LSN_EPOCH_ASSERT() MPASS(in_epoch(nat64lsn_epoch))
80 #define NAT64LSN_EPOCH_CALL(c, f) epoch_call(nat64lsn_epoch, (c), (f))
/* UMA zones, one per object type managed by the translator. */
82 static uma_zone_t nat64lsn_host_zone;
83 static uma_zone_t nat64lsn_pgchunk_zone;
84 static uma_zone_t nat64lsn_pg_zone;
85 static uma_zone_t nat64lsn_aliaslink_zone;
86 static uma_zone_t nat64lsn_state_zone;
87 static uma_zone_t nat64lsn_job_zone;
/* Periodic housekeeping callout handler; runs every PERIODIC_DELAY sec. */
89 static void nat64lsn_periodic(void *data);
90 #define PERIODIC_DELAY 4
/* Resolve the cfg object bound to this ipfw rule opcode argument. */
91 #define NAT64_LOOKUP(chain, cmd) \
92 (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
/*
 * Job item for the delayed work queue.  Host/PG creation cannot happen on
 * the data path (it sleeps/allocates), so packets enqueue a job and the
 * callout drains it.  NOTE(review): union/member lines appear missing in
 * this extraction (jumps 105->108, 114->120).
 */
94 * Delayed job queue, used to create new hosts
103 struct nat64lsn_job_item {
104 	STAILQ_ENTRY(nat64lsn_job_item) entries;
105 	enum nat64lsn_jtype jtype;
108 	struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
110 		struct nat64lsn_host *host;
111 		struct nat64lsn_state *state;
114 		struct ipfw_flow_id f_id;
120 	struct { /* used by JTYPE_DESTROY */
121 		struct nat64lsn_hosts_slist hosts;
122 		struct nat64lsn_pg_slist portgroups;
123 		struct nat64lsn_pgchunk *pgchunk;
124 		struct epoch_context epoch_ctx;
/* Global mutex protecting the per-cfg job queues (jhead). */
129 static struct mtx jmtx;
130 #define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
131 #define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx)
132 #define JQUEUE_LOCK() mtx_lock(&jmtx)
133 #define JQUEUE_UNLOCK() mtx_unlock(&jmtx)
/* Forward declarations for allocation, job handling and translation. */
135 static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
136 struct nat64lsn_job_item *ji);
137 static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
138 struct nat64lsn_job_item *ji);
139 static struct nat64lsn_job_item *nat64lsn_create_job(
140 struct nat64lsn_cfg *cfg, int jtype);
141 static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
142 struct nat64lsn_job_item *ji);
143 static void nat64lsn_job_destroy(epoch_context_t ctx);
144 static void nat64lsn_destroy_host(struct nat64lsn_host *host);
145 static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);
147 static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
148 const struct ipfw_flow_id *f_id, struct mbuf **mp);
149 static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
150 struct ipfw_flow_id *f_id, struct mbuf **mp);
151 static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
152 struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
/*
 * Per-state flag bits.  Bits 0-2 mirror observed TCP handshake progress;
 * READY_IPV4 gates 4->6 use of a state, STALE marks it for deletion.
 */
154 #define NAT64_BIT_TCP_FIN 0 /* FIN was seen */
155 #define NAT64_BIT_TCP_SYN 1 /* First syn in->out */
156 #define NAT64_BIT_TCP_ESTAB 2 /* Packet with Ack */
157 #define NAT64_BIT_READY_IPV4 6 /* state is ready for translate4 */
158 #define NAT64_BIT_STALE 7 /* state is going to be expired */
160 #define NAT64_FLAG_FIN (1 << NAT64_BIT_TCP_FIN)
161 #define NAT64_FLAG_SYN (1 << NAT64_BIT_TCP_SYN)
162 #define NAT64_FLAG_ESTAB (1 << NAT64_BIT_TCP_ESTAB)
163 #define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
165 #define NAT64_FLAG_READY (1 << NAT64_BIT_READY_IPV4)
166 #define NAT64_FLAG_STALE (1 << NAT64_BIT_STALE)
/*
 * Map TCP header flags (TH_*) onto the module's NAT64_FLAG_* bits:
 * FIN/SYN pass through; RST is folded into FIN and ACK into ESTAB by the
 * >> 2 shifts (TH_RST is bit 2 -> bit 0, TH_ACK is bit 4 -> bit 2).
 * NOTE(review): local declaration and return statement are missing from
 * this extraction.
 */
168 static inline uint8_t
169 convert_tcp_flags(uint8_t flags)
173 	result = flags & (TH_FIN|TH_SYN);
174 	result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
175 	result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */
/*
 * Emit a pflog(4) record for a translated packet.  State identity is
 * packed into the rulenr/subrulenr fields: alias IPv4 in rulenr, and
 * (aport << 16) | (proto << 8) | low byte of remote IPv4 in subrulenr.
 */
181 nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
182 struct nat64lsn_state *state)
185 	memset(plog, 0, sizeof(*plog));
186 	plog->length = PFLOG_REAL_HDRLEN;
188 	plog->action = PF_NAT;
190 	plog->rulenr = htonl(state->ip_src);
191 	plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
192 	    (state->proto << 8) | (state->ip_dst & 0xff));
193 	plog->ruleset[0] = '\0';
194 	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
/* Hand the record plus packet to ipfw's BPF tap. */
195 	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
/*
 * Hashing helpers.  HVAL runs jenkins_hash32 over n 32-bit words with the
 * cfg's seed; HOST_HVAL hashes an IPv6 source address, ALIASLINK_HVAL
 * hashes dst_ip6 plus the following in6_addr (2 addresses' worth of
 * words).  ALIAS_BYHASH indexes the alias array by the low bits of an
 * IPv4 address (plen4 = prefix length of the alias pool).
 */
198 #define HVAL(p, n, s) jenkins_hash32((const uint32_t *)(p), (n), (s))
199 #define HOST_HVAL(c, a) HVAL((a),\
200 sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
201 #define HOSTS(c, v) ((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])
203 #define ALIASLINK_HVAL(c, f) HVAL(&(f)->dst_ip6,\
204 sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
205 #define ALIAS_BYHASH(c, v) \
206 ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
/*
 * Pick the alias (public IPv4) link for a host.  Currently always returns
 * the first entry of host->aliases; cfg and f_id are accepted (and marked
 * __unused) so smarter selection policies can be added without changing
 * callers.
 */
207 static struct nat64lsn_aliaslink*
208 nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
209 struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
213 	 * We can implement some different algorithms how
214 	 * select an alias address.
215 	 * XXX: for now we use first available.
217 	return (CK_SLIST_FIRST(&host->aliases));
/*
 * State hashing and free-slot bitmask accessors.  A PG tracks its 64
 * port-states in a 64-bit freemask (bit set == slot free).  On LP64 the
 * mask is manipulated with native 64-bit ck_pr ops; the !__LP64__ branch
 * below emulates the same API with pairs of 32-bit ops.
 */
220 #define STATE_HVAL(c, d) HVAL((d), 2, (c)->hash_seed)
221 #define STATE_HASH(h, v) \
222 ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
223 #define STATES_CHUNK(p, v) \
224 ((p)->chunks_count == 1 ? (p)->states : \
225 ((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
/* 64-bit (LP64) variants. */
228 #define FREEMASK_FFSLL(pg, faddr) \
229 ffsll(*FREEMASK_CHUNK((pg), (faddr)))
230 #define FREEMASK_BTR(pg, faddr, bit) \
231 ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
232 #define FREEMASK_BTS(pg, faddr, bit) \
233 ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
234 #define FREEMASK_ISSET(pg, faddr, bit) \
235 ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
236 #define FREEMASK_COPY(pg, n, out) \
237 (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
/*
 * 32-bit fallback: find first set bit across the two 32-bit halves.
 * NOTE(review): the second-half offset (+32) and return lines are missing
 * from this extraction.
 */
240 freemask_ffsll(uint32_t *freemask)
244 	if ((i = ffsl(freemask[0])) != 0)
246 	if ((i = ffsl(freemask[1])) != 0)
250 #define FREEMASK_FFSLL(pg, faddr) \
251 freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
252 #define FREEMASK_BTR(pg, faddr, bit) \
253 ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
254 #define FREEMASK_BTS(pg, faddr, bit) \
255 ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
256 #define FREEMASK_ISSET(pg, faddr, bit) \
257 ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
258 #define FREEMASK_COPY(pg, n, out) \
259 (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
260 ((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
261 #endif /* !__LP64__ */
/*
 * Lock-free lookup of a port group with free state slots for faddr.
 * Strategy: try the cached "last used" PG first; otherwise scan up to
 * NAT64LSN_TRY_PGCNT PGs through the pgmask/chunk arrays, publishing any
 * better candidate back into *pgptr / *pgidx with CAS (racing against the
 * jobs handler is expected and benign).  NOTE(review): several statements
 * (loop setup, return paths) are missing from this extraction.
 */
264 #define NAT64LSN_TRY_PGCNT 32
265 static struct nat64lsn_pg*
266 nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
267 struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
268 uint32_t *pgidx, in_addr_t faddr)
270 	struct nat64lsn_pg *pg, *oldpg;
271 	uint32_t idx, oldidx;
275 	/* First try last used PG */
276 	oldpg = pg = ck_pr_load_ptr(pgptr);
277 	idx = oldidx = ck_pr_load_32(pgidx);
278 	/* If pgidx is out of range, reset it to the first pgchunk */
279 	if (!ISSET32(*chunkmask, idx / 32))
283 	if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
285 	 * If last used PG has not free states,
286 	 * try to update pointer.
287 	 * NOTE: it can be already updated by jobs handler,
288 	 * thus we use CAS operation.
291 	ck_pr_cas_ptr(pgptr, oldpg, pg);
294 	/* Stop if idx is out of range */
295 	if (!ISSET32(*chunkmask, idx / 32))
298 	if (ISSET32(pgmask[idx / 32], idx % 32))
300 	    &chunks[idx / 32]->pgptr[idx % 32]);
305 	} while (++cnt < NAT64LSN_TRY_PGCNT);
307 	/* If pgidx is out of range, reset it to the first pgchunk */
308 	if (!ISSET32(*chunkmask, idx / 32))
310 	ck_pr_cas_32(pgidx, oldidx, idx);
/*
 * 6->4 direction: find or create the translation state for
 * (host, faddr, port, proto).  Runs under the module epoch.  Flow:
 *   1. look the state up in the host's hash chain;
 *   2. otherwise pick an alias and a per-protocol PG (TCP/UDP/ICMP each
 *      have their own chunkmask/pgmask/pg cache);
 *   3. atomically claim a free slot via FREEMASK_BTR and initialize it.
 * NOTE(review): multiple lines (locking, hval assignment, returns) are
 * missing from this extraction.
 */
314 static struct nat64lsn_state*
315 nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
316 const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
317 uint16_t port, uint8_t proto)
319 	struct nat64lsn_aliaslink *link;
320 	struct nat64lsn_state *state;
321 	struct nat64lsn_pg *pg;
324 	NAT64LSN_EPOCH_ASSERT();
326 	/* Check that we already have state for given arguments */
327 	CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
328 		if (state->proto == proto && state->ip_dst == faddr &&
329 		    state->sport == port && state->dport == f_id->dst_port)
333 	link = nat64lsn_get_aliaslink(cfg, host, f_id);
339 		pg = nat64lsn_get_pg(
340 		    &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
341 		    link->alias->tcp, &link->alias->tcp_pg,
342 		    &link->alias->tcp_pgidx, faddr);
345 		pg = nat64lsn_get_pg(
346 		    &link->alias->udp_chunkmask, link->alias->udp_pgmask,
347 		    link->alias->udp, &link->alias->udp_pg,
348 		    &link->alias->udp_pgidx, faddr);
351 		pg = nat64lsn_get_pg(
352 		    &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
353 		    link->alias->icmp, &link->alias->icmp_pg,
354 		    &link->alias->icmp_pgidx, faddr);
357 		panic("%s: wrong proto %d", __func__, proto);
362 	/* Check that PG has some free states */
364 	i = FREEMASK_BITCOUNT(pg, faddr);
366 	offset = FREEMASK_FFSLL(pg, faddr);
370 	 * No more free states in this PG.
375 	/* Lets try to atomically grab the state */
376 	if (FREEMASK_BTR(pg, faddr, offset - 1)) {
377 		state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
379 		state->flags = proto != IPPROTO_TCP ? 0 :
380 		    convert_tcp_flags(f_id->_flags);
381 		state->proto = proto;
/* aport = PG's base port + claimed bit index (ffs is 1-based). */
382 		state->aport = pg->base_port + offset - 1;
383 		state->dport = f_id->dst_port;
385 		state->ip6_dst = f_id->dst_ip6;
386 		state->ip_dst = faddr;
387 		state->ip_src = link->alias->addr;
390 		SET_AGE(state->timestamp);
392 		/* Insert new state into host's hash table */
394 		CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
396 		host->states_count++;
398 		 * XXX: In case if host is going to be expired,
399 		 * reset NAT64LSN_DEADHOST flag.
401 		host->flags &= ~NAT64LSN_DEADHOST;
403 		NAT64STAT_INC(&cfg->base.stats, screated);
404 		/* Mark the state as ready for translate4 */
406 		ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
/*
 * Look inside an outbound ICMP packet.  For echo messages the ICMP id is
 * used as the port; for UNREACH/TIMXCEED errors the embedded (inner) IP
 * header is parsed to pull the real protocol, source address and source
 * port so the error can be matched to an existing state.  m_pullup() may
 * replace *mp; callers must not keep stale pointers.  NOTE(review):
 * function header line and several error-return lines are missing here.
 */
414 * Inspects icmp packets to see if the message contains different
415 * packet header so we need to alter @addr and @port.
418 inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
426 	ip = mtod(*mp, struct ip *); /* Outer IP header */
427 	off = (ip->ip_hl << 2) + ICMP_MINLEN;
428 	if ((*mp)->m_len < off)
429 		*mp = m_pullup(*mp, off);
433 	ip = mtod(*mp, struct ip *); /* Outer IP header */
434 	icmp = L3HDR(ip, struct icmp *);
435 	switch (icmp->icmp_type) {
438 		/* Use icmp ID as distinguisher */
439 		*port = ntohs(icmp->icmp_id);
448 	 * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
451 	if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
453 	if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
454 		*mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
457 	ip = mtodo(*mp, off); /* Inner IP header */
458 	inner_proto = ip->ip_p;
459 	off += ip->ip_hl << 2; /* Skip inner IP header */
460 	*addr = ntohl(ip->ip_src.s_addr);
461 	if ((*mp)->m_len < off + ICMP_MINLEN)
462 		*mp = m_pullup(*mp, off + ICMP_MINLEN);
465 	switch (inner_proto) {
468 		/* Copy source port from the header */
469 		*port = ntohs(*((uint16_t *)mtodo(*mp, off)));
470 		*proto = inner_proto;
474 		 * We will translate only ICMP errors for our ICMP
477 		icmp = mtodo(*mp, off);
478 		if (icmp->icmp_type != ICMP_ECHO)
480 		*port = ntohs(icmp->icmp_id);
/*
 * 4->6 direction: map (alias, faddr, port, proto) to an existing state by
 * direct index arithmetic -- no hashing.  Ports below NAT64_MIN_PORT are
 * never allocated, so reject them.  The layout is: 32 pgchunks/protocol,
 * 32 PG pointers per chunk, 64 ports per PG, hence chunk = port/2048 and
 * state = port%64.  A state whose freemask bit is still set, or whose
 * READY flag is clear, does not exist yet.  NOTE(review): pg_idx
 * computation and return lines are missing from this extraction.
 */
486 static struct nat64lsn_state*
487 nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
488 in_addr_t faddr, uint16_t port, uint8_t proto)
490 	struct nat64lsn_state *state;
491 	struct nat64lsn_pg *pg;
492 	int chunk_idx, pg_idx, state_idx;
494 	NAT64LSN_EPOCH_ASSERT();
496 	if (port < NAT64_MIN_PORT)
499 	 * Alias keeps 32 pgchunks for each protocol.
500 	 * Each pgchunk has 32 pointers to portgroup.
501 	 * Each portgroup has 64 states for ports.
503 	port -= NAT64_MIN_PORT;
504 	chunk_idx = port / 2048;
506 	port -= chunk_idx * 2048;
508 	state_idx = port % 64;
511 	 * First check in proto_chunkmask that we have allocated PG chunk.
512 	 * Then check in proto_pgmask that we have valid PG pointer.
517 	if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
518 	    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
519 		pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
524 	if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
525 	    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
526 		pg = alias->udp[chunk_idx]->pgptr[pg_idx];
531 	if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
532 	    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
533 		pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
538 	panic("%s: wrong proto %d", __func__, proto);
/* Bit still set in freemask means the slot is free -> no state. */
543 	if (FREEMASK_ISSET(pg, faddr, state_idx))
546 	state = &STATES_CHUNK(pg, faddr)->state[state_idx];
548 	if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
/*
 * Translate an inbound IPv4 packet to IPv6.  Rejects packets whose dst is
 * outside the alias pool (prefix4..pmask4), digs into ICMP errors via
 * inspect_icmp_mbuf(), locates the state, refreshes its timestamp/flags,
 * optionally pflogs, then re-embeds the IPv4 source into an IPv6 address
 * and hands off to nat64_do_handle_ip4().  Returns an ipfw verdict.
 * NOTE(review): addr initialization, several switch labels and the final
 * return are missing from this extraction.
 */
554 nat64lsn_translate4(struct nat64lsn_cfg *cfg,
555 const struct ipfw_flow_id *f_id, struct mbuf **mp)
557 	struct pfloghdr loghdr, *logdata;
558 	struct in6_addr src6;
559 	struct nat64lsn_state *state;
560 	struct nat64lsn_alias *alias;
561 	uint32_t addr, flags;
567 	port = f_id->dst_port;
569 	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
570 		NAT64STAT_INC(&cfg->base.stats, nomatch4);
571 		return (cfg->nomatch_verdict);
574 	/* Check if protocol is supported */
577 		ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
580 			NAT64STAT_INC(&cfg->base.stats, nomem);
583 			NAT64STAT_INC(&cfg->base.stats, noproto);
584 			return (cfg->nomatch_verdict);
586 		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
587 			NAT64STAT_INC(&cfg->base.stats, nomatch4);
588 			return (cfg->nomatch_verdict);
595 		NAT64STAT_INC(&cfg->base.stats, noproto);
596 		return (cfg->nomatch_verdict);
599 	alias = &ALIAS_BYHASH(cfg, addr);
600 	MPASS(addr == alias->addr);
602 	/* Check that we have state for this port */
603 	state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
606 		NAT64STAT_INC(&cfg->base.stats, nomatch4);
607 		return (cfg->nomatch_verdict);
610 	/* TODO: Check flags to see if we need to do some static mapping */
612 	/* Update some state fields if need */
614 	if (f_id->proto == IPPROTO_TCP)
615 		flags = convert_tcp_flags(f_id->_flags);
618 	if (state->timestamp != ts)
619 		state->timestamp = ts;
620 	if ((state->flags & flags) != flags)
621 		state->flags |= flags;
623 	port = htons(state->sport);
624 	src6 = state->ip6_dst;
626 	if (cfg->base.flags & NAT64_LOG) {
628 		nat64lsn_log(logdata, *mp, AF_INET, state);
633 	 * We already have src6 with embedded address, but it is possible,
634 	 * that src_ip is different than state->ip_dst, this is why we
635 	 * do embedding again.
637 	nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
638 	ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
639 	    &cfg->base, logdata);
640 	if (ret == NAT64SKIP)
641 		return (cfg->nomatch_verdict);
642 	if (ret == NAT64RETURN)
/*
 * Expiry predicate for a single state.  Already-STALE states are
 * confirmed dead; not-yet-READY states are kept.  Otherwise the state's
 * age is compared against the per-protocol TTL (for TCP, the TTL depends
 * on how far the connection progressed: close > estab > syn).
 * NOTE(review): function opening, ttl comparison for TCP and the return
 * statements are missing from this extraction.
 */
648 * Check if particular state is stale and should be deleted.
649 * Return 1 if true, 0 otherwise.
652 nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
656 	/* State was marked as stale in previous pass. */
657 	if (ISSET32(state->flags, NAT64_BIT_STALE))
660 	/* State is not yet initialized, it is going to be READY */
661 	if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
664 	age = GET_AGE(state->timestamp);
665 	switch (state->proto) {
667 		if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
668 			ttl = cfg->st_close_ttl;
669 		else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
670 			ttl = cfg->st_estab_ttl;
671 		else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
672 			ttl = cfg->st_syn_ttl;
674 			ttl = cfg->st_syn_ttl;
679 		if (age > cfg->st_udp_ttl)
683 		if (age > cfg->st_icmp_ttl)
/*
 * Scan one PG's allocated states and expire stale ones in two passes:
 * pass 1 clears READY, sets STALE and unlinks from the host hash (under
 * the host lock); pass 2 (next periodic run) returns the slot to the
 * freemask.  Returns whether the whole PG may be destroyed (0 = keep).
 * NOTE(review): host lock/unlock lines and the return statements are
 * missing from this extraction.
 */
691 nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
693 	struct nat64lsn_state *state;
694 	struct nat64lsn_host *host;
696 	int c, i, update_age;
699 	for (c = 0; c < pg->chunks_count; c++) {
700 		FREEMASK_COPY(pg, c, freemask);
701 		for (i = 0; i < 64; i++) {
/* Bit set in freemask -> slot unused, nothing to check. */
702 			if (ISSET64(freemask, i))
704 			state = &STATES_CHUNK(pg, c)->state[i];
705 			if (nat64lsn_check_state(cfg, state) == 0) {
711 			 * 1. Mark as STALE and unlink from host's hash.
712 			 * 2. Set bit in freemask.
714 			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
716 				 * State was marked as STALE in previous
717 				 * pass. Now it is safe to release it.
721 				FREEMASK_BTS(pg, c, i);
722 				NAT64STAT_INC(&cfg->base.stats, sdeleted);
725 				MPASS(state->flags & NAT64_FLAG_READY);
729 				CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
730 				    state, nat64lsn_state, entries);
731 				host->states_count--;
734 				/* Reset READY flag */
735 				ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
736 				/* And set STALE flag */
737 				ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
740 				 * Now translate6 will not use this state, wait
741 				 * until it become safe for translate4, then mark
748 	 * We have some alive states, update timestamp.
751 	SET_AGE(pg->timestamp);
753 	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
/*
 * Walk every alias's PG list, expire empty PGs and move them onto the
 * job's list for deferred (epoch-safe) destruction.  The first PG of each
 * protocol is always kept so lookups have a fallback; the cached
 * last-used PG pointer/index are CAS-reset if they referenced the victim.
 * NOTE(review): switch labels on proto and the ALIAS_LOCK/UNLOCK lines
 * are missing from this extraction.
 */
760 nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
761 struct nat64lsn_pg_slist *portgroups)
763 	struct nat64lsn_alias *alias;
764 	struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
765 	uint32_t *pgmask, *pgidx;
768 	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
769 		alias = &cfg->aliases[i];
770 		CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
771 			if (nat64lsn_maintain_pg(cfg, pg) == 0)
773 			/* Always keep first PG */
774 			if (pg->base_port == NAT64_MIN_PORT)
777 			 * PG is expired, unlink it and schedule for
778 			 * deferred destroying.
780 			idx = (pg->base_port - NAT64_MIN_PORT) / 64;
783 				pgmask = alias->tcp_pgmask;
784 				pgptr = &alias->tcp_pg;
785 				pgidx = &alias->tcp_pgidx;
786 				firstpg = alias->tcp[0]->pgptr[0];
789 				pgmask = alias->udp_pgmask;
790 				pgptr = &alias->udp_pg;
791 				pgidx = &alias->udp_pgidx;
792 				firstpg = alias->udp[0]->pgptr[0];
795 				pgmask = alias->icmp_pgmask;
796 				pgptr = &alias->icmp_pg;
797 				pgidx = &alias->icmp_pgidx;
798 				firstpg = alias->icmp[0]->pgptr[0];
801 			/* Reset the corresponding bit in pgmask array. */
802 			ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
804 			/* If last used PG points to this PG, reset it. */
805 			ck_pr_cas_ptr(pgptr, pg, firstpg);
806 			ck_pr_cas_32(pgidx, idx, 0);
807 			/* Unlink PG from alias's chain */
809 			CK_SLIST_REMOVE(&alias->portgroups, pg,
810 			    nat64lsn_pg, entries);
811 			alias->portgroups_count--;
813 			/* And link to job's chain for deferred destroying */
814 			NAT64STAT_INC(&cfg->base.stats, spgdeleted);
815 			CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
/*
 * Two-pass host expiry: first pass marks an idle, state-less host
 * NAT64LSN_DEADHOST; the next periodic run unlinks any still-dead host
 * from the hash and moves it to the job's list for epoch-deferred
 * destruction.  A host that regained states in between is resurrected.
 * NOTE(review): cfg lock lines and hosts_count decrement are missing
 * from this extraction.
 */
821 nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
822 struct nat64lsn_hosts_slist *hosts)
824 	struct nat64lsn_host *host, *tmp;
827 	for (i = 0; i < cfg->hosts_hashsize; i++) {
828 		CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
830 			/* Is host was marked in previous call? */
831 			if (host->flags & NAT64LSN_DEADHOST) {
832 				if (host->states_count > 0) {
833 					host->flags &= ~NAT64LSN_DEADHOST;
837 				 * Unlink host from hash table and schedule
838 				 * it for deferred destroying.
841 				CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
842 				    nat64lsn_host, entries);
845 				CK_SLIST_INSERT_HEAD(hosts, host, entries);
848 			if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
850 			if (host->states_count > 0)
852 			/* Mark host as going to be expired in next pass */
853 			host->flags |= NAT64LSN_DEADHOST;
/*
 * Reclaim one idle pgchunk (per call) from an idle alias.  Chunk 0 is
 * always kept; a chunk is a candidate only if its pgmask shows no live
 * PGs, double-checked after clearing the chunkmask bit.  NOTE(review):
 * only the TCP branch is visible; the UDP/ICMP branches and the return
 * paths are missing from this extraction.
 */
859 static struct nat64lsn_pgchunk*
860 nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
863 	struct nat64lsn_alias *alias;
864 	struct nat64lsn_pgchunk *chunk;
868 	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
869 		alias = &cfg->aliases[i];
870 		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
872 		/* Always keep single chunk allocated */
873 		for (c = 1; c < 32; c++) {
874 			if ((alias->tcp_chunkmask & (1 << c)) == 0)
876 			chunk = ck_pr_load_ptr(&alias->tcp[c]);
877 			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
879 			ck_pr_btr_32(&alias->tcp_chunkmask, c);
/* Re-check after clearing the bit: a racing allocator may have used it. */
881 			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
/*
 * Grow a host's state hash when its load factor exceeds 2 (states_count /
 * 2 >= states_hashsize), capped at NAT64LSN_MAX_HSIZE.  Allocates the
 * doubled table and flags the host with NAT64LSN_GROWHASH; the actual
 * rehash is presumably done elsewhere -- TODO confirm.  NOTE(review):
 * malloc() here lacks its M_NAT64LSN type argument and the new-hash
 * handoff lines are missing in this extraction; also the CK_SLIST_INIT
 * loop indexes hash[i] where hash[j] looks intended -- verify against
 * pristine source.
 */
891 nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
893 	struct nat64lsn_host *h;
894 	struct nat64lsn_states_slist *hash;
897 	for (i = 0; i < cfg->hosts_hashsize; i++) {
898 		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
899 			if (h->states_count / 2 < h->states_hashsize ||
900 			    h->states_hashsize >= NAT64LSN_MAX_HSIZE)
902 			hsize = h->states_hashsize * 2;
903 			hash = malloc(sizeof(*hash)* hsize, M_NOWAIT);
906 			for (j = 0; j < hsize; j++)
907 				CK_SLIST_INIT(&hash[i]);
909 			ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
/*
 * Callout handler (every PERIODIC_DELAY seconds): builds a JTYPE_DESTROY
 * job, fills it with expired hosts/PGs/pgchunks, and schedules the actual
 * freeing through NAT64LSN_EPOCH_CALL so no epoch reader can still hold a
 * reference.  Reschedules itself unconditionally at the end.
 */
916 * This procedure is used to perform various maintance
917 * on dynamic hash list. Currently it is called every 4 seconds.
920 nat64lsn_periodic(void *data)
922 	struct nat64lsn_job_item *ji;
923 	struct nat64lsn_cfg *cfg;
925 	cfg = (struct nat64lsn_cfg *) data;
926 	CURVNET_SET(cfg->vp);
927 	if (cfg->hosts_count > 0) {
928 		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
930 			ji->jtype = JTYPE_DESTROY;
931 			CK_SLIST_INIT(&ji->hosts);
932 			CK_SLIST_INIT(&ji->portgroups);
933 			nat64lsn_expire_hosts(cfg, &ji->hosts);
934 			nat64lsn_expire_portgroups(cfg, &ji->portgroups);
935 			ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
936 			NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
937 			    nat64lsn_job_destroy);
/* Allocation failure: skip this cycle, count it, try again next time. */
939 			NAT64STAT_INC(&cfg->base.stats, jnomem);
941 	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
/*
 * Allocation error encoding: 10*type + stage, 0 == success.  HOST_ERROR
 * and PG_ERROR give each allocator a distinct, stage-identifying code.
 */
945 #define ALLOC_ERROR(stage, type) ((stage) ? 10 * (type) + (stage): 0)
946 #define HOST_ERROR(stage) ALLOC_ERROR(stage, 1)
947 #define PG_ERROR(stage) ALLOC_ERROR(stage, 2)
/*
 * Job handler: allocate and publish a new host object for the IPv6 source
 * in ji->f_id.  Steps: dedup against the host hash (another job may have
 * won the race), allocate host + state hash + alias link, initialize,
 * link into the alias and the cfg hash, then immediately try to create
 * the first state (allocating a PG on demand).  Returns HOST_ERROR(n).
 * NOTE(review): lock/unlock lines and a few assignments are missing from
 * this extraction.
 */
949 nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
951 	char a[INET6_ADDRSTRLEN];
952 	struct nat64lsn_aliaslink *link;
953 	struct nat64lsn_host *host;
954 	struct nat64lsn_state *state;
955 	uint32_t hval, data[2];
958 	/* Check that host was not yet added. */
959 	NAT64LSN_EPOCH_ASSERT();
960 	CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
961 		if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
962 			/* The host was allocated in previous call. */
968 	host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
969 	if (ji->host == NULL)
970 		return (HOST_ERROR(1));
972 	host->states_hashsize = NAT64LSN_HSIZE;
973 	host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
974 	    host->states_hashsize, M_NAT64LSN, M_NOWAIT);
975 	if (host->states_hash == NULL) {
976 		uma_zfree(nat64lsn_host_zone, host);
977 		return (HOST_ERROR(2));
980 	link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
982 		free(host->states_hash, M_NAT64LSN);
983 		uma_zfree(nat64lsn_host_zone, host);
984 		return (HOST_ERROR(3));
988 	HOST_LOCK_INIT(host);
989 	SET_AGE(host->timestamp);
990 	host->addr = ji->f_id.src_ip6;
991 	host->hval = ji->src6_hval;
993 	host->states_count = 0;
994 	host->states_hashsize = NAT64LSN_HSIZE;
995 	CK_SLIST_INIT(&host->aliases);
996 	for (i = 0; i < host->states_hashsize; i++)
997 		CK_SLIST_INIT(&host->states_hash[i]);
999 	/* Determine alias from flow hash. */
1000 	hval = ALIASLINK_HVAL(cfg, &ji->f_id);
1001 	link->alias = &ALIAS_BYHASH(cfg, hval);
1002 	CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
1004 	ALIAS_LOCK(link->alias);
1005 	CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
1006 	link->alias->hosts_count++;
1007 	ALIAS_UNLOCK(link->alias);
1010 	CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
/* Hash key is (remote IPv4, dst_port << 16 | src port). */
1015 	data[0] = ji->faddr;
1016 	data[1] = (ji->f_id.dst_port << 16) | ji->port;
1017 	ji->state_hval = hval = STATE_HVAL(cfg, data);
1018 	state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
1019 	    ji->faddr, ji->port, ji->proto);
1021 	 * We failed to obtain new state, used alias needs new PG.
1022 	 * XXX: or another alias should be used.
1024 	if (state == NULL) {
1025 		/* Try to allocate new PG */
1026 		if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1027 			return (HOST_ERROR(4));
1028 		/* We assume that nat64lsn_alloc_pg() got state */
1033 	DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
1034 	    inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
1035 	return (HOST_ERROR(0));
/*
 * Find the index of the first clear bit in a 32-word pgmask array, i.e.
 * the first free PG slot (0..1023), or a negative value if full --
 * NOTE(review): the full-mask skip and the exhausted-return lines are
 * missing from this extraction.
 */
1039 nat64lsn_find_pg_place(uint32_t *data)
1043 	for (i = 0; i < 32; i++) {
1046 		return (i * 32 + ffs(~data[i]) - 1);
/*
 * Allocate a new PG for one protocol of an alias.  Sequence: find a free
 * slot in pgmask, allocate the enclosing pgchunk if absent, allocate the
 * PG and its state storage (single inline chunk, or chunks_count separate
 * chunks when cfg->states_chunks > 1; freemask bits start all-set/free),
 * then publish: store the PG pointer, fence, set the pgmask bit, update
 * the last-used cache and link into the alias under ALIAS_LOCK.  Returns
 * PG_ERROR(stage).  NOTE(review): the M_NOWAIT argument of the pgchunk
 * zalloc, an else, and the error-unwind label are missing here.
 */
1052 nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
1053 struct nat64lsn_alias *alias, uint32_t *chunkmask,
1054 uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
1055 struct nat64lsn_pg **pgptr, uint8_t proto)
1057 	struct nat64lsn_pg *pg;
1058 	int i, pg_idx, chunk_idx;
1060 	/* Find place in pgchunk where PG can be added */
1061 	pg_idx = nat64lsn_find_pg_place(pgmask);
1062 	if (pg_idx < 0)	/* no more PGs */
1063 		return (PG_ERROR(1));
1064 	/* Check that we have allocated pgchunk for given PG index */
1065 	chunk_idx = pg_idx / 32;
1066 	if (!ISSET32(*chunkmask, chunk_idx)) {
1067 		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
1069 		if (chunks[chunk_idx] == NULL)
1070 			return (PG_ERROR(2));
/* Publish chunk before setting its chunkmask bit (store fence). */
1071 		ck_pr_bts_32(chunkmask, chunk_idx);
1072 		ck_pr_fence_store();
1074 	/* Allocate PG and states chunks */
1075 	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
1077 		return (PG_ERROR(3));
1078 	pg->chunks_count = cfg->states_chunks;
1079 	if (pg->chunks_count > 1) {
1080 		pg->freemask_chunk = malloc(pg->chunks_count *
1081 		    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
1082 		if (pg->freemask_chunk == NULL) {
1083 			uma_zfree(nat64lsn_pg_zone, pg);
1084 			return (PG_ERROR(4));
1086 		pg->states_chunk = malloc(pg->chunks_count *
1087 		    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
1089 		if (pg->states_chunk == NULL) {
1090 			free(pg->freemask_chunk, M_NAT64LSN);
1091 			uma_zfree(nat64lsn_pg_zone, pg);
1092 			return (PG_ERROR(5));
1094 		for (i = 0; i < pg->chunks_count; i++) {
1095 			pg->states_chunk[i] = uma_zalloc(
1096 			    nat64lsn_state_zone, M_NOWAIT);
1097 			if (pg->states_chunk[i] == NULL)
1100 		memset(pg->freemask_chunk, 0xff,
1101 		    sizeof(uint64_t) * pg->chunks_count);
1103 		pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
1104 		if (pg->states == NULL) {
1105 			uma_zfree(nat64lsn_pg_zone, pg);
1106 			return (PG_ERROR(6));
1108 		memset(&pg->freemask64, 0xff, sizeof(uint64_t));
1111 	/* Initialize PG and hook it to pgchunk */
1112 	SET_AGE(pg->timestamp);
1114 	pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
1115 	ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
1116 	ck_pr_fence_store();
1117 	ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
1118 	ck_pr_store_ptr(pgptr, pg);
1121 	CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
1122 	SET_AGE(alias->timestamp);
1123 	alias->portgroups_count++;
1124 	ALIAS_UNLOCK(alias);
1125 	NAT64STAT_INC(&cfg->base.stats, spgcreated);
1126 	return (PG_ERROR(0));
/* Error unwind: free whatever state chunks were allocated, then the PG. */
1129 	for (i = 0; i < pg->chunks_count; i++)
1130 		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1131 	free(pg->freemask_chunk, M_NAT64LSN);
1132 	free(pg->states_chunk, M_NAT64LSN);
1133 	uma_zfree(nat64lsn_pg_zone, pg);
1134 	return (PG_ERROR(7));
/*
 * Job handler: allocate a PG for the job's protocol on the host's alias,
 * dispatching to nat64lsn_alloc_proto_pg() with the per-protocol arrays.
 * On success, immediately grab the state the original packet needed.
 * PG_ERROR(1) means the alias is out of PGs (a different alias could be
 * tried -- marked XXX upstream).  NOTE(review): switch labels, printf
 * argument and return lines are missing from this extraction.
 */
1138 nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1140 	struct nat64lsn_aliaslink *link;
1141 	struct nat64lsn_alias *alias;
1144 	link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
1146 		return (PG_ERROR(1));
1149 	 * TODO: check that we did not already allocated PG in
1154 	alias = link->alias;
1155 	/* Find place in pgchunk where PG can be added */
1156 	switch (ji->proto) {
1158 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1159 		    &alias->tcp_chunkmask, alias->tcp_pgmask,
1160 		    alias->tcp, &alias->tcp_pg, ji->proto);
1163 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1164 		    &alias->udp_chunkmask, alias->udp_pgmask,
1165 		    alias->udp, &alias->udp_pg, ji->proto);
1168 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1169 		    &alias->icmp_chunkmask, alias->icmp_pgmask,
1170 		    alias->icmp, &alias->icmp_pg, ji->proto);
1173 		panic("%s: wrong proto %d", __func__, ji->proto);
1175 	if (ret == PG_ERROR(1)) {
1177 		 * PG_ERROR(1) means that alias lacks free PGs
1178 		 * XXX: try next alias.
1180 		printf("NAT64LSN: %s: failed to obtain PG\n",
1184 	if (ret == PG_ERROR(0)) {
1185 		ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
1186 		    ji->state_hval, ji->faddr, ji->port, ji->proto);
1187 		if (ji->state == NULL)
/*
 * Deferred-work callout: swap the cfg's job queue out under JQUEUE_LOCK,
 * then process each item inside an epoch section -- NEWHOST/NEWPORTGROUP
 * jobs run the allocators, and any job marked done re-injects its held
 * packet through nat64lsn_translate6_internal().  Finally the queue is
 * drained, freeing each item (and its mbuf if the translator did not
 * consume it).  NOTE(review): locking, jlen bookkeeping and several
 * statements are missing from this extraction.
 */
1196 nat64lsn_do_request(void *data)
1198 	struct epoch_tracker et;
1199 	struct nat64lsn_job_head jhead;
1200 	struct nat64lsn_job_item *ji, *ji2;
1201 	struct nat64lsn_cfg *cfg;
1205 	cfg = (struct nat64lsn_cfg *)data;
1209 	CURVNET_SET(cfg->vp);
1210 	STAILQ_INIT(&jhead);
/* Steal the whole pending queue in O(1) while holding the queue lock. */
1214 	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
1219 	/* TODO: check if we need to resize hash */
1221 	NAT64STAT_INC(&cfg->base.stats, jcalls);
1222 	DPRINTF(DP_JQUEUE, "count=%d", jcount);
1226 	 * What we should do here is to build a hash
1227 	 * to ensure we don't have lots of duplicate requests.
1228 	 * Skip this for now.
1230 	 * TODO: Limit per-call number of items
1233 	NAT64LSN_EPOCH_ENTER(et);
1234 	STAILQ_FOREACH(ji, &jhead, entries) {
1235 		switch (ji->jtype) {
1237 			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
1238 				NAT64STAT_INC(&cfg->base.stats, jhostfails);
1240 		case JTYPE_NEWPORTGROUP:
1241 			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1242 				NAT64STAT_INC(&cfg->base.stats, jportfails);
1247 		if (ji->done != 0) {
1248 			flags = ji->proto != IPPROTO_TCP ? 0 :
1249 			    convert_tcp_flags(ji->f_id._flags);
1250 			nat64lsn_translate6_internal(cfg, &ji->m,
1252 			NAT64STAT_INC(&cfg->base.stats, jreinjected);
1255 	NAT64LSN_EPOCH_EXIT(et);
1257 	ji = STAILQ_FIRST(&jhead);
1258 	while (ji != NULL) {
1259 		ji2 = STAILQ_NEXT(ji, entries);
1261 		 * In any case we must free mbuf if
1262 		 * translator did not consumed it.
1265 		uma_zfree(nat64lsn_job_zone, ji);
/*
 * Allocate a job item of the given type, without blocking: if the queue
 * is already at jmaxlen the packet is dropped instead of contending on
 * the queue mutex.  On zone exhaustion the drop is counted too.
 * NOTE(review): the success path (return ji / field init) is missing
 * from this extraction.
 */
1271 static struct nat64lsn_job_item *
1272 nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
1274 	struct nat64lsn_job_item *ji;
1277 	 * Do not try to lock possibly contested mutex if we're near the
1278 	 * limit. Drop packet instead.
1281 	if (cfg->jlen >= cfg->jmaxlen)
1282 		NAT64STAT_INC(&cfg->base.stats, jmaxlen);
1284 		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
1286 			NAT64STAT_INC(&cfg->base.stats, jnomem);
1289 		NAT64STAT_INC(&cfg->base.stats, dropped);
1290 		DPRINTF(DP_DROPS, "failed to create job");
/*
 * Append a job to the cfg's queue (under JQUEUE_LOCK) and kick the
 * one-tick callout that runs nat64lsn_do_request() if not already armed.
 */
1299 nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1303 	STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
1304 	NAT64STAT_INC(&cfg->base.stats, jrequests);
1307 	if (callout_pending(&cfg->jcallout) == 0)
1308 		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
/*
 * Epoch callback for JTYPE_DESTROY jobs: by the time this runs no reader
 * can reference the unlinked hosts/PGs, so they are torn down for real.
 * Hosts or PG chunks that somehow regained states during teardown are
 * reported but destroyed anyway (upstream marks this XXX).
 */
1313 nat64lsn_job_destroy(epoch_context_t ctx)
1315 	struct nat64lsn_job_item *ji;
1316 	struct nat64lsn_host *host;
1317 	struct nat64lsn_pg *pg;
1320 	ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
1321 	MPASS(ji->jtype == JTYPE_DESTROY);
1322 	while (!CK_SLIST_EMPTY(&ji->hosts)) {
1323 		host = CK_SLIST_FIRST(&ji->hosts);
1324 		CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
1325 		if (host->states_count > 0) {
1327 			 * XXX: The state has been created
1328 			 * during host deletion.
1330 			printf("NAT64LSN: %s: destroying host with %d "
1331 			    "states\n", __func__, host->states_count);
1333 		nat64lsn_destroy_host(host);
1335 	while (!CK_SLIST_EMPTY(&ji->portgroups)) {
1336 		pg = CK_SLIST_FIRST(&ji->portgroups);
1337 		CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
1338 		for (i = 0; i < pg->chunks_count; i++) {
/* All 64 bits free == chunk empty; anything else is a late state. */
1339 			if (FREEMASK_BITCOUNT(pg, i) != 64) {
1341 				 * XXX: The state has been created during
1344 				printf("NAT64LSN: %s: destroying PG %p "
1345 				    "with non-empty chunk %d\n", __func__,
1349 		nat64lsn_destroy_pg(pg);
1351 	uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
1352 	uma_zfree(nat64lsn_job_zone, ji);
/*
 * Queue a JTYPE_NEWHOST job for a new IPv6 source seen in the data
 * path.  The packet (mbuf) is handed to the job for later reinjection;
 * meanwhile the caller must treat it as consumed, hence IP_FW_DENY.
 * NOTE(review): job-field initialization lines are missing from this
 * chunk; only src6_hval assignment is visible.
 */
1356 nat64lsn_request_host(struct nat64lsn_cfg *cfg,
1357 const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1358 in_addr_t faddr, uint16_t port, uint8_t proto)
1360 struct nat64lsn_job_item *ji;
1362 ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
1369 ji->src6_hval = hval;
1371 nat64lsn_enqueue_job(cfg, ji);
1372 NAT64STAT_INC(&cfg->base.stats, jhostsreq);
/* Packet ownership moved to the job (or it was dropped). */
1375 return (IP_FW_DENY);
/*
 * Queue a JTYPE_NEWPORTGROUP job when an existing host has no free
 * port/state for this flow.  Mirrors nat64lsn_request_host(): the mbuf
 * travels with the job and the caller returns IP_FW_DENY.
 * NOTE(review): most job-field initialization lines are missing from
 * this chunk; only state_hval assignment is visible.
 */
1379 nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
1380 const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1381 in_addr_t faddr, uint16_t port, uint8_t proto)
1383 struct nat64lsn_job_item *ji;
1385 ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
1392 ji->state_hval = hval;
1395 nat64lsn_enqueue_job(cfg, ji);
1396 NAT64STAT_INC(&cfg->base.stats, jportreq);
/* Packet ownership moved to the job (or it was dropped). */
1399 return (IP_FW_DENY);
/*
 * Perform the actual IPv6->IPv4 translation for an established state:
 * refresh the state's timestamp/flags, optionally build pflog data,
 * then hand the mbuf to nat64_do_handle_ip6() with the state's mapped
 * IPv4 source address and port (converted to network byte order).
 */
1403 nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
1404 struct nat64lsn_state *state, uint8_t flags)
1406 struct pfloghdr loghdr, *logdata;
1410 /* Update timestamp and flags if needed */
1412 if (state->timestamp != ts)
1413 state->timestamp = ts;
/*
 * NOTE(review): this guard fires only when some of `flags` are
 * ALREADY set in state->flags, in which case the |= below can still
 * matter only for the remaining bits — `(state->flags & flags) !=
 * flags` ("some requested bits missing") looks like the intended
 * condition.  Confirm against upstream before changing.
 */
1414 if ((state->flags & flags) != 0)
1415 state->flags |= flags;
/* Prepare pflog header only when logging is enabled for this instance. */
1417 if (cfg->base.flags & NAT64_LOG) {
1419 nat64lsn_log(logdata, *mp, AF_INET6, state);
1423 ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
1424 htons(state->aport), &cfg->base, logdata);
/* NAT64SKIP: translator declined; fall back to the nomatch verdict. */
1425 if (ret == NAT64SKIP)
1426 return (cfg->nomatch_verdict);
1427 if (ret == NAT64RETURN)
/* Translator consumed or errored on the packet: deny. */
1429 return (IP_FW_DENY);
/*
 * IPv6->IPv4 entry: validate the protocol, extract the destination
 * IPv4 address embedded in the NAT64 prefix, locate (or request) the
 * host and translation state, then translate via
 * nat64lsn_translate6_internal().
 */
1433 nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
1436 struct nat64lsn_state *state;
1437 struct nat64lsn_host *host;
1438 struct icmp6_hdr *icmp6;
1439 uint32_t addr, hval, data[2];
1444 /* Check if protocol is supported */
1445 port = f_id->src_port;
1446 proto = f_id->proto;
1447 switch (f_id->proto) {
1448 case IPPROTO_ICMPV6:
1450 * For ICMPv6 echo reply/request we use icmp6_id as
/* Walk extension headers to find the final protocol. */
1454 proto = nat64_getlasthdr(*mp, &offset);
1456 NAT64STAT_INC(&cfg->base.stats, dropped);
1457 DPRINTF(DP_DROPS, "mbuf isn't contigious");
1458 return (IP_FW_DENY);
1460 if (proto == IPPROTO_ICMPV6) {
1461 icmp6 = mtodo(*mp, offset);
/* Echo request/reply: the ICMP id plays the role of a port. */
1462 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
1463 icmp6->icmp6_type == ICMP6_ECHO_REPLY)
1464 port = ntohs(icmp6->icmp6_id);
/* Track ICMPv6 state under the IPv4 ICMP protocol number. */
1466 proto = IPPROTO_ICMP;
/* Unsupported protocol: count and apply the nomatch verdict. */
1472 NAT64STAT_INC(&cfg->base.stats, noproto);
1473 return (cfg->nomatch_verdict);
1476 /* Extract IPv4 from destination IPv6 address */
1477 addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
/* Refuse zero or disallowed private embedded addresses. */
1478 if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
1479 char a[INET_ADDRSTRLEN];
1481 NAT64STAT_INC(&cfg->base.stats, dropped);
1482 DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
1483 inet_ntop(AF_INET, &addr, a, sizeof(a)));
1484 return (IP_FW_DENY); /* XXX: add extra stats? */
1487 /* Try to find host */
1488 hval = HOST_HVAL(cfg, &f_id->src_ip6);
/* Lockless hash walk; caller holds the epoch section. */
1489 CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
1490 if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
1493 /* We use IPv4 address in host byte order */
/* Unknown host: defer creation to the job queue. */
1496 return (nat64lsn_request_host(cfg, f_id, mp,
1497 hval, addr, port, proto));
1499 flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
/* Hash the flow tuple to find the translation state. */
1502 data[1] = (f_id->dst_port << 16) | port;
1503 hval = STATE_HVAL(cfg, data);
1504 state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
/* No free state: defer portgroup allocation to the job queue. */
1507 return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
1509 return (nat64lsn_translate6_internal(cfg, mp, state, flags));
1513 * Main dataplane entry point.
/*
 * Called from ipfw rule evaluation for O_EXTERNAL_ACTION opcodes.
 * Validates that the action references this NAT64LSN instance, then
 * dispatches IPv4 or IPv6 translation under the epoch section.
 * Sets *done = 1 once a matching instance handled the packet.
 * NOTE(review): `icmd` is declared/derived on a line missing from this
 * chunk — presumably the instance sub-opcode following `cmd`.
 */
1516 ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
1517 ipfw_insn *cmd, int *done)
1519 struct epoch_tracker et;
1520 struct nat64lsn_cfg *cfg;
1524 IPFW_RLOCK_ASSERT(ch);
1526 *done = 0; /* continue the search in case of failure */
/* Not our action, or no instance bound: let rule search continue. */
1528 if (cmd->opcode != O_EXTERNAL_ACTION ||
1529 cmd->arg1 != V_nat64lsn_eid ||
1530 icmd->opcode != O_EXTERNAL_INSTANCE ||
1531 (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
1532 return (IP_FW_DENY);
1534 *done = 1; /* terminate the search */
/* All lookups below use epoch-protected lockless lists. */
1536 NAT64LSN_EPOCH_ENTER(et);
1537 switch (args->f_id.addr_type) {
1539 ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
1543 * Check that destination IPv6 address matches our prefix6.
/* Compare only the configured prefix length (in whole bytes). */
1545 if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
1546 memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
1547 cfg->base.plat_plen / 8) != 0) {
1548 ret = cfg->nomatch_verdict;
1551 ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
/* Neither IPv4 nor IPv6: nothing for us to translate. */
1554 ret = cfg->nomatch_verdict;
1556 NAT64LSN_EPOCH_EXIT(et);
/* On a non-pass verdict, the surviving mbuf is released (lines below). */
1558 if (ret != IP_FW_PASS && args->m != NULL) {
/*
 * UMA constructor for a states chunk: clear the flags word of each of
 * the 64 states so a freshly allocated chunk starts fully unused.
 */
1566 nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
1568 struct nat64lsn_states_chunk *chunk;
1571 chunk = (struct nat64lsn_states_chunk *)mem;
1572 for (i = 0; i < 64; i++)
1573 chunk->state[i].flags = 0;
/*
 * Module-wide initialization: allocate the preemptible epoch used to
 * protect lockless structures, and create one UMA zone per object
 * type (hosts, pgchunks, portgroups, alias links, states, jobs).
 */
1578 nat64lsn_init_internal(void)
1581 nat64lsn_epoch = epoch_alloc(EPOCH_PREEMPT);
1583 nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
1584 sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
1586 nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
1587 sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
1589 nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
1590 sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
1592 nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
1593 sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
/* States zone is the only one with a constructor (zeroes flags). */
1595 nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
1596 sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
1597 NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1598 nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
1599 sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
/*
 * Module-wide teardown: release the epoch, the job-queue lock and all
 * UMA zones created by nat64lsn_init_internal().
 */
1605 nat64lsn_uninit_internal(void)
1608 /* XXX: epoch_task drain */
1609 epoch_free(nat64lsn_epoch);
1611 JQUEUE_LOCK_DESTROY();
1612 uma_zdestroy(nat64lsn_host_zone);
1613 uma_zdestroy(nat64lsn_pgchunk_zone);
1614 uma_zdestroy(nat64lsn_pg_zone);
1615 uma_zdestroy(nat64lsn_aliaslink_zone);
1616 uma_zdestroy(nat64lsn_state_zone);
1617 uma_zdestroy(nat64lsn_job_zone);
/*
 * Arm the periodic maintenance callout for an instance, delayed by
 * PERIODIC_DELAY seconds, under the callout lock.
 */
1621 nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
1625 callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
1626 nat64lsn_periodic, cfg);
1627 CALLOUT_UNLOCK(cfg);
/*
 * Create a NAT64LSN instance for the IPv4 pool `prefix`/`plen`:
 * allocate the config, the hosts hash and one alias per pool address,
 * and initialize locks, counters and callouts.  All allocations are
 * M_WAITOK, so this must run in a sleepable context.
 */
1630 struct nat64lsn_cfg *
1631 nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
1633 struct nat64lsn_cfg *cfg;
1634 struct nat64lsn_alias *alias;
1637 cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
1641 CALLOUT_LOCK_INIT(cfg);
1642 STAILQ_INIT(&cfg->jhead);
1644 COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
/* Random seed keeps hash distribution unpredictable per instance. */
1646 cfg->hash_seed = arc4random();
1647 cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
1648 cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
1649 cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
1650 for (i = 0; i < cfg->hosts_hashsize; i++)
1651 CK_SLIST_INIT(&cfg->hosts_hash[i]);
/* Number of IPv4 addresses in the translation pool. */
1653 naddr = 1 << (32 - plen);
1654 cfg->prefix4 = prefix;
1655 cfg->pmask4 = prefix | (naddr - 1);
/* One alias entry per pool address, each with its own lock. */
1657 cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
1658 M_NAT64LSN, M_WAITOK | M_ZERO);
1659 for (i = 0; i < naddr; i++) {
1660 alias = &cfg->aliases[i];
1661 alias->addr = prefix + i; /* host byte order */
1662 CK_SLIST_INIT(&alias->hosts);
1663 ALIAS_LOCK_INIT(alias);
/* Periodic callout is mutex-backed; job callout is MPSAFE. */
1666 callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
1667 callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
/*
 * Free a portgroup and its state storage.  A single-chunk PG keeps its
 * states inline in pg->states; multi-chunk PGs also own per-chunk
 * pointer arrays (states_chunk, freemask_chunk) that must be freed.
 */
1673 nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
1677 if (pg->chunks_count == 1) {
1678 uma_zfree(nat64lsn_state_zone, pg->states);
1680 for (i = 0; i < pg->chunks_count; i++)
1681 uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1682 free(pg->states_chunk, M_NAT64LSN);
1683 free(pg->freemask_chunk, M_NAT64LSN);
1685 uma_zfree(nat64lsn_pg_zone, pg);
/*
 * Tear down one alias (pool address): destroy all linked portgroups,
 * then free every allocated per-protocol pgchunk (TCP/UDP/ICMP) whose
 * presence bit is set in the corresponding chunkmask, and finally
 * destroy the alias lock.
 */
1689 nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
1690 struct nat64lsn_alias *alias)
1692 struct nat64lsn_pg *pg;
1695 while (!CK_SLIST_EMPTY(&alias->portgroups)) {
1696 pg = CK_SLIST_FIRST(&alias->portgroups);
1697 CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
1698 nat64lsn_destroy_pg(pg);
/* chunkmask bit i set => chunk i was allocated and must be freed. */
1700 for (i = 0; i < 32; i++) {
1701 if (ISSET32(alias->tcp_chunkmask, i))
1702 uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
1703 if (ISSET32(alias->udp_chunkmask, i))
1704 uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
1705 if (ISSET32(alias->icmp_chunkmask, i))
1706 uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
1708 ALIAS_LOCK_DESTROY(alias);
/*
 * Free a host entry: detach it from every alias it is linked to
 * (updating the alias' host count under the alias lock), free the
 * alias links, then the host's lock, states hash and the host itself.
 */
1712 nat64lsn_destroy_host(struct nat64lsn_host *host)
1714 struct nat64lsn_aliaslink *link;
1716 while (!CK_SLIST_EMPTY(&host->aliases)) {
1717 link = CK_SLIST_FIRST(&host->aliases);
1718 CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
/* Unlink from the alias side under its lock. */
1720 ALIAS_LOCK(link->alias);
1721 CK_SLIST_REMOVE(&link->alias->hosts, link,
1722 nat64lsn_aliaslink, alias_entries);
1723 link->alias->hosts_count--;
1724 ALIAS_UNLOCK(link->alias);
1726 uma_zfree(nat64lsn_aliaslink_zone, link);
1728 HOST_LOCK_DESTROY(host);
1729 free(host->states_hash, M_NAT64LSN);
1730 uma_zfree(nat64lsn_host_zone, host);
/*
 * Full instance teardown: stop both callouts, destroy every host in
 * the hash and every alias in the pool, then release locks, counters
 * and the backing allocations.  Caller must guarantee the instance is
 * already unreachable from the dataplane.
 */
1734 nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
1736 struct nat64lsn_host *host;
/* Drain (not just stop) so no callout is still executing. */
1740 callout_drain(&cfg->periodic);
1741 CALLOUT_UNLOCK(cfg);
1742 callout_drain(&cfg->jcallout);
1744 for (i = 0; i < cfg->hosts_hashsize; i++) {
1745 while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
1746 host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
1747 CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
1748 nat64lsn_destroy_host(host);
/* One alias per pool address: 2^(32 - plen4) entries. */
1752 for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
1753 nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
1755 CALLOUT_LOCK_DESTROY(cfg);
1756 CFG_LOCK_DESTROY(cfg);
1757 COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
1758 free(cfg->hosts_hash, M_NAT64LSN);
1759 free(cfg->aliases, M_NAT64LSN);
1760 free(cfg, M_NAT64LSN);