]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netpfil/ipfw/nat64/nat64lsn.c
Add IPv4 fragments reassembling to NAT64LSN.
[FreeBSD/FreeBSD.git] / sys / netpfil / ipfw / nat64 / nat64lsn.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2015-2019 Yandex LLC
5  * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/counter.h>
36 #include <sys/ck.h>
37 #include <sys/epoch.h>
38 #include <sys/errno.h>
39 #include <sys/hash.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/module.h>
45 #include <sys/rmlock.h>
46 #include <sys/socket.h>
47 #include <sys/syslog.h>
48 #include <sys/sysctl.h>
49
50 #include <net/if.h>
51 #include <net/if_var.h>
52 #include <net/if_pflog.h>
53 #include <net/pfil.h>
54
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/ip_fw.h>
59 #include <netinet/ip6.h>
60 #include <netinet/icmp6.h>
61 #include <netinet/ip_icmp.h>
62 #include <netinet/tcp.h>
63 #include <netinet/udp.h>
64 #include <netinet6/in6_var.h>
65 #include <netinet6/ip6_var.h>
66 #include <netinet6/ip_fw_nat64.h>
67
68 #include <netpfil/ipfw/ip_fw_private.h>
69 #include <netpfil/pf/pf.h>
70
71 #include "nat64lsn.h"
72
73 MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
74
75 #define NAT64LSN_EPOCH_ENTER(et)  NET_EPOCH_ENTER(et)
76 #define NAT64LSN_EPOCH_EXIT(et)   NET_EPOCH_EXIT(et)
77 #define NAT64LSN_EPOCH_ASSERT()   NET_EPOCH_ASSERT()
78 #define NAT64LSN_EPOCH_CALL(c, f) NET_EPOCH_CALL((f), (c))
79
80 static uma_zone_t nat64lsn_host_zone;
81 static uma_zone_t nat64lsn_pgchunk_zone;
82 static uma_zone_t nat64lsn_pg_zone;
83 static uma_zone_t nat64lsn_aliaslink_zone;
84 static uma_zone_t nat64lsn_state_zone;
85 static uma_zone_t nat64lsn_job_zone;
86
87 static void nat64lsn_periodic(void *data);
88 #define PERIODIC_DELAY          4
89 #define NAT64_LOOKUP(chain, cmd)        \
90         (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
91 /*
92  * Delayed job queue, used to create new hosts
93  * and new portgroups
94  */
/* Kinds of deferred jobs handled by the jobs queue handler. */
enum nat64lsn_jtype {
	JTYPE_NEWHOST = 1,	/* allocate a new host object */
	JTYPE_NEWPORTGROUP,	/* allocate a new portgroup (PG) */
	JTYPE_DESTROY,		/* deferred destroy of expired hosts/PGs */
};
100
/*
 * A single entry of the delayed job queue.  The union arm that is
 * valid is selected by @jtype.
 */
struct nat64lsn_job_item {
	STAILQ_ENTRY(nat64lsn_job_item) entries;	/* queue linkage */
	enum nat64lsn_jtype	jtype;		/* selects union arm below */

	union {
		struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
			/* NOTE(review): m is presumably the packet held for
			 * re-processing after the job completes — confirm
			 * against the jobs handler. */
			struct mbuf		*m;
			struct nat64lsn_host	*host;
			struct nat64lsn_state	*state;
			uint32_t		src6_hval;	/* hosts hash value */
			uint32_t		state_hval;	/* states hash value */
			struct ipfw_flow_id	f_id;
			in_addr_t		faddr;	/* foreign IPv4 address */
			uint16_t		port;
			uint8_t			proto;
			uint8_t			done;	/* job completion flag */
		};
		struct { /* used by JTYPE_DESTROY */
			struct nat64lsn_hosts_slist	hosts;
			struct nat64lsn_pg_slist	portgroups;
			struct nat64lsn_pgchunk		*pgchunk;
			struct epoch_context		epoch_ctx;
		};
	};
};
126
127 static struct mtx jmtx;
128 #define JQUEUE_LOCK_INIT()      mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
129 #define JQUEUE_LOCK_DESTROY()   mtx_destroy(&jmtx)
130 #define JQUEUE_LOCK()           mtx_lock(&jmtx)
131 #define JQUEUE_UNLOCK()         mtx_unlock(&jmtx)
132
133 static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
134     struct nat64lsn_job_item *ji);
135 static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
136     struct nat64lsn_job_item *ji);
137 static struct nat64lsn_job_item *nat64lsn_create_job(
138     struct nat64lsn_cfg *cfg, int jtype);
139 static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
140     struct nat64lsn_job_item *ji);
141 static void nat64lsn_job_destroy(epoch_context_t ctx);
142 static void nat64lsn_destroy_host(struct nat64lsn_host *host);
143 static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);
144
145 static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
146     const struct ipfw_flow_id *f_id, struct mbuf **mp);
147 static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
148     struct ipfw_flow_id *f_id, struct mbuf **mp);
149 static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
150     struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
151
152 #define NAT64_BIT_TCP_FIN       0       /* FIN was seen */
153 #define NAT64_BIT_TCP_SYN       1       /* First syn in->out */
154 #define NAT64_BIT_TCP_ESTAB     2       /* Packet with Ack */
155 #define NAT64_BIT_READY_IPV4    6       /* state is ready for translate4 */
156 #define NAT64_BIT_STALE         7       /* state is going to be expired */
157
158 #define NAT64_FLAG_FIN          (1 << NAT64_BIT_TCP_FIN)
159 #define NAT64_FLAG_SYN          (1 << NAT64_BIT_TCP_SYN)
160 #define NAT64_FLAG_ESTAB        (1 << NAT64_BIT_TCP_ESTAB)
161 #define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
162
163 #define NAT64_FLAG_READY        (1 << NAT64_BIT_READY_IPV4)
164 #define NAT64_FLAG_STALE        (1 << NAT64_BIT_STALE)
165
166 static inline uint8_t
167 convert_tcp_flags(uint8_t flags)
168 {
169         uint8_t result;
170
171         result = flags & (TH_FIN|TH_SYN);
172         result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
173         result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */
174
175         return (result);
176 }
177
178 static void
179 nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
180     struct nat64lsn_state *state)
181 {
182
183         memset(plog, 0, sizeof(*plog));
184         plog->length = PFLOG_REAL_HDRLEN;
185         plog->af = family;
186         plog->action = PF_NAT;
187         plog->dir = PF_IN;
188         plog->rulenr = htonl(state->ip_src);
189         plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
190             (state->proto << 8) | (state->ip_dst & 0xff));
191         plog->ruleset[0] = '\0';
192         strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
193         ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
194 }
195
196 #define HVAL(p, n, s)   jenkins_hash32((const uint32_t *)(p), (n), (s))
197 #define HOST_HVAL(c, a) HVAL((a),\
198     sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
199 #define HOSTS(c, v)     ((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])
200
201 #define ALIASLINK_HVAL(c, f)    HVAL(&(f)->dst_ip6,\
202     sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
203 #define ALIAS_BYHASH(c, v)      \
204     ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
205 static struct nat64lsn_aliaslink*
206 nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
207     struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
208 {
209
210         /*
211          * We can implement some different algorithms how
212          * select an alias address.
213          * XXX: for now we use first available.
214          */
215         return (CK_SLIST_FIRST(&host->aliases));
216 }
217
218 #define STATE_HVAL(c, d)        HVAL((d), 2, (c)->hash_seed)
219 #define STATE_HASH(h, v)        \
220     ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
221 #define STATES_CHUNK(p, v)      \
222     ((p)->chunks_count == 1 ? (p)->states : \
223         ((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
224
225 #ifdef __LP64__
226 #define FREEMASK_FFSLL(pg, faddr)               \
227     ffsll(*FREEMASK_CHUNK((pg), (faddr)))
228 #define FREEMASK_BTR(pg, faddr, bit)    \
229     ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
230 #define FREEMASK_BTS(pg, faddr, bit)    \
231     ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
232 #define FREEMASK_ISSET(pg, faddr, bit)  \
233     ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
234 #define FREEMASK_COPY(pg, n, out)       \
235     (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
236 #else
/*
 * 32-bit fallback for ffsll(): find the first set bit in a 64-bit
 * freemask stored as two 32-bit words.  Returns a 1-based bit index,
 * or 0 when no bit is set — same contract as ffsll(3).
 */
static inline int
freemask_ffsll(uint32_t *freemask)
{
	int bit, w;

	for (w = 0; w < 2; w++) {
		bit = ffsl(freemask[w]);
		if (bit != 0)
			return (bit + w * 32);
	}
	return (0);
}
248 #define FREEMASK_FFSLL(pg, faddr)               \
249     freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
250 #define FREEMASK_BTR(pg, faddr, bit)    \
251     ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
252 #define FREEMASK_BTS(pg, faddr, bit)    \
253     ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
254 #define FREEMASK_ISSET(pg, faddr, bit)  \
255     ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
256 #define FREEMASK_COPY(pg, n, out)       \
257     (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
258         ((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
259 #endif /* !__LP64__ */
260
261 #define NAT64LSN_TRY_PGCNT      32
/*
 * Lock-free lookup of a portgroup with free states for foreign address
 * @faddr.  Starts from the last used PG (cached in *pgptr/*pgidx) and
 * scans up to NAT64LSN_TRY_PGCNT portgroups forward.  On success the
 * cached pointer is opportunistically updated with CAS; on failure the
 * scan position is stored so the next call resumes from there.
 * Runs under the net epoch; PGs may concurrently be added by the jobs
 * handler, hence the load fences and CAS updates.
 */
static struct nat64lsn_pg*
nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
    struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
    uint32_t *pgidx, in_addr_t faddr)
{
	struct nat64lsn_pg *pg, *oldpg;
	uint32_t idx, oldidx;
	int cnt;

	cnt = 0;
	/* First try last used PG */
	oldpg = pg = ck_pr_load_ptr(pgptr);
	idx = oldidx = ck_pr_load_32(pgidx);
	/* If pgidx is out of range, reset it to the first pgchunk */
	if (!ISSET32(*chunkmask, idx / 32))
		idx = 0;
	do {
		ck_pr_fence_load();
		if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
			/*
			 * If last used PG has not free states,
			 * try to update pointer.
			 * NOTE: it can be already updated by jobs handler,
			 *	 thus we use CAS operation.
			 */
			if (cnt > 0)
				ck_pr_cas_ptr(pgptr, oldpg, pg);
			return (pg);
		}
		/* Stop if idx is out of range */
		if (!ISSET32(*chunkmask, idx / 32))
			break;

		/* Advance to the next valid PG pointer, if any. */
		if (ISSET32(pgmask[idx / 32], idx % 32))
			pg = ck_pr_load_ptr(
			    &chunks[idx / 32]->pgptr[idx % 32]);
		else
			pg = NULL;

		idx++;
	} while (++cnt < NAT64LSN_TRY_PGCNT);

	/* If pgidx is out of range, reset it to the first pgchunk */
	if (!ISSET32(*chunkmask, idx / 32))
		idx = 0;
	/* Remember where the scan stopped for the next attempt. */
	ck_pr_cas_32(pgidx, oldidx, idx);
	return (NULL);
}
310
/*
 * Find or create a translation state for an IPv6->IPv4 flow.
 * First looks up an existing state in the host's hash; otherwise picks
 * an alias and a portgroup with free slots and atomically claims one
 * state via a freemask bit-test-and-reset.  Returns NULL when no alias
 * or no free state is available (caller then schedules a job).
 * Must run inside the net epoch.
 */
static struct nat64lsn_state*
nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
    uint16_t port, uint8_t proto)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int i, offset;

	NAT64LSN_EPOCH_ASSERT();

	/* Check that we already have state for given arguments */
	CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
		if (state->proto == proto && state->ip_dst == faddr &&
		    state->sport == port && state->dport == f_id->dst_port)
			return (state);
	}

	link = nat64lsn_get_aliaslink(cfg, host, f_id);
	if (link == NULL)
		return (NULL);

	/* Per-protocol portgroup lookup on the chosen alias. */
	switch (proto) {
	case IPPROTO_TCP:
		pg = nat64lsn_get_pg(
		    &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
		    link->alias->tcp, &link->alias->tcp_pg,
		    &link->alias->tcp_pgidx, faddr);
		break;
	case IPPROTO_UDP:
		pg = nat64lsn_get_pg(
		    &link->alias->udp_chunkmask, link->alias->udp_pgmask,
		    link->alias->udp, &link->alias->udp_pg,
		    &link->alias->udp_pgidx, faddr);
		break;
	case IPPROTO_ICMP:
		pg = nat64lsn_get_pg(
		    &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
		    link->alias->icmp, &link->alias->icmp_pg,
		    &link->alias->icmp_pgidx, faddr);
		break;
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL)
		return (NULL);

	/* Check that PG has some free states */
	state = NULL;
	i = FREEMASK_BITCOUNT(pg, faddr);
	while (i-- > 0) {
		offset = FREEMASK_FFSLL(pg, faddr);
		if (offset == 0) {
			/*
			 * We lost the race.
			 * No more free states in this PG.
			 */
			break;
		}

		/* Lets try to atomically grab the state */
		if (FREEMASK_BTR(pg, faddr, offset - 1)) {
			state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
			/* Initialize */
			state->flags = proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(f_id->_flags);
			state->proto = proto;
			state->aport = pg->base_port + offset - 1;
			state->dport = f_id->dst_port;
			state->sport = port;
			state->ip6_dst = f_id->dst_ip6;
			state->ip_dst = faddr;
			state->ip_src = link->alias->addr;
			state->hval = hval;
			state->host = host;
			SET_AGE(state->timestamp);

			/* Insert new state into host's hash table */
			HOST_LOCK(host);
			CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
			    state, entries);
			host->states_count++;
			/*
			 * XXX: In case if host is going to be expired,
			 * reset NAT64LSN_DEADHOST flag.
			 */
			host->flags &= ~NAT64LSN_DEADHOST;
			HOST_UNLOCK(host);
			NAT64STAT_INC(&cfg->base.stats, screated);
			/* Mark the state as ready for translate4 */
			ck_pr_fence_store();
			ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
			break;
		}
	}
	return (state);
}
409
410 /*
411  * Inspects icmp packets to see if the message contains different
412  * packet header so we need to alter @addr and @port.
413  */
414 static int
415 inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
416     uint16_t *port)
417 {
418         struct icmp *icmp;
419         struct ip *ip;
420         int off;
421         uint8_t inner_proto;
422
423         ip = mtod(*mp, struct ip *); /* Outer IP header */
424         off = (ip->ip_hl << 2) + ICMP_MINLEN;
425         if ((*mp)->m_len < off)
426                 *mp = m_pullup(*mp, off);
427         if (*mp == NULL)
428                 return (ENOMEM);
429
430         ip = mtod(*mp, struct ip *); /* Outer IP header */
431         icmp = L3HDR(ip, struct icmp *);
432         switch (icmp->icmp_type) {
433         case ICMP_ECHO:
434         case ICMP_ECHOREPLY:
435                 /* Use icmp ID as distinguisher */
436                 *port = ntohs(icmp->icmp_id);
437                 return (0);
438         case ICMP_UNREACH:
439         case ICMP_TIMXCEED:
440                 break;
441         default:
442                 return (EOPNOTSUPP);
443         }
444         /*
445          * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
446          * of ULP header.
447          */
448         if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
449                 return (EINVAL);
450         if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
451                 *mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
452         if (*mp == NULL)
453                 return (ENOMEM);
454         ip = mtodo(*mp, off); /* Inner IP header */
455         inner_proto = ip->ip_p;
456         off += ip->ip_hl << 2; /* Skip inner IP header */
457         *addr = ntohl(ip->ip_src.s_addr);
458         if ((*mp)->m_len < off + ICMP_MINLEN)
459                 *mp = m_pullup(*mp, off + ICMP_MINLEN);
460         if (*mp == NULL)
461                 return (ENOMEM);
462         switch (inner_proto) {
463         case IPPROTO_TCP:
464         case IPPROTO_UDP:
465                 /* Copy source port from the header */
466                 *port = ntohs(*((uint16_t *)mtodo(*mp, off)));
467                 *proto = inner_proto;
468                 return (0);
469         case IPPROTO_ICMP:
470                 /*
471                  * We will translate only ICMP errors for our ICMP
472                  * echo requests.
473                  */
474                 icmp = mtodo(*mp, off);
475                 if (icmp->icmp_type != ICMP_ECHO)
476                         return (EOPNOTSUPP);
477                 *port = ntohs(icmp->icmp_id);
478                 return (0);
479         };
480         return (EOPNOTSUPP);
481 }
482
/*
 * Look up the translation state for an inbound IPv4 packet addressed
 * to alias port @port/@proto from foreign address @faddr.
 * The port is decomposed into (pgchunk, portgroup, state) indices;
 * the state is returned only when its freemask bit is claimed and the
 * READY flag is observed after a load fence (set by get_state6to4).
 * Returns NULL when no valid state exists.  Must run inside the net
 * epoch.
 */
static struct nat64lsn_state*
nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int chunk_idx, pg_idx, state_idx;

	NAT64LSN_EPOCH_ASSERT();

	if (port < NAT64_MIN_PORT)
		return (NULL);
	/*
	 * Alias keeps 32 pgchunks for each protocol.
	 * Each pgchunk has 32 pointers to portgroup.
	 * Each portgroup has 64 states for ports.
	 */
	port -= NAT64_MIN_PORT;
	chunk_idx = port / 2048;	/* 32 PGs * 64 ports per chunk */

	port -= chunk_idx * 2048;
	pg_idx = port / 64;
	state_idx = port % 64;

	/*
	 * First check in proto_chunkmask that we have allocated PG chunk.
	 * Then check in proto_pgmask that we have valid PG pointer.
	 */
	pg = NULL;
	switch (proto) {
	case IPPROTO_TCP:
		if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
		    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	case IPPROTO_UDP:
		if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
		    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->udp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	case IPPROTO_ICMP:
		if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
		    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL)
		return (NULL);

	/* Freemask bit set means the slot is free, i.e. no state. */
	if (FREEMASK_ISSET(pg, faddr, state_idx))
		return (NULL);

	state = &STATES_CHUNK(pg, faddr)->state[state_idx];
	ck_pr_fence_load();
	if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
		return (state);
	return (NULL);
}
549
550 /*
551  * Reassemble IPv4 fragments, make PULLUP if needed, get some ULP fields
552  * that might be unknown until reassembling is completed.
553  */
554 static struct mbuf*
555 nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
556     uint16_t *port)
557 {
558         struct ip *ip;
559         int len;
560
561         m = ip_reass(m);
562         if (m == NULL)
563                 return (NULL);
564         /* IP header must be contigious after ip_reass() */
565         ip = mtod(m, struct ip *);
566         len = ip->ip_hl << 2;
567         switch (ip->ip_p) {
568         case IPPROTO_ICMP:
569                 len += ICMP_MINLEN; /* Enough to get icmp_id */
570                 break;
571         case IPPROTO_TCP:
572                 len += sizeof(struct tcphdr);
573                 break;
574         case IPPROTO_UDP:
575                 len += sizeof(struct udphdr);
576                 break;
577         default:
578                 m_freem(m);
579                 NAT64STAT_INC(&cfg->base.stats, noproto);
580                 return (NULL);
581         }
582         if (m->m_len < len) {
583                 m = m_pullup(m, len);
584                 if (m == NULL) {
585                         NAT64STAT_INC(&cfg->base.stats, nomem);
586                         return (NULL);
587                 }
588                 ip = mtod(m, struct ip *);
589         }
590         switch (ip->ip_p) {
591         case IPPROTO_TCP:
592                 *port = ntohs(L3HDR(ip, struct tcphdr *)->th_dport);
593                 break;
594         case IPPROTO_UDP:
595                 *port = ntohs(L3HDR(ip, struct udphdr *)->uh_dport);
596                 break;
597         }
598         return (m);
599 }
600
/*
 * Translate an inbound IPv4 packet to IPv6.
 * Reassembles fragments, extracts the real proto/addr/port from ICMP
 * errors, finds the matching state and hands the packet to
 * nat64_do_handle_ip4().  Returns an ipfw verdict; on successful
 * translation *mp is consumed and set to NULL.
 */
static int
nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp)
{
	struct pfloghdr loghdr, *logdata;
	struct in6_addr src6;
	struct nat64lsn_state *state;
	struct nat64lsn_alias *alias;
	uint32_t addr, flags;
	uint16_t port, ts;
	int ret;
	uint8_t proto;

	addr = f_id->dst_ip;
	port = f_id->dst_port;
	proto = f_id->proto;
	/* Destination must fall into our IPv4 alias prefix. */
	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
		NAT64STAT_INC(&cfg->base.stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* Reassemble fragments if needed */
	ret = ntohs(mtod(*mp, struct ip *)->ip_off);
	if ((ret & (IP_MF | IP_OFFMASK)) != 0) {
		/* May update port, which is unknown for non-first fragments */
		*mp = nat64lsn_reassemble4(cfg, *mp, &port);
		if (*mp == NULL)
			return (IP_FW_DENY);
	}

	/* Check if protocol is supported */
	switch (proto) {
	case IPPROTO_ICMP:
		/* ICMP errors carry the real proto/addr/port inside. */
		ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
		if (ret != 0) {
			if (ret == ENOMEM) {
				NAT64STAT_INC(&cfg->base.stats, nomem);
				return (IP_FW_DENY);
			}
			NAT64STAT_INC(&cfg->base.stats, noproto);
			return (cfg->nomatch_verdict);
		}
		/* Re-check: inner address may differ from the outer one. */
		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
			NAT64STAT_INC(&cfg->base.stats, nomatch4);
			return (cfg->nomatch_verdict);
		}
		/* FALLTHROUGH */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	default:
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (cfg->nomatch_verdict);
	}

	alias = &ALIAS_BYHASH(cfg, addr);
	MPASS(addr == alias->addr);

	/* Check that we have state for this port */
	state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
	    port, proto);
	if (state == NULL) {
		NAT64STAT_INC(&cfg->base.stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* TODO: Check flags to see if we need to do some static mapping */

	/* Update some state fields if need */
	SET_AGE(ts);
	if (f_id->proto == IPPROTO_TCP)
		flags = convert_tcp_flags(f_id->_flags);
	else
		flags = 0;
	/* Avoid dirtying the cache line when nothing changed. */
	if (state->timestamp != ts)
		state->timestamp = ts;
	if ((state->flags & flags) != flags)
		state->flags |= flags;

	port = htons(state->sport);
	src6 = state->ip6_dst;

	if (cfg->base.flags & NAT64_LOG) {
		logdata = &loghdr;
		nat64lsn_log(logdata, *mp, AF_INET, state);
	} else
		logdata = NULL;

	/*
	 * We already have src6 with embedded address, but it is possible,
	 * that src_ip is different than state->ip_dst, this is why we
	 * do embedding again.
	 */
	nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
	ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
	    &cfg->base, logdata);
	if (ret == NAT64SKIP)
		return (cfg->nomatch_verdict);
	if (ret == NAT64RETURN)
		*mp = NULL;
	return (IP_FW_DENY);
}
702
703 /*
704  * Check if particular state is stale and should be deleted.
705  * Return 1 if true, 0 otherwise.
706  */
707 static int
708 nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
709 {
710         int age, ttl;
711
712         /* State was marked as stale in previous pass. */
713         if (ISSET32(state->flags, NAT64_BIT_STALE))
714                 return (1);
715
716         /* State is not yet initialized, it is going to be READY */
717         if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
718                 return (0);
719
720         age = GET_AGE(state->timestamp);
721         switch (state->proto) {
722         case IPPROTO_TCP:
723                 if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
724                         ttl = cfg->st_close_ttl;
725                 else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
726                         ttl = cfg->st_estab_ttl;
727                 else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
728                         ttl = cfg->st_syn_ttl;
729                 else
730                         ttl = cfg->st_syn_ttl;
731                 if (age > ttl)
732                         return (1);
733                 break;
734         case IPPROTO_UDP:
735                 if (age > cfg->st_udp_ttl)
736                         return (1);
737                 break;
738         case IPPROTO_ICMP:
739                 if (age > cfg->st_icmp_ttl)
740                         return (1);
741                 break;
742         }
743         return (0);
744 }
745
/*
 * Walk all states of portgroup @pg and expire stale ones using a
 * two-pass protocol: a state is first marked STALE (and unhashed) so
 * concurrent translate6 stops using it; on the next periodic pass its
 * freemask bit is set, making the slot reusable.
 * Returns 1 when the PG itself has been idle longer than
 * pg_delete_delay and may be destroyed, 0 otherwise.
 */
static int
nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	uint64_t freemask;
	int c, i, update_age;

	update_age = 0;
	for (c = 0; c < pg->chunks_count; c++) {
		/* Snapshot the freemask; set bits are free slots. */
		FREEMASK_COPY(pg, c, freemask);
		for (i = 0; i < 64; i++) {
			if (ISSET64(freemask, i))
				continue;
			state = &STATES_CHUNK(pg, c)->state[i];
			if (nat64lsn_check_state(cfg, state) == 0) {
				update_age = 1;
				continue;
			}
			/*
			 * Expire state:
			 * 1. Mark as STALE and unlink from host's hash.
			 * 2. Set bit in freemask.
			 */
			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
				/*
				 * State was marked as STALE in previous
				 * pass. Now it is safe to release it.
				 */
				state->flags = 0;
				ck_pr_fence_store();
				FREEMASK_BTS(pg, c, i);
				NAT64STAT_INC(&cfg->base.stats, sdeleted);
				continue;
			}
			MPASS(state->flags & NAT64_FLAG_READY);

			host = state->host;
			HOST_LOCK(host);
			CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
			    state, nat64lsn_state, entries);
			host->states_count--;
			HOST_UNLOCK(host);

			/* Reset READY flag */
			ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
			/* And set STALE flag */
			ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
			ck_pr_fence_store();
			/*
			 * Now translate6 will not use this state, wait
			 * until it become safe for translate4, then mark
			 * state as free.
			 */
		}
	}

	/*
	 * We have some alive states, update timestamp.
	 */
	if (update_age)
		SET_AGE(pg->timestamp);

	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
		return (0);

	return (1);
}
814
815 static void
816 nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
817     struct nat64lsn_pg_slist *portgroups)
818 {
819         struct nat64lsn_alias *alias;
820         struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
821         uint32_t *pgmask, *pgidx;
822         int i, idx;
823
824         for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
825                 alias = &cfg->aliases[i];
826                 CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
827                         if (nat64lsn_maintain_pg(cfg, pg) == 0)
828                                 continue;
829                         /* Always keep first PG */
830                         if (pg->base_port == NAT64_MIN_PORT)
831                                 continue;
832                         /*
833                          * PG is expired, unlink it and schedule for
834                          * deferred destroying.
835                          */
836                         idx = (pg->base_port - NAT64_MIN_PORT) / 64;
837                         switch (pg->proto) {
838                         case IPPROTO_TCP:
839                                 pgmask = alias->tcp_pgmask;
840                                 pgptr = &alias->tcp_pg;
841                                 pgidx = &alias->tcp_pgidx;
842                                 firstpg = alias->tcp[0]->pgptr[0];
843                                 break;
844                         case IPPROTO_UDP:
845                                 pgmask = alias->udp_pgmask;
846                                 pgptr = &alias->udp_pg;
847                                 pgidx = &alias->udp_pgidx;
848                                 firstpg = alias->udp[0]->pgptr[0];
849                                 break;
850                         case IPPROTO_ICMP:
851                                 pgmask = alias->icmp_pgmask;
852                                 pgptr = &alias->icmp_pg;
853                                 pgidx = &alias->icmp_pgidx;
854                                 firstpg = alias->icmp[0]->pgptr[0];
855                                 break;
856                         }
857                         /* Reset the corresponding bit in pgmask array. */
858                         ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
859                         ck_pr_fence_store();
860                         /* If last used PG points to this PG, reset it. */
861                         ck_pr_cas_ptr(pgptr, pg, firstpg);
862                         ck_pr_cas_32(pgidx, idx, 0);
863                         /* Unlink PG from alias's chain */
864                         ALIAS_LOCK(alias);
865                         CK_SLIST_REMOVE(&alias->portgroups, pg,
866                             nat64lsn_pg, entries);
867                         alias->portgroups_count--;
868                         ALIAS_UNLOCK(alias);
869                         /* And link to job's chain for deferred destroying */
870                         NAT64STAT_INC(&cfg->base.stats, spgdeleted);
871                         CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
872                 }
873         }
874 }
875
/*
 * Scan the hosts hash and expire idle hosts using a two-pass scheme:
 * a host that has been idle longer than host_delete_delay and has no
 * states is first marked NAT64LSN_DEADHOST; on the next periodic pass
 * (if still state-less) it is unlinked from the hash and put on @hosts
 * for deferred (epoch-based) destruction by the caller.
 */
static void
nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
    struct nat64lsn_hosts_slist *hosts)
{
	struct nat64lsn_host *host, *tmp;
	int i;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
		    entries, tmp) {
			/* Was the host marked dead on a previous pass? */
			if (host->flags & NAT64LSN_DEADHOST) {
				/* A state appeared meanwhile; revive it. */
				if (host->states_count > 0) {
					host->flags &= ~NAT64LSN_DEADHOST;
					continue;
				}
				/*
				 * Unlink host from hash table and schedule
				 * it for deferred destroying.
				 */
				CFG_LOCK(cfg);
				CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
				    nat64lsn_host, entries);
				cfg->hosts_count--;
				CFG_UNLOCK(cfg);
				CK_SLIST_INSERT_HEAD(hosts, host, entries);
				continue;
			}
			/* Not yet idle long enough, or still in use. */
			if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
				continue;
			if (host->states_count > 0)
				continue;
			/* Mark host as going to be expired in next pass */
			host->flags |= NAT64LSN_DEADHOST;
			/* Make the flag visible before the next pass. */
			ck_pr_fence_store();
		}
	}
}
914
/*
 * Placeholder for pgchunk reclamation.  The reclaim logic below is
 * disabled (#if 0), so currently no pgchunk is ever released back and
 * the function always returns NULL.
 */
static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
	struct nat64lsn_alias *alias;
	struct nat64lsn_pgchunk *chunk;
	uint32_t pgmask;
	int i, c;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
			continue;
		/* Always keep single chunk allocated */
		for (c = 1; c < 32; c++) {
			if ((alias->tcp_chunkmask & (1 << c)) == 0)
				break;
			chunk = ck_pr_load_ptr(&alias->tcp[c]);
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
			ck_pr_btr_32(&alias->tcp_chunkmask, c);
			ck_pr_fence_load();
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
		}
	}
#endif
	return (NULL);
}
944
#if 0
/*
 * Disabled draft of per-host states-hash resizing.
 *
 * NOTE(review): this code cannot be enabled as-is:
 *  - malloc() is called with only two arguments; kernel malloc(9)
 *    takes (size, type, flags), i.e. M_NAT64LSN is missing before
 *    M_NOWAIT.
 *  - The init loop iterates j but indexes &hash[i]; it should be
 *    &hash[j].
 *  - The new hash is never attached to the host, states are not
 *    rehashed and the allocation leaks; only NAT64LSN_GROWHASH is set.
 */
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *h;
	struct nat64lsn_states_slist *hash;
	int i, j, hsize;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
			 /* Grow only when load factor reaches 2. */
			 if (h->states_count / 2 < h->states_hashsize ||
			     h->states_hashsize >= NAT64LSN_MAX_HSIZE)
				 continue;
			 hsize = h->states_hashsize * 2;
			 hash = malloc(sizeof(*hash)* hsize, M_NOWAIT);
			 if (hash == NULL)
				 continue;
			 for (j = 0; j < hsize; j++)
				CK_SLIST_INIT(&hash[i]);

			 ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
		}
	}
}
#endif
970
/*
 * Periodic callout handler used to perform various maintenance on the
 * dynamic hash lists.  Currently it is called every PERIODIC_DELAY
 * (4) seconds: it collects expired hosts and portgroups into a
 * JTYPE_DESTROY job item and defers their destruction via an epoch
 * callback, then reschedules itself.
 */
static void
nat64lsn_periodic(void *data)
{
	struct nat64lsn_job_item *ji;
	struct nat64lsn_cfg *cfg;

	cfg = (struct nat64lsn_cfg *) data;
	CURVNET_SET(cfg->vp);
	if (cfg->hosts_count > 0) {
		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
		if (ji != NULL) {
			ji->jtype = JTYPE_DESTROY;
			CK_SLIST_INIT(&ji->hosts);
			CK_SLIST_INIT(&ji->portgroups);
			/* Unlink expired objects into the job's lists. */
			nat64lsn_expire_hosts(cfg, &ji->hosts);
			nat64lsn_expire_portgroups(cfg, &ji->portgroups);
			ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
			/* Destroy them after all epoch readers are done. */
			NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
			    nat64lsn_job_destroy);
		} else
			NAT64STAT_INC(&cfg->base.stats, jnomem);
	}
	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
	CURVNET_RESTORE();
}
1000
/*
 * Allocation error encoding: 10 * type + stage, where type is 1 for
 * host allocation and 2 for PG allocation, and stage identifies which
 * allocation step failed.  Stage 0 always encodes success (0).
 */
#define ALLOC_ERROR(stage, type)	((stage) ? 10 * (type) + (stage): 0)
#define HOST_ERROR(stage)		ALLOC_ERROR(stage, 1)
#define PG_ERROR(stage)			ALLOC_ERROR(stage, 2)
1004 static int
1005 nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1006 {
1007         char a[INET6_ADDRSTRLEN];
1008         struct nat64lsn_aliaslink *link;
1009         struct nat64lsn_host *host;
1010         struct nat64lsn_state *state;
1011         uint32_t hval, data[2];
1012         int i;
1013
1014         /* Check that host was not yet added. */
1015         NAT64LSN_EPOCH_ASSERT();
1016         CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
1017                 if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
1018                         /* The host was allocated in previous call. */
1019                         ji->host = host;
1020                         goto get_state;
1021                 }
1022         }
1023
1024         host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
1025         if (ji->host == NULL)
1026                 return (HOST_ERROR(1));
1027
1028         host->states_hashsize = NAT64LSN_HSIZE;
1029         host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
1030             host->states_hashsize, M_NAT64LSN, M_NOWAIT);
1031         if (host->states_hash == NULL) {
1032                 uma_zfree(nat64lsn_host_zone, host);
1033                 return (HOST_ERROR(2));
1034         }
1035
1036         link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
1037         if (link == NULL) {
1038                 free(host->states_hash, M_NAT64LSN);
1039                 uma_zfree(nat64lsn_host_zone, host);
1040                 return (HOST_ERROR(3));
1041         }
1042
1043         /* Initialize */
1044         HOST_LOCK_INIT(host);
1045         SET_AGE(host->timestamp);
1046         host->addr = ji->f_id.src_ip6;
1047         host->hval = ji->src6_hval;
1048         host->flags = 0;
1049         host->states_count = 0;
1050         host->states_hashsize = NAT64LSN_HSIZE;
1051         CK_SLIST_INIT(&host->aliases);
1052         for (i = 0; i < host->states_hashsize; i++)
1053                 CK_SLIST_INIT(&host->states_hash[i]);
1054
1055         /* Determine alias from flow hash. */
1056         hval = ALIASLINK_HVAL(cfg, &ji->f_id);
1057         link->alias = &ALIAS_BYHASH(cfg, hval);
1058         CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
1059
1060         ALIAS_LOCK(link->alias);
1061         CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
1062         link->alias->hosts_count++;
1063         ALIAS_UNLOCK(link->alias);
1064
1065         CFG_LOCK(cfg);
1066         CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
1067         cfg->hosts_count++;
1068         CFG_UNLOCK(cfg);
1069
1070 get_state:
1071         data[0] = ji->faddr;
1072         data[1] = (ji->f_id.dst_port << 16) | ji->port;
1073         ji->state_hval = hval = STATE_HVAL(cfg, data);
1074         state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
1075             ji->faddr, ji->port, ji->proto);
1076         /*
1077          * We failed to obtain new state, used alias needs new PG.
1078          * XXX: or another alias should be used.
1079          */
1080         if (state == NULL) {
1081                 /* Try to allocate new PG */
1082                 if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1083                         return (HOST_ERROR(4));
1084                 /* We assume that nat64lsn_alloc_pg() got state */
1085         } else
1086                 ji->state = state;
1087
1088         ji->done = 1;
1089         DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
1090             inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
1091         return (HOST_ERROR(0));
1092 }
1093
1094 static int
1095 nat64lsn_find_pg_place(uint32_t *data)
1096 {
1097         int i;
1098
1099         for (i = 0; i < 32; i++) {
1100                 if (~data[i] == 0)
1101                         continue;
1102                 return (i * 32 + ffs(~data[i]) - 1);
1103         }
1104         return (-1);
1105 }
1106
/*
 * Allocate a new portgroup (64 ports) for one protocol of an alias
 * and publish it in the per-protocol pgchunk array.
 *
 * Returns PG_ERROR(0) on success, or PG_ERROR(stage) identifying the
 * failed step.  The PG pointer is stored before the pgmask bit is set
 * (with store fences in between) so lockless readers that see the bit
 * always observe a fully initialized PG.
 */
static int
nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_alias *alias, uint32_t *chunkmask,
    uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
    struct nat64lsn_pg **pgptr, uint8_t proto)
{
	struct nat64lsn_pg *pg;
	int i, pg_idx, chunk_idx;

	/* Find place in pgchunk where PG can be added */
	pg_idx = nat64lsn_find_pg_place(pgmask);
	if (pg_idx < 0)	/* no more PGs */
		return (PG_ERROR(1));
	/* Check that we have allocated pgchunk for given PG index */
	chunk_idx = pg_idx / 32;
	if (!ISSET32(*chunkmask, chunk_idx)) {
		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
		    M_NOWAIT);
		if (chunks[chunk_idx] == NULL)
			return (PG_ERROR(2));
		/* Publish the chunk before marking it present. */
		ck_pr_bts_32(chunkmask, chunk_idx);
		ck_pr_fence_store();
	}
	/* Allocate PG and states chunks */
	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
	if (pg == NULL)
		return (PG_ERROR(3));
	pg->chunks_count = cfg->states_chunks;
	if (pg->chunks_count > 1) {
		/* Multi-chunk PG: arrays of freemasks and state chunks. */
		pg->freemask_chunk = malloc(pg->chunks_count *
		    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
		if (pg->freemask_chunk == NULL) {
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(4));
		}
		pg->states_chunk = malloc(pg->chunks_count *
		    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
		    M_NOWAIT | M_ZERO);
		if (pg->states_chunk == NULL) {
			free(pg->freemask_chunk, M_NAT64LSN);
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(5));
		}
		for (i = 0; i < pg->chunks_count; i++) {
			pg->states_chunk[i] = uma_zalloc(
			    nat64lsn_state_zone, M_NOWAIT);
			if (pg->states_chunk[i] == NULL)
				goto states_failed;
		}
		/* All-ones freemask: every state starts free. */
		memset(pg->freemask_chunk, 0xff,
		    sizeof(uint64_t) * pg->chunks_count);
	} else {
		/* Single-chunk PG: inline states pointer and freemask. */
		pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
		if (pg->states == NULL) {
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(6));
		}
		memset(&pg->freemask64, 0xff, sizeof(uint64_t));
	}

	/* Initialize PG and hook it to pgchunk */
	SET_AGE(pg->timestamp);
	pg->proto = proto;
	pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
	ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
	ck_pr_fence_store();
	ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
	ck_pr_store_ptr(pgptr, pg);

	ALIAS_LOCK(alias);
	CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
	SET_AGE(alias->timestamp);
	alias->portgroups_count++;
	ALIAS_UNLOCK(alias);
	NAT64STAT_INC(&cfg->base.stats, spgcreated);
	return (PG_ERROR(0));

states_failed:
	/* uma_zfree() ignores NULL items (array was M_ZERO'ed). */
	for (i = 0; i < pg->chunks_count; i++)
		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
	free(pg->freemask_chunk, M_NAT64LSN);
	free(pg->states_chunk, M_NAT64LSN);
	uma_zfree(nat64lsn_pg_zone, pg);
	return (PG_ERROR(7));
}
1192
/*
 * Allocate a new portgroup for the alias associated with the job's
 * flow and try to obtain a state from it.
 *
 * Returns PG_ERROR(0) on success (ji->state and ji->done set),
 * PG_ERROR(1) when the alias has no free PG slots, or another
 * PG_ERROR(stage) from nat64lsn_alloc_proto_pg().
 */
static int
nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_alias *alias;
	int ret;

	link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
	if (link == NULL)
		return (PG_ERROR(1));

	/*
	 * TODO: check that we did not already allocated PG in
	 *       previous call.
	 */

	ret = 0;
	alias = link->alias;
	/* Find place in pgchunk where PG can be added */
	switch (ji->proto) {
	case IPPROTO_TCP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->tcp_chunkmask, alias->tcp_pgmask,
		    alias->tcp, &alias->tcp_pg, ji->proto);
		break;
	case IPPROTO_UDP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->udp_chunkmask, alias->udp_pgmask,
		    alias->udp, &alias->udp_pg, ji->proto);
		break;
	case IPPROTO_ICMP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->icmp_chunkmask, alias->icmp_pgmask,
		    alias->icmp, &alias->icmp_pg, ji->proto);
		break;
	default:
		/* Jobs are only created for TCP/UDP/ICMP. */
		panic("%s: wrong proto %d", __func__, ji->proto);
	}
	if (ret == PG_ERROR(1)) {
		/*
		 * PG_ERROR(1) means that alias lacks free PGs
		 * XXX: try next alias.
		 */
		printf("NAT64LSN: %s: failed to obtain PG\n",
		    __func__);
		return (ret);
	}
	if (ret == PG_ERROR(0)) {
		/* Retry state allocation from the freshly created PG. */
		ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
		    ji->state_hval, ji->faddr, ji->port, ji->proto);
		if (ji->state == NULL)
			ret = PG_ERROR(8);
		else
			ji->done = 1;
	}
	return (ret);
}
1250
/*
 * Deferred job handler, scheduled via cfg->jcallout from
 * nat64lsn_enqueue_job().
 *
 * Grabs the whole pending job queue under JQUEUE_LOCK, then performs
 * the host/portgroup allocations inside the NAT64LSN epoch section.
 * Requests that were satisfied (ji->done) get their saved mbuf
 * re-injected into the IPv6->IPv4 translator.
 */
static void
nat64lsn_do_request(void *data)
{
	struct epoch_tracker et;
	struct nat64lsn_job_head jhead;
	struct nat64lsn_job_item *ji, *ji2;
	struct nat64lsn_cfg *cfg;
	int jcount;
	uint8_t flags;

	cfg = (struct nat64lsn_cfg *)data;
	if (cfg->jlen == 0)
		return;

	CURVNET_SET(cfg->vp);
	STAILQ_INIT(&jhead);

	/* Grab queue */
	JQUEUE_LOCK();
	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
	jcount = cfg->jlen;
	cfg->jlen = 0;
	JQUEUE_UNLOCK();

	/* TODO: check if we need to resize hash */

	NAT64STAT_INC(&cfg->base.stats, jcalls);
	DPRINTF(DP_JQUEUE, "count=%d", jcount);

	/*
	 * TODO:
	 * What we should do here is to build a hash
	 * to ensure we don't have lots of duplicate requests.
	 * Skip this for now.
	 *
	 * TODO: Limit per-call number of items
	 */

	NAT64LSN_EPOCH_ENTER(et);
	STAILQ_FOREACH(ji, &jhead, entries) {
		switch (ji->jtype) {
		case JTYPE_NEWHOST:
			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
				NAT64STAT_INC(&cfg->base.stats, jhostfails);
			break;
		case JTYPE_NEWPORTGROUP:
			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
				NAT64STAT_INC(&cfg->base.stats, jportfails);
			break;
		default:
			continue;
		}
		if (ji->done != 0) {
			/* Re-inject the deferred packet. */
			flags = ji->proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(ji->f_id._flags);
			nat64lsn_translate6_internal(cfg, &ji->m,
			    ji->state, flags);
			NAT64STAT_INC(&cfg->base.stats, jreinjected);
		}
	}
	NAT64LSN_EPOCH_EXIT(et);

	ji = STAILQ_FIRST(&jhead);
	while (ji != NULL) {
		ji2 = STAILQ_NEXT(ji, entries);
		/*
		 * In any case we must free the mbuf if the translator
		 * did not consume it.  The translator sets ji->m to
		 * NULL when it takes ownership, and m_freem(NULL) is a
		 * no-op.
		 */
		m_freem(ji->m);
		uma_zfree(nat64lsn_job_zone, ji);
		ji = ji2;
	}
	CURVNET_RESTORE();
}
1326
1327 static struct nat64lsn_job_item *
1328 nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
1329 {
1330         struct nat64lsn_job_item *ji;
1331
1332         /*
1333          * Do not try to lock possibly contested mutex if we're near the
1334          * limit. Drop packet instead.
1335          */
1336         ji = NULL;
1337         if (cfg->jlen >= cfg->jmaxlen)
1338                 NAT64STAT_INC(&cfg->base.stats, jmaxlen);
1339         else {
1340                 ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
1341                 if (ji == NULL)
1342                         NAT64STAT_INC(&cfg->base.stats, jnomem);
1343         }
1344         if (ji == NULL) {
1345                 NAT64STAT_INC(&cfg->base.stats, dropped);
1346                 DPRINTF(DP_DROPS, "failed to create job");
1347         } else {
1348                 ji->jtype = jtype;
1349                 ji->done = 0;
1350         }
1351         return (ji);
1352 }
1353
/*
 * Append a job to the configuration's request queue and, if the
 * one-tick callout that runs nat64lsn_do_request() is not already
 * pending, arm it.
 */
static void
nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{

	JQUEUE_LOCK();
	STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
	NAT64STAT_INC(&cfg->base.stats, jrequests);
	cfg->jlen++;

	if (callout_pending(&cfg->jcallout) == 0)
		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
	JQUEUE_UNLOCK();
}
1367
/*
 * Epoch callback: final destruction of hosts and portgroups collected
 * by the periodic job.  Runs after all readers have left the epoch
 * section, so the unlinked objects can no longer be referenced by the
 * dataplane.
 */
static void
nat64lsn_job_destroy(epoch_context_t ctx)
{
	struct nat64lsn_job_item *ji;
	struct nat64lsn_host *host;
	struct nat64lsn_pg *pg;
	int i;

	ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
	MPASS(ji->jtype == JTYPE_DESTROY);
	while (!CK_SLIST_EMPTY(&ji->hosts)) {
		host = CK_SLIST_FIRST(&ji->hosts);
		CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
		if (host->states_count > 0) {
			/*
			 * XXX: The state has been created
			 * during host deletion.
			 */
			printf("NAT64LSN: %s: destroying host with %d "
			    "states\n", __func__, host->states_count);
		}
		nat64lsn_destroy_host(host);
	}
	while (!CK_SLIST_EMPTY(&ji->portgroups)) {
		pg = CK_SLIST_FIRST(&ji->portgroups);
		CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
		for (i = 0; i < pg->chunks_count; i++) {
			if (FREEMASK_BITCOUNT(pg, i) != 64) {
				/*
				 * XXX: The state has been created during
				 * PG deletion.
				 */
				printf("NAT64LSN: %s: destroying PG %p "
				    "with non-empty chunk %d\n", __func__,
				    pg, i);
			}
		}
		nat64lsn_destroy_pg(pg);
	}
	/* pgchunk is currently always NULL (see nat64lsn_expire_pgchunk). */
	uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
	uma_zfree(nat64lsn_job_zone, ji);
}
1410
1411 static int
1412 nat64lsn_request_host(struct nat64lsn_cfg *cfg,
1413     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1414     in_addr_t faddr, uint16_t port, uint8_t proto)
1415 {
1416         struct nat64lsn_job_item *ji;
1417
1418         ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
1419         if (ji != NULL) {
1420                 ji->m = *mp;
1421                 ji->f_id = *f_id;
1422                 ji->faddr = faddr;
1423                 ji->port = port;
1424                 ji->proto = proto;
1425                 ji->src6_hval = hval;
1426
1427                 nat64lsn_enqueue_job(cfg, ji);
1428                 NAT64STAT_INC(&cfg->base.stats, jhostsreq);
1429                 *mp = NULL;
1430         }
1431         return (IP_FW_DENY);
1432 }
1433
1434 static int
1435 nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
1436     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1437     in_addr_t faddr, uint16_t port, uint8_t proto)
1438 {
1439         struct nat64lsn_job_item *ji;
1440
1441         ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
1442         if (ji != NULL) {
1443                 ji->m = *mp;
1444                 ji->f_id = *f_id;
1445                 ji->faddr = faddr;
1446                 ji->port = port;
1447                 ji->proto = proto;
1448                 ji->state_hval = hval;
1449                 ji->host = host;
1450
1451                 nat64lsn_enqueue_job(cfg, ji);
1452                 NAT64STAT_INC(&cfg->base.stats, jportreq);
1453                 *mp = NULL;
1454         }
1455         return (IP_FW_DENY);
1456 }
1457
1458 static int
1459 nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
1460     struct nat64lsn_state *state, uint8_t flags)
1461 {
1462         struct pfloghdr loghdr, *logdata;
1463         int ret;
1464         uint16_t ts;
1465
1466         /* Update timestamp and flags if needed */
1467         SET_AGE(ts);
1468         if (state->timestamp != ts)
1469                 state->timestamp = ts;
1470         if ((state->flags & flags) != 0)
1471                 state->flags |= flags;
1472
1473         if (cfg->base.flags & NAT64_LOG) {
1474                 logdata = &loghdr;
1475                 nat64lsn_log(logdata, *mp, AF_INET6, state);
1476         } else
1477                 logdata = NULL;
1478
1479         ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
1480             htons(state->aport), &cfg->base, logdata);
1481         if (ret == NAT64SKIP)
1482                 return (cfg->nomatch_verdict);
1483         if (ret == NAT64RETURN)
1484                 *mp = NULL;
1485         return (IP_FW_DENY);
1486 }
1487
/*
 * IPv6 -> IPv4 translation path.
 *
 * Validates the protocol, extracts the embedded IPv4 destination,
 * looks up (or requests deferred allocation of) the host entry and
 * the flow state, then translates the packet.  Returns an ipfw
 * verdict; the request paths consume the mbuf.
 */
static int
nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
    struct mbuf **mp)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	struct icmp6_hdr *icmp6;
	uint32_t addr, hval, data[2];
	int offset, proto;
	uint16_t port;
	uint8_t flags;

	/* Check if protocol is supported */
	port = f_id->src_port;
	proto = f_id->proto;
	switch (f_id->proto) {
	case IPPROTO_ICMPV6:
		/*
		 * For ICMPv6 echo reply/request we use icmp6_id as
		 * local port.
		 */
		offset = 0;
		proto = nat64_getlasthdr(*mp, &offset);
		if (proto < 0) {
			NAT64STAT_INC(&cfg->base.stats, dropped);
			DPRINTF(DP_DROPS, "mbuf isn't contigious");
			return (IP_FW_DENY);
		}
		if (proto == IPPROTO_ICMPV6) {
			icmp6 = mtodo(*mp, offset);
			if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
			    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
				port = ntohs(icmp6->icmp6_id);
		}
		/* ICMPv6 states are kept under IPPROTO_ICMP. */
		proto = IPPROTO_ICMP;
		/* FALLTHROUGH */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	default:
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (cfg->nomatch_verdict);
	}

	/* Extract IPv4 from destination IPv6 address */
	addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
	if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
		char a[INET_ADDRSTRLEN];

		NAT64STAT_INC(&cfg->base.stats, dropped);
		DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
		    inet_ntop(AF_INET, &addr, a, sizeof(a)));
		return (IP_FW_DENY); /* XXX: add extra stats? */
	}

	/* Try to find host */
	hval = HOST_HVAL(cfg, &f_id->src_ip6);
	CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
		if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
			break;
	}
	/* We use IPv4 address in host byte order */
	addr = ntohl(addr);
	/* Unknown host: defer to the job queue (consumes the mbuf). */
	if (host == NULL)
		return (nat64lsn_request_host(cfg, f_id, mp,
		    hval, addr, port, proto));

	flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);

	/* State hash covers remote address and both ports. */
	data[0] = addr;
	data[1] = (f_id->dst_port << 16) | port;
	hval = STATE_HVAL(cfg, data);
	state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
	    port, proto);
	/* No free state: request a new PG (consumes the mbuf). */
	if (state == NULL)
		return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
		    port, proto));
	return (nat64lsn_translate6_internal(cfg, mp, state, flags));
}
1567
/*
 * Main dataplane entry point.
 *
 * Resolves the NAT64LSN instance from the ipfw opcode pair, dispatches
 * to the IPv4 or IPv6 translation path based on the flow's address
 * family, and frees the mbuf whenever the verdict is not IP_FW_PASS
 * and the translator did not consume it.  *done is set when the
 * instance matched and rule search should terminate.
 */
int
ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
    ipfw_insn *cmd, int *done)
{
	struct nat64lsn_cfg *cfg;
	ipfw_insn *icmd;
	int ret;

	IPFW_RLOCK_ASSERT(ch);

	*done = 0;	/* continue the search in case of failure */
	icmd = cmd + 1;
	if (cmd->opcode != O_EXTERNAL_ACTION ||
	    cmd->arg1 != V_nat64lsn_eid ||
	    icmd->opcode != O_EXTERNAL_INSTANCE ||
	    (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
		return (IP_FW_DENY);

	*done = 1;	/* terminate the search */

	switch (args->f_id.addr_type) {
	case 4:
		ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
		break;
	case 6:
		/*
		 * Check that destination IPv6 address matches our prefix6.
		 */
		if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
		    memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
		    cfg->base.plat_plen / 8) != 0) {
			ret = cfg->nomatch_verdict;
			break;
		}
		ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
		break;
	default:
		ret = cfg->nomatch_verdict;
	}

	/* Translators leave args->m set when they did not consume it. */
	if (ret != IP_FW_PASS && args->m != NULL) {
		m_freem(args->m);
		args->m = NULL;
	}
	return (ret);
}
1617
1618 static int
1619 nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
1620 {
1621         struct nat64lsn_states_chunk *chunk;
1622         int i;
1623
1624         chunk = (struct nat64lsn_states_chunk *)mem;
1625         for (i = 0; i < 64; i++)
1626                 chunk->state[i].flags = 0;
1627         return (0);
1628 }
1629
/*
 * Module-wide initialization: create the UMA zones shared by all
 * NAT64LSN instances and set up the job queue lock.  Called once at
 * module load; paired with nat64lsn_uninit_internal().
 */
void
nat64lsn_init_internal(void)
{

	nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
	    sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
	    sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
	    sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
	    sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	/* States chunks get a ctor that marks every slot unused. */
	nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
	    sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
	    sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	JQUEUE_LOCK_INIT();
}
1654
/*
 * Module-wide teardown: destroy the job queue lock and all shared UMA
 * zones.  Called once at module unload, after every instance has been
 * destroyed.
 */
void
nat64lsn_uninit_internal(void)
{

	/* XXX: epoch_task drain */
	JQUEUE_LOCK_DESTROY();
	uma_zdestroy(nat64lsn_host_zone);
	uma_zdestroy(nat64lsn_pgchunk_zone);
	uma_zdestroy(nat64lsn_pg_zone);
	uma_zdestroy(nat64lsn_aliaslink_zone);
	uma_zdestroy(nat64lsn_state_zone);
	uma_zdestroy(nat64lsn_job_zone);
}
1668
/*
 * Arm the instance's periodic housekeeping callout.  The callout
 * reschedules itself from nat64lsn_periodic(); PERIODIC_DELAY is in
 * seconds.
 */
void
nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
{

	CALLOUT_LOCK(cfg);
	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
	    nat64lsn_periodic, cfg);
	CALLOUT_UNLOCK(cfg);
}
1678
1679 struct nat64lsn_cfg *
1680 nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
1681 {
1682         struct nat64lsn_cfg *cfg;
1683         struct nat64lsn_alias *alias;
1684         int i, naddr;
1685
1686         cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
1687             M_WAITOK | M_ZERO);
1688
1689         CFG_LOCK_INIT(cfg);
1690         CALLOUT_LOCK_INIT(cfg);
1691         STAILQ_INIT(&cfg->jhead);
1692         cfg->vp = curvnet;
1693         COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
1694
1695         cfg->hash_seed = arc4random();
1696         cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
1697         cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
1698             cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
1699         for (i = 0; i < cfg->hosts_hashsize; i++)
1700                 CK_SLIST_INIT(&cfg->hosts_hash[i]);
1701
1702         naddr = 1 << (32 - plen);
1703         cfg->prefix4 = prefix;
1704         cfg->pmask4 = prefix | (naddr - 1);
1705         cfg->plen4 = plen;
1706         cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
1707             M_NAT64LSN, M_WAITOK | M_ZERO);
1708         for (i = 0; i < naddr; i++) {
1709                 alias = &cfg->aliases[i];
1710                 alias->addr = prefix + i; /* host byte order */
1711                 CK_SLIST_INIT(&alias->hosts);
1712                 ALIAS_LOCK_INIT(alias);
1713         }
1714
1715         callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
1716         callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
1717
1718         return (cfg);
1719 }
1720
1721 static void
1722 nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
1723 {
1724         int i;
1725
1726         if (pg->chunks_count == 1) {
1727                 uma_zfree(nat64lsn_state_zone, pg->states);
1728         } else {
1729                 for (i = 0; i < pg->chunks_count; i++)
1730                         uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1731                 free(pg->states_chunk, M_NAT64LSN);
1732                 free(pg->freemask_chunk, M_NAT64LSN);
1733         }
1734         uma_zfree(nat64lsn_pg_zone, pg);
1735 }
1736
1737 static void
1738 nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
1739     struct nat64lsn_alias *alias)
1740 {
1741         struct nat64lsn_pg *pg;
1742         int i;
1743
1744         while (!CK_SLIST_EMPTY(&alias->portgroups)) {
1745                 pg = CK_SLIST_FIRST(&alias->portgroups);
1746                 CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
1747                 nat64lsn_destroy_pg(pg);
1748         }
1749         for (i = 0; i < 32; i++) {
1750                 if (ISSET32(alias->tcp_chunkmask, i))
1751                         uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
1752                 if (ISSET32(alias->udp_chunkmask, i))
1753                         uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
1754                 if (ISSET32(alias->icmp_chunkmask, i))
1755                         uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
1756         }
1757         ALIAS_LOCK_DESTROY(alias);
1758 }
1759
1760 static void
1761 nat64lsn_destroy_host(struct nat64lsn_host *host)
1762 {
1763         struct nat64lsn_aliaslink *link;
1764
1765         while (!CK_SLIST_EMPTY(&host->aliases)) {
1766                 link = CK_SLIST_FIRST(&host->aliases);
1767                 CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
1768
1769                 ALIAS_LOCK(link->alias);
1770                 CK_SLIST_REMOVE(&link->alias->hosts, link,
1771                     nat64lsn_aliaslink, alias_entries);
1772                 link->alias->hosts_count--;
1773                 ALIAS_UNLOCK(link->alias);
1774
1775                 uma_zfree(nat64lsn_aliaslink_zone, link);
1776         }
1777         HOST_LOCK_DESTROY(host);
1778         free(host->states_hash, M_NAT64LSN);
1779         uma_zfree(nat64lsn_host_zone, host);
1780 }
1781
1782 void
1783 nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
1784 {
1785         struct nat64lsn_host *host;
1786         int i;
1787
1788         CALLOUT_LOCK(cfg);
1789         callout_drain(&cfg->periodic);
1790         CALLOUT_UNLOCK(cfg);
1791         callout_drain(&cfg->jcallout);
1792
1793         for (i = 0; i < cfg->hosts_hashsize; i++) {
1794                 while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
1795                         host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
1796                         CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
1797                         nat64lsn_destroy_host(host);
1798                 }
1799         }
1800
1801         for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
1802                 nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
1803
1804         CALLOUT_LOCK_DESTROY(cfg);
1805         CFG_LOCK_DESTROY(cfg);
1806         COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
1807         free(cfg->hosts_hash, M_NAT64LSN);
1808         free(cfg->aliases, M_NAT64LSN);
1809         free(cfg, M_NAT64LSN);
1810 }