/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015-2019 Yandex LLC
 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
 * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/ck.h>
#include <sys/epoch.h>
#include <sys/errno.h>
#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_pflog.h>
#include <net/pfil.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/ip_fw_nat64.h>

#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/pf/pf.h>

#include "nat64lsn.h"

MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");

static epoch_t nat64lsn_epoch;
#define NAT64LSN_EPOCH_ENTER(et)  epoch_enter_preempt(nat64lsn_epoch, &(et))
#define NAT64LSN_EPOCH_EXIT(et)   epoch_exit_preempt(nat64lsn_epoch, &(et))
#define NAT64LSN_EPOCH_WAIT()     epoch_wait_preempt(nat64lsn_epoch)
#define NAT64LSN_EPOCH_ASSERT()   MPASS(in_epoch(nat64lsn_epoch))
#define NAT64LSN_EPOCH_CALL(c, f) epoch_call(nat64lsn_epoch, (c), (f))
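
/*
 * Concurrency model (summary of how these wrappers are used below):
 * data path readers traverse hosts, aliases and portgroups lockless
 * within an epoch(9) section entered via NAT64LSN_EPOCH_ENTER(), while
 * writers unlink objects and defer the actual free with
 * NAT64LSN_EPOCH_CALL() until all in-flight readers have left their
 * epoch sections.
 */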

static uma_zone_t nat64lsn_host_zone;
static uma_zone_t nat64lsn_pgchunk_zone;
static uma_zone_t nat64lsn_pg_zone;
static uma_zone_t nat64lsn_aliaslink_zone;
static uma_zone_t nat64lsn_state_zone;
static uma_zone_t nat64lsn_job_zone;

static void nat64lsn_periodic(void *data);
#define PERIODIC_DELAY          4
#define NAT64_LOOKUP(chain, cmd)        \
        (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
/*
 * Delayed job queue, used to create new hosts
 * and new portgroups.
 */
enum nat64lsn_jtype {
        JTYPE_NEWHOST = 1,
        JTYPE_NEWPORTGROUP,
        JTYPE_DESTROY,
};

struct nat64lsn_job_item {
        STAILQ_ENTRY(nat64lsn_job_item) entries;
        enum nat64lsn_jtype     jtype;

        union {
                struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
                        struct mbuf             *m;
                        struct nat64lsn_host    *host;
                        struct nat64lsn_state   *state;
                        uint32_t                src6_hval;
                        uint32_t                state_hval;
                        struct ipfw_flow_id     f_id;
                        in_addr_t               faddr;
                        uint16_t                port;
                        uint8_t                 proto;
                        uint8_t                 done;
                };
                struct { /* used by JTYPE_DESTROY */
                        struct nat64lsn_hosts_slist     hosts;
                        struct nat64lsn_pg_slist        portgroups;
                        struct nat64lsn_pgchunk         *pgchunk;
                        struct epoch_context            epoch_ctx;
                };
        };
};

static struct mtx jmtx;
#define JQUEUE_LOCK_INIT()      mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
#define JQUEUE_LOCK_DESTROY()   mtx_destroy(&jmtx)
#define JQUEUE_LOCK()           mtx_lock(&jmtx)
#define JQUEUE_UNLOCK()         mtx_unlock(&jmtx)

static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static struct nat64lsn_job_item *nat64lsn_create_job(
    struct nat64lsn_cfg *cfg, int jtype);
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static void nat64lsn_job_destroy(epoch_context_t ctx);
static void nat64lsn_destroy_host(struct nat64lsn_host *host);
static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);

static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
    struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
    struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);

#define NAT64_BIT_TCP_FIN       0       /* FIN was seen */
#define NAT64_BIT_TCP_SYN       1       /* First syn in->out */
#define NAT64_BIT_TCP_ESTAB     2       /* Packet with Ack */
#define NAT64_BIT_READY_IPV4    6       /* state is ready for translate4 */
#define NAT64_BIT_STALE         7       /* state is going to be expired */

#define NAT64_FLAG_FIN          (1 << NAT64_BIT_TCP_FIN)
#define NAT64_FLAG_SYN          (1 << NAT64_BIT_TCP_SYN)
#define NAT64_FLAG_ESTAB        (1 << NAT64_BIT_TCP_ESTAB)
#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)

#define NAT64_FLAG_READY        (1 << NAT64_BIT_READY_IPV4)
#define NAT64_FLAG_STALE        (1 << NAT64_BIT_STALE)

static inline uint8_t
convert_tcp_flags(uint8_t flags)
{
        uint8_t result;

        result = flags & (TH_FIN|TH_SYN);
        result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
        result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */

        return (result);
}
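
/*
 * Example, using the standard TH_* values from <netinet/tcp.h>
 * (TH_FIN 0x01, TH_SYN 0x02, TH_RST 0x04, TH_ACK 0x10): a SYN|ACK
 * segment (0x12) converts to NAT64_FLAG_SYN|NAT64_FLAG_ESTAB, while a
 * bare RST (0x04) converts to NAT64_FLAG_FIN, i.e. an RST tears the
 * state down along the same path as a FIN.
 */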

static void
nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
    struct nat64lsn_state *state)
{

        memset(plog, 0, sizeof(*plog));
        plog->length = PFLOG_REAL_HDRLEN;
        plog->af = family;
        plog->action = PF_NAT;
        plog->dir = PF_IN;
        plog->rulenr = htonl(state->ip_src);
        plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
            (state->proto << 8) | (state->ip_dst & 0xff));
        plog->ruleset[0] = '\0';
        strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
        ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
}
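
/*
 * Note for pflog consumers: the header fields are repurposed above, so
 * in a captured record "rulenr" carries the translated IPv4 source
 * address, and "subrulenr" packs the allocated external port (upper
 * 16 bits), the protocol (next 8 bits) and the low byte of the remote
 * IPv4 address.
 */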

#define HVAL(p, n, s)   jenkins_hash32((const uint32_t *)(p), (n), (s))
#define HOST_HVAL(c, a) HVAL((a),\
    sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
#define HOSTS(c, v)     ((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])

#define ALIASLINK_HVAL(c, f)    HVAL(&(f)->dst_ip6,\
    sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
#define ALIAS_BYHASH(c, v)      \
    ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
static struct nat64lsn_aliaslink*
nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
    struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
{

        /*
         * Several different algorithms for selecting an alias address
         * could be implemented here.
         * XXX: for now we use the first available one.
         */
        return (CK_SLIST_FIRST(&host->aliases));
}

#define STATE_HVAL(c, d)        HVAL((d), 2, (c)->hash_seed)
#define STATE_HASH(h, v)        \
    ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
#define STATES_CHUNK(p, v)      \
    ((p)->chunks_count == 1 ? (p)->states : \
        ((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))

#ifdef __LP64__
#define FREEMASK_FFSLL(pg, faddr)               \
    ffsll(*FREEMASK_CHUNK((pg), (faddr)))
#define FREEMASK_BTR(pg, faddr, bit)    \
    ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define FREEMASK_BTS(pg, faddr, bit)    \
    ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define FREEMASK_ISSET(pg, faddr, bit)  \
    ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
#define FREEMASK_COPY(pg, n, out)       \
    (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
#else
static inline int
freemask_ffsll(uint32_t *freemask)
{
        int i;

        if ((i = ffsl(freemask[0])) != 0)
                return (i);
        if ((i = ffsl(freemask[1])) != 0)
                return (i + 32);
        return (0);
}
#define FREEMASK_FFSLL(pg, faddr)               \
    freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
#define FREEMASK_BTR(pg, faddr, bit)    \
    ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define FREEMASK_BTS(pg, faddr, bit)    \
    ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define FREEMASK_ISSET(pg, faddr, bit)  \
    ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
#define FREEMASK_COPY(pg, n, out)       \
    (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
        ((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#endif /* !__LP64__ */
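
/*
 * A set bit in a freemask marks a free state slot. FREEMASK_FFSLL()
 * follows the ffsll(3) convention and returns the 1-based index of the
 * first free slot, or 0 when the portgroup is full; FREEMASK_BTR()
 * claims a slot by atomically clearing its bit and returns non-zero
 * only for the thread that actually flipped the bit, which is how
 * concurrent allocations are arbitrated below.
 */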


#define NAT64LSN_TRY_PGCNT      32
static struct nat64lsn_pg*
nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
    struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
    uint32_t *pgidx, in_addr_t faddr)
{
        struct nat64lsn_pg *pg, *oldpg;
        uint32_t idx, oldidx;
        int cnt;

        cnt = 0;
        /* First try last used PG */
        oldpg = pg = ck_pr_load_ptr(pgptr);
        idx = oldidx = ck_pr_load_32(pgidx);
        /* If pgidx is out of range, reset it to the first pgchunk */
        if (!ISSET32(*chunkmask, idx / 32))
                idx = 0;
        do {
                ck_pr_fence_load();
                if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
                        /*
                         * If the last used PG had no free states, update
                         * the pointer to the PG we have found.
                         * NOTE: it may already have been updated by the
                         *       jobs handler, thus we use a CAS operation.
                         */
                        if (cnt > 0)
                                ck_pr_cas_ptr(pgptr, oldpg, pg);
                        return (pg);
                }
                /* Stop if idx is out of range */
                if (!ISSET32(*chunkmask, idx / 32))
                        break;

                if (ISSET32(pgmask[idx / 32], idx % 32))
                        pg = ck_pr_load_ptr(
                            &chunks[idx / 32]->pgptr[idx % 32]);
                else
                        pg = NULL;

                idx++;
        } while (++cnt < NAT64LSN_TRY_PGCNT);

        /* If pgidx is out of range, reset it to the first pgchunk */
        if (!ISSET32(*chunkmask, idx / 32))
                idx = 0;
        ck_pr_cas_32(pgidx, oldidx, idx);
        return (NULL);
}

static struct nat64lsn_state*
nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
    uint16_t port, uint8_t proto)
{
        struct nat64lsn_aliaslink *link;
        struct nat64lsn_state *state;
        struct nat64lsn_pg *pg;
        int i, offset;

        NAT64LSN_EPOCH_ASSERT();

        /* Check whether we already have a state for the given arguments */
        CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
                if (state->proto == proto && state->ip_dst == faddr &&
                    state->sport == port && state->dport == f_id->dst_port)
                        return (state);
        }

        link = nat64lsn_get_aliaslink(cfg, host, f_id);
        if (link == NULL)
                return (NULL);

        switch (proto) {
        case IPPROTO_TCP:
                pg = nat64lsn_get_pg(
                    &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
                    link->alias->tcp, &link->alias->tcp_pg,
                    &link->alias->tcp_pgidx, faddr);
                break;
        case IPPROTO_UDP:
                pg = nat64lsn_get_pg(
                    &link->alias->udp_chunkmask, link->alias->udp_pgmask,
                    link->alias->udp, &link->alias->udp_pg,
                    &link->alias->udp_pgidx, faddr);
                break;
        case IPPROTO_ICMP:
                pg = nat64lsn_get_pg(
                    &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
                    link->alias->icmp, &link->alias->icmp_pg,
                    &link->alias->icmp_pgidx, faddr);
                break;
        default:
                panic("%s: wrong proto %d", __func__, proto);
        }
        if (pg == NULL)
                return (NULL);

        /* Check that the PG has some free states */
        state = NULL;
        i = FREEMASK_BITCOUNT(pg, faddr);
        while (i-- > 0) {
                offset = FREEMASK_FFSLL(pg, faddr);
                if (offset == 0) {
                        /*
                         * We lost the race.
                         * No more free states in this PG.
                         */
                        break;
                }

                /* Let's try to atomically grab the state */
                if (FREEMASK_BTR(pg, faddr, offset - 1)) {
                        state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
                        /* Initialize */
                        state->flags = proto != IPPROTO_TCP ? 0 :
                            convert_tcp_flags(f_id->_flags);
                        state->proto = proto;
                        state->aport = pg->base_port + offset - 1;
                        state->dport = f_id->dst_port;
                        state->sport = port;
                        state->ip6_dst = f_id->dst_ip6;
                        state->ip_dst = faddr;
                        state->ip_src = link->alias->addr;
                        state->hval = hval;
                        state->host = host;
                        SET_AGE(state->timestamp);

                        /* Insert new state into host's hash table */
                        HOST_LOCK(host);
                        CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
                            state, entries);
                        host->states_count++;
                        /*
                         * XXX: If the host is about to be expired, reset
                         * the NAT64LSN_DEADHOST flag.
                         */
                        host->flags &= ~NAT64LSN_DEADHOST;
                        HOST_UNLOCK(host);
                        NAT64STAT_INC(&cfg->base.stats, screated);
                        /* Mark the state as ready for translate4 */
                        ck_pr_fence_store();
                        ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
                        break;
                }
        }
        return (state);
}

/*
 * Inspect an ICMP packet to see if the message contains an embedded
 * packet header; if it does, take @addr and @port from that header.
 */
static int
inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
    uint16_t *port)
{
        struct icmp *icmp;
        struct ip *ip;
        int off;
        uint8_t inner_proto;

        ip = mtod(*mp, struct ip *); /* Outer IP header */
        off = (ip->ip_hl << 2) + ICMP_MINLEN;
        if ((*mp)->m_len < off)
                *mp = m_pullup(*mp, off);
        if (*mp == NULL)
                return (ENOMEM);

        ip = mtod(*mp, struct ip *); /* Outer IP header */
        icmp = L3HDR(ip, struct icmp *);
        switch (icmp->icmp_type) {
        case ICMP_ECHO:
        case ICMP_ECHOREPLY:
                /* Use the ICMP ID as distinguisher */
                *port = ntohs(icmp->icmp_id);
                return (0);
        case ICMP_UNREACH:
        case ICMP_TIMXCEED:
                break;
        default:
                return (EOPNOTSUPP);
        }
        /*
         * ICMP_UNREACH and ICMP_TIMXCEED messages contain an IP header
         * plus 64 bits of the ULP header.
         */
        if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
                return (EINVAL);
        if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
                *mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
        if (*mp == NULL)
                return (ENOMEM);
        ip = mtodo(*mp, off); /* Inner IP header */
        inner_proto = ip->ip_p;
        off += ip->ip_hl << 2; /* Skip inner IP header */
        *addr = ntohl(ip->ip_src.s_addr);
        if ((*mp)->m_len < off + ICMP_MINLEN)
                *mp = m_pullup(*mp, off + ICMP_MINLEN);
        if (*mp == NULL)
                return (ENOMEM);
        switch (inner_proto) {
        case IPPROTO_TCP:
        case IPPROTO_UDP:
                /* Copy the source port from the header */
                *port = ntohs(*((uint16_t *)mtodo(*mp, off)));
                *proto = inner_proto;
                return (0);
        case IPPROTO_ICMP:
                /*
                 * We will translate only ICMP errors for our ICMP
                 * echo requests.
                 */
                icmp = mtodo(*mp, off);
                if (icmp->icmp_type != ICMP_ECHO)
                        return (EOPNOTSUPP);
                *port = ntohs(icmp->icmp_id);
                return (0);
        }
        return (EOPNOTSUPP);
}

static struct nat64lsn_state*
nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
        struct nat64lsn_state *state;
        struct nat64lsn_pg *pg;
        int chunk_idx, pg_idx, state_idx;

        NAT64LSN_EPOCH_ASSERT();

        if (port < NAT64_MIN_PORT)
                return (NULL);
        /*
         * Alias keeps 32 pgchunks for each protocol.
         * Each pgchunk has 32 pointers to portgroup.
         * Each portgroup has 64 states for ports.
         */
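        /*
         * Worked example (assuming NAT64_MIN_PORT is 1024, as defined
         * in nat64lsn.h): external port 40000 gives 40000 - 1024 =
         * 38976, hence chunk_idx = 38976 / 2048 = 19; the remainder 64
         * gives pg_idx = 1 and state_idx = 0, i.e. slot 0 of
         * alias->tcp[19]->pgptr[1], a PG whose base_port is exactly
         * 40000.
         */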
        port -= NAT64_MIN_PORT;
        chunk_idx = port / 2048;

        port -= chunk_idx * 2048;
        pg_idx = port / 64;
        state_idx = port % 64;

        /*
         * First check in proto_chunkmask that the PG chunk is allocated.
         * Then check in proto_pgmask that the PG pointer is valid.
         */
        pg = NULL;
        switch (proto) {
        case IPPROTO_TCP:
                if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
                    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
                        pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
                        break;
                }
                return (NULL);
        case IPPROTO_UDP:
                if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
                    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
                        pg = alias->udp[chunk_idx]->pgptr[pg_idx];
                        break;
                }
                return (NULL);
        case IPPROTO_ICMP:
                if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
                    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
                        pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
                        break;
                }
                return (NULL);
        default:
                panic("%s: wrong proto %d", __func__, proto);
        }
        if (pg == NULL)
                return (NULL);

        if (FREEMASK_ISSET(pg, faddr, state_idx))
                return (NULL);

        state = &STATES_CHUNK(pg, faddr)->state[state_idx];
        ck_pr_fence_load();
        if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
                return (state);
        return (NULL);
}

static int
nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp)
{
        struct pfloghdr loghdr, *logdata;
        struct in6_addr src6;
        struct nat64lsn_state *state;
        struct nat64lsn_alias *alias;
        uint32_t addr, flags;
        uint16_t port, ts;
        int ret;
        uint8_t proto;

        addr = f_id->dst_ip;
        port = f_id->dst_port;
        proto = f_id->proto;
        if (addr < cfg->prefix4 || addr > cfg->pmask4) {
                NAT64STAT_INC(&cfg->base.stats, nomatch4);
                return (cfg->nomatch_verdict);
        }

        /* Check if protocol is supported */
        switch (proto) {
        case IPPROTO_ICMP:
                ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
                if (ret != 0) {
                        if (ret == ENOMEM) {
                                NAT64STAT_INC(&cfg->base.stats, nomem);
                                return (IP_FW_DENY);
                        }
                        NAT64STAT_INC(&cfg->base.stats, noproto);
                        return (cfg->nomatch_verdict);
                }
                if (addr < cfg->prefix4 || addr > cfg->pmask4) {
                        NAT64STAT_INC(&cfg->base.stats, nomatch4);
                        return (cfg->nomatch_verdict);
                }
                /* FALLTHROUGH */
        case IPPROTO_TCP:
        case IPPROTO_UDP:
                break;
        default:
                NAT64STAT_INC(&cfg->base.stats, noproto);
                return (cfg->nomatch_verdict);
        }

        alias = &ALIAS_BYHASH(cfg, addr);
        MPASS(addr == alias->addr);

        /* Check that we have a state for this port */
        state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
            port, proto);
        if (state == NULL) {
                NAT64STAT_INC(&cfg->base.stats, nomatch4);
                return (cfg->nomatch_verdict);
        }

        /* TODO: Check flags to see if we need to do some static mapping */

        /* Update some state fields if needed */
        SET_AGE(ts);
        if (f_id->proto == IPPROTO_TCP)
                flags = convert_tcp_flags(f_id->_flags);
        else
                flags = 0;
        if (state->timestamp != ts)
                state->timestamp = ts;
        if ((state->flags & flags) != flags)
                state->flags |= flags;

        port = htons(state->sport);
        src6 = state->ip6_dst;

        if (cfg->base.flags & NAT64_LOG) {
                logdata = &loghdr;
                nat64lsn_log(logdata, *mp, AF_INET, state);
        } else
                logdata = NULL;

        /*
         * We already have src6 with an embedded address, but it is
         * possible that src_ip differs from state->ip_dst; this is why
         * we do the embedding again.
         */
        nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
        ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
            &cfg->base, logdata);
        if (ret == NAT64SKIP)
                return (cfg->nomatch_verdict);
        if (ret == NAT64RETURN)
                *mp = NULL;
        return (IP_FW_DENY);
}

/*
 * Check if a particular state is stale and should be deleted.
 * Return 1 if true, 0 otherwise.
 */
static int
nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
{
        int age, ttl;

        /* State was marked as stale in a previous pass. */
        if (ISSET32(state->flags, NAT64_BIT_STALE))
                return (1);

        /* State is not yet initialized; it is going to become READY */
        if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
                return (0);

        age = GET_AGE(state->timestamp);
        switch (state->proto) {
        case IPPROTO_TCP:
                if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
                        ttl = cfg->st_close_ttl;
                else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
                        ttl = cfg->st_estab_ttl;
                else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
                        ttl = cfg->st_syn_ttl;
                else
                        ttl = cfg->st_syn_ttl;
                if (age > ttl)
                        return (1);
                break;
        case IPPROTO_UDP:
                if (age > cfg->st_udp_ttl)
                        return (1);
                break;
        case IPPROTO_ICMP:
                if (age > cfg->st_icmp_ttl)
                        return (1);
                break;
        }
        return (0);
}

static int
nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
        struct nat64lsn_state *state;
        struct nat64lsn_host *host;
        uint64_t freemask;
        int c, i, update_age;

        update_age = 0;
        for (c = 0; c < pg->chunks_count; c++) {
                FREEMASK_COPY(pg, c, freemask);
                for (i = 0; i < 64; i++) {
                        if (ISSET64(freemask, i))
                                continue;
                        state = &STATES_CHUNK(pg, c)->state[i];
                        if (nat64lsn_check_state(cfg, state) == 0) {
                                update_age = 1;
                                continue;
                        }
                        /*
                         * Expire state:
                         * 1. Mark as STALE and unlink from host's hash.
                         * 2. Set bit in freemask.
                         */
                        if (ISSET32(state->flags, NAT64_BIT_STALE)) {
                                /*
                                 * State was marked as STALE in a previous
                                 * pass. Now it is safe to release it.
                                 */
                                state->flags = 0;
                                ck_pr_fence_store();
                                FREEMASK_BTS(pg, c, i);
                                NAT64STAT_INC(&cfg->base.stats, sdeleted);
                                continue;
                        }
                        MPASS(state->flags & NAT64_FLAG_READY);

                        host = state->host;
                        HOST_LOCK(host);
                        CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
                            state, nat64lsn_state, entries);
                        host->states_count--;
                        HOST_UNLOCK(host);

                        /* Reset READY flag */
                        ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
                        /* And set STALE flag */
                        ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
                        ck_pr_fence_store();
                        /*
                         * Now translate6 will not use this state; wait
                         * until it becomes safe for translate4, then mark
                         * the state as free.
                         */
                }
        }

        /*
         * If we still have some live states, update the timestamp.
         */
        if (update_age)
                SET_AGE(pg->timestamp);

        if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
                return (0);

        return (1);
}

static void
nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
    struct nat64lsn_pg_slist *portgroups)
{
        struct nat64lsn_alias *alias;
        struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
        uint32_t *pgmask, *pgidx;
        int i, idx;

        for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
                alias = &cfg->aliases[i];
                CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
                        if (nat64lsn_maintain_pg(cfg, pg) == 0)
                                continue;
                        /* Always keep first PG */
                        if (pg->base_port == NAT64_MIN_PORT)
                                continue;
                        /*
                         * PG is expired, unlink it and schedule for
                         * deferred destroying.
                         */
                        idx = (pg->base_port - NAT64_MIN_PORT) / 64;
                        switch (pg->proto) {
                        case IPPROTO_TCP:
                                pgmask = alias->tcp_pgmask;
                                pgptr = &alias->tcp_pg;
                                pgidx = &alias->tcp_pgidx;
                                firstpg = alias->tcp[0]->pgptr[0];
                                break;
                        case IPPROTO_UDP:
                                pgmask = alias->udp_pgmask;
                                pgptr = &alias->udp_pg;
                                pgidx = &alias->udp_pgidx;
                                firstpg = alias->udp[0]->pgptr[0];
                                break;
                        case IPPROTO_ICMP:
                                pgmask = alias->icmp_pgmask;
                                pgptr = &alias->icmp_pg;
                                pgidx = &alias->icmp_pgidx;
                                firstpg = alias->icmp[0]->pgptr[0];
                                break;
                        }
                        /* Reset the corresponding bit in pgmask array. */
                        ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
                        ck_pr_fence_store();
                        /* If last used PG points to this PG, reset it. */
                        ck_pr_cas_ptr(pgptr, pg, firstpg);
                        ck_pr_cas_32(pgidx, idx, 0);
                        /* Unlink PG from alias's chain */
                        ALIAS_LOCK(alias);
                        CK_SLIST_REMOVE(&alias->portgroups, pg,
                            nat64lsn_pg, entries);
                        alias->portgroups_count--;
                        ALIAS_UNLOCK(alias);
                        /* And link to job's chain for deferred destroying */
                        NAT64STAT_INC(&cfg->base.stats, spgdeleted);
                        CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
                }
        }
}

static void
nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
    struct nat64lsn_hosts_slist *hosts)
{
        struct nat64lsn_host *host, *tmp;
        int i;

        for (i = 0; i < cfg->hosts_hashsize; i++) {
                CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
                    entries, tmp) {
                        /* Was the host marked in a previous call? */
                        if (host->flags & NAT64LSN_DEADHOST) {
                                if (host->states_count > 0) {
                                        host->flags &= ~NAT64LSN_DEADHOST;
                                        continue;
                                }
                                /*
                                 * Unlink host from hash table and schedule
                                 * it for deferred destroying.
                                 */
                                CFG_LOCK(cfg);
                                CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
                                    nat64lsn_host, entries);
                                cfg->hosts_count--;
                                CFG_UNLOCK(cfg);
                                CK_SLIST_INSERT_HEAD(hosts, host, entries);
                                continue;
                        }
                        if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
                                continue;
                        if (host->states_count > 0)
                                continue;
                        /* Mark host as going to be expired in the next pass */
                        host->flags |= NAT64LSN_DEADHOST;
                        ck_pr_fence_store();
                }
        }
}

static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
        struct nat64lsn_alias *alias;
        struct nat64lsn_pgchunk *chunk;
        uint32_t pgmask;
        int i, c;

        for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
                alias = &cfg->aliases[i];
                if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
                        continue;
                /* Always keep single chunk allocated */
                for (c = 1; c < 32; c++) {
                        if ((alias->tcp_chunkmask & (1 << c)) == 0)
                                break;
                        chunk = ck_pr_load_ptr(&alias->tcp[c]);
                        if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
                                continue;
                        ck_pr_btr_32(&alias->tcp_chunkmask, c);
                        ck_pr_fence_load();
                        if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
                                continue;
                }
        }
#endif
        return (NULL);
}

#if 0
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
        struct nat64lsn_host *h;
        struct nat64lsn_states_slist *hash;
        int i, j, hsize;

        for (i = 0; i < cfg->hosts_hashsize; i++) {
                CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
                         if (h->states_count / 2 < h->states_hashsize ||
                             h->states_hashsize >= NAT64LSN_MAX_HSIZE)
                                 continue;
                         hsize = h->states_hashsize * 2;
                         hash = malloc(sizeof(*hash) * hsize, M_NAT64LSN,
                             M_NOWAIT);
                         if (hash == NULL)
                                 continue;
                         for (j = 0; j < hsize; j++)
                                CK_SLIST_INIT(&hash[j]);

                         ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
                }
        }
}
#endif

/*
 * This procedure is used to perform various maintenance
 * on a dynamic hash list. Currently it is called every 4 seconds.
 */
static void
nat64lsn_periodic(void *data)
{
        struct nat64lsn_job_item *ji;
        struct nat64lsn_cfg *cfg;

        cfg = (struct nat64lsn_cfg *) data;
        CURVNET_SET(cfg->vp);
        if (cfg->hosts_count > 0) {
                ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
                if (ji != NULL) {
                        ji->jtype = JTYPE_DESTROY;
                        CK_SLIST_INIT(&ji->hosts);
                        CK_SLIST_INIT(&ji->portgroups);
                        nat64lsn_expire_hosts(cfg, &ji->hosts);
                        nat64lsn_expire_portgroups(cfg, &ji->portgroups);
                        ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
                        NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
                            nat64lsn_job_destroy);
                } else
                        NAT64STAT_INC(&cfg->base.stats, jnomem);
        }
        callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
        CURVNET_RESTORE();
}

#define ALLOC_ERROR(stage, type)        ((stage) ? 10 * (type) + (stage) : 0)
#define HOST_ERROR(stage)               ALLOC_ERROR(stage, 1)
#define PG_ERROR(stage)                 ALLOC_ERROR(stage, 2)
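
/*
 * The encoding gives every failure point a distinct small number:
 * e.g. HOST_ERROR(2) == 12 (host allocation, stage 2) and
 * PG_ERROR(3) == 23 (PG allocation, stage 3), while stage 0 maps to 0
 * and means success for both HOST_ERROR() and PG_ERROR().
 */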
static int
nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
        char a[INET6_ADDRSTRLEN];
        struct nat64lsn_aliaslink *link;
        struct nat64lsn_host *host;
        struct nat64lsn_state *state;
        uint32_t hval, data[2];
        int i;

        /* Check that the host was not yet added. */
        NAT64LSN_EPOCH_ASSERT();
        CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
                if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
                        /* The host was allocated in a previous call. */
                        ji->host = host;
                        goto get_state;
                }
        }

        host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
        if (ji->host == NULL)
                return (HOST_ERROR(1));

        host->states_hashsize = NAT64LSN_HSIZE;
        host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
            host->states_hashsize, M_NAT64LSN, M_NOWAIT);
        if (host->states_hash == NULL) {
                uma_zfree(nat64lsn_host_zone, host);
                return (HOST_ERROR(2));
        }

        link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
        if (link == NULL) {
                free(host->states_hash, M_NAT64LSN);
                uma_zfree(nat64lsn_host_zone, host);
                return (HOST_ERROR(3));
        }

        /* Initialize */
        HOST_LOCK_INIT(host);
        SET_AGE(host->timestamp);
        host->addr = ji->f_id.src_ip6;
        host->hval = ji->src6_hval;
        host->flags = 0;
        host->states_count = 0;
        host->states_hashsize = NAT64LSN_HSIZE;
        CK_SLIST_INIT(&host->aliases);
        for (i = 0; i < host->states_hashsize; i++)
                CK_SLIST_INIT(&host->states_hash[i]);

        /* Determine alias from flow hash. */
        hval = ALIASLINK_HVAL(cfg, &ji->f_id);
        link->alias = &ALIAS_BYHASH(cfg, hval);
        CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);

        ALIAS_LOCK(link->alias);
        CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
        link->alias->hosts_count++;
        ALIAS_UNLOCK(link->alias);

        CFG_LOCK(cfg);
        CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
        cfg->hosts_count++;
        CFG_UNLOCK(cfg);

get_state:
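        /*
         * Hash the (remote addr, remote port, local port) tuple; this
         * must match the hashing used by the translate6 lookup path so
         * that the state created here can be found again.
         */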
        data[0] = ji->faddr;
        data[1] = (ji->f_id.dst_port << 16) | ji->port;
        ji->state_hval = hval = STATE_HVAL(cfg, data);
        state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
            ji->faddr, ji->port, ji->proto);
        /*
         * We failed to obtain a new state; the alias we used needs
         * a new PG.
         * XXX: or another alias should be used.
         */
        if (state == NULL) {
                /* Try to allocate a new PG */
                if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
                        return (HOST_ERROR(4));
                /* We assume that nat64lsn_alloc_pg() got the state */
        } else
                ji->state = state;

        ji->done = 1;
        DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
            inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
        return (HOST_ERROR(0));
}

static int
nat64lsn_find_pg_place(uint32_t *data)
{
        int i;

        for (i = 0; i < 32; i++) {
                if (~data[i] == 0)
                        continue;
                return (i * 32 + ffs(~data[i]) - 1);
        }
        return (-1);
}
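
/*
 * For example, with data[0] == 0xffffffff and data[1] == 0x00000007
 * the first chunk is fully populated and bits 0-2 of the second are
 * taken, so ffs(~data[1]) == 4 and the function returns
 * 1 * 32 + 4 - 1 == 35, the index of the first free PG slot.
 */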

static int
nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_alias *alias, uint32_t *chunkmask,
    uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
    struct nat64lsn_pg **pgptr, uint8_t proto)
{
        struct nat64lsn_pg *pg;
        int i, pg_idx, chunk_idx;

        /* Find a place in pgchunk where the PG can be added */
        pg_idx = nat64lsn_find_pg_place(pgmask);
        if (pg_idx < 0) /* no more PGs */
                return (PG_ERROR(1));
        /* Check that we have allocated a pgchunk for the given PG index */
        chunk_idx = pg_idx / 32;
        if (!ISSET32(*chunkmask, chunk_idx)) {
                chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
                    M_NOWAIT);
                if (chunks[chunk_idx] == NULL)
                        return (PG_ERROR(2));
                ck_pr_bts_32(chunkmask, chunk_idx);
                ck_pr_fence_store();
        }
        /* Allocate PG and states chunks */
        pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
        if (pg == NULL)
                return (PG_ERROR(3));
        pg->chunks_count = cfg->states_chunks;
        if (pg->chunks_count > 1) {
                pg->freemask_chunk = malloc(pg->chunks_count *
                    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
                if (pg->freemask_chunk == NULL) {
                        uma_zfree(nat64lsn_pg_zone, pg);
                        return (PG_ERROR(4));
                }
                pg->states_chunk = malloc(pg->chunks_count *
                    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
                    M_NOWAIT | M_ZERO);
                if (pg->states_chunk == NULL) {
                        free(pg->freemask_chunk, M_NAT64LSN);
                        uma_zfree(nat64lsn_pg_zone, pg);
                        return (PG_ERROR(5));
                }
                for (i = 0; i < pg->chunks_count; i++) {
                        pg->states_chunk[i] = uma_zalloc(
                            nat64lsn_state_zone, M_NOWAIT);
                        if (pg->states_chunk[i] == NULL)
                                goto states_failed;
                }
                memset(pg->freemask_chunk, 0xff,
                    sizeof(uint64_t) * pg->chunks_count);
        } else {
                pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
                if (pg->states == NULL) {
                        uma_zfree(nat64lsn_pg_zone, pg);
                        return (PG_ERROR(6));
                }
                memset(&pg->freemask64, 0xff, sizeof(uint64_t));
        }

        /* Initialize PG and hook it to the pgchunk */
        SET_AGE(pg->timestamp);
        pg->proto = proto;
        pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
        ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
        ck_pr_fence_store();
        ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
        ck_pr_store_ptr(pgptr, pg);

        ALIAS_LOCK(alias);
        CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
        SET_AGE(alias->timestamp);
        alias->portgroups_count++;
        ALIAS_UNLOCK(alias);
        NAT64STAT_INC(&cfg->base.stats, spgcreated);
        return (PG_ERROR(0));

states_failed:
        for (i = 0; i < pg->chunks_count; i++)
                uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
        free(pg->freemask_chunk, M_NAT64LSN);
        free(pg->states_chunk, M_NAT64LSN);
        uma_zfree(nat64lsn_pg_zone, pg);
        return (PG_ERROR(7));
}

static int
nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
        struct nat64lsn_aliaslink *link;
        struct nat64lsn_alias *alias;
        int ret;

        link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
        if (link == NULL)
                return (PG_ERROR(1));

        /*
         * TODO: check that we did not already allocate a PG in
         *       a previous call.
         */

        ret = 0;
        alias = link->alias;
        /* Find a place in pgchunk where the PG can be added */
        switch (ji->proto) {
        case IPPROTO_TCP:
                ret = nat64lsn_alloc_proto_pg(cfg, alias,
                    &alias->tcp_chunkmask, alias->tcp_pgmask,
                    alias->tcp, &alias->tcp_pg, ji->proto);
                break;
        case IPPROTO_UDP:
                ret = nat64lsn_alloc_proto_pg(cfg, alias,
                    &alias->udp_chunkmask, alias->udp_pgmask,
                    alias->udp, &alias->udp_pg, ji->proto);
                break;
        case IPPROTO_ICMP:
                ret = nat64lsn_alloc_proto_pg(cfg, alias,
                    &alias->icmp_chunkmask, alias->icmp_pgmask,
                    alias->icmp, &alias->icmp_pg, ji->proto);
                break;
        default:
                panic("%s: wrong proto %d", __func__, ji->proto);
        }
        if (ret == PG_ERROR(1)) {
                /*
                 * PG_ERROR(1) means that the alias lacks free PGs.
                 * XXX: try the next alias.
                 */
                printf("NAT64LSN: %s: failed to obtain PG\n",
                    __func__);
                return (ret);
        }
        if (ret == PG_ERROR(0)) {
                ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
                    ji->state_hval, ji->faddr, ji->port, ji->proto);
                if (ji->state == NULL)
                        ret = PG_ERROR(8);
                else
                        ji->done = 1;
        }
        return (ret);
}

static void
nat64lsn_do_request(void *data)
{
        struct epoch_tracker et;
        struct nat64lsn_job_head jhead;
        struct nat64lsn_job_item *ji, *ji2;
        struct nat64lsn_cfg *cfg;
        int jcount;
        uint8_t flags;

        cfg = (struct nat64lsn_cfg *)data;
        if (cfg->jlen == 0)
                return;

        CURVNET_SET(cfg->vp);
        STAILQ_INIT(&jhead);

        /* Grab queue */
        JQUEUE_LOCK();
        STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
        jcount = cfg->jlen;
        cfg->jlen = 0;
        JQUEUE_UNLOCK();

        /* TODO: check if we need to resize the hash */

        NAT64STAT_INC(&cfg->base.stats, jcalls);
        DPRINTF(DP_JQUEUE, "count=%d", jcount);

        /*
         * TODO:
         * What we should do here is to build a hash
         * to ensure we don't have lots of duplicate requests.
         * Skip this for now.
         *
         * TODO: Limit the per-call number of items.
         */

        NAT64LSN_EPOCH_ENTER(et);
        STAILQ_FOREACH(ji, &jhead, entries) {
                switch (ji->jtype) {
                case JTYPE_NEWHOST:
                        if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
                                NAT64STAT_INC(&cfg->base.stats, jhostfails);
                        break;
                case JTYPE_NEWPORTGROUP:
                        if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
                                NAT64STAT_INC(&cfg->base.stats, jportfails);
                        break;
                default:
                        continue;
                }
                if (ji->done != 0) {
                        flags = ji->proto != IPPROTO_TCP ? 0 :
                            convert_tcp_flags(ji->f_id._flags);
                        nat64lsn_translate6_internal(cfg, &ji->m,
                            ji->state, flags);
                        NAT64STAT_INC(&cfg->base.stats, jreinjected);
                }
        }
        NAT64LSN_EPOCH_EXIT(et);

        ji = STAILQ_FIRST(&jhead);
        while (ji != NULL) {
                ji2 = STAILQ_NEXT(ji, entries);
                /*
                 * In any case we must free the mbuf if the
                 * translator did not consume it.
                 */
                m_freem(ji->m);
                uma_zfree(nat64lsn_job_zone, ji);
                ji = ji2;
        }
        CURVNET_RESTORE();
}

static struct nat64lsn_job_item *
nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
{
        struct nat64lsn_job_item *ji;

        /*
         * Do not try to lock a possibly contested mutex if we're near
         * the limit. Drop the packet instead.
         */
        ji = NULL;
        if (cfg->jlen >= cfg->jmaxlen)
                NAT64STAT_INC(&cfg->base.stats, jmaxlen);
        else {
                ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
                if (ji == NULL)
                        NAT64STAT_INC(&cfg->base.stats, jnomem);
        }
        if (ji == NULL) {
                NAT64STAT_INC(&cfg->base.stats, dropped);
                DPRINTF(DP_DROPS, "failed to create job");
        } else {
                ji->jtype = jtype;
                ji->done = 0;
        }
        return (ji);
}

static void
nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{

        JQUEUE_LOCK();
        STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
        NAT64STAT_INC(&cfg->base.stats, jrequests);
        cfg->jlen++;

        if (callout_pending(&cfg->jcallout) == 0)
                callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
        JQUEUE_UNLOCK();
}
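
/*
 * Jobs run from a callout scheduled one tick ahead, so the packet that
 * triggered the request travels with the job item (ji->m) and is either
 * reinjected through nat64lsn_translate6_internal() or freed from
 * nat64lsn_do_request(), never from the fast path itself.
 */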

static void
nat64lsn_job_destroy(epoch_context_t ctx)
{
        struct nat64lsn_job_item *ji;
        struct nat64lsn_host *host;
        struct nat64lsn_pg *pg;
        int i;

        ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
        MPASS(ji->jtype == JTYPE_DESTROY);
        while (!CK_SLIST_EMPTY(&ji->hosts)) {
                host = CK_SLIST_FIRST(&ji->hosts);
                CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
                if (host->states_count > 0) {
                        /*
                         * XXX: The state has been created
                         * during host deletion.
                         */
                        printf("NAT64LSN: %s: destroying host with %d "
                            "states\n", __func__, host->states_count);
                }
                nat64lsn_destroy_host(host);
        }
        while (!CK_SLIST_EMPTY(&ji->portgroups)) {
                pg = CK_SLIST_FIRST(&ji->portgroups);
                CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
                for (i = 0; i < pg->chunks_count; i++) {
                        if (FREEMASK_BITCOUNT(pg, i) != 64) {
                                /*
                                 * XXX: The state has been created during
                                 * PG deletion.
                                 */
                                printf("NAT64LSN: %s: destroying PG %p "
                                    "with non-empty chunk %d\n", __func__,
                                    pg, i);
                        }
                }
                nat64lsn_destroy_pg(pg);
        }
        uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
        uma_zfree(nat64lsn_job_zone, ji);
}

static int
nat64lsn_request_host(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
        struct nat64lsn_job_item *ji;

        ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
        if (ji != NULL) {
                ji->m = *mp;
                ji->f_id = *f_id;
                ji->faddr = faddr;
                ji->port = port;
                ji->proto = proto;
                ji->src6_hval = hval;

                nat64lsn_enqueue_job(cfg, ji);
                NAT64STAT_INC(&cfg->base.stats, jhostsreq);
                *mp = NULL;
        }
        return (IP_FW_DENY);
}

static int
nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
        struct nat64lsn_job_item *ji;

        ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
        if (ji != NULL) {
                ji->m = *mp;
                ji->f_id = *f_id;
                ji->faddr = faddr;
                ji->port = port;
                ji->proto = proto;
                ji->state_hval = hval;
                ji->host = host;

                nat64lsn_enqueue_job(cfg, ji);
                NAT64STAT_INC(&cfg->base.stats, jportreq);
                *mp = NULL;
        }
        return (IP_FW_DENY);
}
1401
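/*
 * Translate an IPv6 packet using an established state entry,
 * refreshing the state's timestamp and TCP flags first.
 */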
static int
nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
    struct nat64lsn_state *state, uint8_t flags)
{
        struct pfloghdr loghdr, *logdata;
        int ret;
        uint16_t ts;

        /* Update timestamp and flags if needed */
        SET_AGE(ts);
        if (state->timestamp != ts)
                state->timestamp = ts;
        if ((state->flags & flags) != flags)
                state->flags |= flags;

        if (cfg->base.flags & NAT64_LOG) {
                logdata = &loghdr;
                nat64lsn_log(logdata, *mp, AF_INET6, state);
        } else
                logdata = NULL;

        ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
            htons(state->aport), &cfg->base, logdata);
        if (ret == NAT64SKIP)
                return (cfg->nomatch_verdict);
        if (ret == NAT64RETURN)
                *mp = NULL;
        return (IP_FW_DENY);
}

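/*
 * IPv6-to-IPv4 lookup path: validate the protocol, extract the
 * IPv4 address embedded in the destination, then find (or request
 * creation of) the host and state before translating.
 */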
static int
nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
    struct mbuf **mp)
{
        struct nat64lsn_state *state;
        struct nat64lsn_host *host;
        struct icmp6_hdr *icmp6;
        uint32_t addr, hval, data[2];
        int offset, proto;
        uint16_t port;
        uint8_t flags;

        /* Check if protocol is supported */
        port = f_id->src_port;
        proto = f_id->proto;
        switch (f_id->proto) {
        case IPPROTO_ICMPV6:
                /*
                 * For ICMPv6 echo reply/request we use icmp6_id as
                 * local port.
                 */
                offset = 0;
                proto = nat64_getlasthdr(*mp, &offset);
                if (proto < 0) {
                        NAT64STAT_INC(&cfg->base.stats, dropped);
                        DPRINTF(DP_DROPS, "mbuf isn't contiguous");
                        return (IP_FW_DENY);
                }
                if (proto == IPPROTO_ICMPV6) {
                        icmp6 = mtodo(*mp, offset);
                        if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
                            icmp6->icmp6_type == ICMP6_ECHO_REPLY)
                                port = ntohs(icmp6->icmp6_id);
                }
                proto = IPPROTO_ICMP;
                /* FALLTHROUGH */
        case IPPROTO_TCP:
        case IPPROTO_UDP:
                break;
        default:
                NAT64STAT_INC(&cfg->base.stats, noproto);
                return (cfg->nomatch_verdict);
        }

        /* Extract IPv4 from destination IPv6 address */
        addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
        if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
                char a[INET_ADDRSTRLEN];

                NAT64STAT_INC(&cfg->base.stats, dropped);
                DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
                    inet_ntop(AF_INET, &addr, a, sizeof(a)));
                return (IP_FW_DENY); /* XXX: add extra stats? */
        }

        /* Try to find host */
        hval = HOST_HVAL(cfg, &f_id->src_ip6);
        CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
                if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
                        break;
        }
        /* We use IPv4 address in host byte order */
        addr = ntohl(addr);
        if (host == NULL)
                return (nat64lsn_request_host(cfg, f_id, mp,
                    hval, addr, port, proto));

        flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);

        data[0] = addr;
        data[1] = (f_id->dst_port << 16) | port;
        hval = STATE_HVAL(cfg, data);
        state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
            port, proto);
        if (state == NULL)
                return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
                    port, proto));
        return (nat64lsn_translate6_internal(cfg, mp, state, flags));
}

/*
 * Main dataplane entry point: dispatch the packet to the IPv4 or
 * IPv6 translation path within an epoch section.
 */
int
ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
    ipfw_insn *cmd, int *done)
{
        struct epoch_tracker et;
        struct nat64lsn_cfg *cfg;
        ipfw_insn *icmd;
        int ret;

        IPFW_RLOCK_ASSERT(ch);

        *done = 0;      /* continue the search in case of failure */
        icmd = cmd + 1;
        if (cmd->opcode != O_EXTERNAL_ACTION ||
            cmd->arg1 != V_nat64lsn_eid ||
            icmd->opcode != O_EXTERNAL_INSTANCE ||
            (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
                return (IP_FW_DENY);

        *done = 1;      /* terminate the search */

        NAT64LSN_EPOCH_ENTER(et);
        switch (args->f_id.addr_type) {
        case 4:
                ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
                break;
        case 6:
                /*
                 * Check that destination IPv6 address matches our prefix6.
                 */
                if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
                    memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
                    cfg->base.plat_plen / 8) != 0) {
                        ret = cfg->nomatch_verdict;
                        break;
                }
                ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
                break;
        default:
                ret = cfg->nomatch_verdict;
        }
        NAT64LSN_EPOCH_EXIT(et);

        if (ret != IP_FW_PASS && args->m != NULL) {
                m_freem(args->m);
                args->m = NULL;
        }
        return (ret);
}

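/*
 * UMA constructor for state chunks: make sure every state starts
 * with cleared flags.
 */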
static int
nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
{
        struct nat64lsn_states_chunk *chunk;
        int i;

        chunk = (struct nat64lsn_states_chunk *)mem;
        for (i = 0; i < 64; i++)
                chunk->state[i].flags = 0;
        return (0);
}

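/*
 * Module-wide initialization: allocate the preemptible epoch and
 * the UMA zones shared by all NAT64LSN instances.
 */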
void
nat64lsn_init_internal(void)
{

        nat64lsn_epoch = epoch_alloc(EPOCH_PREEMPT);

        nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
            sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, 0);
        nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
            sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, 0);
        nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
            sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, 0);
        nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
            sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, 0);
        nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
            sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
            NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
            sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, 0);
        JQUEUE_LOCK_INIT();
}

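/*
 * Module-wide teardown, mirroring nat64lsn_init_internal().
 */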
void
nat64lsn_uninit_internal(void)
{

        /* XXX: epoch_task drain */
        epoch_free(nat64lsn_epoch);

        JQUEUE_LOCK_DESTROY();
        uma_zdestroy(nat64lsn_host_zone);
        uma_zdestroy(nat64lsn_pgchunk_zone);
        uma_zdestroy(nat64lsn_pg_zone);
        uma_zdestroy(nat64lsn_aliaslink_zone);
        uma_zdestroy(nat64lsn_state_zone);
        uma_zdestroy(nat64lsn_job_zone);
}

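/*
 * Arm the periodic housekeeping callout for an instance.
 */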
void
nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
{

        CALLOUT_LOCK(cfg);
        callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
            nat64lsn_periodic, cfg);
        CALLOUT_UNLOCK(cfg);
}

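/*
 * Allocate and initialize an instance: the host hash, one alias
 * entry per IPv4 address in the configured prefix and the two
 * callouts.
 */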
struct nat64lsn_cfg *
nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
{
        struct nat64lsn_cfg *cfg;
        struct nat64lsn_alias *alias;
        int i, naddr;

        cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
            M_WAITOK | M_ZERO);

        CFG_LOCK_INIT(cfg);
        CALLOUT_LOCK_INIT(cfg);
        STAILQ_INIT(&cfg->jhead);
        cfg->vp = curvnet;
        COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);

        cfg->hash_seed = arc4random();
        cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
        cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
            cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
        for (i = 0; i < cfg->hosts_hashsize; i++)
                CK_SLIST_INIT(&cfg->hosts_hash[i]);

        naddr = 1 << (32 - plen);
        cfg->prefix4 = prefix;
        cfg->pmask4 = prefix | (naddr - 1);
        cfg->plen4 = plen;
        cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
            M_NAT64LSN, M_WAITOK | M_ZERO);
        for (i = 0; i < naddr; i++) {
                alias = &cfg->aliases[i];
                alias->addr = prefix + i; /* host byte order */
                CK_SLIST_INIT(&alias->hosts);
                ALIAS_LOCK_INIT(alias);
        }

        callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
        callout_init(&cfg->jcallout, CALLOUT_MPSAFE);

        return (cfg);
}

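/*
 * Free a port group and its state chunks; multi-chunk groups also
 * carry separately allocated chunk and freemask arrays.
 */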
static void
nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
{
        int i;

        if (pg->chunks_count == 1) {
                uma_zfree(nat64lsn_state_zone, pg->states);
        } else {
                for (i = 0; i < pg->chunks_count; i++)
                        uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
                free(pg->states_chunk, M_NAT64LSN);
                free(pg->freemask_chunk, M_NAT64LSN);
        }
        uma_zfree(nat64lsn_pg_zone, pg);
}

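/*
 * Release every port group of an alias together with the
 * per-protocol pgchunk arrays tracked by the chunk masks.
 */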
static void
nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
    struct nat64lsn_alias *alias)
{
        struct nat64lsn_pg *pg;
        int i;

        while (!CK_SLIST_EMPTY(&alias->portgroups)) {
                pg = CK_SLIST_FIRST(&alias->portgroups);
                CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
                nat64lsn_destroy_pg(pg);
        }
        for (i = 0; i < 32; i++) {
                if (ISSET32(alias->tcp_chunkmask, i))
                        uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
                if (ISSET32(alias->udp_chunkmask, i))
                        uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
                if (ISSET32(alias->icmp_chunkmask, i))
                        uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
        }
        ALIAS_LOCK_DESTROY(alias);
}

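/*
 * Unlink a host from all of its aliases, then free its states hash
 * and the host itself.
 */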
static void
nat64lsn_destroy_host(struct nat64lsn_host *host)
{
        struct nat64lsn_aliaslink *link;

        while (!CK_SLIST_EMPTY(&host->aliases)) {
                link = CK_SLIST_FIRST(&host->aliases);
                CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);

                ALIAS_LOCK(link->alias);
                CK_SLIST_REMOVE(&link->alias->hosts, link,
                    nat64lsn_aliaslink, alias_entries);
                link->alias->hosts_count--;
                ALIAS_UNLOCK(link->alias);

                uma_zfree(nat64lsn_aliaslink_zone, link);
        }
        HOST_LOCK_DESTROY(host);
        free(host->states_hash, M_NAT64LSN);
        uma_zfree(nat64lsn_host_zone, host);
}

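/*
 * Full instance teardown: drain both callouts, destroy all hosts
 * and aliases, then release locks, counters and backing memory.
 */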
void
nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
{
        struct nat64lsn_host *host;
        int i;

        CALLOUT_LOCK(cfg);
        callout_drain(&cfg->periodic);
        CALLOUT_UNLOCK(cfg);
        callout_drain(&cfg->jcallout);

        for (i = 0; i < cfg->hosts_hashsize; i++) {
                while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
                        host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
                        CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
                        nat64lsn_destroy_host(host);
                }
        }

        for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
                nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);

        CALLOUT_LOCK_DESTROY(cfg);
        CFG_LOCK_DESTROY(cfg);
        COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
        free(cfg->hosts_hash, M_NAT64LSN);
        free(cfg->aliases, M_NAT64LSN);
        free(cfg, M_NAT64LSN);
}