From e171491f01d440154c08f060219ffa1895adb3c9 Mon Sep 17 00:00:00 2001 From: ae Date: Mon, 18 Mar 2019 14:00:19 +0000 Subject: [PATCH] Revert r345274. It appears that not all 32-bit architectures have necessary CK primitives. --- sbin/ipfw/ipfw.8 | 28 +- sbin/ipfw/ipfw2.h | 1 - sbin/ipfw/nat64lsn.c | 124 +- sys/conf/files | 4 +- sys/modules/ipfw_nat64/Makefile | 2 - sys/netinet6/ip_fw_nat64.h | 45 +- sys/netpfil/ipfw/nat64/nat64lsn.c | 2493 +++++++++++---------- sys/netpfil/ipfw/nat64/nat64lsn.h | 406 ++-- sys/netpfil/ipfw/nat64/nat64lsn_control.c | 429 ++-- 9 files changed, 1821 insertions(+), 1711 deletions(-) diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8 index dbe3515164a..31448aff92b 100644 --- a/sbin/ipfw/ipfw.8 +++ b/sbin/ipfw/ipfw.8 @@ -3300,7 +3300,6 @@ See .Sx SYSCTL VARIABLES for more info. .Sh IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION -.Ss Stateful translation .Nm supports in-kernel IPv6/IPv4 network address and protocol translation. Stateful NAT64 translation allows IPv6-only clients to contact IPv4 servers @@ -3318,8 +3317,7 @@ to be able use stateful NAT64 translator. Stateful NAT64 uses a bunch of memory for several types of objects. When IPv6 client initiates connection, NAT64 translator creates a host entry in the states table. -Each host entry uses preallocated IPv4 alias entry. -Each alias entry has a number of ports group entries allocated on demand. +Each host entry has a number of ports group entries allocated on demand. Ports group entries contains connection state entries. There are several options to control limits and lifetime for these objects. .Pp @@ -3339,11 +3337,6 @@ First time an original packet is handled and consumed by translator, and then it is handled again as translated packet. This behavior can be changed by sysctl variable .Va net.inet.ip.fw.nat64_direct_output . -Also translated packet can be tagged using -.Cm tag -rule action, and then matched by -.Cm tagged -opcode to avoid loops and extra overhead. 
.Pp The stateful NAT64 configuration command is the following: .Bd -ragged -offset indent @@ -3371,16 +3364,15 @@ to represent IPv4 addresses. This IPv6 prefix should be configured in DNS64. The translator implementation follows RFC6052, that restricts the length of prefixes to one of following: 32, 40, 48, 56, 64, or 96. The Well-Known IPv6 Prefix 64:ff9b:: must be 96 bits long. -The special -.Ar ::/length -prefix can be used to handle several IPv6 prefixes with one NAT64 instance. -The NAT64 instance will determine a destination IPv4 address from prefix -.Ar length . -.It Cm states_chunks Ar number -The number of states chunks in single ports group. -Each ports group by default can keep 64 state entries in single chunk. -The above value affects the maximum number of states that can be associated with single IPv4 alias address and port. -The value must be power of 2, and up to 128. +.It Cm max_ports Ar number +Maximum number of ports reserved for upper level protocols to one IPv6 client. +All reserved ports are divided into chunks between supported protocols. +The number of connections from one IPv6 client is limited by this option. +Note that closed TCP connections still remain in the list of connections until +.Cm tcp_close_age +interval will not expire. +Default value is +.Ar 2048 . .It Cm host_del_age Ar seconds The number of seconds until the host entry for a IPv6 client will be deleted and all its resources will be released due to inactivity. 
diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h index 2b562734d15..ff6990ae1c0 100644 --- a/sbin/ipfw/ipfw2.h +++ b/sbin/ipfw/ipfw2.h @@ -278,7 +278,6 @@ enum tokens { TOK_AGG_LEN, TOK_AGG_COUNT, TOK_MAX_PORTS, - TOK_STATES_CHUNKS, TOK_JMAXLEN, TOK_PORT_RANGE, TOK_HOST_DEL_AGE, diff --git a/sbin/ipfw/nat64lsn.c b/sbin/ipfw/nat64lsn.c index 4a6d7a7914c..c6a89257281 100644 --- a/sbin/ipfw/nat64lsn.c +++ b/sbin/ipfw/nat64lsn.c @@ -87,70 +87,68 @@ nat64lsn_print_states(void *buf) char sflags[4], *sf, *proto; ipfw_obj_header *oh; ipfw_obj_data *od; - ipfw_nat64lsn_stg_v1 *stg; - ipfw_nat64lsn_state_v1 *ste; + ipfw_nat64lsn_stg *stg; + ipfw_nat64lsn_state *ste; uint64_t next_idx; int i, sz; oh = (ipfw_obj_header *)buf; od = (ipfw_obj_data *)(oh + 1); - stg = (ipfw_nat64lsn_stg_v1 *)(od + 1); + stg = (ipfw_nat64lsn_stg *)(od + 1); sz = od->head.length - sizeof(*od); next_idx = 0; while (sz > 0 && next_idx != 0xFF) { - next_idx = stg->next.index; + next_idx = stg->next_idx; sz -= sizeof(*stg); if (stg->count == 0) { stg++; continue; } - /* - * NOTE: addresses are in network byte order, - * ports are in host byte order. 
- */ + switch (stg->proto) { + case IPPROTO_TCP: + proto = "TCP"; + break; + case IPPROTO_UDP: + proto = "UDP"; + break; + case IPPROTO_ICMPV6: + proto = "ICMPv6"; + break; + } + inet_ntop(AF_INET6, &stg->host6, s, sizeof(s)); inet_ntop(AF_INET, &stg->alias4, a, sizeof(a)); - ste = (ipfw_nat64lsn_state_v1 *)(stg + 1); + ste = (ipfw_nat64lsn_state *)(stg + 1); for (i = 0; i < stg->count && sz > 0; i++) { sf = sflags; - inet_ntop(AF_INET6, &ste->host6, s, sizeof(s)); inet_ntop(AF_INET, &ste->daddr, f, sizeof(f)); - switch (ste->proto) { - case IPPROTO_TCP: - proto = "TCP"; + if (stg->proto == IPPROTO_TCP) { if (ste->flags & 0x02) *sf++ = 'S'; if (ste->flags & 0x04) *sf++ = 'E'; if (ste->flags & 0x01) *sf++ = 'F'; - break; - case IPPROTO_UDP: - proto = "UDP"; - break; - case IPPROTO_ICMP: - proto = "ICMPv6"; - break; } *sf = '\0'; - switch (ste->proto) { + switch (stg->proto) { case IPPROTO_TCP: case IPPROTO_UDP: printf("%s:%d\t%s:%d\t%s\t%s\t%d\t%s:%d\n", s, ste->sport, a, ste->aport, proto, sflags, ste->idle, f, ste->dport); break; - case IPPROTO_ICMP: + case IPPROTO_ICMPV6: printf("%s\t%s\t%s\t\t%d\t%s\n", s, a, proto, ste->idle, f); break; default: printf("%s\t%s\t%d\t\t%d\t%s\n", - s, a, ste->proto, ste->idle, f); + s, a, stg->proto, ste->idle, f); } ste++; sz -= sizeof(*ste); } - stg = (ipfw_nat64lsn_stg_v1 *)ste; + stg = (ipfw_nat64lsn_stg *)ste; } return (next_idx); } @@ -176,7 +174,6 @@ nat64lsn_states_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set) err(EX_OSERR, NULL); do { oh = (ipfw_obj_header *)buf; - oh->opheader.version = 1; /* Force using ov new API */ od = (ipfw_obj_data *)(oh + 1); nat64lsn_fill_ntlv(&oh->ntlv, cfg->name, set); od->head.type = IPFW_TLV_OBJDATA; @@ -366,8 +363,12 @@ nat64lsn_parse_int(const char *arg, const char *desc) static struct _s_x nat64newcmds[] = { { "prefix6", TOK_PREFIX6 }, + { "agg_len", TOK_AGG_LEN }, /* not yet */ + { "agg_count", TOK_AGG_COUNT }, /* not yet */ + { "port_range", TOK_PORT_RANGE }, /* not yet */ { 
"jmaxlen", TOK_JMAXLEN }, { "prefix4", TOK_PREFIX4 }, + { "max_ports", TOK_MAX_PORTS }, { "host_del_age", TOK_HOST_DEL_AGE }, { "pg_del_age", TOK_PG_DEL_AGE }, { "tcp_syn_age", TOK_TCP_SYN_AGE }, @@ -375,13 +376,10 @@ static struct _s_x nat64newcmds[] = { { "tcp_est_age", TOK_TCP_EST_AGE }, { "udp_age", TOK_UDP_AGE }, { "icmp_age", TOK_ICMP_AGE }, - { "states_chunks",TOK_STATES_CHUNKS }, { "log", TOK_LOG }, { "-log", TOK_LOGOFF }, { "allow_private", TOK_PRIVATE }, { "-allow_private", TOK_PRIVATEOFF }, - /* for compatibility with old configurations */ - { "max_ports", TOK_MAX_PORTS }, /* unused */ { NULL, 0 } }; @@ -438,10 +436,34 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av) nat64lsn_parse_prefix(*av, AF_INET6, &cfg->prefix6, &cfg->plen6); if (ipfw_check_nat64prefix(&cfg->prefix6, - cfg->plen6) != 0 && - !IN6_IS_ADDR_UNSPECIFIED(&cfg->prefix6)) + cfg->plen6) != 0) errx(EX_USAGE, "Bad prefix6 %s", *av); + ac--; av++; + break; +#if 0 + case TOK_AGG_LEN: + NEED1("Aggregation prefix len required"); + cfg->agg_prefix_len = nat64lsn_parse_int(*av, opt); + ac--; av++; + break; + case TOK_AGG_COUNT: + NEED1("Max per-prefix count required"); + cfg->agg_prefix_max = nat64lsn_parse_int(*av, opt); + ac--; av++; + break; + case TOK_PORT_RANGE: + NEED1("port range x[:y] required"); + if ((p = strchr(*av, ':')) == NULL) + cfg->min_port = (uint16_t)nat64lsn_parse_int( + *av, opt); + else { + *p++ = '\0'; + cfg->min_port = (uint16_t)nat64lsn_parse_int( + *av, opt); + cfg->max_port = (uint16_t)nat64lsn_parse_int( + p, opt); + } ac--; av++; break; case TOK_JMAXLEN: @@ -449,6 +471,7 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av) cfg->jmaxlen = nat64lsn_parse_int(*av, opt); ac--; av++; break; +#endif case TOK_MAX_PORTS: NEED1("Max per-user ports required"); cfg->max_ports = nat64lsn_parse_int(*av, opt); @@ -496,12 +519,6 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av) *av, opt); ac--; av++; break; - case 
TOK_STATES_CHUNKS: - NEED1("number of chunks required"); - cfg->states_chunks = (uint8_t)nat64lsn_parse_int( - *av, opt); - ac--; av++; - break; case TOK_LOG: cfg->flags |= NAT64_LOG; break; @@ -613,12 +630,6 @@ nat64lsn_config(const char *name, uint8_t set, int ac, char **av) *av, opt); ac--; av++; break; - case TOK_STATES_CHUNKS: - NEED1("number of chunks required"); - cfg->states_chunks = (uint8_t)nat64lsn_parse_int( - *av, opt); - ac--; av++; - break; case TOK_LOG: cfg->flags |= NAT64_LOG; break; @@ -778,24 +789,31 @@ nat64lsn_show_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set) printf("nat64lsn %s prefix4 %s/%u", cfg->name, abuf, cfg->plen4); inet_ntop(AF_INET6, &cfg->prefix6, abuf, sizeof(abuf)); printf(" prefix6 %s/%u", abuf, cfg->plen6); - if (co.verbose || cfg->states_chunks > 1) - printf(" states_chunks %u", cfg->states_chunks); - if (co.verbose || cfg->nh_delete_delay != NAT64LSN_HOST_AGE) +#if 0 + printf("agg_len %u agg_count %u ", cfg->agg_prefix_len, + cfg->agg_prefix_max); + if (cfg->min_port != NAT64LSN_PORT_MIN || + cfg->max_port != NAT64LSN_PORT_MAX) + printf(" port_range %u:%u", cfg->min_port, cfg->max_port); + if (cfg->jmaxlen != NAT64LSN_JMAXLEN) + printf(" jmaxlen %u ", cfg->jmaxlen); +#endif + if (cfg->max_ports != NAT64LSN_MAX_PORTS) + printf(" max_ports %u", cfg->max_ports); + if (cfg->nh_delete_delay != NAT64LSN_HOST_AGE) printf(" host_del_age %u", cfg->nh_delete_delay); - if (co.verbose || cfg->pg_delete_delay != NAT64LSN_PG_AGE) + if (cfg->pg_delete_delay != NAT64LSN_PG_AGE) printf(" pg_del_age %u ", cfg->pg_delete_delay); - if (co.verbose || cfg->st_syn_ttl != NAT64LSN_TCP_SYN_AGE) + if (cfg->st_syn_ttl != NAT64LSN_TCP_SYN_AGE) printf(" tcp_syn_age %u", cfg->st_syn_ttl); - if (co.verbose || cfg->st_close_ttl != NAT64LSN_TCP_FIN_AGE) + if (cfg->st_close_ttl != NAT64LSN_TCP_FIN_AGE) printf(" tcp_close_age %u", cfg->st_close_ttl); - if (co.verbose || cfg->st_estab_ttl != NAT64LSN_TCP_EST_AGE) + if (cfg->st_estab_ttl != 
NAT64LSN_TCP_EST_AGE) printf(" tcp_est_age %u", cfg->st_estab_ttl); - if (co.verbose || cfg->st_udp_ttl != NAT64LSN_UDP_AGE) + if (cfg->st_udp_ttl != NAT64LSN_UDP_AGE) printf(" udp_age %u", cfg->st_udp_ttl); - if (co.verbose || cfg->st_icmp_ttl != NAT64LSN_ICMP_AGE) + if (cfg->st_icmp_ttl != NAT64LSN_ICMP_AGE) printf(" icmp_age %u", cfg->st_icmp_ttl); - if (co.verbose || cfg->jmaxlen != NAT64LSN_JMAXLEN) - printf(" jmaxlen %u ", cfg->jmaxlen); if (cfg->flags & NAT64_LOG) printf(" log"); if (cfg->flags & NAT64_ALLOW_PRIVATE) diff --git a/sys/conf/files b/sys/conf/files index ed982409534..45968c43852 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4398,9 +4398,9 @@ netpfil/ipfw/nat64/nat64clat.c optional inet inet6 ipfirewall \ netpfil/ipfw/nat64/nat64clat_control.c optional inet inet6 ipfirewall \ ipfirewall_nat64 netpfil/ipfw/nat64/nat64lsn.c optional inet inet6 ipfirewall \ - ipfirewall_nat64 compile-with "${NORMAL_C} -I$S/contrib/ck/include" + ipfirewall_nat64 netpfil/ipfw/nat64/nat64lsn_control.c optional inet inet6 ipfirewall \ - ipfirewall_nat64 compile-with "${NORMAL_C} -I$S/contrib/ck/include" + ipfirewall_nat64 netpfil/ipfw/nat64/nat64stl.c optional inet inet6 ipfirewall \ ipfirewall_nat64 netpfil/ipfw/nat64/nat64stl_control.c optional inet inet6 ipfirewall \ diff --git a/sys/modules/ipfw_nat64/Makefile b/sys/modules/ipfw_nat64/Makefile index 037215a7148..ee2ad7da15a 100644 --- a/sys/modules/ipfw_nat64/Makefile +++ b/sys/modules/ipfw_nat64/Makefile @@ -8,6 +8,4 @@ SRCS+= nat64clat.c nat64clat_control.c SRCS+= nat64lsn.c nat64lsn_control.c SRCS+= nat64stl.c nat64stl_control.c -CFLAGS+= -I${SRCTOP}/sys/contrib/ck/include - .include diff --git a/sys/netinet6/ip_fw_nat64.h b/sys/netinet6/ip_fw_nat64.h index 40e3441132e..47c0a70d167 100644 --- a/sys/netinet6/ip_fw_nat64.h +++ b/sys/netinet6/ip_fw_nat64.h @@ -122,7 +122,7 @@ typedef struct _ipfw_nat64clat_cfg { /* * NAT64LSN default configuration values */ -#define NAT64LSN_MAX_PORTS 2048 /* Unused */ 
+#define NAT64LSN_MAX_PORTS 2048 /* Max number of ports per host */ #define NAT64LSN_JMAXLEN 2048 /* Max outstanding requests. */ #define NAT64LSN_TCP_SYN_AGE 10 /* State's TTL after SYN received. */ #define NAT64LSN_TCP_EST_AGE (2 * 3600) /* TTL for established connection */ @@ -135,20 +135,16 @@ typedef struct _ipfw_nat64clat_cfg { typedef struct _ipfw_nat64lsn_cfg { char name[64]; /* NAT name */ uint32_t flags; - - uint32_t max_ports; /* Unused */ - uint32_t agg_prefix_len; /* Unused */ - uint32_t agg_prefix_max; /* Unused */ - + uint32_t max_ports; /* Max ports per client */ + uint32_t agg_prefix_len; /* Prefix length to count */ + uint32_t agg_prefix_max; /* Max hosts per agg prefix */ struct in_addr prefix4; uint16_t plen4; /* Prefix length */ uint16_t plen6; /* Prefix length */ struct in6_addr prefix6; /* NAT64 prefix */ uint32_t jmaxlen; /* Max jobqueue length */ - - uint16_t min_port; /* Unused */ - uint16_t max_port; /* Unused */ - + uint16_t min_port; /* Min port group # to use */ + uint16_t max_port; /* Max port group # to use */ uint16_t nh_delete_delay;/* Stale host delete delay */ uint16_t pg_delete_delay;/* Stale portgroup delete delay */ uint16_t st_syn_ttl; /* TCP syn expire */ @@ -157,7 +153,7 @@ typedef struct _ipfw_nat64lsn_cfg { uint16_t st_udp_ttl; /* UDP expire */ uint16_t st_icmp_ttl; /* ICMP expire */ uint8_t set; /* Named instance set [0..31] */ - uint8_t states_chunks; /* Number of states chunks per PG */ + uint8_t spare; } ipfw_nat64lsn_cfg; typedef struct _ipfw_nat64lsn_state { @@ -181,30 +177,5 @@ typedef struct _ipfw_nat64lsn_stg { uint32_t spare2; } ipfw_nat64lsn_stg; -typedef struct _ipfw_nat64lsn_state_v1 { - struct in6_addr host6; /* Bound IPv6 host */ - struct in_addr daddr; /* Remote IPv4 address */ - uint16_t dport; /* Remote destination port */ - uint16_t aport; /* Local alias port */ - uint16_t sport; /* Source port */ - uint16_t spare; - uint16_t idle; /* Last used time */ - uint8_t flags; /* State flags */ - uint8_t proto; 
/* protocol */ -} ipfw_nat64lsn_state_v1; - -typedef struct _ipfw_nat64lsn_stg_v1 { - union nat64lsn_pgidx { - uint64_t index; - struct { - uint8_t chunk; /* states chunk */ - uint8_t proto; /* protocol */ - uint16_t port; /* base port */ - in_addr_t addr; /* alias address */ - }; - } next; /* next state index */ - struct in_addr alias4; /* IPv4 alias address */ - uint32_t count; /* Number of states */ -} ipfw_nat64lsn_stg_v1; - #endif /* _NETINET6_IP_FW_NAT64_H_ */ + diff --git a/sys/netpfil/ipfw/nat64/nat64lsn.c b/sys/netpfil/ipfw/nat64/nat64lsn.c index acca4c3c459..1ddeaafc7dc 100644 --- a/sys/netpfil/ipfw/nat64/nat64lsn.c +++ b/sys/netpfil/ipfw/nat64/nat64lsn.c @@ -33,17 +33,16 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include #include -#include #include #include #include #include #include #include +#include #include +#include #include #include @@ -72,22 +71,17 @@ __FBSDID("$FreeBSD$"); MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN"); -static epoch_t nat64lsn_epoch; -#define NAT64LSN_EPOCH_ENTER(et) epoch_enter_preempt(nat64lsn_epoch, &(et)) -#define NAT64LSN_EPOCH_EXIT(et) epoch_exit_preempt(nat64lsn_epoch, &(et)) -#define NAT64LSN_EPOCH_WAIT() epoch_wait_preempt(nat64lsn_epoch) -#define NAT64LSN_EPOCH_ASSERT() MPASS(in_epoch(nat64lsn_epoch)) -#define NAT64LSN_EPOCH_CALL(c, f) epoch_call(nat64lsn_epoch, (c), (f)) +static void nat64lsn_periodic(void *data); +#define PERIODIC_DELAY 4 +static uint8_t nat64lsn_proto_map[256]; +uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO]; -static uma_zone_t nat64lsn_host_zone; -static uma_zone_t nat64lsn_pgchunk_zone; -static uma_zone_t nat64lsn_pg_zone; -static uma_zone_t nat64lsn_aliaslink_zone; -static uma_zone_t nat64lsn_state_zone; -static uma_zone_t nat64lsn_job_zone; +#define NAT64_FLAG_FIN 0x01 /* FIN was seen */ +#define NAT64_FLAG_SYN 0x02 /* First syn in->out */ +#define NAT64_FLAG_ESTAB 0x04 /* Packet with Ack */ +#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN) -static 
void nat64lsn_periodic(void *data); -#define PERIODIC_DELAY 4 +#define NAT64_FLAG_RDR 0x80 /* Port redirect */ #define NAT64_LOOKUP(chain, cmd) \ (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1) /* @@ -97,33 +91,25 @@ static void nat64lsn_periodic(void *data); enum nat64lsn_jtype { JTYPE_NEWHOST = 1, JTYPE_NEWPORTGROUP, - JTYPE_DESTROY, + JTYPE_DELPORTGROUP, }; struct nat64lsn_job_item { - STAILQ_ENTRY(nat64lsn_job_item) entries; + TAILQ_ENTRY(nat64lsn_job_item) next; enum nat64lsn_jtype jtype; - - union { - struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */ - struct mbuf *m; - struct nat64lsn_host *host; - struct nat64lsn_state *state; - uint32_t src6_hval; - uint32_t state_hval; - struct ipfw_flow_id f_id; - in_addr_t faddr; - uint16_t port; - uint8_t proto; - uint8_t done; - }; - struct { /* used by JTYPE_DESTROY */ - struct nat64lsn_hosts_slist hosts; - struct nat64lsn_pg_slist portgroups; - struct nat64lsn_pgchunk *pgchunk; - struct epoch_context epoch_ctx; - }; - }; + struct nat64lsn_host *nh; + struct nat64lsn_portgroup *pg; + void *spare_idx; + struct in6_addr haddr; + uint8_t nat_proto; + uint8_t done; + int needs_idx; + int delcount; + unsigned int fhash; /* Flow hash */ + uint32_t aaddr; /* Last used address (net) */ + struct mbuf *m; + struct ipfw_flow_id f_id; + uint64_t delmask[NAT64LSN_PGPTRNMASK]; }; static struct mtx jmtx; @@ -132,278 +118,143 @@ static struct mtx jmtx; #define JQUEUE_LOCK() mtx_lock(&jmtx) #define JQUEUE_UNLOCK() mtx_unlock(&jmtx) -static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, - struct nat64lsn_job_item *ji); -static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, - struct nat64lsn_job_item *ji); -static struct nat64lsn_job_item *nat64lsn_create_job( - struct nat64lsn_cfg *cfg, int jtype); static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji); -static void nat64lsn_job_destroy(epoch_context_t ctx); -static void nat64lsn_destroy_host(struct nat64lsn_host *host); -static 
void nat64lsn_destroy_pg(struct nat64lsn_pg *pg); - +static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg, + struct nat64lsn_job_head *jhead, int jlen); + +static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg, + const struct ipfw_flow_id *f_id, int jtype); +static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg, + const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr, + int needs_idx); +static int nat64lsn_request_host(struct nat64lsn_cfg *cfg, + const struct ipfw_flow_id *f_id, struct mbuf **pm); static int nat64lsn_translate4(struct nat64lsn_cfg *cfg, - const struct ipfw_flow_id *f_id, struct mbuf **mp); + const struct ipfw_flow_id *f_id, struct mbuf **pm); static int nat64lsn_translate6(struct nat64lsn_cfg *cfg, - struct ipfw_flow_id *f_id, struct mbuf **mp); -static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, - struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags); - -#define NAT64_BIT_TCP_FIN 0 /* FIN was seen */ -#define NAT64_BIT_TCP_SYN 1 /* First syn in->out */ -#define NAT64_BIT_TCP_ESTAB 2 /* Packet with Ack */ -#define NAT64_BIT_READY_IPV4 6 /* state is ready for translate4 */ -#define NAT64_BIT_STALE 7 /* state is going to be expired */ - -#define NAT64_FLAG_FIN (1 << NAT64_BIT_TCP_FIN) -#define NAT64_FLAG_SYN (1 << NAT64_BIT_TCP_SYN) -#define NAT64_FLAG_ESTAB (1 << NAT64_BIT_TCP_ESTAB) -#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN) + struct ipfw_flow_id *f_id, struct mbuf **pm); -#define NAT64_FLAG_READY (1 << NAT64_BIT_READY_IPV4) -#define NAT64_FLAG_STALE (1 << NAT64_BIT_STALE) +static int alloc_portgroup(struct nat64lsn_job_item *ji); +static void destroy_portgroup(struct nat64lsn_portgroup *pg); +static void destroy_host6(struct nat64lsn_host *nh); +static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji); -static inline uint8_t -convert_tcp_flags(uint8_t flags) -{ - uint8_t result; +static int attach_portgroup(struct 
nat64lsn_cfg *cfg, + struct nat64lsn_job_item *ji); +static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji); - result = flags & (TH_FIN|TH_SYN); - result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */ - result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */ - return (result); -} +/* XXX tmp */ +static uma_zone_t nat64lsn_host_zone; +static uma_zone_t nat64lsn_pg_zone; +static uma_zone_t nat64lsn_pgidx_zone; -static void -nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family, - uintptr_t state) -{ +static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, + struct nat64lsn_host *nh); - memset(plog, 0, sizeof(*plog)); - plog->length = PFLOG_REAL_HDRLEN; - plog->af = family; - plog->action = PF_NAT; - plog->dir = PF_IN; - plog->rulenr = htonl(state >> 32); - plog->subrulenr = htonl(state & 0xffffffff); - plog->ruleset[0] = '\0'; - strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname)); - ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m); -} +#define I6_hash(x) (djb_hash((const unsigned char *)(x), 16)) +#define I6_first(_ph, h) (_ph)[h] +#define I6_next(x) (x)->next +#define I6_val(x) (&(x)->addr) +#define I6_cmp(a, b) IN6_ARE_ADDR_EQUAL(a, b) +#define I6_lock(a, b) +#define I6_unlock(a, b) -#define HVAL(p, n, s) jenkins_hash32((const uint32_t *)(p), (n), (s)) -#define HOST_HVAL(c, a) HVAL((a),\ - sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed) -#define HOSTS(c, v) ((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)]) - -#define ALIASLINK_HVAL(c, f) HVAL(&(f)->dst_ip6,\ - sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed) -#define ALIAS_BYHASH(c, v) \ - ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)]) -static struct nat64lsn_aliaslink* -nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused, - struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused) -{ +#define I6HASH_FIND(_cfg, _res, _a) \ + CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a) +#define I6HASH_INSERT(_cfg, _i) \ + 
CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i) +#define I6HASH_REMOVE(_cfg, _res, _tmp, _a) \ + CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a) - /* - * We can implement some different algorithms how - * select an alias address. - * XXX: for now we use first available. - */ - return (CK_SLIST_FIRST(&host->aliases)); -} +#define I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg) \ + CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg) -#define FADDR_CHUNK(p, a) ((a) & ((p)->chunks_count - 1)) -#define FREEMASK_CHUNK(p, v) \ - ((p)->chunks_count == 1 ? &(p)->freemask : \ - &((p)->freemask_chunk[FADDR_CHUNK(p, v)])) -#define STATES_CHUNK(p, v) \ - ((p)->chunks_count == 1 ? (p)->states : \ - ((p)->states_chunk[FADDR_CHUNK(p, v)])) -#define STATE_HVAL(c, d) HVAL((d), 2, (c)->hash_seed) -#define STATE_HASH(h, v) \ - ((h)->states_hash[(v) & ((h)->states_hashsize - 1)]) - -#define NAT64LSN_TRY_PGCNT 32 -static struct nat64lsn_pg* -nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask, - struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr, - uint32_t *pgidx, in_addr_t faddr) -{ - struct nat64lsn_pg *pg, *oldpg; - uint32_t idx, oldidx; - int cnt; - - cnt = 0; - /* First try last used PG */ - oldpg = pg = ck_pr_load_ptr(pgptr); - idx = oldidx = ck_pr_load_32(pgidx); - /* If pgidx is out of range, reset it to the first pgchunk */ - if (!ISSET32(*chunkmask, idx / 32)) - idx = 0; - do { - ck_pr_fence_load(); - if (pg != NULL && - bitcount64(*FREEMASK_CHUNK(pg, faddr)) > 0) { - /* - * If last used PG has not free states, - * try to update pointer. - * NOTE: it can be already updated by jobs handler, - * thus we use CAS operation. 
- */ - if (cnt > 0) - ck_pr_cas_ptr(pgptr, oldpg, pg); - return (pg); - } - /* Stop if idx is out of range */ - if (!ISSET32(*chunkmask, idx / 32)) - break; +#define HASH_IN4(x) djb_hash((const unsigned char *)(x), 8) - if (ISSET32(pgmask[idx / 32], idx % 32)) - pg = ck_pr_load_ptr( - &chunks[idx / 32]->pgptr[idx % 32]); - else - pg = NULL; +static unsigned +djb_hash(const unsigned char *h, const int len) +{ + unsigned int result = 0; + int i; - idx++; - } while (++cnt < NAT64LSN_TRY_PGCNT); + for (i = 0; i < len; i++) + result = 33 * result ^ h[i]; - /* If pgidx is out of range, reset it to the first pgchunk */ - if (!ISSET32(*chunkmask, idx / 32)) - idx = 0; - ck_pr_cas_32(pgidx, oldidx, idx); - return (NULL); + return (result); } -static struct nat64lsn_state* -nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host, - const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr, - uint16_t port, uint8_t proto) +/* +static size_t +bitmask_size(size_t num, int *level) { - struct nat64lsn_aliaslink *link; - struct nat64lsn_state *state; - struct nat64lsn_pg *pg; - int i, offset; - - NAT64LSN_EPOCH_ASSERT(); - - /* Check that we already have state for given arguments */ - CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) { - if (state->proto == proto && state->ip_dst == faddr && - state->sport == port && state->dport == f_id->dst_port) - return (state); - } + size_t x; + int c; - link = nat64lsn_get_aliaslink(cfg, host, f_id); - if (link == NULL) - return (NULL); + for (c = 0, x = num; num > 1; num /= 64, c++) + ; - switch (proto) { - case IPPROTO_TCP: - pg = nat64lsn_get_pg( - &link->alias->tcp_chunkmask, link->alias->tcp_pgmask, - link->alias->tcp, &link->alias->tcp_pg, - &link->alias->tcp_pgidx, faddr); - break; - case IPPROTO_UDP: - pg = nat64lsn_get_pg( - &link->alias->udp_chunkmask, link->alias->udp_pgmask, - link->alias->udp, &link->alias->udp_pg, - &link->alias->udp_pgidx, faddr); - break; - case IPPROTO_ICMP: - pg = 
nat64lsn_get_pg( - &link->alias->icmp_chunkmask, link->alias->icmp_pgmask, - link->alias->icmp, &link->alias->icmp_pg, - &link->alias->icmp_pgidx, faddr); - break; - default: - panic("%s: wrong proto %d", __func__, proto); - } - if (pg == NULL) - return (NULL); + return (x); +} - /* Check that PG has some free states */ - state = NULL; - i = bitcount64(*FREEMASK_CHUNK(pg, faddr)); - while (i-- > 0) { - offset = ffsll(*FREEMASK_CHUNK(pg, faddr)); - if (offset == 0) { - /* - * We lost the race. - * No more free states in this PG. - */ - break; - } +static void +bitmask_prepare(uint64_t *pmask, size_t bufsize, int level) +{ + size_t x, z; - /* Lets try to atomically grab the state */ - if (ck_pr_btr_64(FREEMASK_CHUNK(pg, faddr), offset - 1)) { - state = &STATES_CHUNK(pg, faddr)->state[offset - 1]; - /* Initialize */ - state->flags = proto != IPPROTO_TCP ? 0 : - convert_tcp_flags(f_id->_flags); - state->proto = proto; - state->aport = pg->base_port + offset - 1; - state->dport = f_id->dst_port; - state->sport = port; - state->ip6_dst = f_id->dst_ip6; - state->ip_dst = faddr; - state->ip_src = link->alias->addr; - state->hval = hval; - state->host = host; - SET_AGE(state->timestamp); - - /* Insert new state into host's hash table */ - HOST_LOCK(host); - CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval), - state, entries); - host->states_count++; - /* - * XXX: In case if host is going to be expired, - * reset NAT64LSN_DEADHOST flag. 
- */ - host->flags &= ~NAT64LSN_DEADHOST; - HOST_UNLOCK(host); - NAT64STAT_INC(&cfg->base.stats, screated); - /* Mark the state as ready for translate4 */ - ck_pr_fence_store(); - ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4); - break; - } - } - return (state); + memset(pmask, 0xFF, bufsize); + for (x = 0, z = 1; level > 1; x += z, z *= 64, level--) + ; + pmask[x] ~= 0x01; } +*/ + +static void +nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family, + uint32_t n, uint32_t sn) +{ + memset(plog, 0, sizeof(*plog)); + plog->length = PFLOG_REAL_HDRLEN; + plog->af = family; + plog->action = PF_NAT; + plog->dir = PF_IN; + plog->rulenr = htonl(n); + plog->subrulenr = htonl(sn); + plog->ruleset[0] = '\0'; + strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname)); + ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m); +} /* * Inspects icmp packets to see if the message contains different * packet header so we need to alter @addr and @port. */ static int -inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr, +inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr, uint16_t *port) { - struct icmp *icmp; struct ip *ip; + struct tcphdr *tcp; + struct udphdr *udp; + struct icmphdr *icmp; int off; - uint8_t inner_proto; + uint8_t proto; - ip = mtod(*mp, struct ip *); /* Outer IP header */ + ip = mtod(*m, struct ip *); /* Outer IP header */ off = (ip->ip_hl << 2) + ICMP_MINLEN; - if ((*mp)->m_len < off) - *mp = m_pullup(*mp, off); - if (*mp == NULL) + if ((*m)->m_len < off) + *m = m_pullup(*m, off); + if (*m == NULL) return (ENOMEM); - ip = mtod(*mp, struct ip *); /* Outer IP header */ - icmp = L3HDR(ip, struct icmp *); + ip = mtod(*m, struct ip *); /* Outer IP header */ + icmp = L3HDR(ip, struct icmphdr *); switch (icmp->icmp_type) { case ICMP_ECHO: case ICMP_ECHOREPLY: /* Use icmp ID as distinguisher */ - *port = ntohs(icmp->icmp_id); + *port = ntohs(*((uint16_t *)(icmp + 1))); return (0); case ICMP_UNREACH: case ICMP_TIMXCEED: @@ -415,133 
+266,90 @@ inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr, * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits * of ULP header. */ - if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN) + if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN) return (EINVAL); - if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN) - *mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN); - if (*mp == NULL) + if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN) + *m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN); + if (*m == NULL) return (ENOMEM); - ip = mtodo(*mp, off); /* Inner IP header */ - inner_proto = ip->ip_p; + ip = mtodo(*m, off); /* Inner IP header */ + proto = ip->ip_p; off += ip->ip_hl << 2; /* Skip inner IP header */ *addr = ntohl(ip->ip_src.s_addr); - if ((*mp)->m_len < off + ICMP_MINLEN) - *mp = m_pullup(*mp, off + ICMP_MINLEN); - if (*mp == NULL) + if ((*m)->m_len < off + ICMP_MINLEN) + *m = m_pullup(*m, off + ICMP_MINLEN); + if (*m == NULL) return (ENOMEM); - switch (inner_proto) { + switch (proto) { case IPPROTO_TCP: + tcp = mtodo(*m, off); + *nat_proto = NAT_PROTO_TCP; + *port = ntohs(tcp->th_sport); + return (0); case IPPROTO_UDP: - /* Copy source port from the header */ - *port = ntohs(*((uint16_t *)mtodo(*mp, off))); - *proto = inner_proto; + udp = mtodo(*m, off); + *nat_proto = NAT_PROTO_UDP; + *port = ntohs(udp->uh_sport); return (0); case IPPROTO_ICMP: /* * We will translate only ICMP errors for our ICMP * echo requests. 
*/ - icmp = mtodo(*mp, off); + icmp = mtodo(*m, off); if (icmp->icmp_type != ICMP_ECHO) return (EOPNOTSUPP); - *port = ntohs(icmp->icmp_id); + *port = ntohs(*((uint16_t *)(icmp + 1))); return (0); }; return (EOPNOTSUPP); } -static struct nat64lsn_state* -nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias, - in_addr_t faddr, uint16_t port, uint8_t proto) +static inline uint8_t +convert_tcp_flags(uint8_t flags) { - struct nat64lsn_state *state; - struct nat64lsn_pg *pg; - int chunk_idx, pg_idx, state_idx; - - NAT64LSN_EPOCH_ASSERT(); - - if (port < NAT64_MIN_PORT) - return (NULL); - /* - * Alias keeps 32 pgchunks for each protocol. - * Each pgchunk has 32 pointers to portgroup. - * Each portgroup has 64 states for ports. - */ - port -= NAT64_MIN_PORT; - chunk_idx = port / 2048; - - port -= chunk_idx * 2048; - pg_idx = port / 64; - state_idx = port % 64; - - /* - * First check in proto_chunkmask that we have allocated PG chunk. - * Then check in proto_pgmask that we have valid PG pointer. 
- */ - pg = NULL; - switch (proto) { - case IPPROTO_TCP: - if (ISSET32(alias->tcp_chunkmask, chunk_idx) && - ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) { - pg = alias->tcp[chunk_idx]->pgptr[pg_idx]; - break; - } - return (NULL); - case IPPROTO_UDP: - if (ISSET32(alias->udp_chunkmask, chunk_idx) && - ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) { - pg = alias->udp[chunk_idx]->pgptr[pg_idx]; - break; - } - return (NULL); - case IPPROTO_ICMP: - if (ISSET32(alias->icmp_chunkmask, chunk_idx) && - ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) { - pg = alias->icmp[chunk_idx]->pgptr[pg_idx]; - break; - } - return (NULL); - default: - panic("%s: wrong proto %d", __func__, proto); - } - if (pg == NULL) - return (NULL); + uint8_t result; - if (ISSET64(*FREEMASK_CHUNK(pg, faddr), state_idx)) - return (NULL); + result = flags & (TH_FIN|TH_SYN); + result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */ + result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */ - state = &STATES_CHUNK(pg, faddr)->state[state_idx]; - ck_pr_fence_load(); - if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY) - return (state); - return (NULL); + return (result); } -static int -nat64lsn_translate4(struct nat64lsn_cfg *cfg, - const struct ipfw_flow_id *f_id, struct mbuf **mp) +static NAT64NOINLINE int +nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id, + struct mbuf **pm) { struct pfloghdr loghdr, *logdata; struct in6_addr src6; - struct nat64lsn_state *state; - struct nat64lsn_alias *alias; - uint32_t addr, flags; - uint16_t port, ts; + struct nat64lsn_portgroup *pg; + struct nat64lsn_host *nh; + struct nat64lsn_state *st; + struct ip *ip; + uint32_t addr; + uint16_t state_flags, state_ts; + uint16_t port, lport; + uint8_t nat_proto; int ret; - uint8_t proto; addr = f_id->dst_ip; port = f_id->dst_port; - proto = f_id->proto; if (addr < cfg->prefix4 || addr > cfg->pmask4) { NAT64STAT_INC(&cfg->base.stats, nomatch4); return (cfg->nomatch_verdict); } - /* Check if 
protocol is supported */ - switch (proto) { - case IPPROTO_ICMP: - ret = inspect_icmp_mbuf(mp, &proto, &addr, &port); + /* Check if protocol is supported and get its short id */ + nat_proto = nat64lsn_proto_map[f_id->proto]; + if (nat_proto == 0) { + NAT64STAT_INC(&cfg->base.stats, noproto); + return (cfg->nomatch_verdict); + } + + /* We might need to handle icmp differently */ + if (nat_proto == NAT_PROTO_ICMP) { + ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port); if (ret != 0) { if (ret == ENOMEM) { NAT64STAT_INC(&cfg->base.stats, nomem); @@ -550,640 +358,804 @@ nat64lsn_translate4(struct nat64lsn_cfg *cfg, NAT64STAT_INC(&cfg->base.stats, noproto); return (cfg->nomatch_verdict); } + /* XXX: Check addr for validity */ if (addr < cfg->prefix4 || addr > cfg->pmask4) { NAT64STAT_INC(&cfg->base.stats, nomatch4); return (cfg->nomatch_verdict); } - /* FALLTHROUGH */ - case IPPROTO_TCP: - case IPPROTO_UDP: - break; - default: - NAT64STAT_INC(&cfg->base.stats, noproto); - return (cfg->nomatch_verdict); } - alias = &ALIAS_BYHASH(cfg, addr); - MPASS(addr == alias->addr); + /* Calc portgroup offset w.r.t protocol */ + pg = GET_PORTGROUP(cfg, addr, nat_proto, port); - /* Check that we have state for this port */ - state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip, - port, proto); - if (state == NULL) { + /* Check if this port is occupied by any portgroup */ + if (pg == NULL) { NAT64STAT_INC(&cfg->base.stats, nomatch4); +#if 0 + DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto, port, + _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port)); +#endif return (cfg->nomatch_verdict); } /* TODO: Check flags to see if we need to do some static mapping */ - - /* Update some state fields if need */ - SET_AGE(ts); - if (f_id->proto == IPPROTO_TCP) - flags = convert_tcp_flags(f_id->_flags); + nh = pg->host; + + /* Prepare some fields we might need to update */ + SET_AGE(state_ts); + ip = mtod(*pm, struct ip *); + if (ip->ip_p == IPPROTO_TCP) + state_flags = 
convert_tcp_flags( + L3HDR(ip, struct tcphdr *)->th_flags); else - flags = 0; - if (state->timestamp != ts) - state->timestamp = ts; - if ((state->flags & flags) != flags) - state->flags |= flags; + state_flags = 0; + + /* Lock host and get port mapping */ + NAT64_LOCK(nh); + + st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)]; + if (st->timestamp != state_ts) + st->timestamp = state_ts; + if ((st->flags & state_flags) != state_flags) + st->flags |= state_flags; + lport = htons(st->u.s.lport); - port = htons(state->sport); - src6 = state->ip6_dst; + NAT64_UNLOCK(nh); if (cfg->base.flags & NAT64_LOG) { logdata = &loghdr; - nat64lsn_log(logdata, *mp, AF_INET, (uintptr_t)state); + nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off); } else logdata = NULL; - /* - * We already have src6 with embedded address, but it is possible, - * that src_ip is different than state->ip_dst, this is why we - * do embedding again. - */ nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip)); - ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port, + ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport, &cfg->base, logdata); + if (ret == NAT64SKIP) return (cfg->nomatch_verdict); - if (ret == NAT64RETURN) - *mp = NULL; + if (ret == NAT64MFREE) + m_freem(*pm); + *pm = NULL; + return (IP_FW_DENY); } +void +nat64lsn_dump_state(const struct nat64lsn_cfg *cfg, + const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st, + const char *px, int off) +{ + char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN]; + + if ((V_nat64_debug & DP_STATE) == 0) + return; + inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s)); + inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a)); + inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d)); + + DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> " + "%s:%d AGE %d", px, pg->idx, st, off, + s, st->u.s.lport, pg->nat_proto, a, pg->aport + off, + d, st->u.s.fport, GET_AGE(st->timestamp)); +} + /* - * Check if particular state is 
stale and should be deleted. + * Check if particular TCP state is stale and should be deleted. * Return 1 if true, 0 otherwise. */ static int -nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state) +nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg, + const struct nat64lsn_state *st, int age) { - int age, ttl; + int ttl; + + if (st->flags & NAT64_FLAG_FIN) + ttl = cfg->st_close_ttl; + else if (st->flags & NAT64_FLAG_ESTAB) + ttl = cfg->st_estab_ttl; + else if (st->flags & NAT64_FLAG_SYN) + ttl = cfg->st_syn_ttl; + else + ttl = cfg->st_syn_ttl; - /* State was marked as stale in previous pass. */ - if (ISSET32(state->flags, NAT64_BIT_STALE)) + if (age > ttl) return (1); + return (0); +} + +/* + * Check if nat state @st is stale and should be deleted. + * Return 1 if true, 0 otherwise. + */ +static NAT64NOINLINE int +nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg, + const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st) +{ + int age, delete; - /* State is not yet initialized, it is going to be READY */ - if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4)) + age = GET_AGE(st->timestamp); + delete = 0; + + /* Skip immutable records */ + if (st->flags & NAT64_FLAG_RDR) return (0); - age = GET_AGE(state->timestamp); - switch (state->proto) { - case IPPROTO_TCP: - if (ISSET32(state->flags, NAT64_BIT_TCP_FIN)) - ttl = cfg->st_close_ttl; - else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB)) - ttl = cfg->st_estab_ttl; - else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN)) - ttl = cfg->st_syn_ttl; - else - ttl = cfg->st_syn_ttl; - if (age > ttl) - return (1); - break; - case IPPROTO_UDP: - if (age > cfg->st_udp_ttl) - return (1); - break; - case IPPROTO_ICMP: - if (age > cfg->st_icmp_ttl) - return (1); - break; + switch (pg->nat_proto) { + case NAT_PROTO_TCP: + delete = nat64lsn_periodic_check_tcp(cfg, st, age); + break; + case NAT_PROTO_UDP: + if (age > cfg->st_udp_ttl) + delete = 1; + break; + case NAT_PROTO_ICMP: + if 
(age > cfg->st_icmp_ttl) + delete = 1; + break; } - return (0); + + return (delete); } -static int -nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg) + +/* + * The following structures and functions + * are used to perform SLIST_FOREACH_SAFE() + * analog for states identified by struct st_ptr. + */ + +struct st_idx { + struct nat64lsn_portgroup *pg; + struct nat64lsn_state *st; + struct st_ptr sidx_next; +}; + +static struct st_idx * +st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh, + struct st_ptr *sidx, struct st_idx *si) { - struct nat64lsn_state *state; - struct nat64lsn_host *host; - uint64_t freemask; - int c, i, update_age; - - update_age = 0; - for (c = 0; c < pg->chunks_count; c++) { - freemask = ck_pr_load_64(FREEMASK_CHUNK(pg, c)); - for (i = 0; i < 64; i++) { - if (ISSET64(freemask, i)) - continue; - state = &STATES_CHUNK(pg, c)->state[i]; - if (nat64lsn_check_state(cfg, state) == 0) { - update_age = 1; - continue; - } - /* - * Expire state: - * 1. Mark as STALE and unlink from host's hash. - * 2. Set bit in freemask. - */ - if (ISSET32(state->flags, NAT64_BIT_STALE)) { - /* - * State was marked as STALE in previous - * pass. Now it is safe to release it. - */ - state->flags = 0; - ck_pr_fence_store(); - ck_pr_bts_64(FREEMASK_CHUNK(pg, c), i); - NAT64STAT_INC(&cfg->base.stats, sdeleted); - continue; - } - MPASS(state->flags & NAT64_FLAG_READY); - - host = state->host; - HOST_LOCK(host); - CK_SLIST_REMOVE(&STATE_HASH(host, state->hval), - state, nat64lsn_state, entries); - host->states_count--; - HOST_UNLOCK(host); - - /* Reset READY flag */ - ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4); - /* And set STALE flag */ - ck_pr_bts_32(&state->flags, NAT64_BIT_STALE); - ck_pr_fence_store(); - /* - * Now translate6 will not use this state, wait - * until it become safe for translate4, then mark - * state as free. 
- */ - } + struct nat64lsn_portgroup *pg; + struct nat64lsn_state *st; + + if (sidx->idx == 0) { + memset(si, 0, sizeof(*si)); + return (si); } - /* - * We have some alive states, update timestamp. - */ - if (update_age) - SET_AGE(pg->timestamp); + pg = PORTGROUP_BYSIDX(cfg, nh, sidx->idx); + st = &pg->states[sidx->off]; - if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay) - return (0); + si->pg = pg; + si->st = st; + si->sidx_next = st->next; - return (1); + return (si); } -static void -nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg, - struct nat64lsn_pg_slist *portgroups) +static struct st_idx * +st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh, + struct st_idx *si) { - struct nat64lsn_alias *alias; - struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr; - uint32_t *pgmask, *pgidx; - int i, idx; - - for (i = 0; i < 1 << (32 - cfg->plen4); i++) { - alias = &cfg->aliases[i]; - CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) { - if (nat64lsn_maintain_pg(cfg, pg) == 0) - continue; - /* Always keep first PG */ - if (pg->base_port == NAT64_MIN_PORT) - continue; - /* - * PG is expired, unlink it and schedule for - * deferred destroying. - */ - idx = (pg->base_port - NAT64_MIN_PORT) / 64; - switch (pg->proto) { - case IPPROTO_TCP: - pgmask = alias->tcp_pgmask; - pgptr = &alias->tcp_pg; - pgidx = &alias->tcp_pgidx; - firstpg = alias->tcp[0]->pgptr[0]; - break; - case IPPROTO_UDP: - pgmask = alias->udp_pgmask; - pgptr = &alias->udp_pg; - pgidx = &alias->udp_pgidx; - firstpg = alias->udp[0]->pgptr[0]; - break; - case IPPROTO_ICMP: - pgmask = alias->icmp_pgmask; - pgptr = &alias->icmp_pg; - pgidx = &alias->icmp_pgidx; - firstpg = alias->icmp[0]->pgptr[0]; - break; - } - /* Reset the corresponding bit in pgmask array. */ - ck_pr_btr_32(&pgmask[idx / 32], idx % 32); - ck_pr_fence_store(); - /* If last used PG points to this PG, reset it. 
*/ - ck_pr_cas_ptr(pgptr, pg, firstpg); - ck_pr_cas_32(pgidx, idx, 0); - /* Unlink PG from alias's chain */ - ALIAS_LOCK(alias); - CK_SLIST_REMOVE(&alias->portgroups, pg, - nat64lsn_pg, entries); - alias->portgroups_count--; - ALIAS_UNLOCK(alias); - /* And link to job's chain for deferred destroying */ - NAT64STAT_INC(&cfg->base.stats, spgdeleted); - CK_SLIST_INSERT_HEAD(portgroups, pg, entries); - } + struct st_ptr sidx; + struct nat64lsn_portgroup *pg; + struct nat64lsn_state *st; + + sidx = si->sidx_next; + if (sidx.idx == 0) { + memset(si, 0, sizeof(*si)); + si->st = NULL; + si->pg = NULL; + return (si); } + + pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); + st = &pg->states[sidx.off]; + + si->pg = pg; + si->st = st; + si->sidx_next = st->next; + + return (si); } -static void -nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg, - struct nat64lsn_hosts_slist *hosts) +static struct st_idx * +st_save_cond(struct st_idx *si_dst, struct st_idx *si) { - struct nat64lsn_host *host, *tmp; - int i; + if (si->st != NULL) + *si_dst = *si; - for (i = 0; i < cfg->hosts_hashsize; i++) { - CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i], - entries, tmp) { - /* Is host was marked in previous call? */ - if (host->flags & NAT64LSN_DEADHOST) { - if (host->states_count > 0) { - host->flags &= ~NAT64LSN_DEADHOST; - continue; - } - /* - * Unlink host from hash table and schedule - * it for deferred destroying. 
- */ - CFG_LOCK(cfg); - CK_SLIST_REMOVE(&cfg->hosts_hash[i], host, - nat64lsn_host, entries); - cfg->hosts_count--; - CFG_UNLOCK(cfg); - CK_SLIST_INSERT_HEAD(hosts, host, entries); - continue; - } - if (GET_AGE(host->timestamp) < cfg->host_delete_delay) - continue; - if (host->states_count > 0) - continue; - /* Mark host as going to be expired in next pass */ - host->flags |= NAT64LSN_DEADHOST; - ck_pr_fence_store(); - } - } + return (si_dst); } -static struct nat64lsn_pgchunk* -nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg) +unsigned int +nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh) { -#if 0 - struct nat64lsn_alias *alias; - struct nat64lsn_pgchunk *chunk; - uint32_t pgmask; - int i, c; - - for (i = 0; i < 1 << (32 - cfg->plen4); i++) { - alias = &cfg->aliases[i]; - if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay) - continue; - /* Always keep single chunk allocated */ - for (c = 1; c < 32; c++) { - if ((alias->tcp_chunkmask & (1 << c)) == 0) - break; - chunk = ck_pr_load_ptr(&alias->tcp[c]); - if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0) - continue; - ck_pr_btr_32(&alias->tcp_chunkmask, c); - ck_pr_fence_load(); - if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0) + struct st_idx si, si_prev; + int i; + unsigned int delcount; + + delcount = 0; + for (i = 0; i < nh->hsize; i++) { + memset(&si_prev, 0, sizeof(si_prev)); + for (st_first(cfg, nh, &nh->phash[i], &si); + si.st != NULL; + st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) { + if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0) continue; + nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE", + si.st->cur.off); + /* Unlink from hash */ + if (si_prev.st != NULL) + si_prev.st->next = si.st->next; + else + nh->phash[i] = si.st->next; + /* Delete state and free its data */ + PG_MARK_FREE_IDX(si.pg, si.st->cur.off); + memset(si.st, 0, sizeof(struct nat64lsn_state)); + si.st = NULL; + delcount++; + + /* Update portgroup timestamp */ + 
SET_AGE(si.pg->timestamp); } } -#endif - return (NULL); + NAT64STAT_ADD(&cfg->base.stats, sdeleted, delcount); + return (delcount); } -#if 0 -static void -nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg) +/* + * Checks if portgroup is not used and can be deleted, + * Returns 1 if stale, 0 otherwise + */ +static int +stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg) +{ + + if (!PG_IS_EMPTY(pg)) + return (0); + if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay) + return (0); + return (1); +} + +/* + * Checks if host record is not used and can be deleted, + * Returns 1 if stale, 0 otherwise + */ +static int +stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh) +{ + + if (nh->pg_used != 0) + return (0); + if (GET_AGE(nh->timestamp) < cfg->nh_delete_delay) + return (0); + return (1); +} + +struct nat64lsn_periodic_data { + struct nat64lsn_cfg *cfg; + struct nat64lsn_job_head jhead; + int jlen; +}; + +static NAT64NOINLINE int +nat64lsn_periodic_chkhost(struct nat64lsn_host *nh, + struct nat64lsn_periodic_data *d) { - struct nat64lsn_host *h; - struct nat64lsn_states_slist *hash; - int i, j, hsize; - - for (i = 0; i < cfg->hosts_hashsize; i++) { - CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) { - if (h->states_count / 2 < h->states_hashsize || - h->states_hashsize >= NAT64LSN_MAX_HSIZE) - continue; - hsize = h->states_hashsize * 2; - hash = malloc(sizeof(*hash)* hsize, M_NOWAIT); - if (hash == NULL) - continue; - for (j = 0; j < hsize; j++) - CK_SLIST_INIT(&hash[i]); - - ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH); + struct nat64lsn_portgroup *pg; + struct nat64lsn_job_item *ji; + uint64_t delmask[NAT64LSN_PGPTRNMASK]; + int delcount, i; + + delcount = 0; + memset(delmask, 0, sizeof(delmask)); + + if (V_nat64_debug & DP_JQUEUE) { + char a[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d", + stale_nh(d->cfg, nh) ? 
"stale" : "non-stale", a, curcpu); + } + if (!stale_nh(d->cfg, nh)) { + /* Non-stale host. Inspect internals */ + NAT64_LOCK(nh); + + /* Stage 1: Check&expire states */ + if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0) + SET_AGE(nh->timestamp); + + /* Stage 2: Check if we need to expire */ + for (i = 0; i < nh->pg_used; i++) { + pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1); + if (pg == NULL) + continue; + + /* Check if we can delete portgroup */ + if (stale_pg(d->cfg, pg) == 0) + continue; + + DPRINTF(DP_JQUEUE, "Check PG %d", i); + delmask[i / 64] |= ((uint64_t)1 << (i % 64)); + delcount++; } + + NAT64_UNLOCK(nh); + if (delcount == 0) + return (0); } + + DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount); + /* We have something to delete - add it to queue */ + ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP); + if (ji == NULL) + return (0); + + ji->haddr = nh->addr; + ji->delcount = delcount; + memcpy(ji->delmask, delmask, sizeof(ji->delmask)); + + TAILQ_INSERT_TAIL(&d->jhead, ji, next); + d->jlen++; + return (0); } -#endif /* * This procedure is used to perform various maintance - * on dynamic hash list. Currently it is called every 4 seconds. + * on dynamic hash list. Currently it is called every second. 
*/ static void nat64lsn_periodic(void *data) { - struct nat64lsn_job_item *ji; + struct ip_fw_chain *ch; + IPFW_RLOCK_TRACKER; struct nat64lsn_cfg *cfg; + struct nat64lsn_periodic_data d; + struct nat64lsn_host *nh, *tmp; cfg = (struct nat64lsn_cfg *) data; + ch = cfg->ch; CURVNET_SET(cfg->vp); - if (cfg->hosts_count > 0) { - ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT); - if (ji != NULL) { - ji->jtype = JTYPE_DESTROY; - CK_SLIST_INIT(&ji->hosts); - CK_SLIST_INIT(&ji->portgroups); - nat64lsn_expire_hosts(cfg, &ji->hosts); - nat64lsn_expire_portgroups(cfg, &ji->portgroups); - ji->pgchunk = nat64lsn_expire_pgchunk(cfg); - NAT64LSN_EPOCH_CALL(&ji->epoch_ctx, - nat64lsn_job_destroy); - } else - NAT64STAT_INC(&cfg->base.stats, jnomem); - } + + memset(&d, 0, sizeof(d)); + d.cfg = cfg; + TAILQ_INIT(&d.jhead); + + IPFW_RLOCK(ch); + + /* Stage 1: foreach host, check all its portgroups */ + I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d); + + /* Enqueue everything we have requested */ + nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen); + callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY); + + IPFW_RUNLOCK(ch); + CURVNET_RESTORE(); } -#define ALLOC_ERROR(stage, type) ((stage) ? 
10 * (type) + (stage): 0) -#define HOST_ERROR(stage) ALLOC_ERROR(stage, 1) -#define PG_ERROR(stage) ALLOC_ERROR(stage, 2) -static int -nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +static NAT64NOINLINE void +reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + + if (ji->m == NULL) + return; + + /* Request has failed or packet type is wrong */ + if (ji->f_id.addr_type != 6 || ji->done == 0) { + m_freem(ji->m); + ji->m = NULL; + NAT64STAT_INC(&cfg->base.stats, dropped); + DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d", + ji->jtype, ji->done); + return; + } + + /* + * XXX: Limit recursion level + */ + + NAT64STAT_INC(&cfg->base.stats, jreinjected); + DPRINTF(DP_JQUEUE, "Reinject mbuf"); + nat64lsn_translate6(cfg, &ji->f_id, &ji->m); +} + +static void +destroy_portgroup(struct nat64lsn_portgroup *pg) +{ + + DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg); + uma_zfree(nat64lsn_pg_zone, pg); +} + +static NAT64NOINLINE int +alloc_portgroup(struct nat64lsn_job_item *ji) +{ + struct nat64lsn_portgroup *pg; + + pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT); + if (pg == NULL) + return (1); + + if (ji->needs_idx != 0) { + ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT); + /* Failed alloc isn't always fatal, so don't check */ + } + memset(&pg->freemask, 0xFF, sizeof(pg->freemask)); + pg->nat_proto = ji->nat_proto; + ji->pg = pg; + return (0); + +} + +static void +destroy_host6(struct nat64lsn_host *nh) { char a[INET6_ADDRSTRLEN]; - struct nat64lsn_aliaslink *link; - struct nat64lsn_host *host; - struct nat64lsn_state *state; - uint32_t hval, data[2]; int i; - /* Check that host was not yet added. */ - NAT64LSN_EPOCH_ASSERT(); - CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) { - if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) { - /* The host was allocated in previous call. 
*/ - ji->host = host; - goto get_state; - } + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh, + nh->pg_used); + NAT64_LOCK_DESTROY(nh); + for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++) + uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i)); + uma_zfree(nat64lsn_host_zone, nh); +} + +static NAT64NOINLINE int +alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + struct nat64lsn_host *nh; + char a[INET6_ADDRSTRLEN]; + + nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT); + if (nh == NULL) + return (1); + PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT); + if (PORTGROUP_CHUNK(nh, 0) == NULL) { + uma_zfree(nat64lsn_host_zone, nh); + return (2); + } + if (alloc_portgroup(ji) != 0) { + NAT64STAT_INC(&cfg->base.stats, jportfails); + uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0)); + uma_zfree(nat64lsn_host_zone, nh); + return (3); } - host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT); - if (ji->host == NULL) - return (HOST_ERROR(1)); + NAT64_LOCK_INIT(nh); + nh->addr = ji->haddr; + nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */ + nh->pg_allocated = NAT64LSN_PGIDX_CHUNK; + nh->pg_used = 0; + ji->nh = nh; - host->states_hashsize = NAT64LSN_HSIZE; - host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) * - host->states_hashsize, M_NAT64LSN, M_NOWAIT); - if (host->states_hash == NULL) { - uma_zfree(nat64lsn_host_zone, host); - return (HOST_ERROR(2)); + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh); + return (0); +} + +/* + * Finds free @pg index inside @nh + */ +static NAT64NOINLINE int +find_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx) +{ + int i; + + for (i = 0; i < nh->pg_allocated; i++) { + if (PORTGROUP_BYSIDX(cfg, nh, i + 1) == NULL) { + *idx = i; + return (0); + } } + return (1); +} - link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT); - if (link == NULL) 
{ - free(host->states_hash, M_NAT64LSN); - uma_zfree(nat64lsn_host_zone, host); - return (HOST_ERROR(3)); +static NAT64NOINLINE int +attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + char a[INET6_ADDRSTRLEN]; + struct nat64lsn_host *nh; + + I6HASH_FIND(cfg, nh, &ji->haddr); + if (nh == NULL) { + /* Add new host to list */ + nh = ji->nh; + I6HASH_INSERT(cfg, nh); + cfg->ihcount++; + ji->nh = NULL; + + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh); + /* + * Try to add portgroup. + * Note it will automatically set + * 'done' on ji if successful. + */ + if (attach_portgroup(cfg, ji) != 0) { + DPRINTF(DP_DROPS, "%s %p failed to attach PG", + a, nh); + NAT64STAT_INC(&cfg->base.stats, jportfails); + return (1); + } + return (0); } - /* Initialize */ - HOST_LOCK_INIT(host); - SET_AGE(host->timestamp); - host->addr = ji->f_id.src_ip6; - host->hval = ji->src6_hval; - host->flags = 0; - host->states_count = 0; - host->states_hashsize = NAT64LSN_HSIZE; - CK_SLIST_INIT(&host->aliases); - for (i = 0; i < host->states_hashsize; i++) - CK_SLIST_INIT(&host->states_hash[i]); - - /* Determine alias from flow hash. */ - hval = ALIASLINK_HVAL(cfg, &ji->f_id); - link->alias = &ALIAS_BYHASH(cfg, hval); - CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries); - - ALIAS_LOCK(link->alias); - CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries); - link->alias->hosts_count++; - ALIAS_UNLOCK(link->alias); - - CFG_LOCK(cfg); - CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries); - cfg->hosts_count++; - CFG_UNLOCK(cfg); - -get_state: - data[0] = ji->faddr; - data[1] = (ji->f_id.dst_port << 16) | ji->port; - ji->state_hval = hval = STATE_HVAL(cfg, data); - state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval, - ji->faddr, ji->port, ji->proto); /* - * We failed to obtain new state, used alias needs new PG. - * XXX: or another alias should be used. + * nh isn't NULL. 
This probably means we had several simultaneous + * host requests. The previous one request has already attached + * this host. Requeue attached mbuf and mark job as done, but + * leave nh and pg pointers not changed, so nat64lsn_do_request() + * will release all allocated resources. */ - if (state == NULL) { - /* Try to allocate new PG */ - if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0)) - return (HOST_ERROR(4)); - /* We assume that nat64lsn_alloc_pg() got state */ - } else - ji->state = state; - + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ, "%s %p is already attached as %p", + a, ji->nh, nh); ji->done = 1; - DPRINTF(DP_OBJ, "ALLOC HOST %s %p", - inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host); - return (HOST_ERROR(0)); + return (0); } -static int -nat64lsn_find_pg_place(uint32_t *data) +static NAT64NOINLINE int +find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off, + int nat_proto, uint16_t *aport, int *ppg_idx) { - int i; + int j, pg_idx; + + pg_idx = addr_off * _ADDR_PG_COUNT + + (nat_proto - 1) * _ADDR_PG_PROTO_COUNT; - for (i = 0; i < 32; i++) { - if (~data[i] == 0) + for (j = NAT64_MIN_CHUNK; j < _ADDR_PG_PROTO_COUNT; j++) { + if (cfg->pg[pg_idx + j] != NULL) continue; - return (i * 32 + ffs(~data[i]) - 1); + + *aport = j * NAT64_CHUNK_SIZE; + *ppg_idx = pg_idx + j; + return (1); } - return (-1); + + return (0); } -static int -nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg, - struct nat64lsn_alias *alias, uint32_t *chunkmask, - uint32_t *pgmask, struct nat64lsn_pgchunk **chunks, - struct nat64lsn_pg **pgptr, uint8_t proto) +/* + * XXX: This function needs to be rewritten to + * use free bitmask for faster pg finding, + * additionally, it should take into consideration + * a) randomization and + * b) previous addresses allocated to given nat instance + * + */ +static NAT64NOINLINE int +find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji, + uint32_t *aaddr, uint16_t *aport, int *ppg_idx) { - struct 
nat64lsn_pg *pg; - int i, pg_idx, chunk_idx; - - /* Find place in pgchunk where PG can be added */ - pg_idx = nat64lsn_find_pg_place(pgmask); - if (pg_idx < 0) /* no more PGs */ - return (PG_ERROR(1)); - /* Check that we have allocated pgchunk for given PG index */ - chunk_idx = pg_idx / 32; - if (!ISSET32(*chunkmask, chunk_idx)) { - chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone, - M_NOWAIT); - if (chunks[chunk_idx] == NULL) - return (PG_ERROR(2)); - ck_pr_bts_32(chunkmask, chunk_idx); - ck_pr_fence_store(); - } - /* Allocate PG and states chunks */ - pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT); - if (pg == NULL) - return (PG_ERROR(3)); - pg->chunks_count = cfg->states_chunks; - if (pg->chunks_count > 1) { - pg->freemask_chunk = malloc(pg->chunks_count * - sizeof(uint64_t), M_NAT64LSN, M_NOWAIT); - if (pg->freemask_chunk == NULL) { - uma_zfree(nat64lsn_pg_zone, pg); - return (PG_ERROR(4)); - } - pg->states_chunk = malloc(pg->chunks_count * - sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN, - M_NOWAIT | M_ZERO); - if (pg->states_chunk == NULL) { - free(pg->freemask_chunk, M_NAT64LSN); - uma_zfree(nat64lsn_pg_zone, pg); - return (PG_ERROR(5)); - } - for (i = 0; i < pg->chunks_count; i++) { - pg->states_chunk[i] = uma_zalloc( - nat64lsn_state_zone, M_NOWAIT); - if (pg->states_chunk[i] == NULL) - goto states_failed; + int i, nat_proto; + + /* + * XXX: Use bitmask index to be able to find/check if IP address + * has some spare pg's + */ + nat_proto = ji->nat_proto; + + /* First, try to use same address */ + if (ji->aaddr != 0) { + i = ntohl(ji->aaddr) - cfg->prefix4; + if (find_pg_place_addr(cfg, i, nat_proto, aport, + ppg_idx) != 0){ + /* Found! 
*/ + *aaddr = htonl(cfg->prefix4 + i); + return (0); } - memset(pg->freemask_chunk, 0xFF, - sizeof(uint64_t) * pg->chunks_count); - } else { - pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT); - if (pg->states == NULL) { - uma_zfree(nat64lsn_pg_zone, pg); - return (PG_ERROR(6)); + } + + /* Next, try to use random address based on flow hash */ + i = ji->fhash % (1 << (32 - cfg->plen4)); + if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0) { + /* Found! */ + *aaddr = htonl(cfg->prefix4 + i); + return (0); + } + + + /* Last one: simply find ANY available */ + for (i = 0; i < (1 << (32 - cfg->plen4)); i++) { + if (find_pg_place_addr(cfg, i, nat_proto, aport, + ppg_idx) != 0){ + /* Found! */ + *aaddr = htonl(cfg->prefix4 + i); + return (0); } - memset(&pg->freemask, 0xFF, sizeof(uint64_t)); } - /* Initialize PG and hook it to pgchunk */ - SET_AGE(pg->timestamp); - pg->proto = proto; - pg->base_port = NAT64_MIN_PORT + 64 * pg_idx; - ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg); - ck_pr_fence_store(); - ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32); - ck_pr_store_ptr(pgptr, pg); - - ALIAS_LOCK(alias); - CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries); - SET_AGE(alias->timestamp); - alias->portgroups_count++; - ALIAS_UNLOCK(alias); + return (1); +} + +static NAT64NOINLINE int +attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + char a[INET6_ADDRSTRLEN]; + struct nat64lsn_portgroup *pg; + struct nat64lsn_host *nh; + uint32_t aaddr; + uint16_t aport; + int nh_pg_idx, pg_idx; + + pg = ji->pg; + + /* + * Find source host and bind: we can't rely on + * pg->host + */ + I6HASH_FIND(cfg, nh, &ji->haddr); + if (nh == NULL) + return (1); + + /* Find spare port chunk */ + if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) { + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a); + return (2); + } + + /* Expand PG indexes if needed */ + if 
(nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) { + PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) = + ji->spare_idx; + nh->pg_allocated += NAT64LSN_PGIDX_CHUNK; + ji->spare_idx = NULL; + } + + /* Find empty index to store PG in the @nh */ + if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) { + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s", + a); + return (3); + } + + cfg->pg[pg_idx] = pg; + cfg->protochunks[pg->nat_proto]++; NAT64STAT_INC(&cfg->base.stats, spgcreated); - return (PG_ERROR(0)); -states_failed: - for (i = 0; i < pg->chunks_count; i++) - uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]); - free(pg->freemask_chunk, M_NAT64LSN); - free(pg->states_chunk, M_NAT64LSN); - uma_zfree(nat64lsn_pg_zone, pg); - return (PG_ERROR(7)); + pg->aaddr = aaddr; + pg->aport = aport; + pg->host = nh; + pg->idx = pg_idx; + SET_AGE(pg->timestamp); + + PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg; + if (nh->pg_used == nh_pg_idx) + nh->pg_used++; + SET_AGE(nh->timestamp); + + ji->pg = NULL; + ji->done = 1; + + return (0); } -static int -nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +static NAT64NOINLINE void +consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) { - struct nat64lsn_aliaslink *link; - struct nat64lsn_alias *alias; - int ret; + struct nat64lsn_host *nh, *nh_tmp; + struct nat64lsn_portgroup *pg, *pg_list[256]; + int i, pg_lidx, idx; + + /* Find source host */ + I6HASH_FIND(cfg, nh, &ji->haddr); + if (nh == NULL || nh->pg_used == 0) + return; - link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id); - if (link == NULL) - return (PG_ERROR(1)); + memset(pg_list, 0, sizeof(pg_list)); + pg_lidx = 0; - /* - * TODO: check that we did not already allocated PG in - * previous call. 
- */ + NAT64_LOCK(nh); - ret = 0; - alias = link->alias; - /* Find place in pgchunk where PG can be added */ - switch (ji->proto) { - case IPPROTO_TCP: - ret = nat64lsn_alloc_proto_pg(cfg, alias, - &alias->tcp_chunkmask, alias->tcp_pgmask, - alias->tcp, &alias->tcp_pg, ji->proto); - break; - case IPPROTO_UDP: - ret = nat64lsn_alloc_proto_pg(cfg, alias, - &alias->udp_chunkmask, alias->udp_pgmask, - alias->udp, &alias->udp_pg, ji->proto); - break; - case IPPROTO_ICMP: - ret = nat64lsn_alloc_proto_pg(cfg, alias, - &alias->icmp_chunkmask, alias->icmp_pgmask, - alias->icmp, &alias->icmp_pg, ji->proto); - break; - default: - panic("%s: wrong proto %d", __func__, ji->proto); + for (i = nh->pg_used - 1; i >= 0; i--) { + if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0) + continue; + pg = PORTGROUP_BYSIDX(cfg, nh, i + 1); + + /* Check that PG isn't busy. */ + if (stale_pg(cfg, pg) == 0) + continue; + + /* DO delete */ + pg_list[pg_lidx++] = pg; + PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL; + + idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto, + pg->aport); + KASSERT(cfg->pg[idx] == pg, ("Non matched pg")); + cfg->pg[idx] = NULL; + cfg->protochunks[pg->nat_proto]--; + NAT64STAT_INC(&cfg->base.stats, spgdeleted); + + /* Decrease pg_used */ + while (nh->pg_used > 0 && + PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL) + nh->pg_used--; + + /* Check if on-stack buffer has ended */ + if (pg_lidx == nitems(pg_list)) + break; } - if (ret == PG_ERROR(1)) { - /* - * PG_ERROR(1) means that alias lacks free PGs - * XXX: try next alias. 
- */ - printf("NAT64LSN: %s: failed to obtain PG\n", - __func__); - return (ret); + + NAT64_UNLOCK(nh); + + if (stale_nh(cfg, nh)) { + I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr); + KASSERT(nh != NULL, ("Unable to find address")); + cfg->ihcount--; + ji->nh = nh; /* unlinked host rides on the job; nat64lsn_do_request() destroys it */ + I6HASH_FIND(cfg, nh, &ji->haddr); + KASSERT(nh == NULL, ("Failed to delete address")); } - if (ret == PG_ERROR(0)) { - ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id, - ji->state_hval, ji->faddr, ji->port, ji->proto); - if (ji->state == NULL) - ret = PG_ERROR(8); - else - ji->done = 1; + + /* TODO: Delay freeing portgroups */ + while (pg_lidx > 0) { + pg_lidx--; + /* spgdeleted was already counted when the PG was unlinked under the host lock */ + destroy_portgroup(pg_list[pg_lidx]); } - return (ret); } -static void -nat64lsn_do_request(void *data) +/* + * Main request handler. + * Responsible for handling jqueue, e.g. + * creating new hosts, adding/deleting portgroups. + */ +static NAT64NOINLINE void +nat64lsn_do_request(void *data) { - struct epoch_tracker et; + IPFW_RLOCK_TRACKER; struct nat64lsn_job_head jhead; - struct nat64lsn_job_item *ji, *ji2; - struct nat64lsn_cfg *cfg; - int jcount; - uint8_t flags; - - cfg = (struct nat64lsn_cfg *)data; - if (cfg->jlen == 0) - return; + struct nat64lsn_job_item *ji; + int jcount, nhsize; + struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data; + struct ip_fw_chain *ch; + int delcount; CURVNET_SET(cfg->vp); - STAILQ_INIT(&jhead); + + TAILQ_INIT(&jhead); + + /* XXX: We're running unlocked here */ + + ch = cfg->ch; + delcount = 0; + IPFW_RLOCK(ch); /* Grab queue */ JQUEUE_LOCK(); - STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item); + TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next); jcount = cfg->jlen; cfg->jlen = 0; JQUEUE_UNLOCK(); - /* TODO: check if we need to resize hash */ + /* check if we need to resize hash */ + nhsize = 0; + if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) { + nhsize = cfg->ihsize; + for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2) 
+ ; + } else if (cfg->ihcount < cfg->ihsize * 4) { + nhsize = cfg->ihsize; + for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2) + ; + } + + IPFW_RUNLOCK(ch); + + if (TAILQ_EMPTY(&jhead)) { + CURVNET_RESTORE(); + return; + } NAT64STAT_INC(&cfg->base.stats, jcalls); DPRINTF(DP_JQUEUE, "count=%d", jcount); @@ -1197,283 +1169,442 @@ nat64lsn_do_request(void *data) * TODO: Limit per-call number of items */ - NAT64LSN_EPOCH_ENTER(et); - STAILQ_FOREACH(ji, &jhead, entries) { + /* Pre-allocate everything for entire chain */ + TAILQ_FOREACH(ji, &jhead, next) { switch (ji->jtype) { - case JTYPE_NEWHOST: - if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0)) - NAT64STAT_INC(&cfg->base.stats, jhostfails); - break; - case JTYPE_NEWPORTGROUP: - if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0)) - NAT64STAT_INC(&cfg->base.stats, jportfails); - break; - default: - continue; + case JTYPE_NEWHOST: + if (alloc_host6(cfg, ji) != 0) + NAT64STAT_INC(&cfg->base.stats, + jhostfails); + break; + case JTYPE_NEWPORTGROUP: + if (alloc_portgroup(ji) != 0) + NAT64STAT_INC(&cfg->base.stats, + jportfails); + break; + case JTYPE_DELPORTGROUP: + delcount += ji->delcount; + break; + default: + break; } - if (ji->done != 0) { - flags = ji->proto != IPPROTO_TCP ? 
0 : - convert_tcp_flags(ji->f_id._flags); - nat64lsn_translate6_internal(cfg, &ji->m, - ji->state, flags); - NAT64STAT_INC(&cfg->base.stats, jreinjected); + } + + /* + * TODO: Alloc new hash + */ + nhsize = 0; + if (nhsize > 0) { + /* XXX: */ + } + + /* Apply all changes in batch */ + IPFW_UH_WLOCK(ch); + IPFW_WLOCK(ch); + + TAILQ_FOREACH(ji, &jhead, next) { + switch (ji->jtype) { + case JTYPE_NEWHOST: + if (ji->nh != NULL) + attach_host6(cfg, ji); + break; + case JTYPE_NEWPORTGROUP: + if (ji->pg != NULL && + attach_portgroup(cfg, ji) != 0) + NAT64STAT_INC(&cfg->base.stats, + jportfails); + break; + case JTYPE_DELPORTGROUP: + consider_del_portgroup(cfg, ji); + break; } } - NAT64LSN_EPOCH_EXIT(et); - ji = STAILQ_FIRST(&jhead); - while (ji != NULL) { - ji2 = STAILQ_NEXT(ji, entries); - /* - * In any case we must free mbuf if - * translator did not consumed it. - */ - m_freem(ji->m); - uma_zfree(nat64lsn_job_zone, ji); - ji = ji2; + if (nhsize > 0) { + /* XXX: Move everything to new hash */ + } + + IPFW_WUNLOCK(ch); + IPFW_UH_WUNLOCK(ch); + + /* Flush unused entries */ + while (!TAILQ_EMPTY(&jhead)) { + ji = TAILQ_FIRST(&jhead); + TAILQ_REMOVE(&jhead, ji, next); + if (ji->nh != NULL) /* host still attached to the job item */ + destroy_host6(ji->nh); + if (ji->pg != NULL) /* portgroup still attached to the job item */ + destroy_portgroup(ji->pg); + if (ji->m != NULL) + reinject_mbuf(cfg, ji); + if (ji->spare_idx != NULL) /* preallocated index chunk still attached */ + uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx); + free(ji, M_IPFW); } CURVNET_RESTORE(); } -static struct nat64lsn_job_item * -nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype) +static NAT64NOINLINE struct nat64lsn_job_item * +nat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id, + int jtype) { struct nat64lsn_job_item *ji; + struct in6_addr haddr; + uint8_t nat_proto; /* - * Do not try to lock possibly contested mutex if we're near the - * limit. Drop packet instead. + * Do not try to lock possibly contested mutex if we're near the limit. + * Drop packet instead. 
*/ - ji = NULL; - if (cfg->jlen >= cfg->jmaxlen) + if (cfg->jlen >= cfg->jmaxlen) { NAT64STAT_INC(&cfg->base.stats, jmaxlen); - else { - ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT); - if (ji == NULL) - NAT64STAT_INC(&cfg->base.stats, jnomem); + return (NULL); + } + + memset(&haddr, 0, sizeof(haddr)); + nat_proto = 0; + if (f_id != NULL) { + haddr = f_id->src_ip6; + nat_proto = nat64lsn_proto_map[f_id->proto]; + + DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d", + nat_proto, f_id->proto); + + if (nat_proto == 0) + return (NULL); } + + ji = malloc(sizeof(struct nat64lsn_job_item), M_IPFW, + M_NOWAIT | M_ZERO); + if (ji == NULL) { - NAT64STAT_INC(&cfg->base.stats, dropped); - DPRINTF(DP_DROPS, "failed to create job"); - } else { - ji->jtype = jtype; - ji->done = 0; + NAT64STAT_INC(&cfg->base.stats, jnomem); + return (NULL); + } + + ji->jtype = jtype; + + if (f_id != NULL) { + ji->f_id = *f_id; + ji->haddr = haddr; + ji->nat_proto = nat_proto; } + return (ji); } -static void +static NAT64NOINLINE void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) { + if (ji == NULL) + return; + JQUEUE_LOCK(); - STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries); - NAT64STAT_INC(&cfg->base.stats, jrequests); + TAILQ_INSERT_TAIL(&cfg->jhead, ji, next); cfg->jlen++; + NAT64STAT_INC(&cfg->base.stats, jrequests); if (callout_pending(&cfg->jcallout) == 0) callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg); JQUEUE_UNLOCK(); } -static void -nat64lsn_job_destroy(epoch_context_t ctx) +static NAT64NOINLINE void +nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg, + struct nat64lsn_job_head *jhead, int jlen) { - struct nat64lsn_job_item *ji; - struct nat64lsn_host *host; - struct nat64lsn_pg *pg; - int i; - ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx); - MPASS(ji->jtype == JTYPE_DESTROY); - while (!CK_SLIST_EMPTY(&ji->hosts)) { - host = CK_SLIST_FIRST(&ji->hosts); - CK_SLIST_REMOVE_HEAD(&ji->hosts, entries); - if (host->states_count > 0) { - 
/* - * XXX: The state has been created - * during host deletion. - */ - printf("NAT64LSN: %s: destroying host with %d " - "states\n", __func__, host->states_count); - } - nat64lsn_destroy_host(host); - } - while (!CK_SLIST_EMPTY(&ji->portgroups)) { - pg = CK_SLIST_FIRST(&ji->portgroups); - CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries); - for (i = 0; i < pg->chunks_count; i++) { - if (~(*FREEMASK_CHUNK(pg, i)) != 0) { - /* - * XXX: The state has been created during - * PG deletion. - */ - printf("NAT64LSN: %s: destroying PG " - "with 0x%jx freemask\n", __func__, - (uintmax_t)*FREEMASK_CHUNK(pg, i)); - } - } - nat64lsn_destroy_pg(pg); - } - uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk); - uma_zfree(nat64lsn_job_zone, ji); + if (TAILQ_EMPTY(jhead)) + return; + + /* Attach current queue to execution one */ + JQUEUE_LOCK(); + TAILQ_CONCAT(&cfg->jhead, jhead, next); + cfg->jlen += jlen; + NAT64STAT_ADD(&cfg->base.stats, jrequests, jlen); + + if (callout_pending(&cfg->jcallout) == 0) + callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg); + JQUEUE_UNLOCK(); } -static int +static unsigned int +flow6_hash(const struct ipfw_flow_id *f_id) +{ + unsigned char hbuf[36]; /* dst_ip6 (16) + src_ip6 (16) + dst_port (2) + src_port (2) */ + + memcpy(hbuf, &f_id->dst_ip6, 16); + memcpy(&hbuf[16], &f_id->src_ip6, 16); + memcpy(&hbuf[32], &f_id->dst_port, 2); /* bytes 32-33 */ + memcpy(&hbuf[34], &f_id->src_port, 2); /* bytes 34-35; offset 32 here would clobber dst_port and hash uninitialised bytes */ + + return (djb_hash(hbuf, sizeof(hbuf))); +} + +static NAT64NOINLINE int nat64lsn_request_host(struct nat64lsn_cfg *cfg, - const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval, - in_addr_t faddr, uint16_t port, uint8_t proto) + const struct ipfw_flow_id *f_id, struct mbuf **pm) { struct nat64lsn_job_item *ji; + struct mbuf *m; - ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST); - if (ji != NULL) { - ji->m = *mp; - ji->f_id = *f_id; - ji->faddr = faddr; - ji->port = port; - ji->proto = proto; - ji->src6_hval = hval; + m = *pm; + *pm = NULL; + ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST); + if (ji == NULL) { + m_freem(m); + 
NAT64STAT_INC(&cfg->base.stats, dropped); + DPRINTF(DP_DROPS, "failed to create job"); + } else { + ji->m = m; + /* Provide pseudo-random value based on flow */ + ji->fhash = flow6_hash(f_id); nat64lsn_enqueue_job(cfg, ji); NAT64STAT_INC(&cfg->base.stats, jhostsreq); - *mp = NULL; } + return (IP_FW_DENY); } -static int -nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host, - const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval, - in_addr_t faddr, uint16_t port, uint8_t proto) +static NAT64NOINLINE int +nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg, + const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr, + int needs_idx) { struct nat64lsn_job_item *ji; + struct mbuf *m; - ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP); - if (ji != NULL) { - ji->m = *mp; - ji->f_id = *f_id; - ji->faddr = faddr; - ji->port = port; - ji->proto = proto; - ji->state_hval = hval; - ji->host = host; + m = *pm; + *pm = NULL; + ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP); + if (ji == NULL) { + m_freem(m); + NAT64STAT_INC(&cfg->base.stats, dropped); + DPRINTF(DP_DROPS, "failed to create job"); + } else { + ji->m = m; + /* Provide pseudo-random value based on flow */ + ji->fhash = flow6_hash(f_id); + ji->aaddr = aaddr; + ji->needs_idx = needs_idx; nat64lsn_enqueue_job(cfg, ji); NAT64STAT_INC(&cfg->base.stats, jportreq); - *mp = NULL; } + return (IP_FW_DENY); } -static int -nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp, - struct nat64lsn_state *state, uint8_t flags) +static NAT64NOINLINE struct nat64lsn_state * +nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, + int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr) { - struct pfloghdr loghdr, *logdata; - int ret; - uint16_t ts; + struct nat64lsn_portgroup *pg; + struct nat64lsn_state *st; + int i, hval, off; + + /* XXX: create additional bitmask for selecting proper portgroup */ + for (i = 0; i < nh->pg_used; i++) { + pg = 
PORTGROUP_BYSIDX(cfg, nh, i + 1); + if (pg == NULL) + continue; + if (*aaddr == 0) + *aaddr = pg->aaddr; + if (pg->nat_proto != nat_proto) + continue; - /* Update timestamp and flags if needed */ - SET_AGE(ts); - if (state->timestamp != ts) - state->timestamp = ts; - if ((state->flags & flags) != 0) - state->flags |= flags; + off = PG_GET_FREE_IDX(pg); + if (off != 0) { + /* We have found spare state. Use it */ + off--; + PG_MARK_BUSY_IDX(pg, off); + st = &pg->states[off]; - if (cfg->base.flags & NAT64_LOG) { - logdata = &loghdr; - nat64lsn_log(logdata, *mp, AF_INET6, (uintptr_t)state); - } else - logdata = NULL; + /* + * Fill in new info. Assume state was zeroed. + * Timestamp and flags will be filled by caller. + */ + st->u.s = kst->u.s; + st->cur.idx = i + 1; + st->cur.off = off; - ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src), - htons(state->aport), &cfg->base, logdata); - if (ret == NAT64SKIP) - return (cfg->nomatch_verdict); - if (ret == NAT64RETURN) - *mp = NULL; - return (IP_FW_DENY); + /* Insert into host hash table */ + hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1); + st->next = nh->phash[hval]; + nh->phash[hval] = st->cur; + + nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off); + + NAT64STAT_INC(&cfg->base.stats, screated); + + return (st); + } + /* Save last used alias address */ + *aaddr = pg->aaddr; + } + + return (NULL); } -static int +static NAT64NOINLINE int nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id, - struct mbuf **mp) + struct mbuf **pm) { - struct nat64lsn_state *state; - struct nat64lsn_host *host; + struct pfloghdr loghdr, *logdata; + char a[INET6_ADDRSTRLEN]; + struct nat64lsn_host *nh; + struct st_ptr sidx; + struct nat64lsn_state *st, kst; + struct nat64lsn_portgroup *pg; struct icmp6_hdr *icmp6; - uint32_t addr, hval, data[2]; - int offset, proto; - uint16_t port; - uint8_t flags; - - /* Check if protocol is supported */ - port = f_id->src_port; - proto = f_id->proto; - switch (f_id->proto) { - case 
IPPROTO_ICMPV6: + uint32_t aaddr; + int action, hval, nat_proto, proto; + uint16_t aport, state_ts, state_flags; + + /* Check if af/protocol is supported and get it short id */ + nat_proto = nat64lsn_proto_map[f_id->proto]; + if (nat_proto == 0) { /* - * For ICMPv6 echo reply/request we use icmp6_id as - * local port. + * Since we can be called from jobs handler, we need + * to free mbuf by self, do not leave this task to + * ipfw_check_packet(). */ - offset = 0; - proto = nat64_getlasthdr(*mp, &offset); - if (proto < 0) { - NAT64STAT_INC(&cfg->base.stats, dropped); - DPRINTF(DP_DROPS, "mbuf isn't contigious"); - return (IP_FW_DENY); - } - if (proto == IPPROTO_ICMPV6) { - icmp6 = mtodo(*mp, offset); - if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST || - icmp6->icmp6_type == ICMP6_ECHO_REPLY) - port = ntohs(icmp6->icmp6_id); - } - proto = IPPROTO_ICMP; - /* FALLTHROUGH */ - case IPPROTO_TCP: - case IPPROTO_UDP: - break; - default: NAT64STAT_INC(&cfg->base.stats, noproto); - return (cfg->nomatch_verdict); + goto drop; } - /* Extract IPv4 from destination IPv6 address */ - addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen); - if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) { - char a[INET_ADDRSTRLEN]; + /* Try to find host first */ + I6HASH_FIND(cfg, nh, &f_id->src_ip6); + if (nh == NULL) + return (nat64lsn_request_host(cfg, f_id, pm)); + + /* Fill-in on-stack state structure */ + kst.u.s.faddr = nat64_extract_ip4(&f_id->dst_ip6, + cfg->base.plat_plen); + if (kst.u.s.faddr == 0 || + nat64_check_private_ip4(&cfg->base, kst.u.s.faddr) != 0) { + NAT64STAT_INC(&cfg->base.stats, dropped); + goto drop; + } + kst.u.s.fport = f_id->dst_port; + kst.u.s.lport = f_id->src_port; + + /* Prepare some fields we might need to update */ + hval = 0; + proto = nat64_getlasthdr(*pm, &hval); + if (proto < 0) { NAT64STAT_INC(&cfg->base.stats, dropped); - DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s", - inet_ntop(AF_INET, &addr, a, sizeof(a))); - return 
(IP_FW_DENY); /* XXX: add extra stats? */ + DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); + goto drop; } - /* Try to find host */ - hval = HOST_HVAL(cfg, &f_id->src_ip6); - CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) { - if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr)) + SET_AGE(state_ts); + if (proto == IPPROTO_TCP) + state_flags = convert_tcp_flags( + TCP(mtodo(*pm, hval))->th_flags); + else + state_flags = 0; + if (proto == IPPROTO_ICMPV6) { + /* Alter local port data */ + icmp6 = mtodo(*pm, hval); + if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST || + icmp6->icmp6_type == ICMP6_ECHO_REPLY) + kst.u.s.lport = ntohs(icmp6->icmp6_id); + } + + hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1); + pg = NULL; + st = NULL; + + /* OK, let's find state in host hash */ + NAT64_LOCK(nh); + sidx = nh->phash[hval]; + int k = 0; + while (sidx.idx != 0) { + pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); + st = &pg->states[sidx.off]; + //DPRINTF("SISX: %d/%d next: %d/%d", sidx.idx, sidx.off, + //st->next.idx, st->next.off); + if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto) break; + if (k++ > 1000) { + DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n", + sidx.idx, sidx.off, st->next.idx, st->next.off); + DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d", + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)), + nh, curcpu); + k = 0; + } + sidx = st->next; + } + + if (sidx.idx == 0) { + aaddr = 0; + st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr); + if (st == NULL) { + /* No free states. 
Request more if we can */ + if (nh->pg_used >= cfg->max_chunks) { + /* Limit reached */ + DPRINTF(DP_DROPS, "PG limit reached " + " for host %s (used %u, allocated %u, " + "limit %u)", inet_ntop(AF_INET6, + &nh->addr, a, sizeof(a)), + nh->pg_used * NAT64_CHUNK_SIZE, + nh->pg_allocated * NAT64_CHUNK_SIZE, + cfg->max_chunks * NAT64_CHUNK_SIZE); + NAT64_UNLOCK(nh); + NAT64STAT_INC(&cfg->base.stats, dropped); + goto drop; + } + if ((nh->pg_allocated <= + nh->pg_used + NAT64LSN_REMAININGPG) && + nh->pg_allocated < cfg->max_chunks) + action = 1; /* Request new indexes */ + else + action = 0; + NAT64_UNLOCK(nh); + //DPRINTF("No state, unlock for %p", nh); + return (nat64lsn_request_portgroup(cfg, f_id, + pm, aaddr, action)); + } + + /* We've got new state. */ + sidx = st->cur; + pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); + } + + /* Okay, state found */ + + /* Update necessary fields */ + if (st->timestamp != state_ts) + st->timestamp = state_ts; + if ((st->flags & state_flags) != state_flags) /* write only when a flag is not recorded yet; "!= 0" never set flags on a zeroed state */ + st->flags |= state_flags; + + /* Copy needed state data */ + aaddr = pg->aaddr; + aport = htons(pg->aport + sidx.off); + + NAT64_UNLOCK(nh); + + if (cfg->base.flags & NAT64_LOG) { + logdata = &loghdr; + nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off); + } else + logdata = NULL; + + action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->base, logdata); + if (action == NAT64SKIP) + return (cfg->nomatch_verdict); + if (action == NAT64MFREE) { +drop: + m_freem(*pm); } - /* We use IPv4 address in host byte order */ - addr = ntohl(addr); - if (host == NULL) - return (nat64lsn_request_host(cfg, f_id, mp, - hval, addr, port, proto)); - - flags = proto != IPPROTO_TCP ? 
0 : convert_tcp_flags(f_id->_flags); - - data[0] = addr; - data[1] = (f_id->dst_port << 16) | port; - hval = STATE_HVAL(cfg, data); - state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr, - port, proto); - if (state == NULL) - return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr, - port, proto)); - return (nat64lsn_translate6_internal(cfg, mp, state, flags)); + *pm = NULL; /* mark mbuf as consumed */ + return (IP_FW_DENY); } /* @@ -1483,61 +1614,49 @@ int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args, ipfw_insn *cmd, int *done) { - struct epoch_tracker et; - struct nat64lsn_cfg *cfg; ipfw_insn *icmd; + struct nat64lsn_cfg *cfg; int ret; IPFW_RLOCK_ASSERT(ch); - *done = 0; /* continue the search in case of failure */ + *done = 1; /* terminate the search */ icmd = cmd + 1; if (cmd->opcode != O_EXTERNAL_ACTION || cmd->arg1 != V_nat64lsn_eid || icmd->opcode != O_EXTERNAL_INSTANCE || (cfg = NAT64_LOOKUP(ch, icmd)) == NULL) - return (IP_FW_DENY); - - *done = 1; /* terminate the search */ + return (0); - NAT64LSN_EPOCH_ENTER(et); switch (args->f_id.addr_type) { case 4: ret = nat64lsn_translate4(cfg, &args->f_id, &args->m); break; case 6: - /* - * Check that destination IPv6 address matches our prefix6. 
- */ - if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 && - memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix, - cfg->base.plat_plen / 8) != 0) { - ret = cfg->nomatch_verdict; - break; - } ret = nat64lsn_translate6(cfg, &args->f_id, &args->m); break; default: - ret = cfg->nomatch_verdict; - } - NAT64LSN_EPOCH_EXIT(et); - - if (ret != IP_FW_PASS && args->m != NULL) { - m_freem(args->m); - args->m = NULL; + return (cfg->nomatch_verdict); } return (ret); } static int -nat64lsn_state_ctor(void *mem, int size, void *arg, int flags) +nat64lsn_ctor_host(void *mem, int size, void *arg, int flags) +{ + struct nat64lsn_host *nh; + + nh = (struct nat64lsn_host *)mem; + memset(nh->pg_ptr, 0, sizeof(nh->pg_ptr)); + memset(nh->phash, 0, sizeof(nh->phash)); + return (0); +} + +static int +nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags) { - struct nat64lsn_states_chunk *chunk; - int i; - chunk = (struct nat64lsn_states_chunk *)mem; - for (i = 0; i < 64; i++) - chunk->state[i].flags = 0; + memset(mem, 0, size); return (0); } @@ -1545,185 +1664,109 @@ void nat64lsn_init_internal(void) { - nat64lsn_epoch = epoch_alloc(EPOCH_PREEMPT); + memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map)); + /* Set up supported protocol map */ + nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP; + nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP; + nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP; + nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP; + /* Fill in reverse proto map */ + memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map)); + nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP; + nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP; + nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6; - nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts", - sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks", - sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - 
nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups", - sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links", - sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - nat64lsn_state_zone = uma_zcreate("NAT64LSN states", - sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor, - NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs", - sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); JQUEUE_LOCK_INIT(); + nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone", + sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL, + NULL, NULL, UMA_ALIGN_PTR, 0); + nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone", + sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + nat64lsn_pgidx_zone = uma_zcreate("NAT64 portgroup indexes zone", + sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK, + nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } void nat64lsn_uninit_internal(void) { - /* XXX: epoch_task drain */ - epoch_free(nat64lsn_epoch); - JQUEUE_LOCK_DESTROY(); uma_zdestroy(nat64lsn_host_zone); - uma_zdestroy(nat64lsn_pgchunk_zone); uma_zdestroy(nat64lsn_pg_zone); - uma_zdestroy(nat64lsn_aliaslink_zone); - uma_zdestroy(nat64lsn_state_zone); - uma_zdestroy(nat64lsn_job_zone); + uma_zdestroy(nat64lsn_pgidx_zone); } void nat64lsn_start_instance(struct nat64lsn_cfg *cfg) { - CALLOUT_LOCK(cfg); callout_reset(&cfg->periodic, hz * PERIODIC_DELAY, nat64lsn_periodic, cfg); - CALLOUT_UNLOCK(cfg); } struct nat64lsn_cfg * -nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen) +nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr) { struct nat64lsn_cfg *cfg; - struct nat64lsn_alias *alias; - int i, naddr; - cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN, - M_WAITOK | M_ZERO); - - CFG_LOCK_INIT(cfg); - CALLOUT_LOCK_INIT(cfg); - 
STAILQ_INIT(&cfg->jhead); + cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO); + TAILQ_INIT(&cfg->jhead); cfg->vp = curvnet; + cfg->ch = ch; COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK); - cfg->hash_seed = arc4random(); - cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE; - cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) * - cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO); - for (i = 0; i < cfg->hosts_hashsize; i++) - CK_SLIST_INIT(&cfg->hosts_hash[i]); - - naddr = 1 << (32 - plen); - cfg->prefix4 = prefix; - cfg->pmask4 = prefix | (naddr - 1); - cfg->plen4 = plen; - cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr, - M_NAT64LSN, M_WAITOK | M_ZERO); - for (i = 0; i < naddr; i++) { - alias = &cfg->aliases[i]; - alias->addr = prefix + i; /* host byte order */ - CK_SLIST_INIT(&alias->hosts); - ALIAS_LOCK_INIT(alias); - } + cfg->ihsize = NAT64LSN_HSIZE; + cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW, + M_WAITOK | M_ZERO); - callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0); + cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW, + M_WAITOK | M_ZERO); + + callout_init(&cfg->periodic, CALLOUT_MPSAFE); callout_init(&cfg->jcallout, CALLOUT_MPSAFE); return (cfg); } -static void -nat64lsn_destroy_pg(struct nat64lsn_pg *pg) -{ - int i; - - if (pg->chunks_count == 1) { - uma_zfree(nat64lsn_state_zone, pg->states); - } else { - for (i = 0; i < pg->chunks_count; i++) - uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]); - free(pg->states_chunk, M_NAT64LSN); - free(pg->freemask_chunk, M_NAT64LSN); - } - uma_zfree(nat64lsn_pg_zone, pg); -} - -static void -nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg, - struct nat64lsn_alias *alias) +/* + * Destroy all hosts callback. + * Called on module unload when all activity already finished, so + * can work without any locks. 
+ */ +static NAT64NOINLINE int +nat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg) { - struct nat64lsn_pg *pg; + struct nat64lsn_portgroup *pg; int i; - while (!CK_SLIST_EMPTY(&alias->portgroups)) { - pg = CK_SLIST_FIRST(&alias->portgroups); - CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries); - nat64lsn_destroy_pg(pg); - } - for (i = 0; i < 32; i++) { - if (ISSET32(alias->tcp_chunkmask, i)) - uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]); - if (ISSET32(alias->udp_chunkmask, i)) - uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]); - if (ISSET32(alias->icmp_chunkmask, i)) - uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]); - } - ALIAS_LOCK_DESTROY(alias); -} - -static void -nat64lsn_destroy_host(struct nat64lsn_host *host) -{ - struct nat64lsn_aliaslink *link; - - while (!CK_SLIST_EMPTY(&host->aliases)) { - link = CK_SLIST_FIRST(&host->aliases); - CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries); - - ALIAS_LOCK(link->alias); - CK_SLIST_REMOVE(&link->alias->hosts, link, - nat64lsn_aliaslink, alias_entries); - link->alias->hosts_count--; - ALIAS_UNLOCK(link->alias); - - uma_zfree(nat64lsn_aliaslink_zone, link); + for (i = nh->pg_used; i > 0; i--) { + pg = PORTGROUP_BYSIDX(cfg, nh, i); + if (pg == NULL) + continue; + cfg->pg[pg->idx] = NULL; + destroy_portgroup(pg); + nh->pg_used--; } - HOST_LOCK_DESTROY(host); - free(host->states_hash, M_NAT64LSN); - uma_zfree(nat64lsn_host_zone, host); + destroy_host6(nh); + cfg->ihcount--; + return (0); } void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg) { - struct nat64lsn_host *host; - int i; + struct nat64lsn_host *nh, *tmp; - CALLOUT_LOCK(cfg); - callout_drain(&cfg->periodic); - CALLOUT_UNLOCK(cfg); callout_drain(&cfg->jcallout); + callout_drain(&cfg->periodic); + I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg); + DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount); - for (i = 0; i < cfg->hosts_hashsize; i++) { - while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) { - host = 
CK_SLIST_FIRST(&cfg->hosts_hash[i]); - CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries); - nat64lsn_destroy_host(host); - } - } - - for (i = 0; i < (1 << (32 - cfg->plen4)); i++) - nat64lsn_destroy_alias(cfg, &cfg->aliases[i]); - - CALLOUT_LOCK_DESTROY(cfg); - CFG_LOCK_DESTROY(cfg); COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS); - free(cfg->hosts_hash, M_NAT64LSN); - free(cfg->aliases, M_NAT64LSN); - free(cfg, M_NAT64LSN); + free(cfg->ih, M_IPFW); + free(cfg->pg, M_IPFW); + free(cfg, M_IPFW); } diff --git a/sys/netpfil/ipfw/nat64/nat64lsn.h b/sys/netpfil/ipfw/nat64/nat64lsn.h index d974efcd12a..44036cb3efc 100644 --- a/sys/netpfil/ipfw/nat64/nat64lsn.h +++ b/sys/netpfil/ipfw/nat64/nat64lsn.h @@ -35,130 +35,75 @@ #include "ip_fw_nat64.h" #include "nat64_translate.h" -#define NAT64_MIN_PORT 1024 -struct nat64lsn_host; -struct nat64lsn_alias; +#define NAT64_CHUNK_SIZE_BITS 6 /* 64 ports */ +#define NAT64_CHUNK_SIZE (1 << NAT64_CHUNK_SIZE_BITS) -struct nat64lsn_state { - /* IPv6 host entry keeps hash table to speedup state lookup */ - CK_SLIST_ENTRY(nat64lsn_state) entries; - struct nat64lsn_host *host; - - struct in6_addr ip6_dst; /* Destination IPv6 address */ - - in_addr_t ip_src; /* Alias IPv4 address */ - in_addr_t ip_dst; /* Destination IPv4 address */ - uint16_t dport; /* Destination port */ - uint16_t sport; /* Source port */ - - uint32_t hval; - uint32_t flags; /* Internal flags */ - uint16_t aport; - uint16_t timestamp; /* last used */ - uint8_t proto; - uint8_t _spare[7]; -}; +#define NAT64_MIN_PORT 1024 +#define NAT64_MIN_CHUNK (NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS) -struct nat64lsn_states_chunk { - struct nat64lsn_state state[64]; +struct st_ptr { + uint8_t idx; /* index in nh->pg_ptr array. + * NOTE: it starts from 1. 
+ */ + uint8_t off; }; +#define NAT64LSN_MAXPGPTR ((1 << (sizeof(uint8_t) * NBBY)) - 1) +#define NAT64LSN_PGPTRMASKBITS (sizeof(uint64_t) * NBBY) +#define NAT64LSN_PGPTRNMASK (roundup(NAT64LSN_MAXPGPTR, \ + NAT64LSN_PGPTRMASKBITS) / NAT64LSN_PGPTRMASKBITS) -#define ISSET64(mask, bit) ((mask) & ((uint64_t)1 << (bit))) -#define ISSET32(mask, bit) ((mask) & ((uint32_t)1 << (bit))) -struct nat64lsn_pg { - CK_SLIST_ENTRY(nat64lsn_pg) entries; - - uint16_t base_port; - uint16_t timestamp; - uint8_t proto; - uint8_t chunks_count; - uint8_t spare[2]; - - union { - uint64_t freemask; - uint64_t *freemask_chunk; - }; - union { - struct nat64lsn_states_chunk *states; - struct nat64lsn_states_chunk **states_chunk; - }; -}; - -struct nat64lsn_pgchunk { - struct nat64lsn_pg *pgptr[32]; +struct nat64lsn_portgroup; +/* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes */ +struct nat64lsn_host { + struct rwlock h_lock; /* Host states lock */ + + struct in6_addr addr; + struct nat64lsn_host *next; + uint16_t timestamp; /* Last altered */ + uint16_t hsize; /* ports hash size */ + uint16_t pg_used; /* Number of portgroups used */ +#define NAT64LSN_REMAININGPG 8 /* Number of remaining PG before + * requesting of new chunk of indexes. + */ + uint16_t pg_allocated; /* Number of portgroups indexes + * allocated. + */ +#define NAT64LSN_HSIZE 64 + struct st_ptr phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */ + /* + * PG indexes are stored in chunks with 32 elements. + * The maximum count is limited to 255 due to st_ptr->idx is uint8_t. 
+ */ +#define NAT64LSN_PGIDX_CHUNK 32 +#define NAT64LSN_PGNIDX (roundup(NAT64LSN_MAXPGPTR, \ + NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK) + struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes */ }; -struct nat64lsn_aliaslink { - CK_SLIST_ENTRY(nat64lsn_aliaslink) alias_entries; - CK_SLIST_ENTRY(nat64lsn_aliaslink) host_entries; - struct nat64lsn_alias *alias; -}; +#define NAT64_RLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_RLOCKED) +#define NAT64_WLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_WLOCKED) -CK_SLIST_HEAD(nat64lsn_aliaslink_slist, nat64lsn_aliaslink); -CK_SLIST_HEAD(nat64lsn_states_slist, nat64lsn_state); -CK_SLIST_HEAD(nat64lsn_hosts_slist, nat64lsn_host); -CK_SLIST_HEAD(nat64lsn_pg_slist, nat64lsn_pg); - -struct nat64lsn_alias { - struct nat64lsn_aliaslink_slist hosts; - struct nat64lsn_pg_slist portgroups; - - struct mtx lock; - in_addr_t addr; /* host byte order */ - uint32_t hosts_count; - uint32_t portgroups_count; - uint32_t tcp_chunkmask; - uint32_t udp_chunkmask; - uint32_t icmp_chunkmask; - - uint32_t tcp_pgidx; - uint32_t udp_pgidx; - uint32_t icmp_pgidx; - uint16_t timestamp; - uint16_t spare; - - uint32_t tcp_pgmask[32]; - uint32_t udp_pgmask[32]; - uint32_t icmp_pgmask[32]; - struct nat64lsn_pgchunk *tcp[32]; - struct nat64lsn_pgchunk *udp[32]; - struct nat64lsn_pgchunk *icmp[32]; - - /* pointer to PG that can be used for faster state allocation */ - struct nat64lsn_pg *tcp_pg; - struct nat64lsn_pg *udp_pg; - struct nat64lsn_pg *icmp_pg; -}; -#define ALIAS_LOCK_INIT(p) \ - mtx_init(&(p)->lock, "alias_lock", NULL, MTX_DEF) -#define ALIAS_LOCK_DESTROY(p) mtx_destroy(&(p)->lock) -#define ALIAS_LOCK(p) mtx_lock(&(p)->lock) -#define ALIAS_UNLOCK(p) mtx_unlock(&(p)->lock) +#define NAT64_RLOCK(h) rw_rlock(&(h)->h_lock) +#define NAT64_RUNLOCK(h) rw_runlock(&(h)->h_lock) +#define NAT64_WLOCK(h) rw_wlock(&(h)->h_lock) +#define NAT64_WUNLOCK(h) rw_wunlock(&(h)->h_lock) +#define NAT64_LOCK(h) NAT64_WLOCK(h) +#define NAT64_UNLOCK(h) 
NAT64_WUNLOCK(h) +#define NAT64_LOCK_INIT(h) do { \ + rw_init(&(h)->h_lock, "NAT64 host lock"); \ + } while (0) -#define NAT64LSN_HSIZE 256 -#define NAT64LSN_MAX_HSIZE 4096 -#define NAT64LSN_HOSTS_HSIZE 1024 +#define NAT64_LOCK_DESTROY(h) do { \ + rw_destroy(&(h)->h_lock); \ + } while (0) -struct nat64lsn_host { - struct in6_addr addr; - struct nat64lsn_aliaslink_slist aliases; - struct nat64lsn_states_slist *states_hash; - CK_SLIST_ENTRY(nat64lsn_host) entries; - uint32_t states_count; - uint32_t hval; - uint32_t flags; -#define NAT64LSN_DEADHOST 1 -#define NAT64LSN_GROWHASH 2 - uint16_t states_hashsize; - uint16_t timestamp; - struct mtx lock; -}; +/* Internal proto index */ +#define NAT_PROTO_TCP 1 +#define NAT_PROTO_UDP 2 +#define NAT_PROTO_ICMP 3 -#define HOST_LOCK_INIT(p) \ - mtx_init(&(p)->lock, "host_lock", NULL, MTX_DEF|MTX_NEW) -#define HOST_LOCK_DESTROY(p) mtx_destroy(&(p)->lock) -#define HOST_LOCK(p) mtx_lock(&(p)->lock) -#define HOST_UNLOCK(p) mtx_unlock(&(p)->lock) +#define NAT_MAX_PROTO 4 +extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO]; VNET_DECLARE(uint16_t, nat64lsn_eid); #define V_nat64lsn_eid VNET(nat64lsn_eid) @@ -167,65 +112,124 @@ VNET_DECLARE(uint16_t, nat64lsn_eid); /* Timestamp macro */ #define _CT ((int)time_uptime % 65536) #define SET_AGE(x) (x) = _CT -#define GET_AGE(x) ((_CT >= (x)) ? _CT - (x): (int)65536 + _CT - (x)) +#define GET_AGE(x) ((_CT >= (x)) ? 
_CT - (x) : \ + (int)65536 + _CT - (x)) -STAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item); +#ifdef __LP64__ +/* ffsl() is capable of checking 64-bit ints */ +#define _FFS64 +#endif -struct nat64lsn_cfg { - struct named_object no; - - struct nat64lsn_hosts_slist *hosts_hash; - struct nat64lsn_alias *aliases; /* array of aliases */ +/* 16 bytes */ +struct nat64lsn_state { + union { + struct { + in_addr_t faddr; /* Remote IPv4 address */ + uint16_t fport; /* Remote IPv4 port */ + uint16_t lport; /* Local IPv6 port */ + }s; + uint64_t hkey; + } u; + uint8_t nat_proto; + uint8_t flags; + uint16_t timestamp; + struct st_ptr cur; /* Index of portgroup in nat64lsn_host */ + struct st_ptr next; /* Next entry index */ +}; - struct mtx lock; - uint32_t hosts_hashsize; - uint32_t hash_seed; +/* + * 1024+32 bytes per 64 states, used to store state + * AND for outside-in state lookup + */ +struct nat64lsn_portgroup { + struct nat64lsn_host *host; /* IPv6 source host info */ + in_addr_t aaddr; /* Alias addr, network format */ + uint16_t aport; /* Base port */ + uint16_t timestamp; + uint8_t nat_proto; + uint8_t spare[3]; + uint32_t idx; +#ifdef _FFS64 + uint64_t freemask; /* Mask of free entries */ +#else + uint32_t freemask[2]; /* Mask of free entries */ +#endif + struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */ +}; +#ifdef _FFS64 +#define PG_MARK_BUSY_IDX(_pg, _idx) (_pg)->freemask &= ~((uint64_t)1<<(_idx)) +#define PG_MARK_FREE_IDX(_pg, _idx) (_pg)->freemask |= ((uint64_t)1<<(_idx)) +#define PG_IS_FREE_IDX(_pg, _idx) ((_pg)->freemask & ((uint64_t)1<<(_idx))) +#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0) +#define PG_GET_FREE_IDX(_pg) (ffsll((_pg)->freemask)) +#define PG_IS_EMPTY(_pg) (((_pg)->freemask + 1) == 0) +#else +#define PG_MARK_BUSY_IDX(_pg, _idx) \ + (_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32)) +#define PG_MARK_FREE_IDX(_pg, _idx) \ + (_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32)) +#define 
PG_IS_FREE_IDX(_pg, _idx) \ + ((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32))) +#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0) +#define PG_GET_FREE_IDX(_pg) _pg_get_free_idx(_pg) +#define PG_IS_EMPTY(_pg) \ + ((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0)) + +static inline int +_pg_get_free_idx(const struct nat64lsn_portgroup *pg) +{ + int i; + + if ((i = ffsl(pg->freemask[0])) != 0) + return (i); + if ((i = ffsl(pg->freemask[1])) != 0) + return (i + 32); + return (0); +} + +#endif + +TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item); +struct nat64lsn_cfg { + struct named_object no; + struct nat64lsn_portgroup **pg; /* XXX: array of pointers */ + struct nat64lsn_host **ih; /* Host hash */ uint32_t prefix4; /* IPv4 prefix */ uint32_t pmask4; /* IPv4 prefix mask */ + uint32_t ihsize; /* IPv6 host hash size */ uint8_t plen4; - uint8_t nomatch_verdict;/* Return value on no-match */ + uint8_t nomatch_verdict;/* What to return to ipfw on no-match */ - uint32_t hosts_count; /* Number of items in host hash */ - uint32_t states_chunks; /* Number of states chunks per PG */ + uint32_t ihcount; /* Number of items in host hash */ + int max_chunks; /* Max chunks per client */ + int agg_prefix_len; /* Prefix length to count */ + int agg_prefix_max; /* Max hosts per agg prefix */ uint32_t jmaxlen; /* Max jobqueue length */ - uint16_t host_delete_delay; /* Stale host delete delay */ - uint16_t pgchunk_delete_delay; + uint16_t min_chunk; /* Min port group # to use */ + uint16_t max_chunk; /* Max port group # to use */ + uint16_t nh_delete_delay; /* Stale host delete delay */ uint16_t pg_delete_delay; /* Stale portgroup del delay */ uint16_t st_syn_ttl; /* TCP syn expire */ uint16_t st_close_ttl; /* TCP fin expire */ uint16_t st_estab_ttl; /* TCP established expire */ uint16_t st_udp_ttl; /* UDP expire */ uint16_t st_icmp_ttl; /* ICMP expire */ - + uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */ struct nat64_config 
base; #define NAT64LSN_FLAGSMASK (NAT64_LOG | NAT64_ALLOW_PRIVATE) -#define NAT64LSN_ANYPREFIX 0x00000100 - struct mtx periodic_lock; struct callout periodic; struct callout jcallout; + struct ip_fw_chain *ch; struct vnet *vp; struct nat64lsn_job_head jhead; int jlen; char name[64]; /* Nat instance name */ }; -/* CFG_LOCK protects cfg->hosts_hash from modification */ -#define CFG_LOCK_INIT(p) \ - mtx_init(&(p)->lock, "cfg_lock", NULL, MTX_DEF) -#define CFG_LOCK_DESTROY(p) mtx_destroy(&(p)->lock) -#define CFG_LOCK(p) mtx_lock(&(p)->lock) -#define CFG_UNLOCK(p) mtx_unlock(&(p)->lock) - -#define CALLOUT_LOCK_INIT(p) \ - mtx_init(&(p)->periodic_lock, "periodic_lock", NULL, MTX_DEF) -#define CALLOUT_LOCK_DESTROY(p) mtx_destroy(&(p)->periodic_lock) -#define CALLOUT_LOCK(p) mtx_lock(&(p)->periodic_lock) -#define CALLOUT_UNLOCK(p) mtx_unlock(&(p)->periodic_lock) - struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch, - in_addr_t prefix, int plen); + size_t numaddr); void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg); void nat64lsn_start_instance(struct nat64lsn_cfg *cfg); void nat64lsn_init_internal(void); @@ -233,4 +237,114 @@ void nat64lsn_uninit_internal(void); int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args, ipfw_insn *cmd, int *done); +void +nat64lsn_dump_state(const struct nat64lsn_cfg *cfg, + const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st, + const char *px, int off); +/* + * Portgroup layout + * addr x nat_proto x port_off + * + */ + +#define _ADDR_PG_PROTO_COUNT (65536 >> NAT64_CHUNK_SIZE_BITS) +#define _ADDR_PG_COUNT (_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO) + +#define GET_ADDR_IDX(_cfg, _addr) ((_addr) - ((_cfg)->prefix4)) +#define __GET_PORTGROUP_IDX(_proto, _port) \ + ((_proto - 1) * _ADDR_PG_PROTO_COUNT + \ + ((_port) >> NAT64_CHUNK_SIZE_BITS)) + +#define _GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port) \ + GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT + \ + __GET_PORTGROUP_IDX(_proto, _port) +#define 
GET_PORTGROUP(_cfg, _addr, _proto, _port) \ + ((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)]) + +#define PORTGROUP_CHUNK(_nh, _idx) \ + ((_nh)->pg_ptr[(_idx)]) +#define PORTGROUP_BYSIDX(_cfg, _nh, _idx) \ + (PORTGROUP_CHUNK(_nh, (_idx - 1) / NAT64LSN_PGIDX_CHUNK) \ + [((_idx) - 1) % NAT64LSN_PGIDX_CHUNK]) + + +/* Chained hash table */ +#define CHT_FIND(_ph, _hsize, _PX, _x, _key) do { \ + unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \ + _PX##lock(_ph, _buck); \ + _x = _PX##first(_ph, _buck); \ + for ( ; _x != NULL; _x = _PX##next(_x)) { \ + if (_PX##cmp(_key, _PX##val(_x))) \ + break; \ + } \ + if (_x == NULL) \ + _PX##unlock(_ph, _buck); \ +} while(0) + +#define CHT_UNLOCK_BUCK(_ph, _PX, _buck) \ + _PX##unlock(_ph, _buck); + +#define CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do { \ + unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \ + _PX##unlock(_ph, _buck); \ +} while(0) + +#define CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do { \ + unsigned int _buck = _PX##hash(_PX##val(_i)) & (_hsize - 1); \ + _PX##lock(_ph, _buck); \ + _PX##next(_i) = _PX##first(_ph, _buck); \ + _PX##first(_ph, _buck) = _i; \ + _PX##unlock(_ph, _buck); \ +} while(0) + +#define CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do { \ + unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \ + _PX##lock(_ph, _buck); \ + _x = _PX##first(_ph, _buck); \ + _tmp = NULL; \ + for ( ; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \ + if (_PX##cmp(_key, _PX##val(_x))) \ + break; \ + } \ + if (_x != NULL) { \ + if (_tmp == NULL) \ + _PX##first(_ph, _buck) = _PX##next(_x); \ + else \ + _PX##next(_tmp) = _PX##next(_x); \ + } \ + _PX##unlock(_ph, _buck); \ +} while(0) + +#define CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do { \ + for (unsigned int _i = 0; _i < _hsize; _i++) { \ + _PX##lock(_ph, _i); \ + _x = _PX##first(_ph, _i); \ + _tmp = NULL; \ + for (; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \ + if (_cb(_x, _arg) == 0) \ + continue; \ + if (_tmp == NULL) \ + _PX##first(_ph, 
_i) = _PX##next(_x); \ + else \ + _tmp = _PX##next(_x); \ + } \ + _PX##unlock(_ph, _i); \ + } \ +} while(0) + +#define CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do { \ + unsigned int _buck; \ + for (unsigned int _i = 0; _i < _hsize; _i++) { \ + _x = _PX##first(_ph, _i); \ + _y = _x; \ + while (_y != NULL) { \ + _buck = _PX##hash(_PX##val(_x)) & (_nhsize - 1);\ + _y = _PX##next(_x); \ + _PX##next(_x) = _PX##first(_nph, _buck); \ + _PX##first(_nph, _buck) = _x; \ + } \ + } \ +} while(0) + #endif /* _IP_FW_NAT64LSN_H_ */ + diff --git a/sys/netpfil/ipfw/nat64/nat64lsn_control.c b/sys/netpfil/ipfw/nat64/nat64lsn_control.c index bbc7c207bac..6bb48d29e38 100644 --- a/sys/netpfil/ipfw/nat64/nat64lsn_control.c +++ b/sys/netpfil/ipfw/nat64/nat64lsn_control.c @@ -33,8 +33,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include #include #include #include @@ -45,8 +43,10 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include +#include #include #include @@ -75,6 +75,12 @@ static void nat64lsn_default_config(ipfw_nat64lsn_cfg *uc) { + if (uc->max_ports == 0) + uc->max_ports = NAT64LSN_MAX_PORTS; + else + uc->max_ports = roundup(uc->max_ports, NAT64_CHUNK_SIZE); + if (uc->max_ports > NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR) + uc->max_ports = NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR; if (uc->jmaxlen == 0) uc->jmaxlen = NAT64LSN_JMAXLEN; if (uc->jmaxlen > 65536) @@ -93,13 +99,6 @@ nat64lsn_default_config(ipfw_nat64lsn_cfg *uc) uc->st_udp_ttl = NAT64LSN_UDP_AGE; if (uc->st_icmp_ttl == 0) uc->st_icmp_ttl = NAT64LSN_ICMP_AGE; - - if (uc->states_chunks == 0) - uc->states_chunks = 1; - else if (uc->states_chunks >= 128) - uc->states_chunks = 128; - else if (!powerof2(uc->states_chunks)) - uc->states_chunks = 1 << fls(uc->states_chunks); } /* @@ -128,20 +127,12 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, if (ipfw_check_object_name_generic(uc->name) != 0) return (EINVAL); - if (uc->set >= IPFW_MAX_SETS) + if (uc->agg_prefix_len > 
127 || uc->set >= IPFW_MAX_SETS) return (EINVAL); if (uc->plen4 > 32) return (EINVAL); - - /* - * Unspecified address has special meaning. But it must - * have valid prefix length. This length will be used to - * correctly extract and embedd IPv4 address into IPv6. - */ - if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 && - IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) && - nat64_check_prefixlen(uc->plen6) != 0) + if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0) return (EINVAL); /* XXX: Check prefix4 to be global */ @@ -149,6 +140,14 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, mask4 = ~((1 << (32 - uc->plen4)) - 1); if ((addr4 & mask4) != addr4) return (EINVAL); + if (uc->min_port == 0) + uc->min_port = NAT64_MIN_PORT; + if (uc->max_port == 0) + uc->max_port = 65535; + if (uc->min_port > uc->max_port) + return (EINVAL); + uc->min_port = roundup(uc->min_port, NAT64_CHUNK_SIZE); + uc->max_port = roundup(uc->max_port, NAT64_CHUNK_SIZE); nat64lsn_default_config(uc); @@ -160,7 +159,7 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, } IPFW_UH_RUNLOCK(ch); - cfg = nat64lsn_init_instance(ch, addr4, uc->plen4); + cfg = nat64lsn_init_instance(ch, 1 << (32 - uc->plen4)); strlcpy(cfg->name, uc->name, sizeof(cfg->name)); cfg->no.name = cfg->name; cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME; @@ -171,12 +170,20 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX; if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix)) cfg->base.flags |= NAT64_WKPFX; - else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix)) - cfg->base.flags |= NAT64LSN_ANYPREFIX; - cfg->states_chunks = uc->states_chunks; + cfg->prefix4 = addr4; + cfg->pmask4 = addr4 | ~mask4; + cfg->plen4 = uc->plen4; + + cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE; + cfg->agg_prefix_len = uc->agg_prefix_len; + cfg->agg_prefix_max = uc->agg_prefix_max; + + cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE; + 
cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE; + cfg->jmaxlen = uc->jmaxlen; - cfg->host_delete_delay = uc->nh_delete_delay; + cfg->nh_delete_delay = uc->nh_delete_delay; cfg->pg_delete_delay = uc->pg_delete_delay; cfg->st_syn_ttl = uc->st_syn_ttl; cfg->st_close_ttl = uc->st_close_ttl; @@ -242,7 +249,7 @@ nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_WUNLOCK(ch); - return (ENOENT); + return (ESRCH); } if (cfg->no.refcnt > 0) { @@ -265,8 +272,6 @@ static void export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, struct ipfw_nat64lsn_stats *stats) { - struct nat64lsn_alias *alias; - int i, j; __COPY_STAT_FIELD(cfg, stats, opcnt64); __COPY_STAT_FIELD(cfg, stats, opcnt46); @@ -294,16 +299,10 @@ export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, __COPY_STAT_FIELD(cfg, stats, spgcreated); __COPY_STAT_FIELD(cfg, stats, spgdeleted); - stats->hostcount = cfg->hosts_count; - for (i = 0; i < (1 << (32 - cfg->plen4)); i++) { - alias = &cfg->aliases[i]; - for (j = 0; j < 32 && ISSET32(alias->tcp_chunkmask, j); j++) - stats->tcpchunks += bitcount32(alias->tcp_pgmask[j]); - for (j = 0; j < 32 && ISSET32(alias->udp_chunkmask, j); j++) - stats->udpchunks += bitcount32(alias->udp_pgmask[j]); - for (j = 0; j < 32 && ISSET32(alias->icmp_chunkmask, j); j++) - stats->icmpchunks += bitcount32(alias->icmp_pgmask[j]); - } + stats->hostcount = cfg->ihcount; + stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP]; + stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP]; + stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP]; } #undef __COPY_STAT_FIELD @@ -313,9 +312,12 @@ nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, { uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK; - uc->states_chunks = cfg->states_chunks; + uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE; + uc->agg_prefix_len = cfg->agg_prefix_len; + uc->agg_prefix_max = 
cfg->agg_prefix_max; + uc->jmaxlen = cfg->jmaxlen; - uc->nh_delete_delay = cfg->host_delete_delay; + uc->nh_delete_delay = cfg->nh_delete_delay; uc->pg_delete_delay = cfg->pg_delete_delay; uc->st_syn_ttl = cfg->st_syn_ttl; uc->st_close_ttl = cfg->st_close_ttl; @@ -423,7 +425,7 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op, cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_RUNLOCK(ch); - return (ENOENT); + return (EEXIST); } nat64lsn_export_config(ch, cfg, uc); IPFW_UH_RUNLOCK(ch); @@ -436,18 +438,18 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op, cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_WUNLOCK(ch); - return (ENOENT); + return (EEXIST); } /* * For now allow to change only following values: * jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age, - * tcp_est_age, udp_age, icmp_age, flags, states_chunks. + * tcp_est_age, udp_age, icmp_age, flags, max_ports. */ - cfg->states_chunks = uc->states_chunks; + cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE; cfg->jmaxlen = uc->jmaxlen; - cfg->host_delete_delay = uc->nh_delete_delay; + cfg->nh_delete_delay = uc->nh_delete_delay; cfg->pg_delete_delay = uc->pg_delete_delay; cfg->st_syn_ttl = uc->st_syn_ttl; cfg->st_close_ttl = uc->st_close_ttl; @@ -494,7 +496,7 @@ nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_RUNLOCK(ch); - return (ENOENT); + return (ESRCH); } export_stats(ch, cfg, &stats); @@ -536,7 +538,7 @@ nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_WUNLOCK(ch); - return (ENOENT); + return (ESRCH); } COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS); IPFW_UH_WUNLOCK(ch); @@ -548,159 +550,151 @@ nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, * ipfw_nat64lsn_state x 
count, ... ] ] */ static int -nat64lsn_export_states_v1(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx, - struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count) +export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg, + ipfw_nat64lsn_stg *stg, struct sockopt_data *sd) { - ipfw_nat64lsn_state_v1 *s; - struct nat64lsn_state *state; - uint64_t mask; - uint32_t i, count; - - /* validate user input */ - if (idx->chunk > pg->chunks_count - 1) - return (EINVAL); - - mask = pg->chunks_count == 1 ? ~pg->freemask : - ~pg->freemask_chunk[idx->chunk]; - count = bitcount64(mask); - if (count == 0) - return (0); /* Try next PG/chunk */ + ipfw_nat64lsn_state *ste; + struct nat64lsn_state *st; + int i, count; - DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d", - (uintmax_t)idx->index, count); + NAT64_LOCK(pg->host); + count = 0; + for (i = 0; i < 64; i++) { + if (PG_IS_BUSY_IDX(pg, i)) + count++; + } + DPRINTF(DP_STATE, "EXPORT PG %d, count %d", pg->idx, count); - s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd, - count * sizeof(ipfw_nat64lsn_state_v1)); - if (s == NULL) - return (ENOMEM); + if (count == 0) { + stg->count = 0; + NAT64_UNLOCK(pg->host); + return (0); + } + ste = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd, + count * sizeof(ipfw_nat64lsn_state)); + if (ste == NULL) { + NAT64_UNLOCK(pg->host); + return (1); + } + stg->alias4.s_addr = pg->aaddr; + stg->proto = nat64lsn_rproto_map[pg->nat_proto]; + stg->flags = 0; + stg->host6 = pg->host->addr; + stg->count = count; for (i = 0; i < 64; i++) { - if (!ISSET64(mask, i)) + if (PG_IS_FREE_IDX(pg, i)) continue; - state = pg->chunks_count == 1 ? 
&pg->states->state[i] : - &pg->states_chunk[idx->chunk]->state[i]; - - s->host6 = state->host->addr; - s->daddr.s_addr = htonl(state->ip_dst); - s->dport = state->dport; - s->sport = state->sport; - s->aport = state->aport; - s->flags = (uint8_t)(state->flags & 7); - s->proto = state->proto; - s->idle = GET_AGE(state->timestamp); - s++; + st = &pg->states[i]; + ste->daddr.s_addr = st->u.s.faddr; + ste->dport = st->u.s.fport; + ste->aport = pg->aport + i; + ste->sport = st->u.s.lport; + ste->flags = st->flags; /* XXX filter flags */ + ste->idle = GET_AGE(st->timestamp); + ste++; } - *ret_count = count; + NAT64_UNLOCK(pg->host); + return (0); } -#define LAST_IDX 0xFF static int -nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg, - union nat64lsn_pgidx *idx) +get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, + uint16_t *port) { - /* First iterate over chunks */ - if (pg != NULL) { - if (idx->chunk < pg->chunks_count - 1) { - idx->chunk++; - return (0); - } - } - idx->chunk = 0; - /* Then over PGs */ - if (idx->port < UINT16_MAX - 64) { - idx->port += 64; + if (*port < 65536 - NAT64_CHUNK_SIZE) { + *port += NAT64_CHUNK_SIZE; return (0); } - idx->port = NAT64_MIN_PORT; - /* Then over supported protocols */ - switch (idx->proto) { - case IPPROTO_ICMP: - idx->proto = IPPROTO_TCP; - return (0); - case IPPROTO_TCP: - idx->proto = IPPROTO_UDP; + *port = 0; + + if (*nat_proto < NAT_MAX_PROTO - 1) { + *nat_proto += 1; return (0); - default: - idx->proto = IPPROTO_ICMP; } - /* And then over IPv4 alias addresses */ - if (idx->addr < cfg->pmask4) { - idx->addr++; - return (1); /* New states group is needed */ + *nat_proto = 1; + + if (*addr < cfg->pmask4) { + *addr += 1; + return (0); } - idx->index = LAST_IDX; - return (-1); /* No more states */ + + /* End of space. 
*/ + return (1); } -static struct nat64lsn_pg* -nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx) +#define PACK_IDX(addr, proto, port) \ + ((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8) +#define UNPACK_IDX(idx, addr, proto, port) \ + (addr) = (uint32_t)((idx) >> 32); \ + (port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \ + (proto) = (uint8_t)(((idx) >> 8) & 0xFF) + +static struct nat64lsn_portgroup * +get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, + uint16_t *port) { - struct nat64lsn_alias *alias; - int pg_idx; - - alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)]; - MPASS(alias->addr == idx->addr); - - pg_idx = (idx->port - NAT64_MIN_PORT) / 64; - switch (idx->proto) { - case IPPROTO_ICMP: - if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32)) - return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]); - break; - case IPPROTO_TCP: - if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32)) - return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]); - break; - case IPPROTO_UDP: - if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32)) - return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]); - break; + struct nat64lsn_portgroup *pg; + uint64_t pre_pack, post_pack; + + pg = NULL; + pre_pack = PACK_IDX(*addr, *nat_proto, *port); + for (;;) { + if (get_next_idx(cfg, addr, nat_proto, port) != 0) { + /* End of states */ + return (pg); + } + + pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port); + if (pg != NULL) + break; } - return (NULL); + + post_pack = PACK_IDX(*addr, *nat_proto, *port); + if (pre_pack == post_pack) + DPRINTF(DP_STATE, "XXX: PACK_IDX %u %d %d", + *addr, *nat_proto, *port); + return (pg); } -/* - * Lists nat64lsn states. 
- * Data layout (v0): - * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]] - * Reply: [ ipfw_obj_header ipfw_obj_data [ - * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ] - * - * Returns 0 on success - */ -static int -nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) +static NAT64NOINLINE struct nat64lsn_portgroup * +get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, + uint16_t *port) { + struct nat64lsn_portgroup *pg; - /* TODO: implement states listing for old ipfw(8) binaries */ - return (EOPNOTSUPP); + pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port); + if (pg == NULL) + pg = get_next_pg(cfg, addr, nat_proto, port); + + return (pg); } /* * Lists nat64lsn states. - * Data layout (v1)(current): + * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]] * Reply: [ ipfw_obj_header ipfw_obj_data [ - * ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ] + * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ] * * Returns 0 on success */ static int -nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, +nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_header *oh; ipfw_obj_data *od; - ipfw_nat64lsn_stg_v1 *stg; + ipfw_nat64lsn_stg *stg; struct nat64lsn_cfg *cfg; - struct nat64lsn_pg *pg; - union nat64lsn_pgidx idx; + struct nat64lsn_portgroup *pg, *pg_next; + uint64_t next_idx; size_t sz; - uint32_t count, total; - int ret; + uint32_t addr, states; + uint16_t port; + uint8_t nat_proto; sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + sizeof(uint64_t); @@ -714,96 +708,78 @@ nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, od->head.length != sz - sizeof(ipfw_obj_header)) return (EINVAL); - idx.index = *(uint64_t *)(od + 1); - if (idx.index != 0 && idx.proto != IPPROTO_ICMP && - idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP) + next_idx = *(uint64_t *)(od + 1); + /* Translate index to the request 
position to start from */ + UNPACK_IDX(next_idx, addr, nat_proto, port); + if (nat_proto >= NAT_MAX_PROTO) return (EINVAL); - if (idx.index == LAST_IDX) + if (nat_proto == 0 && addr != 0) return (EINVAL); IPFW_UH_RLOCK(ch); cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_RUNLOCK(ch); - return (ENOENT); + return (ESRCH); } - if (idx.index == 0) { /* Fill in starting point */ - idx.addr = cfg->prefix4; - idx.proto = IPPROTO_ICMP; - idx.port = NAT64_MIN_PORT; + /* Fill in starting point */ + if (addr == 0) { + addr = cfg->prefix4; + nat_proto = 1; + port = 0; } - if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 || - idx.port < NAT64_MIN_PORT) { + if (addr < cfg->prefix4 || addr > cfg->pmask4) { IPFW_UH_RUNLOCK(ch); + DPRINTF(DP_GENERIC | DP_STATE, "XXX: %ju %u %u", + (uintmax_t)next_idx, addr, cfg->pmask4); return (EINVAL); } + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + - sizeof(ipfw_nat64lsn_stg_v1); - if (sd->valsize < sz) { - IPFW_UH_RUNLOCK(ch); + sizeof(ipfw_nat64lsn_stg); + if (sd->valsize < sz) return (ENOMEM); - } oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz); od = (ipfw_obj_data *)(oh + 1); od->head.type = IPFW_TLV_OBJDATA; od->head.length = sz - sizeof(ipfw_obj_header); - stg = (ipfw_nat64lsn_stg_v1 *)(od + 1); - stg->count = total = 0; - stg->next.index = idx.index; - /* - * Acquire CALLOUT_LOCK to avoid races with expiration code. - * Thus states, hosts and PGs will not expire while we hold it. 
- */ - CALLOUT_LOCK(cfg); - ret = 0; - do { - pg = nat64lsn_get_pg_byidx(cfg, &idx); - if (pg != NULL) { - count = 0; - ret = nat64lsn_export_states_v1(cfg, &idx, pg, - sd, &count); - if (ret != 0) - break; - if (count > 0) { - stg->count += count; - total += count; - /* Update total size of reply */ - od->head.length += - count * sizeof(ipfw_nat64lsn_state_v1); - sz += count * sizeof(ipfw_nat64lsn_state_v1); - } - stg->alias4.s_addr = htonl(idx.addr); + stg = (ipfw_nat64lsn_stg *)(od + 1); + + pg = get_first_pg(cfg, &addr, &nat_proto, &port); + if (pg == NULL) { + /* No states */ + stg->next_idx = 0xFF; + stg->count = 0; + IPFW_UH_RUNLOCK(ch); + return (0); + } + states = 0; + pg_next = NULL; + while (pg != NULL) { + pg_next = get_next_pg(cfg, &addr, &nat_proto, &port); + if (pg_next == NULL) + stg->next_idx = 0xFF; + else + stg->next_idx = PACK_IDX(addr, nat_proto, port); + + if (export_pg_states(cfg, pg, stg, sd) != 0) { + IPFW_UH_RUNLOCK(ch); + return (states == 0 ? ENOMEM: 0); } - /* Determine new index */ - switch (nat64lsn_next_pgidx(cfg, pg, &idx)) { - case -1: - ret = ENOENT; /* End of search */ - break; - case 1: /* - * Next alias address, new group may be needed. - * If states count is zero, use this group. 
- */ - if (stg->count == 0) - continue; - /* Otherwise try to create new group */ - sz += sizeof(ipfw_nat64lsn_stg_v1); - if (sd->valsize < sz) { - ret = ENOMEM; + states += stg->count; + od->head.length += stg->count * sizeof(ipfw_nat64lsn_state); + sz += stg->count * sizeof(ipfw_nat64lsn_state); + if (pg_next != NULL) { + sz += sizeof(ipfw_nat64lsn_stg); + if (sd->valsize < sz) break; - } - /* Save next index in current group */ - stg->next.index = idx.index; - stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd, - sizeof(ipfw_nat64lsn_stg_v1)); - od->head.length += sizeof(ipfw_nat64lsn_stg_v1); - stg->count = 0; - break; + stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd, + sizeof(ipfw_nat64lsn_stg)); } - stg->next.index = idx.index; - } while (ret == 0); - CALLOUT_UNLOCK(cfg); + pg = pg_next; + } IPFW_UH_RUNLOCK(ch); - return ((total > 0 || idx.index == LAST_IDX) ? 0: ret); + return (0); } static struct ipfw_sopt_handler scodes[] = { @@ -813,8 +789,7 @@ static struct ipfw_sopt_handler scodes[] = { { IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list }, { IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats }, { IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats }, - { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states_v0 }, - { IP_FW_NAT64LSN_LIST_STATES,1, HDIR_GET, nat64lsn_states_v1 }, + { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states }, }; static int -- 2.45.0