4 * Copyright (C) 1995-2003 by Darren Reed.
6 * See the IPFILTER.LICENCE file for details on licencing.
8 #if defined(KERNEL) || defined(_KERNEL)
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
19 #if defined(_KERNEL) && defined(__NetBSD_Version__) && \
20 (__NetBSD_Version__ >= 399002000)
21 # include <sys/kauth.h>
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
25 #if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
26 # include "opt_ipfilter_log.h"
28 # include "opt_ipfilter.h"
42 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
43 # include <sys/filio.h>
44 # include <sys/fcntl.h>
46 # include <sys/ioctl.h>
49 # include <sys/fcntl.h>
52 # include <sys/protosw.h>
54 #include <sys/socket.h>
56 # include <sys/systm.h>
57 # if !defined(__SVR4) && !defined(__svr4__)
58 # include <sys/mbuf.h>
61 #if defined(__SVR4) || defined(__svr4__)
62 # include <sys/filio.h>
63 # include <sys/byteorder.h>
65 # include <sys/dditypes.h>
67 # include <sys/stream.h>
68 # include <sys/kmem.h>
70 #if __FreeBSD_version >= 300000
71 # include <sys/queue.h>
74 #if __FreeBSD_version >= 300000
75 # include <net/if_var.h>
76 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
77 # include "opt_ipfilter.h"
83 #include <net/route.h>
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/ip.h>
90 # include <vpn/ipsec.h>
91 extern struct ifnet vpnif;
95 # include <netinet/ip_var.h>
97 #include <netinet/tcp.h>
98 #include <netinet/udp.h>
99 #include <netinet/ip_icmp.h>
100 #include "netinet/ip_compat.h"
101 #include <netinet/tcpip.h>
102 #include "netinet/ip_fil.h"
103 #include "netinet/ip_nat.h"
104 #include "netinet/ip_frag.h"
105 #include "netinet/ip_state.h"
106 #include "netinet/ip_proxy.h"
108 #include "netinet/ip_sync.h"
110 #if (__FreeBSD_version >= 300000)
111 # include <sys/malloc.h>
113 /* END OF INCLUDES */
116 #define SOCKADDR_IN struct sockaddr_in
119 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed";
120 static const char rcsid[] = "@(#)$FreeBSD$";
121 /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */
125 /* ======================================================================== */
126 /* How the NAT is organised and works. */
128 /* Inside (interface y) NAT Outside (interface x) */
129 /* -------------------- -+- ------------------------------------- */
130 /* Packet going | out, processed by fr_checknatout() for x */
131 /* ------------> | ------------> */
132 /* src=10.1.1.1 | src=192.1.1.1 */
134 /* | in, processed by fr_checknatin() for x */
135 /* <------------ | <------------ */
136 /* dst=10.1.1.1 | dst=192.1.1.1 */
137 /* -------------------- -+- ------------------------------------- */
138 /* fr_checknatout() - changes ip_src and if required, sport */
139 /* - creates a new mapping, if required. */
140 /* fr_checknatin() - changes ip_dst and if required, dport */
142 /* In the NAT table, internal source is recorded as "in" and externally */
144 /* ======================================================================== */
147 nat_t **nat_table[2] = { NULL, NULL },
148 *nat_instances = NULL;
149 ipnat_t *nat_list = NULL;
150 u_int ipf_nattable_max = NAT_TABLE_MAX;
151 u_int ipf_nattable_sz = NAT_TABLE_SZ;
152 u_int ipf_natrules_sz = NAT_SIZE;
153 u_int ipf_rdrrules_sz = RDR_SIZE;
154 u_int ipf_hostmap_sz = HOSTMAP_SIZE;
155 u_int fr_nat_maxbucket = 0,
156 fr_nat_maxbucket_reset = 1;
157 u_32_t nat_masks = 0;
158 u_32_t rdr_masks = 0;
159 u_long nat_last_force_flush = 0;
160 ipnat_t **nat_rules = NULL;
161 ipnat_t **rdr_rules = NULL;
162 hostmap_t **ipf_hm_maptable = NULL;
163 hostmap_t *ipf_hm_maplist = NULL;
164 ipftq_t nat_tqb[IPF_TCP_NSTATES];
168 ipftq_t *nat_utqe = NULL;
169 int fr_nat_doflush = 0;
176 u_long fr_defnatage = DEF_NAT_AGE,
177 fr_defnatipage = 120, /* 60 seconds */
178 fr_defnaticmpage = 6; /* 3 seconds */
182 #if SOLARIS && !defined(_INET_IP_STACK_H)
183 extern int pfil_delayed_copy;
186 static int nat_flush_entry __P((void *));
187 static int nat_flushtable __P((void));
188 static int nat_clearlist __P((void));
189 static void nat_addnat __P((struct ipnat *));
190 static void nat_addrdr __P((struct ipnat *));
191 static void nat_delrdr __P((struct ipnat *));
192 static void nat_delnat __P((struct ipnat *));
193 static int fr_natgetent __P((caddr_t));
194 static int fr_natgetsz __P((caddr_t));
195 static int fr_natputent __P((caddr_t, int));
196 static int nat_extraflush __P((int));
197 static int nat_gettable __P((char *));
198 static void nat_tabmove __P((nat_t *));
199 static int nat_match __P((fr_info_t *, ipnat_t *));
200 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
201 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
202 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
203 struct in_addr, struct in_addr, u_32_t));
204 static int nat_icmpquerytype4 __P((int));
205 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
206 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
207 static int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
208 tcphdr_t *, nat_t **, int));
209 static int nat_resolverule __P((ipnat_t *));
210 static nat_t *fr_natclone __P((fr_info_t *, nat_t *));
211 static void nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
212 static int nat_wildok __P((nat_t *, int, int, int, int));
213 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *));
214 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *));
217 /* ------------------------------------------------------------------------ */
218 /* Function: fr_natinit */
219 /* Returns: int - 0 == success, -1 == failure */
220 /* Parameters: Nil */
222 /* Initialise all of the NAT locks, tables and other structures. */
223 /* ------------------------------------------------------------------------ */
228 KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
229 if (nat_table[0] != NULL)
230 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
234 KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
235 if (nat_table[1] != NULL)
236 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
240 KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
241 if (nat_rules != NULL)
242 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
246 KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
247 if (rdr_rules != NULL)
248 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
252 KMALLOCS(ipf_hm_maptable, hostmap_t **, \
253 sizeof(hostmap_t *) * ipf_hostmap_sz);
254 if (ipf_hm_maptable != NULL)
255 bzero((char *)ipf_hm_maptable,
256 sizeof(hostmap_t *) * ipf_hostmap_sz);
259 ipf_hm_maplist = NULL;
261 KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
262 ipf_nattable_sz * sizeof(u_long));
263 if (nat_stats.ns_bucketlen[0] == NULL)
265 bzero((char *)nat_stats.ns_bucketlen[0],
266 ipf_nattable_sz * sizeof(u_long));
268 KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
269 ipf_nattable_sz * sizeof(u_long));
270 if (nat_stats.ns_bucketlen[1] == NULL)
273 bzero((char *)nat_stats.ns_bucketlen[1],
274 ipf_nattable_sz * sizeof(u_long));
276 if (fr_nat_maxbucket == 0) {
277 for (i = ipf_nattable_sz; i > 0; i >>= 1)
279 fr_nat_maxbucket *= 2;
282 fr_sttab_init(nat_tqb);
284 * Increase this because we may have "keep state" following this too
285 * and packet storms can occur if this is removed too quickly.
287 nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
288 nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
289 nat_udptq.ifq_ttl = fr_defnatage;
290 nat_udptq.ifq_ref = 1;
291 nat_udptq.ifq_head = NULL;
292 nat_udptq.ifq_tail = &nat_udptq.ifq_head;
293 MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
294 nat_udptq.ifq_next = &nat_icmptq;
295 nat_icmptq.ifq_ttl = fr_defnaticmpage;
296 nat_icmptq.ifq_ref = 1;
297 nat_icmptq.ifq_head = NULL;
298 nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
299 MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
300 nat_icmptq.ifq_next = &nat_iptq;
301 nat_iptq.ifq_ttl = fr_defnatipage;
302 nat_iptq.ifq_ref = 1;
303 nat_iptq.ifq_head = NULL;
304 nat_iptq.ifq_tail = &nat_iptq.ifq_head;
305 MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
306 nat_iptq.ifq_next = NULL;
308 for (i = 0; i < IPF_TCP_NSTATES; i++) {
309 if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
310 nat_tqb[i].ifq_ttl = fr_defnaticmpage;
312 else if (nat_tqb[i].ifq_ttl > fr_defnatage)
313 nat_tqb[i].ifq_ttl = fr_defnatage;
318 * Increase this because we may have "keep state" following
319 * this too and packet storms can occur if this is removed
322 nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
324 RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
325 RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
326 MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
327 MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
335 /* ------------------------------------------------------------------------ */
336 /* Function: nat_addrdr */
338 /* Parameters: n(I) - pointer to NAT rule to add */
340 /* Adds a redirect rule to the hash table of redirect rules and the list of */
341 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */
342 /* use by redirect rules. */
343 /* ------------------------------------------------------------------------ */
344 static void nat_addrdr(n)
352 k = count4bits(n->in_outmsk);
353 if ((k >= 0) && (k != 32))
355 j = (n->in_outip & n->in_outmsk);
356 hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
359 np = &(*np)->in_rnext;
367 /* ------------------------------------------------------------------------ */
368 /* Function: nat_addnat */
370 /* Parameters: n(I) - pointer to NAT rule to add */
372 /* Adds a NAT map rule to the hash table of rules and the list of loaded */
373 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */
374 /* map rules. */
375 /* ------------------------------------------------------------------------ */
376 static void nat_addnat(n)
384 k = count4bits(n->in_inmsk);
385 if ((k >= 0) && (k != 32))
387 j = (n->in_inip & n->in_inmsk);
388 hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
391 np = &(*np)->in_mnext;
399 /* ------------------------------------------------------------------------ */
400 /* Function: nat_delrdr */
402 /* Parameters: n(I) - pointer to NAT rule to delete */
404 /* Removes a redirect rule from the hash table of redirect rules. */
405 /* ------------------------------------------------------------------------ */
406 static void nat_delrdr(n)
410 n->in_rnext->in_prnext = n->in_prnext;
411 *n->in_prnext = n->in_rnext;
415 /* ------------------------------------------------------------------------ */
416 /* Function: nat_delnat */
418 /* Parameters: n(I) - pointer to NAT rule to delete */
420 /* Removes a NAT map rule from the hash table of NAT map rules. */
421 /* ------------------------------------------------------------------------ */
422 static void nat_delnat(n)
425 if (n->in_mnext != NULL)
426 n->in_mnext->in_pmnext = n->in_pmnext;
427 *n->in_pmnext = n->in_mnext;
431 /* ------------------------------------------------------------------------ */
432 /* Function: nat_hostmap */
433 /* Returns: struct hostmap* - NULL if no hostmap could be created, */
434 /* else a pointer to the hostmapping to use */
435 /* Parameters: np(I) - pointer to NAT rule */
436 /* real(I) - real IP address */
437 /* map(I) - mapped IP address */
438 /* port(I) - destination port number */
439 /* Write Locks: ipf_nat */
441 /* Check if an ip address has already been allocated for a given mapping */
442 /* that is not doing port based translation. If it is not yet allocated, */
443 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */
444 /* ------------------------------------------------------------------------ */
445 static struct hostmap *nat_hostmap(np, src, dst, map, port)
455 hv = (src.s_addr ^ dst.s_addr);
459 for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_next)
460 if ((hm->hm_srcip.s_addr == src.s_addr) &&
461 (hm->hm_dstip.s_addr == dst.s_addr) &&
462 ((np == NULL) || (np == hm->hm_ipnat)) &&
463 ((port == 0) || (port == hm->hm_port))) {
471 KMALLOC(hm, hostmap_t *);
473 hm->hm_next = ipf_hm_maplist;
474 hm->hm_pnext = &ipf_hm_maplist;
475 if (ipf_hm_maplist != NULL)
476 ipf_hm_maplist->hm_pnext = &hm->hm_next;
478 hm->hm_hnext = ipf_hm_maptable[hv];
479 hm->hm_phnext = ipf_hm_maptable + hv;
480 if (ipf_hm_maptable[hv] != NULL)
481 ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
482 ipf_hm_maptable[hv] = hm;
494 /* ------------------------------------------------------------------------ */
495 /* Function: fr_hostmapdel */
497 /* Parameters: hmp(I) - pointer to hostmap structure pointer */
498 /* Write Locks: ipf_nat */
500 /* Decrement the references to this hostmap structure by one. If this */
501 /* reaches zero then remove it and free it. */
502 /* ------------------------------------------------------------------------ */
503 void fr_hostmapdel(hmp)
504 struct hostmap **hmp;
512 if (hm->hm_ref == 0) {
514 hm->hm_hnext->hm_phnext = hm->hm_phnext;
515 *hm->hm_phnext = hm->hm_hnext;
517 hm->hm_next->hm_pnext = hm->hm_pnext;
518 *hm->hm_pnext = hm->hm_next;
524 /* ------------------------------------------------------------------------ */
525 /* Function: fix_outcksum */
527 /* Parameters: fin(I) - pointer to packet information */
528 /* sp(I) - location of 16bit checksum to update */
529 /* n(I) - amount to adjust checksum by */
531 /* Adjusts the 16bit checksum by "n" for packets going out. */
532 /* ------------------------------------------------------------------------ */
533 void fix_outcksum(fin, sp, n)
544 if (n & NAT_HW_CKSUM) {
547 n = (n & 0xffff) + (n >> 16);
551 sum1 = (~ntohs(*sp)) & 0xffff;
553 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
555 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
556 sumshort = ~(u_short)sum1;
557 *(sp) = htons(sumshort);
561 /* ------------------------------------------------------------------------ */
562 /* Function: fix_incksum */
564 /* Parameters: fin(I) - pointer to packet information */
565 /* sp(I) - location of 16bit checksum to update */
566 /* n(I) - amount to adjust checksum by */
568 /* Adjusts the 16bit checksum by "n" for packets going in. */
569 /* ------------------------------------------------------------------------ */
570 void fix_incksum(fin, sp, n)
581 if (n & NAT_HW_CKSUM) {
584 n = (n & 0xffff) + (n >> 16);
588 sum1 = (~ntohs(*sp)) & 0xffff;
589 sum1 += ~(n) & 0xffff;
590 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
592 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
593 sumshort = ~(u_short)sum1;
594 *(sp) = htons(sumshort);
598 /* ------------------------------------------------------------------------ */
599 /* Function: fix_datacksum */
601 /* Parameters: sp(I) - location of 16bit checksum to update */
602 /* n(I) - amount to adjust checksum by */
604 /* Fix_datacksum is used *only* for the adjustments of checksums in the */
605 /* data section of an IP packet. */
607 /* The only situation in which you need to do this is when NAT'ing an */
608 /* ICMP error message. Such a message, contains in its body the IP header */
609 /* of the original IP packet, that causes the error. */
611 /* You can't use fix_incksum or fix_outcksum in that case, because for the */
612 /* kernel the data section of the ICMP error is just data, and no special */
613 /* processing like hardware cksum or ntohs processing have been done by the */
614 /* kernel on the data section. */
615 /* ------------------------------------------------------------------------ */
616 void fix_datacksum(sp, n)
626 sum1 = (~ntohs(*sp)) & 0xffff;
628 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
630 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
631 sumshort = ~(u_short)sum1;
632 *(sp) = htons(sumshort);
636 /* ------------------------------------------------------------------------ */
637 /* Function: fr_nat_ioctl */
638 /* Returns: int - 0 == success, != 0 == failure */
639 /* Parameters: data(I) - pointer to ioctl data */
640 /* cmd(I) - ioctl command integer */
641 /* mode(I) - file mode bits used with open */
643 /* Processes an ioctl call made to operate on the IP Filter NAT device. */
644 /* ------------------------------------------------------------------------ */
645 int fr_nat_ioctl(data, cmd, mode, uid, ctx)
651 ipnat_t *nat, *nt, *n = NULL, **np = NULL;
652 int error = 0, ret, arg, getlock;
656 #if (BSD >= 199306) && defined(_KERNEL)
657 # if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
658 if ((mode & FWRITE) &&
659 kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
660 KAUTH_REQ_NETWORK_FIREWALL_FW,
665 if ((securelevel >= 3) && (mode & FWRITE)) {
671 #if defined(__osf__) && defined(_KERNEL)
674 getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
677 nat = NULL; /* XXX gcc -Wuninitialized */
678 if (cmd == (ioctlcmd_t)SIOCADNAT) {
679 KMALLOC(nt, ipnat_t *);
684 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
685 if (mode & NAT_SYSSPACE) {
686 bcopy(data, (char *)&natd, sizeof(natd));
689 error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
697 * For add/delete, look to see if the NAT entry is already present
699 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
701 if (nat->in_v == 0) /* For backward compat. */
703 nat->in_flags &= IPN_USERFLAGS;
704 if ((nat->in_redir & NAT_MAPBLK) == 0) {
705 if ((nat->in_flags & IPN_SPLIT) == 0)
706 nat->in_inip &= nat->in_inmsk;
707 if ((nat->in_flags & IPN_IPRANGE) == 0)
708 nat->in_outip &= nat->in_outmsk;
710 MUTEX_ENTER(&ipf_natio);
711 for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
712 if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
714 if (nat->in_redir == NAT_REDIRECT &&
715 nat->in_pnext != n->in_pnext)
728 if (!(mode & FWRITE))
731 tmp = ipflog_clear(IPL_LOGNAT);
732 error = BCOPYOUT((char *)&tmp, (char *)data,
741 if (!(mode & FWRITE))
744 error = BCOPYIN((char *)data, (char *)&nat_logging,
745 sizeof(nat_logging));
752 error = BCOPYOUT((char *)&nat_logging, (char *)data,
753 sizeof(nat_logging));
759 arg = iplused[IPL_LOGNAT];
760 error = BCOPYOUT(&arg, data, sizeof(arg));
766 if (!(mode & FWRITE)) {
768 } else if (n != NULL) {
770 } else if (nt == NULL) {
774 MUTEX_EXIT(&ipf_natio);
777 bcopy((char *)nat, (char *)nt, sizeof(*n));
778 error = nat_siocaddnat(nt, np, getlock);
779 MUTEX_EXIT(&ipf_natio);
785 if (!(mode & FWRITE)) {
788 } else if (n == NULL) {
793 MUTEX_EXIT(&ipf_natio);
796 nat_siocdelnat(n, np, getlock);
798 MUTEX_EXIT(&ipf_natio);
803 nat_stats.ns_table[0] = nat_table[0];
804 nat_stats.ns_table[1] = nat_table[1];
805 nat_stats.ns_list = nat_list;
806 nat_stats.ns_maptable = ipf_hm_maptable;
807 nat_stats.ns_maplist = ipf_hm_maplist;
808 nat_stats.ns_nattab_sz = ipf_nattable_sz;
809 nat_stats.ns_nattab_max = ipf_nattable_max;
810 nat_stats.ns_rultab_sz = ipf_natrules_sz;
811 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
812 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
813 nat_stats.ns_instances = nat_instances;
814 nat_stats.ns_apslist = ap_sess_list;
815 nat_stats.ns_ticks = fr_ticks;
816 error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
824 READ_ENTER(&ipf_nat);
826 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
828 if (nat_lookupredir(&nl) != NULL) {
829 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
835 RWLOCK_EXIT(&ipf_nat);
840 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */
841 if (!(mode & FWRITE)) {
846 WRITE_ENTER(&ipf_nat);
849 error = BCOPYIN(data, &arg, sizeof(arg));
854 ret = nat_flushtable();
856 ret = nat_clearlist();
858 ret = nat_extraflush(arg);
862 RWLOCK_EXIT(&ipf_nat);
865 error = BCOPYOUT(&ret, data, sizeof(ret));
870 error = appr_ioctl(data, cmd, mode, ctx);
874 if (!(mode & FWRITE)) {
877 error = fr_lock(data, &fr_nat_lock);
882 if ((mode & FWRITE) != 0) {
883 error = fr_natputent(data, getlock);
892 READ_ENTER(&ipf_nat);
894 error = fr_natgetsz(data);
896 RWLOCK_EXIT(&ipf_nat);
905 READ_ENTER(&ipf_nat);
907 error = fr_natgetent(data);
909 RWLOCK_EXIT(&ipf_nat);
921 error = fr_inobj(data, &iter, IPFOBJ_GENITER);
923 token = ipf_findtoken(iter.igi_type, uid, ctx);
925 error = nat_iterator(token, &iter);
927 RWLOCK_EXIT(&ipf_tokens);
934 error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
937 error = ipf_deltoken(arg, uid, ctx);
945 error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
949 error = nat_gettable(data);
963 /* ------------------------------------------------------------------------ */
964 /* Function: nat_siocaddnat */
965 /* Returns: int - 0 == success, != 0 == failure */
966 /* Parameters: n(I) - pointer to new NAT rule */
967 /* np(I) - pointer to where to insert new NAT rule */
968 /* getlock(I) - flag indicating if lock on ipf_nat is held */
969 /* Mutex Locks: ipf_natio */
971 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
972 /* from information passed to the kernel, then add it to the appropriate */
973 /* NAT rule table(s). */
974 /* ------------------------------------------------------------------------ */
975 static int nat_siocaddnat(n, np, getlock)
981 if (nat_resolverule(n) != 0)
984 if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
988 if (n->in_redir & NAT_MAPBLK)
989 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
990 else if (n->in_flags & IPN_AUTOPORTMAP)
991 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
992 else if (n->in_flags & IPN_IPRANGE)
993 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
994 else if (n->in_flags & IPN_SPLIT)
996 else if (n->in_outmsk != 0)
997 n->in_space = ~ntohl(n->in_outmsk);
1002 * Calculate the number of valid IP addresses in the output
1003 * mapping range. In all cases, the range is inclusive of
1004 * the start and ending IP addresses.
1005 * If to a CIDR address, lose 2: broadcast + network address
1007 * If to a range, add one.
1008 * If to a single IP address, set to 1.
1011 if ((n->in_flags & IPN_IPRANGE) != 0)
1018 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1019 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1020 n->in_nip = ntohl(n->in_outip) + 1;
1021 else if ((n->in_flags & IPN_SPLIT) &&
1022 (n->in_redir & NAT_REDIRECT))
1023 n->in_nip = ntohl(n->in_inip);
1025 n->in_nip = ntohl(n->in_outip);
1026 if (n->in_redir & NAT_MAP) {
1027 n->in_pnext = ntohs(n->in_pmin);
1029 * Multiply by the number of ports made available.
1031 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1032 n->in_space *= (ntohs(n->in_pmax) -
1033 ntohs(n->in_pmin) + 1);
1035 * Because two different sources can map to
1036 * different destinations but use the same
1038 * If the result is smaller than in_space, then
1039 * we may have wrapped around 32bits.
1042 if ((i != 0) && (i != 0xffffffff)) {
1043 j = n->in_space * (~ntohl(i) + 1);
1044 if (j >= n->in_space)
1047 n->in_space = 0xffffffff;
1051 * If no protocol is specified, multiply by 256 to allow for
1052 * at least one IP:IP mapping per protocol.
1054 if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1055 j = n->in_space * 256;
1056 if (j >= n->in_space)
1059 n->in_space = 0xffffffff;
1063 /* Otherwise, these fields are preset */
1066 WRITE_ENTER(&ipf_nat);
1071 if (n->in_age[0] != 0)
1072 n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1074 if (n->in_age[1] != 0)
1075 n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
1077 if (n->in_redir & NAT_REDIRECT) {
1078 n->in_flags &= ~IPN_NOTDST;
1081 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1082 n->in_flags &= ~IPN_NOTSRC;
1085 MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1088 nat_stats.ns_rules++;
1089 #if SOLARIS && !defined(_INET_IP_STACK_H)
1090 pfil_delayed_copy = 0;
1093 RWLOCK_EXIT(&ipf_nat); /* WRITE */
1100 /* ------------------------------------------------------------------------ */
1101 /* Function: nat_resolverule */
1103 /* Parameters: n(I) - pointer to NAT rule */
1105 /* Resolve the interface names stored in the NAT rule to interface */
1106 /* pointers and, if a proxy label is set, look up the matching proxy for */
1107 /* the rule's protocol. */
1108 /* ------------------------------------------------------------------------ */
1109 static int nat_resolverule(n)
1112 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1113 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1115 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1116 if (n->in_ifnames[1][0] == '\0') {
1117 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1118 n->in_ifps[1] = n->in_ifps[0];
1120 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
1123 if (n->in_plabel[0] != '\0') {
1124 n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1125 if (n->in_apr == NULL)
1132 /* ------------------------------------------------------------------------ */
1133 /* Function: nat_siocdelnat */
1134 /* Returns: Nil */
1135 /* Parameters: n(I) - pointer to NAT rule to delete */
1136 /* np(I) - pointer to the list link that references n */
1137 /* getlock(I) - flag indicating if lock on ipf_nat is held */
1138 /* Mutex Locks: ipf_natio */
1140 /* Handle SIOCRMNAT. Release the timeout queues used by the NAT rule and */
1141 /* destroy the rule if it is no longer in use; otherwise flag it with */
1142 /* IPN_DELETE. */
1143 /* ------------------------------------------------------------------------ */
1144 static void nat_siocdelnat(n, np, getlock)
1149 WRITE_ENTER(&ipf_nat);
1151 if (n->in_redir & NAT_REDIRECT)
1153 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1155 if (nat_list == NULL) {
1160 if (n->in_tqehead[0] != NULL) {
1161 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1162 fr_freetimeoutqueue(n->in_tqehead[0]); /* release the same queue that was just dereferenced (index 0) */
1166 if (n->in_tqehead[1] != NULL) {
1167 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1168 fr_freetimeoutqueue(n->in_tqehead[1]);
1174 if (n->in_use == 0) {
1176 appr_free(n->in_apr);
1177 MUTEX_DESTROY(&n->in_lock);
1179 nat_stats.ns_rules--;
1180 #if SOLARIS && !defined(_INET_IP_STACK_H)
1181 if (nat_stats.ns_rules == 0)
1182 pfil_delayed_copy = 1;
1185 n->in_flags |= IPN_DELETE;
1189 RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */
1194 /* ------------------------------------------------------------------------ */
1195 /* Function: fr_natgetsz */
1196 /* Returns: int - 0 == success, != 0 is the error value. */
1197 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1198 /* get the size of. */
1200 /* Handle SIOCSTGSZ. */
1201 /* Return the size of the nat list entry to be copied back to user space. */
1202 /* The size of the entry is stored in the ng_sz field and the entire natget */
1203 /* structure is copied back to the user. */
1204 /* ------------------------------------------------------------------------ */
1205 static int fr_natgetsz(data)
1212 if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1217 nat = nat_instances;
1220 * Empty list so the size returned is 0. Simple.
1223 if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1229 * Make sure the pointer we're copying from exists in the
1230 * current list of entries. Security precaution to prevent
1231 * copying of random kernel data.
1233 for (n = nat_instances; n; n = n->nat_next)
1241 * Include any space required for proxy data structures.
1243 ng.ng_sz = sizeof(nat_save_t);
1246 ng.ng_sz += sizeof(ap_session_t) - 4;
1247 if (aps->aps_data != 0)
1248 ng.ng_sz += aps->aps_psiz;
1251 if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1257 /* ------------------------------------------------------------------------ */
1258 /* Function: fr_natgetent */
1259 /* Returns: int - 0 == success, != 0 is the error value. */
1260 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1261 /* to NAT structure to copy out. */
1263 /* Handle SIOCSTGET. */
1264 /* Copies out NAT entry to user space. Any additional data held for a */
1265 /* proxy is also copied, as is the NAT rule which was responsible for it */
1266 /* ------------------------------------------------------------------------ */
1267 static int fr_natgetent(data)
1272 nat_save_t *ipn, ipns;
1275 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1279 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1282 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1286 ipn->ipn_dsize = ipns.ipn_dsize;
1287 nat = ipns.ipn_next;
1289 nat = nat_instances;
1291 if (nat_instances == NULL)
1297 * Make sure the pointer we're copying from exists in the
1298 * current list of entries. Security precaution to prevent
1299 * copying of random kernel data.
1301 for (n = nat_instances; n; n = n->nat_next)
1309 ipn->ipn_next = nat->nat_next;
1312 * Copy the NAT structure.
1314 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1317 * If we have a pointer to the NAT rule it belongs to, save that too.
1319 if (nat->nat_ptr != NULL)
1320 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1321 sizeof(ipn->ipn_ipnat));
1324 * If we also know the NAT entry has an associated filter rule,
1327 if (nat->nat_fr != NULL)
1328 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1329 sizeof(ipn->ipn_fr));
1332 * Last but not least, if there is an application proxy session set
1333 * up for this NAT entry, then copy that out too, including any
1334 * private data saved along side it by the proxy.
1337 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1341 if (outsize < sizeof(*aps)) {
1347 bcopy((char *)aps, s, sizeof(*aps));
1349 outsize -= sizeof(*aps);
1350 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1351 bcopy(aps->aps_data, s, aps->aps_psiz);
1356 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1361 KFREES(ipn, ipns.ipn_dsize);
1367 /* ------------------------------------------------------------------------ */
1368 /* Function: fr_natputent */
1369 /* Returns: int - 0 == success, != 0 is the error value. */
1370 /* Parameters: data(I) - pointer to natget structure with NAT */
1371 /* structure information to load into the kernel */
1372 /* getlock(I) - flag indicating whether or not a write lock */
1373 /* on ipf_nat is already held. */
1375 /* Handle SIOCSTPUT. */
1376 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1377 /* firewall rule data structures, if pointers to them indicate so. */
1378 /* ------------------------------------------------------------------------ */
1379 static int fr_natputent(data, getlock)
1383 nat_save_t ipn, *ipnn;
1391 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1396 * Initialise early because of code at junkput label.
1405 * New entry, copy in the rest of the NAT entry if its size is more
1406 * than just the nat_t structure.
1408 if (ipn.ipn_dsize > sizeof(ipn)) {
1409 if (ipn.ipn_dsize > 81920) {
1414 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1418 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1426 KMALLOC(nat, nat_t *);
1432 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1434 * Initialize all these so that nat_delete() doesn't cause a crash.
1436 bzero((char *)nat, offsetof(struct nat, nat_tqe));
1437 nat->nat_tqe.tqe_pnext = NULL;
1438 nat->nat_tqe.tqe_next = NULL;
1439 nat->nat_tqe.tqe_ifq = NULL;
1440 nat->nat_tqe.tqe_parent = nat;
1443 * Restore the rule associated with this nat session
1445 in = ipnn->ipn_nat.nat_ptr;
1447 KMALLOC(in, ipnat_t *);
1453 bzero((char *)in, offsetof(struct ipnat, in_next6));
1454 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1456 in->in_flags |= IPN_DELETE;
1458 ATOMIC_INC(nat_stats.ns_rules);
1460 if (nat_resolverule(in) != 0) {
1467 * Check that the NAT entry doesn't already exist in the kernel.
1469 * For NAT_OUTBOUND, we're looking for a duplicate MAP entry. To do
1470 * this, we check to see if the inbound combination of addresses and
1471 * ports is already known. Similar logic is applied for NAT_INBOUND.
1474 bzero((char *)&fin, sizeof(fin));
1475 fin.fin_p = nat->nat_p;
1476 if (nat->nat_dir == NAT_OUTBOUND) {
1477 fin.fin_ifp = nat->nat_ifps[0];
1478 fin.fin_data[0] = ntohs(nat->nat_oport);
1479 fin.fin_data[1] = ntohs(nat->nat_outport);
1481 READ_ENTER(&ipf_nat);
1483 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1484 nat->nat_oip, nat->nat_inip);
1486 RWLOCK_EXIT(&ipf_nat);
1492 } else if (nat->nat_dir == NAT_INBOUND) {
1493 fin.fin_ifp = nat->nat_ifps[0];
1494 fin.fin_data[0] = ntohs(nat->nat_outport);
1495 fin.fin_data[1] = ntohs(nat->nat_oport);
1497 READ_ENTER(&ipf_nat);
1499 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1500 nat->nat_outip, nat->nat_oip);
1502 RWLOCK_EXIT(&ipf_nat);
1514 * Restore ap_session_t structure. Include the private data allocated
1519 KMALLOC(aps, ap_session_t *);
1525 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1527 aps->aps_apr = in->in_apr;
1529 aps->aps_apr = NULL;
1530 if (aps->aps_psiz != 0) {
1531 if (aps->aps_psiz > 81920) {
1535 KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1536 if (aps->aps_data == NULL) {
1540 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1544 aps->aps_data = NULL;
1549 * If there was a filtering rule associated with this entry then
1550 * build up a new one.
1554 if ((nat->nat_flags & SI_NEWFR) != 0) {
1555 KMALLOC(fr, frentry_t *);
1561 ipnn->ipn_nat.nat_fr = fr;
1563 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1564 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1569 fr->fr_type = FR_T_NONE;
1571 MUTEX_NUKE(&fr->fr_lock);
1572 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1575 READ_ENTER(&ipf_nat);
1577 for (n = nat_instances; n; n = n->nat_next)
1578 if (n->nat_fr == fr)
1582 MUTEX_ENTER(&fr->fr_lock);
1584 MUTEX_EXIT(&fr->fr_lock);
1587 RWLOCK_EXIT(&ipf_nat);
1598 KFREES(ipnn, ipn.ipn_dsize);
1603 WRITE_ENTER(&ipf_nat);
1605 error = nat_insert(nat, nat->nat_rev);
1606 if ((error == 0) && (aps != NULL)) {
1607 aps->aps_next = ap_sess_list;
1611 RWLOCK_EXIT(&ipf_nat);
1621 (void) fr_derefrule(&fr);
1623 if ((ipnn != NULL) && (ipnn != &ipn)) {
1624 KFREES(ipnn, ipn.ipn_dsize);
1628 if (aps->aps_data != NULL) {
1629 KFREES(aps->aps_data, aps->aps_psiz);
1635 appr_free(in->in_apr);
1644 /* ------------------------------------------------------------------------ */
1645 /* Function: nat_delete */
1647 /* Parameters: natd(I) - pointer to NAT structure to delete */
1648 /* logtype(I) - type of LOG record to create before deleting */
1649 /* Write Lock: ipf_nat */
1651 /* Delete a nat entry from the various lists and table. If NAT logging is */
1652 /* enabled then generate a NAT log record for this event. */
1653 /* ------------------------------------------------------------------------ */
1654 void nat_delete(nat, logtype)
/* Emit the log record before anything is unlinked. */
1661 if (logtype != 0 && nat_logging != 0)
1662 nat_log(nat, logtype);
1665 * Take it as a general indication that all the pointers are set if
/*
 * Unlink from the global list and from both hash chains, lowering the
 * per-bucket length counters for the two stored hash values.
 */
1668 if (nat->nat_pnext != NULL) {
1671 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1672 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1674 *nat->nat_pnext = nat->nat_next;
1675 if (nat->nat_next != NULL) {
1676 nat->nat_next->nat_pnext = nat->nat_pnext;
1677 nat->nat_next = NULL;
1679 nat->nat_pnext = NULL;
1681 *nat->nat_phnext[0] = nat->nat_hnext[0];
1682 if (nat->nat_hnext[0] != NULL) {
1683 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1684 nat->nat_hnext[0] = NULL;
1686 nat->nat_phnext[0] = NULL;
1688 *nat->nat_phnext[1] = nat->nat_hnext[1];
1689 if (nat->nat_hnext[1] != NULL) {
1690 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1691 nat->nat_hnext[1] = NULL;
1693 nat->nat_phnext[1] = NULL;
1695 if ((nat->nat_flags & SI_WILDP) != 0)
1696 nat_stats.ns_wilds--;
/* Clear the external pointer registered in nat_me, if any. */
1699 if (nat->nat_me != NULL) {
1700 *nat->nat_me = NULL;
/* Detach nat_tqe from its queue when tqe_ifq is set. */
1704 if (nat->nat_tqe.tqe_ifq != NULL)
1705 fr_deletequeueentry(&nat->nat_tqe);
1707 if (logtype == NL_EXPIRE)
1708 nat_stats.ns_expire++;
/*
 * nat_ref is examined with nat_lock held. When more references remain
 * than this call accounts for, the lock is dropped and ns_orphans is
 * raised; the statements that adjust nat_ref and leave early are not
 * visible in this excerpt.
 */
1710 MUTEX_ENTER(&nat->nat_lock);
1712 * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1713 * This happens when a nat'd packet is blocked and we want to throw
1714 * away the NAT session.
1716 if (logtype == NL_DESTROY) {
1717 if (nat->nat_ref > 2) {
1719 MUTEX_EXIT(&nat->nat_lock);
1721 nat_stats.ns_orphans++;
1724 } else if (nat->nat_ref > 1) {
1726 MUTEX_EXIT(&nat->nat_lock);
1728 nat_stats.ns_orphans++;
1731 MUTEX_EXIT(&nat->nat_lock);
1734 * At this point, nat_ref is 1, doing "--" would make it 0..
1738 nat_stats.ns_orphans--;
/*
 * Last reference: release the sync record, filter rule, host map and
 * parent NAT rule, then destroy the lock and free the proxy session.
 */
1740 #ifdef IPFILTER_SYNC
1742 ipfsync_del(nat->nat_sync);
1745 if (nat->nat_fr != NULL)
1746 (void) fr_derefrule(&nat->nat_fr);
1748 if (nat->nat_hm != NULL)
1749 fr_hostmapdel(&nat->nat_hm);
1752 * If there is an active reference from the nat entry to its parent
1753 * rule, decrement the rule's reference count and free it too if no
1754 * longer being used.
1758 fr_ipnatderef(&ipn);
1761 MUTEX_DESTROY(&nat->nat_lock);
1763 aps_free(nat->nat_aps);
1764 nat_stats.ns_inuse--;
1767 * If there's a fragment table entry too for this nat entry, then
1768 * dereference that as well. This is after nat_lock is released
1771 fr_forgetnat((void *)nat);
1777 /* ------------------------------------------------------------------------ */
1778 /* Function: nat_flushtable */
1779 /* Returns: int - number of NAT rules deleted */
1780 /* Parameters: Nil */
1782 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */
1783 /* log record should be emitted in nat_delete() if NAT logging is enabled. */
1784 /* ------------------------------------------------------------------------ */
1786 * nat_flushtable - clear the NAT table of all mapping entries.
1788 static int nat_flushtable()
1794 * ALL NAT mappings deleted, so lets just make the deletions
/*
 * Both hash tables are zeroed wholesale over ipf_nattable_sz slots.
 * NOTE(review): sizeof(nat_table[0]) measures the table pointer itself
 * rather than one slot (*nat_table[0]); confirm both are pointer sized.
 */
1797 if (nat_table[0] != NULL)
1798 bzero((char *)nat_table[0],
1799 sizeof(nat_table[0]) * ipf_nattable_sz);
1800 if (nat_table[1] != NULL)
1801 bzero((char *)nat_table[1],
1802 sizeof(nat_table[1]) * ipf_nattable_sz);
/* Delete from the head until nat_instances is empty. */
1804 while ((nat = nat_instances) != NULL) {
1805 nat_delete(nat, NL_FLUSH);
/* Reset the in-use count outright once the list has been emptied. */
1809 nat_stats.ns_inuse = 0;
1814 /* ------------------------------------------------------------------------ */
1815 /* Function: nat_clearlist */
1816 /* Returns: int - number of NAT/RDR rules deleted */
1817 /* Parameters: Nil */
1819 /* Delete all rules in the current list of rules. There is nothing elegant */
1820 /* about this cleanup: simply free all entries on the list of rules and */
1821 /* clear out the tables used for hashed NAT rule lookups. */
1822 /* ------------------------------------------------------------------------ */
1823 static int nat_clearlist()
1825 ipnat_t *n, **np = &nat_list;
/* Zero the nat_rules and rdr_rules hash tables. */
1828 if (nat_rules != NULL)
1829 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1830 if (rdr_rules != NULL)
1831 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
/*
 * Walk the rule list through np. A rule with in_use == 0 has its proxy
 * released, its lock destroyed and ns_rules lowered; any other rule is
 * only flagged IPN_DELETE (presumably the else arm). Unlinking and
 * freeing happen on lines not shown in this excerpt.
 */
1833 while ((n = *np) != NULL) {
1835 if (n->in_use == 0) {
1836 if (n->in_apr != NULL)
1837 appr_free(n->in_apr);
1838 MUTEX_DESTROY(&n->in_lock);
1840 nat_stats.ns_rules--;
1842 n->in_flags |= IPN_DELETE;
/* Solaris builds without _INET_IP_STACK_H also set pfil_delayed_copy. */
1847 #if SOLARIS && !defined(_INET_IP_STACK_H)
1848 pfil_delayed_copy = 1;
1856 /* ------------------------------------------------------------------------ */
1857 /* Function: nat_newmap */
1858 /* Returns: int - -1 == error, 0 == success */
1859 /* Parameters: fin(I) - pointer to packet information */
1860 /* nat(I) - pointer to NAT entry */
1861 /* ni(I) - pointer to structure with misc. information needed */
1862 /* to create new NAT entry. */
1864 /* Given an empty NAT structure, populate it with new information about a */
1865 /* new NAT session, as defined by the matching NAT rule. */
1866 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1867 /* to the new IP address for the translation. */
1868 /* ------------------------------------------------------------------------ */
1869 static INLINE int nat_newmap(fin, nat, ni)
1874 u_short st_port, dport, sport, port, sp, dp;
1875 struct in_addr in, inb;
1884 * If it's an outbound packet which doesn't match any existing
1885 * record, then create a new port
/*
 * Remember where the port search starts (st_port) so that a full wrap
 * can be detected at the bottom of the loop, and take the flags and
 * original ports from ni.
 */
1891 st_port = np->in_pnext;
1892 flags = ni->nai_flags;
1893 sport = ni->nai_sport;
1894 dport = ni->nai_dport;
/*
 * Each pass of the loop below picks a candidate address in in and,
 * for TCP/UDP, a candidate port, using whichever of the rule forms
 * applies: an existing host mapping, a 1:1 bimap block, map-block
 * arithmetic, the interface address (0/32), the unchanged source
 * (0/0), automatic port ranges, or the next port from in_pnext.
 * The candidate is then checked with nat_inlookup() and the loop
 * repeats while a conflicting entry is found.
 */
1897 * Do a loop until we either run out of entries to try or we find
1898 * a NAT mapping that isn't currently being used. This is done
1899 * because the change to the source is not (usually) being fixed.
1903 in.s_addr = htonl(np->in_nip);
1906 * Check to see if there is an existing NAT
1907 * setup for this IP address pair.
1909 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1912 in.s_addr = hm->hm_mapip.s_addr;
1913 } else if ((l == 1) && (hm != NULL)) {
1916 in.s_addr = ntohl(in.s_addr);
1920 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1925 if (np->in_redir == NAT_BIMAP &&
1926 np->in_inmsk == np->in_outmsk) {
1928 * map the address block in a 1:1 fashion
1930 in.s_addr = np->in_outip;
1931 in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1932 in.s_addr = ntohl(in.s_addr);
1934 } else if (np->in_redir & NAT_MAPBLK) {
1935 if ((l >= np->in_ppip) || ((l > 0) &&
1936 !(flags & IPN_TCPUDP)))
1939 * map-block - Calculate destination address.
1941 in.s_addr = ntohl(fin->fin_saddr);
1942 in.s_addr &= ntohl(~np->in_inmsk);
1943 inb.s_addr = in.s_addr;
1944 in.s_addr /= np->in_ippip;
1945 in.s_addr &= ntohl(~np->in_outmsk);
1946 in.s_addr += ntohl(np->in_outip);
1948 * Calculate destination port.
1950 if ((flags & IPN_TCPUDP) &&
1951 (np->in_ppip != 0)) {
1952 port = ntohs(sport) + l;
1953 port %= np->in_ppip;
1954 port += np->in_ppip *
1955 (inb.s_addr % np->in_ippip);
1956 port += MAPBLK_MINPORT;
1960 } else if ((np->in_outip == 0) &&
1961 (np->in_outmsk == 0xffffffff)) {
1963 * 0/32 - use the interface's IP address.
1966 fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1969 in.s_addr = ntohl(in.s_addr);
1971 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1973 * 0/0 - use the original source address/port.
1977 in.s_addr = ntohl(fin->fin_saddr);
1979 } else if ((np->in_outmsk != 0xffffffff) &&
1980 (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
1985 if ((flags & IPN_TCPUDP) &&
1986 ((np->in_redir & NAT_MAPBLK) == 0) &&
1987 (np->in_flags & IPN_AUTOPORTMAP)) {
1989 * "ports auto" (without map-block)
1991 if ((l > 0) && (l % np->in_ppip == 0)) {
1992 if (l > np->in_space) {
1994 } else if ((l > np->in_ppip) &&
1995 np->in_outmsk != 0xffffffff)
1998 if (np->in_ppip != 0) {
1999 port = ntohs(sport);
2000 port += (l % np->in_ppip);
2001 port %= np->in_ppip;
2002 port += np->in_ppip *
2003 (ntohl(fin->fin_saddr) %
2005 port += MAPBLK_MINPORT;
2009 } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2010 (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2012 * Standard port translation. Select next port.
/*
 * Hand out in_pnext and advance it, wrapping back to in_pmin once it
 * passes in_pmax.
 */
2014 port = htons(np->in_pnext++);
2016 if (np->in_pnext > ntohs(np->in_pmax)) {
2017 np->in_pnext = ntohs(np->in_pmin);
2018 if (np->in_outmsk != 0xffffffff)
2023 if (np->in_flags & IPN_IPRANGE) {
2024 if (np->in_nip > ntohl(np->in_outmsk))
2025 np->in_nip = ntohl(np->in_outip);
2027 if ((np->in_outmsk != 0xffffffff) &&
2028 ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2029 ntohl(np->in_outip))
2030 np->in_nip = ntohl(np->in_outip) + 1;
2033 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2037 * Here we do a lookup of the connection as seen from
2038 * the outside. If an IP# pair already exists, try
2039 * again. So if you have A->B becomes C->B, you can
2040 * also have D->E become C->E but not D->B causing
2041 * another C->B. Also take protocol and ports into
2042 * account when determining whether a pre-existing
2043 * NAT setup will cause an external conflict where
2044 * this is appropriate.
/*
 * fin_data[] is borrowed for the lookup, with the candidate port in
 * slot 1, and restored straight afterwards.
 */
2046 inb.s_addr = htonl(in.s_addr);
2047 sp = fin->fin_data[0];
2048 dp = fin->fin_data[1];
2049 fin->fin_data[0] = fin->fin_data[1];
2050 fin->fin_data[1] = htons(port);
2051 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2052 (u_int)fin->fin_p, fin->fin_dst, inb);
2053 fin->fin_data[0] = sp;
2054 fin->fin_data[1] = dp;
2057 * Has the search wrapped around and come back to the
/*
 * A conflict found with both the port and address counters back at
 * their starting values means every candidate was tried (the action
 * taken is on a line not shown in this excerpt).
 */
2060 if ((natl != NULL) &&
2061 (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2062 (np->in_nip != 0) && (st_ip == np->in_nip))
2065 } while (natl != NULL);
2067 if (np->in_space > 0)
2070 /* Setup the NAT table */
2071 nat->nat_inip = fin->fin_src;
2072 nat->nat_outip.s_addr = htonl(in.s_addr);
2073 nat->nat_oip = fin->fin_dst;
2074 if (nat->nat_hm == NULL)
2075 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2079 * The ICMP checksum does not have a pseudo header containing
/*
 * nai_sum1 and nai_sum2 hold the old and new source address (plus the
 * old and new port for TCP/UDP) for the checksum delta computed by
 * the caller.
 */
2082 ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2083 ni->nai_sum2 = LONG_SUM(in.s_addr);
2084 if ((flags & IPN_TCPUDP)) {
2085 ni->nai_sum1 += ntohs(sport);
2086 ni->nai_sum2 += ntohs(port);
/*
 * Record the ports in the entry and rewrite the source port (or the
 * ICMP id) in the packet itself.
 */
2089 if (flags & IPN_TCPUDP) {
2090 nat->nat_inport = sport;
2091 nat->nat_outport = port; /* sport */
2092 nat->nat_oport = dport;
2093 ((tcphdr_t *)fin->fin_dp)->th_sport = port;
2094 } else if (flags & IPN_ICMPQUERY) {
2095 ((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2096 nat->nat_inport = port;
2097 nat->nat_outport = port;
2098 } else if (fin->fin_p == IPPROTO_GRE) {
2100 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2101 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2102 nat->nat_oport = 0;/*fin->fin_data[1];*/
2103 nat->nat_inport = 0;/*fin->fin_data[0];*/
2104 nat->nat_outport = 0;/*fin->fin_data[0];*/
2105 nat->nat_call[0] = fin->fin_data[0];
2106 nat->nat_call[1] = fin->fin_data[0];
/* Report the chosen address and ports back through ni. */
2110 ni->nai_ip.s_addr = in.s_addr;
2111 ni->nai_port = port;
2112 ni->nai_nport = dport;
2117 /* ------------------------------------------------------------------------ */
2118 /* Function: nat_newrdr */
2119 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */
2120 /* allow rule to be moved if IPN_ROUNDR is set. */
2121 /* Parameters: fin(I) - pointer to packet information */
2122 /* nat(I) - pointer to NAT entry */
2123 /* ni(I) - pointer to structure with misc. information needed */
2124 /* to create new NAT entry. */
2126 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2127 /* to the new IP address for the translation. */
2128 /* ------------------------------------------------------------------------ */
2129 static INLINE int nat_newrdr(fin, nat, ni)
2134 u_short nport, dport, sport;
2135 struct in_addr in, inb;
/* Pick up the flags and original ports carried in ni. */
2147 flags = ni->nai_flags;
2148 sport = ni->nai_sport;
2149 dport = ni->nai_dport;
2152 * If the matching rule has IPN_STICKY set, then we want to have the
2153 * same rule kick in as before. Why would this happen? If you have
2154 * a collection of rdr rules with "round-robin sticky", the current
2155 * packet might match a different one to the previous connection but
2156 * we want the same destination to be used.
/*
 * Sticky round-robin or split rules first consult the host map for a
 * previous choice. The remaining arms select the target address from
 * the split pair, the interface address (0/32), the original
 * destination (0/0), a 1:1 bimap block or simply in_inip. The new
 * port is then either dport shifted into the range that starts at
 * in_pnext, or in_pnext itself.
 */
2158 if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2159 ((np->in_flags & IPN_STICKY) != 0)) {
2160 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2163 in.s_addr = ntohl(hm->hm_mapip.s_addr);
2171 * Otherwise, it's an inbound packet. Most likely, we don't
2172 * want to rewrite source ports and source addresses. Instead,
2173 * we want to rewrite to a fixed internal address and fixed
2176 if (np->in_flags & IPN_SPLIT) {
2177 in.s_addr = np->in_nip;
2179 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2180 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2183 in.s_addr = hm->hm_mapip.s_addr;
2188 if (hm == NULL || hm->hm_ref == 1) {
2189 if (np->in_inip == htonl(in.s_addr)) {
2190 np->in_nip = ntohl(np->in_inmsk);
2193 np->in_nip = ntohl(np->in_inip);
2197 } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2199 * 0/32 - use the interface's IP address.
2201 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2203 in.s_addr = ntohl(in.s_addr);
2205 } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2207 * 0/0 - use the original destination address/port.
2209 in.s_addr = ntohl(fin->fin_daddr);
2211 } else if (np->in_redir == NAT_BIMAP &&
2212 np->in_inmsk == np->in_outmsk) {
2214 * map the address block in a 1:1 fashion
2216 in.s_addr = np->in_inip;
2217 in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2218 in.s_addr = ntohl(in.s_addr);
2220 in.s_addr = ntohl(np->in_inip);
2223 if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2227 * Whilst not optimized for the case where
2228 * pmin == pmax, the gain is not significant.
2230 if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2231 (np->in_pmin != np->in_pmax)) {
2232 nport = ntohs(dport) - ntohs(np->in_pmin) +
2233 ntohs(np->in_pnext);
2234 nport = htons(nport);
2236 nport = np->in_pnext;
2240 * When the redirect-to address is set to 0.0.0.0, just
2241 * assume a blank `forwarding' of the packet. We don't
2242 * setup any translation for this either.
2244 if (in.s_addr == 0) {
2247 in.s_addr = ntohl(fin->fin_daddr);
2251 * Check to see if this redirect mapping already exists and if
2252 * it does, return "failure" (allowing it to be created will just
2253 * cause one or both of these "connections" to stop working.)
/*
 * fin_data[] is temporarily rewritten for the duplicate lookup and
 * restored straight afterwards.
 */
2255 inb.s_addr = htonl(in.s_addr);
2256 sp = fin->fin_data[0];
2257 dp = fin->fin_data[1];
2258 fin->fin_data[1] = fin->fin_data[0];
2259 fin->fin_data[0] = ntohs(nport);
2260 natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2261 (u_int)fin->fin_p, inb, fin->fin_src);
2262 fin->fin_data[0] = sp;
2263 fin->fin_data[1] = dp;
2267 nat->nat_inip.s_addr = htonl(in.s_addr);
2268 nat->nat_outip = fin->fin_dst;
2269 nat->nat_oip = fin->fin_src;
2270 if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2271 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
/*
 * Old and new destination address and port, kept in ni for the
 * checksum adjustment, followed by the values reported to the caller.
 */
2274 ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2275 ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2277 ni->nai_ip.s_addr = in.s_addr;
2278 ni->nai_nport = nport;
2279 ni->nai_port = sport;
/*
 * Record the ports in the entry and rewrite the destination port (or
 * the ICMP id) in the packet itself.
 */
2281 if (flags & IPN_TCPUDP) {
2282 nat->nat_inport = nport;
2283 nat->nat_outport = dport;
2284 nat->nat_oport = sport;
2285 ((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2286 } else if (flags & IPN_ICMPQUERY) {
2287 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2288 nat->nat_inport = nport;
2289 nat->nat_outport = nport;
2290 } else if (fin->fin_p == IPPROTO_GRE) {
2292 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2293 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2294 nat->nat_call[0] = fin->fin_data[0];
2295 nat->nat_call[1] = fin->fin_data[1];
2296 nat->nat_oport = 0; /*fin->fin_data[0];*/
2297 nat->nat_inport = 0; /*fin->fin_data[1];*/
2298 nat->nat_outport = 0; /*fin->fin_data[1];*/
2306 /* ------------------------------------------------------------------------ */
2307 /* Function: nat_new */
2308 /* Returns: nat_t* - NULL == failure to create new NAT structure, */
2309 /* else pointer to new NAT structure */
2310 /* Parameters: fin(I) - pointer to packet information */
2311 /* np(I) - pointer to NAT rule */
2312 /* natsave(I) - pointer to where to store NAT struct pointer */
2313 /* flags(I) - flags describing the current packet */
2314 /* direction(I) - direction of packet (in/out) */
2315 /* Write Lock: ipf_nat */
2317 /* Attempts to create a new NAT entry. Does not actually change the packet */
2320 /* This function is in three main parts: (1) deal with creating a new NAT */
2321 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */
2322 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2323 /* and (3) building that structure and putting it into the NAT table(s). */
2325 /* NOTE: natsave should NOT be used to point back to an ipstate_t struct */
2326 /* as it can result in memory being corrupted. */
2327 /* ------------------------------------------------------------------------ */
2328 nat_t *nat_new(fin, np, natsave, flags, direction)
2335 u_short port = 0, sport = 0, dport = 0, nport = 0;
2336 tcphdr_t *tcp = NULL;
2337 hostmap_t *hm = NULL;
2344 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2345 qpktinfo_t *qpi = fin->fin_qpi;
/*
 * A table already holding ipf_nattable_max entries is counted in
 * ns_memfail; the exit itself is on lines not shown in this excerpt.
 */
2348 if (nat_stats.ns_inuse >= ipf_nattable_max) {
2349 nat_stats.ns_memfail++;
/*
 * Only flags present in both the rule and the packet, limited to
 * NAT_FROMRULE, are kept in nflags.
 */
2355 nflags = np->in_flags & flags;
2356 nflags &= NAT_FROMRULE;
2359 ni.nai_nflags = nflags;
2360 ni.nai_flags = flags;
2364 /* Give me a new nat */
2365 KMALLOC(nat, nat_t *);
2367 nat_stats.ns_memfail++;
2369 * Try to automatically tune the max # of entries in the
2370 * table allowed to be less than what will cause kmem_alloc()
2371 * to fail and try to eliminate panics due to out of memory
2372 * conditions arising.
2374 if (ipf_nattable_max > ipf_nattable_sz) {
2375 ipf_nattable_max = nat_stats.ns_inuse - 100;
2376 printf("ipf_nattable_max reduced to %d\n",
/*
 * Seed ni with the packet ports passed through htons(), or with the
 * ICMP id for query packets. Further down, the entry is zeroed,
 * ipf_nat_new is taken unless NAT_SLAVE is set, an existing entry is
 * looked up according to direction, and nat_newmap() or nat_newrdr()
 * fills in the translation before the checksum deltas are computed
 * and nat_finalise() is called.
 */
2382 if (flags & IPN_TCPUDP) {
2384 ni.nai_sport = htons(fin->fin_sport);
2385 ni.nai_dport = htons(fin->fin_dport);
2386 } else if (flags & IPN_ICMPQUERY) {
2388 * In the ICMP query NAT code, we translate the ICMP id fields
2389 * to make them unique. This is indepedent of the ICMP type
2390 * (e.g. in the unlikely event that a host sends an echo and
2391 * an tstamp request with the same id, both packets will have
2392 * their ip address/id field changed in the same way).
2394 /* The icmp_id field is used by the sender to identify the
2395 * process making the icmp request. (the receiver justs
2396 * copies it back in its response). So, it closely matches
2397 * the concept of source port. We overlay sport, so we can
2398 * maximally reuse the existing code.
2400 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2401 ni.nai_dport = ni.nai_sport;
2404 bzero((char *)nat, sizeof(*nat));
2405 nat->nat_flags = flags;
2406 nat->nat_redir = np->in_redir;
2408 if ((flags & NAT_SLAVE) == 0) {
2409 MUTEX_ENTER(&ipf_nat_new);
2413 * Search the current table for a match.
2415 if (direction == NAT_OUTBOUND) {
2417 * We can now arrange to call this for the same connection
2418 * because ipf_nat_new doesn't protect the code path into
2421 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2422 fin->fin_src, fin->fin_dst);
2429 move = nat_newmap(fin, nat, &ni);
2437 * NAT_INBOUND is used only for redirects rules
2439 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2440 fin->fin_src, fin->fin_dst);
2447 move = nat_newrdr(fin, nat, &ni);
2455 nport = ni.nai_nport;
2457 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2458 if (np->in_redir == NAT_REDIRECT) {
2461 } else if (np->in_redir == NAT_MAP) {
2467 if (flags & IPN_TCPUDP) {
2468 sport = ni.nai_sport;
2469 dport = ni.nai_dport;
2470 } else if (flags & IPN_ICMPQUERY) {
2471 sport = ni.nai_sport;
2475 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2476 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2477 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2478 if ((flags & IPN_TCP) && dohwcksum &&
2479 (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2480 if (direction == NAT_OUTBOUND)
2481 ni.nai_sum1 = LONG_SUM(in.s_addr);
2483 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2484 ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2486 ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2487 nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2490 nat->nat_sumd[1] = nat->nat_sumd[0];
2492 if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2493 if (direction == NAT_OUTBOUND)
2494 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2496 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2498 ni.nai_sum2 = LONG_SUM(in.s_addr);
2500 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2501 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2503 nat->nat_ipsumd = nat->nat_sumd[0];
2504 if (!(flags & IPN_TCPUDPICMP)) {
2505 nat->nat_sumd[0] = 0;
2506 nat->nat_sumd[1] = 0;
2510 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2514 if (flags & SI_WILDP)
2515 nat_stats.ns_wilds++;
2516 fin->fin_flx |= FI_NEWNAT;
2519 nat_stats.ns_badnat++;
2520 if ((hm = nat->nat_hm) != NULL)
2525 if ((flags & NAT_SLAVE) == 0) {
2526 MUTEX_EXIT(&ipf_nat_new);
2532 /* ------------------------------------------------------------------------ */
2533 /* Function: nat_finalise */
2534 /* Returns: int - 0 == success, -1 == failure */
2535 /* Parameters: fin(I) - pointer to packet information */
2536 /* nat(I) - pointer to NAT entry */
2537 /* ni(I) - pointer to structure with misc. information needed */
2538 /* to create new NAT entry. */
2539 /* Write Lock: ipf_nat */
2541 /* This is the tail end of constructing a new NAT entry and is the same */
2542 /* for both IPv4 and IPv6. */
2543 /* ------------------------------------------------------------------------ */
2545 static int nat_finalise(fin, nat, ni, tcp, natsave, direction)
/*
 * Record the rule interfaces by name in nat_ifnames (COPYIFNAME is
 * defined elsewhere).
 */
2558 if (np->in_ifps[0] != NULL) {
2559 COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2561 if (np->in_ifps[1] != NULL) {
2562 COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
/* Entries not flagged SI_CLONE get a sync record. */
2564 #ifdef IPFILTER_SYNC
2565 if ((nat->nat_flags & SI_CLONE) == 0)
2566 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
/* Fill in the remaining fields from the arguments, rule and packet. */
2569 nat->nat_me = natsave;
2570 nat->nat_dir = direction;
2571 nat->nat_ifps[0] = np->in_ifps[0];
2572 nat->nat_ifps[1] = np->in_ifps[1];
2574 nat->nat_p = fin->fin_p;
2575 nat->nat_mssclamp = np->in_mssclamp;
/* tcp is dereferenced only when the protocol is TCP. */
2576 if (nat->nat_p == IPPROTO_TCP)
2577 nat->nat_seqnext[0] = ntohl(tcp->th_seq);
/*
 * A rule with a proxy gets a proxy session unless this is a NAT_SLAVE
 * entry; a return of -1 from appr_new() is checked for.
 */
2579 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2580 if (appr_new(fin, nat) == -1)
/*
 * Insertion succeeded: log the new entry and take fr_lock around an
 * update of the rule that is on a line not shown in this excerpt.
 */
2583 if (nat_insert(nat, fin->fin_rev) == 0) {
2585 nat_log(nat, (u_int)np->in_redir);
2590 MUTEX_ENTER(&fr->fr_lock);
2592 MUTEX_EXIT(&fr->fr_lock);
2598 * nat_insert failed, so cleanup time...
2604 /* ------------------------------------------------------------------------ */
2605 /* Function: nat_insert */
2606 /* Returns: int - 0 == success, -1 == failure */
2607 /* Parameters: nat(I) - pointer to NAT structure */
2608 /* rev(I) - flag indicating forward/reverse direction of packet */
2609 /* Write Lock: ipf_nat */
2611 /* Insert a NAT entry into the hash tables for searching and add it to the */
2612 /* list of active NAT entries. Adjust global counters when complete. */
2613 /* ------------------------------------------------------------------------ */
2614 int nat_insert(nat, rev)
2622 * Try and return an error as early as possible, so calculate the hash
2623 * entry numbers first and then proceed.
/*
 * hv1 is derived from nat_inip and nat_oip, hv2 from nat_outip and
 * nat_oip. Ports take part only when neither SI_W_SPORT nor
 * SI_W_DPORT is set.
 */
2625 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2626 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2628 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2630 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2632 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2635 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2636 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2637 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2638 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
/*
 * Either target bucket already being at fr_nat_maxbucket is checked
 * here; the failure exit is on lines not shown in this excerpt.
 */
2641 if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2642 nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2646 nat->nat_hv[0] = hv1;
2647 nat->nat_hv[1] = hv2;
2649 MUTEX_INIT(&nat->nat_lock, "nat entry lock");
/* Start the traffic counters from zero. */
2653 nat->nat_bytes[0] = 0;
2654 nat->nat_pkts[0] = 0;
2655 nat->nat_bytes[1] = 0;
2656 nat->nat_pkts[1] = 0;
/*
 * Force termination of the interface names and resolve them to
 * interface pointers; an empty second name inherits the first.
 */
2658 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2659 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2661 if (nat->nat_ifnames[1][0] != '\0') {
2662 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2663 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2665 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2667 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2668 nat->nat_ifps[1] = nat->nat_ifps[0];
/*
 * Push onto the head of nat_instances, fixing the back pointer of the
 * old head.
 */
2671 nat->nat_next = nat_instances;
2672 nat->nat_pnext = &nat_instances;
2674 nat_instances->nat_pnext = &nat->nat_next;
2675 nat_instances = nat;
/*
 * Same head insertion for each of the two hash chains, with the
 * bucket length counters raised to match.
 */
2677 natp = &nat_table[0][hv1];
2679 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2680 nat->nat_phnext[0] = natp;
2681 nat->nat_hnext[0] = *natp;
2683 nat_stats.ns_bucketlen[0][hv1]++;
2685 natp = &nat_table[1][hv2];
2687 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2688 nat->nat_phnext[1] = natp;
2689 nat->nat_hnext[1] = *natp;
2691 nat_stats.ns_bucketlen[1][hv2]++;
/* Hand the entry to fr_setnatqueue() and update the totals. */
2693 fr_setnatqueue(nat, rev);
2695 nat_stats.ns_added++;
2696 nat_stats.ns_inuse++;
2701 /* ------------------------------------------------------------------------ */
2702 /* Function: nat_icmperrorlookup */
2703 /* Returns: nat_t* - point to matching NAT structure */
2704 /* Parameters: fin(I) - pointer to packet information */
2705 /* dir(I) - direction of packet (in/out) */
2707 /* Check if the ICMP error message is related to an existing TCP, UDP or */
2708 /* ICMP query nat entry. It is assumed that the packet is already of the */
2709 /* required length. */
2710 /* ------------------------------------------------------------------------ */
2711 nat_t *nat_icmperrorlookup(fin, dir)
2715 int flags = 0, type, minlen;
2716 icmphdr_t *icmp, *orgicmp;
2717 tcphdr_t *tcp = NULL;
/*
 * Only option-free ICMP error packets are examined. oip is the IP
 * header embedded 8 bytes into the ICMP payload and minlen its claimed
 * header length, which must be at least sizeof(ip_t) and fit within
 * fin_plen.
 */
2724 type = icmp->icmp_type;
2726 * Does it at least have the return (basic) IP header ?
2727 * Only a basic IP header (no options) should be with an ICMP error
2728 * header. Also, if it's not an error type, then return.
2730 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2736 oip = (ip_t *)((char *)fin->fin_dp + 8);
2737 minlen = IP_HL(oip) << 2;
2738 if ((minlen < sizeof(ip_t)) ||
2739 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2742 * Is the buffer big enough for all of it ? It's the size of the IP
2743 * header claimed in the encapsulated part which is of concern. It
2744 * may be too big to be in this buffer but not so big that it's
2745 * outside the ICMP packet, leading to TCP deref's causing problems.
2746 * This is possible because we don't know how big oip_hl is when we
2747 * do the pullup early in fr_check() and thus can't gaurantee it is
2755 # if defined(MENTAT)
2756 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2759 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2760 (char *)fin->fin_ip + M_LEN(m))
/*
 * The destination of the error is compared with the source of the
 * embedded packet (a mismatch presumably rejects the packet).
 */
2766 if (fin->fin_daddr != oip->ip_src.s_addr)
2770 if (p == IPPROTO_TCP)
2772 else if (p == IPPROTO_UDP)
2774 else if (p == IPPROTO_ICMP) {
2775 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2777 /* see if this is related to an ICMP query */
2778 if (nat_icmpquerytype4(orgicmp->icmp_type)) {
/*
 * fin_data[] is saved, loaded with the embedded ICMP id for the
 * lookup, and restored afterwards.
 */
2779 data[0] = fin->fin_data[0];
2780 data[1] = fin->fin_data[1];
2781 fin->fin_data[0] = 0;
2782 fin->fin_data[1] = orgicmp->icmp_id;
2784 flags = IPN_ICMPERR|IPN_ICMPQUERY;
2786 * NOTE : dir refers to the direction of the original
2787 * ip packet. By definition the icmp error
2788 * message flows in the opposite direction.
2790 if (dir == NAT_INBOUND)
2791 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2794 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2796 fin->fin_data[0] = data[0];
2797 fin->fin_data[1] = data[1];
/*
 * For TCP and UDP another 8 bytes must be present so that both
 * embedded ports can be read; they are placed in fin_data[] with the
 * destination port first for the lookup, then fin_data[] is restored.
 */
2802 if (flags & IPN_TCPUDP) {
2803 minlen += 8; /* + 64bits of data to get ports */
2804 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2807 data[0] = fin->fin_data[0];
2808 data[1] = fin->fin_data[1];
2809 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2810 fin->fin_data[0] = ntohs(tcp->th_dport);
2811 fin->fin_data[1] = ntohs(tcp->th_sport);
2813 if (dir == NAT_INBOUND) {
2814 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2817 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2820 fin->fin_data[0] = data[0];
2821 fin->fin_data[1] = data[1];
/* Remaining case: look up by the embedded addresses with no flags. */
2824 if (dir == NAT_INBOUND)
2825 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2827 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2831 /* ------------------------------------------------------------------------ */
2832 /* Function: nat_icmperror */
2833 /* Returns: nat_t* - point to matching NAT structure */
2834 /* Parameters: fin(I) - pointer to packet information */
2835 /* nflags(I) - NAT flags for this packet */
2836 /* dir(I) - direction of packet (in/out) */
2838 /* Fix up an ICMP packet which is an error message for an existing NAT */
2839 /* session. This will correct both packet header data and checksums. */
2841 /* This should *ONLY* be used for incoming ICMP error packets to make sure */
2842 /* a NAT'd ICMP packet gets correctly recognised. */
2843 /* ------------------------------------------------------------------------ */
/*
 * nat_icmperror() - patch the IP packet quoted inside an ICMPv4 error so
 * that it lines up with the NAT session found by nat_icmperrorlookup().
 *
 * fin    - packet information for the ICMP error being processed
 * nflags - written through: set to IPN_ICMPERR once a session is found
 * dir    - direction; handed to nat_icmperrorlookup() and compared with
 *          NAT_OUTBOUND when choosing how to fix the ICMP checksum
 *
 * What the code below does, in order:
 *  - tests for FI_SHORT/FI_FRAGBODY packets, for non-IPv4 packets and for
 *    a failed nat_icmperrorlookup() before modifying anything
 *    (NOTE(review): presumably each of these returns NULL - confirm);
 *  - locates the quoted IP header (oip = &icmp->icmp_ip) and its payload
 *    (dp = oip + IP_HL(oip) * 4), pointing csump at th_sum or uh_sum for
 *    quoted TCP or UDP and selecting IPN_ICMPQUERY for quoted ICMP;
 *  - computes dlen as fin_plen minus the offset of dp from fin_ip, i.e.
 *    the number of packet bytes from dp onwards;
 *  - records in odst whether the quoted destination equals nat_oip;
 *  - rewrites the quoted source to nat_inip or the quoted destination to
 *    nat_outip and compensates oip->ip_sum with fix_datacksum();
 *  - for quoted TCP/UDP with dlen >= 4, rewrites th_sport from nat_inport
 *    or th_dport from nat_outport, patches the quoted transport checksum
 *    through csump (for UDP only when dlen >= 8 and the stored checksum
 *    is non-zero), folds sumd2 down to 16 bits and applies it to
 *    icmp->icmp_cksum with fix_outcksum() or fix_incksum();
 *  - for a quoted ICMP query with dlen >= 8, replaces icmp_id with
 *    nat_inport when they differ and compensates orgicmp->icmp_cksum
 *    with the CALC_SUMD() delta.
 */
2844 nat_t *nat_icmperror(fin, nflags, dir)
2849 u_32_t sum1, sum2, sumd, sumd2;
2850 struct in_addr a1, a2;
2851 int flags, dlen, odst;
2859 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2862 * nat_icmperrorlookup() will return NULL for `defective' packets.
2864 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2871 *nflags = IPN_ICMPERR;
2873 oip = (ip_t *)&icmp->icmp_ip;
2874 dp = (((char *)oip) + (IP_HL(oip) << 2));
2875 if (oip->ip_p == IPPROTO_TCP) {
2876 tcp = (tcphdr_t *)dp;
2877 csump = (u_short *)&tcp->th_sum;
2879 } else if (oip->ip_p == IPPROTO_UDP) {
2882 udp = (udphdr_t *)dp;
2883 tcp = (tcphdr_t *)dp;
2884 csump = (u_short *)&udp->uh_sum;
2886 } else if (oip->ip_p == IPPROTO_ICMP)
2887 flags = IPN_ICMPQUERY;
2888 dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);
2891 * Need to adjust ICMP header to include the real IP#'s and
2892 * port #'s. Only apply a checksum change relative to the
2893 * IP address change as it will be modified again in fr_checknatout
2894 * for both address and port. Two checksum changes are
2895 * necessary for the two header address changes. Be careful
2896 * to only modify the checksum once for the port # and twice
2902 * Fix the IP addresses in the offending IP packet. You also need
2903 * to adjust the IP header checksum of that offending IP packet.
2905 * Normally, you would expect that the ICMP checksum of the
2906 * ICMP error message needs to be adjusted as well for the
2907 * IP address change in oip.
2908 * However, this is a NOP, because the ICMP checksum is
2909 * calculated over the complete ICMP packet, which includes the
2910 * changed oip IP addresses and oip->ip_sum. However, these
2911 * two changes cancel each other out (if the delta for
2912 * the IP address is x, then the delta for ip_sum is minus x),
2913 * so no change in the icmp_cksum is necessary.
2917 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2918 * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
2919 * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
2921 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2922 * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2923 * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
2927 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2928 * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2929 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2931 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2932 * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
2933 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2936 odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
2938 a1.s_addr = ntohl(nat->nat_inip.s_addr);
2939 a2.s_addr = ntohl(oip->ip_src.s_addr);
2940 oip->ip_src.s_addr = htonl(a1.s_addr);
2942 a1.s_addr = ntohl(nat->nat_outip.s_addr);
2943 a2.s_addr = ntohl(oip->ip_dst.s_addr);
2944 oip->ip_dst.s_addr = htonl(a1.s_addr);
2947 sumd = a2.s_addr - a1.s_addr;
2949 if (a1.s_addr > a2.s_addr)
2953 fix_datacksum(&oip->ip_sum, sumd);
2961 * Fix UDP pseudo header checksum to compensate for the
2962 * IP address change.
2964 if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
2967 * For offending TCP/UDP IP packets, translate the ports as
2968 * well, based on the NAT specification. Of course such
2969 * a change may be reflected in the ICMP checksum as well.
2971 * Since the port fields are part of the TCP/UDP checksum
2972 * of the offending IP packet, you need to adjust that checksum
2973 * as well... except that the change in the port numbers should
2974 * be offset by the checksum change. However, the TCP/UDP
2975 * checksum will also need to change if there has been an
2976 * IP address change.
2979 sum1 = ntohs(nat->nat_inport);
2980 sum2 = ntohs(tcp->th_sport);
2982 tcp->th_sport = htons(sum1);
2984 sum1 = ntohs(nat->nat_outport);
2985 sum2 = ntohs(tcp->th_dport);
2987 tcp->th_dport = htons(sum1);
2990 sumd += sum1 - sum2;
2991 if (sumd != 0 || sumd2 != 0) {
2993 * At this point, sumd is the delta to apply to the
2994 * TCP/UDP header, given the changes in both the IP
2995 * address and the ports and sumd2 is the delta to
2996 * apply to the ICMP header, given the IP address
2997 * change delta that may need to be applied to the
2998 * TCP/UDP checksum instead.
3000 * If we will both the IP and TCP/UDP checksums
3001 * then the ICMP checksum changes by the address
3002 * delta applied to the TCP/UDP checksum. If we
3003 * do not change the TCP/UDP checksum them we
3004 * apply the delta in ports to the ICMP checksum.
3006 if (oip->ip_p == IPPROTO_UDP) {
3007 if ((dlen >= 8) && (*csump != 0)) {
3008 fix_datacksum(csump, sumd);
3010 sumd2 = sum1 - sum2;
3014 } else if (oip->ip_p == IPPROTO_TCP) {
3016 fix_datacksum(csump, sumd);
3018 sumd2 = sum2 - sum1;
3028 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3029 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3030 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3032 if ((odst == 0) && (dir == NAT_OUTBOUND) &&
3033 (fin->fin_rev == 0) && (np != NULL) &&
3034 (np->in_redir & NAT_REDIRECT)) {
3035 fix_outcksum(fin, &icmp->icmp_cksum,
3038 fix_incksum(fin, &icmp->icmp_cksum,
3043 } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
3047 * XXX - what if this is bogus hl and we go off the end ?
3048 * In this case, nat_icmperrorlookup() will have returned NULL.
3050 orgicmp = (icmphdr_t *)dp;
3053 if (orgicmp->icmp_id != nat->nat_inport) {
3056 * Fix ICMP checksum (of the offening ICMP
3057 * query packet) to compensate the change
3058 * in the ICMP id of the offending ICMP
3061 * Since you modify orgicmp->icmp_id with
3062 * a delta (say x) and you compensate that
3063 * in origicmp->icmp_cksum with a delta
3064 * minus x, you don't have to adjust the
3065 * overall icmp->icmp_cksum
3067 sum1 = ntohs(orgicmp->icmp_id);
3068 sum2 = ntohs(nat->nat_inport);
3069 CALC_SUMD(sum1, sum2, sumd);
3070 orgicmp->icmp_id = nat->nat_inport;
3071 fix_datacksum(&orgicmp->icmp_cksum, sumd);
3073 } /* nat_dir == NAT_INBOUND is impossible for icmp queries */
3080 * NB: these lookups don't lock access to the list; it is assumed that this has
3081 * already been done!
3084 /* ------------------------------------------------------------------------ */
3085 /* Function: nat_inlookup */
3086 /* Returns: nat_t* - NULL == no match, */
3087 /* else pointer to matching NAT entry */
3088 /* Parameters: fin(I) - pointer to packet information */
3089 /* flags(I) - NAT flags for this packet */
3090 /* p(I) - protocol for this packet */
3091 /* src(I) - source IP address */
3092 /* mapdst(I) - destination IP address */
3094 /* Lookup a nat entry based on the mapped destination ip address/port and */
3095 /* real source address/port. We use this lookup when receiving a packet, */
3096 /* we're looking for a table entry, based on the destination address. */
3098 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3100 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3101 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */
3103 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3104 /* the packet is of said protocol */
3105 /* ------------------------------------------------------------------------ */
/*
 * nat_inlookup() - find the NAT entry that an inbound packet belongs to.
 *
 * Key setup: dst is mapdst.s_addr and sflags is flags masked with
 * NAT_TCPUDPICMP.  sport/dport are taken from fin_data[0]/fin_data[1]
 * through htons(); on the ICMP path fin_data[1] is copied unconverted
 * into sport (when IPN_ICMPERR is set) or into dport.
 *
 * Exact pass: the bucket is picked by hashing (dst, dport) and then
 * (src, hash + sport) into nat_table[1]; entries are chained through
 * nat_hnext[1].  An entry is a candidate when nat_oip equals src and
 * nat_outip equals dst and the protocol agrees (p equals nat_p, or the
 * masked flags equal the masked flags of the entry).  The port/id tests
 * then compare nat_outport with sport for IPN_ICMPERR lookups and with
 * dport otherwise, nat_oport with sport, and nat_call[1] with
 * fin_data[0]; appr_match() is consulted when both ipn and nat_aps are
 * non-NULL.
 *
 * Interface handling (both passes): an entry already bound to an
 * interface is tested against ifp; an entry with no nat_ifps[0] adopts
 * ifp when ifp is non-NULL.
 *
 * Wildcard pass (also entered directly when flags has SI_WILDP): only
 * attempted when flags carries NAT_TCPUDP bits and NAT_SEARCH and
 * nat_stats.ns_wilds is non-zero.  ipf_nat is dropped with RWLOCK_EXIT()
 * and re-taken with WRITE_ENTER(), the bucket is recomputed with zero
 * ports, and candidates must match fin_p, src and dst and carry
 * NAT_TCPUDP or SI_WILDP bits.  For an entry accepted by
 * nat_wildok(..., NAT_INBOUND): FI_IGNORE packets are tested for first,
 * SI_CLONE entries are cloned with fr_natclone(), nat_stats.ns_wilds is
 * decremented under ipf_nat_new, nat_oport/nat_outport are filled in
 * from sport/dport and SI_W_DPORT|SI_W_SPORT are cleared.  The lock is
 * downgraded with MUTEX_DOWNGRADE() at the end of this pass.
 */
3106 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3109 struct in_addr src , mapdst;
3111 u_short sport, dport;
3125 dst = mapdst.s_addr;
3126 sflags = flags & NAT_TCPUDPICMP;
3132 sport = htons(fin->fin_data[0]);
3133 dport = htons(fin->fin_data[1]);
3136 if (flags & IPN_ICMPERR)
3137 sport = fin->fin_data[1];
3139 dport = fin->fin_data[1];
3146 if ((flags & SI_WILDP) != 0)
3147 goto find_in_wild_ports;
3149 hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3150 hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
3151 nat = nat_table[1][hv];
3152 for (; nat; nat = nat->nat_hnext[1]) {
3153 if (nat->nat_ifps[0] != NULL) {
3154 if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3156 } else if (ifp != NULL)
3157 nat->nat_ifps[0] = ifp;
3159 nflags = nat->nat_flags;
3161 if (nat->nat_oip.s_addr == src.s_addr &&
3162 nat->nat_outip.s_addr == dst &&
3164 (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3165 || (p == nat->nat_p))) {
3170 if (nat->nat_call[1] != fin->fin_data[0])
3175 if ((flags & IPN_ICMPERR) != 0) {
3176 if (nat->nat_outport != sport)
3179 if (nat->nat_outport != dport)
3185 if (nat->nat_oport != sport)
3187 if (nat->nat_outport != dport)
3195 if ((ipn != NULL) && (nat->nat_aps != NULL))
3196 if (appr_match(fin, nat) != 0)
3203 * So if we didn't find it but there are wildcard members in the hash
3204 * table, go back and look for them. We do this search and update here
3205 * because it is modifying the NAT table and we want to do this only
3206 * for the first packet that matches. The exception, of course, is
3207 * for "dummy" (FI_IGNORE) lookups.
3210 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3212 if (nat_stats.ns_wilds == 0)
3215 RWLOCK_EXIT(&ipf_nat);
3217 hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3218 hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
3220 WRITE_ENTER(&ipf_nat);
3222 nat = nat_table[1][hv];
3223 for (; nat; nat = nat->nat_hnext[1]) {
3224 if (nat->nat_ifps[0] != NULL) {
3225 if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3227 } else if (ifp != NULL)
3228 nat->nat_ifps[0] = ifp;
3230 if (nat->nat_p != fin->fin_p)
3232 if (nat->nat_oip.s_addr != src.s_addr ||
3233 nat->nat_outip.s_addr != dst)
3236 nflags = nat->nat_flags;
3237 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3240 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3241 NAT_INBOUND) == 1) {
3242 if ((fin->fin_flx & FI_IGNORE) != 0)
3244 if ((nflags & SI_CLONE) != 0) {
3245 nat = fr_natclone(fin, nat);
3249 MUTEX_ENTER(&ipf_nat_new);
3250 nat_stats.ns_wilds--;
3251 MUTEX_EXIT(&ipf_nat_new);
3253 nat->nat_oport = sport;
3254 nat->nat_outport = dport;
3255 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3261 MUTEX_DOWNGRADE(&ipf_nat);
3267 /* ------------------------------------------------------------------------ */
3268 /* Function: nat_tabmove */
3270 /* Parameters: nat(I) - pointer to NAT structure */
3271 /* Write Lock: ipf_nat */
3273 /* This function is only called for TCP/UDP NAT table entries where the */
3274 /* original was placed in the table without hashing on the ports and we now */
3275 /* want to include hashing on port numbers. */
3276 /* ------------------------------------------------------------------------ */
/*
 * nat_tabmove() - re-file a NAT entry under hash values that include its
 * port numbers.
 *
 * SI_CLONE entries are tested for first (NOTE(review): presumably they
 * are left where they are - confirm).  The entry is then unlinked from
 * its chain in each of the two tables by pointing the predecessor link
 * (*nat_phnext[n]) at the successor and fixing the successor's back
 * pointer, and the old bucket's ns_bucketlen counter is decremented.
 *
 * New positions:
 *  - table 0 is keyed on (nat_inip, nat_inport) combined with
 *    (nat_oip, nat_oport) - the same key shape nat_outlookup() hashes;
 *  - table 1 is keyed on (nat_outip, nat_outport) combined with
 *    (nat_oip, nat_oport) - the same key shape nat_inlookup() hashes.
 * For each table the new hash is stored in nat_hv[n], the entry is
 * linked in at the head of the bucket, and that bucket's ns_bucketlen
 * counter is incremented.
 */
3277 static void nat_tabmove(nat)
3283 if (nat->nat_flags & SI_CLONE)
3287 * Remove the NAT entry from the old location
3289 if (nat->nat_hnext[0])
3290 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3291 *nat->nat_phnext[0] = nat->nat_hnext[0];
3292 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3294 if (nat->nat_hnext[1])
3295 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3296 *nat->nat_phnext[1] = nat->nat_hnext[1];
3297 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3300 * Add into the NAT table in the new position
3302 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3303 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3305 nat->nat_hv[0] = hv;
3306 natp = &nat_table[0][hv];
3308 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3309 nat->nat_phnext[0] = natp;
3310 nat->nat_hnext[0] = *natp;
3312 nat_stats.ns_bucketlen[0][hv]++;
3314 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3315 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3317 nat->nat_hv[1] = hv;
3318 natp = &nat_table[1][hv];
3320 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3321 nat->nat_phnext[1] = natp;
3322 nat->nat_hnext[1] = *natp;
3324 nat_stats.ns_bucketlen[1][hv]++;
3328 /* ------------------------------------------------------------------------ */
3329 /* Function: nat_outlookup */
3330 /* Returns: nat_t* - NULL == no match, */
3331 /* else pointer to matching NAT entry */
3332 /* Parameters: fin(I) - pointer to packet information */
3333 /* flags(I) - NAT flags for this packet */
3334 /* p(I) - protocol for this packet */
3335 /* src(I) - source IP address */
3336 /* dst(I) - destination IP address */
3337 /* NB: no rw argument is taken; lock expectations are in the NOTE below. */
3339 /* Lookup a nat entry based on the source 'real' ip address/port and */
3340 /* destination address/port. We use this lookup when sending a packet out, */
3341 /* we're looking for a table entry, based on the source address. */
3343 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3345 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3346 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */
3348 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3349 /* the packet is of said protocol */
3350 /* ------------------------------------------------------------------------ */
/*
 * nat_outlookup() - find the NAT entry that an outbound packet belongs
 * to.  This is the counterpart of nat_inlookup() working on table 0.
 *
 * Key setup: sflags is flags masked with IPN_TCPUDPICMP.  sport/dport
 * are taken from fin_data[0]/fin_data[1] through htons(); on the ICMP
 * path fin_data[1] is copied unconverted into sport (when IPN_ICMPERR
 * is set) or into dport.
 *
 * Exact pass: the bucket is picked by hashing (srcip, sport) and then
 * (dst, hash + dport) into nat_table[0]; entries are chained through
 * nat_hnext[0].  An entry is a candidate when nat_inip equals srcip and
 * nat_oip equals dst and the protocol agrees (p equals nat_p, or p is 0
 * and the masked flags equal the masked flags of the entry).  The port
 * tests compare nat_oport with dport and nat_inport with sport, and
 * nat_call[1] with fin_data[0]; appr_match() is consulted when both ipn
 * and nat_aps are non-NULL.
 *
 * Interface handling (both passes): an entry already bound to an
 * interface is tested against ifp; an entry with no nat_ifps[1] adopts
 * ifp when ifp is non-NULL.
 *
 * Wildcard pass (find_out_wild_ports, also entered directly when flags
 * has SI_WILDP): only attempted when flags carries NAT_TCPUDP bits and
 * NAT_SEARCH and nat_stats.ns_wilds is non-zero.  ipf_nat is dropped
 * with RWLOCK_EXIT() and re-taken with WRITE_ENTER(), the bucket is
 * recomputed with zero ports, and candidates must match fin_p, srcip and
 * dst and carry NAT_TCPUDP or SI_WILDP bits.  For an entry accepted by
 * nat_wildok(..., NAT_OUTBOUND): FI_IGNORE packets are tested for first,
 * SI_CLONE entries are cloned with fr_natclone(), nat_stats.ns_wilds is
 * decremented under ipf_nat_new, nat_inport/nat_oport are filled in from
 * sport/dport, nat_outport is set to sport if it was 0, and
 * SI_W_DPORT|SI_W_SPORT are cleared.  The lock is downgraded with
 * MUTEX_DOWNGRADE() at the end of this pass.
 */
3351 nat_t *nat_outlookup(fin, flags, p, src, dst)
3354 struct in_addr src , dst;
3356 u_short sport, dport;
3367 sflags = flags & IPN_TCPUDPICMP;
3375 sport = htons(fin->fin_data[0]);
3376 dport = htons(fin->fin_data[1]);
3379 if (flags & IPN_ICMPERR)
3380 sport = fin->fin_data[1];
3382 dport = fin->fin_data[1];
3388 if ((flags & SI_WILDP) != 0)
3389 goto find_out_wild_ports;
3391 hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3392 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
3393 nat = nat_table[0][hv];
3394 for (; nat; nat = nat->nat_hnext[0]) {
3395 if (nat->nat_ifps[1] != NULL) {
3396 if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3398 } else if (ifp != NULL)
3399 nat->nat_ifps[1] = ifp;
3401 nflags = nat->nat_flags;
3403 if (nat->nat_inip.s_addr == srcip &&
3404 nat->nat_oip.s_addr == dst.s_addr &&
3405 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3406 || (p == nat->nat_p))) {
3411 if (nat->nat_call[1] != fin->fin_data[0])
3417 if (nat->nat_oport != dport)
3419 if (nat->nat_inport != sport)
3427 if ((ipn != NULL) && (nat->nat_aps != NULL))
3428 if (appr_match(fin, nat) != 0)
3435 * So if we didn't find it but there are wildcard members in the hash
3436 * table, go back and look for them. We do this search and update here
3437 * because it is modifying the NAT table and we want to do this only
3438 * for the first packet that matches. The exception, of course, is
3439 * for "dummy" (FI_IGNORE) lookups.
3441 find_out_wild_ports:
3442 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3444 if (nat_stats.ns_wilds == 0)
3447 RWLOCK_EXIT(&ipf_nat);
3449 hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3450 hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
3452 WRITE_ENTER(&ipf_nat);
3454 nat = nat_table[0][hv];
3455 for (; nat; nat = nat->nat_hnext[0]) {
3456 if (nat->nat_ifps[1] != NULL) {
3457 if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3459 } else if (ifp != NULL)
3460 nat->nat_ifps[1] = ifp;
3462 if (nat->nat_p != fin->fin_p)
3464 if ((nat->nat_inip.s_addr != srcip) ||
3465 (nat->nat_oip.s_addr != dst.s_addr))
3468 nflags = nat->nat_flags;
3469 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3472 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3473 NAT_OUTBOUND) == 1) {
3474 if ((fin->fin_flx & FI_IGNORE) != 0)
3476 if ((nflags & SI_CLONE) != 0) {
3477 nat = fr_natclone(fin, nat);
3481 MUTEX_ENTER(&ipf_nat_new);
3482 nat_stats.ns_wilds--;
3483 MUTEX_EXIT(&ipf_nat_new);
3485 nat->nat_inport = sport;
3486 nat->nat_oport = dport;
3487 if (nat->nat_outport == 0)
3488 nat->nat_outport = sport;
3489 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3495 MUTEX_DOWNGRADE(&ipf_nat);
3501 /* ------------------------------------------------------------------------ */
3502 /* Function: nat_lookupredir */
3503 /* Returns: nat_t* - NULL == no match, */
3504 /* else pointer to matching NAT entry */
3505 /* Parameters: np(I) - pointer to description of packet to find NAT table */
3508 /* Lookup the NAT tables to search for a matching redirect */
3509 /* The contents of natlookup_t should imitate those found in a packet that */
3510 /* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3511 /* We can do the lookup in one of two ways, imitating an inbound or */
3512 /* outbound packet. By default we assume outbound, unless IPN_IN is set. */
3513 /* For IN, the fields are set as follows: */
3514 /* nl_real* = source information */
3515 /* nl_out* = destination information (translated) */
3516 /* For an out packet, the fields are set like this: */
3517 /* nl_in* = source information (untranslated) */
3518 /* nl_out* = destination information (translated) */
3519 /* ------------------------------------------------------------------------ */
/*
 * nat_lookupredir() - answer a natlookup_t query by faking a packet and
 * running it through nat_inlookup() or nat_outlookup().
 *
 * A zeroed fr_info (fi) stands in for the packet:
 *  - fin_data[0]/fin_data[1] are the host-order forms of
 *    nl_realport/nl_outport when IPN_IN is set, otherwise of
 *    nl_inport/nl_outport;
 *  - fin_p is derived from nl_flags: IPN_TCP, then IPN_UDP, then either
 *    ICMP flag, in that order of precedence.
 *
 * IPN_IN queries call nat_inlookup(nl_realip, nl_outip) and, on a hit,
 * copy nat_inip/nat_inport into nl_inip/nl_inport.
 *
 * Other queries call nat_outlookup(nl_inip, nl_outip).  On a hit with
 * IPN_FINDFORWARD set, a second zeroed fr_info is built from the entry
 * (nat_p, nat_outport, nat_oport) and passed to nat_inlookup(); when
 * that finds an entry, IPN_FINDFORWARD is cleared from nl_flags.  The
 * reply fields nl_realip/nl_realport are taken from
 * nat_outip/nat_outport.
 */
3520 nat_t *nat_lookupredir(np)
3526 bzero((char *)&fi, sizeof(fi));
3527 if (np->nl_flags & IPN_IN) {
3528 fi.fin_data[0] = ntohs(np->nl_realport);
3529 fi.fin_data[1] = ntohs(np->nl_outport);
3531 fi.fin_data[0] = ntohs(np->nl_inport);
3532 fi.fin_data[1] = ntohs(np->nl_outport);
3534 if (np->nl_flags & IPN_TCP)
3535 fi.fin_p = IPPROTO_TCP;
3536 else if (np->nl_flags & IPN_UDP)
3537 fi.fin_p = IPPROTO_UDP;
3538 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3539 fi.fin_p = IPPROTO_ICMP;
3542 * We can do two sorts of lookups:
3543 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3544 * - default: we have the `in' and `out' address, look for `real'.
3546 if (np->nl_flags & IPN_IN) {
3547 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3548 np->nl_realip, np->nl_outip))) {
3549 np->nl_inip = nat->nat_inip;
3550 np->nl_inport = nat->nat_inport;
3554 * If nl_inip is non null, this is a lookup based on the real
3555 * ip address. Else, we use the fake.
3557 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3558 np->nl_inip, np->nl_outip))) {
3560 if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3562 bzero((char *)&fin, sizeof(fin));
3563 fin.fin_p = nat->nat_p;
3564 fin.fin_data[0] = ntohs(nat->nat_outport);
3565 fin.fin_data[1] = ntohs(nat->nat_oport);
3566 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3568 nat->nat_oip) != NULL) {
3569 np->nl_flags &= ~IPN_FINDFORWARD;
3573 np->nl_realip = nat->nat_outip;
3574 np->nl_realport = nat->nat_outport;
3582 /* ------------------------------------------------------------------------ */
3583 /* Function: nat_match */
3584 /* Returns: int - 0 == no match, 1 == match */
3585 /* Parameters: fin(I) - pointer to packet information */
3586 /* np(I) - pointer to NAT rule */
3588 /* Pull the matching of a packet against a NAT rule out of that complex */
3589 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3590 /* ------------------------------------------------------------------------ */
/*
 * nat_match() - decide whether a packet satisfies the address, protocol
 * and port constraints of one NAT rule.
 *
 * Tests applied, in the order they appear below:
 *  - the packet must be IPv4 (fin_v == 4);
 *  - if the rule names a protocol (in_p != 0) it must equal fin_p;
 *  - one group of address tests requires NAT_MAP or NAT_MAPBLK in
 *    in_redir and checks the source against in_inmsk/in_inip and the
 *    destination against in_srcmsk/in_srcip;
 *  - the other group requires NAT_REDIRECT in in_redir and checks the
 *    source against in_srcmsk/in_srcip and the destination against
 *    in_outmsk/in_outip;
 *  - each address test is XORed with IPN_NOTSRC or IPN_NOTDST, so those
 *    flags invert the sense of the comparison;
 *  - a packet that is not TCP/UDP, or is FI_SHORT or FI_FRAGBODY, is
 *    tested against the rule's port comparators ftu_scmp/ftu_dcmp;
 *  - otherwise the result is whatever fr_tcpudpchk(fin, ft) returns.
 *
 * NOTE(review): the statement that selects between the two address
 * groups is not restated here; confirm against the full function.
 */
3591 static int nat_match(fin, np)
3597 if (fin->fin_v != 4)
3600 if (np->in_p && fin->fin_p != np->in_p)
3604 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3606 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3607 ^ ((np->in_flags & IPN_NOTSRC) != 0))
3609 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3610 ^ ((np->in_flags & IPN_NOTDST) != 0))
3613 if (!(np->in_redir & NAT_REDIRECT))
3615 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3616 ^ ((np->in_flags & IPN_NOTSRC) != 0))
3618 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3619 ^ ((np->in_flags & IPN_NOTDST) != 0))
3624 if (!(fin->fin_flx & FI_TCPUDP) ||
3625 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3626 if (ft->ftu_scmp || ft->ftu_dcmp)
3631 return fr_tcpudpchk(fin, ft);
3635 /* ------------------------------------------------------------------------ */
3636 /* Function: nat_update */
3638 /* Parameters: fin(I) - pointer to packet information */
/* nat(I) - pointer to NAT structure */
3639 /* np(I) - pointer to NAT rule */
3641 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */
3642 /* called with fin_rev updated - i.e. after calling nat_proto(). */
3643 /* ------------------------------------------------------------------------ */
/*
 * nat_update() - refresh the timeout queue position of a NAT entry for
 * the packet described by fin.
 *
 * The whole update runs with nat->nat_lock held.  tqe is the entry's own
 * queue element (nat_tqe) and ifq2 is the rule-supplied queue for this
 * packet direction, np->in_tqehead[fin->fin_rev].
 *
 * TCP entries with no rule-supplied queue take the TCP branch:
 *  - dsize is the payload length, fin_dlen minus the TCP header length
 *    (TCP_OFF * 4), plus one each for SYN and FIN;
 *  - nat_seqnext for the opposite direction is advanced to the packet's
 *    ack when SEQ_GT() says the ack is newer;
 *  - nat_seqnext for this direction is seeded with seq + dsize the first
 *    time (while it is still 0);
 *  - ageing is delegated to fr_tcp_age() using nat_tqb.
 *
 * On the other path the protocol is tested for UDP and then ICMP to pick
 * ifq, and the entry is moved with fr_movequeue(tqe, ifq, ifq2).
 */
3644 void nat_update(fin, nat, np)
3649 ipftq_t *ifq, *ifq2;
3652 MUTEX_ENTER(&nat->nat_lock);
3653 tqe = &nat->nat_tqe;
3657 * We allow over-riding of NAT timeouts from NAT rules, even for
3658 * TCP, however, if it is TCP and there is no rule timeout set,
3659 * then do not update the timeout here.
3662 ifq2 = np->in_tqehead[fin->fin_rev];
3666 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3673 tcpflags = tcp->th_flags;
3674 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
3675 ((tcpflags & TH_SYN) ? 1 : 0) +
3676 ((tcpflags & TH_FIN) ? 1 : 0);
3678 ack = ntohl(tcp->th_ack);
3679 end = ntohl(tcp->th_seq) + dsize;
3681 if (SEQ_GT(ack, nat->nat_seqnext[1 - fin->fin_rev]))
3682 nat->nat_seqnext[1 - fin->fin_rev] = ack;
3684 if (nat->nat_seqnext[fin->fin_rev] == 0)
3685 nat->nat_seqnext[fin->fin_rev] = end;
3687 (void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3690 if (nat->nat_p == IPPROTO_UDP)
3692 else if (nat->nat_p == IPPROTO_ICMP)
3698 fr_movequeue(tqe, ifq, ifq2);
3700 MUTEX_EXIT(&nat->nat_lock);
3704 /* ------------------------------------------------------------------------ */
3705 /* Function: fr_checknatout */
3706 /* Returns: int - -1 == packet failed NAT checks so block it, */
3707 /* 0 == no packet translation occurred, */
3708 /* 1 == packet was successfully translated. */
3709 /* Parameters: fin(I) - pointer to packet information */
3710 /* passp(I) - pointer to filtering result flags */
3712 /* Check to see if an outgoing packet should be changed. ICMP packets are */
3713 /* first checked to see if they match an existing entry (if an error), */
3714 /* otherwise a search of the current NAT table is made. If neither results */
3715 /* in a match then a search for a matching NAT rule is made. Create a new */
3716 /* NAT entry if we matched a NAT rule. Lastly, actually change the */
3717 /* packet header(s) as required. */
3718 /* ------------------------------------------------------------------------ */
/*
 * fr_checknatout() - outbound NAT entry point: find or create the NAT
 * entry for a packet and hand it to fr_natout().
 *
 * Flow of the code below:
 *  - nothing is attempted when there are no NAT rules
 *    (nat_stats.ns_rules == 0) or fr_nat_lock is set;
 *  - fin->fin_ifp is saved in sifp and written back at the very end; ifp
 *    is taken from fr->fr_tifs[fin->fin_rev].fd_ifp, which is accepted
 *    when it is neither NULL nor (void *)-1;
 *  - for packets that are not FI_SHORT and have fragment offset 0, nflags
 *    is classified; ICMP types accepted by nat_icmpquerytype4() yield
 *    IPN_ICMPQUERY;
 *  - with ipf_nat read-held, an existing entry is searched for in this
 *    order: nat_icmperror() for ICMP that is not a query, then
 *    fr_nat_knownfrag() for FI_FRAG packets, then nat_outlookup() with
 *    NAT_SEARCH added to nflags;
 *  - failing that, the read lock is swapped for a write lock and
 *    nat_rules[] is searched in the bucket for the masked source address
 *    (ipa & htonl(msk)).  A rule is filtered on interface (in_ifps[1]),
 *    IP version, protocol, IPN_RF flags, nat_match() for IPN_FILTER
 *    rules or the in_inmsk/in_inip test otherwise, tag match, and - for
 *    rules with a proxy label - in_dport and appr_ok().  A surviving rule
 *    is turned into an entry with nat_new();
 *  - when no rule matched and nmsk is non-zero the mask bookkeeping at
 *    the end of the loop runs (NOTE(review): presumably to retry with the
 *    next mask - confirm); the write lock is then downgraded;
 *  - the packet is translated by fr_natout(); nat_touched is stamped with
 *    fr_ticks; ipf_nat is released; FI_BADNAT is set on the failure path.
 *
 * NOTE(review): the in-body remark "This is an incoming packet" sits in
 * the outbound path and reads as if copied from fr_checknatin().
 */
3719 int fr_checknatout(fin, passp)
3723 struct ifnet *ifp, *sifp;
3724 icmphdr_t *icmp = NULL;
3725 tcphdr_t *tcp = NULL;
3726 int rval, natfailed;
3734 if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
3739 sifp = fin->fin_ifp;
3741 ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3742 if ((ifp != NULL) && (ifp != (void *)-1))
3747 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3760 * This is an incoming packet, so the destination is
3761 * the icmp_id and the source port equals 0
3763 if (nat_icmpquerytype4(icmp->icmp_type))
3764 nflags = IPN_ICMPQUERY;
3770 if ((nflags & IPN_TCPUDP))
3774 ipa = fin->fin_saddr;
3776 READ_ENTER(&ipf_nat);
3778 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3779 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3781 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3783 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3784 fin->fin_src, fin->fin_dst))) {
3785 nflags = nat->nat_flags;
3787 u_32_t hv, msk, nmsk;
3790 * If there is no current entry in the nat table for this IP#,
3791 * create one for it (if there is a matching rule).
3793 RWLOCK_EXIT(&ipf_nat);
3796 WRITE_ENTER(&ipf_nat);
3798 iph = ipa & htonl(msk);
3799 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3800 for (np = nat_rules[hv]; np; np = np->in_mnext)
3802 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3804 if (np->in_v != fin->fin_v)
3806 if (np->in_p && (np->in_p != fin->fin_p))
3808 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3810 if (np->in_flags & IPN_FILTER) {
3811 if (!nat_match(fin, np))
3813 } else if ((ipa & np->in_inmsk) != np->in_inip)
3817 !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3820 if (*np->in_plabel != '\0') {
3821 if (((np->in_flags & IPN_FILTER) == 0) &&
3822 (np->in_dport != tcp->th_dport))
3824 if (appr_ok(fin, tcp, np) == 0)
3828 if ((nat = nat_new(fin, np, NULL, nflags,
3835 if ((np == NULL) && (nmsk != 0)) {
3838 if (nmsk & 0x80000000)
3847 MUTEX_DOWNGRADE(&ipf_nat);
3851 rval = fr_natout(fin, nat, natadd, nflags);
3853 MUTEX_ENTER(&nat->nat_lock);
3855 MUTEX_EXIT(&nat->nat_lock);
3856 nat->nat_touched = fr_ticks;
3861 RWLOCK_EXIT(&ipf_nat);
3866 fin->fin_flx |= FI_BADNAT;
3868 fin->fin_ifp = sifp;
3872 /* ------------------------------------------------------------------------ */
3873 /* Function: fr_natout */
3874 /* Returns: int - -1 == packet failed NAT checks so block it, */
3875 /* 1 == packet was successfully translated. */
3876 /* Parameters: fin(I) - pointer to packet information */
3877 /* nat(I) - pointer to NAT structure */
3878 /* natadd(I) - flag indicating if it is safe to add frag cache */
3879 /* nflags(I) - NAT flags set for this packet */
3881 /* Translate a packet coming "out" on an interface. */
3882 /* ------------------------------------------------------------------------ */
/*
 * fr_natout() - apply an existing NAT entry to a packet leaving on an
 * interface.
 *
 * Steps performed below:
 *  - when natadd is set, the packet is a fragment (FI_FRAG) and np is
 *    non-NULL, the entry is added to the fragment cache with
 *    fr_nat_newfrag();
 *  - nat_bytes[1] is increased by fin_plen under nat_lock;
 *  - IPv4 header checksum: for nflags == IPN_ICMPERR the delta between
 *    the current source address and nat_outip is computed with
 *    LONG_SUM()/CALC_SUMD() and applied to ip_sum with fix_outcksum();
 *    on the platforms selected by the #if, ip_sum is adjusted with
 *    fix_outcksum() or fix_incksum() depending on nat_dir;
 *  - for packets that are not FI_SHORT and have fragment offset 0:
 *    TCP/UDP packets with a non-zero nat_outport get th_sport replaced
 *    (fin_data[0] is kept in step, in host order), ICMP queries with a
 *    non-zero nat_outport get icmp_id replaced, and nat_proto() supplies
 *    csump, the transport checksum to patch;
 *  - the IP source address becomes nat_outip;
 *  - nat_update() refreshes the entry's timeout;
 *  - if csump is non-NULL it is adjusted by nat_sumd[1], again choosing
 *    fix_outcksum()/fix_incksum() by nat_dir;
 *  - with IPFILTER_SYNC, ipfsync_update() is told about the entry;
 *  - if the rule has a proxy (np->in_apr), appr_check() is run;
 *  - nat_stats.ns_mapped[1] is incremented atomically and FI_NATED is set
 *    on the packet.
 */
3883 int fr_natout(fin, nat, natadd, nflags)
3900 if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3901 (void) fr_nat_newfrag(fin, 0, nat);
3903 MUTEX_ENTER(&nat->nat_lock);
3904 nat->nat_bytes[1] += fin->fin_plen;
3906 MUTEX_EXIT(&nat->nat_lock);
3909 * Fix up checksums, not by recalculating them, but
3910 * simply computing adjustments.
3911 * This is only done for STREAMS based IP implementations where the
3912 * checksum has already been calculated by IP. In all other cases,
3913 * IPFilter is called before the checksum needs calculating so there
3914 * is no call to modify whatever is in the header now.
3916 if (fin->fin_v == 4) {
3917 if (nflags == IPN_ICMPERR) {
3918 u_32_t s1, s2, sumd;
3920 s1 = LONG_SUM(ntohl(fin->fin_saddr));
3921 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3922 CALC_SUMD(s1, s2, sumd);
3923 fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3925 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3926 defined(linux) || defined(BRIDGE_IPF)
3929 * Strictly speaking, this isn't necessary on BSD
3930 * kernels because they do checksum calculation after
3931 * this code has run BUT if ipfilter is being used
3932 * to do NAT as a bridge, that code doesn't exist.
3934 if (nat->nat_dir == NAT_OUTBOUND)
3935 fix_outcksum(fin, &fin->fin_ip->ip_sum,
3938 fix_incksum(fin, &fin->fin_ip->ip_sum,
3944 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3945 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3948 tcp->th_sport = nat->nat_outport;
3949 fin->fin_data[0] = ntohs(nat->nat_outport);
3952 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3954 icmp->icmp_id = nat->nat_outport;
3957 csump = nat_proto(fin, nat, nflags);
3960 fin->fin_ip->ip_src = nat->nat_outip;
3962 nat_update(fin, nat, np);
3965 * The above comments do not hold for layer 4 (or higher) checksums...
3967 if (csump != NULL) {
3968 if (nat->nat_dir == NAT_OUTBOUND)
3969 fix_outcksum(fin, csump, nat->nat_sumd[1]);
3971 fix_incksum(fin, csump, nat->nat_sumd[1]);
3973 #ifdef IPFILTER_SYNC
3974 ipfsync_update(SMC_NAT, fin, nat->nat_sync);
3976 /* ------------------------------------------------------------- */
3977 /* A few quick notes: */
3978 /* Following are test conditions prior to calling the */
3979 /* appr_check routine. */
3981 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
3982 /* with a redirect rule, we attempt to match the packet's */
3983 /* source port against in_dport, otherwise we'd compare the */
3984 /* packet's destination. */
3985 /* ------------------------------------------------------------- */
3986 if ((np != NULL) && (np->in_apr != NULL)) {
3987 i = appr_check(fin, nat);
3992 ATOMIC_INCL(nat_stats.ns_mapped[1]);
3993 fin->fin_flx |= FI_NATED;
3998 /* ------------------------------------------------------------------------ */
3999 /* Function: fr_checknatin */
4000 /* Returns: int - -1 == packet failed NAT checks so block it, */
4001 /* 0 == no packet translation occurred, */
4002 /* 1 == packet was successfully translated. */
4003 /* Parameters: fin(I) - pointer to packet information */
4004 /* passp(I) - pointer to filtering result flags */
4006 /* Check to see if an incoming packet should be changed. ICMP packets are */
4007 /* first checked to see if they match an existing entry (if an error), */
4008 /* otherwise a search of the current NAT table is made. If neither results */
4009 /* in a match then a search for a matching NAT rule is made. Create a new */
4010 /* NAT entry if we matched a NAT rule. Lastly, actually change the */
4011 /* packet header(s) as required. */
4012 /* ------------------------------------------------------------------------ */
/*
 * fr_checknatin() - inbound NAT entry point: find or create the NAT
 * entry for a packet and hand it to fr_natin().
 *
 * Flow of the code below:
 *  - nothing is attempted when there are no NAT rules
 *    (nat_stats.ns_rules == 0) or fr_nat_lock is set;
 *  - for packets that are not FI_SHORT and have fragment offset 0, nflags
 *    and dport are classified: ICMP types accepted by
 *    nat_icmpquerytype4() yield IPN_ICMPQUERY with dport = icmp_id, and
 *    TCP/UDP packets use th_dport.  dport is kept in the byte order it
 *    has in the packet (it is passed through ntohs() when compared);
 *  - with ipf_nat read-held, an existing entry is searched for in this
 *    order: nat_icmperror() for ICMP that is not a query, then
 *    fr_nat_knownfrag() for FI_FRAG packets, then nat_inlookup() with
 *    NAT_SEARCH added to nflags;
 *  - failing that, the read lock is swapped for a write lock and
 *    rdr_rules[] is searched in the bucket for the masked destination
 *    (in.s_addr & htonl(msk)).  A rule is filtered on interface
 *    (in_ifps[0]), IP version, protocol, IPN_RF flags, nat_match() for
 *    IPN_FILTER rules, otherwise the in_outmsk/in_outip test and the
 *    in_pmin..in_pmax port range, and - for rules with a proxy label -
 *    appr_ok().  A surviving rule is turned into an entry with
 *    nat_new(..., NAT_INBOUND);
 *  - when no rule matched and rmsk is non-zero the mask bookkeeping at
 *    the end of the loop runs (NOTE(review): presumably to retry with the
 *    next mask - confirm); the write lock is then downgraded;
 *  - the packet is translated by fr_natin(); nat_touched is stamped with
 *    fr_ticks; ipf_nat is released; FI_BADNAT is set on the failure path.
 */
4013 int fr_checknatin(fin, passp)
4017 u_int nflags, natadd;
4018 int rval, natfailed;
4028 if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
4039 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4052 * This is an incoming packet, so the destination is
4053 * the icmp_id and the source port equals 0
4055 if (nat_icmpquerytype4(icmp->icmp_type)) {
4056 nflags = IPN_ICMPQUERY;
4057 dport = icmp->icmp_id;
4063 if ((nflags & IPN_TCPUDP)) {
4065 dport = tcp->th_dport;
4071 READ_ENTER(&ipf_nat);
4073 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4074 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4076 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4078 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4079 fin->fin_src, in))) {
4080 nflags = nat->nat_flags;
4082 u_32_t hv, msk, rmsk;
4084 RWLOCK_EXIT(&ipf_nat);
4087 WRITE_ENTER(&ipf_nat);
4089 * If there is no current entry in the nat table for this IP#,
4090 * create one for it (if there is a matching rule).
4093 iph = in.s_addr & htonl(msk);
4094 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
4095 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
4096 if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4098 if (np->in_v != fin->fin_v)
4100 if (np->in_p && (np->in_p != fin->fin_p))
4102 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4104 if (np->in_flags & IPN_FILTER) {
4105 if (!nat_match(fin, np))
4108 if ((in.s_addr & np->in_outmsk) != np->in_outip)
4111 ((ntohs(np->in_pmax) < ntohs(dport)) ||
4112 (ntohs(dport) < ntohs(np->in_pmin))))
4116 if (*np->in_plabel != '\0') {
4117 if (!appr_ok(fin, tcp, np)) {
4122 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4130 if ((np == NULL) && (rmsk != 0)) {
4133 if (rmsk & 0x80000000)
4142 MUTEX_DOWNGRADE(&ipf_nat);
4145 rval = fr_natin(fin, nat, natadd, nflags);
4147 MUTEX_ENTER(&nat->nat_lock);
4149 MUTEX_EXIT(&nat->nat_lock);
4150 nat->nat_touched = fr_ticks;
4155 RWLOCK_EXIT(&ipf_nat);
4160 fin->fin_flx |= FI_BADNAT;
4166 /* ------------------------------------------------------------------------ */
4167 /* Function: fr_natin */
4168 /* Returns: int - -1 == packet failed NAT checks so block it, */
4169 /* 1 == packet was successfully translated. */
4170 /* Parameters: fin(I) - pointer to packet information */
4171 /* nat(I) - pointer to NAT structure */
4172 /* natadd(I) - flag indicating if it is safe to add frag cache */
4173 /* nflags(I) - NAT flags set for this packet */
4174 /* Locks Held: ipf_nat (READ) */
4176 /* Translate a packet coming "in" on an interface. */
4177 /* ------------------------------------------------------------------------ */
/*
 * fr_natin() - apply an existing NAT entry to a packet arriving on an
 * interface.
 *
 * Steps performed below:
 *  - fin_fr is pointed at the filter rule remembered in the entry
 *    (nat_fr);
 *  - when natadd is set and the packet is a fragment (FI_FRAG), the entry
 *    is added to the fragment cache with fr_nat_newfrag();
 *  - if the rule has a proxy (np->in_apr), appr_check() is run;
 *  - with IPFILTER_SYNC, ipfsync_update() is told about the entry;
 *  - nat_bytes[0] is increased by fin_plen under nat_lock;
 *  - the destination address is rewritten to nat_inip both in the IP
 *    header and in the cached copy fin_fi.fi_daddr;
 *  - on the platforms selected by the #if, ip_sum is adjusted by
 *    nat_ipsumd using fix_incksum() for NAT_OUTBOUND entries and
 *    fix_outcksum() otherwise;
 *  - for packets that are not FI_SHORT and have fragment offset 0:
 *    TCP/UDP packets with a non-zero nat_inport get th_dport replaced
 *    (fin_data[1] is kept in step, in host order), ICMP queries with a
 *    non-zero nat_inport get icmp_id replaced, and nat_proto() supplies
 *    csump, the transport checksum to patch;
 *  - nat_update() refreshes the entry's timeout;
 *  - if csump is non-NULL it is adjusted by nat_sumd[0], with the same
 *    direction-dependent choice of helper as for ip_sum;
 *  - nat_stats.ns_mapped[0] is incremented atomically, FI_NATED is set,
 *    and fin_nattag is pointed at the rule's tag when np is non-NULL and
 *    the tag's first word is non-zero.
 *
 * NOTE(review): np is dereferenced for the in_apr test but is NULL-checked
 * before the tag assignment at the end; confirm the earlier use is covered
 * by a guard.
 */
4178 int fr_natin(fin, nat, natadd, nflags)
4193 fin->fin_fr = nat->nat_fr;
4196 if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4197 (void) fr_nat_newfrag(fin, 0, nat);
4199 /* ------------------------------------------------------------- */
4200 /* A few quick notes: */
4201 /* Following are test conditions prior to calling the */
4202 /* appr_check routine. */
4204 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4205 /* with a map rule, we attempt to match the packet's */
4206 /* source port against in_dport, otherwise we'd compare the */
4207 /* packet's destination. */
4208 /* ------------------------------------------------------------- */
4209 if (np->in_apr != NULL) {
4210 i = appr_check(fin, nat);
4217 #ifdef IPFILTER_SYNC
4218 ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4221 MUTEX_ENTER(&nat->nat_lock);
4222 nat->nat_bytes[0] += fin->fin_plen;
4224 MUTEX_EXIT(&nat->nat_lock);
4226 fin->fin_ip->ip_dst = nat->nat_inip;
4227 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4228 if (nflags & IPN_TCPUDP)
4232 * Fix up checksums, not by recalculating them, but
4233 * simply computing adjustments.
4234 * Why only do this for some platforms on inbound packets ?
4235 * Because for those that it is done, IP processing is yet to happen
4236 * and so the IPv4 header checksum has not yet been evaluated.
4237 * Perhaps it should always be done for the benefit of things like
4238 * fast forwarding (so that it doesn't need to be recomputed) but with
4239 * header checksum offloading, perhaps it is a moot point.
4241 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4242 defined(__osf__) || defined(linux)
4243 if (nat->nat_dir == NAT_OUTBOUND)
4244 fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4246 fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4249 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4250 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4251 tcp->th_dport = nat->nat_inport;
4252 fin->fin_data[1] = ntohs(nat->nat_inport);
4256 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4259 icmp->icmp_id = nat->nat_inport;
4262 csump = nat_proto(fin, nat, nflags);
4265 nat_update(fin, nat, np);
4268 * The above comments do not hold for layer 4 (or higher) checksums...
4270 if (csump != NULL) {
4271 if (nat->nat_dir == NAT_OUTBOUND)
4272 fix_incksum(fin, csump, nat->nat_sumd[0]);
4274 fix_outcksum(fin, csump, nat->nat_sumd[0]);
4276 ATOMIC_INCL(nat_stats.ns_mapped[0]);
4277 fin->fin_flx |= FI_NATED;
4278 if (np != NULL && np->in_tag.ipt_num[0] != 0)
4279 fin->fin_nattag = &np->in_tag;
4284 /* ------------------------------------------------------------------------ */
4285 /* Function: nat_proto */
4286 /* Returns: u_short* - pointer to transport header checksum to update, */
4287 /* NULL if the transport protocol is not recognised */
4288 /* as needing a checksum update. */
4289 /* Parameters: fin(I) - pointer to packet information */
4290 /* nat(I) - pointer to NAT structure */
4291 /* nflags(I) - NAT flags set for this packet */
4293 /* Return the pointer to the checksum field for each protocol so understood.*/
4294 /* If support for making other changes to a protocol header is required, */
4295 /* that is not strictly 'address' translation, such as clamping the MSS in */
4296 /* TCP down to a specific value, then do it from here. */
/* As a side effect fin_rev is set from the packet direction (fin_out) and */
/* the direction recorded in the NAT entry (nat_dir). */
4297 /* ------------------------------------------------------------------------ */
4298 u_short *nat_proto(fin, nat, nflags)
4309 if (fin->fin_out == 0) {
/* Inbound packet: fin_rev is set when the entry is an outbound one. */
4310 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4312 fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4320 csump = &tcp->th_sum;
4323 * Do a MSS CLAMPING on a SYN packet,
4324 * only deal IPv4 for now.
4326 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
/* csump is handed over so the clamp routine can patch the TCP checksum. */
4327 nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
4335 csump = &udp->uh_sum;
4341 if ((nflags & IPN_ICMPQUERY) != 0) {
/* An ICMP checksum of zero is left alone: csump is only set when non-zero. */
4342 if (icmp->icmp_cksum != 0)
4343 csump = &icmp->icmp_cksum;
4351 /* ------------------------------------------------------------------------ */
4352 /* Function: fr_natunload */
4354 /* Parameters: Nil */
4356 /* Free all memory used by NAT structures allocated at runtime. */
/* Non-proxy user timeout queues, the NAT/RDR lookup tables, the hostmap */
/* table and the bucket length statistics are released; locks are destroyed */
/* only when fr_nat_init is 1. */
4357 /* ------------------------------------------------------------------------ */
4360 ipftq_t *ifq, *ifqnext;
4362 (void) nat_clearlist();
4363 (void) nat_flushtable();
4366 * Proxy timeout queues are not cleaned here because although they
4367 * exist on the NAT list, appr_unload is called after fr_natunload
4368 * and the proxies actually are responsible for them being created.
4369 * Should the proxy timeouts have their own list? There's no real
4370 * justification as this is the only complication.
4372 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
/* Remember the successor first: this queue may be freed below. */
4373 ifqnext = ifq->ifq_next;
4374 if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4375 (fr_deletetimeoutqueue(ifq) == 0))
4376 fr_freetimeoutqueue(ifq);
/* Each table below is freed only if it is currently allocated. */
4379 if (nat_table[0] != NULL) {
4380 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4381 nat_table[0] = NULL;
4383 if (nat_table[1] != NULL) {
4384 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4385 nat_table[1] = NULL;
4387 if (nat_rules != NULL) {
4388 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4391 if (rdr_rules != NULL) {
4392 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4395 if (ipf_hm_maptable != NULL) {
4396 KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4397 ipf_hm_maptable = NULL;
/* NOTE(review): nat_gettable() copies ns_bucketlen[] out as */
/* ipf_nattable_sz * sizeof(u_long), whereas the size passed to KFREES here */
/* uses sizeof(u_long *). The two only agree where a pointer and a u_long */
/* have the same size - confirm against the allocation site. */
4399 if (nat_stats.ns_bucketlen[0] != NULL) {
4400 KFREES(nat_stats.ns_bucketlen[0],
4401 sizeof(u_long *) * ipf_nattable_sz);
4402 nat_stats.ns_bucketlen[0] = NULL;
4404 if (nat_stats.ns_bucketlen[1] != NULL) {
4405 KFREES(nat_stats.ns_bucketlen[1],
4406 sizeof(u_long *) * ipf_nattable_sz);
4407 nat_stats.ns_bucketlen[1] = NULL;
4410 if (fr_nat_maxbucket_reset == 1)
4411 fr_nat_maxbucket = 0;
4413 if (fr_nat_init == 1) {
/* Timeout queue table and locks are torn down only when fr_nat_init is 1. */
4415 fr_sttab_destroy(nat_tqb);
4417 RW_DESTROY(&ipf_natfrag);
4418 RW_DESTROY(&ipf_nat);
4420 MUTEX_DESTROY(&ipf_nat_new);
4421 MUTEX_DESTROY(&ipf_natio);
4423 MUTEX_DESTROY(&nat_udptq.ifq_lock);
4424 MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4425 MUTEX_DESTROY(&nat_iptq.ifq_lock);
4430 /* ------------------------------------------------------------------------ */
4431 /* Function: fr_natexpire */
4433 /* Parameters: Nil */
4435 /* Check all of the timeout queues for entries at the top which need to be */
/* Runs with ipf_nat held for writing. Entries are compared against */
/* fr_ticks via their tqe_die value and removed with nat_delete(NL_EXPIRE). */
4437 /* ------------------------------------------------------------------------ */
4440 ipftq_t *ifq, *ifqnext;
4441 ipftqent_t *tqe, *tqn;
4446 WRITE_ENTER(&ipf_nat);
/* Pass 1: the queues chained from nat_tqb. */
4447 for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4448 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4449 if (tqe->tqe_die > fr_ticks)
4451 tqn = tqe->tqe_next;
/* tqn was advanced first, presumably because nat_delete() unlinks tqe. */
4452 nat_delete(tqe->tqe_parent, NL_EXPIRE);
/* Pass 2: the user-defined queues chained from nat_utqe. */
4456 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4457 ifqnext = ifq->ifq_next;
4459 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4460 if (tqe->tqe_die > fr_ticks)
4462 tqn = tqe->tqe_next;
4463 nat_delete(tqe->tqe_parent, NL_EXPIRE);
/* Pass 3: free user-defined queues flagged IFQF_DELETE with no references. */
4467 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4468 ifqnext = ifq->ifq_next;
4470 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4471 (ifq->ifq_ref == 0)) {
4472 fr_freetimeoutqueue(ifq);
4476 if (fr_nat_doflush != 0) {
4481 RWLOCK_EXIT(&ipf_nat);
4486 /* ------------------------------------------------------------------------ */
4487 /* Function: fr_natsync */
4489 /* Parameters: ifp(I) - pointer to network interface */
4491 /* Walk through all of the currently active NAT sessions, looking for those */
4492 /* which need to have their translated address updated. */
/* A NULL ifp matches every session and every rule. Interface pointers */
/* cached in the NAT rules on nat_list are re-resolved as well. */
4493 /* ------------------------------------------------------------------------ */
4494 void fr_natsync(ifp)
4497 u_32_t sum1, sum2, sumd;
4504 if (fr_running <= 0)
4508 * Change IP addresses for NAT sessions for any protocol except TCP
4509 * since it will break the TCP connection anyway. The only rules
4510 * which will get changed are those which are "map ... -> 0/32",
4511 * where the rule specifies the address is taken from the interface.
4514 WRITE_ENTER(&ipf_nat);
/* fr_running is tested again now that the write lock is held. */
4516 if (fr_running <= 0) {
4517 RWLOCK_EXIT(&ipf_nat);
4521 for (nat = nat_instances; nat; nat = nat->nat_next) {
4522 if ((nat->nat_flags & IPN_TCP) != 0)
4526 (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4528 if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4529 (ifp == nat->nat_ifps[1]))) {
4530 nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4531 if (nat->nat_ifnames[1][0] != '\0') {
4532 nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4535 nat->nat_ifps[1] = nat->nat_ifps[0];
4536 ifp2 = nat->nat_ifps[0];
4541 * Change the map-to address to be the same as the
4544 sum1 = nat->nat_outip.s_addr;
4545 if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4546 nat->nat_outip = in;
4547 sum2 = nat->nat_outip.s_addr;
4552 * Readjust the checksum adjustment to take into
4553 * account the new IP#.
4555 CALC_SUMD(sum1, sum2, sumd);
4556 /* XXX - dont change for TCP when solaris does
4557 * hardware checksumming.
4559 sumd += nat->nat_sumd[0];
/* Add the upper 16 bits back into the lower 16 bits; both directions then */
/* share the same adjustment. */
4560 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4561 nat->nat_sumd[1] = nat->nat_sumd[0];
4565 for (n = nat_list; (n != NULL); n = n->in_next) {
/* Refresh a rule's interface pointer when ifp is NULL or matches it. */
4566 if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4567 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4568 if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4569 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4571 RWLOCK_EXIT(&ipf_nat);
4576 /* ------------------------------------------------------------------------ */
4577 /* Function: nat_icmpquerytype4 */
4578 /* Returns: int - 1 == success, 0 == failure */
4579 /* Parameters: icmptype(I) - ICMP type number */
4581 /* Tests to see if the ICMP type number passed is a query/response type or */
4583 /* ------------------------------------------------------------------------ */
4584 static int nat_icmpquerytype4(icmptype)
4589 * For the ICMP query NAT code, it is essential that both the query
4590 * and the reply match on the NAT rule. Because the NAT structure
4591 * does not keep track of the icmptype, and a single NAT structure
4592 * is used for all icmp types with the same src, dest and id, we
4593 * simply define the replies as queries as well. The funny thing is,
4594 * although it seems silly to call a reply a query, this is exactly
4595 * as it is defined in the IPv4 specification
4601 case ICMP_ECHOREPLY:
4603 /* route advertisement/solicitation is currently unsupported: */
4604 /* it would require rewriting the ICMP data section */
4606 case ICMP_TSTAMPREPLY:
4608 case ICMP_IREQREPLY:
4610 case ICMP_MASKREPLY:
4618 /* ------------------------------------------------------------------------ */
4619 /* Function: nat_log */
4621 /* Parameters: nat(I) - pointer to NAT structure */
4622 /* type(I) - type of log entry to create */
4624 /* Creates a NAT log entry. */
/* Addresses, ports, byte/packet counters and protocol are copied from the */
/* NAT entry into a log record which is then passed to ipllog(). */
4625 /* ------------------------------------------------------------------------ */
4626 void nat_log(nat, type)
/* Copy the fields of interest out of the NAT entry into the log record. */
4640 natl.nl_inip = nat->nat_inip;
4641 natl.nl_outip = nat->nat_outip;
4642 natl.nl_origip = nat->nat_oip;
4643 natl.nl_bytes[0] = nat->nat_bytes[0];
4644 natl.nl_bytes[1] = nat->nat_bytes[1];
4645 natl.nl_pkts[0] = nat->nat_pkts[0];
4646 natl.nl_pkts[1] = nat->nat_pkts[1];
4647 natl.nl_origport = nat->nat_oport;
4648 natl.nl_inport = nat->nat_inport;
4649 natl.nl_outport = nat->nat_outport;
4650 natl.nl_p = nat->nat_p;
4651 natl.nl_type = type;
4654 if (nat->nat_ptr != NULL) {
/* nl_rule is the position of the owning rule in nat_list, counted from 0. */
4655 for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4656 if (np == nat->nat_ptr) {
4657 natl.nl_rule = rulen;
4663 sizes[0] = sizeof(natl);
4666 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4671 #if defined(__OpenBSD__)
4672 /* ------------------------------------------------------------------------ */
4673 /* Function: nat_ifdetach */
4675 /* Parameters: ifp(I) - pointer to network interface */
4677 /* Compatibility interface for OpenBSD to trigger the correct updating of */
4678 /* interface references within IPFilter. */
4679 /* ------------------------------------------------------------------------ */
4680 void nat_ifdetach(ifp)
4689 /* ------------------------------------------------------------------------ */
4690 /* Function: fr_ipnatderef */
4692 /* Parameters: inp(I) - pointer to pointer to NAT rule */
4693 /* Write Locks: ipf_nat */
/* When the rule is no longer in use (in_use == 0) and is flagged */
/* IPN_DELETE, its in_apr is released with appr_free(), its lock is */
/* destroyed and the rule count in nat_stats is decremented. */
4695 /* ------------------------------------------------------------------------ */
4696 void fr_ipnatderef(inp)
4705 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4707 appr_free(in->in_apr);
4708 MUTEX_DESTROY(&in->in_lock);
4710 nat_stats.ns_rules--;
4711 #if SOLARIS && !defined(_INET_IP_STACK_H)
/* Once the last rule is gone, pfil_delayed_copy is set to 1. */
4712 if (nat_stats.ns_rules == 0)
4713 pfil_delayed_copy = 1;
4719 /* ------------------------------------------------------------------------ */
4720 /* Function: fr_natderef */
4722 /* Parameters: natp(I) - pointer to pointer to NAT table entry */
4724 /* Decrement the reference counter for this NAT table entry and free it if */
4725 /* there are no more things using it. */
4727 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4728 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4729 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */
4730 /* because nat_delete() will do that and send nat_ref to -1. */
4732 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4733 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4734 /* ------------------------------------------------------------------------ */
4735 void fr_natderef(natp)
4743 MUTEX_ENTER(&nat->nat_lock);
/* nat_ref is examined while the entry's own lock is held. */
4744 if (nat->nat_ref > 1) {
4746 MUTEX_EXIT(&nat->nat_lock);
4749 MUTEX_EXIT(&nat->nat_lock);
/* nat_delete() is called with ipf_nat held for writing. */
4751 WRITE_ENTER(&ipf_nat);
4752 nat_delete(nat, NL_EXPIRE);
4753 RWLOCK_EXIT(&ipf_nat);
4757 /* ------------------------------------------------------------------------ */
4758 /* Function: fr_natclone */
4759 /* Returns: nat_t* - NULL == cloning failed, */
4760 /* else pointer to new NAT structure */
4761 /* Parameters: fin(I) - pointer to packet information */
4762 /* nat(I) - pointer to master NAT structure */
4763 /* Write Lock: ipf_nat */
4765 /* Create a "duplicate" NAT table entry from the master. */
4766 /* ------------------------------------------------------------------------ */
4767 static nat_t *fr_natclone(fin, nat)
4775 KMALLOC(clone, nat_t *);
/* Begin with a byte-for-byte copy of the master, then reset per-entry state. */
4778 bcopy((char *)nat, (char *)clone, sizeof(*clone));
4780 MUTEX_NUKE(&clone->nat_lock);
4782 clone->nat_aps = NULL;
4784 * Initialize all these so that nat_delete() doesn't cause a crash.
4786 clone->nat_tqe.tqe_pnext = NULL;
4787 clone->nat_tqe.tqe_next = NULL;
4788 clone->nat_tqe.tqe_ifq = NULL;
4789 clone->nat_tqe.tqe_parent = clone;
/* The copy loses SI_CLONE and is marked SI_CLONED instead. */
4791 clone->nat_flags &= ~SI_CLONE;
4792 clone->nat_flags |= SI_CLONED;
/* nat_hm was copied from the master, so the hostmap gains a reference. */
4795 clone->nat_hm->hm_ref++;
4797 if (nat_insert(clone, fin->fin_rev) == -1) {
4801 np = clone->nat_ptr;
4804 nat_log(clone, (u_int)np->in_redir);
4809 MUTEX_ENTER(&fr->fr_lock);
4811 MUTEX_EXIT(&fr->fr_lock);
4815 * Because the clone is created outside the normal loop of things and
4816 * TCP has special needs in terms of state, initialise the timeout
4817 * state of the new NAT from here.
4819 if (clone->nat_p == IPPROTO_TCP) {
4820 (void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4823 #ifdef IPFILTER_SYNC
4824 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4827 nat_log(clone, NL_CLONE);
4832 /* ------------------------------------------------------------------------ */
4833 /* Function: nat_wildok */
4834 /* Returns: int - 1 == packet's ports match wildcards */
4835 /* 0 == packet's ports don't match wildcards */
4836 /* Parameters: nat(I) - NAT entry */
4837 /* sport(I) - source port */
4838 /* dport(I) - destination port */
4839 /* flags(I) - wildcard flags */
4840 /* dir(I) - packet direction */
4842 /* Use NAT entry and packet direction to determine which combination of */
4843 /* wildcard flags should be used. */
/* Each port test passes either on an exact match with the port stored in */
/* the NAT entry or when the corresponding SI_W_SPORT/SI_W_DPORT flag is set.*/
4844 /* ------------------------------------------------------------------------ */
4845 static int nat_wildok(nat, sport, dport, flags, dir)
4853 * When called by dir is set to
4854 * nat_inlookup NAT_INBOUND (0)
4855 * nat_outlookup NAT_OUTBOUND (1)
4857 * We simply combine the packet's direction in dir with the original
4858 * "intended" direction of that NAT entry in nat->nat_dir to decide
4859 * which combination of wildcard flags to allow.
4862 switch ((dir << 1) | nat->nat_dir)
/* Switch key: bit 1 is the packet's direction, bit 0 is the entry's nat_dir. */
4864 case 3: /* outbound packet / outbound entry */
4865 if (((nat->nat_inport == sport) ||
4866 (flags & SI_W_SPORT)) &&
4867 ((nat->nat_oport == dport) ||
4868 (flags & SI_W_DPORT)))
/* In cases 2 and 1 the packet runs against the entry's direction, so the */
/* flags are swapped: SI_W_DPORT excuses the source port test and */
/* SI_W_SPORT excuses the destination port test. */
4871 case 2: /* outbound packet / inbound entry */
4872 if (((nat->nat_outport == sport) ||
4873 (flags & SI_W_DPORT)) &&
4874 ((nat->nat_oport == dport) ||
4875 (flags & SI_W_SPORT)))
4878 case 1: /* inbound packet / outbound entry */
4879 if (((nat->nat_oport == sport) ||
4880 (flags & SI_W_DPORT)) &&
4881 ((nat->nat_outport == dport) ||
4882 (flags & SI_W_SPORT)))
4885 case 0: /* inbound packet / inbound entry */
4886 if (((nat->nat_oport == sport) ||
4887 (flags & SI_W_SPORT)) &&
4888 ((nat->nat_outport == dport) ||
4889 (flags & SI_W_DPORT)))
4900 /* ------------------------------------------------------------------------ */
4901 /* Function: nat_mssclamp */
4903 /* Parameters: tcp(I) - pointer to TCP header */
4904 /* maxmss(I) - value to clamp the TCP MSS to */
4905 /* fin(I) - pointer to packet information */
4906 /* csump(I) - pointer to TCP checksum */
4908 /* Check for MSS option and clamp it if necessary. If found and changed, */
4909 /* then the TCP header checksum will be updated to reflect the change in */
4911 /* ------------------------------------------------------------------------ */
4912 static void nat_mssclamp(tcp, maxmss, fin, csump)
4918 u_char *cp, *ep, opt;
/* hlen is the TCP data offset field multiplied by four. */
4922 hlen = TCP_OFF(tcp) << 2;
/* Options are only present when the header is longer than struct tcphdr; */
/* cp walks them and ep marks the end of the header. */
4923 if (hlen > sizeof(*tcp)) {
4924 cp = (u_char *)tcp + sizeof(*tcp);
4925 ep = (u_char *)tcp + hlen;
4929 if (opt == TCPOPT_EOL)
4931 else if (opt == TCPOPT_NOP) {
4939 if ((cp + advance > ep) || (advance <= 0))
/* The MSS value is read from cp[2] (high byte) and cp[3] (low byte). */
4946 mss = cp[2] * 256 + cp[3];
/* Store the clamp value in the same byte order and patch the checksum by */
/* the difference between old and new value rather than recomputing it. */
4948 cp[2] = maxmss / 256;
4949 cp[3] = maxmss & 0xff;
4950 CALC_SUMD(mss, maxmss, sumd);
4951 fix_outcksum(fin, csump, sumd);
4955 /* ignore unknown options */
4965 /* ------------------------------------------------------------------------ */
4966 /* Function: fr_setnatqueue */
4968 /* Parameters: nat(I)- pointer to NAT structure */
4969 /* rev(I) - forward(0) or reverse(1) direction */
4970 /* Locks: ipf_nat (read or write) */
4972 /* Put the NAT entry on its default queue entry, using rev as a helper in */
4973 /* determining which queue it should be placed on. */
4974 /* ------------------------------------------------------------------------ */
4975 void fr_setnatqueue(nat, rev)
4979 ipftq_t *oifq, *nifq;
/* The owning rule, when there is one, supplies the first candidate queue. */
4981 if (nat->nat_ptr != NULL)
4982 nifq = nat->nat_ptr->in_tqehead[rev];
4996 nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
/* oifq is the queue the entry currently sits on, as recorded in its tqe. */
5004 oifq = nat->nat_tqe.tqe_ifq;
5006 * If it's currently on a timeout queue, move it from one queue to
5007 * another, else put it on the end of the newly determined queue.
5010 fr_movequeue(&nat->nat_tqe, oifq, nifq);
5012 fr_queueappend(&nat->nat_tqe, nifq, nat);
5017 /* ------------------------------------------------------------------------ */
5018 /* Function: nat_getnext */
5019 /* Returns: int - 0 == ok, else error */
5020 /* Parameters: t(I) - pointer to ipftoken structure */
5021 /* itp(I) - pointer to ipfgeniter_t structure */
5023 /* Fetch the next nat/ipnat structure pointer from the linked list and */
5024 /* copy it out to the storage space pointed to by itp_data. The next item */
5025 /* in the list to look at is put back in the ipftoken structure. */
5026 /* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
5027 /* ipf_freetoken will call a deref function for us and we don't want to call*/
5028 /* that twice (second time would be in the second switch statement below). */
/* Handles IPFGENITER_HOSTMAP, IPFGENITER_IPNAT and IPFGENITER_NAT; up to */
/* igi_nitems elements are copied out per call, with ipf_nat released */
/* around each COPYOUT. */
5029 /* ------------------------------------------------------------------------ */
5030 static int nat_getnext(t, itp)
5034 hostmap_t *hm, *nexthm = NULL, zerohm;
5035 ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5036 nat_t *nat, *nextnat = NULL, zeronat;
5037 int error = 0, count;
5040 count = itp->igi_nitems;
5044 READ_ENTER(&ipf_nat);
/* Choose the first element to return for the requested list type: either */
/* the list head or the successor of a previously seen element. */
5046 switch (itp->igi_type)
5048 case IPFGENITER_HOSTMAP :
5051 nexthm = ipf_hm_maplist;
5053 nexthm = hm->hm_next;
5057 case IPFGENITER_IPNAT :
5060 nextipnat = nat_list;
5062 nextipnat = ipn->in_next;
5066 case IPFGENITER_NAT :
5069 nextnat = nat_instances;
5071 nextnat = nat->nat_next;
5075 RWLOCK_EXIT(&ipf_nat);
5079 dst = itp->igi_data;
5081 switch (itp->igi_type)
5083 case IPFGENITER_HOSTMAP :
5084 if (nexthm != NULL) {
/* Take a reference on the element and remember it in the token. */
5086 ATOMIC_INC32(nexthm->hm_ref);
5087 t->ipt_data = nexthm;
5090 bzero(&zerohm, sizeof(zerohm));
5097 case IPFGENITER_IPNAT :
5098 if (nextipnat != NULL) {
5100 MUTEX_ENTER(&nextipnat->in_lock);
5101 nextipnat->in_use++;
5102 MUTEX_EXIT(&nextipnat->in_lock);
5103 t->ipt_data = nextipnat;
5106 bzero(&zeroipn, sizeof(zeroipn));
/* End of list: a zero-filled rule is what gets copied out. */
5107 nextipnat = &zeroipn;
5113 case IPFGENITER_NAT :
5114 if (nextnat != NULL) {
5116 MUTEX_ENTER(&nextnat->nat_lock);
5118 MUTEX_EXIT(&nextnat->nat_lock);
5119 t->ipt_data = nextnat;
5122 bzero(&zeronat, sizeof(zeronat));
5131 RWLOCK_EXIT(&ipf_nat);
5134 * Copying out to user space needs to be done without the lock.
5136 switch (itp->igi_type)
5138 case IPFGENITER_HOSTMAP :
5139 error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5143 dst += sizeof(*nexthm);
5146 case IPFGENITER_IPNAT :
5147 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5151 dst += sizeof(*nextipnat);
5154 case IPFGENITER_NAT :
5155 error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5159 dst += sizeof(*nextnat);
5163 if ((count == 1) || (error != 0))
5168 READ_ENTER(&ipf_nat);
5171 * We need to have the lock again here to make sure that
5172 * using _next is consistent.
5174 switch (itp->igi_type)
5176 case IPFGENITER_HOSTMAP :
5177 nexthm = nexthm->hm_next;
5179 case IPFGENITER_IPNAT :
5180 nextipnat = nextipnat->in_next;
5182 case IPFGENITER_NAT :
5183 nextnat = nextnat->nat_next;
/* NOTE(review): this final switch presumably drops the reference held on */
/* the element the token pointed at on entry - only fragments are visible. */
5189 switch (itp->igi_type)
5191 case IPFGENITER_HOSTMAP :
5193 WRITE_ENTER(&ipf_nat);
5195 RWLOCK_EXIT(&ipf_nat);
5198 case IPFGENITER_IPNAT :
5200 fr_ipnatderef(&ipn);
5203 case IPFGENITER_NAT :
5216 /* ------------------------------------------------------------------------ */
5217 /* Function: nat_iterator */
5218 /* Returns: int - 0 == ok, else error */
5219 /* Parameters: token(I) - pointer to ipftoken structure */
5220 /* itp(I) - pointer to ipfgeniter_t structure */
5222 /* This function acts as a handler for the SIOCGENITER ioctls that use a */
5223 /* generic structure to iterate through a list. There are four different */
5224 /* linked lists of NAT related information to go through: host maps, NAT */
5225 /* rules, active NAT mappings and the NAT fragment cache. */
5226 /* ------------------------------------------------------------------------ */
5227 static int nat_iterator(token, itp)
5233 if (itp->igi_data == NULL)
5236 token->ipt_subtype = itp->igi_type;
/* Host maps, rules and active mappings share nat_getnext(); the fragment */
/* cache is walked by fr_nextfrag(). */
5238 switch (itp->igi_type)
5240 case IPFGENITER_HOSTMAP :
5241 case IPFGENITER_IPNAT :
5242 case IPFGENITER_NAT :
5243 error = nat_getnext(token, itp);
5246 case IPFGENITER_NATFRAG :
/* Two call forms follow, with and without the ipf_natfrag lock argument; */
/* the preprocessor condition selecting between them is not visible here. */
5248 error = fr_nextfrag(token, itp, &ipfr_natlist,
5249 &ipfr_nattail, &ipf_natfrag);
5251 error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5263 /* ------------------------------------------------------------------------ */
5264 /* Function: nat_extraflush */
5265 /* Returns: int - 0 == success, -1 == failure */
5266 /* Parameters: which(I) - how to flush the active NAT table */
5267 /* Write Locks: ipf_nat */
5269 /* Flush nat tables. Three actions currently defined: */
5270 /* which == 0 : flush all nat table entries */
5271 /* which == 1 : flush TCP connections which have started to close but are */
5272 /* stuck for some reason. */
5273 /* which == 2 : flush TCP connections which have been idle for a long time, */
5274 /* starting at > 4 days idle and working back in successive half-*/
5275 /* days to at most 12 hours old. If this fails to free enough */
5276 /* slots then work backwards in half hour slots to 30 minutes. */
5277 /* If that too fails, then work backwards in 30 second intervals */
5278 /* for the last 30 minutes to at worst 30 seconds idle. */
/* The code below additionally accepts the TCP state values */
/* IPF_TCPS_CLOSE_WAIT through IPF_TCPS_CLOSED, flushing every entry on the */
/* timeout queue nat_tqb[which], and has a branch that converts which with */
/* IPF_TTLVAL() and removes entries not touched for longer than that. */
5279 /* ------------------------------------------------------------------------ */
5280 static int nat_extraflush(which)
5283 ipftq_t *ifq, *ifqnext;
5297 * Style 0 flush removes everything...
5299 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5300 nat_delete(nat, NL_FLUSH);
5307 * Since we're only interested in things that are closing,
5308 * we can start with the appropriate timeout queue.
5310 for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5311 ifq = ifq->ifq_next) {
5313 for (tqn = ifq->ifq_head; tqn != NULL; ) {
5314 nat = tqn->tqe_parent;
/* Step to the successor before this entry is possibly deleted. */
5315 tqn = tqn->tqe_next;
5316 if (nat->nat_p != IPPROTO_TCP)
5318 nat_delete(nat, NL_EXPIRE);
5324 * Also need to look through the user defined queues.
5326 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5327 ifqnext = ifq->ifq_next;
5328 for (tqn = ifq->ifq_head; tqn != NULL; ) {
5329 nat = tqn->tqe_parent;
5330 tqn = tqn->tqe_next;
5331 if (nat->nat_p != IPPROTO_TCP)
5334 if ((nat->nat_tcpstate[0] >
5335 IPF_TCPS_ESTABLISHED) &&
5336 (nat->nat_tcpstate[1] >
5337 IPF_TCPS_ESTABLISHED)) {
5338 nat_delete(nat, NL_EXPIRE);
5346 * Args 5-11 correspond to flushing those particular states
5347 * for TCP connections.
5349 case IPF_TCPS_CLOSE_WAIT :
5350 case IPF_TCPS_FIN_WAIT_1 :
5351 case IPF_TCPS_CLOSING :
5352 case IPF_TCPS_LAST_ACK :
5353 case IPF_TCPS_FIN_WAIT_2 :
5354 case IPF_TCPS_TIME_WAIT :
5355 case IPF_TCPS_CLOSED :
5356 tqn = nat_tqb[which].ifq_head;
5357 while (tqn != NULL) {
5358 nat = tqn->tqe_parent;
5359 tqn = tqn->tqe_next;
5360 nat_delete(nat, NL_FLUSH);
5370 * Take a large arbitrary number to mean the number of seconds
5371 * for which which consider to be the maximum value we'll allow
5372 * the expiration to be.
5374 which = IPF_TTLVAL(which);
5375 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
/* Entries whose nat_touched lags fr_ticks by more than which are removed. */
5376 if (fr_ticks - nat->nat_touched > which) {
5377 nat_delete(nat, NL_FLUSH);
5380 natp = &nat->nat_next;
5391 * Asked to remove inactive entries because the table is full.
5393 if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
5394 nat_last_force_flush = fr_ticks;
5395 removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5403 /* ------------------------------------------------------------------------ */
5404 /* Function: nat_flush_entry */
5405 /* Returns: 0 - always succeeds */
5406 /* Parameters: entry(I) - pointer to NAT entry */
5407 /* Write Locks: ipf_nat */
5409 /* This function is a stepping stone between ipf_queueflush() and */
5410 /* nat_delete(). It is used so we can provide a uniform interface via the */
5411 /* ipf_queueflush() function. Since the nat_delete() function returns void */
5412 /* we translate that to mean it always succeeds in deleting something. */
5413 /* ------------------------------------------------------------------------ */
5414 static int nat_flush_entry(entry)
/* The entry is removed with the NL_FLUSH reason code. */
5417 nat_delete(entry, NL_FLUSH);
5422 /* ------------------------------------------------------------------------ */
5423 /* Function: nat_gettable */
5424 /* Returns: int - 0 = success, else error */
5425 /* Parameters: data(I) - pointer to ioctl data */
5427 /* This function handles ioctl requests for tables of nat information. */
5428 /* At present the only table it deals with is the hash bucket statistics. */
5429 /* ------------------------------------------------------------------------ */
5430 static int nat_gettable(data)
5436 error = fr_inobj(data, &table, IPFOBJ_GTABLE);
5440 switch (table.ita_type)
5442 case IPFTABLE_BUCKETS_NATIN :
5443 error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table,
5444 ipf_nattable_sz * sizeof(u_long));
5447 case IPFTABLE_BUCKETS_NATOUT :
5448 error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table,
5449 ipf_nattable_sz * sizeof(u_long));