4 * Copyright (C) 1995-2003 by Darren Reed.
6 * See the IPFILTER.LICENCE file for details on licencing.
8 #if defined(KERNEL) || defined(_KERNEL)
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
19 #if defined(_KERNEL) && defined(__NetBSD_Version__) && \
20 (__NetBSD_Version__ >= 399002000)
21 # include <sys/kauth.h>
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
25 #if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
26 # include "opt_ipfilter_log.h"
28 # include "opt_ipfilter.h"
42 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
43 # include <sys/filio.h>
44 # include <sys/fcntl.h>
46 # include <sys/ioctl.h>
49 # include <sys/fcntl.h>
52 # include <sys/protosw.h>
54 #include <sys/socket.h>
56 # include <sys/systm.h>
57 # if !defined(__SVR4) && !defined(__svr4__)
58 # include <sys/mbuf.h>
61 #if defined(__SVR4) || defined(__svr4__)
62 # include <sys/filio.h>
63 # include <sys/byteorder.h>
65 # include <sys/dditypes.h>
67 # include <sys/stream.h>
68 # include <sys/kmem.h>
70 #if __FreeBSD_version >= 300000
71 # include <sys/queue.h>
74 #if __FreeBSD_version >= 300000
75 # include <net/if_var.h>
76 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
77 # include "opt_ipfilter.h"
83 #include <net/route.h>
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/ip.h>
90 # include <vpn/ipsec.h>
91 extern struct ifnet vpnif;
95 # include <netinet/ip_var.h>
97 #include <netinet/tcp.h>
98 #include <netinet/udp.h>
99 #include <netinet/ip_icmp.h>
100 #include "netinet/ip_compat.h"
101 #include <netinet/tcpip.h>
102 #include "netinet/ip_fil.h"
103 #include "netinet/ip_nat.h"
104 #include "netinet/ip_frag.h"
105 #include "netinet/ip_state.h"
106 #include "netinet/ip_proxy.h"
108 #include "netinet/ip_sync.h"
110 #if (__FreeBSD_version >= 300000)
111 # include <sys/malloc.h>
113 /* END OF INCLUDES */
116 #define SOCKADDR_IN struct sockaddr_in
119 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed";
120 static const char rcsid[] = "@(#)$FreeBSD$";
121 /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */
125 /* ======================================================================== */
126 /* How the NAT is organised and works. */
128 /* Inside (interface y) NAT Outside (interface x) */
129 /* -------------------- -+- ------------------------------------- */
130 /* Packet going | out, processed by fr_checknatout() for x */
131 /* ------------> | ------------> */
132 /* src=10.1.1.1 | src=192.1.1.1 */
134 /* | in, processed by fr_checknatin() for x */
135 /* <------------ | <------------ */
136 /* dst=10.1.1.1 | dst=192.1.1.1 */
137 /* -------------------- -+- ------------------------------------- */
138 /* fr_checknatout() - changes ip_src and if required, sport */
139 /* - creates a new mapping, if required. */
140 /* fr_checknatin() - changes ip_dst and if required, dport */
142 /* In the NAT table, internal source is recorded as "in" and externally */
144 /* ======================================================================== */
/*
 * NAT session storage: nat_table[0] and nat_table[1] are the two session
 * hash tables (each allocated with ipf_nattable_sz slots at initialisation);
 * nat_instances heads the list of sessions linked through nat_next.
 */
147 nat_t **nat_table[2] = { NULL, NULL },
148 *nat_instances = NULL;
/* Head of the list of loaded NAT rules, linked through in_next. */
149 ipnat_t *nat_list = NULL;
/*
 * Sizing values.  The *_sz variables are the slot counts used when the
 * corresponding tables are allocated at initialisation.
 */
150 u_int ipf_nattable_max = NAT_TABLE_MAX;
151 u_int ipf_nattable_sz = NAT_TABLE_SZ;
152 u_int ipf_natrules_sz = NAT_SIZE;
153 u_int ipf_rdrrules_sz = RDR_SIZE;
154 u_int ipf_hostmap_sz = HOSTMAP_SIZE;
/* fr_nat_maxbucket == 0 asks initialisation to derive a value from ipf_nattable_sz. */
155 u_int fr_nat_maxbucket = 0,
156 fr_nat_maxbucket_reset = 1;
/* Netmask bitmasks for map rules (nat_masks) and redirect rules (rdr_masks). */
157 u_32_t nat_masks = 0;
158 u_32_t rdr_masks = 0;
159 u_long nat_last_force_flush = 0;
/* Hash tables of map rules and redirect rules. */
160 ipnat_t **nat_rules = NULL;
161 ipnat_t **rdr_rules = NULL;
/* Host map entries: hash table plus a list of every entry. */
162 hostmap_t **ipf_hm_maptable = NULL;
163 hostmap_t *ipf_hm_maplist = NULL;
/* Per-TCP-state timeout queues, set up with fr_sttab_init(). */
164 ipftq_t nat_tqb[IPF_TCP_NSTATES];
/* Queue list handed to fr_addtimeoutqueue() for rule-specific ages. */
168 ipftq_t *nat_utqe = NULL;
169 int fr_nat_doflush = 0;
/*
 * Default timeouts.  At initialisation fr_defnatage becomes the TTL of
 * nat_udptq, fr_defnatipage of nat_iptq and fr_defnaticmpage of nat_icmptq.
 * The trailing comments imply the unit is half a second.
 */
176 u_long fr_defnatage = DEF_NAT_AGE,
177 fr_defnatipage = 120, /* 60 seconds */
178 fr_defnaticmpage = 6; /* 3 seconds */
/*
 * pfil_delayed_copy is only referenced on Solaris builds without
 * _INET_IP_STACK_H: it is cleared when a NAT rule is added and set to 1
 * when the rule count drops back to zero.
 */
182 #if SOLARIS && !defined(_INET_IP_STACK_H)
183 extern int pfil_delayed_copy;
/* Prototypes for the file-local (static) helpers; __P() wraps the argument lists. */
186 static int nat_flush_entry __P((void *));
187 static int nat_flushtable __P((void));
188 static int nat_clearlist __P((void));
189 static void nat_addnat __P((struct ipnat *));
190 static void nat_addrdr __P((struct ipnat *));
191 static void nat_delrdr __P((struct ipnat *));
192 static void nat_delnat __P((struct ipnat *));
193 static int fr_natgetent __P((caddr_t, int));
194 static int fr_natgetsz __P((caddr_t, int));
195 static int fr_natputent __P((caddr_t, int));
196 static int nat_extraflush __P((int));
197 static int nat_gettable __P((char *));
198 static void nat_tabmove __P((nat_t *));
199 static int nat_match __P((fr_info_t *, ipnat_t *));
200 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
201 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
202 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
203 struct in_addr, struct in_addr, u_32_t));
204 static int nat_icmpquerytype4 __P((int));
205 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
206 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
207 static int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
208 tcphdr_t *, nat_t **, int));
209 static int nat_resolverule __P((ipnat_t *));
210 static nat_t *fr_natclone __P((fr_info_t *, nat_t *));
211 static void nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
212 static int nat_wildok __P((nat_t *, int, int, int, int));
213 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *));
214 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *));
217 /* ------------------------------------------------------------------------ */
218 /* Function: fr_natinit */
219 /* Returns: int - 0 == success, -1 == failure */
220 /* Parameters: Nil */
222 /* Initialise all of the NAT locks, tables and other structures. */
/* Allocates and zeroes the session, rule and host map hash tables and the */
/* per-bucket length counters, sets up the TCP/UDP/ICMP/IP timeout queues */
/* and finally initialises the NAT locks and mutexes. */
223 /* ------------------------------------------------------------------------ */
/* Session hash tables, ipf_nattable_sz pointers each, zeroed once allocated. */
228 KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
229 if (nat_table[0] != NULL)
230 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
234 KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
235 if (nat_table[1] != NULL)
236 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
/* Rule hash tables: map rules, then redirect rules. */
240 KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
241 if (nat_rules != NULL)
242 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
246 KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
247 if (rdr_rules != NULL)
248 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
/* Host map hash table; the list of host maps starts out empty. */
252 KMALLOCS(ipf_hm_maptable, hostmap_t **, \
253 sizeof(hostmap_t *) * ipf_hostmap_sz);
254 if (ipf_hm_maptable != NULL)
255 bzero((char *)ipf_hm_maptable,
256 sizeof(hostmap_t *) * ipf_hostmap_sz);
259 ipf_hm_maplist = NULL;
/* One u_long counter per session-table bucket, for each of the two tables. */
261 KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
262 ipf_nattable_sz * sizeof(u_long));
263 if (nat_stats.ns_bucketlen[0] == NULL)
265 bzero((char *)nat_stats.ns_bucketlen[0],
266 ipf_nattable_sz * sizeof(u_long));
268 KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
269 ipf_nattable_sz * sizeof(u_long));
270 if (nat_stats.ns_bucketlen[1] == NULL)
273 bzero((char *)nat_stats.ns_bucketlen[1],
274 ipf_nattable_sz * sizeof(u_long));
/* No explicit bucket limit configured: derive one from the table size. */
276 if (fr_nat_maxbucket == 0) {
277 for (i = ipf_nattable_sz; i > 0; i >>= 1)
279 fr_nat_maxbucket *= 2;
/* Populate the per-TCP-state queues with their defaults. */
282 fr_sttab_init(nat_tqb);
284 * Increase this because we may have "keep state" following this too
285 * and packet storms can occur if this is removed too quickly.
287 nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
/* Chain the queues: last TCP state -> UDP -> ICMP -> IP -> end of list. */
288 nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
289 nat_udptq.ifq_ttl = fr_defnatage;
290 nat_udptq.ifq_ref = 1;
291 nat_udptq.ifq_head = NULL;
292 nat_udptq.ifq_tail = &nat_udptq.ifq_head;
293 MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
294 nat_udptq.ifq_next = &nat_icmptq;
295 nat_icmptq.ifq_ttl = fr_defnaticmpage;
296 nat_icmptq.ifq_ref = 1;
297 nat_icmptq.ifq_head = NULL;
298 nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
299 MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
300 nat_icmptq.ifq_next = &nat_iptq;
301 nat_iptq.ifq_ttl = fr_defnatipage;
302 nat_iptq.ifq_ref = 1;
303 nat_iptq.ifq_head = NULL;
304 nat_iptq.ifq_tail = &nat_iptq.ifq_head;
305 MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
306 nat_iptq.ifq_next = NULL;
/* Bound each TCP-state TTL below by fr_defnaticmpage and above by fr_defnatage. */
308 for (i = 0; i < IPF_TCP_NSTATES; i++) {
309 if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
310 nat_tqb[i].ifq_ttl = fr_defnaticmpage;
312 else if (nat_tqb[i].ifq_ttl > fr_defnatage)
313 nat_tqb[i].ifq_ttl = fr_defnatage;
318 * Increase this because we may have "keep state" following
319 * this too and packet storms can occur if this is removed
/* The CLOSED queue takes over the (now bounded) LAST_ACK TTL. */
322 nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
/* Locks protecting the NAT tables, NAT fragments, new entries and ioctl I/O. */
324 RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
325 RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
326 MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
327 MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
335 /* ------------------------------------------------------------------------ */
336 /* Function: nat_addrdr */
338 /* Parameters: n(I) - pointer to NAT rule to add */
340 /* Adds a redirect rule to the hash table of redirect rules and the list of */
341 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */
342 /* use by redirect rules. */
/* The hash slot is chosen from the rule's masked outside address */
/* (in_outip & in_outmsk) and rules in a slot are chained through in_rnext. */
343 /* ------------------------------------------------------------------------ */
344 static void nat_addrdr(n)
/* k is the count4bits() result for the outside mask; only 0..31 pass the test. */
352 k = count4bits(n->in_outmsk);
353 if ((k >= 0) && (k != 32))
/* Hash the network part of the outside address into the redirect table. */
355 j = (n->in_outip & n->in_outmsk);
356 hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
/* Step to the next link of the slot's chain. */
359 np = &(*np)->in_rnext;
367 /* ------------------------------------------------------------------------ */
368 /* Function: nat_addnat */
370 /* Parameters: n(I) - pointer to NAT rule to add */
372 /* Adds a NAT map rule to the hash table of rules and the list of loaded */
373 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */
374 /* redirect rules. */
/* NOTE(review): "redirect rules" above looks copied from nat_addrdr; this */
/* routine works on in_inmsk, ipf_natrules_sz and in_mnext, i.e. map rules. */
/* Confirm which bitmask is updated and correct the wording. */
375 /* ------------------------------------------------------------------------ */
376 static void nat_addnat(n)
/* k is the count4bits() result for the inside mask; only 0..31 pass the test. */
384 k = count4bits(n->in_inmsk);
385 if ((k >= 0) && (k != 32))
/* Hash the network part of the inside address into the map rule table. */
387 j = (n->in_inip & n->in_inmsk);
388 hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
/* Step to the next link of the slot's chain. */
391 np = &(*np)->in_mnext;
399 /* ------------------------------------------------------------------------ */
400 /* Function: nat_delrdr */
402 /* Parameters: n(I) - pointer to NAT rule to delete */
404 /* Removes a redirect rule from the hash table of redirect rules. */
/* The rule is unlinked from its chain using its in_rnext/in_prnext links; */
/* the rule itself is not freed here. */
405 /* ------------------------------------------------------------------------ */
406 static void nat_delrdr(n)
/* Point the successor's back-link at whatever pointed to n ... */
410 n->in_rnext->in_prnext = n->in_prnext;
/* ... and make that pointer skip over n. */
411 *n->in_prnext = n->in_rnext;
415 /* ------------------------------------------------------------------------ */
416 /* Function: nat_delnat */
418 /* Parameters: n(I) - pointer to NAT rule to delete */
420 /* Removes a NAT map rule from the hash table of NAT map rules. */
/* The rule is unlinked from its chain using its in_mnext/in_pmnext links; */
/* the rule itself is not freed here. */
421 /* ------------------------------------------------------------------------ */
422 static void nat_delnat(n)
/* If n has a successor, point its back-link at whatever pointed to n. */
425 if (n->in_mnext != NULL)
426 n->in_mnext->in_pmnext = n->in_pmnext;
/* Make the predecessor's forward pointer skip over n. */
427 *n->in_pmnext = n->in_mnext;
431 /* ------------------------------------------------------------------------ */
432 /* Function: nat_hostmap */
433 /* Returns: struct hostmap* - NULL if no hostmap could be created, */
434 /* else a pointer to the hostmapping to use */
435 /* Parameters: np(I) - pointer to NAT rule */
436 /* src(I) - source IP address (compared with hm_srcip) */
/* dst(I) - destination IP address (compared with hm_dstip) */
437 /* map(I) - mapped IP address */
438 /* port(I) - destination port number */
439 /* Write Locks: ipf_nat */
441 /* Check if an ip address has already been allocated for a given mapping */
442 /* that is not doing port based translation. If is not yet allocated, then */
443 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */
/* Entries are kept on two lists: the list of all host maps (hm_next / */
/* hm_pnext, headed by ipf_hm_maplist) and a per-bucket hash chain */
/* (hm_hnext / hm_phnext, headed by ipf_hm_maptable[hv]). */
444 /* ------------------------------------------------------------------------ */
445 static struct hostmap *nat_hostmap(np, src, dst, map, port)
/* The bucket index is derived from the source and destination addresses. */
455 hv = (src.s_addr ^ dst.s_addr);
/*
 * Search only this bucket's hash chain.  The chain is linked through
 * hm_hnext; following hm_next instead would wander along the list of all
 * host maps and defeat the purpose of hashing.
 */
459 for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_hnext)
460 if ((hm->hm_srcip.s_addr == src.s_addr) &&
461 (hm->hm_dstip.s_addr == dst.s_addr) &&
462 ((np == NULL) || (np == hm->hm_ipnat)) &&
463 ((port == 0) || (port == hm->hm_port))) {
/* No existing entry matched: allocate a new host map. */
471 KMALLOC(hm, hostmap_t *);
/* Link the new entry at the head of the list of all host maps. */
473 hm->hm_next = ipf_hm_maplist;
474 hm->hm_pnext = &ipf_hm_maplist;
475 if (ipf_hm_maplist != NULL)
476 ipf_hm_maplist->hm_pnext = &hm->hm_next;
/* Link the new entry at the head of its hash bucket. */
478 hm->hm_hnext = ipf_hm_maptable[hv];
479 hm->hm_phnext = ipf_hm_maptable + hv;
480 if (ipf_hm_maptable[hv] != NULL)
481 ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
482 ipf_hm_maptable[hv] = hm;
494 /* ------------------------------------------------------------------------ */
495 /* Function: fr_hostmapdel */
497 /* Parameters: hmp(I) - pointer to hostmap structure pointer */
498 /* Write Locks: ipf_nat */
500 /* Decrement the references to this hostmap structure by one. If this */
501 /* reaches zero then remove it and free it. */
/* Removal unlinks the entry from both its hash chain (hm_hnext/hm_phnext) */
/* and the list of all host maps (hm_next/hm_pnext). */
502 /* ------------------------------------------------------------------------ */
503 void fr_hostmapdel(hmp)
504 struct hostmap **hmp;
/* Last reference gone: take the entry off both lists. */
512 if (hm->hm_ref == 0) {
/* Unlink from the hash bucket chain. */
514 hm->hm_hnext->hm_phnext = hm->hm_phnext;
515 *hm->hm_phnext = hm->hm_hnext;
/* Unlink from the list of all host maps. */
517 hm->hm_next->hm_pnext = hm->hm_pnext;
518 *hm->hm_pnext = hm->hm_next;
524 /* ------------------------------------------------------------------------ */
525 /* Function: fix_outcksum */
527 /* Parameters: fin(I) - pointer to packet information */
528 /* sp(I) - location of 16bit checksum to update */
529 /* n(I) - amount to adjust checksum by */
531 /* Adjusts the 16bit checksum by "n" for packets going out. */
/* The stored value is read and written in network byte order and the */
/* arithmetic is done on its 16 bit complement with carries folded back in. */
532 /* ------------------------------------------------------------------------ */
533 void fix_outcksum(fin, sp, n)
/* Adjustment flagged with NAT_HW_CKSUM: handled separately from the normal path. */
544 if (n & NAT_HW_CKSUM) {
/* Fold the upper 16 bits of n into the lower 16. */
547 n = (n & 0xffff) + (n >> 16);
/* Recover the 16 bit sum from the stored (complemented) checksum. */
551 sum1 = (~ntohs(*sp)) & 0xffff;
/* Fold carries into the low 16 bits; repeated because a fold can carry again. */
553 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
555 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
/* Complement again and store in network byte order. */
556 sumshort = ~(u_short)sum1;
557 *(sp) = htons(sumshort);
561 /* ------------------------------------------------------------------------ */
562 /* Function: fix_incksum */
564 /* Parameters: fin(I) - pointer to packet information */
565 /* sp(I) - location of 16bit checksum to update */
566 /* n(I) - amount to adjust checksum by */
568 /* Adjusts the 16bit checksum by "n" for packets going in. */
/* The stored value is read and written in network byte order; the 16 bit */
/* complement of "n" is added to the sum and carries are folded back in. */
569 /* ------------------------------------------------------------------------ */
570 void fix_incksum(fin, sp, n)
/* Adjustment flagged with NAT_HW_CKSUM: handled separately from the normal path. */
581 if (n & NAT_HW_CKSUM) {
/* Fold the upper 16 bits of n into the lower 16. */
584 n = (n & 0xffff) + (n >> 16);
/* Recover the 16 bit sum from the stored (complemented) checksum. */
588 sum1 = (~ntohs(*sp)) & 0xffff;
/* Add the 16 bit complement of the adjustment. */
589 sum1 += ~(n) & 0xffff;
/* Fold carries into the low 16 bits; repeated because a fold can carry again. */
590 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
592 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
/* Complement again and store in network byte order. */
593 sumshort = ~(u_short)sum1;
594 *(sp) = htons(sumshort);
598 /* ------------------------------------------------------------------------ */
599 /* Function: fix_datacksum */
601 /* Parameters: sp(I) - location of 16bit checksum to update */
602 /* n(I) - amount to adjust checksum by */
604 /* Fix_datacksum is used *only* for the adjustments of checksums in the */
605 /* data section of an IP packet. */
607 /* The only situation in which you need to do this is when NAT'ing an */
608 /* ICMP error message. Such a message, contains in its body the IP header */
609 /* of the original IP packet, that causes the error. */
611 /* You can't use fix_incksum or fix_outcksum in that case, because for the */
612 /* kernel the data section of the ICMP error is just data, and no special */
613 /* processing like hardware cksum or ntohs processing have been done by the */
614 /* kernel on the data section. */
615 /* ------------------------------------------------------------------------ */
616 void fix_datacksum(sp, n)
/* Recover the 16 bit sum from the stored (complemented) checksum. */
626 sum1 = (~ntohs(*sp)) & 0xffff;
/* Fold carries into the low 16 bits; repeated because a fold can carry again. */
628 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
630 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
/* Complement again and store in network byte order. */
631 sumshort = ~(u_short)sum1;
632 *(sp) = htons(sumshort);
636 /* ------------------------------------------------------------------------ */
637 /* Function: fr_nat_ioctl */
638 /* Returns: int - 0 == success, != 0 == failure */
639 /* Parameters: data(I) - pointer to ioctl data */
640 /* cmd(I) - ioctl command integer */
641 /* mode(I) - file mode bits used with open */
/* uid(I) - passed on to ipf_findtoken()/ipf_deltoken() */
/* ctx(I) - passed on to ipf_findtoken(), ipf_deltoken() and */
/* appr_ioctl() */
643 /* Processes an ioctl call made to operate on the IP Filter NAT device. */
/* Besides the open-mode bits, "mode" is also tested for NAT_LOCKHELD and */
/* NAT_SYSSPACE. Operations that modify state check for FWRITE in "mode". */
644 /* ------------------------------------------------------------------------ */
645 int fr_nat_ioctl(data, cmd, mode, uid, ctx)
651 ipnat_t *nat, *nt, *n = NULL, **np = NULL;
652 int error = 0, ret, arg, getlock;
/* Platform specific permission checks applied to write-mode requests. */
656 #if (BSD >= 199306) && defined(_KERNEL)
657 # if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
658 if ((mode & FWRITE) &&
659 kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
660 KAUTH_REQ_NETWORK_FIREWALL_FW,
665 if ((securelevel >= 3) && (mode & FWRITE)) {
671 #if defined(__osf__) && defined(_KERNEL)
/* getlock is 0 when the caller flags NAT_LOCKHELD, 1 otherwise; it is */
/* handed to the helpers below. */
674 getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
677 nat = NULL; /* XXX gcc -Wuninitialized */
/* Adding a rule needs storage for the copy kept in the kernel. */
678 if (cmd == (ioctlcmd_t)SIOCADNAT) {
679 KMALLOC(nt, ipnat_t *);
/* Fetch the rule description: plain bcopy when NAT_SYSSPACE is set, */
/* fr_inobj() otherwise. */
684 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
685 if (mode & NAT_SYSSPACE) {
686 bcopy(data, (char *)&natd, sizeof(natd));
689 error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
697 * For add/delete, look to see if the NAT entry is already present
699 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
701 if (nat->in_v == 0) /* For backward compat. */
/* Keep only the flag bits covered by IPN_USERFLAGS. */
703 nat->in_flags &= IPN_USERFLAGS;
/* Unless this is a map-block rule, reduce the addresses to their masked */
/* form (inside address skipped for IPN_SPLIT, outside for IPN_IPRANGE). */
704 if ((nat->in_redir & NAT_MAPBLK) == 0) {
705 if ((nat->in_flags & IPN_SPLIT) == 0)
706 nat->in_inip &= nat->in_inmsk;
707 if ((nat->in_flags & IPN_IPRANGE) == 0)
708 nat->in_outip &= nat->in_outmsk;
/* Walk the rule list under ipf_natio comparing from in_flags onwards; */
/* np tracks the link that refers to the current rule n. */
710 MUTEX_ENTER(&ipf_natio);
711 for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
712 if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
714 if (nat->in_redir == NAT_REDIRECT &&
715 nat->in_pnext != n->in_pnext)
/* Clear the NAT log and report the value returned by ipflog_clear(). */
728 if (!(mode & FWRITE))
731 tmp = ipflog_clear(IPL_LOGNAT);
732 error = BCOPYOUT((char *)&tmp, (char *)data,
/* Set nat_logging from the caller's data (write mode required). */
741 if (!(mode & FWRITE))
744 error = BCOPYIN((char *)data, (char *)&nat_logging,
745 sizeof(nat_logging));
/* Report the current nat_logging value. */
752 error = BCOPYOUT((char *)&nat_logging, (char *)data,
753 sizeof(nat_logging));
/* Report iplused[IPL_LOGNAT]. */
759 arg = iplused[IPL_LOGNAT];
760 error = BCOPYOUT(&arg, data, sizeof(arg));
/* Add a rule: copy the caller's rule into nt and hand it to */
/* nat_siocaddnat(); ipf_natio is released on every path shown. */
766 if (!(mode & FWRITE)) {
768 } else if (n != NULL) {
770 } else if (nt == NULL) {
774 MUTEX_EXIT(&ipf_natio);
777 bcopy((char *)nat, (char *)nt, sizeof(*n));
778 error = nat_siocaddnat(nt, np, getlock);
779 MUTEX_EXIT(&ipf_natio);
/* Remove a rule: n and np come from the list search above. */
785 if (!(mode & FWRITE)) {
788 } else if (n == NULL) {
793 MUTEX_EXIT(&ipf_natio);
796 nat_siocdelnat(n, np, getlock);
798 MUTEX_EXIT(&ipf_natio);
/* Refresh the pointer and size fields of nat_stats, then copy it out. */
803 nat_stats.ns_table[0] = nat_table[0];
804 nat_stats.ns_table[1] = nat_table[1];
805 nat_stats.ns_list = nat_list;
806 nat_stats.ns_maptable = ipf_hm_maptable;
807 nat_stats.ns_maplist = ipf_hm_maplist;
808 nat_stats.ns_nattab_sz = ipf_nattable_sz;
809 nat_stats.ns_nattab_max = ipf_nattable_max;
810 nat_stats.ns_rultab_sz = ipf_natrules_sz;
811 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
812 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
813 nat_stats.ns_instances = nat_instances;
814 nat_stats.ns_apslist = ap_sess_list;
815 nat_stats.ns_ticks = fr_ticks;
816 error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
/* Redirect lookup: read the request, look it up under the ipf_nat read */
/* lock and copy the natlookup structure back out. */
823 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
828 READ_ENTER(&ipf_nat);
830 ptr = nat_lookupredir(&nl);
832 RWLOCK_EXIT(&ipf_nat);
835 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
/* Flush request: the argument read from the caller selects between */
/* nat_flushtable(), nat_clearlist() and nat_extraflush(); the result is */
/* copied back to the caller. */
843 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */
844 if (!(mode & FWRITE)) {
849 WRITE_ENTER(&ipf_nat);
852 error = BCOPYIN(data, &arg, sizeof(arg));
857 ret = nat_flushtable();
859 ret = nat_clearlist();
861 ret = nat_extraflush(arg);
865 RWLOCK_EXIT(&ipf_nat);
868 error = BCOPYOUT(&ret, data, sizeof(ret));
/* Proxy ioctls are delegated to appr_ioctl(). */
873 error = appr_ioctl(data, cmd, mode, ctx);
/* Update fr_nat_lock through fr_lock() (write mode required). */
877 if (!(mode & FWRITE)) {
880 error = fr_lock(data, &fr_nat_lock);
/* Load a NAT entry supplied by the caller (write mode required). */
885 if ((mode & FWRITE) != 0) {
886 error = fr_natputent(data, getlock);
/* Size query and copy-out of a NAT entry. */
894 error = fr_natgetsz(data, getlock);
901 error = fr_natgetent(data, getlock);
/* Iteration: find the caller's token, then run nat_iterator() with it. */
912 error = fr_inobj(data, &iter, IPFOBJ_GENITER);
914 token = ipf_findtoken(iter.igi_type, uid, ctx);
916 error = nat_iterator(token, &iter);
918 RWLOCK_EXIT(&ipf_tokens);
/* Token deletion: arg is read from the caller and passed to ipf_deltoken(). */
925 error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
928 error = ipf_deltoken(arg, uid, ctx);
/* Copy out the TCP state timeout queue table. */
936 error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
940 error = nat_gettable(data);
954 /* ------------------------------------------------------------------------ */
955 /* Function: nat_siocaddnat */
956 /* Returns: int - 0 == success, != 0 == failure */
957 /* Parameters: n(I) - pointer to new NAT rule */
958 /* np(I) - pointer to where to insert new NAT rule */
959 /* getlock(I) - flag indicating if lock on ipf_nat is held */
/* NOTE(review): fr_nat_ioctl passes getlock == 0 when NAT_LOCKHELD is set, */
/* so a non-zero value presumably means "acquire ipf_nat here" - confirm. */
960 /* Mutex Locks: ipf_natio */
962 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
963 /* from information passed to the kernel, then add it to the appropriate */
964 /* NAT rule table(s). */
965 /* ------------------------------------------------------------------------ */
966 static int nat_siocaddnat(n, np, getlock)
/* Resolve interface names and any proxy label first. */
972 if (nat_resolverule(n) != 0)
975 if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
/* in_space: size of the address (and later port) space the rule can use, */
/* chosen according to the rule type. */
979 if (n->in_redir & NAT_MAPBLK)
980 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
981 else if (n->in_flags & IPN_AUTOPORTMAP)
982 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
983 else if (n->in_flags & IPN_IPRANGE)
984 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
985 else if (n->in_flags & IPN_SPLIT)
987 else if (n->in_outmsk != 0)
988 n->in_space = ~ntohl(n->in_outmsk);
993 * Calculate the number of valid IP addresses in the output
994 * mapping range. In all cases, the range is inclusive of
995 * the start and ending IP addresses.
996 * If to a CIDR address, lose 2: broadcast + network address
998 * If to a range, add one.
999 * If to a single IP address, set to 1.
1002 if ((n->in_flags & IPN_IPRANGE) != 0)
/* in_nip: first address to hand out, in host byte order. A real network */
/* (mask neither all-ones nor zero, not a range or split rule) starts one */
/* above the network address. */
1009 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1010 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1011 n->in_nip = ntohl(n->in_outip) + 1;
1012 else if ((n->in_flags & IPN_SPLIT) &&
1013 (n->in_redir & NAT_REDIRECT))
1014 n->in_nip = ntohl(n->in_inip);
1016 n->in_nip = ntohl(n->in_outip);
/* Map rules start port allocation at in_pmin. */
1017 if (n->in_redir & NAT_MAP) {
1018 n->in_pnext = ntohs(n->in_pmin);
1020 * Multiply by the number of ports made available.
1022 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1023 n->in_space *= (ntohs(n->in_pmax) -
1024 ntohs(n->in_pmin) + 1);
1026 * Because two different sources can map to
1027 * different destinations but use the same
1029 * If the result is smaller than in_space, then
1030 * we may have wrapped around 32bits.
1033 if ((i != 0) && (i != 0xffffffff)) {
1034 j = n->in_space * (~ntohl(i) + 1);
1035 if (j >= n->in_space)
1038 n->in_space = 0xffffffff;
1042 * If no protocol is specified, multiple by 256 to allow for
1043 * at least one IP:IP mapping per protocol.
1045 if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1046 j = n->in_space * 256;
1047 if (j >= n->in_space)
1050 n->in_space = 0xffffffff;
1054 /* Otherwise, these fields are preset */
1057 WRITE_ENTER(&ipf_nat);
/* Rule specific ages get their own timeout queues from nat_utqe. */
1062 if (n->in_age[0] != 0)
1063 n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1065 if (n->in_age[1] != 0)
1066 n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
/* Redirect rules drop IPN_NOTDST; map and map-block rules drop IPN_NOTSRC. */
1068 if (n->in_redir & NAT_REDIRECT) {
1069 n->in_flags &= ~IPN_NOTDST;
1072 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1073 n->in_flags &= ~IPN_NOTSRC;
1076 MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1079 nat_stats.ns_rules++;
1080 #if SOLARIS && !defined(_INET_IP_STACK_H)
1081 pfil_delayed_copy = 0;
1084 RWLOCK_EXIT(&ipf_nat); /* WRITE */
1091 /* ------------------------------------------------------------------------ */
1092 /* Function: nat_resolverule */
/* Returns: int - callers treat a non-zero result as failure */
1094 /* Parameters: n(I) - pointer to NAT rule */
1096 /* Resolves the names stored in a NAT rule. Both interface names are */
1097 /* NUL terminated and looked up with fr_resolvenic(); an empty second name */
1098 /* reuses the first. A non-empty proxy label is looked up via appr_lookup. */
1099 /* ------------------------------------------------------------------------ */
1100 static int nat_resolverule(n)
/* Force termination before the name is used for a lookup. */
1103 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1104 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1106 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
/* No second interface given: reuse the first name and its resolved pointer. */
1107 if (n->in_ifnames[1][0] == '\0') {
1108 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1109 n->in_ifps[1] = n->in_ifps[0];
1111 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
/* A proxy label was supplied: find the proxy for this protocol and label. */
1114 if (n->in_plabel[0] != '\0') {
1115 n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1116 if (n->in_apr == NULL)
1123 /* ------------------------------------------------------------------------ */
1124 /* Function: nat_siocdelnat */
1125 /* Returns: Nil */
1126 /* Parameters: n(I) - pointer to NAT rule to delete */
1127 /* np(I) - pointer to the list link that refers to n */
1128 /* getlock(I) - flag indicating if lock on ipf_nat is held */
1129 /* Mutex Locks: ipf_natio */
1131 /* Handle SIOCRMNAT. Takes the rule out of the redirect and/or map hash */
1132 /* tables and drops its references on the rule specific timeout queues. */
1133 /* A rule with in_use == 0 has its proxy and lock released and is removed */
/* from the rule count; IPN_DELETE is used to flag a rule as deleted. */
1134 /* ------------------------------------------------------------------------ */
1135 static void nat_siocdelnat(n, np, getlock)
1140 WRITE_ENTER(&ipf_nat);
/* Unhook from whichever hash tables the rule type placed it in. */
1142 if (n->in_redir & NAT_REDIRECT)
1144 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1146 if (nat_list == NULL) {
/*
 * Drop the reference on each timeout queue and free a queue once
 * fr_deletetimeoutqueue() reports 0.  Each branch must free the queue it
 * has just dereferenced: freeing in_tqehead[1] in the first branch would
 * leak queue 0 and leave the second branch working on freed memory.
 */
1151 if (n->in_tqehead[0] != NULL) {
1152 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1153 fr_freetimeoutqueue(n->in_tqehead[0]);
1157 if (n->in_tqehead[1] != NULL) {
1158 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1159 fr_freetimeoutqueue(n->in_tqehead[1]);
/* Nothing uses the rule any more: release what it holds. */
1165 if (n->in_use == 0) {
1167 appr_free(n->in_apr);
1168 MUTEX_DESTROY(&n->in_lock);
1170 nat_stats.ns_rules--;
1171 #if SOLARIS && !defined(_INET_IP_STACK_H)
1172 if (nat_stats.ns_rules == 0)
1173 pfil_delayed_copy = 1;
/* Flag the rule as deleted. */
1176 n->in_flags |= IPN_DELETE;
1180 RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */
1185 /* ------------------------------------------------------------------------ */
1186 /* Function: fr_natgetsz */
1187 /* Returns: int - 0 == success, != 0 is the error value. */
1188 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1189 /* get the size of. */
/* getlock(I) - locking flag supplied by fr_nat_ioctl */
/* (presumably selects whether ipf_nat is taken */
/* here - TODO confirm) */
1191 /* Handle SIOCSTGSZ. */
1192 /* Return the size of the nat list entry to be copied back to user space. */
1193 /* The size of the entry is stored in the ng_sz field and the entire natget */
1194 /* structure is copied back to the user. */
1195 /* ------------------------------------------------------------------------ */
1196 static int fr_natgetsz(data, getlock)
/* Read the caller's natget request. */
1204 if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1208 READ_ENTER(&ipf_nat);
1213 nat = nat_instances;
1216 * Empty list so the size returned is 0. Simple.
1220 RWLOCK_EXIT(&ipf_nat);
1222 if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1228 * Make sure the pointer we're copying from exists in the
1229 * current list of entries. Security precaution to prevent
1230 * copying of random kernel data.
1232 for (n = nat_instances; n; n = n->nat_next)
1237 RWLOCK_EXIT(&ipf_nat);
1244 * Incluse any space required for proxy data structures.
/* Base size is one nat_save_t; a proxy session adds sizeof(ap_session_t) - 4 */
/* plus its private data size when aps_data is set. */
1246 ng.ng_sz = sizeof(nat_save_t);
1249 ng.ng_sz += sizeof(ap_session_t) - 4;
1250 if (aps->aps_data != 0)
1251 ng.ng_sz += aps->aps_psiz;
1254 RWLOCK_EXIT(&ipf_nat);
/* Hand the filled-in natget structure back to the caller. */
1257 if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1263 /* ------------------------------------------------------------------------ */
1264 /* Function: fr_natgetent */
1265 /* Returns: int - 0 == success, != 0 is the error value. */
1266 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1267 /* to NAT structure to copy out. */
/* getlock(I) - locking flag supplied by fr_nat_ioctl */
/* (presumably selects whether ipf_nat is taken */
/* here - TODO confirm) */
1269 /* Handle SIOCSTGET. */
1270 /* Copies out NAT entry to user space. Any additional data held for a */
1271 /* proxy is also copied, as is the NAT rule which was responsible for it. */
1272 /* ------------------------------------------------------------------------ */
1273 static int fr_natgetent(data, getlock)
1279 nat_save_t *ipn, ipns;
/* Read the request header to learn the caller's buffer size and cursor. */
1282 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
/* The caller's buffer must hold at least a nat_save_t and at most 81920 bytes. */
1286 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
/* Kernel staging buffer of the size the caller asked for. */
1289 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1294 READ_ENTER(&ipf_nat);
1297 ipn->ipn_dsize = ipns.ipn_dsize;
/* Start from the caller's cursor (ipn_next) or from the head of the list. */
1298 nat = ipns.ipn_next;
1300 nat = nat_instances;
1302 if (nat_instances == NULL)
1308 * Make sure the pointer we're copying from exists in the
1309 * current list of entries. Security precaution to prevent
1310 * copying of random kernel data.
1312 for (n = nat_instances; n; n = n->nat_next)
/* Tell the caller which entry follows this one. */
1320 ipn->ipn_next = nat->nat_next;
1323 * Copy the NAT structure.
1325 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1328 * If we have a pointer to the NAT rule it belongs to, save that too.
1330 if (nat->nat_ptr != NULL)
1331 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1332 sizeof(ipn->ipn_ipnat));
1335 * If we also know the NAT entry has an associated filter rule,
1338 if (nat->nat_fr != NULL)
1339 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1340 sizeof(ipn->ipn_fr));
1343 * Last but not least, if there is an application proxy session set
1344 * up for this NAT entry, then copy that out too, including any
1345 * private data saved along side it by the proxy.
/* Room left for proxy data after the fixed part of the save structure. */
1348 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1352 if (outsize < sizeof(*aps)) {
1358 bcopy((char *)aps, s, sizeof(*aps));
1360 outsize -= sizeof(*aps);
/* Private proxy data is copied only when it fits in the remaining space. */
1361 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1362 bcopy(aps->aps_data, s, aps->aps_psiz);
1368 RWLOCK_EXIT(&ipf_nat);
/* Copy the assembled record out to the caller. */
1371 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1376 RWLOCK_EXIT(&ipf_nat);
/* Release the staging buffer. */
1379 KFREES(ipn, ipns.ipn_dsize);
1385 /* ------------------------------------------------------------------------ */
1386 /* Function: fr_natputent */
1387 /* Returns: int - 0 == success, != 0 is the error value. */
1388 /* Parameters: data(I) - pointer to natget structure with NAT */
1389 /* structure information to load into the kernel */
1390 /* getlock(I) - flag indicating whether or not a write lock */
1391 /* on ipf_nat is already held. */
1393 /* Handle SIOCSTPUT. */
1394 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1395 /* firewall rule data structures, if pointers to them indicate so. */
1396 /* ------------------------------------------------------------------------ */
1397 static int fr_natputent(data, getlock)
1401 nat_save_t ipn, *ipnn;
1409 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1414 * Initialise early because of code at junkput label.
1423 * New entry, copy in the rest of the NAT entry if its size is more
1424 * than just the nat_t structure.
1426 if (ipn.ipn_dsize > sizeof(ipn)) {
1427 if (ipn.ipn_dsize > 81920) {
1432 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1436 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1444 KMALLOC(nat, nat_t *);
1450 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1452 * Initialize all these so that nat_delete() doesn't cause a crash.
1454 bzero((char *)nat, offsetof(struct nat, nat_tqe));
1455 nat->nat_tqe.tqe_pnext = NULL;
1456 nat->nat_tqe.tqe_next = NULL;
1457 nat->nat_tqe.tqe_ifq = NULL;
1458 nat->nat_tqe.tqe_parent = nat;
1461 * Restore the rule associated with this nat session
1463 in = ipnn->ipn_nat.nat_ptr;
1465 KMALLOC(in, ipnat_t *);
1471 bzero((char *)in, offsetof(struct ipnat, in_next6));
1472 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1474 in->in_flags |= IPN_DELETE;
1476 ATOMIC_INC(nat_stats.ns_rules);
1478 if (nat_resolverule(in) != 0) {
1485 * Check that the NAT entry doesn't already exist in the kernel.
1487 * For NAT_OUTBOUND, we're looking for a duplicate MAP entry. To do
1488 * this, we check to see if the inbound combination of addresses and
1489 * ports is already known. Similar logic is applied for NAT_INBOUND.
1492 bzero((char *)&fin, sizeof(fin));
1493 fin.fin_p = nat->nat_p;
1494 if (nat->nat_dir == NAT_OUTBOUND) {
1495 fin.fin_ifp = nat->nat_ifps[0];
1496 fin.fin_data[0] = ntohs(nat->nat_oport);
1497 fin.fin_data[1] = ntohs(nat->nat_outport);
1499 READ_ENTER(&ipf_nat);
1501 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1502 nat->nat_oip, nat->nat_inip);
1504 RWLOCK_EXIT(&ipf_nat);
1510 } else if (nat->nat_dir == NAT_INBOUND) {
1511 fin.fin_ifp = nat->nat_ifps[0];
1512 fin.fin_data[0] = ntohs(nat->nat_outport);
1513 fin.fin_data[1] = ntohs(nat->nat_oport);
1515 READ_ENTER(&ipf_nat);
1517 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1518 nat->nat_outip, nat->nat_oip);
1520 RWLOCK_EXIT(&ipf_nat);
1532 * Restore ap_session_t structure. Include the private data allocated
1537 KMALLOC(aps, ap_session_t *);
1543 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1545 aps->aps_apr = in->in_apr;
1547 aps->aps_apr = NULL;
1548 if (aps->aps_psiz != 0) {
1549 if (aps->aps_psiz > 81920) {
1553 KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1554 if (aps->aps_data == NULL) {
1558 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1562 aps->aps_data = NULL;
1567 * If there was a filtering rule associated with this entry then
1568 * build up a new one.
1572 if ((nat->nat_flags & SI_NEWFR) != 0) {
1573 KMALLOC(fr, frentry_t *);
1579 ipnn->ipn_nat.nat_fr = fr;
1581 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1582 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1587 fr->fr_type = FR_T_NONE;
1589 MUTEX_NUKE(&fr->fr_lock);
1590 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1593 READ_ENTER(&ipf_nat);
1595 for (n = nat_instances; n; n = n->nat_next)
1596 if (n->nat_fr == fr)
1600 MUTEX_ENTER(&fr->fr_lock);
1602 MUTEX_EXIT(&fr->fr_lock);
1605 RWLOCK_EXIT(&ipf_nat);
1616 KFREES(ipnn, ipn.ipn_dsize);
1621 WRITE_ENTER(&ipf_nat);
1623 error = nat_insert(nat, nat->nat_rev);
1624 if ((error == 0) && (aps != NULL)) {
1625 aps->aps_next = ap_sess_list;
1629 RWLOCK_EXIT(&ipf_nat);
1639 (void) fr_derefrule(&fr);
1641 if ((ipnn != NULL) && (ipnn != &ipn)) {
1642 KFREES(ipnn, ipn.ipn_dsize);
1646 if (aps->aps_data != NULL) {
1647 KFREES(aps->aps_data, aps->aps_psiz);
1653 appr_free(in->in_apr);
1662 /* ------------------------------------------------------------------------ */
1663 /* Function: nat_delete */
1665 /* Parameters: nat(I) - pointer to NAT structure to delete */
1666 /* logtype(I) - type of LOG record to create before deleting */
1667 /* Write Lock: ipf_nat */
1669 /* Delete a nat entry from the various lists and table. If NAT logging is */
1670 /* enabled then generate a NAT log record for this event. */
1671 /* ------------------------------------------------------------------------ */
1672 void nat_delete(nat, logtype)
/* A record is logged only for a non-zero logtype while nat_logging is on. */
1679 if (logtype != 0 && nat_logging != 0)
1680 nat_log(nat, logtype);
1683 * Take it as a general indication that all the pointers are set if
1686 if (nat->nat_pnext != NULL) {
/* Both hash buckets this entry occupied lose one from their length count. */
1689 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1690 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
/* Unlink from the list threaded through nat_next/nat_pnext. */
1692 *nat->nat_pnext = nat->nat_next;
1693 if (nat->nat_next != NULL) {
1694 nat->nat_next->nat_pnext = nat->nat_pnext;
1695 nat->nat_next = NULL;
1697 nat->nat_pnext = NULL;
/* Unlink from the hash chain of table 0. */
1699 *nat->nat_phnext[0] = nat->nat_hnext[0];
1700 if (nat->nat_hnext[0] != NULL) {
1701 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1702 nat->nat_hnext[0] = NULL;
1704 nat->nat_phnext[0] = NULL;
/* Unlink from the hash chain of table 1. */
1706 *nat->nat_phnext[1] = nat->nat_hnext[1];
1707 if (nat->nat_hnext[1] != NULL) {
1708 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1709 nat->nat_hnext[1] = NULL;
1711 nat->nat_phnext[1] = NULL;
/* Wildcard-port entries are tracked separately in ns_wilds. */
1713 if ((nat->nat_flags & SI_WILDP) != 0)
1714 nat_stats.ns_wilds--;
/* Null out the location that nat_me points at, if one was registered. */
1717 if (nat->nat_me != NULL) {
1718 *nat->nat_me = NULL;
/* Take the entry off its timeout queue when it is on one. */
1722 if (nat->nat_tqe.tqe_ifq != NULL)
1723 fr_deletequeueentry(&nat->nat_tqe);
1725 if (logtype == NL_EXPIRE)
1726 nat_stats.ns_expire++;
/* The reference count is examined while holding the entry's own lock. */
1728 MUTEX_ENTER(&nat->nat_lock);
1730 * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1731 * This happens when a nat'd packet is blocked and we want to throw
1732 * away the NAT session.
/*
 * NL_DESTROY tolerates up to two references, any other logtype only one.
 * Beyond that the entry is counted in ns_orphans (the statements that end
 * those branches are not visible in this listing).
 */
1734 if (logtype == NL_DESTROY) {
1735 if (nat->nat_ref > 2) {
1737 MUTEX_EXIT(&nat->nat_lock);
1739 nat_stats.ns_orphans++;
1742 } else if (nat->nat_ref > 1) {
1744 MUTEX_EXIT(&nat->nat_lock);
1746 nat_stats.ns_orphans++;
1749 MUTEX_EXIT(&nat->nat_lock);
1752 * At this point, nat_ref is 1, doing "--" would make it 0..
1756 nat_stats.ns_orphans--;
1758 #ifdef IPFILTER_SYNC
1760 ipfsync_del(nat->nat_sync);
/* Drop the references held on the filter rule and the host map entry. */
1763 if (nat->nat_fr != NULL)
1764 (void) fr_derefrule(&nat->nat_fr);
1766 if (nat->nat_hm != NULL)
1767 fr_hostmapdel(&nat->nat_hm);
1770 * If there is an active reference from the nat entry to its parent
1771 * rule, decrement the rule's reference count and free it too if no
1772 * longer being used.
1776 fr_ipnatderef(&ipn);
/* Final teardown: destroy the lock, free the proxy session, fix the count. */
1779 MUTEX_DESTROY(&nat->nat_lock);
1781 aps_free(nat->nat_aps);
1782 nat_stats.ns_inuse--;
1785 * If there's a fragment table entry too for this nat entry, then
1786 * dereference that as well. This is after nat_lock is released
1789 fr_forgetnat((void *)nat);
1795 /* ------------------------------------------------------------------------ */
1796 /* Function: nat_flushtable */
1797 /* Returns: int - number of NAT rules deleted */
1798 /* Parameters: Nil */
1800 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */
1801 /* log record should be emitted in nat_delete() if NAT logging is enabled. */
1802 /* ------------------------------------------------------------------------ */
1804 * nat_flushtable - clear the NAT table of all mapping entries.
1806 static int nat_flushtable()
1812 * ALL NAT mappings deleted, so lets just make the deletions
/*
 * Both hash tables are wiped wholesale, ipf_nattable_sz slots each.
 * NOTE(review): the byte count uses sizeof(nat_table[0]), the size of the
 * table pointer, where nat_clearlist() uses sizeof(*table) for its tables.
 * The two agree only while a slot is pointer-sized - confirm.
 */
1815 if (nat_table[0] != NULL)
1816 bzero((char *)nat_table[0],
1817 sizeof(nat_table[0]) * ipf_nattable_sz);
1818 if (nat_table[1] != NULL)
1819 bzero((char *)nat_table[1],
1820 sizeof(nat_table[1]) * ipf_nattable_sz);
/* Delete whatever is at the head of nat_instances until the list is empty. */
1822 while ((nat = nat_instances) != NULL) {
1823 nat_delete(nat, NL_FLUSH);
/* The in-use counter is forced to zero once the list has been drained. */
1827 nat_stats.ns_inuse = 0;
1832 /* ------------------------------------------------------------------------ */
1833 /* Function: nat_clearlist */
1834 /* Returns: int - number of NAT/RDR rules deleted */
1835 /* Parameters: Nil */
1837 /* Delete all rules in the current list of rules. There is nothing elegant */
1838 /* about this cleanup: simply free all entries on the list of rules and */
1839 /* clear out the tables used for hashed NAT rule lookups. */
1840 /* ------------------------------------------------------------------------ */
1841 static int nat_clearlist()
/* 'np' always addresses the link being examined, starting at the list head. */
1843 ipnat_t *n, **np = &nat_list;
/* Empty both rule hash tables (map and rdr) when they have been allocated. */
1846 if (nat_rules != NULL)
1847 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1848 if (rdr_rules != NULL)
1849 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1851 while ((n = *np) != NULL) {
/* A rule with in_use == 0 is torn down on the spot. */
1853 if (n->in_use == 0) {
1854 if (n->in_apr != NULL)
1855 appr_free(n->in_apr);
1856 MUTEX_DESTROY(&n->in_lock);
1858 nat_stats.ns_rules--;
/* Presumably the in-use alternative: the rule is only flagged IPN_DELETE. */
1860 n->in_flags |= IPN_DELETE;
1865 #if SOLARIS && !defined(_INET_IP_STACK_H)
1866 pfil_delayed_copy = 1;
1874 /* ------------------------------------------------------------------------ */
1875 /* Function: nat_newmap */
1876 /* Returns: int - -1 == error, 0 == success */
1877 /* Parameters: fin(I) - pointer to packet information */
1878 /* nat(I) - pointer to NAT entry */
1879 /* ni(I) - pointer to structure with misc. information needed */
1880 /* to create new NAT entry. */
1882 /* Given an empty NAT structure, populate it with new information about a */
1883 /* new NAT session, as defined by the matching NAT rule. */
1884 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1885 /* to the new IP address for the translation. */
1886 /* ------------------------------------------------------------------------ */
1887 static INLINE int nat_newmap(fin, nat, ni)
1892 u_short st_port, dport, sport, port, sp, dp;
1893 struct in_addr in, inb;
1902 * If it's an outbound packet which doesn't match any existing
1903 * record, then create a new port
/* Remember where the port search starts; the wrap test below compares it. */
1909 st_port = np->in_pnext;
1910 flags = ni->nai_flags;
1911 sport = ni->nai_sport;
1912 dport = ni->nai_dport;
1915 * Do a loop until we either run out of entries to try or we find
1916 * a NAT mapping that isn't currently being used. This is done
1917 * because the change to the source is not (usually) being fixed.
/* Each attempt starts from the rule's next candidate address, in_nip. */
1921 in.s_addr = htonl(np->in_nip);
1924 * Check to see if there is an existing NAT
1925 * setup for this IP address pair.
1927 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1930 in.s_addr = hm->hm_mapip.s_addr;
1931 } else if ((l == 1) && (hm != NULL)) {
/* The candidate address is converted to host byte order at this point. */
1934 in.s_addr = ntohl(in.s_addr);
/* Case: all-ones output mask and no port range configured on the rule. */
1938 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1943 if (np->in_redir == NAT_BIMAP &&
1944 np->in_inmsk == np->in_outmsk) {
1946 * map the address block in a 1:1 fashion
/* Host bits come from the packet source, network bits from in_outip. */
1948 in.s_addr = np->in_outip;
1949 in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1950 in.s_addr = ntohl(in.s_addr);
1952 } else if (np->in_redir & NAT_MAPBLK) {
1953 if ((l >= np->in_ppip) || ((l > 0) &&
1954 !(flags & IPN_TCPUDP)))
1957 * map-block - Calculate destination address.
/* The host part of the source is divided by in_ippip to pick the address. */
1959 in.s_addr = ntohl(fin->fin_saddr);
1960 in.s_addr &= ntohl(~np->in_inmsk);
1961 inb.s_addr = in.s_addr;
1962 in.s_addr /= np->in_ippip;
1963 in.s_addr &= ntohl(~np->in_outmsk);
1964 in.s_addr += ntohl(np->in_outip);
1966 * Calculate destination port.
/* The remainder of that division selects a block of in_ppip ports. */
1968 if ((flags & IPN_TCPUDP) &&
1969 (np->in_ppip != 0)) {
1970 port = ntohs(sport) + l;
1971 port %= np->in_ppip;
1972 port += np->in_ppip *
1973 (inb.s_addr % np->in_ippip);
1974 port += MAPBLK_MINPORT;
1978 } else if ((np->in_outip == 0) &&
1979 (np->in_outmsk == 0xffffffff)) {
1981 * 0/32 - use the interface's IP address.
1984 fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1987 in.s_addr = ntohl(in.s_addr);
1989 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1991 * 0/0 - use the original source address/port.
1995 in.s_addr = ntohl(fin->fin_saddr);
1997 } else if ((np->in_outmsk != 0xffffffff) &&
1998 (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2003 if ((flags & IPN_TCPUDP) &&
2004 ((np->in_redir & NAT_MAPBLK) == 0) &&
2005 (np->in_flags & IPN_AUTOPORTMAP)) {
2007 * "ports auto" (without map-block)
2009 if ((l > 0) && (l % np->in_ppip == 0)) {
2010 if (l > np->in_space) {
2012 } else if ((l > np->in_ppip) &&
2013 np->in_outmsk != 0xffffffff)
/* in_ppip is checked for zero before it is used as a modulus. */
2016 if (np->in_ppip != 0) {
2017 port = ntohs(sport);
2018 port += (l % np->in_ppip);
2019 port %= np->in_ppip;
2020 port += np->in_ppip *
2021 (ntohl(fin->fin_saddr) %
2023 port += MAPBLK_MINPORT;
2027 } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2028 (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2030 * Standard port translation. Select next port.
/* Take in_pnext and advance it, wrapping from in_pmax back to in_pmin. */
2032 port = htons(np->in_pnext++);
2034 if (np->in_pnext > ntohs(np->in_pmax)) {
2035 np->in_pnext = ntohs(np->in_pmin);
2036 if (np->in_outmsk != 0xffffffff)
/* For IPN_IPRANGE rules in_outmsk is compared as the top of the range. */
2041 if (np->in_flags & IPN_IPRANGE) {
2042 if (np->in_nip > ntohl(np->in_outmsk))
2043 np->in_nip = ntohl(np->in_outip);
2045 if ((np->in_outmsk != 0xffffffff) &&
2046 ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2047 ntohl(np->in_outip))
2048 np->in_nip = ntohl(np->in_outip) + 1;
2051 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2055 * Here we do a lookup of the connection as seen from
2056 * the outside. If an IP# pair already exists, try
2057 * again. So if you have A->B becomes C->B, you can
2058 * also have D->E become C->E but not D->B causing
2059 * another C->B. Also take protocol and ports into
2060 * account when determining whether a pre-existing
2061 * NAT setup will cause an external conflict where
2062 * this is appropriate.
/*
 * The packet's two port words are rearranged for the lookup with the
 * candidate port, then restored from the saved sp/dp afterwards.
 */
2064 inb.s_addr = htonl(in.s_addr);
2065 sp = fin->fin_data[0];
2066 dp = fin->fin_data[1];
2067 fin->fin_data[0] = fin->fin_data[1];
2068 fin->fin_data[1] = htons(port);
2069 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2070 (u_int)fin->fin_p, fin->fin_dst, inb);
2071 fin->fin_data[0] = sp;
2072 fin->fin_data[1] = dp;
2075 * Has the search wrapped around and come back to the
2078 if ((natl != NULL) &&
2079 (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2080 (np->in_nip != 0) && (st_ip == np->in_nip))
2083 } while (natl != NULL);
2085 if (np->in_space > 0)
2088 /* Setup the NAT table */
2089 nat->nat_inip = fin->fin_src;
2090 nat->nat_outip.s_addr = htonl(in.s_addr);
2091 nat->nat_oip = fin->fin_dst;
2092 if (nat->nat_hm == NULL)
2093 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2097 * The ICMP checksum does not have a pseudo header containing
/* sum1 covers the original source, sum2 the translated one; TCP/UDP add ports. */
2100 ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2101 ni->nai_sum2 = LONG_SUM(in.s_addr);
2102 if ((flags & IPN_TCPUDP)) {
2103 ni->nai_sum1 += ntohs(sport);
2104 ni->nai_sum2 += ntohs(port);
2107 if (flags & IPN_TCPUDP) {
2108 nat->nat_inport = sport;
2109 nat->nat_outport = port; /* sport */
2110 nat->nat_oport = dport;
2111 ((tcphdr_t *)fin->fin_dp)->th_sport = port;
2112 } else if (flags & IPN_ICMPQUERY) {
2113 ((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2114 nat->nat_inport = port;
2115 nat->nat_outport = port;
2116 } else if (fin->fin_p == IPPROTO_GRE) {
2118 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2119 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2120 nat->nat_oport = 0;/*fin->fin_data[1];*/
2121 nat->nat_inport = 0;/*fin->fin_data[0];*/
2122 nat->nat_outport = 0;/*fin->fin_data[0];*/
/*
 * NOTE(review): both nat_call slots are loaded from fin_data[0], whereas
 * nat_newrdr() stores fin_data[0] and fin_data[1].  Confirm whether
 * nat_call[1] should take fin_data[1] here.
 */
2123 nat->nat_call[0] = fin->fin_data[0];
2124 nat->nat_call[1] = fin->fin_data[0];
/* Hand the chosen address and ports back to the caller through 'ni'. */
2128 ni->nai_ip.s_addr = in.s_addr;
2129 ni->nai_port = port;
2130 ni->nai_nport = dport;
2135 /* ------------------------------------------------------------------------ */
2136 /* Function: nat_newrdr */
2137 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */
2138 /* allow rule to be moved if IPN_ROUNDR is set. */
2139 /* Parameters: fin(I) - pointer to packet information */
2140 /* nat(I) - pointer to NAT entry */
2141 /* ni(I) - pointer to structure with misc. information needed */
2142 /* to create new NAT entry. */
2144 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2145 /* to the new IP address for the translation. */
2146 /* ------------------------------------------------------------------------ */
2147 static INLINE int nat_newrdr(fin, nat, ni)
2152 u_short nport, dport, sport;
2153 struct in_addr in, inb;
/* Local copies of the flags and ports the caller placed in 'ni'. */
2165 flags = ni->nai_flags;
2166 sport = ni->nai_sport;
2167 dport = ni->nai_dport;
2170 * If the matching rule has IPN_STICKY set, then we want to have the
2171 * same rule kick in as before. Why would this happen? If you have
2172 * a collection of rdr rules with "round-robin sticky", the current
2173 * packet might match a different one to the previous connection but
2174 * we want the same destination to be used.
/* Sticky handling applies only to round-robin or split rules. */
2176 if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2177 ((np->in_flags & IPN_STICKY) != 0)) {
2178 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2181 in.s_addr = ntohl(hm->hm_mapip.s_addr);
2189 * Otherwise, it's an inbound packet. Most likely, we don't
2190 * want to rewrite source ports and source addresses. Instead,
2191 * we want to rewrite to a fixed internal address and fixed
2194 if (np->in_flags & IPN_SPLIT) {
2195 in.s_addr = np->in_nip;
2197 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2198 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2201 in.s_addr = hm->hm_mapip.s_addr;
/*
 * in_nip flips between in_inip and in_inmsk, which apparently hold the
 * two target addresses of a split rule.
 */
2206 if (hm == NULL || hm->hm_ref == 1) {
2207 if (np->in_inip == htonl(in.s_addr)) {
2208 np->in_nip = ntohl(np->in_inmsk);
2211 np->in_nip = ntohl(np->in_inip);
2215 } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2217 * 0/32 - use the interface's IP address.
/* fr_ifpaddr() returning -1 is tested as the failure case. */
2219 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2221 in.s_addr = ntohl(in.s_addr);
2223 } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2225 * 0/0 - use the original destination address/port.
2227 in.s_addr = ntohl(fin->fin_daddr);
2229 } else if (np->in_redir == NAT_BIMAP &&
2230 np->in_inmsk == np->in_outmsk) {
2232 * map the address block in a 1:1 fashion
/* Host bits come from the packet destination, network bits from in_inip. */
2234 in.s_addr = np->in_inip;
2235 in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2236 in.s_addr = ntohl(in.s_addr);
2238 in.s_addr = ntohl(np->in_inip);
/* Port selection: this test covers rules with no target port and NAT_NOTRULEPORT. */
2241 if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2245 * Whilst not optimized for the case where
2246 * pmin == pmax, the gain is not significant.
/* For a port range, dport keeps its offset from in_pmin when moved to in_pnext. */
2248 if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2249 (np->in_pmin != np->in_pmax)) {
2250 nport = ntohs(dport) - ntohs(np->in_pmin) +
2251 ntohs(np->in_pnext);
2252 nport = htons(nport);
2254 nport = np->in_pnext;
2258 * When the redirect-to address is set to 0.0.0.0, just
2259 * assume a blank `forwarding' of the packet. We don't
2260 * setup any translation for this either.
2262 if (in.s_addr == 0) {
2265 in.s_addr = ntohl(fin->fin_daddr);
2269 * Check to see if this redirect mapping already exists and if
2270 * it does, return "failure" (allowing it to be created will just
2271 * cause one or both of these "connections" to stop working.)
/*
 * The packet's port words are rearranged for the lookup using the new
 * port, then restored from the saved sp/dp afterwards.
 */
2273 inb.s_addr = htonl(in.s_addr);
2274 sp = fin->fin_data[0];
2275 dp = fin->fin_data[1];
2276 fin->fin_data[1] = fin->fin_data[0];
2277 fin->fin_data[0] = ntohs(nport);
2278 natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2279 (u_int)fin->fin_p, inb, fin->fin_src);
2280 fin->fin_data[0] = sp;
2281 fin->fin_data[1] = dp;
/* Inside address is the redirect target; outside is the packet's destination. */
2285 nat->nat_inip.s_addr = htonl(in.s_addr);
2286 nat->nat_outip = fin->fin_dst;
2287 nat->nat_oip = fin->fin_src;
2288 if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2289 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
/* sum1 covers the original destination and port, sum2 the rewritten pair. */
2292 ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2293 ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
/* Hand the chosen address and ports back to the caller through 'ni'. */
2295 ni->nai_ip.s_addr = in.s_addr;
2296 ni->nai_nport = nport;
2297 ni->nai_port = sport;
2299 if (flags & IPN_TCPUDP) {
2300 nat->nat_inport = nport;
2301 nat->nat_outport = dport;
2302 nat->nat_oport = sport;
2303 ((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2304 } else if (flags & IPN_ICMPQUERY) {
2305 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2306 nat->nat_inport = nport;
2307 nat->nat_outport = nport;
2308 } else if (fin->fin_p == IPPROTO_GRE) {
2310 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2311 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
/* For this GRE revision the call words are stored and the ports are zeroed. */
2312 nat->nat_call[0] = fin->fin_data[0];
2313 nat->nat_call[1] = fin->fin_data[1];
2314 nat->nat_oport = 0; /*fin->fin_data[0];*/
2315 nat->nat_inport = 0; /*fin->fin_data[1];*/
2316 nat->nat_outport = 0; /*fin->fin_data[1];*/
2324 /* ------------------------------------------------------------------------ */
2325 /* Function: nat_new */
2326 /* Returns: nat_t* - NULL == failure to create new NAT structure, */
2327 /* else pointer to new NAT structure */
2328 /* Parameters: fin(I) - pointer to packet information */
2329 /* np(I) - pointer to NAT rule */
2330 /* natsave(I) - pointer to where to store NAT struct pointer */
2331 /* flags(I) - flags describing the current packet */
2332 /* direction(I) - direction of packet (in/out) */
2333 /* Write Lock: ipf_nat */
2335 /* Attempts to create a new NAT entry. Does not actually change the packet */
2338 /* This function is in three main parts: (1) deal with creating a new NAT */
2339 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */
2340 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2341 /* and (3) building that structure and putting it into the NAT table(s). */
2343 /* NOTE: natsave should NOT be used to point back to an ipstate_t struct */
2344 /* as it can result in memory being corrupted. */
2345 /* ------------------------------------------------------------------------ */
2346 nat_t *nat_new(fin, np, natsave, flags, direction)
2353 u_short port = 0, sport = 0, dport = 0, nport = 0;
2354 tcphdr_t *tcp = NULL;
2355 hostmap_t *hm = NULL;
2362 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2363 qpktinfo_t *qpi = fin->fin_qpi;
/* A table already at ipf_nattable_max entries is counted as a memory failure. */
2366 if (nat_stats.ns_inuse >= ipf_nattable_max) {
2367 nat_stats.ns_memfail++;
/* Keep only flags present in both rule and packet, limited to NAT_FROMRULE. */
2373 nflags = np->in_flags & flags;
2374 nflags &= NAT_FROMRULE;
2377 ni.nai_nflags = nflags;
2378 ni.nai_flags = flags;
2382 /* Give me a new nat */
2383 KMALLOC(nat, nat_t *);
2385 nat_stats.ns_memfail++;
2387 * Try to automatically tune the max # of entries in the
2388 * table allowed to be less than what will cause kmem_alloc()
2389 * to fail and try to eliminate panics due to out of memory
2390 * conditions arising.
/*
 * NOTE(review): no lower bound on ns_inuse - 100 is visible here; confirm
 * the behaviour when fewer than 100 entries are in use.
 */
2392 if (ipf_nattable_max > ipf_nattable_sz) {
2393 ipf_nattable_max = nat_stats.ns_inuse - 100;
2394 printf("ipf_nattable_max reduced to %d\n",
/* TCP/UDP: record the packet's ports, passed through htons(), in 'ni'. */
2400 if (flags & IPN_TCPUDP) {
2402 ni.nai_sport = htons(fin->fin_sport);
2403 ni.nai_dport = htons(fin->fin_dport);
2404 } else if (flags & IPN_ICMPQUERY) {
2406 * In the ICMP query NAT code, we translate the ICMP id fields
2407 * to make them unique. This is indepedent of the ICMP type
2408 * (e.g. in the unlikely event that a host sends an echo and
2409 * an tstamp request with the same id, both packets will have
2410 * their ip address/id field changed in the same way).
2412 /* The icmp_id field is used by the sender to identify the
2413 * process making the icmp request. (the receiver justs
2414 * copies it back in its response). So, it closely matches
2415 * the concept of source port. We overlay sport, so we can
2416 * maximally reuse the existing code.
2418 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2419 ni.nai_dport = ni.nai_sport;
/* Start from an all-zero entry carrying the packet flags and rule type. */
2422 bzero((char *)nat, sizeof(*nat));
2423 nat->nat_flags = flags;
2424 nat->nat_redir = np->in_redir;
/* ipf_nat_new is taken for every entry that is not flagged NAT_SLAVE. */
2426 if ((flags & NAT_SLAVE) == 0) {
2427 MUTEX_ENTER(&ipf_nat_new);
2431 * Search the current table for a match.
2433 if (direction == NAT_OUTBOUND) {
2435 * We can now arrange to call this for the same connection
2436 * because ipf_nat_new doesn't protect the code path into
2439 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2440 fin->fin_src, fin->fin_dst);
/* Outbound: nat_newmap() picks the translated source address and port. */
2447 move = nat_newmap(fin, nat, &ni);
2455 * NAT_INBOUND is used only for redirects rules
2457 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2458 fin->fin_src, fin->fin_dst);
/* Inbound: nat_newrdr() picks the redirect target address and port. */
2465 move = nat_newrdr(fin, nat, &ni);
2473 nport = ni.nai_nport;
/* A result of 1 on a round-robin rule selects the branches that follow. */
2475 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2476 if (np->in_redir == NAT_REDIRECT) {
2479 } else if (np->in_redir == NAT_MAP) {
2485 if (flags & IPN_TCPUDP) {
2486 sport = ni.nai_sport;
2487 dport = ni.nai_dport;
2488 } else if (flags & IPN_ICMPQUERY) {
2489 sport = ni.nai_sport;
/* Fold the 32-bit checksum delta into 16 bits for nat_sumd[0]. */
2493 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2494 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2495 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2496 if ((flags & IPN_TCP) && dohwcksum &&
2497 (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2498 if (direction == NAT_OUTBOUND)
2499 ni.nai_sum1 = LONG_SUM(in.s_addr);
2501 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2502 ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2504 ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2505 nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2508 nat->nat_sumd[1] = nat->nat_sumd[0];
/* When a port changed, the IP header delta is recomputed from addresses alone. */
2510 if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2511 if (direction == NAT_OUTBOUND)
2512 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2514 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2516 ni.nai_sum2 = LONG_SUM(in.s_addr);
2518 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2519 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2521 nat->nat_ipsumd = nat->nat_sumd[0];
2522 if (!(flags & IPN_TCPUDPICMP)) {
2523 nat->nat_sumd[0] = 0;
2524 nat->nat_sumd[1] = 0;
/* nat_finalise() returning -1 is tested as the failure case. */
2528 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2532 if (flags & SI_WILDP)
2533 nat_stats.ns_wilds++;
2534 fin->fin_flx |= FI_NEWNAT;
2537 nat_stats.ns_badnat++;
2538 if ((hm = nat->nat_hm) != NULL)
/* Mirror of the MUTEX_ENTER above: released only for non-NAT_SLAVE entries. */
2543 if ((flags & NAT_SLAVE) == 0) {
2544 MUTEX_EXIT(&ipf_nat_new);
2550 /* ------------------------------------------------------------------------ */
2551 /* Function: nat_finalise */
2552 /* Returns: int - 0 == success, -1 == failure */
2553 /* Parameters: fin(I) - pointer to packet information */
2554 /* nat(I) - pointer to NAT entry */
2555 /* ni(I) - pointer to structure with misc. information needed */
2556 /* to create new NAT entry. */
/*             tcp(I)       - TCP header; th_seq is read for TCP sessions    */
/*             natsave(I)   - value stored in nat_me                         */
/*             direction(I) - value stored in nat_dir                        */
2557 /* Write Lock: ipf_nat */
2559 /* This is the tail end of constructing a new NAT entry and is the same */
2560 /* for both IPv4 and IPv6. */
2561 /* ------------------------------------------------------------------------ */
2563 static int nat_finalise(fin, nat, ni, tcp, natsave, direction)
/* Interface names are filled in only for the rule interfaces that are set. */
2576 if (np->in_ifps[0] != NULL) {
2577 COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2579 if (np->in_ifps[1] != NULL) {
2580 COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
2582 #ifdef IPFILTER_SYNC
/* Entries flagged SI_CLONE do not get a sync record. */
2583 if ((nat->nat_flags & SI_CLONE) == 0)
2584 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2587 nat->nat_me = natsave;
2588 nat->nat_dir = direction;
2589 nat->nat_ifps[0] = np->in_ifps[0];
2590 nat->nat_ifps[1] = np->in_ifps[1];
2592 nat->nat_p = fin->fin_p;
2593 nat->nat_mssclamp = np->in_mssclamp;
/* TCP sessions record the sequence number from the header, in host order. */
2594 if (nat->nat_p == IPPROTO_TCP)
2595 nat->nat_seqnext[0] = ntohl(tcp->th_seq);
/* A proxy session is created when the rule has one and this is not NAT_SLAVE. */
2597 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2598 if (appr_new(fin, nat) == -1)
/* After a successful insert nat_log() is called with the rule's in_redir. */
2601 if (nat_insert(nat, fin->fin_rev) == 0) {
2603 nat_log(nat, (u_int)np->in_redir);
2608 MUTEX_ENTER(&fr->fr_lock);
2610 MUTEX_EXIT(&fr->fr_lock);
2616 * nat_insert failed, so cleanup time...
2622 /* ------------------------------------------------------------------------ */
2623 /* Function: nat_insert */
2624 /* Returns: int - 0 == success, -1 == failure */
2625 /* Parameters: nat(I) - pointer to NAT structure */
2626 /* rev(I) - flag indicating forward/reverse direction of packet */
2627 /* Write Lock: ipf_nat */
2629 /* Insert a NAT entry into the hash tables for searching and add it to the */
2630 /* list of active NAT entries. Adjust global counters when complete. */
2631 /* ------------------------------------------------------------------------ */
2632 int nat_insert(nat, rev)
2640 * Try and return an error as early as possible, so calculate the hash
2641 * entry numbers first and then proceed.
/*
 * hv1 is derived from the inside address and hv2 from the outside address,
 * each mixed with nat_oip.  Ports take part only when neither SI_W_SPORT
 * nor SI_W_DPORT is set.
 */
2643 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2644 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2646 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2648 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2650 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2653 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2654 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2655 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2656 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
/* Both target buckets are checked against the fr_nat_maxbucket limit. */
2659 if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2660 nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
/* The bucket numbers are kept so nat_delete() can adjust the same counters. */
2664 nat->nat_hv[0] = hv1;
2665 nat->nat_hv[1] = hv2;
2667 MUTEX_INIT(&nat->nat_lock, "nat entry lock");
/* Traffic counters start from zero for both directions. */
2671 nat->nat_bytes[0] = 0;
2672 nat->nat_pkts[0] = 0;
2673 nat->nat_bytes[1] = 0;
2674 nat->nat_pkts[1] = 0;
/* Force termination of the name before resolving it to an interface. */
2676 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2677 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2679 if (nat->nat_ifnames[1][0] != '\0') {
2680 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2681 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
/* Presumably the no-second-name case: slot 1 reuses the first interface. */
2683 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2685 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2686 nat->nat_ifps[1] = nat->nat_ifps[0];
/* Push the entry onto the front of nat_instances. */
2689 nat->nat_next = nat_instances;
2690 nat->nat_pnext = &nat_instances;
2692 nat_instances->nat_pnext = &nat->nat_next;
2693 nat_instances = nat;
/* Link at the head of the hv1 chain in table 0. */
2695 natp = &nat_table[0][hv1];
2697 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2698 nat->nat_phnext[0] = natp;
2699 nat->nat_hnext[0] = *natp;
2701 nat_stats.ns_bucketlen[0][hv1]++;
/* Link at the head of the hv2 chain in table 1. */
2703 natp = &nat_table[1][hv2];
2705 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2706 nat->nat_phnext[1] = natp;
2707 nat->nat_hnext[1] = *natp;
2709 nat_stats.ns_bucketlen[1][hv2]++;
/* Queue placement is delegated to fr_setnatqueue(), which receives 'rev'. */
2711 fr_setnatqueue(nat, rev);
2713 nat_stats.ns_added++;
2714 nat_stats.ns_inuse++;
2719 /* ------------------------------------------------------------------------ */
2720 /* Function: nat_icmperrorlookup */
2721 /* Returns: nat_t* - point to matching NAT structure */
2722 /* Parameters: fin(I) - pointer to packet information */
2723 /* dir(I) - direction of packet (in/out) */
2725 /* Check if the ICMP error message is related to an existing TCP, UDP or */
2726 /* ICMP query nat entry. It is assumed that the packet is already of the */
2727 /* required length. */
2728 /* ------------------------------------------------------------------------ */
2729 nat_t *nat_icmperrorlookup(fin, dir)
2733 int flags = 0, type, minlen;
2734 icmphdr_t *icmp, *orgicmp;
2735 tcphdr_t *tcp = NULL;
2742 type = icmp->icmp_type;
2744 * Does it at least have the return (basic) IP header ?
2745 * Only a basic IP header (no options) should be with an ICMP error
2746 * header. Also, if it's not an error type, then return.
2748 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
/* The embedded (offending) IP header is taken to start 8 bytes past fin_dp. */
2754 oip = (ip_t *)((char *)fin->fin_dp + 8);
/* minlen is the embedded header length in bytes (IP_HL is in 32-bit words). */
2755 minlen = IP_HL(oip) << 2;
2756 if ((minlen < sizeof(ip_t)) ||
2757 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2760 * Is the buffer big enough for all of it ? It's the size of the IP
2761 * header claimed in the encapsulated part which is of concern. It
2762 * may be too big to be in this buffer but not so big that it's
2763 * outside the ICMP packet, leading to TCP deref's causing problems.
2764 * This is possible because we don't know how big oip_hl is when we
2765 * do the pullup early in fr_check() and thus can't gaurantee it is
2773 # if defined(MENTAT)
2774 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2777 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2778 (char *)fin->fin_ip + M_LEN(m))
/* The error's destination is compared with the embedded packet's source. */
2784 if (fin->fin_daddr != oip->ip_src.s_addr)
2788 if (p == IPPROTO_TCP)
2790 else if (p == IPPROTO_UDP)
2792 else if (p == IPPROTO_ICMP) {
2793 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2795 /* see if this is related to an ICMP query */
2796 if (nat_icmpquerytype4(orgicmp->icmp_type)) {
/* fin_data is borrowed for the lookup and restored from data[] afterwards. */
2797 data[0] = fin->fin_data[0];
2798 data[1] = fin->fin_data[1];
2799 fin->fin_data[0] = 0;
2800 fin->fin_data[1] = orgicmp->icmp_id;
2802 flags = IPN_ICMPERR|IPN_ICMPQUERY;
2804 * NOTE : dir refers to the direction of the original
2805 * ip packet. By definition the icmp error
2806 * message flows in the opposite direction.
2808 if (dir == NAT_INBOUND)
2809 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2812 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2814 fin->fin_data[0] = data[0];
2815 fin->fin_data[1] = data[1];
2820 if (flags & IPN_TCPUDP) {
2821 minlen += 8; /* + 64bits of data to get ports */
2822 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
/* The embedded ports are loaded swapped: destination first, then source. */
2825 data[0] = fin->fin_data[0];
2826 data[1] = fin->fin_data[1];
2827 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2828 fin->fin_data[0] = ntohs(tcp->th_dport);
2829 fin->fin_data[1] = ntohs(tcp->th_sport);
2831 if (dir == NAT_INBOUND) {
2832 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2835 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2838 fin->fin_data[0] = data[0];
2839 fin->fin_data[1] = data[1];
/* Remaining case: a lookup with flags 0 on the embedded addresses. */
2842 if (dir == NAT_INBOUND)
2843 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2845 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2849 /* ------------------------------------------------------------------------ */
2850 /* Function: nat_icmperror */
2851 /* Returns: nat_t* - pointer to matching NAT structure */
2852 /* Parameters: fin(I) - pointer to packet information */
2853 /* nflags(O) - pointer to NAT flags; IPN_ICMPERR is stored through it */
2854 /* dir(I) - direction of packet (in/out) */
2856 /* Fix up an ICMP packet which is an error message for an existing NAT */
2857 /* session. This will correct both packet header data and checksums. */
/* The session is located with nat_icmperrorlookup() and only IPv4 packets */
/* are considered. The IP header embedded in the ICMP payload (icmp_ip) has */
/* an address replaced from the NAT entry (ip_src from nat_inip, ip_dst from */
/* nat_outip; odst presumably selects which one - TODO confirm). Embedded */
/* TCP/UDP ports are replaced from nat_inport/nat_outport and an embedded */
/* ICMP query id is replaced with nat_inport. Checksums are patched with */
/* deltas through fix_datacksum(), fix_incksum() and fix_outcksum() rather */
/* than being recomputed. */
2859 /* This should *ONLY* be used for incoming ICMP error packets to make sure */
2860 /* a NAT'd ICMP packet gets correctly recognised. */
2861 /* ------------------------------------------------------------------------ */
2862 nat_t *nat_icmperror(fin, nflags, dir)
2867 u_32_t sum1, sum2, sumd, sumd2;
2868 struct in_addr a1, a2;
2869 int flags, dlen, odst;
/* The short-packet and fragment-body flags are tested before anything in */
/* the ICMP payload is read. */
2877 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2880 * nat_icmperrorlookup() will return NULL for `defective' packets.
2882 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2889 *nflags = IPN_ICMPERR;
2891 oip = (ip_t *)&icmp->icmp_ip;
2892 dp = (((char *)oip) + (IP_HL(oip) << 2));
2893 if (oip->ip_p == IPPROTO_TCP) {
2894 tcp = (tcphdr_t *)dp;
2895 csump = (u_short *)&tcp->th_sum;
2897 } else if (oip->ip_p == IPPROTO_UDP) {
2900 udp = (udphdr_t *)dp;
2901 tcp = (tcphdr_t *)dp;
2902 csump = (u_short *)&udp->uh_sum;
2904 } else if (oip->ip_p == IPPROTO_ICMP)
2905 flags = IPN_ICMPQUERY;
2906 dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);
2909 * Need to adjust ICMP header to include the real IP#'s and
2910 * port #'s. Only apply a checksum change relative to the
2911 * IP address change as it will be modified again in fr_checknatout
2912 * for both address and port. Two checksum changes are
2913 * necessary for the two header address changes. Be careful
2914 * to only modify the checksum once for the port # and twice
2920 * Fix the IP addresses in the offending IP packet. You also need
2921 * to adjust the IP header checksum of that offending IP packet.
2923 * Normally, you would expect that the ICMP checksum of the
2924 * ICMP error message needs to be adjusted as well for the
2925 * IP address change in oip.
2926 * However, this is a NOP, because the ICMP checksum is
2927 * calculated over the complete ICMP packet, which includes the
2928 * changed oip IP addresses and oip->ip_sum. However, these
2929 * two changes cancel each other out (if the delta for
2930 * the IP address is x, then the delta for ip_sum is minus x),
2931 * so no change in the icmp_cksum is necessary.
2935 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2936 * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
2937 * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
2939 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2940 * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2941 * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
2945 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2946 * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2947 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2949 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2950 * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
2951 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2954 odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
2956 a1.s_addr = ntohl(nat->nat_inip.s_addr);
2957 a2.s_addr = ntohl(oip->ip_src.s_addr);
2958 oip->ip_src.s_addr = htonl(a1.s_addr);
2960 a1.s_addr = ntohl(nat->nat_outip.s_addr);
2961 a2.s_addr = ntohl(oip->ip_dst.s_addr);
2962 oip->ip_dst.s_addr = htonl(a1.s_addr);
2965 sumd = a2.s_addr - a1.s_addr;
2967 if (a1.s_addr > a2.s_addr)
2971 fix_datacksum(&oip->ip_sum, sumd);
2979 * Fix UDP pseudo header checksum to compensate for the
2980 * IP address change.
2982 if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
2985 * For offending TCP/UDP IP packets, translate the ports as
2986 * well, based on the NAT specification. Of course such
2987 * a change may be reflected in the ICMP checksum as well.
2989 * Since the port fields are part of the TCP/UDP checksum
2990 * of the offending IP packet, you need to adjust that checksum
2991 * as well... except that the change in the port numbers should
2992 * be offset by the checksum change. However, the TCP/UDP
2993 * checksum will also need to change if there has been an
2994 * IP address change.
2997 sum1 = ntohs(nat->nat_inport);
2998 sum2 = ntohs(tcp->th_sport);
3000 tcp->th_sport = htons(sum1);
3002 sum1 = ntohs(nat->nat_outport);
3003 sum2 = ntohs(tcp->th_dport);
3005 tcp->th_dport = htons(sum1);
3008 sumd += sum1 - sum2;
3009 if (sumd != 0 || sumd2 != 0) {
3011 * At this point, sumd is the delta to apply to the
3012 * TCP/UDP header, given the changes in both the IP
3013 * address and the ports and sumd2 is the delta to
3014 * apply to the ICMP header, given the IP address
3015 * change delta that may need to be applied to the
3016 * TCP/UDP checksum instead.
3018 * If we will both the IP and TCP/UDP checksums
3019 * then the ICMP checksum changes by the address
3020 * delta applied to the TCP/UDP checksum. If we
3021 * do not change the TCP/UDP checksum them we
3022 * apply the delta in ports to the ICMP checksum.
3024 if (oip->ip_p == IPPROTO_UDP) {
3025 if ((dlen >= 8) && (*csump != 0)) {
3026 fix_datacksum(csump, sumd);
3028 sumd2 = sum1 - sum2;
3032 } else if (oip->ip_p == IPPROTO_TCP) {
3034 fix_datacksum(csump, sumd);
3036 sumd2 = sum2 - sum1;
3046 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3047 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3048 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3050 if ((odst == 0) && (dir == NAT_OUTBOUND) &&
3051 (fin->fin_rev == 0) && (np != NULL) &&
3052 (np->in_redir & NAT_REDIRECT)) {
3053 fix_outcksum(fin, &icmp->icmp_cksum,
3056 fix_incksum(fin, &icmp->icmp_cksum,
3061 } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
3065 * XXX - what if this is bogus hl and we go off the end ?
3066 * In this case, nat_icmperrorlookup() will have returned NULL.
3068 orgicmp = (icmphdr_t *)dp;
3071 if (orgicmp->icmp_id != nat->nat_inport) {
3074 * Fix ICMP checksum (of the offening ICMP
3075 * query packet) to compensate the change
3076 * in the ICMP id of the offending ICMP
3079 * Since you modify orgicmp->icmp_id with
3080 * a delta (say x) and you compensate that
3081 * in origicmp->icmp_cksum with a delta
3082 * minus x, you don't have to adjust the
3083 * overall icmp->icmp_cksum
/* sum1 and sum2 hold the old and the new id in host byte order; the delta */
/* produced by CALC_SUMD is applied to the embedded ICMP checksum only. */
3085 sum1 = ntohs(orgicmp->icmp_id);
3086 sum2 = ntohs(nat->nat_inport);
3087 CALC_SUMD(sum1, sum2, sumd);
3088 orgicmp->icmp_id = nat->nat_inport;
3089 fix_datacksum(&orgicmp->icmp_cksum, sumd);
3091 } /* nat_dir == NAT_INBOUND is impossible for icmp queries */
3098 * NB: these lookups don't lock access to the list; it is assumed that this has
3099 * already been done!
3102 /* ------------------------------------------------------------------------ */
3103 /* Function: nat_inlookup */
3104 /* Returns: nat_t* - NULL == no match, */
3105 /* else pointer to matching NAT entry */
3106 /* Parameters: fin(I) - pointer to packet information */
3107 /* flags(I) - NAT flags for this packet */
3108 /* p(I) - protocol for this packet */
3109 /* src(I) - source IP address */
3110 /* mapdst(I) - destination IP address */
3112 /* Lookup a nat entry based on the mapped destination ip address/port and */
3113 /* real source address/port. We use this lookup when receiving a packet, */
3114 /* we're looking for a table entry, based on the destination address. */
/* The search walks nat_table[1] along nat_hnext[1]. A first pass hashes on */
/* addresses and ports; a second pass, for entries with wildcard ports, */
/* hashes on addresses only and runs between WRITE_ENTER(&ipf_nat) and */
/* MUTEX_DOWNGRADE(&ipf_nat). */
3116 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3118 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3119 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN flags. */
3121 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3122 /* the packet is of said protocol */
3123 /* ------------------------------------------------------------------------ */
3124 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3127 struct in_addr src , mapdst;
3129 u_short sport, dport;
3143 dst = mapdst.s_addr;
3144 sflags = flags & NAT_TCPUDPICMP;
/* Port keys come from fin_data[]: passed through htons() here, or taken as */
/* the raw fin_data[1] value in the assignments after that (presumably the */
/* ICMP case, where IPN_ICMPERR decides whether it becomes sport). */
3150 sport = htons(fin->fin_data[0]);
3151 dport = htons(fin->fin_data[1]);
3154 if (flags & IPN_ICMPERR)
3155 sport = fin->fin_data[1];
3157 dport = fin->fin_data[1];
/* Callers that set SI_WILDP go straight to the wildcard-port search. */
3164 if ((flags & SI_WILDP) != 0)
3165 goto find_in_wild_ports;
/* Two-stage hash: mapped destination address and port first, then source */
/* address and port, reduced to ipf_nattable_sz. */
3167 hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3168 hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
3169 nat = nat_table[1][hv];
3170 for (; nat; nat = nat->nat_hnext[1]) {
/* An entry with no interface recorded in nat_ifps[0] adopts the interface */
/* of this lookup; a recorded interface is compared against it. */
3171 if (nat->nat_ifps[0] != NULL) {
3172 if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3174 } else if (ifp != NULL)
3175 nat->nat_ifps[0] = ifp;
3177 nflags = nat->nat_flags;
3179 if (nat->nat_oip.s_addr == src.s_addr &&
3180 nat->nat_outip.s_addr == dst &&
3182 (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3183 || (p == nat->nat_p))) {
3188 if (nat->nat_call[1] != fin->fin_data[0])
3193 if ((flags & IPN_ICMPERR) != 0) {
3194 if (nat->nat_outport != sport)
3197 if (nat->nat_outport != dport)
3203 if (nat->nat_oport != sport)
3205 if (nat->nat_outport != dport)
3213 if ((ipn != NULL) && (nat->nat_aps != NULL))
3214 if (appr_match(fin, nat) != 0)
3221 * So if we didn't find it but there are wildcard members in the hash
3222 * table, go back and look for them. We do this search and update here
3223 * because it is modifying the NAT table and we want to do this only
3224 * for the first packet that matches. The exception, of course, is
3225 * for "dummy" (FI_IGNORE) lookups.
/* The wildcard pass is gated on NAT_TCPUDP and NAT_SEARCH in flags and on */
/* a non-zero ns_wilds counter. */
3228 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3230 if (nat_stats.ns_wilds == 0)
/* The lock on ipf_nat is released and re-taken for writing; the hash for */
/* this pass uses the addresses only (port argument 0). */
3233 RWLOCK_EXIT(&ipf_nat);
3235 hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3236 hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
3238 WRITE_ENTER(&ipf_nat);
3240 nat = nat_table[1][hv];
3241 for (; nat; nat = nat->nat_hnext[1]) {
3242 if (nat->nat_ifps[0] != NULL) {
3243 if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3245 } else if (ifp != NULL)
3246 nat->nat_ifps[0] = ifp;
3248 if (nat->nat_p != fin->fin_p)
3250 if (nat->nat_oip.s_addr != src.s_addr ||
3251 nat->nat_outip.s_addr != dst)
3254 nflags = nat->nat_flags;
3255 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3258 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3259 NAT_INBOUND) == 1) {
3260 if ((fin->fin_flx & FI_IGNORE) != 0)
/* SI_CLONE entries are duplicated with fr_natclone() and the copy is used */
/* from here on. */
3262 if ((nflags & SI_CLONE) != 0) {
3263 nat = fr_natclone(fin, nat);
/* The wildcard counter is decremented under the ipf_nat_new mutex. */
3267 MUTEX_ENTER(&ipf_nat_new);
3268 nat_stats.ns_wilds--;
3269 MUTEX_EXIT(&ipf_nat_new);
/* Pin the ports of this packet on the entry and clear both wildcard-port */
/* flags. */
3271 nat->nat_oport = sport;
3272 nat->nat_outport = dport;
3273 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3279 MUTEX_DOWNGRADE(&ipf_nat);
3285 /* ------------------------------------------------------------------------ */
3286 /* Function: nat_tabmove */
3288 /* Parameters: nat(I) - pointer to NAT structure */
3289 /* Write Lock: ipf_nat */
3291 /* This function is only called for TCP/UDP NAT table entries where the */
3292 /* original was placed in the table without hashing on the ports and we now */
3293 /* want to include hashing on port numbers. */
/* The entry is unlinked from its bucket in nat_table[0] and nat_table[1], */
/* both hash values are recomputed with the port fields included, and the */
/* entry is linked at the head of the new buckets. The per-bucket counters */
/* in nat_stats.ns_bucketlen and the cached nat_hv[] values are kept in */
/* step. */
3294 /* ------------------------------------------------------------------------ */
3295 static void nat_tabmove(nat)
/* SI_CLONE entries are tested for first (presumably they are not moved - */
/* TODO confirm). */
3301 if (nat->nat_flags & SI_CLONE)
3305 * Remove the NAT entry from the old location
/* Unlink from the nat_table[0] chain: repoint the back-pointer of the */
/* successor, then the link of the predecessor, and shrink the old bucket. */
3307 if (nat->nat_hnext[0])
3308 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3309 *nat->nat_phnext[0] = nat->nat_hnext[0];
3310 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
/* Same unlink for the nat_table[1] chain. */
3312 if (nat->nat_hnext[1])
3313 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3314 *nat->nat_phnext[1] = nat->nat_hnext[1];
3315 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3318 * Add into the NAT table in the new position
/* Table 0 key: nat_inip/nat_inport combined with nat_oip/nat_oport. */
3320 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3321 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3323 nat->nat_hv[0] = hv;
3324 natp = &nat_table[0][hv];
3326 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3327 nat->nat_phnext[0] = natp;
3328 nat->nat_hnext[0] = *natp;
3330 nat_stats.ns_bucketlen[0][hv]++;
/* Table 1 key: nat_outip/nat_outport combined with nat_oip/nat_oport. */
3332 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3333 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3335 nat->nat_hv[1] = hv;
3336 natp = &nat_table[1][hv];
3338 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3339 nat->nat_phnext[1] = natp;
3340 nat->nat_hnext[1] = *natp;
3342 nat_stats.ns_bucketlen[1][hv]++;
3346 /* ------------------------------------------------------------------------ */
3347 /* Function: nat_outlookup */
3348 /* Returns: nat_t* - NULL == no match, */
3349 /* else pointer to matching NAT entry */
3350 /* Parameters: fin(I) - pointer to packet information */
3351 /* flags(I) - NAT flags for this packet */
3352 /* p(I) - protocol for this packet */
3353 /* src(I) - source IP address */
3354 /* dst(I) - destination IP address */
3355 /* NOTE: the argument list below has no rw parameter; the lock state is not passed in. */
3357 /* Lookup a nat entry based on the source 'real' ip address/port and */
3358 /* destination address/port. We use this lookup when sending a packet out, */
3359 /* we're looking for a table entry, based on the source address. */
/* The search walks nat_table[0] along nat_hnext[0]. A first pass hashes on */
/* addresses and ports; a second pass, for entries with wildcard ports, */
/* hashes on addresses only and runs between WRITE_ENTER(&ipf_nat) and */
/* MUTEX_DOWNGRADE(&ipf_nat). */
3361 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3363 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3364 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN flags. */
3366 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3367 /* the packet is of said protocol */
3368 /* ------------------------------------------------------------------------ */
3369 nat_t *nat_outlookup(fin, flags, p, src, dst)
3372 struct in_addr src , dst;
3374 u_short sport, dport;
3385 sflags = flags & IPN_TCPUDPICMP;
/* Port keys come from fin_data[]: passed through htons() here, or taken as */
/* the raw fin_data[1] value in the assignments after that (presumably the */
/* ICMP case, where IPN_ICMPERR decides whether it becomes sport). */
3393 sport = htons(fin->fin_data[0]);
3394 dport = htons(fin->fin_data[1]);
3397 if (flags & IPN_ICMPERR)
3398 sport = fin->fin_data[1];
3400 dport = fin->fin_data[1];
/* Callers that set SI_WILDP go straight to the wildcard-port search. */
3406 if ((flags & SI_WILDP) != 0)
3407 goto find_out_wild_ports;
/* Two-stage hash: source address and port first, then destination address */
/* and port, reduced to ipf_nattable_sz. */
3409 hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3410 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
3411 nat = nat_table[0][hv];
3412 for (; nat; nat = nat->nat_hnext[0]) {
/* An entry with no interface recorded in nat_ifps[1] adopts the interface */
/* of this lookup; a recorded interface is compared against it. */
3413 if (nat->nat_ifps[1] != NULL) {
3414 if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3416 } else if (ifp != NULL)
3417 nat->nat_ifps[1] = ifp;
3419 nflags = nat->nat_flags;
3421 if (nat->nat_inip.s_addr == srcip &&
3422 nat->nat_oip.s_addr == dst.s_addr &&
3423 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3424 || (p == nat->nat_p))) {
3429 if (nat->nat_call[1] != fin->fin_data[0])
3435 if (nat->nat_oport != dport)
3437 if (nat->nat_inport != sport)
3445 if ((ipn != NULL) && (nat->nat_aps != NULL))
3446 if (appr_match(fin, nat) != 0)
3453 * So if we didn't find it but there are wildcard members in the hash
3454 * table, go back and look for them. We do this search and update here
3455 * because it is modifying the NAT table and we want to do this only
3456 * for the first packet that matches. The exception, of course, is
3457 * for "dummy" (FI_IGNORE) lookups.
3459 find_out_wild_ports:
/* The wildcard pass is gated on NAT_TCPUDP and NAT_SEARCH in flags and on */
/* a non-zero ns_wilds counter. */
3460 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3462 if (nat_stats.ns_wilds == 0)
/* The lock on ipf_nat is released and re-taken for writing; the hash for */
/* this pass uses the addresses only (port argument 0). */
3465 RWLOCK_EXIT(&ipf_nat);
3467 hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3468 hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
3470 WRITE_ENTER(&ipf_nat);
3472 nat = nat_table[0][hv];
3473 for (; nat; nat = nat->nat_hnext[0]) {
3474 if (nat->nat_ifps[1] != NULL) {
3475 if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3477 } else if (ifp != NULL)
3478 nat->nat_ifps[1] = ifp;
3480 if (nat->nat_p != fin->fin_p)
3482 if ((nat->nat_inip.s_addr != srcip) ||
3483 (nat->nat_oip.s_addr != dst.s_addr))
3486 nflags = nat->nat_flags;
3487 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3490 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3491 NAT_OUTBOUND) == 1) {
3492 if ((fin->fin_flx & FI_IGNORE) != 0)
/* SI_CLONE entries are duplicated with fr_natclone() and the copy is used */
/* from here on. */
3494 if ((nflags & SI_CLONE) != 0) {
3495 nat = fr_natclone(fin, nat);
/* The wildcard counter is decremented under the ipf_nat_new mutex. */
3499 MUTEX_ENTER(&ipf_nat_new);
3500 nat_stats.ns_wilds--;
3501 MUTEX_EXIT(&ipf_nat_new);
/* Pin the ports of this packet on the entry; a zero nat_outport also takes */
/* the source port. Both wildcard-port flags are then cleared. */
3503 nat->nat_inport = sport;
3504 nat->nat_oport = dport;
3505 if (nat->nat_outport == 0)
3506 nat->nat_outport = sport;
3507 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3513 MUTEX_DOWNGRADE(&ipf_nat);
3519 /* ------------------------------------------------------------------------ */
3520 /* Function: nat_lookupredir */
3521 /* Returns: nat_t* - NULL == no match, */
3522 /* else pointer to matching NAT entry */
3523 /* Parameters: np(I) - pointer to description of packet to find NAT table */
3526 /* Lookup the NAT tables to search for a matching redirect */
3527 /* The contents of natlookup_t should imitate those found in a packet that */
3528 /* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3529 /* We can do the lookup in one of two ways, imitating an inbound or */
3530 /* outbound packet. By default we assume outbound, unless IPN_IN is set. */
3531 /* For IN, the fields are set as follows: */
3532 /* nl_real* = source information */
3533 /* nl_out* = destination information (translated) */
3534 /* For an out packet, the fields are set like this: */
3535 /* nl_in* = source information (untranslated) */
3536 /* nl_out* = destination information (translated) */
/* np is also written: a successful IPN_IN lookup stores nl_inip/nl_inport, */
/* an outbound lookup stores nl_realip/nl_realport, and IPN_FINDFORWARD can */
/* be cleared from nl_flags. */
3537 /* ------------------------------------------------------------------------ */
3538 nat_t *nat_lookupredir(np)
/* A zeroed fi stands in for a real packet; only its port slots and its */
/* protocol are filled in from the request. */
3544 bzero((char *)&fi, sizeof(fi));
3545 if (np->nl_flags & IPN_IN) {
3546 fi.fin_data[0] = ntohs(np->nl_realport);
3547 fi.fin_data[1] = ntohs(np->nl_outport);
3549 fi.fin_data[0] = ntohs(np->nl_inport);
3550 fi.fin_data[1] = ntohs(np->nl_outport);
/* The protocol is derived from the IPN_* bits in nl_flags. */
3552 if (np->nl_flags & IPN_TCP)
3553 fi.fin_p = IPPROTO_TCP;
3554 else if (np->nl_flags & IPN_UDP)
3555 fi.fin_p = IPPROTO_UDP;
3556 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3557 fi.fin_p = IPPROTO_ICMP;
3560 * We can do two sorts of lookups:
3561 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3562 * - default: we have the `in' and `out' address, look for `real'.
3564 if (np->nl_flags & IPN_IN) {
3565 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3566 np->nl_realip, np->nl_outip))) {
3567 np->nl_inip = nat->nat_inip;
3568 np->nl_inport = nat->nat_inport;
3572 * If nl_inip is non null, this is a lookup based on the real
3573 * ip address. Else, we use the fake.
3575 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3576 np->nl_inip, np->nl_outip))) {
/* With IPN_FINDFORWARD set, a second zeroed fin carrying the ports of the */
/* entry is run through nat_inlookup(); a non-NULL result clears the flag */
/* in nl_flags. */
3578 if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3580 bzero((char *)&fin, sizeof(fin));
3581 fin.fin_p = nat->nat_p;
3582 fin.fin_data[0] = ntohs(nat->nat_outport);
3583 fin.fin_data[1] = ntohs(nat->nat_oport);
3584 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3586 nat->nat_oip) != NULL) {
3587 np->nl_flags &= ~IPN_FINDFORWARD;
/* Report the translated address and port back through np. */
3591 np->nl_realip = nat->nat_outip;
3592 np->nl_realport = nat->nat_outport;
3600 /* ------------------------------------------------------------------------ */
3601 /* Function: nat_match */
3602 /* Returns: int - 0 == no match, 1 == match */
3603 /* Parameters: fin(I) - pointer to packet information */
3604 /* np(I) - pointer to NAT rule */
3606 /* Pull the matching of a packet against a NAT rule out of that complex */
3607 /* loop inside fr_checknatin() and lay it out properly in its own function. */
/* The visible tests cover the IP version, the rule protocol, masked */
/* source and destination addresses (with IPN_NOTSRC/IPN_NOTDST negation) */
/* and, last, the ports through fr_tcpudpchk(). */
3608 /* ------------------------------------------------------------------------ */
3609 static int nat_match(fin, np)
/* Only IPv4 packets are examined further. */
3615 if (fin->fin_v != 4)
/* A rule protocol of 0 accepts any packet protocol. */
3618 if (np->in_p && fin->fin_p != np->in_p)
/* Map-type rules: source is checked against in_inip/in_inmsk, destination */
/* against in_srcip/in_srcmsk. The XOR with the IPN_NOT* bit inverts the */
/* sense of the address comparison. */
3622 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3624 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3625 ^ ((np->in_flags & IPN_NOTSRC) != 0))
3627 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3628 ^ ((np->in_flags & IPN_NOTDST) != 0))
/* Redirect rules: source is checked against in_srcip/in_srcmsk, */
/* destination against in_outip/in_outmsk. */
3631 if (!(np->in_redir & NAT_REDIRECT))
3633 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3634 ^ ((np->in_flags & IPN_NOTSRC) != 0))
3636 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3637 ^ ((np->in_flags & IPN_NOTDST) != 0))
/* Packets that are not TCP/UDP, are short, or are fragment bodies are */
/* checked against the port comparators ftu_scmp/ftu_dcmp of the rule; */
/* all other packets have their ports compared by fr_tcpudpchk(). */
3642 if (!(fin->fin_flx & FI_TCPUDP) ||
3643 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3644 if (ft->ftu_scmp || ft->ftu_dcmp)
3649 return fr_tcpudpchk(fin, ft);
3653 /* ------------------------------------------------------------------------ */
3654 /* Function: nat_update */
3656 /* Parameters: nat(I) - pointer to NAT structure */
3657 /* np(I) - pointer to NAT rule */
/* fin(I) - pointer to packet information (first argument) */
3659 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */
3660 /* called with fin_rev updated - i.e. after calling nat_proto(). */
/* TCP entries with no timeout queue taken from the rule are handled too: */
/* their sequence bookkeeping is updated and fr_tcp_age() is called. */
/* nat_lock is held from the first statement to the last. */
3661 /* ------------------------------------------------------------------------ */
3662 void nat_update(fin, nat, np)
3667 ipftq_t *ifq, *ifq2;
3670 MUTEX_ENTER(&nat->nat_lock);
3671 tqe = &nat->nat_tqe;
3675 * We allow over-riding of NAT timeouts from NAT rules, even for
3676 * TCP, however, if it is TCP and there is no rule timeout set,
3677 * then do not update the timeout here.
/* ifq2 is the timeout queue of the rule for this packet direction. */
3680 ifq2 = np->in_tqehead[fin->fin_rev];
3684 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
/* dsize is the TCP payload length plus one for each of SYN and FIN. */
3691 tcpflags = tcp->th_flags;
3692 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
3693 ((tcpflags & TH_SYN) ? 1 : 0) +
3694 ((tcpflags & TH_FIN) ? 1 : 0);
3696 ack = ntohl(tcp->th_ack);
3697 end = ntohl(tcp->th_seq) + dsize;
/* The ack advances nat_seqnext[] of the opposite direction; nat_seqnext[] */
/* of this direction is only seeded while it is still zero. */
3699 if (SEQ_GT(ack, nat->nat_seqnext[1 - fin->fin_rev]))
3700 nat->nat_seqnext[1 - fin->fin_rev] = ack;
3702 if (nat->nat_seqnext[fin->fin_rev] == 0)
3703 nat->nat_seqnext[fin->fin_rev] = end;
3705 (void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
/* Otherwise the protocol is tested to pick ifq, and fr_movequeue() is */
/* called with the entry, ifq and ifq2. */
3708 if (nat->nat_p == IPPROTO_UDP)
3710 else if (nat->nat_p == IPPROTO_ICMP)
3716 fr_movequeue(tqe, ifq, ifq2);
3718 MUTEX_EXIT(&nat->nat_lock);
3722 /* ------------------------------------------------------------------------ */
3723 /* Function: fr_checknatout */
3724 /* Returns: int - -1 == packet failed NAT checks so block it, */
3725 /* 0 == no packet translation occurred, */
3726 /* 1 == packet was successfully translated. */
3727 /* Parameters: fin(I) - pointer to packet information */
3728 /* passp(I) - pointer to filtering result flags */
3730 /* Check to see if an outgoing packet should be changed. ICMP packets are */
3731 /* first checked to see if they match an existing entry (if an error), */
3732 /* otherwise a search of the current NAT table is made. If neither results */
3733 /* in a match then a search for a matching NAT rule is made. Create a new */
3734 /* NAT entry if we matched a NAT rule. Lastly, actually change the */
3735 /* packet header(s) as required. */
/* ipf_nat is taken for reading, swapped for a write lock during the rule */
/* search, downgraded again and released before returning. fin_ifp is saved */
/* on entry and restored at the end. */
3736 /* ------------------------------------------------------------------------ */
3737 int fr_checknatout(fin, passp)
3741 struct ifnet *ifp, *sifp;
3742 icmphdr_t *icmp = NULL;
3743 tcphdr_t *tcp = NULL;
3744 int rval, natfailed;
/* Both a zero rule count and a non-zero fr_nat_lock are tested up front. */
3752 if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
/* Remember the interface of the packet so it can be restored on exit. */
3757 sifp = fin->fin_ifp;
3759 ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3760 if ((ifp != NULL) && (ifp != (void *)-1))
/* Protocol details are only inspected for packets that are not short and */
/* have a zero fragment offset. */
3765 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3778 * This is an incoming packet, so the destination is
3779 * the icmp_id and the source port equals 0
3781 if (nat_icmpquerytype4(icmp->icmp_type))
3782 nflags = IPN_ICMPQUERY;
3788 if ((nflags & IPN_TCPUDP))
3792 ipa = fin->fin_saddr;
3794 READ_ENTER(&ipf_nat);
/* Lookup order: ICMP error against an existing session, then the fragment */
/* cache, then the NAT table through nat_outlookup() with NAT_SEARCH set. */
3796 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3797 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3799 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3801 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3802 fin->fin_src, fin->fin_dst))) {
3803 nflags = nat->nat_flags;
3805 u_32_t hv, msk, nmsk;
3808 * If there is no current entry in the nat table for this IP#,
3809 * create one for it (if there is a matching rule).
3811 RWLOCK_EXIT(&ipf_nat);
3814 WRITE_ENTER(&ipf_nat);
/* Rules are bucketed by the masked source address; each candidate on the */
/* in_mnext chain is filtered by interface, IP version, protocol, flags */
/* and address before nat_new() is called. */
3816 iph = ipa & htonl(msk);
3817 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3818 for (np = nat_rules[hv]; np; np = np->in_mnext)
3820 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3822 if (np->in_v != fin->fin_v)
3824 if (np->in_p && (np->in_p != fin->fin_p))
3826 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3828 if (np->in_flags & IPN_FILTER) {
3829 if (!nat_match(fin, np))
3831 } else if ((ipa & np->in_inmsk) != np->in_inip)
3835 !fr_matchtag(&np->in_tag, &fr->fr_nattag))
/* Rules with a non-empty in_plabel are additionally checked with */
/* appr_ok(). */
3838 if (*np->in_plabel != '\0') {
3839 if (((np->in_flags & IPN_FILTER) == 0) &&
3840 (np->in_dport != tcp->th_dport))
3842 if (appr_ok(fin, tcp, np) == 0)
3846 if ((nat = nat_new(fin, np, NULL, nflags,
3853 if ((np == NULL) && (nmsk != 0)) {
3856 if (nmsk & 0x80000000)
3865 MUTEX_DOWNGRADE(&ipf_nat);
/* fr_natout() performs the translation; its result is kept in rval. */
3869 rval = fr_natout(fin, nat, natadd, nflags);
3871 MUTEX_ENTER(&nat->nat_lock);
3873 MUTEX_EXIT(&nat->nat_lock);
3874 nat->nat_touched = fr_ticks;
3879 RWLOCK_EXIT(&ipf_nat);
3884 fin->fin_flx |= FI_BADNAT;
3886 fin->fin_ifp = sifp;
3890 /* ------------------------------------------------------------------------ */
3891 /* Function: fr_natout */
3892 /* Returns: int - -1 == packet failed NAT checks so block it, */
3893 /* 1 == packet was successfully translated. */
3894 /* Parameters: fin(I) - pointer to packet information */
3895 /* nat(I) - pointer to NAT structure */
3896 /* natadd(I) - flag indicating if it is safe to add frag cache */
3897 /* nflags(I) - NAT flags set for this packet */
3899 /* Translate a packet coming "out" on an interface. */
/* The source address becomes nat_outip and, for TCP/UDP or ICMP queries */
/* with a non-zero nat_outport, the source port or ICMP id becomes */
/* nat_outport. Checksums are adjusted by precomputed deltas and the */
/* timeout of the entry is refreshed through nat_update(). */
3900 /* ------------------------------------------------------------------------ */
3901 int fr_natout(fin, nat, natadd, nflags)
/* Fragments are entered in the NAT fragment cache when the caller allows */
/* it and a rule pointer is available. */
3918 if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3919 (void) fr_nat_newfrag(fin, 0, nat);
/* Per-entry byte accounting for slot 1, done under nat_lock. */
3921 MUTEX_ENTER(&nat->nat_lock);
3922 nat->nat_bytes[1] += fin->fin_plen;
3924 MUTEX_EXIT(&nat->nat_lock);
3927 * Fix up checksums, not by recalculating them, but
3928 * simply computing adjustments.
3929 * This is only done for STREAMS based IP implementations where the
3930 * checksum has already been calculated by IP. In all other cases,
3931 * IPFilter is called before the checksum needs calculating so there
3932 * is no call to modify whatever is in the header now.
3934 if (fin->fin_v == 4) {
/* For ICMP errors the IP checksum delta is computed here, from the */
/* current source address and nat_outip. */
3935 if (nflags == IPN_ICMPERR) {
3936 u_32_t s1, s2, sumd;
3938 s1 = LONG_SUM(ntohl(fin->fin_saddr));
3939 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3940 CALC_SUMD(s1, s2, sumd);
3941 fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3943 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3944 defined(linux) || defined(BRIDGE_IPF)
3947 * Strictly speaking, this isn't necessary on BSD
3948 * kernels because they do checksum calculation after
3949 * this code has run BUT if ipfilter is being used
3950 * to do NAT as a bridge, that code doesn't exist.
3952 if (nat->nat_dir == NAT_OUTBOUND)
3953 fix_outcksum(fin, &fin->fin_ip->ip_sum,
3956 fix_incksum(fin, &fin->fin_ip->ip_sum,
/* Port and ICMP id rewriting is limited to packets that are not short and */
/* have a zero fragment offset. */
3962 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3963 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3966 tcp->th_sport = nat->nat_outport;
3967 fin->fin_data[0] = ntohs(nat->nat_outport);
3970 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3972 icmp->icmp_id = nat->nat_outport;
3975 csump = nat_proto(fin, nat, nflags);
/* Rewrite the source address in the IP header. */
3978 fin->fin_ip->ip_src = nat->nat_outip;
3980 nat_update(fin, nat, np);
3983 * The above comments do not hold for layer 4 (or higher) checksums...
/* A non-NULL csump from nat_proto() is adjusted by nat_sumd[1]; the */
/* direction of the entry picks fix_outcksum() or fix_incksum(). */
3985 if (csump != NULL) {
3986 if (nat->nat_dir == NAT_OUTBOUND)
3987 fix_outcksum(fin, csump, nat->nat_sumd[1]);
3989 fix_incksum(fin, csump, nat->nat_sumd[1]);
3991 #ifdef IPFILTER_SYNC
3992 ipfsync_update(SMC_NAT, fin, nat->nat_sync);
3994 /* ------------------------------------------------------------- */
3995 /* A few quick notes: */
3996 /* Following are test conditions prior to calling the */
3997 /* appr_check routine. */
3999 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4000 /* with a redirect rule, we attempt to match the packet's */
4001 /* source port against in_dport, otherwise we'd compare the */
4002 /* packet's destination. */
4003 /* ------------------------------------------------------------- */
4004 if ((np != NULL) && (np->in_apr != NULL)) {
4005 i = appr_check(fin, nat);
/* Count the translation in ns_mapped[1] and flag the packet as NATed. */
4010 ATOMIC_INCL(nat_stats.ns_mapped[1]);
4011 fin->fin_flx |= FI_NATED;
4016 /* ------------------------------------------------------------------------ */
4017 /* Function: fr_checknatin */
4018 /* Returns: int - -1 == packet failed NAT checks so block it, */
4019 /* 0 == no packet translation occurred, */
4020 /* 1 == packet was successfully translated. */
4021 /* Parameters: fin(I) - pointer to packet information */
4022 /* passp(I) - pointer to filtering result flags */
4024 /* Check to see if an incoming packet should be changed. ICMP packets are */
4025 /* first checked to see if they match an existing entry (if an error), */
4026 /* otherwise a search of the current NAT table is made. If neither results */
4027 /* in a match then a search for a matching NAT rule is made. Create a new */
4028 /* NAT entry if we matched a NAT rule. Lastly, actually change the */
4029 /* packet header(s) as required. */
/* ipf_nat is taken for reading, swapped for a write lock during the rule */
/* search, downgraded again and released before returning. */
4030 /* ------------------------------------------------------------------------ */
4031 int fr_checknatin(fin, passp)
4035 u_int nflags, natadd;
4036 int rval, natfailed;
/* Both a zero rule count and a non-zero fr_nat_lock are tested up front. */
4046 if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
/* Protocol details are only inspected for packets that are not short and */
/* have a zero fragment offset. */
4057 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4070 * This is an incoming packet, so the destination is
4071 * the icmp_id and the source port equals 0
4073 if (nat_icmpquerytype4(icmp->icmp_type)) {
4074 nflags = IPN_ICMPQUERY;
4075 dport = icmp->icmp_id;
4081 if ((nflags & IPN_TCPUDP)) {
4083 dport = tcp->th_dport;
4089 READ_ENTER(&ipf_nat);
/* Lookup order: ICMP error against an existing session, then the fragment */
/* cache, then the NAT table through nat_inlookup() with NAT_SEARCH set. */
4091 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4092 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4094 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4096 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4097 fin->fin_src, in))) {
4098 nflags = nat->nat_flags;
4100 u_32_t hv, msk, rmsk;
4102 RWLOCK_EXIT(&ipf_nat);
4105 WRITE_ENTER(&ipf_nat);
4107 * If there is no current entry in the nat table for this IP#,
4108 * create one for it (if there is a matching rule).
/* Redirect rules are bucketed by the masked address held in in; each */
/* candidate on the in_rnext chain is filtered by interface, IP version, */
/* protocol, flags, address and port range before nat_new() is called. */
4111 iph = in.s_addr & htonl(msk);
4112 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
4113 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
4114 if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4116 if (np->in_v != fin->fin_v)
4118 if (np->in_p && (np->in_p != fin->fin_p))
4120 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4122 if (np->in_flags & IPN_FILTER) {
4123 if (!nat_match(fin, np))
4126 if ((in.s_addr & np->in_outmsk) != np->in_outip)
4129 ((ntohs(np->in_pmax) < ntohs(dport)) ||
4130 (ntohs(dport) < ntohs(np->in_pmin))))
/* Rules with a non-empty in_plabel are additionally checked with */
/* appr_ok(). */
4134 if (*np->in_plabel != '\0') {
4135 if (!appr_ok(fin, tcp, np)) {
4140 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4148 if ((np == NULL) && (rmsk != 0)) {
4151 if (rmsk & 0x80000000)
4160 MUTEX_DOWNGRADE(&ipf_nat);
/* fr_natin() performs the translation; its result is kept in rval. */
4163 rval = fr_natin(fin, nat, natadd, nflags);
4165 MUTEX_ENTER(&nat->nat_lock);
4167 MUTEX_EXIT(&nat->nat_lock);
4168 nat->nat_touched = fr_ticks;
4173 RWLOCK_EXIT(&ipf_nat);
4178 fin->fin_flx |= FI_BADNAT;
4184 /* ------------------------------------------------------------------------ */
4185 /* Function: fr_natin */
4186 /* Returns: int - -1 == packet failed NAT checks so block it, */
4187 /* 1 == packet was successfully translated. */
4188 /* Parameters: fin(I) - pointer to packet information */
4189 /* nat(I) - pointer to NAT structure */
4190 /* natadd(I) - flag indicating if it is safe to add frag cache */
4191 /* nflags(I) - NAT flags set for this packet */
4192 /* Locks Held: ipf_nat (READ) */
4194 /* Translate a packet coming "in" on an interface. */
/* The destination address becomes nat_inip and, for TCP/UDP or ICMP */
/* queries with a non-zero nat_inport, the destination port or ICMP id */
/* becomes nat_inport. Checksums are adjusted by precomputed deltas and the */
/* timeout of the entry is refreshed through nat_update(). */
4195 /* ------------------------------------------------------------------------ */
4196 int fr_natin(fin, nat, natadd, nflags)
/* The packet inherits the filter rule recorded on the NAT entry. */
4211 fin->fin_fr = nat->nat_fr;
4214 if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4215 (void) fr_nat_newfrag(fin, 0, nat);
4217 /* ------------------------------------------------------------- */
4218 /* A few quick notes: */
4219 /* Following are test conditions prior to calling the */
4220 /* appr_check routine. */
4222 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4223 /* with a map rule, we attempt to match the packet's */
4224 /* source port against in_dport, otherwise we'd compare the */
4225 /* packet's destination. */
4226 /* ------------------------------------------------------------- */
4227 if (np->in_apr != NULL) {
4228 i = appr_check(fin, nat);
4235 #ifdef IPFILTER_SYNC
4236 ipfsync_update(SMC_NAT, fin, nat->nat_sync);
/* Per-entry byte accounting for slot 0, done under nat_lock. */
4239 MUTEX_ENTER(&nat->nat_lock);
4240 nat->nat_bytes[0] += fin->fin_plen;
4242 MUTEX_EXIT(&nat->nat_lock);
/* Rewrite the destination address both in the IP header and in the cached */
/* copy inside fin. */
4244 fin->fin_ip->ip_dst = nat->nat_inip;
4245 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4246 if (nflags & IPN_TCPUDP)
4250 * Fix up checksums, not by recalculating them, but
4251 * simply computing adjustments.
4252 * Why only do this for some platforms on inbound packets ?
4253 * Because for those that it is done, IP processing is yet to happen
4254 * and so the IPv4 header checksum has not yet been evaluated.
4255 * Perhaps it should always be done for the benefit of things like
4256 * fast forwarding (so that it doesn't need to be recomputed) but with
4257 * header checksum offloading, perhaps it is a moot point.
4259 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4260 defined(__osf__) || defined(linux)
4261 if (nat->nat_dir == NAT_OUTBOUND)
4262 fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4264 fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
/* Port and ICMP id rewriting is limited to packets that are not short and */
/* have a zero fragment offset. */
4267 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4268 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4269 tcp->th_dport = nat->nat_inport;
4270 fin->fin_data[1] = ntohs(nat->nat_inport);
4274 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4277 icmp->icmp_id = nat->nat_inport;
4280 csump = nat_proto(fin, nat, nflags);
4283 nat_update(fin, nat, np);
4286 * The above comments do not hold for layer 4 (or higher) checksums...
/* A non-NULL csump from nat_proto() is adjusted by nat_sumd[0]; the */
/* direction of the entry picks fix_incksum() or fix_outcksum(). */
4288 if (csump != NULL) {
4289 if (nat->nat_dir == NAT_OUTBOUND)
4290 fix_incksum(fin, csump, nat->nat_sumd[0]);
4292 fix_outcksum(fin, csump, nat->nat_sumd[0]);
/* Count the translation in ns_mapped[0], flag the packet as NATed and */
/* expose the tag of the rule when its first word is non-zero. */
4294 ATOMIC_INCL(nat_stats.ns_mapped[0]);
4295 fin->fin_flx |= FI_NATED;
4296 if (np != NULL && np->in_tag.ipt_num[0] != 0)
4297 fin->fin_nattag = &np->in_tag;
4302 /* ------------------------------------------------------------------------ */
4303 /* Function: nat_proto */
4304 /* Returns: u_short* - pointer to transport header checksum to update, */
4305 /* NULL if the transport protocol is not recognised */
4306 /* as needing a checksum update. */
4307 /* Parameters: fin(I) - pointer to packet information */
4308 /* nat(I) - pointer to NAT structure */
4309 /* nflags(I) - NAT flags set for this packet */
4311 /* Return the pointer to the checksum field for each protocol so understood.*/
4312 /* If support for making other changes to a protocol header is required, */
4313 /* that is not strictly 'address' translation, such as clamping the MSS in */
4314 /* TCP down to a specific value, then do it from here. */
4315 /* ------------------------------------------------------------------------ */
/*
 * Select the transport checksum field that the caller must adjust, and
 * record in fin_rev how the packet direction relates to the NAT entry.
 */
4316 u_short *nat_proto(fin, nat, nflags)
/*
 * fin_rev is set when the packet direction is the opposite of nat_dir:
 * an inbound packet (fin_out == 0) on a NAT_OUTBOUND entry, or the
 * other way round.
 */
4327 if (fin->fin_out == 0) {
4328 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4330 fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
/* TCP header: the checksum to adjust is th_sum. */
4338 csump = &tcp->th_sum;
4341 * Do a MSS CLAMPING on a SYN packet,
4342 * only deal IPv4 for now.
4344 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4345 nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
/* UDP header: the checksum to adjust is uh_sum. */
4353 csump = &udp->uh_sum;
/* ICMP queries: the checksum pointer is only returned when it is non-zero. */
4359 if ((nflags & IPN_ICMPQUERY) != 0) {
4360 if (icmp->icmp_cksum != 0)
4361 csump = &icmp->icmp_cksum;
4369 /* ------------------------------------------------------------------------ */
4370 /* Function: fr_natunload */
4372 /* Parameters: Nil */
4374 /* Free all memory used by NAT structures allocated at runtime. */
4375 /* ------------------------------------------------------------------------ */
/*
 * Unload path for NAT: clears rules and sessions, releases the non-proxy
 * user timeout queues, frees the lookup tables and statistics arrays and,
 * if NAT was initialised, destroys its locks.
 */
4378 ipftq_t *ifq, *ifqnext;
4380 (void) nat_clearlist();
4381 (void) nat_flushtable();
4384 * Proxy timeout queues are not cleaned here because although they
4385 * exist on the NAT list, appr_unload is called after fr_natunload
4386 * and the proxies actually are responsible for them being created.
4387 * Should the proxy timeouts have their own list? There's no real
4388 * justification as this is the only complication.
4390 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4391 ifqnext = ifq->ifq_next;
4392 if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4393 (fr_deletetimeoutqueue(ifq) == 0))
4394 fr_freetimeoutqueue(ifq);
/* Each table is freed only if it was allocated; the sizes are pointer arrays. */
4397 if (nat_table[0] != NULL) {
4398 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4399 nat_table[0] = NULL;
4401 if (nat_table[1] != NULL) {
4402 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4403 nat_table[1] = NULL;
4405 if (nat_rules != NULL) {
4406 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4409 if (rdr_rules != NULL) {
4410 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4413 if (ipf_hm_maptable != NULL) {
4414 KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4415 ipf_hm_maptable = NULL;
/*
 * NOTE(review): the bucket length arrays are freed with an element size of
 * sizeof(u_long *), whereas nat_gettable copies the same arrays out as
 * ipf_nattable_sz * sizeof(u_long). Confirm which one matches the
 * allocation; the two sizes differ on platforms where a pointer and a
 * u_long are not the same width.
 */
4417 if (nat_stats.ns_bucketlen[0] != NULL) {
4418 KFREES(nat_stats.ns_bucketlen[0],
4419 sizeof(u_long *) * ipf_nattable_sz);
4420 nat_stats.ns_bucketlen[0] = NULL;
4422 if (nat_stats.ns_bucketlen[1] != NULL) {
4423 KFREES(nat_stats.ns_bucketlen[1],
4424 sizeof(u_long *) * ipf_nattable_sz);
4425 nat_stats.ns_bucketlen[1] = NULL;
4428 if (fr_nat_maxbucket_reset == 1)
4429 fr_nat_maxbucket = 0;
/* Locks and the timeout table are torn down only when fr_nat_init is 1. */
4431 if (fr_nat_init == 1) {
4433 fr_sttab_destroy(nat_tqb);
4435 RW_DESTROY(&ipf_natfrag);
4436 RW_DESTROY(&ipf_nat);
4438 MUTEX_DESTROY(&ipf_nat_new);
4439 MUTEX_DESTROY(&ipf_natio);
4441 MUTEX_DESTROY(&nat_udptq.ifq_lock);
4442 MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4443 MUTEX_DESTROY(&nat_iptq.ifq_lock);
4448 /* ------------------------------------------------------------------------ */
4449 /* Function: fr_natexpire */
4451 /* Parameters: Nil */
4453 /* Check all of the timeout queues for entries at the top which need to be */
4455 /* ------------------------------------------------------------------------ */
/*
 * Periodic expiry: with the ipf_nat write lock held, walk the built-in and
 * the user-defined timeout queues deleting due entries, then free
 * user-defined queues that are marked for deletion and unreferenced.
 */
4458 ipftq_t *ifq, *ifqnext;
4459 ipftqent_t *tqe, *tqn;
4464 WRITE_ENTER(&ipf_nat);
/* Pass 1: the built-in queue table (nat_tqb). */
4465 for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4466 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4467 if (tqe->tqe_die > fr_ticks)
/* The successor is fetched first; tqe is not used again after nat_delete(). */
4469 tqn = tqe->tqe_next;
4470 nat_delete(tqe->tqe_parent, NL_EXPIRE);
/* Pass 2: the same walk over the user-defined queues (nat_utqe). */
4474 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4475 ifqnext = ifq->ifq_next;
4477 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4478 if (tqe->tqe_die > fr_ticks)
4480 tqn = tqe->tqe_next;
4481 nat_delete(tqe->tqe_parent, NL_EXPIRE);
/* Pass 3: free user-defined queues flagged IFQF_DELETE with ifq_ref == 0. */
4485 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4486 ifqnext = ifq->ifq_next;
4488 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4489 (ifq->ifq_ref == 0)) {
4490 fr_freetimeoutqueue(ifq);
4494 if (fr_nat_doflush != 0) {
4499 RWLOCK_EXIT(&ipf_nat);
4504 /* ------------------------------------------------------------------------ */
4505 /* Function: fr_natsync */
4507 /* Parameters: ifp(I) - pointer to network interface */
4509 /* Walk through all of the currently active NAT sessions, looking for those */
4510 /* which need to have their translated address updated. */
4511 /* ------------------------------------------------------------------------ */
/*
 * Refresh the interface pointers held by NAT sessions and NAT rules, and
 * the interface-derived address of affected sessions. An ifp of NULL is
 * treated as matching every interface.
 */
4512 void fr_natsync(ifp)
4515 u_32_t sum1, sum2, sumd;
4522 if (fr_running <= 0)
4526 * Change IP addresses for NAT sessions for any protocol except TCP
4527 * since it will break the TCP connection anyway. The only rules
4528 * which will get changed are those which are "map ... -> 0/32",
4529 * where the rule specifies the address is taken from the interface.
4532 WRITE_ENTER(&ipf_nat);
/* fr_running is tested a second time, now with the write lock held. */
4534 if (fr_running <= 0) {
4535 RWLOCK_EXIT(&ipf_nat);
/* Walk every active session; the IPN_TCP test singles out TCP entries. */
4539 for (nat = nat_instances; nat; nat = nat->nat_next) {
4540 if ((nat->nat_flags & IPN_TCP) != 0)
4544 (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4546 if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4547 (ifp == nat->nat_ifps[1]))) {
/* Both interface pointers are looked up again from the stored names. */
4548 nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4549 if (nat->nat_ifnames[1][0] != '\0') {
4550 nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4553 nat->nat_ifps[1] = nat->nat_ifps[0];
4554 ifp2 = nat->nat_ifps[0];
4559 * Change the map-to address to be the same as the
4562 sum1 = nat->nat_outip.s_addr;
/* nat_outip is replaced only when fr_ifpaddr() does not return -1. */
4563 if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4564 nat->nat_outip = in;
4565 sum2 = nat->nat_outip.s_addr;
4570 * Readjust the checksum adjustment to take into
4571 * account the new IP#.
4573 CALC_SUMD(sum1, sum2, sumd);
4574 /* XXX - dont change for TCP when solaris does
4575 * hardware checksumming.
4577 sumd += nat->nat_sumd[0];
4578 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4579 nat->nat_sumd[1] = nat->nat_sumd[0];
4583 for (n = nat_list; (n != NULL); n = n->in_next) {
4584 if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4585 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4586 if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4587 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4589 RWLOCK_EXIT(&ipf_nat);
4594 /* ------------------------------------------------------------------------ */
4595 /* Function: nat_icmpquerytype4 */
4596 /* Returns: int - 1 == success, 0 == failure */
4597 /* Parameters: icmptype(I) - ICMP type number */
4599 /* Tests to see if the ICMP type number passed is a query/response type or */
4601 /* ------------------------------------------------------------------------ */
/*
 * Classifies an IPv4 ICMP type number; the reply types listed in the
 * switch below are handled as query types.
 */
4602 static int nat_icmpquerytype4(icmptype)
4607 * For the ICMP query NAT code, it is essential that both the query
4608 * and the reply match on the NAT rule. Because the NAT structure
4609 * does not keep track of the icmptype, and a single NAT structure
4610 * is used for all icmp types with the same src, dest and id, we
4611 * simply define the replies as queries as well. The funny thing is,
4612 * altough it seems silly to call a reply a query, this is exactly
4613 * as it is defined in the IPv4 specification
4619 case ICMP_ECHOREPLY:
4621 /* route advertisement/solicitation is currently unsupported: */
4622 /* it would require rewriting the ICMP data section */
4624 case ICMP_TSTAMPREPLY:
4626 case ICMP_IREQREPLY:
4628 case ICMP_MASKREPLY:
4636 /* ------------------------------------------------------------------------ */
4637 /* Function: nat_log */
4639 /* Parameters: nat(I) - pointer to NAT structure */
4640 /* type(I) - type of log entry to create */
4642 /* Creates a NAT log entry. */
4643 /* ------------------------------------------------------------------------ */
/*
 * Fill a NAT log record (natl) from the given entry and pass it to
 * ipllog() on the IPL_LOGNAT log.
 */
4644 void nat_log(nat, type)
/* Copy addresses, byte and packet counters, ports and protocol verbatim. */
4658 natl.nl_inip = nat->nat_inip;
4659 natl.nl_outip = nat->nat_outip;
4660 natl.nl_origip = nat->nat_oip;
4661 natl.nl_bytes[0] = nat->nat_bytes[0];
4662 natl.nl_bytes[1] = nat->nat_bytes[1];
4663 natl.nl_pkts[0] = nat->nat_pkts[0];
4664 natl.nl_pkts[1] = nat->nat_pkts[1];
4665 natl.nl_origport = nat->nat_oport;
4666 natl.nl_inport = nat->nat_inport;
4667 natl.nl_outport = nat->nat_outport;
4668 natl.nl_p = nat->nat_p;
4669 natl.nl_type = type;
/*
 * nl_rule is the zero-based position of the owning rule in nat_list,
 * found with a linear search.
 */
4672 if (nat->nat_ptr != NULL) {
4673 for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4674 if (np == nat->nat_ptr) {
4675 natl.nl_rule = rulen;
4681 sizes[0] = sizeof(natl);
/* The record is submitted as a single item (last argument is 1). */
4684 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4689 #if defined(__OpenBSD__)
4690 /* ------------------------------------------------------------------------ */
4691 /* Function: nat_ifdetach */
4693 /* Parameters: ifp(I) - pointer to network interface */
4695 /* Compatibility interface for OpenBSD to trigger the correct updating of */
4696 /* interface references within IPFilter. */
4697 /* ------------------------------------------------------------------------ */
/* Only compiled when __OpenBSD__ is defined (see the enclosing #if). */
4698 void nat_ifdetach(ifp)
4707 /* ------------------------------------------------------------------------ */
4708 /* Function: fr_ipnatderef */
4710 /* Parameters: inp(I) - pointer to pointer to NAT rule */
4711 /* Write Locks: ipf_nat */
4713 /* ------------------------------------------------------------------------ */
/*
 * Release path for a NAT rule. The rule is only torn down once its use
 * count is zero and it carries the IPN_DELETE flag.
 */
4714 void fr_ipnatderef(inp)
4723 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
/* Release the proxy reference and the rule lock, then drop the rule count. */
4725 appr_free(in->in_apr);
4726 MUTEX_DESTROY(&in->in_lock);
4728 nat_stats.ns_rules--;
/*
 * Solaris builds without _INET_IP_STACK_H set pfil_delayed_copy once the
 * last rule has gone.
 */
4729 #if SOLARIS && !defined(_INET_IP_STACK_H)
4730 if (nat_stats.ns_rules == 0)
4731 pfil_delayed_copy = 1;
4737 /* ------------------------------------------------------------------------ */
4738 /* Function: fr_natderef */
4740 /* Parameters: natp(I) - pointer to pointer to NAT table entry */
4742 /* Decrement the reference counter for this NAT table entry and free it if */
4743 /* there are no more things using it. */
4745 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4746 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4747 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */
4748 /* because nat_delete() will do that and send nat_ref to -1. */
4750 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4751 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4752 /* ------------------------------------------------------------------------ */
/*
 * Drop one reference to a NAT entry. nat_ref is examined under the entry
 * lock; the nat_ref > 1 case is handled separately from the final delete.
 */
4753 void fr_natderef(natp)
4761 MUTEX_ENTER(&nat->nat_lock);
4762 if (nat->nat_ref > 1) {
4764 MUTEX_EXIT(&nat->nat_lock);
4767 MUTEX_EXIT(&nat->nat_lock);
/* The entry lock is released before the global write lock is taken. */
4769 WRITE_ENTER(&ipf_nat);
4770 nat_delete(nat, NL_EXPIRE);
4771 RWLOCK_EXIT(&ipf_nat);
4775 /* ------------------------------------------------------------------------ */
4776 /* Function: fr_natclone */
4777 /* Returns: nat_t* - NULL == cloning failed, */
4778 /* else pointer to new NAT structure */
4779 /* Parameters: fin(I) - pointer to packet information */
4780 /* nat(I) - pointer to master NAT structure */
4781 /* Write Lock: ipf_nat */
4783 /* Create a "duplicate" NAT table entry from the master. */
4784 /* ------------------------------------------------------------------------ */
/*
 * Build a new NAT entry as a copy of the master entry nat, insert it in
 * the NAT tables and log its creation.
 */
4785 static nat_t *fr_natclone(fin, nat)
4793 KMALLOC(clone, nat_t *);
/* Start from a byte-for-byte copy of the master entry. */
4796 bcopy((char *)nat, (char *)clone, sizeof(*clone));
4798 MUTEX_NUKE(&clone->nat_lock);
4800 clone->nat_aps = NULL;
4802 * Initialize all these so that nat_delete() doesn't cause a crash.
4804 clone->nat_tqe.tqe_pnext = NULL;
4805 clone->nat_tqe.tqe_next = NULL;
4806 clone->nat_tqe.tqe_ifq = NULL;
4807 clone->nat_tqe.tqe_parent = clone;
/* The copy is marked as a product of cloning: SI_CLONE off, SI_CLONED on. */
4809 clone->nat_flags &= ~SI_CLONE;
4810 clone->nat_flags |= SI_CLONED;
/* The copied host map pointer gains one more user. */
4813 clone->nat_hm->hm_ref++;
/* nat_insert() returning -1 is the failure case. */
4815 if (nat_insert(clone, fin->fin_rev) == -1) {
4819 np = clone->nat_ptr;
4822 nat_log(clone, (u_int)np->in_redir);
4827 MUTEX_ENTER(&fr->fr_lock);
4829 MUTEX_EXIT(&fr->fr_lock);
4833 * Because the clone is created outside the normal loop of things and
4834 * TCP has special needs in terms of state, initialise the timeout
4835 * state of the new NAT from here.
4837 if (clone->nat_p == IPPROTO_TCP) {
4838 (void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4841 #ifdef IPFILTER_SYNC
4842 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
/* A second log record, of type NL_CLONE, marks the entry as a clone. */
4845 nat_log(clone, NL_CLONE);
4850 /* ------------------------------------------------------------------------ */
4851 /* Function: nat_wildok */
4852 /* Returns: int - 1 == packet's ports match wildcards */
4853 /* 0 == packet's ports don't match wildcards */
4854 /* Parameters: nat(I) - NAT entry */
4855 /* sport(I) - source port */
4856 /* dport(I) - destination port */
4857 /* flags(I) - wildcard flags */
4858 /* dir(I) - packet direction */
4860 /* Use NAT entry and packet direction to determine which combination of */
4861 /* wildcard flags should be used. */
4862 /* ------------------------------------------------------------------------ */
/*
 * Port test for wildcard NAT entries: in each case both ports must either
 * equal the value stored in the entry or be excused by a wildcard flag.
 */
4863 static int nat_wildok(nat, sport, dport, flags, dir)
4871 * When called by dir is set to
4872 * nat_inlookup NAT_INBOUND (0)
4873 * nat_outlookup NAT_OUTBOUND (1)
4875 * We simply combine the packet's direction in dir with the original
4876 * "intended" direction of that NAT entry in nat->nat_dir to decide
4877 * which combination of wildcard flags to allow.
4880 switch ((dir << 1) | nat->nat_dir)
4882 case 3: /* outbound packet / outbound entry */
4883 if (((nat->nat_inport == sport) ||
4884 (flags & SI_W_SPORT)) &&
4885 ((nat->nat_oport == dport) ||
4886 (flags & SI_W_DPORT)))
/*
 * In the two mixed-direction cases (2 and 1) the wildcard bits are swapped:
 * SI_W_DPORT excuses the source port comparison and SI_W_SPORT the
 * destination port comparison.
 */
4889 case 2: /* outbound packet / inbound entry */
4890 if (((nat->nat_outport == sport) ||
4891 (flags & SI_W_DPORT)) &&
4892 ((nat->nat_oport == dport) ||
4893 (flags & SI_W_SPORT)))
4896 case 1: /* inbound packet / outbound entry */
4897 if (((nat->nat_oport == sport) ||
4898 (flags & SI_W_DPORT)) &&
4899 ((nat->nat_outport == dport) ||
4900 (flags & SI_W_SPORT)))
4903 case 0: /* inbound packet / inbound entry */
4904 if (((nat->nat_oport == sport) ||
4905 (flags & SI_W_SPORT)) &&
4906 ((nat->nat_outport == dport) ||
4907 (flags & SI_W_DPORT)))
4918 /* ------------------------------------------------------------------------ */
4919 /* Function: nat_mssclamp */
4921 /* Parameters: tcp(I) - pointer to TCP header */
4922 /* maxmss(I) - value to clamp the TCP MSS to */
4923 /* fin(I) - pointer to packet information */
4924 /* csump(I) - pointer to TCP checksum */
4926 /* Check for MSS option and clamp it if necessary. If found and changed, */
4927 /* then the TCP header checksum will be updated to reflect the change in */
4929 /* ------------------------------------------------------------------------ */
/*
 * Scan the TCP options for the MSS option and, when it is rewritten to
 * maxmss, patch the TCP checksum by the difference.
 */
4930 static void nat_mssclamp(tcp, maxmss, fin, csump)
4936 u_char *cp, *ep, opt;
/* Header length in bytes; options exist only when it exceeds sizeof(*tcp). */
4940 hlen = TCP_OFF(tcp) << 2;
4941 if (hlen > sizeof(*tcp)) {
/* cp walks the option bytes, ep marks the end of the TCP header. */
4942 cp = (u_char *)tcp + sizeof(*tcp);
4943 ep = (u_char *)tcp + hlen;
4947 if (opt == TCPOPT_EOL)
4949 else if (opt == TCPOPT_NOP) {
/* Guard: an option length that is non-positive or runs past ep. */
4957 if ((cp + advance > ep) || (advance <= 0))
/* The MSS value is read from option bytes 2 and 3, high byte first. */
4964 mss = cp[2] * 256 + cp[3];
/* Store maxmss in the same byte order and fix the checksum incrementally. */
4966 cp[2] = maxmss / 256;
4967 cp[3] = maxmss & 0xff;
4968 CALC_SUMD(mss, maxmss, sumd);
4969 fix_outcksum(fin, csump, sumd);
4973 /* ignore unknown options */
4983 /* ------------------------------------------------------------------------ */
4984 /* Function: fr_setnatqueue */
4986 /* Parameters: nat(I)- pointer to NAT structure */
4987 /* rev(I) - forward(0) or reverse(1) direction */
4988 /* Locks: ipf_nat (read or write) */
4990 /* Put the NAT entry on its default queue entry, using rev as a helper in */
4991 /* determining which queue it should be placed on. */
4992 /* ------------------------------------------------------------------------ */
/*
 * Decide which timeout queue the NAT entry belongs on for direction rev
 * and place it there.
 */
4993 void fr_setnatqueue(nat, rev)
4997 ipftq_t *oifq, *nifq;
/* An entry with a rule starts from that rule's queue for this direction. */
4999 if (nat->nat_ptr != NULL)
5000 nifq = nat->nat_ptr->in_tqehead[rev];
/* Slot of the built-in queue table indexed by tqe_state[rev]. */
5014 nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
/* oifq is the queue currently recorded in the entry's queue element. */
5022 oifq = nat->nat_tqe.tqe_ifq;
5024 * If it's currently on a timeout queue, move it from one queue to
5025 * another, else put it on the end of the newly determined queue.
5028 fr_movequeue(&nat->nat_tqe, oifq, nifq);
5030 fr_queueappend(&nat->nat_tqe, nifq, nat);
5035 /* ------------------------------------------------------------------------ */
5036 /* Function: nat_getnext */
5037 /* Returns: int - 0 == ok, else error */
5038 /* Parameters: t(I) - pointer to ipftoken structure */
5039 /* itp(I) - pointer to ipfgeniter_t structure */
5041 /* Fetch the next nat/ipnat structure pointer from the linked list and */
5042 /* copy it out to the storage space pointed to by itp_data. The next item */
5043 /* in the list to look at is put back in the ipftoken structure. */
5044 /* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
5045 /* ipf_freetoken will call a deref function for us and we dont want to call */
5046 /* that twice (second time would be in the second switch statement below). */
5047 /* ------------------------------------------------------------------------ */
/*
 * Iterator step for the host map, NAT rule and NAT session lists: pick the
 * next item under the ipf_nat read lock, take a reference on it, copy it
 * out to itp->igi_data with the lock dropped, and finally release the
 * reference on the item the iteration started from.
 */
5048 static int nat_getnext(t, itp)
5052 hostmap_t *hm, *nexthm = NULL, zerohm;
5053 ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5054 nat_t *nat, *nextnat = NULL, zeronat;
5055 int error = 0, count;
5058 count = itp->igi_nitems;
5062 READ_ENTER(&ipf_nat);
/*
 * Starting point per list type: either the list head (ipf_hm_maplist,
 * nat_list, nat_instances) or the successor of the current item.
 */
5064 switch (itp->igi_type)
5066 case IPFGENITER_HOSTMAP :
5069 nexthm = ipf_hm_maplist;
5071 nexthm = hm->hm_next;
5075 case IPFGENITER_IPNAT :
5078 nextipnat = nat_list;
5080 nextipnat = ipn->in_next;
5084 case IPFGENITER_NAT :
5087 nextnat = nat_instances;
5089 nextnat = nat->nat_next;
5093 RWLOCK_EXIT(&ipf_nat);
5097 dst = itp->igi_data;
/*
 * A non-NULL next item is referenced and stored in the token; at the end
 * of a list a zeroed structure is prepared instead.
 */
5099 switch (itp->igi_type)
5101 case IPFGENITER_HOSTMAP :
5102 if (nexthm != NULL) {
5104 ATOMIC_INC32(nexthm->hm_ref);
5105 t->ipt_data = nexthm;
5108 bzero(&zerohm, sizeof(zerohm));
5115 case IPFGENITER_IPNAT :
5116 if (nextipnat != NULL) {
5118 MUTEX_ENTER(&nextipnat->in_lock);
5119 nextipnat->in_use++;
5120 MUTEX_EXIT(&nextipnat->in_lock);
5121 t->ipt_data = nextipnat;
5124 bzero(&zeroipn, sizeof(zeroipn));
5125 nextipnat = &zeroipn;
5131 case IPFGENITER_NAT :
5132 if (nextnat != NULL) {
5134 MUTEX_ENTER(&nextnat->nat_lock);
5136 MUTEX_EXIT(&nextnat->nat_lock);
5137 t->ipt_data = nextnat;
5140 bzero(&zeronat, sizeof(zeronat));
5149 RWLOCK_EXIT(&ipf_nat);
5152 * Copying out to user space needs to be done without the lock.
5154 switch (itp->igi_type)
5156 case IPFGENITER_HOSTMAP :
5157 error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5161 dst += sizeof(*nexthm);
5164 case IPFGENITER_IPNAT :
5165 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5169 dst += sizeof(*nextipnat);
5172 case IPFGENITER_NAT :
5173 error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5177 dst += sizeof(*nextnat);
5181 if ((count == 1) || (error != 0))
5186 READ_ENTER(&ipf_nat);
5189 * We need to have the lock again here to make sure that
5190 * using _next is consistent.
5192 switch (itp->igi_type)
5194 case IPFGENITER_HOSTMAP :
5195 nexthm = nexthm->hm_next;
5197 case IPFGENITER_IPNAT :
5198 nextipnat = nextipnat->in_next;
5200 case IPFGENITER_NAT :
5201 nextnat = nextnat->nat_next;
/* Release the reference held on the item this call started from. */
5207 switch (itp->igi_type)
5209 case IPFGENITER_HOSTMAP :
5211 WRITE_ENTER(&ipf_nat);
5213 RWLOCK_EXIT(&ipf_nat);
5216 case IPFGENITER_IPNAT :
5218 fr_ipnatderef(&ipn);
5221 case IPFGENITER_NAT :
5234 /* ------------------------------------------------------------------------ */
5235 /* Function: nat_iterator */
5236 /* Returns: int - 0 == ok, else error */
5237 /* Parameters: token(I) - pointer to ipftoken structure */
5238 /* itp(I) - pointer to ipfgeniter_t structure */
5240 /* This function acts as a handler for the SIOCGENITER ioctls that use a */
5241 /* generic structure to iterate through a list. There are four different */
5242 /* linked lists of NAT related information to go through: host maps, NAT */
5243 /* rules, active NAT mappings and the NAT fragment cache. */
5244 /* ------------------------------------------------------------------------ */
/*
 * SIOCGENITER dispatcher for NAT: routes the request to nat_getnext() or
 * fr_nextfrag() according to itp->igi_type.
 */
5245 static int nat_iterator(token, itp)
5251 if (itp->igi_data == NULL)
/* The token remembers which list type it is iterating. */
5254 token->ipt_subtype = itp->igi_type;
5256 switch (itp->igi_type)
5258 case IPFGENITER_HOSTMAP :
5259 case IPFGENITER_IPNAT :
5260 case IPFGENITER_NAT :
5261 error = nat_getnext(token, itp);
/*
 * NAT fragment cache entries are walked by fr_nextfrag(). Two call forms
 * appear below, with and without the ipf_natfrag lock argument.
 * NOTE(review): presumably selected at compile time - confirm.
 */
5264 case IPFGENITER_NATFRAG :
5266 error = fr_nextfrag(token, itp, &ipfr_natlist,
5267 &ipfr_nattail, &ipf_natfrag);
5269 error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5281 /* ------------------------------------------------------------------------ */
5282 /* Function: nat_extraflush */
5283 /* Returns: int - 0 == success, -1 == failure */
5284 /* Parameters: which(I) - how to flush the active NAT table */
5285 /* Write Locks: ipf_nat */
5287 /* Flush nat tables. Three actions currently defined: */
5288 /* which == 0 : flush all nat table entries */
5289 /* which == 1 : flush TCP connections which have started to close but are */
5290 /* stuck for some reason. */
5291 /* which == 2 : flush TCP connections which have been idle for a long time, */
5292 /* starting at > 4 days idle and working back in successive half-*/
5293 /* days to at most 12 hours old. If this fails to free enough */
5294 /* slots then work backwards in half hour slots to 30 minutes. */
5295 /* If that too fails, then work backwards in 30 second intervals */
5296 /* for the last 30 minutes to at worst 30 seconds idle. */
5297 /* ------------------------------------------------------------------------ */
/*
 * Flush active NAT entries using the strategy selected by which: all
 * entries, closing TCP sessions, entries on one TCP state queue, entries
 * idle longer than a given time, or a rate-limited forced queue flush.
 */
5298 static int nat_extraflush(which)
5301 ipftq_t *ifq, *ifqnext;
5315 * Style 0 flush removes everything...
5317 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5318 nat_delete(nat, NL_FLUSH);
5325 * Since we're only interested in things that are closing,
5326 * we can start with the appropriate timeout queue.
5328 for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5329 ifq = ifq->ifq_next) {
5331 for (tqn = ifq->ifq_head; tqn != NULL; ) {
5332 nat = tqn->tqe_parent;
5333 tqn = tqn->tqe_next;
5334 if (nat->nat_p != IPPROTO_TCP)
5336 nat_delete(nat, NL_EXPIRE);
5342 * Also need to look through the user defined queues.
5344 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5345 ifqnext = ifq->ifq_next;
5346 for (tqn = ifq->ifq_head; tqn != NULL; ) {
5347 nat = tqn->tqe_parent;
5348 tqn = tqn->tqe_next;
5349 if (nat->nat_p != IPPROTO_TCP)
5352 if ((nat->nat_tcpstate[0] >
5353 IPF_TCPS_ESTABLISHED) &&
5354 (nat->nat_tcpstate[1] >
5355 IPF_TCPS_ESTABLISHED)) {
5356 nat_delete(nat, NL_EXPIRE);
5364 * Args 5-11 correspond to flushing those particular states
5365 * for TCP connections.
5367 case IPF_TCPS_CLOSE_WAIT :
5368 case IPF_TCPS_FIN_WAIT_1 :
5369 case IPF_TCPS_CLOSING :
5370 case IPF_TCPS_LAST_ACK :
5371 case IPF_TCPS_FIN_WAIT_2 :
5372 case IPF_TCPS_TIME_WAIT :
5373 case IPF_TCPS_CLOSED :
5374 tqn = nat_tqb[which].ifq_head;
5375 while (tqn != NULL) {
5376 nat = tqn->tqe_parent;
5377 tqn = tqn->tqe_next;
5378 nat_delete(nat, NL_FLUSH);
5388 * Take a large arbitrary number to mean the number of seconds
5389 * for which which consider to be the maximum value we'll allow
5390 * the expiration to be.
5392 which = IPF_TTLVAL(which);
/*
 * Entries not touched for more than which ticks are deleted; natp is only
 * advanced past entries that are kept.
 */
5393 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5394 if (fr_ticks - nat->nat_touched > which) {
5395 nat_delete(nat, NL_FLUSH);
5398 natp = &nat->nat_next;
5409 * Asked to remove inactive entries because the table is full.
5411 if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
/* The timestamp limits forced flushes to one per IPF_TTLVAL(5) ticks. */
5412 nat_last_force_flush = fr_ticks;
5413 removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5421 /* ------------------------------------------------------------------------ */
5422 /* Function: nat_flush_entry */
5423 /* Returns: 0 - always succeeds */
5424 /* Parameters: entry(I) - pointer to NAT entry */
5425 /* Write Locks: ipf_nat */
5427 /* This function is a stepping stone between ipf_queueflush() and */
5428 /* nat_delete(). It is used so we can provide a uniform interface via the */
5429 /* ipf_queueflush() function. Since the nat_delete() function returns void */
5430 /* we translate that to mean it always succeeds in deleting something. */
5431 /* ------------------------------------------------------------------------ */
/*
 * Callback handed to ipf_queueflush(): deletes the given NAT entry with
 * reason NL_FLUSH.
 */
5432 static int nat_flush_entry(entry)
5435 nat_delete(entry, NL_FLUSH);
5440 /* ------------------------------------------------------------------------ */
5441 /* Function: nat_gettable */
5442 /* Returns: int - 0 = success, else error */
5443 /* Parameters: data(I) - pointer to ioctl data */
5445 /* This function handles ioctl requests for tables of nat information. */
5446 /* At present the only table it deals with is the hash bucket statistics. */
5447 /* ------------------------------------------------------------------------ */
5448 static int nat_gettable(data)
5454 error = fr_inobj(data, &table, IPFOBJ_GTABLE);
5458 switch (table.ita_type)
5460 case IPFTABLE_BUCKETS_NATIN :
5461 error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table,
5462 ipf_nattable_sz * sizeof(u_long));
5465 case IPFTABLE_BUCKETS_NATOUT :
5466 error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table,
5467 ipf_nattable_sz * sizeof(u_long));