4 * Copyright (C) 1995-2003 by Darren Reed.
6 * See the IPFILTER.LICENCE file for details on licencing.
8 #if defined(KERNEL) || defined(_KERNEL)
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
19 #if defined(_KERNEL) && defined(__NetBSD_Version__) && \
20 (__NetBSD_Version__ >= 399002000)
21 # include <sys/kauth.h>
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
25 #if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
26 # include "opt_ipfilter_log.h"
28 # include "opt_ipfilter.h"
42 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
43 # include <sys/filio.h>
44 # include <sys/fcntl.h>
46 # include <sys/ioctl.h>
49 # include <sys/fcntl.h>
52 # include <sys/protosw.h>
54 #include <sys/socket.h>
56 # include <sys/systm.h>
57 # if !defined(__SVR4) && !defined(__svr4__)
58 # include <sys/mbuf.h>
61 #if defined(__SVR4) || defined(__svr4__)
62 # include <sys/filio.h>
63 # include <sys/byteorder.h>
65 # include <sys/dditypes.h>
67 # include <sys/stream.h>
68 # include <sys/kmem.h>
70 #if __FreeBSD_version >= 300000
71 # include <sys/queue.h>
74 #if __FreeBSD_version >= 300000
75 # include <net/if_var.h>
76 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
77 # include "opt_ipfilter.h"
83 #include <net/route.h>
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/ip.h>
90 # include <vpn/ipsec.h>
91 extern struct ifnet vpnif;
95 # include <netinet/ip_var.h>
97 #include <netinet/tcp.h>
98 #include <netinet/udp.h>
99 #include <netinet/ip_icmp.h>
100 #include "netinet/ip_compat.h"
101 #include <netinet/tcpip.h>
102 #include "netinet/ip_fil.h"
103 #include "netinet/ip_nat.h"
104 #include "netinet/ip_frag.h"
105 #include "netinet/ip_state.h"
106 #include "netinet/ip_proxy.h"
108 #include "netinet/ip_sync.h"
110 #if (__FreeBSD_version >= 300000)
111 # include <sys/malloc.h>
113 /* END OF INCLUDES */
116 #define SOCKADDR_IN struct sockaddr_in
119 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed";
120 static const char rcsid[] = "@(#)$FreeBSD$";
121 /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */
125 /* ======================================================================== */
126 /* How the NAT is organised and works. */
128 /* Inside (interface y) NAT Outside (interface x) */
129 /* -------------------- -+- ------------------------------------- */
130 /* Packet going | out, processed by fr_checknatout() for x */
131 /* ------------> | ------------> */
132 /* src=10.1.1.1 | src=192.1.1.1 */
134 /* | in, processed by fr_checknatin() for x */
135 /* <------------ | <------------ */
136 /* dst=10.1.1.1 | dst=192.1.1.1 */
137 /* -------------------- -+- ------------------------------------- */
138 /* fr_checknatout() - changes ip_src and if required, sport */
139 /* - creates a new mapping, if required. */
140 /* fr_checknatin() - changes ip_dst and if required, dport */
142 /* In the NAT table, internal source is recorded as "in" and externally */
144 /* ======================================================================== */
147 nat_t **nat_table[2] = { NULL, NULL },
148 *nat_instances = NULL;
149 ipnat_t *nat_list = NULL;
150 u_int ipf_nattable_max = NAT_TABLE_MAX;
151 u_int ipf_nattable_sz = NAT_TABLE_SZ;
152 u_int ipf_natrules_sz = NAT_SIZE;
153 u_int ipf_rdrrules_sz = RDR_SIZE;
154 u_int ipf_hostmap_sz = HOSTMAP_SIZE;
155 u_int fr_nat_maxbucket = 0,
156 fr_nat_maxbucket_reset = 1;
157 u_32_t nat_masks = 0;
158 u_32_t rdr_masks = 0;
159 u_long nat_last_force_flush = 0;
160 ipnat_t **nat_rules = NULL;
161 ipnat_t **rdr_rules = NULL;
162 hostmap_t **ipf_hm_maptable = NULL;
163 hostmap_t *ipf_hm_maplist = NULL;
164 ipftq_t nat_tqb[IPF_TCP_NSTATES];
168 ipftq_t *nat_utqe = NULL;
169 int fr_nat_doflush = 0;
176 u_long fr_defnatage = DEF_NAT_AGE,
177 fr_defnatipage = 120, /* 60 seconds */
178 fr_defnaticmpage = 6; /* 3 seconds */
182 #if SOLARIS && !defined(_INET_IP_STACK_H)
183 extern int pfil_delayed_copy;
186 static int nat_flush_entry __P((void *));
187 static int nat_flushtable __P((void));
188 static int nat_clearlist __P((void));
189 static void nat_addnat __P((struct ipnat *));
190 static void nat_addrdr __P((struct ipnat *));
191 static void nat_delrdr __P((struct ipnat *));
192 static void nat_delnat __P((struct ipnat *));
193 static int fr_natgetent __P((caddr_t, int));
194 static int fr_natgetsz __P((caddr_t, int));
195 static int fr_natputent __P((caddr_t, int));
196 static int nat_extraflush __P((int));
197 static int nat_gettable __P((char *));
198 static void nat_tabmove __P((nat_t *));
199 static int nat_match __P((fr_info_t *, ipnat_t *));
200 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
201 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
202 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
203 struct in_addr, struct in_addr, u_32_t));
204 static int nat_icmpquerytype4 __P((int));
205 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
206 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
207 static int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
208 tcphdr_t *, nat_t **, int));
209 static int nat_resolverule __P((ipnat_t *));
210 static nat_t *fr_natclone __P((fr_info_t *, nat_t *));
211 static void nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
212 static int nat_wildok __P((nat_t *, int, int, int, int));
213 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *));
214 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *));
217 /* ------------------------------------------------------------------------ */
218 /* Function: fr_natinit */
219 /* Returns: int - 0 == success, -1 == failure */
220 /* Parameters: Nil */
222 /* Initialise all of the NAT locks, tables and other structures. */
223 /* ------------------------------------------------------------------------ */
228 KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
229 if (nat_table[0] != NULL)
230 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
234 KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
235 if (nat_table[1] != NULL)
236 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
240 KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
241 if (nat_rules != NULL)
242 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
246 KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
247 if (rdr_rules != NULL)
248 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
252 KMALLOCS(ipf_hm_maptable, hostmap_t **, \
253 sizeof(hostmap_t *) * ipf_hostmap_sz);
254 if (ipf_hm_maptable != NULL)
255 bzero((char *)ipf_hm_maptable,
256 sizeof(hostmap_t *) * ipf_hostmap_sz);
259 ipf_hm_maplist = NULL;
261 KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
262 ipf_nattable_sz * sizeof(u_long));
263 if (nat_stats.ns_bucketlen[0] == NULL)
265 bzero((char *)nat_stats.ns_bucketlen[0],
266 ipf_nattable_sz * sizeof(u_long));
268 KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
269 ipf_nattable_sz * sizeof(u_long));
270 if (nat_stats.ns_bucketlen[1] == NULL)
273 bzero((char *)nat_stats.ns_bucketlen[1],
274 ipf_nattable_sz * sizeof(u_long));
276 if (fr_nat_maxbucket == 0) {
277 for (i = ipf_nattable_sz; i > 0; i >>= 1)
279 fr_nat_maxbucket *= 2;
282 fr_sttab_init(nat_tqb);
284 * Increase this because we may have "keep state" following this too
285 * and packet storms can occur if this is removed too quickly.
287 nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
288 nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
289 nat_udptq.ifq_ttl = fr_defnatage;
290 nat_udptq.ifq_ref = 1;
291 nat_udptq.ifq_head = NULL;
292 nat_udptq.ifq_tail = &nat_udptq.ifq_head;
293 MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
294 nat_udptq.ifq_next = &nat_icmptq;
295 nat_icmptq.ifq_ttl = fr_defnaticmpage;
296 nat_icmptq.ifq_ref = 1;
297 nat_icmptq.ifq_head = NULL;
298 nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
299 MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
300 nat_icmptq.ifq_next = &nat_iptq;
301 nat_iptq.ifq_ttl = fr_defnatipage;
302 nat_iptq.ifq_ref = 1;
303 nat_iptq.ifq_head = NULL;
304 nat_iptq.ifq_tail = &nat_iptq.ifq_head;
305 MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
306 nat_iptq.ifq_next = NULL;
308 for (i = 0; i < IPF_TCP_NSTATES; i++) {
309 if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
310 nat_tqb[i].ifq_ttl = fr_defnaticmpage;
312 else if (nat_tqb[i].ifq_ttl > fr_defnatage)
313 nat_tqb[i].ifq_ttl = fr_defnatage;
318 * Increase this because we may have "keep state" following
319 * this too and packet storms can occur if this is removed
322 nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
324 RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
325 RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
326 MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
327 MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
335 /* ------------------------------------------------------------------------ */
336 /* Function: nat_addrdr */
338 /* Parameters: n(I) - pointer to NAT rule to add */
340 /* Adds a redirect rule to the hash table of redirect rules and the list of */
341 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */
342 /* use by redirect rules. */
343 /* ------------------------------------------------------------------------ */
344 static void nat_addrdr(n)
352 k = count4bits(n->in_outmsk);
353 if ((k >= 0) && (k != 32))
355 j = (n->in_outip & n->in_outmsk);
356 hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
359 np = &(*np)->in_rnext;
367 /* ------------------------------------------------------------------------ */
368 /* Function: nat_addnat */
370 /* Parameters: n(I) - pointer to NAT rule to add */
372 /* Adds a NAT map rule to the hash table of rules and the list of loaded */
373 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */
374 /* NAT map rules. */
375 /* ------------------------------------------------------------------------ */
376 static void nat_addnat(n)
384 k = count4bits(n->in_inmsk);
385 if ((k >= 0) && (k != 32))
387 j = (n->in_inip & n->in_inmsk);
388 hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
391 np = &(*np)->in_mnext;
399 /* ------------------------------------------------------------------------ */
400 /* Function: nat_delrdr */
402 /* Parameters: n(I) - pointer to NAT rule to delete */
404 /* Removes a redirect rule from the hash table of redirect rules. */
405 /* ------------------------------------------------------------------------ */
406 static void nat_delrdr(n)
410 n->in_rnext->in_prnext = n->in_prnext;
411 *n->in_prnext = n->in_rnext;
415 /* ------------------------------------------------------------------------ */
416 /* Function: nat_delnat */
418 /* Parameters: n(I) - pointer to NAT rule to delete */
420 /* Removes a NAT map rule from the hash table of NAT map rules. */
421 /* ------------------------------------------------------------------------ */
422 static void nat_delnat(n)
425 if (n->in_mnext != NULL)
426 n->in_mnext->in_pmnext = n->in_pmnext;
427 *n->in_pmnext = n->in_mnext;
431 /* ------------------------------------------------------------------------ */
432 /* Function: nat_hostmap */
433 /* Returns: struct hostmap* - NULL if no hostmap could be created, */
434 /* else a pointer to the hostmapping to use */
435 /* Parameters: np(I) - pointer to NAT rule */
436 /* src(I) - real (source) IP address */
437 /* map(I) - mapped IP address */
438 /* port(I) - destination port number */
439 /* Write Locks: ipf_nat */
441 /* Check if an ip address has already been allocated for a given mapping */
442 /* that is not doing port based translation. If it is not yet allocated, then */
443 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */
444 /* ------------------------------------------------------------------------ */
445 static struct hostmap *nat_hostmap(np, src, dst, map, port)
455 hv = (src.s_addr ^ dst.s_addr);
459 for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_next)
460 if ((hm->hm_srcip.s_addr == src.s_addr) &&
461 (hm->hm_dstip.s_addr == dst.s_addr) &&
462 ((np == NULL) || (np == hm->hm_ipnat)) &&
463 ((port == 0) || (port == hm->hm_port))) {
471 KMALLOC(hm, hostmap_t *);
473 hm->hm_next = ipf_hm_maplist;
474 hm->hm_pnext = &ipf_hm_maplist;
475 if (ipf_hm_maplist != NULL)
476 ipf_hm_maplist->hm_pnext = &hm->hm_next;
478 hm->hm_hnext = ipf_hm_maptable[hv];
479 hm->hm_phnext = ipf_hm_maptable + hv;
480 if (ipf_hm_maptable[hv] != NULL)
481 ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
482 ipf_hm_maptable[hv] = hm;
494 /* ------------------------------------------------------------------------ */
495 /* Function: fr_hostmapdel */
497 /* Parameters: hmp(I) - pointer to hostmap structure pointer */
498 /* Write Locks: ipf_nat */
500 /* Decrement the references to this hostmap structure by one. If this */
501 /* reaches zero then remove it and free it. */
502 /* ------------------------------------------------------------------------ */
503 void fr_hostmapdel(hmp)
504 struct hostmap **hmp;
512 if (hm->hm_ref == 0) {
514 hm->hm_hnext->hm_phnext = hm->hm_phnext;
515 *hm->hm_phnext = hm->hm_hnext;
517 hm->hm_next->hm_pnext = hm->hm_pnext;
518 *hm->hm_pnext = hm->hm_next;
524 /* ------------------------------------------------------------------------ */
525 /* Function: fix_outcksum */
527 /* Parameters: fin(I) - pointer to packet information */
528 /* sp(I) - location of 16bit checksum to update */
529 /* n(I) - amount to adjust checksum by */
531 /* Adjusts the 16bit checksum by "n" for packets going out. */
532 /* ------------------------------------------------------------------------ */
533 void fix_outcksum(fin, sp, n)
544 if (n & NAT_HW_CKSUM) {
547 n = (n & 0xffff) + (n >> 16);
551 sum1 = (~ntohs(*sp)) & 0xffff;
553 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
555 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
556 sumshort = ~(u_short)sum1;
557 *(sp) = htons(sumshort);
561 /* ------------------------------------------------------------------------ */
562 /* Function: fix_incksum */
564 /* Parameters: fin(I) - pointer to packet information */
565 /* sp(I) - location of 16bit checksum to update */
566 /* n(I) - amount to adjust checksum by */
568 /* Adjusts the 16bit checksum by "n" for packets going in. */
569 /* ------------------------------------------------------------------------ */
570 void fix_incksum(fin, sp, n)
581 if (n & NAT_HW_CKSUM) {
584 n = (n & 0xffff) + (n >> 16);
588 sum1 = (~ntohs(*sp)) & 0xffff;
589 sum1 += ~(n) & 0xffff;
590 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
592 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
593 sumshort = ~(u_short)sum1;
594 *(sp) = htons(sumshort);
598 /* ------------------------------------------------------------------------ */
599 /* Function: fix_datacksum */
601 /* Parameters: sp(I) - location of 16bit checksum to update */
602 /* n(I) - amount to adjust checksum by */
604 /* Fix_datacksum is used *only* for the adjustments of checksums in the */
605 /* data section of an IP packet. */
607 /* The only situation in which you need to do this is when NAT'ing an */
608 /* ICMP error message. Such a message, contains in its body the IP header */
609 /* of the original IP packet, that causes the error. */
611 /* You can't use fix_incksum or fix_outcksum in that case, because for the */
612 /* kernel the data section of the ICMP error is just data, and no special */
613 /* processing like hardware cksum or ntohs processing have been done by the */
614 /* kernel on the data section. */
615 /* ------------------------------------------------------------------------ */
616 void fix_datacksum(sp, n)
626 sum1 = (~ntohs(*sp)) & 0xffff;
628 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
630 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
631 sumshort = ~(u_short)sum1;
632 *(sp) = htons(sumshort);
636 /* ------------------------------------------------------------------------ */
637 /* Function: fr_nat_ioctl */
638 /* Returns: int - 0 == success, != 0 == failure */
639 /* Parameters: data(I) - pointer to ioctl data */
640 /* cmd(I) - ioctl command integer */
641 /* mode(I) - file mode bits used with open */
643 /* Processes an ioctl call made to operate on the IP Filter NAT device. */
644 /* ------------------------------------------------------------------------ */
645 int fr_nat_ioctl(data, cmd, mode, uid, ctx)
651 ipnat_t *nat, *nt, *n = NULL, **np = NULL;
652 int error = 0, ret, arg, getlock;
656 #if (BSD >= 199306) && defined(_KERNEL)
657 # if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
658 if ((mode & FWRITE) &&
659 kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
660 KAUTH_REQ_NETWORK_FIREWALL_FW,
665 # if defined(__FreeBSD_version) && (__FreeBSD_version >= 500034)
666 if (securelevel_ge(curthread->td_ucred, 3) && (mode & FWRITE)) {
668 if ((securelevel >= 3) && (mode & FWRITE)) {
675 #if defined(__osf__) && defined(_KERNEL)
678 getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
681 nat = NULL; /* XXX gcc -Wuninitialized */
682 if (cmd == (ioctlcmd_t)SIOCADNAT) {
683 KMALLOC(nt, ipnat_t *);
688 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
689 if (mode & NAT_SYSSPACE) {
690 bcopy(data, (char *)&natd, sizeof(natd));
693 error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
701 * For add/delete, look to see if the NAT entry is already present
703 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
705 if (nat->in_v == 0) /* For backward compat. */
707 nat->in_flags &= IPN_USERFLAGS;
708 if ((nat->in_redir & NAT_MAPBLK) == 0) {
709 if ((nat->in_flags & IPN_SPLIT) == 0)
710 nat->in_inip &= nat->in_inmsk;
711 if ((nat->in_flags & IPN_IPRANGE) == 0)
712 nat->in_outip &= nat->in_outmsk;
714 MUTEX_ENTER(&ipf_natio);
715 for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
716 if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
718 if (nat->in_redir == NAT_REDIRECT &&
719 nat->in_pnext != n->in_pnext)
732 if (!(mode & FWRITE))
735 tmp = ipflog_clear(IPL_LOGNAT);
736 error = BCOPYOUT((char *)&tmp, (char *)data,
745 if (!(mode & FWRITE))
748 error = BCOPYIN((char *)data, (char *)&nat_logging,
749 sizeof(nat_logging));
756 error = BCOPYOUT((char *)&nat_logging, (char *)data,
757 sizeof(nat_logging));
763 arg = iplused[IPL_LOGNAT];
764 error = BCOPYOUT(&arg, data, sizeof(arg));
770 if (!(mode & FWRITE)) {
772 } else if (n != NULL) {
774 } else if (nt == NULL) {
778 MUTEX_EXIT(&ipf_natio);
781 bcopy((char *)nat, (char *)nt, sizeof(*n));
782 error = nat_siocaddnat(nt, np, getlock);
783 MUTEX_EXIT(&ipf_natio);
789 if (!(mode & FWRITE)) {
792 } else if (n == NULL) {
797 MUTEX_EXIT(&ipf_natio);
800 nat_siocdelnat(n, np, getlock);
802 MUTEX_EXIT(&ipf_natio);
807 nat_stats.ns_table[0] = nat_table[0];
808 nat_stats.ns_table[1] = nat_table[1];
809 nat_stats.ns_list = nat_list;
810 nat_stats.ns_maptable = ipf_hm_maptable;
811 nat_stats.ns_maplist = ipf_hm_maplist;
812 nat_stats.ns_nattab_sz = ipf_nattable_sz;
813 nat_stats.ns_nattab_max = ipf_nattable_max;
814 nat_stats.ns_rultab_sz = ipf_natrules_sz;
815 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
816 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
817 nat_stats.ns_instances = nat_instances;
818 nat_stats.ns_apslist = ap_sess_list;
819 nat_stats.ns_ticks = fr_ticks;
820 error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
827 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
832 READ_ENTER(&ipf_nat);
834 ptr = nat_lookupredir(&nl);
836 RWLOCK_EXIT(&ipf_nat);
839 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
847 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */
848 if (!(mode & FWRITE)) {
853 WRITE_ENTER(&ipf_nat);
856 error = BCOPYIN(data, &arg, sizeof(arg));
861 ret = nat_flushtable();
863 ret = nat_clearlist();
865 ret = nat_extraflush(arg);
869 RWLOCK_EXIT(&ipf_nat);
872 error = BCOPYOUT(&ret, data, sizeof(ret));
877 error = appr_ioctl(data, cmd, mode, ctx);
881 if (!(mode & FWRITE)) {
884 error = fr_lock(data, &fr_nat_lock);
889 if ((mode & FWRITE) != 0) {
890 error = fr_natputent(data, getlock);
898 error = fr_natgetsz(data, getlock);
905 error = fr_natgetent(data, getlock);
916 error = fr_inobj(data, &iter, IPFOBJ_GENITER);
918 token = ipf_findtoken(iter.igi_type, uid, ctx);
920 error = nat_iterator(token, &iter);
922 RWLOCK_EXIT(&ipf_tokens);
929 error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
932 error = ipf_deltoken(arg, uid, ctx);
940 error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
944 error = nat_gettable(data);
958 /* ------------------------------------------------------------------------ */
959 /* Function: nat_siocaddnat */
960 /* Returns: int - 0 == success, != 0 == failure */
961 /* Parameters: n(I) - pointer to new NAT rule */
962 /* np(I) - pointer to where to insert new NAT rule */
963 /* getlock(I) - flag indicating if lock on ipf_nat is held */
964 /* Mutex Locks: ipf_natio */
966 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
967 /* from information passed to the kernel, then add it to the appropriate */
968 /* NAT rule table(s). */
969 /* ------------------------------------------------------------------------ */
970 static int nat_siocaddnat(n, np, getlock)
976 if (nat_resolverule(n) != 0)
979 if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
983 if (n->in_redir & NAT_MAPBLK)
984 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
985 else if (n->in_flags & IPN_AUTOPORTMAP)
986 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
987 else if (n->in_flags & IPN_IPRANGE)
988 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
989 else if (n->in_flags & IPN_SPLIT)
991 else if (n->in_outmsk != 0)
992 n->in_space = ~ntohl(n->in_outmsk);
997 * Calculate the number of valid IP addresses in the output
998 * mapping range. In all cases, the range is inclusive of
999 * the start and ending IP addresses.
1000 * If to a CIDR address, lose 2: broadcast + network address
1002 * If to a range, add one.
1003 * If to a single IP address, set to 1.
1006 if ((n->in_flags & IPN_IPRANGE) != 0)
1013 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1014 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1015 n->in_nip = ntohl(n->in_outip) + 1;
1016 else if ((n->in_flags & IPN_SPLIT) &&
1017 (n->in_redir & NAT_REDIRECT))
1018 n->in_nip = ntohl(n->in_inip);
1020 n->in_nip = ntohl(n->in_outip);
1021 if (n->in_redir & NAT_MAP) {
1022 n->in_pnext = ntohs(n->in_pmin);
1024 * Multiply by the number of ports made available.
1026 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1027 n->in_space *= (ntohs(n->in_pmax) -
1028 ntohs(n->in_pmin) + 1);
1030 * Because two different sources can map to
1031 * different destinations but use the same
1033 * If the result is smaller than in_space, then
1034 * we may have wrapped around 32bits.
1037 if ((i != 0) && (i != 0xffffffff)) {
1038 j = n->in_space * (~ntohl(i) + 1);
1039 if (j >= n->in_space)
1042 n->in_space = 0xffffffff;
1046 * If no protocol is specified, multiply by 256 to allow for
1047 * at least one IP:IP mapping per protocol.
1049 if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1050 j = n->in_space * 256;
1051 if (j >= n->in_space)
1054 n->in_space = 0xffffffff;
1058 /* Otherwise, these fields are preset */
1061 WRITE_ENTER(&ipf_nat);
1066 if (n->in_age[0] != 0)
1067 n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1069 if (n->in_age[1] != 0)
1070 n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
1072 if (n->in_redir & NAT_REDIRECT) {
1073 n->in_flags &= ~IPN_NOTDST;
1076 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1077 n->in_flags &= ~IPN_NOTSRC;
1080 MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1083 nat_stats.ns_rules++;
1084 #if SOLARIS && !defined(_INET_IP_STACK_H)
1085 pfil_delayed_copy = 0;
1088 RWLOCK_EXIT(&ipf_nat); /* WRITE */
1095 /* ------------------------------------------------------------------------ */
1096 /* Function: nat_resolverule */
1098 /* Parameters: n(I) - pointer to NAT rule */
1100 /* Resolve the interface names stored in the NAT rule to interface */
1101 /* pointers and, if a proxy label is present, look up the matching proxy. */
1102 /* If the second interface name is empty, the first one is reused for it. */
1103 /* ------------------------------------------------------------------------ */
1104 static int nat_resolverule(n)
1107 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1108 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1110 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1111 if (n->in_ifnames[1][0] == '\0') {
1112 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1113 n->in_ifps[1] = n->in_ifps[0];
1115 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
1118 if (n->in_plabel[0] != '\0') {
1119 n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1120 if (n->in_apr == NULL)
1127 /* ------------------------------------------------------------------------ */
1128 /* Function: nat_siocdelnat */
1129 /* Returns: Nil */
1130 /* Parameters: n(I) - pointer to NAT rule to delete */
1131 /* np(I) - pointer to the nat_list link that points at n */
1132 /* getlock(I) - flag indicating if lock on ipf_nat is held */
1133 /* Mutex Locks: ipf_natio */
1135 /* Handle SIOCRMNAT. Take the NAT rule out of service: drop its timeout */
1136 /* queues and, if the rule is no longer in use, destroy its lock and release */
1137 /* its proxy; otherwise flag it with IPN_DELETE. */
1138 /* ------------------------------------------------------------------------ */
1139 static void nat_siocdelnat(n, np, getlock)
1144 WRITE_ENTER(&ipf_nat);
1146 if (n->in_redir & NAT_REDIRECT)
1148 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1150 if (nat_list == NULL) {
1155 if (n->in_tqehead[0] != NULL) {
1156 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
/* NOTE(review): this branch tests and deletes in_tqehead[0] but frees */
/* in_tqehead[1]; in_tqehead[0] looks like the intended argument - confirm. */
1157 fr_freetimeoutqueue(n->in_tqehead[1]);
1161 if (n->in_tqehead[1] != NULL) {
1162 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1163 fr_freetimeoutqueue(n->in_tqehead[1]);
/* Only tear the rule down when nothing references it any more. */
1169 if (n->in_use == 0) {
1171 appr_free(n->in_apr);
1172 MUTEX_DESTROY(&n->in_lock);
1174 nat_stats.ns_rules--;
1175 #if SOLARIS && !defined(_INET_IP_STACK_H)
1176 if (nat_stats.ns_rules == 0)
1177 pfil_delayed_copy = 1;
/* Rule is still in use: flag it with IPN_DELETE rather than tearing it down. */
1180 n->in_flags |= IPN_DELETE;
1184 RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */
1189 /* ------------------------------------------------------------------------ */
1190 /* Function: fr_natgetsz */
1191 /* Returns: int - 0 == success, != 0 is the error value. */
1192 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1193 /* get the size of. */
1195 /* Handle SIOCSTGSZ. */
1196 /* Return the size of the nat list entry to be copied back to user space. */
1197 /* The size of the entry is stored in the ng_sz field and the entire natget */
1198 /* structure is copied back to the user. */
1199 /* ------------------------------------------------------------------------ */
1200 static int fr_natgetsz(data, getlock)
1208 if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1212 READ_ENTER(&ipf_nat);
1217 nat = nat_instances;
1220 * Empty list so the size returned is 0. Simple.
1224 RWLOCK_EXIT(&ipf_nat);
1226 if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1232 * Make sure the pointer we're copying from exists in the
1233 * current list of entries. Security precaution to prevent
1234 * copying of random kernel data.
1236 for (n = nat_instances; n; n = n->nat_next)
1241 RWLOCK_EXIT(&ipf_nat);
1248 * Include any space required for proxy data structures.
1250 ng.ng_sz = sizeof(nat_save_t);
1253 ng.ng_sz += sizeof(ap_session_t) - 4;
1254 if (aps->aps_data != 0)
1255 ng.ng_sz += aps->aps_psiz;
1258 RWLOCK_EXIT(&ipf_nat);
1261 if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1267 /* ------------------------------------------------------------------------ */
1268 /* Function: fr_natgetent */
1269 /* Returns: int - 0 == success, != 0 is the error value. */
1270 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1271 /* to NAT structure to copy out. */
1273 /* Handle SIOCSTGET. */
1274 /* Copies out NAT entry to user space. Any additional data held for a */
1275 /* proxy is also copied, as is the NAT rule which was responsible for it */
1276 /* ------------------------------------------------------------------------ */
1277 static int fr_natgetent(data, getlock)
1283 nat_save_t *ipn, ipns;
1286 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1290 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1293 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1298 READ_ENTER(&ipf_nat);
1301 ipn->ipn_dsize = ipns.ipn_dsize;
1302 nat = ipns.ipn_next;
1304 nat = nat_instances;
1306 if (nat_instances == NULL)
1312 * Make sure the pointer we're copying from exists in the
1313 * current list of entries. Security precaution to prevent
1314 * copying of random kernel data.
1316 for (n = nat_instances; n; n = n->nat_next)
1324 ipn->ipn_next = nat->nat_next;
1327 * Copy the NAT structure.
1329 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1332 * If we have a pointer to the NAT rule it belongs to, save that too.
1334 if (nat->nat_ptr != NULL)
1335 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1336 sizeof(ipn->ipn_ipnat));
1339 * If we also know the NAT entry has an associated filter rule,
1342 if (nat->nat_fr != NULL)
1343 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1344 sizeof(ipn->ipn_fr));
1347 * Last but not least, if there is an application proxy session set
1348 * up for this NAT entry, then copy that out too, including any
1349 * private data saved along side it by the proxy.
1352 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1356 if (outsize < sizeof(*aps)) {
1362 bcopy((char *)aps, s, sizeof(*aps));
1364 outsize -= sizeof(*aps);
1365 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1366 bcopy(aps->aps_data, s, aps->aps_psiz);
1372 RWLOCK_EXIT(&ipf_nat);
1375 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1380 RWLOCK_EXIT(&ipf_nat);
1383 KFREES(ipn, ipns.ipn_dsize);
1389 /* ------------------------------------------------------------------------ */
1390 /* Function: fr_natputent */
1391 /* Returns: int - 0 == success, != 0 is the error value. */
1392 /* Parameters: data(I) - pointer to natget structure with NAT */
1393 /* structure information to load into the kernel */
1394 /* getlock(I) - flag indicating whether or not a write lock */
1395 /* on ipf_nat is already held. */
1397 /* Handle SIOCSTPUT. */
1398 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1399 /* firewall rule data structures, if pointers to them indicate so. */
1400 /* ------------------------------------------------------------------------ */
1401 static int fr_natputent(data, getlock)
1405 nat_save_t ipn, *ipnn;
1413 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1418 * Initialise early because of code at junkput label.
1427 * New entry, copy in the rest of the NAT entry if its size is more
1428 * than just the nat_t structure.
1430 if (ipn.ipn_dsize > sizeof(ipn)) {
1431 if (ipn.ipn_dsize > 81920) {
1436 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1440 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1448 KMALLOC(nat, nat_t *);
1454 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1456 * Initialize all these so that nat_delete() doesn't cause a crash.
1458 bzero((char *)nat, offsetof(struct nat, nat_tqe));
1459 nat->nat_tqe.tqe_pnext = NULL;
1460 nat->nat_tqe.tqe_next = NULL;
1461 nat->nat_tqe.tqe_ifq = NULL;
1462 nat->nat_tqe.tqe_parent = nat;
1465 * Restore the rule associated with this nat session
1467 in = ipnn->ipn_nat.nat_ptr;
1469 KMALLOC(in, ipnat_t *);
1475 bzero((char *)in, offsetof(struct ipnat, in_next6));
1476 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1478 in->in_flags |= IPN_DELETE;
1480 ATOMIC_INC(nat_stats.ns_rules);
1482 if (nat_resolverule(in) != 0) {
1489 * Check that the NAT entry doesn't already exist in the kernel.
1491 * For NAT_OUTBOUND, we're looking for a duplicate MAP entry. To do
1492 * this, we check to see if the inbound combination of addresses and
1493 * ports is already known. Similar logic is applied for NAT_INBOUND.
1496 bzero((char *)&fin, sizeof(fin));
1497 fin.fin_p = nat->nat_p;
1498 if (nat->nat_dir == NAT_OUTBOUND) {
1499 fin.fin_ifp = nat->nat_ifps[0];
1500 fin.fin_data[0] = ntohs(nat->nat_oport);
1501 fin.fin_data[1] = ntohs(nat->nat_outport);
1503 READ_ENTER(&ipf_nat);
1505 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1506 nat->nat_oip, nat->nat_inip);
1508 RWLOCK_EXIT(&ipf_nat);
1514 } else if (nat->nat_dir == NAT_INBOUND) {
1515 fin.fin_ifp = nat->nat_ifps[0];
1516 fin.fin_data[0] = ntohs(nat->nat_outport);
1517 fin.fin_data[1] = ntohs(nat->nat_oport);
1519 READ_ENTER(&ipf_nat);
1521 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1522 nat->nat_outip, nat->nat_oip);
1524 RWLOCK_EXIT(&ipf_nat);
1536 * Restore ap_session_t structure. Include the private data allocated
1541 KMALLOC(aps, ap_session_t *);
1547 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1549 aps->aps_apr = in->in_apr;
1551 aps->aps_apr = NULL;
1552 if (aps->aps_psiz != 0) {
1553 if (aps->aps_psiz > 81920) {
1557 KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1558 if (aps->aps_data == NULL) {
1562 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1566 aps->aps_data = NULL;
1571 * If there was a filtering rule associated with this entry then
1572 * build up a new one.
1576 if ((nat->nat_flags & SI_NEWFR) != 0) {
1577 KMALLOC(fr, frentry_t *);
1583 ipnn->ipn_nat.nat_fr = fr;
1585 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1586 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1591 fr->fr_type = FR_T_NONE;
1593 MUTEX_NUKE(&fr->fr_lock);
1594 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1597 READ_ENTER(&ipf_nat);
1599 for (n = nat_instances; n; n = n->nat_next)
1600 if (n->nat_fr == fr)
1604 MUTEX_ENTER(&fr->fr_lock);
1606 MUTEX_EXIT(&fr->fr_lock);
1609 RWLOCK_EXIT(&ipf_nat);
1620 KFREES(ipnn, ipn.ipn_dsize);
1625 WRITE_ENTER(&ipf_nat);
1627 error = nat_insert(nat, nat->nat_rev);
1628 if ((error == 0) && (aps != NULL)) {
1629 aps->aps_next = ap_sess_list;
1633 RWLOCK_EXIT(&ipf_nat);
1643 (void) fr_derefrule(&fr);
1645 if ((ipnn != NULL) && (ipnn != &ipn)) {
1646 KFREES(ipnn, ipn.ipn_dsize);
1650 if (aps->aps_data != NULL) {
1651 KFREES(aps->aps_data, aps->aps_psiz);
1657 appr_free(in->in_apr);
1666 /* ------------------------------------------------------------------------ */
1667 /* Function: nat_delete */
1669 /* Parameters: natd(I) - pointer to NAT structure to delete */
1670 /* logtype(I) - type of LOG record to create before deleting */
1671 /* Write Lock: ipf_nat */
1673 /* Delete a nat entry from the various lists and table. If NAT logging is */
1674 /* enabled then generate a NAT log record for this event. */
1675 /* ------------------------------------------------------------------------ */
1676 void nat_delete(nat, logtype)
1683 if (logtype != 0 && nat_logging != 0)
1684 nat_log(nat, logtype);
1685 #if defined(NEED_LOCAL_RAND) && defined(_KERNEL)
1686 ipf_rand_push(nat, sizeof(*nat));
1690 * Take it as a general indication that all the pointers are set if
1693 if (nat->nat_pnext != NULL) {
1696 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1697 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1699 *nat->nat_pnext = nat->nat_next;
1700 if (nat->nat_next != NULL) {
1701 nat->nat_next->nat_pnext = nat->nat_pnext;
1702 nat->nat_next = NULL;
1704 nat->nat_pnext = NULL;
1706 *nat->nat_phnext[0] = nat->nat_hnext[0];
1707 if (nat->nat_hnext[0] != NULL) {
1708 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1709 nat->nat_hnext[0] = NULL;
1711 nat->nat_phnext[0] = NULL;
1713 *nat->nat_phnext[1] = nat->nat_hnext[1];
1714 if (nat->nat_hnext[1] != NULL) {
1715 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1716 nat->nat_hnext[1] = NULL;
1718 nat->nat_phnext[1] = NULL;
1720 if ((nat->nat_flags & SI_WILDP) != 0)
1721 nat_stats.ns_wilds--;
1724 if (nat->nat_me != NULL) {
1725 *nat->nat_me = NULL;
1729 if (nat->nat_tqe.tqe_ifq != NULL)
1730 fr_deletequeueentry(&nat->nat_tqe);
1732 if (logtype == NL_EXPIRE)
1733 nat_stats.ns_expire++;
1735 MUTEX_ENTER(&nat->nat_lock);
1737 * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1738 * This happens when a nat'd packet is blocked and we want to throw
1739 * away the NAT session.
1741 if (logtype == NL_DESTROY) {
1742 if (nat->nat_ref > 2) {
1744 MUTEX_EXIT(&nat->nat_lock);
1746 nat_stats.ns_orphans++;
1749 } else if (nat->nat_ref > 1) {
1751 MUTEX_EXIT(&nat->nat_lock);
1753 nat_stats.ns_orphans++;
1756 MUTEX_EXIT(&nat->nat_lock);
1759 * At this point, nat_ref is 1, doing "--" would make it 0..
1763 nat_stats.ns_orphans--;
1765 #ifdef IPFILTER_SYNC
1767 ipfsync_del(nat->nat_sync);
1770 if (nat->nat_fr != NULL)
1771 (void) fr_derefrule(&nat->nat_fr);
1773 if (nat->nat_hm != NULL)
1774 fr_hostmapdel(&nat->nat_hm);
1777 * If there is an active reference from the nat entry to its parent
1778 * rule, decrement the rule's reference count and free it too if no
1779 * longer being used.
1783 fr_ipnatderef(&ipn);
1786 MUTEX_DESTROY(&nat->nat_lock);
1788 aps_free(nat->nat_aps);
1789 nat_stats.ns_inuse--;
1792 * If there's a fragment table entry too for this nat entry, then
1793 * dereference that as well. This is after nat_lock is released
1796 fr_forgetnat((void *)nat);
1802 /* ------------------------------------------------------------------------ */
1803 /* Function: nat_flushtable */
1804 /* Returns: int - number of NAT rules deleted */
1805 /* Parameters: Nil */
1807 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */
1808 /* log record should be emitted in nat_delete() if NAT logging is enabled. */
1809 /* ------------------------------------------------------------------------ */
1811 * nat_flushtable - clear the NAT table of all mapping entries.
1813 static int nat_flushtable()
1819 * ALL NAT mappings deleted, so let's just make the deletions
1822 if (nat_table[0] != NULL)
1823 bzero((char *)nat_table[0],
1824 sizeof(nat_table[0]) * ipf_nattable_sz);
1825 if (nat_table[1] != NULL)
1826 bzero((char *)nat_table[1],
1827 sizeof(nat_table[1]) * ipf_nattable_sz);
1829 while ((nat = nat_instances) != NULL) {
1830 nat_delete(nat, NL_FLUSH);
1834 nat_stats.ns_inuse = 0;
1839 /* ------------------------------------------------------------------------ */
1840 /* Function: nat_clearlist */
1841 /* Returns: int - number of NAT/RDR rules deleted */
1842 /* Parameters: Nil */
1844 /* Delete all rules in the current list of rules. There is nothing elegant */
1845 /* about this cleanup: simply free all entries on the list of rules and */
1846 /* clear out the tables used for hashed NAT rule lookups. */
1847 /* ------------------------------------------------------------------------ */
1848 static int nat_clearlist()
1850 ipnat_t *n, **np = &nat_list;
1853 if (nat_rules != NULL)
1854 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1855 if (rdr_rules != NULL)
1856 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1858 while ((n = *np) != NULL) {
1860 if (n->in_use == 0) {
1861 if (n->in_apr != NULL)
1862 appr_free(n->in_apr);
1863 MUTEX_DESTROY(&n->in_lock);
1865 nat_stats.ns_rules--;
1867 n->in_flags |= IPN_DELETE;
1872 #if SOLARIS && !defined(_INET_IP_STACK_H)
1873 pfil_delayed_copy = 1;
1881 /* ------------------------------------------------------------------------ */
1882 /* Function: nat_newmap */
1883 /* Returns: int - -1 == error, 0 == success */
1884 /* Parameters: fin(I) - pointer to packet information */
1885 /* nat(I) - pointer to NAT entry */
1886 /* ni(I) - pointer to structure with misc. information needed */
1887 /* to create new NAT entry. */
1889 /* Given an empty NAT structure, populate it with new information about a */
1890 /* new NAT session, as defined by the matching NAT rule. */
1891 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1892 /* to the new IP address for the translation. */
1893 /* ------------------------------------------------------------------------ */
1894 static INLINE int nat_newmap(fin, nat, ni)
1899 u_short st_port, dport, sport, port, sp, dp;
1900 struct in_addr in, inb;
1909 * If it's an outbound packet which doesn't match any existing
1910 * record, then create a new port
1916 st_port = np->in_pnext;
1917 flags = ni->nai_flags;
1918 sport = ni->nai_sport;
1919 dport = ni->nai_dport;
1922 * Do a loop until we either run out of entries to try or we find
1923 * a NAT mapping that isn't currently being used. This is done
1924 * because the change to the source is not (usually) being fixed.
1928 in.s_addr = htonl(np->in_nip);
1931 * Check to see if there is an existing NAT
1932 * setup for this IP address pair.
1934 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1937 in.s_addr = hm->hm_mapip.s_addr;
1938 } else if ((l == 1) && (hm != NULL)) {
1941 in.s_addr = ntohl(in.s_addr);
1945 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1950 if (np->in_redir == NAT_BIMAP &&
1951 np->in_inmsk == np->in_outmsk) {
1953 * map the address block in a 1:1 fashion
1955 in.s_addr = np->in_outip;
1956 in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1957 in.s_addr = ntohl(in.s_addr);
1959 } else if (np->in_redir & NAT_MAPBLK) {
1960 if ((l >= np->in_ppip) || ((l > 0) &&
1961 !(flags & IPN_TCPUDP)))
1964 * map-block - Calculate destination address.
1966 in.s_addr = ntohl(fin->fin_saddr);
1967 in.s_addr &= ntohl(~np->in_inmsk);
1968 inb.s_addr = in.s_addr;
1969 in.s_addr /= np->in_ippip;
1970 in.s_addr &= ntohl(~np->in_outmsk);
1971 in.s_addr += ntohl(np->in_outip);
1973 * Calculate destination port.
1975 if ((flags & IPN_TCPUDP) &&
1976 (np->in_ppip != 0)) {
1977 port = ntohs(sport) + l;
1978 port %= np->in_ppip;
1979 port += np->in_ppip *
1980 (inb.s_addr % np->in_ippip);
1981 port += MAPBLK_MINPORT;
1985 } else if ((np->in_outip == 0) &&
1986 (np->in_outmsk == 0xffffffff)) {
1988 * 0/32 - use the interface's IP address.
1991 fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1994 in.s_addr = ntohl(in.s_addr);
1996 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1998 * 0/0 - use the original source address/port.
2002 in.s_addr = ntohl(fin->fin_saddr);
2004 } else if ((np->in_outmsk != 0xffffffff) &&
2005 (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2010 if ((flags & IPN_TCPUDP) &&
2011 ((np->in_redir & NAT_MAPBLK) == 0) &&
2012 (np->in_flags & IPN_AUTOPORTMAP)) {
2014 * "ports auto" (without map-block)
2016 if ((l > 0) && (l % np->in_ppip == 0)) {
2017 if (l > np->in_space) {
2019 } else if ((l > np->in_ppip) &&
2020 np->in_outmsk != 0xffffffff)
2023 if (np->in_ppip != 0) {
2024 port = ntohs(sport);
2025 port += (l % np->in_ppip);
2026 port %= np->in_ppip;
2027 port += np->in_ppip *
2028 (ntohl(fin->fin_saddr) %
2030 port += MAPBLK_MINPORT;
2034 } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2035 (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2037 * Standard port translation. Select next port.
2039 if (np->in_flags & IPN_SEQUENTIAL) {
2040 port = np->in_pnext;
2042 port = ipf_random() % (ntohs(np->in_pmax) -
2043 ntohs(np->in_pmin));
2044 port += ntohs(np->in_pmin);
2049 if (np->in_pnext > ntohs(np->in_pmax)) {
2050 np->in_pnext = ntohs(np->in_pmin);
2051 if (np->in_outmsk != 0xffffffff)
2056 if (np->in_flags & IPN_IPRANGE) {
2057 if (np->in_nip > ntohl(np->in_outmsk))
2058 np->in_nip = ntohl(np->in_outip);
2060 if ((np->in_outmsk != 0xffffffff) &&
2061 ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2062 ntohl(np->in_outip))
2063 np->in_nip = ntohl(np->in_outip) + 1;
2066 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2070 * Here we do a lookup of the connection as seen from
2071 * the outside. If an IP# pair already exists, try
2072 * again. So if you have A->B becomes C->B, you can
2073 * also have D->E become C->E but not D->B causing
2074 * another C->B. Also take protocol and ports into
2075 * account when determining whether a pre-existing
2076 * NAT setup will cause an external conflict where
2077 * this is appropriate.
2079 inb.s_addr = htonl(in.s_addr);
2080 sp = fin->fin_data[0];
2081 dp = fin->fin_data[1];
2082 fin->fin_data[0] = fin->fin_data[1];
2083 fin->fin_data[1] = htons(port);
2084 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2085 (u_int)fin->fin_p, fin->fin_dst, inb);
2086 fin->fin_data[0] = sp;
2087 fin->fin_data[1] = dp;
2090 * Has the search wrapped around and come back to the
2093 if ((natl != NULL) &&
2094 (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2095 (np->in_nip != 0) && (st_ip == np->in_nip))
2098 } while (natl != NULL);
2100 if (np->in_space > 0)
2103 /* Setup the NAT table */
2104 nat->nat_inip = fin->fin_src;
2105 nat->nat_outip.s_addr = htonl(in.s_addr);
2106 nat->nat_oip = fin->fin_dst;
2107 if (nat->nat_hm == NULL)
2108 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2112 * The ICMP checksum does not have a pseudo header containing
2115 ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2116 ni->nai_sum2 = LONG_SUM(in.s_addr);
2117 if ((flags & IPN_TCPUDP)) {
2118 ni->nai_sum1 += ntohs(sport);
2119 ni->nai_sum2 += ntohs(port);
2122 if (flags & IPN_TCPUDP) {
2123 nat->nat_inport = sport;
2124 nat->nat_outport = port; /* sport */
2125 nat->nat_oport = dport;
2126 ((tcphdr_t *)fin->fin_dp)->th_sport = port;
2127 } else if (flags & IPN_ICMPQUERY) {
2128 ((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2129 nat->nat_inport = port;
2130 nat->nat_outport = port;
2131 } else if (fin->fin_p == IPPROTO_GRE) {
2133 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2134 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2135 nat->nat_oport = 0;/*fin->fin_data[1];*/
2136 nat->nat_inport = 0;/*fin->fin_data[0];*/
2137 nat->nat_outport = 0;/*fin->fin_data[0];*/
2138 nat->nat_call[0] = fin->fin_data[0];
2139 nat->nat_call[1] = fin->fin_data[0];
2143 ni->nai_ip.s_addr = in.s_addr;
2144 ni->nai_port = port;
2145 ni->nai_nport = dport;
2150 /* ------------------------------------------------------------------------ */
2151 /* Function: nat_newrdr */
2152 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */
2153 /* allow rule to be moved if IPN_ROUNDR is set. */
2154 /* Parameters: fin(I) - pointer to packet information */
2155 /* nat(I) - pointer to NAT entry */
2156 /* ni(I) - pointer to structure with misc. information needed */
2157 /* to create new NAT entry. */
2159 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2160 /* to the new IP address for the translation. */
2161 /* ------------------------------------------------------------------------ */
2162 static INLINE int nat_newrdr(fin, nat, ni)
2167 u_short nport, dport, sport;
2168 struct in_addr in, inb;
2180 flags = ni->nai_flags;
2181 sport = ni->nai_sport;
2182 dport = ni->nai_dport;
2185 * If the matching rule has IPN_STICKY set, then we want to have the
2186 * same rule kick in as before. Why would this happen? If you have
2187 * a collection of rdr rules with "round-robin sticky", the current
2188 * packet might match a different one to the previous connection but
2189 * we want the same destination to be used.
2191 if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2192 ((np->in_flags & IPN_STICKY) != 0)) {
2193 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2196 in.s_addr = ntohl(hm->hm_mapip.s_addr);
2204 * Otherwise, it's an inbound packet. Most likely, we don't
2205 * want to rewrite source ports and source addresses. Instead,
2206 * we want to rewrite to a fixed internal address and fixed
2209 if (np->in_flags & IPN_SPLIT) {
2210 in.s_addr = np->in_nip;
2212 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2213 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2216 in.s_addr = hm->hm_mapip.s_addr;
2221 if (hm == NULL || hm->hm_ref == 1) {
2222 if (np->in_inip == htonl(in.s_addr)) {
2223 np->in_nip = ntohl(np->in_inmsk);
2226 np->in_nip = ntohl(np->in_inip);
2230 } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2232 * 0/32 - use the interface's IP address.
2234 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2236 in.s_addr = ntohl(in.s_addr);
2238 } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2240 * 0/0 - use the original destination address/port.
2242 in.s_addr = ntohl(fin->fin_daddr);
2244 } else if (np->in_redir == NAT_BIMAP &&
2245 np->in_inmsk == np->in_outmsk) {
2247 * map the address block in a 1:1 fashion
2249 in.s_addr = np->in_inip;
2250 in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2251 in.s_addr = ntohl(in.s_addr);
2253 in.s_addr = ntohl(np->in_inip);
2256 if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2260 * Whilst not optimized for the case where
2261 * pmin == pmax, the gain is not significant.
2263 if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2264 (np->in_pmin != np->in_pmax)) {
2265 nport = ntohs(dport) - ntohs(np->in_pmin) +
2266 ntohs(np->in_pnext);
2267 nport = htons(nport);
2269 nport = np->in_pnext;
2273 * When the redirect-to address is set to 0.0.0.0, just
2274 * assume a blank `forwarding' of the packet. We don't
2275 * setup any translation for this either.
2277 if (in.s_addr == 0) {
2280 in.s_addr = ntohl(fin->fin_daddr);
2284 * Check to see if this redirect mapping already exists and if
2285 * it does, return "failure" (allowing it to be created will just
2286 * cause one or both of these "connections" to stop working.)
2288 inb.s_addr = htonl(in.s_addr);
2289 sp = fin->fin_data[0];
2290 dp = fin->fin_data[1];
2291 fin->fin_data[1] = fin->fin_data[0];
2292 fin->fin_data[0] = ntohs(nport);
2293 natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2294 (u_int)fin->fin_p, inb, fin->fin_src);
2295 fin->fin_data[0] = sp;
2296 fin->fin_data[1] = dp;
2300 nat->nat_inip.s_addr = htonl(in.s_addr);
2301 nat->nat_outip = fin->fin_dst;
2302 nat->nat_oip = fin->fin_src;
2303 if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2304 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
2307 ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2308 ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2310 ni->nai_ip.s_addr = in.s_addr;
2311 ni->nai_nport = nport;
2312 ni->nai_port = sport;
2314 if (flags & IPN_TCPUDP) {
2315 nat->nat_inport = nport;
2316 nat->nat_outport = dport;
2317 nat->nat_oport = sport;
2318 ((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2319 } else if (flags & IPN_ICMPQUERY) {
2320 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2321 nat->nat_inport = nport;
2322 nat->nat_outport = nport;
2323 } else if (fin->fin_p == IPPROTO_GRE) {
2325 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2326 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2327 nat->nat_call[0] = fin->fin_data[0];
2328 nat->nat_call[1] = fin->fin_data[1];
2329 nat->nat_oport = 0; /*fin->fin_data[0];*/
2330 nat->nat_inport = 0; /*fin->fin_data[1];*/
2331 nat->nat_outport = 0; /*fin->fin_data[1];*/
2339 /* ------------------------------------------------------------------------ */
2340 /* Function: nat_new */
2341 /* Returns: nat_t* - NULL == failure to create new NAT structure, */
2342 /* else pointer to new NAT structure */
2343 /* Parameters: fin(I) - pointer to packet information */
2344 /* np(I) - pointer to NAT rule */
2345 /* natsave(I) - pointer to where to store NAT struct pointer */
2346 /* flags(I) - flags describing the current packet */
2347 /* direction(I) - direction of packet (in/out) */
2348 /* Write Lock: ipf_nat */
2350 /* Attempts to create a new NAT entry. Does not actually change the packet */
2353 /* This function is in three main parts: (1) deal with creating a new NAT */
2354 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */
2355 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2356 /* and (3) building that structure and putting it into the NAT table(s). */
2358 /* NOTE: natsave should NOT be used to point back to an ipstate_t struct */
2359 /* as it can result in memory being corrupted. */
2360 /* ------------------------------------------------------------------------ */
2361 nat_t *nat_new(fin, np, natsave, flags, direction)
2368 u_short port = 0, sport = 0, dport = 0, nport = 0;
2369 tcphdr_t *tcp = NULL;
2370 hostmap_t *hm = NULL;
2377 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2378 qpktinfo_t *qpi = fin->fin_qpi;
2381 if (nat_stats.ns_inuse >= ipf_nattable_max) {
2382 nat_stats.ns_memfail++;
2388 nflags = np->in_flags & flags;
2389 nflags &= NAT_FROMRULE;
2392 ni.nai_nflags = nflags;
2393 ni.nai_flags = flags;
2397 /* Give me a new nat */
2398 KMALLOC(nat, nat_t *);
2400 nat_stats.ns_memfail++;
2402 * Try to automatically tune the max # of entries in the
2403 * table allowed to be less than what will cause kmem_alloc()
2404 * to fail and try to eliminate panics due to out of memory
2405 * conditions arising.
2407 if (ipf_nattable_max > ipf_nattable_sz) {
2408 ipf_nattable_max = nat_stats.ns_inuse - 100;
2409 printf("ipf_nattable_max reduced to %d\n",
2415 if (flags & IPN_TCPUDP) {
2417 ni.nai_sport = htons(fin->fin_sport);
2418 ni.nai_dport = htons(fin->fin_dport);
2419 } else if (flags & IPN_ICMPQUERY) {
2421 * In the ICMP query NAT code, we translate the ICMP id fields
2422 * to make them unique. This is independent of the ICMP type
2423 * (e.g. in the unlikely event that a host sends an echo and
2424 * a tstamp request with the same id, both packets will have
2425 * their ip address/id field changed in the same way).
2427 /* The icmp_id field is used by the sender to identify the
2428 * process making the icmp request. (the receiver just
2429 * copies it back in its response). So, it closely matches
2430 * the concept of source port. We overlay sport, so we can
2431 * maximally reuse the existing code.
2433 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2434 ni.nai_dport = ni.nai_sport;
2437 bzero((char *)nat, sizeof(*nat));
2438 nat->nat_flags = flags;
2439 nat->nat_redir = np->in_redir;
2441 if ((flags & NAT_SLAVE) == 0) {
2442 MUTEX_ENTER(&ipf_nat_new);
2446 * Search the current table for a match.
2448 if (direction == NAT_OUTBOUND) {
2450 * We can now arrange to call this for the same connection
2451 * because ipf_nat_new doesn't protect the code path into
2454 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2455 fin->fin_src, fin->fin_dst);
2462 move = nat_newmap(fin, nat, &ni);
2470 * NAT_INBOUND is used only for redirect rules
2472 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2473 fin->fin_src, fin->fin_dst);
2480 move = nat_newrdr(fin, nat, &ni);
2488 nport = ni.nai_nport;
2490 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2491 if (np->in_redir == NAT_REDIRECT) {
2494 } else if (np->in_redir == NAT_MAP) {
2500 if (flags & IPN_TCPUDP) {
2501 sport = ni.nai_sport;
2502 dport = ni.nai_dport;
2503 } else if (flags & IPN_ICMPQUERY) {
2504 sport = ni.nai_sport;
2508 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2509 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2510 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2511 if ((flags & IPN_TCP) && dohwcksum &&
2512 (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2513 if (direction == NAT_OUTBOUND)
2514 ni.nai_sum1 = LONG_SUM(in.s_addr);
2516 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2517 ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2519 ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2520 nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2523 nat->nat_sumd[1] = nat->nat_sumd[0];
2525 if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2526 if (direction == NAT_OUTBOUND)
2527 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2529 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2531 ni.nai_sum2 = LONG_SUM(in.s_addr);
2533 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2534 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2536 nat->nat_ipsumd = nat->nat_sumd[0];
2537 if (!(flags & IPN_TCPUDPICMP)) {
2538 nat->nat_sumd[0] = 0;
2539 nat->nat_sumd[1] = 0;
2543 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2547 if (flags & SI_WILDP)
2548 nat_stats.ns_wilds++;
2549 fin->fin_flx |= FI_NEWNAT;
2552 nat_stats.ns_badnat++;
2553 if ((hm = nat->nat_hm) != NULL)
2558 if ((flags & NAT_SLAVE) == 0) {
2559 MUTEX_EXIT(&ipf_nat_new);
2565 /* ------------------------------------------------------------------------ */
2566 /* Function: nat_finalise */
2567 /* Returns: int - 0 == success, -1 == failure */
2568 /* Parameters: fin(I) - pointer to packet information */
2569 /* nat(I) - pointer to NAT entry */
2570 /* ni(I) - pointer to structure with misc. information needed */
2571 /* to create new NAT entry. */
2572 /* Write Lock: ipf_nat */
2574 /* This is the tail end of constructing a new NAT entry and is the same */
2575 /* for both IPv4 and IPv6. */
2576 /* ------------------------------------------------------------------------ */
2578 static int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2591 if (np->in_ifps[0] != NULL) {
2592 COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2594 if (np->in_ifps[1] != NULL) {
2595 COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
2597 #ifdef IPFILTER_SYNC
2598 if ((nat->nat_flags & SI_CLONE) == 0)
2599 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2602 nat->nat_me = natsave;
2603 nat->nat_dir = direction;
2604 nat->nat_ifps[0] = np->in_ifps[0];
2605 nat->nat_ifps[1] = np->in_ifps[1];
2607 nat->nat_p = fin->fin_p;
2608 nat->nat_mssclamp = np->in_mssclamp;
2609 if (nat->nat_p == IPPROTO_TCP)
2610 nat->nat_seqnext[0] = ntohl(tcp->th_seq);
2612 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2613 if (appr_new(fin, nat) == -1)
2616 if (nat_insert(nat, fin->fin_rev) == 0) {
2618 nat_log(nat, (u_int)np->in_redir);
2623 MUTEX_ENTER(&fr->fr_lock);
2625 MUTEX_EXIT(&fr->fr_lock);
2631 * nat_insert failed, so cleanup time...
2637 /* ------------------------------------------------------------------------ */
2638 /* Function: nat_insert */
2639 /* Returns: int - 0 == success, -1 == failure */
2640 /* Parameters: nat(I) - pointer to NAT structure */
2641 /* rev(I) - flag indicating forward/reverse direction of packet */
2642 /* Write Lock: ipf_nat */
2644 /* Insert a NAT entry into the hash tables for searching and add it to the */
2645 /* list of active NAT entries. Adjust global counters when complete. */
2646 /* ------------------------------------------------------------------------ */
2647 int nat_insert(nat, rev)
2655 * Try and return an error as early as possible, so calculate the hash
2656 * entry numbers first and then proceed.
2658 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2659 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2661 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2663 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2665 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2668 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2669 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2670 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2671 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
2674 if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2675 nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2679 nat->nat_hv[0] = hv1;
2680 nat->nat_hv[1] = hv2;
2682 MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2686 nat->nat_bytes[0] = 0;
2687 nat->nat_pkts[0] = 0;
2688 nat->nat_bytes[1] = 0;
2689 nat->nat_pkts[1] = 0;
2691 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2692 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2694 if (nat->nat_ifnames[1][0] != '\0') {
2695 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2696 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2698 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2700 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2701 nat->nat_ifps[1] = nat->nat_ifps[0];
2704 nat->nat_next = nat_instances;
2705 nat->nat_pnext = &nat_instances;
2707 nat_instances->nat_pnext = &nat->nat_next;
2708 nat_instances = nat;
2710 natp = &nat_table[0][hv1];
2712 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2713 nat->nat_phnext[0] = natp;
2714 nat->nat_hnext[0] = *natp;
2716 nat_stats.ns_bucketlen[0][hv1]++;
2718 natp = &nat_table[1][hv2];
2720 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2721 nat->nat_phnext[1] = natp;
2722 nat->nat_hnext[1] = *natp;
2724 nat_stats.ns_bucketlen[1][hv2]++;
2726 fr_setnatqueue(nat, rev);
2728 nat_stats.ns_added++;
2729 nat_stats.ns_inuse++;
2734 /* ------------------------------------------------------------------------ */
2735 /* Function: nat_icmperrorlookup */
2736 /* Returns: nat_t* - pointer to matching NAT structure */
2737 /* Parameters: fin(I) - pointer to packet information */
2738 /* dir(I) - direction of packet (in/out) */
2740 /* Check if the ICMP error message is related to an existing TCP, UDP or */
2741 /* ICMP query nat entry. It is assumed that the packet is already of the */
2742 /* required length. */
2743 /* ------------------------------------------------------------------------ */
2744 nat_t *nat_icmperrorlookup(fin, dir)
2748 int flags = 0, type, minlen;
2749 icmphdr_t *icmp, *orgicmp;
2750 tcphdr_t *tcp = NULL;
2757 type = icmp->icmp_type;
2759 * Does it at least have the return (basic) IP header ?
2760 * Only a basic IP header (no options) should be with an ICMP error
2761 * header. Also, if it's not an error type, then return.
2763 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2769 oip = (ip_t *)((char *)fin->fin_dp + 8);
2770 minlen = IP_HL(oip) << 2;
2771 if ((minlen < sizeof(ip_t)) ||
2772 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2775 * Is the buffer big enough for all of it ? It's the size of the IP
2776 * header claimed in the encapsulated part which is of concern. It
2777 * may be too big to be in this buffer but not so big that it's
2778 * outside the ICMP packet, leading to TCP deref's causing problems.
2779 * This is possible because we don't know how big oip_hl is when we
2780 * do the pullup early in fr_check() and thus can't guarantee it is
2788 # if defined(MENTAT)
2789 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2792 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2793 (char *)fin->fin_ip + M_LEN(m))
2799 if (fin->fin_daddr != oip->ip_src.s_addr)
2803 if (p == IPPROTO_TCP)
2805 else if (p == IPPROTO_UDP)
2807 else if (p == IPPROTO_ICMP) {
2808 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2810 /* see if this is related to an ICMP query */
2811 if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2812 data[0] = fin->fin_data[0];
2813 data[1] = fin->fin_data[1];
2814 fin->fin_data[0] = 0;
2815 fin->fin_data[1] = orgicmp->icmp_id;
2817 flags = IPN_ICMPERR|IPN_ICMPQUERY;
2819 * NOTE : dir refers to the direction of the original
2820 * ip packet. By definition the icmp error
2821 * message flows in the opposite direction.
2823 if (dir == NAT_INBOUND)
2824 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2827 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2829 fin->fin_data[0] = data[0];
2830 fin->fin_data[1] = data[1];
2835 if (flags & IPN_TCPUDP) {
2836 minlen += 8; /* + 64bits of data to get ports */
2837 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2840 data[0] = fin->fin_data[0];
2841 data[1] = fin->fin_data[1];
2842 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2843 fin->fin_data[0] = ntohs(tcp->th_dport);
2844 fin->fin_data[1] = ntohs(tcp->th_sport);
2846 if (dir == NAT_INBOUND) {
2847 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2850 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2853 fin->fin_data[0] = data[0];
2854 fin->fin_data[1] = data[1];
2857 if (dir == NAT_INBOUND)
2858 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2860 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2864 /* ------------------------------------------------------------------------ */
2865 /* Function: nat_icmperror */
2866 /* Returns: nat_t* - pointer to matching NAT structure */
2867 /* Parameters: fin(I) - pointer to packet information */
2868 /* nflags(I) - NAT flags for this packet */
2869 /* dir(I) - direction of packet (in/out) */
2871 /* Fix up an ICMP packet which is an error message for an existing NAT */
2872 /* session. This will correct both packet header data and checksums. */
2874 /* This should *ONLY* be used for incoming ICMP error packets to make sure */
2875 /* a NAT'd ICMP packet gets correctly recognised. */
2876 /* ------------------------------------------------------------------------ */
2877 nat_t *nat_icmperror(fin, nflags, dir)
2882 u_32_t sum1, sum2, sumd, sumd2;
2883 struct in_addr a1, a2;
2884 int flags, dlen, odst;
2892 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2895 * nat_icmperrorlookup() will return NULL for `defective' packets.
2897 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2904 *nflags = IPN_ICMPERR;
2906 oip = (ip_t *)&icmp->icmp_ip;
2907 dp = (((char *)oip) + (IP_HL(oip) << 2));
2908 if (oip->ip_p == IPPROTO_TCP) {
2909 tcp = (tcphdr_t *)dp;
2910 csump = (u_short *)&tcp->th_sum;
2912 } else if (oip->ip_p == IPPROTO_UDP) {
2915 udp = (udphdr_t *)dp;
2916 tcp = (tcphdr_t *)dp;
2917 csump = (u_short *)&udp->uh_sum;
2919 } else if (oip->ip_p == IPPROTO_ICMP)
2920 flags = IPN_ICMPQUERY;
2921 dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);
2924 * Need to adjust ICMP header to include the real IP#'s and
2925 * port #'s. Only apply a checksum change relative to the
2926 * IP address change as it will be modified again in fr_checknatout
2927 * for both address and port. Two checksum changes are
2928 * necessary for the two header address changes. Be careful
2929 * to only modify the checksum once for the port # and twice
2935 * Fix the IP addresses in the offending IP packet. You also need
2936 * to adjust the IP header checksum of that offending IP packet.
2938 * Normally, you would expect that the ICMP checksum of the
2939 * ICMP error message needs to be adjusted as well for the
2940 * IP address change in oip.
2941 * However, this is a NOP, because the ICMP checksum is
2942 * calculated over the complete ICMP packet, which includes the
2943 * changed oip IP addresses and oip->ip_sum. However, these
2944 * two changes cancel each other out (if the delta for
2945 * the IP address is x, then the delta for ip_sum is minus x),
2946 * so no change in the icmp_cksum is necessary.
2950 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2951 * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
2952 * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
2954 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2955 * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2956 * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
2960 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2961 * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2962 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2964 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2965 * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
2966 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2969 odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
2971 a1.s_addr = ntohl(nat->nat_inip.s_addr);
2972 a2.s_addr = ntohl(oip->ip_src.s_addr);
2973 oip->ip_src.s_addr = htonl(a1.s_addr);
2975 a1.s_addr = ntohl(nat->nat_outip.s_addr);
2976 a2.s_addr = ntohl(oip->ip_dst.s_addr);
2977 oip->ip_dst.s_addr = htonl(a1.s_addr);
2980 sumd = a2.s_addr - a1.s_addr;
2982 if (a1.s_addr > a2.s_addr)
2986 fix_datacksum(&oip->ip_sum, sumd);
2994 * Fix UDP pseudo header checksum to compensate for the
2995 * IP address change.
2997 if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
3000 * For offending TCP/UDP IP packets, translate the ports as
3001 * well, based on the NAT specification. Of course such
3002 * a change may be reflected in the ICMP checksum as well.
3004 * Since the port fields are part of the TCP/UDP checksum
3005 * of the offending IP packet, you need to adjust that checksum
3006 * as well... except that the change in the port numbers should
3007 * be offset by the checksum change. However, the TCP/UDP
3008 * checksum will also need to change if there has been an
3009 * IP address change.
3012 sum1 = ntohs(nat->nat_inport);
3013 sum2 = ntohs(tcp->th_sport);
3015 tcp->th_sport = htons(sum1);
3017 sum1 = ntohs(nat->nat_outport);
3018 sum2 = ntohs(tcp->th_dport);
3020 tcp->th_dport = htons(sum1);
3023 sumd += sum1 - sum2;
3024 if (sumd != 0 || sumd2 != 0) {
3026 * At this point, sumd is the delta to apply to the
3027 * TCP/UDP header, given the changes in both the IP
3028 * address and the ports and sumd2 is the delta to
3029 * apply to the ICMP header, given the IP address
3030 * change delta that may need to be applied to the
3031 * TCP/UDP checksum instead.
3033 * If we will both the IP and TCP/UDP checksums
3034 * then the ICMP checksum changes by the address
3035 * delta applied to the TCP/UDP checksum. If we
3036 * do not change the TCP/UDP checksum them we
3037 * apply the delta in ports to the ICMP checksum.
3039 if (oip->ip_p == IPPROTO_UDP) {
3040 if ((dlen >= 8) && (*csump != 0)) {
3041 fix_datacksum(csump, sumd);
3043 sumd2 = sum1 - sum2;
3047 } else if (oip->ip_p == IPPROTO_TCP) {
3049 fix_datacksum(csump, sumd);
3051 sumd2 = sum2 - sum1;
3061 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3062 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3063 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3065 if ((odst == 0) && (dir == NAT_OUTBOUND) &&
3066 (fin->fin_rev == 0) && (np != NULL) &&
3067 (np->in_redir & NAT_REDIRECT)) {
3068 fix_outcksum(fin, &icmp->icmp_cksum,
3071 fix_incksum(fin, &icmp->icmp_cksum,
3076 } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
3080 * XXX - what if this is bogus hl and we go off the end ?
3081 * In this case, nat_icmperrorlookup() will have returned NULL.
3083 orgicmp = (icmphdr_t *)dp;
3086 if (orgicmp->icmp_id != nat->nat_inport) {
3089 * Fix ICMP checksum (of the offening ICMP
3090 * query packet) to compensate the change
3091 * in the ICMP id of the offending ICMP
3094 * Since you modify orgicmp->icmp_id with
3095 * a delta (say x) and you compensate that
3096 * in origicmp->icmp_cksum with a delta
3097 * minus x, you don't have to adjust the
3098 * overall icmp->icmp_cksum
3100 sum1 = ntohs(orgicmp->icmp_id);
3101 sum2 = ntohs(nat->nat_inport);
3102 CALC_SUMD(sum1, sum2, sumd);
3103 orgicmp->icmp_id = nat->nat_inport;
3104 fix_datacksum(&orgicmp->icmp_cksum, sumd);
3106 } /* nat_dir == NAT_INBOUND is impossible for icmp queries */
3113 * NB: these lookups don't lock access to the list, it is assumed that it has
3114 * already been done!
3117 /* ------------------------------------------------------------------------ */
3118 /* Function: nat_inlookup */
3119 /* Returns: nat_t* - NULL == no match, */
3120 /* else pointer to matching NAT entry */
3121 /* Parameters: fin(I) - pointer to packet information */
3122 /* flags(I) - NAT flags for this packet */
3123 /* p(I) - protocol for this packet */
3124 /* src(I) - source IP address */
3125 /* mapdst(I) - destination IP address */
3127 /* Lookup a nat entry based on the mapped destination ip address/port and */
3128 /* real source address/port. We use this lookup when receiving a packet, */
3129 /* we're looking for a table entry, based on the destination address. */
3131 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3133 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3134 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */
3136 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3137 /* the packet is of said protocol */
/* The search walks nat_table[1] along nat_hnext[1], in the bucket hashed */
/* from (mapdst, dport) and then (src, sport). */
/* A lookup can modify what it walks over: an entry whose nat_ifps[0] is */
/* still NULL has ifp stored in it, and a wildcard entry accepted by */
/* nat_wildok() has its ports filled in and its wildcard flags cleared. */
/* For the wildcard pass ipf_nat is released (RWLOCK_EXIT), re-entered for */
/* write (WRITE_ENTER) and later downgraded (MUTEX_DOWNGRADE). */
3138 /* ------------------------------------------------------------------------ */
3139 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3142 struct in_addr src , mapdst;
3144 u_short sport, dport;
3158 dst = mapdst.s_addr;
3159 sflags = flags & NAT_TCPUDPICMP;
3165 sport = htons(fin->fin_data[0]);
3166 dport = htons(fin->fin_data[1]);
/* fin_data[1] is copied as is here, without the htons() used just above: */
/* into sport for IPN_ICMPERR lookups, into dport otherwise. */
3169 if (flags & IPN_ICMPERR)
3170 sport = fin->fin_data[1];
3172 dport = fin->fin_data[1];
3179 if ((flags & SI_WILDP) != 0)
3180 goto find_in_wild_ports;
/* Two-step hash: destination address/port first, then source address/port. */
3182 hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3183 hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
3184 nat = nat_table[1][hv];
3185 for (; nat; nat = nat->nat_hnext[1]) {
/* Compare ifp with the interface recorded in the entry; when none is */
/* recorded yet, remember ifp in the entry. */
3186 if (nat->nat_ifps[0] != NULL) {
3187 if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3189 } else if (ifp != NULL)
3190 nat->nat_ifps[0] = ifp;
3192 nflags = nat->nat_flags;
3194 if (nat->nat_oip.s_addr == src.s_addr &&
3195 nat->nat_outip.s_addr == dst &&
3197 (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3198 || (p == nat->nat_p))) {
3203 if (nat->nat_call[1] != fin->fin_data[0])
3208 if ((flags & IPN_ICMPERR) != 0) {
3209 if (nat->nat_outport != sport)
3212 if (nat->nat_outport != dport)
3218 if (nat->nat_oport != sport)
3220 if (nat->nat_outport != dport)
3228 if ((ipn != NULL) && (nat->nat_aps != NULL))
3229 if (appr_match(fin, nat) != 0)
3236 * So if we didn't find it but there are wildcard members in the hash
3237 * table, go back and look for them. We do this search and update here
3238 * because it is modifying the NAT table and we want to do this only
3239 * for the first packet that matches. The exception, of course, is
3240 * for "dummy" (FI_IGNORE) lookups.
3243 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3245 if (nat_stats.ns_wilds == 0)
/* Give up the current hold on ipf_nat and take it for write: the loop */
/* below may modify the entry it finds. The wildcard bucket is hashed with */
/* both ports taken as zero. */
3248 RWLOCK_EXIT(&ipf_nat);
3250 hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3251 hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
3253 WRITE_ENTER(&ipf_nat);
3255 nat = nat_table[1][hv];
3256 for (; nat; nat = nat->nat_hnext[1]) {
3257 if (nat->nat_ifps[0] != NULL) {
3258 if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3260 } else if (ifp != NULL)
3261 nat->nat_ifps[0] = ifp;
3263 if (nat->nat_p != fin->fin_p)
3265 if (nat->nat_oip.s_addr != src.s_addr ||
3266 nat->nat_outip.s_addr != dst)
3269 nflags = nat->nat_flags;
3270 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3273 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3274 NAT_INBOUND) == 1) {
3275 if ((fin->fin_flx & FI_IGNORE) != 0)
3277 if ((nflags & SI_CLONE) != 0) {
3278 nat = fr_natclone(fin, nat);
/* The wildcard counter is adjusted under the ipf_nat_new mutex. */
3282 MUTEX_ENTER(&ipf_nat_new);
3283 nat_stats.ns_wilds--;
3284 MUTEX_EXIT(&ipf_nat_new);
/* Pin the entry to the ports of this packet; it is no longer a wildcard. */
3286 nat->nat_oport = sport;
3287 nat->nat_outport = dport;
3288 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3294 MUTEX_DOWNGRADE(&ipf_nat);
3300 /* ------------------------------------------------------------------------ */
3301 /* Function: nat_tabmove */
3303 /* Parameters: nat(I) - pointer to NAT structure */
3304 /* Write Lock: ipf_nat */
3306 /* This function is only called for TCP/UDP NAT table entries where the */
3307 /* original was placed in the table without hashing on the ports and we now */
3308 /* want to include hashing on port numbers. */
/* The entry is unlinked from both hash chains (index 0 and index 1), both */
/* hash values are recomputed with the port numbers included, and the entry */
/* is linked in front of whatever the new buckets already hold. */
/* nat_hv[] and nat_stats.ns_bucketlen[][] are kept in step with the move. */
/* Entries with SI_CLONE set are tested for before any of this happens */
/* (NOTE(review): presumably they are left where they are - confirm). */
3309 /* ------------------------------------------------------------------------ */
3310 static void nat_tabmove(nat)
3316 if (nat->nat_flags & SI_CLONE)
3320 * Remove the NAT entry from the old location
3322 if (nat->nat_hnext[0])
3323 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3324 *nat->nat_phnext[0] = nat->nat_hnext[0];
3325 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
/* Same unlink for the index-1 chain. */
3327 if (nat->nat_hnext[1])
3328 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3329 *nat->nat_phnext[1] = nat->nat_hnext[1];
3330 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3333 * Add into the NAT table in the new position
3335 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3336 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3338 nat->nat_hv[0] = hv;
3339 natp = &nat_table[0][hv];
3341 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3342 nat->nat_phnext[0] = natp;
3343 nat->nat_hnext[0] = *natp;
3345 nat_stats.ns_bucketlen[0][hv]++;
/* Repeat for nat_table[1], this time hashing on nat_outip/nat_outport. */
3347 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3348 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3350 nat->nat_hv[1] = hv;
3351 natp = &nat_table[1][hv];
3353 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3354 nat->nat_phnext[1] = natp;
3355 nat->nat_hnext[1] = *natp;
3357 nat_stats.ns_bucketlen[1][hv]++;
3361 /* ------------------------------------------------------------------------ */
3362 /* Function: nat_outlookup */
3363 /* Returns: nat_t* - NULL == no match, */
3364 /* else pointer to matching NAT entry */
3365 /* Parameters: fin(I) - pointer to packet information */
3366 /* flags(I) - NAT flags for this packet */
3367 /* p(I) - protocol for this packet */
3368 /* src(I) - source IP address */
3369 /* dst(I) - destination IP address */
3372 /* Lookup a nat entry based on the source 'real' ip address/port and */
3373 /* destination address/port. We use this lookup when sending a packet out, */
3374 /* we're looking for a table entry, based on the source address. */
3376 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3378 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3379 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */
3381 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3382 /* the packet is of said protocol */
/* There is no rw parameter: the function takes the five arguments listed */
/* above. For the wildcard pass ipf_nat is released (RWLOCK_EXIT), */
/* re-entered for write (WRITE_ENTER) and later downgraded */
/* (MUTEX_DOWNGRADE). */
/* The search walks nat_table[0] along nat_hnext[0], in the bucket hashed */
/* from (srcip, sport) and then (dst, dport). */
/* A lookup can modify what it walks over: an entry whose nat_ifps[1] is */
/* still NULL has ifp stored in it, and a wildcard entry accepted by */
/* nat_wildok() has its ports filled in and its wildcard flags cleared. */
3383 /* ------------------------------------------------------------------------ */
3384 nat_t *nat_outlookup(fin, flags, p, src, dst)
3387 struct in_addr src , dst;
3389 u_short sport, dport;
3400 sflags = flags & IPN_TCPUDPICMP;
3408 sport = htons(fin->fin_data[0]);
3409 dport = htons(fin->fin_data[1]);
/* fin_data[1] is copied as is here, without the htons() used just above: */
/* into sport for IPN_ICMPERR lookups, into dport otherwise. */
3412 if (flags & IPN_ICMPERR)
3413 sport = fin->fin_data[1];
3415 dport = fin->fin_data[1];
3421 if ((flags & SI_WILDP) != 0)
3422 goto find_out_wild_ports;
/* Two-step hash: source address/port first, then destination address/port. */
3424 hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3425 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
3426 nat = nat_table[0][hv];
3427 for (; nat; nat = nat->nat_hnext[0]) {
/* Compare ifp with the interface recorded in the entry; when none is */
/* recorded yet, remember ifp in the entry. */
3428 if (nat->nat_ifps[1] != NULL) {
3429 if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3431 } else if (ifp != NULL)
3432 nat->nat_ifps[1] = ifp;
3434 nflags = nat->nat_flags;
3436 if (nat->nat_inip.s_addr == srcip &&
3437 nat->nat_oip.s_addr == dst.s_addr &&
3438 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3439 || (p == nat->nat_p))) {
3444 if (nat->nat_call[1] != fin->fin_data[0])
3450 if (nat->nat_oport != dport)
3452 if (nat->nat_inport != sport)
3460 if ((ipn != NULL) && (nat->nat_aps != NULL))
3461 if (appr_match(fin, nat) != 0)
3468 * So if we didn't find it but there are wildcard members in the hash
3469 * table, go back and look for them. We do this search and update here
3470 * because it is modifying the NAT table and we want to do this only
3471 * for the first packet that matches. The exception, of course, is
3472 * for "dummy" (FI_IGNORE) lookups.
3474 find_out_wild_ports:
3475 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3477 if (nat_stats.ns_wilds == 0)
/* Give up the current hold on ipf_nat and take it for write: the loop */
/* below may modify the entry it finds. The wildcard bucket is hashed with */
/* both ports taken as zero. */
3480 RWLOCK_EXIT(&ipf_nat);
3482 hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3483 hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
3485 WRITE_ENTER(&ipf_nat);
3487 nat = nat_table[0][hv];
3488 for (; nat; nat = nat->nat_hnext[0]) {
3489 if (nat->nat_ifps[1] != NULL) {
3490 if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3492 } else if (ifp != NULL)
3493 nat->nat_ifps[1] = ifp;
3495 if (nat->nat_p != fin->fin_p)
3497 if ((nat->nat_inip.s_addr != srcip) ||
3498 (nat->nat_oip.s_addr != dst.s_addr))
3501 nflags = nat->nat_flags;
3502 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3505 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3506 NAT_OUTBOUND) == 1) {
3507 if ((fin->fin_flx & FI_IGNORE) != 0)
3509 if ((nflags & SI_CLONE) != 0) {
3510 nat = fr_natclone(fin, nat);
/* The wildcard counter is adjusted under the ipf_nat_new mutex. */
3514 MUTEX_ENTER(&ipf_nat_new);
3515 nat_stats.ns_wilds--;
3516 MUTEX_EXIT(&ipf_nat_new);
/* Pin the entry to the ports of this packet; nat_outport is only taken */
/* from sport when it was still zero. */
3518 nat->nat_inport = sport;
3519 nat->nat_oport = dport;
3520 if (nat->nat_outport == 0)
3521 nat->nat_outport = sport;
3522 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3528 MUTEX_DOWNGRADE(&ipf_nat);
3534 /* ------------------------------------------------------------------------ */
3535 /* Function: nat_lookupredir */
3536 /* Returns: nat_t* - NULL == no match, */
3537 /* else pointer to matching NAT entry */
3538 /* Parameters: np(I) - pointer to description of packet to find NAT table */
3541 /* Lookup the NAT tables to search for a matching redirect */
3542 /* The contents of natlookup_t should imitate those found in a packet that */
3543 /* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3544 /* We can do the lookup in one of two ways, imitating an inbound or */
3545 /* outbound packet. By default we assume outbound, unless IPN_IN is set. */
3546 /* For IN, the fields are set as follows: */
3547 /* nl_real* = source information */
3548 /* nl_out* = destination information (translated) */
3549 /* For an out packet, the fields are set like this: */
3550 /* nl_in* = source information (untranslated) */
3551 /* nl_out* = destination information (translated) */
/* np is also written to. After a successful IPN_IN lookup nl_inip and */
/* nl_inport are copied from the entry; on the outbound path nl_realip and */
/* nl_realport are copied from nat_outip and nat_outport. When */
/* IPN_FINDFORWARD is set, a second nat_inlookup() is made and, if it finds */
/* an entry, IPN_FINDFORWARD is cleared from nl_flags. */
3552 /* ------------------------------------------------------------------------ */
3553 nat_t *nat_lookupredir(np)
/* fi is a zeroed stand-in for packet information; only the two ports and */
/* the protocol are filled in from np before the lookup. */
3559 bzero((char *)&fi, sizeof(fi));
3560 if (np->nl_flags & IPN_IN) {
3561 fi.fin_data[0] = ntohs(np->nl_realport);
3562 fi.fin_data[1] = ntohs(np->nl_outport);
3564 fi.fin_data[0] = ntohs(np->nl_inport);
3565 fi.fin_data[1] = ntohs(np->nl_outport);
3567 if (np->nl_flags & IPN_TCP)
3568 fi.fin_p = IPPROTO_TCP;
3569 else if (np->nl_flags & IPN_UDP)
3570 fi.fin_p = IPPROTO_UDP;
3571 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3572 fi.fin_p = IPPROTO_ICMP;
3575 * We can do two sorts of lookups:
3576 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3577 * - default: we have the `in' and `out' address, look for `real'.
3579 if (np->nl_flags & IPN_IN) {
3580 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3581 np->nl_realip, np->nl_outip))) {
3582 np->nl_inip = nat->nat_inip;
3583 np->nl_inport = nat->nat_inport;
3587 * If nl_inip is non null, this is a lookup based on the real
3588 * ip address. Else, we use the fake.
3590 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3591 np->nl_inip, np->nl_outip))) {
3593 if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3595 bzero((char *)&fin, sizeof(fin));
/* Second, inbound lookup built from the entry just found: its protocol, */
/* nat_outport and nat_oport. */
3596 fin.fin_p = nat->nat_p;
3597 fin.fin_data[0] = ntohs(nat->nat_outport);
3598 fin.fin_data[1] = ntohs(nat->nat_oport);
3599 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3601 nat->nat_oip) != NULL) {
3602 np->nl_flags &= ~IPN_FINDFORWARD;
3606 np->nl_realip = nat->nat_outip;
3607 np->nl_realport = nat->nat_outport;
3615 /* ------------------------------------------------------------------------ */
3616 /* Function: nat_match */
3617 /* Returns: int - 0 == no match, 1 == match */
3618 /* Parameters: fin(I) - pointer to packet information */
3619 /* np(I) - pointer to NAT rule */
3621 /* Pull the matching of a packet against a NAT rule out of that complex */
3622 /* loop inside fr_checknatin() and lay it out properly in its own function. */
/* The checks visible here are: IP version (fin_v against 4), protocol */
/* (only when the rule has in_p set), source and destination address under */
/* the rule masks, and finally the ports through fr_tcpudpchk(). */
/* Each address comparison is XORed with IPN_NOTSRC or IPN_NOTDST, so */
/* setting one of those rule flags inverts the sense of that comparison. */
3623 /* ------------------------------------------------------------------------ */
3624 static int nat_match(fin, np)
3630 if (fin->fin_v != 4)
3633 if (np->in_p && fin->fin_p != np->in_p)
3637 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
/* Addresses are compared against in_inip and in_srcip in this branch. */
3639 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3640 ^ ((np->in_flags & IPN_NOTSRC) != 0))
3642 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3643 ^ ((np->in_flags & IPN_NOTDST) != 0))
3646 if (!(np->in_redir & NAT_REDIRECT))
/* Addresses are compared against in_srcip and in_outip in this branch. */
3648 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3649 ^ ((np->in_flags & IPN_NOTSRC) != 0))
3651 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3652 ^ ((np->in_flags & IPN_NOTDST) != 0))
3657 if (!(fin->fin_flx & FI_TCPUDP) ||
3658 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3659 if (ft->ftu_scmp || ft->ftu_dcmp)
3664 return fr_tcpudpchk(fin, ft);
3668 /* ------------------------------------------------------------------------ */
3669 /* Function: nat_update */
/* Parameters: fin(I) - pointer to packet information */
3671 /* Parameters: nat(I) - pointer to NAT structure */
3672 /* np(I) - pointer to NAT rule */
3674 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */
3675 /* called with fin_rev updated - i.e. after calling nat_proto(). */
/* The work is done between MUTEX_ENTER and MUTEX_EXIT on nat->nat_lock. */
/* For a TCP entry with no rule timeout queue (ifq2 == NULL) the sequence */
/* tracking in nat_seqnext[] is refreshed and fr_tcp_age() is called; */
/* the other visible path ends in fr_movequeue(tqe, ifq, ifq2). */
3676 /* ------------------------------------------------------------------------ */
3677 void nat_update(fin, nat, np)
3682 ipftq_t *ifq, *ifq2;
3685 MUTEX_ENTER(&nat->nat_lock);
3686 tqe = &nat->nat_tqe;
3690 * We allow over-riding of NAT timeouts from NAT rules, even for
3691 * TCP, however, if it is TCP and there is no rule timeout set,
3692 * then do not update the timeout here.
3695 ifq2 = np->in_tqehead[fin->fin_rev];
3699 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3706 tcpflags = tcp->th_flags;
/* dsize: fin_dlen less the TCP header length, plus one each for SYN and */
/* FIN when those flags are set. */
3707 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
3708 ((tcpflags & TH_SYN) ? 1 : 0) +
3709 ((tcpflags & TH_FIN) ? 1 : 0);
3711 ack = ntohl(tcp->th_ack);
3712 end = ntohl(tcp->th_seq) + dsize;
/* The ACK advances nat_seqnext[] of the opposite direction; this */
/* direction only gets end stored while its slot is still zero. */
3714 if (SEQ_GT(ack, nat->nat_seqnext[1 - fin->fin_rev]))
3715 nat->nat_seqnext[1 - fin->fin_rev] = ack;
3717 if (nat->nat_seqnext[fin->fin_rev] == 0)
3718 nat->nat_seqnext[fin->fin_rev] = end;
3720 (void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3723 if (nat->nat_p == IPPROTO_UDP)
3725 else if (nat->nat_p == IPPROTO_ICMP)
3731 fr_movequeue(tqe, ifq, ifq2);
3733 MUTEX_EXIT(&nat->nat_lock);
3737 /* ------------------------------------------------------------------------ */
3738 /* Function: fr_checknatout */
3739 /* Returns: int - -1 == packet failed NAT checks so block it, */
3740 /* 0 == no packet translation occurred, */
3741 /* 1 == packet was successfully translated. */
3742 /* Parameters: fin(I) - pointer to packet information */
3743 /* passp(I) - pointer to filtering result flags */
3745 /* Check to see if an outgoing packet should be changed. ICMP packets are */
3746 /* first checked to see if they match an existing entry (if an error), */
3747 /* otherwise a search of the current NAT table is made. If neither results */
3748 /* in a match then a search for a matching NAT rule is made. Create a new */
3749 /* NAT entry if we matched a NAT rule. Lastly, actually change the */
3750 /* packet header(s) as required. */
/* Locking, in the order it appears: ipf_nat is entered for read before the */
/* session lookups; when no session is found it is released and re-entered */
/* for write while nat_rules[] is searched and nat_new() is called, then */
/* downgraded; it is released near the end of the function. */
/* fin_ifp is saved in sifp at the start and put back at the end. */
3751 /* ------------------------------------------------------------------------ */
3752 int fr_checknatout(fin, passp)
3756 struct ifnet *ifp, *sifp;
3757 icmphdr_t *icmp = NULL;
3758 tcphdr_t *tcp = NULL;
3759 int rval, natfailed;
3767 if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
3772 sifp = fin->fin_ifp;
3774 ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3775 if ((ifp != NULL) && (ifp != (void *)-1))
3780 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3793 * This is an incoming packet, so the destination is
3794 * the icmp_id and the source port equals 0
3796 if (nat_icmpquerytype4(icmp->icmp_type))
3797 nflags = IPN_ICMPQUERY;
3803 if ((nflags & IPN_TCPUDP))
3807 ipa = fin->fin_saddr;
/* Existing sessions are searched with ipf_nat entered for read. */
3809 READ_ENTER(&ipf_nat);
3811 if (((fin->fin_flx & FI_ICMPERR) != 0) &&
3812 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3814 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3816 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3817 fin->fin_src, fin->fin_dst))) {
3818 nflags = nat->nat_flags;
3820 u_32_t hv, msk, nmsk;
3823 * If there is no current entry in the nat table for this IP#,
3824 * create one for it (if there is a matching rule).
3826 RWLOCK_EXIT(&ipf_nat);
3829 WRITE_ENTER(&ipf_nat);
3831 iph = ipa & htonl(msk);
3832 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3833 for (np = nat_rules[hv]; np; np = np->in_mnext)
3835 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3837 if (np->in_v != fin->fin_v)
3839 if (np->in_p && (np->in_p != fin->fin_p))
3841 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3843 if (np->in_flags & IPN_FILTER) {
3844 if (!nat_match(fin, np))
3846 } else if ((ipa & np->in_inmsk) != np->in_inip)
3850 !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3853 if (*np->in_plabel != '\0') {
3854 if (((np->in_flags & IPN_FILTER) == 0) &&
3855 (np->in_dport != tcp->th_dport))
3857 if (appr_ok(fin, tcp, np) == 0)
3861 if ((nat = nat_new(fin, np, NULL, nflags,
3868 if ((np == NULL) && (nmsk != 0)) {
3871 if (nmsk & 0x80000000)
3880 MUTEX_DOWNGRADE(&ipf_nat);
3884 rval = fr_natout(fin, nat, natadd, nflags);
3886 MUTEX_ENTER(&nat->nat_lock);
3888 MUTEX_EXIT(&nat->nat_lock);
3889 nat->nat_touched = fr_ticks;
3894 RWLOCK_EXIT(&ipf_nat);
3899 fin->fin_flx |= FI_BADNAT;
3901 fin->fin_ifp = sifp;
3905 /* ------------------------------------------------------------------------ */
3906 /* Function: fr_natout */
3907 /* Returns: int - -1 == packet failed NAT checks so block it, */
3908 /* 1 == packet was successfully translated. */
3909 /* Parameters: fin(I) - pointer to packet information */
3910 /* nat(I) - pointer to NAT structure */
3911 /* natadd(I) - flag indicating if it is safe to add frag cache */
3912 /* nflags(I) - NAT flags set for this packet */
3914 /* Translate a packet coming "out" on an interface. */
/* Fields rewritten here: the IP source address becomes nat_outip; when */
/* nat_outport is non-zero the TCP/UDP source port (and fin_data[0]) or the */
/* ICMP query id becomes nat_outport. Checksums are adjusted by deltas kept */
/* in the NAT entry rather than recomputed. nat_bytes[1] and */
/* nat_stats.ns_mapped[1] are incremented and FI_NATED is set in fin_flx. */
3915 /* ------------------------------------------------------------------------ */
3916 int fr_natout(fin, nat, natadd, nflags)
3933 if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3934 (void) fr_nat_newfrag(fin, 0, nat);
3936 MUTEX_ENTER(&nat->nat_lock);
3937 nat->nat_bytes[1] += fin->fin_plen;
3939 MUTEX_EXIT(&nat->nat_lock);
3942 * Fix up checksums, not by recalculating them, but
3943 * simply computing adjustments.
3944 * This is only done for STREAMS based IP implementations where the
3945 * checksum has already been calculated by IP. In all other cases,
3946 * IPFilter is called before the checksum needs calculating so there
3947 * is no call to modify whatever is in the header now.
3949 if (fin->fin_v == 4) {
3950 if (nflags == IPN_ICMPERR) {
3951 u_32_t s1, s2, sumd;
3953 s1 = LONG_SUM(ntohl(fin->fin_saddr));
3954 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3955 CALC_SUMD(s1, s2, sumd);
3956 fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3958 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3959 defined(linux) || defined(BRIDGE_IPF)
3962 * Strictly speaking, this isn't necessary on BSD
3963 * kernels because they do checksum calculation after
3964 * this code has run BUT if ipfilter is being used
3965 * to do NAT as a bridge, that code doesn't exist.
3967 if (nat->nat_dir == NAT_OUTBOUND)
3968 fix_outcksum(fin, &fin->fin_ip->ip_sum,
3971 fix_incksum(fin, &fin->fin_ip->ip_sum,
3977 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3978 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3981 tcp->th_sport = nat->nat_outport;
3982 fin->fin_data[0] = ntohs(nat->nat_outport);
3985 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3987 icmp->icmp_id = nat->nat_outport;
3990 csump = nat_proto(fin, nat, nflags);
/* The source address in the packet becomes the translated nat_outip. */
3993 fin->fin_ip->ip_src = nat->nat_outip;
3995 nat_update(fin, nat, np);
3998 * The above comments do not hold for layer 4 (or higher) checksums...
4000 if (csump != NULL) {
4001 if (nat->nat_dir == NAT_OUTBOUND)
4002 fix_outcksum(fin, csump, nat->nat_sumd[1]);
4004 fix_incksum(fin, csump, nat->nat_sumd[1]);
4006 #ifdef IPFILTER_SYNC
4007 ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4009 /* ------------------------------------------------------------- */
4010 /* A few quick notes: */
4011 /* Following are test conditions prior to calling the */
4012 /* appr_check routine. */
4014 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4015 /* with a redirect rule, we attempt to match the packet's */
4016 /* source port against in_dport, otherwise we'd compare the */
4017 /* packet's destination. */
4018 /* ------------------------------------------------------------- */
4019 if ((np != NULL) && (np->in_apr != NULL)) {
4020 i = appr_check(fin, nat);
4025 ATOMIC_INCL(nat_stats.ns_mapped[1]);
4026 fin->fin_flx |= FI_NATED;
4031 /* ------------------------------------------------------------------------ */
4032 /* Function: fr_checknatin */
4033 /* Returns: int - -1 == packet failed NAT checks so block it, */
4034 /* 0 == no packet translation occurred, */
4035 /* 1 == packet was successfully translated. */
4036 /* Parameters: fin(I) - pointer to packet information */
4037 /* passp(I) - pointer to filtering result flags */
4039 /* Check to see if an incoming packet should be changed. ICMP packets are */
4040 /* first checked to see if they match an existing entry (if an error), */
4041 /* otherwise a search of the current NAT table is made. If neither results */
4042 /* in a match then a search for a matching NAT rule is made. Create a new */
4043 /* NAT entry if we matched a NAT rule. Lastly, actually change the */
4044 /* packet header(s) as required. */
/* Locking, in the order it appears: ipf_nat is entered for read before the */
/* session lookups; when no session is found it is released and re-entered */
/* for write while rdr_rules[] is searched and nat_new() is called, then */
/* downgraded; it is released near the end of the function. */
4045 /* ------------------------------------------------------------------------ */
4046 int fr_checknatin(fin, passp)
4050 u_int nflags, natadd;
4051 int rval, natfailed;
4061 if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
4072 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4085 * This is an incoming packet, so the destination is
4086 * the icmp_id and the source port equals 0
4088 if (nat_icmpquerytype4(icmp->icmp_type)) {
4089 nflags = IPN_ICMPQUERY;
4090 dport = icmp->icmp_id;
4096 if ((nflags & IPN_TCPUDP)) {
4098 dport = tcp->th_dport;
4104 READ_ENTER(&ipf_nat);
4106 if (((fin->fin_flx & FI_ICMPERR) != 0) &&
4107 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4109 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4111 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4112 fin->fin_src, in))) {
4113 nflags = nat->nat_flags;
4115 u_32_t hv, msk, rmsk;
/* No session found: swap the hold on ipf_nat for a write lock before */
/* searching the redirect rules. */
4117 RWLOCK_EXIT(&ipf_nat);
4120 WRITE_ENTER(&ipf_nat);
4122 * If there is no current entry in the nat table for this IP#,
4123 * create one for it (if there is a matching rule).
4126 iph = in.s_addr & htonl(msk);
4127 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
4128 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
4129 if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4131 if (np->in_v != fin->fin_v)
4133 if (np->in_p && (np->in_p != fin->fin_p))
4135 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4137 if (np->in_flags & IPN_FILTER) {
4138 if (!nat_match(fin, np))
4141 if ((in.s_addr & np->in_outmsk) != np->in_outip)
4144 ((ntohs(np->in_pmax) < ntohs(dport)) ||
4145 (ntohs(dport) < ntohs(np->in_pmin))))
4149 if (*np->in_plabel != '\0') {
4150 if (!appr_ok(fin, tcp, np)) {
4155 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4163 if ((np == NULL) && (rmsk != 0)) {
4166 if (rmsk & 0x80000000)
4175 MUTEX_DOWNGRADE(&ipf_nat);
4178 rval = fr_natin(fin, nat, natadd, nflags);
4180 MUTEX_ENTER(&nat->nat_lock);
4182 MUTEX_EXIT(&nat->nat_lock);
4183 nat->nat_touched = fr_ticks;
4188 RWLOCK_EXIT(&ipf_nat);
4193 fin->fin_flx |= FI_BADNAT;
4199 /* ------------------------------------------------------------------------ */
4200 /* Function: fr_natin */
4201 /* Returns: int - -1 == packet failed NAT checks so block it, */
4202 /* 1 == packet was successfully translated. */
4203 /* Parameters: fin(I) - pointer to packet information */
4204 /* nat(I) - pointer to NAT structure */
4205 /* natadd(I) - flag indicating if it is safe to add frag cache */
4206 /* nflags(I) - NAT flags set for this packet */
4207 /* Locks Held: ipf_nat (READ) */
4209 /* Translate a packet coming "in" on an interface. */
/* Fields rewritten here: the IP destination address (in the packet and in */
/* fin_fi.fi_daddr) becomes nat_inip; when nat_inport is non-zero the */
/* TCP/UDP destination port (and fin_data[1]) or the ICMP query id becomes */
/* nat_inport. fin_fr is set from nat_fr. nat_bytes[0] and */
/* nat_stats.ns_mapped[0] are incremented and FI_NATED is set in fin_flx. */
/* The checksum helpers are chosen the opposite way round to fr_natout(): */
/* fix_incksum() when nat_dir is NAT_OUTBOUND, fix_outcksum() otherwise. */
4210 /* ------------------------------------------------------------------------ */
4211 int fr_natin(fin, nat, natadd, nflags)
4226 fin->fin_fr = nat->nat_fr;
4229 if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4230 (void) fr_nat_newfrag(fin, 0, nat);
4232 /* ------------------------------------------------------------- */
4233 /* A few quick notes: */
4234 /* Following are test conditions prior to calling the */
4235 /* appr_check routine. */
4237 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4238 /* with a map rule, we attempt to match the packet's */
4239 /* source port against in_dport, otherwise we'd compare the */
4240 /* packet's destination. */
4241 /* ------------------------------------------------------------- */
4242 if (np->in_apr != NULL) {
4243 i = appr_check(fin, nat);
4250 #ifdef IPFILTER_SYNC
4251 ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4254 MUTEX_ENTER(&nat->nat_lock);
4255 nat->nat_bytes[0] += fin->fin_plen;
4257 MUTEX_EXIT(&nat->nat_lock);
/* The destination becomes nat_inip, both in the packet header and in the */
/* copy kept in fin. */
4259 fin->fin_ip->ip_dst = nat->nat_inip;
4260 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4261 if (nflags & IPN_TCPUDP)
4265 * Fix up checksums, not by recalculating them, but
4266 * simply computing adjustments.
4267 * Why only do this for some platforms on inbound packets ?
4268 * Because for those that it is done, IP processing is yet to happen
4269 * and so the IPv4 header checksum has not yet been evaluated.
4270 * Perhaps it should always be done for the benefit of things like
4271 * fast forwarding (so that it doesn't need to be recomputed) but with
4272 * header checksum offloading, perhaps it is a moot point.
4274 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4275 defined(__osf__) || defined(linux)
4276 if (nat->nat_dir == NAT_OUTBOUND)
4277 fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4279 fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4282 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4283 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4284 tcp->th_dport = nat->nat_inport;
4285 fin->fin_data[1] = ntohs(nat->nat_inport);
4289 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4292 icmp->icmp_id = nat->nat_inport;
4295 csump = nat_proto(fin, nat, nflags);
4298 nat_update(fin, nat, np);
4301 * The above comments do not hold for layer 4 (or higher) checksums...
4303 if (csump != NULL) {
4304 if (nat->nat_dir == NAT_OUTBOUND)
4305 fix_incksum(fin, csump, nat->nat_sumd[0]);
4307 fix_outcksum(fin, csump, nat->nat_sumd[0]);
4309 ATOMIC_INCL(nat_stats.ns_mapped[0]);
4310 fin->fin_flx |= FI_NATED;
4311 if (np != NULL && np->in_tag.ipt_num[0] != 0)
4312 fin->fin_nattag = &np->in_tag;
4317 /* ------------------------------------------------------------------------ */
4318 /* Function: nat_proto */
4319 /* Returns: u_short* - pointer to transport header checksum to update, */
4320 /* NULL if the transport protocol is not recognised */
4321 /* as needing a checksum update. */
4322 /* Parameters: fin(I) - pointer to packet information */
4323 /* nat(I) - pointer to NAT structure */
4324 /* nflags(I) - NAT flags set for this packet */
4326 /* Return the pointer to the checksum field for each protocol so understood.*/
4327 /* If support for making other changes to a protocol header is required, */
4328 /* that is not strictly 'address' translation, such as clamping the MSS in */
4329 /* TCP down to a specific value, then do it from here. */
/* As a side effect fin_rev is set by comparing the packet's direction */
/* (fin_out) with the direction recorded in the NAT entry (nat_dir). */
4330 /* ------------------------------------------------------------------------ */
4331 u_short *nat_proto(fin, nat, nflags)
4342 if (fin->fin_out == 0) {
4343 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4345 fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4353 csump = &tcp->th_sum;
4356 * Do a MSS CLAMPING on a SYN packet,
4357 * only deal IPv4 for now.
4359 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4360 nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
4368 csump = &udp->uh_sum;
4374 if ((nflags & IPN_ICMPQUERY) != 0) {
/* csump is only pointed at the ICMP checksum when that field is non-zero. */
4375 if (icmp->icmp_cksum != 0)
4376 csump = &icmp->icmp_cksum;
4384 /* ------------------------------------------------------------------------ */
4385 /* Function: fr_natunload */
4387 /* Parameters: Nil */
4389 /* Free all memory used by NAT structures allocated at runtime. */
/* Rules and active entries are flushed first, then the user-defined */
/* timeout queues, the hash/rule/hostmap tables and finally the locks. */
4390 /* ------------------------------------------------------------------------ */
4393 ipftq_t *ifq, *ifqnext;
4395 (void) nat_clearlist();
4396 (void) nat_flushtable();
4399 * Proxy timeout queues are not cleaned here because although they
4400 * exist on the NAT list, appr_unload is called after fr_natunload
4401 * and the proxies actually are responsible for them being created.
4402 * Should the proxy timeouts have their own list? There's no real
4403 * justification as this is the only complication.
4405 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4406 ifqnext = ifq->ifq_next;
/* Non-proxy queues are freed once fr_deletetimeoutqueue() returns 0. */
4407 if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4408 (fr_deletetimeoutqueue(ifq) == 0))
4409 fr_freetimeoutqueue(ifq);
4412 if (nat_table[0] != NULL) {
4413 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4414 nat_table[0] = NULL;
4416 if (nat_table[1] != NULL) {
4417 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4418 nat_table[1] = NULL;
4420 if (nat_rules != NULL) {
4421 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4424 if (rdr_rules != NULL) {
4425 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4428 if (ipf_hm_maptable != NULL) {
4429 KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4430 ipf_hm_maptable = NULL;
4432 if (nat_stats.ns_bucketlen[0] != NULL) {
/* NOTE(review): the size passed here uses sizeof(u_long *) whereas */
/* nat_gettable() copies these arrays out as sizeof(u_long) elements - */
/* confirm this matches the size used when the arrays were allocated. */
4433 KFREES(nat_stats.ns_bucketlen[0],
4434 sizeof(u_long *) * ipf_nattable_sz);
4435 nat_stats.ns_bucketlen[0] = NULL;
4437 if (nat_stats.ns_bucketlen[1] != NULL) {
4438 KFREES(nat_stats.ns_bucketlen[1],
4439 sizeof(u_long *) * ipf_nattable_sz);
4440 nat_stats.ns_bucketlen[1] = NULL;
4443 if (fr_nat_maxbucket_reset == 1)
4444 fr_nat_maxbucket = 0;
4446 if (fr_nat_init == 1) {
/* Tear down the standard timeout queue table and the NAT locks. */
4448 fr_sttab_destroy(nat_tqb);
4450 RW_DESTROY(&ipf_natfrag);
4451 RW_DESTROY(&ipf_nat);
4453 MUTEX_DESTROY(&ipf_nat_new);
4454 MUTEX_DESTROY(&ipf_natio);
4456 MUTEX_DESTROY(&nat_udptq.ifq_lock);
4457 MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4458 MUTEX_DESTROY(&nat_iptq.ifq_lock);
4463 /* ------------------------------------------------------------------------ */
4464 /* Function: fr_natexpire */
4466 /* Parameters: Nil */
4468 /* Check all of the timeout queues for entries at the top which need to be */
4470 /* ------------------------------------------------------------------------ */
4473 ipftq_t *ifq, *ifqnext;
4474 ipftqent_t *tqe, *tqn;
4479 WRITE_ENTER(&ipf_nat);
/* Pass 1: the standard queues (nat_tqb), each walked from its head. */
/* tqn is advanced before nat_delete() is called on the current entry. */
4480 for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4481 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4482 if (tqe->tqe_die > fr_ticks)
4484 tqn = tqe->tqe_next;
4485 nat_delete(tqe->tqe_parent, NL_EXPIRE);
4489 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
/* Pass 2: the same walk over the user-defined queues (nat_utqe). */
4490 ifqnext = ifq->ifq_next;
4492 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4493 if (tqe->tqe_die > fr_ticks)
4495 tqn = tqe->tqe_next;
4496 nat_delete(tqe->tqe_parent, NL_EXPIRE);
4500 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
/* Pass 3: free user-defined queues that are flagged IFQF_DELETE and */
/* have no references left. */
4501 ifqnext = ifq->ifq_next;
4503 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4504 (ifq->ifq_ref == 0)) {
4505 fr_freetimeoutqueue(ifq);
4509 if (fr_nat_doflush != 0) {
4514 RWLOCK_EXIT(&ipf_nat);
4519 /* ------------------------------------------------------------------------ */
4520 /* Function: fr_natsync */
4522 /* Parameters: ifp(I) - pointer to network interface */
4524 /* Walk through all of the currently active NAT sessions, looking for those */
4525 /* which need to have their translated address updated. */
/* A NULL ifp matches every session and every rule. The interface */
/* pointers stored in the NAT rules are re-resolved as well. */
4526 /* ------------------------------------------------------------------------ */
4527 void fr_natsync(ifp)
4530 u_32_t sum1, sum2, sumd;
4537 if (fr_running <= 0)
4541 * Change IP addresses for NAT sessions for any protocol except TCP
4542 * since it will break the TCP connection anyway. The only rules
4543 * which will get changed are those which are "map ... -> 0/32",
4544 * where the rule specifies the address is taken from the interface.
4547 WRITE_ENTER(&ipf_nat);
/* fr_running is tested again now that the write lock is held. */
4549 if (fr_running <= 0) {
4550 RWLOCK_EXIT(&ipf_nat);
4554 for (nat = nat_instances; nat; nat = nat->nat_next) {
4555 if ((nat->nat_flags & IPN_TCP) != 0)
4559 (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4561 if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4562 (ifp == nat->nat_ifps[1]))) {
4563 nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4564 if (nat->nat_ifnames[1][0] != '\0') {
4565 nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4568 nat->nat_ifps[1] = nat->nat_ifps[0];
4569 ifp2 = nat->nat_ifps[0];
4574 * Change the map-to address to be the same as the
4577 sum1 = nat->nat_outip.s_addr;
4578 if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4579 nat->nat_outip = in;
4580 sum2 = nat->nat_outip.s_addr;
4585 * Readjust the checksum adjustment to take into
4586 * account the new IP#.
4588 CALC_SUMD(sum1, sum2, sumd);
4589 /* XXX - dont change for TCP when solaris does
4590 * hardware checksumming.
4592 sumd += nat->nat_sumd[0];
4593 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4594 nat->nat_sumd[1] = nat->nat_sumd[0];
4598 for (n = nat_list; (n != NULL); n = n->in_next) {
/* Re-resolve the rule's interface pointers from its interface names. */
4599 if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4600 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4601 if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4602 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4604 RWLOCK_EXIT(&ipf_nat);
4609 /* ------------------------------------------------------------------------ */
4610 /* Function: nat_icmpquerytype4 */
4611 /* Returns: int - 1 == success, 0 == failure */
4612 /* Parameters: icmptype(I) - ICMP type number */
4614 /* Tests to see if the ICMP type number passed is a query/response type or */
4616 /* ------------------------------------------------------------------------ */
4617 static int nat_icmpquerytype4(icmptype)
4622 * For the ICMP query NAT code, it is essential that both the query
4623 * and the reply match on the NAT rule. Because the NAT structure
4624 * does not keep track of the icmptype, and a single NAT structure
4625 * is used for all icmp types with the same src, dest and id, we
4626 * simply define the replies as queries as well. The funny thing is,
4627 * although it seems silly to call a reply a query, this is exactly
4628 * as it is defined in the IPv4 specification
4634 case ICMP_ECHOREPLY:
4636 /* route advertisement/solicitation is currently unsupported: */
4637 /* it would require rewriting the ICMP data section */
4639 case ICMP_TSTAMPREPLY:
4641 case ICMP_IREQREPLY:
4643 case ICMP_MASKREPLY:
4651 /* ------------------------------------------------------------------------ */
4652 /* Function: nat_log */
4654 /* Parameters: nat(I) - pointer to NAT structure */
4655 /* type(I) - type of log entry to create */
4657 /* Creates a NAT log entry. */
/* The entry's addresses, ports, protocol and packet/byte counters are */
/* copied into the record, together with the position in nat_list of the */
/* rule the entry points at (when nat_ptr is set). The record is then */
/* handed to ipllog() for the IPL_LOGNAT device. */
4658 /* ------------------------------------------------------------------------ */
4659 void nat_log(nat, type)
4673 natl.nl_inip = nat->nat_inip;
4674 natl.nl_outip = nat->nat_outip;
4675 natl.nl_origip = nat->nat_oip;
4676 natl.nl_bytes[0] = nat->nat_bytes[0];
4677 natl.nl_bytes[1] = nat->nat_bytes[1];
4678 natl.nl_pkts[0] = nat->nat_pkts[0];
4679 natl.nl_pkts[1] = nat->nat_pkts[1];
4680 natl.nl_origport = nat->nat_oport;
4681 natl.nl_inport = nat->nat_inport;
4682 natl.nl_outport = nat->nat_outport;
4683 natl.nl_p = nat->nat_p;
4684 natl.nl_type = type;
4687 if (nat->nat_ptr != NULL) {
/* Walk nat_list, counting, until this entry's rule is found. */
4688 for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4689 if (np == nat->nat_ptr) {
4690 natl.nl_rule = rulen;
4696 sizes[0] = sizeof(natl);
4699 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4704 #if defined(__OpenBSD__)
4705 /* ------------------------------------------------------------------------ */
4706 /* Function: nat_ifdetach */
4708 /* Parameters: ifp(I) - pointer to network interface */
4710 /* Compatibility interface for OpenBSD to trigger the correct updating of */
4711 /* interface references within IPFilter. */
4712 /* ------------------------------------------------------------------------ */
4713 void nat_ifdetach(ifp)
4722 /* ------------------------------------------------------------------------ */
4723 /* Function: fr_ipnatderef */
4725 /* Parameters: inp(I) - pointer to pointer to NAT rule */
4726 /* Write Locks: ipf_nat */
/* The rule's resources are released only when it is no longer in use */
/* (in_use == 0) and it has been flagged IPN_DELETE. */
4728 /* ------------------------------------------------------------------------ */
4729 void fr_ipnatderef(inp)
4738 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
/* Unused and flagged for deletion: release what the rule holds. */
4740 appr_free(in->in_apr);
4741 MUTEX_DESTROY(&in->in_lock);
4743 nat_stats.ns_rules--;
4744 #if SOLARIS && !defined(_INET_IP_STACK_H)
/* The last NAT rule has gone: set pfil_delayed_copy. */
4745 if (nat_stats.ns_rules == 0)
4746 pfil_delayed_copy = 1;
4752 /* ------------------------------------------------------------------------ */
4753 /* Function: fr_natderef */
4755 /* Parameters: natp(I) - pointer to pointer to NAT table entry */
4757 /* Decrement the reference counter for this NAT table entry and free it if */
4758 /* there are no more things using it. */
4760 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4761 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4762 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */
4763 /* because nat_delete() will do that and send nat_ref to -1. */
4765 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4766 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4767 /* ------------------------------------------------------------------------ */
4768 void fr_natderef(natp)
4776 MUTEX_ENTER(&nat->nat_lock);
/* nat_ref is examined while nat_lock is held. */
4777 if (nat->nat_ref > 1) {
4779 MUTEX_EXIT(&nat->nat_lock);
4782 MUTEX_EXIT(&nat->nat_lock);
/* nat_ref was not above one: delete the entry with ipf_nat write-locked. */
4784 WRITE_ENTER(&ipf_nat);
4785 nat_delete(nat, NL_EXPIRE);
4786 RWLOCK_EXIT(&ipf_nat);
4790 /* ------------------------------------------------------------------------ */
4791 /* Function: fr_natclone */
4792 /* Returns: nat_t* - NULL == cloning failed, */
4793 /* else pointer to new NAT structure */
4794 /* Parameters: fin(I) - pointer to packet information */
4795 /* nat(I) - pointer to master NAT structure */
4796 /* Write Lock: ipf_nat */
4798 /* Create a "duplicate" NAT table entry from the master. */
4799 /* ------------------------------------------------------------------------ */
4800 static nat_t *fr_natclone(fin, nat)
4808 KMALLOC(clone, nat_t *);
4811 bcopy((char *)nat, (char *)clone, sizeof(*clone));
/* The copy must not share the master's lock or its nat_aps pointer. */
4813 MUTEX_NUKE(&clone->nat_lock);
4815 clone->nat_aps = NULL;
4817 * Initialize all these so that nat_delete() doesn't cause a crash.
4819 clone->nat_tqe.tqe_pnext = NULL;
4820 clone->nat_tqe.tqe_next = NULL;
4821 clone->nat_tqe.tqe_ifq = NULL;
4822 clone->nat_tqe.tqe_parent = clone;
/* Swap SI_CLONE for SI_CLONED on the copy. */
4824 clone->nat_flags &= ~SI_CLONE;
4825 clone->nat_flags |= SI_CLONED;
4828 clone->nat_hm->hm_ref++;
4830 if (nat_insert(clone, fin->fin_rev) == -1) {
4834 np = clone->nat_ptr;
4837 nat_log(clone, (u_int)np->in_redir);
4842 MUTEX_ENTER(&fr->fr_lock);
4844 MUTEX_EXIT(&fr->fr_lock);
4848 * Because the clone is created outside the normal loop of things and
4849 * TCP has special needs in terms of state, initialise the timeout
4850 * state of the new NAT from here.
4852 if (clone->nat_p == IPPROTO_TCP) {
4853 (void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4856 #ifdef IPFILTER_SYNC
4857 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4860 nat_log(clone, NL_CLONE);
4865 /* ------------------------------------------------------------------------ */
4866 /* Function: nat_wildok */
4867 /* Returns: int - 1 == packet's ports match wildcards */
4868 /* 0 == packet's ports don't match wildcards */
4869 /* Parameters: nat(I) - NAT entry */
4870 /* sport(I) - source port */
4871 /* dport(I) - destination port */
4872 /* flags(I) - wildcard flags */
4873 /* dir(I) - packet direction */
4875 /* Use NAT entry and packet direction to determine which combination of */
4876 /* wildcard flags should be used. */
4877 /* ------------------------------------------------------------------------ */
4878 static int nat_wildok(nat, sport, dport, flags, dir)
4886 * When called by dir is set to
4887 * nat_inlookup NAT_INBOUND (0)
4888 * nat_outlookup NAT_OUTBOUND (1)
4890 * We simply combine the packet's direction in dir with the original
4891 * "intended" direction of that NAT entry in nat->nat_dir to decide
4892 * which combination of wildcard flags to allow.
4895 switch ((dir << 1) | nat->nat_dir)
/* Selector: bit 1 is the packet's direction, bit 0 the entry's direction. */
4897 case 3: /* outbound packet / outbound entry */
4898 if (((nat->nat_inport == sport) ||
4899 (flags & SI_W_SPORT)) &&
4900 ((nat->nat_oport == dport) ||
4901 (flags & SI_W_DPORT)))
4904 case 2: /* outbound packet / inbound entry */
/* In cases 2 and 1 the packet and the entry have different directions, */
/* so the wildcard flags are applied crosswise: SI_W_DPORT excuses the */
/* source port comparison and SI_W_SPORT the destination port one. */
4905 if (((nat->nat_outport == sport) ||
4906 (flags & SI_W_DPORT)) &&
4907 ((nat->nat_oport == dport) ||
4908 (flags & SI_W_SPORT)))
4911 case 1: /* inbound packet / outbound entry */
4912 if (((nat->nat_oport == sport) ||
4913 (flags & SI_W_DPORT)) &&
4914 ((nat->nat_outport == dport) ||
4915 (flags & SI_W_SPORT)))
4918 case 0: /* inbound packet / inbound entry */
4919 if (((nat->nat_oport == sport) ||
4920 (flags & SI_W_SPORT)) &&
4921 ((nat->nat_outport == dport) ||
4922 (flags & SI_W_DPORT)))
4933 /* ------------------------------------------------------------------------ */
4934 /* Function: nat_mssclamp */
4936 /* Parameters: tcp(I) - pointer to TCP header */
4937 /* maxmss(I) - value to clamp the TCP MSS to */
4938 /* fin(I) - pointer to packet information */
4939 /* csump(I) - pointer to TCP checksum */
4941 /* Check for MSS option and clamp it if necessary. If found and changed, */
4942 /* then the TCP header checksum will be updated to reflect the change in */
/* The MSS value is read from, and written back to, bytes 2 and 3 of the */
/* option, most significant byte first. */
4944 /* ------------------------------------------------------------------------ */
4945 static void nat_mssclamp(tcp, maxmss, fin, csump)
4951 u_char *cp, *ep, opt;
4955 hlen = TCP_OFF(tcp) << 2;
4956 if (hlen > sizeof(*tcp)) {
/* Options occupy the bytes between the fixed TCP header and hlen. */
4957 cp = (u_char *)tcp + sizeof(*tcp);
4958 ep = (u_char *)tcp + hlen;
4962 if (opt == TCPOPT_EOL)
4964 else if (opt == TCPOPT_NOP) {
4972 if ((cp + advance > ep) || (advance <= 0))
4979 mss = cp[2] * 256 + cp[3];
4981 cp[2] = maxmss / 256;
4982 cp[3] = maxmss & 0xff;
/* Adjust the TCP checksum for the change from mss to maxmss. */
4983 CALC_SUMD(mss, maxmss, sumd);
4984 fix_outcksum(fin, csump, sumd);
4988 /* ignore unknown options */
4998 /* ------------------------------------------------------------------------ */
4999 /* Function: fr_setnatqueue */
5001 /* Parameters: nat(I)- pointer to NAT structure */
5002 /* rev(I) - forward(0) or reverse(1) direction */
5003 /* Locks: ipf_nat (read or write) */
5005 /* Put the NAT entry on its default queue entry, using rev as a helper in */
5006 /* determining which queue it should be placed on. */
/* When the entry has a rule, that rule's in_tqehead[rev] queue is looked */
/* at first. */
5007 /* ------------------------------------------------------------------------ */
5008 void fr_setnatqueue(nat, rev)
5012 ipftq_t *oifq, *nifq;
5014 if (nat->nat_ptr != NULL)
5015 nifq = nat->nat_ptr->in_tqehead[rev];
5029 nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
5037 oifq = nat->nat_tqe.tqe_ifq;
5039 * If it's currently on a timeout queue, move it from one queue to
5040 * another, else put it on the end of the newly determined queue.
5043 fr_movequeue(&nat->nat_tqe, oifq, nifq);
5045 fr_queueappend(&nat->nat_tqe, nifq, nat);
5050 /* ------------------------------------------------------------------------ */
5051 /* Function: nat_getnext */
5052 /* Returns: int - 0 == ok, else error */
5053 /* Parameters: t(I) - pointer to ipftoken structure */
5054 /* itp(I) - pointer to ipfgeniter_t structure */
5056 /* Fetch the next nat/ipnat structure pointer from the linked list and */
5057 /* copy it out to the storage space pointed to by itp_data. The next item */
5058 /* in the list to look at is put back in the ipftoken structure. */
5059 /* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
5060 /* ipf_freetoken will call a deref function for us and we don't want to call*/
5061 /* that twice (second time would be in the second switch statement below). */
5062 /* ------------------------------------------------------------------------ */
5063 static int nat_getnext(t, itp)
5067 hostmap_t *hm, *nexthm = NULL, zerohm;
5068 ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5069 nat_t *nat, *nextnat = NULL, zeronat;
5070 int error = 0, count;
5073 count = itp->igi_nitems;
5077 READ_ENTER(&ipf_nat);
/* Pick the next element of the requested list: either the list head or */
/* the successor of the current element. */
5079 switch (itp->igi_type)
5081 case IPFGENITER_HOSTMAP :
5084 nexthm = ipf_hm_maplist;
5086 nexthm = hm->hm_next;
5090 case IPFGENITER_IPNAT :
5093 nextipnat = nat_list;
5095 nextipnat = ipn->in_next;
5099 case IPFGENITER_NAT :
5102 nextnat = nat_instances;
5104 nextnat = nat->nat_next;
5108 RWLOCK_EXIT(&ipf_nat);
5112 dst = itp->igi_data;
/* A reference is taken on the element about to be copied out and the */
/* element is remembered in the token; a zeroed structure is prepared */
/* for the case where there is no next element. */
5114 switch (itp->igi_type)
5116 case IPFGENITER_HOSTMAP :
5117 if (nexthm != NULL) {
5119 ATOMIC_INC32(nexthm->hm_ref);
5120 t->ipt_data = nexthm;
5123 bzero(&zerohm, sizeof(zerohm));
5130 case IPFGENITER_IPNAT :
5131 if (nextipnat != NULL) {
5133 MUTEX_ENTER(&nextipnat->in_lock);
5134 nextipnat->in_use++;
5135 MUTEX_EXIT(&nextipnat->in_lock);
5136 t->ipt_data = nextipnat;
5139 bzero(&zeroipn, sizeof(zeroipn));
5140 nextipnat = &zeroipn;
5146 case IPFGENITER_NAT :
5147 if (nextnat != NULL) {
5149 MUTEX_ENTER(&nextnat->nat_lock);
5151 MUTEX_EXIT(&nextnat->nat_lock);
5152 t->ipt_data = nextnat;
5155 bzero(&zeronat, sizeof(zeronat));
5164 RWLOCK_EXIT(&ipf_nat);
5167 * Copying out to user space needs to be done without the lock.
5169 switch (itp->igi_type)
5171 case IPFGENITER_HOSTMAP :
5172 error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5176 dst += sizeof(*nexthm);
5179 case IPFGENITER_IPNAT :
5180 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5184 dst += sizeof(*nextipnat);
5187 case IPFGENITER_NAT :
5188 error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5192 dst += sizeof(*nextnat);
5196 if ((count == 1) || (error != 0))
5201 READ_ENTER(&ipf_nat);
5204 * We need to have the lock again here to make sure that
5205 * using _next is consistent.
5207 switch (itp->igi_type)
5209 case IPFGENITER_HOSTMAP :
5210 nexthm = nexthm->hm_next;
5212 case IPFGENITER_IPNAT :
5213 nextipnat = nextipnat->in_next;
5215 case IPFGENITER_NAT :
5216 nextnat = nextnat->nat_next;
5222 switch (itp->igi_type)
/* NOTE(review): this switch appears to drop the reference held on the */
/* element that was current on entry (e.g. fr_ipnatderef(&ipn)) - confirm. */
5224 case IPFGENITER_HOSTMAP :
5226 WRITE_ENTER(&ipf_nat);
5228 RWLOCK_EXIT(&ipf_nat);
5231 case IPFGENITER_IPNAT :
5233 fr_ipnatderef(&ipn);
5236 case IPFGENITER_NAT :
5249 /* ------------------------------------------------------------------------ */
5250 /* Function: nat_iterator */
5251 /* Returns: int - 0 == ok, else error */
5252 /* Parameters: token(I) - pointer to ipftoken structure */
5253 /* itp(I) - pointer to ipfgeniter_t structure */
5255 /* This function acts as a handler for the SIOCGENITER ioctls that use a */
5256 /* generic structure to iterate through a list. There are four different */
5257 /* linked lists of NAT related information to go through: host maps, NAT */
5258 /* rules, active NAT mappings and the NAT fragment cache. */
/* The first three are served by nat_getnext(), the last by fr_nextfrag(). */
5259 /* ------------------------------------------------------------------------ */
5260 static int nat_iterator(token, itp)
5266 if (itp->igi_data == NULL)
5269 token->ipt_subtype = itp->igi_type;
5271 switch (itp->igi_type)
5273 case IPFGENITER_HOSTMAP :
5274 case IPFGENITER_IPNAT :
5275 case IPFGENITER_NAT :
5276 error = nat_getnext(token, itp);
5279 case IPFGENITER_NATFRAG :
5281 error = fr_nextfrag(token, itp, &ipfr_natlist,
5282 &ipfr_nattail, &ipf_natfrag);
5284 error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5296 /* ------------------------------------------------------------------------ */
5297 /* Function: nat_extraflush */
5298 /* Returns: int - 0 == success, -1 == failure */
5299 /* Parameters: which(I) - how to flush the active NAT table */
5300 /* Write Locks: ipf_nat */
5302 /* Flush nat tables. Three actions currently defined: */
5303 /* which == 0 : flush all nat table entries */
5304 /* which == 1 : flush TCP connections which have started to close but are */
5305 /* stuck for some reason. */
5306 /* which == 2 : flush TCP connections which have been idle for a long time, */
5307 /* starting at > 4 days idle and working back in successive half-*/
5308 /* days to at most 12 hours old. If this fails to free enough */
5309 /* slots then work backwards in half hour slots to 30 minutes. */
5310 /* If that too fails, then work backwards in 30 second intervals */
5311 /* for the last 30 minutes to at worst 30 seconds idle. */
/* In addition, a which equal to one of the IPF_TCPS_* values listed in the */
/* switch below flushes the standard timeout queue for that TCP state. */
5312 /* ------------------------------------------------------------------------ */
5313 static int nat_extraflush(which)
5316 ipftq_t *ifq, *ifqnext;
5330 * Style 0 flush removes everything...
5332 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5333 nat_delete(nat, NL_FLUSH);
5340 * Since we're only interested in things that are closing,
5341 * we can start with the appropriate timeout queue.
5343 for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5344 ifq = ifq->ifq_next) {
5346 for (tqn = ifq->ifq_head; tqn != NULL; ) {
/* tqn is advanced before the entry it pointed at can be deleted. */
5347 nat = tqn->tqe_parent;
5348 tqn = tqn->tqe_next;
5349 if (nat->nat_p != IPPROTO_TCP)
5351 nat_delete(nat, NL_EXPIRE);
5357 * Also need to look through the user defined queues.
5359 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5360 ifqnext = ifq->ifq_next;
5361 for (tqn = ifq->ifq_head; tqn != NULL; ) {
5362 nat = tqn->tqe_parent;
5363 tqn = tqn->tqe_next;
5364 if (nat->nat_p != IPPROTO_TCP)
5367 if ((nat->nat_tcpstate[0] >
5368 IPF_TCPS_ESTABLISHED) &&
5369 (nat->nat_tcpstate[1] >
5370 IPF_TCPS_ESTABLISHED)) {
/* Both directions are past ESTABLISHED: expire the entry. */
5371 nat_delete(nat, NL_EXPIRE);
5379 * Args 5-11 correspond to flushing those particular states
5380 * for TCP connections.
5382 case IPF_TCPS_CLOSE_WAIT :
5383 case IPF_TCPS_FIN_WAIT_1 :
5384 case IPF_TCPS_CLOSING :
5385 case IPF_TCPS_LAST_ACK :
5386 case IPF_TCPS_FIN_WAIT_2 :
5387 case IPF_TCPS_TIME_WAIT :
5388 case IPF_TCPS_CLOSED :
/* which doubles as the index into nat_tqb for these TCP states. */
5389 tqn = nat_tqb[which].ifq_head;
5390 while (tqn != NULL) {
5391 nat = tqn->tqe_parent;
5392 tqn = tqn->tqe_next;
5393 nat_delete(nat, NL_FLUSH);
5403 * Take a large arbitrary number to mean the number of seconds
5404 * which we consider to be the maximum value we'll allow
5405 * the expiration to be.
5407 which = IPF_TTLVAL(which);
5408 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5409 if (fr_ticks - nat->nat_touched > which) {
5410 nat_delete(nat, NL_FLUSH);
5413 natp = &nat->nat_next;
5424 * Asked to remove inactive entries because the table is full.
5426 if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
5427 nat_last_force_flush = fr_ticks;
5428 removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5436 /* ------------------------------------------------------------------------ */
5437 /* Function: nat_flush_entry */
5438 /* Returns: 0 - always succeeds */
5439 /* Parameters: entry(I) - pointer to NAT entry */
5440 /* Write Locks: ipf_nat */
5442 /* This function is a stepping stone between ipf_queueflush() and */
5443 /* nat_delete(). It is used so we can provide a uniform interface via the */
5444 /* ipf_queueflush() function. Since the nat_delete() function returns void */
5445 /* we translate that to mean it always succeeds in deleting something. */
5446 /* ------------------------------------------------------------------------ */
5447 static int nat_flush_entry(entry)
5450 nat_delete(entry, NL_FLUSH);
5455 /* ------------------------------------------------------------------------ */
5456 /* Function: nat_gettable */
5457 /* Returns: int - 0 = success, else error */
5458 /* Parameters: data(I) - pointer to ioctl data */
5460 /* This function handles ioctl requests for tables of nat information. */
5461 /* At present the only table it deals with is the hash bucket statistics. */
5462 /* ------------------------------------------------------------------------ */
5463 static int nat_gettable(data)
5469 error = fr_inobj(data, &table, IPFOBJ_GTABLE);
5473 switch (table.ita_type)
5475 case IPFTABLE_BUCKETS_NATIN :
5476 error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table,
5477 ipf_nattable_sz * sizeof(u_long));
5480 case IPFTABLE_BUCKETS_NATOUT :
5481 error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table,
5482 ipf_nattable_sz * sizeof(u_long));