]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/contrib/ipfilter/netinet/ip_nat.c
This commit was generated by cvs2svn to compensate for changes in r94213,
[FreeBSD/FreeBSD.git] / sys / contrib / ipfilter / netinet / ip_nat.c
1 /*
2  * Copyright (C) 1995-2001 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
7  */
8
9 #if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL)
10 #define _KERNEL
11 #endif
12
13 #ifdef __sgi
14 # include <sys/ptimers.h>
15 #endif
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
19 #include <sys/time.h>
20 #include <sys/file.h>
21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22     defined(_KERNEL)
23 # include "opt_ipfilter_log.h"
24 #endif
25 #if !defined(_KERNEL) && !defined(KERNEL)
26 # include <stdio.h>
27 # include <string.h>
28 # include <stdlib.h>
29 #endif
30 #if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
31 # include <sys/filio.h>
32 # include <sys/fcntl.h>
33 #else
34 # include <sys/ioctl.h>
35 #endif
36 #include <sys/fcntl.h>
37 #ifndef linux
38 # include <sys/protosw.h>
39 #endif
40 #include <sys/socket.h>
41 #if defined(_KERNEL) && !defined(linux)
42 # include <sys/systm.h>
43 #endif
44 #if !defined(__SVR4) && !defined(__svr4__)
45 # ifndef linux
46 #  include <sys/mbuf.h>
47 # endif
48 #else
49 # include <sys/filio.h>
50 # include <sys/byteorder.h>
51 # ifdef _KERNEL
52 #  include <sys/dditypes.h>
53 # endif
54 # include <sys/stream.h>
55 # include <sys/kmem.h>
56 #endif
57 #if __FreeBSD_version >= 300000
58 # include <sys/queue.h>
59 #endif
60 #include <net/if.h>
61 #if __FreeBSD_version >= 300000
62 # include <net/if_var.h>
63 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
64 #  include "opt_ipfilter.h"
65 # endif
66 #endif
67 #ifdef sun
68 # include <net/af.h>
69 #endif
70 #include <net/route.h>
71 #include <netinet/in.h>
72 #include <netinet/in_systm.h>
73 #include <netinet/ip.h>
74
75 #ifdef __sgi
76 # ifdef IFF_DRVRLOCK /* IRIX6 */
77 #include <sys/hashing.h>
78 #include <netinet/in_var.h>
79 # endif
80 #endif
81
82 #ifdef RFC1825
83 # include <vpn/md5.h>
84 # include <vpn/ipsec.h>
85 extern struct ifnet vpnif;
86 #endif
87
88 #ifndef linux
89 # include <netinet/ip_var.h>
90 # include <netinet/tcp_fsm.h>
91 #endif
92 #include <netinet/tcp.h>
93 #include <netinet/udp.h>
94 #include <netinet/ip_icmp.h>
95 #include "netinet/ip_compat.h"
96 #include <netinet/tcpip.h>
97 #include "netinet/ip_fil.h"
98 #include "netinet/ip_nat.h"
99 #include "netinet/ip_frag.h"
100 #include "netinet/ip_state.h"
101 #include "netinet/ip_proxy.h"
102 #if (__FreeBSD_version >= 300000)
103 # include <sys/malloc.h>
104 #endif
105 #ifndef MIN
106 # define        MIN(a,b)        (((a)<(b))?(a):(b))
107 #endif
108 #undef  SOCKADDR_IN
109 #define SOCKADDR_IN     struct sockaddr_in
110
111 #if !defined(lint)
112 static const char sccsid[] = "@(#)ip_nat.c      1.11 6/5/96 (C) 1995 Darren Reed";
113 /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.37.2.44 2001/07/21 07:17:22 darrenr Exp $"; */
114 static const char rcsid[] = "@(#)$FreeBSD$";
115 #endif
116
117 nat_t   **nat_table[2] = { NULL, NULL },
118         *nat_instances = NULL;
119 ipnat_t *nat_list = NULL;
120 u_int   ipf_nattable_sz = NAT_TABLE_SZ;
121 u_int   ipf_natrules_sz = NAT_SIZE;
122 u_int   ipf_rdrrules_sz = RDR_SIZE;
123 u_int   ipf_hostmap_sz = HOSTMAP_SIZE;
124 u_32_t  nat_masks = 0;
125 u_32_t  rdr_masks = 0;
126 ipnat_t **nat_rules = NULL;
127 ipnat_t **rdr_rules = NULL;
128 hostmap_t       **maptable  = NULL;
129
130 u_long  fr_defnatage = DEF_NAT_AGE,
131         fr_defnaticmpage = 6;           /* 3 seconds */
132 natstat_t nat_stats;
133 int     fr_nat_lock = 0;
134 #if     (SOLARIS || defined(__sgi)) && defined(_KERNEL)
135 extern  kmutex_t        ipf_rw;
136 extern  KRWLOCK_T       ipf_nat;
137 #endif
138
139 static  int     nat_flushtable __P((void));
140 static  void    nat_addnat __P((struct ipnat *));
141 static  void    nat_addrdr __P((struct ipnat *));
142 static  void    nat_delete __P((struct nat *));
143 static  void    nat_delrdr __P((struct ipnat *));
144 static  void    nat_delnat __P((struct ipnat *));
145 static  int     fr_natgetent __P((caddr_t));
146 static  int     fr_natgetsz __P((caddr_t));
147 static  int     fr_natputent __P((caddr_t));
148 static  void    nat_tabmove __P((fr_info_t *, nat_t *));
149 static  int     nat_match __P((fr_info_t *, ipnat_t *, ip_t *));
150 static  hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
151                                     struct in_addr));
152 static  void    nat_hostmapdel __P((struct hostmap *));
153
154
155 int nat_init()
156 {
157         KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
158         if (nat_table[0] != NULL)
159                 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
160         else
161                 return -1;
162
163         KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
164         if (nat_table[1] != NULL)
165                 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
166         else
167                 return -1;
168
169         KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
170         if (nat_rules != NULL)
171                 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
172         else
173                 return -1;
174
175         KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
176         if (rdr_rules != NULL)
177                 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
178         else
179                 return -1;
180
181         KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
182         if (maptable != NULL)
183                 bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
184         else
185                 return -1;
186         return 0;
187 }
188
189
190 static void nat_addrdr(n)
191 ipnat_t *n;
192 {
193         ipnat_t **np;
194         u_32_t j;
195         u_int hv;
196         int k;
197
198         k = countbits(n->in_outmsk);
199         if ((k >= 0) && (k != 32))
200                 rdr_masks |= 1 << k;
201         j = (n->in_outip & n->in_outmsk);
202         hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
203         np = rdr_rules + hv;
204         while (*np != NULL)
205                 np = &(*np)->in_rnext;
206         n->in_rnext = NULL;
207         n->in_prnext = np;
208         *np = n;
209 }
210
211
212 static void nat_addnat(n)
213 ipnat_t *n;
214 {
215         ipnat_t **np;
216         u_32_t j;
217         u_int hv;
218         int k;
219
220         k = countbits(n->in_inmsk);
221         if ((k >= 0) && (k != 32))
222                 nat_masks |= 1 << k;
223         j = (n->in_inip & n->in_inmsk);
224         hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
225         np = nat_rules + hv;
226         while (*np != NULL)
227                 np = &(*np)->in_mnext;
228         n->in_mnext = NULL;
229         n->in_pmnext = np;
230         *np = n;
231 }
232
233
234 static void nat_delrdr(n)
235 ipnat_t *n;
236 {
237         if (n->in_rnext)
238                 n->in_rnext->in_prnext = n->in_prnext;
239         *n->in_prnext = n->in_rnext;
240 }
241
242
243 static void nat_delnat(n)
244 ipnat_t *n;
245 {
246         if (n->in_mnext)
247                 n->in_mnext->in_pmnext = n->in_pmnext;
248         *n->in_pmnext = n->in_mnext;
249 }
250
251
252 /*
253  * check if an ip address has already been allocated for a given mapping that
254  * is not doing port based translation.
255  *
256  * Must be called with ipf_nat held as a write lock.
257  */
258 static struct hostmap *nat_hostmap(np, real, map)
259 ipnat_t *np;
260 struct in_addr real;
261 struct in_addr map;
262 {
263         hostmap_t *hm;
264         u_int hv;
265
266         hv = real.s_addr % HOSTMAP_SIZE;
267         for (hm = maptable[hv]; hm; hm = hm->hm_next)
268                 if ((hm->hm_realip.s_addr == real.s_addr) &&
269                     (np == hm->hm_ipnat)) {
270                         hm->hm_ref++;
271                         return hm;
272                 }
273
274         KMALLOC(hm, hostmap_t *);
275         if (hm) {
276                 hm->hm_next = maptable[hv];
277                 hm->hm_pnext = maptable + hv;
278                 if (maptable[hv])
279                         maptable[hv]->hm_pnext = &hm->hm_next;
280                 maptable[hv] = hm;
281                 hm->hm_ipnat = np;
282                 hm->hm_realip = real;
283                 hm->hm_mapip = map;
284                 hm->hm_ref = 1;
285         }
286         return hm;
287 }
288
289
290 /*
291  * Must be called with ipf_nat held as a write lock.
292  */
293 static void nat_hostmapdel(hm)
294 struct hostmap *hm;
295 {
296         ATOMIC_DEC32(hm->hm_ref);
297         if (hm->hm_ref == 0) {
298                 if (hm->hm_next)
299                         hm->hm_next->hm_pnext = hm->hm_pnext;
300                 *hm->hm_pnext = hm->hm_next;
301                 KFREE(hm);
302         }
303 }
304
305
306 void fix_outcksum(fin, sp, n)
307 fr_info_t *fin;
308 u_short *sp;
309 u_32_t n;
310 {
311         register u_short sumshort;
312         register u_32_t sum1;
313
314         if (!n)
315                 return;
316         else if (n & NAT_HW_CKSUM) {
317                 n &= 0xffff;
318                 n += fin->fin_dlen;
319                 n = (n & 0xffff) + (n >> 16);
320                 *sp = n & 0xffff;
321                 return;
322         }
323         sum1 = (~ntohs(*sp)) & 0xffff;
324         sum1 += (n);
325         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
326         /* Again */
327         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
328         sumshort = ~(u_short)sum1;
329         *(sp) = htons(sumshort);
330 }
331
332
333 void fix_incksum(fin, sp, n)
334 fr_info_t *fin;
335 u_short *sp;
336 u_32_t n;
337 {
338         register u_short sumshort;
339         register u_32_t sum1;
340
341         if (!n)
342                 return;
343         else if (n & NAT_HW_CKSUM) {
344                 n &= 0xffff;
345                 n += fin->fin_dlen;
346                 n = (n & 0xffff) + (n >> 16);
347                 *sp = n & 0xffff;
348                 return;
349         }
350 #ifdef sparc
351         sum1 = (~(*sp)) & 0xffff;
352 #else
353         sum1 = (~ntohs(*sp)) & 0xffff;
354 #endif
355         sum1 += ~(n) & 0xffff;
356         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
357         /* Again */
358         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
359         sumshort = ~(u_short)sum1;
360         *(sp) = htons(sumshort);
361 }
362
363
364 /*
365  * fix_datacksum is used *only* for the adjustments of checksums in the data
366  * section of an IP packet.
367  *
368  * The only situation in which you need to do this is when NAT'ing an 
369  * ICMP error message. Such a message, contains in its body the IP header
370  * of the original IP packet, that causes the error.
371  *
372  * You can't use fix_incksum or fix_outcksum in that case, because for the
373  * kernel the data section of the ICMP error is just data, and no special 
374  * processing like hardware cksum or ntohs processing have been done by the 
375  * kernel on the data section.
376  */
377 void fix_datacksum(sp, n)
378 u_short *sp;
379 u_32_t n;
380 {
381         register u_short sumshort;
382         register u_32_t sum1;
383
384         if (!n)
385                 return;
386
387         sum1 = (~ntohs(*sp)) & 0xffff;
388         sum1 += (n);
389         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
390         /* Again */
391         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
392         sumshort = ~(u_short)sum1;
393         *(sp) = htons(sumshort);
394 }
395
396 /*
397  * How the NAT is organised and works.
398  *
399  * Inside (interface y) NAT       Outside (interface x)
400  * -------------------- -+- -------------------------------------
401  * Packet going          |   out, processed by ip_natout() for x
402  * ------------>         |   ------------>
403  * src=10.1.1.1          |   src=192.1.1.1
404  *                       |
405  *                       |   in, processed by ip_natin() for x
406  * <------------         |   <------------
407  * dst=10.1.1.1          |   dst=192.1.1.1
408  * -------------------- -+- -------------------------------------
409  * ip_natout() - changes ip_src and if required, sport
410  *             - creates a new mapping, if required.
411  * ip_natin()  - changes ip_dst and if required, dport
412  *
413  * In the NAT table, internal source is recorded as "in" and externally
414  * seen as "out".
415  */
416
417 /*
418  * Handle ioctls which manipulate the NAT.
419  */
420 int nat_ioctl(data, cmd, mode)
421 #if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
422 u_long cmd;
423 #else
424 int cmd;
425 #endif
426 caddr_t data;
427 int mode;
428 {
429         register ipnat_t *nat, *nt, *n = NULL, **np = NULL;
430         int error = 0, ret, arg;
431         ipnat_t natd;
432         u_32_t i, j;
433
434 #if (BSD >= 199306) && defined(_KERNEL)
435         if ((securelevel >= 3) && (mode & FWRITE))
436                 return EPERM;
437 #endif
438
439         nat = NULL;     /* XXX gcc -Wuninitialized */
440         KMALLOC(nt, ipnat_t *);
441         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT))
442                 error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
443         else if (cmd == SIOCIPFFL) {    /* SIOCFLNAT & SIOCCNATL */
444                 error = IRCOPY(data, (char *)&arg, sizeof(arg));
445                 if (error)
446                         error = EFAULT;
447         }
448
449         if (error)
450                 goto done;
451
452         /*
453          * For add/delete, look to see if the NAT entry is already present
454          */
455         WRITE_ENTER(&ipf_nat);
456         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
457                 nat = &natd;
458                 nat->in_flags &= IPN_USERFLAGS;
459                 if ((nat->in_redir & NAT_MAPBLK) == 0) {
460                         if ((nat->in_flags & IPN_SPLIT) == 0)
461                                 nat->in_inip &= nat->in_inmsk;
462                         if ((nat->in_flags & IPN_IPRANGE) == 0)
463                                 nat->in_outip &= nat->in_outmsk;
464                 }
465                 for (np = &nat_list; (n = *np); np = &n->in_next)
466                         if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
467                                         IPN_CMPSIZ))
468                                 break;
469         }
470
471         switch (cmd)
472         {
473 #ifdef  IPFILTER_LOG
474         case SIOCIPFFB :
475         {
476                 int tmp;
477
478                 if (!(mode & FWRITE))
479                         error = EPERM;
480                 else {
481                         tmp = ipflog_clear(IPL_LOGNAT);
482                         IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
483                 }
484                 break;
485         }
486 #endif
487         case SIOCADNAT :
488                 if (!(mode & FWRITE)) {
489                         error = EPERM;
490                         break;
491                 }
492                 if (n) {
493                         error = EEXIST;
494                         break;
495                 }
496                 if (nt == NULL) {
497                         error = ENOMEM;
498                         break;
499                 }
500                 n = nt;
501                 nt = NULL;
502                 bcopy((char *)nat, (char *)n, sizeof(*n));
503                 n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
504                 if (!n->in_ifp)
505                         n->in_ifp = (void *)-1;
506                 if (n->in_plabel[0] != '\0') {
507                         n->in_apr = appr_lookup(n->in_p, n->in_plabel);
508                         if (!n->in_apr) {
509                                 error = ENOENT;
510                                 break;
511                         }
512                 }
513                 n->in_next = NULL;
514                 *np = n;
515
516                 if (n->in_redir & NAT_REDIRECT) {
517                         n->in_flags &= ~IPN_NOTDST;
518                         nat_addrdr(n);
519                 }
520                 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
521                         n->in_flags &= ~IPN_NOTSRC;
522                         nat_addnat(n);
523                 }
524
525                 n->in_use = 0;
526                 if (n->in_redir & NAT_MAPBLK)
527                         n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
528                 else if (n->in_flags & IPN_AUTOPORTMAP)
529                         n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
530                 else if (n->in_flags & IPN_IPRANGE)
531                         n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
532                 else if (n->in_flags & IPN_SPLIT)
533                         n->in_space = 2;
534                 else
535                         n->in_space = ~ntohl(n->in_outmsk);
536                 /*
537                  * Calculate the number of valid IP addresses in the output
538                  * mapping range.  In all cases, the range is inclusive of
539                  * the start and ending IP addresses.
540                  * If to a CIDR address, lose 2: broadcast + network address
541                  *                               (so subtract 1)
542                  * If to a range, add one.
543                  * If to a single IP address, set to 1.
544                  */
545                 if (n->in_space) {
546                         if ((n->in_flags & IPN_IPRANGE) != 0)
547                                 n->in_space += 1;
548                         else
549                                 n->in_space -= 1;
550                 } else
551                         n->in_space = 1;
552                 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
553                     ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
554                         n->in_nip = ntohl(n->in_outip) + 1;
555                 else if ((n->in_flags & IPN_SPLIT) &&
556                          (n->in_redir & NAT_REDIRECT))
557                         n->in_nip = ntohl(n->in_inip);
558                 else
559                         n->in_nip = ntohl(n->in_outip);
560                 if (n->in_redir & NAT_MAP) {
561                         n->in_pnext = ntohs(n->in_pmin);
562                         /*
563                          * Multiply by the number of ports made available.
564                          */
565                         if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
566                                 n->in_space *= (ntohs(n->in_pmax) -
567                                                 ntohs(n->in_pmin) + 1);
568                                 /*
569                                  * Because two different sources can map to
570                                  * different destinations but use the same
571                                  * local IP#/port #.
572                                  * If the result is smaller than in_space, then
573                                  * we may have wrapped around 32bits.
574                                  */
575                                 i = n->in_inmsk;
576                                 if ((i != 0) && (i != 0xffffffff)) {
577                                         j = n->in_space * (~ntohl(i) + 1);
578                                         if (j >= n->in_space)
579                                                 n->in_space = j;
580                                         else
581                                                 n->in_space = 0xffffffff;
582                                 }
583                         }
584                         /*
585                          * If no protocol is specified, multiple by 256.
586                          */
587                         if ((n->in_flags & IPN_TCPUDP) == 0) {
588                                         j = n->in_space * 256;
589                                         if (j >= n->in_space)
590                                                 n->in_space = j;
591                                         else
592                                                 n->in_space = 0xffffffff;
593                         }
594                 }
595                 /* Otherwise, these fields are preset */
596                 n = NULL;
597                 nat_stats.ns_rules++;
598                 break;
599         case SIOCRMNAT :
600                 if (!(mode & FWRITE)) {
601                         error = EPERM;
602                         n = NULL;
603                         break;
604                 }
605                 if (!n) {
606                         error = ESRCH;
607                         break;
608                 }
609                 if (n->in_redir & NAT_REDIRECT)
610                         nat_delrdr(n);
611                 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
612                         nat_delnat(n);
613                 if (nat_list == NULL) {
614                         nat_masks = 0;
615                         rdr_masks = 0;
616                 }
617                 *np = n->in_next;
618                 if (!n->in_use) {
619                         if (n->in_apr)
620                                 appr_free(n->in_apr);
621                         KFREE(n);
622                         nat_stats.ns_rules--;
623                 } else {
624                         n->in_flags |= IPN_DELETE;
625                         n->in_next = NULL;
626                 }
627                 n = NULL;
628                 break;
629         case SIOCGNATS :
630                 MUTEX_DOWNGRADE(&ipf_nat);
631                 nat_stats.ns_table[0] = nat_table[0];
632                 nat_stats.ns_table[1] = nat_table[1];
633                 nat_stats.ns_list = nat_list;
634                 nat_stats.ns_maptable = maptable;
635                 nat_stats.ns_nattab_sz = ipf_nattable_sz;
636                 nat_stats.ns_rultab_sz = ipf_natrules_sz;
637                 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
638                 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
639                 nat_stats.ns_instances = nat_instances;
640                 nat_stats.ns_apslist = ap_sess_list;
641                 error = IWCOPYPTR((char *)&nat_stats, (char *)data,
642                                   sizeof(nat_stats));
643                 break;
644         case SIOCGNATL :
645             {
646                 natlookup_t nl;
647
648                 MUTEX_DOWNGRADE(&ipf_nat);
649                 error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
650                 if (error)
651                         break;
652
653                 if (nat_lookupredir(&nl)) {
654                         error = IWCOPYPTR((char *)&nl, (char *)data,
655                                           sizeof(nl));
656                 } else
657                         error = ESRCH;
658                 break;
659             }
660         case SIOCIPFFL :        /* old SIOCFLNAT & SIOCCNATL */
661                 if (!(mode & FWRITE)) {
662                         error = EPERM;
663                         break;
664                 }
665                 error = 0;
666                 if (arg == 0)
667                         ret = nat_flushtable();
668                 else if (arg == 1)
669                         ret = nat_clearlist();
670                 else
671                         error = EINVAL;
672                 MUTEX_DOWNGRADE(&ipf_nat);
673                 if (!error) {
674                         error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
675                         if (error)
676                                 error = EFAULT;
677                 }
678                 break;
679         case SIOCSTLCK :
680                 error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
681                 if (!error) {
682                         error = IWCOPY((caddr_t)&fr_nat_lock, data,
683                                         sizeof(fr_nat_lock));
684                         if (!error)
685                                 fr_nat_lock = arg;
686                 } else
687                         error = EFAULT;
688                 break;
689         case SIOCSTPUT :
690                 if (fr_nat_lock)
691                         error = fr_natputent(data);
692                 else
693                         error = EACCES;
694                 break;
695         case SIOCSTGSZ :
696                 if (fr_nat_lock)
697                         error = fr_natgetsz(data);
698                 else
699                         error = EACCES;
700                 break;
701         case SIOCSTGET :
702                 if (fr_nat_lock)
703                         error = fr_natgetent(data);
704                 else
705                         error = EACCES;
706                 break;
707         case FIONREAD :
708 #ifdef  IPFILTER_LOG
709                 arg = (int)iplused[IPL_LOGNAT];
710                 MUTEX_DOWNGRADE(&ipf_nat);
711                 error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
712                 if (error)
713                         error = EFAULT;
714 #endif
715                 break;
716         default :
717                 error = EINVAL;
718                 break;
719         }
720         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
721 done:
722         if (nt)
723                 KFREE(nt);
724         return error;
725 }
726
727
728 static int fr_natgetsz(data)
729 caddr_t data;
730 {
731         ap_session_t *aps;
732         nat_t *nat, *n;
733         int error = 0;
734         natget_t ng;
735
736         error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
737         if (error)
738                 return EFAULT;
739
740         nat = ng.ng_ptr;
741         if (!nat) {
742                 nat = nat_instances;
743                 ng.ng_sz = 0;
744                 if (nat == NULL) {
745                         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
746                         if (error)
747                                 error = EFAULT;
748                         return error;
749                 }
750         } else {
751                 /*
752                  * Make sure the pointer we're copying from exists in the
753                  * current list of entries.  Security precaution to prevent
754                  * copying of random kernel data.
755                  */
756                 for (n = nat_instances; n; n = n->nat_next)
757                         if (n == nat)
758                                 break;
759                 if (!n)
760                         return ESRCH;
761         }
762
763         ng.ng_sz = sizeof(nat_save_t);
764         aps = nat->nat_aps;
765         if ((aps != NULL) && (aps->aps_data != 0)) {
766                 ng.ng_sz += sizeof(ap_session_t);
767                 ng.ng_sz += aps->aps_psiz;
768         }
769
770         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
771         if (error)
772                 error = EFAULT;
773         return error;
774 }
775
776
777 static int fr_natgetent(data)
778 caddr_t data;
779 {
780         nat_save_t ipn, *ipnp, *ipnn = NULL;
781         register nat_t *n, *nat;
782         ap_session_t *aps;
783         int error;
784
785         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
786         if (error)
787                 return EFAULT;
788         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
789         if (error)
790                 return EFAULT;
791
792         nat = ipn.ipn_next;
793         if (!nat) {
794                 nat = nat_instances;
795                 if (nat == NULL) {
796                         if (nat_instances == NULL)
797                                 return ENOENT;
798                         return 0;
799                 }
800         } else {
801                 /*
802                  * Make sure the pointer we're copying from exists in the
803                  * current list of entries.  Security precaution to prevent
804                  * copying of random kernel data.
805                  */
806                 for (n = nat_instances; n; n = n->nat_next)
807                         if (n == nat)
808                                 break;
809                 if (!n)
810                         return ESRCH;
811         }
812
813         ipn.ipn_next = nat->nat_next;
814         ipn.ipn_dsize = 0;
815         bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
816         ipn.ipn_nat.nat_data = NULL;
817
818         if (nat->nat_ptr) {
819                 bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
820                       sizeof(ipn.ipn_ipnat));
821         }
822
823         if (nat->nat_fr)
824                 bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
825                       sizeof(ipn.ipn_rule));
826
827         if ((aps = nat->nat_aps)) {
828                 ipn.ipn_dsize = sizeof(*aps);
829                 if (aps->aps_data)
830                         ipn.ipn_dsize += aps->aps_psiz;
831                 KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
832                 if (ipnn == NULL)
833                         return ENOMEM;
834                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
835
836                 bcopy((char *)aps, ipnn->ipn_data, sizeof(*aps));
837                 if (aps->aps_data) {
838                         bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
839                               aps->aps_psiz);
840                         ipnn->ipn_dsize += aps->aps_psiz;
841                 }
842                 error = IWCOPY((caddr_t)ipnn, ipnp,
843                                sizeof(ipn) + ipn.ipn_dsize);
844                 if (error)
845                         error = EFAULT;
846                 KFREES(ipnn, sizeof(*ipnn) + ipn.ipn_dsize);
847         } else {
848                 error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
849                 if (error)
850                         error = EFAULT;
851         }
852         return error;
853 }
854
855
856 static int fr_natputent(data)
857 caddr_t data;
858 {
859         nat_save_t ipn, *ipnp, *ipnn = NULL;
860         register nat_t *n, *nat;
861         ap_session_t *aps;
862         frentry_t *fr;
863         ipnat_t *in;
864
865         int error;
866
867         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
868         if (error)
869                 return EFAULT;
870         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
871         if (error)
872                 return EFAULT;
873         nat = NULL;
874         if (ipn.ipn_dsize) {
875                 KMALLOCS(ipnn, nat_save_t *, sizeof(ipn) + ipn.ipn_dsize);
876                 if (ipnn == NULL)
877                         return ENOMEM;
878                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
879                 error = IRCOPY((caddr_t)ipnp, (caddr_t)ipn.ipn_data,
880                                ipn.ipn_dsize);
881                 if (error) {
882                         error = EFAULT;
883                         goto junkput;
884                 }
885         } else
886                 ipnn = NULL;
887
888         KMALLOC(nat, nat_t *);
889         if (nat == NULL) {
890                 error = EFAULT;
891                 goto junkput;
892         }
893
894         bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
895         /*
896          * Initialize all these so that nat_delete() doesn't cause a crash.
897          */
898         nat->nat_phnext[0] = NULL;
899         nat->nat_phnext[1] = NULL;
900         fr = nat->nat_fr;
901         nat->nat_fr = NULL;
902         aps = nat->nat_aps;
903         nat->nat_aps = NULL;
904         in = nat->nat_ptr;
905         nat->nat_ptr = NULL;
906         nat->nat_hm = NULL;
907         nat->nat_data = NULL;
908         nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
909
910         /*
911          * Restore the rule associated with this nat session
912          */
913         if (in) {
914                 KMALLOC(in, ipnat_t *);
915                 if (in == NULL) {
916                         error = ENOMEM;
917                         goto junkput;
918                 }
919                 nat->nat_ptr = in;
920                 bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
921                 in->in_use = 1;
922                 in->in_flags |= IPN_DELETE;
923                 in->in_next = NULL;
924                 in->in_rnext = NULL;
925                 in->in_prnext = NULL;
926                 in->in_mnext = NULL;
927                 in->in_pmnext = NULL;
928                 in->in_ifp = GETUNIT(in->in_ifname, 4);
929                 if (in->in_plabel[0] != '\0') {
930                         in->in_apr = appr_lookup(in->in_p, in->in_plabel);
931                 }
932         }
933
934         /*
935          * Restore ap_session_t structure.  Include the private data allocated
936          * if it was there.
937          */
938         if (aps) {
939                 KMALLOC(aps, ap_session_t *);
940                 if (aps == NULL) {
941                         error = ENOMEM;
942                         goto junkput;
943                 }
944                 nat->nat_aps = aps;
945                 aps->aps_next = ap_sess_list;
946                 ap_sess_list = aps;
947                 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
948                 if (in)
949                         aps->aps_apr = in->in_apr;
950                 if (aps->aps_psiz) {
951                         KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
952                         if (aps->aps_data == NULL) {
953                                 error = ENOMEM;
954                                 goto junkput;
955                         }
956                         bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
957                               aps->aps_psiz);
958                 } else {
959                         aps->aps_psiz = 0;
960                         aps->aps_data = NULL;
961                 }
962         }
963
964         /*
965          * If there was a filtering rule associated with this entry then
966          * build up a new one.
967          */
968         if (fr != NULL) {
969                 if (nat->nat_flags & FI_NEWFR) {
970                         KMALLOC(fr, frentry_t *);
971                         nat->nat_fr = fr;
972                         if (fr == NULL) {
973                                 error = ENOMEM;
974                                 goto junkput;
975                         }
976                         bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
977                         ipn.ipn_nat.nat_fr = fr;
978                         error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
979                         if (error) {
980                                 error = EFAULT;
981                                 goto junkput;
982                         }
983                 } else {
984                         for (n = nat_instances; n; n = n->nat_next)
985                                 if (n->nat_fr == fr)
986                                         break;
987                         if (!n) {
988                                 error = ESRCH;
989                                 goto junkput;
990                         }
991                 }
992         }
993
994         if (ipnn)
995                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
996         nat_insert(nat);
997         return 0;
998 junkput:
999         if (ipnn)
1000                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1001         if (nat)
1002                 nat_delete(nat);
1003         return error;
1004 }
1005
1006
1007 /*
1008  * Delete a nat entry from the various lists and table.
1009  */
1010 static void nat_delete(natd)
1011 struct nat *natd;
1012 {
1013         struct ipnat *ipn;
1014
1015         if (natd->nat_flags & FI_WILDP)
1016                 nat_stats.ns_wilds--;
1017         if (natd->nat_hnext[0])
1018                 natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0];
1019         *natd->nat_phnext[0] = natd->nat_hnext[0];
1020         if (natd->nat_hnext[1])
1021                 natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1];
1022         *natd->nat_phnext[1] = natd->nat_hnext[1];
1023         if (natd->nat_me != NULL)
1024                 *natd->nat_me = NULL;
1025
1026         if (natd->nat_fr != NULL) {
1027                 ATOMIC_DEC32(natd->nat_fr->fr_ref);
1028         }
1029
1030         if (natd->nat_hm != NULL)
1031                 nat_hostmapdel(natd->nat_hm);
1032
1033         /*
1034          * If there is an active reference from the nat entry to its parent
1035          * rule, decrement the rule's reference count and free it too if no
1036          * longer being used.
1037          */
1038         ipn = natd->nat_ptr;
1039         if (ipn != NULL) {
1040                 ipn->in_space++;
1041                 ipn->in_use--;
1042                 if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
1043                         if (ipn->in_apr)
1044                                 appr_free(ipn->in_apr);
1045                         KFREE(ipn);
1046                         nat_stats.ns_rules--;
1047                 }
1048         }
1049
1050         MUTEX_DESTROY(&natd->nat_lock);
1051         /*
1052          * If there's a fragment table entry too for this nat entry, then
1053          * dereference that as well.
1054          */
1055         ipfr_forget((void *)natd);
1056         aps_free(natd->nat_aps);
1057         nat_stats.ns_inuse--;
1058         KFREE(natd);
1059 }
1060
1061
1062 /*
1063  * nat_flushtable - clear the NAT table of all mapping entries.
1064  * (this is for the dynamic mappings)
1065  */
1066 static int nat_flushtable()
1067 {
1068         register nat_t *nat, **natp;
1069         register int j = 0;
1070
1071         /*
1072          * ALL NAT mappings deleted, so lets just make the deletions
1073          * quicker.
1074          */
1075         if (nat_table[0] != NULL)
1076                 bzero((char *)nat_table[0],
1077                       sizeof(nat_table[0]) * ipf_nattable_sz);
1078         if (nat_table[1] != NULL)
1079                 bzero((char *)nat_table[1],
1080                       sizeof(nat_table[1]) * ipf_nattable_sz);
1081
1082         for (natp = &nat_instances; (nat = *natp); ) {
1083                 *natp = nat->nat_next;
1084 #ifdef  IPFILTER_LOG
1085                 nat_log(nat, NL_FLUSH);
1086 #endif
1087                 nat_delete(nat);
1088                 j++;
1089         }
1090         nat_stats.ns_inuse = 0;
1091         return j;
1092 }
1093
1094
1095 /*
1096  * nat_clearlist - delete all rules in the active NAT mapping list.
1097  * (this is for NAT/RDR rules)
1098  */
1099 int nat_clearlist()
1100 {
1101         register ipnat_t *n, **np = &nat_list;
1102         int i = 0;
1103
1104         if (nat_rules != NULL)
1105                 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1106         if (rdr_rules != NULL)
1107                 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1108
1109         while ((n = *np)) {
1110                 *np = n->in_next;
1111                 if (!n->in_use) {
1112                         if (n->in_apr)
1113                                 appr_free(n->in_apr);
1114                         KFREE(n);
1115                         nat_stats.ns_rules--;
1116                 } else {
1117                         n->in_flags |= IPN_DELETE;
1118                         n->in_next = NULL;
1119                 }
1120                 i++;
1121         }
1122         nat_masks = 0;
1123         rdr_masks = 0;
1124         return i;
1125 }
1126
1127
1128 /*
1129  * Create a new NAT table entry.
1130  * NOTE: Assumes write lock on ipf_nat has been obtained already.
1131  *       If you intend on changing this, beware: appr_new() may call nat_new()
1132  *       recursively!
1133  */
1134 nat_t *nat_new(fin, ip, np, natsave, flags, direction)
1135 fr_info_t *fin;
1136 ip_t *ip;
1137 ipnat_t *np;
1138 nat_t **natsave;
1139 u_int flags;
1140 int direction;
1141 {
1142         register u_32_t sum1, sum2, sumd, l;
1143         u_short port = 0, sport = 0, dport = 0, nport = 0;
1144         struct in_addr in, inb;
1145         u_short nflags, sp, dp;
1146         tcphdr_t *tcp = NULL;
1147         hostmap_t *hm = NULL;
1148         nat_t *nat, *natl;
1149 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1150         qif_t *qf = fin->fin_qif;
1151 #endif
1152
1153         nflags = flags & np->in_flags;
1154         if (flags & IPN_TCPUDP) {
1155                 tcp = (tcphdr_t *)fin->fin_dp;
1156                 sport = htons(fin->fin_data[0]);
1157                 dport = htons(fin->fin_data[1]);
1158         }
1159
1160         /* Give me a new nat */
1161         KMALLOC(nat, nat_t *);
1162         if (nat == NULL) {
1163                 nat_stats.ns_memfail++;
1164                 return NULL;
1165         }
1166
1167         bzero((char *)nat, sizeof(*nat));
1168         nat->nat_flags = flags;
1169         if (flags & FI_WILDP)
1170                 nat_stats.ns_wilds++;
1171         /*
1172          * Search the current table for a match.
1173          */
1174         if (direction == NAT_OUTBOUND) {
1175                 /*
1176                  * Values at which the search for a free resouce starts.
1177                  */
1178                 u_32_t st_ip;
1179                 u_short st_port;
1180
1181                 /*
1182                  * If it's an outbound packet which doesn't match any existing
1183                  * record, then create a new port
1184                  */
1185                 l = 0;
1186                 st_ip = np->in_nip;
1187                 st_port = np->in_pnext;
1188
1189                 do {
1190                         port = 0;
1191                         in.s_addr = htonl(np->in_nip);
1192                         if (l == 0) {
1193                                 /*
1194                                  * Check to see if there is an existing NAT
1195                                  * setup for this IP address pair.
1196                                  */
1197                                 hm = nat_hostmap(np, fin->fin_src, in);
1198                                 if (hm != NULL)
1199                                         in.s_addr = hm->hm_mapip.s_addr;
1200                         } else if ((l == 1) && (hm != NULL)) {
1201                                 nat_hostmapdel(hm);
1202                                 hm = NULL;
1203                         }
1204                         in.s_addr = ntohl(in.s_addr);
1205
1206                         nat->nat_hm = hm;
1207
1208                         if ((np->in_outmsk == 0xffffffff) &&
1209                             (np->in_pnext == 0)) {
1210                                 if (l > 0)
1211                                         goto badnat;
1212                         }
1213
1214                         if (np->in_redir & NAT_MAPBLK) {
1215                                 if ((l >= np->in_ppip) || ((l > 0) &&
1216                                      !(flags & IPN_TCPUDP)))
1217                                         goto badnat;
1218                                 /*
1219                                  * map-block - Calculate destination address.
1220                                  */
1221                                 in.s_addr = ntohl(fin->fin_saddr);
1222                                 in.s_addr &= ntohl(~np->in_inmsk);
1223                                 inb.s_addr = in.s_addr;
1224                                 in.s_addr /= np->in_ippip;
1225                                 in.s_addr &= ntohl(~np->in_outmsk);
1226                                 in.s_addr += ntohl(np->in_outip);
1227                                 /*
1228                                  * Calculate destination port.
1229                                  */
1230                                 if ((flags & IPN_TCPUDP) &&
1231                                     (np->in_ppip != 0)) {
1232                                         port = ntohs(sport) + l;
1233                                         port %= np->in_ppip;
1234                                         port += np->in_ppip *
1235                                                 (inb.s_addr % np->in_ippip);
1236                                         port += MAPBLK_MINPORT;
1237                                         port = htons(port);
1238                                 }
1239                         } else if (!np->in_outip &&
1240                                    (np->in_outmsk == 0xffffffff)) {
1241                                 /*
1242                                  * 0/32 - use the interface's IP address.
1243                                  */
1244                                 if ((l > 0) ||
1245                                     fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1246                                         goto badnat;
1247                                 in.s_addr = ntohl(in.s_addr);
1248                         } else if (!np->in_outip && !np->in_outmsk) {
1249                                 /*
1250                                  * 0/0 - use the original source address/port.
1251                                  */
1252                                 if (l > 0)
1253                                         goto badnat;
1254                                 in.s_addr = ntohl(fin->fin_saddr);
1255                         } else if ((np->in_outmsk != 0xffffffff) &&
1256                                    (np->in_pnext == 0) &&
1257                                    ((l > 0) || (hm == NULL)))
1258                                 np->in_nip++;
1259                         natl = NULL;
1260
1261                         if ((nflags & IPN_TCPUDP) &&
1262                             ((np->in_redir & NAT_MAPBLK) == 0) &&
1263                             (np->in_flags & IPN_AUTOPORTMAP)) {
1264                                 if ((l > 0) && (l % np->in_ppip == 0)) {
1265                                         if (l > np->in_space) {
1266                                                 goto badnat;
1267                                         } else if ((l > np->in_ppip) &&
1268                                                    np->in_outmsk != 0xffffffff)
1269                                                 np->in_nip++;
1270                                 }
1271                                 if (np->in_ppip != 0) {
1272                                         port = ntohs(sport);
1273                                         port += (l % np->in_ppip);
1274                                         port %= np->in_ppip;
1275                                         port += np->in_ppip *
1276                                                 (ntohl(fin->fin_saddr) %
1277                                                  np->in_ippip);
1278                                         port += MAPBLK_MINPORT;
1279                                         port = htons(port);
1280                                 }
1281                         } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1282                                    (nflags & IPN_TCPUDP) &&
1283                                    (np->in_pnext != 0)) {
1284                                 port = htons(np->in_pnext++);
1285                                 if (np->in_pnext > ntohs(np->in_pmax)) {
1286                                         np->in_pnext = ntohs(np->in_pmin);
1287                                         if (np->in_outmsk != 0xffffffff)
1288                                                 np->in_nip++;
1289                                 }
1290                         }
1291
1292                         if (np->in_flags & IPN_IPRANGE) {
1293                                 if (np->in_nip > ntohl(np->in_outmsk))
1294                                         np->in_nip = ntohl(np->in_outip);
1295                         } else {
1296                                 if ((np->in_outmsk != 0xffffffff) &&
1297                                     ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1298                                     ntohl(np->in_outip))
1299                                         np->in_nip = ntohl(np->in_outip) + 1;
1300                         }
1301
1302                         if (!port && (flags & IPN_TCPUDP))
1303                                 port = sport;
1304
1305                         /*
1306                          * Here we do a lookup of the connection as seen from
1307                          * the outside.  If an IP# pair already exists, try
1308                          * again.  So if you have A->B becomes C->B, you can
1309                          * also have D->E become C->E but not D->B causing
1310                          * another C->B.  Also take protocol and ports into
1311                          * account when determining whether a pre-existing
1312                          * NAT setup will cause an external conflict where
1313                          * this is appropriate.
1314                          */
1315                         inb.s_addr = htonl(in.s_addr);
1316                         sp = fin->fin_data[0];
1317                         dp = fin->fin_data[1];
1318                         fin->fin_data[0] = fin->fin_data[1];
1319                         fin->fin_data[1] = htons(port);
1320                         natl = nat_inlookup(fin, flags & ~FI_WILDP,
1321                                             (u_int)fin->fin_p, fin->fin_dst,
1322                                             inb, 1);
1323                         fin->fin_data[0] = sp;
1324                         fin->fin_data[1] = dp;
1325
1326                         /*
1327                          * Has the search wrapped around and come back to the
1328                          * start ?
1329                          */
1330                         if ((natl != NULL) &&
1331                             (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1332                             (np->in_nip != 0) && (st_ip == np->in_nip))
1333                                 goto badnat;
1334                         l++;
1335                 } while (natl != NULL);
1336
1337                 if (np->in_space > 0)
1338                         np->in_space--;
1339
1340                 /* Setup the NAT table */
1341                 nat->nat_inip = fin->fin_src;
1342                 nat->nat_outip.s_addr = htonl(in.s_addr);
1343                 nat->nat_oip = fin->fin_dst;
1344                 if (nat->nat_hm == NULL)
1345                         nat->nat_hm = nat_hostmap(np, fin->fin_src,
1346                                                   nat->nat_outip);
1347
1348                 sum1 = LONG_SUM(ntohl(fin->fin_saddr)) + ntohs(sport);
1349                 sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1350
1351                 if (flags & IPN_TCPUDP) {
1352                         nat->nat_inport = sport;
1353                         nat->nat_outport = port;        /* sport */
1354                         nat->nat_oport = dport;
1355                 }
1356         } else {
1357                 /*
1358                  * Otherwise, it's an inbound packet. Most likely, we don't
1359                  * want to rewrite source ports and source addresses. Instead,
1360                  * we want to rewrite to a fixed internal address and fixed
1361                  * internal port.
1362                  */
1363                 if (np->in_flags & IPN_SPLIT) {
1364                         in.s_addr = np->in_nip;
1365                         if (np->in_inip == htonl(in.s_addr))
1366                                 np->in_nip = ntohl(np->in_inmsk);
1367                         else {
1368                                 np->in_nip = ntohl(np->in_inip);
1369                                 if (np->in_flags & IPN_ROUNDR) {
1370                                         nat_delrdr(np);
1371                                         nat_addrdr(np);
1372                                 }
1373                         }
1374                 } else {
1375                         in.s_addr = ntohl(np->in_inip);
1376                         if (np->in_flags & IPN_ROUNDR) {
1377                                 nat_delrdr(np);
1378                                 nat_addrdr(np);
1379                         }
1380                 }
1381                 if (!np->in_pnext)
1382                         nport = dport;
1383                 else {
1384                         /*
1385                          * Whilst not optimized for the case where
1386                          * pmin == pmax, the gain is not significant.
1387                          */
1388                         if (np->in_pmin != np->in_pmax) {
1389                                 nport = ntohs(dport) - ntohs(np->in_pmin) +
1390                                         ntohs(np->in_pnext);
1391                                 nport = ntohs(nport);
1392                         } else
1393                                 nport = np->in_pnext;
1394                 }
1395
1396                 /*
1397                  * When the redirect-to address is set to 0.0.0.0, just
1398                  * assume a blank `forwarding' of the packet.
1399                  */
1400                 if (in.s_addr == 0)
1401                         in.s_addr = ntohl(fin->fin_daddr);
1402
1403                 nat->nat_inip.s_addr = htonl(in.s_addr);
1404                 nat->nat_outip = fin->fin_dst;
1405                 nat->nat_oip = fin->fin_src;
1406
1407                 sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
1408                 sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1409
1410                 if (flags & IPN_TCPUDP) {
1411                         nat->nat_inport = nport;
1412                         nat->nat_outport = dport;
1413                         nat->nat_oport = sport;
1414                 }
1415         }
1416
1417         CALC_SUMD(sum1, sum2, sumd);
1418         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1419 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1420         if ((flags & IPN_TCPUDP) && dohwcksum &&
1421             (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1422                 if (direction == NAT_OUTBOUND)
1423                         sum1 = LONG_SUM(ntohl(in.s_addr));
1424                 else
1425                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1426                 sum1 += LONG_SUM(ntohl(fin->fin_daddr));
1427                 sum1 += IPPROTO_TCP;
1428                 sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1429                 nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1430         } else
1431 #endif
1432                 nat->nat_sumd[1] = nat->nat_sumd[0];
1433
1434         if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1435                 if (direction == NAT_OUTBOUND)
1436                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1437                 else
1438                         sum1 = LONG_SUM(ntohl(fin->fin_daddr));
1439
1440                 sum2 = LONG_SUM(in.s_addr);
1441
1442                 CALC_SUMD(sum1, sum2, sumd);
1443                 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1444         } else
1445                 nat->nat_ipsumd = nat->nat_sumd[0];
1446
1447         in.s_addr = htonl(in.s_addr);
1448
1449         strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1450
1451         nat->nat_me = natsave;
1452         nat->nat_dir = direction;
1453         nat->nat_ifp = fin->fin_ifp;
1454         nat->nat_ptr = np;
1455         nat->nat_p = fin->fin_p;
1456         nat->nat_bytes = 0;
1457         nat->nat_pkts = 0;
1458         nat->nat_fr = fin->fin_fr;
1459         if (nat->nat_fr != NULL) {
1460                 ATOMIC_INC32(nat->nat_fr->fr_ref);
1461         }
1462         if (direction == NAT_OUTBOUND) {
1463                 if (flags & IPN_TCPUDP)
1464                         tcp->th_sport = port;
1465         } else {
1466                 if (flags & IPN_TCPUDP)
1467                         tcp->th_dport = nport;
1468         }
1469
1470         nat_insert(nat);
1471
1472         if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1473             (tcp != NULL && dport == np->in_dport)))
1474                 (void) appr_new(fin, ip, nat);
1475
1476         np->in_use++;
1477 #ifdef  IPFILTER_LOG
1478         nat_log(nat, (u_int)np->in_redir);
1479 #endif
1480         return nat;
1481 badnat:
1482         nat_stats.ns_badnat++;
1483         if ((hm = nat->nat_hm) != NULL)
1484                 nat_hostmapdel(hm);
1485         KFREE(nat);
1486         return NULL;
1487 }
1488
1489
1490 /*
1491  * Insert a NAT entry into the hash tables for searching and add it to the
1492  * list of active NAT entries.  Adjust global counters when complete.
1493  */
1494 void    nat_insert(nat)
1495 nat_t   *nat;
1496 {
1497         u_int hv1, hv2;
1498         nat_t **natp;
1499
1500         MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1501
1502         nat->nat_age = fr_defnatage;
1503         nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1504         if (nat->nat_ifname[0] !='\0') {
1505                 nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1506         }
1507
1508         nat->nat_next = nat_instances;
1509         nat_instances = nat;
1510
1511         if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) {
1512                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1513                                   0xffffffff);
1514                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
1515                                   ipf_nattable_sz);
1516                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1517                                   0xffffffff);
1518                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
1519                                  ipf_nattable_sz);
1520         } else {
1521                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_inip.s_addr,
1522                                   ipf_nattable_sz);
1523                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_outip.s_addr,
1524                                   ipf_nattable_sz);
1525         }
1526
1527         natp = &nat_table[0][hv1];
1528         if (*natp)
1529                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
1530         nat->nat_phnext[0] = natp;
1531         nat->nat_hnext[0] = *natp;
1532         *natp = nat;
1533
1534         natp = &nat_table[1][hv2];
1535         if (*natp)
1536                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
1537         nat->nat_phnext[1] = natp;
1538         nat->nat_hnext[1] = *natp;
1539         *natp = nat;
1540
1541         nat_stats.ns_added++;
1542         nat_stats.ns_inuse++;
1543 }
1544
1545
1546 nat_t *nat_icmplookup(ip, fin, dir)
1547 ip_t *ip;
1548 fr_info_t *fin;
1549 int dir;
1550 {
1551         icmphdr_t *icmp;
1552         tcphdr_t *tcp = NULL;
1553         ip_t *oip;
1554         int flags = 0, type, minlen;
1555
1556         icmp = (icmphdr_t *)fin->fin_dp;
1557         /*
1558          * Does it at least have the return (basic) IP header ?
1559          * Only a basic IP header (no options) should be with an ICMP error
1560          * header.
1561          */
1562         if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1563                 return NULL;
1564         type = icmp->icmp_type;
1565         /*
1566          * If it's not an error type, then return.
1567          */
1568         if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1569             (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1570             (type != ICMP_PARAMPROB))
1571                 return NULL;
1572
1573         oip = (ip_t *)((char *)fin->fin_dp + 8);
1574         minlen = (oip->ip_hl << 2);
1575         if (minlen < sizeof(ip_t))
1576                 return NULL;
1577         if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1578                 return NULL;
1579         /*
1580          * Is the buffer big enough for all of it ?  It's the size of the IP
1581          * header claimed in the encapsulated part which is of concern.  It
1582          * may be too big to be in this buffer but not so big that it's
1583          * outside the ICMP packet, leading to TCP deref's causing problems.
1584          * This is possible because we don't know how big oip_hl is when we
1585          * do the pullup early in fr_check() and thus can't gaurantee it is
1586          * all here now.
1587          */
1588 #ifdef  _KERNEL
1589         {
1590         mb_t *m;
1591
1592 # if SOLARIS
1593         m = fin->fin_qfm;
1594         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
1595                 return NULL;
1596 # else
1597         m = *(mb_t **)fin->fin_mp;
1598         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
1599             (char *)ip + m->m_len)
1600                 return NULL;
1601 # endif
1602         }
1603 #endif
1604
1605         if (oip->ip_p == IPPROTO_TCP)
1606                 flags = IPN_TCP;
1607         else if (oip->ip_p == IPPROTO_UDP)
1608                 flags = IPN_UDP;
1609         if (flags & IPN_TCPUDP) {
1610                 u_short data[2];
1611                 nat_t *nat;
1612
1613                 minlen += 8;            /* + 64bits of data to get ports */
1614                 if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1615                         return NULL;
1616
1617                 data[0] = fin->fin_data[0];
1618                 data[1] = fin->fin_data[1];
1619                 tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1620                 fin->fin_data[0] = ntohs(tcp->th_dport);
1621                 fin->fin_data[1] = ntohs(tcp->th_sport);
1622
1623                 if (dir == NAT_INBOUND) {
1624                         nat = nat_inlookup(fin, flags, (u_int)oip->ip_p,
1625                                             oip->ip_dst, oip->ip_src, 0);
1626                 } else {
1627                         nat = nat_outlookup(fin, flags, (u_int)oip->ip_p,
1628                                             oip->ip_dst, oip->ip_src, 0);
1629                 }
1630                 fin->fin_data[0] = data[0];
1631                 fin->fin_data[1] = data[1];
1632                 return nat;
1633         }
1634         if (dir == NAT_INBOUND)
1635                 return nat_inlookup(fin, 0, (u_int)oip->ip_p,
1636                                     oip->ip_dst, oip->ip_src, 0);
1637         else
1638                 return nat_outlookup(fin, 0, (u_int)oip->ip_p,
1639                                     oip->ip_dst, oip->ip_src, 0);
1640 }
1641
1642
1643 /*
1644  * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP
1645  * packet gets correctly recognised.
1646  */
1647 nat_t *nat_icmp(ip, fin, nflags, dir)
1648 ip_t *ip;
1649 fr_info_t *fin;
1650 u_int *nflags;
1651 int dir;
1652 {
1653         u_32_t sum1, sum2, sumd, sumd2 = 0;
1654         struct in_addr in;
1655         icmphdr_t *icmp;
1656         udphdr_t *udp;
1657         nat_t *nat;
1658         ip_t *oip;
1659         int flags;
1660
1661         if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0))
1662                 return NULL;
1663         /*
1664          * nat_icmplookup() will return NULL for `defective' packets.
1665          */
1666         if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1667                 return NULL;
1668
1669         flags = 0;
1670         *nflags = IPN_ICMPERR;
1671         icmp = (icmphdr_t *)fin->fin_dp;
1672         oip = (ip_t *)&icmp->icmp_ip;
1673         if (oip->ip_p == IPPROTO_TCP)
1674                 flags = IPN_TCP;
1675         else if (oip->ip_p == IPPROTO_UDP)
1676                 flags = IPN_UDP;
1677         udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1678         /*
1679          * Need to adjust ICMP header to include the real IP#'s and
1680          * port #'s.  Only apply a checksum change relative to the
1681          * IP address change as it will be modified again in ip_natout
1682          * for both address and port.  Two checksum changes are
1683          * necessary for the two header address changes.  Be careful
1684          * to only modify the checksum once for the port # and twice
1685          * for the IP#.
1686          */
1687
1688         /*
1689          * Step 1
1690          * Fix the IP addresses in the offending IP packet. You also need
1691          * to adjust the IP header checksum of that offending IP packet
1692          * and the ICMP checksum of the ICMP error message itself.
1693          *
1694          * Unfortunately, for UDP and TCP, the IP addresses are also contained
1695          * in the pseudo header that is used to compute the UDP resp. TCP
1696          * checksum. So, we must compensate that as well. Even worse, the
1697          * change in the UDP and TCP checksums require yet another
1698          * adjustment of the ICMP checksum of the ICMP error message.
1699          *
1700          * For the moment we forget about TCP, because that checksum is not
1701          * in the first 8 bytes, so it will not be available in most cases.
1702          */
1703
1704         if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
1705                 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1706                 in = nat->nat_inip;
1707                 oip->ip_src = in;
1708         } else {
1709                 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1710                 in = nat->nat_outip;
1711                 oip->ip_dst = in;
1712         }
1713
1714         sum2 = LONG_SUM(ntohl(in.s_addr));
1715
1716         CALC_SUMD(sum1, sum2, sumd);
1717
1718         if (nat->nat_dir == NAT_OUTBOUND) {
1719                 /*
1720                  * Fix IP checksum of the offending IP packet to adjust for
1721                  * the change in the IP address.
1722                  *
1723                  * Normally, you would expect that the ICMP checksum of the 
1724                  * ICMP error message needs to be adjusted as well for the
1725                  * IP address change in oip.
1726                  * However, this is a NOP, because the ICMP checksum is 
1727                  * calculated over the complete ICMP packet, which includes the
1728                  * changed oip IP addresses and oip->ip_sum. However, these 
1729                  * two changes cancel each other out (if the delta for
1730                  * the IP address is x, then the delta for ip_sum is minus x), 
1731                  * so no change in the icmp_cksum is necessary.
1732                  *
1733                  * Be careful that nat_dir refers to the direction of the
1734                  * offending IP packet (oip), not to its ICMP response (icmp)
1735                  */
1736                 fix_datacksum(&oip->ip_sum, sumd);
1737
1738                 /*
1739                  * Fix UDP pseudo header checksum to compensate for the
1740                  * IP address change.
1741                  */
1742                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1743                         /*
1744                          * The UDP checksum is optional, only adjust it 
1745                          * if it has been set.
1746                          */
1747                         sum1 = ntohs(udp->uh_sum);
1748                         fix_datacksum(&udp->uh_sum, sumd);
1749                         sum2 = ntohs(udp->uh_sum);
1750
1751                         /*
1752                          * Fix ICMP checksum to compensate the UDP 
1753                          * checksum adjustment.
1754                          */
1755                         CALC_SUMD(sum1, sum2, sumd);
1756                         sumd2 = sumd;
1757                 }
1758
1759 #if 0
1760                 /*
1761                  * Fix TCP pseudo header checksum to compensate for the 
1762                  * IP address change. Before we can do the change, we
1763                  * must make sure that oip is sufficient large to hold
1764                  * the TCP checksum (normally it does not!).
1765                  */
1766                 if (oip->ip_p == IPPROTO_TCP) {
1767                 
1768                 }
1769 #endif
1770         } else {
1771
1772                 /*
1773                  * Fix IP checksum of the offending IP packet to adjust for
1774                  * the change in the IP address.
1775                  *
1776                  * Normally, you would expect that the ICMP checksum of the 
1777                  * ICMP error message needs to be adjusted as well for the
1778                  * IP address change in oip.
1779                  * However, this is a NOP, because the ICMP checksum is 
1780                  * calculated over the complete ICMP packet, which includes the
1781                  * changed oip IP addresses and oip->ip_sum. However, these 
1782                  * two changes cancel each other out (if the delta for
1783                  * the IP address is x, then the delta for ip_sum is minus x), 
1784                  * so no change in the icmp_cksum is necessary.
1785                  *
1786                  * Be careful that nat_dir refers to the direction of the
1787                  * offending IP packet (oip), not to its ICMP response (icmp)
1788                  */
1789                 fix_datacksum(&oip->ip_sum, sumd);
1790
1791 /* XXX FV : without having looked at Solaris source code, it seems unlikely
1792  * that SOLARIS would compensate this in the kernel (a body of an IP packet 
1793  * in the data section of an ICMP packet). I have the feeling that this should
1794  * be unconditional, but I'm not in a position to check.
1795  */
1796 #if !SOLARIS && !defined(__sgi)
1797                 /*
1798                  * Fix UDP pseudo header checksum to compensate for the
1799                  * IP address change.
1800                  */
1801                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1802                         /*
1803                          * The UDP checksum is optional, only adjust it 
1804                          * if it has been set 
1805                          */
1806                         sum1 = ntohs(udp->uh_sum);
1807                         fix_datacksum(&udp->uh_sum, sumd);
1808                         sum2 = ntohs(udp->uh_sum);
1809
1810                         /*
1811                          * Fix ICMP checksum to compensate the UDP 
1812                          * checksum adjustment.
1813                          */
1814                         CALC_SUMD(sum1, sum2, sumd);
1815                         sumd2 = sumd;
1816                 }
1817                 
1818 #if 0
1819                 /* 
1820                  * Fix TCP pseudo header checksum to compensate for the 
1821                  * IP address change. Before we can do the change, we
1822                  * must make sure that oip is sufficient large to hold
1823                  * the TCP checksum (normally it does not!).
1824                  */
1825                 if (oip->ip_p == IPPROTO_TCP) {
1826                 
1827                 };
1828 #endif
1829                 
1830 #endif
1831         }
1832
1833         if ((flags & IPN_TCPUDP) != 0) {
1834                 tcphdr_t *tcp;
1835
1836                 /*
1837                  * XXX - what if this is bogus hl and we go off the end ?
1838                  * In this case, nat_icmpinlookup() will have returned NULL.
1839                  */
1840                 tcp = (tcphdr_t *)udp;
1841
1842                 /*
1843                  * Step 2 :
1844                  * For offending TCP/UDP IP packets, translate the ports as
1845                  * well, based on the NAT specification. Of course such
1846                  * a change must be reflected in the ICMP checksum as well.
1847                  *
1848                  * Advance notice : Now it becomes complicated :-)
1849                  *
1850                  * Since the port fields are part of the TCP/UDP checksum
1851                  * of the offending IP packet, you need to adjust that checksum
1852                  * as well... but, if you change, you must change the icmp
1853                  * checksum *again*, to reflect that change.
1854                  *
1855                  * To further complicate: the TCP checksum is not in the first
1856                  * 8 bytes of the offending ip packet, so it most likely is not
1857                  * available (we might have to fix that if the encounter a
1858                  * device that returns more than 8 data bytes on icmp error)
1859                  */
1860
1861                 if (nat->nat_oport == tcp->th_dport) {
1862                         if (tcp->th_sport != nat->nat_inport) {
1863                                 /*
1864                                  * Fix ICMP checksum to compensate port
1865                                  * adjustment.
1866                                  */
1867                                 sum1 = ntohs(tcp->th_sport);
1868                                 sum2 = ntohs(nat->nat_inport);
1869                                 CALC_SUMD(sum1, sum2, sumd);
1870                                 sumd2 += sumd;
1871                                 tcp->th_sport = nat->nat_inport;
1872
1873                                 /*
1874                                  * Fix udp checksum to compensate port
1875                                  * adjustment.  NOTE : the offending IP packet
1876                                  * flows the other direction compared to the
1877                                  * ICMP message.
1878                                  *
1879                                  * The UDP checksum is optional, only adjust
1880                                  * it if it has been set.
1881                                  */
1882                                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1883
1884                                         sum1 = ntohs(udp->uh_sum);
1885                                         fix_datacksum(&udp->uh_sum, sumd);
1886                                         sum2 = ntohs(udp->uh_sum);
1887
1888                                         /*
1889                                          * Fix ICMP checksum to 
1890                                          * compensate UDP checksum 
1891                                          * adjustment.
1892                                          */
1893                                         CALC_SUMD(sum1, sum2, sumd);
1894                                         sumd2 += sumd;
1895                                 }
1896                         }
1897                 } else {
1898                         if (tcp->th_dport != nat->nat_outport) {
1899                                 /*
1900                                  * Fix ICMP checksum to compensate port
1901                                  * adjustment.
1902                                  */
1903                                 sum1 = ntohs(tcp->th_dport);
1904                                 sum2 = ntohs(nat->nat_outport);
1905                                 CALC_SUMD(sum1, sum2, sumd);
1906                                 sumd2 += sumd;
1907                                 tcp->th_dport = nat->nat_outport;
1908
1909                                 /*
1910                                  * Fix udp checksum to compensate port
1911                                  * adjustment.   NOTE : the offending IP
1912                                  * packet flows the other direction compared
1913                                  * to the ICMP message.
1914                                  *
1915                                  * The UDP checksum is optional, only adjust
1916                                  * it if it has been set.
1917                                  */
1918                                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1919
1920                                         sum1 = ntohs(udp->uh_sum);
1921                                         fix_datacksum(&udp->uh_sum, sumd);
1922                                         sum2 = ntohs(udp->uh_sum);
1923
1924                                         /*
1925                                          * Fix ICMP checksum to compensate
1926                                          * UDP checksum adjustment.
1927                                          */
1928                                         CALC_SUMD(sum1, sum2, sumd);
1929                                         sumd2 += sumd;
1930                                 }
1931                         }
1932                 }
1933                 if (sumd2) {
1934                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1935                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1936                         if (nat->nat_dir == NAT_OUTBOUND) {
1937                                 fix_outcksum(fin, &icmp->icmp_cksum, sumd2);
1938                         } else {
1939                                 fix_incksum(fin, &icmp->icmp_cksum, sumd2);
1940                         }
1941                 }
1942         }
1943         if (oip->ip_p == IPPROTO_ICMP)
1944                 nat->nat_age = fr_defnaticmpage;
1945         return nat;
1946 }
1947
1948
1949 /*
1950  * NB: these lookups don't lock access to the list, it assume it has already
1951  * been done!
1952  */
1953 /*
1954  * Lookup a nat entry based on the mapped destination ip address/port and
1955  * real source address/port.  We use this lookup when receiving a packet,
1956  * we're looking for a table entry, based on the destination address.
1957  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
1958  */
1959 nat_t *nat_inlookup(fin, flags, p, src, mapdst, rw)
1960 fr_info_t *fin;
1961 register u_int flags, p;
1962 struct in_addr src , mapdst;
1963 int rw;
1964 {
1965         register u_short sport, dport;
1966         register nat_t *nat;
1967         register int nflags;
1968         register u_32_t dst;
1969         ipnat_t *ipn;
1970         void *ifp;
1971         u_int hv;
1972
1973         if (fin != NULL)
1974                 ifp = fin->fin_ifp;
1975         else
1976                 ifp = NULL;
1977         dst = mapdst.s_addr;
1978         if (flags & IPN_TCPUDP) {
1979                 sport = htons(fin->fin_data[0]);
1980                 dport = htons(fin->fin_data[1]);
1981         } else {
1982                 sport = 0;
1983                 dport = 0;
1984         }
1985
1986         hv = NAT_HASH_FN(dst, dport, 0xffffffff);
1987         hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
1988         nat = nat_table[1][hv];
1989         for (; nat; nat = nat->nat_hnext[1]) {
1990                 nflags = nat->nat_flags;
1991                 if ((!ifp || ifp == nat->nat_ifp) &&
1992                     nat->nat_oip.s_addr == src.s_addr &&
1993                     nat->nat_outip.s_addr == dst &&
1994                     ((p == 0) || (p == nat->nat_p))) {
1995                         switch (p)
1996                         {
1997                         case IPPROTO_TCP :
1998                         case IPPROTO_UDP :
1999                                 if (nat->nat_oport != sport)
2000                                         continue;
2001                                 if (nat->nat_outport != dport)
2002                                         continue;
2003                                 break;
2004                         default :
2005                                 break;
2006                         }
2007
2008                         ipn = nat->nat_ptr;
2009                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2010                                 if (appr_match(fin, nat) != 0)
2011                                         continue;
2012                         return nat;
2013                 }
2014         }
2015         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2016                 return NULL;
2017         if (!rw) {
2018                 RWLOCK_EXIT(&ipf_nat);
2019         }
2020         hv = NAT_HASH_FN(dst, 0, 0xffffffff);
2021         hv = NAT_HASH_FN(src.s_addr, dst, ipf_nattable_sz);
2022         if (!rw) {
2023                 WRITE_ENTER(&ipf_nat);
2024         }
2025         nat = nat_table[1][hv];
2026         for (; nat; nat = nat->nat_hnext[1]) {
2027                 nflags = nat->nat_flags;
2028                 if (ifp && ifp != nat->nat_ifp)
2029                         continue;
2030                 if (!(nflags & FI_WILDP))
2031                         continue;
2032                 if (nat->nat_oip.s_addr != src.s_addr ||
2033                     nat->nat_outip.s_addr != dst)
2034                         continue;
2035                 if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
2036                     ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) {
2037                         nat_tabmove(fin, nat);
2038                         break;
2039                 }
2040         }
2041         if (!rw) {
2042                 MUTEX_DOWNGRADE(&ipf_nat);
2043         }
2044         return nat;
2045 }
2046
2047
2048 /*
2049  * This function is only called for TCP/UDP NAT table entries where the
2050  * original was placed in the table without hashing on the ports and we now
2051  * want to include hashing on port numbers.
2052  */
2053 static void nat_tabmove(fin, nat)
2054 fr_info_t *fin;
2055 nat_t *nat;
2056 {
2057         register u_short sport, dport;
2058         u_int hv, nflags;
2059         nat_t **natp;
2060
2061         nflags = nat->nat_flags;
2062
2063         sport = ntohs(fin->fin_data[0]);
2064         dport = ntohs(fin->fin_data[1]);
2065
2066         /*
2067          * Remove the NAT entry from the old location
2068          */
2069         if (nat->nat_hnext[0])
2070                 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
2071         *nat->nat_phnext[0] = nat->nat_hnext[0];
2072
2073         if (nat->nat_hnext[1])
2074                 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
2075         *nat->nat_phnext[1] = nat->nat_hnext[1];
2076
2077         /*
2078          * Add into the NAT table in the new position
2079          */
2080         hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, 0xffffffff);
2081         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2082         natp = &nat_table[0][hv];
2083         if (*natp)
2084                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2085         nat->nat_phnext[0] = natp;
2086         nat->nat_hnext[0] = *natp;
2087         *natp = nat;
2088
2089         hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, 0xffffffff);
2090         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2091         natp = &nat_table[1][hv];
2092         if (*natp)
2093                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2094         nat->nat_phnext[1] = natp;
2095         nat->nat_hnext[1] = *natp;
2096         *natp = nat;
2097 }
2098
2099
2100 /*
2101  * Lookup a nat entry based on the source 'real' ip address/port and
2102  * destination address/port.  We use this lookup when sending a packet out,
2103  * we're looking for a table entry, based on the source address.
2104  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2105  */
2106 nat_t *nat_outlookup(fin, flags, p, src, dst, rw)
2107 fr_info_t *fin;
2108 register u_int flags, p;
2109 struct in_addr src , dst;
2110 int rw;
2111 {
2112         register u_short sport, dport;
2113         register nat_t *nat;
2114         register int nflags;
2115         ipnat_t *ipn;
2116         u_32_t srcip;
2117         void *ifp;
2118         u_int hv;
2119
2120         ifp = fin->fin_ifp;
2121         srcip = src.s_addr;
2122         if (flags & IPN_TCPUDP) {
2123                 sport = ntohs(fin->fin_data[0]);
2124                 dport = ntohs(fin->fin_data[1]);
2125         } else {
2126                 sport = 0;
2127                 dport = 0;
2128         }
2129
2130         hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
2131         hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
2132         nat = nat_table[0][hv];
2133         for (; nat; nat = nat->nat_hnext[0]) {
2134                 nflags = nat->nat_flags;
2135
2136                 if ((!ifp || ifp == nat->nat_ifp) &&
2137                     nat->nat_inip.s_addr == srcip &&
2138                     nat->nat_oip.s_addr == dst.s_addr &&
2139                     ((p == 0) || (p == nat->nat_p))) {
2140                         switch (p)
2141                         {
2142                         case IPPROTO_TCP :
2143                         case IPPROTO_UDP :
2144                                 if (nat->nat_oport != dport)
2145                                         continue;
2146                                 if (nat->nat_inport != sport)
2147                                         continue;
2148                                 break;
2149                         default :
2150                                 break;
2151                         }
2152
2153                         ipn = nat->nat_ptr;
2154                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2155                                 if (appr_match(fin, nat) != 0)
2156                                         continue;
2157                         return nat;
2158                 }
2159         }
2160         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2161                 return NULL;
2162         if (!rw) {
2163                 RWLOCK_EXIT(&ipf_nat);
2164         }
2165
2166         hv = NAT_HASH_FN(dst.s_addr, srcip, ipf_nattable_sz);
2167         if (!rw) {
2168                 WRITE_ENTER(&ipf_nat);
2169         }
2170         nat = nat_table[0][hv];
2171         for (; nat; nat = nat->nat_hnext[0]) {
2172                 nflags = nat->nat_flags;
2173                 if (ifp && ifp != nat->nat_ifp)
2174                         continue;
2175                 if (!(nflags & FI_WILDP))
2176                         continue;
2177                 if ((nat->nat_inip.s_addr != srcip) ||
2178                     (nat->nat_oip.s_addr != dst.s_addr))
2179                         continue;
2180                 if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
2181                     ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
2182                         nat_tabmove(fin, nat);
2183                         break;
2184                 }
2185         }
2186         if (!rw) {
2187                 MUTEX_DOWNGRADE(&ipf_nat);
2188         }
2189         return nat;
2190 }
2191
2192
2193 /*
2194  * Lookup the NAT tables to search for a matching redirect
2195  */
2196 nat_t *nat_lookupredir(np)
2197 register natlookup_t *np;
2198 {
2199         nat_t *nat;
2200         fr_info_t fi;
2201
2202         bzero((char *)&fi, sizeof(fi));
2203         fi.fin_data[0] = np->nl_inport;
2204         fi.fin_data[1] = np->nl_outport;
2205
2206         /*
2207          * If nl_inip is non null, this is a lookup based on the real
2208          * ip address. Else, we use the fake.
2209          */
2210         if ((nat = nat_outlookup(&fi, np->nl_flags, 0, np->nl_inip,
2211                                  np->nl_outip, 0))) {
2212                 np->nl_realip = nat->nat_outip;
2213                 np->nl_realport = nat->nat_outport;
2214         }
2215         return nat;
2216 }
2217
2218
2219 static int nat_match(fin, np, ip)
2220 fr_info_t *fin;
2221 ipnat_t *np;
2222 ip_t *ip;
2223 {
2224         frtuc_t *ft;
2225
2226         if (ip->ip_v != 4)
2227                 return 0;
2228
2229         if (np->in_p && fin->fin_p != np->in_p)
2230                 return 0;
2231         if (fin->fin_out) {
2232                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2233                         return 0;
2234                 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
2235                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2236                         return 0;
2237                 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
2238                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2239                         return 0;
2240         } else {
2241                 if (!(np->in_redir & NAT_REDIRECT))
2242                         return 0;
2243                 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
2244                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2245                         return 0;
2246                 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
2247                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2248                         return 0;
2249         }
2250
2251         ft = &np->in_tuc;
2252         if (!(fin->fin_fl & FI_TCPUDP) ||
2253             (fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) {
2254                 if (ft->ftu_scmp || ft->ftu_dcmp)
2255                         return 0;
2256                 return 1;
2257         }
2258
2259         return fr_tcpudpchk(ft, fin);
2260 }
2261
2262
2263 /*
2264  * Packets going out on the external interface go through this.
2265  * Here, the source address requires alteration, if anything.
2266  */
2267 int ip_natout(ip, fin)
2268 ip_t *ip;
2269 fr_info_t *fin;
2270 {
2271         register ipnat_t *np = NULL;
2272         register u_32_t ipa;
2273         tcphdr_t *tcp = NULL;
2274         u_short sport = 0, dport = 0, *csump = NULL;
2275         int natadd = 1, i, icmpset = 1;
2276         u_int nflags = 0, hv, msk;
2277         struct ifnet *ifp;
2278         frentry_t *fr;
2279         void *sifp;
2280         u_32_t iph;
2281         nat_t *nat;
2282
2283         if (nat_list == NULL || (fr_nat_lock))
2284                 return 0;
2285
2286         if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
2287             fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) {
2288                 sifp = fin->fin_ifp;
2289                 fin->fin_ifp = fr->fr_tif.fd_ifp;
2290         } else
2291                 sifp = fin->fin_ifp;
2292         ifp = fin->fin_ifp;
2293
2294         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2295                 if (fin->fin_p == IPPROTO_TCP)
2296                         nflags = IPN_TCP;
2297                 else if (fin->fin_p == IPPROTO_UDP)
2298                         nflags = IPN_UDP;
2299                 if ((nflags & IPN_TCPUDP)) {
2300                         tcp = (tcphdr_t *)fin->fin_dp;
2301                         sport = tcp->th_sport;
2302                         dport = tcp->th_dport;
2303                 }
2304         }
2305
2306         ipa = fin->fin_saddr;
2307
2308         READ_ENTER(&ipf_nat);
2309
2310         if ((fin->fin_p == IPPROTO_ICMP) &&
2311             (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
2312                 icmpset = 1;
2313         else if ((fin->fin_fl & FI_FRAG) &&
2314             (nat = ipfr_nat_knownfrag(ip, fin)))
2315                 natadd = 0;
2316         else if ((nat = nat_outlookup(fin, nflags|FI_WILDP|FI_WILDA,
2317                                       (u_int)fin->fin_p, fin->fin_src,
2318                                       fin->fin_dst, 0))) {
2319                 nflags = nat->nat_flags;
2320                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2321                         if ((nflags & FI_W_SPORT) &&
2322                             (nat->nat_inport != sport))
2323                                 nat->nat_inport = sport;
2324                         if ((nflags & FI_W_DPORT) &&
2325                             (nat->nat_oport != dport))
2326                                 nat->nat_oport = dport;
2327
2328                         if (nat->nat_outport == 0)
2329                                 nat->nat_outport = sport;
2330                         nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
2331                         nflags = nat->nat_flags;
2332                         nat_stats.ns_wilds--;
2333                 }
2334         } else {
2335                 RWLOCK_EXIT(&ipf_nat);
2336
2337                 msk = 0xffffffff;
2338                 i = 32;
2339
2340                 WRITE_ENTER(&ipf_nat);
2341                 /*
2342                  * If there is no current entry in the nat table for this IP#,
2343                  * create one for it (if there is a matching rule).
2344                  */
2345 maskloop:
2346                 iph = ipa & htonl(msk);
2347                 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
2348                 for (np = nat_rules[hv]; np; np = np->in_mnext)
2349                 {
2350                         if (np->in_ifp && (np->in_ifp != ifp))
2351                                 continue;
2352                         if ((np->in_flags & IPN_RF) &&
2353                             !(np->in_flags & nflags))
2354                                 continue;
2355                         if (np->in_flags & IPN_FILTER) {
2356                                 if (!nat_match(fin, np, ip))
2357                                         continue;
2358                         } else if ((ipa & np->in_inmsk) != np->in_inip)
2359                                 continue;
2360                         if (*np->in_plabel && !appr_ok(ip, tcp, np))
2361                                 continue;
2362                         nat = nat_new(fin, ip, np, NULL,
2363                                       (u_int)nflags, NAT_OUTBOUND);
2364                         if (nat != NULL) {
2365                                 np->in_hits++;
2366                                 break;
2367                         }
2368                 }
2369                 if ((np == NULL) && (i > 0)) {
2370                         do {
2371                                 i--;
2372                                 msk <<= 1;
2373                         } while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
2374                         if (i >= 0)
2375                                 goto maskloop;
2376                 }
2377                 MUTEX_DOWNGRADE(&ipf_nat);
2378         }
2379
2380         /*
2381          * NOTE: ipf_nat must now only be held as a read lock
2382          */
2383         if (nat) {
2384                 np = nat->nat_ptr;
2385                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2386                         ipfr_nat_newfrag(ip, fin, 0, nat);
2387                 MUTEX_ENTER(&nat->nat_lock);
2388                 if (fin->fin_p != IPPROTO_TCP) {
2389                         if (np && np->in_age[1])
2390                                 nat->nat_age = np->in_age[1];
2391                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2392                                 nat->nat_age = fr_defnaticmpage;
2393                         else
2394                                 nat->nat_age = fr_defnatage;
2395                 }
2396                 nat->nat_bytes += ip->ip_len;
2397                 nat->nat_pkts++;
2398                 MUTEX_EXIT(&nat->nat_lock);
2399
2400                 /*
2401                  * Fix up checksums, not by recalculating them, but
2402                  * simply computing adjustments.
2403                  */
2404                 if (nflags == IPN_ICMPERR) {
2405                         u_32_t s1, s2, sumd;
2406
2407                         s1 = LONG_SUM(ntohl(fin->fin_saddr));
2408                         s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
2409                         CALC_SUMD(s1, s2, sumd);
2410
2411                         if (nat->nat_dir == NAT_OUTBOUND)
2412                                 fix_outcksum(fin, &ip->ip_sum, sumd);
2413                         else
2414                                 fix_incksum(fin, &ip->ip_sum, sumd);
2415                 }
2416 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
2417                 else {
2418                         if (nat->nat_dir == NAT_OUTBOUND)
2419                                 fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2420                         else
2421                                 fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2422                 }
2423 #endif
2424                 /*
2425                  * Only change the packet contents, not what is filtered upon.
2426                  */
2427                 ip->ip_src = nat->nat_outip;
2428
2429                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2430
2431                         if ((nat->nat_outport != 0) && (tcp != NULL)) {
2432                                 tcp->th_sport = nat->nat_outport;
2433                                 fin->fin_data[0] = ntohs(tcp->th_sport);
2434                         }
2435
2436                         if (fin->fin_p == IPPROTO_TCP) {
2437                                 csump = &tcp->th_sum;
2438                                 MUTEX_ENTER(&nat->nat_lock);
2439                                 fr_tcp_age(&nat->nat_age,
2440                                            nat->nat_tcpstate, fin, 1);
2441                                 if (nat->nat_age < fr_defnaticmpage)
2442                                         nat->nat_age = fr_defnaticmpage;
2443 #ifdef LARGE_NAT
2444                                 else if (nat->nat_age > fr_defnatage)
2445                                         nat->nat_age = fr_defnatage;
2446 #endif
2447                                 /*
2448                                  * Increase this because we may have
2449                                  * "keep state" following this too and
2450                                  * packet storms can occur if this is
2451                                  * removed too quickly.
2452                                  */
2453                                 if (nat->nat_age == fr_tcpclosed)
2454                                         nat->nat_age = fr_tcplastack;
2455                                 MUTEX_EXIT(&nat->nat_lock);
2456                         } else if (fin->fin_p == IPPROTO_UDP) {
2457                                 udphdr_t *udp = (udphdr_t *)tcp;
2458
2459                                 if (udp->uh_sum)
2460                                         csump = &udp->uh_sum;
2461                         }
2462
2463                         if (csump) {
2464                                 if (nat->nat_dir == NAT_OUTBOUND)
2465                                         fix_outcksum(fin, csump,
2466                                                      nat->nat_sumd[1]);
2467                                 else
2468                                         fix_incksum(fin, csump,
2469                                                     nat->nat_sumd[1]);
2470                         }
2471                 }
2472
2473                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2474                      (tcp != NULL && dport == np->in_dport))) {
2475                         i = appr_check(ip, fin, nat);
2476                         if (i == 0)
2477                                 i = 1;
2478                 } else
2479                         i = 1;
2480                 ATOMIC_INCL(nat_stats.ns_mapped[1]);
2481                 RWLOCK_EXIT(&ipf_nat);  /* READ */
2482                 fin->fin_ifp = sifp;
2483                 return i;
2484         }
2485         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2486         fin->fin_ifp = sifp;
2487         return 0;
2488 }
2489
2490
2491 /*
2492  * Packets coming in from the external interface go through this.
2493  * Here, the destination address requires alteration, if anything.
2494  */
2495 int ip_natin(ip, fin)
2496 ip_t *ip;
2497 fr_info_t *fin;
2498 {
2499         register struct in_addr src;
2500         register struct in_addr in;
2501         register ipnat_t *np;
2502         u_short sport = 0, dport = 0, *csump = NULL;
2503         u_int nflags = 0, natadd = 1, hv, msk;
2504         struct ifnet *ifp = fin->fin_ifp;
2505         tcphdr_t *tcp = NULL;
2506         int i, icmpset = 0;
2507         nat_t *nat;
2508         u_32_t iph;
2509
2510         if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
2511                 return 0;
2512
2513         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2514                 if (fin->fin_p == IPPROTO_TCP)
2515                         nflags = IPN_TCP;
2516                 else if (fin->fin_p == IPPROTO_UDP)
2517                         nflags = IPN_UDP;
2518                 if ((nflags & IPN_TCPUDP)) {
2519                         tcp = (tcphdr_t *)fin->fin_dp;
2520                         sport = tcp->th_sport;
2521                         dport = tcp->th_dport;
2522                 }
2523         }
2524
2525         in = fin->fin_dst;
2526         /* make sure the source address is to be redirected */
2527         src = fin->fin_src;
2528
2529         READ_ENTER(&ipf_nat);
2530
2531         if ((fin->fin_p == IPPROTO_ICMP) &&
2532             (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
2533                 icmpset = 1;
2534         else if ((fin->fin_fl & FI_FRAG) &&
2535                  (nat = ipfr_nat_knownfrag(ip, fin)))
2536                 natadd = 0;
2537         else if ((nat = nat_inlookup(fin, nflags|FI_WILDP|FI_WILDA,
2538                                      (u_int)fin->fin_p, fin->fin_src, in, 0))) {
2539                 nflags = nat->nat_flags;
2540                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2541                         if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
2542                                 nat->nat_oport = sport;
2543                         if ((nat->nat_outport != dport) &&
2544                                  (nflags & FI_W_SPORT))
2545                                 nat->nat_outport = dport;
2546                         nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2547                         nflags = nat->nat_flags;
2548                         nat_stats.ns_wilds--;
2549                 }
2550         } else {
2551                 RWLOCK_EXIT(&ipf_nat);
2552
2553                 msk = 0xffffffff;
2554                 i = 32;
2555
2556                 WRITE_ENTER(&ipf_nat);
2557                 /*
2558                  * If there is no current entry in the nat table for this IP#,
2559                  * create one for it (if there is a matching rule).
2560                  */
2561 maskloop:
2562                 iph = in.s_addr & htonl(msk);
2563                 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2564                 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2565                         if ((np->in_ifp && (np->in_ifp != ifp)) ||
2566                             (np->in_p && (np->in_p != fin->fin_p)) ||
2567                             (np->in_flags && !(nflags & np->in_flags)))
2568                                 continue;
2569                         if (np->in_flags & IPN_FILTER) {
2570                                 if (!nat_match(fin, np, ip))
2571                                         continue;
2572                         } else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2573                                 continue;
2574                         if ((!np->in_pmin || (np->in_flags & IPN_FILTER) ||
2575                              ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2576                               (ntohs(dport) >= ntohs(np->in_pmin)))))
2577                                 if ((nat = nat_new(fin, ip, np, NULL, nflags,
2578                                                     NAT_INBOUND))) {
2579                                         np->in_hits++;
2580                                         break;
2581                                 }
2582                 }
2583
2584                 if ((np == NULL) && (i > 0)) {
2585                         do {
2586                                 i--;
2587                                 msk <<= 1;
2588                         } while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2589                         if (i >= 0)
2590                                 goto maskloop;
2591                 }
2592                 MUTEX_DOWNGRADE(&ipf_nat);
2593         }
2594
2595         /*
2596          * NOTE: ipf_nat must now only be held as a read lock
2597          */
2598         if (nat) {
2599                 np = nat->nat_ptr;
2600                 fin->fin_fr = nat->nat_fr;
2601                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2602                         ipfr_nat_newfrag(ip, fin, 0, nat);
2603                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2604                      (tcp != NULL && sport == np->in_dport))) {
2605                         i = appr_check(ip, fin, nat);
2606                         if (i == -1) {
2607                                 RWLOCK_EXIT(&ipf_nat);
2608                                 return i;
2609                         }
2610                 }
2611
2612                 MUTEX_ENTER(&nat->nat_lock);
2613                 if (fin->fin_p != IPPROTO_TCP) {
2614                         if (np && np->in_age[0])
2615                                 nat->nat_age = np->in_age[0];
2616                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2617                                 nat->nat_age = fr_defnaticmpage;
2618                         else
2619                                 nat->nat_age = fr_defnatage;
2620                 }
2621                 nat->nat_bytes += ip->ip_len;
2622                 nat->nat_pkts++;
2623                 MUTEX_EXIT(&nat->nat_lock);
2624                 ip->ip_dst = nat->nat_inip;
2625                 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2626
2627                 /*
2628                  * Fix up checksums, not by recalculating them, but
2629                  * simply computing adjustments.
2630                  */
2631 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
2632                 if (nat->nat_dir == NAT_OUTBOUND)
2633                         fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2634                 else
2635                         fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2636 #endif
2637                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2638
2639                         if ((nat->nat_inport != 0) && (tcp != NULL)) {
2640                                 tcp->th_dport = nat->nat_inport;
2641                                 fin->fin_data[1] = ntohs(tcp->th_dport);
2642                         }
2643
2644                         if (fin->fin_p == IPPROTO_TCP) {
2645                                 csump = &tcp->th_sum;
2646                                 MUTEX_ENTER(&nat->nat_lock);
2647                                 fr_tcp_age(&nat->nat_age,
2648                                            nat->nat_tcpstate, fin, 0);
2649                                 if (nat->nat_age < fr_defnaticmpage)
2650                                         nat->nat_age = fr_defnaticmpage;
2651 #ifdef LARGE_NAT
2652                                 else if (nat->nat_age > fr_defnatage)
2653                                         nat->nat_age = fr_defnatage;
2654 #endif
2655                                 /*
2656                                  * Increase this because we may have
2657                                  * "keep state" following this too and
2658                                  * packet storms can occur if this is
2659                                  * removed too quickly.
2660                                  */
2661                                 if (nat->nat_age == fr_tcpclosed)
2662                                         nat->nat_age = fr_tcplastack;
2663                                 MUTEX_EXIT(&nat->nat_lock);
2664                         } else if (fin->fin_p == IPPROTO_UDP) {
2665                                 udphdr_t *udp = (udphdr_t *)tcp;
2666
2667                                 if (udp->uh_sum)
2668                                         csump = &udp->uh_sum;
2669                         }
2670
2671                         if (csump) {
2672                                 if (nat->nat_dir == NAT_OUTBOUND)
2673                                         fix_incksum(fin, csump,
2674                                                     nat->nat_sumd[0]);
2675                                 else
2676                                         fix_outcksum(fin, csump,
2677                                                     nat->nat_sumd[0]);
2678                         }
2679                 }
2680                 ATOMIC_INCL(nat_stats.ns_mapped[0]);
2681                 RWLOCK_EXIT(&ipf_nat);                  /* READ */
2682                 return 1;
2683         }
2684         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2685         return 0;
2686 }
2687
2688
2689 /*
2690  * Free all memory used by NAT structures allocated at runtime.
2691  */
2692 void ip_natunload()
2693 {
2694         WRITE_ENTER(&ipf_nat);
2695         (void) nat_clearlist();
2696         (void) nat_flushtable();
2697         RWLOCK_EXIT(&ipf_nat);
2698
2699         if (nat_table[0] != NULL) {
2700                 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2701                 nat_table[0] = NULL;
2702         }
2703         if (nat_table[1] != NULL) {
2704                 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2705                 nat_table[1] = NULL;
2706         }
2707         if (nat_rules != NULL) {
2708                 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2709                 nat_rules = NULL;
2710         }
2711         if (rdr_rules != NULL) {
2712                 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2713                 rdr_rules = NULL;
2714         }
2715         if (maptable != NULL) {
2716                 KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2717                 maptable = NULL;
2718         }
2719 }
2720
2721
2722 /*
2723  * Slowly expire held state for NAT entries.  Timeouts are set in
2724  * expectation of this being called twice per second.
2725  */
2726 void ip_natexpire()
2727 {
2728         register struct nat *nat, **natp;
2729 #if defined(_KERNEL) && !SOLARIS
2730         int s;
2731 #endif
2732
2733         SPL_NET(s);
2734         WRITE_ENTER(&ipf_nat);
2735         for (natp = &nat_instances; (nat = *natp); ) {
2736                 nat->nat_age--;
2737                 if (nat->nat_age) {
2738                         natp = &nat->nat_next;
2739                         continue;
2740                 }
2741                 *natp = nat->nat_next;
2742 #ifdef  IPFILTER_LOG
2743                 nat_log(nat, NL_EXPIRE);
2744 #endif
2745                 nat_delete(nat);
2746                 nat_stats.ns_expire++;
2747         }
2748         RWLOCK_EXIT(&ipf_nat);
2749         SPL_X(s);
2750 }
2751
2752
2753 /*
2754  */
2755 void ip_natsync(ifp)
2756 void *ifp;
2757 {
2758         register ipnat_t *n;
2759         register nat_t *nat;
2760         register u_32_t sum1, sum2, sumd;
2761         struct in_addr in;
2762         ipnat_t *np;
2763         void *ifp2;
2764 #if defined(_KERNEL) && !SOLARIS
2765         int s;
2766 #endif
2767
2768         /*
2769          * Change IP addresses for NAT sessions for any protocol except TCP
2770          * since it will break the TCP connection anyway.
2771          */
2772         SPL_NET(s);
2773         WRITE_ENTER(&ipf_nat);
2774         for (nat = nat_instances; nat; nat = nat->nat_next)
2775                 if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2776                     !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2777                     (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2778                         ifp2 = nat->nat_ifp;
2779                         /*
2780                          * Change the map-to address to be the same as the
2781                          * new one.
2782                          */
2783                         sum1 = nat->nat_outip.s_addr;
2784                         if (fr_ifpaddr(4, ifp2, &in) != -1)
2785                                 nat->nat_outip = in;
2786                         sum2 = nat->nat_outip.s_addr;
2787
2788                         if (sum1 == sum2)
2789                                 continue;
2790                         /*
2791                          * Readjust the checksum adjustment to take into
2792                          * account the new IP#.
2793                          */
2794                         CALC_SUMD(sum1, sum2, sumd);
2795                         /* XXX - dont change for TCP when solaris does
2796                          * hardware checksumming.
2797                          */
2798                         sumd += nat->nat_sumd[0];
2799                         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2800                         nat->nat_sumd[1] = nat->nat_sumd[0];
2801                 }
2802
2803         for (n = nat_list; (n != NULL); n = n->in_next)
2804                 if (n->in_ifp == ifp) {
2805                         n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2806                         if (!n->in_ifp)
2807                                 n->in_ifp = (void *)-1;
2808                 }
2809         RWLOCK_EXIT(&ipf_nat);
2810         SPL_X(s);
2811 }
2812
2813
2814 #ifdef  IPFILTER_LOG
2815 void nat_log(nat, type)
2816 struct nat *nat;
2817 u_int type;
2818 {
2819         struct ipnat *np;
2820         struct natlog natl;
2821         void *items[1];
2822         size_t sizes[1];
2823         int rulen, types[1];
2824
2825         natl.nl_inip = nat->nat_inip;
2826         natl.nl_outip = nat->nat_outip;
2827         natl.nl_origip = nat->nat_oip;
2828         natl.nl_bytes = nat->nat_bytes;
2829         natl.nl_pkts = nat->nat_pkts;
2830         natl.nl_origport = nat->nat_oport;
2831         natl.nl_inport = nat->nat_inport;
2832         natl.nl_outport = nat->nat_outport;
2833         natl.nl_p = nat->nat_p;
2834         natl.nl_type = type;
2835         natl.nl_rule = -1;
2836 #ifndef LARGE_NAT
2837         if (nat->nat_ptr != NULL) {
2838                 for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
2839                         if (np == nat->nat_ptr) {
2840                                 natl.nl_rule = rulen;
2841                                 break;
2842                         }
2843         }
2844 #endif
2845         items[0] = &natl;
2846         sizes[0] = sizeof(natl);
2847         types[0] = 0;
2848
2849         (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
2850 }
2851 #endif
2852
2853
2854 #if defined(__OpenBSD__)
/*
 * nat_ifdetach - interface-detach hook (only built when __OpenBSD__ is
 * defined).
 *
 * ifp - not examined; the function simply calls frsync().
 */
void nat_ifdetach(ifp)
void *ifp;
{
	frsync();
}
2861 #endif