]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/contrib/ipfilter/netinet/ip_nat.c
This commit was generated by cvs2svn to compensate for changes in r95504,
[FreeBSD/FreeBSD.git] / sys / contrib / ipfilter / netinet / ip_nat.c
1 /*
2  * Copyright (C) 1995-2001 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
7  */
8
9 #if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL)
10 #define _KERNEL
11 #endif
12
13 #ifdef __sgi
14 # include <sys/ptimers.h>
15 #endif
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
19 #include <sys/time.h>
20 #include <sys/file.h>
21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22     defined(_KERNEL)
23 # include "opt_ipfilter_log.h"
24 #endif
25 #if !defined(_KERNEL) && !defined(KERNEL)
26 # include <stdio.h>
27 # include <string.h>
28 # include <stdlib.h>
29 #endif
30 #if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
31 # include <sys/filio.h>
32 # include <sys/fcntl.h>
33 #else
34 # include <sys/ioctl.h>
35 #endif
36 #include <sys/fcntl.h>
37 #ifndef linux
38 # include <sys/protosw.h>
39 #endif
40 #include <sys/socket.h>
41 #if defined(_KERNEL) && !defined(linux)
42 # include <sys/systm.h>
43 #endif
44 #if !defined(__SVR4) && !defined(__svr4__)
45 # ifndef linux
46 #  include <sys/mbuf.h>
47 # endif
48 #else
49 # include <sys/filio.h>
50 # include <sys/byteorder.h>
51 # ifdef _KERNEL
52 #  include <sys/dditypes.h>
53 # endif
54 # include <sys/stream.h>
55 # include <sys/kmem.h>
56 #endif
57 #if __FreeBSD_version >= 300000
58 # include <sys/queue.h>
59 #endif
60 #include <net/if.h>
61 #if __FreeBSD_version >= 300000
62 # include <net/if_var.h>
63 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
64 #  include "opt_ipfilter.h"
65 # endif
66 #endif
67 #ifdef sun
68 # include <net/af.h>
69 #endif
70 #include <net/route.h>
71 #include <netinet/in.h>
72 #include <netinet/in_systm.h>
73 #include <netinet/ip.h>
74
75 #ifdef __sgi
76 # ifdef IFF_DRVRLOCK /* IRIX6 */
77 #include <sys/hashing.h>
78 #include <netinet/in_var.h>
79 # endif
80 #endif
81
82 #ifdef RFC1825
83 # include <vpn/md5.h>
84 # include <vpn/ipsec.h>
85 extern struct ifnet vpnif;
86 #endif
87
88 #ifndef linux
89 # include <netinet/ip_var.h>
90 # include <netinet/tcp_fsm.h>
91 #endif
92 #include <netinet/tcp.h>
93 #include <netinet/udp.h>
94 #include <netinet/ip_icmp.h>
95 #include "netinet/ip_compat.h"
96 #include <netinet/tcpip.h>
97 #include "netinet/ip_fil.h"
98 #include "netinet/ip_nat.h"
99 #include "netinet/ip_frag.h"
100 #include "netinet/ip_state.h"
101 #include "netinet/ip_proxy.h"
102 #if (__FreeBSD_version >= 300000)
103 # include <sys/malloc.h>
104 #endif
105 #ifndef MIN
106 # define        MIN(a,b)        (((a)<(b))?(a):(b))
107 #endif
108 #undef  SOCKADDR_IN
109 #define SOCKADDR_IN     struct sockaddr_in
110
111 #if !defined(lint)
112 static const char sccsid[] = "@(#)ip_nat.c      1.11 6/5/96 (C) 1995 Darren Reed";
113 /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.37.2.44 2001/07/21 07:17:22 darrenr Exp $"; */
114 static const char rcsid[] = "@(#)$FreeBSD$";
115 #endif
116
117 nat_t   **nat_table[2] = { NULL, NULL },
118         *nat_instances = NULL;
119 ipnat_t *nat_list = NULL;
120 u_int   ipf_nattable_sz = NAT_TABLE_SZ;
121 u_int   ipf_natrules_sz = NAT_SIZE;
122 u_int   ipf_rdrrules_sz = RDR_SIZE;
123 u_int   ipf_hostmap_sz = HOSTMAP_SIZE;
124 u_32_t  nat_masks = 0;
125 u_32_t  rdr_masks = 0;
126 ipnat_t **nat_rules = NULL;
127 ipnat_t **rdr_rules = NULL;
128 hostmap_t       **maptable  = NULL;
129
130 u_long  fr_defnatage = DEF_NAT_AGE,
131         fr_defnaticmpage = 6;           /* 3 seconds */
132 natstat_t nat_stats;
133 int     fr_nat_lock = 0;
134 #if     (SOLARIS || defined(__sgi)) && defined(_KERNEL)
135 extern  kmutex_t        ipf_rw;
136 extern  KRWLOCK_T       ipf_nat;
137 #endif
138
139 static  int     nat_flushtable __P((void));
140 static  void    nat_addnat __P((struct ipnat *));
141 static  void    nat_addrdr __P((struct ipnat *));
142 static  void    nat_delete __P((struct nat *));
143 static  void    nat_delrdr __P((struct ipnat *));
144 static  void    nat_delnat __P((struct ipnat *));
145 static  int     fr_natgetent __P((caddr_t));
146 static  int     fr_natgetsz __P((caddr_t));
147 static  int     fr_natputent __P((caddr_t));
148 static  void    nat_tabmove __P((fr_info_t *, nat_t *));
149 static  int     nat_match __P((fr_info_t *, ipnat_t *, ip_t *));
150 static  hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
151                                     struct in_addr));
152 static  void    nat_hostmapdel __P((struct hostmap *));
153
154
155 int nat_init()
156 {
157         KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
158         if (nat_table[0] != NULL)
159                 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
160         else
161                 return -1;
162
163         KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
164         if (nat_table[1] != NULL)
165                 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
166         else
167                 return -1;
168
169         KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
170         if (nat_rules != NULL)
171                 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
172         else
173                 return -1;
174
175         KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
176         if (rdr_rules != NULL)
177                 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
178         else
179                 return -1;
180
181         KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
182         if (maptable != NULL)
183                 bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
184         else
185                 return -1;
186         return 0;
187 }
188
189
190 static void nat_addrdr(n)
191 ipnat_t *n;
192 {
193         ipnat_t **np;
194         u_32_t j;
195         u_int hv;
196         int k;
197
198         k = countbits(n->in_outmsk);
199         if ((k >= 0) && (k != 32))
200                 rdr_masks |= 1 << k;
201         j = (n->in_outip & n->in_outmsk);
202         hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
203         np = rdr_rules + hv;
204         while (*np != NULL)
205                 np = &(*np)->in_rnext;
206         n->in_rnext = NULL;
207         n->in_prnext = np;
208         *np = n;
209 }
210
211
212 static void nat_addnat(n)
213 ipnat_t *n;
214 {
215         ipnat_t **np;
216         u_32_t j;
217         u_int hv;
218         int k;
219
220         k = countbits(n->in_inmsk);
221         if ((k >= 0) && (k != 32))
222                 nat_masks |= 1 << k;
223         j = (n->in_inip & n->in_inmsk);
224         hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
225         np = nat_rules + hv;
226         while (*np != NULL)
227                 np = &(*np)->in_mnext;
228         n->in_mnext = NULL;
229         n->in_pmnext = np;
230         *np = n;
231 }
232
233
234 static void nat_delrdr(n)
235 ipnat_t *n;
236 {
237         if (n->in_rnext)
238                 n->in_rnext->in_prnext = n->in_prnext;
239         *n->in_prnext = n->in_rnext;
240 }
241
242
243 static void nat_delnat(n)
244 ipnat_t *n;
245 {
246         if (n->in_mnext)
247                 n->in_mnext->in_pmnext = n->in_pmnext;
248         *n->in_pmnext = n->in_mnext;
249 }
250
251
252 /*
253  * check if an ip address has already been allocated for a given mapping that
254  * is not doing port based translation.
255  *
256  * Must be called with ipf_nat held as a write lock.
257  */
258 static struct hostmap *nat_hostmap(np, real, map)
259 ipnat_t *np;
260 struct in_addr real;
261 struct in_addr map;
262 {
263         hostmap_t *hm;
264         u_int hv;
265
266         hv = real.s_addr % HOSTMAP_SIZE;
267         for (hm = maptable[hv]; hm; hm = hm->hm_next)
268                 if ((hm->hm_realip.s_addr == real.s_addr) &&
269                     (np == hm->hm_ipnat)) {
270                         hm->hm_ref++;
271                         return hm;
272                 }
273
274         KMALLOC(hm, hostmap_t *);
275         if (hm) {
276                 hm->hm_next = maptable[hv];
277                 hm->hm_pnext = maptable + hv;
278                 if (maptable[hv])
279                         maptable[hv]->hm_pnext = &hm->hm_next;
280                 maptable[hv] = hm;
281                 hm->hm_ipnat = np;
282                 hm->hm_realip = real;
283                 hm->hm_mapip = map;
284                 hm->hm_ref = 1;
285         }
286         return hm;
287 }
288
289
290 /*
291  * Must be called with ipf_nat held as a write lock.
292  */
293 static void nat_hostmapdel(hm)
294 struct hostmap *hm;
295 {
296         ATOMIC_DEC32(hm->hm_ref);
297         if (hm->hm_ref == 0) {
298                 if (hm->hm_next)
299                         hm->hm_next->hm_pnext = hm->hm_pnext;
300                 *hm->hm_pnext = hm->hm_next;
301                 KFREE(hm);
302         }
303 }
304
305
306 void fix_outcksum(fin, sp, n)
307 fr_info_t *fin;
308 u_short *sp;
309 u_32_t n;
310 {
311         register u_short sumshort;
312         register u_32_t sum1;
313
314         if (!n)
315                 return;
316         else if (n & NAT_HW_CKSUM) {
317                 n &= 0xffff;
318                 n += fin->fin_dlen;
319                 n = (n & 0xffff) + (n >> 16);
320                 *sp = n & 0xffff;
321                 return;
322         }
323         sum1 = (~ntohs(*sp)) & 0xffff;
324         sum1 += (n);
325         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
326         /* Again */
327         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
328         sumshort = ~(u_short)sum1;
329         *(sp) = htons(sumshort);
330 }
331
332
333 void fix_incksum(fin, sp, n)
334 fr_info_t *fin;
335 u_short *sp;
336 u_32_t n;
337 {
338         register u_short sumshort;
339         register u_32_t sum1;
340
341         if (!n)
342                 return;
343         else if (n & NAT_HW_CKSUM) {
344                 n &= 0xffff;
345                 n += fin->fin_dlen;
346                 n = (n & 0xffff) + (n >> 16);
347                 *sp = n & 0xffff;
348                 return;
349         }
350 #ifdef sparc
351         sum1 = (~(*sp)) & 0xffff;
352 #else
353         sum1 = (~ntohs(*sp)) & 0xffff;
354 #endif
355         sum1 += ~(n) & 0xffff;
356         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
357         /* Again */
358         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
359         sumshort = ~(u_short)sum1;
360         *(sp) = htons(sumshort);
361 }
362
363
364 /*
365  * fix_datacksum is used *only* for the adjustments of checksums in the data
366  * section of an IP packet.
367  *
368  * The only situation in which you need to do this is when NAT'ing an 
369  * ICMP error message. Such a message, contains in its body the IP header
370  * of the original IP packet, that causes the error.
371  *
372  * You can't use fix_incksum or fix_outcksum in that case, because for the
373  * kernel the data section of the ICMP error is just data, and no special 
374  * processing like hardware cksum or ntohs processing have been done by the 
375  * kernel on the data section.
376  */
377 void fix_datacksum(sp, n)
378 u_short *sp;
379 u_32_t n;
380 {
381         register u_short sumshort;
382         register u_32_t sum1;
383
384         if (!n)
385                 return;
386
387         sum1 = (~ntohs(*sp)) & 0xffff;
388         sum1 += (n);
389         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
390         /* Again */
391         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
392         sumshort = ~(u_short)sum1;
393         *(sp) = htons(sumshort);
394 }
395
396 /*
397  * How the NAT is organised and works.
398  *
399  * Inside (interface y) NAT       Outside (interface x)
400  * -------------------- -+- -------------------------------------
401  * Packet going          |   out, processed by ip_natout() for x
402  * ------------>         |   ------------>
403  * src=10.1.1.1          |   src=192.1.1.1
404  *                       |
405  *                       |   in, processed by ip_natin() for x
406  * <------------         |   <------------
407  * dst=10.1.1.1          |   dst=192.1.1.1
408  * -------------------- -+- -------------------------------------
409  * ip_natout() - changes ip_src and if required, sport
410  *             - creates a new mapping, if required.
411  * ip_natin()  - changes ip_dst and if required, dport
412  *
413  * In the NAT table, internal source is recorded as "in" and externally
414  * seen as "out".
415  */
416
417 /*
418  * Handle ioctls which manipulate the NAT.
419  */
420 int nat_ioctl(data, cmd, mode)
421 #if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
422 u_long cmd;
423 #else
424 int cmd;
425 #endif
426 caddr_t data;
427 int mode;
428 {
429         register ipnat_t *nat, *nt, *n = NULL, **np = NULL;
430         int error = 0, ret, arg, getlock;
431         ipnat_t natd;
432         u_32_t i, j;
433
434 #if (BSD >= 199306) && defined(_KERNEL)
435         if ((securelevel >= 3) && (mode & FWRITE))
436                 return EPERM;
437 #endif
438
439         nat = NULL;     /* XXX gcc -Wuninitialized */
440         KMALLOC(nt, ipnat_t *);
441         getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
442         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
443                 if (mode & NAT_SYSSPACE) {
444                         bcopy(data, (char *)&natd, sizeof(natd));
445                         error = 0;
446                 } else {
447                         error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
448                 }
449         } else if (cmd == SIOCIPFFL) {  /* SIOCFLNAT & SIOCCNATL */
450                 error = IRCOPY(data, (char *)&arg, sizeof(arg));
451                 if (error)
452                         error = EFAULT;
453         }
454
455         if (error)
456                 goto done;
457
458         /*
459          * For add/delete, look to see if the NAT entry is already present
460          */
461         if (getlock == 1)
462                 WRITE_ENTER(&ipf_nat);
463         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
464                 nat = &natd;
465                 nat->in_flags &= IPN_USERFLAGS;
466                 if ((nat->in_redir & NAT_MAPBLK) == 0) {
467                         if ((nat->in_flags & IPN_SPLIT) == 0)
468                                 nat->in_inip &= nat->in_inmsk;
469                         if ((nat->in_flags & IPN_IPRANGE) == 0)
470                                 nat->in_outip &= nat->in_outmsk;
471                 }
472                 for (np = &nat_list; (n = *np); np = &n->in_next)
473                         if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
474                                         IPN_CMPSIZ))
475                                 break;
476         }
477
478         switch (cmd)
479         {
480 #ifdef  IPFILTER_LOG
481         case SIOCIPFFB :
482         {
483                 int tmp;
484
485                 if (!(mode & FWRITE))
486                         error = EPERM;
487                 else {
488                         tmp = ipflog_clear(IPL_LOGNAT);
489                         IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
490                 }
491                 break;
492         }
493 #endif
494         case SIOCADNAT :
495                 if (!(mode & FWRITE)) {
496                         error = EPERM;
497                         break;
498                 }
499                 if (n) {
500                         error = EEXIST;
501                         break;
502                 }
503                 if (nt == NULL) {
504                         error = ENOMEM;
505                         break;
506                 }
507                 n = nt;
508                 nt = NULL;
509                 bcopy((char *)nat, (char *)n, sizeof(*n));
510                 n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
511                 if (!n->in_ifp)
512                         n->in_ifp = (void *)-1;
513                 if (n->in_plabel[0] != '\0') {
514                         n->in_apr = appr_lookup(n->in_p, n->in_plabel);
515                         if (!n->in_apr) {
516                                 error = ENOENT;
517                                 break;
518                         }
519                 }
520                 n->in_next = NULL;
521                 *np = n;
522
523                 if (n->in_redir & NAT_REDIRECT) {
524                         n->in_flags &= ~IPN_NOTDST;
525                         nat_addrdr(n);
526                 }
527                 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
528                         n->in_flags &= ~IPN_NOTSRC;
529                         nat_addnat(n);
530                 }
531
532                 n->in_use = 0;
533                 if (n->in_redir & NAT_MAPBLK)
534                         n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
535                 else if (n->in_flags & IPN_AUTOPORTMAP)
536                         n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
537                 else if (n->in_flags & IPN_IPRANGE)
538                         n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
539                 else if (n->in_flags & IPN_SPLIT)
540                         n->in_space = 2;
541                 else
542                         n->in_space = ~ntohl(n->in_outmsk);
543                 /*
544                  * Calculate the number of valid IP addresses in the output
545                  * mapping range.  In all cases, the range is inclusive of
546                  * the start and ending IP addresses.
547                  * If to a CIDR address, lose 2: broadcast + network address
548                  *                               (so subtract 1)
549                  * If to a range, add one.
550                  * If to a single IP address, set to 1.
551                  */
552                 if (n->in_space) {
553                         if ((n->in_flags & IPN_IPRANGE) != 0)
554                                 n->in_space += 1;
555                         else
556                                 n->in_space -= 1;
557                 } else
558                         n->in_space = 1;
559                 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
560                     ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
561                         n->in_nip = ntohl(n->in_outip) + 1;
562                 else if ((n->in_flags & IPN_SPLIT) &&
563                          (n->in_redir & NAT_REDIRECT))
564                         n->in_nip = ntohl(n->in_inip);
565                 else
566                         n->in_nip = ntohl(n->in_outip);
567                 if (n->in_redir & NAT_MAP) {
568                         n->in_pnext = ntohs(n->in_pmin);
569                         /*
570                          * Multiply by the number of ports made available.
571                          */
572                         if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
573                                 n->in_space *= (ntohs(n->in_pmax) -
574                                                 ntohs(n->in_pmin) + 1);
575                                 /*
576                                  * Because two different sources can map to
577                                  * different destinations but use the same
578                                  * local IP#/port #.
579                                  * If the result is smaller than in_space, then
580                                  * we may have wrapped around 32bits.
581                                  */
582                                 i = n->in_inmsk;
583                                 if ((i != 0) && (i != 0xffffffff)) {
584                                         j = n->in_space * (~ntohl(i) + 1);
585                                         if (j >= n->in_space)
586                                                 n->in_space = j;
587                                         else
588                                                 n->in_space = 0xffffffff;
589                                 }
590                         }
591                         /*
592                          * If no protocol is specified, multiple by 256.
593                          */
594                         if ((n->in_flags & IPN_TCPUDP) == 0) {
595                                         j = n->in_space * 256;
596                                         if (j >= n->in_space)
597                                                 n->in_space = j;
598                                         else
599                                                 n->in_space = 0xffffffff;
600                         }
601                 }
602                 /* Otherwise, these fields are preset */
603                 n = NULL;
604                 nat_stats.ns_rules++;
605                 break;
606         case SIOCRMNAT :
607                 if (!(mode & FWRITE)) {
608                         error = EPERM;
609                         n = NULL;
610                         break;
611                 }
612                 if (!n) {
613                         error = ESRCH;
614                         break;
615                 }
616                 if (n->in_redir & NAT_REDIRECT)
617                         nat_delrdr(n);
618                 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
619                         nat_delnat(n);
620                 if (nat_list == NULL) {
621                         nat_masks = 0;
622                         rdr_masks = 0;
623                 }
624                 *np = n->in_next;
625                 if (!n->in_use) {
626                         if (n->in_apr)
627                                 appr_free(n->in_apr);
628                         KFREE(n);
629                         nat_stats.ns_rules--;
630                 } else {
631                         n->in_flags |= IPN_DELETE;
632                         n->in_next = NULL;
633                 }
634                 n = NULL;
635                 break;
636         case SIOCGNATS :
637                 MUTEX_DOWNGRADE(&ipf_nat);
638                 nat_stats.ns_table[0] = nat_table[0];
639                 nat_stats.ns_table[1] = nat_table[1];
640                 nat_stats.ns_list = nat_list;
641                 nat_stats.ns_maptable = maptable;
642                 nat_stats.ns_nattab_sz = ipf_nattable_sz;
643                 nat_stats.ns_rultab_sz = ipf_natrules_sz;
644                 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
645                 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
646                 nat_stats.ns_instances = nat_instances;
647                 nat_stats.ns_apslist = ap_sess_list;
648                 error = IWCOPYPTR((char *)&nat_stats, (char *)data,
649                                   sizeof(nat_stats));
650                 break;
651         case SIOCGNATL :
652             {
653                 natlookup_t nl;
654
655                 MUTEX_DOWNGRADE(&ipf_nat);
656                 error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
657                 if (error)
658                         break;
659
660                 if (nat_lookupredir(&nl)) {
661                         error = IWCOPYPTR((char *)&nl, (char *)data,
662                                           sizeof(nl));
663                 } else
664                         error = ESRCH;
665                 break;
666             }
667         case SIOCIPFFL :        /* old SIOCFLNAT & SIOCCNATL */
668                 if (!(mode & FWRITE)) {
669                         error = EPERM;
670                         break;
671                 }
672                 error = 0;
673                 if (arg == 0)
674                         ret = nat_flushtable();
675                 else if (arg == 1)
676                         ret = nat_clearlist();
677                 else
678                         error = EINVAL;
679                 MUTEX_DOWNGRADE(&ipf_nat);
680                 if (!error) {
681                         error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
682                         if (error)
683                                 error = EFAULT;
684                 }
685                 break;
686         case SIOCSTLCK :
687                 error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
688                 if (!error) {
689                         error = IWCOPY((caddr_t)&fr_nat_lock, data,
690                                         sizeof(fr_nat_lock));
691                         if (!error)
692                                 fr_nat_lock = arg;
693                 } else
694                         error = EFAULT;
695                 break;
696         case SIOCSTPUT :
697                 if (fr_nat_lock)
698                         error = fr_natputent(data);
699                 else
700                         error = EACCES;
701                 break;
702         case SIOCSTGSZ :
703                 if (fr_nat_lock)
704                         error = fr_natgetsz(data);
705                 else
706                         error = EACCES;
707                 break;
708         case SIOCSTGET :
709                 if (fr_nat_lock)
710                         error = fr_natgetent(data);
711                 else
712                         error = EACCES;
713                 break;
714         case FIONREAD :
715 #ifdef  IPFILTER_LOG
716                 arg = (int)iplused[IPL_LOGNAT];
717                 MUTEX_DOWNGRADE(&ipf_nat);
718                 error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
719                 if (error)
720                         error = EFAULT;
721 #endif
722                 break;
723         default :
724                 error = EINVAL;
725                 break;
726         }
727         if (getlock == 1)
728                 RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
729 done:
730         if (nt)
731                 KFREE(nt);
732         return error;
733 }
734
735
736 static int fr_natgetsz(data)
737 caddr_t data;
738 {
739         ap_session_t *aps;
740         nat_t *nat, *n;
741         int error = 0;
742         natget_t ng;
743
744         error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
745         if (error)
746                 return EFAULT;
747
748         nat = ng.ng_ptr;
749         if (!nat) {
750                 nat = nat_instances;
751                 ng.ng_sz = 0;
752                 if (nat == NULL) {
753                         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
754                         if (error)
755                                 error = EFAULT;
756                         return error;
757                 }
758         } else {
759                 /*
760                  * Make sure the pointer we're copying from exists in the
761                  * current list of entries.  Security precaution to prevent
762                  * copying of random kernel data.
763                  */
764                 for (n = nat_instances; n; n = n->nat_next)
765                         if (n == nat)
766                                 break;
767                 if (!n)
768                         return ESRCH;
769         }
770
771         ng.ng_sz = sizeof(nat_save_t);
772         aps = nat->nat_aps;
773         if ((aps != NULL) && (aps->aps_data != 0)) {
774                 ng.ng_sz += sizeof(ap_session_t);
775                 ng.ng_sz += aps->aps_psiz;
776         }
777
778         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
779         if (error)
780                 error = EFAULT;
781         return error;
782 }
783
784
785 static int fr_natgetent(data)
786 caddr_t data;
787 {
788         nat_save_t ipn, *ipnp, *ipnn = NULL;
789         register nat_t *n, *nat;
790         ap_session_t *aps;
791         int error;
792
793         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
794         if (error)
795                 return EFAULT;
796         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
797         if (error)
798                 return EFAULT;
799
800         nat = ipn.ipn_next;
801         if (!nat) {
802                 nat = nat_instances;
803                 if (nat == NULL) {
804                         if (nat_instances == NULL)
805                                 return ENOENT;
806                         return 0;
807                 }
808         } else {
809                 /*
810                  * Make sure the pointer we're copying from exists in the
811                  * current list of entries.  Security precaution to prevent
812                  * copying of random kernel data.
813                  */
814                 for (n = nat_instances; n; n = n->nat_next)
815                         if (n == nat)
816                                 break;
817                 if (!n)
818                         return ESRCH;
819         }
820
821         ipn.ipn_next = nat->nat_next;
822         ipn.ipn_dsize = 0;
823         bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
824         ipn.ipn_nat.nat_data = NULL;
825
826         if (nat->nat_ptr) {
827                 bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
828                       sizeof(ipn.ipn_ipnat));
829         }
830
831         if (nat->nat_fr)
832                 bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
833                       sizeof(ipn.ipn_rule));
834
835         if ((aps = nat->nat_aps)) {
836                 ipn.ipn_dsize = sizeof(*aps);
837                 if (aps->aps_data)
838                         ipn.ipn_dsize += aps->aps_psiz;
839                 KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
840                 if (ipnn == NULL)
841                         return ENOMEM;
842                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
843
844                 bcopy((char *)aps, (char *)ipnn->ipn_data, sizeof(*aps));
845                 if (aps->aps_data) {
846                         bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
847                               aps->aps_psiz);
848                         ipnn->ipn_dsize += aps->aps_psiz;
849                 }
850                 error = IWCOPY((caddr_t)ipnn, ipnp,
851                                sizeof(ipn) + ipn.ipn_dsize);
852                 if (error)
853                         error = EFAULT;
854                 KFREES(ipnn, sizeof(*ipnn) + ipn.ipn_dsize);
855         } else {
856                 error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
857                 if (error)
858                         error = EFAULT;
859         }
860         return error;
861 }
862
863
864 static int fr_natputent(data)
865 caddr_t data;
866 {
867         nat_save_t ipn, *ipnp, *ipnn = NULL;
868         register nat_t *n, *nat;
869         ap_session_t *aps;
870         frentry_t *fr;
871         ipnat_t *in;
872
873         int error;
874
875         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
876         if (error)
877                 return EFAULT;
878         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
879         if (error)
880                 return EFAULT;
881         nat = NULL;
882         if (ipn.ipn_dsize) {
883                 KMALLOCS(ipnn, nat_save_t *, sizeof(ipn) + ipn.ipn_dsize);
884                 if (ipnn == NULL)
885                         return ENOMEM;
886                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
887                 error = IRCOPY((caddr_t)ipnp, (caddr_t)ipn.ipn_data,
888                                ipn.ipn_dsize);
889                 if (error) {
890                         error = EFAULT;
891                         goto junkput;
892                 }
893         } else
894                 ipnn = NULL;
895
896         KMALLOC(nat, nat_t *);
897         if (nat == NULL) {
898                 error = EFAULT;
899                 goto junkput;
900         }
901
902         bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
903         /*
904          * Initialize all these so that nat_delete() doesn't cause a crash.
905          */
906         nat->nat_phnext[0] = NULL;
907         nat->nat_phnext[1] = NULL;
908         fr = nat->nat_fr;
909         nat->nat_fr = NULL;
910         aps = nat->nat_aps;
911         nat->nat_aps = NULL;
912         in = nat->nat_ptr;
913         nat->nat_ptr = NULL;
914         nat->nat_hm = NULL;
915         nat->nat_data = NULL;
916         nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
917
918         /*
919          * Restore the rule associated with this nat session
920          */
921         if (in) {
922                 KMALLOC(in, ipnat_t *);
923                 if (in == NULL) {
924                         error = ENOMEM;
925                         goto junkput;
926                 }
927                 nat->nat_ptr = in;
928                 bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
929                 in->in_use = 1;
930                 in->in_flags |= IPN_DELETE;
931                 in->in_next = NULL;
932                 in->in_rnext = NULL;
933                 in->in_prnext = NULL;
934                 in->in_mnext = NULL;
935                 in->in_pmnext = NULL;
936                 in->in_ifp = GETUNIT(in->in_ifname, 4);
937                 if (in->in_plabel[0] != '\0') {
938                         in->in_apr = appr_lookup(in->in_p, in->in_plabel);
939                 }
940         }
941
942         /*
943          * Restore ap_session_t structure.  Include the private data allocated
944          * if it was there.
945          */
946         if (aps) {
947                 KMALLOC(aps, ap_session_t *);
948                 if (aps == NULL) {
949                         error = ENOMEM;
950                         goto junkput;
951                 }
952                 nat->nat_aps = aps;
953                 aps->aps_next = ap_sess_list;
954                 ap_sess_list = aps;
955                 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
956                 if (in)
957                         aps->aps_apr = in->in_apr;
958                 if (aps->aps_psiz) {
959                         KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
960                         if (aps->aps_data == NULL) {
961                                 error = ENOMEM;
962                                 goto junkput;
963                         }
964                         bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
965                               aps->aps_psiz);
966                 } else {
967                         aps->aps_psiz = 0;
968                         aps->aps_data = NULL;
969                 }
970         }
971
972         /*
973          * If there was a filtering rule associated with this entry then
974          * build up a new one.
975          */
976         if (fr != NULL) {
977                 if (nat->nat_flags & FI_NEWFR) {
978                         KMALLOC(fr, frentry_t *);
979                         nat->nat_fr = fr;
980                         if (fr == NULL) {
981                                 error = ENOMEM;
982                                 goto junkput;
983                         }
984                         bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
985                         ipn.ipn_nat.nat_fr = fr;
986                         error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
987                         if (error) {
988                                 error = EFAULT;
989                                 goto junkput;
990                         }
991                 } else {
992                         for (n = nat_instances; n; n = n->nat_next)
993                                 if (n->nat_fr == fr)
994                                         break;
995                         if (!n) {
996                                 error = ESRCH;
997                                 goto junkput;
998                         }
999                 }
1000         }
1001
1002         if (ipnn)
1003                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1004         nat_insert(nat);
1005         return 0;
1006 junkput:
1007         if (ipnn)
1008                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1009         if (nat)
1010                 nat_delete(nat);
1011         return error;
1012 }
1013
1014
1015 /*
1016  * Delete a nat entry from the various lists and table.
1017  */
1018 static void nat_delete(natd)
1019 struct nat *natd;
1020 {
1021         struct ipnat *ipn;
1022
1023         if (natd->nat_flags & FI_WILDP)
1024                 nat_stats.ns_wilds--;
1025         if (natd->nat_hnext[0])
1026                 natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0];
1027         *natd->nat_phnext[0] = natd->nat_hnext[0];
1028         if (natd->nat_hnext[1])
1029                 natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1];
1030         *natd->nat_phnext[1] = natd->nat_hnext[1];
1031         if (natd->nat_me != NULL)
1032                 *natd->nat_me = NULL;
1033
1034         if (natd->nat_fr != NULL) {
1035                 ATOMIC_DEC32(natd->nat_fr->fr_ref);
1036         }
1037
1038         if (natd->nat_hm != NULL)
1039                 nat_hostmapdel(natd->nat_hm);
1040
1041         /*
1042          * If there is an active reference from the nat entry to its parent
1043          * rule, decrement the rule's reference count and free it too if no
1044          * longer being used.
1045          */
1046         ipn = natd->nat_ptr;
1047         if (ipn != NULL) {
1048                 ipn->in_space++;
1049                 ipn->in_use--;
1050                 if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
1051                         if (ipn->in_apr)
1052                                 appr_free(ipn->in_apr);
1053                         KFREE(ipn);
1054                         nat_stats.ns_rules--;
1055                 }
1056         }
1057
1058         MUTEX_DESTROY(&natd->nat_lock);
1059         /*
1060          * If there's a fragment table entry too for this nat entry, then
1061          * dereference that as well.
1062          */
1063         ipfr_forget((void *)natd);
1064         aps_free(natd->nat_aps);
1065         nat_stats.ns_inuse--;
1066         KFREE(natd);
1067 }
1068
1069
1070 /*
1071  * nat_flushtable - clear the NAT table of all mapping entries.
1072  * (this is for the dynamic mappings)
1073  */
1074 static int nat_flushtable()
1075 {
1076         register nat_t *nat, **natp;
1077         register int j = 0;
1078
1079         /*
1080          * ALL NAT mappings deleted, so lets just make the deletions
1081          * quicker.
1082          */
1083         if (nat_table[0] != NULL)
1084                 bzero((char *)nat_table[0],
1085                       sizeof(nat_table[0]) * ipf_nattable_sz);
1086         if (nat_table[1] != NULL)
1087                 bzero((char *)nat_table[1],
1088                       sizeof(nat_table[1]) * ipf_nattable_sz);
1089
1090         for (natp = &nat_instances; (nat = *natp); ) {
1091                 *natp = nat->nat_next;
1092 #ifdef  IPFILTER_LOG
1093                 nat_log(nat, NL_FLUSH);
1094 #endif
1095                 nat_delete(nat);
1096                 j++;
1097         }
1098         nat_stats.ns_inuse = 0;
1099         return j;
1100 }
1101
1102
1103 /*
1104  * nat_clearlist - delete all rules in the active NAT mapping list.
1105  * (this is for NAT/RDR rules)
1106  */
1107 int nat_clearlist()
1108 {
1109         register ipnat_t *n, **np = &nat_list;
1110         int i = 0;
1111
1112         if (nat_rules != NULL)
1113                 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1114         if (rdr_rules != NULL)
1115                 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1116
1117         while ((n = *np)) {
1118                 *np = n->in_next;
1119                 if (!n->in_use) {
1120                         if (n->in_apr)
1121                                 appr_free(n->in_apr);
1122                         KFREE(n);
1123                         nat_stats.ns_rules--;
1124                 } else {
1125                         n->in_flags |= IPN_DELETE;
1126                         n->in_next = NULL;
1127                 }
1128                 i++;
1129         }
1130         nat_masks = 0;
1131         rdr_masks = 0;
1132         return i;
1133 }
1134
1135
1136 /*
1137  * Create a new NAT table entry.
1138  * NOTE: Assumes write lock on ipf_nat has been obtained already.
1139  *       If you intend on changing this, beware: appr_new() may call nat_new()
1140  *       recursively!
1141  */
1142 nat_t *nat_new(fin, ip, np, natsave, flags, direction)
1143 fr_info_t *fin;
1144 ip_t *ip;
1145 ipnat_t *np;
1146 nat_t **natsave;
1147 u_int flags;
1148 int direction;
1149 {
1150         register u_32_t sum1, sum2, sumd, l;
1151         u_short port = 0, sport = 0, dport = 0, nport = 0;
1152         struct in_addr in, inb;
1153         u_short nflags, sp, dp;
1154         tcphdr_t *tcp = NULL;
1155         hostmap_t *hm = NULL;
1156         nat_t *nat, *natl;
1157 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1158         qif_t *qf = fin->fin_qif;
1159 #endif
1160
1161         nflags = flags & np->in_flags;
1162         if (flags & IPN_TCPUDP) {
1163                 tcp = (tcphdr_t *)fin->fin_dp;
1164                 sport = htons(fin->fin_data[0]);
1165                 dport = htons(fin->fin_data[1]);
1166         }
1167
1168         /* Give me a new nat */
1169         KMALLOC(nat, nat_t *);
1170         if (nat == NULL) {
1171                 nat_stats.ns_memfail++;
1172                 return NULL;
1173         }
1174
1175         bzero((char *)nat, sizeof(*nat));
1176         nat->nat_flags = flags;
1177         if (flags & FI_WILDP)
1178                 nat_stats.ns_wilds++;
1179         /*
1180          * Search the current table for a match.
1181          */
1182         if (direction == NAT_OUTBOUND) {
1183                 /*
1184                  * Values at which the search for a free resouce starts.
1185                  */
1186                 u_32_t st_ip;
1187                 u_short st_port;
1188
1189                 /*
1190                  * If it's an outbound packet which doesn't match any existing
1191                  * record, then create a new port
1192                  */
1193                 l = 0;
1194                 st_ip = np->in_nip;
1195                 st_port = np->in_pnext;
1196
1197                 do {
1198                         port = 0;
1199                         in.s_addr = htonl(np->in_nip);
1200                         if (l == 0) {
1201                                 /*
1202                                  * Check to see if there is an existing NAT
1203                                  * setup for this IP address pair.
1204                                  */
1205                                 hm = nat_hostmap(np, fin->fin_src, in);
1206                                 if (hm != NULL)
1207                                         in.s_addr = hm->hm_mapip.s_addr;
1208                         } else if ((l == 1) && (hm != NULL)) {
1209                                 nat_hostmapdel(hm);
1210                                 hm = NULL;
1211                         }
1212                         in.s_addr = ntohl(in.s_addr);
1213
1214                         nat->nat_hm = hm;
1215
1216                         if ((np->in_outmsk == 0xffffffff) &&
1217                             (np->in_pnext == 0)) {
1218                                 if (l > 0)
1219                                         goto badnat;
1220                         }
1221
1222                         if (np->in_redir & NAT_MAPBLK) {
1223                                 if ((l >= np->in_ppip) || ((l > 0) &&
1224                                      !(flags & IPN_TCPUDP)))
1225                                         goto badnat;
1226                                 /*
1227                                  * map-block - Calculate destination address.
1228                                  */
1229                                 in.s_addr = ntohl(fin->fin_saddr);
1230                                 in.s_addr &= ntohl(~np->in_inmsk);
1231                                 inb.s_addr = in.s_addr;
1232                                 in.s_addr /= np->in_ippip;
1233                                 in.s_addr &= ntohl(~np->in_outmsk);
1234                                 in.s_addr += ntohl(np->in_outip);
1235                                 /*
1236                                  * Calculate destination port.
1237                                  */
1238                                 if ((flags & IPN_TCPUDP) &&
1239                                     (np->in_ppip != 0)) {
1240                                         port = ntohs(sport) + l;
1241                                         port %= np->in_ppip;
1242                                         port += np->in_ppip *
1243                                                 (inb.s_addr % np->in_ippip);
1244                                         port += MAPBLK_MINPORT;
1245                                         port = htons(port);
1246                                 }
1247                         } else if (!np->in_outip &&
1248                                    (np->in_outmsk == 0xffffffff)) {
1249                                 /*
1250                                  * 0/32 - use the interface's IP address.
1251                                  */
1252                                 if ((l > 0) ||
1253                                     fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1254                                         goto badnat;
1255                                 in.s_addr = ntohl(in.s_addr);
1256                         } else if (!np->in_outip && !np->in_outmsk) {
1257                                 /*
1258                                  * 0/0 - use the original source address/port.
1259                                  */
1260                                 if (l > 0)
1261                                         goto badnat;
1262                                 in.s_addr = ntohl(fin->fin_saddr);
1263                         } else if ((np->in_outmsk != 0xffffffff) &&
1264                                    (np->in_pnext == 0) &&
1265                                    ((l > 0) || (hm == NULL)))
1266                                 np->in_nip++;
1267                         natl = NULL;
1268
1269                         if ((nflags & IPN_TCPUDP) &&
1270                             ((np->in_redir & NAT_MAPBLK) == 0) &&
1271                             (np->in_flags & IPN_AUTOPORTMAP)) {
1272                                 if ((l > 0) && (l % np->in_ppip == 0)) {
1273                                         if (l > np->in_space) {
1274                                                 goto badnat;
1275                                         } else if ((l > np->in_ppip) &&
1276                                                    np->in_outmsk != 0xffffffff)
1277                                                 np->in_nip++;
1278                                 }
1279                                 if (np->in_ppip != 0) {
1280                                         port = ntohs(sport);
1281                                         port += (l % np->in_ppip);
1282                                         port %= np->in_ppip;
1283                                         port += np->in_ppip *
1284                                                 (ntohl(fin->fin_saddr) %
1285                                                  np->in_ippip);
1286                                         port += MAPBLK_MINPORT;
1287                                         port = htons(port);
1288                                 }
1289                         } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1290                                    (nflags & IPN_TCPUDP) &&
1291                                    (np->in_pnext != 0)) {
1292                                 port = htons(np->in_pnext++);
1293                                 if (np->in_pnext > ntohs(np->in_pmax)) {
1294                                         np->in_pnext = ntohs(np->in_pmin);
1295                                         if (np->in_outmsk != 0xffffffff)
1296                                                 np->in_nip++;
1297                                 }
1298                         }
1299
1300                         if (np->in_flags & IPN_IPRANGE) {
1301                                 if (np->in_nip > ntohl(np->in_outmsk))
1302                                         np->in_nip = ntohl(np->in_outip);
1303                         } else {
1304                                 if ((np->in_outmsk != 0xffffffff) &&
1305                                     ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1306                                     ntohl(np->in_outip))
1307                                         np->in_nip = ntohl(np->in_outip) + 1;
1308                         }
1309
1310                         if (!port && (flags & IPN_TCPUDP))
1311                                 port = sport;
1312
1313                         /*
1314                          * Here we do a lookup of the connection as seen from
1315                          * the outside.  If an IP# pair already exists, try
1316                          * again.  So if you have A->B becomes C->B, you can
1317                          * also have D->E become C->E but not D->B causing
1318                          * another C->B.  Also take protocol and ports into
1319                          * account when determining whether a pre-existing
1320                          * NAT setup will cause an external conflict where
1321                          * this is appropriate.
1322                          */
1323                         inb.s_addr = htonl(in.s_addr);
1324                         sp = fin->fin_data[0];
1325                         dp = fin->fin_data[1];
1326                         fin->fin_data[0] = fin->fin_data[1];
1327                         fin->fin_data[1] = htons(port);
1328                         natl = nat_inlookup(fin, flags & ~FI_WILDP,
1329                                             (u_int)fin->fin_p, fin->fin_dst,
1330                                             inb, 1);
1331                         fin->fin_data[0] = sp;
1332                         fin->fin_data[1] = dp;
1333
1334                         /*
1335                          * Has the search wrapped around and come back to the
1336                          * start ?
1337                          */
1338                         if ((natl != NULL) &&
1339                             (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1340                             (np->in_nip != 0) && (st_ip == np->in_nip))
1341                                 goto badnat;
1342                         l++;
1343                 } while (natl != NULL);
1344
1345                 if (np->in_space > 0)
1346                         np->in_space--;
1347
1348                 /* Setup the NAT table */
1349                 nat->nat_inip = fin->fin_src;
1350                 nat->nat_outip.s_addr = htonl(in.s_addr);
1351                 nat->nat_oip = fin->fin_dst;
1352                 if (nat->nat_hm == NULL)
1353                         nat->nat_hm = nat_hostmap(np, fin->fin_src,
1354                                                   nat->nat_outip);
1355
1356                 sum1 = LONG_SUM(ntohl(fin->fin_saddr)) + ntohs(sport);
1357                 sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1358
1359                 if (flags & IPN_TCPUDP) {
1360                         nat->nat_inport = sport;
1361                         nat->nat_outport = port;        /* sport */
1362                         nat->nat_oport = dport;
1363                 }
1364         } else {
1365                 /*
1366                  * Otherwise, it's an inbound packet. Most likely, we don't
1367                  * want to rewrite source ports and source addresses. Instead,
1368                  * we want to rewrite to a fixed internal address and fixed
1369                  * internal port.
1370                  */
1371                 if (np->in_flags & IPN_SPLIT) {
1372                         in.s_addr = np->in_nip;
1373                         if (np->in_inip == htonl(in.s_addr))
1374                                 np->in_nip = ntohl(np->in_inmsk);
1375                         else {
1376                                 np->in_nip = ntohl(np->in_inip);
1377                                 if (np->in_flags & IPN_ROUNDR) {
1378                                         nat_delrdr(np);
1379                                         nat_addrdr(np);
1380                                 }
1381                         }
1382                 } else {
1383                         in.s_addr = ntohl(np->in_inip);
1384                         if (np->in_flags & IPN_ROUNDR) {
1385                                 nat_delrdr(np);
1386                                 nat_addrdr(np);
1387                         }
1388                 }
1389                 if (!np->in_pnext)
1390                         nport = dport;
1391                 else {
1392                         /*
1393                          * Whilst not optimized for the case where
1394                          * pmin == pmax, the gain is not significant.
1395                          */
1396                         if (np->in_pmin != np->in_pmax) {
1397                                 nport = ntohs(dport) - ntohs(np->in_pmin) +
1398                                         ntohs(np->in_pnext);
1399                                 nport = ntohs(nport);
1400                         } else
1401                                 nport = np->in_pnext;
1402                 }
1403
1404                 /*
1405                  * When the redirect-to address is set to 0.0.0.0, just
1406                  * assume a blank `forwarding' of the packet.
1407                  */
1408                 if (in.s_addr == 0)
1409                         in.s_addr = ntohl(fin->fin_daddr);
1410
1411                 nat->nat_inip.s_addr = htonl(in.s_addr);
1412                 nat->nat_outip = fin->fin_dst;
1413                 nat->nat_oip = fin->fin_src;
1414
1415                 sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
1416                 sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1417
1418                 if (flags & IPN_TCPUDP) {
1419                         nat->nat_inport = nport;
1420                         nat->nat_outport = dport;
1421                         nat->nat_oport = sport;
1422                 }
1423         }
1424
1425         CALC_SUMD(sum1, sum2, sumd);
1426         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1427 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1428         if ((flags & IPN_TCPUDP) && dohwcksum &&
1429             (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1430                 if (direction == NAT_OUTBOUND)
1431                         sum1 = LONG_SUM(ntohl(in.s_addr));
1432                 else
1433                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1434                 sum1 += LONG_SUM(ntohl(fin->fin_daddr));
1435                 sum1 += IPPROTO_TCP;
1436                 sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1437                 nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1438         } else
1439 #endif
1440                 nat->nat_sumd[1] = nat->nat_sumd[0];
1441
1442         if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1443                 if (direction == NAT_OUTBOUND)
1444                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1445                 else
1446                         sum1 = LONG_SUM(ntohl(fin->fin_daddr));
1447
1448                 sum2 = LONG_SUM(in.s_addr);
1449
1450                 CALC_SUMD(sum1, sum2, sumd);
1451                 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1452         } else
1453                 nat->nat_ipsumd = nat->nat_sumd[0];
1454
1455         in.s_addr = htonl(in.s_addr);
1456
1457         strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1458
1459         nat->nat_me = natsave;
1460         nat->nat_dir = direction;
1461         nat->nat_ifp = fin->fin_ifp;
1462         nat->nat_ptr = np;
1463         nat->nat_p = fin->fin_p;
1464         nat->nat_bytes = 0;
1465         nat->nat_pkts = 0;
1466         nat->nat_fr = fin->fin_fr;
1467         if (nat->nat_fr != NULL) {
1468                 ATOMIC_INC32(nat->nat_fr->fr_ref);
1469         }
1470         if (direction == NAT_OUTBOUND) {
1471                 if (flags & IPN_TCPUDP)
1472                         tcp->th_sport = port;
1473         } else {
1474                 if (flags & IPN_TCPUDP)
1475                         tcp->th_dport = nport;
1476         }
1477
1478         nat_insert(nat);
1479
1480         if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1481             (tcp != NULL && dport == np->in_dport)))
1482                 (void) appr_new(fin, ip, nat);
1483
1484         np->in_use++;
1485 #ifdef  IPFILTER_LOG
1486         nat_log(nat, (u_int)np->in_redir);
1487 #endif
1488         return nat;
1489 badnat:
1490         nat_stats.ns_badnat++;
1491         if ((hm = nat->nat_hm) != NULL)
1492                 nat_hostmapdel(hm);
1493         KFREE(nat);
1494         return NULL;
1495 }
1496
1497
1498 /*
1499  * Insert a NAT entry into the hash tables for searching and add it to the
1500  * list of active NAT entries.  Adjust global counters when complete.
1501  */
1502 void    nat_insert(nat)
1503 nat_t   *nat;
1504 {
1505         u_int hv1, hv2;
1506         nat_t **natp;
1507
1508         MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1509
1510         nat->nat_age = fr_defnatage;
1511         nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1512         if (nat->nat_ifname[0] !='\0') {
1513                 nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1514         }
1515
1516         nat->nat_next = nat_instances;
1517         nat_instances = nat;
1518
1519         if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) {
1520                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1521                                   0xffffffff);
1522                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
1523                                   ipf_nattable_sz);
1524                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1525                                   0xffffffff);
1526                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
1527                                  ipf_nattable_sz);
1528         } else {
1529                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_inip.s_addr,
1530                                   ipf_nattable_sz);
1531                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_outip.s_addr,
1532                                   ipf_nattable_sz);
1533         }
1534
1535         natp = &nat_table[0][hv1];
1536         if (*natp)
1537                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
1538         nat->nat_phnext[0] = natp;
1539         nat->nat_hnext[0] = *natp;
1540         *natp = nat;
1541
1542         natp = &nat_table[1][hv2];
1543         if (*natp)
1544                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
1545         nat->nat_phnext[1] = natp;
1546         nat->nat_hnext[1] = *natp;
1547         *natp = nat;
1548
1549         nat_stats.ns_added++;
1550         nat_stats.ns_inuse++;
1551 }
1552
1553
1554 nat_t *nat_icmplookup(ip, fin, dir)
1555 ip_t *ip;
1556 fr_info_t *fin;
1557 int dir;
1558 {
1559         icmphdr_t *icmp;
1560         tcphdr_t *tcp = NULL;
1561         ip_t *oip;
1562         int flags = 0, type, minlen;
1563
1564         icmp = (icmphdr_t *)fin->fin_dp;
1565         /*
1566          * Does it at least have the return (basic) IP header ?
1567          * Only a basic IP header (no options) should be with an ICMP error
1568          * header.
1569          */
1570         if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1571                 return NULL;
1572         type = icmp->icmp_type;
1573         /*
1574          * If it's not an error type, then return.
1575          */
1576         if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1577             (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1578             (type != ICMP_PARAMPROB))
1579                 return NULL;
1580
1581         oip = (ip_t *)((char *)fin->fin_dp + 8);
1582         minlen = (oip->ip_hl << 2);
1583         if (minlen < sizeof(ip_t))
1584                 return NULL;
1585         if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1586                 return NULL;
1587         /*
1588          * Is the buffer big enough for all of it ?  It's the size of the IP
1589          * header claimed in the encapsulated part which is of concern.  It
1590          * may be too big to be in this buffer but not so big that it's
1591          * outside the ICMP packet, leading to TCP deref's causing problems.
1592          * This is possible because we don't know how big oip_hl is when we
1593          * do the pullup early in fr_check() and thus can't gaurantee it is
1594          * all here now.
1595          */
1596 #ifdef  _KERNEL
1597         {
1598         mb_t *m;
1599
1600 # if SOLARIS
1601         m = fin->fin_qfm;
1602         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
1603                 return NULL;
1604 # else
1605         m = *(mb_t **)fin->fin_mp;
1606         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
1607             (char *)ip + m->m_len)
1608                 return NULL;
1609 # endif
1610         }
1611 #endif
1612
1613         if (oip->ip_p == IPPROTO_TCP)
1614                 flags = IPN_TCP;
1615         else if (oip->ip_p == IPPROTO_UDP)
1616                 flags = IPN_UDP;
1617         if (flags & IPN_TCPUDP) {
1618                 u_short data[2];
1619                 nat_t *nat;
1620
1621                 minlen += 8;            /* + 64bits of data to get ports */
1622                 if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1623                         return NULL;
1624
1625                 data[0] = fin->fin_data[0];
1626                 data[1] = fin->fin_data[1];
1627                 tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1628                 fin->fin_data[0] = ntohs(tcp->th_dport);
1629                 fin->fin_data[1] = ntohs(tcp->th_sport);
1630
1631                 if (dir == NAT_INBOUND) {
1632                         nat = nat_inlookup(fin, flags, (u_int)oip->ip_p,
1633                                             oip->ip_dst, oip->ip_src, 0);
1634                 } else {
1635                         nat = nat_outlookup(fin, flags, (u_int)oip->ip_p,
1636                                             oip->ip_dst, oip->ip_src, 0);
1637                 }
1638                 fin->fin_data[0] = data[0];
1639                 fin->fin_data[1] = data[1];
1640                 return nat;
1641         }
1642         if (dir == NAT_INBOUND)
1643                 return nat_inlookup(fin, 0, (u_int)oip->ip_p,
1644                                     oip->ip_dst, oip->ip_src, 0);
1645         else
1646                 return nat_outlookup(fin, 0, (u_int)oip->ip_p,
1647                                     oip->ip_dst, oip->ip_src, 0);
1648 }
1649
1650
1651 /*
1652  * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP
1653  * packet gets correctly recognised.
1654  */
1655 nat_t *nat_icmp(ip, fin, nflags, dir)
1656 ip_t *ip;
1657 fr_info_t *fin;
1658 u_int *nflags;
1659 int dir;
1660 {
1661         u_32_t sum1, sum2, sumd, sumd2 = 0;
1662         struct in_addr in;
1663         int flags, dlen;
1664         icmphdr_t *icmp;
1665         udphdr_t *udp;
1666         tcphdr_t *tcp;
1667         nat_t *nat;
1668         ip_t *oip;
1669
1670         if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0))
1671                 return NULL;
1672         /*
1673          * nat_icmplookup() will return NULL for `defective' packets.
1674          */
1675         if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1676                 return NULL;
1677
1678         flags = 0;
1679         *nflags = IPN_ICMPERR;
1680         icmp = (icmphdr_t *)fin->fin_dp;
1681         oip = (ip_t *)&icmp->icmp_ip;
1682         if (oip->ip_p == IPPROTO_TCP)
1683                 flags = IPN_TCP;
1684         else if (oip->ip_p == IPPROTO_UDP)
1685                 flags = IPN_UDP;
1686         udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1687         dlen = ip->ip_len - ((char *)udp - (char *)ip);
1688         /*
1689          * XXX - what if this is bogus hl and we go off the end ?
1690          * In this case, nat_icmplookup() will have returned NULL.
1691          */
1692         tcp = (tcphdr_t *)udp;
1693
1694         /*
1695          * Need to adjust ICMP header to include the real IP#'s and
1696          * port #'s.  Only apply a checksum change relative to the
1697          * IP address change as it will be modified again in ip_natout
1698          * for both address and port.  Two checksum changes are
1699          * necessary for the two header address changes.  Be careful
1700          * to only modify the checksum once for the port # and twice
1701          * for the IP#.
1702          */
1703
1704         /*
1705          * Step 1
1706          * Fix the IP addresses in the offending IP packet. You also need
1707          * to adjust the IP header checksum of that offending IP packet
1708          * and the ICMP checksum of the ICMP error message itself.
1709          *
1710          * Unfortunately, for UDP and TCP, the IP addresses are also contained
1711          * in the pseudo header that is used to compute the UDP resp. TCP
1712          * checksum. So, we must compensate that as well. Even worse, the
1713          * change in the UDP and TCP checksums require yet another
1714          * adjustment of the ICMP checksum of the ICMP error message.
1715          *
1716          */
1717
1718         if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
1719                 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1720                 in = nat->nat_inip;
1721                 oip->ip_src = in;
1722         } else {
1723                 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1724                 in = nat->nat_outip;
1725                 oip->ip_dst = in;
1726         }
1727
1728         sum2 = LONG_SUM(ntohl(in.s_addr));
1729
1730         CALC_SUMD(sum1, sum2, sumd);
1731
1732         if (nat->nat_dir == NAT_OUTBOUND) {
1733                 /*
1734                  * Fix IP checksum of the offending IP packet to adjust for
1735                  * the change in the IP address.
1736                  *
1737                  * Normally, you would expect that the ICMP checksum of the 
1738                  * ICMP error message needs to be adjusted as well for the
1739                  * IP address change in oip.
1740                  * However, this is a NOP, because the ICMP checksum is 
1741                  * calculated over the complete ICMP packet, which includes the
1742                  * changed oip IP addresses and oip->ip_sum. However, these 
1743                  * two changes cancel each other out (if the delta for
1744                  * the IP address is x, then the delta for ip_sum is minus x), 
1745                  * so no change in the icmp_cksum is necessary.
1746                  *
1747                  * Be careful that nat_dir refers to the direction of the
1748                  * offending IP packet (oip), not to its ICMP response (icmp)
1749                  */
1750                 fix_datacksum(&oip->ip_sum, sumd);
1751
1752                 /*
1753                  * Fix UDP pseudo header checksum to compensate for the
1754                  * IP address change.
1755                  */
1756                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1757                         /*
1758                          * The UDP checksum is optional, only adjust it 
1759                          * if it has been set.
1760                          */
1761                         sum1 = ntohs(udp->uh_sum);
1762                         fix_datacksum(&udp->uh_sum, sumd);
1763                         sum2 = ntohs(udp->uh_sum);
1764
1765                         /*
1766                          * Fix ICMP checksum to compensate the UDP 
1767                          * checksum adjustment.
1768                          */
1769                         CALC_SUMD(sum1, sum2, sumd);
1770                         sumd2 = sumd;
1771                 }
1772
1773 #if 1
1774                 /*
1775                  * Fix TCP pseudo header checksum to compensate for the 
1776                  * IP address change. Before we can do the change, we
1777                  * must make sure that oip is sufficient large to hold
1778                  * the TCP checksum (normally it does not!).
1779                  */
1780                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1781                 
1782                         sum1 = ntohs(tcp->th_sum);
1783                         fix_datacksum(&tcp->th_sum, sumd);
1784                         sum2 = ntohs(tcp->th_sum);
1785
1786                         /*
1787                          * Fix ICMP checksum to compensate the TCP 
1788                          * checksum adjustment.
1789                          */
1790                         CALC_SUMD(sum1, sum2, sumd);
1791                         sumd2 = sumd;
1792                 }
1793 #endif
1794         } else {
1795
1796                 /*
1797                  * Fix IP checksum of the offending IP packet to adjust for
1798                  * the change in the IP address.
1799                  *
1800                  * Normally, you would expect that the ICMP checksum of the 
1801                  * ICMP error message needs to be adjusted as well for the
1802                  * IP address change in oip.
1803                  * However, this is a NOP, because the ICMP checksum is 
1804                  * calculated over the complete ICMP packet, which includes the
1805                  * changed oip IP addresses and oip->ip_sum. However, these 
1806                  * two changes cancel each other out (if the delta for
1807                  * the IP address is x, then the delta for ip_sum is minus x), 
1808                  * so no change in the icmp_cksum is necessary.
1809                  *
1810                  * Be careful that nat_dir refers to the direction of the
1811                  * offending IP packet (oip), not to its ICMP response (icmp)
1812                  */
1813                 fix_datacksum(&oip->ip_sum, sumd);
1814
1815 /* XXX FV : without having looked at Solaris source code, it seems unlikely
1816  * that SOLARIS would compensate this in the kernel (a body of an IP packet 
1817  * in the data section of an ICMP packet). I have the feeling that this should
1818  * be unconditional, but I'm not in a position to check.
1819  */
1820 #if !SOLARIS && !defined(__sgi)
1821                 /*
1822                  * Fix UDP pseudo header checksum to compensate for the
1823                  * IP address change.
1824                  */
1825                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1826                         /*
1827                          * The UDP checksum is optional, only adjust it 
1828                          * if it has been set 
1829                          */
1830                         sum1 = ntohs(udp->uh_sum);
1831                         fix_datacksum(&udp->uh_sum, sumd);
1832                         sum2 = ntohs(udp->uh_sum);
1833
1834                         /*
1835                          * Fix ICMP checksum to compensate the UDP 
1836                          * checksum adjustment.
1837                          */
1838                         CALC_SUMD(sum1, sum2, sumd);
1839                         sumd2 = sumd;
1840                 }
1841                 
1842 #if 1
1843                 /* 
1844                  * Fix TCP pseudo header checksum to compensate for the 
1845                  * IP address change. Before we can do the change, we
1846                  * must make sure that oip is sufficient large to hold
1847                  * the TCP checksum (normally it does not!).
1848                  */
1849                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1850                 
1851                         sum1 = ntohs(tcp->th_sum);
1852                         fix_datacksum(&tcp->th_sum, sumd);
1853                         sum2 = ntohs(tcp->th_sum);
1854
1855                         /*
1856                          * Fix ICMP checksum to compensate the TCP
1857                          * checksum adjustment.
1858                          */
1859                         CALC_SUMD(sum1, sum2, sumd);
1860                         sumd2 = sumd;
1861                 };
1862 #endif
1863                 
1864 #endif
1865         }
1866
1867         if ((flags & IPN_TCPUDP) != 0) {
1868                 /*
1869                  * Step 2 :
1870                  * For offending TCP/UDP IP packets, translate the ports as
1871                  * well, based on the NAT specification. Of course such
1872                  * a change must be reflected in the ICMP checksum as well.
1873                  *
1874                  * Advance notice : Now it becomes complicated :-)
1875                  *
1876                  * Since the port fields are part of the TCP/UDP checksum
1877                  * of the offending IP packet, you need to adjust that checksum
1878                  * as well... but, if you change, you must change the icmp
1879                  * checksum *again*, to reflect that change.
1880                  *
1881                  * To further complicate: the TCP checksum is not in the first
1882                  * 8 bytes of the offending ip packet, so it most likely is not
1883                  * available. Some OSses like Solaris return enough bytes to
1884                  * include the TCP checksum. So we have to check if the
1885                  * ip->ip_len actually holds the TCP checksum of the oip!
1886                  */
1887
1888                 if (nat->nat_oport == tcp->th_dport) {
1889                         if (tcp->th_sport != nat->nat_inport) {
1890                                 /*
1891                                  * Fix ICMP checksum to compensate port
1892                                  * adjustment.
1893                                  */
1894                                 sum1 = ntohs(tcp->th_sport);
1895                                 sum2 = ntohs(nat->nat_inport);
1896                                 CALC_SUMD(sum1, sum2, sumd);
1897                                 sumd2 += sumd;
1898                                 tcp->th_sport = nat->nat_inport;
1899
1900                                 /*
1901                                  * Fix udp checksum to compensate port
1902                                  * adjustment.  NOTE : the offending IP packet
1903                                  * flows the other direction compared to the
1904                                  * ICMP message.
1905                                  *
1906                                  * The UDP checksum is optional, only adjust
1907                                  * it if it has been set.
1908                                  */
1909                                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1910
1911                                         sum1 = ntohs(udp->uh_sum);
1912                                         fix_datacksum(&udp->uh_sum, sumd);
1913                                         sum2 = ntohs(udp->uh_sum);
1914
1915                                         /*
1916                                          * Fix ICMP checksum to 
1917                                          * compensate UDP checksum 
1918                                          * adjustment.
1919                                          */
1920                                         CALC_SUMD(sum1, sum2, sumd);
1921                                         sumd2 += sumd;
1922                                 }
1923
1924                                 /*
1925                                  * Fix tcp checksum (if present) to compensate
1926                                  * port adjustment. NOTE : the offending IP
1927                                  * packet flows the other direction compared to
1928                                  * the ICMP message.
1929                                  */
1930                                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1931
1932                                         sum1 = ntohs(tcp->th_sum);
1933                                         fix_datacksum(&tcp->th_sum, sumd);
1934                                         sum2 = ntohs(tcp->th_sum);
1935
1936                                         /*
1937                                          * Fix ICMP checksum to 
1938                                          * compensate TCP checksum 
1939                                          * adjustment.
1940                                          */
1941                                         CALC_SUMD(sum1, sum2, sumd);
1942                                         sumd2 += sumd;
1943                                 }
1944                         }
1945                 } else {
1946                         if (tcp->th_dport != nat->nat_outport) {
1947                                 /*
1948                                  * Fix ICMP checksum to compensate port
1949                                  * adjustment.
1950                                  */
1951                                 sum1 = ntohs(tcp->th_dport);
1952                                 sum2 = ntohs(nat->nat_outport);
1953                                 CALC_SUMD(sum1, sum2, sumd);
1954                                 sumd2 += sumd;
1955                                 tcp->th_dport = nat->nat_outport;
1956
1957                                 /*
1958                                  * Fix udp checksum to compensate port
1959                                  * adjustment.   NOTE : the offending IP
1960                                  * packet flows the other direction compared
1961                                  * to the ICMP message.
1962                                  *
1963                                  * The UDP checksum is optional, only adjust
1964                                  * it if it has been set.
1965                                  */
1966                                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1967
1968                                         sum1 = ntohs(udp->uh_sum);
1969                                         fix_datacksum(&udp->uh_sum, sumd);
1970                                         sum2 = ntohs(udp->uh_sum);
1971
1972                                         /*
1973                                          * Fix ICMP checksum to compensate
1974                                          * UDP checksum adjustment.
1975                                          */
1976                                         CALC_SUMD(sum1, sum2, sumd);
1977                                         sumd2 += sumd;
1978                                 }
1979
1980                                 /*
1981                                  * Fix tcp checksum (if present) to compensate
1982                                  * port adjustment. NOTE : the offending IP
1983                                  * packet flows the other direction compared to
1984                                  * the ICMP message.
1985                                  */
1986                                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1987
1988                                         sum1 = ntohs(tcp->th_sum);
1989                                         fix_datacksum(&tcp->th_sum, sumd);
1990                                         sum2 = ntohs(tcp->th_sum);
1991
1992                                         /*
1993                                          * Fix ICMP checksum to compensate
1994                                          * UDP checksum adjustment.
1995                                          */
1996                                         CALC_SUMD(sum1, sum2, sumd);
1997                                         sumd2 += sumd;
1998                                 }
1999                         }
2000                 }
2001                 if (sumd2) {
2002                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2003                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2004                         if (nat->nat_dir == NAT_OUTBOUND) {
2005                                 fix_outcksum(fin, &icmp->icmp_cksum, sumd2);
2006                         } else {
2007                                 fix_incksum(fin, &icmp->icmp_cksum, sumd2);
2008                         }
2009                 }
2010         }
2011         if (oip->ip_p == IPPROTO_ICMP)
2012                 nat->nat_age = fr_defnaticmpage;
2013         return nat;
2014 }
2015
2016
2017 /*
2018  * NB: these lookups don't lock access to the list, it assume it has already
2019  * been done!
2020  */
2021 /*
2022  * Lookup a nat entry based on the mapped destination ip address/port and
2023  * real source address/port.  We use this lookup when receiving a packet,
2024  * we're looking for a table entry, based on the destination address.
2025  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2026  */
2027 nat_t *nat_inlookup(fin, flags, p, src, mapdst, rw)
2028 fr_info_t *fin;
2029 register u_int flags, p;
2030 struct in_addr src , mapdst;
2031 int rw;
2032 {
2033         register u_short sport, dport;
2034         register nat_t *nat;
2035         register int nflags;
2036         register u_32_t dst;
2037         ipnat_t *ipn;
2038         void *ifp;
2039         u_int hv;
2040
2041         if (fin != NULL)
2042                 ifp = fin->fin_ifp;
2043         else
2044                 ifp = NULL;
2045         dst = mapdst.s_addr;
2046         if (flags & IPN_TCPUDP) {
2047                 sport = htons(fin->fin_data[0]);
2048                 dport = htons(fin->fin_data[1]);
2049         } else {
2050                 sport = 0;
2051                 dport = 0;
2052         }
2053
2054         hv = NAT_HASH_FN(dst, dport, 0xffffffff);
2055         hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
2056         nat = nat_table[1][hv];
2057         for (; nat; nat = nat->nat_hnext[1]) {
2058                 nflags = nat->nat_flags;
2059                 if ((!ifp || ifp == nat->nat_ifp) &&
2060                     nat->nat_oip.s_addr == src.s_addr &&
2061                     nat->nat_outip.s_addr == dst &&
2062                     ((p == 0) || (p == nat->nat_p))) {
2063                         switch (p)
2064                         {
2065                         case IPPROTO_TCP :
2066                         case IPPROTO_UDP :
2067                                 if (nat->nat_oport != sport)
2068                                         continue;
2069                                 if (nat->nat_outport != dport)
2070                                         continue;
2071                                 break;
2072                         default :
2073                                 break;
2074                         }
2075
2076                         ipn = nat->nat_ptr;
2077                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2078                                 if (appr_match(fin, nat) != 0)
2079                                         continue;
2080                         return nat;
2081                 }
2082         }
2083         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2084                 return NULL;
2085         if (!rw) {
2086                 RWLOCK_EXIT(&ipf_nat);
2087         }
2088         hv = NAT_HASH_FN(dst, 0, 0xffffffff);
2089         hv = NAT_HASH_FN(src.s_addr, dst, ipf_nattable_sz);
2090         if (!rw) {
2091                 WRITE_ENTER(&ipf_nat);
2092         }
2093         nat = nat_table[1][hv];
2094         for (; nat; nat = nat->nat_hnext[1]) {
2095                 nflags = nat->nat_flags;
2096                 if (ifp && ifp != nat->nat_ifp)
2097                         continue;
2098                 if (!(nflags & FI_WILDP))
2099                         continue;
2100                 if (nat->nat_oip.s_addr != src.s_addr ||
2101                     nat->nat_outip.s_addr != dst)
2102                         continue;
2103                 if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
2104                     ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) {
2105                         nat_tabmove(fin, nat);
2106                         break;
2107                 }
2108         }
2109         if (!rw) {
2110                 MUTEX_DOWNGRADE(&ipf_nat);
2111         }
2112         return nat;
2113 }
2114
2115
2116 /*
2117  * This function is only called for TCP/UDP NAT table entries where the
2118  * original was placed in the table without hashing on the ports and we now
2119  * want to include hashing on port numbers.
2120  */
2121 static void nat_tabmove(fin, nat)
2122 fr_info_t *fin;
2123 nat_t *nat;
2124 {
2125         register u_short sport, dport;
2126         u_int hv, nflags;
2127         nat_t **natp;
2128
2129         nflags = nat->nat_flags;
2130
2131         sport = ntohs(fin->fin_data[0]);
2132         dport = ntohs(fin->fin_data[1]);
2133
2134         /*
2135          * Remove the NAT entry from the old location
2136          */
2137         if (nat->nat_hnext[0])
2138                 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
2139         *nat->nat_phnext[0] = nat->nat_hnext[0];
2140
2141         if (nat->nat_hnext[1])
2142                 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
2143         *nat->nat_phnext[1] = nat->nat_hnext[1];
2144
2145         /*
2146          * Add into the NAT table in the new position
2147          */
2148         hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, 0xffffffff);
2149         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2150         natp = &nat_table[0][hv];
2151         if (*natp)
2152                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2153         nat->nat_phnext[0] = natp;
2154         nat->nat_hnext[0] = *natp;
2155         *natp = nat;
2156
2157         hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, 0xffffffff);
2158         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2159         natp = &nat_table[1][hv];
2160         if (*natp)
2161                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2162         nat->nat_phnext[1] = natp;
2163         nat->nat_hnext[1] = *natp;
2164         *natp = nat;
2165 }
2166
2167
2168 /*
2169  * Lookup a nat entry based on the source 'real' ip address/port and
2170  * destination address/port.  We use this lookup when sending a packet out,
2171  * we're looking for a table entry, based on the source address.
2172  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2173  */
2174 nat_t *nat_outlookup(fin, flags, p, src, dst, rw)
2175 fr_info_t *fin;
2176 register u_int flags, p;
2177 struct in_addr src , dst;
2178 int rw;
2179 {
2180         register u_short sport, dport;
2181         register nat_t *nat;
2182         register int nflags;
2183         ipnat_t *ipn;
2184         u_32_t srcip;
2185         void *ifp;
2186         u_int hv;
2187
2188         ifp = fin->fin_ifp;
2189         srcip = src.s_addr;
2190         if (flags & IPN_TCPUDP) {
2191                 sport = ntohs(fin->fin_data[0]);
2192                 dport = ntohs(fin->fin_data[1]);
2193         } else {
2194                 sport = 0;
2195                 dport = 0;
2196         }
2197
2198         hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
2199         hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
2200         nat = nat_table[0][hv];
2201         for (; nat; nat = nat->nat_hnext[0]) {
2202                 nflags = nat->nat_flags;
2203
2204                 if ((!ifp || ifp == nat->nat_ifp) &&
2205                     nat->nat_inip.s_addr == srcip &&
2206                     nat->nat_oip.s_addr == dst.s_addr &&
2207                     ((p == 0) || (p == nat->nat_p))) {
2208                         switch (p)
2209                         {
2210                         case IPPROTO_TCP :
2211                         case IPPROTO_UDP :
2212                                 if (nat->nat_oport != dport)
2213                                         continue;
2214                                 if (nat->nat_inport != sport)
2215                                         continue;
2216                                 break;
2217                         default :
2218                                 break;
2219                         }
2220
2221                         ipn = nat->nat_ptr;
2222                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2223                                 if (appr_match(fin, nat) != 0)
2224                                         continue;
2225                         return nat;
2226                 }
2227         }
2228         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2229                 return NULL;
2230         if (!rw) {
2231                 RWLOCK_EXIT(&ipf_nat);
2232         }
2233
2234         hv = NAT_HASH_FN(dst.s_addr, srcip, ipf_nattable_sz);
2235         if (!rw) {
2236                 WRITE_ENTER(&ipf_nat);
2237         }
2238         nat = nat_table[0][hv];
2239         for (; nat; nat = nat->nat_hnext[0]) {
2240                 nflags = nat->nat_flags;
2241                 if (ifp && ifp != nat->nat_ifp)
2242                         continue;
2243                 if (!(nflags & FI_WILDP))
2244                         continue;
2245                 if ((nat->nat_inip.s_addr != srcip) ||
2246                     (nat->nat_oip.s_addr != dst.s_addr))
2247                         continue;
2248                 if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
2249                     ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
2250                         nat_tabmove(fin, nat);
2251                         break;
2252                 }
2253         }
2254         if (!rw) {
2255                 MUTEX_DOWNGRADE(&ipf_nat);
2256         }
2257         return nat;
2258 }
2259
2260
2261 /*
2262  * Lookup the NAT tables to search for a matching redirect
2263  */
2264 nat_t *nat_lookupredir(np)
2265 register natlookup_t *np;
2266 {
2267         nat_t *nat;
2268         fr_info_t fi;
2269
2270         bzero((char *)&fi, sizeof(fi));
2271         fi.fin_data[0] = np->nl_inport;
2272         fi.fin_data[1] = np->nl_outport;
2273
2274         /*
2275          * If nl_inip is non null, this is a lookup based on the real
2276          * ip address. Else, we use the fake.
2277          */
2278         if ((nat = nat_outlookup(&fi, np->nl_flags, 0, np->nl_inip,
2279                                  np->nl_outip, 0))) {
2280                 np->nl_realip = nat->nat_outip;
2281                 np->nl_realport = nat->nat_outport;
2282         }
2283         return nat;
2284 }
2285
2286
2287 static int nat_match(fin, np, ip)
2288 fr_info_t *fin;
2289 ipnat_t *np;
2290 ip_t *ip;
2291 {
2292         frtuc_t *ft;
2293
2294         if (ip->ip_v != 4)
2295                 return 0;
2296
2297         if (np->in_p && fin->fin_p != np->in_p)
2298                 return 0;
2299         if (fin->fin_out) {
2300                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2301                         return 0;
2302                 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
2303                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2304                         return 0;
2305                 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
2306                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2307                         return 0;
2308         } else {
2309                 if (!(np->in_redir & NAT_REDIRECT))
2310                         return 0;
2311                 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
2312                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2313                         return 0;
2314                 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
2315                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2316                         return 0;
2317         }
2318
2319         ft = &np->in_tuc;
2320         if (!(fin->fin_fl & FI_TCPUDP) ||
2321             (fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) {
2322                 if (ft->ftu_scmp || ft->ftu_dcmp)
2323                         return 0;
2324                 return 1;
2325         }
2326
2327         return fr_tcpudpchk(ft, fin);
2328 }
2329
2330
2331 /*
2332  * Packets going out on the external interface go through this.
2333  * Here, the source address requires alteration, if anything.
2334  */
2335 int ip_natout(ip, fin)
2336 ip_t *ip;
2337 fr_info_t *fin;
2338 {
2339         register ipnat_t *np = NULL;
2340         register u_32_t ipa;
2341         tcphdr_t *tcp = NULL;
2342         u_short sport = 0, dport = 0, *csump = NULL;
2343         int natadd = 1, i, icmpset = 1;
2344         u_int nflags = 0, hv, msk;
2345         struct ifnet *ifp;
2346         frentry_t *fr;
2347         void *sifp;
2348         u_32_t iph;
2349         nat_t *nat;
2350
2351         if (nat_list == NULL || (fr_nat_lock))
2352                 return 0;
2353
2354         if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
2355             fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) {
2356                 sifp = fin->fin_ifp;
2357                 fin->fin_ifp = fr->fr_tif.fd_ifp;
2358         } else
2359                 sifp = fin->fin_ifp;
2360         ifp = fin->fin_ifp;
2361
2362         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2363                 if (fin->fin_p == IPPROTO_TCP)
2364                         nflags = IPN_TCP;
2365                 else if (fin->fin_p == IPPROTO_UDP)
2366                         nflags = IPN_UDP;
2367                 if ((nflags & IPN_TCPUDP)) {
2368                         tcp = (tcphdr_t *)fin->fin_dp;
2369                         sport = tcp->th_sport;
2370                         dport = tcp->th_dport;
2371                 }
2372         }
2373
2374         ipa = fin->fin_saddr;
2375
2376         READ_ENTER(&ipf_nat);
2377
2378         if ((fin->fin_p == IPPROTO_ICMP) &&
2379             (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
2380                 icmpset = 1;
2381         else if ((fin->fin_fl & FI_FRAG) &&
2382             (nat = ipfr_nat_knownfrag(ip, fin)))
2383                 natadd = 0;
2384         else if ((nat = nat_outlookup(fin, nflags|FI_WILDP|FI_WILDA,
2385                                       (u_int)fin->fin_p, fin->fin_src,
2386                                       fin->fin_dst, 0))) {
2387                 nflags = nat->nat_flags;
2388                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2389                         if ((nflags & FI_W_SPORT) &&
2390                             (nat->nat_inport != sport))
2391                                 nat->nat_inport = sport;
2392                         if ((nflags & FI_W_DPORT) &&
2393                             (nat->nat_oport != dport))
2394                                 nat->nat_oport = dport;
2395
2396                         if (nat->nat_outport == 0)
2397                                 nat->nat_outport = sport;
2398                         nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
2399                         nflags = nat->nat_flags;
2400                         nat_stats.ns_wilds--;
2401                 }
2402         } else {
2403                 RWLOCK_EXIT(&ipf_nat);
2404
2405                 msk = 0xffffffff;
2406                 i = 32;
2407
2408                 WRITE_ENTER(&ipf_nat);
2409                 /*
2410                  * If there is no current entry in the nat table for this IP#,
2411                  * create one for it (if there is a matching rule).
2412                  */
2413 maskloop:
2414                 iph = ipa & htonl(msk);
2415                 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
2416                 for (np = nat_rules[hv]; np; np = np->in_mnext)
2417                 {
2418                         if (np->in_ifp && (np->in_ifp != ifp))
2419                                 continue;
2420                         if ((np->in_flags & IPN_RF) &&
2421                             !(np->in_flags & nflags))
2422                                 continue;
2423                         if (np->in_flags & IPN_FILTER) {
2424                                 if (!nat_match(fin, np, ip))
2425                                         continue;
2426                         } else if ((ipa & np->in_inmsk) != np->in_inip)
2427                                 continue;
2428                         if (*np->in_plabel && !appr_ok(ip, tcp, np))
2429                                 continue;
2430                         nat = nat_new(fin, ip, np, NULL,
2431                                       (u_int)nflags, NAT_OUTBOUND);
2432                         if (nat != NULL) {
2433                                 np->in_hits++;
2434                                 break;
2435                         }
2436                 }
2437                 if ((np == NULL) && (i > 0)) {
2438                         do {
2439                                 i--;
2440                                 msk <<= 1;
2441                         } while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
2442                         if (i >= 0)
2443                                 goto maskloop;
2444                 }
2445                 MUTEX_DOWNGRADE(&ipf_nat);
2446         }
2447
2448         /*
2449          * NOTE: ipf_nat must now only be held as a read lock
2450          */
2451         if (nat) {
2452                 np = nat->nat_ptr;
2453                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2454                         ipfr_nat_newfrag(ip, fin, 0, nat);
2455                 MUTEX_ENTER(&nat->nat_lock);
2456                 if (fin->fin_p != IPPROTO_TCP) {
2457                         if (np && np->in_age[1])
2458                                 nat->nat_age = np->in_age[1];
2459                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2460                                 nat->nat_age = fr_defnaticmpage;
2461                         else
2462                                 nat->nat_age = fr_defnatage;
2463                 }
2464                 nat->nat_bytes += ip->ip_len;
2465                 nat->nat_pkts++;
2466                 MUTEX_EXIT(&nat->nat_lock);
2467
2468                 /*
2469                  * Fix up checksums, not by recalculating them, but
2470                  * simply computing adjustments.
2471                  */
2472                 if (nflags == IPN_ICMPERR) {
2473                         u_32_t s1, s2, sumd;
2474
2475                         s1 = LONG_SUM(ntohl(fin->fin_saddr));
2476                         s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
2477                         CALC_SUMD(s1, s2, sumd);
2478
2479                         if (nat->nat_dir == NAT_OUTBOUND)
2480                                 fix_outcksum(fin, &ip->ip_sum, sumd);
2481                         else
2482                                 fix_incksum(fin, &ip->ip_sum, sumd);
2483                 }
2484 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
2485                 else {
2486                         if (nat->nat_dir == NAT_OUTBOUND)
2487                                 fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2488                         else
2489                                 fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2490                 }
2491 #endif
2492                 /*
2493                  * Only change the packet contents, not what is filtered upon.
2494                  */
2495                 ip->ip_src = nat->nat_outip;
2496
2497                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2498
2499                         if ((nat->nat_outport != 0) && (tcp != NULL)) {
2500                                 tcp->th_sport = nat->nat_outport;
2501                                 fin->fin_data[0] = ntohs(tcp->th_sport);
2502                         }
2503
2504                         if (fin->fin_p == IPPROTO_TCP) {
2505                                 csump = &tcp->th_sum;
2506                                 MUTEX_ENTER(&nat->nat_lock);
2507                                 fr_tcp_age(&nat->nat_age,
2508                                            nat->nat_tcpstate, fin, 1, 0);
2509                                 if (nat->nat_age < fr_defnaticmpage)
2510                                         nat->nat_age = fr_defnaticmpage;
2511 #ifdef LARGE_NAT
2512                                 else if (nat->nat_age > fr_defnatage)
2513                                         nat->nat_age = fr_defnatage;
2514 #endif
2515                                 /*
2516                                  * Increase this because we may have
2517                                  * "keep state" following this too and
2518                                  * packet storms can occur if this is
2519                                  * removed too quickly.
2520                                  */
2521                                 if (nat->nat_age == fr_tcpclosed)
2522                                         nat->nat_age = fr_tcplastack;
2523                                 MUTEX_EXIT(&nat->nat_lock);
2524                         } else if (fin->fin_p == IPPROTO_UDP) {
2525                                 udphdr_t *udp = (udphdr_t *)tcp;
2526
2527                                 if (udp->uh_sum)
2528                                         csump = &udp->uh_sum;
2529                         }
2530
2531                         if (csump) {
2532                                 if (nat->nat_dir == NAT_OUTBOUND)
2533                                         fix_outcksum(fin, csump,
2534                                                      nat->nat_sumd[1]);
2535                                 else
2536                                         fix_incksum(fin, csump,
2537                                                     nat->nat_sumd[1]);
2538                         }
2539                 }
2540
2541                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2542                      (tcp != NULL && dport == np->in_dport))) {
2543                         i = appr_check(ip, fin, nat);
2544                         if (i == 0)
2545                                 i = 1;
2546                 } else
2547                         i = 1;
2548                 ATOMIC_INCL(nat_stats.ns_mapped[1]);
2549                 RWLOCK_EXIT(&ipf_nat);  /* READ */
2550                 fin->fin_ifp = sifp;
2551                 return i;
2552         }
2553         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2554         fin->fin_ifp = sifp;
2555         return 0;
2556 }
2557
2558
2559 /*
2560  * Packets coming in from the external interface go through this.
2561  * Here, the destination address requires alteration, if anything.
2562  */
2563 int ip_natin(ip, fin)
2564 ip_t *ip;
2565 fr_info_t *fin;
2566 {
2567         register struct in_addr src;
2568         register struct in_addr in;
2569         register ipnat_t *np;
2570         u_short sport = 0, dport = 0, *csump = NULL;
2571         u_int nflags = 0, natadd = 1, hv, msk;
2572         struct ifnet *ifp = fin->fin_ifp;
2573         tcphdr_t *tcp = NULL;
2574         int i, icmpset = 0;
2575         nat_t *nat;
2576         u_32_t iph;
2577
2578         if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
2579                 return 0;
2580
2581         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2582                 if (fin->fin_p == IPPROTO_TCP)
2583                         nflags = IPN_TCP;
2584                 else if (fin->fin_p == IPPROTO_UDP)
2585                         nflags = IPN_UDP;
2586                 if ((nflags & IPN_TCPUDP)) {
2587                         tcp = (tcphdr_t *)fin->fin_dp;
2588                         sport = tcp->th_sport;
2589                         dport = tcp->th_dport;
2590                 }
2591         }
2592
2593         in = fin->fin_dst;
2594         /* make sure the source address is to be redirected */
2595         src = fin->fin_src;
2596
2597         READ_ENTER(&ipf_nat);
2598
2599         if ((fin->fin_p == IPPROTO_ICMP) &&
2600             (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
2601                 icmpset = 1;
2602         else if ((fin->fin_fl & FI_FRAG) &&
2603                  (nat = ipfr_nat_knownfrag(ip, fin)))
2604                 natadd = 0;
2605         else if ((nat = nat_inlookup(fin, nflags|FI_WILDP|FI_WILDA,
2606                                      (u_int)fin->fin_p, fin->fin_src, in, 0))) {
2607                 nflags = nat->nat_flags;
2608                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2609                         if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
2610                                 nat->nat_oport = sport;
2611                         if ((nat->nat_outport != dport) &&
2612                                  (nflags & FI_W_SPORT))
2613                                 nat->nat_outport = dport;
2614                         nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2615                         nflags = nat->nat_flags;
2616                         nat_stats.ns_wilds--;
2617                 }
2618         } else {
2619                 RWLOCK_EXIT(&ipf_nat);
2620
2621                 msk = 0xffffffff;
2622                 i = 32;
2623
2624                 WRITE_ENTER(&ipf_nat);
2625                 /*
2626                  * If there is no current entry in the nat table for this IP#,
2627                  * create one for it (if there is a matching rule).
2628                  */
2629 maskloop:
2630                 iph = in.s_addr & htonl(msk);
2631                 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2632                 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2633                         if ((np->in_ifp && (np->in_ifp != ifp)) ||
2634                             (np->in_p && (np->in_p != fin->fin_p)) ||
2635                             (np->in_flags && !(nflags & np->in_flags)))
2636                                 continue;
2637                         if (np->in_flags & IPN_FILTER) {
2638                                 if (!nat_match(fin, np, ip))
2639                                         continue;
2640                         } else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2641                                 continue;
2642                         if ((!np->in_pmin || (np->in_flags & IPN_FILTER) ||
2643                              ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2644                               (ntohs(dport) >= ntohs(np->in_pmin)))))
2645                                 if ((nat = nat_new(fin, ip, np, NULL, nflags,
2646                                                     NAT_INBOUND))) {
2647                                         np->in_hits++;
2648                                         break;
2649                                 }
2650                 }
2651
2652                 if ((np == NULL) && (i > 0)) {
2653                         do {
2654                                 i--;
2655                                 msk <<= 1;
2656                         } while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2657                         if (i >= 0)
2658                                 goto maskloop;
2659                 }
2660                 MUTEX_DOWNGRADE(&ipf_nat);
2661         }
2662
2663         /*
2664          * NOTE: ipf_nat must now only be held as a read lock
2665          */
2666         if (nat) {
2667                 np = nat->nat_ptr;
2668                 fin->fin_fr = nat->nat_fr;
2669                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2670                         ipfr_nat_newfrag(ip, fin, 0, nat);
2671                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2672                      (tcp != NULL && sport == np->in_dport))) {
2673                         i = appr_check(ip, fin, nat);
2674                         if (i == -1) {
2675                                 RWLOCK_EXIT(&ipf_nat);
2676                                 return i;
2677                         }
2678                 }
2679
2680                 MUTEX_ENTER(&nat->nat_lock);
2681                 if (fin->fin_p != IPPROTO_TCP) {
2682                         if (np && np->in_age[0])
2683                                 nat->nat_age = np->in_age[0];
2684                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2685                                 nat->nat_age = fr_defnaticmpage;
2686                         else
2687                                 nat->nat_age = fr_defnatage;
2688                 }
2689                 nat->nat_bytes += ip->ip_len;
2690                 nat->nat_pkts++;
2691                 MUTEX_EXIT(&nat->nat_lock);
2692                 ip->ip_dst = nat->nat_inip;
2693                 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2694
2695                 /*
2696                  * Fix up checksums, not by recalculating them, but
2697                  * simply computing adjustments.
2698                  */
2699 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
2700                 if (nat->nat_dir == NAT_OUTBOUND)
2701                         fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2702                 else
2703                         fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2704 #endif
2705                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2706
2707                         if ((nat->nat_inport != 0) && (tcp != NULL)) {
2708                                 tcp->th_dport = nat->nat_inport;
2709                                 fin->fin_data[1] = ntohs(tcp->th_dport);
2710                         }
2711
2712                         if (fin->fin_p == IPPROTO_TCP) {
2713                                 csump = &tcp->th_sum;
2714                                 MUTEX_ENTER(&nat->nat_lock);
2715                                 fr_tcp_age(&nat->nat_age,
2716                                            nat->nat_tcpstate, fin, 0, 0);
2717                                 if (nat->nat_age < fr_defnaticmpage)
2718                                         nat->nat_age = fr_defnaticmpage;
2719 #ifdef LARGE_NAT
2720                                 else if (nat->nat_age > fr_defnatage)
2721                                         nat->nat_age = fr_defnatage;
2722 #endif
2723                                 /*
2724                                  * Increase this because we may have
2725                                  * "keep state" following this too and
2726                                  * packet storms can occur if this is
2727                                  * removed too quickly.
2728                                  */
2729                                 if (nat->nat_age == fr_tcpclosed)
2730                                         nat->nat_age = fr_tcplastack;
2731                                 MUTEX_EXIT(&nat->nat_lock);
2732                         } else if (fin->fin_p == IPPROTO_UDP) {
2733                                 udphdr_t *udp = (udphdr_t *)tcp;
2734
2735                                 if (udp->uh_sum)
2736                                         csump = &udp->uh_sum;
2737                         }
2738
2739                         if (csump) {
2740                                 if (nat->nat_dir == NAT_OUTBOUND)
2741                                         fix_incksum(fin, csump,
2742                                                     nat->nat_sumd[0]);
2743                                 else
2744                                         fix_outcksum(fin, csump,
2745                                                     nat->nat_sumd[0]);
2746                         }
2747                 }
2748                 ATOMIC_INCL(nat_stats.ns_mapped[0]);
2749                 RWLOCK_EXIT(&ipf_nat);                  /* READ */
2750                 return 1;
2751         }
2752         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2753         return 0;
2754 }
2755
2756
2757 /*
2758  * Free all memory used by NAT structures allocated at runtime.
2759  */
2760 void ip_natunload()
2761 {
2762         WRITE_ENTER(&ipf_nat);
2763         (void) nat_clearlist();
2764         (void) nat_flushtable();
2765         RWLOCK_EXIT(&ipf_nat);
2766
2767         if (nat_table[0] != NULL) {
2768                 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2769                 nat_table[0] = NULL;
2770         }
2771         if (nat_table[1] != NULL) {
2772                 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2773                 nat_table[1] = NULL;
2774         }
2775         if (nat_rules != NULL) {
2776                 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2777                 nat_rules = NULL;
2778         }
2779         if (rdr_rules != NULL) {
2780                 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2781                 rdr_rules = NULL;
2782         }
2783         if (maptable != NULL) {
2784                 KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2785                 maptable = NULL;
2786         }
2787 }
2788
2789
2790 /*
2791  * Slowly expire held state for NAT entries.  Timeouts are set in
2792  * expectation of this being called twice per second.
2793  */
2794 void ip_natexpire()
2795 {
2796         register struct nat *nat, **natp;
2797 #if defined(_KERNEL) && !SOLARIS
2798         int s;
2799 #endif
2800
2801         SPL_NET(s);
2802         WRITE_ENTER(&ipf_nat);
2803         for (natp = &nat_instances; (nat = *natp); ) {
2804                 nat->nat_age--;
2805                 if (nat->nat_age) {
2806                         natp = &nat->nat_next;
2807                         continue;
2808                 }
2809                 *natp = nat->nat_next;
2810 #ifdef  IPFILTER_LOG
2811                 nat_log(nat, NL_EXPIRE);
2812 #endif
2813                 nat_delete(nat);
2814                 nat_stats.ns_expire++;
2815         }
2816         RWLOCK_EXIT(&ipf_nat);
2817         SPL_X(s);
2818 }
2819
2820
2821 /*
2822  */
2823 void ip_natsync(ifp)
2824 void *ifp;
2825 {
2826         register ipnat_t *n;
2827         register nat_t *nat;
2828         register u_32_t sum1, sum2, sumd;
2829         struct in_addr in;
2830         ipnat_t *np;
2831         void *ifp2;
2832 #if defined(_KERNEL) && !SOLARIS
2833         int s;
2834 #endif
2835
2836         /*
2837          * Change IP addresses for NAT sessions for any protocol except TCP
2838          * since it will break the TCP connection anyway.
2839          */
2840         SPL_NET(s);
2841         WRITE_ENTER(&ipf_nat);
2842         for (nat = nat_instances; nat; nat = nat->nat_next)
2843                 if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2844                     !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2845                     (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2846                         ifp2 = nat->nat_ifp;
2847                         /*
2848                          * Change the map-to address to be the same as the
2849                          * new one.
2850                          */
2851                         sum1 = nat->nat_outip.s_addr;
2852                         if (fr_ifpaddr(4, ifp2, &in) != -1)
2853                                 nat->nat_outip = in;
2854                         sum2 = nat->nat_outip.s_addr;
2855
2856                         if (sum1 == sum2)
2857                                 continue;
2858                         /*
2859                          * Readjust the checksum adjustment to take into
2860                          * account the new IP#.
2861                          */
2862                         CALC_SUMD(sum1, sum2, sumd);
2863                         /* XXX - dont change for TCP when solaris does
2864                          * hardware checksumming.
2865                          */
2866                         sumd += nat->nat_sumd[0];
2867                         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2868                         nat->nat_sumd[1] = nat->nat_sumd[0];
2869                 }
2870
2871         for (n = nat_list; (n != NULL); n = n->in_next)
2872                 if (n->in_ifp == ifp) {
2873                         n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2874                         if (!n->in_ifp)
2875                                 n->in_ifp = (void *)-1;
2876                 }
2877         RWLOCK_EXIT(&ipf_nat);
2878         SPL_X(s);
2879 }
2880
2881
2882 #ifdef  IPFILTER_LOG
2883 void nat_log(nat, type)
2884 struct nat *nat;
2885 u_int type;
2886 {
2887         struct ipnat *np;
2888         struct natlog natl;
2889         void *items[1];
2890         size_t sizes[1];
2891         int rulen, types[1];
2892
2893         natl.nl_inip = nat->nat_inip;
2894         natl.nl_outip = nat->nat_outip;
2895         natl.nl_origip = nat->nat_oip;
2896         natl.nl_bytes = nat->nat_bytes;
2897         natl.nl_pkts = nat->nat_pkts;
2898         natl.nl_origport = nat->nat_oport;
2899         natl.nl_inport = nat->nat_inport;
2900         natl.nl_outport = nat->nat_outport;
2901         natl.nl_p = nat->nat_p;
2902         natl.nl_type = type;
2903         natl.nl_rule = -1;
2904 #ifndef LARGE_NAT
2905         if (nat->nat_ptr != NULL) {
2906                 for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
2907                         if (np == nat->nat_ptr) {
2908                                 natl.nl_rule = rulen;
2909                                 break;
2910                         }
2911         }
2912 #endif
2913         items[0] = &natl;
2914         sizes[0] = sizeof(natl);
2915         types[0] = 0;
2916
2917         (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
2918 }
2919 #endif
2920
2921
2922 #if defined(__OpenBSD__)
/*
 * Interface detach hook: the interface argument is not examined, a full
 * frsync() is performed instead.
 */
void nat_ifdetach(ifp)
void *ifp;
{
	(void) ifp;
	frsync();
}
2929 #endif