]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/contrib/ipfilter/netinet/ip_nat.c
Include sys/_lock.h and sys/_mutex.h to reduce namespace pollution.
[FreeBSD/FreeBSD.git] / sys / contrib / ipfilter / netinet / ip_nat.c
1 /*
2  * Copyright (C) 1995-2001 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
7  */
8
9 #if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL)
10 #define _KERNEL
11 #endif
12
13 #include <sys/errno.h>
14 #include <sys/types.h>
15 #include <sys/param.h>
16 #include <sys/time.h>
17 #if (__FreeBSD_version >= 500028)
18 #include <sys/queue.h>
19 #endif
20 #include <sys/file.h>
21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22     defined(_KERNEL)
23 # include "opt_ipfilter_log.h"
24 #endif
25 #if !defined(_KERNEL) && !defined(KERNEL)
26 # include <stdio.h>
27 # include <string.h>
28 # include <stdlib.h>
29 #endif
30 #if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
31 # include <sys/filio.h>
32 # include <sys/fcntl.h>
33 #else
34 # include <sys/ioctl.h>
35 #endif
36 #include <sys/fcntl.h>
37 #include <sys/uio.h>
38 #ifndef linux
39 # include <sys/protosw.h>
40 #endif
41 #include <sys/socket.h>
42 #if defined(_KERNEL) && !defined(linux)
43 # include <sys/systm.h>
44 #endif
45 #if !defined(__SVR4) && !defined(__svr4__)
46 # ifndef linux
47 #  include <sys/mbuf.h>
48 # endif
49 #else
50 # include <sys/filio.h>
51 # include <sys/byteorder.h>
52 # ifdef _KERNEL
53 #  include <sys/dditypes.h>
54 # endif
55 # include <sys/stream.h>
56 # include <sys/kmem.h>
57 #endif
58 #if __FreeBSD_version >= 300000
59 # include <sys/queue.h>
60 #endif
61 #include <net/if.h>
62 #if __FreeBSD_version >= 300000
63 # include <net/if_var.h>
64 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
65 #  include "opt_ipfilter.h"
66 # endif
67 #endif
68 #ifdef sun
69 # include <net/af.h>
70 #endif
71 #include <net/route.h>
72 #include <netinet/in.h>
73 #include <netinet/in_systm.h>
74 #include <netinet/ip.h>
75
76 #ifdef __sgi
77 # ifdef IFF_DRVRLOCK /* IRIX6 */
78 #include <sys/hashing.h>
79 #include <netinet/in_var.h>
80 # endif
81 #endif
82
83 #ifdef RFC1825
84 # include <vpn/md5.h>
85 # include <vpn/ipsec.h>
86 extern struct ifnet vpnif;
87 #endif
88
89 #ifndef linux
90 # include <netinet/ip_var.h>
91 # include <netinet/tcp_fsm.h>
92 #endif
93 #include <netinet/tcp.h>
94 #include <netinet/udp.h>
95 #include <netinet/ip_icmp.h>
96 #include "netinet/ip_compat.h"
97 #include <netinet/tcpip.h>
98 #include "netinet/ip_fil.h"
99 #include "netinet/ip_proxy.h"
100 #include "netinet/ip_nat.h"
101 #include "netinet/ip_frag.h"
102 #include "netinet/ip_state.h"
103 #if (__FreeBSD_version >= 300000)
104 # include <sys/malloc.h>
105 #endif
106 #ifndef MIN
107 # define        MIN(a,b)        (((a)<(b))?(a):(b))
108 #endif
109 #undef  SOCKADDR_IN
110 #define SOCKADDR_IN     struct sockaddr_in
111
112 #if !defined(lint)
113 static const char sccsid[] = "@(#)ip_nat.c      1.11 6/5/96 (C) 1995 Darren Reed";
114 /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.37.2.44 2001/07/21 07:17:22 darrenr Exp $"; */
115 static const char rcsid[] = "@(#)$FreeBSD$";
116 #endif
117
118 nat_t   **nat_table[2] = { NULL, NULL },
119         *nat_instances = NULL;
120 ipnat_t *nat_list = NULL;
121 u_int   ipf_nattable_sz = NAT_TABLE_SZ;
122 u_int   ipf_natrules_sz = NAT_SIZE;
123 u_int   ipf_rdrrules_sz = RDR_SIZE;
124 u_int   ipf_hostmap_sz = HOSTMAP_SIZE;
125 u_32_t  nat_masks = 0;
126 u_32_t  rdr_masks = 0;
127 ipnat_t **nat_rules = NULL;
128 ipnat_t **rdr_rules = NULL;
129 hostmap_t       **maptable  = NULL;
130
131 u_long  fr_defnatage = DEF_NAT_AGE,
132         fr_defnaticmpage = 6;           /* 3 seconds */
133 natstat_t nat_stats;
134 int     fr_nat_lock = 0;
135 #if     (SOLARIS || defined(__sgi)) && defined(_KERNEL)
136 extern  kmutex_t        ipf_rw;
137 extern  KRWLOCK_T       ipf_nat;
138 #endif
139
140 static  int     nat_flushtable __P((void));
141 static  int     nat_clearlist __P((void));
142 static  void    nat_addnat __P((struct ipnat *));
143 static  void    nat_addrdr __P((struct ipnat *));
144 static  void    nat_delete __P((struct nat *));
145 static  void    nat_delrdr __P((struct ipnat *));
146 static  void    nat_delnat __P((struct ipnat *));
147 static  int     fr_natgetent __P((caddr_t));
148 static  int     fr_natgetsz __P((caddr_t));
149 static  int     fr_natputent __P((caddr_t));
150 static  void    nat_tabmove __P((nat_t *, u_32_t));
151 static  int     nat_match __P((fr_info_t *, ipnat_t *, ip_t *));
152 static  hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
153                                     struct in_addr));
154 static  void    nat_hostmapdel __P((struct hostmap *));
155
156
157 int nat_init()
158 {
159         KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
160         if (nat_table[0] != NULL)
161                 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
162         else
163                 return -1;
164
165         KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
166         if (nat_table[1] != NULL)
167                 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
168         else
169                 return -1;
170
171         KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
172         if (nat_rules != NULL)
173                 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
174         else
175                 return -1;
176
177         KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
178         if (rdr_rules != NULL)
179                 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
180         else
181                 return -1;
182
183         KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
184         if (maptable != NULL)
185                 bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
186         else
187                 return -1;
188         return 0;
189 }
190
191
192 static void nat_addrdr(n)
193 ipnat_t *n;
194 {
195         ipnat_t **np;
196         u_32_t j;
197         u_int hv;
198         int k;
199
200         k = countbits(n->in_outmsk);
201         if ((k >= 0) && (k != 32))
202                 rdr_masks |= 1 << k;
203         j = (n->in_outip & n->in_outmsk);
204         hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
205         np = rdr_rules + hv;
206         while (*np != NULL)
207                 np = &(*np)->in_rnext;
208         n->in_rnext = NULL;
209         n->in_prnext = np;
210         *np = n;
211 }
212
213
214 static void nat_addnat(n)
215 ipnat_t *n;
216 {
217         ipnat_t **np;
218         u_32_t j;
219         u_int hv;
220         int k;
221
222         k = countbits(n->in_inmsk);
223         if ((k >= 0) && (k != 32))
224                 nat_masks |= 1 << k;
225         j = (n->in_inip & n->in_inmsk);
226         hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
227         np = nat_rules + hv;
228         while (*np != NULL)
229                 np = &(*np)->in_mnext;
230         n->in_mnext = NULL;
231         n->in_pmnext = np;
232         *np = n;
233 }
234
235
236 static void nat_delrdr(n)
237 ipnat_t *n;
238 {
239         if (n->in_rnext)
240                 n->in_rnext->in_prnext = n->in_prnext;
241         *n->in_prnext = n->in_rnext;
242 }
243
244
245 static void nat_delnat(n)
246 ipnat_t *n;
247 {
248         if (n->in_mnext)
249                 n->in_mnext->in_pmnext = n->in_pmnext;
250         *n->in_pmnext = n->in_mnext;
251 }
252
253
254 /*
255  * check if an ip address has already been allocated for a given mapping that
256  * is not doing port based translation.
257  *
258  * Must be called with ipf_nat held as a write lock.
259  */
260 static struct hostmap *nat_hostmap(np, real, map)
261 ipnat_t *np;
262 struct in_addr real;
263 struct in_addr map;
264 {
265         hostmap_t *hm;
266         u_int hv;
267
268         hv = real.s_addr % HOSTMAP_SIZE;
269         for (hm = maptable[hv]; hm; hm = hm->hm_next)
270                 if ((hm->hm_realip.s_addr == real.s_addr) &&
271                     (np == hm->hm_ipnat)) {
272                         hm->hm_ref++;
273                         return hm;
274                 }
275
276         KMALLOC(hm, hostmap_t *);
277         if (hm) {
278                 hm->hm_next = maptable[hv];
279                 hm->hm_pnext = maptable + hv;
280                 if (maptable[hv])
281                         maptable[hv]->hm_pnext = &hm->hm_next;
282                 maptable[hv] = hm;
283                 hm->hm_ipnat = np;
284                 hm->hm_realip = real;
285                 hm->hm_mapip = map;
286                 hm->hm_ref = 1;
287         }
288         return hm;
289 }
290
291
292 /*
293  * Must be called with ipf_nat held as a write lock.
294  */
295 static void nat_hostmapdel(hm)
296 struct hostmap *hm;
297 {
298         ATOMIC_DEC32(hm->hm_ref);
299         if (hm->hm_ref == 0) {
300                 if (hm->hm_next)
301                         hm->hm_next->hm_pnext = hm->hm_pnext;
302                 *hm->hm_pnext = hm->hm_next;
303                 KFREE(hm);
304         }
305 }
306
307
308 void fix_outcksum(fin, sp, n)
309 fr_info_t *fin;
310 u_short *sp;
311 u_32_t n;
312 {
313         register u_short sumshort;
314         register u_32_t sum1;
315
316         if (!n)
317                 return;
318         else if (n & NAT_HW_CKSUM) {
319                 n &= 0xffff;
320                 n += fin->fin_dlen;
321                 n = (n & 0xffff) + (n >> 16);
322                 *sp = n & 0xffff;
323                 return;
324         }
325         sum1 = (~ntohs(*sp)) & 0xffff;
326         sum1 += (n);
327         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
328         /* Again */
329         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
330         sumshort = ~(u_short)sum1;
331         *(sp) = htons(sumshort);
332 }
333
334
335 void fix_incksum(fin, sp, n)
336 fr_info_t *fin;
337 u_short *sp;
338 u_32_t n;
339 {
340         register u_short sumshort;
341         register u_32_t sum1;
342
343         if (!n)
344                 return;
345         else if (n & NAT_HW_CKSUM) {
346                 n &= 0xffff;
347                 n += fin->fin_dlen;
348                 n = (n & 0xffff) + (n >> 16);
349                 *sp = n & 0xffff;
350                 return;
351         }
352 #ifdef sparc
353         sum1 = (~(*sp)) & 0xffff;
354 #else
355         sum1 = (~ntohs(*sp)) & 0xffff;
356 #endif
357         sum1 += ~(n) & 0xffff;
358         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
359         /* Again */
360         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
361         sumshort = ~(u_short)sum1;
362         *(sp) = htons(sumshort);
363 }
364
365
366 /*
367  * fix_datacksum is used *only* for the adjustments of checksums in the data
368  * section of an IP packet.
369  *
370  * The only situation in which you need to do this is when NAT'ing an 
371  * ICMP error message. Such a message, contains in its body the IP header
372  * of the original IP packet, that causes the error.
373  *
374  * You can't use fix_incksum or fix_outcksum in that case, because for the
375  * kernel the data section of the ICMP error is just data, and no special 
376  * processing like hardware cksum or ntohs processing have been done by the 
377  * kernel on the data section.
378  */
379 void fix_datacksum(sp, n)
380 u_short *sp;
381 u_32_t n;
382 {
383         register u_short sumshort;
384         register u_32_t sum1;
385
386         if (!n)
387                 return;
388
389         sum1 = (~ntohs(*sp)) & 0xffff;
390         sum1 += (n);
391         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
392         /* Again */
393         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
394         sumshort = ~(u_short)sum1;
395         *(sp) = htons(sumshort);
396 }
397
398 /*
399  * How the NAT is organised and works.
400  *
401  * Inside (interface y) NAT       Outside (interface x)
402  * -------------------- -+- -------------------------------------
403  * Packet going          |   out, processed by ip_natout() for x
404  * ------------>         |   ------------>
405  * src=10.1.1.1          |   src=192.1.1.1
406  *                       |
407  *                       |   in, processed by ip_natin() for x
408  * <------------         |   <------------
409  * dst=10.1.1.1          |   dst=192.1.1.1
410  * -------------------- -+- -------------------------------------
411  * ip_natout() - changes ip_src and if required, sport
412  *             - creates a new mapping, if required.
413  * ip_natin()  - changes ip_dst and if required, dport
414  *
415  * In the NAT table, internal source is recorded as "in" and externally
416  * seen as "out".
417  */
418
419 /*
420  * Handle ioctls which manipulate the NAT.
421  */
422 int nat_ioctl(data, cmd, mode)
423 #if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
424 u_long cmd;
425 #else
426 int cmd;
427 #endif
428 caddr_t data;
429 int mode;
430 {
431         register ipnat_t *nat, *nt, *n = NULL, **np = NULL;
432         int error = 0, ret, arg;
433         ipnat_t natd;
434         u_32_t i, j;
435
436 #if (BSD >= 199306) && defined(_KERNEL)
437         if ((securelevel >= 3) && (mode & FWRITE))
438                 return EPERM;
439 #endif
440
441         nat = NULL;     /* XXX gcc -Wuninitialized */
442         KMALLOC(nt, ipnat_t *);
443         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT))
444                 error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
445         else if (cmd == SIOCIPFFL) {    /* SIOCFLNAT & SIOCCNATL */
446                 error = IRCOPY(data, (char *)&arg, sizeof(arg));
447                 if (error)
448                         error = EFAULT;
449         }
450
451         if (error)
452                 goto done;
453
454         /*
455          * For add/delete, look to see if the NAT entry is already present
456          */
457         WRITE_ENTER(&ipf_nat);
458         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
459                 nat = &natd;
460                 nat->in_flags &= IPN_USERFLAGS;
461                 if ((nat->in_redir & NAT_MAPBLK) == 0) {
462                         if ((nat->in_flags & IPN_SPLIT) == 0)
463                                 nat->in_inip &= nat->in_inmsk;
464                         if ((nat->in_flags & IPN_IPRANGE) == 0)
465                                 nat->in_outip &= nat->in_outmsk;
466                 }
467                 for (np = &nat_list; (n = *np); np = &n->in_next)
468                         if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
469                                         IPN_CMPSIZ))
470                                 break;
471         }
472
473         switch (cmd)
474         {
475 #ifdef  IPFILTER_LOG
476         case SIOCIPFFB :
477         {
478                 int tmp;
479
480                 if (!(mode & FWRITE))
481                         error = EPERM;
482                 else {
483                         tmp = ipflog_clear(IPL_LOGNAT);
484                         IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
485                 }
486                 break;
487         }
488 #endif
489         case SIOCADNAT :
490                 if (!(mode & FWRITE)) {
491                         error = EPERM;
492                         break;
493                 }
494                 if (n) {
495                         error = EEXIST;
496                         break;
497                 }
498                 if (nt == NULL) {
499                         error = ENOMEM;
500                         break;
501                 }
502                 n = nt;
503                 nt = NULL;
504                 bcopy((char *)nat, (char *)n, sizeof(*n));
505                 n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
506                 if (!n->in_ifp)
507                         n->in_ifp = (void *)-1;
508                 if (n->in_plabel[0] != '\0') {
509                         n->in_apr = appr_match(n->in_p, n->in_plabel);
510                         if (!n->in_apr) {
511                                 error = ENOENT;
512                                 break;
513                         }
514                 }
515                 n->in_next = NULL;
516                 *np = n;
517
518                 if (n->in_redir & NAT_REDIRECT) {
519                         n->in_flags &= ~IPN_NOTDST;
520                         nat_addrdr(n);
521                 }
522                 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
523                         n->in_flags &= ~IPN_NOTSRC;
524                         nat_addnat(n);
525                 }
526
527                 n->in_use = 0;
528                 if (n->in_redir & NAT_MAPBLK)
529                         n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
530                 else if (n->in_flags & IPN_AUTOPORTMAP)
531                         n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
532                 else if (n->in_flags & IPN_IPRANGE)
533                         n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
534                 else if (n->in_flags & IPN_SPLIT)
535                         n->in_space = 2;
536                 else
537                         n->in_space = ~ntohl(n->in_outmsk);
538                 /*
539                  * Calculate the number of valid IP addresses in the output
540                  * mapping range.  In all cases, the range is inclusive of
541                  * the start and ending IP addresses.
542                  * If to a CIDR address, lose 2: broadcast + network address
543                  *                               (so subtract 1)
544                  * If to a range, add one.
545                  * If to a single IP address, set to 1.
546                  */
547                 if (n->in_space) {
548                         if ((n->in_flags & IPN_IPRANGE) != 0)
549                                 n->in_space += 1;
550                         else
551                                 n->in_space -= 1;
552                 } else
553                         n->in_space = 1;
554                 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
555                     ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
556                         n->in_nip = ntohl(n->in_outip) + 1;
557                 else if ((n->in_flags & IPN_SPLIT) &&
558                          (n->in_redir & NAT_REDIRECT))
559                         n->in_nip = ntohl(n->in_inip);
560                 else
561                         n->in_nip = ntohl(n->in_outip);
562                 if (n->in_redir & NAT_MAP) {
563                         n->in_pnext = ntohs(n->in_pmin);
564                         /*
565                          * Multiply by the number of ports made available.
566                          */
567                         if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
568                                 n->in_space *= (ntohs(n->in_pmax) -
569                                                 ntohs(n->in_pmin) + 1);
570                                 /*
571                                  * Because two different sources can map to
572                                  * different destinations but use the same
573                                  * local IP#/port #.
574                                  * If the result is smaller than in_space, then
575                                  * we may have wrapped around 32bits.
576                                  */
577                                 i = n->in_inmsk;
578                                 if ((i != 0) && (i != 0xffffffff)) {
579                                         j = n->in_space * (~ntohl(i) + 1);
580                                         if (j >= n->in_space)
581                                                 n->in_space = j;
582                                         else
583                                                 n->in_space = 0xffffffff;
584                                 }
585                         }
586                         /*
587                          * If no protocol is specified, multiple by 256.
588                          */
589                         if ((n->in_flags & IPN_TCPUDP) == 0) {
590                                         j = n->in_space * 256;
591                                         if (j >= n->in_space)
592                                                 n->in_space = j;
593                                         else
594                                                 n->in_space = 0xffffffff;
595                         }
596                 }
597                 /* Otherwise, these fields are preset */
598                 n = NULL;
599                 nat_stats.ns_rules++;
600                 break;
601         case SIOCRMNAT :
602                 if (!(mode & FWRITE)) {
603                         error = EPERM;
604                         n = NULL;
605                         break;
606                 }
607                 if (!n) {
608                         error = ESRCH;
609                         break;
610                 }
611                 if (n->in_redir & NAT_REDIRECT)
612                         nat_delrdr(n);
613                 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
614                         nat_delnat(n);
615                 if (nat_list == NULL) {
616                         nat_masks = 0;
617                         rdr_masks = 0;
618                 }
619                 *np = n->in_next;
620                 if (!n->in_use) {
621                         if (n->in_apr)
622                                 appr_free(n->in_apr);
623                         KFREE(n);
624                         nat_stats.ns_rules--;
625                 } else {
626                         n->in_flags |= IPN_DELETE;
627                         n->in_next = NULL;
628                 }
629                 n = NULL;
630                 break;
631         case SIOCGNATS :
632                 MUTEX_DOWNGRADE(&ipf_nat);
633                 nat_stats.ns_table[0] = nat_table[0];
634                 nat_stats.ns_table[1] = nat_table[1];
635                 nat_stats.ns_list = nat_list;
636                 nat_stats.ns_maptable = maptable;
637                 nat_stats.ns_nattab_sz = ipf_nattable_sz;
638                 nat_stats.ns_rultab_sz = ipf_natrules_sz;
639                 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
640                 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
641                 nat_stats.ns_instances = nat_instances;
642                 nat_stats.ns_apslist = ap_sess_list;
643                 error = IWCOPYPTR((char *)&nat_stats, (char *)data,
644                                   sizeof(nat_stats));
645                 break;
646         case SIOCGNATL :
647             {
648                 natlookup_t nl;
649
650                 MUTEX_DOWNGRADE(&ipf_nat);
651                 error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
652                 if (error)
653                         break;
654
655                 if (nat_lookupredir(&nl)) {
656                         error = IWCOPYPTR((char *)&nl, (char *)data,
657                                           sizeof(nl));
658                 } else
659                         error = ESRCH;
660                 break;
661             }
662         case SIOCIPFFL :        /* old SIOCFLNAT & SIOCCNATL */
663                 if (!(mode & FWRITE)) {
664                         error = EPERM;
665                         break;
666                 }
667                 error = 0;
668                 if (arg == 0)
669                         ret = nat_flushtable();
670                 else if (arg == 1)
671                         ret = nat_clearlist();
672                 else
673                         error = EINVAL;
674                 MUTEX_DOWNGRADE(&ipf_nat);
675                 if (!error) {
676                         error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
677                         if (error)
678                                 error = EFAULT;
679                 }
680                 break;
681         case SIOCSTLCK :
682                 error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
683                 if (!error) {
684                         error = IWCOPY((caddr_t)&fr_nat_lock, data,
685                                         sizeof(fr_nat_lock));
686                         if (!error)
687                                 fr_nat_lock = arg;
688                 } else
689                         error = EFAULT;
690                 break;
691         case SIOCSTPUT :
692                 if (fr_nat_lock)
693                         error = fr_natputent(data);
694                 else
695                         error = EACCES;
696                 break;
697         case SIOCSTGSZ :
698                 if (fr_nat_lock)
699                         error = fr_natgetsz(data);
700                 else
701                         error = EACCES;
702                 break;
703         case SIOCSTGET :
704                 if (fr_nat_lock)
705                         error = fr_natgetent(data);
706                 else
707                         error = EACCES;
708                 break;
709         case FIONREAD :
710 #ifdef  IPFILTER_LOG
711                 arg = (int)iplused[IPL_LOGNAT];
712                 MUTEX_DOWNGRADE(&ipf_nat);
713                 error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
714                 if (error)
715                         error = EFAULT;
716 #endif
717                 break;
718         default :
719                 error = EINVAL;
720                 break;
721         }
722         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
723 done:
724         if (nt)
725                 KFREE(nt);
726         return error;
727 }
728
729
730 static int fr_natgetsz(data)
731 caddr_t data;
732 {
733         ap_session_t *aps;
734         nat_t *nat, *n;
735         int error = 0;
736         natget_t ng;
737
738         error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
739         if (error)
740                 return EFAULT;
741
742         nat = ng.ng_ptr;
743         if (!nat) {
744                 nat = nat_instances;
745                 ng.ng_sz = 0;
746                 if (nat == NULL) {
747                         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
748                         if (error)
749                                 error = EFAULT;
750                         return error;
751                 }
752         } else {
753                 /*
754                  * Make sure the pointer we're copying from exists in the
755                  * current list of entries.  Security precaution to prevent
756                  * copying of random kernel data.
757                  */
758                 for (n = nat_instances; n; n = n->nat_next)
759                         if (n == nat)
760                                 break;
761                 if (!n)
762                         return ESRCH;
763         }
764
765         ng.ng_sz = sizeof(nat_save_t);
766         aps = nat->nat_aps;
767         if ((aps != NULL) && (aps->aps_data != 0)) {
768                 ng.ng_sz += sizeof(ap_session_t);
769                 ng.ng_sz += aps->aps_psiz;
770         }
771
772         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
773         if (error)
774                 error = EFAULT;
775         return error;
776 }
777
778
779 static int fr_natgetent(data)
780 caddr_t data;
781 {
782         nat_save_t ipn, *ipnp, *ipnn = NULL;
783         register nat_t *n, *nat;
784         ap_session_t *aps;
785         int error;
786
787         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
788         if (error)
789                 return EFAULT;
790         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
791         if (error)
792                 return EFAULT;
793
794         nat = ipn.ipn_next;
795         if (!nat) {
796                 nat = nat_instances;
797                 if (nat == NULL) {
798                         if (nat_instances == NULL)
799                                 return ENOENT;
800                         return 0;
801                 }
802         } else {
803                 /*
804                  * Make sure the pointer we're copying from exists in the
805                  * current list of entries.  Security precaution to prevent
806                  * copying of random kernel data.
807                  */
808                 for (n = nat_instances; n; n = n->nat_next)
809                         if (n == nat)
810                                 break;
811                 if (!n)
812                         return ESRCH;
813         }
814
815         ipn.ipn_next = nat->nat_next;
816         ipn.ipn_dsize = 0;
817         bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
818         ipn.ipn_nat.nat_data = NULL;
819
820         if (nat->nat_ptr) {
821                 bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
822                       sizeof(ipn.ipn_ipnat));
823         }
824
825         if (nat->nat_fr)
826                 bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
827                       sizeof(ipn.ipn_rule));
828
829         if ((aps = nat->nat_aps)) {
830                 ipn.ipn_dsize = sizeof(*aps);
831                 if (aps->aps_data)
832                         ipn.ipn_dsize += aps->aps_psiz;
833                 KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
834                 if (ipnn == NULL)
835                         return ENOMEM;
836                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
837
838                 bcopy((char *)aps, ipnn->ipn_data, sizeof(*aps));
839                 if (aps->aps_data) {
840                         bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
841                               aps->aps_psiz);
842                         ipnn->ipn_dsize += aps->aps_psiz;
843                 }
844                 error = IWCOPY((caddr_t)ipnn, ipnp,
845                                sizeof(ipn) + ipn.ipn_dsize);
846                 if (error)
847                         error = EFAULT;
848                 KFREES(ipnn, sizeof(*ipnn) + ipn.ipn_dsize);
849         } else {
850                 error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
851                 if (error)
852                         error = EFAULT;
853         }
854         return error;
855 }
856
857
858 static int fr_natputent(data)
859 caddr_t data;
860 {
861         nat_save_t ipn, *ipnp, *ipnn = NULL;
862         register nat_t *n, *nat;
863         ap_session_t *aps;
864         frentry_t *fr;
865         ipnat_t *in;
866
867         int error;
868
869         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
870         if (error)
871                 return EFAULT;
872         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
873         if (error)
874                 return EFAULT;
875         nat = NULL;
876         if (ipn.ipn_dsize) {
877                 KMALLOCS(ipnn, nat_save_t *, sizeof(ipn) + ipn.ipn_dsize);
878                 if (ipnn == NULL)
879                         return ENOMEM;
880                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
881                 error = IRCOPY((caddr_t)ipnp, (caddr_t)ipn.ipn_data,
882                                ipn.ipn_dsize);
883                 if (error) {
884                         error = EFAULT;
885                         goto junkput;
886                 }
887         } else
888                 ipnn = NULL;
889
890         KMALLOC(nat, nat_t *);
891         if (nat == NULL) {
892                 error = EFAULT;
893                 goto junkput;
894         }
895
896         bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
897         /*
898          * Initialize all these so that nat_delete() doesn't cause a crash.
899          */
900         nat->nat_phnext[0] = NULL;
901         nat->nat_phnext[1] = NULL;
902         fr = nat->nat_fr;
903         nat->nat_fr = NULL;
904         aps = nat->nat_aps;
905         nat->nat_aps = NULL;
906         in = nat->nat_ptr;
907         nat->nat_ptr = NULL;
908         nat->nat_hm = NULL;
909         nat->nat_data = NULL;
910
911         /*
912          * Restore the rule associated with this nat session
913          */
914         if (in) {
915                 KMALLOC(in, ipnat_t *);
916                 if (in == NULL) {
917                         error = ENOMEM;
918                         goto junkput;
919                 }
920                 nat->nat_ptr = in;
921                 bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
922                 in->in_use = 1;
923                 in->in_flags |= IPN_DELETE;
924                 in->in_next = NULL;
925                 in->in_rnext = NULL;
926                 in->in_prnext = NULL;
927                 in->in_mnext = NULL;
928                 in->in_pmnext = NULL;
929                 in->in_ifp = GETUNIT(in->in_ifname, 4);
930                 if (in->in_plabel[0] != '\0') {
931                         in->in_apr = appr_match(in->in_p, in->in_plabel);
932                 }
933         }
934
935         /*
936          * Restore ap_session_t structure.  Include the private data allocated
937          * if it was there.
938          */
939         if (aps) {
940                 KMALLOC(aps, ap_session_t *);
941                 if (aps == NULL) {
942                         error = ENOMEM;
943                         goto junkput;
944                 }
945                 nat->nat_aps = aps;
946                 aps->aps_next = ap_sess_list;
947                 ap_sess_list = aps;
948                 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
949                 if (in)
950                         aps->aps_apr = in->in_apr;
951                 if (aps->aps_psiz) {
952                         KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
953                         if (aps->aps_data == NULL) {
954                                 error = ENOMEM;
955                                 goto junkput;
956                         }
957                         bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
958                               aps->aps_psiz);
959                 } else {
960                         aps->aps_psiz = 0;
961                         aps->aps_data = NULL;
962                 }
963         }
964
965         /*
966          * If there was a filtering rule associated with this entry then
967          * build up a new one.
968          */
969         if (fr != NULL) {
970                 if (nat->nat_flags & FI_NEWFR) {
971                         KMALLOC(fr, frentry_t *);
972                         nat->nat_fr = fr;
973                         if (fr == NULL) {
974                                 error = ENOMEM;
975                                 goto junkput;
976                         }
977                         bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
978                         ipn.ipn_nat.nat_fr = fr;
979                         error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
980                         if (error) {
981                                 error = EFAULT;
982                                 goto junkput;
983                         }
984                 } else {
985                         for (n = nat_instances; n; n = n->nat_next)
986                                 if (n->nat_fr == fr)
987                                         break;
988                         if (!n) {
989                                 error = ESRCH;
990                                 goto junkput;
991                         }
992                 }
993         }
994
995         if (ipnn)
996                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
997         nat_insert(nat);
998         return 0;
999 junkput:
1000         if (ipnn)
1001                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1002         if (nat)
1003                 nat_delete(nat);
1004         return error;
1005 }
1006
1007
1008 /*
1009  * Delete a nat entry from the various lists and table.
1010  */
1011 static void nat_delete(natd)
1012 struct nat *natd;
1013 {
1014         struct ipnat *ipn;
1015
1016         if (natd->nat_flags & FI_WILDP)
1017                 nat_stats.ns_wilds--;
1018         if (natd->nat_hnext[0])
1019                 natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0];
1020         *natd->nat_phnext[0] = natd->nat_hnext[0];
1021         if (natd->nat_hnext[1])
1022                 natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1];
1023         *natd->nat_phnext[1] = natd->nat_hnext[1];
1024
1025         if (natd->nat_fr != NULL) {
1026                 ATOMIC_DEC32(natd->nat_fr->fr_ref);
1027         }
1028
1029         if (natd->nat_hm != NULL)
1030                 nat_hostmapdel(natd->nat_hm);
1031
1032         /*
1033          * If there is an active reference from the nat entry to its parent
1034          * rule, decrement the rule's reference count and free it too if no
1035          * longer being used.
1036          */
1037         ipn = natd->nat_ptr;
1038         if (ipn != NULL) {
1039                 ipn->in_space++;
1040                 ipn->in_use--;
1041                 if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
1042                         if (ipn->in_apr)
1043                                 appr_free(ipn->in_apr);
1044                         KFREE(ipn);
1045                         nat_stats.ns_rules--;
1046                 }
1047         }
1048
1049         MUTEX_DESTROY(&natd->nat_lock);
1050         /*
1051          * If there's a fragment table entry too for this nat entry, then
1052          * dereference that as well.
1053          */
1054         ipfr_forget((void *)natd);
1055         aps_free(natd->nat_aps);
1056         nat_stats.ns_inuse--;
1057         KFREE(natd);
1058 }
1059
1060
1061 /*
1062  * nat_flushtable - clear the NAT table of all mapping entries.
1063  */
1064 static int nat_flushtable()
1065 {
1066         register nat_t *nat, **natp;
1067         register int j = 0;
1068
1069         /*
1070          * ALL NAT mappings deleted, so lets just make the deletions
1071          * quicker.
1072          */
1073         if (nat_table[0] != NULL)
1074                 bzero((char *)nat_table[0],
1075                       sizeof(nat_table[0]) * ipf_nattable_sz);
1076         if (nat_table[1] != NULL)
1077                 bzero((char *)nat_table[1],
1078                       sizeof(nat_table[1]) * ipf_nattable_sz);
1079
1080         for (natp = &nat_instances; (nat = *natp); ) {
1081                 *natp = nat->nat_next;
1082 #ifdef  IPFILTER_LOG
1083                 nat_log(nat, NL_FLUSH);
1084 #endif
1085                 nat_delete(nat);
1086                 j++;
1087         }
1088         nat_stats.ns_inuse = 0;
1089         return j;
1090 }
1091
1092
1093 /*
1094  * nat_clearlist - delete all rules in the active NAT mapping list.
1095  */
1096 static int nat_clearlist()
1097 {
1098         register ipnat_t *n, **np = &nat_list;
1099         int i = 0;
1100
1101         if (nat_rules != NULL)
1102                 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1103         if (rdr_rules != NULL)
1104                 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1105
1106         while ((n = *np)) {
1107                 *np = n->in_next;
1108                 if (!n->in_use) {
1109                         if (n->in_apr)
1110                                 appr_free(n->in_apr);
1111                         KFREE(n);
1112                         nat_stats.ns_rules--;
1113                 } else {
1114                         n->in_flags |= IPN_DELETE;
1115                         n->in_next = NULL;
1116                 }
1117                 i++;
1118         }
1119         nat_masks = 0;
1120         rdr_masks = 0;
1121         return i;
1122 }
1123
1124
1125 /*
1126  * Create a new NAT table entry.
1127  * NOTE: assumes write lock on ipf_nat has been obtained already.
1128  */
1129 nat_t *nat_new(np, ip, fin, flags, direction)
1130 ipnat_t *np;
1131 ip_t *ip;
1132 fr_info_t *fin;
1133 u_int flags;
1134 int direction;
1135 {
1136         register u_32_t sum1, sum2, sumd, l;
1137         u_short port = 0, sport = 0, dport = 0, nport = 0;
1138         struct in_addr in, inb;
1139         tcphdr_t *tcp = NULL;
1140         hostmap_t *hm = NULL;
1141         nat_t *nat, *natl;
1142         u_short nflags;
1143 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1144         qif_t *qf = fin->fin_qif;
1145 #endif
1146
1147         nflags = flags & np->in_flags;
1148         if (flags & IPN_TCPUDP) {
1149                 tcp = (tcphdr_t *)fin->fin_dp;
1150                 sport = tcp->th_sport;
1151                 dport = tcp->th_dport;
1152         }
1153
1154         /* Give me a new nat */
1155         KMALLOC(nat, nat_t *);
1156         if (nat == NULL) {
1157                 nat_stats.ns_memfail++;
1158                 return NULL;
1159         }
1160
1161         bzero((char *)nat, sizeof(*nat));
1162         nat->nat_flags = flags;
1163         if (flags & FI_WILDP)
1164                 nat_stats.ns_wilds++;
1165         /*
1166          * Search the current table for a match.
1167          */
1168         if (direction == NAT_OUTBOUND) {
1169                 /*
1170                  * Values at which the search for a free resouce starts.
1171                  */
1172                 u_32_t st_ip;
1173                 u_short st_port;
1174
1175                 /*
1176                  * If it's an outbound packet which doesn't match any existing
1177                  * record, then create a new port
1178                  */
1179                 l = 0;
1180                 st_ip = np->in_nip;
1181                 st_port = np->in_pnext;
1182
1183                 do {
1184                         port = 0;
1185                         in.s_addr = htonl(np->in_nip);
1186                         if (l == 0) {
1187                                 /*
1188                                  * Check to see if there is an existing NAT
1189                                  * setup for this IP address pair.
1190                                  */
1191                                 hm = nat_hostmap(np, ip->ip_src, in);
1192                                 if (hm != NULL)
1193                                         in.s_addr = hm->hm_mapip.s_addr;
1194                         } else if ((l == 1) && (hm != NULL)) {
1195                                 nat_hostmapdel(hm);
1196                                 hm = NULL;
1197                         }
1198                         in.s_addr = ntohl(in.s_addr);
1199
1200                         nat->nat_hm = hm;
1201
1202                         if ((np->in_outmsk == 0xffffffff) &&
1203                             (np->in_pnext == 0)) {
1204                                 if (l > 0)
1205                                         goto badnat;
1206                         }
1207
1208                         if (np->in_redir & NAT_MAPBLK) {
1209                                 if ((l >= np->in_ppip) || ((l > 0) &&
1210                                      !(flags & IPN_TCPUDP)))
1211                                         goto badnat;
1212                                 /*
1213                                  * map-block - Calculate destination address.
1214                                  */
1215                                 in.s_addr = ntohl(ip->ip_src.s_addr);
1216                                 in.s_addr &= ntohl(~np->in_inmsk);
1217                                 inb.s_addr = in.s_addr;
1218                                 in.s_addr /= np->in_ippip;
1219                                 in.s_addr &= ntohl(~np->in_outmsk);
1220                                 in.s_addr += ntohl(np->in_outip);
1221                                 /*
1222                                  * Calculate destination port.
1223                                  */
1224                                 if ((flags & IPN_TCPUDP) &&
1225                                     (np->in_ppip != 0)) {
1226                                         port = ntohs(sport) + l;
1227                                         port %= np->in_ppip;
1228                                         port += np->in_ppip *
1229                                                 (inb.s_addr % np->in_ippip);
1230                                         port += MAPBLK_MINPORT;
1231                                         port = htons(port);
1232                                 }
1233                         } else if (!np->in_outip &&
1234                                    (np->in_outmsk == 0xffffffff)) {
1235                                 /*
1236                                  * 0/32 - use the interface's IP address.
1237                                  */
1238                                 if ((l > 0) ||
1239                                     fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1240                                         goto badnat;
1241                                 in.s_addr = ntohl(in.s_addr);
1242                         } else if (!np->in_outip && !np->in_outmsk) {
1243                                 /*
1244                                  * 0/0 - use the original source address/port.
1245                                  */
1246                                 if (l > 0)
1247                                         goto badnat;
1248                                 in.s_addr = ntohl(ip->ip_src.s_addr);
1249                         } else if ((np->in_outmsk != 0xffffffff) &&
1250                                    (np->in_pnext == 0) &&
1251                                    ((l > 0) || (hm == NULL)))
1252                                 np->in_nip++;
1253                         natl = NULL;
1254
1255                         if ((nflags & IPN_TCPUDP) &&
1256                             ((np->in_redir & NAT_MAPBLK) == 0) &&
1257                             (np->in_flags & IPN_AUTOPORTMAP)) {
1258                                 if ((l > 0) && (l % np->in_ppip == 0)) {
1259                                         if (l > np->in_space) {
1260                                                 goto badnat;
1261                                         } else if ((l > np->in_ppip) &&
1262                                                    np->in_outmsk != 0xffffffff)
1263                                                 np->in_nip++;
1264                                 }
1265                                 if (np->in_ppip != 0) {
1266                                         port = ntohs(sport);
1267                                         port += (l % np->in_ppip);
1268                                         port %= np->in_ppip;
1269                                         port += np->in_ppip *
1270                                                 (ntohl(ip->ip_src.s_addr) %
1271                                                  np->in_ippip);
1272                                         port += MAPBLK_MINPORT;
1273                                         port = htons(port);
1274                                 }
1275                         } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1276                                    (nflags & IPN_TCPUDP) &&
1277                                    (np->in_pnext != 0)) {
1278                                 port = htons(np->in_pnext++);
1279                                 if (np->in_pnext > ntohs(np->in_pmax)) {
1280                                         np->in_pnext = ntohs(np->in_pmin);
1281                                         if (np->in_outmsk != 0xffffffff)
1282                                                 np->in_nip++;
1283                                 }
1284                         }
1285
1286                         if (np->in_flags & IPN_IPRANGE) {
1287                                 if (np->in_nip > ntohl(np->in_outmsk))
1288                                         np->in_nip = ntohl(np->in_outip);
1289                         } else {
1290                                 if ((np->in_outmsk != 0xffffffff) &&
1291                                     ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1292                                     ntohl(np->in_outip))
1293                                         np->in_nip = ntohl(np->in_outip) + 1;
1294                         }
1295
1296                         if (!port && (flags & IPN_TCPUDP))
1297                                 port = sport;
1298
1299                         /*
1300                          * Here we do a lookup of the connection as seen from
1301                          * the outside.  If an IP# pair already exists, try
1302                          * again.  So if you have A->B becomes C->B, you can
1303                          * also have D->E become C->E but not D->B causing
1304                          * another C->B.  Also take protocol and ports into
1305                          * account when determining whether a pre-existing
1306                          * NAT setup will cause an external conflict where
1307                          * this is appropriate.
1308                          */
1309                         inb.s_addr = htonl(in.s_addr);
1310                         natl = nat_inlookup(fin->fin_ifp, flags & ~FI_WILDP,
1311                                             (u_int)ip->ip_p, ip->ip_dst, inb,
1312                                             (port << 16) | dport, 1);
1313
1314                         /*
1315                          * Has the search wrapped around and come back to the
1316                          * start ?
1317                          */
1318                         if ((natl != NULL) &&
1319                             (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1320                             (np->in_nip != 0) && (st_ip == np->in_nip))
1321                                 goto badnat;
1322                         l++;
1323                 } while (natl != NULL);
1324
1325                 if (np->in_space > 0)
1326                         np->in_space--;
1327
1328                 /* Setup the NAT table */
1329                 nat->nat_inip = ip->ip_src;
1330                 nat->nat_outip.s_addr = htonl(in.s_addr);
1331                 nat->nat_oip = ip->ip_dst;
1332                 if (nat->nat_hm == NULL)
1333                         nat->nat_hm = nat_hostmap(np, ip->ip_src,
1334                                                   nat->nat_outip);
1335
1336                 sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr)) + ntohs(sport);
1337                 sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1338
1339                 if (flags & IPN_TCPUDP) {
1340                         nat->nat_inport = sport;
1341                         nat->nat_outport = port;        /* sport */
1342                         nat->nat_oport = dport;
1343                 }
1344         } else {
1345                 /*
1346                  * Otherwise, it's an inbound packet. Most likely, we don't
1347                  * want to rewrite source ports and source addresses. Instead,
1348                  * we want to rewrite to a fixed internal address and fixed
1349                  * internal port.
1350                  */
1351                 if (np->in_flags & IPN_SPLIT) {
1352                         in.s_addr = np->in_nip;
1353                         if (np->in_inip == htonl(in.s_addr))
1354                                 np->in_nip = ntohl(np->in_inmsk);
1355                         else {
1356                                 np->in_nip = ntohl(np->in_inip);
1357                                 if (np->in_flags & IPN_ROUNDR) {
1358                                         nat_delrdr(np);
1359                                         nat_addrdr(np);
1360                                 }
1361                         }
1362                 } else {
1363                         in.s_addr = ntohl(np->in_inip);
1364                         if (np->in_flags & IPN_ROUNDR) {
1365                                 nat_delrdr(np);
1366                                 nat_addrdr(np);
1367                         }
1368                 }
1369                 if (!np->in_pnext)
1370                         nport = dport;
1371                 else {
1372                         /*
1373                          * Whilst not optimized for the case where
1374                          * pmin == pmax, the gain is not significant.
1375                          */
1376                         nport = ntohs(dport) - ntohs(np->in_pmin) +
1377                                 ntohs(np->in_pnext);
1378                         nport = htons(nport);
1379                 }
1380
1381                 /*
1382                  * When the redirect-to address is set to 0.0.0.0, just
1383                  * assume a blank `forwarding' of the packet.  We don't
1384                  * setup any translation for this either.
1385                  */
1386                 if (in.s_addr == 0) {
1387                         if (nport == dport)
1388                                 goto badnat;
1389                         in.s_addr = ntohl(ip->ip_dst.s_addr);
1390                 }
1391
1392                 nat->nat_inip.s_addr = htonl(in.s_addr);
1393                 nat->nat_outip = ip->ip_dst;
1394                 nat->nat_oip = ip->ip_src;
1395
1396                 sum1 = LONG_SUM(ntohl(ip->ip_dst.s_addr)) + ntohs(dport);
1397                 sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1398
1399                 if (flags & IPN_TCPUDP) {
1400                         nat->nat_inport = nport;
1401                         nat->nat_outport = dport;
1402                         nat->nat_oport = sport;
1403                 }
1404         }
1405
1406         CALC_SUMD(sum1, sum2, sumd);
1407         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1408 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1409         if ((flags & IPN_TCPUDP) && dohwcksum &&
1410             (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1411                 if (direction == NAT_OUTBOUND)
1412                         sum1 = LONG_SUM(ntohl(in.s_addr));
1413                 else
1414                         sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
1415                 sum1 += LONG_SUM(ntohl(ip->ip_dst.s_addr));
1416                 sum1 += IPPROTO_TCP;
1417                 sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1418                 nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1419         } else
1420 #endif
1421                 nat->nat_sumd[1] = nat->nat_sumd[0];
1422
1423         if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1424                 if (direction == NAT_OUTBOUND)
1425                         sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
1426                 else
1427                         sum1 = LONG_SUM(ntohl(ip->ip_dst.s_addr));
1428
1429                 sum2 = LONG_SUM(in.s_addr);
1430
1431                 CALC_SUMD(sum1, sum2, sumd);
1432                 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1433         } else
1434                 nat->nat_ipsumd = nat->nat_sumd[0];
1435
1436         in.s_addr = htonl(in.s_addr);
1437
1438 #ifdef  _KERNEL
1439         strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1440 #endif
1441         nat_insert(nat);
1442
1443         nat->nat_dir = direction;
1444         nat->nat_ifp = fin->fin_ifp;
1445         nat->nat_ptr = np;
1446         nat->nat_p = ip->ip_p;
1447         nat->nat_bytes = 0;
1448         nat->nat_pkts = 0;
1449         nat->nat_fr = fin->fin_fr;
1450         if (nat->nat_fr != NULL) {
1451                 ATOMIC_INC32(nat->nat_fr->fr_ref);
1452         }
1453         if (direction == NAT_OUTBOUND) {
1454                 if (flags & IPN_TCPUDP)
1455                         tcp->th_sport = port;
1456         } else {
1457                 if (flags & IPN_TCPUDP)
1458                         tcp->th_dport = nport;
1459         }
1460         np->in_use++;
1461 #ifdef  IPFILTER_LOG
1462         nat_log(nat, (u_int)np->in_redir);
1463 #endif
1464         return nat;
1465 badnat:
1466         nat_stats.ns_badnat++;
1467         if ((hm = nat->nat_hm) != NULL)
1468                 nat_hostmapdel(hm);
1469         KFREE(nat);
1470         return NULL;
1471 }
1472
1473
1474 void    nat_insert(nat)
1475 nat_t   *nat;
1476 {
1477         u_int hv1, hv2;
1478         nat_t **natp;
1479
1480         MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1481
1482         nat->nat_age = fr_defnatage;
1483         nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1484         if (nat->nat_ifname[0] !='\0') {
1485                 nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1486         }
1487
1488         nat->nat_next = nat_instances;
1489         nat_instances = nat;
1490
1491         if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) {
1492                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1493                                   0xffffffff);
1494                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
1495                                   ipf_nattable_sz);
1496                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1497                                   0xffffffff);
1498                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
1499                                  ipf_nattable_sz);
1500         } else {
1501                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
1502                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
1503                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
1504                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
1505         }
1506
1507         natp = &nat_table[0][hv1];
1508         if (*natp)
1509                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
1510         nat->nat_phnext[0] = natp;
1511         nat->nat_hnext[0] = *natp;
1512         *natp = nat;
1513
1514         natp = &nat_table[1][hv2];
1515         if (*natp)
1516                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
1517         nat->nat_phnext[1] = natp;
1518         nat->nat_hnext[1] = *natp;
1519         *natp = nat;
1520
1521         nat_stats.ns_added++;
1522         nat_stats.ns_inuse++;
1523 }
1524
1525
1526 nat_t *nat_icmplookup(ip, fin, dir)
1527 ip_t *ip;
1528 fr_info_t *fin;
1529 int dir;
1530 {
1531         icmphdr_t *icmp;
1532         tcphdr_t *tcp = NULL;
1533         ip_t *oip;
1534         int flags = 0, type, minlen;
1535
1536         icmp = (icmphdr_t *)fin->fin_dp;
1537         /*
1538          * Does it at least have the return (basic) IP header ?
1539          * Only a basic IP header (no options) should be with an ICMP error
1540          * header.
1541          */
1542         if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1543                 return NULL;
1544         type = icmp->icmp_type;
1545         /*
1546          * If it's not an error type, then return.
1547          */
1548         if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1549             (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1550             (type != ICMP_PARAMPROB))
1551                 return NULL;
1552
1553         oip = (ip_t *)((char *)fin->fin_dp + 8);
1554         minlen = (oip->ip_hl << 2);
1555         if (minlen < sizeof(ip_t))
1556                 return NULL;
1557         if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1558                 return NULL;
1559         /*
1560          * Is the buffer big enough for all of it ?  It's the size of the IP
1561          * header claimed in the encapsulated part which is of concern.  It
1562          * may be too big to be in this buffer but not so big that it's
1563          * outside the ICMP packet, leading to TCP deref's causing problems.
1564          * This is possible because we don't know how big oip_hl is when we
1565          * do the pullup early in fr_check() and thus can't gaurantee it is
1566          * all here now.
1567          */
1568 #ifdef  _KERNEL
1569         {
1570         mb_t *m;
1571
1572 # if SOLARIS
1573         m = fin->fin_qfm;
1574         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
1575                 return NULL;
1576 # else
1577         m = *(mb_t **)fin->fin_mp;
1578         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
1579             (char *)ip + m->m_len)
1580                 return NULL;
1581 # endif
1582         }
1583 #endif
1584
1585         if (oip->ip_p == IPPROTO_TCP)
1586                 flags = IPN_TCP;
1587         else if (oip->ip_p == IPPROTO_UDP)
1588                 flags = IPN_UDP;
1589         if (flags & IPN_TCPUDP) {
1590                 minlen += 8;            /* + 64bits of data to get ports */
1591                 if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1592                         return NULL;
1593                 tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1594                 if (dir == NAT_INBOUND)
1595                         return nat_inlookup(fin->fin_ifp, flags,
1596                                 (u_int)oip->ip_p, oip->ip_dst, oip->ip_src,
1597                                 (tcp->th_sport << 16) | tcp->th_dport, 0);
1598                 else
1599                         return nat_outlookup(fin->fin_ifp, flags,
1600                                 (u_int)oip->ip_p, oip->ip_dst, oip->ip_src,
1601                                 (tcp->th_sport << 16) | tcp->th_dport, 0);
1602         }
1603         if (dir == NAT_INBOUND)
1604                 return nat_inlookup(fin->fin_ifp, 0, (u_int)oip->ip_p,
1605                         oip->ip_dst, oip->ip_src, 0, 0);
1606         else
1607                 return nat_outlookup(fin->fin_ifp, 0, (u_int)oip->ip_p,
1608                         oip->ip_dst, oip->ip_src, 0, 0);
1609 }
1610
1611
1612 /*
1613  * nat_icmp - translate the IP packet quoted inside an ICMP error message.
      *
      * ip     - header of the ICMP packet itself; only ip_v == 4 is handled.
      * fin    - packet information; fin_dp is used as the ICMP header.
      * nflags - set to IPN_ICMPERR once a matching NAT entry has been found.
      * dir    - direction; only passed on to nat_icmplookup().
      *
      * Returns the NAT entry nat_icmplookup() found for the quoted packet, or
      * NULL if the packet is flagged FI_SHORT, has a non-zero fin_off, is not
      * IPv4 or has no matching entry.  On success one address of the quoted
      * header (and, for TCP/UDP, possibly one port) has been rewritten and the
      * quoted IP checksum, the quoted UDP checksum (when set) and - for quoted
      * TCP/UDP packets only - the ICMP checksum have been adjusted.
      *
1614  * The original note here reads: this should *ONLY* be used for incoming
      * packets to make sure a NAT'd ICMP packet gets correctly recognised.
      * NOTE(review): ip_natout() also calls this with NAT_OUTBOUND, so that
      * restriction looks stale - confirm.
1615  */
1616 nat_t *nat_icmp(ip, fin, nflags, dir)
1617 ip_t *ip;
1618 fr_info_t *fin;
1619 u_int *nflags;
1620 int dir;
1621 {
1622         u_32_t sum1, sum2, sumd, sumd2 = 0;
1623         struct in_addr in;
1624         icmphdr_t *icmp;
1625         udphdr_t *udp;
1626         nat_t *nat;
1627         ip_t *oip;
1628         int flags = 0;
1629
1630         if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0))
1631                 return NULL;
1632         /*
1633          * nat_icmplookup() will return NULL for `defective' packets.
1634          */
1635         if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1636                 return NULL;
1637         *nflags = IPN_ICMPERR;
1638         icmp = (icmphdr_t *)fin->fin_dp;
1639         oip = (ip_t *)&icmp->icmp_ip;
1640         if (oip->ip_p == IPPROTO_TCP)
1641                 flags = IPN_TCP;
1642         else if (oip->ip_p == IPPROTO_UDP)
1643                 flags = IPN_UDP;
             /*
              * udp points just past the quoted IP header (ip_hl 32-bit words);
              * the same pointer is reused as the TCP header in step 2.
              */
1644         udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1645         /*
1646          * Need to adjust ICMP header to include the real IP#'s and
1647          * port #'s.  Only apply a checksum change relative to the
1648          * IP address change as it will be modified again in ip_natout
1649          * for both address and port.  Two checksum changes are
1650          * necessary for the two header address changes.  Be careful
1651          * to only modify the checksum once for the port # and twice
1652          * for the IP#.
1653          */
1654
1655         /*
1656          * Step 1
1657          * Fix the IP addresses in the offending IP packet. You also need
1658          * to adjust the IP header checksum of that offending IP packet
1659          * and the ICMP checksum of the ICMP error message itself.
1660          *
1661          * Unfortunately, for UDP and TCP, the IP addresses are also contained
1662          * in the pseudo header that is used to compute the UDP resp. TCP
1663          * checksum. So, we must compensate that as well. Even worse, the
1664          * change in the UDP and TCP checksums require yet another
1665          * adjustment of the ICMP checksum of the ICMP error message.
1666          *
1667          * For the moment we forget about TCP, because that checksum is not
1668          * in the first 8 bytes, so it will not be available in most cases.
1669          */
1670
             /*
              * If the quoted destination is nat_oip, the quoted source is
              * replaced with nat_inip; otherwise the quoted destination is
              * replaced with nat_outip.  sum1 is taken from the old address.
              */
1671         if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
1672                 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1673                 in = nat->nat_inip;
1674                 oip->ip_src = in;
1675         } else {
1676                 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1677                 in = nat->nat_outip;
1678                 oip->ip_dst = in;
1679         }
1680
1681         sum2 = LONG_SUM(ntohl(in.s_addr));
1682
             /* sumd: presumably the checksum delta from sum1 to sum2 */
1683         CALC_SUMD(sum1, sum2, sumd);
1684
1685         if (nat->nat_dir == NAT_OUTBOUND) {
1686                 /*
1687                  * Fix IP checksum of the offending IP packet to adjust for
1688                  * the change in the IP address.
1689                  *
1690                  * Normally, you would expect that the ICMP checksum of the
1691                  * ICMP error message needs to be adjusted as well for the
1692                  * IP address change in oip.
1693                  * However, this is a NOP, because the ICMP checksum is
1694                  * calculated over the complete ICMP packet, which includes the
1695                  * changed oip IP addresses and oip->ip_sum. However, these
1696                  * two changes cancel each other out (if the delta for
1697                  * the IP address is x, then the delta for ip_sum is minus x),
1698                  * so no change in the icmp_cksum is necessary.
1699                  *
1700                  * Be careful that nat_dir refers to the direction of the
1701                  * offending IP packet (oip), not to its ICMP response (icmp)
1702                  */
1703                 fix_datacksum(&oip->ip_sum, sumd);
1704
1705                 /*
1706                  * Fix UDP pseudo header checksum to compensate for the
1707                  * IP address change.
1708                  */
1709                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1710                         /*
1711                          * The UDP checksum is optional, only adjust it
1712                          * if it has been set.
1713                          */
1714                         sum1 = ntohs(udp->uh_sum);
1715                         fix_datacksum(&udp->uh_sum, sumd);
1716                         sum2 = ntohs(udp->uh_sum);
1717
1718                         /*
1719                          * Fix ICMP checksum to compensate the UDP
1720                          * checksum adjustment.
1721                          */
1722                         CALC_SUMD(sum1, sum2, sumd);
1723                         sumd2 = sumd;
1724                 }
1725
1726 #if 0
1727                 /*
1728                  * Fix TCP pseudo header checksum to compensate for the
1729                  * IP address change. Before we can do the change, we
1730                  * must make sure that oip is sufficient large to hold
1731                  * the TCP checksum (normally it does not!).
1732                  */
1733                 if (oip->ip_p == IPPROTO_TCP) {
1734                 
1735                 }
1736 #endif
1737         } else {
1738
1739                 /*
1740                  * Fix IP checksum of the offending IP packet to adjust for
1741                  * the change in the IP address.
1742                  *
1743                  * Normally, you would expect that the ICMP checksum of the
1744                  * ICMP error message needs to be adjusted as well for the
1745                  * IP address change in oip.
1746                  * However, this is a NOP, because the ICMP checksum is
1747                  * calculated over the complete ICMP packet, which includes the
1748                  * changed oip IP addresses and oip->ip_sum. However, these
1749                  * two changes cancel each other out (if the delta for
1750                  * the IP address is x, then the delta for ip_sum is minus x),
1751                  * so no change in the icmp_cksum is necessary.
1752                  *
1753                  * Be careful that nat_dir refers to the direction of the
1754                  * offending IP packet (oip), not to its ICMP response (icmp)
1755                  */
1756                 fix_datacksum(&oip->ip_sum, sumd);
1757
1758 /* XXX FV : without having looked at Solaris source code, it seems unlikely
1759  * that SOLARIS would compensate this in the kernel (a body of an IP packet
1760  * in the data section of an ICMP packet). I have the feeling that this should
1761  * be unconditional, but I'm not in a position to check.
1762  */
1763 #if !SOLARIS && !defined(__sgi)
1764                 /*
1765                  * Fix UDP pseudo header checksum to compensate for the
1766                  * IP address change.
1767                  */
1768                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1769                         /*
1770                          * The UDP checksum is optional, only adjust it
1771                          * if it has been set
1772                          */
1773                         sum1 = ntohs(udp->uh_sum);
1774                         fix_datacksum(&udp->uh_sum, sumd);
1775                         sum2 = ntohs(udp->uh_sum);
1776
1777                         /*
1778                          * Fix ICMP checksum to compensate the UDP
1779                          * checksum adjustment.
1780                          */
1781                         CALC_SUMD(sum1, sum2, sumd);
1782                         sumd2 = sumd;
1783                 }
1784                 
1785 #if 0
1786                 /*
1787                  * Fix TCP pseudo header checksum to compensate for the
1788                  * IP address change. Before we can do the change, we
1789                  * must make sure that oip is sufficient large to hold
1790                  * the TCP checksum (normally it does not!).
1791                  */
1792                 if (oip->ip_p == IPPROTO_TCP) {
1793                 
1794                 };
1795 #endif
1796                 
1797 #endif
1798         }
1799
1800         if ((flags & IPN_TCPUDP) != 0) {
1801                 tcphdr_t *tcp;
1802
1803                 /*
1804                  * XXX - what if this is bogus hl and we go off the end ?
1805                  * In this case, nat_icmplookup() will have returned NULL.
1806                  */
1807                 tcp = (tcphdr_t *)udp;
1808
1809                 /*
1810                  * Step 2 :
1811                  * For offending TCP/UDP IP packets, translate the ports as
1812                  * well, based on the NAT specification. Of course such
1813                  * a change must be reflected in the ICMP checksum as well.
1814                  *
1815                  * Advance notice : Now it becomes complicated :-)
1816                  *
1817                  * Since the port fields are part of the TCP/UDP checksum
1818                  * of the offending IP packet, you need to adjust that checksum
1819                  * as well... but, if you change, you must change the icmp
1820                  * checksum *again*, to reflect that change.
1821                  *
1822                  * To further complicate: the TCP checksum is not in the first
1823                  * 8 bytes of the offending ip packet, so it most likely is not
1824                  * available (we might have to fix that if the encounter a
1825                  * device that returns more than 8 data bytes on icmp error)
1826                  */
1827
                     /*
                      * Quoted destination port equals nat_oport: the quoted
                      * source port becomes nat_inport.  Otherwise the quoted
                      * destination port becomes nat_outport.
                      */
1828                 if (nat->nat_oport == tcp->th_dport) {
1829                         if (tcp->th_sport != nat->nat_inport) {
1830                                 /*
1831                                  * Fix ICMP checksum to compensate port
1832                                  * adjustment.
1833                                  */
1834                                 sum1 = ntohs(tcp->th_sport);
1835                                 sum2 = ntohs(nat->nat_inport);
1836                                 CALC_SUMD(sum1, sum2, sumd);
1837                                 sumd2 += sumd;
1838                                 tcp->th_sport = nat->nat_inport;
1839
1840                                 /*
1841                                  * Fix udp checksum to compensate port
1842                                  * adjustment.  NOTE : the offending IP packet
1843                                  * flows the other direction compared to the
1844                                  * ICMP message.
1845                                  *
1846                                  * The UDP checksum is optional, only adjust
1847                                  * it if it has been set.
1848                                  */
1849                                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1850
1851                                         sum1 = ntohs(udp->uh_sum);
1852                                         fix_datacksum(&udp->uh_sum, sumd);
1853                                         sum2 = ntohs(udp->uh_sum);
1854
1855                                         /*
1856                                          * Fix ICMP checksum to
1857                                          * compensate UDP checksum
1858                                          * adjustment.
1859                                          */
1860                                         CALC_SUMD(sum1, sum2, sumd);
1861                                         sumd2 += sumd;
1862                                 }
1863                         }
1864                 } else {
1865                         if (tcp->th_dport != nat->nat_outport) {
1866                                 /*
1867                                  * Fix ICMP checksum to compensate port
1868                                  * adjustment.
1869                                  */
1870                                 sum1 = ntohs(tcp->th_dport);
1871                                 sum2 = ntohs(nat->nat_outport);
1872                                 CALC_SUMD(sum1, sum2, sumd);
1873                                 sumd2 += sumd;
1874                                 tcp->th_dport = nat->nat_outport;
1875
1876                                 /*
1877                                  * Fix udp checksum to compensate port
1878                                  * adjustment.   NOTE : the offending IP
1879                                  * packet flows the other direction compared
1880                                  * to the ICMP message.
1881                                  *
1882                                  * The UDP checksum is optional, only adjust
1883                                  * it if it has been set.
1884                                  */
1885                                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1886
1887                                         sum1 = ntohs(udp->uh_sum);
1888                                         fix_datacksum(&udp->uh_sum, sumd);
1889                                         sum2 = ntohs(udp->uh_sum);
1890
1891                                         /*
1892                                          * Fix ICMP checksum to compensate
1893                                          * UDP checksum adjustment.
1894                                          */
1895                                         CALC_SUMD(sum1, sum2, sumd);
1896                                         sumd2 += sumd;
1897                                 }
1898                         }
1899                 }
                     /*
                      * sumd2 has accumulated every delta that affects the ICMP
                      * checksum; fold the carries back into the low 16 bits
                      * (twice, as the first fold can itself carry) and apply it.
                      */
1900                 if (sumd2) {
1901                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1902                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1903                         if (nat->nat_dir == NAT_OUTBOUND) {
1904                                 fix_outcksum(fin, &icmp->icmp_cksum, sumd2);
1905                         } else {
1906                                 fix_incksum(fin, &icmp->icmp_cksum, sumd2);
1907                         }
1908                 }
1909         }
             /* a quoted ICMP packet gets the ICMP age value for the entry */
1910         if (oip->ip_p == IPPROTO_ICMP)
1911                 nat->nat_age = fr_defnaticmpage;
1912         return nat;
1913 }
1914
1915
1916 /*
1917  * NB: these lookups don't lock access to the list; they assume it has
1918  * already been done!
1919  */
1920 /*
1921  * nat_inlookup - find the NAT entry matching an inbound packet: src must
1922  * equal nat_oip and mapdst nat_outip; the low half of ports is checked
1923  * against nat_oport and the high half against nat_outport.  Entries hashed
1924  * with ports are tried first; failing that, for TCP/UDP while ns_wilds is
1925  * set, FI_WILDP entries hashed without ports are scanned (write-locking
1926  * ipf_nat when rw is zero) and a match is re-hashed via nat_tabmove().
1927  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
1928  */
1929 nat_t *nat_inlookup(ifp, flags, p, src, mapdst, ports, rw)
1930 void *ifp;
1931 register u_int flags, p;
1932 struct in_addr src , mapdst;
1933 u_32_t ports;
1934 int rw;
1935 {
1936         register u_32_t mip, rip;
1937         register u_short rport, mport;
1938         register nat_t *np;
1939         register int nfl;
1940         u_int slot;
1941
1942         mip = mapdst.s_addr;
1943         rip = src.s_addr;
1944         mport = ports >> 16;
1945         rport = ports & 0xffff;
1946         flags &= IPN_TCPUDP;
1947
1948         slot = NAT_HASH_FN(mip, mport, 0xffffffff);
1949         slot = NAT_HASH_FN(rip, slot + rport, ipf_nattable_sz);
1950         for (np = nat_table[1][slot]; np != NULL; np = np->nat_hnext[1]) {
1951                 nfl = np->nat_flags;
1952                 if ((ifp && ifp != np->nat_ifp) ||
1953                     np->nat_oip.s_addr != rip || np->nat_outip.s_addr != mip)
1954                         continue;
1955                 if ((p != np->nat_p) &&
1956                     !((p == 0) && (flags == (np->nat_flags & IPN_TCPUDP))))
1957                         continue;
1958                 if (!flags ||
1959                     (((np->nat_oport == rport) || (nfl & FI_W_DPORT)) &&
1960                      ((np->nat_outport == mport) || (nfl & FI_W_SPORT))))
1961                         return np;
1962         }
1963         if (!nat_stats.ns_wilds || !(flags & IPN_TCPUDP))
1964                 return NULL;
1965         if (!rw) {
1966                 RWLOCK_EXIT(&ipf_nat);
1967         }
1968         slot = NAT_HASH_FN(mip, 0, 0xffffffff);
1969         slot = NAT_HASH_FN(rip, slot, ipf_nattable_sz);
1970         if (!rw) {
1971                 WRITE_ENTER(&ipf_nat);
1972         }
1973         for (np = nat_table[1][slot]; np != NULL; np = np->nat_hnext[1]) {
1974                 nfl = np->nat_flags;
1975                 if ((ifp && ifp != np->nat_ifp) || !(nfl & IPN_TCPUDP) ||
1976                     !(nfl & FI_WILDP))
1977                         continue;
1978                 if (np->nat_oip.s_addr != rip || np->nat_outip.s_addr != mip)
1979                         continue;
1980                 if (((np->nat_oport == rport) || (nfl & FI_W_DPORT)) &&
1981                     ((np->nat_outport == mport) || (nfl & FI_W_SPORT))) {
1982                         nat_tabmove(np, ports);
1983                         break;
1984                 }
1985         }
1986         if (!rw) {
1987                 MUTEX_DOWNGRADE(&ipf_nat);
1988         }
1989         return np;
1990 }
1991
1992
1993 /*
1994  * nat_tabmove - re-hash a NAT entry now that its ports are known.
1995  *
1996  * Only called for TCP/UDP entries that were put in the table without
1997  * hashing on the ports.  The low 16 bits of ports replace nat_inport and
1998  * nat_outport when nat_oport already equals the high 16 bits.  The entry
1999  * is then unlinked from both hash chains and pushed onto the head of the
2000  * buckets selected by the port-inclusive hash.
2001  */
2002 static void nat_tabmove(nat, ports)
2003 nat_t *nat;
2004 u_32_t ports;
2005 {
2006         register u_short lo, hi;
2007         nat_t **head;
2008         u_32_t addr;
2009         u_int hv;
2010         int t;
2011
2012         lo = ports & 0xffff;
2013         hi = ports >> 16;
2014
2015         if (nat->nat_oport == hi) {
2016                 nat->nat_inport = lo;
2017                 nat->nat_outport = lo;
2018         }
2019
2020         /*
2021          * Detach the entry from its current bucket in each table.
2022          */
2023         for (t = 0; t < 2; t++) {
2024                 if (nat->nat_hnext[t])
2025                         nat->nat_hnext[t]->nat_phnext[t] = nat->nat_phnext[t];
2026                 *nat->nat_phnext[t] = nat->nat_hnext[t];
2027         }
2028
2029         /*
2030          * Table 0 is keyed on nat_inip, table 1 on nat_outip; both hashes
2031          * also fold in the low port, nat_oip and the high port.
2032          */
2033         for (t = 0; t < 2; t++) {
2034                 addr = t ? nat->nat_outip.s_addr : nat->nat_inip.s_addr;
2035                 hv = NAT_HASH_FN(addr, lo, 0xffffffff);
2036                 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + hi,
2037                                  ipf_nattable_sz);
2038                 head = &nat_table[t][hv];
2039                 if (*head)
2040                         (*head)->nat_phnext[t] = &nat->nat_hnext[t];
2041                 nat->nat_phnext[t] = head;
2042                 nat->nat_hnext[t] = *head;
2043                 *head = nat;
2044         }
2045 }
2046
2047
2048 /*
2049  * Lookup a nat entry based on the source 'real' ip address/port and
2050  * destination address/port.  We use this lookup when sending a packet out,
2051  * we're looking for a table entry, based on the source address.
2052  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
      *
      * ifp   - if non-NULL, only entries with a matching nat_ifp are accepted.
      * flags - masked with IPN_TCPUDP; when non-zero the ports must match too.
      * p     - IP protocol to match against nat_p; with p == 0 the entry's
      *         TCP/UDP flags must equal flags instead.
      * src   - compared with nat_inip.
      * dst   - compared with nat_oip.
      * ports - low 16 bits are compared with nat_inport, high 16 bits with
      *         nat_oport.
      * rw    - when zero, the ipf_nat lock is released, re-taken for writing
      *         around the wildcard scan and downgraded before returning;
      *         when non-zero no locking is done here (presumably the caller
      *         already holds the write lock - confirm against callers).
      *
      * Returns the matching entry or NULL.  An entry found by the wildcard
      * scan has been re-hashed with nat_tabmove() before it is returned.
2053  */
2054 nat_t *nat_outlookup(ifp, flags, p, src, dst, ports, rw)
2055 void *ifp;
2056 register u_int flags, p;
2057 struct in_addr src , dst;
2058 u_32_t ports;
2059 int rw;
2060 {
2061         register u_short sport, dport;
2062         register nat_t *nat;
2063         register int nflags;
2064         u_32_t srcip;
2065         u_int hv;
2066
2067         sport = ports & 0xffff;
2068         dport = ports >> 16;
2069         flags &= IPN_TCPUDP;
2070         srcip = src.s_addr;
2071
             /*
              * First pass: the bucket selected by the port-inclusive hash.
              */
2072         hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
2073         hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
2074         nat = nat_table[0][hv];
2075         for (; nat; nat = nat->nat_hnext[0]) {
2076                 nflags = nat->nat_flags;
2077
2078                 if ((!ifp || ifp == nat->nat_ifp) &&
2079                     nat->nat_inip.s_addr == srcip &&
2080                     nat->nat_oip.s_addr == dst.s_addr &&
2081                     (((p == 0) && (flags == (nflags & IPN_TCPUDP)))
2082                      || (p == nat->nat_p)) && (!flags ||
2083                      ((nat->nat_inport == sport || nflags & FI_W_SPORT) &&
2084                       (nat->nat_oport == dport || nflags & FI_W_DPORT))))
2085                         return nat;
2086         }
             /*
              * The second pass only makes sense for TCP/UDP lookups and only
              * while nat_stats.ns_wilds is non-zero.
              */
2087         if (!nat_stats.ns_wilds || !(flags & IPN_TCPUDP))
2088                 return NULL;
2089         if (!rw) {
2090                 RWLOCK_EXIT(&ipf_nat);
2091         }
             /*
              * Second pass: the bucket for entries hashed with both ports
              * zero.  The first hashing step must use the same 0xffffffff
              * modulus as the port-inclusive hash above, nat_tabmove() and
              * the equivalent pass in nat_inlookup(); reducing by
              * ipf_nattable_sz at this step can select a different bucket
              * and so miss the entry.
              */
2092         hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
2093         hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
2094         if (!rw) {
2095                 WRITE_ENTER(&ipf_nat);
2096         }
2097         nat = nat_table[0][hv];
2098         for (; nat; nat = nat->nat_hnext[0]) {
2099                 nflags = nat->nat_flags;
2100                 if (ifp && ifp != nat->nat_ifp)
2101                         continue;
2102                 if (!(nflags & IPN_TCPUDP))
2103                         continue;
2104                 if (!(nflags & FI_WILDP))
2105                         continue;
2106                 if ((nat->nat_inip.s_addr != srcip) ||
2107                     (nat->nat_oip.s_addr != dst.s_addr))
2108                         continue;
2109                 if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
2110                     ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
                             /* move it to the port-inclusive buckets */
2111                         nat_tabmove(nat, ports);
2112                         break;
2113                 }
2114         }
2115         if (!rw) {
2116                 MUTEX_DOWNGRADE(&ipf_nat);
2117         }
             /* nat is NULL here if the scan ran off the end of the chain */
2118         return nat;
2119 }
2120
2121
2122 /*
2123  * nat_lookupredir - resolve the session described by a natlookup_t via
2124  * nat_outlookup(), with nl_outport as the high and nl_inport as the low
2125  * 16 bits of the port pair.  On a hit nat_outip/nat_outport are copied to
2126  * nl_realip/nl_realport.  Returns the entry found, or NULL if none.
2127  */
2128 nat_t *nat_lookupredir(np)
2129 register natlookup_t *np;
2130 {
2131         u_32_t portpair;
2132         nat_t *found;
2133
2134         portpair = (np->nl_outport << 16) | np->nl_inport;
2135         found = nat_outlookup(NULL, np->nl_flags, 0, np->nl_inip, np->nl_outip,
2136                               portpair, 0);
2137         if (found != NULL) {
2138                 np->nl_realip = found->nat_outip;
2139                 np->nl_realport = found->nat_outport;
2140         }
2141         return found;
2142 }
2143
2144
2145 /*
2146  * nat_match - check an IPv4 packet against the selectors of NAT rule np.
2147  * Returns 1 on a match and 0 otherwise.
2148  */
2149 static int nat_match(fin, np, ip)
2150 fr_info_t *fin;
2151 ipnat_t *np;
2152 ip_t *ip;
2153 {
2154         u_32_t srcmask, srcaddr, dstmask, dstaddr;
2155         frtuc_t *ft = &np->in_tuc;
2156
2157         if ((ip->ip_v != 4) || (np->in_p && ip->ip_p != np->in_p))
2158                 return 0;
2159
2160         if (fin->fin_out) {
2161                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2162                         return 0;
2163                 srcmask = np->in_inmsk;
2164                 srcaddr = np->in_inip;
2165                 dstmask = np->in_srcmsk;
2166                 dstaddr = np->in_srcip;
2167         } else {
2168                 if (!(np->in_redir & NAT_REDIRECT))
2169                         return 0;
2170                 srcmask = np->in_srcmsk;
2171                 srcaddr = np->in_srcip;
2172                 dstmask = np->in_outmsk;
2173                 dstaddr = np->in_outip;
2174         }
2175         if (((fin->fin_fi.fi_saddr & srcmask) != srcaddr) ^
2176             ((np->in_flags & IPN_NOTSRC) != 0))
2177                 return 0;
2178         if (((fin->fin_fi.fi_daddr & dstmask) != dstaddr) ^
2179             ((np->in_flags & IPN_NOTDST) != 0))
2180                 return 0;
2181
2182         if ((fin->fin_fl & FI_TCPUDP) && !(fin->fin_fl & FI_SHORT) &&
2183             (fin->fin_off == 0))
2184                 return fr_tcpudpchk(ft, fin);
2185         return (ft->ftu_scmp || ft->ftu_dcmp) ? 0 : 1;
2186 }
2187
2188
2189 /*
2190  * Packets going out on the external interface go through this.
2191  * Here, the source address requires alteration, if anything.
2192  */
2193 int ip_natout(ip, fin)
2194 ip_t *ip;
2195 fr_info_t *fin;
2196 {
2197         register ipnat_t *np = NULL;
2198         register u_32_t ipa;
2199         tcphdr_t *tcp = NULL;
2200         u_short sport = 0, dport = 0, *csump = NULL;
2201         int natadd = 1, i, icmpset = 1;
2202         u_int nflags = 0, hv, msk;
2203         struct ifnet *ifp;
2204         frentry_t *fr;
2205         u_32_t iph;
2206         nat_t *nat;
2207
2208         if (nat_list == NULL || (fr_nat_lock))
2209                 return 0;
2210
2211         if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
2212             fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1)
2213                 ifp = fr->fr_tif.fd_ifp;
2214         else
2215                 ifp = fin->fin_ifp;
2216
2217         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2218                 if (ip->ip_p == IPPROTO_TCP)
2219                         nflags = IPN_TCP;
2220                 else if (ip->ip_p == IPPROTO_UDP)
2221                         nflags = IPN_UDP;
2222                 if ((nflags & IPN_TCPUDP)) {
2223                         tcp = (tcphdr_t *)fin->fin_dp;
2224                         sport = tcp->th_sport;
2225                         dport = tcp->th_dport;
2226                 }
2227         }
2228
2229         ipa = ip->ip_src.s_addr;
2230
2231         READ_ENTER(&ipf_nat);
2232
2233         if ((ip->ip_p == IPPROTO_ICMP) &&
2234             (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
2235                 icmpset = 1;
2236         else if ((fin->fin_fl & FI_FRAG) &&
2237             (nat = ipfr_nat_knownfrag(ip, fin)))
2238                 natadd = 0;
2239         else if ((nat = nat_outlookup(ifp, nflags, (u_int)ip->ip_p,
2240                                       ip->ip_src, ip->ip_dst,
2241                                       (dport << 16) | sport, 0))) {
2242                 nflags = nat->nat_flags;
2243                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2244                         if ((nflags & FI_W_SPORT) &&
2245                             (nat->nat_inport != sport))
2246                                 nat->nat_inport = sport;
2247                         else if ((nflags & FI_W_DPORT) &&
2248                                  (nat->nat_oport != dport))
2249                                 nat->nat_oport = dport;
2250                         if (nat->nat_outport == 0)
2251                                 nat->nat_outport = sport;
2252                         nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
2253                         nflags = nat->nat_flags;
2254                         nat_stats.ns_wilds--;
2255                 }
2256         } else {
2257                 RWLOCK_EXIT(&ipf_nat);
2258                 WRITE_ENTER(&ipf_nat);
2259                 /*
2260                  * If there is no current entry in the nat table for this IP#,
2261                  * create one for it (if there is a matching rule).
2262                  */
2263                 msk = 0xffffffff;
2264                 i = 32;
2265 maskloop:
2266                 iph = ipa & htonl(msk);
2267                 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
2268                 for (np = nat_rules[hv]; np; np = np->in_mnext)
2269                 {
2270                         if (np->in_ifp && (np->in_ifp != ifp))
2271                                 continue;
2272                         if ((np->in_flags & IPN_RF) &&
2273                             !(np->in_flags & nflags))
2274                                 continue;
2275                         if (np->in_flags & IPN_FILTER) {
2276                                 if (!nat_match(fin, np, ip))
2277                                         continue;
2278                         } else if ((ipa & np->in_inmsk) != np->in_inip)
2279                                 continue;
2280                         if (np->in_redir & (NAT_MAP|NAT_MAPBLK)) {
2281                                 if (*np->in_plabel && !appr_ok(ip, tcp, np))
2282                                         continue;
2283                                 /*
2284                                  * If it's a redirection, then we don't want to
2285                                  * create new outgoing port stuff.
2286                                  * Redirections are only for incoming
2287                                  * connections.
2288                                  */
2289                                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2290                                         continue;
2291                                 if ((nat = nat_new(np, ip, fin, (u_int)nflags,
2292                                                     NAT_OUTBOUND))) {
2293                                         np->in_hits++;
2294                                         break;
2295                                 }
2296                         }
2297                 }
2298                 if ((np == NULL) && (i > 0)) {
2299                         do {
2300                                 i--;
2301                                 msk <<= 1;
2302                         } while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
2303                         if (i >= 0)
2304                                 goto maskloop;
2305                 }
2306                 MUTEX_DOWNGRADE(&ipf_nat);
2307         }
2308
2309         /*
2310          * NOTE: ipf_nat must now only be held as a read lock
2311          */
2312         if (nat) {
2313                 np = nat->nat_ptr;
2314                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2315                         ipfr_nat_newfrag(ip, fin, 0, nat);
2316                 MUTEX_ENTER(&nat->nat_lock);
2317                 nat->nat_age = fr_defnatage;
2318                 nat->nat_bytes += ip->ip_len;
2319                 nat->nat_pkts++;
2320                 MUTEX_EXIT(&nat->nat_lock);
2321
2322                 /*
2323                  * Fix up checksums, not by recalculating them, but
2324                  * simply computing adjustments.
2325                  */
2326                 if (nflags == IPN_ICMPERR) {
2327                         u_32_t s1, s2, sumd;
2328
2329                         s1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
2330                         s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
2331                         CALC_SUMD(s1, s2, sumd);
2332
2333                         if (nat->nat_dir == NAT_OUTBOUND)
2334                                 fix_incksum(fin, &ip->ip_sum, sumd);
2335                         else
2336                                 fix_outcksum(fin, &ip->ip_sum, sumd);
2337                 }
2338 #if SOLARIS || defined(__sgi)
2339                 else {
2340                         if (nat->nat_dir == NAT_OUTBOUND)
2341                                 fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2342                         else
2343                                 fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2344                 }
2345 #endif
2346                 ip->ip_src = nat->nat_outip;
2347
2348                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2349
2350                         if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
2351                                 tcp->th_sport = nat->nat_outport;
2352                                 fin->fin_data[0] = ntohs(tcp->th_sport);
2353                         }
2354
2355                         if (ip->ip_p == IPPROTO_TCP) {
2356                                 csump = &tcp->th_sum;
2357                                 MUTEX_ENTER(&nat->nat_lock);
2358                                 fr_tcp_age(&nat->nat_age,
2359                                            nat->nat_tcpstate, fin, 1);
2360                                 if (nat->nat_age < fr_defnaticmpage)
2361                                         nat->nat_age = fr_defnaticmpage;
2362 #ifdef LARGE_NAT
2363                                 else if (nat->nat_age > fr_defnatage)
2364                                         nat->nat_age = fr_defnatage;
2365 #endif
2366                                 /*
2367                                  * Increase this because we may have
2368                                  * "keep state" following this too and
2369                                  * packet storms can occur if this is
2370                                  * removed too quickly.
2371                                  */
2372                                 if (nat->nat_age == fr_tcpclosed)
2373                                         nat->nat_age = fr_tcplastack;
2374                                 MUTEX_EXIT(&nat->nat_lock);
2375                         } else if (ip->ip_p == IPPROTO_UDP) {
2376                                 udphdr_t *udp = (udphdr_t *)tcp;
2377
2378                                 if (udp->uh_sum)
2379                                         csump = &udp->uh_sum;
2380                         } else if (ip->ip_p == IPPROTO_ICMP) {
2381                                 if (!icmpset)
2382                                         nat->nat_age = fr_defnaticmpage;
2383                         }
2384
2385                         if (csump) {
2386                                 if (nat->nat_dir == NAT_OUTBOUND)
2387                                         fix_outcksum(fin, csump, nat->nat_sumd[1]);
2388                                 else
2389                                         fix_incksum(fin, csump, nat->nat_sumd[1]);
2390                         }
2391                 }
2392
2393                 if ((np->in_apr != NULL) && (np->in_dport == 0 ||
2394                      (tcp != NULL && dport == np->in_dport))) {
2395                         i = appr_check(ip, fin, nat);
2396                         if (i == 0)
2397                                 i = 1;
2398                 } else
2399                         i = 1;
2400                 ATOMIC_INCL(nat_stats.ns_mapped[1]);
2401                 RWLOCK_EXIT(&ipf_nat);  /* READ */
2402                 return i;
2403         }
2404         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2405         return 0;
2406 }
2407
2408
2409 /*
2410  * Packets coming in from the external interface go through this.
2411  * Here, the destination address requires alteration, if anything.
2412  */
2413 int ip_natin(ip, fin)
2414 ip_t *ip;
2415 fr_info_t *fin;
2416 {
2417         register struct in_addr src;
2418         register struct in_addr in;
2419         register ipnat_t *np;
2420         u_short sport = 0, dport = 0, *csump = NULL;
2421         u_int nflags = 0, natadd = 1, hv, msk;
2422         struct ifnet *ifp = fin->fin_ifp;
2423         tcphdr_t *tcp = NULL;
2424         int i, icmpset = 0;
2425         nat_t *nat;
2426         u_32_t iph;
2427
2428         if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
2429                 return 0;
2430
2431         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2432                 if (ip->ip_p == IPPROTO_TCP)
2433                         nflags = IPN_TCP;
2434                 else if (ip->ip_p == IPPROTO_UDP)
2435                         nflags = IPN_UDP;
2436                 if ((nflags & IPN_TCPUDP)) {
2437                         tcp = (tcphdr_t *)fin->fin_dp;
2438                         dport = tcp->th_dport;
2439                         sport = tcp->th_sport;
2440                 }
2441         }
2442
2443         in = ip->ip_dst;
2444         /* make sure the source address is to be redirected */
2445         src = ip->ip_src;
2446
2447         READ_ENTER(&ipf_nat);
2448
2449         if ((ip->ip_p == IPPROTO_ICMP) &&
2450             (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
2451                 icmpset = 1;
2452         else if ((fin->fin_fl & FI_FRAG) &&
2453                  (nat = ipfr_nat_knownfrag(ip, fin)))
2454                 natadd = 0;
2455         else if ((nat = nat_inlookup(fin->fin_ifp, nflags, (u_int)ip->ip_p,
2456                                      ip->ip_src, in, (dport << 16) | sport,
2457                                      0))) {
2458                 nflags = nat->nat_flags;
2459                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2460                         if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
2461                                 nat->nat_oport = sport;
2462                         else if ((nat->nat_outport != dport) &&
2463                                  (nflags & FI_W_SPORT))
2464                                 nat->nat_outport = dport;
2465                         nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2466                         nflags = nat->nat_flags;
2467                         nat_stats.ns_wilds--;
2468                 }
2469         } else {
2470                 RWLOCK_EXIT(&ipf_nat);
2471                 WRITE_ENTER(&ipf_nat);
2472                 /*
2473                  * If there is no current entry in the nat table for this IP#,
2474                  * create one for it (if there is a matching rule).
2475                  */
2476                 msk = 0xffffffff;
2477                 i = 32;
2478 maskloop:
2479                 iph = in.s_addr & htonl(msk);
2480                 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2481                 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2482                         if ((np->in_ifp && (np->in_ifp != ifp)) ||
2483                             (np->in_p && (np->in_p != ip->ip_p)) ||
2484                             (np->in_flags && !(nflags & np->in_flags)))
2485                                 continue;
2486                         if (np->in_flags & IPN_FILTER) {
2487                                 if (!nat_match(fin, np, ip))
2488                                         continue;
2489                         } else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2490                                 continue;
2491                         if ((np->in_redir & NAT_REDIRECT) &&
2492                             (!np->in_pmin || (np->in_flags & IPN_FILTER) ||
2493                              ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2494                               (ntohs(dport) >= ntohs(np->in_pmin)))))
2495                                 if ((nat = nat_new(np, ip, fin, nflags,
2496                                                     NAT_INBOUND))) {
2497                                         np->in_hits++;
2498                                         break;
2499                                 }
2500                 }
2501
2502                 if ((np == NULL) && (i > 0)) {
2503                         do {
2504                                 i--;
2505                                 msk <<= 1;
2506                         } while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2507                         if (i >= 0)
2508                                 goto maskloop;
2509                 }
2510                 MUTEX_DOWNGRADE(&ipf_nat);
2511         }
2512
2513         /*
2514          * NOTE: ipf_nat must now only be held as a read lock
2515          */
2516         if (nat) {
2517                 np = nat->nat_ptr;
2518                 fin->fin_fr = nat->nat_fr;
2519                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2520                         ipfr_nat_newfrag(ip, fin, 0, nat);
2521                 if ((np->in_apr != NULL) && (np->in_dport == 0 ||
2522                     (tcp != NULL && sport == np->in_dport))) {
2523                         i = appr_check(ip, fin, nat);
2524                         if (i == -1) {
2525                                 RWLOCK_EXIT(&ipf_nat);
2526                                 return i;
2527                         }
2528                 }
2529
2530                 MUTEX_ENTER(&nat->nat_lock);
2531                 if (nflags != IPN_ICMPERR)
2532                         nat->nat_age = fr_defnatage;
2533
2534                 nat->nat_bytes += ip->ip_len;
2535                 nat->nat_pkts++;
2536                 MUTEX_EXIT(&nat->nat_lock);
2537                 ip->ip_dst = nat->nat_inip;
2538                 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2539
2540                 /*
2541                  * Fix up checksums, not by recalculating them, but
2542                  * simply computing adjustments.
2543                  */
2544 #if SOLARIS || defined(__sgi)
2545                 if (nat->nat_dir == NAT_OUTBOUND)
2546                         fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2547                 else
2548                         fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2549 #endif
2550                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2551
2552                         if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
2553                                 tcp->th_dport = nat->nat_inport;
2554                                 fin->fin_data[1] = ntohs(tcp->th_dport);
2555                         }
2556
2557                         if (ip->ip_p == IPPROTO_TCP) {
2558                                 csump = &tcp->th_sum;
2559                                 MUTEX_ENTER(&nat->nat_lock);
2560                                 fr_tcp_age(&nat->nat_age,
2561                                            nat->nat_tcpstate, fin, 0);
2562                                 if (nat->nat_age < fr_defnaticmpage)
2563                                         nat->nat_age = fr_defnaticmpage;
2564 #ifdef LARGE_NAT
2565                                 else if (nat->nat_age > fr_defnatage)
2566                                         nat->nat_age = fr_defnatage;
2567 #endif
2568                                 /*
2569                                  * Increase this because we may have
2570                                  * "keep state" following this too and
2571                                  * packet storms can occur if this is
2572                                  * removed too quickly.
2573                                  */
2574                                 if (nat->nat_age == fr_tcpclosed)
2575                                         nat->nat_age = fr_tcplastack;
2576                                 MUTEX_EXIT(&nat->nat_lock);
2577                         } else if (ip->ip_p == IPPROTO_UDP) {
2578                                 udphdr_t *udp = (udphdr_t *)tcp;
2579
2580                                 if (udp->uh_sum)
2581                                         csump = &udp->uh_sum;
2582                         } else if (ip->ip_p == IPPROTO_ICMP) {
2583                                 if (!icmpset)
2584                                         nat->nat_age = fr_defnaticmpage;
2585                         }
2586
2587                         if (csump) {
2588                                 if (nat->nat_dir == NAT_OUTBOUND)
2589                                         fix_incksum(fin, csump, nat->nat_sumd[0]);
2590                                 else
2591                                         fix_outcksum(fin, csump, nat->nat_sumd[0]);
2592                         }
2593                 }
2594                 ATOMIC_INCL(nat_stats.ns_mapped[0]);
2595                 RWLOCK_EXIT(&ipf_nat);                  /* READ */
2596                 return 1;
2597         }
2598         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2599         return 0;
2600 }
2601
2602
2603 /*
2604  * Free all memory used by NAT structures allocated at runtime.
2605  */
2606 void ip_natunload()
2607 {
2608         WRITE_ENTER(&ipf_nat);
2609         (void) nat_clearlist();
2610         (void) nat_flushtable();
2611         RWLOCK_EXIT(&ipf_nat);
2612
2613         if (nat_table[0] != NULL) {
2614                 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2615                 nat_table[0] = NULL;
2616         }
2617         if (nat_table[1] != NULL) {
2618                 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2619                 nat_table[1] = NULL;
2620         }
2621         if (nat_rules != NULL) {
2622                 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2623                 nat_rules = NULL;
2624         }
2625         if (rdr_rules != NULL) {
2626                 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2627                 rdr_rules = NULL;
2628         }
2629         if (maptable != NULL) {
2630                 KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2631                 maptable = NULL;
2632         }
2633 }
2634
2635
2636 /*
2637  * Slowly expire held state for NAT entries.  Timeouts are set in
2638  * expectation of this being called twice per second.
2639  */
2640 void ip_natexpire()
2641 {
2642         register struct nat *nat, **natp;
2643 #if defined(_KERNEL) && !SOLARIS
2644         int s;
2645 #endif
2646
2647         SPL_NET(s);
2648         WRITE_ENTER(&ipf_nat);
2649         for (natp = &nat_instances; (nat = *natp); ) {
2650                 nat->nat_age--;
2651                 if (nat->nat_age) {
2652                         natp = &nat->nat_next;
2653                         continue;
2654                 }
2655                 *natp = nat->nat_next;
2656 #ifdef  IPFILTER_LOG
2657                 nat_log(nat, NL_EXPIRE);
2658 #endif
2659                 nat_delete(nat);
2660                 nat_stats.ns_expire++;
2661         }
2662         RWLOCK_EXIT(&ipf_nat);
2663         SPL_X(s);
2664 }
2665
2666
2667 /*
2668  */
2669 void ip_natsync(ifp)
2670 void *ifp;
2671 {
2672         register ipnat_t *n;
2673         register nat_t *nat;
2674         register u_32_t sum1, sum2, sumd;
2675         struct in_addr in;
2676         ipnat_t *np;
2677         void *ifp2;
2678 #if defined(_KERNEL) && !SOLARIS
2679         int s;
2680 #endif
2681
2682         /*
2683          * Change IP addresses for NAT sessions for any protocol except TCP
2684          * since it will break the TCP connection anyway.
2685          */
2686         SPL_NET(s);
2687         WRITE_ENTER(&ipf_nat);
2688         for (nat = nat_instances; nat; nat = nat->nat_next)
2689                 if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2690                     !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2691                     (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2692                         ifp2 = nat->nat_ifp;
2693                         /*
2694                          * Change the map-to address to be the same as the
2695                          * new one.
2696                          */
2697                         sum1 = nat->nat_outip.s_addr;
2698                         if (fr_ifpaddr(4, ifp2, &in) != -1)
2699                                 nat->nat_outip = in;
2700                         sum2 = nat->nat_outip.s_addr;
2701
2702                         if (sum1 == sum2)
2703                                 continue;
2704                         /*
2705                          * Readjust the checksum adjustment to take into
2706                          * account the new IP#.
2707                          */
2708                         CALC_SUMD(sum1, sum2, sumd);
2709                         /* XXX - dont change for TCP when solaris does
2710                          * hardware checksumming.
2711                          */
2712                         sumd += nat->nat_sumd[0];
2713                         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2714                         nat->nat_sumd[1] = nat->nat_sumd[0];
2715                 }
2716
2717         for (n = nat_list; (n != NULL); n = n->in_next)
2718                 if (n->in_ifp == ifp) {
2719                         n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2720                         if (!n->in_ifp)
2721                                 n->in_ifp = (void *)-1;
2722                 }
2723         RWLOCK_EXIT(&ipf_nat);
2724         SPL_X(s);
2725 }
2726
2727
#ifdef  IPFILTER_LOG
/*
 * Build a natlog record describing a NAT session and pass it to ipllog()
 * on the IPL_LOGNAT device.
 *
 * nat  - NAT session to describe
 * type - event type stored in the record's nl_type field
 *
 * nl_rule is the position of the session's rule in nat_list, or -1 when
 * the session has no rule, the rule is not on the list, or LARGE_NAT is
 * defined (in which case the list is not searched).
 */
void nat_log(nat, type)
struct nat *nat;
u_int type;
{
        struct ipnat *np;
        struct natlog natl;
        void *items[1];
        size_t sizes[1];
        int rulen, types[1];

        /* Event type and protocol. */
        natl.nl_type = type;
        natl.nl_p = nat->nat_p;

        /* Addresses. */
        natl.nl_inip = nat->nat_inip;
        natl.nl_outip = nat->nat_outip;
        natl.nl_origip = nat->nat_oip;

        /* Ports. */
        natl.nl_inport = nat->nat_inport;
        natl.nl_outport = nat->nat_outport;
        natl.nl_origport = nat->nat_oport;

        /* Traffic counters. */
        natl.nl_bytes = nat->nat_bytes;
        natl.nl_pkts = nat->nat_pkts;

        natl.nl_rule = -1;
#ifndef LARGE_NAT
        if (nat->nat_ptr != NULL) {
                rulen = 0;
                np = nat_list;
                while (np != NULL) {
                        if (np == nat->nat_ptr) {
                                natl.nl_rule = rulen;
                                break;
                        }
                        np = np->in_next;
                        rulen++;
                }
        }
#endif

        /* A single item: the record itself. */
        items[0] = &natl;
        sizes[0] = sizeof(natl);
        types[0] = 0;

        (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
}
#endif