]> CyberLeo.Net >> Repos - FreeBSD/releng/9.2.git/blob - sys/contrib/ipfilter/netinet/ip_nat.c
- Copy stable/9 to releng/9.2 as part of the 9.2-RELEASE cycle.
[FreeBSD/releng/9.2.git] / sys / contrib / ipfilter / netinet / ip_nat.c
1 /*      $FreeBSD$       */
2
3 /*
4  * Copyright (C) 1995-2003 by Darren Reed.
5  *
6  * See the IPFILTER.LICENCE file for details on licencing.
7  */
8 #if defined(KERNEL) || defined(_KERNEL)
9 # undef KERNEL
10 # undef _KERNEL
11 # define        KERNEL  1
12 # define        _KERNEL 1
13 #endif
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
17 #include <sys/time.h>
18 #include <sys/file.h>
19 #if defined(_KERNEL) && defined(__NetBSD_Version__) && \
20     (__NetBSD_Version__ >= 399002000)
21 # include <sys/kauth.h>
22 #endif
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24     defined(_KERNEL)
25 #if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
26 #  include "opt_ipfilter_log.h"
27 # else
28 #  include "opt_ipfilter.h"
29 # endif
30 #endif
31 #if !defined(_KERNEL)
32 # include <stdio.h>
33 # include <string.h>
34 # include <stdlib.h>
35 # define _KERNEL
36 # ifdef __OpenBSD__
37 struct file;
38 # endif
39 # include <sys/uio.h>
40 # undef _KERNEL
41 #endif
42 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
43 # include <sys/filio.h>
44 # include <sys/fcntl.h>
45 #else
46 # include <sys/ioctl.h>
47 #endif
48 #if !defined(AIX)
49 # include <sys/fcntl.h>
50 #endif
51 #if !defined(linux)
52 # include <sys/protosw.h>
53 #endif
54 #include <sys/socket.h>
55 #if defined(_KERNEL)
56 # include <sys/systm.h>
57 # if !defined(__SVR4) && !defined(__svr4__)
58 #  include <sys/mbuf.h>
59 # endif
60 #endif
61 #if defined(__SVR4) || defined(__svr4__)
62 # include <sys/filio.h>
63 # include <sys/byteorder.h>
64 # ifdef _KERNEL
65 #  include <sys/dditypes.h>
66 # endif
67 # include <sys/stream.h>
68 # include <sys/kmem.h>
69 #endif
70 #if __FreeBSD_version >= 300000
71 # include <sys/queue.h>
72 #endif
73 #include <net/if.h>
74 #if __FreeBSD_version >= 300000
75 # include <net/if_var.h>
76 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
77 #  include "opt_ipfilter.h"
78 # endif
79 #endif
80 #ifdef sun
81 # include <net/af.h>
82 #endif
83 #include <net/route.h>
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/ip.h>
87
88 #ifdef RFC1825
89 # include <vpn/md5.h>
90 # include <vpn/ipsec.h>
91 extern struct ifnet vpnif;
92 #endif
93
94 #if !defined(linux)
95 # include <netinet/ip_var.h>
96 #endif
97 #include <netinet/tcp.h>
98 #include <netinet/udp.h>
99 #include <netinet/ip_icmp.h>
100 #include "netinet/ip_compat.h"
101 #include <netinet/tcpip.h>
102 #include "netinet/ip_fil.h"
103 #include "netinet/ip_nat.h"
104 #include "netinet/ip_frag.h"
105 #include "netinet/ip_state.h"
106 #include "netinet/ip_proxy.h"
107 #ifdef  IPFILTER_SYNC
108 #include "netinet/ip_sync.h"
109 #endif
110 #if (__FreeBSD_version >= 300000)
111 # include <sys/malloc.h>
112 #endif
113 /* END OF INCLUDES */
114
115 #undef  SOCKADDR_IN     /* drop any earlier definition before redefining it below */
116 #define SOCKADDR_IN     struct sockaddr_in      /* shorthand for the IPv4 socket address structure */
117
118 #if !defined(lint)      /* version-identification strings; compiled out when lint is defined */
119 static const char sccsid[] = "@(#)ip_nat.c      1.11 6/5/96 (C) 1995 Darren Reed";
120 static const char rcsid[] = "@(#)$FreeBSD$";
121 /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */
122 #endif
123
124
125 /* ======================================================================== */
126 /* How the NAT is organised and works.                                      */
127 /*                                                                          */
128 /* Inside (interface y) NAT       Outside (interface x)                     */
129 /* -------------------- -+- -------------------------------------           */
130 /* Packet going          |   out, processed by fr_checknatout() for x       */
131 /* ------------>         |   ------------>                                  */
132 /* src=10.1.1.1          |   src=192.1.1.1                                  */
133 /*                       |                                                  */
134 /*                       |   in, processed by fr_checknatin() for x         */
135 /* <------------         |   <------------                                  */
136 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
137 /* -------------------- -+- -------------------------------------           */
138 /* fr_checknatout() - changes ip_src and if required, sport                 */
139 /*             - creates a new mapping, if required.                        */
140 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
141 /*                                                                          */
142 /* In the NAT table, internal source is recorded as "in" and externally     */
143 /* seen as "out".                                                           */
144 /* ======================================================================== */
145
146
147 nat_t   **nat_table[2] = { NULL, NULL },        /* two bucket arrays of ipf_nattable_sz pointers, allocated by fr_natinit() */
148         *nat_instances = NULL;
149 ipnat_t *nat_list = NULL;       /* head of the rule list walked (via in_next) by the add/remove ioctls */
150 u_int   ipf_nattable_max = NAT_TABLE_MAX;
151 u_int   ipf_nattable_sz = NAT_TABLE_SZ; /* bucket count of each nat_table[] array and each ns_bucketlen[] array */
152 u_int   ipf_natrules_sz = NAT_SIZE;     /* bucket count of nat_rules */
153 u_int   ipf_rdrrules_sz = RDR_SIZE;     /* bucket count of rdr_rules */
154 u_int   ipf_hostmap_sz = HOSTMAP_SIZE;  /* bucket count of ipf_hm_maptable */
155 u_int   fr_nat_maxbucket = 0,           /* when still 0, fr_natinit() derives a value from ipf_nattable_sz */
156         fr_nat_maxbucket_reset = 1;
157 u_32_t  nat_masks = 0;  /* bit k is set by nat_addnat() for a rule whose in_inmsk counts k bits */
158 u_32_t  rdr_masks = 0;  /* bit k is set by nat_addrdr() for a rule whose in_outmsk counts k bits */
159 u_long  nat_last_force_flush = 0;
160 ipnat_t **nat_rules = NULL;     /* hash buckets of map rules, chained through in_mnext */
161 ipnat_t **rdr_rules = NULL;     /* hash buckets of redirect rules, chained through in_rnext */
162 hostmap_t       **ipf_hm_maptable  = NULL;      /* hostmap hash buckets, chained through hm_hnext */
163 hostmap_t       *ipf_hm_maplist  = NULL;        /* head of the list of all hostmap entries (hm_next) */
164 ipftq_t nat_tqb[IPF_TCP_NSTATES];       /* per-TCP-state timeout queues, set up by fr_sttab_init() in fr_natinit() */
165 ipftq_t nat_udptq;      /* linked after the last nat_tqb[] entry; ttl taken from fr_defnatage */
166 ipftq_t nat_icmptq;     /* linked after nat_udptq; ttl taken from fr_defnaticmpage */
167 ipftq_t nat_iptq;       /* last queue in the chain; ttl taken from fr_defnatipage */
168 ipftq_t *nat_utqe = NULL;
169 int     fr_nat_doflush = 0;
170 #ifdef  IPFILTER_LOG
171 int     nat_logging = 1;        /* defaults to on when IPFILTER_LOG is compiled in; read/written by SIOCGETLG/SIOCSETLG */
172 #else
173 int     nat_logging = 0;
174 #endif
175
176 u_long  fr_defnatage = DEF_NAT_AGE,     /* also the upper ttl bound for nat_tqb[] when LARGE_NAT is defined */
177         fr_defnatipage = 120,           /* 60 seconds */
178         fr_defnaticmpage = 6;           /* 3 seconds */
179 natstat_t nat_stats;
180 int     fr_nat_lock = 0;
181 int     fr_nat_init = 0;        /* set to 1 at the end of a successful fr_natinit() */
182 #if SOLARIS && !defined(_INET_IP_STACK_H)
183 extern  int             pfil_delayed_copy;
184 #endif
185
186 static  int     nat_flush_entry __P((void *));  /* prototypes for functions private to this file start here */
187 static  int     nat_flushtable __P((void));
188 static  int     nat_clearlist __P((void));
189 static  void    nat_addnat __P((struct ipnat *));       /* append a map rule to its nat_rules bucket */
190 static  void    nat_addrdr __P((struct ipnat *));       /* append a redirect rule to its rdr_rules bucket */
191 static  void    nat_delrdr __P((struct ipnat *));       /* unlink a rule from its rdr_rules chain */
192 static  void    nat_delnat __P((struct ipnat *));       /* unlink a rule from its nat_rules chain */
193 static  int     fr_natgetent __P((caddr_t, int));
194 static  int     fr_natgetsz __P((caddr_t, int));
195 static  int     fr_natputent __P((caddr_t, int));
196 static  int     nat_extraflush __P((int));
197 static  int     nat_gettable __P((char *));
198 static  void    nat_tabmove __P((nat_t *));
199 static  int     nat_match __P((fr_info_t *, ipnat_t *));
200 static  INLINE  int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
201 static  INLINE  int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
202 static  hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,  /* find (and reference) or create a hostmap entry */
203                                     struct in_addr, struct in_addr, u_32_t));
204 static  int     nat_icmpquerytype4 __P((int));
205 static  int     nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
206 static  void    nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
207 static  int     nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
208                                       tcphdr_t *, nat_t **, int));
209 static  int     nat_resolverule __P((ipnat_t *));
210 static  nat_t   *fr_natclone __P((fr_info_t *, nat_t *));
211 static  void    nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
212 static  int     nat_wildok __P((nat_t *, int, int, int, int));
213 static  int     nat_getnext __P((ipftoken_t *, ipfgeniter_t *));
214 static  int     nat_iterator __P((ipftoken_t *, ipfgeniter_t *));
215
216
217 /* ------------------------------------------------------------------------ */
218 /* Function:    fr_natinit                                                  */
219 /* Returns:     int - 0 == success, < 0 == failure (-1 .. -7)               */
220 /* Parameters:  Nil                                                         */
221 /*                                                                          */
222 /* Initialise all of the NAT locks, tables and other structures.            */
223 /* ------------------------------------------------------------------------ */
224 int fr_natinit()
225 {
226         int i;
227
228         KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
229         if (nat_table[0] != NULL)
230                 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
231         else
232                 return -1;
233
234         KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
235         if (nat_table[1] != NULL)
236                 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
237         else
238                 return -2;
239
240         KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
241         if (nat_rules != NULL)
242                 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
243         else
244                 return -3;
245
246         KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
247         if (rdr_rules != NULL)
248                 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
249         else
250                 return -4;
251
252         KMALLOCS(ipf_hm_maptable, hostmap_t **, \
253                  sizeof(hostmap_t *) * ipf_hostmap_sz);
254         if (ipf_hm_maptable != NULL)
255                 bzero((char *)ipf_hm_maptable,
256                       sizeof(hostmap_t *) * ipf_hostmap_sz);
257         else
258                 return -5;
259         ipf_hm_maplist = NULL;
260
261         KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
262                  ipf_nattable_sz * sizeof(u_long));
263         if (nat_stats.ns_bucketlen[0] == NULL)
264                 return -6;
265         bzero((char *)nat_stats.ns_bucketlen[0],
266               ipf_nattable_sz * sizeof(u_long));
267
268         KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
269                  ipf_nattable_sz * sizeof(u_long));
270         if (nat_stats.ns_bucketlen[1] == NULL)
271                 return -7;
272
273         bzero((char *)nat_stats.ns_bucketlen[1],
274               ipf_nattable_sz * sizeof(u_long));
275
276         if (fr_nat_maxbucket == 0) {
277                 for (i = ipf_nattable_sz; i > 0; i >>= 1)
278                         fr_nat_maxbucket++;
279                 fr_nat_maxbucket *= 2;
280         }
281
282         fr_sttab_init(nat_tqb);
283         /*
284          * Increase this because we may have "keep state" following this too
285          * and packet storms can occur if this is removed too quickly.
286          */
287         nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
288         nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
289         nat_udptq.ifq_ttl = fr_defnatage;
290         nat_udptq.ifq_ref = 1;
291         nat_udptq.ifq_head = NULL;
292         nat_udptq.ifq_tail = &nat_udptq.ifq_head;
293         MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
294         nat_udptq.ifq_next = &nat_icmptq;
295         nat_icmptq.ifq_ttl = fr_defnaticmpage;
296         nat_icmptq.ifq_ref = 1;
297         nat_icmptq.ifq_head = NULL;
298         nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
299         MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
300         nat_icmptq.ifq_next = &nat_iptq;
301         nat_iptq.ifq_ttl = fr_defnatipage;
302         nat_iptq.ifq_ref = 1;
303         nat_iptq.ifq_head = NULL;
304         nat_iptq.ifq_tail = &nat_iptq.ifq_head;
305         MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
306         nat_iptq.ifq_next = NULL;
307
308         for (i = 0; i < IPF_TCP_NSTATES; i++) {
309                 if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
310                         nat_tqb[i].ifq_ttl = fr_defnaticmpage;
311 #ifdef LARGE_NAT
312                 else if (nat_tqb[i].ifq_ttl > fr_defnatage)
313                         nat_tqb[i].ifq_ttl = fr_defnatage;
314 #endif
315         }
316
317         /*
318          * Increase this because we may have "keep state" following
319          * this too and packet storms can occur if this is removed
320          * too quickly.
321          */
322         nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
323
324         RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
325         RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
326         MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
327         MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
328
329         fr_nat_init = 1;
330
331         return 0;
332 }
333
334
335 /* ------------------------------------------------------------------------ */
336 /* Function:    nat_addrdr                                                  */
337 /* Returns:     Nil                                                         */
338 /* Parameters:  n(I) - pointer to NAT rule to add                           */
339 /*                                                                          */
340 /* Adds a redirect rule to the hash table of redirect rules and the list of */
341 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
342 /* use by redirect rules.                                                   */
343 /* ------------------------------------------------------------------------ */
344 static void nat_addrdr(n)
345 ipnat_t *n;
346 {
347         ipnat_t **np;
348         u_32_t j;
349         u_int hv;
350         int k;
351
352         k = count4bits(n->in_outmsk);
353         if ((k >= 0) && (k != 32))
354                 rdr_masks |= 1 << k;
355         j = (n->in_outip & n->in_outmsk);
356         hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
357         np = rdr_rules + hv;
358         while (*np != NULL)
359                 np = &(*np)->in_rnext;
360         n->in_rnext = NULL;
361         n->in_prnext = np;
362         n->in_hv = hv;
363         *np = n;
364 }
365
366
367 /* ------------------------------------------------------------------------ */
368 /* Function:    nat_addnat                                                  */
369 /* Returns:     Nil                                                         */
370 /* Parameters:  n(I) - pointer to NAT rule to add                           */
371 /*                                                                          */
372 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
373 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
374 /* map rules.                                                               */
375 /* ------------------------------------------------------------------------ */
376 static void nat_addnat(n)
377 ipnat_t *n;
378 {
379         ipnat_t **np;
380         u_32_t j;
381         u_int hv;
382         int k;
383
384         k = count4bits(n->in_inmsk);
385         if ((k >= 0) && (k != 32))
386                 nat_masks |= 1 << k;
387         j = (n->in_inip & n->in_inmsk);
388         hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
389         np = nat_rules + hv;
390         while (*np != NULL)
391                 np = &(*np)->in_mnext;
392         n->in_mnext = NULL;
393         n->in_pmnext = np;
394         n->in_hv = hv;
395         *np = n;
396 }
397
398
399 /* ------------------------------------------------------------------------ */
400 /* Function:    nat_delrdr                                                  */
401 /* Returns:     Nil                                                         */
402 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
403 /*                                                                          */
404 /* Removes a redirect rule from the hash table of redirect rules.           */
405 /* ------------------------------------------------------------------------ */
406 static void nat_delrdr(n)
407 ipnat_t *n;
408 {
409         if (n->in_rnext)
410                 n->in_rnext->in_prnext = n->in_prnext;
411         *n->in_prnext = n->in_rnext;
412 }
413
414
415 /* ------------------------------------------------------------------------ */
416 /* Function:    nat_delnat                                                  */
417 /* Returns:     Nil                                                         */
418 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
419 /*                                                                          */
420 /* Removes a NAT map rule from the hash table of NAT map rules.             */
421 /* ------------------------------------------------------------------------ */
422 static void nat_delnat(n)
423 ipnat_t *n;
424 {
425         if (n->in_mnext != NULL)
426                 n->in_mnext->in_pmnext = n->in_pmnext;
427         *n->in_pmnext = n->in_mnext;
428 }
429
430
431 /* ------------------------------------------------------------------------ */
432 /* Function:    nat_hostmap                                                 */
433 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
434 /*                                else a pointer to the hostmapping to use  */
435 /* Parameters:  np(I)   - pointer to NAT rule                               */
436 /*              src(I)  - real (source) IP address; dst(I) - destination IP */
437 /*              map(I)  - mapped IP address                                 */
438 /*              port(I) - destination port number                           */
439 /* Write Locks: ipf_nat                                                     */
440 /*                                                                          */
441 /* Check if an ip address has already been allocated for a given mapping    */
442 /* that is not doing port based translation.  If it is not yet allocated,   */
443 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
444 /* ------------------------------------------------------------------------ */
445 static struct hostmap *nat_hostmap(np, src, dst, map, port)
446 ipnat_t *np;
447 struct in_addr src;
448 struct in_addr dst;
449 struct in_addr map;
450 u_32_t port;
451 {
452         hostmap_t *hm;
453         u_int hv;
454
455         hv = (src.s_addr ^ dst.s_addr);
456         hv += src.s_addr;
457         hv += dst.s_addr;
458         hv %= HOSTMAP_SIZE;
459         for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_next)
460                 if ((hm->hm_srcip.s_addr == src.s_addr) &&
461                     (hm->hm_dstip.s_addr == dst.s_addr) &&
462                     ((np == NULL) || (np == hm->hm_ipnat)) &&
463                     ((port == 0) || (port == hm->hm_port))) {
464                         hm->hm_ref++;
465                         return hm;
466                 }
467
468         if (np == NULL)
469                 return NULL;
470
471         KMALLOC(hm, hostmap_t *);
472         if (hm) {
473                 hm->hm_next = ipf_hm_maplist;
474                 hm->hm_pnext = &ipf_hm_maplist;
475                 if (ipf_hm_maplist != NULL)
476                         ipf_hm_maplist->hm_pnext = &hm->hm_next;
477                 ipf_hm_maplist = hm;
478                 hm->hm_hnext = ipf_hm_maptable[hv];
479                 hm->hm_phnext = ipf_hm_maptable + hv;
480                 if (ipf_hm_maptable[hv] != NULL)
481                         ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
482                 ipf_hm_maptable[hv] = hm;
483                 hm->hm_ipnat = np;
484                 hm->hm_srcip = src;
485                 hm->hm_dstip = dst;
486                 hm->hm_mapip = map;
487                 hm->hm_ref = 1;
488                 hm->hm_port = port;
489         }
490         return hm;
491 }
492
493
494 /* ------------------------------------------------------------------------ */
495 /* Function:    fr_hostmapdel                                               */
496 /* Returns:     Nil                                                         */
497 /* Parameters:  hmp(I) - pointer to hostmap structure pointer               */
498 /* Write Locks: ipf_nat                                                     */
499 /*                                                                          */
500 /* Decrement the references to this hostmap structure by one.  If this      */
501 /* reaches zero then remove it and free it.                                 */
502 /* ------------------------------------------------------------------------ */
503 void fr_hostmapdel(hmp)
504 struct hostmap **hmp;
505 {
506         struct hostmap *hm;
507
508         hm = *hmp;
509         *hmp = NULL;
510
511         hm->hm_ref--;
512         if (hm->hm_ref == 0) {
513                 if (hm->hm_hnext)
514                         hm->hm_hnext->hm_phnext = hm->hm_phnext;
515                 *hm->hm_phnext = hm->hm_hnext;
516                 if (hm->hm_next)
517                         hm->hm_next->hm_pnext = hm->hm_pnext;
518                 *hm->hm_pnext = hm->hm_next;
519                 KFREE(hm);
520         }
521 }
522
523
524 /* ------------------------------------------------------------------------ */
525 /* Function:    fix_outcksum                                                */
526 /* Returns:     Nil                                                         */
527 /* Parameters:  fin(I) - pointer to packet information                      */
528 /*              sp(I)  - location of 16bit checksum to update               */
529 /*              n(I)   - amount to adjust checksum by                       */
530 /*                                                                          */
531 /* Adjusts the 16bit checksum by "n" for packets going out.                 */
532 /* ------------------------------------------------------------------------ */
533 void fix_outcksum(fin, sp, n)
534 fr_info_t *fin;
535 u_short *sp;
536 u_32_t n;
537 {
538         u_short sumshort;
539         u_32_t sum1;
540
541         if (n == 0)
542                 return;
543
544         if (n & NAT_HW_CKSUM) {
545                 n &= 0xffff;
546                 n += fin->fin_dlen;
547                 n = (n & 0xffff) + (n >> 16);
548                 *sp = n & 0xffff;
549                 return;
550         }
551         sum1 = (~ntohs(*sp)) & 0xffff;
552         sum1 += (n);
553         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
554         /* Again */
555         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
556         sumshort = ~(u_short)sum1;
557         *(sp) = htons(sumshort);
558 }
559
560
561 /* ------------------------------------------------------------------------ */
562 /* Function:    fix_incksum                                                 */
563 /* Returns:     Nil                                                         */
564 /* Parameters:  fin(I) - pointer to packet information                      */
565 /*              sp(I)  - location of 16bit checksum to update               */
566 /*              n(I)   - amount to adjust checksum by                       */
567 /*                                                                          */
568 /* Adjusts the 16bit checksum by "n" for packets going in.                  */
569 /* ------------------------------------------------------------------------ */
570 void fix_incksum(fin, sp, n)
571 fr_info_t *fin;
572 u_short *sp;
573 u_32_t n;
574 {
575         u_short sumshort;
576         u_32_t sum1;
577
578         if (n == 0)
579                 return;
580
581         if (n & NAT_HW_CKSUM) {
582                 n &= 0xffff;
583                 n += fin->fin_dlen;
584                 n = (n & 0xffff) + (n >> 16);
585                 *sp = n & 0xffff;
586                 return;
587         }
588         sum1 = (~ntohs(*sp)) & 0xffff;
589         sum1 += ~(n) & 0xffff;
590         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
591         /* Again */
592         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
593         sumshort = ~(u_short)sum1;
594         *(sp) = htons(sumshort);
595 }
596
597
598 /* ------------------------------------------------------------------------ */
599 /* Function:    fix_datacksum                                               */
600 /* Returns:     Nil                                                         */
601 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
602 /*              n(I)   - amount to adjust checksum by                       */
603 /*                                                                          */
604 /* Fix_datacksum is used *only* for the adjustments of checksums in the     */
605 /* data section of an IP packet.                                            */
606 /*                                                                          */
607 /* The only situation in which you need to do this is when NAT'ing an       */
608 /* ICMP error message. Such a message contains in its body the IP header    */
609 /* of the original IP packet that caused the error.                         */
610 /*                                                                          */
611 /* You can't use fix_incksum or fix_outcksum in that case, because for the  */
612 /* kernel the data section of the ICMP error is just data, and no special   */
613 /* processing like hardware cksum or ntohs processing have been done by the */
614 /* kernel on the data section.                                              */
615 /* ------------------------------------------------------------------------ */
616 void fix_datacksum(sp, n)
617 u_short *sp;
618 u_32_t n;
619 {
620         u_short sumshort;
621         u_32_t sum1;
622
623         if (n == 0)
624                 return;
625
626         sum1 = (~ntohs(*sp)) & 0xffff;
627         sum1 += (n);
628         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
629         /* Again */
630         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
631         sumshort = ~(u_short)sum1;
632         *(sp) = htons(sumshort);
633 }
634
635
636 /* ------------------------------------------------------------------------ */
637 /* Function:    fr_nat_ioctl                                                */
638 /* Returns:     int - 0 == success, != 0 == failure                         */
639 /* Parameters:  data(I) - pointer to ioctl data                             */
640 /*              cmd(I)  - ioctl command integer                             */
641 /*              mode(I) - file mode bits used with open                     */
642 /*                                                                          */
643 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
644 /* ------------------------------------------------------------------------ */
645 int fr_nat_ioctl(data, cmd, mode, uid, ctx)
646 ioctlcmd_t cmd;
647 caddr_t data;
648 int mode, uid;
649 void *ctx;
650 {
651         ipnat_t *nat, *nt, *n = NULL, **np = NULL;
652         int error = 0, ret, arg, getlock;
653         ipnat_t natd;
654         SPL_INT(s);
655
656 #if (BSD >= 199306) && defined(_KERNEL)
657 # if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
658         if ((mode & FWRITE) &&
659              kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
660                                      KAUTH_REQ_NETWORK_FIREWALL_FW,
661                                      NULL, NULL, NULL)) {
662                 return EPERM;
663         }
664 # else
665 #  if defined(__FreeBSD_version) && (__FreeBSD_version >= 500034)
666         if (securelevel_ge(curthread->td_ucred, 3) && (mode & FWRITE)) {
667 #  else
668         if ((securelevel >= 3) && (mode & FWRITE)) {
669 #  endif
670                 return EPERM;
671         }
672 # endif
673 #endif
674
675 #if defined(__osf__) && defined(_KERNEL)
676         getlock = 0;
677 #else
678         getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
679 #endif
680
681         nat = NULL;     /* XXX gcc -Wuninitialized */
682         if (cmd == (ioctlcmd_t)SIOCADNAT) {
683                 KMALLOC(nt, ipnat_t *);
684         } else {
685                 nt = NULL;
686         }
687
688         if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
689                 if (mode & NAT_SYSSPACE) {
690                         bcopy(data, (char *)&natd, sizeof(natd));
691                         error = 0;
692                 } else {
693                         error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
694                 }
695         }
696
697         if (error != 0)
698                 goto done;
699
700         /*
701          * For add/delete, look to see if the NAT entry is already present
702          */
703         if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
704                 nat = &natd;
705                 if (nat->in_v == 0)     /* For backward compat. */
706                         nat->in_v = 4;
707                 nat->in_flags &= IPN_USERFLAGS;
708                 if ((nat->in_redir & NAT_MAPBLK) == 0) {
709                         if ((nat->in_flags & IPN_SPLIT) == 0)
710                                 nat->in_inip &= nat->in_inmsk;
711                         if ((nat->in_flags & IPN_IPRANGE) == 0)
712                                 nat->in_outip &= nat->in_outmsk;
713                 }
714                 MUTEX_ENTER(&ipf_natio);
715                 for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
716                         if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
717                                         IPN_CMPSIZ) == 0) {
718                                 if (nat->in_redir == NAT_REDIRECT &&
719                                     nat->in_pnext != n->in_pnext)
720                                         continue;
721                                 break;
722                         }
723         }
724
725         switch (cmd)
726         {
727 #ifdef  IPFILTER_LOG
728         case SIOCIPFFB :
729         {
730                 int tmp;
731
732                 if (!(mode & FWRITE))
733                         error = EPERM;
734                 else {
735                         tmp = ipflog_clear(IPL_LOGNAT);
736                         error = BCOPYOUT((char *)&tmp, (char *)data,
737                                          sizeof(tmp));
738                         if (error != 0)
739                                 error = EFAULT;
740                 }
741                 break;
742         }
743
744         case SIOCSETLG :
745                 if (!(mode & FWRITE))
746                         error = EPERM;
747                 else {
748                         error = BCOPYIN((char *)data, (char *)&nat_logging,
749                                         sizeof(nat_logging));
750                         if (error != 0)
751                                 error = EFAULT;
752                 }
753                 break;
754
755         case SIOCGETLG :
756                 error = BCOPYOUT((char *)&nat_logging, (char *)data,
757                                  sizeof(nat_logging));
758                 if (error != 0)
759                         error = EFAULT;
760                 break;
761
762         case FIONREAD :
763                 arg = iplused[IPL_LOGNAT];
764                 error = BCOPYOUT(&arg, data, sizeof(arg));
765                 if (error != 0)
766                         error = EFAULT;
767                 break;
768 #endif
769         case SIOCADNAT :
770                 if (!(mode & FWRITE)) {
771                         error = EPERM;
772                 } else if (n != NULL) {
773                         error = EEXIST;
774                 } else if (nt == NULL) {
775                         error = ENOMEM;
776                 }
777                 if (error != 0) {
778                         MUTEX_EXIT(&ipf_natio);
779                         break;
780                 }
781                 bcopy((char *)nat, (char *)nt, sizeof(*n));
782                 error = nat_siocaddnat(nt, np, getlock);
783                 MUTEX_EXIT(&ipf_natio);
784                 if (error == 0)
785                         nt = NULL;
786                 break;
787
788         case SIOCRMNAT :
789                 if (!(mode & FWRITE)) {
790                         error = EPERM;
791                         n = NULL;
792                 } else if (n == NULL) {
793                         error = ESRCH;
794                 }
795
796                 if (error != 0) {
797                         MUTEX_EXIT(&ipf_natio);
798                         break;
799                 }
800                 nat_siocdelnat(n, np, getlock);
801
802                 MUTEX_EXIT(&ipf_natio);
803                 n = NULL;
804                 break;
805
806         case SIOCGNATS :
807                 nat_stats.ns_table[0] = nat_table[0];
808                 nat_stats.ns_table[1] = nat_table[1];
809                 nat_stats.ns_list = nat_list;
810                 nat_stats.ns_maptable = ipf_hm_maptable;
811                 nat_stats.ns_maplist = ipf_hm_maplist;
812                 nat_stats.ns_nattab_sz = ipf_nattable_sz;
813                 nat_stats.ns_nattab_max = ipf_nattable_max;
814                 nat_stats.ns_rultab_sz = ipf_natrules_sz;
815                 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
816                 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
817                 nat_stats.ns_instances = nat_instances;
818                 nat_stats.ns_apslist = ap_sess_list;
819                 nat_stats.ns_ticks = fr_ticks;
820                 error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
821                 break;
822
823         case SIOCGNATL :
824             {
825                 natlookup_t nl;
826
827                 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
828                 if (error == 0) {
829                         void *ptr;
830
831                         if (getlock) {
832                                 READ_ENTER(&ipf_nat);
833                         }
834                         ptr = nat_lookupredir(&nl);
835                         if (getlock) {
836                                 RWLOCK_EXIT(&ipf_nat);
837                         }
838                         if (ptr != NULL) {
839                                 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
840                         } else {
841                                 error = ESRCH;
842                         }
843                 }
844                 break;
845             }
846
847         case SIOCIPFFL :        /* old SIOCFLNAT & SIOCCNATL */
848                 if (!(mode & FWRITE)) {
849                         error = EPERM;
850                         break;
851                 }
852                 if (getlock) {
853                         WRITE_ENTER(&ipf_nat);
854                 }
855
856                 error = BCOPYIN(data, &arg, sizeof(arg));
857                 if (error != 0)
858                         error = EFAULT;
859                 else {
860                         if (arg == 0)
861                                 ret = nat_flushtable();
862                         else if (arg == 1)
863                                 ret = nat_clearlist();
864                         else
865                                 ret = nat_extraflush(arg);
866                 }
867
868                 if (getlock) {
869                         RWLOCK_EXIT(&ipf_nat);
870                 }
871                 if (error == 0) {
872                         error = BCOPYOUT(&ret, data, sizeof(ret));
873                 }
874                 break;
875
876         case SIOCPROXY :
877                 error = appr_ioctl(data, cmd, mode, ctx);
878                 break;
879
880         case SIOCSTLCK :
881                 if (!(mode & FWRITE)) {
882                         error = EPERM;
883                 } else {
884                         error = fr_lock(data, &fr_nat_lock);
885                 }
886                 break;
887
888         case SIOCSTPUT :
889                 if ((mode & FWRITE) != 0) {
890                         error = fr_natputent(data, getlock);
891                 } else {
892                         error = EACCES;
893                 }
894                 break;
895
896         case SIOCSTGSZ :
897                 if (fr_nat_lock) {
898                         error = fr_natgetsz(data, getlock);
899                 } else
900                         error = EACCES;
901                 break;
902
903         case SIOCSTGET :
904                 if (fr_nat_lock) {
905                         error = fr_natgetent(data, getlock);
906                 } else
907                         error = EACCES;
908                 break;
909
910         case SIOCGENITER :
911             {
912                 ipfgeniter_t iter;
913                 ipftoken_t *token;
914
915                 SPL_SCHED(s);
916                 error = fr_inobj(data, &iter, IPFOBJ_GENITER);
917                 if (error == 0) {
918                         token = ipf_findtoken(iter.igi_type, uid, ctx);
919                         if (token != NULL) {
920                                 error  = nat_iterator(token, &iter);
921                         }
922                         RWLOCK_EXIT(&ipf_tokens);
923                 }
924                 SPL_X(s);
925                 break;
926             }
927
928         case SIOCIPFDELTOK :
929                 error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
930                 if (error == 0) {
931                         SPL_SCHED(s);
932                         error = ipf_deltoken(arg, uid, ctx);
933                         SPL_X(s);
934                 } else {
935                         error = EFAULT;
936                 }
937                 break;
938
939         case SIOCGTQTAB :
940                 error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
941                 break;
942
943         case SIOCGTABL :
944                 error = nat_gettable(data);
945                 break;
946
947         default :
948                 error = EINVAL;
949                 break;
950         }
951 done:
952         if (nt != NULL)
953                 KFREE(nt);
954         return error;
955 }
956
957
958 /* ------------------------------------------------------------------------ */
959 /* Function:    nat_siocaddnat                                              */
960 /* Returns:     int - 0 == success, != 0 == failure                         */
961 /* Parameters:  n(I)       - pointer to new NAT rule                        */
962 /*              np(I)      - pointer to where to insert new NAT rule        */
963 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
964 /* Mutex Locks: ipf_natio                                                   */
965 /*                                                                          */
966 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
967 /* from information passed to the kernel, then add it  to the appropriate   */
968 /* NAT rule table(s).                                                       */
969 /* ------------------------------------------------------------------------ */
970 static int nat_siocaddnat(n, np, getlock)
971 ipnat_t *n, **np;
972 int getlock;
973 {
974         int error = 0, i, j;
975
976         if (nat_resolverule(n) != 0)
977                 return ENOENT;
978
979         if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
980                 return EINVAL;
981
982         n->in_use = 0;
983         if (n->in_redir & NAT_MAPBLK)
984                 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
985         else if (n->in_flags & IPN_AUTOPORTMAP)
986                 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
987         else if (n->in_flags & IPN_IPRANGE)
988                 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
989         else if (n->in_flags & IPN_SPLIT)
990                 n->in_space = 2;
991         else if (n->in_outmsk != 0)
992                 n->in_space = ~ntohl(n->in_outmsk);
993         else
994                 n->in_space = 1;
995
996         /*
997          * Calculate the number of valid IP addresses in the output
998          * mapping range.  In all cases, the range is inclusive of
999          * the start and ending IP addresses.
1000          * If to a CIDR address, lose 2: broadcast + network address
1001          *                               (so subtract 1)
1002          * If to a range, add one.
1003          * If to a single IP address, set to 1.
1004          */
1005         if (n->in_space) {
1006                 if ((n->in_flags & IPN_IPRANGE) != 0)
1007                         n->in_space += 1;
1008                 else
1009                         n->in_space -= 1;
1010         } else
1011                 n->in_space = 1;
1012
1013         if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1014             ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1015                 n->in_nip = ntohl(n->in_outip) + 1;
1016         else if ((n->in_flags & IPN_SPLIT) &&
1017                  (n->in_redir & NAT_REDIRECT))
1018                 n->in_nip = ntohl(n->in_inip);
1019         else
1020                 n->in_nip = ntohl(n->in_outip);
1021         if (n->in_redir & NAT_MAP) {
1022                 n->in_pnext = ntohs(n->in_pmin);
1023                 /*
1024                  * Multiply by the number of ports made available.
1025                  */
1026                 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1027                         n->in_space *= (ntohs(n->in_pmax) -
1028                                         ntohs(n->in_pmin) + 1);
1029                         /*
1030                          * Because two different sources can map to
1031                          * different destinations but use the same
1032                          * local IP#/port #.
1033                          * If the result is smaller than in_space, then
1034                          * we may have wrapped around 32bits.
1035                          */
1036                         i = n->in_inmsk;
1037                         if ((i != 0) && (i != 0xffffffff)) {
1038                                 j = n->in_space * (~ntohl(i) + 1);
1039                                 if (j >= n->in_space)
1040                                         n->in_space = j;
1041                                 else
1042                                         n->in_space = 0xffffffff;
1043                         }
1044                 }
1045                 /*
1046                  * If no protocol is specified, multiple by 256 to allow for
1047                  * at least one IP:IP mapping per protocol.
1048                  */
1049                 if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1050                                 j = n->in_space * 256;
1051                                 if (j >= n->in_space)
1052                                         n->in_space = j;
1053                                 else
1054                                         n->in_space = 0xffffffff;
1055                 }
1056         }
1057
1058         /* Otherwise, these fields are preset */
1059
1060         if (getlock) {
1061                 WRITE_ENTER(&ipf_nat);
1062         }
1063         n->in_next = NULL;
1064         *np = n;
1065
1066         if (n->in_age[0] != 0)
1067                 n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1068
1069         if (n->in_age[1] != 0)
1070                 n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
1071
1072         if (n->in_redir & NAT_REDIRECT) {
1073                 n->in_flags &= ~IPN_NOTDST;
1074                 nat_addrdr(n);
1075         }
1076         if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1077                 n->in_flags &= ~IPN_NOTSRC;
1078                 nat_addnat(n);
1079         }
1080         MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1081
1082         n = NULL;
1083         nat_stats.ns_rules++;
1084 #if SOLARIS && !defined(_INET_IP_STACK_H)
1085         pfil_delayed_copy = 0;
1086 #endif
1087         if (getlock) {
1088                 RWLOCK_EXIT(&ipf_nat);                  /* WRITE */
1089         }
1090
1091         return error;
1092 }
1093
1094
1095 /* ------------------------------------------------------------------------ */
1096 /* Function:    nat_resolvrule                                              */
1097 /* Returns:     Nil                                                         */
1098 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1099 /*                                                                          */
1100 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1101 /* from information passed to the kernel, then add it  to the appropriate   */
1102 /* NAT rule table(s).                                                       */
1103 /* ------------------------------------------------------------------------ */
1104 static int nat_resolverule(n)
1105 ipnat_t *n;
1106 {
1107         n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1108         n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1109
1110         n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1111         if (n->in_ifnames[1][0] == '\0') {
1112                 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1113                 n->in_ifps[1] = n->in_ifps[0];
1114         } else {
1115                 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
1116         }
1117
1118         if (n->in_plabel[0] != '\0') {
1119                 n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1120                 if (n->in_apr == NULL)
1121                         return -1;
1122         }
1123         return 0;
1124 }
1125
1126
1127 /* ------------------------------------------------------------------------ */
1128 /* Function:    nat_siocdelnat                                              */
1129 /* Returns:     int - 0 == success, != 0 == failure                         */
1130 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1131 /*              np(I)      - pointer to where to insert new NAT rule        */
1132 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1133 /* Mutex Locks: ipf_natio                                                   */
1134 /*                                                                          */
1135 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1136 /* from information passed to the kernel, then add it  to the appropriate   */
1137 /* NAT rule table(s).                                                       */
1138 /* ------------------------------------------------------------------------ */
1139 static void nat_siocdelnat(n, np, getlock)
1140 ipnat_t *n, **np;
1141 int getlock;
1142 {
1143         if (getlock) {
1144                 WRITE_ENTER(&ipf_nat);
1145         }
1146         if (n->in_redir & NAT_REDIRECT)
1147                 nat_delrdr(n);
1148         if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1149                 nat_delnat(n);
1150         if (nat_list == NULL) {
1151                 nat_masks = 0;
1152                 rdr_masks = 0;
1153         }
1154
1155         if (n->in_tqehead[0] != NULL) {
1156                 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1157                         fr_freetimeoutqueue(n->in_tqehead[1]);
1158                 }
1159         }
1160
1161         if (n->in_tqehead[1] != NULL) {
1162                 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1163                         fr_freetimeoutqueue(n->in_tqehead[1]);
1164                 }
1165         }
1166
1167         *np = n->in_next;
1168
1169         if (n->in_use == 0) {
1170                 if (n->in_apr)
1171                         appr_free(n->in_apr);
1172                 MUTEX_DESTROY(&n->in_lock);
1173                 KFREE(n);
1174                 nat_stats.ns_rules--;
1175 #if SOLARIS && !defined(_INET_IP_STACK_H)
1176                 if (nat_stats.ns_rules == 0)
1177                         pfil_delayed_copy = 1;
1178 #endif
1179         } else {
1180                 n->in_flags |= IPN_DELETE;
1181                 n->in_next = NULL;
1182         }
1183         if (getlock) {
1184                 RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
1185         }
1186 }
1187
1188
1189 /* ------------------------------------------------------------------------ */
1190 /* Function:    fr_natgetsz                                                 */
1191 /* Returns:     int - 0 == success, != 0 is the error value.                */
1192 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1193 /*                        get the size of.                                  */
1194 /*                                                                          */
1195 /* Handle SIOCSTGSZ.                                                        */
1196 /* Return the size of the nat list entry to be copied back to user space.   */
1197 /* The size of the entry is stored in the ng_sz field and the enture natget */
1198 /* structure is copied back to the user.                                    */
1199 /* ------------------------------------------------------------------------ */
1200 static int fr_natgetsz(data, getlock)
1201 caddr_t data;
1202 int getlock;
1203 {
1204         ap_session_t *aps;
1205         nat_t *nat, *n;
1206         natget_t ng;
1207
1208         if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1209                 return EFAULT;
1210
1211         if (getlock) {
1212                 READ_ENTER(&ipf_nat);
1213         }
1214
1215         nat = ng.ng_ptr;
1216         if (!nat) {
1217                 nat = nat_instances;
1218                 ng.ng_sz = 0;
1219                 /*
1220                  * Empty list so the size returned is 0.  Simple.
1221                  */
1222                 if (nat == NULL) {
1223                         if (getlock) {
1224                                 RWLOCK_EXIT(&ipf_nat);
1225                         }
1226                         if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1227                                 return EFAULT;
1228                         return 0;
1229                 }
1230         } else {
1231                 /*
1232                  * Make sure the pointer we're copying from exists in the
1233                  * current list of entries.  Security precaution to prevent
1234                  * copying of random kernel data.
1235                  */
1236                 for (n = nat_instances; n; n = n->nat_next)
1237                         if (n == nat)
1238                                 break;
1239                 if (n == NULL) {
1240                         if (getlock) {
1241                                 RWLOCK_EXIT(&ipf_nat);
1242                         }
1243                         return ESRCH;
1244                 }
1245         }
1246
1247         /*
1248          * Incluse any space required for proxy data structures.
1249          */
1250         ng.ng_sz = sizeof(nat_save_t);
1251         aps = nat->nat_aps;
1252         if (aps != NULL) {
1253                 ng.ng_sz += sizeof(ap_session_t) - 4;
1254                 if (aps->aps_data != 0)
1255                         ng.ng_sz += aps->aps_psiz;
1256         }
1257         if (getlock) {
1258                 RWLOCK_EXIT(&ipf_nat);
1259         }
1260
1261         if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1262                 return EFAULT;
1263         return 0;
1264 }
1265
1266
1267 /* ------------------------------------------------------------------------ */
1268 /* Function:    fr_natgetent                                                */
1269 /* Returns:     int - 0 == success, != 0 is the error value.                */
1270 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1271 /*                        to NAT structure to copy out.                     */
1272 /*                                                                          */
1273 /* Handle SIOCSTGET.                                                        */
1274 /* Copies out NAT entry to user space.  Any additional data held for a      */
1275 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1276 /* ------------------------------------------------------------------------ */
1277 static int fr_natgetent(data, getlock)
1278 caddr_t data;
1279 int getlock;
1280 {
1281         int error, outsize;
1282         ap_session_t *aps;
1283         nat_save_t *ipn, ipns;
1284         nat_t *n, *nat;
1285
1286         error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1287         if (error != 0)
1288                 return error;
1289
1290         if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1291                 return EINVAL;
1292
1293         KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1294         if (ipn == NULL)
1295                 return ENOMEM;
1296
1297         if (getlock) {
1298                 READ_ENTER(&ipf_nat);
1299         }
1300
1301         ipn->ipn_dsize = ipns.ipn_dsize;
1302         nat = ipns.ipn_next;
1303         if (nat == NULL) {
1304                 nat = nat_instances;
1305                 if (nat == NULL) {
1306                         if (nat_instances == NULL)
1307                                 error = ENOENT;
1308                         goto finished;
1309                 }
1310         } else {
1311                 /*
1312                  * Make sure the pointer we're copying from exists in the
1313                  * current list of entries.  Security precaution to prevent
1314                  * copying of random kernel data.
1315                  */
1316                 for (n = nat_instances; n; n = n->nat_next)
1317                         if (n == nat)
1318                                 break;
1319                 if (n == NULL) {
1320                         error = ESRCH;
1321                         goto finished;
1322                 }
1323         }
1324         ipn->ipn_next = nat->nat_next;
1325
1326         /*
1327          * Copy the NAT structure.
1328          */
1329         bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1330
1331         /*
1332          * If we have a pointer to the NAT rule it belongs to, save that too.
1333          */
1334         if (nat->nat_ptr != NULL)
1335                 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1336                       sizeof(ipn->ipn_ipnat));
1337
1338         /*
1339          * If we also know the NAT entry has an associated filter rule,
1340          * save that too.
1341          */
1342         if (nat->nat_fr != NULL)
1343                 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1344                       sizeof(ipn->ipn_fr));
1345
1346         /*
1347          * Last but not least, if there is an application proxy session set
1348          * up for this NAT entry, then copy that out too, including any
1349          * private data saved along side it by the proxy.
1350          */
1351         aps = nat->nat_aps;
1352         outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1353         if (aps != NULL) {
1354                 char *s;
1355
1356                 if (outsize < sizeof(*aps)) {
1357                         error = ENOBUFS;
1358                         goto finished;
1359                 }
1360
1361                 s = ipn->ipn_data;
1362                 bcopy((char *)aps, s, sizeof(*aps));
1363                 s += sizeof(*aps);
1364                 outsize -= sizeof(*aps);
1365                 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1366                         bcopy(aps->aps_data, s, aps->aps_psiz);
1367                 else
1368                         error = ENOBUFS;
1369         }
1370         if (error == 0) {
1371                 if (getlock) {
1372                         RWLOCK_EXIT(&ipf_nat);
1373                         getlock = 0;
1374                 }
1375                 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1376         }
1377
1378 finished:
1379         if (getlock) {
1380                 RWLOCK_EXIT(&ipf_nat);
1381         }
1382         if (ipn != NULL) {
1383                 KFREES(ipn, ipns.ipn_dsize);
1384         }
1385         return error;
1386 }
1387
1388
1389 /* ------------------------------------------------------------------------ */
1390 /* Function:    fr_natputent                                                */
1391 /* Returns:     int - 0 == success, != 0 is the error value.                */
1392 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1393 /*                            structure information to load into the kernel */
1394 /*              getlock(I) - flag indicating whether or not a write lock    */
1395 /*                           on ipf_nat is already held.                    */
1396 /*                                                                          */
1397 /* Handle SIOCSTPUT.                                                        */
1398 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1399 /* firewall rule data structures, if pointers to them indicate so.          */
1400 /* ------------------------------------------------------------------------ */
1401 static int fr_natputent(data, getlock)
1402 caddr_t data;
1403 int getlock;
1404 {
1405         nat_save_t ipn, *ipnn;
1406         ap_session_t *aps;
1407         nat_t *n, *nat;
1408         frentry_t *fr;
1409         fr_info_t fin;
1410         ipnat_t *in;
1411         int error;
1412
1413         error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1414         if (error != 0)
1415                 return error;
1416
1417         /*
1418          * Initialise early because of code at junkput label.
1419          */
1420         in = NULL;
1421         aps = NULL;
1422         nat = NULL;
1423         ipnn = NULL;
1424         fr = NULL;
1425
1426         /*
1427          * New entry, copy in the rest of the NAT entry if it's size is more
1428          * than just the nat_t structure.
1429          */
1430         if (ipn.ipn_dsize > sizeof(ipn)) {
1431                 if (ipn.ipn_dsize > 81920) {
1432                         error = ENOMEM;
1433                         goto junkput;
1434                 }
1435
1436                 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1437                 if (ipnn == NULL)
1438                         return ENOMEM;
1439
1440                 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1441                 if (error != 0) {
1442                         error = EFAULT;
1443                         goto junkput;
1444                 }
1445         } else
1446                 ipnn = &ipn;
1447
1448         KMALLOC(nat, nat_t *);
1449         if (nat == NULL) {
1450                 error = ENOMEM;
1451                 goto junkput;
1452         }
1453
1454         bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1455         /*
1456          * Initialize all these so that nat_delete() doesn't cause a crash.
1457          */
1458         bzero((char *)nat, offsetof(struct nat, nat_tqe));
1459         nat->nat_tqe.tqe_pnext = NULL;
1460         nat->nat_tqe.tqe_next = NULL;
1461         nat->nat_tqe.tqe_ifq = NULL;
1462         nat->nat_tqe.tqe_parent = nat;
1463
1464         /*
1465          * Restore the rule associated with this nat session
1466          */
1467         in = ipnn->ipn_nat.nat_ptr;
1468         if (in != NULL) {
1469                 KMALLOC(in, ipnat_t *);
1470                 nat->nat_ptr = in;
1471                 if (in == NULL) {
1472                         error = ENOMEM;
1473                         goto junkput;
1474                 }
1475                 bzero((char *)in, offsetof(struct ipnat, in_next6));
1476                 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1477                 in->in_use = 1;
1478                 in->in_flags |= IPN_DELETE;
1479
1480                 ATOMIC_INC(nat_stats.ns_rules);
1481
1482                 if (nat_resolverule(in) != 0) {
1483                         error = ESRCH;
1484                         goto junkput;
1485                 }
1486         }
1487
1488         /*
1489          * Check that the NAT entry doesn't already exist in the kernel.
1490          *
1491          * For NAT_OUTBOUND, we're lookup for a duplicate MAP entry.  To do
1492          * this, we check to see if the inbound combination of addresses and
1493          * ports is already known.  Similar logic is applied for NAT_INBOUND.
1494          * 
1495          */
1496         bzero((char *)&fin, sizeof(fin));
1497         fin.fin_p = nat->nat_p;
1498         if (nat->nat_dir == NAT_OUTBOUND) {
1499                 fin.fin_ifp = nat->nat_ifps[0];
1500                 fin.fin_data[0] = ntohs(nat->nat_oport);
1501                 fin.fin_data[1] = ntohs(nat->nat_outport);
1502                 if (getlock) {
1503                         READ_ENTER(&ipf_nat);
1504                 }
1505                 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1506                                  nat->nat_oip, nat->nat_inip);
1507                 if (getlock) {
1508                         RWLOCK_EXIT(&ipf_nat);
1509                 }
1510                 if (n != NULL) {
1511                         error = EEXIST;
1512                         goto junkput;
1513                 }
1514         } else if (nat->nat_dir == NAT_INBOUND) {
1515                 fin.fin_ifp = nat->nat_ifps[0];
1516                 fin.fin_data[0] = ntohs(nat->nat_outport);
1517                 fin.fin_data[1] = ntohs(nat->nat_oport);
1518                 if (getlock) {
1519                         READ_ENTER(&ipf_nat);
1520                 }
1521                 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1522                                   nat->nat_outip, nat->nat_oip);
1523                 if (getlock) {
1524                         RWLOCK_EXIT(&ipf_nat);
1525                 }
1526                 if (n != NULL) {
1527                         error = EEXIST;
1528                         goto junkput;
1529                 }
1530         } else {
1531                 error = EINVAL;
1532                 goto junkput;
1533         }
1534
1535         /*
1536          * Restore ap_session_t structure.  Include the private data allocated
1537          * if it was there.
1538          */
1539         aps = nat->nat_aps;
1540         if (aps != NULL) {
1541                 KMALLOC(aps, ap_session_t *);
1542                 nat->nat_aps = aps;
1543                 if (aps == NULL) {
1544                         error = ENOMEM;
1545                         goto junkput;
1546                 }
1547                 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1548                 if (in != NULL)
1549                         aps->aps_apr = in->in_apr;
1550                 else
1551                         aps->aps_apr = NULL;
1552                 if (aps->aps_psiz != 0) {
1553                         if (aps->aps_psiz > 81920) {
1554                                 error = ENOMEM;
1555                                 goto junkput;
1556                         }
1557                         KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1558                         if (aps->aps_data == NULL) {
1559                                 error = ENOMEM;
1560                                 goto junkput;
1561                         }
1562                         bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1563                               aps->aps_psiz);
1564                 } else {
1565                         aps->aps_psiz = 0;
1566                         aps->aps_data = NULL;
1567                 }
1568         }
1569
1570         /*
1571          * If there was a filtering rule associated with this entry then
1572          * build up a new one.
1573          */
1574         fr = nat->nat_fr;
1575         if (fr != NULL) {
1576                 if ((nat->nat_flags & SI_NEWFR) != 0) {
1577                         KMALLOC(fr, frentry_t *);
1578                         nat->nat_fr = fr;
1579                         if (fr == NULL) {
1580                                 error = ENOMEM;
1581                                 goto junkput;
1582                         }
1583                         ipnn->ipn_nat.nat_fr = fr;
1584                         fr->fr_ref = 1;
1585                         (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1586                         bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1587
1588                         fr->fr_ref = 1;
1589                         fr->fr_dsize = 0;
1590                         fr->fr_data = NULL;
1591                         fr->fr_type = FR_T_NONE;
1592
1593                         MUTEX_NUKE(&fr->fr_lock);
1594                         MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1595                 } else {
1596                         if (getlock) {
1597                                 READ_ENTER(&ipf_nat);
1598                         }
1599                         for (n = nat_instances; n; n = n->nat_next)
1600                                 if (n->nat_fr == fr)
1601                                         break;
1602
1603                         if (n != NULL) {
1604                                 MUTEX_ENTER(&fr->fr_lock);
1605                                 fr->fr_ref++;
1606                                 MUTEX_EXIT(&fr->fr_lock);
1607                         }
1608                         if (getlock) {
1609                                 RWLOCK_EXIT(&ipf_nat);
1610                         }
1611
1612                         if (!n) {
1613                                 error = ESRCH;
1614                                 goto junkput;
1615                         }
1616                 }
1617         }
1618
1619         if (ipnn != &ipn) {
1620                 KFREES(ipnn, ipn.ipn_dsize);
1621                 ipnn = NULL;
1622         }
1623
1624         if (getlock) {
1625                 WRITE_ENTER(&ipf_nat);
1626         }
1627         error = nat_insert(nat, nat->nat_rev);
1628         if ((error == 0) && (aps != NULL)) {
1629                 aps->aps_next = ap_sess_list;
1630                 ap_sess_list = aps;
1631         }
1632         if (getlock) {
1633                 RWLOCK_EXIT(&ipf_nat);
1634         }
1635
1636         if (error == 0)
1637                 return 0;
1638
1639         error = ENOMEM;
1640
1641 junkput:
1642         if (fr != NULL)
1643                 (void) fr_derefrule(&fr);
1644
1645         if ((ipnn != NULL) && (ipnn != &ipn)) {
1646                 KFREES(ipnn, ipn.ipn_dsize);
1647         }
1648         if (nat != NULL) {
1649                 if (aps != NULL) {
1650                         if (aps->aps_data != NULL) {
1651                                 KFREES(aps->aps_data, aps->aps_psiz);
1652                         }
1653                         KFREE(aps);
1654                 }
1655                 if (in != NULL) {
1656                         if (in->in_apr)
1657                                 appr_free(in->in_apr);
1658                         KFREE(in);
1659                 }
1660                 KFREE(nat);
1661         }
1662         return error;
1663 }
1664
1665
1666 /* ------------------------------------------------------------------------ */
1667 /* Function:    nat_delete                                                  */
1668 /* Returns:     Nil                                                         */
1669 /* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1670 /*              logtype(I) - type of LOG record to create before deleting   */
1671 /* Write Lock:  ipf_nat                                                     */
1672 /*                                                                          */
1673 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1674 /* enabled then generate a NAT log record for this event.                   */
1675 /* ------------------------------------------------------------------------ */
1676 void nat_delete(nat, logtype)
1677 struct nat *nat;
1678 int logtype;
1679 {
1680         struct ipnat *ipn;
1681         int removed = 0;
1682
1683         if (logtype != 0 && nat_logging != 0)
1684                 nat_log(nat, logtype);
1685 #if defined(NEED_LOCAL_RAND) && defined(_KERNEL)
1686         ipf_rand_push(nat, sizeof(*nat));
1687 #endif
1688
1689         /*
1690          * Take it as a general indication that all the pointers are set if
1691          * nat_pnext is set.
1692          */
1693         if (nat->nat_pnext != NULL) {
1694                 removed = 1;
1695
1696                 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1697                 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1698
1699                 *nat->nat_pnext = nat->nat_next;
1700                 if (nat->nat_next != NULL) {
1701                         nat->nat_next->nat_pnext = nat->nat_pnext;
1702                         nat->nat_next = NULL;
1703                 }
1704                 nat->nat_pnext = NULL;
1705
1706                 *nat->nat_phnext[0] = nat->nat_hnext[0];
1707                 if (nat->nat_hnext[0] != NULL) {
1708                         nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1709                         nat->nat_hnext[0] = NULL;
1710                 }
1711                 nat->nat_phnext[0] = NULL;
1712
1713                 *nat->nat_phnext[1] = nat->nat_hnext[1];
1714                 if (nat->nat_hnext[1] != NULL) {
1715                         nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1716                         nat->nat_hnext[1] = NULL;
1717                 }
1718                 nat->nat_phnext[1] = NULL;
1719
1720                 if ((nat->nat_flags & SI_WILDP) != 0)
1721                         nat_stats.ns_wilds--;
1722         }
1723
1724         if (nat->nat_me != NULL) {
1725                 *nat->nat_me = NULL;
1726                 nat->nat_me = NULL;
1727         }
1728
1729         if (nat->nat_tqe.tqe_ifq != NULL)
1730                 fr_deletequeueentry(&nat->nat_tqe);
1731
1732         if (logtype == NL_EXPIRE)
1733                 nat_stats.ns_expire++;
1734
1735         MUTEX_ENTER(&nat->nat_lock);
1736         /*
1737          * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1738          * This happens when a nat'd packet is blocked and we want to throw
1739          * away the NAT session.
1740          */
1741         if (logtype == NL_DESTROY) {
1742                 if (nat->nat_ref > 2) {
1743                         nat->nat_ref -= 2;
1744                         MUTEX_EXIT(&nat->nat_lock);
1745                         if (removed)
1746                                 nat_stats.ns_orphans++;
1747                         return;
1748                 }
1749         } else if (nat->nat_ref > 1) {
1750                 nat->nat_ref--;
1751                 MUTEX_EXIT(&nat->nat_lock);
1752                 if (removed)
1753                         nat_stats.ns_orphans++;
1754                 return;
1755         }
1756         MUTEX_EXIT(&nat->nat_lock);
1757
1758         /*
1759          * At this point, nat_ref is 1, doing "--" would make it 0..
1760          */
1761         nat->nat_ref = 0;
1762         if (!removed)
1763                 nat_stats.ns_orphans--;
1764
1765 #ifdef  IPFILTER_SYNC
1766         if (nat->nat_sync)
1767                 ipfsync_del(nat->nat_sync);
1768 #endif
1769
1770         if (nat->nat_fr != NULL)
1771                 (void) fr_derefrule(&nat->nat_fr);
1772
1773         if (nat->nat_hm != NULL)
1774                 fr_hostmapdel(&nat->nat_hm);
1775
1776         /*
1777          * If there is an active reference from the nat entry to its parent
1778          * rule, decrement the rule's reference count and free it too if no
1779          * longer being used.
1780          */
1781         ipn = nat->nat_ptr;
1782         if (ipn != NULL) {
1783                 fr_ipnatderef(&ipn);
1784         }
1785
1786         MUTEX_DESTROY(&nat->nat_lock);
1787
1788         aps_free(nat->nat_aps);
1789         nat_stats.ns_inuse--;
1790
1791         /*
1792          * If there's a fragment table entry too for this nat entry, then
1793          * dereference that as well.  This is after nat_lock is released
1794          * because of Tru64.
1795          */
1796         fr_forgetnat((void *)nat);
1797
1798         KFREE(nat);
1799 }
1800
1801
1802 /* ------------------------------------------------------------------------ */
1803 /* Function:    nat_flushtable                                              */
1804 /* Returns:     int - number of NAT rules deleted                           */
1805 /* Parameters:  Nil                                                         */
1806 /*                                                                          */
1807 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1808 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1809 /* ------------------------------------------------------------------------ */
1810 /*
1811  * nat_flushtable - clear the NAT table of all mapping entries.
1812  */
1813 static int nat_flushtable()
1814 {
1815         nat_t *nat;
1816         int j = 0;
1817
1818         /*
1819          * ALL NAT mappings deleted, so lets just make the deletions
1820          * quicker.
1821          */
1822         if (nat_table[0] != NULL)
1823                 bzero((char *)nat_table[0],
1824                       sizeof(nat_table[0]) * ipf_nattable_sz);
1825         if (nat_table[1] != NULL)
1826                 bzero((char *)nat_table[1],
1827                       sizeof(nat_table[1]) * ipf_nattable_sz);
1828
1829         while ((nat = nat_instances) != NULL) {
1830                 nat_delete(nat, NL_FLUSH);
1831                 j++;
1832         }
1833
1834         nat_stats.ns_inuse = 0;
1835         return j;
1836 }
1837
1838
1839 /* ------------------------------------------------------------------------ */
1840 /* Function:    nat_clearlist                                               */
1841 /* Returns:     int - number of NAT/RDR rules deleted                       */
1842 /* Parameters:  Nil                                                         */
1843 /*                                                                          */
1844 /* Delete all rules in the current list of rules.  There is nothing elegant */
1845 /* about this cleanup: simply free all entries on the list of rules and     */
1846 /* clear out the tables used for hashed NAT rule lookups.                   */
1847 /* ------------------------------------------------------------------------ */
1848 static int nat_clearlist()
1849 {
1850         ipnat_t *n, **np = &nat_list;
1851         int i = 0;
1852
1853         if (nat_rules != NULL)
1854                 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1855         if (rdr_rules != NULL)
1856                 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1857
1858         while ((n = *np) != NULL) {
1859                 *np = n->in_next;
1860                 if (n->in_use == 0) {
1861                         if (n->in_apr != NULL)
1862                                 appr_free(n->in_apr);
1863                         MUTEX_DESTROY(&n->in_lock);
1864                         KFREE(n);
1865                         nat_stats.ns_rules--;
1866                 } else {
1867                         n->in_flags |= IPN_DELETE;
1868                         n->in_next = NULL;
1869                 }
1870                 i++;
1871         }
1872 #if SOLARIS && !defined(_INET_IP_STACK_H)
1873         pfil_delayed_copy = 1;
1874 #endif
1875         nat_masks = 0;
1876         rdr_masks = 0;
1877         return i;
1878 }
1879
1880
1881 /* ------------------------------------------------------------------------ */
1882 /* Function:    nat_newmap                                                  */
1883 /* Returns:     int - -1 == error, 0 == success                             */
1884 /* Parameters:  fin(I) - pointer to packet information                      */
1885 /*              nat(I) - pointer to NAT entry                               */
1886 /*              ni(I)  - pointer to structure with misc. information needed */
1887 /*                       to create new NAT entry.                           */
1888 /*                                                                          */
1889 /* Given an empty NAT structure, populate it with new information about a   */
1890 /* new NAT session, as defined by the matching NAT rule.                    */
1891 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1892 /* to the new IP address for the translation.                               */
1893 /* ------------------------------------------------------------------------ */
1894 static INLINE int nat_newmap(fin, nat, ni)
1895 fr_info_t *fin;
1896 nat_t *nat;
1897 natinfo_t *ni;
1898 {
1899         u_short st_port, dport, sport, port, sp, dp;
1900         struct in_addr in, inb;
1901         hostmap_t *hm;
1902         u_32_t flags;
1903         u_32_t st_ip;
1904         ipnat_t *np;
1905         nat_t *natl;
1906         int l;
1907
1908         /*
1909          * If it's an outbound packet which doesn't match any existing
1910          * record, then create a new port
1911          */
1912         l = 0;
1913         hm = NULL;
1914         np = ni->nai_np;
1915         st_ip = np->in_nip;
1916         st_port = np->in_pnext;
1917         flags = ni->nai_flags;
1918         sport = ni->nai_sport;
1919         dport = ni->nai_dport;
1920
1921         /*
1922          * Do a loop until we either run out of entries to try or we find
1923          * a NAT mapping that isn't currently being used.  This is done
1924          * because the change to the source is not (usually) being fixed.
1925          */
1926         do {
1927                 port = 0;
1928                 in.s_addr = htonl(np->in_nip);
1929                 if (l == 0) {
1930                         /*
1931                          * Check to see if there is an existing NAT
1932                          * setup for this IP address pair.
1933                          */
1934                         hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1935                                          in, 0);
1936                         if (hm != NULL)
1937                                 in.s_addr = hm->hm_mapip.s_addr;
1938                 } else if ((l == 1) && (hm != NULL)) {
1939                         fr_hostmapdel(&hm);
1940                 }
1941                 in.s_addr = ntohl(in.s_addr);
1942
1943                 nat->nat_hm = hm;
1944
1945                 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1946                         if (l > 0)
1947                                 return -1;
1948                 }
1949
1950                 if (np->in_redir == NAT_BIMAP &&
1951                     np->in_inmsk == np->in_outmsk) {
1952                         /*
1953                          * map the address block in a 1:1 fashion
1954                          */
1955                         in.s_addr = np->in_outip;
1956                         in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1957                         in.s_addr = ntohl(in.s_addr);
1958
1959                 } else if (np->in_redir & NAT_MAPBLK) {
1960                         if ((l >= np->in_ppip) || ((l > 0) &&
1961                              !(flags & IPN_TCPUDP)))
1962                                 return -1;
1963                         /*
1964                          * map-block - Calculate destination address.
1965                          */
1966                         in.s_addr = ntohl(fin->fin_saddr);
1967                         in.s_addr &= ntohl(~np->in_inmsk);
1968                         inb.s_addr = in.s_addr;
1969                         in.s_addr /= np->in_ippip;
1970                         in.s_addr &= ntohl(~np->in_outmsk);
1971                         in.s_addr += ntohl(np->in_outip);
1972                         /*
1973                          * Calculate destination port.
1974                          */
1975                         if ((flags & IPN_TCPUDP) &&
1976                             (np->in_ppip != 0)) {
1977                                 port = ntohs(sport) + l;
1978                                 port %= np->in_ppip;
1979                                 port += np->in_ppip *
1980                                         (inb.s_addr % np->in_ippip);
1981                                 port += MAPBLK_MINPORT;
1982                                 port = htons(port);
1983                         }
1984
1985                 } else if ((np->in_outip == 0) &&
1986                            (np->in_outmsk == 0xffffffff)) {
1987                         /*
1988                          * 0/32 - use the interface's IP address.
1989                          */
1990                         if ((l > 0) ||
1991                             fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1992                                        &in, NULL) == -1)
1993                                 return -1;
1994                         in.s_addr = ntohl(in.s_addr);
1995
1996                 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1997                         /*
1998                          * 0/0 - use the original source address/port.
1999                          */
2000                         if (l > 0)
2001                                 return -1;
2002                         in.s_addr = ntohl(fin->fin_saddr);
2003
2004                 } else if ((np->in_outmsk != 0xffffffff) &&
2005                            (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2006                         np->in_nip++;
2007
2008                 natl = NULL;
2009
2010                 if ((flags & IPN_TCPUDP) &&
2011                     ((np->in_redir & NAT_MAPBLK) == 0) &&
2012                     (np->in_flags & IPN_AUTOPORTMAP)) {
2013                         /*
2014                          * "ports auto" (without map-block)
2015                          */
2016                         if ((l > 0) && (l % np->in_ppip == 0)) {
2017                                 if (l > np->in_space) {
2018                                         return -1;
2019                                 } else if ((l > np->in_ppip) &&
2020                                            np->in_outmsk != 0xffffffff)
2021                                         np->in_nip++;
2022                         }
2023                         if (np->in_ppip != 0) {
2024                                 port = ntohs(sport);
2025                                 port += (l % np->in_ppip);
2026                                 port %= np->in_ppip;
2027                                 port += np->in_ppip *
2028                                         (ntohl(fin->fin_saddr) %
2029                                          np->in_ippip);
2030                                 port += MAPBLK_MINPORT;
2031                                 port = htons(port);
2032                         }
2033
2034                 } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2035                            (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2036                         /*
2037                          * Standard port translation.  Select next port.
2038                          */
2039                         if (np->in_flags & IPN_SEQUENTIAL) {
2040                                 port = np->in_pnext;
2041                         } else {
2042                                 port = ipf_random() % (ntohs(np->in_pmax) -
2043                                                        ntohs(np->in_pmin));
2044                                 port += ntohs(np->in_pmin);
2045                         }
2046                         port = htons(port);
2047                         np->in_pnext++;
2048
2049                         if (np->in_pnext > ntohs(np->in_pmax)) {
2050                                 np->in_pnext = ntohs(np->in_pmin);
2051                                 if (np->in_outmsk != 0xffffffff)
2052                                         np->in_nip++;
2053                         }
2054                 }
2055
2056                 if (np->in_flags & IPN_IPRANGE) {
2057                         if (np->in_nip > ntohl(np->in_outmsk))
2058                                 np->in_nip = ntohl(np->in_outip);
2059                 } else {
2060                         if ((np->in_outmsk != 0xffffffff) &&
2061                             ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2062                             ntohl(np->in_outip))
2063                                 np->in_nip = ntohl(np->in_outip) + 1;
2064                 }
2065
2066                 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2067                         port = sport;
2068
2069                 /*
2070                  * Here we do a lookup of the connection as seen from
2071                  * the outside.  If an IP# pair already exists, try
2072                  * again.  So if you have A->B becomes C->B, you can
2073                  * also have D->E become C->E but not D->B causing
2074                  * another C->B.  Also take protocol and ports into
2075                  * account when determining whether a pre-existing
2076                  * NAT setup will cause an external conflict where
2077                  * this is appropriate.
2078                  */
2079                 inb.s_addr = htonl(in.s_addr);
2080                 sp = fin->fin_data[0];
2081                 dp = fin->fin_data[1];
2082                 fin->fin_data[0] = fin->fin_data[1];
2083                 fin->fin_data[1] = htons(port);
2084                 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2085                                     (u_int)fin->fin_p, fin->fin_dst, inb);
2086                 fin->fin_data[0] = sp;
2087                 fin->fin_data[1] = dp;
2088
2089                 /*
2090                  * Has the search wrapped around and come back to the
2091                  * start ?
2092                  */
2093                 if ((natl != NULL) &&
2094                     (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2095                     (np->in_nip != 0) && (st_ip == np->in_nip))
2096                         return -1;
2097                 l++;
2098         } while (natl != NULL);
2099
2100         if (np->in_space > 0)
2101                 np->in_space--;
2102
2103         /* Setup the NAT table */
2104         nat->nat_inip = fin->fin_src;
2105         nat->nat_outip.s_addr = htonl(in.s_addr);
2106         nat->nat_oip = fin->fin_dst;
2107         if (nat->nat_hm == NULL)
2108                 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2109                                           nat->nat_outip, 0);
2110
2111         /*
2112          * The ICMP checksum does not have a pseudo header containing
2113          * the IP addresses
2114          */
2115         ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2116         ni->nai_sum2 = LONG_SUM(in.s_addr);
2117         if ((flags & IPN_TCPUDP)) {
2118                 ni->nai_sum1 += ntohs(sport);
2119                 ni->nai_sum2 += ntohs(port);
2120         }
2121
2122         if (flags & IPN_TCPUDP) {
2123                 nat->nat_inport = sport;
2124                 nat->nat_outport = port;        /* sport */
2125                 nat->nat_oport = dport;
2126                 ((tcphdr_t *)fin->fin_dp)->th_sport = port;
2127         } else if (flags & IPN_ICMPQUERY) {
2128                 ((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2129                 nat->nat_inport = port;
2130                 nat->nat_outport = port;
2131         } else if (fin->fin_p == IPPROTO_GRE) {
2132 #if 0
2133                 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2134                 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2135                         nat->nat_oport = 0;/*fin->fin_data[1];*/
2136                         nat->nat_inport = 0;/*fin->fin_data[0];*/
2137                         nat->nat_outport = 0;/*fin->fin_data[0];*/
2138                         nat->nat_call[0] = fin->fin_data[0];
2139                         nat->nat_call[1] = fin->fin_data[0];
2140                 }
2141 #endif
2142         }
2143         ni->nai_ip.s_addr = in.s_addr;
2144         ni->nai_port = port;
2145         ni->nai_nport = dport;
2146         return 0;
2147 }
2148
2149
2150 /* ------------------------------------------------------------------------ */
2151 /* Function:    nat_newrdr                                                  */
2152 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2153 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2154 /* Parameters:  fin(I) - pointer to packet information                      */
2155 /*              nat(I) - pointer to NAT entry                               */
2156 /*              ni(I)  - pointer to structure with misc. information needed */
2157 /*                       to create new NAT entry.                           */
2158 /*                                                                          */
2159 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2160 /* to the new IP address for the translation.                               */
2161 /* ------------------------------------------------------------------------ */
2162 static INLINE int nat_newrdr(fin, nat, ni)
2163 fr_info_t *fin;
2164 nat_t *nat;
2165 natinfo_t *ni;
2166 {
2167         u_short nport, dport, sport;
2168         struct in_addr in, inb;
2169         u_short sp, dp;
2170         hostmap_t *hm;
2171         u_32_t flags;
2172         ipnat_t *np;
2173         nat_t *natl;
2174         int move;
2175
2176         move = 1;
2177         hm = NULL;
2178         in.s_addr = 0;
2179         np = ni->nai_np;
2180         flags = ni->nai_flags;
2181         sport = ni->nai_sport;
2182         dport = ni->nai_dport;
2183
2184         /*
2185          * If the matching rule has IPN_STICKY set, then we want to have the
2186          * same rule kick in as before.  Why would this happen?  If you have
2187          * a collection of rdr rules with "round-robin sticky", the current
2188          * packet might match a different one to the previous connection but
2189          * we want the same destination to be used.
2190          */
2191         if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2192             ((np->in_flags & IPN_STICKY) != 0)) {
2193                 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2194                                  (u_32_t)dport);
2195                 if (hm != NULL) {
2196                         in.s_addr = ntohl(hm->hm_mapip.s_addr);
2197                         np = hm->hm_ipnat;
2198                         ni->nai_np = np;
2199                         move = 0;
2200                 }
2201         }
2202
2203         /*
2204          * Otherwise, it's an inbound packet. Most likely, we don't
2205          * want to rewrite source ports and source addresses. Instead,
2206          * we want to rewrite to a fixed internal address and fixed
2207          * internal port.
2208          */
2209         if (np->in_flags & IPN_SPLIT) {
2210                 in.s_addr = np->in_nip;
2211
2212                 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2213                         hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2214                                          in, (u_32_t)dport);
2215                         if (hm != NULL) {
2216                                 in.s_addr = hm->hm_mapip.s_addr;
2217                                 move = 0;
2218                         }
2219                 }
2220
2221                 if (hm == NULL || hm->hm_ref == 1) {
2222                         if (np->in_inip == htonl(in.s_addr)) {
2223                                 np->in_nip = ntohl(np->in_inmsk);
2224                                 move = 0;
2225                         } else {
2226                                 np->in_nip = ntohl(np->in_inip);
2227                         }
2228                 }
2229
2230         } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2231                 /*
2232                  * 0/32 - use the interface's IP address.
2233                  */
2234                 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2235                         return -1;
2236                 in.s_addr = ntohl(in.s_addr);
2237
2238         } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2239                 /*
2240                  * 0/0 - use the original destination address/port.
2241                  */
2242                 in.s_addr = ntohl(fin->fin_daddr);
2243
2244         } else if (np->in_redir == NAT_BIMAP &&
2245                    np->in_inmsk == np->in_outmsk) {
2246                 /*
2247                  * map the address block in a 1:1 fashion
2248                  */
2249                 in.s_addr = np->in_inip;
2250                 in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2251                 in.s_addr = ntohl(in.s_addr);
2252         } else {
2253                 in.s_addr = ntohl(np->in_inip);
2254         }
2255
2256         if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2257                 nport = dport;
2258         else {
2259                 /*
2260                  * Whilst not optimized for the case where
2261                  * pmin == pmax, the gain is not significant.
2262                  */
2263                 if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2264                     (np->in_pmin != np->in_pmax)) {
2265                         nport = ntohs(dport) - ntohs(np->in_pmin) +
2266                                 ntohs(np->in_pnext);
2267                         nport = htons(nport);
2268                 } else
2269                         nport = np->in_pnext;
2270         }
2271
2272         /*
2273          * When the redirect-to address is set to 0.0.0.0, just
2274          * assume a blank `forwarding' of the packet.  We don't
2275          * setup any translation for this either.
2276          */
2277         if (in.s_addr == 0) {
2278                 if (nport == dport)
2279                         return -1;
2280                 in.s_addr = ntohl(fin->fin_daddr);
2281         }
2282
2283         /*
2284          * Check to see if this redirect mapping already exists and if
2285          * it does, return "failure" (allowing it to be created will just
2286          * cause one or both of these "connections" to stop working.)
2287          */
2288         inb.s_addr = htonl(in.s_addr);
2289         sp = fin->fin_data[0];
2290         dp = fin->fin_data[1];
2291         fin->fin_data[1] = fin->fin_data[0];
2292         fin->fin_data[0] = ntohs(nport);
2293         natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2294                              (u_int)fin->fin_p, inb, fin->fin_src);
2295         fin->fin_data[0] = sp;
2296         fin->fin_data[1] = dp;
2297         if (natl != NULL)
2298                 return -1;
2299
2300         nat->nat_inip.s_addr = htonl(in.s_addr);
2301         nat->nat_outip = fin->fin_dst;
2302         nat->nat_oip = fin->fin_src;
2303         if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2304                 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
2305                                           (u_32_t)dport);
2306
2307         ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2308         ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2309
2310         ni->nai_ip.s_addr = in.s_addr;
2311         ni->nai_nport = nport;
2312         ni->nai_port = sport;
2313
2314         if (flags & IPN_TCPUDP) {
2315                 nat->nat_inport = nport;
2316                 nat->nat_outport = dport;
2317                 nat->nat_oport = sport;
2318                 ((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2319         } else if (flags & IPN_ICMPQUERY) {
2320                 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2321                 nat->nat_inport = nport;
2322                 nat->nat_outport = nport;
2323         } else if (fin->fin_p == IPPROTO_GRE) {
2324 #if 0
2325                 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2326                 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2327                         nat->nat_call[0] = fin->fin_data[0];
2328                         nat->nat_call[1] = fin->fin_data[1];
2329                         nat->nat_oport = 0; /*fin->fin_data[0];*/
2330                         nat->nat_inport = 0; /*fin->fin_data[1];*/
2331                         nat->nat_outport = 0; /*fin->fin_data[1];*/
2332                 }
2333 #endif
2334         }
2335
2336         return move;
2337 }
2338
2339 /* ------------------------------------------------------------------------ */
2340 /* Function:    nat_new                                                     */
2341 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2342 /*                       else pointer to new NAT structure                  */
2343 /* Parameters:  fin(I)       - pointer to packet information                */
2344 /*              np(I)        - pointer to NAT rule                          */
2345 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2346 /*              flags(I)     - flags describing the current packet          */
2347 /*              direction(I) - direction of packet (in/out)                 */
2348 /* Write Lock:  ipf_nat                                                     */
2349 /*                                                                          */
2350 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2351 /* in any way.                                                              */
2352 /*                                                                          */
2353 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2354 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2355 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2356 /* and (3) building that structure and putting it into the NAT table(s).    */
2357 /*                                                                          */
2358 /* NOTE: natsave should NOT be used top point back to an ipstate_t struct   */
2359 /*       as it can result in memory being corrupted.                        */
2360 /* ------------------------------------------------------------------------ */
2361 nat_t *nat_new(fin, np, natsave, flags, direction)
2362 fr_info_t *fin;
2363 ipnat_t *np;
2364 nat_t **natsave;
2365 u_int flags;
2366 int direction;
2367 {
2368         u_short port = 0, sport = 0, dport = 0, nport = 0;
2369         tcphdr_t *tcp = NULL;
2370         hostmap_t *hm = NULL;
2371         struct in_addr in;
2372         nat_t *nat, *natl;
2373         u_int nflags;
2374         natinfo_t ni;
2375         u_32_t sumd;
2376         int move;
2377 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2378         qpktinfo_t *qpi = fin->fin_qpi;
2379 #endif
2380
2381         if (nat_stats.ns_inuse >= ipf_nattable_max) {
2382                 nat_stats.ns_memfail++;
2383                 fr_nat_doflush = 1;
2384                 return NULL;
2385         }
2386
2387         move = 1;
2388         nflags = np->in_flags & flags;
2389         nflags &= NAT_FROMRULE;
2390
2391         ni.nai_np = np;
2392         ni.nai_nflags = nflags;
2393         ni.nai_flags = flags;
2394         ni.nai_dport = 0;
2395         ni.nai_sport = 0;
2396
2397         /* Give me a new nat */
2398         KMALLOC(nat, nat_t *);
2399         if (nat == NULL) {
2400                 nat_stats.ns_memfail++;
2401                 /*
2402                  * Try to automatically tune the max # of entries in the
2403                  * table allowed to be less than what will cause kmem_alloc()
2404                  * to fail and try to eliminate panics due to out of memory
2405                  * conditions arising.
2406                  */
2407                 if (ipf_nattable_max > ipf_nattable_sz) {
2408                         ipf_nattable_max = nat_stats.ns_inuse - 100;
2409                         printf("ipf_nattable_max reduced to %d\n",
2410                                 ipf_nattable_max);
2411                 }
2412                 return NULL;
2413         }
2414
2415         if (flags & IPN_TCPUDP) {
2416                 tcp = fin->fin_dp;
2417                 ni.nai_sport = htons(fin->fin_sport);
2418                 ni.nai_dport = htons(fin->fin_dport);
2419         } else if (flags & IPN_ICMPQUERY) {
2420                 /*
2421                  * In the ICMP query NAT code, we translate the ICMP id fields
2422                  * to make them unique. This is indepedent of the ICMP type
2423                  * (e.g. in the unlikely event that a host sends an echo and
2424                  * an tstamp request with the same id, both packets will have
2425                  * their ip address/id field changed in the same way).
2426                  */
2427                 /* The icmp_id field is used by the sender to identify the
2428                  * process making the icmp request. (the receiver justs
2429                  * copies it back in its response). So, it closely matches
2430                  * the concept of source port. We overlay sport, so we can
2431                  * maximally reuse the existing code.
2432                  */
2433                 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2434                 ni.nai_dport = ni.nai_sport;
2435         }
2436
2437         bzero((char *)nat, sizeof(*nat));
2438         nat->nat_flags = flags;
2439         nat->nat_redir = np->in_redir;
2440
2441         if ((flags & NAT_SLAVE) == 0) {
2442                 MUTEX_ENTER(&ipf_nat_new);
2443         }
2444
2445         /*
2446          * Search the current table for a match.
2447          */
2448         if (direction == NAT_OUTBOUND) {
2449                 /*
2450                  * We can now arrange to call this for the same connection
2451                  * because ipf_nat_new doesn't protect the code path into
2452                  * this function.
2453                  */
2454                 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2455                                      fin->fin_src, fin->fin_dst);
2456                 if (natl != NULL) {
2457                         KFREE(nat);
2458                         nat = natl;
2459                         goto done;
2460                 }
2461
2462                 move = nat_newmap(fin, nat, &ni);
2463                 if (move == -1)
2464                         goto badnat;
2465
2466                 np = ni.nai_np;
2467                 in = ni.nai_ip;
2468         } else {
2469                 /*
2470                  * NAT_INBOUND is used only for redirects rules
2471                  */
2472                 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2473                                     fin->fin_src, fin->fin_dst);
2474                 if (natl != NULL) {
2475                         KFREE(nat);
2476                         nat = natl;
2477                         goto done;
2478                 }
2479
2480                 move = nat_newrdr(fin, nat, &ni);
2481                 if (move == -1)
2482                         goto badnat;
2483
2484                 np = ni.nai_np;
2485                 in = ni.nai_ip;
2486         }
2487         port = ni.nai_port;
2488         nport = ni.nai_nport;
2489
2490         if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2491                 if (np->in_redir == NAT_REDIRECT) {
2492                         nat_delrdr(np);
2493                         nat_addrdr(np);
2494                 } else if (np->in_redir == NAT_MAP) {
2495                         nat_delnat(np);
2496                         nat_addnat(np);
2497                 }
2498         }
2499
2500         if (flags & IPN_TCPUDP) {
2501                 sport = ni.nai_sport;
2502                 dport = ni.nai_dport;
2503         } else if (flags & IPN_ICMPQUERY) {
2504                 sport = ni.nai_sport;
2505                 dport = 0;
2506         }
2507
2508         CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2509         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2510 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2511         if ((flags & IPN_TCP) && dohwcksum &&
2512             (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2513                 if (direction == NAT_OUTBOUND)
2514                         ni.nai_sum1 = LONG_SUM(in.s_addr);
2515                 else
2516                         ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2517                 ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2518                 ni.nai_sum1 += 30;
2519                 ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2520                 nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2521         } else
2522 #endif
2523                 nat->nat_sumd[1] = nat->nat_sumd[0];
2524
2525         if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2526                 if (direction == NAT_OUTBOUND)
2527                         ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2528                 else
2529                         ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2530
2531                 ni.nai_sum2 = LONG_SUM(in.s_addr);
2532
2533                 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2534                 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2535         } else {
2536                 nat->nat_ipsumd = nat->nat_sumd[0];
2537                 if (!(flags & IPN_TCPUDPICMP)) {
2538                         nat->nat_sumd[0] = 0;
2539                         nat->nat_sumd[1] = 0;
2540                 }
2541         }
2542
2543         if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2544                 fr_nat_doflush = 1;
2545                 goto badnat;
2546         }
2547         if (flags & SI_WILDP)
2548                 nat_stats.ns_wilds++;
2549         fin->fin_flx |= FI_NEWNAT;
2550         goto done;
2551 badnat:
2552         nat_stats.ns_badnat++;
2553         if ((hm = nat->nat_hm) != NULL)
2554                 fr_hostmapdel(&hm);
2555         KFREE(nat);
2556         nat = NULL;
2557 done:
2558         if ((flags & NAT_SLAVE) == 0) {
2559                 MUTEX_EXIT(&ipf_nat_new);
2560         }
2561         return nat;
2562 }
2563
2564
2565 /* ------------------------------------------------------------------------ */
2566 /* Function:    nat_finalise                                                */
2567 /* Returns:     int - 0 == sucess, -1 == failure                            */
2568 /* Parameters:  fin(I) - pointer to packet information                      */
2569 /*              nat(I) - pointer to NAT entry                               */
2570 /*              ni(I)  - pointer to structure with misc. information needed */
2571 /*                       to create new NAT entry.                           */
2572 /* Write Lock:  ipf_nat                                                     */
2573 /*                                                                          */
2574 /* This is the tail end of constructing a new NAT entry and is the same     */
2575 /* for both IPv4 and IPv6.                                                  */
2576 /* ------------------------------------------------------------------------ */
2577 /*ARGSUSED*/
2578 static int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2579 fr_info_t *fin;
2580 nat_t *nat;
2581 natinfo_t *ni;
2582 tcphdr_t *tcp;
2583 nat_t **natsave;
2584 int direction;
2585 {
2586         frentry_t *fr;
2587         ipnat_t *np;
2588
2589         np = ni->nai_np;
2590
2591         if (np->in_ifps[0] != NULL) {
2592                 COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2593         }
2594         if (np->in_ifps[1] != NULL) {
2595                 COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
2596         }
2597 #ifdef  IPFILTER_SYNC
2598         if ((nat->nat_flags & SI_CLONE) == 0)
2599                 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2600 #endif
2601
2602         nat->nat_me = natsave;
2603         nat->nat_dir = direction;
2604         nat->nat_ifps[0] = np->in_ifps[0];
2605         nat->nat_ifps[1] = np->in_ifps[1];
2606         nat->nat_ptr = np;
2607         nat->nat_p = fin->fin_p;
2608         nat->nat_mssclamp = np->in_mssclamp;
2609         if (nat->nat_p == IPPROTO_TCP)
2610                 nat->nat_seqnext[0] = ntohl(tcp->th_seq);
2611
2612         if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2613                 if (appr_new(fin, nat) == -1)
2614                         return -1;
2615
2616         if (nat_insert(nat, fin->fin_rev) == 0) {
2617                 if (nat_logging)
2618                         nat_log(nat, (u_int)np->in_redir);
2619                 np->in_use++;
2620                 fr = fin->fin_fr;
2621                 nat->nat_fr = fr;
2622                 if (fr != NULL) {
2623                         MUTEX_ENTER(&fr->fr_lock);
2624                         fr->fr_ref++;
2625                         MUTEX_EXIT(&fr->fr_lock);
2626                 }
2627                 return 0;
2628         }
2629
2630         /*
2631          * nat_insert failed, so cleanup time...
2632          */
2633         return -1;
2634 }
2635
2636
2637 /* ------------------------------------------------------------------------ */
2638 /* Function:   nat_insert                                                   */
2639 /* Returns:    int - 0 == sucess, -1 == failure                             */
2640 /* Parameters: nat(I) - pointer to NAT structure                            */
2641 /*             rev(I) - flag indicating forward/reverse direction of packet */
2642 /* Write Lock: ipf_nat                                                      */
2643 /*                                                                          */
2644 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2645 /* list of active NAT entries.  Adjust global counters when complete.       */
2646 /* ------------------------------------------------------------------------ */
2647 int     nat_insert(nat, rev)
2648 nat_t   *nat;
2649 int     rev;
2650 {
2651         u_int hv1, hv2;
2652         nat_t **natp;
2653
2654         /*
2655          * Try and return an error as early as possible, so calculate the hash
2656          * entry numbers first and then proceed.
2657          */
2658         if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2659                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2660                                   0xffffffff);
2661                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2662                                   ipf_nattable_sz);
2663                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2664                                   0xffffffff);
2665                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2666                                   ipf_nattable_sz);
2667         } else {
2668                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2669                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2670                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2671                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
2672         }
2673
2674         if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2675             nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2676                 return -1;
2677         }
2678
2679         nat->nat_hv[0] = hv1;
2680         nat->nat_hv[1] = hv2;
2681
2682         MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2683
2684         nat->nat_rev = rev;
2685         nat->nat_ref = 1;
2686         nat->nat_bytes[0] = 0;
2687         nat->nat_pkts[0] = 0;
2688         nat->nat_bytes[1] = 0;
2689         nat->nat_pkts[1] = 0;
2690
2691         nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2692         nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2693
2694         if (nat->nat_ifnames[1][0] != '\0') {
2695                 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2696                 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2697         } else {
2698                 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2699                                LIFNAMSIZ);
2700                 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2701                 nat->nat_ifps[1] = nat->nat_ifps[0];
2702         }
2703
2704         nat->nat_next = nat_instances;
2705         nat->nat_pnext = &nat_instances;
2706         if (nat_instances)
2707                 nat_instances->nat_pnext = &nat->nat_next;
2708         nat_instances = nat;
2709
2710         natp = &nat_table[0][hv1];
2711         if (*natp)
2712                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2713         nat->nat_phnext[0] = natp;
2714         nat->nat_hnext[0] = *natp;
2715         *natp = nat;
2716         nat_stats.ns_bucketlen[0][hv1]++;
2717
2718         natp = &nat_table[1][hv2];
2719         if (*natp)
2720                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2721         nat->nat_phnext[1] = natp;
2722         nat->nat_hnext[1] = *natp;
2723         *natp = nat;
2724         nat_stats.ns_bucketlen[1][hv2]++;
2725
2726         fr_setnatqueue(nat, rev);
2727
2728         nat_stats.ns_added++;
2729         nat_stats.ns_inuse++;
2730         return 0;
2731 }
2732
2733
2734 /* ------------------------------------------------------------------------ */
2735 /* Function:    nat_icmperrorlookup                                         */
2736 /* Returns:     nat_t* - point to matching NAT structure                    */
2737 /* Parameters:  fin(I) - pointer to packet information                      */
2738 /*              dir(I) - direction of packet (in/out)                       */
2739 /*                                                                          */
2740 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2741 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2742 /* the required length.                                                     */
2743 /* ------------------------------------------------------------------------ */
2744 nat_t *nat_icmperrorlookup(fin, dir)
2745 fr_info_t *fin;
2746 int dir;
2747 {
2748         int flags = 0, type, minlen;
2749         icmphdr_t *icmp, *orgicmp;
2750         tcphdr_t *tcp = NULL;
2751         u_short data[2];
2752         nat_t *nat;
2753         ip_t *oip;
2754         u_int p;
2755
2756         icmp = fin->fin_dp;
2757         type = icmp->icmp_type;
2758         /*
2759          * Does it at least have the return (basic) IP header ?
2760          * Only a basic IP header (no options) should be with an ICMP error
2761          * header.  Also, if it's not an error type, then return.
2762          */
2763         if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2764                 return NULL;
2765
2766         /*
2767          * Check packet size
2768          */
2769         oip = (ip_t *)((char *)fin->fin_dp + 8);
2770         minlen = IP_HL(oip) << 2;
2771         if ((minlen < sizeof(ip_t)) ||
2772             (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2773                 return NULL;
2774         /*
2775          * Is the buffer big enough for all of it ?  It's the size of the IP
2776          * header claimed in the encapsulated part which is of concern.  It
2777          * may be too big to be in this buffer but not so big that it's
2778          * outside the ICMP packet, leading to TCP deref's causing problems.
2779          * This is possible because we don't know how big oip_hl is when we
2780          * do the pullup early in fr_check() and thus can't gaurantee it is
2781          * all here now.
2782          */
2783 #ifdef  _KERNEL
2784         {
2785         mb_t *m;
2786
2787         m = fin->fin_m;
2788 # if defined(MENTAT)
2789         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2790                 return NULL;
2791 # else
2792         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2793             (char *)fin->fin_ip + M_LEN(m))
2794                 return NULL;
2795 # endif
2796         }
2797 #endif
2798
2799         if (fin->fin_daddr != oip->ip_src.s_addr)
2800                 return NULL;
2801
2802         p = oip->ip_p;
2803         if (p == IPPROTO_TCP)
2804                 flags = IPN_TCP;
2805         else if (p == IPPROTO_UDP)
2806                 flags = IPN_UDP;
2807         else if (p == IPPROTO_ICMP) {
2808                 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2809
2810                 /* see if this is related to an ICMP query */
2811                 if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2812                         data[0] = fin->fin_data[0];
2813                         data[1] = fin->fin_data[1];
2814                         fin->fin_data[0] = 0;
2815                         fin->fin_data[1] = orgicmp->icmp_id;
2816
2817                         flags = IPN_ICMPERR|IPN_ICMPQUERY;
2818                         /*
2819                          * NOTE : dir refers to the direction of the original
2820                          *        ip packet. By definition the icmp error
2821                          *        message flows in the opposite direction.
2822                          */
2823                         if (dir == NAT_INBOUND)
2824                                 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2825                                                    oip->ip_src);
2826                         else
2827                                 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2828                                                     oip->ip_src);
2829                         fin->fin_data[0] = data[0];
2830                         fin->fin_data[1] = data[1];
2831                         return nat;
2832                 }
2833         }
2834                 
2835         if (flags & IPN_TCPUDP) {
2836                 minlen += 8;            /* + 64bits of data to get ports */
2837                 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2838                         return NULL;
2839
2840                 data[0] = fin->fin_data[0];
2841                 data[1] = fin->fin_data[1];
2842                 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2843                 fin->fin_data[0] = ntohs(tcp->th_dport);
2844                 fin->fin_data[1] = ntohs(tcp->th_sport);
2845
2846                 if (dir == NAT_INBOUND) {
2847                         nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2848                                            oip->ip_src);
2849                 } else {
2850                         nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2851                                             oip->ip_src);
2852                 }
2853                 fin->fin_data[0] = data[0];
2854                 fin->fin_data[1] = data[1];
2855                 return nat;
2856         }
2857         if (dir == NAT_INBOUND)
2858                 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2859         else
2860                 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2861 }
2862
2863
2864 /* ------------------------------------------------------------------------ */
2865 /* Function:    nat_icmperror                                               */
2866 /* Returns:     nat_t* - point to matching NAT structure                    */
2867 /* Parameters:  fin(I)    - pointer to packet information                   */
2868 /*              nflags(I) - NAT flags for this packet                       */
2869 /*              dir(I)    - direction of packet (in/out)                    */
2870 /*                                                                          */
2871 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2872 /* session.  This will correct both packet header data and checksums.       */
2873 /*                                                                          */
2874 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2875 /* a NAT'd ICMP packet gets correctly recognised.                           */
2876 /* ------------------------------------------------------------------------ */
2877 nat_t *nat_icmperror(fin, nflags, dir)
2878 fr_info_t *fin;
2879 u_int *nflags;
2880 int dir;
2881 {
2882         u_32_t sum1, sum2, sumd, sumd2;
2883         struct in_addr a1, a2;
2884         int flags, dlen, odst;
2885         icmphdr_t *icmp;
2886         u_short *csump;
2887         tcphdr_t *tcp;
2888         nat_t *nat;
2889         ip_t *oip;
2890         void *dp;
2891
2892         if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2893                 return NULL;
2894         /*
2895          * nat_icmperrorlookup() will return NULL for `defective' packets.
2896          */
2897         if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2898                 return NULL;
2899
2900         tcp = NULL;
2901         csump = NULL;
2902         flags = 0;
2903         sumd2 = 0;
2904         *nflags = IPN_ICMPERR;
2905         icmp = fin->fin_dp;
2906         oip = (ip_t *)&icmp->icmp_ip;
2907         dp = (((char *)oip) + (IP_HL(oip) << 2));
2908         if (oip->ip_p == IPPROTO_TCP) {
2909                 tcp = (tcphdr_t *)dp;
2910                 csump = (u_short *)&tcp->th_sum;
2911                 flags = IPN_TCP;
2912         } else if (oip->ip_p == IPPROTO_UDP) {
2913                 udphdr_t *udp;
2914
2915                 udp = (udphdr_t *)dp;
2916                 tcp = (tcphdr_t *)dp;
2917                 csump = (u_short *)&udp->uh_sum;
2918                 flags = IPN_UDP;
2919         } else if (oip->ip_p == IPPROTO_ICMP)
2920                 flags = IPN_ICMPQUERY;
2921         dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);
2922
2923         /*
2924          * Need to adjust ICMP header to include the real IP#'s and
2925          * port #'s.  Only apply a checksum change relative to the
2926          * IP address change as it will be modified again in fr_checknatout
2927          * for both address and port.  Two checksum changes are
2928          * necessary for the two header address changes.  Be careful
2929          * to only modify the checksum once for the port # and twice
2930          * for the IP#.
2931          */
2932
2933         /*
2934          * Step 1
2935          * Fix the IP addresses in the offending IP packet. You also need
2936          * to adjust the IP header checksum of that offending IP packet.
2937          *
2938          * Normally, you would expect that the ICMP checksum of the
2939          * ICMP error message needs to be adjusted as well for the
2940          * IP address change in oip.
2941          * However, this is a NOP, because the ICMP checksum is
2942          * calculated over the complete ICMP packet, which includes the
2943          * changed oip IP addresses and oip->ip_sum. However, these
2944          * two changes cancel each other out (if the delta for
2945          * the IP address is x, then the delta for ip_sum is minus x),
2946          * so no change in the icmp_cksum is necessary.
2947          *
2948          * Inbound ICMP
2949          * ------------
2950          * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2951          * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
2952          * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
2953          *
2954          * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2955          * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2956          * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
2957          *
2958          * Outbound ICMP
2959          * -------------
2960          * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2961          * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2962          * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2963          *
2964          * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2965          * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
2966          * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2967          *
2968          */
2969         odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
2970         if (odst == 1) {
2971                 a1.s_addr = ntohl(nat->nat_inip.s_addr);
2972                 a2.s_addr = ntohl(oip->ip_src.s_addr);
2973                 oip->ip_src.s_addr = htonl(a1.s_addr);
2974         } else {
2975                 a1.s_addr = ntohl(nat->nat_outip.s_addr);
2976                 a2.s_addr = ntohl(oip->ip_dst.s_addr);
2977                 oip->ip_dst.s_addr = htonl(a1.s_addr);
2978         }
2979
2980         sumd = a2.s_addr - a1.s_addr;
2981         if (sumd != 0) {
2982                 if (a1.s_addr > a2.s_addr)
2983                         sumd--;
2984                 sumd = ~sumd;
2985
2986                 fix_datacksum(&oip->ip_sum, sumd);
2987         }
2988
2989         sumd2 = sumd;
2990         sum1 = 0;
2991         sum2 = 0;
2992
2993         /*
2994          * Fix UDP pseudo header checksum to compensate for the
2995          * IP address change.
2996          */
2997         if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
2998                 /*
2999                  * Step 2 :
3000                  * For offending TCP/UDP IP packets, translate the ports as
3001                  * well, based on the NAT specification. Of course such
3002                  * a change may be reflected in the ICMP checksum as well.
3003                  *
3004                  * Since the port fields are part of the TCP/UDP checksum
3005                  * of the offending IP packet, you need to adjust that checksum
3006                  * as well... except that the change in the port numbers should 
3007                  * be offset by the checksum change.  However, the TCP/UDP
3008                  * checksum will also need to change if there has been an
3009                  * IP address change.
3010                  */
3011                 if (odst == 1) {
3012                         sum1 = ntohs(nat->nat_inport);
3013                         sum2 = ntohs(tcp->th_sport);
3014
3015                         tcp->th_sport = htons(sum1);
3016                 } else {
3017                         sum1 = ntohs(nat->nat_outport);
3018                         sum2 = ntohs(tcp->th_dport);
3019
3020                         tcp->th_dport = htons(sum1);
3021                 }
3022
3023                 sumd += sum1 - sum2;
3024                 if (sumd != 0 || sumd2 != 0) {
3025                         /*
3026                          * At this point, sumd is the delta to apply to the
3027                          * TCP/UDP header, given the changes in both the IP
3028                          * address and the ports and sumd2 is the delta to
3029                          * apply to the ICMP header, given the IP address
3030                          * change delta that may need to be applied to the
3031                          * TCP/UDP checksum instead.
3032                          *
3033                          * If we change both the IP and TCP/UDP checksums
3034                          * then the ICMP checksum changes by the address
3035                          * delta applied to the TCP/UDP checksum.  If we
3036                          * do not change the TCP/UDP checksum then we
3037                          * apply the delta in ports to the ICMP checksum.
3038                          */
3039                         if (oip->ip_p == IPPROTO_UDP) {
3040                                 if ((dlen >= 8) && (*csump != 0)) {
3041                                         fix_datacksum(csump, sumd);
3042                                 } else {
3043                                         sumd2 = sum1 - sum2;
3044                                         if (sum2 > sum1)
3045                                                 sumd2--;
3046                                 }
3047                         } else if (oip->ip_p == IPPROTO_TCP) {
3048                                 if (dlen >= 18) {
3049                                         fix_datacksum(csump, sumd);
3050                                 } else {
3051                                         sumd2 = sum2 - sum1;
3052                                         if (sum1 > sum2)
3053                                                 sumd2--;
3054                                 }
3055                         }
3056
3057                         if (sumd2 != 0) {
3058                                 ipnat_t *np;
3059
3060                                 np = nat->nat_ptr;
3061                                 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3062                                 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3063                                 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3064
3065                                 if ((odst == 0) && (dir == NAT_OUTBOUND) &&
3066                                     (fin->fin_rev == 0) && (np != NULL) &&
3067                                     (np->in_redir & NAT_REDIRECT)) {
3068                                         fix_outcksum(fin, &icmp->icmp_cksum,
3069                                                      sumd2);
3070                                 } else {
3071                                         fix_incksum(fin, &icmp->icmp_cksum,
3072                                                     sumd2);
3073                                 }
3074                         }
3075                 }
3076         } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
3077                 icmphdr_t *orgicmp;
3078
3079                 /*
3080                  * XXX - what if this is bogus hl and we go off the end ?
3081                  * In this case, nat_icmperrorlookup() will have returned NULL.
3082                  */
3083                 orgicmp = (icmphdr_t *)dp;
3084
3085                 if (odst == 1) {
3086                         if (orgicmp->icmp_id != nat->nat_inport) {
3087
3088                                 /*
3089                                  * Fix ICMP checksum (of the offending ICMP
3090                                  * query packet) to compensate the change
3091                                  * in the ICMP id of the offending ICMP
3092                                  * packet.
3093                                  *
3094                                  * Since you modify orgicmp->icmp_id with
3095                                  * a delta (say x) and you compensate that
3096                                  * in origicmp->icmp_cksum with a delta
3097                                  * minus x, you don't have to adjust the
3098                                  * overall icmp->icmp_cksum
3099                                  */
3100                                 sum1 = ntohs(orgicmp->icmp_id);
3101                                 sum2 = ntohs(nat->nat_inport);
3102                                 CALC_SUMD(sum1, sum2, sumd);
3103                                 orgicmp->icmp_id = nat->nat_inport;
3104                                 fix_datacksum(&orgicmp->icmp_cksum, sumd);
3105                         }
3106                 } /* nat_dir == NAT_INBOUND is impossible for icmp queries */
3107         }
3108         return nat;
3109 }
3110
3111
3112 /*
3113  * NB: these lookups don't lock access to the list; it is assumed that this
3114  * has already been done!
3115  */
3116
3117 /* ------------------------------------------------------------------------ */
3118 /* Function:    nat_inlookup                                                */
3119 /* Returns:     nat_t* - NULL == no match,                                  */
3120 /*                       else pointer to matching NAT entry                 */
3121 /* Parameters:  fin(I)    - pointer to packet information                   */
3122 /*              flags(I)  - NAT flags for this packet                       */
3123 /*              p(I)      - protocol for this packet                        */
3124 /*              src(I)    - source IP address                               */
3125 /*              mapdst(I) - destination IP address                          */
3126 /*                                                                          */
3127 /* Lookup a nat entry based on the mapped destination ip address/port and   */
3128 /* real source address/port.  We use this lookup when receiving a packet,   */
3129 /* we're looking for a table entry, based on the destination address.       */
3130 /*                                                                          */
3131 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3132 /*                                                                          */
3133 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3134 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3135 /*                                                                          */
3136 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3137 /*            the packet is of said protocol                                */
3138 /* ------------------------------------------------------------------------ */
3139 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3140 fr_info_t *fin;
3141 u_int flags, p;
3142 struct in_addr src , mapdst;
3143 {
3144         u_short sport, dport;
3145         grehdr_t *gre;
3146         ipnat_t *ipn;
3147         u_int sflags;
3148         nat_t *nat;
3149         int nflags;
3150         u_32_t dst;
3151         void *ifp;
3152         u_int hv;
3153
3154         ifp = fin->fin_ifp;
3155         sport = 0;
3156         dport = 0;
3157         gre = NULL;
3158         dst = mapdst.s_addr;
3159         sflags = flags & NAT_TCPUDPICMP;
3160
3161         switch (p)
3162         {
3163         case IPPROTO_TCP :
3164         case IPPROTO_UDP :
3165                 sport = htons(fin->fin_data[0]);
3166                 dport = htons(fin->fin_data[1]);
3167                 break;
3168         case IPPROTO_ICMP :
3169                 if (flags & IPN_ICMPERR)
3170                         sport = fin->fin_data[1];
3171                 else
3172                         dport = fin->fin_data[1];
3173                 break;
3174         default :
3175                 break;
3176         }
3177
3178
3179         if ((flags & SI_WILDP) != 0)
3180                 goto find_in_wild_ports;
3181
3182         hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3183         hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
3184         nat = nat_table[1][hv];
3185         for (; nat; nat = nat->nat_hnext[1]) {
3186                 if (nat->nat_ifps[0] != NULL) {
3187                         if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3188                                 continue;
3189                 } else if (ifp != NULL)
3190                         nat->nat_ifps[0] = ifp;
3191
3192                 nflags = nat->nat_flags;
3193
3194                 if (nat->nat_oip.s_addr == src.s_addr &&
3195                     nat->nat_outip.s_addr == dst &&
3196                     (((p == 0) &&
3197                       (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3198                      || (p == nat->nat_p))) {
3199                         switch (p)
3200                         {
3201 #if 0
3202                         case IPPROTO_GRE :
3203                                 if (nat->nat_call[1] != fin->fin_data[0])
3204                                         continue;
3205                                 break;
3206 #endif
3207                         case IPPROTO_ICMP :
3208                                 if ((flags & IPN_ICMPERR) != 0) {
3209                                         if (nat->nat_outport != sport)
3210                                                 continue;
3211                                 } else {
3212                                         if (nat->nat_outport != dport)
3213                                                 continue;
3214                                 }
3215                                 break;
3216                         case IPPROTO_TCP :
3217                         case IPPROTO_UDP :
3218                                 if (nat->nat_oport != sport)
3219                                         continue;
3220                                 if (nat->nat_outport != dport)
3221                                         continue;
3222                                 break;
3223                         default :
3224                                 break;
3225                         }
3226
3227                         ipn = nat->nat_ptr;
3228                         if ((ipn != NULL) && (nat->nat_aps != NULL))
3229                                 if (appr_match(fin, nat) != 0)
3230                                         continue;
3231                         return nat;
3232                 }
3233         }
3234
3235         /*
3236          * So if we didn't find it but there are wildcard members in the hash
3237          * table, go back and look for them.  We do this search and update here
3238          * because it is modifying the NAT table and we want to do this only
3239          * for the first packet that matches.  The exception, of course, is
3240          * for "dummy" (FI_IGNORE) lookups.
3241          */
3242 find_in_wild_ports:
3243         if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3244                 return NULL;
3245         if (nat_stats.ns_wilds == 0)
3246                 return NULL;
3247
3248         RWLOCK_EXIT(&ipf_nat);
3249
3250         hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3251         hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
3252
3253         WRITE_ENTER(&ipf_nat);
3254
3255         nat = nat_table[1][hv];
3256         for (; nat; nat = nat->nat_hnext[1]) {
3257                 if (nat->nat_ifps[0] != NULL) {
3258                         if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3259                                 continue;
3260                 } else if (ifp != NULL)
3261                         nat->nat_ifps[0] = ifp;
3262
3263                 if (nat->nat_p != fin->fin_p)
3264                         continue;
3265                 if (nat->nat_oip.s_addr != src.s_addr ||
3266                     nat->nat_outip.s_addr != dst)
3267                         continue;
3268
3269                 nflags = nat->nat_flags;
3270                 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3271                         continue;
3272
3273                 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3274                                NAT_INBOUND) == 1) {
3275                         if ((fin->fin_flx & FI_IGNORE) != 0)
3276                                 break;
3277                         if ((nflags & SI_CLONE) != 0) {
3278                                 nat = fr_natclone(fin, nat);
3279                                 if (nat == NULL)
3280                                         break;
3281                         } else {
3282                                 MUTEX_ENTER(&ipf_nat_new);
3283                                 nat_stats.ns_wilds--;
3284                                 MUTEX_EXIT(&ipf_nat_new);
3285                         }
3286                         nat->nat_oport = sport;
3287                         nat->nat_outport = dport;
3288                         nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3289                         nat_tabmove(nat);
3290                         break;
3291                 }
3292         }
3293
3294         MUTEX_DOWNGRADE(&ipf_nat);
3295
3296         return nat;
3297 }
3298
3299
3300 /* ------------------------------------------------------------------------ */
3301 /* Function:    nat_tabmove                                                 */
3302 /* Returns:     Nil                                                         */
3303 /* Parameters:  nat(I) - pointer to NAT structure                           */
3304 /* Write Lock:  ipf_nat                                                     */
3305 /*                                                                          */
3306 /* This function is only called for TCP/UDP NAT table entries where the     */
3307 /* original was placed in the table without hashing on the ports and we now */
3308 /* want to include hashing on port numbers.                                 */
3309 /* ------------------------------------------------------------------------ */
3310 static void nat_tabmove(nat)
3311 nat_t *nat;
3312 {
3313         nat_t **natp;
3314         u_int hv;
3315
3316         if (nat->nat_flags & SI_CLONE)
3317                 return;
3318
3319         /*
3320          * Remove the NAT entry from the old location
3321          */
3322         if (nat->nat_hnext[0])
3323                 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3324         *nat->nat_phnext[0] = nat->nat_hnext[0];
3325         nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3326
3327         if (nat->nat_hnext[1])
3328                 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3329         *nat->nat_phnext[1] = nat->nat_hnext[1];
3330         nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3331
3332         /*
3333          * Add into the NAT table in the new position
3334          */
3335         hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3336         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3337                          ipf_nattable_sz);
3338         nat->nat_hv[0] = hv;
3339         natp = &nat_table[0][hv];
3340         if (*natp)
3341                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3342         nat->nat_phnext[0] = natp;
3343         nat->nat_hnext[0] = *natp;
3344         *natp = nat;
3345         nat_stats.ns_bucketlen[0][hv]++;
3346
3347         hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3348         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3349                          ipf_nattable_sz);
3350         nat->nat_hv[1] = hv;
3351         natp = &nat_table[1][hv];
3352         if (*natp)
3353                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3354         nat->nat_phnext[1] = natp;
3355         nat->nat_hnext[1] = *natp;
3356         *natp = nat;
3357         nat_stats.ns_bucketlen[1][hv]++;
3358 }
3359
3360
3361 /* ------------------------------------------------------------------------ */
3362 /* Function:    nat_outlookup                                               */
3363 /* Returns:     nat_t* - NULL == no match,                                  */
3364 /*                       else pointer to matching NAT entry                 */
3365 /* Parameters:  fin(I)   - pointer to packet information                    */
3366 /*              flags(I) - NAT flags for this packet                        */
3367 /*              p(I)     - protocol for this packet                         */
3368 /*              src(I)   - source IP address                                */
3369 /*              dst(I)   - destination IP address                           */
3370 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3371 /*                                                                          */
3372 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3373 /* destination address/port.  We use this lookup when sending a packet out, */
3374 /* we're looking for a table entry, based on the source address.            */
3375 /*                                                                          */
3376 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3377 /*                                                                          */
3378 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3379 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3380 /*                                                                          */
3381 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3382 /*            the packet is of said protocol                                */
3383 /* ------------------------------------------------------------------------ */
3384 nat_t *nat_outlookup(fin, flags, p, src, dst)
3385 fr_info_t *fin;
3386 u_int flags, p;
3387 struct in_addr src , dst;
3388 {
3389         u_short sport, dport;
3390         u_int sflags;
3391         ipnat_t *ipn;
3392         u_32_t srcip;
3393         nat_t *nat;
3394         int nflags;
3395         void *ifp;
3396         u_int hv;
3397
3398         ifp = fin->fin_ifp;
3399         srcip = src.s_addr;
3400         sflags = flags & IPN_TCPUDPICMP;
3401         sport = 0;
3402         dport = 0;
3403
3404         switch (p)
3405         {
3406         case IPPROTO_TCP :
3407         case IPPROTO_UDP :
3408                 sport = htons(fin->fin_data[0]);
3409                 dport = htons(fin->fin_data[1]);
3410                 break;
3411         case IPPROTO_ICMP :
3412                 if (flags & IPN_ICMPERR)
3413                         sport = fin->fin_data[1];
3414                 else
3415                         dport = fin->fin_data[1];
3416                 break;
3417         default :
3418                 break;
3419         }
3420
3421         if ((flags & SI_WILDP) != 0)
3422                 goto find_out_wild_ports;
3423
3424         hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3425         hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
3426         nat = nat_table[0][hv];
3427         for (; nat; nat = nat->nat_hnext[0]) {
3428                 if (nat->nat_ifps[1] != NULL) {
3429                         if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3430                                 continue;
3431                 } else if (ifp != NULL)
3432                         nat->nat_ifps[1] = ifp;
3433
3434                 nflags = nat->nat_flags;
3435
3436                 if (nat->nat_inip.s_addr == srcip &&
3437                     nat->nat_oip.s_addr == dst.s_addr &&
3438                     (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3439                      || (p == nat->nat_p))) {
3440                         switch (p)
3441                         {
3442 #if 0
3443                         case IPPROTO_GRE :
3444                                 if (nat->nat_call[1] != fin->fin_data[0])
3445                                         continue;
3446                                 break;
3447 #endif
3448                         case IPPROTO_TCP :
3449                         case IPPROTO_UDP :
3450                                 if (nat->nat_oport != dport)
3451                                         continue;
3452                                 if (nat->nat_inport != sport)
3453                                         continue;
3454                                 break;
3455                         default :
3456                                 break;
3457                         }
3458
3459                         ipn = nat->nat_ptr;
3460                         if ((ipn != NULL) && (nat->nat_aps != NULL))
3461                                 if (appr_match(fin, nat) != 0)
3462                                         continue;
3463                         return nat;
3464                 }
3465         }
3466
3467         /*
3468          * So if we didn't find it but there are wildcard members in the hash
3469          * table, go back and look for them.  We do this search and update here
3470          * because it is modifying the NAT table and we want to do this only
3471          * for the first packet that matches.  The exception, of course, is
3472          * for "dummy" (FI_IGNORE) lookups.
3473          */
3474 find_out_wild_ports:
3475         if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3476                 return NULL;
3477         if (nat_stats.ns_wilds == 0)
3478                 return NULL;
3479
3480         RWLOCK_EXIT(&ipf_nat);
3481
3482         hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3483         hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
3484
3485         WRITE_ENTER(&ipf_nat);
3486
3487         nat = nat_table[0][hv];
3488         for (; nat; nat = nat->nat_hnext[0]) {
3489                 if (nat->nat_ifps[1] != NULL) {
3490                         if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3491                                 continue;
3492                 } else if (ifp != NULL)
3493                         nat->nat_ifps[1] = ifp;
3494
3495                 if (nat->nat_p != fin->fin_p)
3496                         continue;
3497                 if ((nat->nat_inip.s_addr != srcip) ||
3498                     (nat->nat_oip.s_addr != dst.s_addr))
3499                         continue;
3500
3501                 nflags = nat->nat_flags;
3502                 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3503                         continue;
3504
3505                 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3506                                NAT_OUTBOUND) == 1) {
3507                         if ((fin->fin_flx & FI_IGNORE) != 0)
3508                                 break;
3509                         if ((nflags & SI_CLONE) != 0) {
3510                                 nat = fr_natclone(fin, nat);
3511                                 if (nat == NULL)
3512                                         break;
3513                         } else {
3514                                 MUTEX_ENTER(&ipf_nat_new);
3515                                 nat_stats.ns_wilds--;
3516                                 MUTEX_EXIT(&ipf_nat_new);
3517                         }
3518                         nat->nat_inport = sport;
3519                         nat->nat_oport = dport;
3520                         if (nat->nat_outport == 0)
3521                                 nat->nat_outport = sport;
3522                         nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3523                         nat_tabmove(nat);
3524                         break;
3525                 }
3526         }
3527
3528         MUTEX_DOWNGRADE(&ipf_nat);
3529
3530         return nat;
3531 }
3532
3533
3534 /* ------------------------------------------------------------------------ */
3535 /* Function:    nat_lookupredir                                             */
3536 /* Returns:     nat_t* - NULL == no match,                                  */
3537 /*                       else pointer to matching NAT entry                 */
3538 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3539 /*                      entry for.                                          */
3540 /*                                                                          */
3541 /* Lookup the NAT tables to search for a matching redirect                  */
3542 /* The contents of natlookup_t should imitate those found in a packet that  */
3543 /* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3544 /* We can do the lookup in one of two ways, imitating an inbound or         */
3545 /* outbound  packet.  By default we assume outbound, unless IPN_IN is set.  */
3546 /* For IN, the fields are set as follows:                                   */
3547 /*     nl_real* = source information                                        */
3548 /*     nl_out* = destination information (translated)                       */
3549 /* For an out packet, the fields are set like this:                         */
3550 /*     nl_in* = source information (untranslated)                           */
3551 /*     nl_out* = destination information (translated)                       */
3552 /* ------------------------------------------------------------------------ */
3553 nat_t *nat_lookupredir(np)
3554 natlookup_t *np;
3555 {
3556         fr_info_t fi;
3557         nat_t *nat;
3558
3559         bzero((char *)&fi, sizeof(fi));
3560         if (np->nl_flags & IPN_IN) {
3561                 fi.fin_data[0] = ntohs(np->nl_realport);
3562                 fi.fin_data[1] = ntohs(np->nl_outport);
3563         } else {
3564                 fi.fin_data[0] = ntohs(np->nl_inport);
3565                 fi.fin_data[1] = ntohs(np->nl_outport);
3566         }
3567         if (np->nl_flags & IPN_TCP)
3568                 fi.fin_p = IPPROTO_TCP;
3569         else if (np->nl_flags & IPN_UDP)
3570                 fi.fin_p = IPPROTO_UDP;
3571         else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3572                 fi.fin_p = IPPROTO_ICMP;
3573
3574         /*
3575          * We can do two sorts of lookups:
3576          * - IPN_IN: we have the `real' and `out' address, look for `in'.
3577          * - default: we have the `in' and `out' address, look for `real'.
3578          */
3579         if (np->nl_flags & IPN_IN) {
3580                 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3581                                         np->nl_realip, np->nl_outip))) {
3582                         np->nl_inip = nat->nat_inip;
3583                         np->nl_inport = nat->nat_inport;
3584                 }
3585         } else {
3586                 /*
3587                  * If nl_inip is non null, this is a lookup based on the real
3588                  * ip address. Else, we use the fake.
3589                  */
3590                 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3591                                          np->nl_inip, np->nl_outip))) {
3592
3593                         if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3594                                 fr_info_t fin;
3595                                 bzero((char *)&fin, sizeof(fin));
3596                                 fin.fin_p = nat->nat_p;
3597                                 fin.fin_data[0] = ntohs(nat->nat_outport);
3598                                 fin.fin_data[1] = ntohs(nat->nat_oport);
3599                                 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3600                                                  nat->nat_outip,
3601                                                  nat->nat_oip) != NULL) {
3602                                         np->nl_flags &= ~IPN_FINDFORWARD;
3603                                 }
3604                         }
3605
3606                         np->nl_realip = nat->nat_outip;
3607                         np->nl_realport = nat->nat_outport;
3608                 }
3609         }
3610
3611         return nat;
3612 }
3613
3614
3615 /* ------------------------------------------------------------------------ */
3616 /* Function:    nat_match                                                   */
3617 /* Returns:     int - 0 == no match, 1 == match                             */
3618 /* Parameters:  fin(I)   - pointer to packet information                    */
3619 /*              np(I)    - pointer to NAT rule                              */
3620 /*                                                                          */
3621 /* Pull the matching of a packet against a NAT rule out of that complex     */
3622 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3623 /* ------------------------------------------------------------------------ */
3624 static int nat_match(fin, np)
3625 fr_info_t *fin;
3626 ipnat_t *np;
3627 {
3628         frtuc_t *ft;
3629
3630         if (fin->fin_v != 4)
3631                 return 0;
3632
3633         if (np->in_p && fin->fin_p != np->in_p)
3634                 return 0;
3635
3636         if (fin->fin_out) {
3637                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3638                         return 0;
3639                 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3640                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
3641                         return 0;
3642                 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3643                     ^ ((np->in_flags & IPN_NOTDST) != 0))
3644                         return 0;
3645         } else {
3646                 if (!(np->in_redir & NAT_REDIRECT))
3647                         return 0;
3648                 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3649                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
3650                         return 0;
3651                 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3652                     ^ ((np->in_flags & IPN_NOTDST) != 0))
3653                         return 0;
3654         }
3655
3656         ft = &np->in_tuc;
3657         if (!(fin->fin_flx & FI_TCPUDP) ||
3658             (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3659                 if (ft->ftu_scmp || ft->ftu_dcmp)
3660                         return 0;
3661                 return 1;
3662         }
3663
3664         return fr_tcpudpchk(fin, ft);
3665 }
3666
3667
3668 /* ------------------------------------------------------------------------ */
3669 /* Function:    nat_update                                                  */
3670 /* Returns:     Nil                                                         */
3671 /* Parameters:  nat(I)    - pointer to NAT structure                        */
3672 /*              np(I)     - pointer to NAT rule                             */
3673 /*                                                                          */
3674 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3675 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3676 /* ------------------------------------------------------------------------ */
3677 void nat_update(fin, nat, np)
3678 fr_info_t *fin;
3679 nat_t *nat;
3680 ipnat_t *np;
3681 {
3682         ipftq_t *ifq, *ifq2;
3683         ipftqent_t *tqe;
3684
3685         MUTEX_ENTER(&nat->nat_lock);
3686         tqe = &nat->nat_tqe;
3687         ifq = tqe->tqe_ifq;
3688
3689         /*
3690          * We allow over-riding of NAT timeouts from NAT rules, even for
3691          * TCP, however, if it is TCP and there is no rule timeout set,
3692          * then do not update the timeout here.
3693          */
3694         if (np != NULL)
3695                 ifq2 = np->in_tqehead[fin->fin_rev];
3696         else
3697                 ifq2 = NULL;
3698
3699         if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3700                 u_32_t end, ack;
3701                 u_char tcpflags;
3702                 tcphdr_t *tcp;
3703                 int dsize;
3704
3705                 tcp = fin->fin_dp;
3706                 tcpflags = tcp->th_flags;
3707                 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
3708                         ((tcpflags & TH_SYN) ? 1 : 0) +
3709                         ((tcpflags & TH_FIN) ? 1 : 0);
3710
3711                 ack = ntohl(tcp->th_ack);
3712                 end = ntohl(tcp->th_seq) + dsize;
3713
3714                 if (SEQ_GT(ack, nat->nat_seqnext[1 - fin->fin_rev]))
3715                         nat->nat_seqnext[1 - fin->fin_rev] = ack;
3716
3717                 if (nat->nat_seqnext[fin->fin_rev] == 0)
3718                         nat->nat_seqnext[fin->fin_rev] = end;
3719
3720                 (void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3721         } else {
3722                 if (ifq2 == NULL) {
3723                         if (nat->nat_p == IPPROTO_UDP)
3724                                 ifq2 = &nat_udptq;
3725                         else if (nat->nat_p == IPPROTO_ICMP)
3726                                 ifq2 = &nat_icmptq;
3727                         else
3728                                 ifq2 = &nat_iptq;
3729                 }
3730
3731                 fr_movequeue(tqe, ifq, ifq2);
3732         }
3733         MUTEX_EXIT(&nat->nat_lock);
3734 }
3735
3736
3737 /* ------------------------------------------------------------------------ */
3738 /* Function:    fr_checknatout                                              */
3739 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3740 /*                     0 == no packet translation occurred,                 */
3741 /*                     1 == packet was successfully translated.             */
3742 /* Parameters:  fin(I)   - pointer to packet information                    */
3743 /*              passp(I) - pointer to filtering result flags                */
3744 /*                                                                          */
3745 /* Check to see if an outcoming packet should be changed.  ICMP packets are */
3746 /* first checked to see if they match an existing entry (if an error),      */
3747 /* otherwise a search of the current NAT table is made.  If neither results */
3748 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3749 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3750 /* packet header(s) as required.                                            */
3751 /* ------------------------------------------------------------------------ */
3752 int fr_checknatout(fin, passp)
3753 fr_info_t *fin;
3754 u_32_t *passp;
3755 {
3756         struct ifnet *ifp, *sifp;
3757         icmphdr_t *icmp = NULL;
3758         tcphdr_t *tcp = NULL;
3759         int rval, natfailed;
3760         ipnat_t *np = NULL;
3761         u_int nflags = 0;
3762         u_32_t ipa, iph;
3763         int natadd = 1;
3764         frentry_t *fr;
3765         nat_t *nat;
3766
3767         if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
3768                 return 0;
3769
3770         natfailed = 0;
3771         fr = fin->fin_fr;
3772         sifp = fin->fin_ifp;
3773         if (fr != NULL) {
3774                 ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3775                 if ((ifp != NULL) && (ifp != (void *)-1))
3776                         fin->fin_ifp = ifp;
3777         }
3778         ifp = fin->fin_ifp;
3779
3780         if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3781                 switch (fin->fin_p)
3782                 {
3783                 case IPPROTO_TCP :
3784                         nflags = IPN_TCP;
3785                         break;
3786                 case IPPROTO_UDP :
3787                         nflags = IPN_UDP;
3788                         break;
3789                 case IPPROTO_ICMP :
3790                         icmp = fin->fin_dp;
3791
3792                         /*
3793                          * This is an incoming packet, so the destination is
3794                          * the icmp_id and the source port equals 0
3795                          */
3796                         if (nat_icmpquerytype4(icmp->icmp_type))
3797                                 nflags = IPN_ICMPQUERY;
3798                         break;
3799                 default :
3800                         break;
3801                 }
3802                 
3803                 if ((nflags & IPN_TCPUDP))
3804                         tcp = fin->fin_dp;
3805         }
3806
3807         ipa = fin->fin_saddr;
3808
3809         READ_ENTER(&ipf_nat);
3810
3811         if (((fin->fin_flx & FI_ICMPERR) != 0) &&
3812             (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3813                 /*EMPTY*/;
3814         else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3815                 natadd = 0;
3816         else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3817                                       fin->fin_src, fin->fin_dst))) {
3818                 nflags = nat->nat_flags;
3819         } else {
3820                 u_32_t hv, msk, nmsk;
3821
3822                 /*
3823                  * If there is no current entry in the nat table for this IP#,
3824                  * create one for it (if there is a matching rule).
3825                  */
3826                 RWLOCK_EXIT(&ipf_nat);
3827                 msk = 0xffffffff;
3828                 nmsk = nat_masks;
3829                 WRITE_ENTER(&ipf_nat);
3830 maskloop:
3831                 iph = ipa & htonl(msk);
3832                 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3833                 for (np = nat_rules[hv]; np; np = np->in_mnext)
3834                 {
3835                         if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3836                                 continue;
3837                         if (np->in_v != fin->fin_v)
3838                                 continue;
3839                         if (np->in_p && (np->in_p != fin->fin_p))
3840                                 continue;
3841                         if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3842                                 continue;
3843                         if (np->in_flags & IPN_FILTER) {
3844                                 if (!nat_match(fin, np))
3845                                         continue;
3846                         } else if ((ipa & np->in_inmsk) != np->in_inip)
3847                                 continue;
3848
3849                         if ((fr != NULL) &&
3850                             !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3851                                 continue;
3852
3853                         if (*np->in_plabel != '\0') {
3854                                 if (((np->in_flags & IPN_FILTER) == 0) &&
3855                                     (np->in_dport != tcp->th_dport))
3856                                         continue;
3857                                 if (appr_ok(fin, tcp, np) == 0)
3858                                         continue;
3859                         }
3860
3861                         if ((nat = nat_new(fin, np, NULL, nflags,
3862                                            NAT_OUTBOUND))) {
3863                                 np->in_hits++;
3864                                 break;
3865                         } else
3866                                 natfailed = -1;
3867                 }
3868                 if ((np == NULL) && (nmsk != 0)) {
3869                         while (nmsk) {
3870                                 msk <<= 1;
3871                                 if (nmsk & 0x80000000)
3872                                         break;
3873                                 nmsk <<= 1;
3874                         }
3875                         if (nmsk != 0) {
3876                                 nmsk <<= 1;
3877                                 goto maskloop;
3878                         }
3879                 }
3880                 MUTEX_DOWNGRADE(&ipf_nat);
3881         }
3882
3883         if (nat != NULL) {
3884                 rval = fr_natout(fin, nat, natadd, nflags);
3885                 if (rval == 1) {
3886                         MUTEX_ENTER(&nat->nat_lock);
3887                         nat->nat_ref++;
3888                         MUTEX_EXIT(&nat->nat_lock);
3889                         nat->nat_touched = fr_ticks;
3890                         fin->fin_nat = nat;
3891                 }
3892         } else
3893                 rval = natfailed;
3894         RWLOCK_EXIT(&ipf_nat);
3895
3896         if (rval == -1) {
3897                 if (passp != NULL)
3898                         *passp = FR_BLOCK;
3899                 fin->fin_flx |= FI_BADNAT;
3900         }
3901         fin->fin_ifp = sifp;
3902         return rval;
3903 }
3904
3905 /* ------------------------------------------------------------------------ */
3906 /* Function:    fr_natout                                                   */
3907 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3908 /*                     1 == packet was successfully translated.             */
3909 /* Parameters:  fin(I)    - pointer to packet information                   */
3910 /*              nat(I)    - pointer to NAT structure                        */
3911 /*              natadd(I) - flag indicating if it is safe to add frag cache */
3912 /*              nflags(I) - NAT flags set for this packet                   */
3913 /*                                                                          */
3914 /* Translate a packet coming "out" on an interface.                         */
3915 /* ------------------------------------------------------------------------ */
3916 int fr_natout(fin, nat, natadd, nflags)
3917 fr_info_t *fin;
3918 nat_t *nat;
3919 int natadd;
3920 u_32_t nflags;
3921 {
3922         icmphdr_t *icmp;
3923         u_short *csump;
3924         tcphdr_t *tcp;
3925         ipnat_t *np;
3926         int i;
3927
3928         tcp = NULL;
3929         icmp = NULL;
3930         csump = NULL;
3931         np = nat->nat_ptr;
3932
3933         if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3934                 (void) fr_nat_newfrag(fin, 0, nat);
3935
3936         MUTEX_ENTER(&nat->nat_lock);
3937         nat->nat_bytes[1] += fin->fin_plen;
3938         nat->nat_pkts[1]++;
3939         MUTEX_EXIT(&nat->nat_lock);
3940
3941         /*
3942          * Fix up checksums, not by recalculating them, but
3943          * simply computing adjustments.
3944          * This is only done for STREAMS based IP implementations where the
3945          * checksum has already been calculated by IP.  In all other cases,
3946          * IPFilter is called before the checksum needs calculating so there
3947          * is no call to modify whatever is in the header now.
3948          */
3949         if (fin->fin_v == 4) {
3950                 if (nflags == IPN_ICMPERR) {
3951                         u_32_t s1, s2, sumd;
3952
3953                         s1 = LONG_SUM(ntohl(fin->fin_saddr));
3954                         s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3955                         CALC_SUMD(s1, s2, sumd);
3956                         fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3957                 }
3958 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3959     defined(linux) || defined(BRIDGE_IPF)
3960                 else {
3961                         /*
3962                          * Strictly speaking, this isn't necessary on BSD
3963                          * kernels because they do checksum calculation after
3964                          * this code has run BUT if ipfilter is being used
3965                          * to do NAT as a bridge, that code doesn't exist.
3966                          */
3967                         if (nat->nat_dir == NAT_OUTBOUND)
3968                                 fix_outcksum(fin, &fin->fin_ip->ip_sum,
3969                                              nat->nat_ipsumd);
3970                         else
3971                                 fix_incksum(fin, &fin->fin_ip->ip_sum,
3972                                             nat->nat_ipsumd);
3973                 }
3974 #endif
3975         }
3976
3977         if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3978                 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3979                         tcp = fin->fin_dp;
3980
3981                         tcp->th_sport = nat->nat_outport;
3982                         fin->fin_data[0] = ntohs(nat->nat_outport);
3983                 }
3984
3985                 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3986                         icmp = fin->fin_dp;
3987                         icmp->icmp_id = nat->nat_outport;
3988                 }
3989
3990                 csump = nat_proto(fin, nat, nflags);
3991         }
3992
3993         fin->fin_ip->ip_src = nat->nat_outip;
3994
3995         nat_update(fin, nat, np);
3996
3997         /*
3998          * The above comments do not hold for layer 4 (or higher) checksums...
3999          */
4000         if (csump != NULL) {
4001                 if (nat->nat_dir == NAT_OUTBOUND)
4002                         fix_outcksum(fin, csump, nat->nat_sumd[1]);
4003                 else
4004                         fix_incksum(fin, csump, nat->nat_sumd[1]);
4005         }
4006 #ifdef  IPFILTER_SYNC
4007         ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4008 #endif
4009         /* ------------------------------------------------------------- */
4010         /* A few quick notes:                                            */
4011         /*      Following are test conditions prior to calling the       */
4012         /*      appr_check routine.                                      */
4013         /*                                                               */
4014         /*      A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4015         /*      with a redirect rule, we attempt to match the packet's   */
4016         /*      source port against in_dport, otherwise we'd compare the */
4017         /*      packet's destination.                                    */
4018         /* ------------------------------------------------------------- */
4019         if ((np != NULL) && (np->in_apr != NULL)) {
4020                 i = appr_check(fin, nat);
4021                 if (i == 0)
4022                         i = 1;
4023         } else
4024                 i = 1;
4025         ATOMIC_INCL(nat_stats.ns_mapped[1]);
4026         fin->fin_flx |= FI_NATED;
4027         return i;
4028 }
4029
4030
4031 /* ------------------------------------------------------------------------ */
4032 /* Function:    fr_checknatin                                               */
4033 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4034 /*                     0 == no packet translation occurred,                 */
4035 /*                     1 == packet was successfully translated.             */
4036 /* Parameters:  fin(I)   - pointer to packet information                    */
4037 /*              passp(I) - pointer to filtering result flags                */
4038 /*                                                                          */
4039 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
4040 /* first checked to see if they match an existing entry (if an error),      */
4041 /* otherwise a search of the current NAT table is made.  If neither results */
4042 /* in a match then a search for a matching NAT rule is made.  Create a new  */
4043 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
4044 /* packet header(s) as required.                                            */
4045 /* ------------------------------------------------------------------------ */
4046 int fr_checknatin(fin, passp)
4047 fr_info_t *fin;
4048 u_32_t *passp;
4049 {
4050         u_int nflags, natadd;
4051         int rval, natfailed;
4052         struct ifnet *ifp;
4053         struct in_addr in;
4054         icmphdr_t *icmp;
4055         tcphdr_t *tcp;
4056         u_short dport;
4057         ipnat_t *np;
4058         nat_t *nat;
4059         u_32_t iph;
4060
4061         if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
4062                 return 0;
4063
4064         tcp = NULL;
4065         icmp = NULL;
4066         dport = 0;
4067         natadd = 1;
4068         nflags = 0;
4069         natfailed = 0;
4070         ifp = fin->fin_ifp;
4071
4072         if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4073                 switch (fin->fin_p)
4074                 {
4075                 case IPPROTO_TCP :
4076                         nflags = IPN_TCP;
4077                         break;
4078                 case IPPROTO_UDP :
4079                         nflags = IPN_UDP;
4080                         break;
4081                 case IPPROTO_ICMP :
4082                         icmp = fin->fin_dp;
4083
4084                         /*
4085                          * This is an incoming packet, so the destination is
4086                          * the icmp_id and the source port equals 0
4087                          */
4088                         if (nat_icmpquerytype4(icmp->icmp_type)) {
4089                                 nflags = IPN_ICMPQUERY;
4090                                 dport = icmp->icmp_id;  
4091                         } break;
4092                 default :
4093                         break;
4094                 }
4095                 
4096                 if ((nflags & IPN_TCPUDP)) {
4097                         tcp = fin->fin_dp;
4098                         dport = tcp->th_dport;
4099                 }
4100         }
4101
4102         in = fin->fin_dst;
4103
4104         READ_ENTER(&ipf_nat);
4105
4106         if (((fin->fin_flx & FI_ICMPERR) != 0) &&
4107             (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4108                 /*EMPTY*/;
4109         else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4110                 natadd = 0;
4111         else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4112                                      fin->fin_src, in))) {
4113                 nflags = nat->nat_flags;
4114         } else {
4115                 u_32_t hv, msk, rmsk;
4116
4117                 RWLOCK_EXIT(&ipf_nat);
4118                 rmsk = rdr_masks;
4119                 msk = 0xffffffff;
4120                 WRITE_ENTER(&ipf_nat);
4121                 /*
4122                  * If there is no current entry in the nat table for this IP#,
4123                  * create one for it (if there is a matching rule).
4124                  */
4125 maskloop:
4126                 iph = in.s_addr & htonl(msk);
4127                 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
4128                 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
4129                         if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4130                                 continue;
4131                         if (np->in_v != fin->fin_v)
4132                                 continue;
4133                         if (np->in_p && (np->in_p != fin->fin_p))
4134                                 continue;
4135                         if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4136                                 continue;
4137                         if (np->in_flags & IPN_FILTER) {
4138                                 if (!nat_match(fin, np))
4139                                         continue;
4140                         } else {
4141                                 if ((in.s_addr & np->in_outmsk) != np->in_outip)
4142                                         continue;
4143                                 if (np->in_pmin &&
4144                                     ((ntohs(np->in_pmax) < ntohs(dport)) ||
4145                                      (ntohs(dport) < ntohs(np->in_pmin))))
4146                                         continue;
4147                         }
4148
4149                         if (*np->in_plabel != '\0') {
4150                                 if (!appr_ok(fin, tcp, np)) {
4151                                         continue;
4152                                 }
4153                         }
4154
4155                         nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4156                         if (nat != NULL) {
4157                                 np->in_hits++;
4158                                 break;
4159                         } else
4160                                 natfailed = -1;
4161                 }
4162
4163                 if ((np == NULL) && (rmsk != 0)) {
4164                         while (rmsk) {
4165                                 msk <<= 1;
4166                                 if (rmsk & 0x80000000)
4167                                         break;
4168                                 rmsk <<= 1;
4169                         }
4170                         if (rmsk != 0) {
4171                                 rmsk <<= 1;
4172                                 goto maskloop;
4173                         }
4174                 }
4175                 MUTEX_DOWNGRADE(&ipf_nat);
4176         }
4177         if (nat != NULL) {
4178                 rval = fr_natin(fin, nat, natadd, nflags);
4179                 if (rval == 1) {
4180                         MUTEX_ENTER(&nat->nat_lock);
4181                         nat->nat_ref++;
4182                         MUTEX_EXIT(&nat->nat_lock);
4183                         nat->nat_touched = fr_ticks;
4184                         fin->fin_nat = nat;
4185                 }
4186         } else
4187                 rval = natfailed;
4188         RWLOCK_EXIT(&ipf_nat);
4189
4190         if (rval == -1) {
4191                 if (passp != NULL)
4192                         *passp = FR_BLOCK;
4193                 fin->fin_flx |= FI_BADNAT;
4194         }
4195         return rval;
4196 }
4197
4198
4199 /* ------------------------------------------------------------------------ */
4200 /* Function:    fr_natin                                                    */
4201 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4202 /*                     1 == packet was successfully translated.             */
4203 /* Parameters:  fin(I)    - pointer to packet information                   */
4204 /*              nat(I)    - pointer to NAT structure                        */
4205 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4206 /*              nflags(I) - NAT flags set for this packet                   */
4207 /* Locks Held:  ipf_nat (READ)                                              */
4208 /*                                                                          */
4209 /* Translate a packet coming "in" on an interface.                          */
4210 /* ------------------------------------------------------------------------ */
4211 int fr_natin(fin, nat, natadd, nflags)
4212 fr_info_t *fin;
4213 nat_t *nat;
4214 int natadd;
4215 u_32_t nflags;
4216 {
4217         icmphdr_t *icmp;
4218         u_short *csump;
4219         tcphdr_t *tcp;
4220         ipnat_t *np;
4221         int i;
4222
4223         tcp = NULL;
4224         csump = NULL;
4225         np = nat->nat_ptr;
4226         fin->fin_fr = nat->nat_fr;
4227
4228         if (np != NULL) {
4229                 if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4230                         (void) fr_nat_newfrag(fin, 0, nat);
4231
4232         /* ------------------------------------------------------------- */
4233         /* A few quick notes:                                            */
4234         /*      Following are test conditions prior to calling the       */
4235         /*      appr_check routine.                                      */
4236         /*                                                               */
4237         /*      A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4238         /*      with a map rule, we attempt to match the packet's        */
4239         /*      source port against in_dport, otherwise we'd compare the */
4240         /*      packet's destination.                                    */
4241         /* ------------------------------------------------------------- */
4242                 if (np->in_apr != NULL) {
4243                         i = appr_check(fin, nat);
4244                         if (i == -1) {
4245                                 return -1;
4246                         }
4247                 }
4248         }
4249
4250 #ifdef  IPFILTER_SYNC
4251         ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4252 #endif
4253
4254         MUTEX_ENTER(&nat->nat_lock);
4255         nat->nat_bytes[0] += fin->fin_plen;
4256         nat->nat_pkts[0]++;
4257         MUTEX_EXIT(&nat->nat_lock);
4258
4259         fin->fin_ip->ip_dst = nat->nat_inip;
4260         fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4261         if (nflags & IPN_TCPUDP)
4262                 tcp = fin->fin_dp;
4263
4264         /*
4265          * Fix up checksums, not by recalculating them, but
4266          * simply computing adjustments.
4267          * Why only do this for some platforms on inbound packets ?
4268          * Because for those that it is done, IP processing is yet to happen
4269          * and so the IPv4 header checksum has not yet been evaluated.
4270          * Perhaps it should always be done for the benefit of things like
4271          * fast forwarding (so that it doesn't need to be recomputed) but with
4272          * header checksum offloading, perhaps it is a moot point.
4273          */
4274 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4275      defined(__osf__) || defined(linux)
4276         if (nat->nat_dir == NAT_OUTBOUND)
4277                 fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4278         else
4279                 fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4280 #endif
4281
4282         if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4283                 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4284                         tcp->th_dport = nat->nat_inport;
4285                         fin->fin_data[1] = ntohs(nat->nat_inport);
4286                 }
4287
4288
4289                 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4290                         icmp = fin->fin_dp;
4291
4292                         icmp->icmp_id = nat->nat_inport;
4293                 }
4294
4295                 csump = nat_proto(fin, nat, nflags);
4296         }
4297
4298         nat_update(fin, nat, np);
4299
4300         /*
4301          * The above comments do not hold for layer 4 (or higher) checksums...
4302          */
4303         if (csump != NULL) {
4304                 if (nat->nat_dir == NAT_OUTBOUND)
4305                         fix_incksum(fin, csump, nat->nat_sumd[0]);
4306                 else
4307                         fix_outcksum(fin, csump, nat->nat_sumd[0]);
4308         }
4309         ATOMIC_INCL(nat_stats.ns_mapped[0]);
4310         fin->fin_flx |= FI_NATED;
4311         if (np != NULL && np->in_tag.ipt_num[0] != 0)
4312                 fin->fin_nattag = &np->in_tag;
4313         return 1;
4314 }
4315
4316
4317 /* ------------------------------------------------------------------------ */
4318 /* Function:    nat_proto                                                   */
4319 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4320 /*                         NULL if the transport protocol is not recognised */
4321 /*                         as needing a checksum update.                    */
4322 /* Parameters:  fin(I)    - pointer to packet information                   */
4323 /*              nat(I)    - pointer to NAT structure                        */
4324 /*              nflags(I) - NAT flags set for this packet                   */
4325 /*                                                                          */
4326 /* Return the pointer to the checksum field for each protocol so understood.*/
4327 /* If support for making other changes to a protocol header is required,    */
4328 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4329 /* TCP down to a specific value, then do it from here.                      */
4330 /* ------------------------------------------------------------------------ */
4331 u_short *nat_proto(fin, nat, nflags)
4332 fr_info_t *fin;
4333 nat_t *nat;
4334 u_int nflags;
4335 {
4336         icmphdr_t *icmp;
4337         u_short *csump;
4338         tcphdr_t *tcp;
4339         udphdr_t *udp;
4340
4341         csump = NULL;
4342         if (fin->fin_out == 0) {
4343                 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4344         } else {
4345                 fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4346         }
4347
4348         switch (fin->fin_p)
4349         {
4350         case IPPROTO_TCP :
4351                 tcp = fin->fin_dp;
4352
4353                 csump = &tcp->th_sum;
4354
4355                 /*
4356                  * Do a MSS CLAMPING on a SYN packet,
4357                  * only deal IPv4 for now.
4358                  */
4359                 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4360                         nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
4361
4362                 break;
4363
4364         case IPPROTO_UDP :
4365                 udp = fin->fin_dp;
4366
4367                 if (udp->uh_sum)
4368                         csump = &udp->uh_sum;
4369                 break;
4370
4371         case IPPROTO_ICMP :
4372                 icmp = fin->fin_dp;
4373
4374                 if ((nflags & IPN_ICMPQUERY) != 0) {
4375                         if (icmp->icmp_cksum != 0)
4376                                 csump = &icmp->icmp_cksum;
4377                 }
4378                 break;
4379         }
4380         return csump;
4381 }
4382
4383
4384 /* ------------------------------------------------------------------------ */
4385 /* Function:    fr_natunload                                                */
4386 /* Returns:     Nil                                                         */
4387 /* Parameters:  Nil                                                         */
4388 /*                                                                          */
4389 /* Free all memory used by NAT structures allocated at runtime.             */
4390 /* ------------------------------------------------------------------------ */
4391 void fr_natunload()
4392 {
4393         ipftq_t *ifq, *ifqnext;
4394
4395         (void) nat_clearlist();
4396         (void) nat_flushtable();
4397
4398         /*
4399          * Proxy timeout queues are not cleaned here because although they
4400          * exist on the NAT list, appr_unload is called after fr_natunload
4401          * and the proxies actually are responsible for them being created.
4402          * Should the proxy timeouts have their own list?  There's no real
4403          * justification as this is the only complication.
4404          */
4405         for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4406                 ifqnext = ifq->ifq_next;
4407                 if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4408                     (fr_deletetimeoutqueue(ifq) == 0))
4409                         fr_freetimeoutqueue(ifq);
4410         }
4411
4412         if (nat_table[0] != NULL) {
4413                 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4414                 nat_table[0] = NULL;
4415         }
4416         if (nat_table[1] != NULL) {
4417                 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4418                 nat_table[1] = NULL;
4419         }
4420         if (nat_rules != NULL) {
4421                 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4422                 nat_rules = NULL;
4423         }
4424         if (rdr_rules != NULL) {
4425                 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4426                 rdr_rules = NULL;
4427         }
4428         if (ipf_hm_maptable != NULL) {
4429                 KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4430                 ipf_hm_maptable = NULL;
4431         }
4432         if (nat_stats.ns_bucketlen[0] != NULL) {
4433                 KFREES(nat_stats.ns_bucketlen[0],
4434                        sizeof(u_long *) * ipf_nattable_sz);
4435                 nat_stats.ns_bucketlen[0] = NULL;
4436         }
4437         if (nat_stats.ns_bucketlen[1] != NULL) {
4438                 KFREES(nat_stats.ns_bucketlen[1],
4439                        sizeof(u_long *) * ipf_nattable_sz);
4440                 nat_stats.ns_bucketlen[1] = NULL;
4441         }
4442
4443         if (fr_nat_maxbucket_reset == 1)
4444                 fr_nat_maxbucket = 0;
4445
4446         if (fr_nat_init == 1) {
4447                 fr_nat_init = 0;
4448                 fr_sttab_destroy(nat_tqb);
4449
4450                 RW_DESTROY(&ipf_natfrag);
4451                 RW_DESTROY(&ipf_nat);
4452
4453                 MUTEX_DESTROY(&ipf_nat_new);
4454                 MUTEX_DESTROY(&ipf_natio);
4455
4456                 MUTEX_DESTROY(&nat_udptq.ifq_lock);
4457                 MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4458                 MUTEX_DESTROY(&nat_iptq.ifq_lock);
4459         }
4460 }
4461
4462
4463 /* ------------------------------------------------------------------------ */
4464 /* Function:    fr_natexpire                                                */
4465 /* Returns:     Nil                                                         */
4466 /* Parameters:  Nil                                                         */
4467 /*                                                                          */
4468 /* Check all of the timeout queues for entries at the top which need to be  */
4469 /* expired.                                                                 */
4470 /* ------------------------------------------------------------------------ */
4471 void fr_natexpire()
4472 {
4473         ipftq_t *ifq, *ifqnext;
4474         ipftqent_t *tqe, *tqn;
4475         int i;
4476         SPL_INT(s);
4477
4478         SPL_NET(s);
4479         WRITE_ENTER(&ipf_nat);
4480         for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4481                 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4482                         if (tqe->tqe_die > fr_ticks)
4483                                 break;
4484                         tqn = tqe->tqe_next;
4485                         nat_delete(tqe->tqe_parent, NL_EXPIRE);
4486                 }
4487         }
4488
4489         for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4490                 ifqnext = ifq->ifq_next;
4491
4492                 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4493                         if (tqe->tqe_die > fr_ticks)
4494                                 break;
4495                         tqn = tqe->tqe_next;
4496                         nat_delete(tqe->tqe_parent, NL_EXPIRE);
4497                 }
4498         }
4499
4500         for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4501                 ifqnext = ifq->ifq_next;
4502
4503                 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4504                     (ifq->ifq_ref == 0)) {
4505                         fr_freetimeoutqueue(ifq);
4506                 }
4507         }
4508
4509         if (fr_nat_doflush != 0) {
4510                 nat_extraflush(2);
4511                 fr_nat_doflush = 0;
4512         }
4513
4514         RWLOCK_EXIT(&ipf_nat);
4515         SPL_X(s);
4516 }
4517
4518
4519 /* ------------------------------------------------------------------------ */
4520 /* Function:    fr_natsync                                                  */
4521 /* Returns:     Nil                                                         */
4522 /* Parameters:  ifp(I) - pointer to network interface                       */
4523 /*                                                                          */
4524 /* Walk through all of the currently active NAT sessions, looking for those */
4525 /* which need to have their translated address updated.                     */
4526 /* ------------------------------------------------------------------------ */
4527 void fr_natsync(ifp)
4528 void *ifp;
4529 {
4530         u_32_t sum1, sum2, sumd;
4531         struct in_addr in;
4532         ipnat_t *n;
4533         nat_t *nat;
4534         void *ifp2;
4535         SPL_INT(s);
4536
4537         if (fr_running <= 0)
4538                 return;
4539
4540         /*
4541          * Change IP addresses for NAT sessions for any protocol except TCP
4542          * since it will break the TCP connection anyway.  The only rules
4543          * which will get changed are those which are "map ... -> 0/32",
4544          * where the rule specifies the address is taken from the interface.
4545          */
4546         SPL_NET(s);
4547         WRITE_ENTER(&ipf_nat);
4548
4549         if (fr_running <= 0) {
4550                 RWLOCK_EXIT(&ipf_nat);
4551                 return;
4552         }
4553
4554         for (nat = nat_instances; nat; nat = nat->nat_next) {
4555                 if ((nat->nat_flags & IPN_TCP) != 0)
4556                         continue;
4557                 n = nat->nat_ptr;
4558                 if ((n == NULL) ||
4559                     (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4560                         continue;
4561                 if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4562                      (ifp == nat->nat_ifps[1]))) {
4563                         nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4564                         if (nat->nat_ifnames[1][0] != '\0') {
4565                                 nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4566                                                           4);
4567                         } else
4568                                 nat->nat_ifps[1] = nat->nat_ifps[0];
4569                         ifp2 = nat->nat_ifps[0];
4570                         if (ifp2 == NULL)
4571                                 continue;
4572
4573                         /*
4574                          * Change the map-to address to be the same as the
4575                          * new one.
4576                          */
4577                         sum1 = nat->nat_outip.s_addr;
4578                         if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4579                                 nat->nat_outip = in;
4580                         sum2 = nat->nat_outip.s_addr;
4581
4582                         if (sum1 == sum2)
4583                                 continue;
4584                         /*
4585                          * Readjust the checksum adjustment to take into
4586                          * account the new IP#.
4587                          */
4588                         CALC_SUMD(sum1, sum2, sumd);
4589                         /* XXX - dont change for TCP when solaris does
4590                          * hardware checksumming.
4591                          */
4592                         sumd += nat->nat_sumd[0];
4593                         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4594                         nat->nat_sumd[1] = nat->nat_sumd[0];
4595                 }
4596         }
4597
4598         for (n = nat_list; (n != NULL); n = n->in_next) {
4599                 if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4600                         n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4601                 if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4602                         n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4603         }
4604         RWLOCK_EXIT(&ipf_nat);
4605         SPL_X(s);
4606 }
4607
4608
/* ------------------------------------------------------------------------ */
/* Function:    nat_icmpquerytype4                                          */
/* Returns:     int - 1 == query/response type, 0 == any other type         */
/* Parameters:  icmptype(I) - ICMP type number                              */
/*                                                                          */
/* Report whether the given ICMP type is one of the query or reply types    */
/* that the NAT code treats as an ICMP query.                               */
/* ------------------------------------------------------------------------ */
static int nat_icmpquerytype4(icmptype)
int icmptype;
{
	int isquery;

	/*
	 * The ICMP query NAT code needs a query and its reply to match the
	 * same NAT rule.  The NAT structure does not record the icmptype,
	 * and one NAT structure serves every icmp type sharing the same
	 * src, dest and id, so the replies are simply classed as queries
	 * too.  Calling a reply a query may look silly, but it is exactly
	 * how the IPv4 specification defines it.
	 *
	 * Router advertisement/solicitation is currently unsupported: it
	 * would require rewriting the ICMP data section.
	 */
	switch (icmptype)
	{
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
	case ICMP_TSTAMP:
	case ICMP_TSTAMPREPLY:
	case ICMP_IREQ:
	case ICMP_IREQREPLY:
	case ICMP_MASKREQ:
	case ICMP_MASKREPLY:
		isquery = 1;
		break;
	default:
		isquery = 0;
		break;
	}

	return isquery;
}
4649
4650
4651 /* ------------------------------------------------------------------------ */
4652 /* Function:    nat_log                                                     */
4653 /* Returns:     Nil                                                         */
4654 /* Parameters:  nat(I)  - pointer to NAT structure                          */
4655 /*              type(I) - type of log entry to create                       */
4656 /*                                                                          */
4657 /* Creates a NAT log entry.                                                 */
4658 /* ------------------------------------------------------------------------ */
4659 void nat_log(nat, type)
4660 struct nat *nat;
4661 u_int type;
4662 {
4663 #ifdef  IPFILTER_LOG
4664 # ifndef LARGE_NAT
4665         struct ipnat *np;
4666         int rulen;
4667 # endif
4668         struct natlog natl;
4669         void *items[1];
4670         size_t sizes[1];
4671         int types[1];
4672
4673         natl.nl_inip = nat->nat_inip;
4674         natl.nl_outip = nat->nat_outip;
4675         natl.nl_origip = nat->nat_oip;
4676         natl.nl_bytes[0] = nat->nat_bytes[0];
4677         natl.nl_bytes[1] = nat->nat_bytes[1];
4678         natl.nl_pkts[0] = nat->nat_pkts[0];
4679         natl.nl_pkts[1] = nat->nat_pkts[1];
4680         natl.nl_origport = nat->nat_oport;
4681         natl.nl_inport = nat->nat_inport;
4682         natl.nl_outport = nat->nat_outport;
4683         natl.nl_p = nat->nat_p;
4684         natl.nl_type = type;
4685         natl.nl_rule = -1;
4686 # ifndef LARGE_NAT
4687         if (nat->nat_ptr != NULL) {
4688                 for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4689                         if (np == nat->nat_ptr) {
4690                                 natl.nl_rule = rulen;
4691                                 break;
4692                         }
4693         }
4694 # endif
4695         items[0] = &natl;
4696         sizes[0] = sizeof(natl);
4697         types[0] = 0;
4698
4699         (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4700 #endif
4701 }
4702
4703
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*                                                                          */
/* OpenBSD compatibility hook: forwards the interface to frsync() so that   */
/* interface references within IPFilter get updated.                        */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp)
void *ifp;
{
	frsync(ifp);
}
#endif
4720
4721
4722 /* ------------------------------------------------------------------------ */
4723 /* Function:    fr_ipnatderef                                               */
4724 /* Returns:     Nil                                                         */
4725 /* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4726 /* Write Locks: ipf_nat                                                     */
4727 /*                                                                          */
4728 /* ------------------------------------------------------------------------ */
4729 void fr_ipnatderef(inp)
4730 ipnat_t **inp;
4731 {
4732         ipnat_t *in;
4733
4734         in = *inp;
4735         *inp = NULL;
4736         in->in_space++;
4737         in->in_use--;
4738         if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4739                 if (in->in_apr)
4740                         appr_free(in->in_apr);
4741                 MUTEX_DESTROY(&in->in_lock);
4742                 KFREE(in);
4743                 nat_stats.ns_rules--;
4744 #if SOLARIS && !defined(_INET_IP_STACK_H)
4745                 if (nat_stats.ns_rules == 0)
4746                         pfil_delayed_copy = 1;
4747 #endif
4748         }
4749 }
4750
4751
4752 /* ------------------------------------------------------------------------ */
4753 /* Function:    fr_natderef                                                 */
4754 /* Returns:     Nil                                                         */
4755 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4756 /*                                                                          */
4757 /* Decrement the reference counter for this NAT table entry and free it if  */
4758 /* there are no more things using it.                                       */
4759 /*                                                                          */
4760 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4761 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4762 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4763 /* because nat_delete() will do that and send nat_ref to -1.                */
4764 /*                                                                          */
4765 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4766 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4767 /* ------------------------------------------------------------------------ */
4768 void fr_natderef(natp)
4769 nat_t **natp;
4770 {
4771         nat_t *nat;
4772
4773         nat = *natp;
4774         *natp = NULL;
4775
4776         MUTEX_ENTER(&nat->nat_lock);
4777         if (nat->nat_ref > 1) {
4778                 nat->nat_ref--;
4779                 MUTEX_EXIT(&nat->nat_lock);
4780                 return;
4781         }
4782         MUTEX_EXIT(&nat->nat_lock);
4783
4784         WRITE_ENTER(&ipf_nat);
4785         nat_delete(nat, NL_EXPIRE);
4786         RWLOCK_EXIT(&ipf_nat);
4787 }
4788
4789
4790 /* ------------------------------------------------------------------------ */
4791 /* Function:    fr_natclone                                                 */
4792 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
4793 /*                           else pointer to new state structure            */
4794 /* Parameters:  fin(I) - pointer to packet information                      */
4795 /*              is(I)  - pointer to master state structure                  */
4796 /* Write Lock:  ipf_nat                                                     */
4797 /*                                                                          */
4798 /* Create a "duplcate" state table entry from the master.                   */
4799 /* ------------------------------------------------------------------------ */
4800 static nat_t *fr_natclone(fin, nat)
4801 fr_info_t *fin;
4802 nat_t *nat;
4803 {
4804         frentry_t *fr;
4805         nat_t *clone;
4806         ipnat_t *np;
4807
4808         KMALLOC(clone, nat_t *);
4809         if (clone == NULL)
4810                 return NULL;
4811         bcopy((char *)nat, (char *)clone, sizeof(*clone));
4812
4813         MUTEX_NUKE(&clone->nat_lock);
4814
4815         clone->nat_aps = NULL;
4816         /*
4817          * Initialize all these so that nat_delete() doesn't cause a crash.
4818          */
4819         clone->nat_tqe.tqe_pnext = NULL;
4820         clone->nat_tqe.tqe_next = NULL;
4821         clone->nat_tqe.tqe_ifq = NULL;
4822         clone->nat_tqe.tqe_parent = clone;
4823
4824         clone->nat_flags &= ~SI_CLONE;
4825         clone->nat_flags |= SI_CLONED;
4826
4827         if (clone->nat_hm)
4828                 clone->nat_hm->hm_ref++;
4829
4830         if (nat_insert(clone, fin->fin_rev) == -1) {
4831                 KFREE(clone);
4832                 return NULL;
4833         }
4834         np = clone->nat_ptr;
4835         if (np != NULL) {
4836                 if (nat_logging)
4837                         nat_log(clone, (u_int)np->in_redir);
4838                 np->in_use++;
4839         }
4840         fr = clone->nat_fr;
4841         if (fr != NULL) {
4842                 MUTEX_ENTER(&fr->fr_lock);
4843                 fr->fr_ref++;
4844                 MUTEX_EXIT(&fr->fr_lock);
4845         }
4846
4847         /*
4848          * Because the clone is created outside the normal loop of things and
4849          * TCP has special needs in terms of state, initialise the timeout
4850          * state of the new NAT from here.
4851          */
4852         if (clone->nat_p == IPPROTO_TCP) {
4853                 (void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4854                                   clone->nat_flags);
4855         }
4856 #ifdef  IPFILTER_SYNC
4857         clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4858 #endif
4859         if (nat_logging)
4860                 nat_log(clone, NL_CLONE);
4861         return clone;
4862 }
4863
4864
4865 /* ------------------------------------------------------------------------ */
4866 /* Function:   nat_wildok                                                   */
4867 /* Returns:    int - 1 == packet's ports match wildcards                    */
4868 /*                   0 == packet's ports don't match wildcards              */
4869 /* Parameters: nat(I)   - NAT entry                                         */
4870 /*             sport(I) - source port                                       */
4871 /*             dport(I) - destination port                                  */
4872 /*             flags(I) - wildcard flags                                    */
4873 /*             dir(I)   - packet direction                                  */
4874 /*                                                                          */
4875 /* Use NAT entry and packet direction to determine which combination of     */
4876 /* wildcard flags should be used.                                           */
4877 /* ------------------------------------------------------------------------ */
4878 static int nat_wildok(nat, sport, dport, flags, dir)
4879 nat_t *nat;
4880 int sport;
4881 int dport;
4882 int flags;
4883 int dir;
4884 {
4885         /*
4886          * When called by       dir is set to
4887          * nat_inlookup         NAT_INBOUND (0)
4888          * nat_outlookup        NAT_OUTBOUND (1)
4889          *
4890          * We simply combine the packet's direction in dir with the original
4891          * "intended" direction of that NAT entry in nat->nat_dir to decide
4892          * which combination of wildcard flags to allow.
4893          */
4894
4895         switch ((dir << 1) | nat->nat_dir)
4896         {
4897         case 3: /* outbound packet / outbound entry */
4898                 if (((nat->nat_inport == sport) ||
4899                     (flags & SI_W_SPORT)) &&
4900                     ((nat->nat_oport == dport) ||
4901                     (flags & SI_W_DPORT)))
4902                         return 1;
4903                 break;
4904         case 2: /* outbound packet / inbound entry */
4905                 if (((nat->nat_outport == sport) ||
4906                     (flags & SI_W_DPORT)) &&
4907                     ((nat->nat_oport == dport) ||
4908                     (flags & SI_W_SPORT)))
4909                         return 1;
4910                 break;
4911         case 1: /* inbound packet / outbound entry */
4912                 if (((nat->nat_oport == sport) ||
4913                     (flags & SI_W_DPORT)) &&
4914                     ((nat->nat_outport == dport) ||
4915                     (flags & SI_W_SPORT)))
4916                         return 1;
4917                 break;
4918         case 0: /* inbound packet / inbound entry */
4919                 if (((nat->nat_oport == sport) ||
4920                     (flags & SI_W_SPORT)) &&
4921                     ((nat->nat_outport == dport) ||
4922                     (flags & SI_W_DPORT)))
4923                         return 1;
4924                 break;
4925         default:
4926                 break;
4927         }
4928
4929         return(0);
4930 }
4931
4932
4933 /* ------------------------------------------------------------------------ */
4934 /* Function:    nat_mssclamp                                                */
4935 /* Returns:     Nil                                                         */
4936 /* Parameters:  tcp(I)    - pointer to TCP header                           */
4937 /*              maxmss(I) - value to clamp the TCP MSS to                   */
4938 /*              fin(I)    - pointer to packet information                   */
4939 /*              csump(I)  - pointer to TCP checksum                         */
4940 /*                                                                          */
4941 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
4942 /* then the TCP header checksum will be updated to reflect the change in    */
4943 /* the MSS.                                                                 */
4944 /* ------------------------------------------------------------------------ */
4945 static void nat_mssclamp(tcp, maxmss, fin, csump)
4946 tcphdr_t *tcp;
4947 u_32_t maxmss;
4948 fr_info_t *fin;
4949 u_short *csump;
4950 {
4951         u_char *cp, *ep, opt;
4952         int hlen, advance;
4953         u_32_t mss, sumd;
4954
4955         hlen = TCP_OFF(tcp) << 2;
4956         if (hlen > sizeof(*tcp)) {
4957                 cp = (u_char *)tcp + sizeof(*tcp);
4958                 ep = (u_char *)tcp + hlen;
4959
4960                 while (cp < ep) {
4961                         opt = cp[0];
4962                         if (opt == TCPOPT_EOL)
4963                                 break;
4964                         else if (opt == TCPOPT_NOP) {
4965                                 cp++;
4966                                 continue;
4967                         }
4968
4969                         if (cp + 1 >= ep)
4970                                 break;
4971                         advance = cp[1];
4972                         if ((cp + advance > ep) || (advance <= 0))
4973                                 break;
4974                         switch (opt)
4975                         {
4976                         case TCPOPT_MAXSEG:
4977                                 if (advance != 4)
4978                                         break;
4979                                 mss = cp[2] * 256 + cp[3];
4980                                 if (mss > maxmss) {
4981                                         cp[2] = maxmss / 256;
4982                                         cp[3] = maxmss & 0xff;
4983                                         CALC_SUMD(mss, maxmss, sumd);
4984                                         fix_outcksum(fin, csump, sumd);
4985                                 }
4986                                 break;
4987                         default:
4988                                 /* ignore unknown options */
4989                                 break;
4990                         }
4991
4992                         cp += advance;
4993                 }
4994         }
4995 }
4996
4997
4998 /* ------------------------------------------------------------------------ */
4999 /* Function:    fr_setnatqueue                                              */
5000 /* Returns:     Nil                                                         */
5001 /* Parameters:  nat(I)- pointer to NAT structure                            */
5002 /*              rev(I) - forward(0) or reverse(1) direction                 */
5003 /* Locks:       ipf_nat (read or write)                                     */
5004 /*                                                                          */
5005 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
5006 /* determining which queue it should be placed on.                          */
5007 /* ------------------------------------------------------------------------ */
5008 void fr_setnatqueue(nat, rev)
5009 nat_t *nat;
5010 int rev;
5011 {
5012         ipftq_t *oifq, *nifq;
5013
5014         if (nat->nat_ptr != NULL)
5015                 nifq = nat->nat_ptr->in_tqehead[rev];
5016         else
5017                 nifq = NULL;
5018
5019         if (nifq == NULL) {
5020                 switch (nat->nat_p)
5021                 {
5022                 case IPPROTO_UDP :
5023                         nifq = &nat_udptq;
5024                         break;
5025                 case IPPROTO_ICMP :
5026                         nifq = &nat_icmptq;
5027                         break;
5028                 case IPPROTO_TCP :
5029                         nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
5030                         break;
5031                 default :
5032                         nifq = &nat_iptq;
5033                         break;
5034                 }
5035         }
5036
5037         oifq = nat->nat_tqe.tqe_ifq;
5038         /*
5039          * If it's currently on a timeout queue, move it from one queue to
5040          * another, else put it on the end of the newly determined queue.
5041          */
5042         if (oifq != NULL)
5043                 fr_movequeue(&nat->nat_tqe, oifq, nifq);
5044         else
5045                 fr_queueappend(&nat->nat_tqe, nifq, nat);
5046         return;
5047 }
5048
5049
5050 /* ------------------------------------------------------------------------ */
5051 /* Function:    nat_getnext                                                 */
5052 /* Returns:     int - 0 == ok, else error                                   */
5053 /* Parameters:  t(I)   - pointer to ipftoken structure                      */
5054 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5055 /*                                                                          */
5056 /* Fetch the next nat/ipnat structure pointer from the linked list and      */
5057 /* copy it out to the storage space pointed to by itp_data.  The next item  */
5058 /* in the list to look at is put back in the ipftoken structure.            */
5059 /* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
5060 /* ipf_freetoken will call a deref function for us and we don't want to call*/
5061 /* that twice (second time would be in the second switch statement below).  */
5062 /* ------------------------------------------------------------------------ */
5063 static int nat_getnext(t, itp)
5064 ipftoken_t *t;
5065 ipfgeniter_t *itp;
5066 {
5067         hostmap_t *hm, *nexthm = NULL, zerohm;
5068         ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5069         nat_t *nat, *nextnat = NULL, zeronat;
5070         int error = 0, count;
5071         char *dst;
5072
5073         count = itp->igi_nitems;
5074         if (count < 1)
5075                 return ENOSPC;
5076
5077         READ_ENTER(&ipf_nat);
5078
5079         switch (itp->igi_type)
5080         {
5081         case IPFGENITER_HOSTMAP :
5082                 hm = t->ipt_data;
5083                 if (hm == NULL) {
5084                         nexthm = ipf_hm_maplist;
5085                 } else {
5086                         nexthm = hm->hm_next;
5087                 }
5088                 break;
5089
5090         case IPFGENITER_IPNAT :
5091                 ipn = t->ipt_data;
5092                 if (ipn == NULL) {
5093                         nextipnat = nat_list;
5094                 } else {
5095                         nextipnat = ipn->in_next;
5096                 }
5097                 break;
5098
5099         case IPFGENITER_NAT :
5100                 nat = t->ipt_data;
5101                 if (nat == NULL) {
5102                         nextnat = nat_instances;
5103                 } else {
5104                         nextnat = nat->nat_next;
5105                 }
5106                 break;
5107         default :
5108                 RWLOCK_EXIT(&ipf_nat);
5109                 return EINVAL;
5110         }
5111
5112         dst = itp->igi_data;
5113         for (;;) {
5114                 switch (itp->igi_type)
5115                 {
5116                 case IPFGENITER_HOSTMAP :
5117                         if (nexthm != NULL) {
5118                                 if (count == 1) {
5119                                         ATOMIC_INC32(nexthm->hm_ref);
5120                                         t->ipt_data = nexthm;
5121                                 }
5122                         } else {
5123                                 bzero(&zerohm, sizeof(zerohm));
5124                                 nexthm = &zerohm;
5125                                 count = 1;
5126                                 t->ipt_data = NULL;
5127                         }
5128                         break;
5129
5130                 case IPFGENITER_IPNAT :
5131                         if (nextipnat != NULL) {
5132                                 if (count == 1) {
5133                                         MUTEX_ENTER(&nextipnat->in_lock);
5134                                         nextipnat->in_use++;
5135                                         MUTEX_EXIT(&nextipnat->in_lock);
5136                                         t->ipt_data = nextipnat;
5137                                 }
5138                         } else {
5139                                 bzero(&zeroipn, sizeof(zeroipn));
5140                                 nextipnat = &zeroipn;
5141                                 count = 1;
5142                                 t->ipt_data = NULL;
5143                         }
5144                         break;
5145
5146                 case IPFGENITER_NAT :
5147                         if (nextnat != NULL) {
5148                                 if (count == 1) {
5149                                         MUTEX_ENTER(&nextnat->nat_lock);
5150                                         nextnat->nat_ref++;
5151                                         MUTEX_EXIT(&nextnat->nat_lock);
5152                                         t->ipt_data = nextnat;
5153                                 }
5154                         } else {
5155                                 bzero(&zeronat, sizeof(zeronat));
5156                                 nextnat = &zeronat;
5157                                 count = 1;
5158                                 t->ipt_data = NULL;
5159                         }
5160                         break;
5161                 default :
5162                         break;
5163                 }
5164                 RWLOCK_EXIT(&ipf_nat);
5165
5166                 /*
5167                  * Copying out to user space needs to be done without the lock.
5168                  */
5169                 switch (itp->igi_type)
5170                 {
5171                 case IPFGENITER_HOSTMAP :
5172                         error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5173                         if (error != 0)
5174                                 error = EFAULT;
5175                         else
5176                                 dst += sizeof(*nexthm);
5177                         break;
5178
5179                 case IPFGENITER_IPNAT :
5180                         error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5181                         if (error != 0)
5182                                 error = EFAULT;
5183                         else
5184                                 dst += sizeof(*nextipnat);
5185                         break;
5186
5187                 case IPFGENITER_NAT :
5188                         error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5189                         if (error != 0)
5190                                 error = EFAULT;
5191                         else
5192                                 dst += sizeof(*nextnat);
5193                         break;
5194                 }
5195
5196                 if ((count == 1) || (error != 0))
5197                         break;
5198
5199                 count--;
5200
5201                 READ_ENTER(&ipf_nat);
5202
5203                 /*
5204                  * We need to have the lock again here to make sure that
5205                  * using _next is consistent.
5206                  */
5207                 switch (itp->igi_type)
5208                 {
5209                 case IPFGENITER_HOSTMAP :
5210                         nexthm = nexthm->hm_next;
5211                         break;
5212                 case IPFGENITER_IPNAT :
5213                         nextipnat = nextipnat->in_next;
5214                         break;
5215                 case IPFGENITER_NAT :
5216                         nextnat = nextnat->nat_next;
5217                         break;
5218                 }
5219         }
5220
5221
5222         switch (itp->igi_type)
5223         {
5224         case IPFGENITER_HOSTMAP :
5225                 if (hm != NULL) {
5226                         WRITE_ENTER(&ipf_nat);
5227                         fr_hostmapdel(&hm);
5228                         RWLOCK_EXIT(&ipf_nat);
5229                 }
5230                 break;
5231         case IPFGENITER_IPNAT :
5232                 if (ipn != NULL) {
5233                         fr_ipnatderef(&ipn);
5234                 }
5235                 break;
5236         case IPFGENITER_NAT :
5237                 if (nat != NULL) {
5238                         fr_natderef(&nat);
5239                 }
5240                 break;
5241         default :
5242                 break;
5243         }
5244
5245         return error;
5246 }
5247
5248
5249 /* ------------------------------------------------------------------------ */
5250 /* Function:    nat_iterator                                                */
5251 /* Returns:     int - 0 == ok, else error                                   */
5252 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5253 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5254 /*                                                                          */
5255 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5256 /* generic structure to iterate through a list.  There are three different  */
5257 /* linked lists of NAT related information to go through: NAT rules, active */
5258 /* NAT mappings and the NAT fragment cache.                                 */
5259 /* ------------------------------------------------------------------------ */
5260 static int nat_iterator(token, itp)
5261 ipftoken_t *token;
5262 ipfgeniter_t *itp;
5263 {
5264         int error;
5265
5266         if (itp->igi_data == NULL)
5267                 return EFAULT;
5268
5269         token->ipt_subtype = itp->igi_type;
5270
5271         switch (itp->igi_type)
5272         {
5273         case IPFGENITER_HOSTMAP :
5274         case IPFGENITER_IPNAT :
5275         case IPFGENITER_NAT :
5276                 error = nat_getnext(token, itp);
5277                 break;
5278
5279         case IPFGENITER_NATFRAG :
5280 #ifdef USE_MUTEXES
5281                 error = fr_nextfrag(token, itp, &ipfr_natlist,
5282                                     &ipfr_nattail, &ipf_natfrag);
5283 #else
5284                 error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5285 #endif
5286                 break;
5287         default :
5288                 error = EINVAL;
5289                 break;
5290         }
5291
5292         return error;
5293 }
5294
5295
5296 /* ------------------------------------------------------------------------ */
5297 /* Function:    nat_extraflush                                              */
5298 /* Returns:     int - 0 == success, -1 == failure                           */
5299 /* Parameters:  which(I) - how to flush the active NAT table                */
5300 /* Write Locks: ipf_nat                                                     */
5301 /*                                                                          */
5302 /* Flush nat tables.  Three actions currently defined:                      */
5303 /* which == 0 : flush all nat table entries                                 */
5304 /* which == 1 : flush TCP connections which have started to close but are   */
5305 /*            stuck for some reason.                                        */
5306 /* which == 2 : flush TCP connections which have been idle for a long time, */
5307 /*            starting at > 4 days idle and working back in successive half-*/
5308 /*            days to at most 12 hours old.  If this fails to free enough   */
5309 /*            slots then work backwards in half hour slots to 30 minutes.   */
5310 /*            If that too fails, then work backwards in 30 second intervals */
5311 /*            for the last 30 minutes to at worst 30 seconds idle.          */
5312 /* ------------------------------------------------------------------------ */
5313 static int nat_extraflush(which)
5314 int which;
5315 {
5316         ipftq_t *ifq, *ifqnext;
5317         nat_t *nat, **natp;
5318         ipftqent_t *tqn;
5319         int removed;
5320         SPL_INT(s);
5321
5322         removed = 0;
5323
5324         SPL_NET(s);
5325
5326         switch (which)
5327         {
5328         case 0 :
5329                 /*
5330                  * Style 0 flush removes everything...
5331                  */
5332                 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5333                         nat_delete(nat, NL_FLUSH);
5334                         removed++;
5335                 }
5336                 break;
5337
5338         case 1 :
5339                 /*
5340                  * Since we're only interested in things that are closing,
5341                  * we can start with the appropriate timeout queue.
5342                  */
5343                 for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5344                      ifq = ifq->ifq_next) {
5345
5346                         for (tqn = ifq->ifq_head; tqn != NULL; ) {
5347                                 nat = tqn->tqe_parent;
5348                                 tqn = tqn->tqe_next;
5349                                 if (nat->nat_p != IPPROTO_TCP)
5350                                         break;
5351                                 nat_delete(nat, NL_EXPIRE);
5352                                 removed++;
5353                         }
5354                 }
5355
5356                 /*
5357                  * Also need to look through the user defined queues.
5358                  */
5359                 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5360                         ifqnext = ifq->ifq_next;
5361                         for (tqn = ifq->ifq_head; tqn != NULL; ) {
5362                                 nat = tqn->tqe_parent;
5363                                 tqn = tqn->tqe_next;
5364                                 if (nat->nat_p != IPPROTO_TCP)
5365                                         continue;
5366
5367                                 if ((nat->nat_tcpstate[0] >
5368                                      IPF_TCPS_ESTABLISHED) &&
5369                                     (nat->nat_tcpstate[1] >
5370                                      IPF_TCPS_ESTABLISHED)) {
5371                                         nat_delete(nat, NL_EXPIRE);
5372                                         removed++;
5373                                 }
5374                         }
5375                 }
5376                 break;
5377
5378                 /*
5379                  * Args 5-11 correspond to flushing those particular states
5380                  * for TCP connections.
5381                  */
5382         case IPF_TCPS_CLOSE_WAIT :
5383         case IPF_TCPS_FIN_WAIT_1 :
5384         case IPF_TCPS_CLOSING :
5385         case IPF_TCPS_LAST_ACK :
5386         case IPF_TCPS_FIN_WAIT_2 :
5387         case IPF_TCPS_TIME_WAIT :
5388         case IPF_TCPS_CLOSED :
5389                 tqn = nat_tqb[which].ifq_head;
5390                 while (tqn != NULL) {
5391                         nat = tqn->tqe_parent;
5392                         tqn = tqn->tqe_next;
5393                         nat_delete(nat, NL_FLUSH);
5394                         removed++;
5395                 }
5396                 break;
5397          
5398         default :
5399                 if (which < 30)
5400                         break;
5401            
5402                 /*
5403                  * Take a large arbitrary number to mean the number of seconds
5404                  * for which which consider to be the maximum value we'll allow
5405                  * the expiration to be.
5406                  */
5407                 which = IPF_TTLVAL(which);
5408                 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5409                         if (fr_ticks - nat->nat_touched > which) {
5410                                 nat_delete(nat, NL_FLUSH);
5411                                 removed++;
5412                         } else
5413                                 natp = &nat->nat_next;
5414                 }
5415                 break;
5416         }
5417
5418         if (which != 2) {
5419                 SPL_X(s);
5420                 return removed;
5421         }
5422
5423         /*
5424          * Asked to remove inactive entries because the table is full.
5425          */
5426         if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
5427                 nat_last_force_flush = fr_ticks;
5428                 removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5429         }
5430
5431         SPL_X(s);
5432         return removed;
5433 }
5434
5435
5436 /* ------------------------------------------------------------------------ */
5437 /* Function:    nat_flush_entry                                             */
5438 /* Returns:     0 - always succeeds                                         */
5439 /* Parameters:  entry(I) - pointer to NAT entry                             */
5440 /* Write Locks: ipf_nat                                                     */
5441 /*                                                                          */
5442 /* This function is a stepping stone between ipf_queueflush() and           */
5443 /* nat_dlete().  It is used so we can provide a uniform interface via the   */
5444 /* ipf_queueflush() function.  Since the nat_delete() function returns void */
5445 /* we translate that to mean it always succeeds in deleting something.      */
5446 /* ------------------------------------------------------------------------ */
5447 static int nat_flush_entry(entry)
5448 void *entry;
5449 {
5450         nat_delete(entry, NL_FLUSH);
5451         return 0;
5452 }
5453
5454
5455 /* ------------------------------------------------------------------------ */
5456 /* Function:    nat_gettable                                                */
5457 /* Returns:     int     - 0 = success, else error                           */
5458 /* Parameters:  data(I) - pointer to ioctl data                             */
5459 /*                                                                          */
5460 /* This function handles ioctl requests for tables of nat information.      */
5461 /* At present the only table it deals with is the hash bucket statistics.   */
5462 /* ------------------------------------------------------------------------ */
5463 static int nat_gettable(data)
5464 char *data;
5465 {
5466         ipftable_t table;
5467         int error;
5468
5469         error = fr_inobj(data, &table, IPFOBJ_GTABLE);
5470         if (error != 0)
5471                 return error;
5472
5473         switch (table.ita_type)
5474         {
5475         case IPFTABLE_BUCKETS_NATIN :
5476                 error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table, 
5477                                 ipf_nattable_sz * sizeof(u_long));
5478                 break;
5479
5480         case IPFTABLE_BUCKETS_NATOUT :
5481                 error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table, 
5482                                 ipf_nattable_sz * sizeof(u_long));
5483                 break;
5484
5485         default :
5486                 return EINVAL;
5487         }
5488
5489         if (error != 0) {
5490                 error = EFAULT;
5491         }
5492         return error;
5493 }