]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/contrib/ipfilter/netinet/ip_nat.c
merge fix for boot-time hang on centos' xen
[FreeBSD/FreeBSD.git] / sys / contrib / ipfilter / netinet / ip_nat.c
1 /*      $FreeBSD$       */
2
3 /*
4  * Copyright (C) 1995-2003 by Darren Reed.
5  *
6  * See the IPFILTER.LICENCE file for details on licencing.
7  */
8 #if defined(KERNEL) || defined(_KERNEL)
9 # undef KERNEL
10 # undef _KERNEL
11 # define        KERNEL  1
12 # define        _KERNEL 1
13 #endif
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
17 #include <sys/time.h>
18 #include <sys/file.h>
19 #if defined(_KERNEL) && defined(__NetBSD_Version__) && \
20     (__NetBSD_Version__ >= 399002000)
21 # include <sys/kauth.h>
22 #endif
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24     defined(_KERNEL)
25 #if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
26 #  include "opt_ipfilter_log.h"
27 # else
28 #  include "opt_ipfilter.h"
29 # endif
30 #endif
31 #if !defined(_KERNEL)
32 # include <stdio.h>
33 # include <string.h>
34 # include <stdlib.h>
35 # define _KERNEL
36 # ifdef __OpenBSD__
37 struct file;
38 # endif
39 # include <sys/uio.h>
40 # undef _KERNEL
41 #endif
42 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
43 # include <sys/filio.h>
44 # include <sys/fcntl.h>
45 #else
46 # include <sys/ioctl.h>
47 #endif
48 #if !defined(AIX)
49 # include <sys/fcntl.h>
50 #endif
51 #if !defined(linux)
52 # include <sys/protosw.h>
53 #endif
54 #include <sys/socket.h>
55 #if defined(_KERNEL)
56 # include <sys/systm.h>
57 # if !defined(__SVR4) && !defined(__svr4__)
58 #  include <sys/mbuf.h>
59 # endif
60 #endif
61 #if defined(__SVR4) || defined(__svr4__)
62 # include <sys/filio.h>
63 # include <sys/byteorder.h>
64 # ifdef _KERNEL
65 #  include <sys/dditypes.h>
66 # endif
67 # include <sys/stream.h>
68 # include <sys/kmem.h>
69 #endif
70 #if __FreeBSD_version >= 300000
71 # include <sys/queue.h>
72 #endif
73 #include <net/if.h>
74 #if __FreeBSD_version >= 300000
75 # include <net/if_var.h>
76 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
77 #  include "opt_ipfilter.h"
78 # endif
79 #endif
80 #ifdef sun
81 # include <net/af.h>
82 #endif
83 #include <net/route.h>
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/ip.h>
87
88 #ifdef RFC1825
89 # include <vpn/md5.h>
90 # include <vpn/ipsec.h>
91 extern struct ifnet vpnif;
92 #endif
93
94 #if !defined(linux)
95 # include <netinet/ip_var.h>
96 #endif
97 #include <netinet/tcp.h>
98 #include <netinet/udp.h>
99 #include <netinet/ip_icmp.h>
100 #include "netinet/ip_compat.h"
101 #include <netinet/tcpip.h>
102 #include "netinet/ip_fil.h"
103 #include "netinet/ip_nat.h"
104 #include "netinet/ip_frag.h"
105 #include "netinet/ip_state.h"
106 #include "netinet/ip_proxy.h"
107 #ifdef  IPFILTER_SYNC
108 #include "netinet/ip_sync.h"
109 #endif
110 #if (__FreeBSD_version >= 300000)
111 # include <sys/malloc.h>
112 #endif
113 /* END OF INCLUDES */
114
115 #undef  SOCKADDR_IN
116 #define SOCKADDR_IN     struct sockaddr_in
117
118 #if !defined(lint)
119 static const char sccsid[] = "@(#)ip_nat.c      1.11 6/5/96 (C) 1995 Darren Reed";
120 static const char rcsid[] = "@(#)$FreeBSD$";
121 /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */
122 #endif
123
124
125 /* ======================================================================== */
126 /* How the NAT is organised and works.                                      */
127 /*                                                                          */
128 /* Inside (interface y) NAT       Outside (interface x)                     */
129 /* -------------------- -+- -------------------------------------           */
130 /* Packet going          |   out, processed by fr_checknatout() for x       */
131 /* ------------>         |   ------------>                                  */
132 /* src=10.1.1.1          |   src=192.1.1.1                                  */
133 /*                       |                                                  */
134 /*                       |   in, processed by fr_checknatin() for x         */
135 /* <------------         |   <------------                                  */
136 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
137 /* -------------------- -+- -------------------------------------           */
138 /* fr_checknatout() - changes ip_src and if required, sport                 */
139 /*             - creates a new mapping, if required.                        */
140 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
141 /*                                                                          */
142 /* In the NAT table, internal source is recorded as "in" and externally     */
143 /* seen as "out".                                                           */
144 /* ======================================================================== */
145
146
147 nat_t   **nat_table[2] = { NULL, NULL },	/* two hash tables of nat_t pointers, ipf_nattable_sz slots each (allocated in fr_natinit) */
148         *nat_instances = NULL;
149 ipnat_t *nat_list = NULL;	/* NAT rule list, walked via in_next by the SIOCADNAT/SIOCRMNAT lookup */
150 u_int   ipf_nattable_max = NAT_TABLE_MAX;
151 u_int   ipf_nattable_sz = NAT_TABLE_SZ;	/* slot count of each nat_table[] and ns_bucketlen[] array */
152 u_int   ipf_natrules_sz = NAT_SIZE;	/* bucket count of nat_rules */
153 u_int   ipf_rdrrules_sz = RDR_SIZE;	/* bucket count of rdr_rules */
154 u_int   ipf_hostmap_sz = HOSTMAP_SIZE;	/* bucket count of ipf_hm_maptable */
155 u_int   fr_nat_maxbucket = 0,	/* 0 here means fr_natinit() derives it from ipf_nattable_sz */
156         fr_nat_maxbucket_reset = 1;
157 u_32_t  nat_masks = 0;	/* bit k is set by nat_addnat() when count4bits(in_inmsk) == k */
158 u_32_t  rdr_masks = 0;	/* bit k is set by nat_addrdr() when count4bits(in_outmsk) == k */
159 u_long  nat_last_force_flush = 0;
160 ipnat_t **nat_rules = NULL;	/* hash of map rules, buckets chained via in_mnext */
161 ipnat_t **rdr_rules = NULL;	/* hash of redirect rules, buckets chained via in_rnext */
162 hostmap_t       **ipf_hm_maptable  = NULL;	/* hostmap hash buckets, chained via hm_hnext */
163 hostmap_t       *ipf_hm_maplist  = NULL;	/* list of every hostmap entry, chained via hm_next */
164 ipftq_t nat_tqb[IPF_TCP_NSTATES];	/* one timeout queue per TCP state, set up by fr_sttab_init() */
165 ipftq_t nat_udptq;	/* timeout queue whose ttl is fr_defnatage */
166 ipftq_t nat_icmptq;	/* timeout queue whose ttl is fr_defnaticmpage */
167 ipftq_t nat_iptq;	/* timeout queue whose ttl is fr_defnatipage */
168 ipftq_t *nat_utqe = NULL;
169 int     fr_nat_doflush = 0;
170 #ifdef  IPFILTER_LOG
171 int     nat_logging = 1;	/* read/written by the SIOCGETLG/SIOCSETLG ioctls */
172 #else
173 int     nat_logging = 0;
174 #endif
175
176 u_long  fr_defnatage = DEF_NAT_AGE,
177         fr_defnatipage = 120,           /* 60 seconds */
178         fr_defnaticmpage = 6;           /* 3 seconds */
179 natstat_t nat_stats;	/* filled in and copied out by the SIOCGNATS ioctl */
180 int     fr_nat_lock = 0;
181 int     fr_nat_init = 0;	/* set to 1 once fr_natinit() has completed */
182 #if SOLARIS && !defined(_INET_IP_STACK_H)
183 extern  int             pfil_delayed_copy;
184 #endif
185
186 static  int     nat_flush_entry __P((void *));
187 static  int     nat_flushtable __P((void));
188 static  int     nat_clearlist __P((void));
189 static  void    nat_addnat __P((struct ipnat *));
190 static  void    nat_addrdr __P((struct ipnat *));
191 static  void    nat_delrdr __P((struct ipnat *));
192 static  void    nat_delnat __P((struct ipnat *));
193 static  int     fr_natgetent __P((caddr_t, int));
194 static  int     fr_natgetsz __P((caddr_t, int));
195 static  int     fr_natputent __P((caddr_t, int));
196 static  int     nat_extraflush __P((int));
197 static  int     nat_gettable __P((char *));
198 static  void    nat_tabmove __P((nat_t *));
199 static  int     nat_match __P((fr_info_t *, ipnat_t *));
200 static  INLINE  int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
201 static  INLINE  int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
202 static  hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
203                                     struct in_addr, struct in_addr, u_32_t));
204 static  int     nat_icmpquerytype4 __P((int));
205 static  int     nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
206 static  void    nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
207 static  int     nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
208                                       tcphdr_t *, nat_t **, int));
209 static  int     nat_resolverule __P((ipnat_t *));
210 static  nat_t   *fr_natclone __P((fr_info_t *, nat_t *));
211 static  void    nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
212 static  int     nat_wildok __P((nat_t *, int, int, int, int));
213 static  int     nat_getnext __P((ipftoken_t *, ipfgeniter_t *));
214 static  int     nat_iterator __P((ipftoken_t *, ipfgeniter_t *));
215
216
217 /* ------------------------------------------------------------------------ */
218 /* Function:    fr_natinit                                                  */
219 /* Returns:     int - 0 == success, -1 == failure                           */
220 /* Parameters:  Nil                                                         */
221 /*                                                                          */
222 /* Initialise all of the NAT locks, tables and other structures.            */
223 /* ------------------------------------------------------------------------ */
224 int fr_natinit()
225 {
226         int i;
227
228         KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
229         if (nat_table[0] != NULL)
230                 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
231         else
232                 return -1;
233
234         KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
235         if (nat_table[1] != NULL)
236                 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
237         else
238                 return -2;
239
240         KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
241         if (nat_rules != NULL)
242                 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
243         else
244                 return -3;
245
246         KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
247         if (rdr_rules != NULL)
248                 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
249         else
250                 return -4;
251
252         KMALLOCS(ipf_hm_maptable, hostmap_t **, \
253                  sizeof(hostmap_t *) * ipf_hostmap_sz);
254         if (ipf_hm_maptable != NULL)
255                 bzero((char *)ipf_hm_maptable,
256                       sizeof(hostmap_t *) * ipf_hostmap_sz);
257         else
258                 return -5;
259         ipf_hm_maplist = NULL;
260
261         KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
262                  ipf_nattable_sz * sizeof(u_long));
263         if (nat_stats.ns_bucketlen[0] == NULL)
264                 return -6;
265         bzero((char *)nat_stats.ns_bucketlen[0],
266               ipf_nattable_sz * sizeof(u_long));
267
268         KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
269                  ipf_nattable_sz * sizeof(u_long));
270         if (nat_stats.ns_bucketlen[1] == NULL)
271                 return -7;
272
273         bzero((char *)nat_stats.ns_bucketlen[1],
274               ipf_nattable_sz * sizeof(u_long));
275
276         if (fr_nat_maxbucket == 0) {
277                 for (i = ipf_nattable_sz; i > 0; i >>= 1)
278                         fr_nat_maxbucket++;
279                 fr_nat_maxbucket *= 2;
280         }
281
282         fr_sttab_init(nat_tqb);
283         /*
284          * Increase this because we may have "keep state" following this too
285          * and packet storms can occur if this is removed too quickly.
286          */
287         nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
288         nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
289         nat_udptq.ifq_ttl = fr_defnatage;
290         nat_udptq.ifq_ref = 1;
291         nat_udptq.ifq_head = NULL;
292         nat_udptq.ifq_tail = &nat_udptq.ifq_head;
293         MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
294         nat_udptq.ifq_next = &nat_icmptq;
295         nat_icmptq.ifq_ttl = fr_defnaticmpage;
296         nat_icmptq.ifq_ref = 1;
297         nat_icmptq.ifq_head = NULL;
298         nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
299         MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
300         nat_icmptq.ifq_next = &nat_iptq;
301         nat_iptq.ifq_ttl = fr_defnatipage;
302         nat_iptq.ifq_ref = 1;
303         nat_iptq.ifq_head = NULL;
304         nat_iptq.ifq_tail = &nat_iptq.ifq_head;
305         MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
306         nat_iptq.ifq_next = NULL;
307
308         for (i = 0; i < IPF_TCP_NSTATES; i++) {
309                 if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
310                         nat_tqb[i].ifq_ttl = fr_defnaticmpage;
311 #ifdef LARGE_NAT
312                 else if (nat_tqb[i].ifq_ttl > fr_defnatage)
313                         nat_tqb[i].ifq_ttl = fr_defnatage;
314 #endif
315         }
316
317         /*
318          * Increase this because we may have "keep state" following
319          * this too and packet storms can occur if this is removed
320          * too quickly.
321          */
322         nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
323
324         RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
325         RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
326         MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
327         MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
328
329         fr_nat_init = 1;
330
331         return 0;
332 }
333
334
335 /* ------------------------------------------------------------------------ */
336 /* Function:    nat_addrdr                                                  */
337 /* Returns:     Nil                                                         */
338 /* Parameters:  n(I) - pointer to NAT rule to add                           */
339 /*                                                                          */
340 /* Adds a redirect rule to the hash table of redirect rules and the list of */
341 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
342 /* use by redirect rules.                                                   */
343 /* ------------------------------------------------------------------------ */
344 static void nat_addrdr(n)
345 ipnat_t *n;
346 {
347         ipnat_t **np;
348         u_32_t j;
349         u_int hv;
350         int k;
351
352         k = count4bits(n->in_outmsk);
353         if ((k >= 0) && (k != 32))
354                 rdr_masks |= 1 << k;
355         j = (n->in_outip & n->in_outmsk);
356         hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
357         np = rdr_rules + hv;
358         while (*np != NULL)
359                 np = &(*np)->in_rnext;
360         n->in_rnext = NULL;
361         n->in_prnext = np;
362         n->in_hv = hv;
363         *np = n;
364 }
365
366
367 /* ------------------------------------------------------------------------ */
368 /* Function:    nat_addnat                                                  */
369 /* Returns:     Nil                                                         */
370 /* Parameters:  n(I) - pointer to NAT rule to add                           */
371 /*                                                                          */
372 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
373 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
374 /* redirect rules.                                                          */
375 /* ------------------------------------------------------------------------ */
376 static void nat_addnat(n)
377 ipnat_t *n;
378 {
379         ipnat_t **np;
380         u_32_t j;
381         u_int hv;
382         int k;
383
384         k = count4bits(n->in_inmsk);
385         if ((k >= 0) && (k != 32))
386                 nat_masks |= 1 << k;
387         j = (n->in_inip & n->in_inmsk);
388         hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
389         np = nat_rules + hv;
390         while (*np != NULL)
391                 np = &(*np)->in_mnext;
392         n->in_mnext = NULL;
393         n->in_pmnext = np;
394         n->in_hv = hv;
395         *np = n;
396 }
397
398
399 /* ------------------------------------------------------------------------ */
400 /* Function:    nat_delrdr                                                  */
401 /* Returns:     Nil                                                         */
402 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
403 /*                                                                          */
404 /* Removes a redirect rule from the hash table of redirect rules.           */
405 /* ------------------------------------------------------------------------ */
406 static void nat_delrdr(n)
407 ipnat_t *n;
408 {
409         if (n->in_rnext)
410                 n->in_rnext->in_prnext = n->in_prnext;
411         *n->in_prnext = n->in_rnext;
412 }
413
414
415 /* ------------------------------------------------------------------------ */
416 /* Function:    nat_delnat                                                  */
417 /* Returns:     Nil                                                         */
418 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
419 /*                                                                          */
420 /* Removes a NAT map rule from the hash table of NAT map rules.             */
421 /* ------------------------------------------------------------------------ */
422 static void nat_delnat(n)
423 ipnat_t *n;
424 {
425         if (n->in_mnext != NULL)
426                 n->in_mnext->in_pmnext = n->in_pmnext;
427         *n->in_pmnext = n->in_mnext;
428 }
429
430
431 /* ------------------------------------------------------------------------ */
432 /* Function:    nat_hostmap                                                 */
433 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
434 /*                                else a pointer to the hostmapping to use  */
435 /* Parameters:  np(I)   - pointer to NAT rule                               */
436 /*              real(I) - real IP address                                   */
437 /*              map(I)  - mapped IP address                                 */
438 /*              port(I) - destination port number                           */
439 /* Write Locks: ipf_nat                                                     */
440 /*                                                                          */
441 /* Check if an ip address has already been allocated for a given mapping    */
442 /* that is not doing port based translation.  If is not yet allocated, then */
443 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
444 /* ------------------------------------------------------------------------ */
445 static struct hostmap *nat_hostmap(np, src, dst, map, port)
446 ipnat_t *np;
447 struct in_addr src;
448 struct in_addr dst;
449 struct in_addr map;
450 u_32_t port;
451 {
452         hostmap_t *hm;
453         u_int hv;
454
455         hv = (src.s_addr ^ dst.s_addr);
456         hv += src.s_addr;
457         hv += dst.s_addr;
458         hv %= HOSTMAP_SIZE;
459         for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_next)
460                 if ((hm->hm_srcip.s_addr == src.s_addr) &&
461                     (hm->hm_dstip.s_addr == dst.s_addr) &&
462                     ((np == NULL) || (np == hm->hm_ipnat)) &&
463                     ((port == 0) || (port == hm->hm_port))) {
464                         hm->hm_ref++;
465                         return hm;
466                 }
467
468         if (np == NULL)
469                 return NULL;
470
471         KMALLOC(hm, hostmap_t *);
472         if (hm) {
473                 hm->hm_next = ipf_hm_maplist;
474                 hm->hm_pnext = &ipf_hm_maplist;
475                 if (ipf_hm_maplist != NULL)
476                         ipf_hm_maplist->hm_pnext = &hm->hm_next;
477                 ipf_hm_maplist = hm;
478                 hm->hm_hnext = ipf_hm_maptable[hv];
479                 hm->hm_phnext = ipf_hm_maptable + hv;
480                 if (ipf_hm_maptable[hv] != NULL)
481                         ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
482                 ipf_hm_maptable[hv] = hm;
483                 hm->hm_ipnat = np;
484                 hm->hm_srcip = src;
485                 hm->hm_dstip = dst;
486                 hm->hm_mapip = map;
487                 hm->hm_ref = 1;
488                 hm->hm_port = port;
489         }
490         return hm;
491 }
492
493
494 /* ------------------------------------------------------------------------ */
495 /* Function:    fr_hostmapdel                                               */
496 /* Returns:     Nil                                                         */
497 /* Parameters:  hmp(I) - pointer to hostmap structure pointer               */
498 /* Write Locks: ipf_nat                                                     */
499 /*                                                                          */
500 /* Decrement the references to this hostmap structure by one.  If this      */
501 /* reaches zero then remove it and free it.                                 */
502 /* ------------------------------------------------------------------------ */
503 void fr_hostmapdel(hmp)
504 struct hostmap **hmp;
505 {
506         struct hostmap *hm;
507
508         hm = *hmp;
509         *hmp = NULL;
510
511         hm->hm_ref--;
512         if (hm->hm_ref == 0) {
513                 if (hm->hm_hnext)
514                         hm->hm_hnext->hm_phnext = hm->hm_phnext;
515                 *hm->hm_phnext = hm->hm_hnext;
516                 if (hm->hm_next)
517                         hm->hm_next->hm_pnext = hm->hm_pnext;
518                 *hm->hm_pnext = hm->hm_next;
519                 KFREE(hm);
520         }
521 }
522
523
524 /* ------------------------------------------------------------------------ */
525 /* Function:    fix_outcksum                                                */
526 /* Returns:     Nil                                                         */
527 /* Parameters:  fin(I) - pointer to packet information                      */
528 /*              sp(I)  - location of 16bit checksum to update               */
529 /*              n((I)  - amount to adjust checksum by                       */
530 /*                                                                          */
531 /* Adjusts the 16bit checksum by "n" for packets going out.                 */
532 /* ------------------------------------------------------------------------ */
533 void fix_outcksum(fin, sp, n)
534 fr_info_t *fin;
535 u_short *sp;
536 u_32_t n;
537 {
538         u_short sumshort;
539         u_32_t sum1;
540
541         if (n == 0)
542                 return;
543
544         if (n & NAT_HW_CKSUM) {
545                 n &= 0xffff;
546                 n += fin->fin_dlen;
547                 n = (n & 0xffff) + (n >> 16);
548                 *sp = n & 0xffff;
549                 return;
550         }
551         sum1 = (~ntohs(*sp)) & 0xffff;
552         sum1 += (n);
553         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
554         /* Again */
555         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
556         sumshort = ~(u_short)sum1;
557         *(sp) = htons(sumshort);
558 }
559
560
561 /* ------------------------------------------------------------------------ */
562 /* Function:    fix_incksum                                                 */
563 /* Returns:     Nil                                                         */
564 /* Parameters:  fin(I) - pointer to packet information                      */
565 /*              sp(I)  - location of 16bit checksum to update               */
566 /*              n((I)  - amount to adjust checksum by                       */
567 /*                                                                          */
568 /* Adjusts the 16bit checksum by "n" for packets going in.                  */
569 /* ------------------------------------------------------------------------ */
570 void fix_incksum(fin, sp, n)
571 fr_info_t *fin;
572 u_short *sp;
573 u_32_t n;
574 {
575         u_short sumshort;
576         u_32_t sum1;
577
578         if (n == 0)
579                 return;
580
581         if (n & NAT_HW_CKSUM) {
582                 n &= 0xffff;
583                 n += fin->fin_dlen;
584                 n = (n & 0xffff) + (n >> 16);
585                 *sp = n & 0xffff;
586                 return;
587         }
588         sum1 = (~ntohs(*sp)) & 0xffff;
589         sum1 += ~(n) & 0xffff;
590         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
591         /* Again */
592         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
593         sumshort = ~(u_short)sum1;
594         *(sp) = htons(sumshort);
595 }
596
597
598 /* ------------------------------------------------------------------------ */
599 /* Function:    fix_datacksum                                               */
600 /* Returns:     Nil                                                         */
601 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
602 /*              n((I)  - amount to adjust checksum by                       */
603 /*                                                                          */
604 /* Fix_datacksum is used *only* for the adjustments of checksums in the     */
605 /* data section of an IP packet.                                            */
606 /*                                                                          */
607 /* The only situation in which you need to do this is when NAT'ing an       */
608 /* ICMP error message. Such a message, contains in its body the IP header   */
609 /* of the original IP packet, that causes the error.                        */
610 /*                                                                          */
611 /* You can't use fix_incksum or fix_outcksum in that case, because for the  */
612 /* kernel the data section of the ICMP error is just data, and no special   */
613 /* processing like hardware cksum or ntohs processing have been done by the */
614 /* kernel on the data section.                                              */
615 /* ------------------------------------------------------------------------ */
616 void fix_datacksum(sp, n)
617 u_short *sp;
618 u_32_t n;
619 {
620         u_short sumshort;
621         u_32_t sum1;
622
623         if (n == 0)
624                 return;
625
626         sum1 = (~ntohs(*sp)) & 0xffff;
627         sum1 += (n);
628         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
629         /* Again */
630         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
631         sumshort = ~(u_short)sum1;
632         *(sp) = htons(sumshort);
633 }
634
635
636 /* ------------------------------------------------------------------------ */
637 /* Function:    fr_nat_ioctl                                                */
638 /* Returns:     int - 0 == success, != 0 == failure                         */
639 /* Parameters:  data(I) - pointer to ioctl data                             */
640 /*              cmd(I)  - ioctl command integer                             */
641 /*              mode(I) - file mode bits used with open                     */
642 /*                                                                          */
643 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
644 /* ------------------------------------------------------------------------ */
645 int fr_nat_ioctl(data, cmd, mode, uid, ctx)
646 ioctlcmd_t cmd;
647 caddr_t data;
648 int mode, uid;
649 void *ctx;
650 {
651         ipnat_t *nat, *nt, *n = NULL, **np = NULL;
652         int error = 0, ret, arg, getlock;
653         ipnat_t natd;
654         SPL_INT(s);
655
656 #if (BSD >= 199306) && defined(_KERNEL)
657 # if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
658         if ((mode & FWRITE) &&
659              kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
660                                      KAUTH_REQ_NETWORK_FIREWALL_FW,
661                                      NULL, NULL, NULL)) {
662                 return EPERM;
663         }
664 # else
665         if ((securelevel >= 3) && (mode & FWRITE)) {
666                 return EPERM;
667         }
668 # endif
669 #endif
670
671 #if defined(__osf__) && defined(_KERNEL)
672         getlock = 0;
673 #else
674         getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
675 #endif
676
677         nat = NULL;     /* XXX gcc -Wuninitialized */
678         if (cmd == (ioctlcmd_t)SIOCADNAT) {
679                 KMALLOC(nt, ipnat_t *);
680         } else {
681                 nt = NULL;
682         }
683
684         if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
685                 if (mode & NAT_SYSSPACE) {
686                         bcopy(data, (char *)&natd, sizeof(natd));
687                         error = 0;
688                 } else {
689                         error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
690                 }
691         }
692
693         if (error != 0)
694                 goto done;
695
696         /*
697          * For add/delete, look to see if the NAT entry is already present
698          */
699         if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
700                 nat = &natd;
701                 if (nat->in_v == 0)     /* For backward compat. */
702                         nat->in_v = 4;
703                 nat->in_flags &= IPN_USERFLAGS;
704                 if ((nat->in_redir & NAT_MAPBLK) == 0) {
705                         if ((nat->in_flags & IPN_SPLIT) == 0)
706                                 nat->in_inip &= nat->in_inmsk;
707                         if ((nat->in_flags & IPN_IPRANGE) == 0)
708                                 nat->in_outip &= nat->in_outmsk;
709                 }
710                 MUTEX_ENTER(&ipf_natio);
711                 for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
712                         if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
713                                         IPN_CMPSIZ) == 0) {
714                                 if (nat->in_redir == NAT_REDIRECT &&
715                                     nat->in_pnext != n->in_pnext)
716                                         continue;
717                                 break;
718                         }
719         }
720
721         switch (cmd)
722         {
723 #ifdef  IPFILTER_LOG
724         case SIOCIPFFB :
725         {
726                 int tmp;
727
728                 if (!(mode & FWRITE))
729                         error = EPERM;
730                 else {
731                         tmp = ipflog_clear(IPL_LOGNAT);
732                         error = BCOPYOUT((char *)&tmp, (char *)data,
733                                          sizeof(tmp));
734                         if (error != 0)
735                                 error = EFAULT;
736                 }
737                 break;
738         }
739
740         case SIOCSETLG :
741                 if (!(mode & FWRITE))
742                         error = EPERM;
743                 else {
744                         error = BCOPYIN((char *)data, (char *)&nat_logging,
745                                         sizeof(nat_logging));
746                         if (error != 0)
747                                 error = EFAULT;
748                 }
749                 break;
750
751         case SIOCGETLG :
752                 error = BCOPYOUT((char *)&nat_logging, (char *)data,
753                                  sizeof(nat_logging));
754                 if (error != 0)
755                         error = EFAULT;
756                 break;
757
758         case FIONREAD :
759                 arg = iplused[IPL_LOGNAT];
760                 error = BCOPYOUT(&arg, data, sizeof(arg));
761                 if (error != 0)
762                         error = EFAULT;
763                 break;
764 #endif
765         case SIOCADNAT :
766                 if (!(mode & FWRITE)) {
767                         error = EPERM;
768                 } else if (n != NULL) {
769                         error = EEXIST;
770                 } else if (nt == NULL) {
771                         error = ENOMEM;
772                 }
773                 if (error != 0) {
774                         MUTEX_EXIT(&ipf_natio);
775                         break;
776                 }
777                 bcopy((char *)nat, (char *)nt, sizeof(*n));
778                 error = nat_siocaddnat(nt, np, getlock);
779                 MUTEX_EXIT(&ipf_natio);
780                 if (error == 0)
781                         nt = NULL;
782                 break;
783
784         case SIOCRMNAT :
785                 if (!(mode & FWRITE)) {
786                         error = EPERM;
787                         n = NULL;
788                 } else if (n == NULL) {
789                         error = ESRCH;
790                 }
791
792                 if (error != 0) {
793                         MUTEX_EXIT(&ipf_natio);
794                         break;
795                 }
796                 nat_siocdelnat(n, np, getlock);
797
798                 MUTEX_EXIT(&ipf_natio);
799                 n = NULL;
800                 break;
801
802         case SIOCGNATS :
803                 nat_stats.ns_table[0] = nat_table[0];
804                 nat_stats.ns_table[1] = nat_table[1];
805                 nat_stats.ns_list = nat_list;
806                 nat_stats.ns_maptable = ipf_hm_maptable;
807                 nat_stats.ns_maplist = ipf_hm_maplist;
808                 nat_stats.ns_nattab_sz = ipf_nattable_sz;
809                 nat_stats.ns_nattab_max = ipf_nattable_max;
810                 nat_stats.ns_rultab_sz = ipf_natrules_sz;
811                 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
812                 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
813                 nat_stats.ns_instances = nat_instances;
814                 nat_stats.ns_apslist = ap_sess_list;
815                 nat_stats.ns_ticks = fr_ticks;
816                 error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
817                 break;
818
819         case SIOCGNATL :
820             {
821                 natlookup_t nl;
822
823                 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
824                 if (error == 0) {
825                         void *ptr;
826
827                         if (getlock) {
828                                 READ_ENTER(&ipf_nat);
829                         }
830                         ptr = nat_lookupredir(&nl);
831                         if (getlock) {
832                                 RWLOCK_EXIT(&ipf_nat);
833                         }
834                         if (ptr != NULL) {
835                                 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
836                         } else {
837                                 error = ESRCH;
838                         }
839                 }
840                 break;
841             }
842
843         case SIOCIPFFL :        /* old SIOCFLNAT & SIOCCNATL */
844                 if (!(mode & FWRITE)) {
845                         error = EPERM;
846                         break;
847                 }
848                 if (getlock) {
849                         WRITE_ENTER(&ipf_nat);
850                 }
851
852                 error = BCOPYIN(data, &arg, sizeof(arg));
853                 if (error != 0)
854                         error = EFAULT;
855                 else {
856                         if (arg == 0)
857                                 ret = nat_flushtable();
858                         else if (arg == 1)
859                                 ret = nat_clearlist();
860                         else
861                                 ret = nat_extraflush(arg);
862                 }
863
864                 if (getlock) {
865                         RWLOCK_EXIT(&ipf_nat);
866                 }
867                 if (error == 0) {
868                         error = BCOPYOUT(&ret, data, sizeof(ret));
869                 }
870                 break;
871
872         case SIOCPROXY :
873                 error = appr_ioctl(data, cmd, mode, ctx);
874                 break;
875
876         case SIOCSTLCK :
877                 if (!(mode & FWRITE)) {
878                         error = EPERM;
879                 } else {
880                         error = fr_lock(data, &fr_nat_lock);
881                 }
882                 break;
883
884         case SIOCSTPUT :
885                 if ((mode & FWRITE) != 0) {
886                         error = fr_natputent(data, getlock);
887                 } else {
888                         error = EACCES;
889                 }
890                 break;
891
892         case SIOCSTGSZ :
893                 if (fr_nat_lock) {
894                         error = fr_natgetsz(data, getlock);
895                 } else
896                         error = EACCES;
897                 break;
898
899         case SIOCSTGET :
900                 if (fr_nat_lock) {
901                         error = fr_natgetent(data, getlock);
902                 } else
903                         error = EACCES;
904                 break;
905
906         case SIOCGENITER :
907             {
908                 ipfgeniter_t iter;
909                 ipftoken_t *token;
910
911                 SPL_SCHED(s);
912                 error = fr_inobj(data, &iter, IPFOBJ_GENITER);
913                 if (error == 0) {
914                         token = ipf_findtoken(iter.igi_type, uid, ctx);
915                         if (token != NULL) {
916                                 error  = nat_iterator(token, &iter);
917                         }
918                         RWLOCK_EXIT(&ipf_tokens);
919                 }
920                 SPL_X(s);
921                 break;
922             }
923
924         case SIOCIPFDELTOK :
925                 error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
926                 if (error == 0) {
927                         SPL_SCHED(s);
928                         error = ipf_deltoken(arg, uid, ctx);
929                         SPL_X(s);
930                 } else {
931                         error = EFAULT;
932                 }
933                 break;
934
935         case SIOCGTQTAB :
936                 error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
937                 break;
938
939         case SIOCGTABL :
940                 error = nat_gettable(data);
941                 break;
942
943         default :
944                 error = EINVAL;
945                 break;
946         }
947 done:
948         if (nt != NULL)
949                 KFREE(nt);
950         return error;
951 }
952
953
954 /* ------------------------------------------------------------------------ */
955 /* Function:    nat_siocaddnat                                              */
956 /* Returns:     int - 0 == success, != 0 == failure                         */
957 /* Parameters:  n(I)       - pointer to new NAT rule                        */
958 /*              np(I)      - pointer to where to insert new NAT rule        */
959 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
960 /* Mutex Locks: ipf_natio                                                   */
961 /*                                                                          */
962 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
963 /* from information passed to the kernel, then add it  to the appropriate   */
964 /* NAT rule table(s).                                                       */
965 /* ------------------------------------------------------------------------ */
966 static int nat_siocaddnat(n, np, getlock)
967 ipnat_t *n, **np;
968 int getlock;
969 {
970         int error = 0, i, j;
971
972         if (nat_resolverule(n) != 0)
973                 return ENOENT;
974
975         if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
976                 return EINVAL;
977
978         n->in_use = 0;
979         if (n->in_redir & NAT_MAPBLK)
980                 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
981         else if (n->in_flags & IPN_AUTOPORTMAP)
982                 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
983         else if (n->in_flags & IPN_IPRANGE)
984                 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
985         else if (n->in_flags & IPN_SPLIT)
986                 n->in_space = 2;
987         else if (n->in_outmsk != 0)
988                 n->in_space = ~ntohl(n->in_outmsk);
989         else
990                 n->in_space = 1;
991
992         /*
993          * Calculate the number of valid IP addresses in the output
994          * mapping range.  In all cases, the range is inclusive of
995          * the start and ending IP addresses.
996          * If to a CIDR address, lose 2: broadcast + network address
997          *                               (so subtract 1)
998          * If to a range, add one.
999          * If to a single IP address, set to 1.
1000          */
1001         if (n->in_space) {
1002                 if ((n->in_flags & IPN_IPRANGE) != 0)
1003                         n->in_space += 1;
1004                 else
1005                         n->in_space -= 1;
1006         } else
1007                 n->in_space = 1;
1008
1009         if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1010             ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1011                 n->in_nip = ntohl(n->in_outip) + 1;
1012         else if ((n->in_flags & IPN_SPLIT) &&
1013                  (n->in_redir & NAT_REDIRECT))
1014                 n->in_nip = ntohl(n->in_inip);
1015         else
1016                 n->in_nip = ntohl(n->in_outip);
1017         if (n->in_redir & NAT_MAP) {
1018                 n->in_pnext = ntohs(n->in_pmin);
1019                 /*
1020                  * Multiply by the number of ports made available.
1021                  */
1022                 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1023                         n->in_space *= (ntohs(n->in_pmax) -
1024                                         ntohs(n->in_pmin) + 1);
1025                         /*
1026                          * Because two different sources can map to
1027                          * different destinations but use the same
1028                          * local IP#/port #.
1029                          * If the result is smaller than in_space, then
1030                          * we may have wrapped around 32bits.
1031                          */
1032                         i = n->in_inmsk;
1033                         if ((i != 0) && (i != 0xffffffff)) {
1034                                 j = n->in_space * (~ntohl(i) + 1);
1035                                 if (j >= n->in_space)
1036                                         n->in_space = j;
1037                                 else
1038                                         n->in_space = 0xffffffff;
1039                         }
1040                 }
1041                 /*
1042                  * If no protocol is specified, multiple by 256 to allow for
1043                  * at least one IP:IP mapping per protocol.
1044                  */
1045                 if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1046                                 j = n->in_space * 256;
1047                                 if (j >= n->in_space)
1048                                         n->in_space = j;
1049                                 else
1050                                         n->in_space = 0xffffffff;
1051                 }
1052         }
1053
1054         /* Otherwise, these fields are preset */
1055
1056         if (getlock) {
1057                 WRITE_ENTER(&ipf_nat);
1058         }
1059         n->in_next = NULL;
1060         *np = n;
1061
1062         if (n->in_age[0] != 0)
1063                 n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1064
1065         if (n->in_age[1] != 0)
1066                 n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
1067
1068         if (n->in_redir & NAT_REDIRECT) {
1069                 n->in_flags &= ~IPN_NOTDST;
1070                 nat_addrdr(n);
1071         }
1072         if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1073                 n->in_flags &= ~IPN_NOTSRC;
1074                 nat_addnat(n);
1075         }
1076         MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1077
1078         n = NULL;
1079         nat_stats.ns_rules++;
1080 #if SOLARIS && !defined(_INET_IP_STACK_H)
1081         pfil_delayed_copy = 0;
1082 #endif
1083         if (getlock) {
1084                 RWLOCK_EXIT(&ipf_nat);                  /* WRITE */
1085         }
1086
1087         return error;
1088 }
1089
1090
1091 /* ------------------------------------------------------------------------ */
1092 /* Function:    nat_resolvrule                                              */
1093 /* Returns:     Nil                                                         */
1094 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1095 /*                                                                          */
1096 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1097 /* from information passed to the kernel, then add it  to the appropriate   */
1098 /* NAT rule table(s).                                                       */
1099 /* ------------------------------------------------------------------------ */
1100 static int nat_resolverule(n)
1101 ipnat_t *n;
1102 {
1103         n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1104         n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1105
1106         n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1107         if (n->in_ifnames[1][0] == '\0') {
1108                 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1109                 n->in_ifps[1] = n->in_ifps[0];
1110         } else {
1111                 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
1112         }
1113
1114         if (n->in_plabel[0] != '\0') {
1115                 n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1116                 if (n->in_apr == NULL)
1117                         return -1;
1118         }
1119         return 0;
1120 }
1121
1122
1123 /* ------------------------------------------------------------------------ */
1124 /* Function:    nat_siocdelnat                                              */
1125 /* Returns:     int - 0 == success, != 0 == failure                         */
1126 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1127 /*              np(I)      - pointer to where to insert new NAT rule        */
1128 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1129 /* Mutex Locks: ipf_natio                                                   */
1130 /*                                                                          */
1131 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1132 /* from information passed to the kernel, then add it  to the appropriate   */
1133 /* NAT rule table(s).                                                       */
1134 /* ------------------------------------------------------------------------ */
1135 static void nat_siocdelnat(n, np, getlock)
1136 ipnat_t *n, **np;
1137 int getlock;
1138 {
1139         if (getlock) {
1140                 WRITE_ENTER(&ipf_nat);
1141         }
1142         if (n->in_redir & NAT_REDIRECT)
1143                 nat_delrdr(n);
1144         if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1145                 nat_delnat(n);
1146         if (nat_list == NULL) {
1147                 nat_masks = 0;
1148                 rdr_masks = 0;
1149         }
1150
1151         if (n->in_tqehead[0] != NULL) {
1152                 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1153                         fr_freetimeoutqueue(n->in_tqehead[1]);
1154                 }
1155         }
1156
1157         if (n->in_tqehead[1] != NULL) {
1158                 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1159                         fr_freetimeoutqueue(n->in_tqehead[1]);
1160                 }
1161         }
1162
1163         *np = n->in_next;
1164
1165         if (n->in_use == 0) {
1166                 if (n->in_apr)
1167                         appr_free(n->in_apr);
1168                 MUTEX_DESTROY(&n->in_lock);
1169                 KFREE(n);
1170                 nat_stats.ns_rules--;
1171 #if SOLARIS && !defined(_INET_IP_STACK_H)
1172                 if (nat_stats.ns_rules == 0)
1173                         pfil_delayed_copy = 1;
1174 #endif
1175         } else {
1176                 n->in_flags |= IPN_DELETE;
1177                 n->in_next = NULL;
1178         }
1179         if (getlock) {
1180                 RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
1181         }
1182 }
1183
1184
1185 /* ------------------------------------------------------------------------ */
1186 /* Function:    fr_natgetsz                                                 */
1187 /* Returns:     int - 0 == success, != 0 is the error value.                */
1188 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1189 /*                        get the size of.                                  */
1190 /*                                                                          */
1191 /* Handle SIOCSTGSZ.                                                        */
1192 /* Return the size of the nat list entry to be copied back to user space.   */
1193 /* The size of the entry is stored in the ng_sz field and the enture natget */
1194 /* structure is copied back to the user.                                    */
1195 /* ------------------------------------------------------------------------ */
1196 static int fr_natgetsz(data, getlock)
1197 caddr_t data;
1198 int getlock;
1199 {
1200         ap_session_t *aps;
1201         nat_t *nat, *n;
1202         natget_t ng;
1203
1204         if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1205                 return EFAULT;
1206
1207         if (getlock) {
1208                 READ_ENTER(&ipf_nat);
1209         }
1210
1211         nat = ng.ng_ptr;
1212         if (!nat) {
1213                 nat = nat_instances;
1214                 ng.ng_sz = 0;
1215                 /*
1216                  * Empty list so the size returned is 0.  Simple.
1217                  */
1218                 if (nat == NULL) {
1219                         if (getlock) {
1220                                 RWLOCK_EXIT(&ipf_nat);
1221                         }
1222                         if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1223                                 return EFAULT;
1224                         return 0;
1225                 }
1226         } else {
1227                 /*
1228                  * Make sure the pointer we're copying from exists in the
1229                  * current list of entries.  Security precaution to prevent
1230                  * copying of random kernel data.
1231                  */
1232                 for (n = nat_instances; n; n = n->nat_next)
1233                         if (n == nat)
1234                                 break;
1235                 if (n == NULL) {
1236                         if (getlock) {
1237                                 RWLOCK_EXIT(&ipf_nat);
1238                         }
1239                         return ESRCH;
1240                 }
1241         }
1242
1243         /*
1244          * Incluse any space required for proxy data structures.
1245          */
1246         ng.ng_sz = sizeof(nat_save_t);
1247         aps = nat->nat_aps;
1248         if (aps != NULL) {
1249                 ng.ng_sz += sizeof(ap_session_t) - 4;
1250                 if (aps->aps_data != 0)
1251                         ng.ng_sz += aps->aps_psiz;
1252         }
1253         if (getlock) {
1254                 RWLOCK_EXIT(&ipf_nat);
1255         }
1256
1257         if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1258                 return EFAULT;
1259         return 0;
1260 }
1261
1262
1263 /* ------------------------------------------------------------------------ */
1264 /* Function:    fr_natgetent                                                */
1265 /* Returns:     int - 0 == success, != 0 is the error value.                */
1266 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1267 /*                        to NAT structure to copy out.                     */
1268 /*                                                                          */
1269 /* Handle SIOCSTGET.                                                        */
1270 /* Copies out NAT entry to user space.  Any additional data held for a      */
1271 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1272 /* ------------------------------------------------------------------------ */
1273 static int fr_natgetent(data, getlock)
1274 caddr_t data;
1275 int getlock;
1276 {
1277         int error, outsize;
1278         ap_session_t *aps;
1279         nat_save_t *ipn, ipns;
1280         nat_t *n, *nat;
1281
1282         error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1283         if (error != 0)
1284                 return error;
1285
1286         if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1287                 return EINVAL;
1288
1289         KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1290         if (ipn == NULL)
1291                 return ENOMEM;
1292
1293         if (getlock) {
1294                 READ_ENTER(&ipf_nat);
1295         }
1296
1297         ipn->ipn_dsize = ipns.ipn_dsize;
1298         nat = ipns.ipn_next;
1299         if (nat == NULL) {
1300                 nat = nat_instances;
1301                 if (nat == NULL) {
1302                         if (nat_instances == NULL)
1303                                 error = ENOENT;
1304                         goto finished;
1305                 }
1306         } else {
1307                 /*
1308                  * Make sure the pointer we're copying from exists in the
1309                  * current list of entries.  Security precaution to prevent
1310                  * copying of random kernel data.
1311                  */
1312                 for (n = nat_instances; n; n = n->nat_next)
1313                         if (n == nat)
1314                                 break;
1315                 if (n == NULL) {
1316                         error = ESRCH;
1317                         goto finished;
1318                 }
1319         }
1320         ipn->ipn_next = nat->nat_next;
1321
1322         /*
1323          * Copy the NAT structure.
1324          */
1325         bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1326
1327         /*
1328          * If we have a pointer to the NAT rule it belongs to, save that too.
1329          */
1330         if (nat->nat_ptr != NULL)
1331                 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1332                       sizeof(ipn->ipn_ipnat));
1333
1334         /*
1335          * If we also know the NAT entry has an associated filter rule,
1336          * save that too.
1337          */
1338         if (nat->nat_fr != NULL)
1339                 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1340                       sizeof(ipn->ipn_fr));
1341
1342         /*
1343          * Last but not least, if there is an application proxy session set
1344          * up for this NAT entry, then copy that out too, including any
1345          * private data saved along side it by the proxy.
1346          */
1347         aps = nat->nat_aps;
1348         outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1349         if (aps != NULL) {
1350                 char *s;
1351
1352                 if (outsize < sizeof(*aps)) {
1353                         error = ENOBUFS;
1354                         goto finished;
1355                 }
1356
1357                 s = ipn->ipn_data;
1358                 bcopy((char *)aps, s, sizeof(*aps));
1359                 s += sizeof(*aps);
1360                 outsize -= sizeof(*aps);
1361                 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1362                         bcopy(aps->aps_data, s, aps->aps_psiz);
1363                 else
1364                         error = ENOBUFS;
1365         }
1366         if (error == 0) {
1367                 if (getlock) {
1368                         RWLOCK_EXIT(&ipf_nat);
1369                         getlock = 0;
1370                 }
1371                 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1372         }
1373
1374 finished:
1375         if (getlock) {
1376                 RWLOCK_EXIT(&ipf_nat);
1377         }
1378         if (ipn != NULL) {
1379                 KFREES(ipn, ipns.ipn_dsize);
1380         }
1381         return error;
1382 }
1383
1384
1385 /* ------------------------------------------------------------------------ */
1386 /* Function:    fr_natputent                                                */
1387 /* Returns:     int - 0 == success, != 0 is the error value.                */
1388 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1389 /*                            structure information to load into the kernel */
1390 /*              getlock(I) - flag indicating whether or not a write lock    */
1391 /*                           on ipf_nat is already held.                    */
1392 /*                                                                          */
1393 /* Handle SIOCSTPUT.                                                        */
1394 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1395 /* firewall rule data structures, if pointers to them indicate so.          */
1396 /* ------------------------------------------------------------------------ */
1397 static int fr_natputent(data, getlock)
1398 caddr_t data;
1399 int getlock;
1400 {
1401         nat_save_t ipn, *ipnn;
1402         ap_session_t *aps;
1403         nat_t *n, *nat;
1404         frentry_t *fr;
1405         fr_info_t fin;
1406         ipnat_t *in;
1407         int error;
1408
1409         error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1410         if (error != 0)
1411                 return error;
1412
1413         /*
1414          * Initialise early because of code at junkput label.
1415          */
1416         in = NULL;
1417         aps = NULL;
1418         nat = NULL;
1419         ipnn = NULL;
1420         fr = NULL;
1421
1422         /*
1423          * New entry, copy in the rest of the NAT entry if it's size is more
1424          * than just the nat_t structure.
1425          */
1426         if (ipn.ipn_dsize > sizeof(ipn)) {
1427                 if (ipn.ipn_dsize > 81920) {
1428                         error = ENOMEM;
1429                         goto junkput;
1430                 }
1431
1432                 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1433                 if (ipnn == NULL)
1434                         return ENOMEM;
1435
1436                 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1437                 if (error != 0) {
1438                         error = EFAULT;
1439                         goto junkput;
1440                 }
1441         } else
1442                 ipnn = &ipn;
1443
1444         KMALLOC(nat, nat_t *);
1445         if (nat == NULL) {
1446                 error = ENOMEM;
1447                 goto junkput;
1448         }
1449
1450         bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1451         /*
1452          * Initialize all these so that nat_delete() doesn't cause a crash.
1453          */
1454         bzero((char *)nat, offsetof(struct nat, nat_tqe));
1455         nat->nat_tqe.tqe_pnext = NULL;
1456         nat->nat_tqe.tqe_next = NULL;
1457         nat->nat_tqe.tqe_ifq = NULL;
1458         nat->nat_tqe.tqe_parent = nat;
1459
1460         /*
1461          * Restore the rule associated with this nat session
1462          */
1463         in = ipnn->ipn_nat.nat_ptr;
1464         if (in != NULL) {
1465                 KMALLOC(in, ipnat_t *);
1466                 nat->nat_ptr = in;
1467                 if (in == NULL) {
1468                         error = ENOMEM;
1469                         goto junkput;
1470                 }
1471                 bzero((char *)in, offsetof(struct ipnat, in_next6));
1472                 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1473                 in->in_use = 1;
1474                 in->in_flags |= IPN_DELETE;
1475
1476                 ATOMIC_INC(nat_stats.ns_rules);
1477
1478                 if (nat_resolverule(in) != 0) {
1479                         error = ESRCH;
1480                         goto junkput;
1481                 }
1482         }
1483
1484         /*
1485          * Check that the NAT entry doesn't already exist in the kernel.
1486          *
1487          * For NAT_OUTBOUND, we're lookup for a duplicate MAP entry.  To do
1488          * this, we check to see if the inbound combination of addresses and
1489          * ports is already known.  Similar logic is applied for NAT_INBOUND.
1490          * 
1491          */
1492         bzero((char *)&fin, sizeof(fin));
1493         fin.fin_p = nat->nat_p;
1494         if (nat->nat_dir == NAT_OUTBOUND) {
1495                 fin.fin_ifp = nat->nat_ifps[0];
1496                 fin.fin_data[0] = ntohs(nat->nat_oport);
1497                 fin.fin_data[1] = ntohs(nat->nat_outport);
1498                 if (getlock) {
1499                         READ_ENTER(&ipf_nat);
1500                 }
1501                 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1502                                  nat->nat_oip, nat->nat_inip);
1503                 if (getlock) {
1504                         RWLOCK_EXIT(&ipf_nat);
1505                 }
1506                 if (n != NULL) {
1507                         error = EEXIST;
1508                         goto junkput;
1509                 }
1510         } else if (nat->nat_dir == NAT_INBOUND) {
1511                 fin.fin_ifp = nat->nat_ifps[0];
1512                 fin.fin_data[0] = ntohs(nat->nat_outport);
1513                 fin.fin_data[1] = ntohs(nat->nat_oport);
1514                 if (getlock) {
1515                         READ_ENTER(&ipf_nat);
1516                 }
1517                 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1518                                   nat->nat_outip, nat->nat_oip);
1519                 if (getlock) {
1520                         RWLOCK_EXIT(&ipf_nat);
1521                 }
1522                 if (n != NULL) {
1523                         error = EEXIST;
1524                         goto junkput;
1525                 }
1526         } else {
1527                 error = EINVAL;
1528                 goto junkput;
1529         }
1530
1531         /*
1532          * Restore ap_session_t structure.  Include the private data allocated
1533          * if it was there.
1534          */
1535         aps = nat->nat_aps;
1536         if (aps != NULL) {
1537                 KMALLOC(aps, ap_session_t *);
1538                 nat->nat_aps = aps;
1539                 if (aps == NULL) {
1540                         error = ENOMEM;
1541                         goto junkput;
1542                 }
1543                 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1544                 if (in != NULL)
1545                         aps->aps_apr = in->in_apr;
1546                 else
1547                         aps->aps_apr = NULL;
1548                 if (aps->aps_psiz != 0) {
1549                         if (aps->aps_psiz > 81920) {
1550                                 error = ENOMEM;
1551                                 goto junkput;
1552                         }
1553                         KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1554                         if (aps->aps_data == NULL) {
1555                                 error = ENOMEM;
1556                                 goto junkput;
1557                         }
1558                         bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1559                               aps->aps_psiz);
1560                 } else {
1561                         aps->aps_psiz = 0;
1562                         aps->aps_data = NULL;
1563                 }
1564         }
1565
1566         /*
1567          * If there was a filtering rule associated with this entry then
1568          * build up a new one.
1569          */
1570         fr = nat->nat_fr;
1571         if (fr != NULL) {
1572                 if ((nat->nat_flags & SI_NEWFR) != 0) {
1573                         KMALLOC(fr, frentry_t *);
1574                         nat->nat_fr = fr;
1575                         if (fr == NULL) {
1576                                 error = ENOMEM;
1577                                 goto junkput;
1578                         }
1579                         ipnn->ipn_nat.nat_fr = fr;
1580                         fr->fr_ref = 1;
1581                         (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1582                         bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1583
1584                         fr->fr_ref = 1;
1585                         fr->fr_dsize = 0;
1586                         fr->fr_data = NULL;
1587                         fr->fr_type = FR_T_NONE;
1588
1589                         MUTEX_NUKE(&fr->fr_lock);
1590                         MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1591                 } else {
1592                         if (getlock) {
1593                                 READ_ENTER(&ipf_nat);
1594                         }
1595                         for (n = nat_instances; n; n = n->nat_next)
1596                                 if (n->nat_fr == fr)
1597                                         break;
1598
1599                         if (n != NULL) {
1600                                 MUTEX_ENTER(&fr->fr_lock);
1601                                 fr->fr_ref++;
1602                                 MUTEX_EXIT(&fr->fr_lock);
1603                         }
1604                         if (getlock) {
1605                                 RWLOCK_EXIT(&ipf_nat);
1606                         }
1607
1608                         if (!n) {
1609                                 error = ESRCH;
1610                                 goto junkput;
1611                         }
1612                 }
1613         }
1614
1615         if (ipnn != &ipn) {
1616                 KFREES(ipnn, ipn.ipn_dsize);
1617                 ipnn = NULL;
1618         }
1619
1620         if (getlock) {
1621                 WRITE_ENTER(&ipf_nat);
1622         }
1623         error = nat_insert(nat, nat->nat_rev);
1624         if ((error == 0) && (aps != NULL)) {
1625                 aps->aps_next = ap_sess_list;
1626                 ap_sess_list = aps;
1627         }
1628         if (getlock) {
1629                 RWLOCK_EXIT(&ipf_nat);
1630         }
1631
1632         if (error == 0)
1633                 return 0;
1634
1635         error = ENOMEM;
1636
1637 junkput:
1638         if (fr != NULL)
1639                 (void) fr_derefrule(&fr);
1640
1641         if ((ipnn != NULL) && (ipnn != &ipn)) {
1642                 KFREES(ipnn, ipn.ipn_dsize);
1643         }
1644         if (nat != NULL) {
1645                 if (aps != NULL) {
1646                         if (aps->aps_data != NULL) {
1647                                 KFREES(aps->aps_data, aps->aps_psiz);
1648                         }
1649                         KFREE(aps);
1650                 }
1651                 if (in != NULL) {
1652                         if (in->in_apr)
1653                                 appr_free(in->in_apr);
1654                         KFREE(in);
1655                 }
1656                 KFREE(nat);
1657         }
1658         return error;
1659 }
1660
1661
1662 /* ------------------------------------------------------------------------ */
1663 /* Function:    nat_delete                                                  */
1664 /* Returns:     Nil                                                         */
1665 /* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1666 /*              logtype(I) - type of LOG record to create before deleting   */
1667 /* Write Lock:  ipf_nat                                                     */
1668 /*                                                                          */
1669 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1670 /* enabled then generate a NAT log record for this event.                   */
1671 /* ------------------------------------------------------------------------ */
1672 void nat_delete(nat, logtype)
1673 struct nat *nat;
1674 int logtype;
1675 {
1676         struct ipnat *ipn;
1677         int removed = 0;
1678
1679         if (logtype != 0 && nat_logging != 0)
1680                 nat_log(nat, logtype);
1681
1682         /*
1683          * Take it as a general indication that all the pointers are set if
1684          * nat_pnext is set.
1685          */
1686         if (nat->nat_pnext != NULL) {
1687                 removed = 1;
1688
1689                 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1690                 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1691
1692                 *nat->nat_pnext = nat->nat_next;
1693                 if (nat->nat_next != NULL) {
1694                         nat->nat_next->nat_pnext = nat->nat_pnext;
1695                         nat->nat_next = NULL;
1696                 }
1697                 nat->nat_pnext = NULL;
1698
1699                 *nat->nat_phnext[0] = nat->nat_hnext[0];
1700                 if (nat->nat_hnext[0] != NULL) {
1701                         nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1702                         nat->nat_hnext[0] = NULL;
1703                 }
1704                 nat->nat_phnext[0] = NULL;
1705
1706                 *nat->nat_phnext[1] = nat->nat_hnext[1];
1707                 if (nat->nat_hnext[1] != NULL) {
1708                         nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1709                         nat->nat_hnext[1] = NULL;
1710                 }
1711                 nat->nat_phnext[1] = NULL;
1712
1713                 if ((nat->nat_flags & SI_WILDP) != 0)
1714                         nat_stats.ns_wilds--;
1715         }
1716
1717         if (nat->nat_me != NULL) {
1718                 *nat->nat_me = NULL;
1719                 nat->nat_me = NULL;
1720         }
1721
1722         if (nat->nat_tqe.tqe_ifq != NULL)
1723                 fr_deletequeueentry(&nat->nat_tqe);
1724
1725         if (logtype == NL_EXPIRE)
1726                 nat_stats.ns_expire++;
1727
1728         MUTEX_ENTER(&nat->nat_lock);
1729         /*
1730          * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1731          * This happens when a nat'd packet is blocked and we want to throw
1732          * away the NAT session.
1733          */
1734         if (logtype == NL_DESTROY) {
1735                 if (nat->nat_ref > 2) {
1736                         nat->nat_ref -= 2;
1737                         MUTEX_EXIT(&nat->nat_lock);
1738                         if (removed)
1739                                 nat_stats.ns_orphans++;
1740                         return;
1741                 }
1742         } else if (nat->nat_ref > 1) {
1743                 nat->nat_ref--;
1744                 MUTEX_EXIT(&nat->nat_lock);
1745                 if (removed)
1746                         nat_stats.ns_orphans++;
1747                 return;
1748         }
1749         MUTEX_EXIT(&nat->nat_lock);
1750
1751         /*
1752          * At this point, nat_ref is 1, doing "--" would make it 0..
1753          */
1754         nat->nat_ref = 0;
1755         if (!removed)
1756                 nat_stats.ns_orphans--;
1757
1758 #ifdef  IPFILTER_SYNC
1759         if (nat->nat_sync)
1760                 ipfsync_del(nat->nat_sync);
1761 #endif
1762
1763         if (nat->nat_fr != NULL)
1764                 (void) fr_derefrule(&nat->nat_fr);
1765
1766         if (nat->nat_hm != NULL)
1767                 fr_hostmapdel(&nat->nat_hm);
1768
1769         /*
1770          * If there is an active reference from the nat entry to its parent
1771          * rule, decrement the rule's reference count and free it too if no
1772          * longer being used.
1773          */
1774         ipn = nat->nat_ptr;
1775         if (ipn != NULL) {
1776                 fr_ipnatderef(&ipn);
1777         }
1778
1779         MUTEX_DESTROY(&nat->nat_lock);
1780
1781         aps_free(nat->nat_aps);
1782         nat_stats.ns_inuse--;
1783
1784         /*
1785          * If there's a fragment table entry too for this nat entry, then
1786          * dereference that as well.  This is after nat_lock is released
1787          * because of Tru64.
1788          */
1789         fr_forgetnat((void *)nat);
1790
1791         KFREE(nat);
1792 }
1793
1794
1795 /* ------------------------------------------------------------------------ */
1796 /* Function:    nat_flushtable                                              */
1797 /* Returns:     int - number of NAT rules deleted                           */
1798 /* Parameters:  Nil                                                         */
1799 /*                                                                          */
1800 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1801 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1802 /* ------------------------------------------------------------------------ */
1803 /*
1804  * nat_flushtable - clear the NAT table of all mapping entries.
1805  */
1806 static int nat_flushtable()
1807 {
1808         nat_t *nat;
1809         int j = 0;
1810
1811         /*
1812          * ALL NAT mappings deleted, so lets just make the deletions
1813          * quicker.
1814          */
1815         if (nat_table[0] != NULL)
1816                 bzero((char *)nat_table[0],
1817                       sizeof(nat_table[0]) * ipf_nattable_sz);
1818         if (nat_table[1] != NULL)
1819                 bzero((char *)nat_table[1],
1820                       sizeof(nat_table[1]) * ipf_nattable_sz);
1821
1822         while ((nat = nat_instances) != NULL) {
1823                 nat_delete(nat, NL_FLUSH);
1824                 j++;
1825         }
1826
1827         nat_stats.ns_inuse = 0;
1828         return j;
1829 }
1830
1831
1832 /* ------------------------------------------------------------------------ */
1833 /* Function:    nat_clearlist                                               */
1834 /* Returns:     int - number of NAT/RDR rules deleted                       */
1835 /* Parameters:  Nil                                                         */
1836 /*                                                                          */
1837 /* Delete all rules in the current list of rules.  There is nothing elegant */
1838 /* about this cleanup: simply free all entries on the list of rules and     */
1839 /* clear out the tables used for hashed NAT rule lookups.                   */
1840 /* ------------------------------------------------------------------------ */
1841 static int nat_clearlist()
1842 {
1843         ipnat_t *n, **np = &nat_list;
1844         int i = 0;
1845
1846         if (nat_rules != NULL)
1847                 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1848         if (rdr_rules != NULL)
1849                 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1850
1851         while ((n = *np) != NULL) {
1852                 *np = n->in_next;
1853                 if (n->in_use == 0) {
1854                         if (n->in_apr != NULL)
1855                                 appr_free(n->in_apr);
1856                         MUTEX_DESTROY(&n->in_lock);
1857                         KFREE(n);
1858                         nat_stats.ns_rules--;
1859                 } else {
1860                         n->in_flags |= IPN_DELETE;
1861                         n->in_next = NULL;
1862                 }
1863                 i++;
1864         }
1865 #if SOLARIS && !defined(_INET_IP_STACK_H)
1866         pfil_delayed_copy = 1;
1867 #endif
1868         nat_masks = 0;
1869         rdr_masks = 0;
1870         return i;
1871 }
1872
1873
1874 /* ------------------------------------------------------------------------ */
1875 /* Function:    nat_newmap                                                  */
1876 /* Returns:     int - -1 == error, 0 == success                             */
1877 /* Parameters:  fin(I) - pointer to packet information                      */
1878 /*              nat(I) - pointer to NAT entry                               */
1879 /*              ni(I)  - pointer to structure with misc. information needed */
1880 /*                       to create new NAT entry.                           */
1881 /*                                                                          */
1882 /* Given an empty NAT structure, populate it with new information about a   */
1883 /* new NAT session, as defined by the matching NAT rule.                    */
1884 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1885 /* to the new IP address for the translation.                               */
1886 /* ------------------------------------------------------------------------ */
1887 static INLINE int nat_newmap(fin, nat, ni)
1888 fr_info_t *fin;
1889 nat_t *nat;
1890 natinfo_t *ni;
1891 {
1892         u_short st_port, dport, sport, port, sp, dp;
1893         struct in_addr in, inb;
1894         hostmap_t *hm;
1895         u_32_t flags;
1896         u_32_t st_ip;
1897         ipnat_t *np;
1898         nat_t *natl;
1899         int l;
1900
1901         /*
1902          * If it's an outbound packet which doesn't match any existing
1903          * record, then create a new port
1904          */
1905         l = 0;
1906         hm = NULL;
1907         np = ni->nai_np;
1908         st_ip = np->in_nip;
1909         st_port = np->in_pnext;
1910         flags = ni->nai_flags;
1911         sport = ni->nai_sport;
1912         dport = ni->nai_dport;
1913
1914         /*
1915          * Do a loop until we either run out of entries to try or we find
1916          * a NAT mapping that isn't currently being used.  This is done
1917          * because the change to the source is not (usually) being fixed.
1918          */
1919         do {
1920                 port = 0;
1921                 in.s_addr = htonl(np->in_nip);
1922                 if (l == 0) {
1923                         /*
1924                          * Check to see if there is an existing NAT
1925                          * setup for this IP address pair.
1926                          */
1927                         hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1928                                          in, 0);
1929                         if (hm != NULL)
1930                                 in.s_addr = hm->hm_mapip.s_addr;
1931                 } else if ((l == 1) && (hm != NULL)) {
1932                         fr_hostmapdel(&hm);
1933                 }
1934                 in.s_addr = ntohl(in.s_addr);
1935
1936                 nat->nat_hm = hm;
1937
1938                 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1939                         if (l > 0)
1940                                 return -1;
1941                 }
1942
1943                 if (np->in_redir == NAT_BIMAP &&
1944                     np->in_inmsk == np->in_outmsk) {
1945                         /*
1946                          * map the address block in a 1:1 fashion
1947                          */
1948                         in.s_addr = np->in_outip;
1949                         in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1950                         in.s_addr = ntohl(in.s_addr);
1951
1952                 } else if (np->in_redir & NAT_MAPBLK) {
1953                         if ((l >= np->in_ppip) || ((l > 0) &&
1954                              !(flags & IPN_TCPUDP)))
1955                                 return -1;
1956                         /*
1957                          * map-block - Calculate destination address.
1958                          */
1959                         in.s_addr = ntohl(fin->fin_saddr);
1960                         in.s_addr &= ntohl(~np->in_inmsk);
1961                         inb.s_addr = in.s_addr;
1962                         in.s_addr /= np->in_ippip;
1963                         in.s_addr &= ntohl(~np->in_outmsk);
1964                         in.s_addr += ntohl(np->in_outip);
1965                         /*
1966                          * Calculate destination port.
1967                          */
1968                         if ((flags & IPN_TCPUDP) &&
1969                             (np->in_ppip != 0)) {
1970                                 port = ntohs(sport) + l;
1971                                 port %= np->in_ppip;
1972                                 port += np->in_ppip *
1973                                         (inb.s_addr % np->in_ippip);
1974                                 port += MAPBLK_MINPORT;
1975                                 port = htons(port);
1976                         }
1977
1978                 } else if ((np->in_outip == 0) &&
1979                            (np->in_outmsk == 0xffffffff)) {
1980                         /*
1981                          * 0/32 - use the interface's IP address.
1982                          */
1983                         if ((l > 0) ||
1984                             fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1985                                        &in, NULL) == -1)
1986                                 return -1;
1987                         in.s_addr = ntohl(in.s_addr);
1988
1989                 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1990                         /*
1991                          * 0/0 - use the original source address/port.
1992                          */
1993                         if (l > 0)
1994                                 return -1;
1995                         in.s_addr = ntohl(fin->fin_saddr);
1996
1997                 } else if ((np->in_outmsk != 0xffffffff) &&
1998                            (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
1999                         np->in_nip++;
2000
2001                 natl = NULL;
2002
2003                 if ((flags & IPN_TCPUDP) &&
2004                     ((np->in_redir & NAT_MAPBLK) == 0) &&
2005                     (np->in_flags & IPN_AUTOPORTMAP)) {
2006                         /*
2007                          * "ports auto" (without map-block)
2008                          */
2009                         if ((l > 0) && (l % np->in_ppip == 0)) {
2010                                 if (l > np->in_space) {
2011                                         return -1;
2012                                 } else if ((l > np->in_ppip) &&
2013                                            np->in_outmsk != 0xffffffff)
2014                                         np->in_nip++;
2015                         }
2016                         if (np->in_ppip != 0) {
2017                                 port = ntohs(sport);
2018                                 port += (l % np->in_ppip);
2019                                 port %= np->in_ppip;
2020                                 port += np->in_ppip *
2021                                         (ntohl(fin->fin_saddr) %
2022                                          np->in_ippip);
2023                                 port += MAPBLK_MINPORT;
2024                                 port = htons(port);
2025                         }
2026
2027                 } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2028                            (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2029                         /*
2030                          * Standard port translation.  Select next port.
2031                          */
2032                         port = htons(np->in_pnext++);
2033
2034                         if (np->in_pnext > ntohs(np->in_pmax)) {
2035                                 np->in_pnext = ntohs(np->in_pmin);
2036                                 if (np->in_outmsk != 0xffffffff)
2037                                         np->in_nip++;
2038                         }
2039                 }
2040
2041                 if (np->in_flags & IPN_IPRANGE) {
2042                         if (np->in_nip > ntohl(np->in_outmsk))
2043                                 np->in_nip = ntohl(np->in_outip);
2044                 } else {
2045                         if ((np->in_outmsk != 0xffffffff) &&
2046                             ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2047                             ntohl(np->in_outip))
2048                                 np->in_nip = ntohl(np->in_outip) + 1;
2049                 }
2050
2051                 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2052                         port = sport;
2053
2054                 /*
2055                  * Here we do a lookup of the connection as seen from
2056                  * the outside.  If an IP# pair already exists, try
2057                  * again.  So if you have A->B becomes C->B, you can
2058                  * also have D->E become C->E but not D->B causing
2059                  * another C->B.  Also take protocol and ports into
2060                  * account when determining whether a pre-existing
2061                  * NAT setup will cause an external conflict where
2062                  * this is appropriate.
2063                  */
2064                 inb.s_addr = htonl(in.s_addr);
2065                 sp = fin->fin_data[0];
2066                 dp = fin->fin_data[1];
2067                 fin->fin_data[0] = fin->fin_data[1];
2068                 fin->fin_data[1] = htons(port);
2069                 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2070                                     (u_int)fin->fin_p, fin->fin_dst, inb);
2071                 fin->fin_data[0] = sp;
2072                 fin->fin_data[1] = dp;
2073
2074                 /*
2075                  * Has the search wrapped around and come back to the
2076                  * start ?
2077                  */
2078                 if ((natl != NULL) &&
2079                     (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2080                     (np->in_nip != 0) && (st_ip == np->in_nip))
2081                         return -1;
2082                 l++;
2083         } while (natl != NULL);
2084
2085         if (np->in_space > 0)
2086                 np->in_space--;
2087
2088         /* Setup the NAT table */
2089         nat->nat_inip = fin->fin_src;
2090         nat->nat_outip.s_addr = htonl(in.s_addr);
2091         nat->nat_oip = fin->fin_dst;
2092         if (nat->nat_hm == NULL)
2093                 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2094                                           nat->nat_outip, 0);
2095
2096         /*
2097          * The ICMP checksum does not have a pseudo header containing
2098          * the IP addresses
2099          */
2100         ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2101         ni->nai_sum2 = LONG_SUM(in.s_addr);
2102         if ((flags & IPN_TCPUDP)) {
2103                 ni->nai_sum1 += ntohs(sport);
2104                 ni->nai_sum2 += ntohs(port);
2105         }
2106
2107         if (flags & IPN_TCPUDP) {
2108                 nat->nat_inport = sport;
2109                 nat->nat_outport = port;        /* sport */
2110                 nat->nat_oport = dport;
2111                 ((tcphdr_t *)fin->fin_dp)->th_sport = port;
2112         } else if (flags & IPN_ICMPQUERY) {
2113                 ((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2114                 nat->nat_inport = port;
2115                 nat->nat_outport = port;
2116         } else if (fin->fin_p == IPPROTO_GRE) {
2117 #if 0
2118                 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2119                 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2120                         nat->nat_oport = 0;/*fin->fin_data[1];*/
2121                         nat->nat_inport = 0;/*fin->fin_data[0];*/
2122                         nat->nat_outport = 0;/*fin->fin_data[0];*/
2123                         nat->nat_call[0] = fin->fin_data[0];
2124                         nat->nat_call[1] = fin->fin_data[0];
2125                 }
2126 #endif
2127         }
2128         ni->nai_ip.s_addr = in.s_addr;
2129         ni->nai_port = port;
2130         ni->nai_nport = dport;
2131         return 0;
2132 }
2133
2134
2135 /* ------------------------------------------------------------------------ */
2136 /* Function:    nat_newrdr                                                  */
2137 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2138 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2139 /* Parameters:  fin(I) - pointer to packet information                      */
2140 /*              nat(I) - pointer to NAT entry                               */
2141 /*              ni(I)  - pointer to structure with misc. information needed */
2142 /*                       to create new NAT entry.                           */
2143 /*                                                                          */
2144 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2145 /* to the new IP address for the translation.                               */
2146 /* ------------------------------------------------------------------------ */
2147 static INLINE int nat_newrdr(fin, nat, ni)
2148 fr_info_t *fin;
2149 nat_t *nat;
2150 natinfo_t *ni;
2151 {
2152         u_short nport, dport, sport;
2153         struct in_addr in, inb;
2154         u_short sp, dp;
2155         hostmap_t *hm;
2156         u_32_t flags;
2157         ipnat_t *np;
2158         nat_t *natl;
2159         int move;
2160
2161         move = 1;
2162         hm = NULL;
2163         in.s_addr = 0;
2164         np = ni->nai_np;
2165         flags = ni->nai_flags;
2166         sport = ni->nai_sport;
2167         dport = ni->nai_dport;
2168
2169         /*
2170          * If the matching rule has IPN_STICKY set, then we want to have the
2171          * same rule kick in as before.  Why would this happen?  If you have
2172          * a collection of rdr rules with "round-robin sticky", the current
2173          * packet might match a different one to the previous connection but
2174          * we want the same destination to be used.
2175          */
2176         if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2177             ((np->in_flags & IPN_STICKY) != 0)) {
2178                 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2179                                  (u_32_t)dport);
2180                 if (hm != NULL) {
2181                         in.s_addr = ntohl(hm->hm_mapip.s_addr);
2182                         np = hm->hm_ipnat;
2183                         ni->nai_np = np;
2184                         move = 0;
2185                 }
2186         }
2187
2188         /*
2189          * Otherwise, it's an inbound packet. Most likely, we don't
2190          * want to rewrite source ports and source addresses. Instead,
2191          * we want to rewrite to a fixed internal address and fixed
2192          * internal port.
2193          */
2194         if (np->in_flags & IPN_SPLIT) {
2195                 in.s_addr = np->in_nip;
2196
2197                 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2198                         hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2199                                          in, (u_32_t)dport);
2200                         if (hm != NULL) {
2201                                 in.s_addr = hm->hm_mapip.s_addr;
2202                                 move = 0;
2203                         }
2204                 }
2205
2206                 if (hm == NULL || hm->hm_ref == 1) {
2207                         if (np->in_inip == htonl(in.s_addr)) {
2208                                 np->in_nip = ntohl(np->in_inmsk);
2209                                 move = 0;
2210                         } else {
2211                                 np->in_nip = ntohl(np->in_inip);
2212                         }
2213                 }
2214
2215         } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2216                 /*
2217                  * 0/32 - use the interface's IP address.
2218                  */
2219                 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2220                         return -1;
2221                 in.s_addr = ntohl(in.s_addr);
2222
2223         } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2224                 /*
2225                  * 0/0 - use the original destination address/port.
2226                  */
2227                 in.s_addr = ntohl(fin->fin_daddr);
2228
2229         } else if (np->in_redir == NAT_BIMAP &&
2230                    np->in_inmsk == np->in_outmsk) {
2231                 /*
2232                  * map the address block in a 1:1 fashion
2233                  */
2234                 in.s_addr = np->in_inip;
2235                 in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2236                 in.s_addr = ntohl(in.s_addr);
2237         } else {
2238                 in.s_addr = ntohl(np->in_inip);
2239         }
2240
2241         if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2242                 nport = dport;
2243         else {
2244                 /*
2245                  * Whilst not optimized for the case where
2246                  * pmin == pmax, the gain is not significant.
2247                  */
2248                 if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2249                     (np->in_pmin != np->in_pmax)) {
2250                         nport = ntohs(dport) - ntohs(np->in_pmin) +
2251                                 ntohs(np->in_pnext);
2252                         nport = htons(nport);
2253                 } else
2254                         nport = np->in_pnext;
2255         }
2256
2257         /*
2258          * When the redirect-to address is set to 0.0.0.0, just
2259          * assume a blank `forwarding' of the packet.  We don't
2260          * setup any translation for this either.
2261          */
2262         if (in.s_addr == 0) {
2263                 if (nport == dport)
2264                         return -1;
2265                 in.s_addr = ntohl(fin->fin_daddr);
2266         }
2267
2268         /*
2269          * Check to see if this redirect mapping already exists and if
2270          * it does, return "failure" (allowing it to be created will just
2271          * cause one or both of these "connections" to stop working.)
2272          */
2273         inb.s_addr = htonl(in.s_addr);
2274         sp = fin->fin_data[0];
2275         dp = fin->fin_data[1];
2276         fin->fin_data[1] = fin->fin_data[0];
2277         fin->fin_data[0] = ntohs(nport);
2278         natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2279                              (u_int)fin->fin_p, inb, fin->fin_src);
2280         fin->fin_data[0] = sp;
2281         fin->fin_data[1] = dp;
2282         if (natl != NULL)
2283                 return -1;
2284
2285         nat->nat_inip.s_addr = htonl(in.s_addr);
2286         nat->nat_outip = fin->fin_dst;
2287         nat->nat_oip = fin->fin_src;
2288         if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2289                 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
2290                                           (u_32_t)dport);
2291
2292         ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2293         ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2294
2295         ni->nai_ip.s_addr = in.s_addr;
2296         ni->nai_nport = nport;
2297         ni->nai_port = sport;
2298
2299         if (flags & IPN_TCPUDP) {
2300                 nat->nat_inport = nport;
2301                 nat->nat_outport = dport;
2302                 nat->nat_oport = sport;
2303                 ((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2304         } else if (flags & IPN_ICMPQUERY) {
2305                 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2306                 nat->nat_inport = nport;
2307                 nat->nat_outport = nport;
2308         } else if (fin->fin_p == IPPROTO_GRE) {
2309 #if 0
2310                 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2311                 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2312                         nat->nat_call[0] = fin->fin_data[0];
2313                         nat->nat_call[1] = fin->fin_data[1];
2314                         nat->nat_oport = 0; /*fin->fin_data[0];*/
2315                         nat->nat_inport = 0; /*fin->fin_data[1];*/
2316                         nat->nat_outport = 0; /*fin->fin_data[1];*/
2317                 }
2318 #endif
2319         }
2320
2321         return move;
2322 }
2323
2324 /* ------------------------------------------------------------------------ */
2325 /* Function:    nat_new                                                     */
2326 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2327 /*                       else pointer to new NAT structure                  */
2328 /* Parameters:  fin(I)       - pointer to packet information                */
2329 /*              np(I)        - pointer to NAT rule                          */
2330 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2331 /*              flags(I)     - flags describing the current packet          */
2332 /*              direction(I) - direction of packet (in/out)                 */
2333 /* Write Lock:  ipf_nat                                                     */
2334 /*                                                                          */
2335 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2336 /* in any way.                                                              */
2337 /*                                                                          */
2338 /* This function is in three main parts: (1) deal with creating a new NAT   */
2339 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2340 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2341 /* and (3) building that structure and putting it into the NAT table(s).    */
2342 /*                                                                          */
     /* If a matching entry already exists in the table, the freshly allocated   */
     /* structure is freed and the existing entry is returned instead.           */
     /*                                                                          */
2343 /* NOTE: natsave should NOT be used to point back to an ipstate_t struct    */
2344 /*       as it can result in memory being corrupted.                        */
2345 /* ------------------------------------------------------------------------ */
2346 nat_t *nat_new(fin, np, natsave, flags, direction)
2347 fr_info_t *fin;
2348 ipnat_t *np;
2349 nat_t **natsave;
2350 u_int flags;
2351 int direction;
2352 {
2353         u_short port = 0, sport = 0, dport = 0, nport = 0;
2354         tcphdr_t *tcp = NULL;
2355         hostmap_t *hm = NULL;
2356         struct in_addr in;
2357         nat_t *nat, *natl;
2358         u_int nflags;
2359         natinfo_t ni;
2360         u_32_t sumd;
2361         int move;
2362 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2363         qpktinfo_t *qpi = fin->fin_qpi;
2364 #endif
2365
             /*
              * The table already holds the maximum number of entries: count
              * the failure, set fr_nat_doflush (presumably acted upon
              * elsewhere to flush entries) and give up before allocating.
              */
2366         if (nat_stats.ns_inuse >= ipf_nattable_max) {
2367                 nat_stats.ns_memfail++;
2368                 fr_nat_doflush = 1;
2369                 return NULL;
2370         }
2371
             /*
              * nflags keeps only the bits set both in the rule and in the
              * caller's flags, further restricted to NAT_FROMRULE.
              */
2372         move = 1;
2373         nflags = np->in_flags & flags;
2374         nflags &= NAT_FROMRULE;
2375
             /* Scratch information handed to nat_newmap()/nat_newrdr(). */
2376         ni.nai_np = np;
2377         ni.nai_nflags = nflags;
2378         ni.nai_flags = flags;
2379         ni.nai_dport = 0;
2380         ni.nai_sport = 0;
2381
2382         /* Give me a new nat */
2383         KMALLOC(nat, nat_t *);
2384         if (nat == NULL) {
2385                 nat_stats.ns_memfail++;
2386                 /*
2387                  * Try to automatically tune the max # of entries in the
2388                  * table allowed to be less than what will cause kmem_alloc()
2389                  * to fail and try to eliminate panics due to out of memory
2390                  * conditions arising.
2391                  */
2392                 if (ipf_nattable_max > ipf_nattable_sz) {
2393                         ipf_nattable_max = nat_stats.ns_inuse - 100;
2394                         printf("ipf_nattable_max reduced to %d\n",
2395                                 ipf_nattable_max);
2396                 }
2397                 return NULL;
2398         }
2399
             /*
              * Record the packet's source/destination "ports" in ni: for
              * TCP/UDP they are fin_sport/fin_dport passed through htons(),
              * for ICMP queries the ICMP id is used for both.
              */
2400         if (flags & IPN_TCPUDP) {
2401                 tcp = fin->fin_dp;
2402                 ni.nai_sport = htons(fin->fin_sport);
2403                 ni.nai_dport = htons(fin->fin_dport);
2404         } else if (flags & IPN_ICMPQUERY) {
2405                 /*
2406                  * In the ICMP query NAT code, we translate the ICMP id fields
2407                  * to make them unique. This is independent of the ICMP type
2408                  * (e.g. in the unlikely event that a host sends an echo and
2409                  * a tstamp request with the same id, both packets will have
2410                  * their ip address/id field changed in the same way).
2411                  */
2412                 /* The icmp_id field is used by the sender to identify the
2413                  * process making the icmp request. (the receiver just
2414                  * copies it back in its response). So, it closely matches
2415                  * the concept of source port. We overlay sport, so we can
2416                  * maximally reuse the existing code.
2417                  */
2418                 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2419                 ni.nai_dport = ni.nai_sport;
2420         }
2421
             /* Start from an all-zero entry, then note flags and rule type. */
2422         bzero((char *)nat, sizeof(*nat));
2423         nat->nat_flags = flags;
2424         nat->nat_redir = np->in_redir;
2425
             /*
              * ipf_nat_new is taken here and released at "done", unless the
              * caller passed NAT_SLAVE.
              */
2426         if ((flags & NAT_SLAVE) == 0) {
2427                 MUTEX_ENTER(&ipf_nat_new);
2428         }
2429
2430         /*
2431          * Search the current table for a match.
2432          */
2433         if (direction == NAT_OUTBOUND) {
2434                 /*
2435                  * We can now arrange to call this for the same connection
2436                  * because ipf_nat_new doesn't protect the code path into
2437                  * this function.
2438                  */
2439                 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2440                                      fin->fin_src, fin->fin_dst);
2441                 if (natl != NULL) {
                             /* Already present: discard ours, return it. */
2442                         KFREE(nat);
2443                         nat = natl;
2444                         goto done;
2445                 }
2446
                     /* Part (1): fill in the entry for a "MAP" rule. */
2447                 move = nat_newmap(fin, nat, &ni);
2448                 if (move == -1)
2449                         goto badnat;
2450
                     /* The helper's view of the rule and chosen address. */
2451                 np = ni.nai_np;
2452                 in = ni.nai_ip;
2453         } else {
2454                 /*
2455                  * NAT_INBOUND is used only for redirects rules
2456                  */
2457                 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2458                                     fin->fin_src, fin->fin_dst);
2459                 if (natl != NULL) {
                             /* Already present: discard ours, return it. */
2460                         KFREE(nat);
2461                         nat = natl;
2462                         goto done;
2463                 }
2464
                     /*
                      * Part (2): fill in the entry for a "RDR" rule.
                      * NOTE(review): the visible tail of nat_newrdr() stores
                      * the new port/id into the header at fin->fin_dp, which
                      * seems at odds with the "does not change the packet"
                      * statement in this function's header - confirm.
                      */
2465                 move = nat_newrdr(fin, nat, &ni);
2466                 if (move == -1)
2467                         goto badnat;
2468
2469                 np = ni.nai_np;
2470                 in = ni.nai_ip;
2471         }
2472         port = ni.nai_port;
2473         nport = ni.nai_nport;
2474
             /*
              * For round-robin rules the rule is removed and added again;
              * presumably this moves it behind its peers so that the next
              * new connection uses a different rule - confirm against
              * nat_addrdr()/nat_addnat().
              */
2475         if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2476                 if (np->in_redir == NAT_REDIRECT) {
2477                         nat_delrdr(np);
2478                         nat_addrdr(np);
2479                 } else if (np->in_redir == NAT_MAP) {
2480                         nat_delnat(np);
2481                         nat_addnat(np);
2482                 }
2483         }
2484
             /* Original ports as recorded in ni (ICMP queries: id, 0). */
2485         if (flags & IPN_TCPUDP) {
2486                 sport = ni.nai_sport;
2487                 dport = ni.nai_dport;
2488         } else if (flags & IPN_ICMPQUERY) {
2489                 sport = ni.nai_sport;
2490                 dport = 0;
2491         }
2492
             /*
              * Checksum delta derived from ni.nai_sum1/nai_sum2; the upper
              * and lower 16-bit halves of sumd are added together before
              * being stored.
              */
2493         CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2494         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2495 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
             /*
              * Solaris only: for TCP with dohwcksum set and the interface
              * carrying the ICK_M_CTL_MAGIC marker, nat_sumd[1] holds an
              * address-based sum tagged with NAT_HW_CKSUM instead of a copy
              * of nat_sumd[0].
              */
2496         if ((flags & IPN_TCP) && dohwcksum &&
2497             (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2498                 if (direction == NAT_OUTBOUND)
2499                         ni.nai_sum1 = LONG_SUM(in.s_addr);
2500                 else
2501                         ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2502                 ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2503                 ni.nai_sum1 += 30;
2504                 ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2505                 nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2506         } else
2507 #endif
2508                 nat->nat_sumd[1] = nat->nat_sumd[0];
2509
             /*
              * When a port (or ICMP id) is being changed, nat_ipsumd gets its
              * own delta computed from the addresses alone.  Otherwise it
              * reuses nat_sumd[0], and for packets that are not
              * TCP/UDP/ICMP both nat_sumd[] values are cleared.
              */
2510         if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2511                 if (direction == NAT_OUTBOUND)
2512                         ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2513                 else
2514                         ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2515
2516                 ni.nai_sum2 = LONG_SUM(in.s_addr);
2517
2518                 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2519                 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2520         } else {
2521                 nat->nat_ipsumd = nat->nat_sumd[0];
2522                 if (!(flags & IPN_TCPUDPICMP)) {
2523                         nat->nat_sumd[0] = 0;
2524                         nat->nat_sumd[1] = 0;
2525                 }
2526         }
2527
             /*
              * Part (3): complete the entry and insert it into the tables.
              * A failure here also sets fr_nat_doflush.
              */
2528         if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2529                 fr_nat_doflush = 1;
2530                 goto badnat;
2531         }
2532         if (flags & SI_WILDP)
2533                 nat_stats.ns_wilds++;
2534         fin->fin_flx |= FI_NEWNAT;
2535         goto done;
2536 badnat:
             /*
              * Failure path: count it, drop the hostmap attached to the
              * entry (if any) and free the entry itself.
              */
2537         nat_stats.ns_badnat++;
2538         if ((hm = nat->nat_hm) != NULL)
2539                 fr_hostmapdel(&hm);
2540         KFREE(nat);
2541         nat = NULL;
2542 done:
             /* Common exit: release ipf_nat_new if it was taken above. */
2543         if ((flags & NAT_SLAVE) == 0) {
2544                 MUTEX_EXIT(&ipf_nat_new);
2545         }
2546         return nat;
2547 }
2548
2549
2550 /* ------------------------------------------------------------------------ */
2551 /* Function:    nat_finalise                                                */
2552 /* Returns:     int - 0 == success, -1 == failure                           */
2553 /* Parameters:  fin(I) - pointer to packet information                      */
2554 /*              nat(I) - pointer to NAT entry                               */
2555 /*              ni(I)  - pointer to structure with misc. information needed */
2556 /*                       to create new NAT entry.                           */
     /*              tcp(I) - pointer to TCP header, read only for TCP packets   */
     /*              natsave(I) - stored in the entry as its nat_me back pointer */
     /*              direction(I) - direction of packet (in/out)                 */
2557 /* Write Lock:  ipf_nat                                                     */
2558 /*                                                                          */
2559 /* This is the tail end of constructing a new NAT entry and is the same     */
2560 /* for both IPv4 and IPv6.                                                  */
2561 /* ------------------------------------------------------------------------ */
2562 /*ARGSUSED*/
2563 static int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2564 fr_info_t *fin;
2565 nat_t *nat;
2566 natinfo_t *ni;
2567 tcphdr_t *tcp;
2568 nat_t **natsave;
2569 int direction;
2570 {
2571         frentry_t *fr;
2572         ipnat_t *np;
2573
2574         np = ni->nai_np;
2575
             /*
              * Copy the interface names into the entry for each interface
              * pointer that is set in the rule.
              */
2576         if (np->in_ifps[0] != NULL) {
2577                 COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2578         }
2579         if (np->in_ifps[1] != NULL) {
2580                 COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
2581         }
2582 #ifdef  IPFILTER_SYNC
             /* Entries flagged SI_CLONE do not get a sync structure. */
2583         if ((nat->nat_flags & SI_CLONE) == 0)
2584                 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2585 #endif
2586
             /* Tie the entry to its rule, interfaces, protocol, direction. */
2587         nat->nat_me = natsave;
2588         nat->nat_dir = direction;
2589         nat->nat_ifps[0] = np->in_ifps[0];
2590         nat->nat_ifps[1] = np->in_ifps[1];
2591         nat->nat_ptr = np;
2592         nat->nat_p = fin->fin_p;
2593         nat->nat_mssclamp = np->in_mssclamp;
             /*
              * NOTE(review): tcp is dereferenced whenever the protocol is
              * TCP; nat_new() only sets it when its flags include
              * IPN_TCPUDP - confirm callers always pass that for TCP.
              */
2594         if (nat->nat_p == IPPROTO_TCP)
2595                 nat->nat_seqnext[0] = ntohl(tcp->th_seq);
2596
             /*
              * If the rule has a proxy attached (in_apr) and this is not a
              * NAT_SLAVE entry, set up the proxy session; its failure fails
              * the whole operation.
              */
2597         if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2598                 if (appr_new(fin, nat) == -1)
2599                         return -1;
2600
             /*
              * On successful insertion: log if enabled, bump the rule's use
              * count and take a reference on the filter rule (if any) that
              * is recorded in the entry.
              */
2601         if (nat_insert(nat, fin->fin_rev) == 0) {
2602                 if (nat_logging)
2603                         nat_log(nat, (u_int)np->in_redir);
2604                 np->in_use++;
2605                 fr = fin->fin_fr;
2606                 nat->nat_fr = fr;
2607                 if (fr != NULL) {
2608                         MUTEX_ENTER(&fr->fr_lock);
2609                         fr->fr_ref++;
2610                         MUTEX_EXIT(&fr->fr_lock);
2611                 }
2612                 return 0;
2613         }
2614
2615         /*
2616          * nat_insert failed, so cleanup time...
              *
              * NOTE(review): nothing set up above (e.g. by appr_new() or
              * ipfsync_new()) is released in this function; presumably the
              * caller is expected to deal with it - confirm.
2617          */
2618         return -1;
2619 }
2620
2621
2622 /* ------------------------------------------------------------------------ */
2623 /* Function:   nat_insert                                                   */
2624 /* Returns:    int - 0 == success, -1 == failure                            */
2625 /* Parameters: nat(I) - pointer to NAT structure                            */
2626 /*             rev(I) - flag indicating forward/reverse direction of packet */
2627 /* Write Lock: ipf_nat                                                      */
2628 /*                                                                          */
2629 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2630 /* list of active NAT entries.  Adjust global counters when complete.       */
     /* Fails, without touching the entry, when either target hash bucket has    */
     /* already reached fr_nat_maxbucket entries.                                */
2631 /* ------------------------------------------------------------------------ */
2632 int     nat_insert(nat, rev)
2633 nat_t   *nat;
2634 int     rev;
2635 {
2636         u_int hv1, hv2;
2637         nat_t **natp;
2638
2639         /*
2640          * Try and return an error as early as possible, so calculate the hash
2641          * entry numbers first and then proceed.
              *
              * hv1 is derived from the inside address and hv2 from the
              * outside address, each combined with nat_oip.  Port numbers
              * take part in the hash only when neither SI_W_SPORT nor
              * SI_W_DPORT is set in the entry's flags.
2642          */
2643         if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2644                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2645                                   0xffffffff);
2646                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2647                                   ipf_nattable_sz);
2648                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2649                                   0xffffffff);
2650                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2651                                   ipf_nattable_sz);
2652         } else {
2653                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2654                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2655                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2656                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
2657         }
2658
             /* Refuse the insertion if either bucket is already full. */
2659         if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2660             nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2661                 return -1;
2662         }
2663
2664         nat->nat_hv[0] = hv1;
2665         nat->nat_hv[1] = hv2;
2666
2667         MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2668
             /* Initial reference count of one; traffic counters at zero. */
2669         nat->nat_rev = rev;
2670         nat->nat_ref = 1;
2671         nat->nat_bytes[0] = 0;
2672         nat->nat_pkts[0] = 0;
2673         nat->nat_bytes[1] = 0;
2674         nat->nat_pkts[1] = 0;
2675
             /*
              * Make sure the interface names are terminated and resolve them
              * to interface pointers.  When no second name is present, the
              * first name and its interface pointer are used for both slots.
              */
2676         nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2677         nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2678
2679         if (nat->nat_ifnames[1][0] != '\0') {
2680                 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2681                 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2682         } else {
2683                 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2684                                LIFNAMSIZ);
2685                 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2686                 nat->nat_ifps[1] = nat->nat_ifps[0];
2687         }
2688
             /*
              * Push the entry onto the head of the nat_instances list;
              * nat_pnext holds the address of the pointer that refers to
              * the entry.
              */
2689         nat->nat_next = nat_instances;
2690         nat->nat_pnext = &nat_instances;
2691         if (nat_instances)
2692                 nat_instances->nat_pnext = &nat->nat_next;
2693         nat_instances = nat;
2694
             /* Push onto the head of hash chain hv1 in nat_table[0]. */
2695         natp = &nat_table[0][hv1];
2696         if (*natp)
2697                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2698         nat->nat_phnext[0] = natp;
2699         nat->nat_hnext[0] = *natp;
2700         *natp = nat;
2701         nat_stats.ns_bucketlen[0][hv1]++;
2702
             /* Likewise for hash chain hv2 in nat_table[1]. */
2703         natp = &nat_table[1][hv2];
2704         if (*natp)
2705                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2706         nat->nat_phnext[1] = natp;
2707         nat->nat_hnext[1] = *natp;
2708         *natp = nat;
2709         nat_stats.ns_bucketlen[1][hv2]++;
2710
2711         fr_setnatqueue(nat, rev);
2712
2713         nat_stats.ns_added++;
2714         nat_stats.ns_inuse++;
2715         return 0;
2716 }
2717
2718
2719 /* ------------------------------------------------------------------------ */
2720 /* Function:    nat_icmperrorlookup                                         */
2721 /* Returns:     nat_t* - point to matching NAT structure                    */
     /*                       or NULL if the packet fails the checks below       */
2722 /* Parameters:  fin(I) - pointer to packet information                      */
2723 /*              dir(I) - direction of packet (in/out)                       */
2724 /*                                                                          */
2725 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2726 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2727 /* required length.                                                         */
2728 /* ------------------------------------------------------------------------ */
2729 nat_t *nat_icmperrorlookup(fin, dir)
2730 fr_info_t *fin;
2731 int dir;
2732 {
2733         int flags = 0, type, minlen;
2734         icmphdr_t *icmp, *orgicmp;
2735         tcphdr_t *tcp = NULL;
2736         u_short data[2];
2737         nat_t *nat;
2738         ip_t *oip;
2739         u_int p;
2740
             /*
              * NOTE(review): type is assigned here but not read again in
              * this function.
              */
2741         icmp = fin->fin_dp;
2742         type = icmp->icmp_type;
2743         /*
2744          * Does it at least have the return (basic) IP header ?
2745          * Only a basic IP header (no options) should be with an ICMP error
2746          * header.  Also, if it's not an error type, then return.
2747          */
2748         if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2749                 return NULL;
2750
2751         /*
2752          * Check packet size
              *
              * oip is the IP header embedded in the error, taken to start
              * 8 bytes after fin_dp; minlen is the header length it claims.
2753          */
2754         oip = (ip_t *)((char *)fin->fin_dp + 8);
2755         minlen = IP_HL(oip) << 2;
2756         if ((minlen < sizeof(ip_t)) ||
2757             (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2758                 return NULL;
2759         /*
2760          * Is the buffer big enough for all of it ?  It's the size of the IP
2761          * header claimed in the encapsulated part which is of concern.  It
2762          * may be too big to be in this buffer but not so big that it's
2763          * outside the ICMP packet, leading to TCP deref's causing problems.
2764          * This is possible because we don't know how big oip_hl is when we
2765          * do the pullup early in fr_check() and thus can't guarantee it is
2766          * all here now.
2767          */
2768 #ifdef  _KERNEL
2769         {
2770         mb_t *m;
2771
2772         m = fin->fin_m;
2773 # if defined(MENTAT)
2774         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2775                 return NULL;
2776 # else
2777         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2778             (char *)fin->fin_ip + M_LEN(m))
2779                 return NULL;
2780 # endif
2781         }
2782 #endif
2783
             /*
              * The error must be addressed to the source of the embedded
              * packet.
              */
2784         if (fin->fin_daddr != oip->ip_src.s_addr)
2785                 return NULL;
2786
2787         p = oip->ip_p;
2788         if (p == IPPROTO_TCP)
2789                 flags = IPN_TCP;
2790         else if (p == IPPROTO_UDP)
2791                 flags = IPN_UDP;
2792         else if (p == IPPROTO_ICMP) {
                     /*
                      * NOTE(review): orgicmp is read without the extra
                      * "+ 8 bytes" length check that the TCP/UDP path below
                      * performs - confirm an earlier check guarantees these
                      * bytes are present.
                      */
2793                 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2794
2795                 /* see if this is related to an ICMP query */
2796                 if (nat_icmpquerytype4(orgicmp->icmp_type)) {
                             /*
                              * Temporarily replace fin_data[] with
                              * { 0, embedded ICMP id } for the lookup and
                              * restore the saved values afterwards.
                              */
2797                         data[0] = fin->fin_data[0];
2798                         data[1] = fin->fin_data[1];
2799                         fin->fin_data[0] = 0;
2800                         fin->fin_data[1] = orgicmp->icmp_id;
2801
2802                         flags = IPN_ICMPERR|IPN_ICMPQUERY;
2803                         /*
2804                          * NOTE : dir refers to the direction of the original
2805                          *        ip packet. By definition the icmp error
2806                          *        message flows in the opposite direction.
2807                          */
2808                         if (dir == NAT_INBOUND)
2809                                 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2810                                                    oip->ip_src);
2811                         else
2812                                 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2813                                                     oip->ip_src);
2814                         fin->fin_data[0] = data[0];
2815                         fin->fin_data[1] = data[1];
2816                         return nat;
2817                 }
2818         }
2819                 
2820         if (flags & IPN_TCPUDP) {
2821                 minlen += 8;            /* + 64bits of data to get ports */
2822                 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2823                         return NULL;
2824
                     /*
                      * Look up using the embedded packet's ports, swapped
                      * (destination first) and converted with ntohs(); the
                      * embedded addresses are likewise passed destination
                      * first.  fin_data[] is restored after the lookup.
                      */
2825                 data[0] = fin->fin_data[0];
2826                 data[1] = fin->fin_data[1];
2827                 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2828                 fin->fin_data[0] = ntohs(tcp->th_dport);
2829                 fin->fin_data[1] = ntohs(tcp->th_sport);
2830
2831                 if (dir == NAT_INBOUND) {
2832                         nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2833                                            oip->ip_src);
2834                 } else {
2835                         nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2836                                             oip->ip_src);
2837                 }
2838                 fin->fin_data[0] = data[0];
2839                 fin->fin_data[1] = data[1];
2840                 return nat;
2841         }
             /*
              * Anything else (including embedded ICMP that is not a query
              * type): look up with no flags, leaving fin_data[] untouched.
              */
2842         if (dir == NAT_INBOUND)
2843                 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2844         else
2845                 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2846 }
2847
2848
2849 /* ------------------------------------------------------------------------ */
2850 /* Function:    nat_icmperror                                               */
2851 /* Returns:     nat_t* - point to matching NAT structure                    */
2852 /* Parameters:  fin(I)    - pointer to packet information                   */
2853 /*              nflags(I) - NAT flags for this packet                       */
2854 /*              dir(I)    - direction of packet (in/out)                    */
2855 /*                                                                          */
2856 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2857 /* session.  This will correct both packet header data and checksums.       */
2858 /*                                                                          */
2859 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2860 /* a NAT'd ICMP packet gets correctly recognised.                           */
2861 /* ------------------------------------------------------------------------ */
2862 nat_t *nat_icmperror(fin, nflags, dir)
2863 fr_info_t *fin;
2864 u_int *nflags;
2865 int dir;
2866 {
2867         u_32_t sum1, sum2, sumd, sumd2;
2868         struct in_addr a1, a2;
2869         int flags, dlen, odst;
2870         icmphdr_t *icmp;
2871         u_short *csump;
2872         tcphdr_t *tcp;
2873         nat_t *nat;
2874         ip_t *oip;
2875         void *dp;
2876
2877         if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2878                 return NULL;
2879         /*
2880          * nat_icmperrorlookup() will return NULL for `defective' packets.
2881          */
2882         if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2883                 return NULL;
2884
2885         tcp = NULL;
2886         csump = NULL;
2887         flags = 0;
2888         sumd2 = 0;
2889         *nflags = IPN_ICMPERR;
2890         icmp = fin->fin_dp;
2891         oip = (ip_t *)&icmp->icmp_ip;
2892         dp = (((char *)oip) + (IP_HL(oip) << 2));
2893         if (oip->ip_p == IPPROTO_TCP) {
2894                 tcp = (tcphdr_t *)dp;
2895                 csump = (u_short *)&tcp->th_sum;
2896                 flags = IPN_TCP;
2897         } else if (oip->ip_p == IPPROTO_UDP) {
2898                 udphdr_t *udp;
2899
2900                 udp = (udphdr_t *)dp;
2901                 tcp = (tcphdr_t *)dp;
2902                 csump = (u_short *)&udp->uh_sum;
2903                 flags = IPN_UDP;
2904         } else if (oip->ip_p == IPPROTO_ICMP)
2905                 flags = IPN_ICMPQUERY;
2906         dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);
2907
2908         /*
2909          * Need to adjust ICMP header to include the real IP#'s and
2910          * port #'s.  Only apply a checksum change relative to the
2911          * IP address change as it will be modified again in fr_checknatout
2912          * for both address and port.  Two checksum changes are
2913          * necessary for the two header address changes.  Be careful
2914          * to only modify the checksum once for the port # and twice
2915          * for the IP#.
2916          */
2917
2918         /*
2919          * Step 1
2920          * Fix the IP addresses in the offending IP packet. You also need
2921          * to adjust the IP header checksum of that offending IP packet.
2922          *
2923          * Normally, you would expect that the ICMP checksum of the
2924          * ICMP error message needs to be adjusted as well for the
2925          * IP address change in oip.
2926          * However, this is a NOP, because the ICMP checksum is
2927          * calculated over the complete ICMP packet, which includes the
2928          * changed oip IP addresses and oip->ip_sum. However, these
2929          * two changes cancel each other out (if the delta for
2930          * the IP address is x, then the delta for ip_sum is minus x),
2931          * so no change in the icmp_cksum is necessary.
2932          *
2933          * Inbound ICMP
2934          * ------------
2935          * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2936          * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
2937          * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
2938          *
2939          * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2940          * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2941          * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
2942          *
2943          * Outbound ICMP
2944          * -------------
2945          * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2946          * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2947          * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2948          *
2949          * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2950          * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
2951          * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2952          *
2953          */
2954         odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
2955         if (odst == 1) {
2956                 a1.s_addr = ntohl(nat->nat_inip.s_addr);
2957                 a2.s_addr = ntohl(oip->ip_src.s_addr);
2958                 oip->ip_src.s_addr = htonl(a1.s_addr);
2959         } else {
2960                 a1.s_addr = ntohl(nat->nat_outip.s_addr);
2961                 a2.s_addr = ntohl(oip->ip_dst.s_addr);
2962                 oip->ip_dst.s_addr = htonl(a1.s_addr);
2963         }
2964
2965         sumd = a2.s_addr - a1.s_addr;
2966         if (sumd != 0) {
2967                 if (a1.s_addr > a2.s_addr)
2968                         sumd--;
2969                 sumd = ~sumd;
2970
2971                 fix_datacksum(&oip->ip_sum, sumd);
2972         }
2973
2974         sumd2 = sumd;
2975         sum1 = 0;
2976         sum2 = 0;
2977
2978         /*
2979          * Fix UDP pseudo header checksum to compensate for the
2980          * IP address change.
2981          */
2982         if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
2983                 /*
2984                  * Step 2 :
2985                  * For offending TCP/UDP IP packets, translate the ports as
2986                  * well, based on the NAT specification. Of course such
2987                  * a change may be reflected in the ICMP checksum as well.
2988                  *
2989                  * Since the port fields are part of the TCP/UDP checksum
2990                  * of the offending IP packet, you need to adjust that checksum
2991                  * as well... except that the change in the port numbers should 
2992                  * be offset by the checksum change.  However, the TCP/UDP
2993                  * checksum will also need to change if there has been an
2994                  * IP address change.
2995                  */
2996                 if (odst == 1) {
2997                         sum1 = ntohs(nat->nat_inport);
2998                         sum2 = ntohs(tcp->th_sport);
2999
3000                         tcp->th_sport = htons(sum1);
3001                 } else {
3002                         sum1 = ntohs(nat->nat_outport);
3003                         sum2 = ntohs(tcp->th_dport);
3004
3005                         tcp->th_dport = htons(sum1);
3006                 }
3007
3008                 sumd += sum1 - sum2;
3009                 if (sumd != 0 || sumd2 != 0) {
3010                         /*
3011                          * At this point, sumd is the delta to apply to the
3012                          * TCP/UDP header, given the changes in both the IP
3013                          * address and the ports and sumd2 is the delta to
3014                          * apply to the ICMP header, given the IP address
3015                          * change delta that may need to be applied to the
3016                          * TCP/UDP checksum instead.
3017                          *
3018                          * If we will both the IP and TCP/UDP checksums
3019                          * then the ICMP checksum changes by the address
3020                          * delta applied to the TCP/UDP checksum.  If we
3021                          * do not change the TCP/UDP checksum them we
3022                          * apply the delta in ports to the ICMP checksum.
3023                          */
3024                         if (oip->ip_p == IPPROTO_UDP) {
3025                                 if ((dlen >= 8) && (*csump != 0)) {
3026                                         fix_datacksum(csump, sumd);
3027                                 } else {
3028                                         sumd2 = sum1 - sum2;
3029                                         if (sum2 > sum1)
3030                                                 sumd2--;
3031                                 }
3032                         } else if (oip->ip_p == IPPROTO_TCP) {
3033                                 if (dlen >= 18) {
3034                                         fix_datacksum(csump, sumd);
3035                                 } else {
3036                                         sumd2 = sum2 - sum1;
3037                                         if (sum1 > sum2)
3038                                                 sumd2--;
3039                                 }
3040                         }
3041
3042                         if (sumd2 != 0) {
3043                                 ipnat_t *np;
3044
3045                                 np = nat->nat_ptr;
3046                                 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3047                                 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3048                                 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3049
3050                                 if ((odst == 0) && (dir == NAT_OUTBOUND) &&
3051                                     (fin->fin_rev == 0) && (np != NULL) &&
3052                                     (np->in_redir & NAT_REDIRECT)) {
3053                                         fix_outcksum(fin, &icmp->icmp_cksum,
3054                                                      sumd2);
3055                                 } else {
3056                                         fix_incksum(fin, &icmp->icmp_cksum,
3057                                                     sumd2);
3058                                 }
3059                         }
3060                 }
3061         } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
3062                 icmphdr_t *orgicmp;
3063
3064                 /*
3065                  * XXX - what if this is bogus hl and we go off the end ?
3066                  * In this case, nat_icmperrorlookup() will have returned NULL.
3067                  */
3068                 orgicmp = (icmphdr_t *)dp;
3069
3070                 if (odst == 1) {
3071                         if (orgicmp->icmp_id != nat->nat_inport) {
3072
3073                                 /*
3074                                  * Fix ICMP checksum (of the offening ICMP
3075                                  * query packet) to compensate the change
3076                                  * in the ICMP id of the offending ICMP
3077                                  * packet.
3078                                  *
3079                                  * Since you modify orgicmp->icmp_id with
3080                                  * a delta (say x) and you compensate that
3081                                  * in origicmp->icmp_cksum with a delta
3082                                  * minus x, you don't have to adjust the
3083                                  * overall icmp->icmp_cksum
3084                                  */
3085                                 sum1 = ntohs(orgicmp->icmp_id);
3086                                 sum2 = ntohs(nat->nat_inport);
3087                                 CALC_SUMD(sum1, sum2, sumd);
3088                                 orgicmp->icmp_id = nat->nat_inport;
3089                                 fix_datacksum(&orgicmp->icmp_cksum, sumd);
3090                         }
3091                 } /* nat_dir == NAT_INBOUND is impossible for icmp queries */
3092         }
3093         return nat;
3094 }
3095
3096
3097 /*
3098  * NB: these lookups don't lock access to the list; it is assumed that this
3099  * has already been done!
3100  */
3101
3102 /* ------------------------------------------------------------------------ */
3103 /* Function:    nat_inlookup                                                */
3104 /* Returns:     nat_t* - NULL == no match,                                  */
3105 /*                       else pointer to matching NAT entry                 */
3106 /* Parameters:  fin(I)    - pointer to packet information                   */
3107 /*              flags(I)  - NAT flags for this packet                       */
3108 /*              p(I)      - protocol for this packet                        */
3109 /*              src(I)    - source IP address                               */
3110 /*              mapdst(I) - destination IP address                          */
3111 /*                                                                          */
3112 /* Lookup a nat entry based on the mapped destination ip address/port and   */
3113 /* real source address/port.  We use this lookup when receiving a packet,   */
3114 /* we're looking for a table entry, based on the destination address.       */
3115 /*                                                                          */
3116 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3117 /*                                                                          */
3118 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3119 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3120 /*                                                                          */
3121 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3122 /*            the packet is of said protocol                                */
3123 /* ------------------------------------------------------------------------ */
3124 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3125 fr_info_t *fin;
3126 u_int flags, p;
3127 struct in_addr src , mapdst;
3128 {
3129         u_short sport, dport;
3130         grehdr_t *gre;
3131         ipnat_t *ipn;
3132         u_int sflags;
3133         nat_t *nat;
3134         int nflags;
3135         u_32_t dst;
3136         void *ifp;
3137         u_int hv;
3138
3139         ifp = fin->fin_ifp;
3140         sport = 0;
3141         dport = 0;
3142         gre = NULL;
3143         dst = mapdst.s_addr;
3144         sflags = flags & NAT_TCPUDPICMP;
3145
3146         switch (p)
3147         {
3148         case IPPROTO_TCP :
3149         case IPPROTO_UDP :
3150                 sport = htons(fin->fin_data[0]);
3151                 dport = htons(fin->fin_data[1]);
3152                 break;
3153         case IPPROTO_ICMP :
3154                 if (flags & IPN_ICMPERR)
3155                         sport = fin->fin_data[1];
3156                 else
3157                         dport = fin->fin_data[1];
3158                 break;
3159         default :
3160                 break;
3161         }
3162
3163
3164         if ((flags & SI_WILDP) != 0)
3165                 goto find_in_wild_ports;
3166
3167         hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3168         hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
3169         nat = nat_table[1][hv];
3170         for (; nat; nat = nat->nat_hnext[1]) {
3171                 if (nat->nat_ifps[0] != NULL) {
3172                         if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3173                                 continue;
3174                 } else if (ifp != NULL)
3175                         nat->nat_ifps[0] = ifp;
3176
3177                 nflags = nat->nat_flags;
3178
3179                 if (nat->nat_oip.s_addr == src.s_addr &&
3180                     nat->nat_outip.s_addr == dst &&
3181                     (((p == 0) &&
3182                       (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3183                      || (p == nat->nat_p))) {
3184                         switch (p)
3185                         {
3186 #if 0
3187                         case IPPROTO_GRE :
3188                                 if (nat->nat_call[1] != fin->fin_data[0])
3189                                         continue;
3190                                 break;
3191 #endif
3192                         case IPPROTO_ICMP :
3193                                 if ((flags & IPN_ICMPERR) != 0) {
3194                                         if (nat->nat_outport != sport)
3195                                                 continue;
3196                                 } else {
3197                                         if (nat->nat_outport != dport)
3198                                                 continue;
3199                                 }
3200                                 break;
3201                         case IPPROTO_TCP :
3202                         case IPPROTO_UDP :
3203                                 if (nat->nat_oport != sport)
3204                                         continue;
3205                                 if (nat->nat_outport != dport)
3206                                         continue;
3207                                 break;
3208                         default :
3209                                 break;
3210                         }
3211
3212                         ipn = nat->nat_ptr;
3213                         if ((ipn != NULL) && (nat->nat_aps != NULL))
3214                                 if (appr_match(fin, nat) != 0)
3215                                         continue;
3216                         return nat;
3217                 }
3218         }
3219
3220         /*
3221          * So if we didn't find it but there are wildcard members in the hash
3222          * table, go back and look for them.  We do this search and update here
3223          * because it is modifying the NAT table and we want to do this only
3224          * for the first packet that matches.  The exception, of course, is
3225          * for "dummy" (FI_IGNORE) lookups.
3226          */
3227 find_in_wild_ports:
3228         if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3229                 return NULL;
3230         if (nat_stats.ns_wilds == 0)
3231                 return NULL;
3232
3233         RWLOCK_EXIT(&ipf_nat);
3234
3235         hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3236         hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
3237
3238         WRITE_ENTER(&ipf_nat);
3239
3240         nat = nat_table[1][hv];
3241         for (; nat; nat = nat->nat_hnext[1]) {
3242                 if (nat->nat_ifps[0] != NULL) {
3243                         if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3244                                 continue;
3245                 } else if (ifp != NULL)
3246                         nat->nat_ifps[0] = ifp;
3247
3248                 if (nat->nat_p != fin->fin_p)
3249                         continue;
3250                 if (nat->nat_oip.s_addr != src.s_addr ||
3251                     nat->nat_outip.s_addr != dst)
3252                         continue;
3253
3254                 nflags = nat->nat_flags;
3255                 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3256                         continue;
3257
3258                 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3259                                NAT_INBOUND) == 1) {
3260                         if ((fin->fin_flx & FI_IGNORE) != 0)
3261                                 break;
3262                         if ((nflags & SI_CLONE) != 0) {
3263                                 nat = fr_natclone(fin, nat);
3264                                 if (nat == NULL)
3265                                         break;
3266                         } else {
3267                                 MUTEX_ENTER(&ipf_nat_new);
3268                                 nat_stats.ns_wilds--;
3269                                 MUTEX_EXIT(&ipf_nat_new);
3270                         }
3271                         nat->nat_oport = sport;
3272                         nat->nat_outport = dport;
3273                         nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3274                         nat_tabmove(nat);
3275                         break;
3276                 }
3277         }
3278
3279         MUTEX_DOWNGRADE(&ipf_nat);
3280
3281         return nat;
3282 }
3283
3284
3285 /* ------------------------------------------------------------------------ */
3286 /* Function:    nat_tabmove                                                 */
3287 /* Returns:     Nil                                                         */
3288 /* Parameters:  nat(I) - pointer to NAT structure                           */
3289 /* Write Lock:  ipf_nat                                                     */
3290 /*                                                                          */
3291 /* This function is only called for TCP/UDP NAT table entries where the     */
3292 /* original was placed in the table without hashing on the ports and we now */
3293 /* want to include hashing on port numbers.                                 */
3294 /* ------------------------------------------------------------------------ */
3295 static void nat_tabmove(nat)
3296 nat_t *nat;
3297 {
3298         nat_t **natp;
3299         u_int hv;
3300
3301         if (nat->nat_flags & SI_CLONE)
3302                 return;
3303
3304         /*
3305          * Remove the NAT entry from the old location
3306          */
3307         if (nat->nat_hnext[0])
3308                 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3309         *nat->nat_phnext[0] = nat->nat_hnext[0];
3310         nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3311
3312         if (nat->nat_hnext[1])
3313                 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3314         *nat->nat_phnext[1] = nat->nat_hnext[1];
3315         nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3316
3317         /*
3318          * Add into the NAT table in the new position
3319          */
3320         hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3321         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3322                          ipf_nattable_sz);
3323         nat->nat_hv[0] = hv;
3324         natp = &nat_table[0][hv];
3325         if (*natp)
3326                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3327         nat->nat_phnext[0] = natp;
3328         nat->nat_hnext[0] = *natp;
3329         *natp = nat;
3330         nat_stats.ns_bucketlen[0][hv]++;
3331
3332         hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3333         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3334                          ipf_nattable_sz);
3335         nat->nat_hv[1] = hv;
3336         natp = &nat_table[1][hv];
3337         if (*natp)
3338                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3339         nat->nat_phnext[1] = natp;
3340         nat->nat_hnext[1] = *natp;
3341         *natp = nat;
3342         nat_stats.ns_bucketlen[1][hv]++;
3343 }
3344
3345
3346 /* ------------------------------------------------------------------------ */
3347 /* Function:    nat_outlookup                                               */
3348 /* Returns:     nat_t* - NULL == no match,                                  */
3349 /*                       else pointer to matching NAT entry                 */
3350 /* Parameters:  fin(I)   - pointer to packet information                    */
3351 /*              flags(I) - NAT flags for this packet                        */
3352 /*              p(I)     - protocol for this packet                         */
3353 /*              src(I)   - source IP address                                */
3354 /*              dst(I)   - destination IP address                           */
3355 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3356 /*                                                                          */
3357 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3358 /* destination address/port.  We use this lookup when sending a packet out, */
3359 /* we're looking for a table entry, based on the source address.            */
3360 /*                                                                          */
3361 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3362 /*                                                                          */
3363 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3364 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3365 /*                                                                          */
3366 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3367 /*            the packet is of said protocol                                */
3368 /* ------------------------------------------------------------------------ */
3369 nat_t *nat_outlookup(fin, flags, p, src, dst)
3370 fr_info_t *fin;
3371 u_int flags, p;
3372 struct in_addr src , dst;
3373 {
3374         u_short sport, dport;
3375         u_int sflags;
3376         ipnat_t *ipn;
3377         u_32_t srcip;
3378         nat_t *nat;
3379         int nflags;
3380         void *ifp;
3381         u_int hv;
3382
3383         ifp = fin->fin_ifp;
3384         srcip = src.s_addr;
3385         sflags = flags & IPN_TCPUDPICMP;
3386         sport = 0;
3387         dport = 0;
3388
3389         switch (p)
3390         {
3391         case IPPROTO_TCP :
3392         case IPPROTO_UDP :
3393                 sport = htons(fin->fin_data[0]);
3394                 dport = htons(fin->fin_data[1]);
3395                 break;
3396         case IPPROTO_ICMP :
3397                 if (flags & IPN_ICMPERR)
3398                         sport = fin->fin_data[1];
3399                 else
3400                         dport = fin->fin_data[1];
3401                 break;
3402         default :
3403                 break;
3404         }
3405
3406         if ((flags & SI_WILDP) != 0)
3407                 goto find_out_wild_ports;
3408
3409         hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3410         hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
3411         nat = nat_table[0][hv];
3412         for (; nat; nat = nat->nat_hnext[0]) {
3413                 if (nat->nat_ifps[1] != NULL) {
3414                         if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3415                                 continue;
3416                 } else if (ifp != NULL)
3417                         nat->nat_ifps[1] = ifp;
3418
3419                 nflags = nat->nat_flags;
3420
3421                 if (nat->nat_inip.s_addr == srcip &&
3422                     nat->nat_oip.s_addr == dst.s_addr &&
3423                     (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3424                      || (p == nat->nat_p))) {
3425                         switch (p)
3426                         {
3427 #if 0
3428                         case IPPROTO_GRE :
3429                                 if (nat->nat_call[1] != fin->fin_data[0])
3430                                         continue;
3431                                 break;
3432 #endif
3433                         case IPPROTO_TCP :
3434                         case IPPROTO_UDP :
3435                                 if (nat->nat_oport != dport)
3436                                         continue;
3437                                 if (nat->nat_inport != sport)
3438                                         continue;
3439                                 break;
3440                         default :
3441                                 break;
3442                         }
3443
3444                         ipn = nat->nat_ptr;
3445                         if ((ipn != NULL) && (nat->nat_aps != NULL))
3446                                 if (appr_match(fin, nat) != 0)
3447                                         continue;
3448                         return nat;
3449                 }
3450         }
3451
3452         /*
3453          * So if we didn't find it but there are wildcard members in the hash
3454          * table, go back and look for them.  We do this search and update here
3455          * because it is modifying the NAT table and we want to do this only
3456          * for the first packet that matches.  The exception, of course, is
3457          * for "dummy" (FI_IGNORE) lookups.
3458          */
3459 find_out_wild_ports:
3460         if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3461                 return NULL;
3462         if (nat_stats.ns_wilds == 0)
3463                 return NULL;
3464
3465         RWLOCK_EXIT(&ipf_nat);
3466
3467         hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3468         hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
3469
3470         WRITE_ENTER(&ipf_nat);
3471
3472         nat = nat_table[0][hv];
3473         for (; nat; nat = nat->nat_hnext[0]) {
3474                 if (nat->nat_ifps[1] != NULL) {
3475                         if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3476                                 continue;
3477                 } else if (ifp != NULL)
3478                         nat->nat_ifps[1] = ifp;
3479
3480                 if (nat->nat_p != fin->fin_p)
3481                         continue;
3482                 if ((nat->nat_inip.s_addr != srcip) ||
3483                     (nat->nat_oip.s_addr != dst.s_addr))
3484                         continue;
3485
3486                 nflags = nat->nat_flags;
3487                 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3488                         continue;
3489
3490                 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3491                                NAT_OUTBOUND) == 1) {
3492                         if ((fin->fin_flx & FI_IGNORE) != 0)
3493                                 break;
3494                         if ((nflags & SI_CLONE) != 0) {
3495                                 nat = fr_natclone(fin, nat);
3496                                 if (nat == NULL)
3497                                         break;
3498                         } else {
3499                                 MUTEX_ENTER(&ipf_nat_new);
3500                                 nat_stats.ns_wilds--;
3501                                 MUTEX_EXIT(&ipf_nat_new);
3502                         }
3503                         nat->nat_inport = sport;
3504                         nat->nat_oport = dport;
3505                         if (nat->nat_outport == 0)
3506                                 nat->nat_outport = sport;
3507                         nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3508                         nat_tabmove(nat);
3509                         break;
3510                 }
3511         }
3512
3513         MUTEX_DOWNGRADE(&ipf_nat);
3514
3515         return nat;
3516 }
3517
3518
3519 /* ------------------------------------------------------------------------ */
3520 /* Function:    nat_lookupredir                                             */
3521 /* Returns:     nat_t* - NULL == no match,                                  */
3522 /*                       else pointer to matching NAT entry                 */
3523 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3524 /*                      entry for.                                          */
3525 /*                                                                          */
3526 /* Lookup the NAT tables to search for a matching redirect                  */
3527 /* The contents of natlookup_t should imitate those found in a packet that  */
3528 /* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3529 /* We can do the lookup in one of two ways, imitating an inbound or         */
3530 /* outbound  packet.  By default we assume outbound, unless IPN_IN is set.  */
3531 /* For IN, the fields are set as follows:                                   */
3532 /*     nl_real* = source information                                        */
3533 /*     nl_out* = destination information (translated)                       */
3534 /* For an out packet, the fields are set like this:                         */
3535 /*     nl_in* = source information (untranslated)                           */
3536 /*     nl_out* = destination information (translated)                       */
3537 /* ------------------------------------------------------------------------ */
3538 nat_t *nat_lookupredir(np)
3539 natlookup_t *np;
3540 {
3541         fr_info_t fi;
3542         nat_t *nat;
3543
3544         bzero((char *)&fi, sizeof(fi));
3545         if (np->nl_flags & IPN_IN) {
3546                 fi.fin_data[0] = ntohs(np->nl_realport);
3547                 fi.fin_data[1] = ntohs(np->nl_outport);
3548         } else {
3549                 fi.fin_data[0] = ntohs(np->nl_inport);
3550                 fi.fin_data[1] = ntohs(np->nl_outport);
3551         }
3552         if (np->nl_flags & IPN_TCP)
3553                 fi.fin_p = IPPROTO_TCP;
3554         else if (np->nl_flags & IPN_UDP)
3555                 fi.fin_p = IPPROTO_UDP;
3556         else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3557                 fi.fin_p = IPPROTO_ICMP;
3558
3559         /*
3560          * We can do two sorts of lookups:
3561          * - IPN_IN: we have the `real' and `out' address, look for `in'.
3562          * - default: we have the `in' and `out' address, look for `real'.
3563          */
3564         if (np->nl_flags & IPN_IN) {
3565                 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3566                                         np->nl_realip, np->nl_outip))) {
3567                         np->nl_inip = nat->nat_inip;
3568                         np->nl_inport = nat->nat_inport;
3569                 }
3570         } else {
3571                 /*
3572                  * If nl_inip is non null, this is a lookup based on the real
3573                  * ip address. Else, we use the fake.
3574                  */
3575                 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3576                                          np->nl_inip, np->nl_outip))) {
3577
3578                         if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3579                                 fr_info_t fin;
3580                                 bzero((char *)&fin, sizeof(fin));
3581                                 fin.fin_p = nat->nat_p;
3582                                 fin.fin_data[0] = ntohs(nat->nat_outport);
3583                                 fin.fin_data[1] = ntohs(nat->nat_oport);
3584                                 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3585                                                  nat->nat_outip,
3586                                                  nat->nat_oip) != NULL) {
3587                                         np->nl_flags &= ~IPN_FINDFORWARD;
3588                                 }
3589                         }
3590
3591                         np->nl_realip = nat->nat_outip;
3592                         np->nl_realport = nat->nat_outport;
3593                 }
3594         }
3595
3596         return nat;
3597 }
3598
3599
3600 /* ------------------------------------------------------------------------ */
3601 /* Function:    nat_match                                                   */
3602 /* Returns:     int - 0 == no match, 1 == match                             */
3603 /* Parameters:  fin(I)   - pointer to packet information                    */
3604 /*              np(I)    - pointer to NAT rule                              */
3605 /*                                                                          */
3606 /* Pull the matching of a packet against a NAT rule out of that complex     */
3607 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3608 /* ------------------------------------------------------------------------ */
3609 static int nat_match(fin, np)
3610 fr_info_t *fin;
3611 ipnat_t *np;
3612 {
3613         frtuc_t *ft;
3614
3615         if (fin->fin_v != 4)
3616                 return 0;
3617
3618         if (np->in_p && fin->fin_p != np->in_p)
3619                 return 0;
3620
3621         if (fin->fin_out) {
3622                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3623                         return 0;
3624                 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3625                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
3626                         return 0;
3627                 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3628                     ^ ((np->in_flags & IPN_NOTDST) != 0))
3629                         return 0;
3630         } else {
3631                 if (!(np->in_redir & NAT_REDIRECT))
3632                         return 0;
3633                 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3634                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
3635                         return 0;
3636                 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3637                     ^ ((np->in_flags & IPN_NOTDST) != 0))
3638                         return 0;
3639         }
3640
3641         ft = &np->in_tuc;
3642         if (!(fin->fin_flx & FI_TCPUDP) ||
3643             (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3644                 if (ft->ftu_scmp || ft->ftu_dcmp)
3645                         return 0;
3646                 return 1;
3647         }
3648
3649         return fr_tcpudpchk(fin, ft);
3650 }
3651
3652
3653 /* ------------------------------------------------------------------------ */
3654 /* Function:    nat_update                                                  */
3655 /* Returns:     Nil                                                         */
3656 /* Parameters:  fin(I)    - pointer to packet information                   */
3657 /*              nat(I)    - pointer to NAT structure                        */
3658 /*              np(I)     - pointer to NAT rule                             */
3659 /*                                                                          */
3660 /* Requeue a NAT entry's timeout; call after nat_proto() has set fin_rev.   */
3661 /* ------------------------------------------------------------------------ */
3662 void nat_update(fin, nat, np)
3663 fr_info_t *fin;
3664 nat_t *nat;
3665 ipnat_t *np;
3666 {
3667         ipftq_t *curq, *newq;
3668         ipftqent_t *ent;
3669         int rev;
3670
3671         MUTEX_ENTER(&nat->nat_lock);
3672         ent = &nat->nat_tqe;
3673         curq = ent->tqe_ifq;
3674         rev = fin->fin_rev;
3675
3676         /*
3677          * A timeout queue supplied by the NAT rule always wins, TCP
3678          * included.  TCP with no rule queue is handed to fr_tcp_age()
3679          * rather than being moved onto a queue here.
3680          */
3681         newq = (np != NULL) ? np->in_tqehead[rev] : NULL;
3682
3683         if ((newq != NULL) || (nat->nat_p != IPPROTO_TCP)) {
3684                 if (newq == NULL) {
3685                         switch (nat->nat_p)
3686                         {
3687                         case IPPROTO_UDP :
3688                                 newq = &nat_udptq;
3689                                 break;
3690                         case IPPROTO_ICMP :
3691                                 newq = &nat_icmptq;
3692                                 break;
3693                         default :
3694                                 newq = &nat_iptq;
3695                                 break;
3696                         }
3697                 }
3698                 fr_movequeue(ent, curq, newq);
3699         } else {
3700                 tcphdr_t *tcp = fin->fin_dp;
3701                 u_char tcpflags = tcp->th_flags;
3702                 u_32_t ack, end;
3703                 int dsize;
3704                 /* fin_dlen less the TCP header, plus 1 each for SYN/FIN. */
3705                 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2);
3706                 if (tcpflags & TH_SYN)
3707                         dsize++;
3708                 if (tcpflags & TH_FIN)
3709                         dsize++;
3710                 ack = ntohl(tcp->th_ack);
3711                 end = ntohl(tcp->th_seq) + dsize;
3712                 if (SEQ_GT(ack, nat->nat_seqnext[1 - rev]))
3713                         nat->nat_seqnext[1 - rev] = ack;
3714                 if (nat->nat_seqnext[rev] == 0)
3715                         nat->nat_seqnext[rev] = end;
3716                 (void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3717         }
3718         MUTEX_EXIT(&nat->nat_lock);
3719 }
3720
3721
3722 /* ------------------------------------------------------------------------ */
3723 /* Function:    fr_checknatout                                              */
3724 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3725 /*                     0 == no packet translation occurred,                 */
3726 /*                     1 == packet was successfully translated.             */
3727 /* Parameters:  fin(I)   - pointer to packet information                    */
3728 /*              passp(O) - set to FR_BLOCK on -1 (may be NULL)              */
3729 /*                                                                          */
3730 /* Check to see if an outgoing packet should be changed.  ICMP packets are  */
3731 /* first checked to see if they match an existing entry (if an error),      */
3732 /* otherwise a search of the current NAT table is made.  If neither results */
3733 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3734 /* NAT entry if we matched a NAT rule.  Lastly, actually change the         */
3735 /* packet header(s) as required.                                            */
3736 /* ------------------------------------------------------------------------ */
3737 int fr_checknatout(fin, passp)
3738 fr_info_t *fin;
3739 u_32_t *passp;
3740 {
3741         struct ifnet *ifp, *sifp;
3742         icmphdr_t *icmp = NULL;
3743         tcphdr_t *tcp = NULL;
3744         int rval, natfailed;
3745         ipnat_t *np = NULL;
3746         u_int nflags = 0;
3747         u_32_t ipa, iph;
3748         int natadd = 1;
3749         frentry_t *fr;
3750         nat_t *nat;
3751         /* Nothing to do without NAT rules or while fr_nat_lock is set. */
3752         if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
3753                 return 0;
3754         /* Use the filter rule's fr_tifs interface if set; sifp restores it. */
3755         natfailed = 0;
3756         fr = fin->fin_fr;
3757         sifp = fin->fin_ifp;
3758         if (fr != NULL) {
3759                 ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3760                 if ((ifp != NULL) && (ifp != (void *)-1))
3761                         fin->fin_ifp = ifp;
3762         }
3763         ifp = fin->fin_ifp;
3764         /* nflags/tcp are only derived for non-short packets at offset 0. */
3765         if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3766                 switch (fin->fin_p)
3767                 {
3768                 case IPPROTO_TCP :
3769                         nflags = IPN_TCP;
3770                         break;
3771                 case IPPROTO_UDP :
3772                         nflags = IPN_UDP;
3773                         break;
3774                 case IPPROTO_ICMP :
3775                         icmp = fin->fin_dp;
3776
3777                         /*
3778                          * Only query types are flagged here; any other
3779                          * ICMP type goes to nat_icmperror() further down.
3780                          */
3781                         if (nat_icmpquerytype4(icmp->icmp_type))
3782                                 nflags = IPN_ICMPQUERY;
3783                         break;
3784                 default :
3785                         break;
3786                 }
3787
3788                 if ((nflags & IPN_TCPUDP))
3789                         tcp = fin->fin_dp;
3790         }
3791
3792         ipa = fin->fin_saddr;
3793
3794         READ_ENTER(&ipf_nat);
3795         /* Lookup order: ICMP error match, fragment cache, then NAT table. */
3796         if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3797             (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3798                 /*EMPTY*/;
3799         else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3800                 natadd = 0;
3801         else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3802                                       fin->fin_src, fin->fin_dst))) {
3803                 nflags = nat->nat_flags;
3804         } else {
3805                 u_32_t hv, msk, nmsk;
3806                 u_short dport = (tcp != NULL) ? tcp->th_dport : 0;
3807                 /*
3808                  * If there is no current entry in the nat table for this IP#,
3809                  * create one for it (if there is a matching rule).
3810                  */
3811                 RWLOCK_EXIT(&ipf_nat);
3812                 msk = 0xffffffff;
3813                 nmsk = nat_masks;
3814                 WRITE_ENTER(&ipf_nat);
3815 maskloop:
3816                 iph = ipa & htonl(msk);
3817                 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3818                 for (np = nat_rules[hv]; np; np = np->in_mnext)
3819                 {
3820                         if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3821                                 continue;
3822                         if (np->in_v != fin->fin_v)
3823                                 continue;
3824                         if (np->in_p && (np->in_p != fin->fin_p))
3825                                 continue;
3826                         if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3827                                 continue;
3828                         if (np->in_flags & IPN_FILTER) {
3829                                 if (!nat_match(fin, np))
3830                                         continue;
3831                         } else if ((ipa & np->in_inmsk) != np->in_inip)
3832                                 continue;
3833                         /* The rule's tag must match the filter rule's NAT tag. */
3834                         if ((fr != NULL) &&
3835                             !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3836                                 continue;
3837                         /* tcp may be NULL here, so compare dport (0 then). */
3838                         if (*np->in_plabel != '\0') {
3839                                 if (((np->in_flags & IPN_FILTER) == 0) &&
3840                                     (np->in_dport != dport))
3841                                         continue;
3842                                 if (appr_ok(fin, tcp, np) == 0)
3843                                         continue;
3844                         }
3845
3846                         if ((nat = nat_new(fin, np, NULL, nflags,
3847                                            NAT_OUTBOUND))) {
3848                                 np->in_hits++;
3849                                 break;
3850                         } else
3851                                 natfailed = -1;
3852                 }
3853                 if ((np == NULL) && (nmsk != 0)) {
3854                         while (nmsk) {
3855                                 msk <<= 1;
3856                                 if (nmsk & 0x80000000)
3857                                         break;
3858                                 nmsk <<= 1;
3859                         }
3860                         if (nmsk != 0) {
3861                                 nmsk <<= 1;
3862                                 goto maskloop;
3863                         }
3864                 }
3865                 MUTEX_DOWNGRADE(&ipf_nat);
3866         }
3867         /* Translate the packet; on success the entry gains a reference. */
3868         if (nat != NULL) {
3869                 rval = fr_natout(fin, nat, natadd, nflags);
3870                 if (rval == 1) {
3871                         MUTEX_ENTER(&nat->nat_lock);
3872                         nat->nat_ref++;
3873                         MUTEX_EXIT(&nat->nat_lock);
3874                         nat->nat_touched = fr_ticks;
3875                         fin->fin_nat = nat;
3876                 }
3877         } else
3878                 rval = natfailed;
3879         RWLOCK_EXIT(&ipf_nat);
3880         /* A failed translation marks the packet FI_BADNAT and blocks it. */
3881         if (rval == -1) {
3882                 if (passp != NULL)
3883                         *passp = FR_BLOCK;
3884                 fin->fin_flx |= FI_BADNAT;
3885         }
3886         fin->fin_ifp = sifp;
3887         return rval;
3888 }
3889
3890 /* ------------------------------------------------------------------------ */
3891 /* Function:    fr_natout                                                   */
3892 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3893 /*                     1 == packet was successfully translated.             */
3894 /* Parameters:  fin(I)    - pointer to packet information                   */
3895 /*              nat(I)    - pointer to NAT structure                        */
3896 /*              natadd(I) - flag indicating if it is safe to add frag cache */
3897 /*              nflags(I) - NAT flags set for this packet                   */
3898 /*                                                                          */
3899 /* Translate a packet going "out" on an interface: rewrite the IP source    */
3900 /* ------------------------------------------------------------------------ */
3901 int fr_natout(fin, nat, natadd, nflags)
3902 fr_info_t *fin;
3903 nat_t *nat;
3904 int natadd;
3905 u_32_t nflags;
3906 {
3907         icmphdr_t *icmp;
3908         u_short *csump;
3909         tcphdr_t *tcp;
3910         ipnat_t *np;
3911         int i;
3912
3913         tcp = NULL;
3914         icmp = NULL;
3915         csump = NULL;
3916         np = nat->nat_ptr;
3917         /* Record fragmented packets in the NAT fragment cache when permitted. */
3918         if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3919                 (void) fr_nat_newfrag(fin, 0, nat);
3920         /* Slot [1] of the counters tallies packets handled by fr_natout(). */
3921         MUTEX_ENTER(&nat->nat_lock);
3922         nat->nat_bytes[1] += fin->fin_plen;
3923         nat->nat_pkts[1]++;
3924         MUTEX_EXIT(&nat->nat_lock);
3925
3926         /*
3927          * Fix up checksums, not by recalculating them, but
3928          * simply computing adjustments.
3929          * This is only done for STREAMS based IP implementations where the
3930          * checksum has already been calculated by IP.  In all other cases,
3931          * IPFilter is called before the checksum needs calculating so there
3932          * is no call to modify whatever is in the header now.
3933          */
3934         if (fin->fin_v == 4) {
3935                 if (nflags == IPN_ICMPERR) {
3936                         u_32_t s1, s2, sumd;
3937
3938                         s1 = LONG_SUM(ntohl(fin->fin_saddr));
3939                         s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3940                         CALC_SUMD(s1, s2, sumd);
3941                         fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3942                 }
3943 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3944     defined(linux) || defined(BRIDGE_IPF)
3945                 else {
3946                         /*
3947                          * Strictly speaking, this isn't necessary on BSD
3948                          * kernels because they do checksum calculation after
3949                          * this code has run BUT if ipfilter is being used
3950                          * to do NAT as a bridge, that code doesn't exist.
3951                          */
3952                         if (nat->nat_dir == NAT_OUTBOUND)
3953                                 fix_outcksum(fin, &fin->fin_ip->ip_sum,
3954                                              nat->nat_ipsumd);
3955                         else
3956                                 fix_incksum(fin, &fin->fin_ip->ip_sum,
3957                                             nat->nat_ipsumd);
3958                 }
3959 #endif
3960         }
3961         /* Port / ICMP id rewriting needs a non-short packet at offset 0. */
3962         if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3963                 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3964                         tcp = fin->fin_dp;
3965
3966                         tcp->th_sport = nat->nat_outport;
3967                         fin->fin_data[0] = ntohs(nat->nat_outport);
3968                 }
3969
3970                 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3971                         icmp = fin->fin_dp;
3972                         icmp->icmp_id = nat->nat_outport;
3973                 }
3974                 /* nat_proto() also sets fin_rev, which nat_update() reads. */
3975                 csump = nat_proto(fin, nat, nflags);
3976         }
3977         /* The IP source address is replaced unconditionally. */
3978         fin->fin_ip->ip_src = nat->nat_outip;
3979
3980         nat_update(fin, nat, np);
3981
3982         /*
3983          * The above comments do not hold for layer 4 (or higher) checksums...
3984          */
3985         if (csump != NULL) {
3986                 if (nat->nat_dir == NAT_OUTBOUND)
3987                         fix_outcksum(fin, csump, nat->nat_sumd[1]);
3988                 else
3989                         fix_incksum(fin, csump, nat->nat_sumd[1]);
3990         }
3991 #ifdef  IPFILTER_SYNC
3992         ipfsync_update(SMC_NAT, fin, nat->nat_sync);
3993 #endif
3994         /* ------------------------------------------------------------- */
3995         /* A few quick notes:                                            */
3996         /*      Following are test conditions prior to calling the       */
3997         /*      appr_check routine.                                      */
3998         /*                                                               */
3999         /*      A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4000         /*      with a redirect rule, we attempt to match the packet's   */
4001         /*      source port against in_dport, otherwise we'd compare the */
4002         /*      packet's destination.                                    */
4003         /* ------------------------------------------------------------- */
4004         if ((np != NULL) && (np->in_apr != NULL)) {
4005                 i = appr_check(fin, nat);
4006                 if (i == 0)             /* a 0 from the proxy is reported as 1 */
4007                         i = 1;
4008         } else
4009                 i = 1;
4010         ATOMIC_INCL(nat_stats.ns_mapped[1]);
4011         fin->fin_flx |= FI_NATED;
4012         return i;
4013 }
4014
4015
4016 /* ------------------------------------------------------------------------ */
4017 /* Function:    fr_checknatin                                               */
4018 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4019 /*                     0 == no packet translation occurred,                 */
4020 /*                     1 == packet was successfully translated.             */
4021 /* Parameters:  fin(I)   - pointer to packet information                    */
4022 /*              passp(O) - set to FR_BLOCK on -1 (may be NULL)              */
4023 /*                                                                          */
4024 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
4025 /* first checked to see if they match an existing entry (if an error),      */
4026 /* otherwise a search of the current NAT table is made.  If neither results */
4027 /* in a match then a search for a matching NAT rule is made.  Create a new  */
4028 /* NAT entry if we matched a NAT rule.  Lastly, actually change the         */
4029 /* packet header(s) as required.                                            */
4030 /* ------------------------------------------------------------------------ */
4031 int fr_checknatin(fin, passp)
4032 fr_info_t *fin;
4033 u_32_t *passp;
4034 {
4035         u_int nflags, natadd;
4036         int rval, natfailed;
4037         struct ifnet *ifp;
4038         struct in_addr in;
4039         icmphdr_t *icmp;
4040         tcphdr_t *tcp;
4041         u_short dport;
4042         ipnat_t *np;
4043         nat_t *nat;
4044         u_32_t iph;
4045         /* Nothing to do without NAT rules or while fr_nat_lock is set. */
4046         if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
4047                 return 0;
4048
4049         tcp = NULL;
4050         icmp = NULL;
4051         dport = 0;
4052         natadd = 1;
4053         nflags = 0;
4054         natfailed = 0;
4055         ifp = fin->fin_ifp;
4056         /* nflags/tcp/dport are only derived for non-short packets at offset 0. */
4057         if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4058                 switch (fin->fin_p)
4059                 {
4060                 case IPPROTO_TCP :
4061                         nflags = IPN_TCP;
4062                         break;
4063                 case IPPROTO_UDP :
4064                         nflags = IPN_UDP;
4065                         break;
4066                 case IPPROTO_ICMP :
4067                         icmp = fin->fin_dp;
4068
4069                         /*
4070                          * This is an incoming packet, so the destination is
4071                          * the icmp_id and the source port equals 0
4072                          */
4073                         if (nat_icmpquerytype4(icmp->icmp_type)) {
4074                                 nflags = IPN_ICMPQUERY;
4075                                 dport = icmp->icmp_id;  
4076                         } break;
4077                 default :
4078                         break;
4079                 }
4080                 
4081                 if ((nflags & IPN_TCPUDP)) {
4082                         tcp = fin->fin_dp;
4083                         dport = tcp->th_dport;
4084                 }
4085         }
4086
4087         in = fin->fin_dst;
4088
4089         READ_ENTER(&ipf_nat);
4090         /* Lookup order: ICMP error match, fragment cache, then NAT table. */
4091         if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4092             (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4093                 /*EMPTY*/;
4094         else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4095                 natadd = 0;
4096         else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4097                                      fin->fin_src, in))) {
4098                 nflags = nat->nat_flags;
4099         } else {
4100                 u_32_t hv, msk, rmsk;
4101                 /* Swap the read lock for the write lock while rules are tried. */
4102                 RWLOCK_EXIT(&ipf_nat);
4103                 rmsk = rdr_masks;
4104                 msk = 0xffffffff;
4105                 WRITE_ENTER(&ipf_nat);
4106                 /*
4107                  * If there is no current entry in the nat table for this IP#,
4108                  * create one for it (if there is a matching rule).
4109                  */
4110 maskloop:
4111                 iph = in.s_addr & htonl(msk);
4112                 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
4113                 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
4114                         if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4115                                 continue;
4116                         if (np->in_v != fin->fin_v)
4117                                 continue;
4118                         if (np->in_p && (np->in_p != fin->fin_p))
4119                                 continue;
4120                         if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4121                                 continue;
4122                         if (np->in_flags & IPN_FILTER) {
4123                                 if (!nat_match(fin, np))
4124                                         continue;
4125                         } else {
4126                                 if ((in.s_addr & np->in_outmsk) != np->in_outip)
4127                                         continue;
4128                                 if (np->in_pmin &&
4129                                     ((ntohs(np->in_pmax) < ntohs(dport)) ||
4130                                      (ntohs(dport) < ntohs(np->in_pmin))))
4131                                         continue;
4132                         }
4133                         /* Rules with a non-empty in_plabel must also pass appr_ok(). */
4134                         if (*np->in_plabel != '\0') {
4135                                 if (!appr_ok(fin, tcp, np)) {
4136                                         continue;
4137                                 }
4138                         }
4139
4140                         nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4141                         if (nat != NULL) {
4142                                 np->in_hits++;
4143                                 break;
4144                         } else
4145                                 natfailed = -1;
4146                 }
4147                 /* Nothing matched: shift msk to the next bit set in rmsk, retry. */
4148                 if ((np == NULL) && (rmsk != 0)) {
4149                         while (rmsk) {
4150                                 msk <<= 1;
4151                                 if (rmsk & 0x80000000)
4152                                         break;
4153                                 rmsk <<= 1;
4154                         }
4155                         if (rmsk != 0) {
4156                                 rmsk <<= 1;
4157                                 goto maskloop;
4158                         }
4159                 }
4160                 MUTEX_DOWNGRADE(&ipf_nat);
4161         }
4162         if (nat != NULL) {
4163                 rval = fr_natin(fin, nat, natadd, nflags);
4164                 if (rval == 1) {
4165                         MUTEX_ENTER(&nat->nat_lock);
4166                         nat->nat_ref++;
4167                         MUTEX_EXIT(&nat->nat_lock);
4168                         nat->nat_touched = fr_ticks;
4169                         fin->fin_nat = nat;
4170                 }
4171         } else
4172                 rval = natfailed;
4173         RWLOCK_EXIT(&ipf_nat);
4174         /* A failed translation marks the packet FI_BADNAT and blocks it. */
4175         if (rval == -1) {
4176                 if (passp != NULL)
4177                         *passp = FR_BLOCK;
4178                 fin->fin_flx |= FI_BADNAT;
4179         }
4180         return rval;
4181 }
4182
4183
4184 /* ------------------------------------------------------------------------ */
4185 /* Function:    fr_natin                                                    */
4186 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4187 /*                     1 == packet was successfully translated.             */
4188 /* Parameters:  fin(I)    - pointer to packet information                   */
4189 /*              nat(I)    - pointer to NAT structure                        */
4190 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4191 /*              nflags(I) - NAT flags set for this packet                   */
4192 /* Locks Held:  ipf_nat (READ)                                              */
4193 /*                                                                          */
4194 /* Translate a packet coming "in" on an interface.                          */
4195 /* ------------------------------------------------------------------------ */
4196 int fr_natin(fin, nat, natadd, nflags)
4197 fr_info_t *fin;
4198 nat_t *nat;
4199 int natadd;
4200 u_32_t nflags;
4201 {
4202         icmphdr_t *icmp;
4203         u_short *csump;
4204         tcphdr_t *tcp;
4205         ipnat_t *np;
4206         int i;
4207
4208         tcp = NULL;
4209         csump = NULL;
4210         np = nat->nat_ptr;
4211         fin->fin_fr = nat->nat_fr;
4212         /* nat_ptr may be NULL; the rule-specific steps are then skipped. */
4213         if (np != NULL) {
4214                 if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4215                         (void) fr_nat_newfrag(fin, 0, nat);
4216
4217         /* ------------------------------------------------------------- */
4218         /* A few quick notes:                                            */
4219         /*      Following are test conditions prior to calling the       */
4220         /*      appr_check routine.                                      */
4221         /*                                                               */
4222         /*      A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4223         /*      with a map rule, we attempt to match the packet's        */
4224         /*      source port against in_dport, otherwise we'd compare the */
4225         /*      packet's destination.                                    */
4226         /* ------------------------------------------------------------- */
4227                 if (np->in_apr != NULL) {
4228                         i = appr_check(fin, nat);
4229                         if (i == -1) {  /* bail out before any header is altered */
4230                                 return -1;
4231                         }
4232                 }
4233         }
4234
4235 #ifdef  IPFILTER_SYNC
4236         ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4237 #endif
4238         /* Slot [0] of the counters tallies packets handled by fr_natin(). */
4239         MUTEX_ENTER(&nat->nat_lock);
4240         nat->nat_bytes[0] += fin->fin_plen;
4241         nat->nat_pkts[0]++;
4242         MUTEX_EXIT(&nat->nat_lock);
4243         /* Rewrite the destination in the IP header and in fin's cached copy. */
4244         fin->fin_ip->ip_dst = nat->nat_inip;
4245         fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4246         if (nflags & IPN_TCPUDP)
4247                 tcp = fin->fin_dp;
4248
4249         /*
4250          * Fix up checksums, not by recalculating them, but
4251          * simply computing adjustments.
4252          * Why only do this for some platforms on inbound packets ?
4253          * Because for those that it is done, IP processing is yet to happen
4254          * and so the IPv4 header checksum has not yet been evaluated.
4255          * Perhaps it should always be done for the benefit of things like
4256          * fast forwarding (so that it doesn't need to be recomputed) but with
4257          * header checksum offloading, perhaps it is a moot point.
4258          */
4259 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4260      defined(__osf__) || defined(linux)
4261         if (nat->nat_dir == NAT_OUTBOUND)
4262                 fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4263         else
4264                 fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4265 #endif
4266         /* Port / ICMP id rewriting needs a non-short packet at offset 0. */
4267         if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4268                 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4269                         tcp->th_dport = nat->nat_inport;
4270                         fin->fin_data[1] = ntohs(nat->nat_inport);
4271                 }
4272
4273
4274                 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4275                         icmp = fin->fin_dp;
4276
4277                         icmp->icmp_id = nat->nat_inport;
4278                 }
4279                 /* nat_proto() also sets fin_rev, which nat_update() reads. */
4280                 csump = nat_proto(fin, nat, nflags);
4281         }
4282
4283         nat_update(fin, nat, np);
4284
4285         /*
4286          * The above comments do not hold for layer 4 (or higher) checksums...
4287          */
4288         if (csump != NULL) {
4289                 if (nat->nat_dir == NAT_OUTBOUND)
4290                         fix_incksum(fin, csump, nat->nat_sumd[0]);
4291                 else
4292                         fix_outcksum(fin, csump, nat->nat_sumd[0]);
4293         }
4294         ATOMIC_INCL(nat_stats.ns_mapped[0]);
4295         fin->fin_flx |= FI_NATED;
4296         if (np != NULL && np->in_tag.ipt_num[0] != 0)
4297                 fin->fin_nattag = &np->in_tag;
4298         return 1;
4299 }
4300
4301
4302 /* ------------------------------------------------------------------------ */
4303 /* Function:    nat_proto                                                   */
4304 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4305 /*                         NULL if the transport protocol is not recognised */
4306 /*                         as needing a checksum update.                    */
4307 /* Parameters:  fin(I)    - pointer to packet information                   */
4308 /*              nat(I)    - pointer to NAT structure                        */
4309 /*              nflags(I) - NAT flags set for this packet                   */
4310 /*                                                                          */
4311 /* Sets fin_rev from the packet and NAT entry directions, then returns the  */
4312 /* address of the TCP, UDP or ICMP-query checksum so the caller can patch   */
4313 /* it.  Other per-protocol header rewrites belong here too; the only one    */
4314 /* done so far is clamping the MSS on TCP SYN packets.                      */
4315 /* ------------------------------------------------------------------------ */
4316 u_short *nat_proto(fin, nat, nflags)
4317 fr_info_t *fin;
4318 nat_t *nat;
4319 u_int nflags;
4320 {
4321         u_short *sump = NULL;
4322         int revdir;
4323
4324         /*
4325          * fin_rev ends up true when fin_out is clear on a NAT_OUTBOUND
4326          * entry, or set on a NAT_INBOUND one.
4327          */
4328         revdir = (fin->fin_out == 0) ? NAT_OUTBOUND : NAT_INBOUND;
4329         fin->fin_rev = (nat->nat_dir == revdir);
4330
4331         if (fin->fin_p == IPPROTO_TCP) {
4332                 tcphdr_t *tcp = fin->fin_dp;
4333
4334                 sump = &tcp->th_sum;
4335
4336                 /*
4337                  * Clamp the MSS, but only on SYN packets and only
4338                  * when the NAT entry carries a clamp value.
4339                  */
4340                 if ((nat->nat_mssclamp != 0) &&
4341                     ((tcp->th_flags & TH_SYN) != 0))
4342                         nat_mssclamp(tcp, nat->nat_mssclamp, fin, sump);
4343
4344         } else if (fin->fin_p == IPPROTO_UDP) {
4345                 udphdr_t *udp = fin->fin_dp;
4346
4347                 /*
4348                  * A zero uh_sum is left untouched.
4349                  */
4350                 if (udp->uh_sum != 0)
4351                         sump = &udp->uh_sum;
4352
4353         } else if (fin->fin_p == IPPROTO_ICMP) {
4354                 icmphdr_t *icmp = fin->fin_dp;
4355
4356                 /*
4357                  * Only ICMP queries with a non-zero checksum qualify;
4358                  * every other ICMP packet yields NULL.
4359                  */
4360                 if (((nflags & IPN_ICMPQUERY) != 0) &&
4361                     (icmp->icmp_cksum != 0))
4362                         sump = &icmp->icmp_cksum;
4363         }
4364
4365         return sump;
4366 }
4367
4368
4369 /* ------------------------------------------------------------------------ */
4370 /* Function:    fr_natunload                                                */
4371 /* Returns:     Nil                                                         */
4372 /* Parameters:  Nil                                                         */
4373 /*                                                                          */
4374 /* Free runtime-allocated NAT memory; destroy NAT locks if fr_nat_init set. */
4375 /* ------------------------------------------------------------------------ */
4376 void fr_natunload()
4377 {
4378         ipftq_t *ifq, *ifqnext;
4379         /* Empty the rule list and the NAT table first (results ignored). */
4380         (void) nat_clearlist();
4381         (void) nat_flushtable();
4382
4383         /*
4384          * Proxy timeout queues are not cleaned here because although they
4385          * exist on the NAT list, appr_unload is called after fr_natunload
4386          * and the proxies actually are responsible for them being created.
4387          * Should the proxy timeouts have their own list?  There's no real
4388          * justification as this is the only complication.
4389          */
4390         for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4391                 ifqnext = ifq->ifq_next;        /* saved: ifq may be freed below */
4392                 if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4393                     (fr_deletetimeoutqueue(ifq) == 0))
4394                         fr_freetimeoutqueue(ifq);
4395         }
4396         /* Release each table / counter array and clear its pointer. */
4397         if (nat_table[0] != NULL) {
4398                 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4399                 nat_table[0] = NULL;
4400         }
4401         if (nat_table[1] != NULL) {
4402                 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4403                 nat_table[1] = NULL;
4404         }
4405         if (nat_rules != NULL) {
4406                 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4407                 nat_rules = NULL;
4408         }
4409         if (rdr_rules != NULL) {
4410                 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4411                 rdr_rules = NULL;
4412         }
4413         if (ipf_hm_maptable != NULL) {
4414                 KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4415                 ipf_hm_maptable = NULL;
4416         }
4417         if (nat_stats.ns_bucketlen[0] != NULL) {
4418                 KFREES(nat_stats.ns_bucketlen[0],
4419                        sizeof(u_long *) * ipf_nattable_sz);
4420                 nat_stats.ns_bucketlen[0] = NULL;
4421         }
4422         if (nat_stats.ns_bucketlen[1] != NULL) {
4423                 KFREES(nat_stats.ns_bucketlen[1],
4424                        sizeof(u_long *) * ipf_nattable_sz);
4425                 nat_stats.ns_bucketlen[1] = NULL;
4426         }
4427         /* NOTE(review): ns_bucketlen sized with u_long * - check vs its alloc. */
4428         if (fr_nat_maxbucket_reset == 1)
4429                 fr_nat_maxbucket = 0;
4430         /* Lock teardown is gated on fr_nat_init, which is cleared here. */
4431         if (fr_nat_init == 1) {
4432                 fr_nat_init = 0;
4433                 fr_sttab_destroy(nat_tqb);
4434
4435                 RW_DESTROY(&ipf_natfrag);
4436                 RW_DESTROY(&ipf_nat);
4437
4438                 MUTEX_DESTROY(&ipf_nat_new);
4439                 MUTEX_DESTROY(&ipf_natio);
4440
4441                 MUTEX_DESTROY(&nat_udptq.ifq_lock);
4442                 MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4443                 MUTEX_DESTROY(&nat_iptq.ifq_lock);
4444         }
4445 }
4446
4447
4448 /* ------------------------------------------------------------------------ */
4449 /* Function:    fr_natexpire                                                */
4450 /* Returns:     Nil                                                         */
4451 /* Parameters:  Nil                                                         */
4452 /*                                                                          */
4453 /* Check all of the timeout queues for entries at the top which need to be  */
4454 /* expired.                                                                 */
4455 /* ------------------------------------------------------------------------ */
4456 void fr_natexpire()
4457 {
4458         ipftq_t *ifq, *ifqnext;
4459         ipftqent_t *tqe, *tqn;
4460         int i;
4461         SPL_INT(s);
4462
4463         SPL_NET(s);
4464         WRITE_ENTER(&ipf_nat);
4465         for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4466                 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4467                         if (tqe->tqe_die > fr_ticks)
4468                                 break;
4469                         tqn = tqe->tqe_next;
4470                         nat_delete(tqe->tqe_parent, NL_EXPIRE);
4471                 }
4472         }
4473
4474         for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4475                 ifqnext = ifq->ifq_next;
4476
4477                 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4478                         if (tqe->tqe_die > fr_ticks)
4479                                 break;
4480                         tqn = tqe->tqe_next;
4481                         nat_delete(tqe->tqe_parent, NL_EXPIRE);
4482                 }
4483         }
4484
4485         for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4486                 ifqnext = ifq->ifq_next;
4487
4488                 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4489                     (ifq->ifq_ref == 0)) {
4490                         fr_freetimeoutqueue(ifq);
4491                 }
4492         }
4493
4494         if (fr_nat_doflush != 0) {
4495                 nat_extraflush(2);
4496                 fr_nat_doflush = 0;
4497         }
4498
4499         RWLOCK_EXIT(&ipf_nat);
4500         SPL_X(s);
4501 }
4502
4503
4504 /* ------------------------------------------------------------------------ */
4505 /* Function:    fr_natsync                                                  */
4506 /* Returns:     Nil                                                         */
4507 /* Parameters:  ifp(I) - pointer to network interface                       */
4508 /*                                                                          */
4509 /* Walk through all of the currently active NAT sessions, looking for those */
4510 /* which need to have their translated address updated.                     */
4511 /* ------------------------------------------------------------------------ */
4512 void fr_natsync(ifp)
4513 void *ifp;
4514 {
4515         u_32_t sum1, sum2, sumd;
4516         struct in_addr in;
4517         ipnat_t *n;
4518         nat_t *nat;
4519         void *ifp2;
4520         SPL_INT(s);
4521
4522         if (fr_running <= 0)
4523                 return;
4524
4525         /*
4526          * Change IP addresses for NAT sessions for any protocol except TCP
4527          * since it will break the TCP connection anyway.  The only rules
4528          * which will get changed are those which are "map ... -> 0/32",
4529          * where the rule specifies the address is taken from the interface.
4530          */
4531         SPL_NET(s);
4532         WRITE_ENTER(&ipf_nat);
4533
4534         if (fr_running <= 0) {
4535                 RWLOCK_EXIT(&ipf_nat);
4536                 return;
4537         }
4538
4539         for (nat = nat_instances; nat; nat = nat->nat_next) {
4540                 if ((nat->nat_flags & IPN_TCP) != 0)
4541                         continue;
4542                 n = nat->nat_ptr;
4543                 if ((n == NULL) ||
4544                     (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4545                         continue;
4546                 if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4547                      (ifp == nat->nat_ifps[1]))) {
4548                         nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4549                         if (nat->nat_ifnames[1][0] != '\0') {
4550                                 nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4551                                                           4);
4552                         } else
4553                                 nat->nat_ifps[1] = nat->nat_ifps[0];
4554                         ifp2 = nat->nat_ifps[0];
4555                         if (ifp2 == NULL)
4556                                 continue;
4557
4558                         /*
4559                          * Change the map-to address to be the same as the
4560                          * new one.
4561                          */
4562                         sum1 = nat->nat_outip.s_addr;
4563                         if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4564                                 nat->nat_outip = in;
4565                         sum2 = nat->nat_outip.s_addr;
4566
4567                         if (sum1 == sum2)
4568                                 continue;
4569                         /*
4570                          * Readjust the checksum adjustment to take into
4571                          * account the new IP#.
4572                          */
4573                         CALC_SUMD(sum1, sum2, sumd);
4574                         /* XXX - dont change for TCP when solaris does
4575                          * hardware checksumming.
4576                          */
4577                         sumd += nat->nat_sumd[0];
4578                         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4579                         nat->nat_sumd[1] = nat->nat_sumd[0];
4580                 }
4581         }
4582
4583         for (n = nat_list; (n != NULL); n = n->in_next) {
4584                 if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4585                         n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4586                 if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4587                         n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4588         }
4589         RWLOCK_EXIT(&ipf_nat);
4590         SPL_X(s);
4591 }
4592
4593
/* ------------------------------------------------------------------------ */
/* Function:    nat_icmpquerytype4                                          */
/* Returns:     int - 1 == success, 0 == failure                            */
/* Parameters:  icmptype(I) - ICMP type number                              */
/*                                                                          */
/* Report whether icmptype is one of the ICMP query or reply types that     */
/* the NAT code handles as a query.                                         */
/* ------------------------------------------------------------------------ */
static int nat_icmpquerytype4(icmptype)
int icmptype;
{
        int isquery;

        /*
         * For the ICMP query NAT code, it is essential that both the query
         * and the reply match on the NAT rule.  Because the NAT structure
         * does not keep track of the icmptype, and a single NAT structure
         * is used for all icmp types with the same src, dest and id, the
         * replies are simply treated as queries too.  Although it seems
         * silly to call a reply a query, this is exactly how it is defined
         * in the IPv4 specification.
         *
         * Router advertisement/solicitation is currently unsupported: it
         * would require rewriting the ICMP data section.
         */
        switch (icmptype)
        {
        case ICMP_ECHO:
        case ICMP_ECHOREPLY:
        case ICMP_TSTAMP:
        case ICMP_TSTAMPREPLY:
        case ICMP_IREQ:
        case ICMP_IREQREPLY:
        case ICMP_MASKREQ:
        case ICMP_MASKREPLY:
                isquery = 1;
                break;
        default:
                isquery = 0;
                break;
        }

        return isquery;
}
4634
4635
4636 /* ------------------------------------------------------------------------ */
4637 /* Function:    nat_log                                                     */
4638 /* Returns:     Nil                                                         */
4639 /* Parameters:  nat(I)  - pointer to NAT structure                          */
4640 /*              type(I) - type of log entry to create                       */
4641 /*                                                                          */
4642 /* Creates a NAT log entry.                                                 */
4643 /* ------------------------------------------------------------------------ */
4644 void nat_log(nat, type)
4645 struct nat *nat;
4646 u_int type;
4647 {
4648 #ifdef  IPFILTER_LOG
4649 # ifndef LARGE_NAT
4650         struct ipnat *np;
4651         int rulen;
4652 # endif
4653         struct natlog natl;
4654         void *items[1];
4655         size_t sizes[1];
4656         int types[1];
4657
4658         natl.nl_inip = nat->nat_inip;
4659         natl.nl_outip = nat->nat_outip;
4660         natl.nl_origip = nat->nat_oip;
4661         natl.nl_bytes[0] = nat->nat_bytes[0];
4662         natl.nl_bytes[1] = nat->nat_bytes[1];
4663         natl.nl_pkts[0] = nat->nat_pkts[0];
4664         natl.nl_pkts[1] = nat->nat_pkts[1];
4665         natl.nl_origport = nat->nat_oport;
4666         natl.nl_inport = nat->nat_inport;
4667         natl.nl_outport = nat->nat_outport;
4668         natl.nl_p = nat->nat_p;
4669         natl.nl_type = type;
4670         natl.nl_rule = -1;
4671 # ifndef LARGE_NAT
4672         if (nat->nat_ptr != NULL) {
4673                 for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4674                         if (np == nat->nat_ptr) {
4675                                 natl.nl_rule = rulen;
4676                                 break;
4677                         }
4678         }
4679 # endif
4680         items[0] = &natl;
4681         sizes[0] = sizeof(natl);
4682         types[0] = 0;
4683
4684         (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4685 #endif
4686 }
4687
4688
4689 #if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*                                                                          */
/* Compatibility entry point for OpenBSD; simply forwards ifp to frsync().  */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp)
void *ifp;
{
        frsync(ifp);
}
4704 #endif
4705
4706
4707 /* ------------------------------------------------------------------------ */
4708 /* Function:    fr_ipnatderef                                               */
4709 /* Returns:     Nil                                                         */
4710 /* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4711 /* Write Locks: ipf_nat                                                     */
4712 /*                                                                          */
4713 /* ------------------------------------------------------------------------ */
4714 void fr_ipnatderef(inp)
4715 ipnat_t **inp;
4716 {
4717         ipnat_t *in;
4718
4719         in = *inp;
4720         *inp = NULL;
4721         in->in_space++;
4722         in->in_use--;
4723         if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4724                 if (in->in_apr)
4725                         appr_free(in->in_apr);
4726                 MUTEX_DESTROY(&in->in_lock);
4727                 KFREE(in);
4728                 nat_stats.ns_rules--;
4729 #if SOLARIS && !defined(_INET_IP_STACK_H)
4730                 if (nat_stats.ns_rules == 0)
4731                         pfil_delayed_copy = 1;
4732 #endif
4733         }
4734 }
4735
4736
4737 /* ------------------------------------------------------------------------ */
4738 /* Function:    fr_natderef                                                 */
4739 /* Returns:     Nil                                                         */
4740 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4741 /*                                                                          */
4742 /* Decrement the reference counter for this NAT table entry and free it if  */
4743 /* there are no more things using it.                                       */
4744 /*                                                                          */
4745 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4746 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4747 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4748 /* because nat_delete() will do that and send nat_ref to -1.                */
4749 /*                                                                          */
4750 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4751 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4752 /* ------------------------------------------------------------------------ */
4753 void fr_natderef(natp)
4754 nat_t **natp;
4755 {
4756         nat_t *nat;
4757
4758         nat = *natp;
4759         *natp = NULL;
4760
4761         MUTEX_ENTER(&nat->nat_lock);
4762         if (nat->nat_ref > 1) {
4763                 nat->nat_ref--;
4764                 MUTEX_EXIT(&nat->nat_lock);
4765                 return;
4766         }
4767         MUTEX_EXIT(&nat->nat_lock);
4768
4769         WRITE_ENTER(&ipf_nat);
4770         nat_delete(nat, NL_EXPIRE);
4771         RWLOCK_EXIT(&ipf_nat);
4772 }
4773
4774
4775 /* ------------------------------------------------------------------------ */
4776 /* Function:    fr_natclone                                                 */
4777 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
4778 /*                           else pointer to new state structure            */
4779 /* Parameters:  fin(I) - pointer to packet information                      */
4780 /*              is(I)  - pointer to master state structure                  */
4781 /* Write Lock:  ipf_nat                                                     */
4782 /*                                                                          */
4783 /* Create a "duplcate" state table entry from the master.                   */
4784 /* ------------------------------------------------------------------------ */
4785 static nat_t *fr_natclone(fin, nat)
4786 fr_info_t *fin;
4787 nat_t *nat;
4788 {
4789         frentry_t *fr;
4790         nat_t *clone;
4791         ipnat_t *np;
4792
4793         KMALLOC(clone, nat_t *);
4794         if (clone == NULL)
4795                 return NULL;
4796         bcopy((char *)nat, (char *)clone, sizeof(*clone));
4797
4798         MUTEX_NUKE(&clone->nat_lock);
4799
4800         clone->nat_aps = NULL;
4801         /*
4802          * Initialize all these so that nat_delete() doesn't cause a crash.
4803          */
4804         clone->nat_tqe.tqe_pnext = NULL;
4805         clone->nat_tqe.tqe_next = NULL;
4806         clone->nat_tqe.tqe_ifq = NULL;
4807         clone->nat_tqe.tqe_parent = clone;
4808
4809         clone->nat_flags &= ~SI_CLONE;
4810         clone->nat_flags |= SI_CLONED;
4811
4812         if (clone->nat_hm)
4813                 clone->nat_hm->hm_ref++;
4814
4815         if (nat_insert(clone, fin->fin_rev) == -1) {
4816                 KFREE(clone);
4817                 return NULL;
4818         }
4819         np = clone->nat_ptr;
4820         if (np != NULL) {
4821                 if (nat_logging)
4822                         nat_log(clone, (u_int)np->in_redir);
4823                 np->in_use++;
4824         }
4825         fr = clone->nat_fr;
4826         if (fr != NULL) {
4827                 MUTEX_ENTER(&fr->fr_lock);
4828                 fr->fr_ref++;
4829                 MUTEX_EXIT(&fr->fr_lock);
4830         }
4831
4832         /*
4833          * Because the clone is created outside the normal loop of things and
4834          * TCP has special needs in terms of state, initialise the timeout
4835          * state of the new NAT from here.
4836          */
4837         if (clone->nat_p == IPPROTO_TCP) {
4838                 (void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4839                                   clone->nat_flags);
4840         }
4841 #ifdef  IPFILTER_SYNC
4842         clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4843 #endif
4844         if (nat_logging)
4845                 nat_log(clone, NL_CLONE);
4846         return clone;
4847 }
4848
4849
4850 /* ------------------------------------------------------------------------ */
4851 /* Function:   nat_wildok                                                   */
4852 /* Returns:    int - 1 == packet's ports match wildcards                    */
4853 /*                   0 == packet's ports don't match wildcards              */
4854 /* Parameters: nat(I)   - NAT entry                                         */
4855 /*             sport(I) - source port                                       */
4856 /*             dport(I) - destination port                                  */
4857 /*             flags(I) - wildcard flags                                    */
4858 /*             dir(I)   - packet direction                                  */
4859 /*                                                                          */
4860 /* Use NAT entry and packet direction to determine which combination of     */
4861 /* wildcard flags should be used.                                           */
4862 /* ------------------------------------------------------------------------ */
4863 static int nat_wildok(nat, sport, dport, flags, dir)
4864 nat_t *nat;
4865 int sport;
4866 int dport;
4867 int flags;
4868 int dir;
4869 {
4870         /*
4871          * When called by       dir is set to
4872          * nat_inlookup         NAT_INBOUND (0)
4873          * nat_outlookup        NAT_OUTBOUND (1)
4874          *
4875          * We simply combine the packet's direction in dir with the original
4876          * "intended" direction of that NAT entry in nat->nat_dir to decide
4877          * which combination of wildcard flags to allow.
4878          */
4879
4880         switch ((dir << 1) | nat->nat_dir)
4881         {
4882         case 3: /* outbound packet / outbound entry */
4883                 if (((nat->nat_inport == sport) ||
4884                     (flags & SI_W_SPORT)) &&
4885                     ((nat->nat_oport == dport) ||
4886                     (flags & SI_W_DPORT)))
4887                         return 1;
4888                 break;
4889         case 2: /* outbound packet / inbound entry */
4890                 if (((nat->nat_outport == sport) ||
4891                     (flags & SI_W_DPORT)) &&
4892                     ((nat->nat_oport == dport) ||
4893                     (flags & SI_W_SPORT)))
4894                         return 1;
4895                 break;
4896         case 1: /* inbound packet / outbound entry */
4897                 if (((nat->nat_oport == sport) ||
4898                     (flags & SI_W_DPORT)) &&
4899                     ((nat->nat_outport == dport) ||
4900                     (flags & SI_W_SPORT)))
4901                         return 1;
4902                 break;
4903         case 0: /* inbound packet / inbound entry */
4904                 if (((nat->nat_oport == sport) ||
4905                     (flags & SI_W_SPORT)) &&
4906                     ((nat->nat_outport == dport) ||
4907                     (flags & SI_W_DPORT)))
4908                         return 1;
4909                 break;
4910         default:
4911                 break;
4912         }
4913
4914         return(0);
4915 }
4916
4917
4918 /* ------------------------------------------------------------------------ */
4919 /* Function:    nat_mssclamp                                                */
4920 /* Returns:     Nil                                                         */
4921 /* Parameters:  tcp(I)    - pointer to TCP header                           */
4922 /*              maxmss(I) - value to clamp the TCP MSS to                   */
4923 /*              fin(I)    - pointer to packet information                   */
4924 /*              csump(I)  - pointer to TCP checksum                         */
4925 /*                                                                          */
4926 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
4927 /* then the TCP header checksum will be updated to reflect the change in    */
4928 /* the MSS.                                                                 */
4929 /* ------------------------------------------------------------------------ */
4930 static void nat_mssclamp(tcp, maxmss, fin, csump)
4931 tcphdr_t *tcp;
4932 u_32_t maxmss;
4933 fr_info_t *fin;
4934 u_short *csump;
4935 {
4936         u_char *cp, *ep, opt;
4937         int hlen, advance;
4938         u_32_t mss, sumd;
4939
4940         hlen = TCP_OFF(tcp) << 2;
4941         if (hlen > sizeof(*tcp)) {
4942                 cp = (u_char *)tcp + sizeof(*tcp);
4943                 ep = (u_char *)tcp + hlen;
4944
4945                 while (cp < ep) {
4946                         opt = cp[0];
4947                         if (opt == TCPOPT_EOL)
4948                                 break;
4949                         else if (opt == TCPOPT_NOP) {
4950                                 cp++;
4951                                 continue;
4952                         }
4953
4954                         if (cp + 1 >= ep)
4955                                 break;
4956                         advance = cp[1];
4957                         if ((cp + advance > ep) || (advance <= 0))
4958                                 break;
4959                         switch (opt)
4960                         {
4961                         case TCPOPT_MAXSEG:
4962                                 if (advance != 4)
4963                                         break;
4964                                 mss = cp[2] * 256 + cp[3];
4965                                 if (mss > maxmss) {
4966                                         cp[2] = maxmss / 256;
4967                                         cp[3] = maxmss & 0xff;
4968                                         CALC_SUMD(mss, maxmss, sumd);
4969                                         fix_outcksum(fin, csump, sumd);
4970                                 }
4971                                 break;
4972                         default:
4973                                 /* ignore unknown options */
4974                                 break;
4975                         }
4976
4977                         cp += advance;
4978                 }
4979         }
4980 }
4981
4982
4983 /* ------------------------------------------------------------------------ */
4984 /* Function:    fr_setnatqueue                                              */
4985 /* Returns:     Nil                                                         */
4986 /* Parameters:  nat(I)- pointer to NAT structure                            */
4987 /*              rev(I) - forward(0) or reverse(1) direction                 */
4988 /* Locks:       ipf_nat (read or write)                                     */
4989 /*                                                                          */
4990 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
4991 /* determining which queue it should be placed on.                          */
4992 /* ------------------------------------------------------------------------ */
4993 void fr_setnatqueue(nat, rev)
4994 nat_t *nat;
4995 int rev;
4996 {
4997         ipftq_t *oifq, *nifq;
4998
4999         if (nat->nat_ptr != NULL)
5000                 nifq = nat->nat_ptr->in_tqehead[rev];
5001         else
5002                 nifq = NULL;
5003
5004         if (nifq == NULL) {
5005                 switch (nat->nat_p)
5006                 {
5007                 case IPPROTO_UDP :
5008                         nifq = &nat_udptq;
5009                         break;
5010                 case IPPROTO_ICMP :
5011                         nifq = &nat_icmptq;
5012                         break;
5013                 case IPPROTO_TCP :
5014                         nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
5015                         break;
5016                 default :
5017                         nifq = &nat_iptq;
5018                         break;
5019                 }
5020         }
5021
5022         oifq = nat->nat_tqe.tqe_ifq;
5023         /*
5024          * If it's currently on a timeout queue, move it from one queue to
5025          * another, else put it on the end of the newly determined queue.
5026          */
5027         if (oifq != NULL)
5028                 fr_movequeue(&nat->nat_tqe, oifq, nifq);
5029         else
5030                 fr_queueappend(&nat->nat_tqe, nifq, nat);
5031         return;
5032 }
5033
5034
5035 /* ------------------------------------------------------------------------ */
5036 /* Function:    nat_getnext                                                 */
5037 /* Returns:     int - 0 == ok, else error                                   */
5038 /* Parameters:  t(I)   - pointer to ipftoken structure                      */
5039 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5040 /*                                                                          */
5041 /* Fetch the next nat/ipnat structure pointer from the linked list and      */
5042 /* copy it out to the storage space pointed to by itp_data.  The next item  */
5043 /* in the list to look at is put back in the ipftoken structure.            */
5044 /* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
5045 /* ipf_freetoken will call a deref function for us and we don't want to call*/
5046 /* that twice (second time would be in the second switch statement below).  */
5047 /* ------------------------------------------------------------------------ */
5048 static int nat_getnext(t, itp)
5049 ipftoken_t *t;
5050 ipfgeniter_t *itp;
5051 {
5052         hostmap_t *hm, *nexthm = NULL, zerohm;
5053         ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5054         nat_t *nat, *nextnat = NULL, zeronat;
5055         int error = 0, count;
5056         char *dst;
5057
5058         count = itp->igi_nitems;
5059         if (count < 1)
5060                 return ENOSPC;
5061
5062         READ_ENTER(&ipf_nat);
5063
5064         switch (itp->igi_type)
5065         {
5066         case IPFGENITER_HOSTMAP :
5067                 hm = t->ipt_data;
5068                 if (hm == NULL) {
5069                         nexthm = ipf_hm_maplist;
5070                 } else {
5071                         nexthm = hm->hm_next;
5072                 }
5073                 break;
5074
5075         case IPFGENITER_IPNAT :
5076                 ipn = t->ipt_data;
5077                 if (ipn == NULL) {
5078                         nextipnat = nat_list;
5079                 } else {
5080                         nextipnat = ipn->in_next;
5081                 }
5082                 break;
5083
5084         case IPFGENITER_NAT :
5085                 nat = t->ipt_data;
5086                 if (nat == NULL) {
5087                         nextnat = nat_instances;
5088                 } else {
5089                         nextnat = nat->nat_next;
5090                 }
5091                 break;
5092         default :
5093                 RWLOCK_EXIT(&ipf_nat);
5094                 return EINVAL;
5095         }
5096
5097         dst = itp->igi_data;
5098         for (;;) {
5099                 switch (itp->igi_type)
5100                 {
5101                 case IPFGENITER_HOSTMAP :
5102                         if (nexthm != NULL) {
5103                                 if (count == 1) {
5104                                         ATOMIC_INC32(nexthm->hm_ref);
5105                                         t->ipt_data = nexthm;
5106                                 }
5107                         } else {
5108                                 bzero(&zerohm, sizeof(zerohm));
5109                                 nexthm = &zerohm;
5110                                 count = 1;
5111                                 t->ipt_data = NULL;
5112                         }
5113                         break;
5114
5115                 case IPFGENITER_IPNAT :
5116                         if (nextipnat != NULL) {
5117                                 if (count == 1) {
5118                                         MUTEX_ENTER(&nextipnat->in_lock);
5119                                         nextipnat->in_use++;
5120                                         MUTEX_EXIT(&nextipnat->in_lock);
5121                                         t->ipt_data = nextipnat;
5122                                 }
5123                         } else {
5124                                 bzero(&zeroipn, sizeof(zeroipn));
5125                                 nextipnat = &zeroipn;
5126                                 count = 1;
5127                                 t->ipt_data = NULL;
5128                         }
5129                         break;
5130
5131                 case IPFGENITER_NAT :
5132                         if (nextnat != NULL) {
5133                                 if (count == 1) {
5134                                         MUTEX_ENTER(&nextnat->nat_lock);
5135                                         nextnat->nat_ref++;
5136                                         MUTEX_EXIT(&nextnat->nat_lock);
5137                                         t->ipt_data = nextnat;
5138                                 }
5139                         } else {
5140                                 bzero(&zeronat, sizeof(zeronat));
5141                                 nextnat = &zeronat;
5142                                 count = 1;
5143                                 t->ipt_data = NULL;
5144                         }
5145                         break;
5146                 default :
5147                         break;
5148                 }
5149                 RWLOCK_EXIT(&ipf_nat);
5150
5151                 /*
5152                  * Copying out to user space needs to be done without the lock.
5153                  */
5154                 switch (itp->igi_type)
5155                 {
5156                 case IPFGENITER_HOSTMAP :
5157                         error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5158                         if (error != 0)
5159                                 error = EFAULT;
5160                         else
5161                                 dst += sizeof(*nexthm);
5162                         break;
5163
5164                 case IPFGENITER_IPNAT :
5165                         error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5166                         if (error != 0)
5167                                 error = EFAULT;
5168                         else
5169                                 dst += sizeof(*nextipnat);
5170                         break;
5171
5172                 case IPFGENITER_NAT :
5173                         error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5174                         if (error != 0)
5175                                 error = EFAULT;
5176                         else
5177                                 dst += sizeof(*nextnat);
5178                         break;
5179                 }
5180
5181                 if ((count == 1) || (error != 0))
5182                         break;
5183
5184                 count--;
5185
5186                 READ_ENTER(&ipf_nat);
5187
5188                 /*
5189                  * We need to have the lock again here to make sure that
5190                  * using _next is consistent.
5191                  */
5192                 switch (itp->igi_type)
5193                 {
5194                 case IPFGENITER_HOSTMAP :
5195                         nexthm = nexthm->hm_next;
5196                         break;
5197                 case IPFGENITER_IPNAT :
5198                         nextipnat = nextipnat->in_next;
5199                         break;
5200                 case IPFGENITER_NAT :
5201                         nextnat = nextnat->nat_next;
5202                         break;
5203                 }
5204         }
5205
5206
5207         switch (itp->igi_type)
5208         {
5209         case IPFGENITER_HOSTMAP :
5210                 if (hm != NULL) {
5211                         WRITE_ENTER(&ipf_nat);
5212                         fr_hostmapdel(&hm);
5213                         RWLOCK_EXIT(&ipf_nat);
5214                 }
5215                 break;
5216         case IPFGENITER_IPNAT :
5217                 if (ipn != NULL) {
5218                         fr_ipnatderef(&ipn);
5219                 }
5220                 break;
5221         case IPFGENITER_NAT :
5222                 if (nat != NULL) {
5223                         fr_natderef(&nat);
5224                 }
5225                 break;
5226         default :
5227                 break;
5228         }
5229
5230         return error;
5231 }
5232
5233
5234 /* ------------------------------------------------------------------------ */
5235 /* Function:    nat_iterator                                                */
5236 /* Returns:     int - 0 == ok, else error                                   */
5237 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5238 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5239 /*                                                                          */
5240 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5241 /* generic structure to iterate through a list.  There are three different  */
5242 /* linked lists of NAT related information to go through: NAT rules, active */
5243 /* NAT mappings and the NAT fragment cache.                                 */
5244 /* ------------------------------------------------------------------------ */
5245 static int nat_iterator(token, itp)
5246 ipftoken_t *token;
5247 ipfgeniter_t *itp;
5248 {
5249         int error;
5250
5251         if (itp->igi_data == NULL)
5252                 return EFAULT;
5253
5254         token->ipt_subtype = itp->igi_type;
5255
5256         switch (itp->igi_type)
5257         {
5258         case IPFGENITER_HOSTMAP :
5259         case IPFGENITER_IPNAT :
5260         case IPFGENITER_NAT :
5261                 error = nat_getnext(token, itp);
5262                 break;
5263
5264         case IPFGENITER_NATFRAG :
5265 #ifdef USE_MUTEXES
5266                 error = fr_nextfrag(token, itp, &ipfr_natlist,
5267                                     &ipfr_nattail, &ipf_natfrag);
5268 #else
5269                 error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5270 #endif
5271                 break;
5272         default :
5273                 error = EINVAL;
5274                 break;
5275         }
5276
5277         return error;
5278 }
5279
5280
5281 /* ------------------------------------------------------------------------ */
5282 /* Function:    nat_extraflush                                              */
5283 /* Returns:     int - 0 == success, -1 == failure                           */
5284 /* Parameters:  which(I) - how to flush the active NAT table                */
5285 /* Write Locks: ipf_nat                                                     */
5286 /*                                                                          */
5287 /* Flush nat tables.  Three actions currently defined:                      */
5288 /* which == 0 : flush all nat table entries                                 */
5289 /* which == 1 : flush TCP connections which have started to close but are   */
5290 /*            stuck for some reason.                                        */
5291 /* which == 2 : flush TCP connections which have been idle for a long time, */
5292 /*            starting at > 4 days idle and working back in successive half-*/
5293 /*            days to at most 12 hours old.  If this fails to free enough   */
5294 /*            slots then work backwards in half hour slots to 30 minutes.   */
5295 /*            If that too fails, then work backwards in 30 second intervals */
5296 /*            for the last 30 minutes to at worst 30 seconds idle.          */
5297 /* ------------------------------------------------------------------------ */
5298 static int nat_extraflush(which)
5299 int which;
5300 {
5301         ipftq_t *ifq, *ifqnext;
5302         nat_t *nat, **natp;
5303         ipftqent_t *tqn;
5304         int removed;
5305         SPL_INT(s);
5306
5307         removed = 0;
5308
5309         SPL_NET(s);
5310
5311         switch (which)
5312         {
5313         case 0 :
5314                 /*
5315                  * Style 0 flush removes everything...
5316                  */
5317                 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5318                         nat_delete(nat, NL_FLUSH);
5319                         removed++;
5320                 }
5321                 break;
5322
5323         case 1 :
5324                 /*
5325                  * Since we're only interested in things that are closing,
5326                  * we can start with the appropriate timeout queue.
5327                  */
5328                 for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5329                      ifq = ifq->ifq_next) {
5330
5331                         for (tqn = ifq->ifq_head; tqn != NULL; ) {
5332                                 nat = tqn->tqe_parent;
5333                                 tqn = tqn->tqe_next;
5334                                 if (nat->nat_p != IPPROTO_TCP)
5335                                         break;
5336                                 nat_delete(nat, NL_EXPIRE);
5337                                 removed++;
5338                         }
5339                 }
5340
5341                 /*
5342                  * Also need to look through the user defined queues.
5343                  */
5344                 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5345                         ifqnext = ifq->ifq_next;
5346                         for (tqn = ifq->ifq_head; tqn != NULL; ) {
5347                                 nat = tqn->tqe_parent;
5348                                 tqn = tqn->tqe_next;
5349                                 if (nat->nat_p != IPPROTO_TCP)
5350                                         continue;
5351
5352                                 if ((nat->nat_tcpstate[0] >
5353                                      IPF_TCPS_ESTABLISHED) &&
5354                                     (nat->nat_tcpstate[1] >
5355                                      IPF_TCPS_ESTABLISHED)) {
5356                                         nat_delete(nat, NL_EXPIRE);
5357                                         removed++;
5358                                 }
5359                         }
5360                 }
5361                 break;
5362
5363                 /*
5364                  * Args 5-11 correspond to flushing those particular states
5365                  * for TCP connections.
5366                  */
5367         case IPF_TCPS_CLOSE_WAIT :
5368         case IPF_TCPS_FIN_WAIT_1 :
5369         case IPF_TCPS_CLOSING :
5370         case IPF_TCPS_LAST_ACK :
5371         case IPF_TCPS_FIN_WAIT_2 :
5372         case IPF_TCPS_TIME_WAIT :
5373         case IPF_TCPS_CLOSED :
5374                 tqn = nat_tqb[which].ifq_head;
5375                 while (tqn != NULL) {
5376                         nat = tqn->tqe_parent;
5377                         tqn = tqn->tqe_next;
5378                         nat_delete(nat, NL_FLUSH);
5379                         removed++;
5380                 }
5381                 break;
5382          
5383         default :
5384                 if (which < 30)
5385                         break;
5386            
5387                 /*
5388                  * Take a large arbitrary number to mean the number of seconds
5389                  * for which which consider to be the maximum value we'll allow
5390                  * the expiration to be.
5391                  */
5392                 which = IPF_TTLVAL(which);
5393                 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5394                         if (fr_ticks - nat->nat_touched > which) {
5395                                 nat_delete(nat, NL_FLUSH);
5396                                 removed++;
5397                         } else
5398                                 natp = &nat->nat_next;
5399                 }
5400                 break;
5401         }
5402
5403         if (which != 2) {
5404                 SPL_X(s);
5405                 return removed;
5406         }
5407
5408         /*
5409          * Asked to remove inactive entries because the table is full.
5410          */
5411         if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
5412                 nat_last_force_flush = fr_ticks;
5413                 removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5414         }
5415
5416         SPL_X(s);
5417         return removed;
5418 }
5419
5420
5421 /* ------------------------------------------------------------------------ */
5422 /* Function:    nat_flush_entry                                             */
5423 /* Returns:     0 - always succeeds                                         */
5424 /* Parameters:  entry(I) - pointer to NAT entry                             */
5425 /* Write Locks: ipf_nat                                                     */
5426 /*                                                                          */
5427 /* This function is a stepping stone between ipf_queueflush() and           */
5428 /* nat_dlete().  It is used so we can provide a uniform interface via the   */
5429 /* ipf_queueflush() function.  Since the nat_delete() function returns void */
5430 /* we translate that to mean it always succeeds in deleting something.      */
5431 /* ------------------------------------------------------------------------ */
5432 static int nat_flush_entry(entry)
5433 void *entry;
5434 {
5435         nat_delete(entry, NL_FLUSH);
5436         return 0;
5437 }
5438
5439
5440 /* ------------------------------------------------------------------------ */
5441 /* Function:    nat_gettable                                                */
5442 /* Returns:     int     - 0 = success, else error                           */
5443 /* Parameters:  data(I) - pointer to ioctl data                             */
5444 /*                                                                          */
5445 /* This function handles ioctl requests for tables of nat information.      */
5446 /* At present the only table it deals with is the hash bucket statistics.   */
5447 /* ------------------------------------------------------------------------ */
5448 static int nat_gettable(data)
5449 char *data;
5450 {
5451         ipftable_t table;
5452         int error;
5453
5454         error = fr_inobj(data, &table, IPFOBJ_GTABLE);
5455         if (error != 0)
5456                 return error;
5457
5458         switch (table.ita_type)
5459         {
5460         case IPFTABLE_BUCKETS_NATIN :
5461                 error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table, 
5462                                 ipf_nattable_sz * sizeof(u_long));
5463                 break;
5464
5465         case IPFTABLE_BUCKETS_NATOUT :
5466                 error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table, 
5467                                 ipf_nattable_sz * sizeof(u_long));
5468                 break;
5469
5470         default :
5471                 return EINVAL;
5472         }
5473
5474         if (error != 0) {
5475                 error = EFAULT;
5476         }
5477         return error;
5478 }