]> CyberLeo.Net >> Repos - FreeBSD/releng/7.2.git/blob - sys/contrib/ipfilter/netinet/ip_nat.c
Create releng/7.2 from stable/7 in preparation for 7.2-RELEASE.
[FreeBSD/releng/7.2.git] / sys / contrib / ipfilter / netinet / ip_nat.c
1 /*      $FreeBSD$       */
2
3 /*
4  * Copyright (C) 1995-2003 by Darren Reed.
5  *
6  * See the IPFILTER.LICENCE file for details on licencing.
7  */
8 #if defined(KERNEL) || defined(_KERNEL)
9 # undef KERNEL
10 # undef _KERNEL
11 # define        KERNEL  1
12 # define        _KERNEL 1
13 #endif
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
17 #include <sys/time.h>
18 #include <sys/file.h>
19 #if defined(_KERNEL) && defined(__NetBSD_Version__) && \
20     (__NetBSD_Version__ >= 399002000)
21 # include <sys/kauth.h>
22 #endif
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24     defined(_KERNEL)
25 #if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
26 #  include "opt_ipfilter_log.h"
27 # else
28 #  include "opt_ipfilter.h"
29 # endif
30 #endif
31 #if !defined(_KERNEL)
32 # include <stdio.h>
33 # include <string.h>
34 # include <stdlib.h>
35 # define _KERNEL
36 # ifdef __OpenBSD__
37 struct file;
38 # endif
39 # include <sys/uio.h>
40 # undef _KERNEL
41 #endif
42 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
43 # include <sys/filio.h>
44 # include <sys/fcntl.h>
45 #else
46 # include <sys/ioctl.h>
47 #endif
48 #if !defined(AIX)
49 # include <sys/fcntl.h>
50 #endif
51 #if !defined(linux)
52 # include <sys/protosw.h>
53 #endif
54 #include <sys/socket.h>
55 #if defined(_KERNEL)
56 # include <sys/systm.h>
57 # if !defined(__SVR4) && !defined(__svr4__)
58 #  include <sys/mbuf.h>
59 # endif
60 #endif
61 #if defined(__SVR4) || defined(__svr4__)
62 # include <sys/filio.h>
63 # include <sys/byteorder.h>
64 # ifdef _KERNEL
65 #  include <sys/dditypes.h>
66 # endif
67 # include <sys/stream.h>
68 # include <sys/kmem.h>
69 #endif
70 #if __FreeBSD_version >= 300000
71 # include <sys/queue.h>
72 #endif
73 #include <net/if.h>
74 #if __FreeBSD_version >= 300000
75 # include <net/if_var.h>
76 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
77 #  include "opt_ipfilter.h"
78 # endif
79 #endif
80 #ifdef sun
81 # include <net/af.h>
82 #endif
83 #include <net/route.h>
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/ip.h>
87
88 #ifdef RFC1825
89 # include <vpn/md5.h>
90 # include <vpn/ipsec.h>
91 extern struct ifnet vpnif;
92 #endif
93
94 #if !defined(linux)
95 # include <netinet/ip_var.h>
96 #endif
97 #include <netinet/tcp.h>
98 #include <netinet/udp.h>
99 #include <netinet/ip_icmp.h>
100 #include "netinet/ip_compat.h"
101 #include <netinet/tcpip.h>
102 #include "netinet/ip_fil.h"
103 #include "netinet/ip_nat.h"
104 #include "netinet/ip_frag.h"
105 #include "netinet/ip_state.h"
106 #include "netinet/ip_proxy.h"
107 #ifdef  IPFILTER_SYNC
108 #include "netinet/ip_sync.h"
109 #endif
110 #if (__FreeBSD_version >= 300000)
111 # include <sys/malloc.h>
112 #endif
113 /* END OF INCLUDES */
114
115 #undef  SOCKADDR_IN
116 #define SOCKADDR_IN     struct sockaddr_in
117
118 #if !defined(lint)
119 static const char sccsid[] = "@(#)ip_nat.c      1.11 6/5/96 (C) 1995 Darren Reed";
120 static const char rcsid[] = "@(#)$FreeBSD$";
121 /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */
122 #endif
123
124
125 /* ======================================================================== */
126 /* How the NAT is organised and works.                                      */
127 /*                                                                          */
128 /* Inside (interface y) NAT       Outside (interface x)                     */
129 /* -------------------- -+- -------------------------------------           */
130 /* Packet going          |   out, processed by fr_checknatout() for x       */
131 /* ------------>         |   ------------>                                  */
132 /* src=10.1.1.1          |   src=192.1.1.1                                  */
133 /*                       |                                                  */
134 /*                       |   in, processed by fr_checknatin() for x         */
135 /* <------------         |   <------------                                  */
136 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
137 /* -------------------- -+- -------------------------------------           */
138 /* fr_checknatout() - changes ip_src and if required, sport                 */
139 /*             - creates a new mapping, if required.                        */
140 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
141 /*                                                                          */
142 /* In the NAT table, internal source is recorded as "in" and externally     */
143 /* seen as "out".                                                           */
144 /* ======================================================================== */
145
146
/*
 * NAT session tables.  fr_natinit() allocates nat_table[0] and
 * nat_table[1] with ipf_nattable_sz pointer slots each and zeroes them.
 */
147 nat_t   **nat_table[2] = { NULL, NULL },
148         *nat_instances = NULL;
/* Head of the rule list that fr_nat_ioctl() walks through in_next. */
149 ipnat_t *nat_list = NULL;
/*
 * Table sizes.  fr_natinit() uses ipf_nattable_sz, ipf_natrules_sz,
 * ipf_rdrrules_sz and ipf_hostmap_sz as the element counts when it
 * allocates nat_table[], nat_rules, rdr_rules and ipf_hm_maptable.
 * All five values are reported to userland by the SIOCGNATS ioctl.
 */
150 u_int   ipf_nattable_max = NAT_TABLE_MAX;
151 u_int   ipf_nattable_sz = NAT_TABLE_SZ;
152 u_int   ipf_natrules_sz = NAT_SIZE;
153 u_int   ipf_rdrrules_sz = RDR_SIZE;
154 u_int   ipf_hostmap_sz = HOSTMAP_SIZE;
/*
 * If fr_nat_maxbucket is still 0 when fr_natinit() runs, it is set to
 * twice the number of significant bits in ipf_nattable_sz.
 */
155 u_int   fr_nat_maxbucket = 0,
156         fr_nat_maxbucket_reset = 1;
/*
 * Bit k is set once a rule whose netmask yields count4bits() == k has
 * been added: nat_masks by nat_addnat(), rdr_masks by nat_addrdr().
 */
157 u_32_t  nat_masks = 0;
158 u_32_t  rdr_masks = 0;
159 u_long  nat_last_force_flush = 0;
/*
 * Rule hash tables: nat_rules buckets are chained through in_mnext
 * (see nat_addnat()), rdr_rules buckets through in_rnext (nat_addrdr()).
 */
160 ipnat_t **nat_rules = NULL;
161 ipnat_t **rdr_rules = NULL;
/*
 * Host map entries are kept both in a hash table (chained with
 * hm_hnext/hm_phnext) and on a single list (hm_next/hm_pnext); both are
 * maintained by nat_hostmap() and fr_hostmapdel().
 */
162 hostmap_t       **ipf_hm_maptable  = NULL;
163 hostmap_t       *ipf_hm_maplist  = NULL;
/*
 * Timeout queues.  fr_natinit() links them through ifq_next in the
 * order nat_tqb[] -> nat_udptq -> nat_icmptq -> nat_iptq.
 */
164 ipftq_t nat_tqb[IPF_TCP_NSTATES];
165 ipftq_t nat_udptq;
166 ipftq_t nat_icmptq;
167 ipftq_t nat_iptq;
168 ipftq_t *nat_utqe = NULL;
169 int     fr_nat_doflush = 0;
/* NAT logging defaults to on only when built with IPFILTER_LOG. */
170 #ifdef  IPFILTER_LOG
171 int     nat_logging = 1;
172 #else
173 int     nat_logging = 0;
174 #endif
175
/*
 * Default ifq_ttl values loaded by fr_natinit(): fr_defnatage into
 * nat_udptq, fr_defnatipage into nat_iptq and fr_defnaticmpage into
 * nat_icmptq (the latter is also the minimum applied to nat_tqb[]).
 */
176 u_long  fr_defnatage = DEF_NAT_AGE,
177         fr_defnatipage = 120,           /* 60 seconds */
178         fr_defnaticmpage = 6;           /* 3 seconds */
179 natstat_t nat_stats;
180 int     fr_nat_lock = 0;
/* Set to 1 by fr_natinit() once initialisation has completed. */
181 int     fr_nat_init = 0;
182 #if SOLARIS && !defined(_INET_IP_STACK_H)
183 extern  int             pfil_delayed_copy;
184 #endif
185
186 static  int     nat_flush_entry __P((void *));
187 static  int     nat_flushtable __P((void));
188 static  int     nat_clearlist __P((void));
189 static  void    nat_addnat __P((struct ipnat *));
190 static  void    nat_addrdr __P((struct ipnat *));
191 static  void    nat_delrdr __P((struct ipnat *));
192 static  void    nat_delnat __P((struct ipnat *));
193 static  int     fr_natgetent __P((caddr_t));
194 static  int     fr_natgetsz __P((caddr_t));
195 static  int     fr_natputent __P((caddr_t, int));
196 static  int     nat_extraflush __P((int));
197 static  int     nat_gettable __P((char *));
198 static  void    nat_tabmove __P((nat_t *));
199 static  int     nat_match __P((fr_info_t *, ipnat_t *));
200 static  INLINE  int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
201 static  INLINE  int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
202 static  hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
203                                     struct in_addr, struct in_addr, u_32_t));
204 static  int     nat_icmpquerytype4 __P((int));
205 static  int     nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
206 static  void    nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
207 static  int     nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
208                                       tcphdr_t *, nat_t **, int));
209 static  int     nat_resolverule __P((ipnat_t *));
210 static  nat_t   *fr_natclone __P((fr_info_t *, nat_t *));
211 static  void    nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
212 static  int     nat_wildok __P((nat_t *, int, int, int, int));
213 static  int     nat_getnext __P((ipftoken_t *, ipfgeniter_t *));
214 static  int     nat_iterator __P((ipftoken_t *, ipfgeniter_t *));
215
216
217 /* ------------------------------------------------------------------------ */
218 /* Function:    fr_natinit                                                  */
219 /* Returns:     int - 0 == success, -1 == failure                           */
220 /* Parameters:  Nil                                                         */
221 /*                                                                          */
222 /* Initialise all of the NAT locks, tables and other structures.            */
223 /* ------------------------------------------------------------------------ */
224 int fr_natinit()
225 {
226         int i;
227
228         KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
229         if (nat_table[0] != NULL)
230                 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
231         else
232                 return -1;
233
234         KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
235         if (nat_table[1] != NULL)
236                 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
237         else
238                 return -2;
239
240         KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
241         if (nat_rules != NULL)
242                 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
243         else
244                 return -3;
245
246         KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
247         if (rdr_rules != NULL)
248                 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
249         else
250                 return -4;
251
252         KMALLOCS(ipf_hm_maptable, hostmap_t **, \
253                  sizeof(hostmap_t *) * ipf_hostmap_sz);
254         if (ipf_hm_maptable != NULL)
255                 bzero((char *)ipf_hm_maptable,
256                       sizeof(hostmap_t *) * ipf_hostmap_sz);
257         else
258                 return -5;
259         ipf_hm_maplist = NULL;
260
261         KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
262                  ipf_nattable_sz * sizeof(u_long));
263         if (nat_stats.ns_bucketlen[0] == NULL)
264                 return -6;
265         bzero((char *)nat_stats.ns_bucketlen[0],
266               ipf_nattable_sz * sizeof(u_long));
267
268         KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
269                  ipf_nattable_sz * sizeof(u_long));
270         if (nat_stats.ns_bucketlen[1] == NULL)
271                 return -7;
272
273         bzero((char *)nat_stats.ns_bucketlen[1],
274               ipf_nattable_sz * sizeof(u_long));
275
276         if (fr_nat_maxbucket == 0) {
277                 for (i = ipf_nattable_sz; i > 0; i >>= 1)
278                         fr_nat_maxbucket++;
279                 fr_nat_maxbucket *= 2;
280         }
281
282         fr_sttab_init(nat_tqb);
283         /*
284          * Increase this because we may have "keep state" following this too
285          * and packet storms can occur if this is removed too quickly.
286          */
287         nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
288         nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
289         nat_udptq.ifq_ttl = fr_defnatage;
290         nat_udptq.ifq_ref = 1;
291         nat_udptq.ifq_head = NULL;
292         nat_udptq.ifq_tail = &nat_udptq.ifq_head;
293         MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
294         nat_udptq.ifq_next = &nat_icmptq;
295         nat_icmptq.ifq_ttl = fr_defnaticmpage;
296         nat_icmptq.ifq_ref = 1;
297         nat_icmptq.ifq_head = NULL;
298         nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
299         MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
300         nat_icmptq.ifq_next = &nat_iptq;
301         nat_iptq.ifq_ttl = fr_defnatipage;
302         nat_iptq.ifq_ref = 1;
303         nat_iptq.ifq_head = NULL;
304         nat_iptq.ifq_tail = &nat_iptq.ifq_head;
305         MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
306         nat_iptq.ifq_next = NULL;
307
308         for (i = 0; i < IPF_TCP_NSTATES; i++) {
309                 if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
310                         nat_tqb[i].ifq_ttl = fr_defnaticmpage;
311 #ifdef LARGE_NAT
312                 else if (nat_tqb[i].ifq_ttl > fr_defnatage)
313                         nat_tqb[i].ifq_ttl = fr_defnatage;
314 #endif
315         }
316
317         /*
318          * Increase this because we may have "keep state" following
319          * this too and packet storms can occur if this is removed
320          * too quickly.
321          */
322         nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
323
324         RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
325         RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
326         MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
327         MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
328
329         fr_nat_init = 1;
330
331         return 0;
332 }
333
334
335 /* ------------------------------------------------------------------------ */
336 /* Function:    nat_addrdr                                                  */
337 /* Returns:     Nil                                                         */
338 /* Parameters:  n(I) - pointer to NAT rule to add                           */
339 /*                                                                          */
340 /* Adds a redirect rule to the hash table of redirect rules and the list of */
341 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
342 /* use by redirect rules.                                                   */
343 /* ------------------------------------------------------------------------ */
344 static void nat_addrdr(n)
345 ipnat_t *n;
346 {
347         ipnat_t **np;
348         u_32_t j;
349         u_int hv;
350         int k;
351
352         k = count4bits(n->in_outmsk);
353         if ((k >= 0) && (k != 32))
354                 rdr_masks |= 1 << k;
355         j = (n->in_outip & n->in_outmsk);
356         hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
357         np = rdr_rules + hv;
358         while (*np != NULL)
359                 np = &(*np)->in_rnext;
360         n->in_rnext = NULL;
361         n->in_prnext = np;
362         n->in_hv = hv;
363         *np = n;
364 }
365
366
367 /* ------------------------------------------------------------------------ */
368 /* Function:    nat_addnat                                                  */
369 /* Returns:     Nil                                                         */
370 /* Parameters:  n(I) - pointer to NAT rule to add                           */
371 /*                                                                          */
372 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
373 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
374 /* redirect rules.                                                          */
375 /* ------------------------------------------------------------------------ */
376 static void nat_addnat(n)
377 ipnat_t *n;
378 {
379         ipnat_t **np;
380         u_32_t j;
381         u_int hv;
382         int k;
383
384         k = count4bits(n->in_inmsk);
385         if ((k >= 0) && (k != 32))
386                 nat_masks |= 1 << k;
387         j = (n->in_inip & n->in_inmsk);
388         hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
389         np = nat_rules + hv;
390         while (*np != NULL)
391                 np = &(*np)->in_mnext;
392         n->in_mnext = NULL;
393         n->in_pmnext = np;
394         n->in_hv = hv;
395         *np = n;
396 }
397
398
399 /* ------------------------------------------------------------------------ */
400 /* Function:    nat_delrdr                                                  */
401 /* Returns:     Nil                                                         */
402 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
403 /*                                                                          */
404 /* Removes a redirect rule from the hash table of redirect rules.           */
405 /* ------------------------------------------------------------------------ */
406 static void nat_delrdr(n)
407 ipnat_t *n;
408 {
409         if (n->in_rnext)
410                 n->in_rnext->in_prnext = n->in_prnext;
411         *n->in_prnext = n->in_rnext;
412 }
413
414
415 /* ------------------------------------------------------------------------ */
416 /* Function:    nat_delnat                                                  */
417 /* Returns:     Nil                                                         */
418 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
419 /*                                                                          */
420 /* Removes a NAT map rule from the hash table of NAT map rules.             */
421 /* ------------------------------------------------------------------------ */
422 static void nat_delnat(n)
423 ipnat_t *n;
424 {
425         if (n->in_mnext != NULL)
426                 n->in_mnext->in_pmnext = n->in_pmnext;
427         *n->in_pmnext = n->in_mnext;
428 }
429
430
431 /* ------------------------------------------------------------------------ */
432 /* Function:    nat_hostmap                                                 */
433 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
434 /*                                else a pointer to the hostmapping to use  */
435 /* Parameters:  np(I)   - pointer to NAT rule                               */
436 /*              real(I) - real IP address                                   */
437 /*              map(I)  - mapped IP address                                 */
438 /*              port(I) - destination port number                           */
439 /* Write Locks: ipf_nat                                                     */
440 /*                                                                          */
441 /* Check if an ip address has already been allocated for a given mapping    */
442 /* that is not doing port based translation.  If is not yet allocated, then */
443 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
444 /* ------------------------------------------------------------------------ */
445 static struct hostmap *nat_hostmap(np, src, dst, map, port)
446 ipnat_t *np;
447 struct in_addr src;
448 struct in_addr dst;
449 struct in_addr map;
450 u_32_t port;
451 {
452         hostmap_t *hm;
453         u_int hv;
454
455         hv = (src.s_addr ^ dst.s_addr);
456         hv += src.s_addr;
457         hv += dst.s_addr;
458         hv %= HOSTMAP_SIZE;
459         for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_next)
460                 if ((hm->hm_srcip.s_addr == src.s_addr) &&
461                     (hm->hm_dstip.s_addr == dst.s_addr) &&
462                     ((np == NULL) || (np == hm->hm_ipnat)) &&
463                     ((port == 0) || (port == hm->hm_port))) {
464                         hm->hm_ref++;
465                         return hm;
466                 }
467
468         if (np == NULL)
469                 return NULL;
470
471         KMALLOC(hm, hostmap_t *);
472         if (hm) {
473                 hm->hm_next = ipf_hm_maplist;
474                 hm->hm_pnext = &ipf_hm_maplist;
475                 if (ipf_hm_maplist != NULL)
476                         ipf_hm_maplist->hm_pnext = &hm->hm_next;
477                 ipf_hm_maplist = hm;
478                 hm->hm_hnext = ipf_hm_maptable[hv];
479                 hm->hm_phnext = ipf_hm_maptable + hv;
480                 if (ipf_hm_maptable[hv] != NULL)
481                         ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
482                 ipf_hm_maptable[hv] = hm;
483                 hm->hm_ipnat = np;
484                 hm->hm_srcip = src;
485                 hm->hm_dstip = dst;
486                 hm->hm_mapip = map;
487                 hm->hm_ref = 1;
488                 hm->hm_port = port;
489         }
490         return hm;
491 }
492
493
494 /* ------------------------------------------------------------------------ */
495 /* Function:    fr_hostmapdel                                               */
496 /* Returns:     Nil                                                         */
497 /* Parameters:  hmp(I) - pointer to hostmap structure pointer               */
498 /* Write Locks: ipf_nat                                                     */
499 /*                                                                          */
500 /* Decrement the references to this hostmap structure by one.  If this      */
501 /* reaches zero then remove it and free it.                                 */
502 /* ------------------------------------------------------------------------ */
503 void fr_hostmapdel(hmp)
504 struct hostmap **hmp;
505 {
506         struct hostmap *hm;
507
508         hm = *hmp;
509         *hmp = NULL;
510
511         hm->hm_ref--;
512         if (hm->hm_ref == 0) {
513                 if (hm->hm_hnext)
514                         hm->hm_hnext->hm_phnext = hm->hm_phnext;
515                 *hm->hm_phnext = hm->hm_hnext;
516                 if (hm->hm_next)
517                         hm->hm_next->hm_pnext = hm->hm_pnext;
518                 *hm->hm_pnext = hm->hm_next;
519                 KFREE(hm);
520         }
521 }
522
523
524 /* ------------------------------------------------------------------------ */
525 /* Function:    fix_outcksum                                                */
526 /* Returns:     Nil                                                         */
527 /* Parameters:  fin(I) - pointer to packet information                      */
528 /*              sp(I)  - location of 16bit checksum to update               */
529 /*              n((I)  - amount to adjust checksum by                       */
530 /*                                                                          */
531 /* Adjusts the 16bit checksum by "n" for packets going out.                 */
532 /* ------------------------------------------------------------------------ */
533 void fix_outcksum(fin, sp, n)
534 fr_info_t *fin;
535 u_short *sp;
536 u_32_t n;
537 {
538         u_short sumshort;
539         u_32_t sum1;
540
541         if (n == 0)
542                 return;
543
544         if (n & NAT_HW_CKSUM) {
545                 n &= 0xffff;
546                 n += fin->fin_dlen;
547                 n = (n & 0xffff) + (n >> 16);
548                 *sp = n & 0xffff;
549                 return;
550         }
551         sum1 = (~ntohs(*sp)) & 0xffff;
552         sum1 += (n);
553         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
554         /* Again */
555         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
556         sumshort = ~(u_short)sum1;
557         *(sp) = htons(sumshort);
558 }
559
560
561 /* ------------------------------------------------------------------------ */
562 /* Function:    fix_incksum                                                 */
563 /* Returns:     Nil                                                         */
564 /* Parameters:  fin(I) - pointer to packet information                      */
565 /*              sp(I)  - location of 16bit checksum to update               */
566 /*              n((I)  - amount to adjust checksum by                       */
567 /*                                                                          */
568 /* Adjusts the 16bit checksum by "n" for packets going in.                  */
569 /* ------------------------------------------------------------------------ */
570 void fix_incksum(fin, sp, n)
571 fr_info_t *fin;
572 u_short *sp;
573 u_32_t n;
574 {
575         u_short sumshort;
576         u_32_t sum1;
577
578         if (n == 0)
579                 return;
580
581         if (n & NAT_HW_CKSUM) {
582                 n &= 0xffff;
583                 n += fin->fin_dlen;
584                 n = (n & 0xffff) + (n >> 16);
585                 *sp = n & 0xffff;
586                 return;
587         }
588         sum1 = (~ntohs(*sp)) & 0xffff;
589         sum1 += ~(n) & 0xffff;
590         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
591         /* Again */
592         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
593         sumshort = ~(u_short)sum1;
594         *(sp) = htons(sumshort);
595 }
596
597
598 /* ------------------------------------------------------------------------ */
599 /* Function:    fix_datacksum                                               */
600 /* Returns:     Nil                                                         */
601 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
602 /*              n((I)  - amount to adjust checksum by                       */
603 /*                                                                          */
604 /* Fix_datacksum is used *only* for the adjustments of checksums in the     */
605 /* data section of an IP packet.                                            */
606 /*                                                                          */
607 /* The only situation in which you need to do this is when NAT'ing an       */
608 /* ICMP error message. Such a message, contains in its body the IP header   */
609 /* of the original IP packet, that causes the error.                        */
610 /*                                                                          */
611 /* You can't use fix_incksum or fix_outcksum in that case, because for the  */
612 /* kernel the data section of the ICMP error is just data, and no special   */
613 /* processing like hardware cksum or ntohs processing have been done by the */
614 /* kernel on the data section.                                              */
615 /* ------------------------------------------------------------------------ */
616 void fix_datacksum(sp, n)
617 u_short *sp;
618 u_32_t n;
619 {
620         u_short sumshort;
621         u_32_t sum1;
622
623         if (n == 0)
624                 return;
625
626         sum1 = (~ntohs(*sp)) & 0xffff;
627         sum1 += (n);
628         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
629         /* Again */
630         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
631         sumshort = ~(u_short)sum1;
632         *(sp) = htons(sumshort);
633 }
634
635
636 /* ------------------------------------------------------------------------ */
637 /* Function:    fr_nat_ioctl                                                */
638 /* Returns:     int - 0 == success, != 0 == failure                         */
639 /* Parameters:  data(I) - pointer to ioctl data                             */
640 /*              cmd(I)  - ioctl command integer                             */
641 /*              mode(I) - file mode bits used with open                     */
642 /*                                                                          */
643 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
644 /* ------------------------------------------------------------------------ */
645 int fr_nat_ioctl(data, cmd, mode, uid, ctx)
646 ioctlcmd_t cmd;
647 caddr_t data;
648 int mode, uid;
649 void *ctx;
650 {
651         ipnat_t *nat, *nt, *n = NULL, **np = NULL;
652         int error = 0, ret, arg, getlock;
653         ipnat_t natd;
654         SPL_INT(s);
655
656 #if (BSD >= 199306) && defined(_KERNEL)
657 # if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
658         if ((mode & FWRITE) &&
659              kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
660                                      KAUTH_REQ_NETWORK_FIREWALL_FW,
661                                      NULL, NULL, NULL)) {
662                 return EPERM;
663         }
664 # else
665         if ((securelevel >= 3) && (mode & FWRITE)) {
666                 return EPERM;
667         }
668 # endif
669 #endif
670
671 #if defined(__osf__) && defined(_KERNEL)
672         getlock = 0;
673 #else
674         getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
675 #endif
676
677         nat = NULL;     /* XXX gcc -Wuninitialized */
678         if (cmd == (ioctlcmd_t)SIOCADNAT) {
679                 KMALLOC(nt, ipnat_t *);
680         } else {
681                 nt = NULL;
682         }
683
684         if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
685                 if (mode & NAT_SYSSPACE) {
686                         bcopy(data, (char *)&natd, sizeof(natd));
687                         error = 0;
688                 } else {
689                         error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
690                 }
691         }
692
693         if (error != 0)
694                 goto done;
695
696         /*
697          * For add/delete, look to see if the NAT entry is already present
698          */
699         if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
700                 nat = &natd;
701                 if (nat->in_v == 0)     /* For backward compat. */
702                         nat->in_v = 4;
703                 nat->in_flags &= IPN_USERFLAGS;
704                 if ((nat->in_redir & NAT_MAPBLK) == 0) {
705                         if ((nat->in_flags & IPN_SPLIT) == 0)
706                                 nat->in_inip &= nat->in_inmsk;
707                         if ((nat->in_flags & IPN_IPRANGE) == 0)
708                                 nat->in_outip &= nat->in_outmsk;
709                 }
710                 MUTEX_ENTER(&ipf_natio);
711                 for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
712                         if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
713                                         IPN_CMPSIZ) == 0) {
714                                 if (nat->in_redir == NAT_REDIRECT &&
715                                     nat->in_pnext != n->in_pnext)
716                                         continue;
717                                 break;
718                         }
719         }
720
721         switch (cmd)
722         {
723 #ifdef  IPFILTER_LOG
724         case SIOCIPFFB :
725         {
726                 int tmp;
727
728                 if (!(mode & FWRITE))
729                         error = EPERM;
730                 else {
731                         tmp = ipflog_clear(IPL_LOGNAT);
732                         error = BCOPYOUT((char *)&tmp, (char *)data,
733                                          sizeof(tmp));
734                         if (error != 0)
735                                 error = EFAULT;
736                 }
737                 break;
738         }
739
740         case SIOCSETLG :
741                 if (!(mode & FWRITE))
742                         error = EPERM;
743                 else {
744                         error = BCOPYIN((char *)data, (char *)&nat_logging,
745                                         sizeof(nat_logging));
746                         if (error != 0)
747                                 error = EFAULT;
748                 }
749                 break;
750
751         case SIOCGETLG :
752                 error = BCOPYOUT((char *)&nat_logging, (char *)data,
753                                  sizeof(nat_logging));
754                 if (error != 0)
755                         error = EFAULT;
756                 break;
757
758         case FIONREAD :
759                 arg = iplused[IPL_LOGNAT];
760                 error = BCOPYOUT(&arg, data, sizeof(arg));
761                 if (error != 0)
762                         error = EFAULT;
763                 break;
764 #endif
765         case SIOCADNAT :
766                 if (!(mode & FWRITE)) {
767                         error = EPERM;
768                 } else if (n != NULL) {
769                         error = EEXIST;
770                 } else if (nt == NULL) {
771                         error = ENOMEM;
772                 }
773                 if (error != 0) {
774                         MUTEX_EXIT(&ipf_natio);
775                         break;
776                 }
777                 bcopy((char *)nat, (char *)nt, sizeof(*n));
778                 error = nat_siocaddnat(nt, np, getlock);
779                 MUTEX_EXIT(&ipf_natio);
780                 if (error == 0)
781                         nt = NULL;
782                 break;
783
784         case SIOCRMNAT :
785                 if (!(mode & FWRITE)) {
786                         error = EPERM;
787                         n = NULL;
788                 } else if (n == NULL) {
789                         error = ESRCH;
790                 }
791
792                 if (error != 0) {
793                         MUTEX_EXIT(&ipf_natio);
794                         break;
795                 }
796                 nat_siocdelnat(n, np, getlock);
797
798                 MUTEX_EXIT(&ipf_natio);
799                 n = NULL;
800                 break;
801
802         case SIOCGNATS :
803                 nat_stats.ns_table[0] = nat_table[0];
804                 nat_stats.ns_table[1] = nat_table[1];
805                 nat_stats.ns_list = nat_list;
806                 nat_stats.ns_maptable = ipf_hm_maptable;
807                 nat_stats.ns_maplist = ipf_hm_maplist;
808                 nat_stats.ns_nattab_sz = ipf_nattable_sz;
809                 nat_stats.ns_nattab_max = ipf_nattable_max;
810                 nat_stats.ns_rultab_sz = ipf_natrules_sz;
811                 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
812                 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
813                 nat_stats.ns_instances = nat_instances;
814                 nat_stats.ns_apslist = ap_sess_list;
815                 nat_stats.ns_ticks = fr_ticks;
816                 error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
817                 break;
818
819         case SIOCGNATL :
820             {
821                 natlookup_t nl;
822
823                 if (getlock) {
824                         READ_ENTER(&ipf_nat);
825                 }
826                 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
827                 if (error == 0) {
828                         if (nat_lookupredir(&nl) != NULL) {
829                                 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
830                         } else {
831                                 error = ESRCH;
832                         }
833                 }
834                 if (getlock) {
835                         RWLOCK_EXIT(&ipf_nat);
836                 }
837                 break;
838             }
839
840         case SIOCIPFFL :        /* old SIOCFLNAT & SIOCCNATL */
841                 if (!(mode & FWRITE)) {
842                         error = EPERM;
843                         break;
844                 }
845                 if (getlock) {
846                         WRITE_ENTER(&ipf_nat);
847                 }
848
849                 error = BCOPYIN(data, &arg, sizeof(arg));
850                 if (error != 0)
851                         error = EFAULT;
852                 else {
853                         if (arg == 0)
854                                 ret = nat_flushtable();
855                         else if (arg == 1)
856                                 ret = nat_clearlist();
857                         else
858                                 ret = nat_extraflush(arg);
859                 }
860
861                 if (getlock) {
862                         RWLOCK_EXIT(&ipf_nat);
863                 }
864                 if (error == 0) {
865                         error = BCOPYOUT(&ret, data, sizeof(ret));
866                 }
867                 break;
868
869         case SIOCPROXY :
870                 error = appr_ioctl(data, cmd, mode, ctx);
871                 break;
872
873         case SIOCSTLCK :
874                 if (!(mode & FWRITE)) {
875                         error = EPERM;
876                 } else {
877                         error = fr_lock(data, &fr_nat_lock);
878                 }
879                 break;
880
881         case SIOCSTPUT :
882                 if ((mode & FWRITE) != 0) {
883                         error = fr_natputent(data, getlock);
884                 } else {
885                         error = EACCES;
886                 }
887                 break;
888
889         case SIOCSTGSZ :
890                 if (fr_nat_lock) {
891                         if (getlock) {
892                                 READ_ENTER(&ipf_nat);
893                         }
894                         error = fr_natgetsz(data);
895                         if (getlock) {
896                                 RWLOCK_EXIT(&ipf_nat);
897                         }
898                 } else
899                         error = EACCES;
900                 break;
901
902         case SIOCSTGET :
903                 if (fr_nat_lock) {
904                         if (getlock) {
905                                 READ_ENTER(&ipf_nat);
906                         }
907                         error = fr_natgetent(data);
908                         if (getlock) {
909                                 RWLOCK_EXIT(&ipf_nat);
910                         }
911                 } else
912                         error = EACCES;
913                 break;
914
915         case SIOCGENITER :
916             {
917                 ipfgeniter_t iter;
918                 ipftoken_t *token;
919
920                 SPL_SCHED(s);
921                 error = fr_inobj(data, &iter, IPFOBJ_GENITER);
922                 if (error == 0) {
923                         token = ipf_findtoken(iter.igi_type, uid, ctx);
924                         if (token != NULL) {
925                                 error  = nat_iterator(token, &iter);
926                         }
927                         RWLOCK_EXIT(&ipf_tokens);
928                 }
929                 SPL_X(s);
930                 break;
931             }
932
933         case SIOCIPFDELTOK :
934                 error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
935                 if (error == 0) {
936                         SPL_SCHED(s);
937                         error = ipf_deltoken(arg, uid, ctx);
938                         SPL_X(s);
939                 } else {
940                         error = EFAULT;
941                 }
942                 break;
943
944         case SIOCGTQTAB :
945                 error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
946                 break;
947
948         case SIOCGTABL :
949                 error = nat_gettable(data);
950                 break;
951
952         default :
953                 error = EINVAL;
954                 break;
955         }
956 done:
957         if (nt != NULL)
958                 KFREE(nt);
959         return error;
960 }
961
962
963 /* ------------------------------------------------------------------------ */
964 /* Function:    nat_siocaddnat                                              */
965 /* Returns:     int - 0 == success, != 0 == failure                         */
966 /* Parameters:  n(I)       - pointer to new NAT rule                        */
967 /*              np(I)      - pointer to where to insert new NAT rule        */
968 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
969 /* Mutex Locks: ipf_natio                                                   */
970 /*                                                                          */
971 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
972 /* from information passed to the kernel, then add it  to the appropriate   */
973 /* NAT rule table(s).                                                       */
974 /* ------------------------------------------------------------------------ */
975 static int nat_siocaddnat(n, np, getlock)
976 ipnat_t *n, **np;
977 int getlock;
978 {
979         int error = 0, i, j;
980
981         if (nat_resolverule(n) != 0)
982                 return ENOENT;
983
984         if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
985                 return EINVAL;
986
987         n->in_use = 0;
988         if (n->in_redir & NAT_MAPBLK)
989                 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
990         else if (n->in_flags & IPN_AUTOPORTMAP)
991                 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
992         else if (n->in_flags & IPN_IPRANGE)
993                 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
994         else if (n->in_flags & IPN_SPLIT)
995                 n->in_space = 2;
996         else if (n->in_outmsk != 0)
997                 n->in_space = ~ntohl(n->in_outmsk);
998         else
999                 n->in_space = 1;
1000
1001         /*
1002          * Calculate the number of valid IP addresses in the output
1003          * mapping range.  In all cases, the range is inclusive of
1004          * the start and ending IP addresses.
1005          * If to a CIDR address, lose 2: broadcast + network address
1006          *                               (so subtract 1)
1007          * If to a range, add one.
1008          * If to a single IP address, set to 1.
1009          */
1010         if (n->in_space) {
1011                 if ((n->in_flags & IPN_IPRANGE) != 0)
1012                         n->in_space += 1;
1013                 else
1014                         n->in_space -= 1;
1015         } else
1016                 n->in_space = 1;
1017
1018         if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1019             ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1020                 n->in_nip = ntohl(n->in_outip) + 1;
1021         else if ((n->in_flags & IPN_SPLIT) &&
1022                  (n->in_redir & NAT_REDIRECT))
1023                 n->in_nip = ntohl(n->in_inip);
1024         else
1025                 n->in_nip = ntohl(n->in_outip);
1026         if (n->in_redir & NAT_MAP) {
1027                 n->in_pnext = ntohs(n->in_pmin);
1028                 /*
1029                  * Multiply by the number of ports made available.
1030                  */
1031                 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1032                         n->in_space *= (ntohs(n->in_pmax) -
1033                                         ntohs(n->in_pmin) + 1);
1034                         /*
1035                          * Because two different sources can map to
1036                          * different destinations but use the same
1037                          * local IP#/port #.
1038                          * If the result is smaller than in_space, then
1039                          * we may have wrapped around 32bits.
1040                          */
1041                         i = n->in_inmsk;
1042                         if ((i != 0) && (i != 0xffffffff)) {
1043                                 j = n->in_space * (~ntohl(i) + 1);
1044                                 if (j >= n->in_space)
1045                                         n->in_space = j;
1046                                 else
1047                                         n->in_space = 0xffffffff;
1048                         }
1049                 }
1050                 /*
1051                  * If no protocol is specified, multiple by 256 to allow for
1052                  * at least one IP:IP mapping per protocol.
1053                  */
1054                 if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1055                                 j = n->in_space * 256;
1056                                 if (j >= n->in_space)
1057                                         n->in_space = j;
1058                                 else
1059                                         n->in_space = 0xffffffff;
1060                 }
1061         }
1062
1063         /* Otherwise, these fields are preset */
1064
1065         if (getlock) {
1066                 WRITE_ENTER(&ipf_nat);
1067         }
1068         n->in_next = NULL;
1069         *np = n;
1070
1071         if (n->in_age[0] != 0)
1072                 n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1073
1074         if (n->in_age[1] != 0)
1075                 n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
1076
1077         if (n->in_redir & NAT_REDIRECT) {
1078                 n->in_flags &= ~IPN_NOTDST;
1079                 nat_addrdr(n);
1080         }
1081         if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1082                 n->in_flags &= ~IPN_NOTSRC;
1083                 nat_addnat(n);
1084         }
1085         MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1086
1087         n = NULL;
1088         nat_stats.ns_rules++;
1089 #if SOLARIS && !defined(_INET_IP_STACK_H)
1090         pfil_delayed_copy = 0;
1091 #endif
1092         if (getlock) {
1093                 RWLOCK_EXIT(&ipf_nat);                  /* WRITE */
1094         }
1095
1096         return error;
1097 }
1098
1099
1100 /* ------------------------------------------------------------------------ */
1101 /* Function:    nat_resolvrule                                              */
1102 /* Returns:     Nil                                                         */
1103 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1104 /*                                                                          */
1105 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1106 /* from information passed to the kernel, then add it  to the appropriate   */
1107 /* NAT rule table(s).                                                       */
1108 /* ------------------------------------------------------------------------ */
1109 static int nat_resolverule(n)
1110 ipnat_t *n;
1111 {
1112         n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1113         n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1114
1115         n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1116         if (n->in_ifnames[1][0] == '\0') {
1117                 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1118                 n->in_ifps[1] = n->in_ifps[0];
1119         } else {
1120                 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
1121         }
1122
1123         if (n->in_plabel[0] != '\0') {
1124                 n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1125                 if (n->in_apr == NULL)
1126                         return -1;
1127         }
1128         return 0;
1129 }
1130
1131
1132 /* ------------------------------------------------------------------------ */
1133 /* Function:    nat_siocdelnat                                              */
1134 /* Returns:     int - 0 == success, != 0 == failure                         */
1135 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1136 /*              np(I)      - pointer to where to insert new NAT rule        */
1137 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1138 /* Mutex Locks: ipf_natio                                                   */
1139 /*                                                                          */
1140 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1141 /* from information passed to the kernel, then add it  to the appropriate   */
1142 /* NAT rule table(s).                                                       */
1143 /* ------------------------------------------------------------------------ */
1144 static void nat_siocdelnat(n, np, getlock)
1145 ipnat_t *n, **np;
1146 int getlock;
1147 {
1148         if (getlock) {
1149                 WRITE_ENTER(&ipf_nat);
1150         }
1151         if (n->in_redir & NAT_REDIRECT)
1152                 nat_delrdr(n);
1153         if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1154                 nat_delnat(n);
1155         if (nat_list == NULL) {
1156                 nat_masks = 0;
1157                 rdr_masks = 0;
1158         }
1159
1160         if (n->in_tqehead[0] != NULL) {
1161                 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1162                         fr_freetimeoutqueue(n->in_tqehead[1]);
1163                 }
1164         }
1165
1166         if (n->in_tqehead[1] != NULL) {
1167                 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1168                         fr_freetimeoutqueue(n->in_tqehead[1]);
1169                 }
1170         }
1171
1172         *np = n->in_next;
1173
1174         if (n->in_use == 0) {
1175                 if (n->in_apr)
1176                         appr_free(n->in_apr);
1177                 MUTEX_DESTROY(&n->in_lock);
1178                 KFREE(n);
1179                 nat_stats.ns_rules--;
1180 #if SOLARIS && !defined(_INET_IP_STACK_H)
1181                 if (nat_stats.ns_rules == 0)
1182                         pfil_delayed_copy = 1;
1183 #endif
1184         } else {
1185                 n->in_flags |= IPN_DELETE;
1186                 n->in_next = NULL;
1187         }
1188         if (getlock) {
1189                 RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
1190         }
1191 }
1192
1193
1194 /* ------------------------------------------------------------------------ */
1195 /* Function:    fr_natgetsz                                                 */
1196 /* Returns:     int - 0 == success, != 0 is the error value.                */
1197 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1198 /*                        get the size of.                                  */
1199 /*                                                                          */
1200 /* Handle SIOCSTGSZ.                                                        */
1201 /* Return the size of the nat list entry to be copied back to user space.   */
1202 /* The size of the entry is stored in the ng_sz field and the enture natget */
1203 /* structure is copied back to the user.                                    */
1204 /* ------------------------------------------------------------------------ */
1205 static int fr_natgetsz(data)
1206 caddr_t data;
1207 {
1208         ap_session_t *aps;
1209         nat_t *nat, *n;
1210         natget_t ng;
1211
1212         if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1213                 return EFAULT;
1214
1215         nat = ng.ng_ptr;
1216         if (!nat) {
1217                 nat = nat_instances;
1218                 ng.ng_sz = 0;
1219                 /*
1220                  * Empty list so the size returned is 0.  Simple.
1221                  */
1222                 if (nat == NULL) {
1223                         if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1224                                 return EFAULT;
1225                         return 0;
1226                 }
1227         } else {
1228                 /*
1229                  * Make sure the pointer we're copying from exists in the
1230                  * current list of entries.  Security precaution to prevent
1231                  * copying of random kernel data.
1232                  */
1233                 for (n = nat_instances; n; n = n->nat_next)
1234                         if (n == nat)
1235                                 break;
1236                 if (!n)
1237                         return ESRCH;
1238         }
1239
1240         /*
1241          * Incluse any space required for proxy data structures.
1242          */
1243         ng.ng_sz = sizeof(nat_save_t);
1244         aps = nat->nat_aps;
1245         if (aps != NULL) {
1246                 ng.ng_sz += sizeof(ap_session_t) - 4;
1247                 if (aps->aps_data != 0)
1248                         ng.ng_sz += aps->aps_psiz;
1249         }
1250
1251         if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1252                 return EFAULT;
1253         return 0;
1254 }
1255
1256
1257 /* ------------------------------------------------------------------------ */
1258 /* Function:    fr_natgetent                                                */
1259 /* Returns:     int - 0 == success, != 0 is the error value.                */
1260 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1261 /*                        to NAT structure to copy out.                     */
1262 /*                                                                          */
1263 /* Handle SIOCSTGET.                                                        */
1264 /* Copies out NAT entry to user space.  Any additional data held for a      */
1265 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1266 /* ------------------------------------------------------------------------ */
1267 static int fr_natgetent(data)
1268 caddr_t data;
1269 {
1270         int error, outsize;
1271         ap_session_t *aps;
1272         nat_save_t *ipn, ipns;
1273         nat_t *n, *nat;
1274
1275         error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1276         if (error != 0)
1277                 return error;
1278
1279         if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1280                 return EINVAL;
1281
1282         KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1283         if (ipn == NULL)
1284                 return ENOMEM;
1285
1286         ipn->ipn_dsize = ipns.ipn_dsize;
1287         nat = ipns.ipn_next;
1288         if (nat == NULL) {
1289                 nat = nat_instances;
1290                 if (nat == NULL) {
1291                         if (nat_instances == NULL)
1292                                 error = ENOENT;
1293                         goto finished;
1294                 }
1295         } else {
1296                 /*
1297                  * Make sure the pointer we're copying from exists in the
1298                  * current list of entries.  Security precaution to prevent
1299                  * copying of random kernel data.
1300                  */
1301                 for (n = nat_instances; n; n = n->nat_next)
1302                         if (n == nat)
1303                                 break;
1304                 if (n == NULL) {
1305                         error = ESRCH;
1306                         goto finished;
1307                 }
1308         }
1309         ipn->ipn_next = nat->nat_next;
1310
1311         /*
1312          * Copy the NAT structure.
1313          */
1314         bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1315
1316         /*
1317          * If we have a pointer to the NAT rule it belongs to, save that too.
1318          */
1319         if (nat->nat_ptr != NULL)
1320                 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1321                       sizeof(ipn->ipn_ipnat));
1322
1323         /*
1324          * If we also know the NAT entry has an associated filter rule,
1325          * save that too.
1326          */
1327         if (nat->nat_fr != NULL)
1328                 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1329                       sizeof(ipn->ipn_fr));
1330
1331         /*
1332          * Last but not least, if there is an application proxy session set
1333          * up for this NAT entry, then copy that out too, including any
1334          * private data saved along side it by the proxy.
1335          */
1336         aps = nat->nat_aps;
1337         outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1338         if (aps != NULL) {
1339                 char *s;
1340
1341                 if (outsize < sizeof(*aps)) {
1342                         error = ENOBUFS;
1343                         goto finished;
1344                 }
1345
1346                 s = ipn->ipn_data;
1347                 bcopy((char *)aps, s, sizeof(*aps));
1348                 s += sizeof(*aps);
1349                 outsize -= sizeof(*aps);
1350                 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1351                         bcopy(aps->aps_data, s, aps->aps_psiz);
1352                 else
1353                         error = ENOBUFS;
1354         }
1355         if (error == 0) {
1356                 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1357         }
1358
1359 finished:
1360         if (ipn != NULL) {
1361                 KFREES(ipn, ipns.ipn_dsize);
1362         }
1363         return error;
1364 }
1365
1366
1367 /* ------------------------------------------------------------------------ */
1368 /* Function:    fr_natputent                                                */
1369 /* Returns:     int - 0 == success, != 0 is the error value.                */
1370 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1371 /*                            structure information to load into the kernel */
1372 /*              getlock(I) - flag indicating whether or not a write lock    */
1373 /*                           on ipf_nat is already held.                    */
1374 /*                                                                          */
1375 /* Handle SIOCSTPUT.                                                        */
1376 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1377 /* firewall rule data structures, if pointers to them indicate so.          */
1378 /* ------------------------------------------------------------------------ */
1379 static int fr_natputent(data, getlock)
1380 caddr_t data;
1381 int getlock;
1382 {
1383         nat_save_t ipn, *ipnn;
1384         ap_session_t *aps;
1385         nat_t *n, *nat;
1386         frentry_t *fr;
1387         fr_info_t fin;
1388         ipnat_t *in;
1389         int error;
1390
1391         error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1392         if (error != 0)
1393                 return error;
1394
1395         /*
1396          * Initialise early because of code at junkput label.
1397          */
1398         in = NULL;
1399         aps = NULL;
1400         nat = NULL;
1401         ipnn = NULL;
1402         fr = NULL;
1403
1404         /*
1405          * New entry, copy in the rest of the NAT entry if it's size is more
1406          * than just the nat_t structure.
1407          */
1408         if (ipn.ipn_dsize > sizeof(ipn)) {
1409                 if (ipn.ipn_dsize > 81920) {
1410                         error = ENOMEM;
1411                         goto junkput;
1412                 }
1413
1414                 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1415                 if (ipnn == NULL)
1416                         return ENOMEM;
1417
1418                 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1419                 if (error != 0) {
1420                         error = EFAULT;
1421                         goto junkput;
1422                 }
1423         } else
1424                 ipnn = &ipn;
1425
1426         KMALLOC(nat, nat_t *);
1427         if (nat == NULL) {
1428                 error = ENOMEM;
1429                 goto junkput;
1430         }
1431
1432         bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1433         /*
1434          * Initialize all these so that nat_delete() doesn't cause a crash.
1435          */
1436         bzero((char *)nat, offsetof(struct nat, nat_tqe));
1437         nat->nat_tqe.tqe_pnext = NULL;
1438         nat->nat_tqe.tqe_next = NULL;
1439         nat->nat_tqe.tqe_ifq = NULL;
1440         nat->nat_tqe.tqe_parent = nat;
1441
1442         /*
1443          * Restore the rule associated with this nat session
1444          */
1445         in = ipnn->ipn_nat.nat_ptr;
1446         if (in != NULL) {
1447                 KMALLOC(in, ipnat_t *);
1448                 nat->nat_ptr = in;
1449                 if (in == NULL) {
1450                         error = ENOMEM;
1451                         goto junkput;
1452                 }
1453                 bzero((char *)in, offsetof(struct ipnat, in_next6));
1454                 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1455                 in->in_use = 1;
1456                 in->in_flags |= IPN_DELETE;
1457
1458                 ATOMIC_INC(nat_stats.ns_rules);
1459
1460                 if (nat_resolverule(in) != 0) {
1461                         error = ESRCH;
1462                         goto junkput;
1463                 }
1464         }
1465
1466         /*
1467          * Check that the NAT entry doesn't already exist in the kernel.
1468          *
1469          * For NAT_OUTBOUND, we're lookup for a duplicate MAP entry.  To do
1470          * this, we check to see if the inbound combination of addresses and
1471          * ports is already known.  Similar logic is applied for NAT_INBOUND.
1472          * 
1473          */
1474         bzero((char *)&fin, sizeof(fin));
1475         fin.fin_p = nat->nat_p;
1476         if (nat->nat_dir == NAT_OUTBOUND) {
1477                 fin.fin_ifp = nat->nat_ifps[0];
1478                 fin.fin_data[0] = ntohs(nat->nat_oport);
1479                 fin.fin_data[1] = ntohs(nat->nat_outport);
1480                 if (getlock) {
1481                         READ_ENTER(&ipf_nat);
1482                 }
1483                 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1484                                  nat->nat_oip, nat->nat_inip);
1485                 if (getlock) {
1486                         RWLOCK_EXIT(&ipf_nat);
1487                 }
1488                 if (n != NULL) {
1489                         error = EEXIST;
1490                         goto junkput;
1491                 }
1492         } else if (nat->nat_dir == NAT_INBOUND) {
1493                 fin.fin_ifp = nat->nat_ifps[0];
1494                 fin.fin_data[0] = ntohs(nat->nat_outport);
1495                 fin.fin_data[1] = ntohs(nat->nat_oport);
1496                 if (getlock) {
1497                         READ_ENTER(&ipf_nat);
1498                 }
1499                 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1500                                   nat->nat_outip, nat->nat_oip);
1501                 if (getlock) {
1502                         RWLOCK_EXIT(&ipf_nat);
1503                 }
1504                 if (n != NULL) {
1505                         error = EEXIST;
1506                         goto junkput;
1507                 }
1508         } else {
1509                 error = EINVAL;
1510                 goto junkput;
1511         }
1512
1513         /*
1514          * Restore ap_session_t structure.  Include the private data allocated
1515          * if it was there.
1516          */
1517         aps = nat->nat_aps;
1518         if (aps != NULL) {
1519                 KMALLOC(aps, ap_session_t *);
1520                 nat->nat_aps = aps;
1521                 if (aps == NULL) {
1522                         error = ENOMEM;
1523                         goto junkput;
1524                 }
1525                 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1526                 if (in != NULL)
1527                         aps->aps_apr = in->in_apr;
1528                 else
1529                         aps->aps_apr = NULL;
1530                 if (aps->aps_psiz != 0) {
1531                         if (aps->aps_psiz > 81920) {
1532                                 error = ENOMEM;
1533                                 goto junkput;
1534                         }
1535                         KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1536                         if (aps->aps_data == NULL) {
1537                                 error = ENOMEM;
1538                                 goto junkput;
1539                         }
1540                         bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1541                               aps->aps_psiz);
1542                 } else {
1543                         aps->aps_psiz = 0;
1544                         aps->aps_data = NULL;
1545                 }
1546         }
1547
1548         /*
1549          * If there was a filtering rule associated with this entry then
1550          * build up a new one.
1551          */
1552         fr = nat->nat_fr;
1553         if (fr != NULL) {
1554                 if ((nat->nat_flags & SI_NEWFR) != 0) {
1555                         KMALLOC(fr, frentry_t *);
1556                         nat->nat_fr = fr;
1557                         if (fr == NULL) {
1558                                 error = ENOMEM;
1559                                 goto junkput;
1560                         }
1561                         ipnn->ipn_nat.nat_fr = fr;
1562                         fr->fr_ref = 1;
1563                         (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1564                         bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1565
1566                         fr->fr_ref = 1;
1567                         fr->fr_dsize = 0;
1568                         fr->fr_data = NULL;
1569                         fr->fr_type = FR_T_NONE;
1570
1571                         MUTEX_NUKE(&fr->fr_lock);
1572                         MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1573                 } else {
1574                         if (getlock) {
1575                                 READ_ENTER(&ipf_nat);
1576                         }
1577                         for (n = nat_instances; n; n = n->nat_next)
1578                                 if (n->nat_fr == fr)
1579                                         break;
1580
1581                         if (n != NULL) {
1582                                 MUTEX_ENTER(&fr->fr_lock);
1583                                 fr->fr_ref++;
1584                                 MUTEX_EXIT(&fr->fr_lock);
1585                         }
1586                         if (getlock) {
1587                                 RWLOCK_EXIT(&ipf_nat);
1588                         }
1589
1590                         if (!n) {
1591                                 error = ESRCH;
1592                                 goto junkput;
1593                         }
1594                 }
1595         }
1596
1597         if (ipnn != &ipn) {
1598                 KFREES(ipnn, ipn.ipn_dsize);
1599                 ipnn = NULL;
1600         }
1601
1602         if (getlock) {
1603                 WRITE_ENTER(&ipf_nat);
1604         }
1605         error = nat_insert(nat, nat->nat_rev);
1606         if ((error == 0) && (aps != NULL)) {
1607                 aps->aps_next = ap_sess_list;
1608                 ap_sess_list = aps;
1609         }
1610         if (getlock) {
1611                 RWLOCK_EXIT(&ipf_nat);
1612         }
1613
1614         if (error == 0)
1615                 return 0;
1616
1617         error = ENOMEM;
1618
1619 junkput:
1620         if (fr != NULL)
1621                 (void) fr_derefrule(&fr);
1622
1623         if ((ipnn != NULL) && (ipnn != &ipn)) {
1624                 KFREES(ipnn, ipn.ipn_dsize);
1625         }
1626         if (nat != NULL) {
1627                 if (aps != NULL) {
1628                         if (aps->aps_data != NULL) {
1629                                 KFREES(aps->aps_data, aps->aps_psiz);
1630                         }
1631                         KFREE(aps);
1632                 }
1633                 if (in != NULL) {
1634                         if (in->in_apr)
1635                                 appr_free(in->in_apr);
1636                         KFREE(in);
1637                 }
1638                 KFREE(nat);
1639         }
1640         return error;
1641 }
1642
1643
1644 /* ------------------------------------------------------------------------ */
1645 /* Function:    nat_delete                                                  */
1646 /* Returns:     Nil                                                         */
1647 /* Parameters:  nat(I)     - pointer to NAT structure to delete             */
1648 /*              logtype(I) - type of LOG record to create before deleting   */
1649 /* Write Lock:  ipf_nat                                                     */
1650 /*                                                                          */
1651 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1652 /* enabled then generate a NAT log record for this event.                   */
1653 /* ------------------------------------------------------------------------ */
1654 void nat_delete(nat, logtype)
1655 struct nat *nat;
1656 int logtype;
1657 {
1658         struct ipnat *ipn;
1659         int removed = 0;
1660         /* Emit the log record before any field of the entry is torn down. */
1661         if (logtype != 0 && nat_logging != 0)
1662                 nat_log(nat, logtype);
1663
1664         /*
1665          * Take it as a general indication that all the pointers are set if
1666          * nat_pnext is set.
1667          */
1668         if (nat->nat_pnext != NULL) {
1669                 removed = 1;
1670                 /* NOTE(review): ns_bucketlen looks like per-bucket chain lengths. */
1671                 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1672                 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1673                 /* Unlink from the list threaded through nat_next/nat_pnext. */
1674                 *nat->nat_pnext = nat->nat_next;
1675                 if (nat->nat_next != NULL) {
1676                         nat->nat_next->nat_pnext = nat->nat_pnext;
1677                         nat->nat_next = NULL;
1678                 }
1679                 nat->nat_pnext = NULL;
1680                 /* Unlink from the nat_hnext[0]/nat_phnext[0] chain. */
1681                 *nat->nat_phnext[0] = nat->nat_hnext[0];
1682                 if (nat->nat_hnext[0] != NULL) {
1683                         nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1684                         nat->nat_hnext[0] = NULL;
1685                 }
1686                 nat->nat_phnext[0] = NULL;
1687                 /* Same again for the nat_hnext[1]/nat_phnext[1] chain. */
1688                 *nat->nat_phnext[1] = nat->nat_hnext[1];
1689                 if (nat->nat_hnext[1] != NULL) {
1690                         nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1691                         nat->nat_hnext[1] = NULL;
1692                 }
1693                 nat->nat_phnext[1] = NULL;
1694
1695                 if ((nat->nat_flags & SI_WILDP) != 0)
1696                         nat_stats.ns_wilds--;
1697         }
1698         /* Clear the pointer that nat_me points at, then nat_me itself. */
1699         if (nat->nat_me != NULL) {
1700                 *nat->nat_me = NULL;
1701                 nat->nat_me = NULL;
1702         }
1703         /* Presumably takes the entry off its timeout queue - confirm. */
1704         if (nat->nat_tqe.tqe_ifq != NULL)
1705                 fr_deletequeueentry(&nat->nat_tqe);
1706
1707         if (logtype == NL_EXPIRE)
1708                 nat_stats.ns_expire++;
1709
1710         MUTEX_ENTER(&nat->nat_lock);
1711         /*
1712          * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1713          * This happens when a nat'd packet is blocked and we want to throw
1714          * away the NAT session.
1715          */
1716         if (logtype == NL_DESTROY) {
1717                 if (nat->nat_ref > 2) {
1718                         nat->nat_ref -= 2;
1719                         MUTEX_EXIT(&nat->nat_lock);
1720                         if (removed)
1721                                 nat_stats.ns_orphans++;
1722                         return;
1723                 }
1724         } else if (nat->nat_ref > 1) {
1725                 nat->nat_ref--;
1726                 MUTEX_EXIT(&nat->nat_lock);
1727                 if (removed)
1728                         nat_stats.ns_orphans++;
1729                 return;
1730         }
1731         MUTEX_EXIT(&nat->nat_lock);
1732
1733         /*
1734          * At this point, nat_ref is 1, doing "--" would make it 0..
1735          */
1736         nat->nat_ref = 0;
1737         if (!removed)
1738                 nat_stats.ns_orphans--;
1739         /* (!removed: presumably counted as an orphan by an earlier call.) */
1740 #ifdef  IPFILTER_SYNC
1741         if (nat->nat_sync)
1742                 ipfsync_del(nat->nat_sync);
1743 #endif
1744
1745         if (nat->nat_fr != NULL)
1746                 (void) fr_derefrule(&nat->nat_fr);
1747
1748         if (nat->nat_hm != NULL)
1749                 fr_hostmapdel(&nat->nat_hm);
1750
1751         /*
1752          * If there is an active reference from the nat entry to its parent
1753          * rule, decrement the rule's reference count and free it too if no
1754          * longer being used.
1755          */
1756         ipn = nat->nat_ptr;
1757         if (ipn != NULL) {
1758                 fr_ipnatderef(&ipn);
1759         }
1760
1761         MUTEX_DESTROY(&nat->nat_lock);
1762
1763         aps_free(nat->nat_aps);
1764         nat_stats.ns_inuse--;
1765
1766         /*
1767          * If there's a fragment table entry too for this nat entry, then
1768          * dereference that as well.  This is after nat_lock is released
1769          * because of Tru64.
1770          */
1771         fr_forgetnat((void *)nat);
1772
1773         KFREE(nat);
1774 }
1775
1776
1777 /* ------------------------------------------------------------------------ */
1778 /* Function:    nat_flushtable                                              */
1779 /* Returns:     int - number of NAT sessions deleted                        */
1780 /* Parameters:  Nil                                                         */
1781 /*                                                                          */
1782 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1783 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1784 /* ------------------------------------------------------------------------ */
1785 /*
1786  * nat_flushtable - clear the NAT table of all mapping entries.
1787  */
1788 static int nat_flushtable()
1789 {
1790         nat_t *nat;
1791         int j = 0;
1792
1793         /*
1794          * Every mapping is going away, so wipe the hash buckets in bulk:
1795          * ipf_nattable_sz entries per table, sized by the bucket element.
1796          */
1797         if (nat_table[0] != NULL)
1798                 bzero((char *)nat_table[0],
1799                       sizeof(*nat_table[0]) * ipf_nattable_sz);
1800         if (nat_table[1] != NULL)
1801                 bzero((char *)nat_table[1],
1802                       sizeof(*nat_table[1]) * ipf_nattable_sz);
1803         /* Each nat_delete() call unlinks the current head of nat_instances. */
1804         while ((nat = nat_instances) != NULL) {
1805                 nat_delete(nat, NL_FLUSH);
1806                 j++;
1807         }
1808
1809         nat_stats.ns_inuse = 0;
1810         return j;
1811 }
1812
1813
1814 /* ------------------------------------------------------------------------ */
1815 /* Function:    nat_clearlist                                               */
1816 /* Returns:     int - number of NAT/RDR rules deleted                       */
1817 /* Parameters:  Nil                                                         */
1818 /*                                                                          */
1819 /* Delete all rules in the current list of rules.  There is nothing elegant */
1820 /* about this cleanup: simply free all entries on the list of rules and     */
1821 /* clear out the tables used for hashed NAT rule lookups.                   */
1822 /* ------------------------------------------------------------------------ */
1823 static int nat_clearlist()
1824 {
1825         ipnat_t *n, **np = &nat_list;
1826         int i = 0;
1827
1828         if (nat_rules != NULL)
1829                 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1830         if (rdr_rules != NULL)
1831                 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1832
1833         while ((n = *np) != NULL) {
1834                 *np = n->in_next;
1835                 if (n->in_use == 0) {
1836                         if (n->in_apr != NULL)
1837                                 appr_free(n->in_apr);
1838                         MUTEX_DESTROY(&n->in_lock);
1839                         KFREE(n);
1840                         nat_stats.ns_rules--;
1841                 } else {
1842                         n->in_flags |= IPN_DELETE;
1843                         n->in_next = NULL;
1844                 }
1845                 i++;
1846         }
1847 #if SOLARIS && !defined(_INET_IP_STACK_H)
1848         pfil_delayed_copy = 1;
1849 #endif
1850         nat_masks = 0;
1851         rdr_masks = 0;
1852         return i;
1853 }
1854
1855
1856 /* ------------------------------------------------------------------------ */
1857 /* Function:    nat_newmap                                                  */
1858 /* Returns:     int - -1 == error, 0 == success                             */
1859 /* Parameters:  fin(I) - pointer to packet information                      */
1860 /*              nat(I) - pointer to NAT entry                               */
1861 /*              ni(I)  - pointer to structure with misc. information needed */
1862 /*                       to create new NAT entry.                           */
1863 /*                                                                          */
1864 /* Given an empty NAT structure, populate it with new information about a   */
1865 /* new NAT session, as defined by the matching NAT rule.                    */
1866 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1867 /* to the new IP address for the translation.                               */
1868 /* ------------------------------------------------------------------------ */
1869 static INLINE int nat_newmap(fin, nat, ni)
1870 fr_info_t *fin;
1871 nat_t *nat;
1872 natinfo_t *ni;
1873 {
1874         u_short st_port, dport, sport, port, sp, dp;
1875         struct in_addr in, inb;
1876         hostmap_t *hm;
1877         u_32_t flags;
1878         u_32_t st_ip;
1879         ipnat_t *np;
1880         nat_t *natl;
1881         int l;
1882
1883         /*
1884          * If it's an outbound packet which doesn't match any existing
1885          * record, then create a new port
1886          */
1887         l = 0;          /* attempt counter for the loop below */
1888         hm = NULL;
1889         np = ni->nai_np;
1890         st_ip = np->in_nip;     /* starting point, for the wrap-around test */
1891         st_port = np->in_pnext;
1892         flags = ni->nai_flags;
1893         sport = ni->nai_sport;
1894         dport = ni->nai_dport;
1895
1896         /*
1897          * Do a loop until we either run out of entries to try or we find
1898          * a NAT mapping that isn't currently being used.  This is done
1899          * because the change to the source is not (usually) being fixed.
1900          */
1901         do {
1902                 port = 0;
1903                 in.s_addr = htonl(np->in_nip);
1904                 if (l == 0) {
1905                         /*
1906                          * Check to see if there is an existing NAT
1907                          * setup for this IP address pair.
1908                          */
1909                         hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1910                                          in, 0);
1911                         if (hm != NULL)
1912                                 in.s_addr = hm->hm_mapip.s_addr;
1913                 } else if ((l == 1) && (hm != NULL)) {
1914                         fr_hostmapdel(&hm);     /* try 0 with hm clashed */
1915                 }
1916                 in.s_addr = ntohl(in.s_addr);
1917
1918                 nat->nat_hm = hm;
1919                 /* An all-ones in_outmsk with no port range gets one try only. */
1920                 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1921                         if (l > 0)
1922                                 return -1;
1923                 }
1924
1925                 if (np->in_redir == NAT_BIMAP &&
1926                     np->in_inmsk == np->in_outmsk) {
1927                         /*
1928                          * map the address block in a 1:1 fashion
1929                          */
1930                         in.s_addr = np->in_outip;
1931                         in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1932                         in.s_addr = ntohl(in.s_addr);
1933
1934                 } else if (np->in_redir & NAT_MAPBLK) {
1935                         if ((l >= np->in_ppip) || ((l > 0) &&
1936                              !(flags & IPN_TCPUDP)))
1937                                 return -1;
1938                         /* map-block - Calculate destination address.
1939                          * NOTE(review): in_ippip is a divisor here and
1940                          * is not checked for 0 in this function. */
1941                         in.s_addr = ntohl(fin->fin_saddr);
1942                         in.s_addr &= ntohl(~np->in_inmsk);
1943                         inb.s_addr = in.s_addr;
1944                         in.s_addr /= np->in_ippip;
1945                         in.s_addr &= ntohl(~np->in_outmsk);
1946                         in.s_addr += ntohl(np->in_outip);
1947                         /*
1948                          * Calculate destination port.
1949                          */
1950                         if ((flags & IPN_TCPUDP) &&
1951                             (np->in_ppip != 0)) {
1952                                 port = ntohs(sport) + l;
1953                                 port %= np->in_ppip;
1954                                 port += np->in_ppip *
1955                                         (inb.s_addr % np->in_ippip);
1956                                 port += MAPBLK_MINPORT;
1957                                 port = htons(port);
1958                         }
1959
1960                 } else if ((np->in_outip == 0) &&
1961                            (np->in_outmsk == 0xffffffff)) {
1962                         /*
1963                          * 0/32 - use the interface's IP address.
1964                          */
1965                         if ((l > 0) ||
1966                             fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1967                                        &in, NULL) == -1)
1968                                 return -1;
1969                         in.s_addr = ntohl(in.s_addr);
1970
1971                 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1972                         /*
1973                          * 0/0 - use the original source address/port.
1974                          */
1975                         if (l > 0)
1976                                 return -1;
1977                         in.s_addr = ntohl(fin->fin_saddr);
1978
1979                 } else if ((np->in_outmsk != 0xffffffff) &&
1980                            (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
1981                         np->in_nip++;
1982
1983                 natl = NULL;
1984
1985                 if ((flags & IPN_TCPUDP) &&
1986                     ((np->in_redir & NAT_MAPBLK) == 0) &&
1987                     (np->in_flags & IPN_AUTOPORTMAP)) {
1988                         /* "ports auto" (without map-block).  NOTE(review):
1989                          * in_ppip is used as a modulus just below, before
1990                          * the != 0 test further down - confirm it is never 0. */
1991                         if ((l > 0) && (l % np->in_ppip == 0)) {
1992                                 if (l > np->in_space) {
1993                                         return -1;
1994                                 } else if ((l > np->in_ppip) &&
1995                                            np->in_outmsk != 0xffffffff)
1996                                         np->in_nip++;
1997                         }
1998                         if (np->in_ppip != 0) {
1999                                 port = ntohs(sport);
2000                                 port += (l % np->in_ppip);
2001                                 port %= np->in_ppip;
2002                                 port += np->in_ppip *
2003                                         (ntohl(fin->fin_saddr) %
2004                                          np->in_ippip);
2005                                 port += MAPBLK_MINPORT;
2006                                 port = htons(port);
2007                         }
2008
2009                 } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2010                            (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2011                         /*
2012                          * Standard port translation.  Select next port.
2013                          */
2014                         port = htons(np->in_pnext++);
2015
2016                         if (np->in_pnext > ntohs(np->in_pmax)) {
2017                                 np->in_pnext = ntohs(np->in_pmin);
2018                                 if (np->in_outmsk != 0xffffffff)
2019                                         np->in_nip++;
2020                         }
2021                 }
2022                 /* Presumably pulls in_nip back inside the rule's output range. */
2023                 if (np->in_flags & IPN_IPRANGE) {
2024                         if (np->in_nip > ntohl(np->in_outmsk))
2025                                 np->in_nip = ntohl(np->in_outip);
2026                 } else {
2027                         if ((np->in_outmsk != 0xffffffff) &&
2028                             ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2029                             ntohl(np->in_outip))
2030                                 np->in_nip = ntohl(np->in_outip) + 1;
2031                 }
2032                 /* No new port was picked: fall back to the original source port. */
2033                 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2034                         port = sport;
2035
2036                 /*
2037                  * Here we do a lookup of the connection as seen from
2038                  * the outside.  If an IP# pair already exists, try
2039                  * again.  So if you have A->B becomes C->B, you can
2040                  * also have D->E become C->E but not D->B causing
2041                  * another C->B.  Also take protocol and ports into
2042                  * account when determining whether a pre-existing
2043                  * NAT setup will cause an external conflict where
2044                  * this is appropriate.
2045                  */
2046                 inb.s_addr = htonl(in.s_addr);
2047                 sp = fin->fin_data[0];
2048                 dp = fin->fin_data[1];
2049                 fin->fin_data[0] = fin->fin_data[1];
2050                 fin->fin_data[1] = htons(port);
2051                 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2052                                     (u_int)fin->fin_p, fin->fin_dst, inb);
2053                 fin->fin_data[0] = sp;  /* restore the values saved above */
2054                 fin->fin_data[1] = dp;
2055
2056                 /*
2057                  * Has the search wrapped around and come back to the
2058                  * start ?
2059                  */
2060                 if ((natl != NULL) &&
2061                     (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2062                     (np->in_nip != 0) && (st_ip == np->in_nip))
2063                         return -1;
2064                 l++;
2065         } while (natl != NULL);
2066
2067         if (np->in_space > 0)
2068                 np->in_space--;
2069
2070         /* Setup the NAT table */
2071         nat->nat_inip = fin->fin_src;
2072         nat->nat_outip.s_addr = htonl(in.s_addr);
2073         nat->nat_oip = fin->fin_dst;
2074         if (nat->nat_hm == NULL)
2075                 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2076                                           nat->nat_outip, 0);
2077
2078         /*
2079          * The ICMP checksum does not have a pseudo header containing
2080          * the IP addresses
2081          */
2082         ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2083         ni->nai_sum2 = LONG_SUM(in.s_addr);
2084         if ((flags & IPN_TCPUDP)) {
2085                 ni->nai_sum1 += ntohs(sport);
2086                 ni->nai_sum2 += ntohs(port);
2087         }
2088         /* Save the ports in the entry and rewrite the packet's header field. */
2089         if (flags & IPN_TCPUDP) {
2090                 nat->nat_inport = sport;
2091                 nat->nat_outport = port;        /* sport */
2092                 nat->nat_oport = dport;
2093                 ((tcphdr_t *)fin->fin_dp)->th_sport = port;
2094         } else if (flags & IPN_ICMPQUERY) {
2095                 ((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2096                 nat->nat_inport = port;
2097                 nat->nat_outport = port;
2098         } else if (fin->fin_p == IPPROTO_GRE) {
2099 #if 0
2100                 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2101                 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2102                         nat->nat_oport = 0;/*fin->fin_data[1];*/
2103                         nat->nat_inport = 0;/*fin->fin_data[0];*/
2104                         nat->nat_outport = 0;/*fin->fin_data[0];*/
2105                         nat->nat_call[0] = fin->fin_data[0];
2106                         nat->nat_call[1] = fin->fin_data[0];
2107                 }
2108 #endif
2109         }
2110         ni->nai_ip.s_addr = in.s_addr;  /* still in host byte order here */
2111         ni->nai_port = port;
2112         ni->nai_nport = dport;
2113         return 0;
2114 }
2115
2116
2117 /* ------------------------------------------------------------------------ */
2118 /* Function:    nat_newrdr                                                  */
2119 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2120 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2121 /* Parameters:  fin(I) - pointer to packet information                      */
2122 /*              nat(I) - pointer to NAT entry                               */
2123 /*              ni(I)  - pointer to structure with misc. information needed */
2124 /*                       to create new NAT entry.                           */
2125 /*                                                                          */
2126 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2127 /* to the new IP address for the translation.                               */
2128 /* ------------------------------------------------------------------------ */
2129 static INLINE int nat_newrdr(fin, nat, ni)
2130 fr_info_t *fin;
2131 nat_t *nat;
2132 natinfo_t *ni;
2133 {
2134         u_short nport, dport, sport;
2135         struct in_addr in, inb;
2136         u_short sp, dp;
2137         hostmap_t *hm;
2138         u_32_t flags;
2139         ipnat_t *np;
2140         nat_t *natl;
2141         int move;
2142
2143         move = 1;       /* returned on success unless cleared below */
2144         hm = NULL;
2145         in.s_addr = 0;
2146         np = ni->nai_np;
2147         flags = ni->nai_flags;
2148         sport = ni->nai_sport;
2149         dport = ni->nai_dport;
2150
2151         /*
2152          * If the matching rule has IPN_STICKY set, then we want to have the
2153          * same rule kick in as before.  Why would this happen?  If you have
2154          * a collection of rdr rules with "round-robin sticky", the current
2155          * packet might match a different one to the previous connection but
2156          * we want the same destination to be used.
2157          */
2158         if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2159             ((np->in_flags & IPN_STICKY) != 0)) {
2160                 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2161                                  (u_32_t)dport);
2162                 if (hm != NULL) {
2163                         in.s_addr = ntohl(hm->hm_mapip.s_addr);
2164                         np = hm->hm_ipnat;
2165                         ni->nai_np = np;
2166                         move = 0;
2167                 }
2168         }
2169         /* NOTE(review): hm is never stored in nat->nat_hm or released below. */
2170         /*
2171          * Otherwise, it's an inbound packet. Most likely, we don't
2172          * want to rewrite source ports and source addresses. Instead,
2173          * we want to rewrite to a fixed internal address and fixed
2174          * internal port.
2175          */
2176         if (np->in_flags & IPN_SPLIT) {
2177                 in.s_addr = np->in_nip;
2178
2179                 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2180                         hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2181                                          in, (u_32_t)dport);
2182                         if (hm != NULL) {
2183                                 in.s_addr = hm->hm_mapip.s_addr;
2184                                 move = 0;
2185                         }
2186                 }
2187                 /* NOTE(review): hm_mapip is copied without ntohl() here - confirm. */
2188                 if (hm == NULL || hm->hm_ref == 1) {
2189                         if (np->in_inip == htonl(in.s_addr)) {
2190                                 np->in_nip = ntohl(np->in_inmsk);
2191                                 move = 0;
2192                         } else {
2193                                 np->in_nip = ntohl(np->in_inip);
2194                         }
2195                 }
2196
2197         } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2198                 /*
2199                  * 0/32 - use the interface's IP address.
2200                  */
2201                 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2202                         return -1;
2203                 in.s_addr = ntohl(in.s_addr);
2204
2205         } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2206                 /*
2207                  * 0/0 - use the original destination address/port.
2208                  */
2209                 in.s_addr = ntohl(fin->fin_daddr);
2210
2211         } else if (np->in_redir == NAT_BIMAP &&
2212                    np->in_inmsk == np->in_outmsk) {
2213                 /*
2214                  * map the address block in a 1:1 fashion
2215                  */
2216                 in.s_addr = np->in_inip;
2217                 in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2218                 in.s_addr = ntohl(in.s_addr);
2219         } else {
2220                 in.s_addr = ntohl(np->in_inip);
2221         }
2222         /* Choose the new destination port (nport). */
2223         if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2224                 nport = dport;
2225         else {
2226                 /*
2227                  * Whilst not optimized for the case where
2228                  * pmin == pmax, the gain is not significant.
2229                  */
2230                 if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2231                     (np->in_pmin != np->in_pmax)) {
2232                         nport = ntohs(dport) - ntohs(np->in_pmin) +
2233                                 ntohs(np->in_pnext);
2234                         nport = htons(nport);
2235                 } else
2236                         nport = np->in_pnext;
2237         }
2238
2239         /*
2240          * When the redirect-to address is set to 0.0.0.0, just
2241          * assume a blank `forwarding' of the packet.  We don't
2242          * setup any translation for this either.
2243          */
2244         if (in.s_addr == 0) {
2245                 if (nport == dport)
2246                         return -1;
2247                 in.s_addr = ntohl(fin->fin_daddr);
2248         }
2249
2250         /*
2251          * Check to see if this redirect mapping already exists and if
2252          * it does, return "failure" (allowing it to be created will just
2253          * cause one or both of these "connections" to stop working.)
2254          */
2255         inb.s_addr = htonl(in.s_addr);
2256         sp = fin->fin_data[0];
2257         dp = fin->fin_data[1];
2258         fin->fin_data[1] = fin->fin_data[0];
2259         fin->fin_data[0] = ntohs(nport);
2260         natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2261                              (u_int)fin->fin_p, inb, fin->fin_src);
2262         fin->fin_data[0] = sp;  /* restore the values saved above */
2263         fin->fin_data[1] = dp;
2264         if (natl != NULL)
2265                 return -1;
2266         /* No clash found: fill in the addresses of the new entry. */
2267         nat->nat_inip.s_addr = htonl(in.s_addr);
2268         nat->nat_outip = fin->fin_dst;
2269         nat->nat_oip = fin->fin_src;
2270         if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2271                 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
2272                                           (u_32_t)dport);
2273         /* sum1: original destination addr+port; sum2: the new addr+port. */
2274         ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2275         ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2276
2277         ni->nai_ip.s_addr = in.s_addr;
2278         ni->nai_nport = nport;
2279         ni->nai_port = sport;
2280         /* Save the ports in the entry and rewrite the packet's header field. */
2281         if (flags & IPN_TCPUDP) {
2282                 nat->nat_inport = nport;
2283                 nat->nat_outport = dport;
2284                 nat->nat_oport = sport;
2285                 ((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2286         } else if (flags & IPN_ICMPQUERY) {
2287                 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2288                 nat->nat_inport = nport;
2289                 nat->nat_outport = nport;
2290         } else if (fin->fin_p == IPPROTO_GRE) {
2291 #if 0
2292                 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2293                 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2294                         nat->nat_call[0] = fin->fin_data[0];
2295                         nat->nat_call[1] = fin->fin_data[1];
2296                         nat->nat_oport = 0; /*fin->fin_data[0];*/
2297                         nat->nat_inport = 0; /*fin->fin_data[1];*/
2298                         nat->nat_outport = 0; /*fin->fin_data[1];*/
2299                 }
2300 #endif
2301         }
2302
2303         return move;
2304 }
2305
2306 /* ------------------------------------------------------------------------ */
2307 /* Function:    nat_new                                                     */
2308 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2309 /*                       else pointer to the new (or pre-existing) entry    */
2310 /* Parameters:  fin(I)       - pointer to packet information                */
2311 /*              np(I)        - pointer to NAT rule                          */
2312 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2313 /*              flags(I)     - flags describing the current packet          */
2314 /*              direction(I) - direction of packet (in/out)                 */
2315 /* Write Lock:  ipf_nat                                                     */
2316 /*                                                                          */
2317 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2318 /* in any way.                                                              */
     /*                                                                          */
     /* If a lookup finds that an entry for this packet already exists, the      */
     /* freshly allocated structure is freed and the existing entry is returned  */
     /* instead (without FI_NEWNAT being set in fin->fin_flx).                   */
2319 /*                                                                          */
2320 /* This function is in three main parts: (1) deal with creating a new NAT   */
2321 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2322 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2323 /* and (3) building that structure and putting it into the NAT table(s).    */
2324 /*                                                                          */
2325 /* NOTE: natsave should NOT be used to point back to an ipstate_t struct    */
2326 /*       as it can result in memory being corrupted.                        */
2327 /* ------------------------------------------------------------------------ */
2328 nat_t *nat_new(fin, np, natsave, flags, direction)
2329 fr_info_t *fin;
2330 ipnat_t *np;
2331 nat_t **natsave;
2332 u_int flags;
2333 int direction;
2334 {
2335         u_short port = 0, sport = 0, dport = 0, nport = 0;
2336         tcphdr_t *tcp = NULL;
2337         hostmap_t *hm = NULL;
2338         struct in_addr in;
2339         nat_t *nat, *natl;
2340         u_int nflags;
2341         natinfo_t ni;
2342         u_32_t sumd;
2343         int move;
2344 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2345         qpktinfo_t *qpi = fin->fin_qpi;
2346 #endif
2347
             /*
              * Refuse outright once the number of entries in use has reached
              * the configured maximum.  Nothing has been allocated and no
              * mutex is held yet, so a plain return is sufficient.
              */
2348         if (nat_stats.ns_inuse >= ipf_nattable_max) {
2349                 nat_stats.ns_memfail++;
2350                 fr_nat_doflush = 1;
2351                 return NULL;
2352         }
2353
2354         move = 1;
             /* Keep only the flags common to rule and packet that are in */
             /* the NAT_FROMRULE mask; these are used for the lookups below. */
2355         nflags = np->in_flags & flags;
2356         nflags &= NAT_FROMRULE;
2357
             /* ni carries the working state handed to nat_newmap/nat_newrdr */
             /* and later to nat_finalise. */
2358         ni.nai_np = np;
2359         ni.nai_nflags = nflags;
2360         ni.nai_flags = flags;
2361         ni.nai_dport = 0;
2362         ni.nai_sport = 0;
2363
2364         /* Give me a new nat */
2365         KMALLOC(nat, nat_t *);
2366         if (nat == NULL) {
2367                 nat_stats.ns_memfail++;
2368                 /*
2369                  * Try to automatically tune the max # of entries in the
2370                  * table allowed to be less than what will cause kmem_alloc()
2371                  * to fail and try to eliminate panics due to out of memory
2372                  * conditions arising.
2373                  */
                     /*
                      * NOTE(review): nothing here checks that ns_inuse is at
                      * least 100 before the subtraction - confirm the new
                      * maximum cannot go negative or wrap.
                      */
2374                 if (ipf_nattable_max > ipf_nattable_sz) {
2375                         ipf_nattable_max = nat_stats.ns_inuse - 100;
2376                         printf("ipf_nattable_max reduced to %d\n",
2377                                 ipf_nattable_max);
2378                 }
2379                 return NULL;
2380         }
2381
             /* Record the packet's ports (passed through htons()) or, for */
             /* ICMP queries, its id field in ni before building the entry. */
2382         if (flags & IPN_TCPUDP) {
2383                 tcp = fin->fin_dp;
2384                 ni.nai_sport = htons(fin->fin_sport);
2385                 ni.nai_dport = htons(fin->fin_dport);
2386         } else if (flags & IPN_ICMPQUERY) {
2387                 /*
2388                  * In the ICMP query NAT code, we translate the ICMP id fields
2389                  * to make them unique. This is independent of the ICMP type
2390                  * (e.g. in the unlikely event that a host sends an echo and
2391                  * a tstamp request with the same id, both packets will have
2392                  * their ip address/id field changed in the same way).
2393                  */
2394                 /* The icmp_id field is used by the sender to identify the
2395                  * process making the icmp request. (the receiver just
2396                  * copies it back in its response). So, it closely matches
2397                  * the concept of source port. We overlay sport, so we can
2398                  * maximally reuse the existing code.
2399                  */
2400                 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2401                 ni.nai_dport = ni.nai_sport;
2402         }
2403
             /* Start from an all-zero entry; only flags and redirect type */
             /* are filled in before the direction-specific helper runs. */
2404         bzero((char *)nat, sizeof(*nat));
2405         nat->nat_flags = flags;
2406         nat->nat_redir = np->in_redir;
2407
             /*
              * ipf_nat_new is held from here until the "done" label unless
              * NAT_SLAVE is set in flags (presumably because the creator of a
              * slave entry already holds it - TODO confirm).
              */
2408         if ((flags & NAT_SLAVE) == 0) {
2409                 MUTEX_ENTER(&ipf_nat_new);
2410         }
2411
2412         /*
2413          * Search the current table for a match.
2414          */
2415         if (direction == NAT_OUTBOUND) {
2416                 /*
2417                  * We can now arrange to call this for the same connection
2418                  * because ipf_nat_new doesn't protect the code path into
2419                  * this function.
2420                  */
2421                 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2422                                      fin->fin_src, fin->fin_dst);
2423                 if (natl != NULL) {
                             /* Already mapped: drop our allocation and */
                             /* hand back the existing entry. */
2424                         KFREE(nat);
2425                         nat = natl;
2426                         goto done;
2427                 }
2428
                     /* Part (1): let nat_newmap() fill in nat and ni for a */
                     /* MAP rule; -1 means no usable mapping was produced. */
2429                 move = nat_newmap(fin, nat, &ni);
2430                 if (move == -1)
2431                         goto badnat;
2432
                     /* Reload the rule pointer and address from ni, since */
                     /* the helper was given ni to update. */
2433                 np = ni.nai_np;
2434                 in = ni.nai_ip;
2435         } else {
2436                 /*
2437                  * NAT_INBOUND is used only for redirects rules
2438                  */
2439                 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2440                                     fin->fin_src, fin->fin_dst);
2441                 if (natl != NULL) {
                             /* Same as above: reuse the existing entry. */
2442                         KFREE(nat);
2443                         nat = natl;
2444                         goto done;
2445                 }
2446
                     /* Part (2): let nat_newrdr() fill in nat and ni for */
                     /* an RDR rule; -1 means the redirect was rejected. */
2447                 move = nat_newrdr(fin, nat, &ni);
2448                 if (move == -1)
2449                         goto badnat;
2450
2451                 np = ni.nai_np;
2452                 in = ni.nai_ip;
2453         }
2454         port = ni.nai_port;
2455         nport = ni.nai_nport;
2456
             /*
              * For round-robin rules the rule is removed from and re-added
              * to its list (presumably rotating it behind its peers so the
              * next connection uses a different rule - confirm against
              * nat_addrdr/nat_addnat).
              */
2457         if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2458                 if (np->in_redir == NAT_REDIRECT) {
2459                         nat_delrdr(np);
2460                         nat_addrdr(np);
2461                 } else if (np->in_redir == NAT_MAP) {
2462                         nat_delnat(np);
2463                         nat_addnat(np);
2464                 }
2465         }
2466
             /* Local copies of the original ports; for ICMP queries only */
             /* the id (held in sport) is meaningful and dport is zero. */
2467         if (flags & IPN_TCPUDP) {
2468                 sport = ni.nai_sport;
2469                 dport = ni.nai_dport;
2470         } else if (flags & IPN_ICMPQUERY) {
2471                 sport = ni.nai_sport;
2472                 dport = 0;
2473         }
2474
             /* Part (3): derive the checksum delta from the sums the helper */
             /* left in ni and fold the carry into the low 16 bits once. */
2475         CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2476         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2477 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2478         if ((flags & IPN_TCP) && dohwcksum &&
2479             (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2480                 if (direction == NAT_OUTBOUND)
2481                         ni.nai_sum1 = LONG_SUM(in.s_addr);
2482                 else
2483                         ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2484                 ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2485                 ni.nai_sum1 += 30;
2486                 ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2487                 nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2488         } else
2489 #endif
                     /* Body of the "else" above when that block is compiled */
                     /* in; otherwise an unconditional assignment. */
2490                 nat->nat_sumd[1] = nat->nat_sumd[0];
2491
             /*
              * When a port (or ICMP id) differs from its translated value,
              * the IP header delta is recomputed from the addresses alone.
              * Otherwise nat_sumd[0] is reused for it, and for protocols
              * outside IPN_TCPUDPICMP both nat_sumd[] deltas are cleared.
              */
2492         if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2493                 if (direction == NAT_OUTBOUND)
2494                         ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2495                 else
2496                         ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2497
2498                 ni.nai_sum2 = LONG_SUM(in.s_addr);
2499
2500                 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2501                 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2502         } else {
2503                 nat->nat_ipsumd = nat->nat_sumd[0];
2504                 if (!(flags & IPN_TCPUDPICMP)) {
2505                         nat->nat_sumd[0] = 0;
2506                         nat->nat_sumd[1] = 0;
2507                 }
2508         }
2509
             /* Complete the entry and insert it into the tables; a failure */
             /* here also sets fr_nat_doflush before cleaning up. */
2510         if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2511                 fr_nat_doflush = 1;
2512                 goto badnat;
2513         }
2514         if (flags & SI_WILDP)
2515                 nat_stats.ns_wilds++;
             /* Mark the packet as having created a new NAT entry. */
2516         fin->fin_flx |= FI_NEWNAT;
2517         goto done;
2518 badnat:
             /* Failure path: count it, drop any host map reference the */
             /* entry holds, free the entry and return NULL. */
2519         nat_stats.ns_badnat++;
2520         if ((hm = nat->nat_hm) != NULL)
2521                 fr_hostmapdel(&hm);
2522         KFREE(nat);
2523         nat = NULL;
2524 done:
             /* Common exit: release ipf_nat_new if it was taken above. */
2525         if ((flags & NAT_SLAVE) == 0) {
2526                 MUTEX_EXIT(&ipf_nat_new);
2527         }
2528         return nat;
2529 }
2530
2531
2532 /* ------------------------------------------------------------------------ */
2533 /* Function:    nat_finalise                                                */
2534 /* Returns:     int - 0 == success, -1 == failure                           */
2535 /* Parameters:  fin(I) - pointer to packet information                      */
2536 /*              nat(I) - pointer to NAT entry                               */
2537 /*              ni(I)  - pointer to structure with misc. information needed */
2538 /*                       to create new NAT entry.                           */
     /*              tcp(I) - pointer to the packet's TCP header; dereferenced   */
     /*                       only when fin->fin_p is IPPROTO_TCP                */
     /*              natsave(I)   - value stored in nat->nat_me                  */
     /*              direction(I) - value stored in nat->nat_dir                 */
2539 /* Write Lock:  ipf_nat                                                     */
2540 /*                                                                          */
2541 /* This is the tail end of constructing a new NAT entry and is the same     */
2542 /* for both IPv4 and IPv6.                                                  */
     /*                                                                          */
     /* On failure the entry is not freed here; that is left to the caller.      */
2543 /* ------------------------------------------------------------------------ */
2544 /*ARGSUSED*/
2545 static int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2546 fr_info_t *fin;
2547 nat_t *nat;
2548 natinfo_t *ni;
2549 tcphdr_t *tcp;
2550 nat_t **natsave;
2551 int direction;
2552 {
2553         frentry_t *fr;
2554         ipnat_t *np;
2555
2556         np = ni->nai_np;
2557
             /* Carry over the names of whichever of the rule's two */
             /* interfaces are set (COPYIFNAME is a macro defined elsewhere). */
2558         if (np->in_ifps[0] != NULL) {
2559                 COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2560         }
2561         if (np->in_ifps[1] != NULL) {
2562                 COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
2563         }
2564 #ifdef  IPFILTER_SYNC
             /* Entries flagged SI_CLONE do not get a sync object. */
2565         if ((nat->nat_flags & SI_CLONE) == 0)
2566                 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2567 #endif
2568
             /* Tie the entry to its rule, direction, interfaces and protocol. */
2569         nat->nat_me = natsave;
2570         nat->nat_dir = direction;
2571         nat->nat_ifps[0] = np->in_ifps[0];
2572         nat->nat_ifps[1] = np->in_ifps[1];
2573         nat->nat_ptr = np;
2574         nat->nat_p = fin->fin_p;
2575         nat->nat_mssclamp = np->in_mssclamp;
             /*
              * NOTE(review): tcp is dereferenced whenever the protocol is
              * TCP, but nat_new() only sets it when IPN_TCPUDP is in its
              * flags argument - confirm callers always pass matching flags.
              */
2576         if (nat->nat_p == IPPROTO_TCP)
2577                 nat->nat_seqnext[0] = ntohl(tcp->th_seq);
2578
             /*
              * Set up the rule's proxy, if it has one, except for NAT_SLAVE
              * entries.  NOTE(review): when this or nat_insert() fails, the
              * nat_sync object created above is not released in this
              * function - confirm it is cleaned up elsewhere.
              */
2579         if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2580                 if (appr_new(fin, nat) == -1)
2581                         return -1;
2582
2583         if (nat_insert(nat, fin->fin_rev) == 0) {
2584                 if (nat_logging)
2585                         nat_log(nat, (u_int)np->in_redir);
                     /* The rule gains a user, and the filter rule that */
                     /* matched the packet (if any) gains a reference, */
                     /* taken under that rule's own lock. */
2586                 np->in_use++;
2587                 fr = fin->fin_fr;
2588                 nat->nat_fr = fr;
2589                 if (fr != NULL) {
2590                         MUTEX_ENTER(&fr->fr_lock);
2591                         fr->fr_ref++;
2592                         MUTEX_EXIT(&fr->fr_lock);
2593                 }
2594                 return 0;
2595         }
2596
2597         /*
2598          * nat_insert failed, so cleanup time...
2599          */
2600         return -1;
2601 }
2602
2603
2604 /* ------------------------------------------------------------------------ */
2605 /* Function:   nat_insert                                                   */
2606 /* Returns:    int - 0 == success, -1 == failure                            */
2607 /* Parameters: nat(I) - pointer to NAT structure                            */
2608 /*             rev(I) - flag indicating forward/reverse direction of packet */
2609 /* Write Lock: ipf_nat                                                      */
2610 /*                                                                          */
2611 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2612 /* list of active NAT entries.  Adjust global counters when complete.       */
     /*                                                                          */
     /* Fails (-1) without touching the entry or the tables if either target     */
     /* hash bucket already holds fr_nat_maxbucket entries.                      */
2613 /* ------------------------------------------------------------------------ */
2614 int     nat_insert(nat, rev)
2615 nat_t   *nat;
2616 int     rev;
2617 {
2618         u_int hv1, hv2;
2619         nat_t **natp;
2620
2621         /*
2622          * Try and return an error as early as possible, so calculate the hash
2623          * entry numbers first and then proceed.
2624          */
             /*
              * hv1 (bucket in nat_table[0]) is built from nat_inip/nat_inport
              * and nat_oip/nat_oport; hv2 (bucket in nat_table[1]) from
              * nat_outip/nat_outport and nat_oip/nat_oport.  If either
              * wildcard-port flag is set, the ports are left out of both.
              */
2625         if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2626                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2627                                   0xffffffff);
2628                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2629                                   ipf_nattable_sz);
2630                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2631                                   0xffffffff);
2632                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2633                                   ipf_nattable_sz);
2634         } else {
2635                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2636                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2637                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2638                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
2639         }
2640
             /* Enforce the per-bucket length limit; only the locals hv1 */
             /* and hv2 have been written so far, so nothing needs undoing. */
2641         if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2642             nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2643                 return -1;
2644         }
2645
2646         nat->nat_hv[0] = hv1;
2647         nat->nat_hv[1] = hv2;
2648
2649         MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2650
             /* A fresh entry starts with one reference and zeroed counters. */
2651         nat->nat_rev = rev;
2652         nat->nat_ref = 1;
2653         nat->nat_bytes[0] = 0;
2654         nat->nat_pkts[0] = 0;
2655         nat->nat_bytes[1] = 0;
2656         nat->nat_pkts[1] = 0;
2657
             /* Force NUL termination of the name, then resolve it to an */
             /* interface pointer. */
2658         nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2659         nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2660
             /* With no second interface name, the second slot mirrors the */
             /* first (both name and resolved pointer). */
2661         if (nat->nat_ifnames[1][0] != '\0') {
2662                 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2663                 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2664         } else {
2665                 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2666                                LIFNAMSIZ);
2667                 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2668                 nat->nat_ifps[1] = nat->nat_ifps[0];
2669         }
2670
             /*
              * Push onto the head of the global nat_instances list.  Each
              * entry's nat_pnext holds the address of the pointer that
              * refers to it, so the old head is re-aimed at our nat_next.
              */
2671         nat->nat_next = nat_instances;
2672         nat->nat_pnext = &nat_instances;
2673         if (nat_instances)
2674                 nat_instances->nat_pnext = &nat->nat_next;
2675         nat_instances = nat;
2676
             /* Same head insertion for the nat_table[0] bucket chain... */
2677         natp = &nat_table[0][hv1];
2678         if (*natp)
2679                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2680         nat->nat_phnext[0] = natp;
2681         nat->nat_hnext[0] = *natp;
2682         *natp = nat;
2683         nat_stats.ns_bucketlen[0][hv1]++;
2684
             /* ...and for the nat_table[1] bucket chain. */
2685         natp = &nat_table[1][hv2];
2686         if (*natp)
2687                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2688         nat->nat_phnext[1] = natp;
2689         nat->nat_hnext[1] = *natp;
2690         *natp = nat;
2691         nat_stats.ns_bucketlen[1][hv2]++;
2692
             /* Presumably places the entry on its timeout queue - confirm */
             /* against fr_setnatqueue(). */
2693         fr_setnatqueue(nat, rev);
2694
2695         nat_stats.ns_added++;
2696         nat_stats.ns_inuse++;
2697         return 0;
2698 }
2699
2700
2701 /* ------------------------------------------------------------------------ */
2702 /* Function:    nat_icmperrorlookup                                         */
2703 /* Returns:     nat_t* - point to matching NAT structure                    */
     /*                       (NULL if the packet is rejected or no match)       */
2704 /* Parameters:  fin(I) - pointer to packet information                      */
2705 /*              dir(I) - direction of packet (in/out)                       */
2706 /*                                                                          */
2707 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2708 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2709 /* required length.                                                         */
     /*                                                                          */
     /* fin->fin_data[] is temporarily overwritten with values taken from the    */
     /* embedded packet for the lookup and restored before returning.            */
2710 /* ------------------------------------------------------------------------ */
2711 nat_t *nat_icmperrorlookup(fin, dir)
2712 fr_info_t *fin;
2713 int dir;
2714 {
2715         int flags = 0, type, minlen;
2716         icmphdr_t *icmp, *orgicmp;
2717         tcphdr_t *tcp = NULL;
2718         u_short data[2];
2719         nat_t *nat;
2720         ip_t *oip;
2721         u_int p;
2722
2723         icmp = fin->fin_dp;
             /* NOTE(review): type is assigned here but not read again in */
             /* this function. */
2724         type = icmp->icmp_type;
2725         /*
2726          * Does it at least have the return (basic) IP header ?
2727          * Only a basic IP header (no options) should be with an ICMP error
2728          * header.  Also, if it's not an error type, then return.
2729          */
2730         if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2731                 return NULL;
2732
2733         /*
2734          * Check packet size
2735          */
             /* oip is the IP header embedded in the error, located 8 bytes */
             /* past the start of the ICMP header; minlen is its claimed */
             /* header length in bytes. */
2736         oip = (ip_t *)((char *)fin->fin_dp + 8);
2737         minlen = IP_HL(oip) << 2;
2738         if ((minlen < sizeof(ip_t)) ||
2739             (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2740                 return NULL;
2741         /*
2742          * Is the buffer big enough for all of it ?  It's the size of the IP
2743          * header claimed in the encapsulated part which is of concern.  It
2744          * may be too big to be in this buffer but not so big that it's
2745          * outside the ICMP packet, leading to TCP deref's causing problems.
2746          * This is possible because we don't know how big oip_hl is when we
2747          * do the pullup early in fr_check() and thus can't guarantee it is
2748          * all here now.
2749          */
2750 #ifdef  _KERNEL
2751         {
2752         mb_t *m;
2753
2754         m = fin->fin_m;
2755 # if defined(MENTAT)
2756         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2757                 return NULL;
2758 # else
2759         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2760             (char *)fin->fin_ip + M_LEN(m))
2761                 return NULL;
2762 # endif
2763         }
2764 #endif
2765
             /* The error must be addressed to the source of the packet it */
             /* quotes. */
2766         if (fin->fin_daddr != oip->ip_src.s_addr)
2767                 return NULL;
2768
2769         p = oip->ip_p;
2770         if (p == IPPROTO_TCP)
2771                 flags = IPN_TCP;
2772         else if (p == IPPROTO_UDP)
2773                 flags = IPN_UDP;
2774         else if (p == IPPROTO_ICMP) {
                     /*
                      * NOTE(review): orgicmp is read below without the
                      * "minlen += 8" length re-check that the TCP/UDP path
                      * performs - confirm the quoted ICMP header is always
                      * present.
                      */
2775                 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2776
2777                 /* see if this is related to an ICMP query */
2778                 if (nat_icmpquerytype4(orgicmp->icmp_type)) {
                             /* Save fin_data[], then substitute 0 and the */
                             /* quoted ICMP id for the lookup. */
2779                         data[0] = fin->fin_data[0];
2780                         data[1] = fin->fin_data[1];
2781                         fin->fin_data[0] = 0;
2782                         fin->fin_data[1] = orgicmp->icmp_id;
2783
2784                         flags = IPN_ICMPERR|IPN_ICMPQUERY;
2785                         /*
2786                          * NOTE : dir refers to the direction of the original
2787                          *        ip packet. By definition the icmp error
2788                          *        message flows in the opposite direction.
2789                          */
2790                         if (dir == NAT_INBOUND)
2791                                 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2792                                                    oip->ip_src);
2793                         else
2794                                 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2795                                                     oip->ip_src);
2796                         fin->fin_data[0] = data[0];
2797                         fin->fin_data[1] = data[1];
2798                         return nat;
2799                 }
                     /* Quoted ICMP packets that are not queries fall */
                     /* through to the flag-less lookup at the bottom. */
2800         }
2801                 
2802         if (flags & IPN_TCPUDP) {
2803                 minlen += 8;            /* + 64bits of data to get ports */
2804                 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2805                         return NULL;
2806
                     /* Save fin_data[], then substitute the quoted ports */
                     /* swapped (destination first), matching the swapped */
                     /* dst/src addresses passed to the lookup. */
2807                 data[0] = fin->fin_data[0];
2808                 data[1] = fin->fin_data[1];
2809                 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2810                 fin->fin_data[0] = ntohs(tcp->th_dport);
2811                 fin->fin_data[1] = ntohs(tcp->th_sport);
2812
2813                 if (dir == NAT_INBOUND) {
2814                         nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2815                                            oip->ip_src);
2816                 } else {
2817                         nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2818                                             oip->ip_src);
2819                 }
2820                 fin->fin_data[0] = data[0];
2821                 fin->fin_data[1] = data[1];
2822                 return nat;
2823         }
             /* Any other protocol: look up with no flags and fin_data[] */
             /* left untouched. */
2824         if (dir == NAT_INBOUND)
2825                 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2826         else
2827                 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2828 }
2829
2830
2831 /* ------------------------------------------------------------------------ */
2832 /* Function:    nat_icmperror                                               */
2833 /* Returns:     nat_t* - point to matching NAT structure                    */
2834 /* Parameters:  fin(I)    - pointer to packet information                   */
2835 /*              nflags(I) - NAT flags for this packet                       */
2836 /*              dir(I)    - direction of packet (in/out)                    */
2837 /*                                                                          */
2838 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2839 /* session.  This will correct both packet header data and checksums.       */
2840 /*                                                                          */
2841 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2842 /* a NAT'd ICMP packet gets correctly recognised.                           */
2843 /* ------------------------------------------------------------------------ */
2844 nat_t *nat_icmperror(fin, nflags, dir)
2845 fr_info_t *fin;
2846 u_int *nflags;
2847 int dir;
2848 {
2849         u_32_t sum1, sum2, sumd, sumd2;
2850         struct in_addr a1, a2;
2851         int flags, dlen, odst;
2852         icmphdr_t *icmp;
2853         u_short *csump;
2854         tcphdr_t *tcp;
2855         nat_t *nat;
2856         ip_t *oip;
2857         void *dp;
2858
2859         if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2860                 return NULL;
2861         /*
2862          * nat_icmperrorlookup() will return NULL for `defective' packets.
2863          */
2864         if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2865                 return NULL;
2866
2867         tcp = NULL;
2868         csump = NULL;
2869         flags = 0;
2870         sumd2 = 0;
2871         *nflags = IPN_ICMPERR;
2872         icmp = fin->fin_dp;
2873         oip = (ip_t *)&icmp->icmp_ip;
2874         dp = (((char *)oip) + (IP_HL(oip) << 2));
2875         if (oip->ip_p == IPPROTO_TCP) {
2876                 tcp = (tcphdr_t *)dp;
2877                 csump = (u_short *)&tcp->th_sum;
2878                 flags = IPN_TCP;
2879         } else if (oip->ip_p == IPPROTO_UDP) {
2880                 udphdr_t *udp;
2881
2882                 udp = (udphdr_t *)dp;
2883                 tcp = (tcphdr_t *)dp;
2884                 csump = (u_short *)&udp->uh_sum;
2885                 flags = IPN_UDP;
2886         } else if (oip->ip_p == IPPROTO_ICMP)
2887                 flags = IPN_ICMPQUERY;
2888         dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);
2889
2890         /*
2891          * Need to adjust ICMP header to include the real IP#'s and
2892          * port #'s.  Only apply a checksum change relative to the
2893          * IP address change as it will be modified again in fr_checknatout
2894          * for both address and port.  Two checksum changes are
2895          * necessary for the two header address changes.  Be careful
2896          * to only modify the checksum once for the port # and twice
2897          * for the IP#.
2898          */
2899
2900         /*
2901          * Step 1
2902          * Fix the IP addresses in the offending IP packet. You also need
2903          * to adjust the IP header checksum of that offending IP packet.
2904          *
2905          * Normally, you would expect that the ICMP checksum of the
2906          * ICMP error message needs to be adjusted as well for the
2907          * IP address change in oip.
2908          * However, this is a NOP, because the ICMP checksum is
2909          * calculated over the complete ICMP packet, which includes the
2910          * changed oip IP addresses and oip->ip_sum. However, these
2911          * two changes cancel each other out (if the delta for
2912          * the IP address is x, then the delta for ip_sum is minus x),
2913          * so no change in the icmp_cksum is necessary.
2914          *
2915          * Inbound ICMP
2916          * ------------
2917          * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2918          * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
2919          * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
2920          *
2921          * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2922          * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2923          * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
2924          *
2925          * Outbound ICMP
2926          * -------------
2927          * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2928          * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2929          * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2930          *
2931          * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2932          * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
2933          * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2934          *
2935          */
2936         odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
2937         if (odst == 1) {
2938                 a1.s_addr = ntohl(nat->nat_inip.s_addr);
2939                 a2.s_addr = ntohl(oip->ip_src.s_addr);
2940                 oip->ip_src.s_addr = htonl(a1.s_addr);
2941         } else {
2942                 a1.s_addr = ntohl(nat->nat_outip.s_addr);
2943                 a2.s_addr = ntohl(oip->ip_dst.s_addr);
2944                 oip->ip_dst.s_addr = htonl(a1.s_addr);
2945         }
2946
2947         sumd = a2.s_addr - a1.s_addr;
2948         if (sumd != 0) {
2949                 if (a1.s_addr > a2.s_addr)
2950                         sumd--;
2951                 sumd = ~sumd;
2952
2953                 fix_datacksum(&oip->ip_sum, sumd);
2954         }
2955
2956         sumd2 = sumd;
2957         sum1 = 0;
2958         sum2 = 0;
2959
2960         /*
2961          * Fix UDP pseudo header checksum to compensate for the
2962          * IP address change.
2963          */
2964         if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
2965                 /*
2966                  * Step 2 :
2967                  * For offending TCP/UDP IP packets, translate the ports as
2968                  * well, based on the NAT specification. Of course such
2969                  * a change may be reflected in the ICMP checksum as well.
2970                  *
2971                  * Since the port fields are part of the TCP/UDP checksum
2972                  * of the offending IP packet, you need to adjust that checksum
2973                  * as well... except that the change in the port numbers should 
2974                  * be offset by the checksum change.  However, the TCP/UDP
2975                  * checksum will also need to change if there has been an
2976                  * IP address change.
2977                  */
2978                 if (odst == 1) {
2979                         sum1 = ntohs(nat->nat_inport);
2980                         sum2 = ntohs(tcp->th_sport);
2981
2982                         tcp->th_sport = htons(sum1);
2983                 } else {
2984                         sum1 = ntohs(nat->nat_outport);
2985                         sum2 = ntohs(tcp->th_dport);
2986
2987                         tcp->th_dport = htons(sum1);
2988                 }
2989
2990                 sumd += sum1 - sum2;
2991                 if (sumd != 0 || sumd2 != 0) {
2992                         /*
2993                          * At this point, sumd is the delta to apply to the
2994                          * TCP/UDP header, given the changes in both the IP
2995                          * address and the ports and sumd2 is the delta to
2996                          * apply to the ICMP header, given the IP address
2997                          * change delta that may need to be applied to the
2998                          * TCP/UDP checksum instead.
2999                          *
3000                          * If we will change both the IP and TCP/UDP
3001                          * checksums then the ICMP checksum changes by the
3002                          * address delta applied to the TCP/UDP checksum.  If
3003                          * we do not change the TCP/UDP checksum then we
3004                          * apply the delta in ports to the ICMP checksum.
3005                          */
3006                         if (oip->ip_p == IPPROTO_UDP) {
3007                                 if ((dlen >= 8) && (*csump != 0)) {
3008                                         fix_datacksum(csump, sumd);
3009                                 } else {
3010                                         sumd2 = sum1 - sum2;
3011                                         if (sum2 > sum1)
3012                                                 sumd2--;
3013                                 }
3014                         } else if (oip->ip_p == IPPROTO_TCP) {
3015                                 if (dlen >= 18) {
3016                                         fix_datacksum(csump, sumd);
3017                                 } else {
3018                                         sumd2 = sum2 - sum1;
3019                                         if (sum1 > sum2)
3020                                                 sumd2--;
3021                                 }
3022                         }
3023
3024                         if (sumd2 != 0) {
3025                                 ipnat_t *np;
3026
3027                                 np = nat->nat_ptr;
3028                                 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3029                                 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3030                                 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3031
3032                                 if ((odst == 0) && (dir == NAT_OUTBOUND) &&
3033                                     (fin->fin_rev == 0) && (np != NULL) &&
3034                                     (np->in_redir & NAT_REDIRECT)) {
3035                                         fix_outcksum(fin, &icmp->icmp_cksum,
3036                                                      sumd2);
3037                                 } else {
3038                                         fix_incksum(fin, &icmp->icmp_cksum,
3039                                                     sumd2);
3040                                 }
3041                         }
3042                 }
3043         } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
3044                 icmphdr_t *orgicmp;
3045
3046                 /*
3047                  * XXX - what if this is bogus hl and we go off the end ?
3048                  * In this case, nat_icmperrorlookup() will have returned NULL.
3049                  */
3050                 orgicmp = (icmphdr_t *)dp;
3051
3052                 if (odst == 1) {
3053                         if (orgicmp->icmp_id != nat->nat_inport) {
3054
3055                                 /*
3056                                  * Fix ICMP checksum (of the offending ICMP
3057                                  * query packet) to compensate the change
3058                                  * in the ICMP id of the offending ICMP
3059                                  * packet.
3060                                  *
3061                                  * Since you modify orgicmp->icmp_id with
3062                                  * a delta (say x) and you compensate that
3063                                  * in orgicmp->icmp_cksum with a delta
3064                                  * minus x, you don't have to adjust the
3065                                  * overall icmp->icmp_cksum
3066                                  */
3067                                 sum1 = ntohs(orgicmp->icmp_id);
3068                                 sum2 = ntohs(nat->nat_inport);
3069                                 CALC_SUMD(sum1, sum2, sumd);
3070                                 orgicmp->icmp_id = nat->nat_inport;
3071                                 fix_datacksum(&orgicmp->icmp_cksum, sumd);
3072                         }
3073                 } /* nat_dir == NAT_INBOUND is impossible for icmp queries */
3074         }
3075         return nat;
3076 }
3077
3078
3079 /*
3080  * NB: these lookups don't lock access to the list; it is assumed that this
3081  * has already been done!
3082  */
3083
3084 /* ------------------------------------------------------------------------ */
3085 /* Function:    nat_inlookup                                                */
3086 /* Returns:     nat_t* - NULL == no match,                                  */
3087 /*                       else pointer to matching NAT entry                 */
3088 /* Parameters:  fin(I)    - pointer to packet information                   */
3089 /*              flags(I)  - NAT flags for this packet                       */
3090 /*              p(I)      - protocol for this packet                        */
3091 /*              src(I)    - source IP address                               */
3092 /*              mapdst(I) - destination IP address                          */
3093 /*                                                                          */
3094 /* Lookup a nat entry based on the mapped destination ip address/port and   */
3095 /* real source address/port.  We use this lookup when receiving a packet,   */
3096 /* we're looking for a table entry, based on the destination address.       */
3097 /*                                                                          */
3098 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3099 /*                                                                          */
3100 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3101 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3102 /*                                                                          */
3103 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3104 /*            the packet is of said protocol                                */
3105 /* ------------------------------------------------------------------------ */
3106 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3107 fr_info_t *fin;
3108 u_int flags, p;
3109 struct in_addr src , mapdst;
3110 {
3111         u_short sport, dport;
3112         grehdr_t *gre;
3113         ipnat_t *ipn;
3114         u_int sflags;
3115         nat_t *nat;
3116         int nflags;
3117         u_32_t dst;
3118         void *ifp;
3119         u_int hv;
3120
3121         ifp = fin->fin_ifp;
3122         sport = 0;
3123         dport = 0;
3124         gre = NULL;
3125         dst = mapdst.s_addr;
3126         sflags = flags & NAT_TCPUDPICMP;
3127
3128         switch (p)
3129         {
3130         case IPPROTO_TCP :
3131         case IPPROTO_UDP :
3132                 sport = htons(fin->fin_data[0]);
3133                 dport = htons(fin->fin_data[1]);
3134                 break;
3135         case IPPROTO_ICMP :
3136                 if (flags & IPN_ICMPERR)
3137                         sport = fin->fin_data[1];
3138                 else
3139                         dport = fin->fin_data[1];
3140                 break;
3141         default :
3142                 break;
3143         }
3144
3145
3146         if ((flags & SI_WILDP) != 0)
3147                 goto find_in_wild_ports;
3148
3149         hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3150         hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
3151         nat = nat_table[1][hv];
3152         for (; nat; nat = nat->nat_hnext[1]) {
3153                 if (nat->nat_ifps[0] != NULL) {
3154                         if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3155                                 continue;
3156                 } else if (ifp != NULL)
3157                         nat->nat_ifps[0] = ifp;
3158
3159                 nflags = nat->nat_flags;
3160
3161                 if (nat->nat_oip.s_addr == src.s_addr &&
3162                     nat->nat_outip.s_addr == dst &&
3163                     (((p == 0) &&
3164                       (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3165                      || (p == nat->nat_p))) {
3166                         switch (p)
3167                         {
3168 #if 0
3169                         case IPPROTO_GRE :
3170                                 if (nat->nat_call[1] != fin->fin_data[0])
3171                                         continue;
3172                                 break;
3173 #endif
3174                         case IPPROTO_ICMP :
3175                                 if ((flags & IPN_ICMPERR) != 0) {
3176                                         if (nat->nat_outport != sport)
3177                                                 continue;
3178                                 } else {
3179                                         if (nat->nat_outport != dport)
3180                                                 continue;
3181                                 }
3182                                 break;
3183                         case IPPROTO_TCP :
3184                         case IPPROTO_UDP :
3185                                 if (nat->nat_oport != sport)
3186                                         continue;
3187                                 if (nat->nat_outport != dport)
3188                                         continue;
3189                                 break;
3190                         default :
3191                                 break;
3192                         }
3193
3194                         ipn = nat->nat_ptr;
3195                         if ((ipn != NULL) && (nat->nat_aps != NULL))
3196                                 if (appr_match(fin, nat) != 0)
3197                                         continue;
3198                         return nat;
3199                 }
3200         }
3201
3202         /*
3203          * So if we didn't find it but there are wildcard members in the hash
3204          * table, go back and look for them.  We do this search and update here
3205          * because it is modifying the NAT table and we want to do this only
3206          * for the first packet that matches.  The exception, of course, is
3207          * for "dummy" (FI_IGNORE) lookups.
3208          */
3209 find_in_wild_ports:
3210         if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3211                 return NULL;
3212         if (nat_stats.ns_wilds == 0)
3213                 return NULL;
3214
3215         RWLOCK_EXIT(&ipf_nat);
3216
3217         hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3218         hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
3219
3220         WRITE_ENTER(&ipf_nat);
3221
3222         nat = nat_table[1][hv];
3223         for (; nat; nat = nat->nat_hnext[1]) {
3224                 if (nat->nat_ifps[0] != NULL) {
3225                         if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3226                                 continue;
3227                 } else if (ifp != NULL)
3228                         nat->nat_ifps[0] = ifp;
3229
3230                 if (nat->nat_p != fin->fin_p)
3231                         continue;
3232                 if (nat->nat_oip.s_addr != src.s_addr ||
3233                     nat->nat_outip.s_addr != dst)
3234                         continue;
3235
3236                 nflags = nat->nat_flags;
3237                 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3238                         continue;
3239
3240                 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3241                                NAT_INBOUND) == 1) {
3242                         if ((fin->fin_flx & FI_IGNORE) != 0)
3243                                 break;
3244                         if ((nflags & SI_CLONE) != 0) {
3245                                 nat = fr_natclone(fin, nat);
3246                                 if (nat == NULL)
3247                                         break;
3248                         } else {
3249                                 MUTEX_ENTER(&ipf_nat_new);
3250                                 nat_stats.ns_wilds--;
3251                                 MUTEX_EXIT(&ipf_nat_new);
3252                         }
3253                         nat->nat_oport = sport;
3254                         nat->nat_outport = dport;
3255                         nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3256                         nat_tabmove(nat);
3257                         break;
3258                 }
3259         }
3260
3261         MUTEX_DOWNGRADE(&ipf_nat);
3262
3263         return nat;
3264 }
3265
3266
3267 /* ------------------------------------------------------------------------ */
3268 /* Function:    nat_tabmove                                                 */
3269 /* Returns:     Nil                                                         */
3270 /* Parameters:  nat(I) - pointer to NAT structure                           */
3271 /* Write Lock:  ipf_nat                                                     */
3272 /*                                                                          */
3273 /* This function is only called for TCP/UDP NAT table entries where the     */
3274 /* original was placed in the table without hashing on the ports and we now */
3275 /* want to include hashing on port numbers.                                 */
3276 /* ------------------------------------------------------------------------ */
3277 static void nat_tabmove(nat)
3278 nat_t *nat;
3279 {
3280         nat_t **natp;
3281         u_int hv;
3282
3283         if (nat->nat_flags & SI_CLONE)
3284                 return;
3285
3286         /*
3287          * Remove the NAT entry from the old location
3288          */
3289         if (nat->nat_hnext[0])
3290                 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3291         *nat->nat_phnext[0] = nat->nat_hnext[0];
3292         nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3293
3294         if (nat->nat_hnext[1])
3295                 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3296         *nat->nat_phnext[1] = nat->nat_hnext[1];
3297         nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3298
3299         /*
3300          * Add into the NAT table in the new position
3301          */
3302         hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3303         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3304                          ipf_nattable_sz);
3305         nat->nat_hv[0] = hv;
3306         natp = &nat_table[0][hv];
3307         if (*natp)
3308                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3309         nat->nat_phnext[0] = natp;
3310         nat->nat_hnext[0] = *natp;
3311         *natp = nat;
3312         nat_stats.ns_bucketlen[0][hv]++;
3313
3314         hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3315         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3316                          ipf_nattable_sz);
3317         nat->nat_hv[1] = hv;
3318         natp = &nat_table[1][hv];
3319         if (*natp)
3320                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3321         nat->nat_phnext[1] = natp;
3322         nat->nat_hnext[1] = *natp;
3323         *natp = nat;
3324         nat_stats.ns_bucketlen[1][hv]++;
3325 }
3326
3327
3328 /* ------------------------------------------------------------------------ */
3329 /* Function:    nat_outlookup                                               */
3330 /* Returns:     nat_t* - NULL == no match,                                  */
3331 /*                       else pointer to matching NAT entry                 */
3332 /* Parameters:  fin(I)   - pointer to packet information                    */
3333 /*              flags(I) - NAT flags for this packet                        */
3334 /*              p(I)     - protocol for this packet                         */
3335 /*              src(I)   - source IP address                                */
3336 /*              dst(I)   - destination IP address                           */
3337 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3338 /*                                                                          */
3339 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3340 /* destination address/port.  We use this lookup when sending a packet out, */
3341 /* we're looking for a table entry, based on the source address.            */
3342 /*                                                                          */
3343 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3344 /*                                                                          */
3345 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3346 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3347 /*                                                                          */
3348 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3349 /*            the packet is of said protocol                                */
3350 /* ------------------------------------------------------------------------ */
3351 nat_t *nat_outlookup(fin, flags, p, src, dst)
3352 fr_info_t *fin;
3353 u_int flags, p;
3354 struct in_addr src , dst;
3355 {
3356         u_short sport, dport;
3357         u_int sflags;
3358         ipnat_t *ipn;
3359         u_32_t srcip;
3360         nat_t *nat;
3361         int nflags;
3362         void *ifp;
3363         u_int hv;
3364
3365         ifp = fin->fin_ifp;
3366         srcip = src.s_addr;
3367         sflags = flags & IPN_TCPUDPICMP;
3368         sport = 0;
3369         dport = 0;
3370
3371         switch (p)
3372         {
3373         case IPPROTO_TCP :
3374         case IPPROTO_UDP :
3375                 sport = htons(fin->fin_data[0]);
3376                 dport = htons(fin->fin_data[1]);
3377                 break;
3378         case IPPROTO_ICMP :
3379                 if (flags & IPN_ICMPERR)
3380                         sport = fin->fin_data[1];
3381                 else
3382                         dport = fin->fin_data[1];
3383                 break;
3384         default :
3385                 break;
3386         }
3387
3388         if ((flags & SI_WILDP) != 0)
3389                 goto find_out_wild_ports;
3390
3391         hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3392         hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
3393         nat = nat_table[0][hv];
3394         for (; nat; nat = nat->nat_hnext[0]) {
3395                 if (nat->nat_ifps[1] != NULL) {
3396                         if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3397                                 continue;
3398                 } else if (ifp != NULL)
3399                         nat->nat_ifps[1] = ifp;
3400
3401                 nflags = nat->nat_flags;
3402
3403                 if (nat->nat_inip.s_addr == srcip &&
3404                     nat->nat_oip.s_addr == dst.s_addr &&
3405                     (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3406                      || (p == nat->nat_p))) {
3407                         switch (p)
3408                         {
3409 #if 0
3410                         case IPPROTO_GRE :
3411                                 if (nat->nat_call[1] != fin->fin_data[0])
3412                                         continue;
3413                                 break;
3414 #endif
3415                         case IPPROTO_TCP :
3416                         case IPPROTO_UDP :
3417                                 if (nat->nat_oport != dport)
3418                                         continue;
3419                                 if (nat->nat_inport != sport)
3420                                         continue;
3421                                 break;
3422                         default :
3423                                 break;
3424                         }
3425
3426                         ipn = nat->nat_ptr;
3427                         if ((ipn != NULL) && (nat->nat_aps != NULL))
3428                                 if (appr_match(fin, nat) != 0)
3429                                         continue;
3430                         return nat;
3431                 }
3432         }
3433
3434         /*
3435          * So if we didn't find it but there are wildcard members in the hash
3436          * table, go back and look for them.  We do this search and update here
3437          * because it is modifying the NAT table and we want to do this only
3438          * for the first packet that matches.  The exception, of course, is
3439          * for "dummy" (FI_IGNORE) lookups.
3440          */
3441 find_out_wild_ports:
3442         if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3443                 return NULL;
3444         if (nat_stats.ns_wilds == 0)
3445                 return NULL;
3446
3447         RWLOCK_EXIT(&ipf_nat);
3448
3449         hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3450         hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
3451
3452         WRITE_ENTER(&ipf_nat);
3453
3454         nat = nat_table[0][hv];
3455         for (; nat; nat = nat->nat_hnext[0]) {
3456                 if (nat->nat_ifps[1] != NULL) {
3457                         if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3458                                 continue;
3459                 } else if (ifp != NULL)
3460                         nat->nat_ifps[1] = ifp;
3461
3462                 if (nat->nat_p != fin->fin_p)
3463                         continue;
3464                 if ((nat->nat_inip.s_addr != srcip) ||
3465                     (nat->nat_oip.s_addr != dst.s_addr))
3466                         continue;
3467
3468                 nflags = nat->nat_flags;
3469                 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3470                         continue;
3471
3472                 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3473                                NAT_OUTBOUND) == 1) {
3474                         if ((fin->fin_flx & FI_IGNORE) != 0)
3475                                 break;
3476                         if ((nflags & SI_CLONE) != 0) {
3477                                 nat = fr_natclone(fin, nat);
3478                                 if (nat == NULL)
3479                                         break;
3480                         } else {
3481                                 MUTEX_ENTER(&ipf_nat_new);
3482                                 nat_stats.ns_wilds--;
3483                                 MUTEX_EXIT(&ipf_nat_new);
3484                         }
3485                         nat->nat_inport = sport;
3486                         nat->nat_oport = dport;
3487                         if (nat->nat_outport == 0)
3488                                 nat->nat_outport = sport;
3489                         nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3490                         nat_tabmove(nat);
3491                         break;
3492                 }
3493         }
3494
3495         MUTEX_DOWNGRADE(&ipf_nat);
3496
3497         return nat;
3498 }
3499
3500
3501 /* ------------------------------------------------------------------------ */
3502 /* Function:    nat_lookupredir                                             */
3503 /* Returns:     nat_t* - NULL == no match,                                  */
3504 /*                       else pointer to matching NAT entry                 */
3505 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3506 /*                      entry for.                                          */
3507 /*                                                                          */
3508 /* Lookup the NAT tables to search for a matching redirect                  */
3509 /* The contents of natlookup_t should imitate those found in a packet that  */
3510 /* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3511 /* We can do the lookup in one of two ways, imitating an inbound or         */
3512 /* outbound  packet.  By default we assume outbound, unless IPN_IN is set.  */
3513 /* For IN, the fields are set as follows:                                   */
3514 /*     nl_real* = source information                                        */
3515 /*     nl_out* = destination information (translated)                       */
3516 /* For an out packet, the fields are set like this:                         */
3517 /*     nl_in* = source information (untranslated)                           */
3518 /*     nl_out* = destination information (translated)                       */
3519 /* ------------------------------------------------------------------------ */
3520 nat_t *nat_lookupredir(np)
3521 natlookup_t *np;
3522 {
3523         fr_info_t fi;
3524         nat_t *nat;
3525
3526         bzero((char *)&fi, sizeof(fi));
3527         if (np->nl_flags & IPN_IN) {
3528                 fi.fin_data[0] = ntohs(np->nl_realport);
3529                 fi.fin_data[1] = ntohs(np->nl_outport);
3530         } else {
3531                 fi.fin_data[0] = ntohs(np->nl_inport);
3532                 fi.fin_data[1] = ntohs(np->nl_outport);
3533         }
3534         if (np->nl_flags & IPN_TCP)
3535                 fi.fin_p = IPPROTO_TCP;
3536         else if (np->nl_flags & IPN_UDP)
3537                 fi.fin_p = IPPROTO_UDP;
3538         else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3539                 fi.fin_p = IPPROTO_ICMP;
3540
3541         /*
3542          * We can do two sorts of lookups:
3543          * - IPN_IN: we have the `real' and `out' address, look for `in'.
3544          * - default: we have the `in' and `out' address, look for `real'.
3545          */
3546         if (np->nl_flags & IPN_IN) {
3547                 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3548                                         np->nl_realip, np->nl_outip))) {
3549                         np->nl_inip = nat->nat_inip;
3550                         np->nl_inport = nat->nat_inport;
3551                 }
3552         } else {
3553                 /*
3554                  * If nl_inip is non null, this is a lookup based on the real
3555                  * ip address. Else, we use the fake.
3556                  */
3557                 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3558                                          np->nl_inip, np->nl_outip))) {
3559
3560                         if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3561                                 fr_info_t fin;
3562                                 bzero((char *)&fin, sizeof(fin));
3563                                 fin.fin_p = nat->nat_p;
3564                                 fin.fin_data[0] = ntohs(nat->nat_outport);
3565                                 fin.fin_data[1] = ntohs(nat->nat_oport);
3566                                 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3567                                                  nat->nat_outip,
3568                                                  nat->nat_oip) != NULL) {
3569                                         np->nl_flags &= ~IPN_FINDFORWARD;
3570                                 }
3571                         }
3572
3573                         np->nl_realip = nat->nat_outip;
3574                         np->nl_realport = nat->nat_outport;
3575                 }
3576         }
3577
3578         return nat;
3579 }
3580
3581
3582 /* ------------------------------------------------------------------------ */
3583 /* Function:    nat_match                                                   */
3584 /* Returns:     int - 0 == no match, 1 == match                             */
3585 /* Parameters:  fin(I)   - pointer to packet information                    */
3586 /*              np(I)    - pointer to NAT rule                              */
3587 /*                                                                          */
3588 /* Pull the matching of a packet against a NAT rule out of that complex     */
3589 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3590 /* ------------------------------------------------------------------------ */
3591 static int nat_match(fin, np)
3592 fr_info_t *fin;
3593 ipnat_t *np;
3594 {
3595         frtuc_t *ft;
3596
3597         if (fin->fin_v != 4)
3598                 return 0;
3599
3600         if (np->in_p && fin->fin_p != np->in_p)
3601                 return 0;
3602
3603         if (fin->fin_out) {
3604                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3605                         return 0;
3606                 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3607                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
3608                         return 0;
3609                 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3610                     ^ ((np->in_flags & IPN_NOTDST) != 0))
3611                         return 0;
3612         } else {
3613                 if (!(np->in_redir & NAT_REDIRECT))
3614                         return 0;
3615                 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3616                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
3617                         return 0;
3618                 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3619                     ^ ((np->in_flags & IPN_NOTDST) != 0))
3620                         return 0;
3621         }
3622
3623         ft = &np->in_tuc;
3624         if (!(fin->fin_flx & FI_TCPUDP) ||
3625             (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3626                 if (ft->ftu_scmp || ft->ftu_dcmp)
3627                         return 0;
3628                 return 1;
3629         }
3630
3631         return fr_tcpudpchk(fin, ft);
3632 }
3633
3634
3635 /* ------------------------------------------------------------------------ */
3636 /* Function:    nat_update                                                  */
3637 /* Returns:     Nil                                                         */
3638 /* Parameters:  nat(I)    - pointer to NAT structure                        */
3639 /*              np(I)     - pointer to NAT rule                             */
3640 /*                                                                          */
3641 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3642 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3643 /* ------------------------------------------------------------------------ */
3644 void nat_update(fin, nat, np)
3645 fr_info_t *fin;
3646 nat_t *nat;
3647 ipnat_t *np;
3648 {
3649         ipftq_t *ifq, *ifq2;
3650         ipftqent_t *tqe;
3651
3652         MUTEX_ENTER(&nat->nat_lock);
3653         tqe = &nat->nat_tqe;
3654         ifq = tqe->tqe_ifq;
3655
3656         /*
3657          * We allow over-riding of NAT timeouts from NAT rules, even for
3658          * TCP, however, if it is TCP and there is no rule timeout set,
3659          * then do not update the timeout here.
3660          */
3661         if (np != NULL)
3662                 ifq2 = np->in_tqehead[fin->fin_rev];
3663         else
3664                 ifq2 = NULL;
3665
3666         if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3667                 u_32_t end, ack;
3668                 u_char tcpflags;
3669                 tcphdr_t *tcp;
3670                 int dsize;
3671
3672                 tcp = fin->fin_dp;
3673                 tcpflags = tcp->th_flags;
3674                 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
3675                         ((tcpflags & TH_SYN) ? 1 : 0) +
3676                         ((tcpflags & TH_FIN) ? 1 : 0);
3677
3678                 ack = ntohl(tcp->th_ack);
3679                 end = ntohl(tcp->th_seq) + dsize;
3680
3681                 if (SEQ_GT(ack, nat->nat_seqnext[1 - fin->fin_rev]))
3682                         nat->nat_seqnext[1 - fin->fin_rev] = ack;
3683
3684                 if (nat->nat_seqnext[fin->fin_rev] == 0)
3685                         nat->nat_seqnext[fin->fin_rev] = end;
3686
3687                 (void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3688         } else {
3689                 if (ifq2 == NULL) {
3690                         if (nat->nat_p == IPPROTO_UDP)
3691                                 ifq2 = &nat_udptq;
3692                         else if (nat->nat_p == IPPROTO_ICMP)
3693                                 ifq2 = &nat_icmptq;
3694                         else
3695                                 ifq2 = &nat_iptq;
3696                 }
3697
3698                 fr_movequeue(tqe, ifq, ifq2);
3699         }
3700         MUTEX_EXIT(&nat->nat_lock);
3701 }
3702
3703
3704 /* ------------------------------------------------------------------------ */
3705 /* Function:    fr_checknatout                                              */
3706 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3707 /*                     0 == no packet translation occurred,                 */
3708 /*                     1 == packet was successfully translated.             */
3709 /* Parameters:  fin(I)   - pointer to packet information                    */
3710 /*              passp(I) - pointer to filtering result flags                */
3711 /*                                                                          */
3712 /* Check to see if an outcoming packet should be changed.  ICMP packets are */
3713 /* first checked to see if they match an existing entry (if an error),      */
3714 /* otherwise a search of the current NAT table is made.  If neither results */
3715 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3716 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3717 /* packet header(s) as required.                                            */
3718 /* ------------------------------------------------------------------------ */
3719 int fr_checknatout(fin, passp)
3720 fr_info_t *fin;
3721 u_32_t *passp;
3722 {
3723         struct ifnet *ifp, *sifp;
3724         icmphdr_t *icmp = NULL;
3725         tcphdr_t *tcp = NULL;
3726         int rval, natfailed;
3727         ipnat_t *np = NULL;
3728         u_int nflags = 0;
3729         u_32_t ipa, iph;
3730         int natadd = 1;
3731         frentry_t *fr;
3732         nat_t *nat;
3733
3734         if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
3735                 return 0;
3736
3737         natfailed = 0;
3738         fr = fin->fin_fr;
3739         sifp = fin->fin_ifp;
3740         if (fr != NULL) {
3741                 ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3742                 if ((ifp != NULL) && (ifp != (void *)-1))
3743                         fin->fin_ifp = ifp;
3744         }
3745         ifp = fin->fin_ifp;
3746
3747         if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3748                 switch (fin->fin_p)
3749                 {
3750                 case IPPROTO_TCP :
3751                         nflags = IPN_TCP;
3752                         break;
3753                 case IPPROTO_UDP :
3754                         nflags = IPN_UDP;
3755                         break;
3756                 case IPPROTO_ICMP :
3757                         icmp = fin->fin_dp;
3758
3759                         /*
3760                          * This is an incoming packet, so the destination is
3761                          * the icmp_id and the source port equals 0
3762                          */
3763                         if (nat_icmpquerytype4(icmp->icmp_type))
3764                                 nflags = IPN_ICMPQUERY;
3765                         break;
3766                 default :
3767                         break;
3768                 }
3769                 
3770                 if ((nflags & IPN_TCPUDP))
3771                         tcp = fin->fin_dp;
3772         }
3773
3774         ipa = fin->fin_saddr;
3775
3776         READ_ENTER(&ipf_nat);
3777
3778         if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3779             (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3780                 /*EMPTY*/;
3781         else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3782                 natadd = 0;
3783         else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3784                                       fin->fin_src, fin->fin_dst))) {
3785                 nflags = nat->nat_flags;
3786         } else {
3787                 u_32_t hv, msk, nmsk;
3788
3789                 /*
3790                  * If there is no current entry in the nat table for this IP#,
3791                  * create one for it (if there is a matching rule).
3792                  */
3793                 RWLOCK_EXIT(&ipf_nat);
3794                 msk = 0xffffffff;
3795                 nmsk = nat_masks;
3796                 WRITE_ENTER(&ipf_nat);
3797 maskloop:
3798                 iph = ipa & htonl(msk);
3799                 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3800                 for (np = nat_rules[hv]; np; np = np->in_mnext)
3801                 {
3802                         if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3803                                 continue;
3804                         if (np->in_v != fin->fin_v)
3805                                 continue;
3806                         if (np->in_p && (np->in_p != fin->fin_p))
3807                                 continue;
3808                         if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3809                                 continue;
3810                         if (np->in_flags & IPN_FILTER) {
3811                                 if (!nat_match(fin, np))
3812                                         continue;
3813                         } else if ((ipa & np->in_inmsk) != np->in_inip)
3814                                 continue;
3815
3816                         if ((fr != NULL) &&
3817                             !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3818                                 continue;
3819
3820                         if (*np->in_plabel != '\0') {
3821                                 if (((np->in_flags & IPN_FILTER) == 0) &&
3822                                     (np->in_dport != tcp->th_dport))
3823                                         continue;
3824                                 if (appr_ok(fin, tcp, np) == 0)
3825                                         continue;
3826                         }
3827
3828                         if ((nat = nat_new(fin, np, NULL, nflags,
3829                                            NAT_OUTBOUND))) {
3830                                 np->in_hits++;
3831                                 break;
3832                         } else
3833                                 natfailed = -1;
3834                 }
3835                 if ((np == NULL) && (nmsk != 0)) {
3836                         while (nmsk) {
3837                                 msk <<= 1;
3838                                 if (nmsk & 0x80000000)
3839                                         break;
3840                                 nmsk <<= 1;
3841                         }
3842                         if (nmsk != 0) {
3843                                 nmsk <<= 1;
3844                                 goto maskloop;
3845                         }
3846                 }
3847                 MUTEX_DOWNGRADE(&ipf_nat);
3848         }
3849
3850         if (nat != NULL) {
3851                 rval = fr_natout(fin, nat, natadd, nflags);
3852                 if (rval == 1) {
3853                         MUTEX_ENTER(&nat->nat_lock);
3854                         nat->nat_ref++;
3855                         MUTEX_EXIT(&nat->nat_lock);
3856                         nat->nat_touched = fr_ticks;
3857                         fin->fin_nat = nat;
3858                 }
3859         } else
3860                 rval = natfailed;
3861         RWLOCK_EXIT(&ipf_nat);
3862
3863         if (rval == -1) {
3864                 if (passp != NULL)
3865                         *passp = FR_BLOCK;
3866                 fin->fin_flx |= FI_BADNAT;
3867         }
3868         fin->fin_ifp = sifp;
3869         return rval;
3870 }
3871
3872 /* ------------------------------------------------------------------------ */
3873 /* Function:    fr_natout                                                   */
3874 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3875 /*                     1 == packet was successfully translated.             */
3876 /* Parameters:  fin(I)    - pointer to packet information                   */
3877 /*              nat(I)    - pointer to NAT structure                        */
3878 /*              natadd(I) - flag indicating if it is safe to add frag cache */
3879 /*              nflags(I) - NAT flags set for this packet                   */
3880 /*                                                                          */
3881 /* Translate a packet coming "out" on an interface.                         */
3882 /* ------------------------------------------------------------------------ */
3883 int fr_natout(fin, nat, natadd, nflags)
3884 fr_info_t *fin;
3885 nat_t *nat;
3886 int natadd;
3887 u_32_t nflags;
3888 {
3889         icmphdr_t *icmp;
3890         u_short *csump;
3891         tcphdr_t *tcp;
3892         ipnat_t *np;
3893         int i;
3894
3895         tcp = NULL;
3896         icmp = NULL;
3897         csump = NULL;
3898         np = nat->nat_ptr;
3899
3900         if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3901                 (void) fr_nat_newfrag(fin, 0, nat);
3902
3903         MUTEX_ENTER(&nat->nat_lock);
3904         nat->nat_bytes[1] += fin->fin_plen;
3905         nat->nat_pkts[1]++;
3906         MUTEX_EXIT(&nat->nat_lock);
3907
3908         /*
3909          * Fix up checksums, not by recalculating them, but
3910          * simply computing adjustments.
3911          * This is only done for STREAMS based IP implementations where the
3912          * checksum has already been calculated by IP.  In all other cases,
3913          * IPFilter is called before the checksum needs calculating so there
3914          * is no call to modify whatever is in the header now.
3915          */
3916         if (fin->fin_v == 4) {
3917                 if (nflags == IPN_ICMPERR) {
3918                         u_32_t s1, s2, sumd;
3919
3920                         s1 = LONG_SUM(ntohl(fin->fin_saddr));
3921                         s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3922                         CALC_SUMD(s1, s2, sumd);
3923                         fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3924                 }
3925 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3926     defined(linux) || defined(BRIDGE_IPF)
3927                 else {
3928                         /*
3929                          * Strictly speaking, this isn't necessary on BSD
3930                          * kernels because they do checksum calculation after
3931                          * this code has run BUT if ipfilter is being used
3932                          * to do NAT as a bridge, that code doesn't exist.
3933                          */
3934                         if (nat->nat_dir == NAT_OUTBOUND)
3935                                 fix_outcksum(fin, &fin->fin_ip->ip_sum,
3936                                              nat->nat_ipsumd);
3937                         else
3938                                 fix_incksum(fin, &fin->fin_ip->ip_sum,
3939                                             nat->nat_ipsumd);
3940                 }
3941 #endif
3942         }
3943
3944         if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3945                 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3946                         tcp = fin->fin_dp;
3947
3948                         tcp->th_sport = nat->nat_outport;
3949                         fin->fin_data[0] = ntohs(nat->nat_outport);
3950                 }
3951
3952                 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3953                         icmp = fin->fin_dp;
3954                         icmp->icmp_id = nat->nat_outport;
3955                 }
3956
3957                 csump = nat_proto(fin, nat, nflags);
3958         }
3959
3960         fin->fin_ip->ip_src = nat->nat_outip;
3961
3962         nat_update(fin, nat, np);
3963
3964         /*
3965          * The above comments do not hold for layer 4 (or higher) checksums...
3966          */
3967         if (csump != NULL) {
3968                 if (nat->nat_dir == NAT_OUTBOUND)
3969                         fix_outcksum(fin, csump, nat->nat_sumd[1]);
3970                 else
3971                         fix_incksum(fin, csump, nat->nat_sumd[1]);
3972         }
3973 #ifdef  IPFILTER_SYNC
3974         ipfsync_update(SMC_NAT, fin, nat->nat_sync);
3975 #endif
3976         /* ------------------------------------------------------------- */
3977         /* A few quick notes:                                            */
3978         /*      Following are test conditions prior to calling the       */
3979         /*      appr_check routine.                                      */
3980         /*                                                               */
3981         /*      A NULL tcp indicates a non TCP/UDP packet.  When dealing */
3982         /*      with a redirect rule, we attempt to match the packet's   */
3983         /*      source port against in_dport, otherwise we'd compare the */
3984         /*      packet's destination.                                    */
3985         /* ------------------------------------------------------------- */
3986         if ((np != NULL) && (np->in_apr != NULL)) {
3987                 i = appr_check(fin, nat);
3988                 if (i == 0)
3989                         i = 1;
3990         } else
3991                 i = 1;
3992         ATOMIC_INCL(nat_stats.ns_mapped[1]);
3993         fin->fin_flx |= FI_NATED;
3994         return i;
3995 }
3996
3997
3998 /* ------------------------------------------------------------------------ */
3999 /* Function:    fr_checknatin                                               */
4000 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4001 /*                     0 == no packet translation occurred,                 */
4002 /*                     1 == packet was successfully translated.             */
4003 /* Parameters:  fin(I)   - pointer to packet information                    */
4004 /*              passp(I) - pointer to filtering result flags                */
4005 /*                                                                          */
4006 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
4007 /* first checked to see if they match an existing entry (if an error),      */
4008 /* otherwise a search of the current NAT table is made.  If neither results */
4009 /* in a match then a search for a matching NAT rule is made.  Create a new  */
4010 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
4011 /* packet header(s) as required.                                            */
4012 /* ------------------------------------------------------------------------ */
4013 int fr_checknatin(fin, passp)
4014 fr_info_t *fin;
4015 u_32_t *passp;
4016 {
4017         u_int nflags, natadd;
4018         int rval, natfailed;
4019         struct ifnet *ifp;
4020         struct in_addr in;
4021         icmphdr_t *icmp;
4022         tcphdr_t *tcp;
4023         u_short dport;
4024         ipnat_t *np;
4025         nat_t *nat;
4026         u_32_t iph;
4027
4028         if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
4029                 return 0;
4030
4031         tcp = NULL;
4032         icmp = NULL;
4033         dport = 0;
4034         natadd = 1;
4035         nflags = 0;
4036         natfailed = 0;
4037         ifp = fin->fin_ifp;
4038
4039         if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4040                 switch (fin->fin_p)
4041                 {
4042                 case IPPROTO_TCP :
4043                         nflags = IPN_TCP;
4044                         break;
4045                 case IPPROTO_UDP :
4046                         nflags = IPN_UDP;
4047                         break;
4048                 case IPPROTO_ICMP :
4049                         icmp = fin->fin_dp;
4050
4051                         /*
4052                          * This is an incoming packet, so the destination is
4053                          * the icmp_id and the source port equals 0
4054                          */
4055                         if (nat_icmpquerytype4(icmp->icmp_type)) {
4056                                 nflags = IPN_ICMPQUERY;
4057                                 dport = icmp->icmp_id;  
4058                         } break;
4059                 default :
4060                         break;
4061                 }
4062                 
4063                 if ((nflags & IPN_TCPUDP)) {
4064                         tcp = fin->fin_dp;
4065                         dport = tcp->th_dport;
4066                 }
4067         }
4068
4069         in = fin->fin_dst;
4070
4071         READ_ENTER(&ipf_nat);
4072
4073         if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4074             (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4075                 /*EMPTY*/;
4076         else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4077                 natadd = 0;
4078         else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4079                                      fin->fin_src, in))) {
4080                 nflags = nat->nat_flags;
4081         } else {
4082                 u_32_t hv, msk, rmsk;
4083
4084                 RWLOCK_EXIT(&ipf_nat);
4085                 rmsk = rdr_masks;
4086                 msk = 0xffffffff;
4087                 WRITE_ENTER(&ipf_nat);
4088                 /*
4089                  * If there is no current entry in the nat table for this IP#,
4090                  * create one for it (if there is a matching rule).
4091                  */
4092 maskloop:
4093                 iph = in.s_addr & htonl(msk);
4094                 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
4095                 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
4096                         if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4097                                 continue;
4098                         if (np->in_v != fin->fin_v)
4099                                 continue;
4100                         if (np->in_p && (np->in_p != fin->fin_p))
4101                                 continue;
4102                         if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4103                                 continue;
4104                         if (np->in_flags & IPN_FILTER) {
4105                                 if (!nat_match(fin, np))
4106                                         continue;
4107                         } else {
4108                                 if ((in.s_addr & np->in_outmsk) != np->in_outip)
4109                                         continue;
4110                                 if (np->in_pmin &&
4111                                     ((ntohs(np->in_pmax) < ntohs(dport)) ||
4112                                      (ntohs(dport) < ntohs(np->in_pmin))))
4113                                         continue;
4114                         }
4115
4116                         if (*np->in_plabel != '\0') {
4117                                 if (!appr_ok(fin, tcp, np)) {
4118                                         continue;
4119                                 }
4120                         }
4121
4122                         nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4123                         if (nat != NULL) {
4124                                 np->in_hits++;
4125                                 break;
4126                         } else
4127                                 natfailed = -1;
4128                 }
4129
4130                 if ((np == NULL) && (rmsk != 0)) {
4131                         while (rmsk) {
4132                                 msk <<= 1;
4133                                 if (rmsk & 0x80000000)
4134                                         break;
4135                                 rmsk <<= 1;
4136                         }
4137                         if (rmsk != 0) {
4138                                 rmsk <<= 1;
4139                                 goto maskloop;
4140                         }
4141                 }
4142                 MUTEX_DOWNGRADE(&ipf_nat);
4143         }
4144         if (nat != NULL) {
4145                 rval = fr_natin(fin, nat, natadd, nflags);
4146                 if (rval == 1) {
4147                         MUTEX_ENTER(&nat->nat_lock);
4148                         nat->nat_ref++;
4149                         MUTEX_EXIT(&nat->nat_lock);
4150                         nat->nat_touched = fr_ticks;
4151                         fin->fin_nat = nat;
4152                 }
4153         } else
4154                 rval = natfailed;
4155         RWLOCK_EXIT(&ipf_nat);
4156
4157         if (rval == -1) {
4158                 if (passp != NULL)
4159                         *passp = FR_BLOCK;
4160                 fin->fin_flx |= FI_BADNAT;
4161         }
4162         return rval;
4163 }
4164
4165
4166 /* ------------------------------------------------------------------------ */
4167 /* Function:    fr_natin                                                    */
4168 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4169 /*                     1 == packet was successfully translated.             */
4170 /* Parameters:  fin(I)    - pointer to packet information                   */
4171 /*              nat(I)    - pointer to NAT structure                        */
4172 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4173 /*              nflags(I) - NAT flags set for this packet                   */
4174 /* Locks Held:  ipf_nat (READ)                                              */
4175 /*                                                                          */
4176 /* Translate a packet coming "in" on an interface.                          */
4177 /* ------------------------------------------------------------------------ */
4178 int fr_natin(fin, nat, natadd, nflags)
4179 fr_info_t *fin;
4180 nat_t *nat;
4181 int natadd;
4182 u_32_t nflags;
4183 {
4184         icmphdr_t *icmp;
4185         u_short *csump;
4186         tcphdr_t *tcp;
4187         ipnat_t *np;
4188         int i;
4189
4190         tcp = NULL;
4191         csump = NULL;
4192         np = nat->nat_ptr;
4193         fin->fin_fr = nat->nat_fr;
4194
4195         if (np != NULL) {
4196                 if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4197                         (void) fr_nat_newfrag(fin, 0, nat);
4198
4199         /* ------------------------------------------------------------- */
4200         /* A few quick notes:                                            */
4201         /*      Following are test conditions prior to calling the       */
4202         /*      appr_check routine.                                      */
4203         /*                                                               */
4204         /*      A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4205         /*      with a map rule, we attempt to match the packet's        */
4206         /*      source port against in_dport, otherwise we'd compare the */
4207         /*      packet's destination.                                    */
4208         /* ------------------------------------------------------------- */
4209                 if (np->in_apr != NULL) {
4210                         i = appr_check(fin, nat);
4211                         if (i == -1) {
4212                                 return -1;
4213                         }
4214                 }
4215         }
4216
4217 #ifdef  IPFILTER_SYNC
4218         ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4219 #endif
4220
4221         MUTEX_ENTER(&nat->nat_lock);
4222         nat->nat_bytes[0] += fin->fin_plen;
4223         nat->nat_pkts[0]++;
4224         MUTEX_EXIT(&nat->nat_lock);
4225
4226         fin->fin_ip->ip_dst = nat->nat_inip;
4227         fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4228         if (nflags & IPN_TCPUDP)
4229                 tcp = fin->fin_dp;
4230
4231         /*
4232          * Fix up checksums, not by recalculating them, but
4233          * simply computing adjustments.
4234          * Why only do this for some platforms on inbound packets ?
4235          * Because for those that it is done, IP processing is yet to happen
4236          * and so the IPv4 header checksum has not yet been evaluated.
4237          * Perhaps it should always be done for the benefit of things like
4238          * fast forwarding (so that it doesn't need to be recomputed) but with
4239          * header checksum offloading, perhaps it is a moot point.
4240          */
4241 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4242      defined(__osf__) || defined(linux)
4243         if (nat->nat_dir == NAT_OUTBOUND)
4244                 fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4245         else
4246                 fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4247 #endif
4248
4249         if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4250                 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4251                         tcp->th_dport = nat->nat_inport;
4252                         fin->fin_data[1] = ntohs(nat->nat_inport);
4253                 }
4254
4255
4256                 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4257                         icmp = fin->fin_dp;
4258
4259                         icmp->icmp_id = nat->nat_inport;
4260                 }
4261
4262                 csump = nat_proto(fin, nat, nflags);
4263         }
4264
4265         nat_update(fin, nat, np);
4266
4267         /*
4268          * The above comments do not hold for layer 4 (or higher) checksums...
4269          */
4270         if (csump != NULL) {
4271                 if (nat->nat_dir == NAT_OUTBOUND)
4272                         fix_incksum(fin, csump, nat->nat_sumd[0]);
4273                 else
4274                         fix_outcksum(fin, csump, nat->nat_sumd[0]);
4275         }
4276         ATOMIC_INCL(nat_stats.ns_mapped[0]);
4277         fin->fin_flx |= FI_NATED;
4278         if (np != NULL && np->in_tag.ipt_num[0] != 0)
4279                 fin->fin_nattag = &np->in_tag;
4280         return 1;
4281 }
4282
4283
4284 /* ------------------------------------------------------------------------ */
4285 /* Function:    nat_proto                                                   */
4286 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4287 /*                         NULL if the transport protocol is not recognised */
4288 /*                         as needing a checksum update.                    */
4289 /* Parameters:  fin(I)    - pointer to packet information                   */
4290 /*              nat(I)    - pointer to NAT structure                        */
4291 /*              nflags(I) - NAT flags set for this packet                   */
4292 /*                                                                          */
4293 /* Return the pointer to the checksum field for each protocol so understood.*/
4294 /* If support for making other changes to a protocol header is required,    */
4295 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4296 /* TCP down to a specific value, then do it from here.                      */
4297 /* ------------------------------------------------------------------------ */
4298 u_short *nat_proto(fin, nat, nflags)
4299 fr_info_t *fin;
4300 nat_t *nat;
4301 u_int nflags;
4302 {
4303         icmphdr_t *icmp;
4304         u_short *csump;
4305         tcphdr_t *tcp;
4306         udphdr_t *udp;
4307
4308         csump = NULL;
4309         if (fin->fin_out == 0) {
4310                 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4311         } else {
4312                 fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4313         }
4314
4315         switch (fin->fin_p)
4316         {
4317         case IPPROTO_TCP :
4318                 tcp = fin->fin_dp;
4319
4320                 csump = &tcp->th_sum;
4321
4322                 /*
4323                  * Do a MSS CLAMPING on a SYN packet,
4324                  * only deal IPv4 for now.
4325                  */
4326                 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4327                         nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
4328
4329                 break;
4330
4331         case IPPROTO_UDP :
4332                 udp = fin->fin_dp;
4333
4334                 if (udp->uh_sum)
4335                         csump = &udp->uh_sum;
4336                 break;
4337
4338         case IPPROTO_ICMP :
4339                 icmp = fin->fin_dp;
4340
4341                 if ((nflags & IPN_ICMPQUERY) != 0) {
4342                         if (icmp->icmp_cksum != 0)
4343                                 csump = &icmp->icmp_cksum;
4344                 }
4345                 break;
4346         }
4347         return csump;
4348 }
4349
4350
4351 /* ------------------------------------------------------------------------ */
4352 /* Function:    fr_natunload                                                */
4353 /* Returns:     Nil                                                         */
4354 /* Parameters:  Nil                                                         */
4355 /*                                                                          */
4356 /* Free all memory used by NAT structures allocated at runtime.             */
4357 /* ------------------------------------------------------------------------ */
4358 void fr_natunload()
4359 {
4360         ipftq_t *ifq, *ifqnext;
4361
4362         (void) nat_clearlist();
4363         (void) nat_flushtable();
4364
4365         /*
4366          * Proxy timeout queues are not cleaned here because although they
4367          * exist on the NAT list, appr_unload is called after fr_natunload
4368          * and the proxies actually are responsible for them being created.
4369          * Should the proxy timeouts have their own list?  There's no real
4370          * justification as this is the only complication.
4371          */
4372         for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4373                 ifqnext = ifq->ifq_next;
4374                 if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4375                     (fr_deletetimeoutqueue(ifq) == 0))
4376                         fr_freetimeoutqueue(ifq);
4377         }
4378
4379         if (nat_table[0] != NULL) {
4380                 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4381                 nat_table[0] = NULL;
4382         }
4383         if (nat_table[1] != NULL) {
4384                 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4385                 nat_table[1] = NULL;
4386         }
4387         if (nat_rules != NULL) {
4388                 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4389                 nat_rules = NULL;
4390         }
4391         if (rdr_rules != NULL) {
4392                 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4393                 rdr_rules = NULL;
4394         }
4395         if (ipf_hm_maptable != NULL) {
4396                 KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4397                 ipf_hm_maptable = NULL;
4398         }
4399         if (nat_stats.ns_bucketlen[0] != NULL) {
4400                 KFREES(nat_stats.ns_bucketlen[0],
4401                        sizeof(u_long *) * ipf_nattable_sz);
4402                 nat_stats.ns_bucketlen[0] = NULL;
4403         }
4404         if (nat_stats.ns_bucketlen[1] != NULL) {
4405                 KFREES(nat_stats.ns_bucketlen[1],
4406                        sizeof(u_long *) * ipf_nattable_sz);
4407                 nat_stats.ns_bucketlen[1] = NULL;
4408         }
4409
4410         if (fr_nat_maxbucket_reset == 1)
4411                 fr_nat_maxbucket = 0;
4412
4413         if (fr_nat_init == 1) {
4414                 fr_nat_init = 0;
4415                 fr_sttab_destroy(nat_tqb);
4416
4417                 RW_DESTROY(&ipf_natfrag);
4418                 RW_DESTROY(&ipf_nat);
4419
4420                 MUTEX_DESTROY(&ipf_nat_new);
4421                 MUTEX_DESTROY(&ipf_natio);
4422
4423                 MUTEX_DESTROY(&nat_udptq.ifq_lock);
4424                 MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4425                 MUTEX_DESTROY(&nat_iptq.ifq_lock);
4426         }
4427 }
4428
4429
4430 /* ------------------------------------------------------------------------ */
4431 /* Function:    fr_natexpire                                                */
4432 /* Returns:     Nil                                                         */
4433 /* Parameters:  Nil                                                         */
4434 /*                                                                          */
4435 /* Check all of the timeout queues for entries at the top which need to be  */
4436 /* expired.                                                                 */
4437 /* ------------------------------------------------------------------------ */
4438 void fr_natexpire()
4439 {
4440         ipftq_t *ifq, *ifqnext;
4441         ipftqent_t *tqe, *tqn;
4442         int i;
4443         SPL_INT(s);
4444
4445         SPL_NET(s);
4446         WRITE_ENTER(&ipf_nat);
4447         for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4448                 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4449                         if (tqe->tqe_die > fr_ticks)
4450                                 break;
4451                         tqn = tqe->tqe_next;
4452                         nat_delete(tqe->tqe_parent, NL_EXPIRE);
4453                 }
4454         }
4455
4456         for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4457                 ifqnext = ifq->ifq_next;
4458
4459                 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4460                         if (tqe->tqe_die > fr_ticks)
4461                                 break;
4462                         tqn = tqe->tqe_next;
4463                         nat_delete(tqe->tqe_parent, NL_EXPIRE);
4464                 }
4465         }
4466
4467         for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4468                 ifqnext = ifq->ifq_next;
4469
4470                 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4471                     (ifq->ifq_ref == 0)) {
4472                         fr_freetimeoutqueue(ifq);
4473                 }
4474         }
4475
4476         if (fr_nat_doflush != 0) {
4477                 nat_extraflush(2);
4478                 fr_nat_doflush = 0;
4479         }
4480
4481         RWLOCK_EXIT(&ipf_nat);
4482         SPL_X(s);
4483 }
4484
4485
4486 /* ------------------------------------------------------------------------ */
4487 /* Function:    fr_natsync                                                  */
4488 /* Returns:     Nil                                                         */
4489 /* Parameters:  ifp(I) - pointer to network interface                       */
4490 /*                                                                          */
4491 /* Walk through all of the currently active NAT sessions, looking for those */
4492 /* which need to have their translated address updated.                     */
4493 /* ------------------------------------------------------------------------ */
4494 void fr_natsync(ifp)
4495 void *ifp;
4496 {
4497         u_32_t sum1, sum2, sumd;
4498         struct in_addr in;
4499         ipnat_t *n;
4500         nat_t *nat;
4501         void *ifp2;
4502         SPL_INT(s);
4503
4504         if (fr_running <= 0)
4505                 return;
4506
4507         /*
4508          * Change IP addresses for NAT sessions for any protocol except TCP
4509          * since it will break the TCP connection anyway.  The only rules
4510          * which will get changed are those which are "map ... -> 0/32",
4511          * where the rule specifies the address is taken from the interface.
4512          */
4513         SPL_NET(s);
4514         WRITE_ENTER(&ipf_nat);
4515
4516         if (fr_running <= 0) {
4517                 RWLOCK_EXIT(&ipf_nat);
4518                 return;
4519         }
4520
4521         for (nat = nat_instances; nat; nat = nat->nat_next) {
4522                 if ((nat->nat_flags & IPN_TCP) != 0)
4523                         continue;
4524                 n = nat->nat_ptr;
4525                 if ((n == NULL) ||
4526                     (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4527                         continue;
4528                 if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4529                      (ifp == nat->nat_ifps[1]))) {
4530                         nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4531                         if (nat->nat_ifnames[1][0] != '\0') {
4532                                 nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4533                                                           4);
4534                         } else
4535                                 nat->nat_ifps[1] = nat->nat_ifps[0];
4536                         ifp2 = nat->nat_ifps[0];
4537                         if (ifp2 == NULL)
4538                                 continue;
4539
4540                         /*
4541                          * Change the map-to address to be the same as the
4542                          * new one.
4543                          */
4544                         sum1 = nat->nat_outip.s_addr;
4545                         if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4546                                 nat->nat_outip = in;
4547                         sum2 = nat->nat_outip.s_addr;
4548
4549                         if (sum1 == sum2)
4550                                 continue;
4551                         /*
4552                          * Readjust the checksum adjustment to take into
4553                          * account the new IP#.
4554                          */
4555                         CALC_SUMD(sum1, sum2, sumd);
4556                         /* XXX - dont change for TCP when solaris does
4557                          * hardware checksumming.
4558                          */
4559                         sumd += nat->nat_sumd[0];
4560                         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4561                         nat->nat_sumd[1] = nat->nat_sumd[0];
4562                 }
4563         }
4564
4565         for (n = nat_list; (n != NULL); n = n->in_next) {
4566                 if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4567                         n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4568                 if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4569                         n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4570         }
4571         RWLOCK_EXIT(&ipf_nat);
4572         SPL_X(s);
4573 }
4574
4575
/* ------------------------------------------------------------------------ */
/* Function:    nat_icmpquerytype4                                          */
/* Returns:     int - 1 == query/reply type, 0 == anything else             */
/* Parameters:  icmptype(I) - ICMP type number                              */
/*                                                                          */
/* Report whether icmptype is one of the ICMP query or reply types that the */
/* NAT code treats as a query (echo, timestamp, information, mask).         */
/* ------------------------------------------------------------------------ */
static int nat_icmpquerytype4(icmptype)
int icmptype;
{
	int isquery = 0;

	/*
	 * The ICMP query NAT code needs the query and its reply to match
	 * the same NAT rule.  The NAT structure does not record the ICMP
	 * type, and one NAT structure serves every ICMP type sharing the
	 * same source, destination and id, so replies are simply counted
	 * as queries too.  Odd as that sounds, it is how the IPv4
	 * specification defines them.
	 *
	 * Router advertisement/solicitation is not supported: it would
	 * require rewriting the ICMP data section.
	 */
	switch (icmptype)
	{
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
	case ICMP_TSTAMP:
	case ICMP_TSTAMPREPLY:
	case ICMP_IREQ:
	case ICMP_IREQREPLY:
	case ICMP_MASKREQ:
	case ICMP_MASKREPLY:
		isquery = 1;
		break;
	default:
		break;
	}

	return isquery;
}
4616
4617
4618 /* ------------------------------------------------------------------------ */
4619 /* Function:    nat_log                                                     */
4620 /* Returns:     Nil                                                         */
4621 /* Parameters:  nat(I)  - pointer to NAT structure                          */
4622 /*              type(I) - type of log entry to create                       */
4623 /*                                                                          */
4624 /* Creates a NAT log entry.                                                 */
4625 /* ------------------------------------------------------------------------ */
4626 void nat_log(nat, type)
4627 struct nat *nat;
4628 u_int type;
4629 {
4630 #ifdef  IPFILTER_LOG
4631 # ifndef LARGE_NAT
4632         struct ipnat *np;
4633         int rulen;
4634 # endif
4635         struct natlog natl;
4636         void *items[1];
4637         size_t sizes[1];
4638         int types[1];
4639
4640         natl.nl_inip = nat->nat_inip;
4641         natl.nl_outip = nat->nat_outip;
4642         natl.nl_origip = nat->nat_oip;
4643         natl.nl_bytes[0] = nat->nat_bytes[0];
4644         natl.nl_bytes[1] = nat->nat_bytes[1];
4645         natl.nl_pkts[0] = nat->nat_pkts[0];
4646         natl.nl_pkts[1] = nat->nat_pkts[1];
4647         natl.nl_origport = nat->nat_oport;
4648         natl.nl_inport = nat->nat_inport;
4649         natl.nl_outport = nat->nat_outport;
4650         natl.nl_p = nat->nat_p;
4651         natl.nl_type = type;
4652         natl.nl_rule = -1;
4653 # ifndef LARGE_NAT
4654         if (nat->nat_ptr != NULL) {
4655                 for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4656                         if (np == nat->nat_ptr) {
4657                                 natl.nl_rule = rulen;
4658                                 break;
4659                         }
4660         }
4661 # endif
4662         items[0] = &natl;
4663         sizes[0] = sizeof(natl);
4664         types[0] = 0;
4665
4666         (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4667 #endif
4668 }
4669
4670
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*                                                                          */
/* OpenBSD compatibility hook: forwards the interface to frsync() so that   */
/* interface references held within IPFilter get updated.                   */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp)
void *ifp;
{
	frsync(ifp);
}
#endif
4687
4688
4689 /* ------------------------------------------------------------------------ */
4690 /* Function:    fr_ipnatderef                                               */
4691 /* Returns:     Nil                                                         */
4692 /* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4693 /* Write Locks: ipf_nat                                                     */
4694 /*                                                                          */
4695 /* ------------------------------------------------------------------------ */
4696 void fr_ipnatderef(inp)
4697 ipnat_t **inp;
4698 {
4699         ipnat_t *in;
4700
4701         in = *inp;
4702         *inp = NULL;
4703         in->in_space++;
4704         in->in_use--;
4705         if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4706                 if (in->in_apr)
4707                         appr_free(in->in_apr);
4708                 MUTEX_DESTROY(&in->in_lock);
4709                 KFREE(in);
4710                 nat_stats.ns_rules--;
4711 #if SOLARIS && !defined(_INET_IP_STACK_H)
4712                 if (nat_stats.ns_rules == 0)
4713                         pfil_delayed_copy = 1;
4714 #endif
4715         }
4716 }
4717
4718
4719 /* ------------------------------------------------------------------------ */
4720 /* Function:    fr_natderef                                                 */
4721 /* Returns:     Nil                                                         */
4722 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4723 /*                                                                          */
4724 /* Decrement the reference counter for this NAT table entry and free it if  */
4725 /* there are no more things using it.                                       */
4726 /*                                                                          */
4727 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4728 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4729 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4730 /* because nat_delete() will do that and send nat_ref to -1.                */
4731 /*                                                                          */
4732 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4733 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4734 /* ------------------------------------------------------------------------ */
4735 void fr_natderef(natp)
4736 nat_t **natp;
4737 {
4738         nat_t *nat;
4739
4740         nat = *natp;
4741         *natp = NULL;
4742
4743         MUTEX_ENTER(&nat->nat_lock);
4744         if (nat->nat_ref > 1) {
4745                 nat->nat_ref--;
4746                 MUTEX_EXIT(&nat->nat_lock);
4747                 return;
4748         }
4749         MUTEX_EXIT(&nat->nat_lock);
4750
4751         WRITE_ENTER(&ipf_nat);
4752         nat_delete(nat, NL_EXPIRE);
4753         RWLOCK_EXIT(&ipf_nat);
4754 }
4755
4756
4757 /* ------------------------------------------------------------------------ */
4758 /* Function:    fr_natclone                                                 */
4759 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
4760 /*                           else pointer to new state structure            */
4761 /* Parameters:  fin(I) - pointer to packet information                      */
4762 /*              is(I)  - pointer to master state structure                  */
4763 /* Write Lock:  ipf_nat                                                     */
4764 /*                                                                          */
4765 /* Create a "duplcate" state table entry from the master.                   */
4766 /* ------------------------------------------------------------------------ */
4767 static nat_t *fr_natclone(fin, nat)
4768 fr_info_t *fin;
4769 nat_t *nat;
4770 {
4771         frentry_t *fr;
4772         nat_t *clone;
4773         ipnat_t *np;
4774
4775         KMALLOC(clone, nat_t *);
4776         if (clone == NULL)
4777                 return NULL;
4778         bcopy((char *)nat, (char *)clone, sizeof(*clone));
4779
4780         MUTEX_NUKE(&clone->nat_lock);
4781
4782         clone->nat_aps = NULL;
4783         /*
4784          * Initialize all these so that nat_delete() doesn't cause a crash.
4785          */
4786         clone->nat_tqe.tqe_pnext = NULL;
4787         clone->nat_tqe.tqe_next = NULL;
4788         clone->nat_tqe.tqe_ifq = NULL;
4789         clone->nat_tqe.tqe_parent = clone;
4790
4791         clone->nat_flags &= ~SI_CLONE;
4792         clone->nat_flags |= SI_CLONED;
4793
4794         if (clone->nat_hm)
4795                 clone->nat_hm->hm_ref++;
4796
4797         if (nat_insert(clone, fin->fin_rev) == -1) {
4798                 KFREE(clone);
4799                 return NULL;
4800         }
4801         np = clone->nat_ptr;
4802         if (np != NULL) {
4803                 if (nat_logging)
4804                         nat_log(clone, (u_int)np->in_redir);
4805                 np->in_use++;
4806         }
4807         fr = clone->nat_fr;
4808         if (fr != NULL) {
4809                 MUTEX_ENTER(&fr->fr_lock);
4810                 fr->fr_ref++;
4811                 MUTEX_EXIT(&fr->fr_lock);
4812         }
4813
4814         /*
4815          * Because the clone is created outside the normal loop of things and
4816          * TCP has special needs in terms of state, initialise the timeout
4817          * state of the new NAT from here.
4818          */
4819         if (clone->nat_p == IPPROTO_TCP) {
4820                 (void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4821                                   clone->nat_flags);
4822         }
4823 #ifdef  IPFILTER_SYNC
4824         clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4825 #endif
4826         if (nat_logging)
4827                 nat_log(clone, NL_CLONE);
4828         return clone;
4829 }
4830
4831
4832 /* ------------------------------------------------------------------------ */
4833 /* Function:   nat_wildok                                                   */
4834 /* Returns:    int - 1 == packet's ports match wildcards                    */
4835 /*                   0 == packet's ports don't match wildcards              */
4836 /* Parameters: nat(I)   - NAT entry                                         */
4837 /*             sport(I) - source port                                       */
4838 /*             dport(I) - destination port                                  */
4839 /*             flags(I) - wildcard flags                                    */
4840 /*             dir(I)   - packet direction                                  */
4841 /*                                                                          */
4842 /* Use NAT entry and packet direction to determine which combination of     */
4843 /* wildcard flags should be used.                                           */
4844 /* ------------------------------------------------------------------------ */
4845 static int nat_wildok(nat, sport, dport, flags, dir)
4846 nat_t *nat;
4847 int sport;
4848 int dport;
4849 int flags;
4850 int dir;
4851 {
4852         /*
4853          * When called by       dir is set to
4854          * nat_inlookup         NAT_INBOUND (0)
4855          * nat_outlookup        NAT_OUTBOUND (1)
4856          *
4857          * We simply combine the packet's direction in dir with the original
4858          * "intended" direction of that NAT entry in nat->nat_dir to decide
4859          * which combination of wildcard flags to allow.
4860          */
4861
4862         switch ((dir << 1) | nat->nat_dir)
4863         {
4864         case 3: /* outbound packet / outbound entry */
4865                 if (((nat->nat_inport == sport) ||
4866                     (flags & SI_W_SPORT)) &&
4867                     ((nat->nat_oport == dport) ||
4868                     (flags & SI_W_DPORT)))
4869                         return 1;
4870                 break;
4871         case 2: /* outbound packet / inbound entry */
4872                 if (((nat->nat_outport == sport) ||
4873                     (flags & SI_W_DPORT)) &&
4874                     ((nat->nat_oport == dport) ||
4875                     (flags & SI_W_SPORT)))
4876                         return 1;
4877                 break;
4878         case 1: /* inbound packet / outbound entry */
4879                 if (((nat->nat_oport == sport) ||
4880                     (flags & SI_W_DPORT)) &&
4881                     ((nat->nat_outport == dport) ||
4882                     (flags & SI_W_SPORT)))
4883                         return 1;
4884                 break;
4885         case 0: /* inbound packet / inbound entry */
4886                 if (((nat->nat_oport == sport) ||
4887                     (flags & SI_W_SPORT)) &&
4888                     ((nat->nat_outport == dport) ||
4889                     (flags & SI_W_DPORT)))
4890                         return 1;
4891                 break;
4892         default:
4893                 break;
4894         }
4895
4896         return(0);
4897 }
4898
4899
4900 /* ------------------------------------------------------------------------ */
4901 /* Function:    nat_mssclamp                                                */
4902 /* Returns:     Nil                                                         */
4903 /* Parameters:  tcp(I)    - pointer to TCP header                           */
4904 /*              maxmss(I) - value to clamp the TCP MSS to                   */
4905 /*              fin(I)    - pointer to packet information                   */
4906 /*              csump(I)  - pointer to TCP checksum                         */
4907 /*                                                                          */
4908 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
4909 /* then the TCP header checksum will be updated to reflect the change in    */
4910 /* the MSS.                                                                 */
4911 /* ------------------------------------------------------------------------ */
4912 static void nat_mssclamp(tcp, maxmss, fin, csump)
4913 tcphdr_t *tcp;
4914 u_32_t maxmss;
4915 fr_info_t *fin;
4916 u_short *csump;
4917 {
4918         u_char *cp, *ep, opt;
4919         int hlen, advance;
4920         u_32_t mss, sumd;
4921
4922         hlen = TCP_OFF(tcp) << 2;
4923         if (hlen > sizeof(*tcp)) {
4924                 cp = (u_char *)tcp + sizeof(*tcp);
4925                 ep = (u_char *)tcp + hlen;
4926
4927                 while (cp < ep) {
4928                         opt = cp[0];
4929                         if (opt == TCPOPT_EOL)
4930                                 break;
4931                         else if (opt == TCPOPT_NOP) {
4932                                 cp++;
4933                                 continue;
4934                         }
4935
4936                         if (cp + 1 >= ep)
4937                                 break;
4938                         advance = cp[1];
4939                         if ((cp + advance > ep) || (advance <= 0))
4940                                 break;
4941                         switch (opt)
4942                         {
4943                         case TCPOPT_MAXSEG:
4944                                 if (advance != 4)
4945                                         break;
4946                                 mss = cp[2] * 256 + cp[3];
4947                                 if (mss > maxmss) {
4948                                         cp[2] = maxmss / 256;
4949                                         cp[3] = maxmss & 0xff;
4950                                         CALC_SUMD(mss, maxmss, sumd);
4951                                         fix_outcksum(fin, csump, sumd);
4952                                 }
4953                                 break;
4954                         default:
4955                                 /* ignore unknown options */
4956                                 break;
4957                         }
4958
4959                         cp += advance;
4960                 }
4961         }
4962 }
4963
4964
4965 /* ------------------------------------------------------------------------ */
4966 /* Function:    fr_setnatqueue                                              */
4967 /* Returns:     Nil                                                         */
4968 /* Parameters:  nat(I)- pointer to NAT structure                            */
4969 /*              rev(I) - forward(0) or reverse(1) direction                 */
4970 /* Locks:       ipf_nat (read or write)                                     */
4971 /*                                                                          */
4972 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
4973 /* determining which queue it should be placed on.                          */
4974 /* ------------------------------------------------------------------------ */
4975 void fr_setnatqueue(nat, rev)
4976 nat_t *nat;
4977 int rev;
4978 {
4979         ipftq_t *oifq, *nifq;
4980
4981         if (nat->nat_ptr != NULL)
4982                 nifq = nat->nat_ptr->in_tqehead[rev];
4983         else
4984                 nifq = NULL;
4985
4986         if (nifq == NULL) {
4987                 switch (nat->nat_p)
4988                 {
4989                 case IPPROTO_UDP :
4990                         nifq = &nat_udptq;
4991                         break;
4992                 case IPPROTO_ICMP :
4993                         nifq = &nat_icmptq;
4994                         break;
4995                 case IPPROTO_TCP :
4996                         nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
4997                         break;
4998                 default :
4999                         nifq = &nat_iptq;
5000                         break;
5001                 }
5002         }
5003
5004         oifq = nat->nat_tqe.tqe_ifq;
5005         /*
5006          * If it's currently on a timeout queue, move it from one queue to
5007          * another, else put it on the end of the newly determined queue.
5008          */
5009         if (oifq != NULL)
5010                 fr_movequeue(&nat->nat_tqe, oifq, nifq);
5011         else
5012                 fr_queueappend(&nat->nat_tqe, nifq, nat);
5013         return;
5014 }
5015
5016
/* ------------------------------------------------------------------------ */
/* Function:    nat_getnext                                                 */
/* Returns:     int - 0 == ok, else error                                   */
/* Parameters:  t(I)   - pointer to ipftoken structure                      */
/*              itp(I) - pointer to ipfgeniter_t structure                  */
/*                                                                          */
/* Fetch the next nat/ipnat structure pointer from the linked list and      */
/* copy it out to the storage space pointed to by itp_data.  The next item  */
/* in the list to look at is put back in the ipftoken structure.            */
/* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
/* ipf_freetoken will call a deref function for us and we don't want to call*/
/* that twice (second time would be in the second switch statement below).  */
/* ------------------------------------------------------------------------ */
5030 static int nat_getnext(t, itp)
5031 ipftoken_t *t;
5032 ipfgeniter_t *itp;
5033 {
5034         hostmap_t *hm, *nexthm = NULL, zerohm;
5035         ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5036         nat_t *nat, *nextnat = NULL, zeronat;
5037         int error = 0, count;
5038         char *dst;
5039
5040         count = itp->igi_nitems;
5041         if (count < 1)
5042                 return ENOSPC;
5043
5044         READ_ENTER(&ipf_nat);
5045
5046         switch (itp->igi_type)
5047         {
5048         case IPFGENITER_HOSTMAP :
5049                 hm = t->ipt_data;
5050                 if (hm == NULL) {
5051                         nexthm = ipf_hm_maplist;
5052                 } else {
5053                         nexthm = hm->hm_next;
5054                 }
5055                 break;
5056
5057         case IPFGENITER_IPNAT :
5058                 ipn = t->ipt_data;
5059                 if (ipn == NULL) {
5060                         nextipnat = nat_list;
5061                 } else {
5062                         nextipnat = ipn->in_next;
5063                 }
5064                 break;
5065
5066         case IPFGENITER_NAT :
5067                 nat = t->ipt_data;
5068                 if (nat == NULL) {
5069                         nextnat = nat_instances;
5070                 } else {
5071                         nextnat = nat->nat_next;
5072                 }
5073                 break;
5074         default :
5075                 RWLOCK_EXIT(&ipf_nat);
5076                 return EINVAL;
5077         }
5078
5079         dst = itp->igi_data;
5080         for (;;) {
5081                 switch (itp->igi_type)
5082                 {
5083                 case IPFGENITER_HOSTMAP :
5084                         if (nexthm != NULL) {
5085                                 if (count == 1) {
5086                                         ATOMIC_INC32(nexthm->hm_ref);
5087                                         t->ipt_data = nexthm;
5088                                 }
5089                         } else {
5090                                 bzero(&zerohm, sizeof(zerohm));
5091                                 nexthm = &zerohm;
5092                                 count = 1;
5093                                 t->ipt_data = NULL;
5094                         }
5095                         break;
5096
5097                 case IPFGENITER_IPNAT :
5098                         if (nextipnat != NULL) {
5099                                 if (count == 1) {
5100                                         MUTEX_ENTER(&nextipnat->in_lock);
5101                                         nextipnat->in_use++;
5102                                         MUTEX_EXIT(&nextipnat->in_lock);
5103                                         t->ipt_data = nextipnat;
5104                                 }
5105                         } else {
5106                                 bzero(&zeroipn, sizeof(zeroipn));
5107                                 nextipnat = &zeroipn;
5108                                 count = 1;
5109                                 t->ipt_data = NULL;
5110                         }
5111                         break;
5112
5113                 case IPFGENITER_NAT :
5114                         if (nextnat != NULL) {
5115                                 if (count == 1) {
5116                                         MUTEX_ENTER(&nextnat->nat_lock);
5117                                         nextnat->nat_ref++;
5118                                         MUTEX_EXIT(&nextnat->nat_lock);
5119                                         t->ipt_data = nextnat;
5120                                 }
5121                         } else {
5122                                 bzero(&zeronat, sizeof(zeronat));
5123                                 nextnat = &zeronat;
5124                                 count = 1;
5125                                 t->ipt_data = NULL;
5126                         }
5127                         break;
5128                 default :
5129                         break;
5130                 }
5131                 RWLOCK_EXIT(&ipf_nat);
5132
5133                 /*
5134                  * Copying out to user space needs to be done without the lock.
5135                  */
5136                 switch (itp->igi_type)
5137                 {
5138                 case IPFGENITER_HOSTMAP :
5139                         error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5140                         if (error != 0)
5141                                 error = EFAULT;
5142                         else
5143                                 dst += sizeof(*nexthm);
5144                         break;
5145
5146                 case IPFGENITER_IPNAT :
5147                         error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5148                         if (error != 0)
5149                                 error = EFAULT;
5150                         else
5151                                 dst += sizeof(*nextipnat);
5152                         break;
5153
5154                 case IPFGENITER_NAT :
5155                         error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5156                         if (error != 0)
5157                                 error = EFAULT;
5158                         else
5159                                 dst += sizeof(*nextnat);
5160                         break;
5161                 }
5162
5163                 if ((count == 1) || (error != 0))
5164                         break;
5165
5166                 count--;
5167
5168                 READ_ENTER(&ipf_nat);
5169
5170                 /*
5171                  * We need to have the lock again here to make sure that
5172                  * using _next is consistent.
5173                  */
5174                 switch (itp->igi_type)
5175                 {
5176                 case IPFGENITER_HOSTMAP :
5177                         nexthm = nexthm->hm_next;
5178                         break;
5179                 case IPFGENITER_IPNAT :
5180                         nextipnat = nextipnat->in_next;
5181                         break;
5182                 case IPFGENITER_NAT :
5183                         nextnat = nextnat->nat_next;
5184                         break;
5185                 }
5186         }
5187
5188
5189         switch (itp->igi_type)
5190         {
5191         case IPFGENITER_HOSTMAP :
5192                 if (hm != NULL) {
5193                         WRITE_ENTER(&ipf_nat);
5194                         fr_hostmapdel(&hm);
5195                         RWLOCK_EXIT(&ipf_nat);
5196                 }
5197                 break;
5198         case IPFGENITER_IPNAT :
5199                 if (ipn != NULL) {
5200                         fr_ipnatderef(&ipn);
5201                 }
5202                 break;
5203         case IPFGENITER_NAT :
5204                 if (nat != NULL) {
5205                         fr_natderef(&nat);
5206                 }
5207                 break;
5208         default :
5209                 break;
5210         }
5211
5212         return error;
5213 }
5214
5215
5216 /* ------------------------------------------------------------------------ */
5217 /* Function:    nat_iterator                                                */
5218 /* Returns:     int - 0 == ok, else error                                   */
5219 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5220 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5221 /*                                                                          */
5222 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5223 /* generic structure to iterate through a list.  There are three different  */
5224 /* linked lists of NAT related information to go through: NAT rules, active */
5225 /* NAT mappings and the NAT fragment cache.                                 */
5226 /* ------------------------------------------------------------------------ */
5227 static int nat_iterator(token, itp)
5228 ipftoken_t *token;
5229 ipfgeniter_t *itp;
5230 {
5231         int error;
5232
5233         if (itp->igi_data == NULL)
5234                 return EFAULT;
5235
5236         token->ipt_subtype = itp->igi_type;
5237
5238         switch (itp->igi_type)
5239         {
5240         case IPFGENITER_HOSTMAP :
5241         case IPFGENITER_IPNAT :
5242         case IPFGENITER_NAT :
5243                 error = nat_getnext(token, itp);
5244                 break;
5245
5246         case IPFGENITER_NATFRAG :
5247 #ifdef USE_MUTEXES
5248                 error = fr_nextfrag(token, itp, &ipfr_natlist,
5249                                     &ipfr_nattail, &ipf_natfrag);
5250 #else
5251                 error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5252 #endif
5253                 break;
5254         default :
5255                 error = EINVAL;
5256                 break;
5257         }
5258
5259         return error;
5260 }
5261
5262
5263 /* ------------------------------------------------------------------------ */
5264 /* Function:    nat_extraflush                                              */
5265 /* Returns:     int - 0 == success, -1 == failure                           */
5266 /* Parameters:  which(I) - how to flush the active NAT table                */
5267 /* Write Locks: ipf_nat                                                     */
5268 /*                                                                          */
5269 /* Flush nat tables.  Three actions currently defined:                      */
5270 /* which == 0 : flush all nat table entries                                 */
5271 /* which == 1 : flush TCP connections which have started to close but are   */
5272 /*            stuck for some reason.                                        */
5273 /* which == 2 : flush TCP connections which have been idle for a long time, */
5274 /*            starting at > 4 days idle and working back in successive half-*/
5275 /*            days to at most 12 hours old.  If this fails to free enough   */
5276 /*            slots then work backwards in half hour slots to 30 minutes.   */
5277 /*            If that too fails, then work backwards in 30 second intervals */
5278 /*            for the last 30 minutes to at worst 30 seconds idle.          */
5279 /* ------------------------------------------------------------------------ */
5280 static int nat_extraflush(which)
5281 int which;
5282 {
5283         ipftq_t *ifq, *ifqnext;
5284         nat_t *nat, **natp;
5285         ipftqent_t *tqn;
5286         int removed;
5287         SPL_INT(s);
5288
5289         removed = 0;
5290
5291         SPL_NET(s);
5292
5293         switch (which)
5294         {
5295         case 0 :
5296                 /*
5297                  * Style 0 flush removes everything...
5298                  */
5299                 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5300                         nat_delete(nat, NL_FLUSH);
5301                         removed++;
5302                 }
5303                 break;
5304
5305         case 1 :
5306                 /*
5307                  * Since we're only interested in things that are closing,
5308                  * we can start with the appropriate timeout queue.
5309                  */
5310                 for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5311                      ifq = ifq->ifq_next) {
5312
5313                         for (tqn = ifq->ifq_head; tqn != NULL; ) {
5314                                 nat = tqn->tqe_parent;
5315                                 tqn = tqn->tqe_next;
5316                                 if (nat->nat_p != IPPROTO_TCP)
5317                                         break;
5318                                 nat_delete(nat, NL_EXPIRE);
5319                                 removed++;
5320                         }
5321                 }
5322
5323                 /*
5324                  * Also need to look through the user defined queues.
5325                  */
5326                 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5327                         ifqnext = ifq->ifq_next;
5328                         for (tqn = ifq->ifq_head; tqn != NULL; ) {
5329                                 nat = tqn->tqe_parent;
5330                                 tqn = tqn->tqe_next;
5331                                 if (nat->nat_p != IPPROTO_TCP)
5332                                         continue;
5333
5334                                 if ((nat->nat_tcpstate[0] >
5335                                      IPF_TCPS_ESTABLISHED) &&
5336                                     (nat->nat_tcpstate[1] >
5337                                      IPF_TCPS_ESTABLISHED)) {
5338                                         nat_delete(nat, NL_EXPIRE);
5339                                         removed++;
5340                                 }
5341                         }
5342                 }
5343                 break;
5344
5345                 /*
5346                  * Args 5-11 correspond to flushing those particular states
5347                  * for TCP connections.
5348                  */
5349         case IPF_TCPS_CLOSE_WAIT :
5350         case IPF_TCPS_FIN_WAIT_1 :
5351         case IPF_TCPS_CLOSING :
5352         case IPF_TCPS_LAST_ACK :
5353         case IPF_TCPS_FIN_WAIT_2 :
5354         case IPF_TCPS_TIME_WAIT :
5355         case IPF_TCPS_CLOSED :
5356                 tqn = nat_tqb[which].ifq_head;
5357                 while (tqn != NULL) {
5358                         nat = tqn->tqe_parent;
5359                         tqn = tqn->tqe_next;
5360                         nat_delete(nat, NL_FLUSH);
5361                         removed++;
5362                 }
5363                 break;
5364          
5365         default :
5366                 if (which < 30)
5367                         break;
5368            
5369                 /*
5370                  * Take a large arbitrary number to mean the number of seconds
5371                  * for which which consider to be the maximum value we'll allow
5372                  * the expiration to be.
5373                  */
5374                 which = IPF_TTLVAL(which);
5375                 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5376                         if (fr_ticks - nat->nat_touched > which) {
5377                                 nat_delete(nat, NL_FLUSH);
5378                                 removed++;
5379                         } else
5380                                 natp = &nat->nat_next;
5381                 }
5382                 break;
5383         }
5384
5385         if (which != 2) {
5386                 SPL_X(s);
5387                 return removed;
5388         }
5389
5390         /*
5391          * Asked to remove inactive entries because the table is full.
5392          */
5393         if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
5394                 nat_last_force_flush = fr_ticks;
5395                 removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5396         }
5397
5398         SPL_X(s);
5399         return removed;
5400 }
5401
5402
5403 /* ------------------------------------------------------------------------ */
5404 /* Function:    nat_flush_entry                                             */
5405 /* Returns:     0 - always succeeds                                         */
5406 /* Parameters:  entry(I) - pointer to NAT entry                             */
5407 /* Write Locks: ipf_nat                                                     */
5408 /*                                                                          */
5409 /* This function is a stepping stone between ipf_queueflush() and           */
5410 /* nat_dlete().  It is used so we can provide a uniform interface via the   */
5411 /* ipf_queueflush() function.  Since the nat_delete() function returns void */
5412 /* we translate that to mean it always succeeds in deleting something.      */
5413 /* ------------------------------------------------------------------------ */
5414 static int nat_flush_entry(entry)
5415 void *entry;
5416 {
5417         nat_delete(entry, NL_FLUSH);
5418         return 0;
5419 }
5420
5421
5422 /* ------------------------------------------------------------------------ */
5423 /* Function:    nat_gettable                                                */
5424 /* Returns:     int     - 0 = success, else error                           */
5425 /* Parameters:  data(I) - pointer to ioctl data                             */
5426 /*                                                                          */
5427 /* This function handles ioctl requests for tables of nat information.      */
5428 /* At present the only table it deals with is the hash bucket statistics.   */
5429 /* ------------------------------------------------------------------------ */
5430 static int nat_gettable(data)
5431 char *data;
5432 {
5433         ipftable_t table;
5434         int error;
5435
5436         error = fr_inobj(data, &table, IPFOBJ_GTABLE);
5437         if (error != 0)
5438                 return error;
5439
5440         switch (table.ita_type)
5441         {
5442         case IPFTABLE_BUCKETS_NATIN :
5443                 error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table, 
5444                                 ipf_nattable_sz * sizeof(u_long));
5445                 break;
5446
5447         case IPFTABLE_BUCKETS_NATOUT :
5448                 error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table, 
5449                                 ipf_nattable_sz * sizeof(u_long));
5450                 break;
5451
5452         default :
5453                 return EINVAL;
5454         }
5455
5456         if (error != 0) {
5457                 error = EFAULT;
5458         }
5459         return error;
5460 }