]> CyberLeo.Net >> Repos - FreeBSD/releng/9.3.git/blob - sys/contrib/pf/net/pf_norm.c
o Fix invalid TCP checksums with pf(4). [EN-16:02.pf]
[FreeBSD/releng/9.3.git] / sys / contrib / pf / net / pf_norm.c
1 /*      $OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $ */
2
3 /*
4  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #ifdef __FreeBSD__
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_pf.h"
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #ifdef DEV_PFLOG
37 #define NPFLOG  DEV_PFLOG
38 #else
39 #define NPFLOG  0
40 #endif
41 #else
42 #include "pflog.h"
43 #endif
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/mbuf.h>
48 #include <sys/filio.h>
49 #include <sys/fcntl.h>
50 #include <sys/socket.h>
51 #include <sys/kernel.h>
52 #include <sys/time.h>
53 #ifndef __FreeBSD__
54 #include <sys/pool.h>
55
56 #include <dev/rndvar.h>
57 #endif
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/bpf.h>
61 #include <net/route.h>
62 #include <net/if_pflog.h>
63
64 #include <netinet/in.h>
65 #include <netinet/in_var.h>
66 #include <netinet/in_systm.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip_var.h>
69 #include <netinet/tcp.h>
70 #include <netinet/tcp_seq.h>
71 #include <netinet/udp.h>
72 #include <netinet/ip_icmp.h>
73
74 #ifdef INET6
75 #include <netinet/ip6.h>
76 #endif /* INET6 */
77
78 #include <net/pfvar.h>
79
80 #ifndef __FreeBSD__
/*
 * One buffered IP fragment.  Entries are linked on a pf_fragment's
 * fr_queue list while the datagram is being reassembled.
 */
81 struct pf_frent {
82         LIST_ENTRY(pf_frent) fr_next;   /* linkage on pf_fragment.fr_queue */
83         struct ip *fr_ip;               /* IP header of this fragment */
84         struct mbuf *fr_m;              /* mbuf chain holding the fragment */
85 };
86
/*
 * One payload byte range recorded by the non-buffering fragment cache.
 * pf_fragcache() fills fr_off with the fragment offset and fr_end with
 * offset + payload length.
 */
87 struct pf_frcache {
88         LIST_ENTRY(pf_frcache) fr_next; /* linkage on pf_fragment.fr_cache */
89         uint16_t        fr_off;         /* first byte of the range */
90         uint16_t        fr_end;         /* offset just past the range */
91 };
92 #endif
93
/* Bits kept in pf_fragment.fr_flags. */
94 #define PFFRAG_SEENLAST 0x0001          /* Seen the last fragment for this */
95 #define PFFRAG_NOBUFFER 0x0002          /* Non-buffering fragment cache */
96 #define PFFRAG_DROP     0x0004          /* Drop all fragments */
/*
 * True when `fr' does not have PFFRAG_NOBUFFER set, i.e. it is an entry that
 * uses fr_queue (buffered fragments) rather than fr_cache (range cache).
 */
97 #define BUFFER_FRAGMENTS(fr)    (!((fr)->fr_flags & PFFRAG_NOBUFFER))
98
99 #ifndef __FreeBSD__
/*
 * State kept for one fragmented datagram.  Entries are looked up in an RB
 * tree keyed by (fr_id, fr_p, fr_src, fr_dst) -- see pf_frag_compare() --
 * and are also linked on either pf_fragqueue or pf_cachequeue.
 */
100 struct pf_fragment {
101         RB_ENTRY(pf_fragment) fr_entry;         /* RB tree linkage */
102         TAILQ_ENTRY(pf_fragment) frag_next;     /* queue linkage */
103         struct in_addr  fr_src;         /* source address (lookup key) */
104         struct in_addr  fr_dst;         /* destination address (lookup key) */
105         u_int8_t        fr_p;           /* protocol of this fragment */
106         u_int8_t        fr_flags;       /* status flags */
107         u_int16_t       fr_id;          /* fragment id for reassemble */
108         u_int16_t       fr_max;         /* fragment data max */
109         u_int32_t       fr_timeout;     /* time_second at creation/last lookup */
        /* Shorthand accessors for the two members of the union below. */
110 #define fr_queue        fr_u.fru_queue
111 #define fr_cache        fr_u.fru_cache
        /* Which member is live is selected by BUFFER_FRAGMENTS(). */
112         union {
113                 LIST_HEAD(pf_fragq, pf_frent) fru_queue;        /* buffering */
114                 LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;     /* non-buf */
115         } fr_u;
116 };
117 #endif
118
/*
 * Queues of pf_fragment entries, linked through frag_next:
 *   pf_fragqueue  - entries for which BUFFER_FRAGMENTS() is true
 *   pf_cachequeue - entries for which BUFFER_FRAGMENTS() is false
 * New and freshly looked-up entries are inserted at the head; expiry and
 * flushing take entries from the tail.
 * On FreeBSD the heads are declared with VNET_DEFINE and reached through
 * the V_ macros.
 */
119 #ifdef __FreeBSD__
120 TAILQ_HEAD(pf_fragqueue, pf_fragment);
121 TAILQ_HEAD(pf_cachequeue, pf_fragment);
122 VNET_DEFINE(struct pf_fragqueue,        pf_fragqueue);
123 #define V_pf_fragqueue                  VNET(pf_fragqueue)
124 VNET_DEFINE(struct pf_cachequeue,       pf_cachequeue);
125 #define V_pf_cachequeue                 VNET(pf_cachequeue)
126 #else
127 TAILQ_HEAD(pf_fragqueue, pf_fragment)   pf_fragqueue;
128 TAILQ_HEAD(pf_cachequeue, pf_fragment)  pf_cachequeue;
129 #endif
130
131 #ifndef __FreeBSD__
132 static __inline int      pf_frag_compare(struct pf_fragment *,
133                             struct pf_fragment *);
134 #else
135 static int               pf_frag_compare(struct pf_fragment *,
136                             struct pf_fragment *);
137 #endif
138
/*
 * Red-black trees of pf_fragment entries, linked through fr_entry and
 * ordered by pf_frag_compare():
 *   pf_frag_tree  - entries that buffer fragments (see pf_reassemble())
 *   pf_cache_tree - entries of the non-buffering cache (see pf_fragcache())
 * Both trees share the pf_frag_tree head type and the functions generated
 * by RB_GENERATE below.
 */
139 #ifdef __FreeBSD__
140 RB_HEAD(pf_frag_tree, pf_fragment);
141 VNET_DEFINE(struct pf_frag_tree,        pf_frag_tree);
142 #define V_pf_frag_tree                  VNET(pf_frag_tree)
143 VNET_DEFINE(struct pf_frag_tree,        pf_cache_tree);
144 #define V_pf_cache_tree                 VNET(pf_cache_tree)
145 #else
146 RB_HEAD(pf_frag_tree, pf_fragment)      pf_frag_tree, pf_cache_tree;
147 #endif
148 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
149 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
150
151 /* Private prototypes */
152 void                     pf_ip2key(struct pf_fragment *, struct ip *);
153 void                     pf_remove_fragment(struct pf_fragment *);
154 void                     pf_flush_fragments(void);
155 void                     pf_free_fragment(struct pf_fragment *);
156 struct pf_fragment      *pf_find_fragment(struct ip *, struct pf_frag_tree *);
157 struct mbuf             *pf_reassemble(struct mbuf **, struct pf_fragment **,
158                             struct pf_frent *, int);
159 struct mbuf             *pf_fragcache(struct mbuf **, struct ip*,
160                             struct pf_fragment **, int, int, int *);
161 int                      pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
162                             struct tcphdr *, int, sa_family_t);
163 void                     pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t,
164                             u_int8_t);
165 #ifdef INET6
166 void                     pf_scrub_ip6(struct mbuf **, u_int8_t);
167 #endif
/*
 * Debug printf.  When the pf debug level is at least PF_DEBUG_MISC, prints
 * the name of the calling function followed by the message.  `x' is a
 * complete, parenthesized printf argument list, so callers write
 * DPFPRINTF(("fmt", args)).  The two variants differ only in how the pf
 * status structure is named (V_pf_status vs. pf_status).
 */
168 #ifdef __FreeBSD__
169 #define DPFPRINTF(x) do {                               \
170         if (V_pf_status.debug >= PF_DEBUG_MISC) {       \
171                 printf("%s: ", __func__);               \
172                 printf x ;                              \
173         }                                               \
174 } while(0)
175 #else
176 #define DPFPRINTF(x) do {                               \
177         if (pf_status.debug >= PF_DEBUG_MISC) {         \
178                 printf("%s: ", __func__);               \
179                 printf x ;                              \
180         }                                               \
181 } while(0)
182 #endif
183
/*
 * Globals
 *
 * Allocators used in this file:
 *   pf_frent_pl       - struct pf_frent (buffered fragments)
 *   pf_frag_pl        - struct pf_fragment entries that buffer fragments
 *   pf_cache_pl       - struct pf_fragment entries of the non-buffering cache
 *   pf_cent_pl        - struct pf_frcache (cached ranges)
 *   pf_state_scrub_pl - struct pf_state_scrub
 *
 * Counters:
 *   pf_nfrents - decremented whenever a pf_frent is returned to pf_frent_pl
 *                (presumably incremented by the allocating caller -- not
 *                visible in this part of the file)
 *   pf_ncache  - incremented/decremented as pf_frcache entries are
 *                allocated from / returned to pf_cent_pl
 */
185 #ifdef __FreeBSD__
186 VNET_DEFINE(uma_zone_t,         pf_frent_pl);
187 VNET_DEFINE(uma_zone_t,         pf_frag_pl);
188 VNET_DEFINE(uma_zone_t,         pf_cache_pl);
189 VNET_DEFINE(uma_zone_t,         pf_cent_pl);
190 VNET_DEFINE(uma_zone_t,         pf_state_scrub_pl);
191
192 VNET_DEFINE(int,                pf_nfrents);
193 #define V_pf_nfrents            VNET(pf_nfrents)
194 VNET_DEFINE(int,                pf_ncache);
195 #define V_pf_ncache             VNET(pf_ncache)
196 #else
197 struct pool              pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
198 struct pool              pf_state_scrub_pl;
199 int                      pf_nfrents, pf_ncache;
200 #endif
201
202 void
203 pf_normalize_init(void)
204 {
205 #ifdef __FreeBSD__
206         /*
207          * XXX
208          * No high water mark support(It's hint not hard limit).
209          * uma_zone_set_max(pf_frag_pl, PFFRAG_FRAG_HIWAT);
210          */
211         uma_zone_set_max(V_pf_frent_pl, PFFRAG_FRENT_HIWAT);
212         uma_zone_set_max(V_pf_cache_pl, PFFRAG_FRCACHE_HIWAT);
213         uma_zone_set_max(V_pf_cent_pl, PFFRAG_FRCENT_HIWAT);
214 #else
215         pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
216             NULL);
217         pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
218             NULL);
219         pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
220             "pffrcache", NULL);
221         pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
222             NULL);
223         pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
224             "pfstscr", NULL);
225
226         pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
227         pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
228         pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
229         pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
230 #endif
231
232 #ifdef __FreeBSD__
233         TAILQ_INIT(&V_pf_fragqueue);
234         TAILQ_INIT(&V_pf_cachequeue);
235 #else
236         TAILQ_INIT(&pf_fragqueue);
237         TAILQ_INIT(&pf_cachequeue);
238 #endif
239 }
240
241 #ifdef __FreeBSD__
242 static int
243 #else
244 static __inline int
245 #endif
246 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
247 {
248         int     diff;
249
250         if ((diff = a->fr_id - b->fr_id))
251                 return (diff);
252         else if ((diff = a->fr_p - b->fr_p))
253                 return (diff);
254         else if (a->fr_src.s_addr < b->fr_src.s_addr)
255                 return (-1);
256         else if (a->fr_src.s_addr > b->fr_src.s_addr)
257                 return (1);
258         else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
259                 return (-1);
260         else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
261                 return (1);
262         return (0);
263 }
264
265 void
266 pf_purge_expired_fragments(void)
267 {
268         struct pf_fragment      *frag;
269 #ifdef __FreeBSD__
270         u_int32_t                expire = time_second -
271                                     V_pf_default_rule.timeout[PFTM_FRAG];
272 #else
273         u_int32_t                expire = time_second -
274                                     pf_default_rule.timeout[PFTM_FRAG];
275 #endif
276
277 #ifdef __FreeBSD__
278         while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) {
279                 KASSERT((BUFFER_FRAGMENTS(frag)),
280                     ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
281 #else
282         while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
283                 KASSERT(BUFFER_FRAGMENTS(frag));
284 #endif
285                 if (frag->fr_timeout > expire)
286                         break;
287
288                 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
289                 pf_free_fragment(frag);
290         }
291
292 #ifdef __FreeBSD__
293         while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) {
294                 KASSERT((!BUFFER_FRAGMENTS(frag)),
295                     ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
296 #else
297         while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
298                 KASSERT(!BUFFER_FRAGMENTS(frag));
299 #endif
300                 if (frag->fr_timeout > expire)
301                         break;
302
303                 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
304                 pf_free_fragment(frag);
305 #ifdef __FreeBSD__
306                 KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) ||
307                     TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag),
308                     ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s",
309                     __FUNCTION__));
310 #else
311                 KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
312                     TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
313 #endif
314         }
315 }
316
317 /*
318  * Try to flush old fragments to make space for new ones
319  */
320
321 void
322 pf_flush_fragments(void)
323 {
324         struct pf_fragment      *frag;
325         int                      goal;
326
327 #ifdef __FreeBSD__
328         goal = V_pf_nfrents * 9 / 10;
329         DPFPRINTF(("trying to free > %d frents\n",
330             V_pf_nfrents - goal));
331         while (goal < V_pf_nfrents) {
332 #else
333         goal = pf_nfrents * 9 / 10;
334         DPFPRINTF(("trying to free > %d frents\n",
335             pf_nfrents - goal));
336         while (goal < pf_nfrents) {
337 #endif
338 #ifdef __FreeBSD__
339                 frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue);
340 #else
341                 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
342 #endif
343                 if (frag == NULL)
344                         break;
345                 pf_free_fragment(frag);
346         }
347
348
349 #ifdef __FreeBSD__
350         goal = V_pf_ncache * 9 / 10;
351         DPFPRINTF(("trying to free > %d cache entries\n",
352             V_pf_ncache - goal));
353         while (goal < V_pf_ncache) {
354 #else
355         goal = pf_ncache * 9 / 10;
356         DPFPRINTF(("trying to free > %d cache entries\n",
357             pf_ncache - goal));
358         while (goal < pf_ncache) {
359 #endif
360 #ifdef __FreeBSD__
361                 frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue);
362 #else
363                 frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
364 #endif
365                 if (frag == NULL)
366                         break;
367                 pf_free_fragment(frag);
368         }
369 }
370
371 /* Frees the fragments and all associated entries */
372
373 void
374 pf_free_fragment(struct pf_fragment *frag)
375 {
376         struct pf_frent         *frent;
377         struct pf_frcache       *frcache;
378
379         /* Free all fragments */
380         if (BUFFER_FRAGMENTS(frag)) {
381                 for (frent = LIST_FIRST(&frag->fr_queue); frent;
382                     frent = LIST_FIRST(&frag->fr_queue)) {
383                         LIST_REMOVE(frent, fr_next);
384
385                         m_freem(frent->fr_m);
386 #ifdef __FreeBSD__
387                         pool_put(&V_pf_frent_pl, frent);
388                         V_pf_nfrents--;
389 #else
390                         pool_put(&pf_frent_pl, frent);
391                         pf_nfrents--;
392 #endif
393                 }
394         } else {
395                 for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
396                     frcache = LIST_FIRST(&frag->fr_cache)) {
397                         LIST_REMOVE(frcache, fr_next);
398
399 #ifdef __FreeBSD__
400                         KASSERT((LIST_EMPTY(&frag->fr_cache) ||
401                             LIST_FIRST(&frag->fr_cache)->fr_off >
402                             frcache->fr_end),
403                             ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >"
404                               " frcache->fr_end): %s", __FUNCTION__));
405
406                         pool_put(&V_pf_cent_pl, frcache);
407                         V_pf_ncache--;
408 #else
409                         KASSERT(LIST_EMPTY(&frag->fr_cache) ||
410                             LIST_FIRST(&frag->fr_cache)->fr_off >
411                             frcache->fr_end);
412
413                         pool_put(&pf_cent_pl, frcache);
414                         pf_ncache--;
415 #endif
416                 }
417         }
418
419         pf_remove_fragment(frag);
420 }
421
422 void
423 pf_ip2key(struct pf_fragment *key, struct ip *ip)
424 {
425         key->fr_p = ip->ip_p;
426         key->fr_id = ip->ip_id;
427         key->fr_src.s_addr = ip->ip_src.s_addr;
428         key->fr_dst.s_addr = ip->ip_dst.s_addr;
429 }
430
431 struct pf_fragment *
432 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
433 {
434         struct pf_fragment       key;
435         struct pf_fragment      *frag;
436
437         pf_ip2key(&key, ip);
438
439         frag = RB_FIND(pf_frag_tree, tree, &key);
440         if (frag != NULL) {
441                 /* XXX Are we sure we want to update the timeout? */
442                 frag->fr_timeout = time_second;
443                 if (BUFFER_FRAGMENTS(frag)) {
444 #ifdef __FreeBSD__
445                         TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
446                         TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
447 #else
448                         TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
449                         TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
450 #endif
451                 } else {
452 #ifdef __FreeBSD__
453                         TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
454                         TAILQ_INSERT_HEAD(&V_pf_cachequeue, frag, frag_next);
455 #else
456                         TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
457                         TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
458 #endif
459                 }
460         }
461
462         return (frag);
463 }
464
465 /* Removes a fragment from the fragment queue and frees the fragment */
466
467 void
468 pf_remove_fragment(struct pf_fragment *frag)
469 {
470         if (BUFFER_FRAGMENTS(frag)) {
471 #ifdef __FreeBSD__
472                 RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
473                 TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
474                 pool_put(&V_pf_frag_pl, frag);
475 #else
476                 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
477                 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
478                 pool_put(&pf_frag_pl, frag);
479 #endif
480         } else {
481 #ifdef __FreeBSD__
482                 RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag);
483                 TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
484                 pool_put(&V_pf_cache_pl, frag);
485 #else
486                 RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
487                 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
488                 pool_put(&pf_cache_pl, frag);
489 #endif
490         }
491 }
492
493 #define FR_IP_OFF(fr)   ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
494 struct mbuf *
495 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
496     struct pf_frent *frent, int mff)
497 {
498         struct mbuf     *m = *m0, *m2;
499         struct pf_frent *frea, *next;
500         struct pf_frent *frep = NULL;
501         struct ip       *ip = frent->fr_ip;
502         int              hlen = ip->ip_hl << 2;
503         u_int16_t        off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
504         u_int16_t        ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
505         u_int16_t        max = ip_len + off;
506
507 #ifdef __FreeBSD__
508         KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)),
509             ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
510 #else
511         KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));
512 #endif
513
514         /* Strip off ip header */
515         m->m_data += hlen;
516         m->m_len -= hlen;
517
518         /* Create a new reassembly queue for this packet */
519         if (*frag == NULL) {
520 #ifdef __FreeBSD__
521                 *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT);
522 #else
523                 *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
524 #endif
525                 if (*frag == NULL) {
526                         pf_flush_fragments();
527 #ifdef __FreeBSD__
528                         *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT);
529 #else
530                         *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
531 #endif
532                         if (*frag == NULL)
533                                 goto drop_fragment;
534                 }
535
536                 (*frag)->fr_flags = 0;
537                 (*frag)->fr_max = 0;
538                 (*frag)->fr_src = frent->fr_ip->ip_src;
539                 (*frag)->fr_dst = frent->fr_ip->ip_dst;
540                 (*frag)->fr_p = frent->fr_ip->ip_p;
541                 (*frag)->fr_id = frent->fr_ip->ip_id;
542                 (*frag)->fr_timeout = time_second;
543                 LIST_INIT(&(*frag)->fr_queue);
544
545 #ifdef __FreeBSD__
546                 RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag);
547                 TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next);
548 #else
549                 RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
550                 TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
551 #endif
552
553                 /* We do not have a previous fragment */
554                 frep = NULL;
555                 goto insert;
556         }
557
558         /*
559          * Find a fragment after the current one:
560          *  - off contains the real shifted offset.
561          */
562         LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
563                 if (FR_IP_OFF(frea) > off)
564                         break;
565                 frep = frea;
566         }
567
568 #ifdef __FreeBSD__
569         KASSERT((frep != NULL || frea != NULL),
570             ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));;
571 #else
572         KASSERT(frep != NULL || frea != NULL);
573 #endif
574
575         if (frep != NULL &&
576             FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
577             4 > off)
578         {
579                 u_int16_t       precut;
580
581                 precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
582                     frep->fr_ip->ip_hl * 4 - off;
583                 if (precut >= ip_len)
584                         goto drop_fragment;
585                 m_adj(frent->fr_m, precut);
586                 DPFPRINTF(("overlap -%d\n", precut));
587                 /* Enforce 8 byte boundaries */
588                 ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
589                 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
590                 ip_len -= precut;
591                 ip->ip_len = htons(ip_len);
592         }
593
594         for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
595             frea = next)
596         {
597                 u_int16_t       aftercut;
598
599                 aftercut = ip_len + off - FR_IP_OFF(frea);
600                 DPFPRINTF(("adjust overlap %d\n", aftercut));
601                 if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
602                     * 4)
603                 {
604                         frea->fr_ip->ip_len =
605                             htons(ntohs(frea->fr_ip->ip_len) - aftercut);
606                         frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
607                             (aftercut >> 3));
608                         m_adj(frea->fr_m, aftercut);
609                         break;
610                 }
611
612                 /* This fragment is completely overlapped, lose it */
613                 next = LIST_NEXT(frea, fr_next);
614                 m_freem(frea->fr_m);
615                 LIST_REMOVE(frea, fr_next);
616 #ifdef __FreeBSD__
617                 pool_put(&V_pf_frent_pl, frea);
618                 V_pf_nfrents--;
619 #else
620                 pool_put(&pf_frent_pl, frea);
621                 pf_nfrents--;
622 #endif
623         }
624
625  insert:
626         /* Update maximum data size */
627         if ((*frag)->fr_max < max)
628                 (*frag)->fr_max = max;
629         /* This is the last segment */
630         if (!mff)
631                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
632
633         if (frep == NULL)
634                 LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
635         else
636                 LIST_INSERT_AFTER(frep, frent, fr_next);
637
638         /* Check if we are completely reassembled */
639         if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
640                 return (NULL);
641
642         /* Check if we have all the data */
643         off = 0;
644         for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
645                 next = LIST_NEXT(frep, fr_next);
646
647                 off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
648                 if (off < (*frag)->fr_max &&
649                     (next == NULL || FR_IP_OFF(next) != off))
650                 {
651                         DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
652                             off, next == NULL ? -1 : FR_IP_OFF(next),
653                             (*frag)->fr_max));
654                         return (NULL);
655                 }
656         }
657         DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
658         if (off < (*frag)->fr_max)
659                 return (NULL);
660
661         /* We have all the data */
662         frent = LIST_FIRST(&(*frag)->fr_queue);
663 #ifdef __FreeBSD__
664         KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__));
665 #else
666         KASSERT(frent != NULL);
667 #endif
668         if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
669                 DPFPRINTF(("drop: too big: %d\n", off));
670                 pf_free_fragment(*frag);
671                 *frag = NULL;
672                 return (NULL);
673         }
674         next = LIST_NEXT(frent, fr_next);
675
676         /* Magic from ip_input */
677         ip = frent->fr_ip;
678         m = frent->fr_m;
679         m2 = m->m_next;
680         m->m_next = NULL;
681         m_cat(m, m2);
682 #ifdef __FreeBSD__
683         pool_put(&V_pf_frent_pl, frent);
684         V_pf_nfrents--;
685 #else
686         pool_put(&pf_frent_pl, frent);
687         pf_nfrents--;
688 #endif
689         for (frent = next; frent != NULL; frent = next) {
690                 next = LIST_NEXT(frent, fr_next);
691
692                 m2 = frent->fr_m;
693 #ifdef __FreeBSD__
694                 pool_put(&V_pf_frent_pl, frent);
695                 V_pf_nfrents--;
696 #else
697                 pool_put(&pf_frent_pl, frent);
698                 pf_nfrents--;
699 #endif
700 #ifdef __FreeBSD__
701                 m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags;
702                 m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data;
703 #endif
704                 m_cat(m, m2);
705         }
706
707 #ifdef __FreeBSD__
708         while (m->m_pkthdr.csum_data & 0xffff0000)
709                 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
710                     (m->m_pkthdr.csum_data >> 16);
711 #endif
712         ip->ip_src = (*frag)->fr_src;
713         ip->ip_dst = (*frag)->fr_dst;
714
715         /* Remove from fragment queue */
716         pf_remove_fragment(*frag);
717         *frag = NULL;
718
719         hlen = ip->ip_hl << 2;
720         ip->ip_len = htons(off + hlen);
721         m->m_len += hlen;
722         m->m_data -= hlen;
723
724         /* some debugging cruft by sklower, below, will go away soon */
725         /* XXX this should be done elsewhere */
726         if (m->m_flags & M_PKTHDR) {
727                 int plen = 0;
728                 for (m2 = m; m2; m2 = m2->m_next)
729                         plen += m2->m_len;
730                 m->m_pkthdr.len = plen;
731         }
732
733         DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
734         return (m);
735
736  drop_fragment:
737         /* Oops - fail safe - drop packet */
738 #ifdef __FreeBSD__
739         pool_put(&V_pf_frent_pl, frent);
740         V_pf_nfrents--;
741 #else
742         pool_put(&pf_frent_pl, frent);
743         pf_nfrents--;
744 #endif
745         m_freem(m);
746         return (NULL);
747 }
748
749 struct mbuf *
750 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
751     int drop, int *nomem)
752 {
753         struct mbuf             *m = *m0;
754         struct pf_frcache       *frp, *fra, *cur = NULL;
755         int                      ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
756         u_int16_t                off = ntohs(h->ip_off) << 3;
757         u_int16_t                max = ip_len + off;
758         int                      hosed = 0;
759
760 #ifdef __FreeBSD__
761         KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)),
762             ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
763 #else
764         KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
765 #endif
766
767         /* Create a new range queue for this packet */
768         if (*frag == NULL) {
769 #ifdef __FreeBSD__
770                 *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT);
771 #else
772                 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
773 #endif
774                 if (*frag == NULL) {
775                         pf_flush_fragments();
776 #ifdef __FreeBSD__
777                         *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT);
778 #else
779                         *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
780 #endif
781                         if (*frag == NULL)
782                                 goto no_mem;
783                 }
784
785                 /* Get an entry for the queue */
786 #ifdef __FreeBSD__
787                 cur = pool_get(&V_pf_cent_pl, PR_NOWAIT);
788                 if (cur == NULL) {
789                         pool_put(&V_pf_cache_pl, *frag);
790 #else
791                 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
792                 if (cur == NULL) {
793                         pool_put(&pf_cache_pl, *frag);
794 #endif
795                         *frag = NULL;
796                         goto no_mem;
797                 }
798 #ifdef __FreeBSD__
799                 V_pf_ncache++;
800 #else
801                 pf_ncache++;
802 #endif
803
804                 (*frag)->fr_flags = PFFRAG_NOBUFFER;
805                 (*frag)->fr_max = 0;
806                 (*frag)->fr_src = h->ip_src;
807                 (*frag)->fr_dst = h->ip_dst;
808                 (*frag)->fr_p = h->ip_p;
809                 (*frag)->fr_id = h->ip_id;
810                 (*frag)->fr_timeout = time_second;
811
812                 cur->fr_off = off;
813                 cur->fr_end = max;
814                 LIST_INIT(&(*frag)->fr_cache);
815                 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
816
817 #ifdef __FreeBSD__
818                 RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag);
819                 TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next);
820 #else
821                 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
822                 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
823 #endif
824
825                 DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));
826
827                 goto pass;
828         }
829
830         /*
831          * Find a fragment after the current one:
832          *  - off contains the real shifted offset.
833          */
834         frp = NULL;
835         LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
836                 if (fra->fr_off > off)
837                         break;
838                 frp = fra;
839         }
840
841 #ifdef __FreeBSD__
842         KASSERT((frp != NULL || fra != NULL),
843             ("!(frp != NULL || fra != NULL): %s", __FUNCTION__));
844 #else
845         KASSERT(frp != NULL || fra != NULL);
846 #endif
847
848         if (frp != NULL) {
849                 int     precut;
850
851                 precut = frp->fr_end - off;
852                 if (precut >= ip_len) {
853                         /* Fragment is entirely a duplicate */
854                         DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
855                             h->ip_id, frp->fr_off, frp->fr_end, off, max));
856                         goto drop_fragment;
857                 }
858                 if (precut == 0) {
859                         /* They are adjacent.  Fixup cache entry */
860                         DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
861                             h->ip_id, frp->fr_off, frp->fr_end, off, max));
862                         frp->fr_end = max;
863                 } else if (precut > 0) {
864                         /* The first part of this payload overlaps with a
865                          * fragment that has already been passed.
866                          * Need to trim off the first part of the payload.
867                          * But to do so easily, we need to create another
868                          * mbuf to throw the original header into.
869                          */
870
871                         DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
872                             h->ip_id, precut, frp->fr_off, frp->fr_end, off,
873                             max));
874
875                         off += precut;
876                         max -= precut;
877                         /* Update the previous frag to encompass this one */
878                         frp->fr_end = max;
879
880                         if (!drop) {
881                                 /* XXX Optimization opportunity
882                                  * This is a very heavy way to trim the payload.
883                                  * we could do it much faster by diddling mbuf
884                                  * internals but that would be even less legible
885                                  * than this mbuf magic.  For my next trick,
886                                  * I'll pull a rabbit out of my laptop.
887                                  */
888 #ifdef __FreeBSD__
889                                 *m0 = m_dup(m, M_DONTWAIT);
890 #else
891                                 *m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT);
892 #endif
893                                 if (*m0 == NULL)
894                                         goto no_mem;
895 #ifdef __FreeBSD__
896                                 /* From KAME Project : We have missed this! */
897                                 m_adj(*m0, (h->ip_hl << 2) -
898                                     (*m0)->m_pkthdr.len);
899
900                                 KASSERT(((*m0)->m_next == NULL), 
901                                     ("(*m0)->m_next != NULL: %s", 
902                                     __FUNCTION__));
903 #else
904                                 KASSERT((*m0)->m_next == NULL);
905 #endif
906                                 m_adj(m, precut + (h->ip_hl << 2));
907                                 m_cat(*m0, m);
908                                 m = *m0;
909                                 if (m->m_flags & M_PKTHDR) {
910                                         int plen = 0;
911                                         struct mbuf *t;
912                                         for (t = m; t; t = t->m_next)
913                                                 plen += t->m_len;
914                                         m->m_pkthdr.len = plen;
915                                 }
916
917
918                                 h = mtod(m, struct ip *);
919
920 #ifdef __FreeBSD__
921                                 KASSERT(((int)m->m_len ==
922                                     ntohs(h->ip_len) - precut),
923                                     ("m->m_len != ntohs(h->ip_len) - precut: %s",
924                                     __FUNCTION__));
925 #else
926                                 KASSERT((int)m->m_len ==
927                                     ntohs(h->ip_len) - precut);
928 #endif
929                                 h->ip_off = htons(ntohs(h->ip_off) +
930                                     (precut >> 3));
931                                 h->ip_len = htons(ntohs(h->ip_len) - precut);
932                         } else {
933                                 hosed++;
934                         }
935                 } else {
936                         /* There is a gap between fragments */
937
938                         DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
939                             h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
940                             max));
941
942 #ifdef __FreeBSD__
943                         cur = pool_get(&V_pf_cent_pl, PR_NOWAIT);
944 #else
945                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
946 #endif
947                         if (cur == NULL)
948                                 goto no_mem;
949 #ifdef __FreeBSD__
950                         V_pf_ncache++;
951 #else
952                         pf_ncache++;
953 #endif
954
955                         cur->fr_off = off;
956                         cur->fr_end = max;
957                         LIST_INSERT_AFTER(frp, cur, fr_next);
958                 }
959         }
960
961         if (fra != NULL) {
962                 int     aftercut;
963                 int     merge = 0;
964
965                 aftercut = max - fra->fr_off;
966                 if (aftercut == 0) {
967                         /* Adjacent fragments */
968                         DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
969                             h->ip_id, off, max, fra->fr_off, fra->fr_end));
970                         fra->fr_off = off;
971                         merge = 1;
972                 } else if (aftercut > 0) {
973                         /* Need to chop off the tail of this fragment */
974                         DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
975                             h->ip_id, aftercut, off, max, fra->fr_off,
976                             fra->fr_end));
977                         fra->fr_off = off;
978                         max -= aftercut;
979
980                         merge = 1;
981
982                         if (!drop) {
983                                 m_adj(m, -aftercut);
984                                 if (m->m_flags & M_PKTHDR) {
985                                         int plen = 0;
986                                         struct mbuf *t;
987                                         for (t = m; t; t = t->m_next)
988                                                 plen += t->m_len;
989                                         m->m_pkthdr.len = plen;
990                                 }
991                                 h = mtod(m, struct ip *);
992 #ifdef __FreeBSD__
993                                 KASSERT(((int)m->m_len == ntohs(h->ip_len) - aftercut),
994                                     ("m->m_len != ntohs(h->ip_len) - aftercut: %s",
995                                     __FUNCTION__));
996 #else
997                                 KASSERT((int)m->m_len ==
998                                     ntohs(h->ip_len) - aftercut);
999 #endif
1000                                 h->ip_len = htons(ntohs(h->ip_len) - aftercut);
1001                         } else {
1002                                 hosed++;
1003                         }
1004                 } else if (frp == NULL) {
1005                         /* There is a gap between fragments */
1006                         DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
1007                             h->ip_id, -aftercut, off, max, fra->fr_off,
1008                             fra->fr_end));
1009
1010 #ifdef __FreeBSD__
1011                         cur = pool_get(&V_pf_cent_pl, PR_NOWAIT);
1012 #else
1013                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1014 #endif
1015                         if (cur == NULL)
1016                                 goto no_mem;
1017 #ifdef __FreeBSD__
1018                         V_pf_ncache++;
1019 #else
1020                         pf_ncache++;
1021 #endif
1022
1023                         cur->fr_off = off;
1024                         cur->fr_end = max;
1025                         LIST_INSERT_BEFORE(fra, cur, fr_next);
1026                 }
1027
1028
1029                 /* Need to glue together two separate fragment descriptors */
1030                 if (merge) {
1031                         if (cur && fra->fr_off <= cur->fr_end) {
1032                                 /* Need to merge in a previous 'cur' */
1033                                 DPFPRINTF(("fragcache[%d]: adjacent(merge "
1034                                     "%d-%d) %d-%d (%d-%d)\n",
1035                                     h->ip_id, cur->fr_off, cur->fr_end, off,
1036                                     max, fra->fr_off, fra->fr_end));
1037                                 fra->fr_off = cur->fr_off;
1038                                 LIST_REMOVE(cur, fr_next);
1039 #ifdef __FreeBSD__
1040                                 pool_put(&V_pf_cent_pl, cur);
1041                                 V_pf_ncache--;
1042 #else
1043                                 pool_put(&pf_cent_pl, cur);
1044                                 pf_ncache--;
1045 #endif
1046                                 cur = NULL;
1047
1048                         } else if (frp && fra->fr_off <= frp->fr_end) {
1049                                 /* Need to merge in a modified 'frp' */
1050 #ifdef __FreeBSD__
1051                                 KASSERT((cur == NULL), ("cur != NULL: %s",
1052                                     __FUNCTION__));
1053 #else
1054                                 KASSERT(cur == NULL);
1055 #endif
1056                                 DPFPRINTF(("fragcache[%d]: adjacent(merge "
1057                                     "%d-%d) %d-%d (%d-%d)\n",
1058                                     h->ip_id, frp->fr_off, frp->fr_end, off,
1059                                     max, fra->fr_off, fra->fr_end));
1060                                 fra->fr_off = frp->fr_off;
1061                                 LIST_REMOVE(frp, fr_next);
1062 #ifdef __FreeBSD__
1063                                 pool_put(&V_pf_cent_pl, frp);
1064                                 V_pf_ncache--;
1065 #else
1066                                 pool_put(&pf_cent_pl, frp);
1067                                 pf_ncache--;
1068 #endif
1069                                 frp = NULL;
1070
1071                         }
1072                 }
1073         }
1074
1075         if (hosed) {
1076                 /*
1077                  * We must keep tracking the overall fragment even when
1078                  * we're going to drop it anyway so that we know when to
1079                  * free the overall descriptor.  Thus we drop the frag late.
1080                  */
1081                 goto drop_fragment;
1082         }
1083
1084
1085  pass:
1086         /* Update maximum data size */
1087         if ((*frag)->fr_max < max)
1088                 (*frag)->fr_max = max;
1089
1090         /* This is the last segment */
1091         if (!mff)
1092                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1093
1094         /* Check if we are completely reassembled */
1095         if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
1096             LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
1097             LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
1098                 /* Remove from fragment queue */
1099                 DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
1100                     (*frag)->fr_max));
1101                 pf_free_fragment(*frag);
1102                 *frag = NULL;
1103         }
1104
1105         return (m);
1106
1107  no_mem:
1108         *nomem = 1;
1109
1110         /* Still need to pay attention to !IP_MF */
1111         if (!mff && *frag != NULL)
1112                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1113
1114         m_freem(m);
1115         return (NULL);
1116
1117  drop_fragment:
1118
1119         /* Still need to pay attention to !IP_MF */
1120         if (!mff && *frag != NULL)
1121                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1122
1123         if (drop) {
1124                 /* This fragment has been deemed bad.  Don't reass */
1125                 if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
1126                         DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
1127                             h->ip_id));
1128                 (*frag)->fr_flags |= PFFRAG_DROP;
1129         }
1130
1131         m_freem(m);
1132         return (NULL);
1133 }
1134
1135 #ifdef INET
/*
 * Normalize ("scrub") one IPv4 packet against the active scrub ruleset.
 *
 * The first rule in pf_main_ruleset.rules[PF_RULESET_SCRUB] that matches the
 * packet decides what happens; if none matches, or the match has action
 * PF_NOSCRUB, the packet is passed untouched.
 *
 * For a matching rule the function:
 *  - drops packets whose header length is below sizeof(struct ip) or above
 *    ip_len (PFRES_NORM);
 *  - clears IP_DF when the rule has PFRULE_NODF, patching ip_sum;
 *  - for unfragmented packets, clears every ip_off bit except IP_DF;
 *  - for fragments, rejects DF-marked, misaligned or oversized ones
 *    (PFRES_FRAG) and then either buffers them through pf_reassemble()
 *    (rule has neither PFRULE_FRAGCROP nor PFRULE_FRAGDROP) or runs them
 *    through pf_fragcache();
 *  - finally calls pf_scrub_ip() with the rule's flags, min_ttl and set_tos.
 *
 * m0      in/out: packet; overwritten with the result of pf_reassemble() or
 *         pf_fragcache(), which may be NULL.
 * dir     packet direction, compared against PF_IN / PF_OUT.
 * kif     interface the rules' kif is matched against.
 * reason  out: set via REASON_SET on most PF_DROP exits (the return taken
 *         when pf_reassemble() yields NULL does not set it).
 * pd      packet descriptor; tot_len is read, pf_mtag is refreshed after the
 *         mbuf may have been replaced, and flags gains PFDESC_IP_REAS when
 *         fragment_pass is reached under a rule without FRAGCROP/FRAGDROP.
 *
 * Returns PF_PASS or PF_DROP.
 */
1136 int
1137 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
1138     struct pf_pdesc *pd)
1139 {
1140         struct mbuf             *m = *m0;
1141         struct pf_rule          *r;
1142         struct pf_frent         *frent;
1143         struct pf_fragment      *frag = NULL;
1144         struct ip               *h = mtod(m, struct ip *);
1145         int                      mff = (ntohs(h->ip_off) & IP_MF);
1146         int                      hlen = h->ip_hl << 2;
1147         u_int16_t                fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1148         u_int16_t                max;
1149         int                      ip_len;
1150         int                      ip_off;
1151         int                      tag = -1;
1152
             /*
              * Rule lookup: each failed criterion jumps through the rule's
              * skip pointer for that criterion; only the tag test steps to
              * the plain list successor.  The loop ends on the first rule
              * that passes every test, or with r == NULL.
              */
1153         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1154         while (r != NULL) {
1155                 r->evaluations++;
1156                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1157                         r = r->skip[PF_SKIP_IFP].ptr;
1158                 else if (r->direction && r->direction != dir)
1159                         r = r->skip[PF_SKIP_DIR].ptr;
1160                 else if (r->af && r->af != AF_INET)
1161                         r = r->skip[PF_SKIP_AF].ptr;
1162                 else if (r->proto && r->proto != h->ip_p)
1163                         r = r->skip[PF_SKIP_PROTO].ptr;
1164                 else if (PF_MISMATCHAW(&r->src.addr,
1165                     (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
1166                     r->src.neg, kif, M_GETFIB(m)))
1167                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1168                 else if (PF_MISMATCHAW(&r->dst.addr,
1169                     (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
1170                     r->dst.neg, NULL, M_GETFIB(m)))
1171                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
1172 #ifdef __FreeBSD__
1173                 else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
1174 #else
1175                 else if (r->match_tag && !pf_match_tag(m, r, &tag))
1176 #endif
1177                         r = TAILQ_NEXT(r, entries);
1178                 else
1179                         break;
1180         }
1181
1182         if (r == NULL || r->action == PF_NOSCRUB)
1183                 return (PF_PASS);
1184         else {
                     /* Counters are indexed 1 for PF_OUT, 0 otherwise. */
1185                 r->packets[dir == PF_OUT]++;
1186                 r->bytes[dir == PF_OUT] += pd->tot_len;
1187         }
1188
1189         /* Check for illegal packets */
1190         if (hlen < (int)sizeof(struct ip))
1191                 goto drop;
1192
1193         if (hlen > ntohs(h->ip_len))
1194                 goto drop;
1195
1196         /* Clear IP_DF if the rule uses the no-df option */
1197         if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
                     /*
                      * This block-local ip_off shadows the function-scope int
                      * of the same name; it keeps the old field value that
                      * pf_cksum_fixup() is given alongside the new one.
                      */
1198                 u_int16_t ip_off = h->ip_off;
1199
1200                 h->ip_off &= htons(~IP_DF);
1201                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
1202         }
1203
1204         /* We will need other tests here */
             /* Offset 0 and no more-fragments bit: not a fragment. */
1205         if (!fragoff && !mff)
1206                 goto no_fragment;
1207
1208         /* We're dealing with a fragment now. Don't allow fragments
1209          * with IP_DF to enter the cache. If the flag was cleared by
1210          * no-df above, fine. Otherwise drop it.
1211          */
1212         if (h->ip_off & htons(IP_DF)) {
1213                 DPFPRINTF(("IP_DF\n"));
1214                 goto bad;
1215         }
1216
             /* From here on ip_len is the payload length (total minus header). */
1217         ip_len = ntohs(h->ip_len) - hlen;
             /*
              * NOTE(review): the function-scope ip_off is assigned here but is
              * not read anywhere later in this function (fragoff holds the
              * same value).
              */
1218         ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1219
1220         /* All fragments are 8 byte aligned */
1221         if (mff && (ip_len & 0x7)) {
1222                 DPFPRINTF(("mff and %d\n", ip_len));
1223                 goto bad;
1224         }
1225
1226         /* Respect maximum length */
1227         if (fragoff + ip_len > IP_MAXPACKET) {
1228                 DPFPRINTF(("max packet %d\n", fragoff + ip_len));
1229                 goto bad;
1230         }
             /* End offset of this fragment's payload within the datagram. */
1231         max = fragoff + ip_len;
1232
1233         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
1234                 /* Fully buffer all of the fragments */
1235
1236 #ifdef __FreeBSD__
1237                 frag = pf_find_fragment(h, &V_pf_frag_tree);
1238 #else
1239                 frag = pf_find_fragment(h, &pf_frag_tree);
1240 #endif
1241
1242                 /* Check if we saw the last fragment already */
1243                 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1244                     max > frag->fr_max)
1245                         goto bad;
1246
1247                 /* Get an entry for the fragment queue */
1248 #ifdef __FreeBSD__
1249                 frent = pool_get(&V_pf_frent_pl, PR_NOWAIT);
1250 #else
1251                 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
1252 #endif
1253                 if (frent == NULL) {
1254                         REASON_SET(reason, PFRES_MEMORY);
1255                         return (PF_DROP);
1256                 }
1257 #ifdef __FreeBSD__
1258                 V_pf_nfrents++;
1259 #else
1260                 pf_nfrents++;
1261 #endif
                     /* The queue entry records both the header and its mbuf. */
1262                 frent->fr_ip = h;
1263                 frent->fr_m = m;
1264
1265                 /* Might return a completely reassembled mbuf, or NULL */
1266                 DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
1267                 *m0 = m = pf_reassemble(m0, &frag, frent, mff);
1268
                     /*
                      * No REASON_SET and no logging on this exit.
                      * NOTE(review): presumably NULL also covers "fragment
                      * queued, datagram not complete yet" -- confirm against
                      * pf_reassemble().
                      */
1269                 if (m == NULL)
1270                         return (PF_DROP);
1271
1272                 /* use mtag from concatenated mbuf chain */
1273                 pd->pf_mtag = pf_find_mtag(m);
1274 #ifdef DIAGNOSTIC
1275                 if (pd->pf_mtag == NULL) {
1276                         printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
1277                         if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
                                     /*
                                      * NOTE(review): m is freed here, yet the
                                      * no_mem label still hands h and m to
                                      * PFLOG_PACKET when r->log is set --
                                      * confirm the macro tolerates that.
                                      */
1278                                 m_freem(m);
1279                                 *m0 = NULL;
1280                                 goto no_mem;
1281                         }
1282                 }
1283 #endif
1284                 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1285                         goto drop;
1286
                     /* Re-read the header pointer from the returned mbuf. */
1287                 h = mtod(m, struct ip *);
1288         } else {
1289                 /* non-buffering fragment cache (drops or masks overlaps) */
1290                 int     nomem = 0;
1291
1292 #ifdef __FreeBSD__
1293                 if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) {
1294 #else
1295                 if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) {
1296 #endif
1297                         /*
1298                          * Already passed the fragment cache in the
1299                          * input direction.  If we continued, it would
1300                          * appear to be a dup and would be dropped.
1301                          */
1302                         goto fragment_pass;
1303                 }
1304
1305 #ifdef __FreeBSD__
1306                 frag = pf_find_fragment(h, &V_pf_cache_tree);
1307 #else
1308                 frag = pf_find_fragment(h, &pf_cache_tree);
1309 #endif
1310
1311                 /* Check if we saw the last fragment already */
1312                 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1313                     max > frag->fr_max) {
1314                         if (r->rule_flag & PFRULE_FRAGDROP)
1315                                 frag->fr_flags |= PFFRAG_DROP;
1316                         goto bad;
1317                 }
1318
                     /* The fifth argument is 1 only for PFRULE_FRAGDROP rules. */
1319                 *m0 = m = pf_fragcache(m0, h, &frag, mff,
1320                     (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
                     /*
                      * pf_fragcache() frees the mbuf before returning NULL, so
                      * on both gotos below m is NULL and h no longer points at
                      * live data.
                      * NOTE(review): the target labels still pass h and m to
                      * PFLOG_PACKET -- confirm the macro does not dereference
                      * them.
                      */
1321                 if (m == NULL) {
1322                         if (nomem)
1323                                 goto no_mem;
1324                         goto drop;
1325                 }
1326
1327                 /* use mtag from copied and trimmed mbuf chain */
1328                 pd->pf_mtag = pf_find_mtag(m);
1329 #ifdef DIAGNOSTIC
1330                 if (pd->pf_mtag == NULL) {
1331                         printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
1332                         if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
                                     /* NOTE(review): same freed-m concern as case (1). */
1333                                 m_freem(m);
1334                                 *m0 = NULL;
1335                                 goto no_mem;
1336                         }
1337                 }
1338 #endif
                     /*
                      * Mark inbound packets so the PF_OUT check at the top of
                      * this branch lets them skip the cache.
                      */
1339                 if (dir == PF_IN)
1340 #ifdef __FreeBSD__
1341                         pd->pf_mtag->flags |= PF_TAG_FRAGCACHE;
1342 #else
1343                         m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE;
1344 #endif
1345
1346                 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1347                         goto drop;
1348                 goto fragment_pass;
1349         }
1350
             /*
              * Reached by unfragmented packets and by falling out of the
              * full-reassembly branch above.
              */
1351  no_fragment:
1352         /* At this point, only IP_DF is allowed in ip_off */
1353         if (h->ip_off & ~htons(IP_DF)) {
                     /* Shadows the function-scope ip_off; holds the old value. */
1354                 u_int16_t ip_off = h->ip_off;
1355
1356                 h->ip_off &= htons(IP_DF);
1357                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
1358         }
1359
1360         /* not missing a return here */
1361
1362  fragment_pass:
1363         pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos);
1364
1365         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1366                 pd->flags |= PFDESC_IP_REAS;
1367         return (PF_PASS);
1368
             /* Error exits: set the reason, log if the rule asks for it, drop. */
1369  no_mem:
1370         REASON_SET(reason, PFRES_MEMORY);
1371         if (r != NULL && r->log)
1372                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1373         return (PF_DROP);
1374
1375  drop:
1376         REASON_SET(reason, PFRES_NORM);
1377         if (r != NULL && r->log)
1378                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1379         return (PF_DROP);
1380
1381  bad:
1382         DPFPRINTF(("dropping bad fragment\n"));
1383
1384         /* Free associated fragments */
1385         if (frag != NULL)
1386                 pf_free_fragment(frag);
1387
1388         REASON_SET(reason, PFRES_FRAG);
1389         if (r != NULL && r->log)
1390                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1391
1392         return (PF_DROP);
1393 }
1394 #endif
1395
1396 #ifdef INET6
/*
 * Normalize one IPv6 packet against the active scrub ruleset.
 *
 * Rules are matched on interface, direction, address family and source /
 * destination address; the protocol test is compiled out (#if 0).  If no rule
 * matches, or the match has action PF_NOSCRUB, the packet passes untouched.
 *
 * For a matching rule the function walks the extension header chain:
 *  - AH, routing and destination-options headers are stepped over using
 *    their length byte;
 *  - hop-by-hop options are scanned option by option, validating a jumbo
 *    payload option if one is present;
 *  - a fragment header diverts to the "fragment" label;
 *  - any other next-header value ends the walk.
 * It then validates the payload length against the mbuf packet length and
 * calls pf_scrub_ip6() with the rule's min_ttl.  Fragments are only
 * length-checked here; they are passed on without reassembly.
 *
 * m0      packet; only read here (*m0 is never reassigned).
 * dir     packet direction, compared against PF_OUT for the counters.
 * kif     interface the rules' kif is matched against.
 * reason  out: PFRES_SHORT, PFRES_NORM or PFRES_FRAG on the drop exits.
 * pd      packet descriptor; tot_len is read and pd is handed to PFLOG_PACKET.
 *
 * Returns PF_PASS or PF_DROP.
 */
1397 int
1398 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
1399     u_short *reason, struct pf_pdesc *pd)
1400 {
1401         struct mbuf             *m = *m0;
1402         struct pf_rule          *r;
1403         struct ip6_hdr          *h = mtod(m, struct ip6_hdr *);
1404         int                      off;
1405         struct ip6_ext           ext;
1406         struct ip6_opt           opt;
1407         struct ip6_opt_jumbo     jumbo;
1408         struct ip6_frag          frag;
1409         u_int32_t                jumbolen = 0, plen;
1410         u_int16_t                fragoff = 0;
1411         int                      optend;
1412         int                      ooff;
1413         u_int8_t                 proto;
1414         int                      terminal;
1415
             /*
              * Rule lookup: every failed criterion jumps through the rule's
              * skip pointer for that criterion; the loop ends on the first
              * rule passing all tests, or with r == NULL.
              */
1416         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1417         while (r != NULL) {
1418                 r->evaluations++;
1419                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1420                         r = r->skip[PF_SKIP_IFP].ptr;
1421                 else if (r->direction && r->direction != dir)
1422                         r = r->skip[PF_SKIP_DIR].ptr;
1423                 else if (r->af && r->af != AF_INET6)
1424                         r = r->skip[PF_SKIP_AF].ptr;
1425 #if 0 /* header chain! */
1426                 else if (r->proto && r->proto != h->ip6_nxt)
1427                         r = r->skip[PF_SKIP_PROTO].ptr;
1428 #endif
1429                 else if (PF_MISMATCHAW(&r->src.addr,
1430                     (struct pf_addr *)&h->ip6_src, AF_INET6,
1431                     r->src.neg, kif, M_GETFIB(m)))
1432                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1433                 else if (PF_MISMATCHAW(&r->dst.addr,
1434                     (struct pf_addr *)&h->ip6_dst, AF_INET6,
1435                     r->dst.neg, NULL, M_GETFIB(m)))
1436                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
1437                 else
1438                         break;
1439         }
1440
1441         if (r == NULL || r->action == PF_NOSCRUB)
1442                 return (PF_PASS);
1443         else {
                     /* Counters are indexed 1 for PF_OUT, 0 otherwise. */
1444                 r->packets[dir == PF_OUT]++;
1445                 r->bytes[dir == PF_OUT] += pd->tot_len;
1446         }
1447
1448         /* Check for illegal packets */
1449         if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
1450                 goto drop;
1451
             /*
              * Walk the extension headers.  off is the mbuf offset of the
              * header currently examined (it starts just past the fixed IPv6
              * header); proto is that header's type.
              */
1452         off = sizeof(struct ip6_hdr);
1453         proto = h->ip6_nxt;
1454         terminal = 0;
1455         do {
1456                 switch (proto) {
1457                 case IPPROTO_FRAGMENT:
1458                         goto fragment;
1459                         break;
1460                 case IPPROTO_AH:
1461                 case IPPROTO_ROUTING:
1462                 case IPPROTO_DSTOPTS:
1463                         if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1464                             NULL, AF_INET6))
1465                                 goto shortpkt;
                             /*
                              * AH length is taken in 4-byte units plus 2; the
                              * other headers in 8-byte units plus 1.
                              */
1466                         if (proto == IPPROTO_AH)
1467                                 off += (ext.ip6e_len + 2) * 4;
1468                         else
1469                                 off += (ext.ip6e_len + 1) * 8;
1470                         proto = ext.ip6e_nxt;
1471                         break;
1472                 case IPPROTO_HOPOPTS:
1473                         if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1474                             NULL, AF_INET6))
1475                                 goto shortpkt;
                             /*
                              * optend is the offset just past this header;
                              * ooff walks the options that follow the
                              * two-byte ext header.
                              */
1476                         optend = off + (ext.ip6e_len + 1) * 8;
1477                         ooff = off + sizeof(ext);
1478                         do {
1479                                 if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
1480                                     sizeof(opt.ip6o_type), NULL, NULL,
1481                                     AF_INET6))
1482                                         goto shortpkt;
                                     /*
                                      * PAD1 is handled as a single byte: only
                                      * the type is read and ooff advances by
                                      * one.  continue re-tests the loop
                                      * condition below.
                                      */
1483                                 if (opt.ip6o_type == IP6OPT_PAD1) {
1484                                         ooff++;
1485                                         continue;
1486                                 }
1487                                 if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
1488                                     NULL, NULL, AF_INET6))
1489                                         goto shortpkt;
                                     /* The option must end inside this header. */
1490                                 if (ooff + sizeof(opt) + opt.ip6o_len > optend)
1491                                         goto drop;
1492                                 switch (opt.ip6o_type) {
1493                                 case IP6OPT_JUMBO:
                                             /*
                                              * A jumbo option is accepted only
                                              * when ip6_plen is zero, the jumbo
                                              * length exceeds IPV6_MAXPACKET and
                                              * header plus jumbo length equals
                                              * the mbuf packet length.
                                              */
1494                                         if (h->ip6_plen != 0)
1495                                                 goto drop;
1496                                         if (!pf_pull_hdr(m, ooff, &jumbo,
1497                                             sizeof(jumbo), NULL, NULL,
1498                                             AF_INET6))
1499                                                 goto shortpkt;
1500                                         memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
1501                                             sizeof(jumbolen));
1502                                         jumbolen = ntohl(jumbolen);
1503                                         if (jumbolen <= IPV6_MAXPACKET)
1504                                                 goto drop;
1505                                         if (sizeof(struct ip6_hdr) + jumbolen !=
1506                                             m->m_pkthdr.len)
1507                                                 goto drop;
1508                                         break;
1509                                 default:
1510                                         break;
1511                                 }
1512                                 ooff += sizeof(opt) + opt.ip6o_len;
1513                         } while (ooff < optend);
1514
1515                         off = optend;
1516                         proto = ext.ip6e_nxt;
1517                         break;
1518                 default:
                             /* Any other next-header value ends the walk. */
1519                         terminal = 1;
1520                         break;
1521                 }
1522         } while (!terminal);
1523
1524         /* jumbo payload option must be present, or plen > 0 */
1525         if (ntohs(h->ip6_plen) == 0)
1526                 plen = jumbolen;
1527         else
1528                 plen = ntohs(h->ip6_plen);
1529         if (plen == 0)
1530                 goto drop;
             /* The claimed payload must not extend past the mbuf packet. */
1531         if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
1532                 goto shortpkt;
1533
1534         pf_scrub_ip6(&m, r->min_ttl);
1535
1536         return (PF_PASS);
1537
1538  fragment:
             /* A fragment needs a non-zero ip6_plen and no jumbo option. */
1539         if (ntohs(h->ip6_plen) == 0 || jumbolen)
1540                 goto drop;
1541         plen = ntohs(h->ip6_plen);
1542
1543         if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
1544                 goto shortpkt;
1545         fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
             /*
              * NOTE(review): off is measured from the start of the IPv6
              * header, while plen (ip6_plen) is compared elsewhere as a
              * length that excludes it; the subtraction also involves
              * unsigned operands, so a small plen would wrap instead of
              * going negative -- confirm the intended bound.
              */
1546         if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
1547                 goto badfrag;
1548
1549         /* do something about it */
1550         /* remember to set pd->flags |= PFDESC_IP_REAS */
             /* The fragment is passed as-is; pf_scrub_ip6() is not called here. */
1551         return (PF_PASS);
1552
             /* Error exits: set the reason, log if the rule asks for it, drop. */
1553  shortpkt:
1554         REASON_SET(reason, PFRES_SHORT);
1555         if (r != NULL && r->log)
1556                 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1557         return (PF_DROP);
1558
1559  drop:
1560         REASON_SET(reason, PFRES_NORM);
1561         if (r != NULL && r->log)
1562                 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1563         return (PF_DROP);
1564
1565  badfrag:
1566         REASON_SET(reason, PFRES_FRAG);
1567         if (r != NULL && r->log)
1568                 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1569         return (PF_DROP);
1570 }
1571 #endif /* INET6 */
1572
1573 int
1574 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
1575     int off, void *h, struct pf_pdesc *pd)
1576 {
1577         struct pf_rule  *r, *rm = NULL;
1578         struct tcphdr   *th = pd->hdr.tcp;
1579         int              rewrite = 0;
1580         u_short          reason;
1581         u_int8_t         flags;
1582         sa_family_t      af = pd->af;
1583
1584         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1585         while (r != NULL) {
1586                 r->evaluations++;
1587                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1588                         r = r->skip[PF_SKIP_IFP].ptr;
1589                 else if (r->direction && r->direction != dir)
1590                         r = r->skip[PF_SKIP_DIR].ptr;
1591                 else if (r->af && r->af != af)
1592                         r = r->skip[PF_SKIP_AF].ptr;
1593                 else if (r->proto && r->proto != pd->proto)
1594                         r = r->skip[PF_SKIP_PROTO].ptr;
1595                 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
1596                     r->src.neg, kif, M_GETFIB(m)))
1597                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1598                 else if (r->src.port_op && !pf_match_port(r->src.port_op,
1599                             r->src.port[0], r->src.port[1], th->th_sport))
1600                         r = r->skip[PF_SKIP_SRC_PORT].ptr;
1601                 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
1602                     r->dst.neg, NULL, M_GETFIB(m)))
1603                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
1604                 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
1605                             r->dst.port[0], r->dst.port[1], th->th_dport))
1606                         r = r->skip[PF_SKIP_DST_PORT].ptr;
1607                 else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
1608                             pf_osfp_fingerprint(pd, m, off, th),
1609                             r->os_fingerprint))
1610                         r = TAILQ_NEXT(r, entries);
1611                 else {
1612                         rm = r;
1613                         break;
1614                 }
1615         }
1616
1617         if (rm == NULL || rm->action == PF_NOSCRUB)
1618                 return (PF_PASS);
1619         else {
1620                 r->packets[dir == PF_OUT]++;
1621                 r->bytes[dir == PF_OUT] += pd->tot_len;
1622         }
1623
1624         if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
1625                 pd->flags |= PFDESC_TCP_NORM;
1626
1627         flags = th->th_flags;
1628         if (flags & TH_SYN) {
1629                 /* Illegal packet */
1630                 if (flags & TH_RST)
1631                         goto tcp_drop;
1632
1633                 if (flags & TH_FIN)
1634                         flags &= ~TH_FIN;
1635         } else {
1636                 /* Illegal packet */
1637                 if (!(flags & (TH_ACK|TH_RST)))
1638                         goto tcp_drop;
1639         }
1640
1641         if (!(flags & TH_ACK)) {
1642                 /* These flags are only valid if ACK is set */
1643                 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
1644                         goto tcp_drop;
1645         }
1646
1647         /* Check for illegal header length */
1648         if (th->th_off < (sizeof(struct tcphdr) >> 2))
1649                 goto tcp_drop;
1650
1651         /* If flags changed, or reserved data set, then adjust */
1652         if (flags != th->th_flags || th->th_x2 != 0) {
1653                 u_int16_t       ov, nv;
1654
1655                 ov = *(u_int16_t *)(&th->th_ack + 1);
1656                 th->th_flags = flags;
1657                 th->th_x2 = 0;
1658                 nv = *(u_int16_t *)(&th->th_ack + 1);
1659
1660                 th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, ov, nv, 0);
1661                 rewrite = 1;
1662         }
1663
1664         /* Remove urgent pointer, if TH_URG is not set */
1665         if (!(flags & TH_URG) && th->th_urp) {
1666                 th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, th->th_urp,
1667                     0, 0);
1668                 th->th_urp = 0;
1669                 rewrite = 1;
1670         }
1671
1672         /* Process options */
1673         if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af))
1674                 rewrite = 1;
1675
1676         /* copy back packet headers if we sanitized */
1677         if (rewrite)
1678 #ifdef __FreeBSD__
1679                 m_copyback(m, off, sizeof(*th), (caddr_t)th);
1680 #else
1681                 m_copyback(m, off, sizeof(*th), th);
1682 #endif
1683
1684         return (PF_PASS);
1685
1686  tcp_drop:
1687         REASON_SET(&reason, PFRES_NORM);
1688         if (rm != NULL && r->log)
1689                 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
1690         return (PF_DROP);
1691 }
1692
1693 int
1694 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
1695     struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
1696 {
1697         u_int32_t tsval, tsecr;
1698         u_int8_t hdr[60];
1699         u_int8_t *opt;
1700
1701 #ifdef __FreeBSD__
1702         KASSERT((src->scrub == NULL), 
1703             ("pf_normalize_tcp_init: src->scrub != NULL"));
1704
1705         src->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT);
1706 #else
1707         KASSERT(src->scrub == NULL);
1708
1709         src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
1710 #endif
1711         if (src->scrub == NULL)
1712                 return (1);
1713         bzero(src->scrub, sizeof(*src->scrub));
1714
1715         switch (pd->af) {
1716 #ifdef INET
1717         case AF_INET: {
1718                 struct ip *h = mtod(m, struct ip *);
1719                 src->scrub->pfss_ttl = h->ip_ttl;
1720                 break;
1721         }
1722 #endif /* INET */
1723 #ifdef INET6
1724         case AF_INET6: {
1725                 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1726                 src->scrub->pfss_ttl = h->ip6_hlim;
1727                 break;
1728         }
1729 #endif /* INET6 */
1730         }
1731
1732
1733         /*
1734          * All normalizations below are only begun if we see the start of
1735          * the connections.  They must all set an enabled bit in pfss_flags
1736          */
1737         if ((th->th_flags & TH_SYN) == 0)
1738                 return (0);
1739
1740
1741         if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
1742             pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1743                 /* Diddle with TCP options */
1744                 int hlen;
1745                 opt = hdr + sizeof(struct tcphdr);
1746                 hlen = (th->th_off << 2) - sizeof(struct tcphdr);
1747                 while (hlen >= TCPOLEN_TIMESTAMP) {
1748                         switch (*opt) {
1749                         case TCPOPT_EOL:        /* FALLTHROUGH */
1750                         case TCPOPT_NOP:
1751                                 opt++;
1752                                 hlen--;
1753                                 break;
1754                         case TCPOPT_TIMESTAMP:
1755                                 if (opt[1] >= TCPOLEN_TIMESTAMP) {
1756                                         src->scrub->pfss_flags |=
1757                                             PFSS_TIMESTAMP;
1758                                         src->scrub->pfss_ts_mod =
1759                                             htonl(arc4random());
1760
1761                                         /* note PFSS_PAWS not set yet */
1762                                         memcpy(&tsval, &opt[2],
1763                                             sizeof(u_int32_t));
1764                                         memcpy(&tsecr, &opt[6],
1765                                             sizeof(u_int32_t));
1766                                         src->scrub->pfss_tsval0 = ntohl(tsval);
1767                                         src->scrub->pfss_tsval = ntohl(tsval);
1768                                         src->scrub->pfss_tsecr = ntohl(tsecr);
1769                                         getmicrouptime(&src->scrub->pfss_last);
1770                                 }
1771                                 /* FALLTHROUGH */
1772                         default:
1773                                 hlen -= MAX(opt[1], 2);
1774                                 opt += MAX(opt[1], 2);
1775                                 break;
1776                         }
1777                 }
1778         }
1779
1780         return (0);
1781 }
1782
1783 void
1784 pf_normalize_tcp_cleanup(struct pf_state *state)
1785 {
1786 #ifdef __FreeBSD__
1787         if (state->src.scrub)
1788                 pool_put(&V_pf_state_scrub_pl, state->src.scrub);
1789         if (state->dst.scrub)
1790                 pool_put(&V_pf_state_scrub_pl, state->dst.scrub);
1791 #else
1792         if (state->src.scrub)
1793                 pool_put(&pf_state_scrub_pl, state->src.scrub);
1794         if (state->dst.scrub)
1795                 pool_put(&pf_state_scrub_pl, state->dst.scrub);
1796 #endif
1797
1798         /* Someday... flush the TCP segment reassembly descriptors. */
1799 }
1800
/*
 * pf_normalize_tcp_stateful
 *
 * Per-packet TCP normalization for a connection that has a scrub record
 * on at least one peer (asserted below).  In order, the function:
 *   - stamps the packet with the TTL / hop limit stored in src->scrub,
 *     first raising the stored value if the packet carries a larger one;
 *   - walks the TCP options and, for a timestamp option, rewrites tsval
 *     (adding src's pfss_ts_mod) and tsecr (subtracting dst's
 *     pfss_ts_mod); a second timestamp option drops the packet;
 *   - clears PFSS_PAWS and sets PFSS_PAWS_IDLED on a peer idle for more
 *     than TS_MAX_IDLE seconds (and on src once the state is older than
 *     TS_MAX_CONN seconds);
 *   - once both peers have PFSS_PAWS and are at least ESTABLISHED,
 *     rejects timestamps outside the bounds described further down;
 *   - rejects a data packet without a timestamp when src already has
 *     PFSS_DATA_TS, and otherwise records on the first data packet
 *     whether it was timestamped;
 *   - refreshes the stored tsval / tsecr / tsval0 and sets PFSS_PAWS.
 *
 * *writeback is set to 1 when the options were rewritten in the mbuf.
 * Returns PF_DROP with *reason set to PFRES_TS on rejection, 0 otherwise.
 */
1801 int
1802 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
1803     u_short *reason, struct tcphdr *th, struct pf_state *state,
1804     struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
1805 {
1806         struct timeval uptime;
1807         u_int32_t tsval, tsecr;
1808         u_int tsval_from_last;
1809         u_int8_t hdr[60];
1810         u_int8_t *opt;
1811         int copyback = 0;
1812         int got_ts = 0;
1813
1814 #ifdef __FreeBSD__
1815         KASSERT((src->scrub || dst->scrub), 
1816             ("pf_normalize_tcp_statefull: src->scrub && dst->scrub!"));
1817 #else
1818         KASSERT(src->scrub || dst->scrub);
1819 #endif
1820
1821         /*
1822          * Enforce the minimum TTL seen for this connection.  Negate a common
1823          * technique to evade an intrusion detection system and confuse
1824          * firewall state code.
1825          */
             /*
              * NOTE(review): as written, pfss_ttl tracks the LARGEST TTL /
              * hop limit seen from this peer and that value is stamped onto
              * every packet, i.e. the code enforces a floor on the TTL.
              * ip_ttl is rewritten here without any checksum adjustment in
              * this function -- confirm the caller or output path covers it.
              */
1826         switch (pd->af) {
1827 #ifdef INET
1828         case AF_INET: {
1829                 if (src->scrub) {
1830                         struct ip *h = mtod(m, struct ip *);
1831                         if (h->ip_ttl > src->scrub->pfss_ttl)
1832                                 src->scrub->pfss_ttl = h->ip_ttl;
1833                         h->ip_ttl = src->scrub->pfss_ttl;
1834                 }
1835                 break;
1836         }
1837 #endif /* INET */
1838 #ifdef INET6
1839         case AF_INET6: {
1840                 if (src->scrub) {
1841                         struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1842                         if (h->ip6_hlim > src->scrub->pfss_ttl)
1843                                 src->scrub->pfss_ttl = h->ip6_hlim;
1844                         h->ip6_hlim = src->scrub->pfss_ttl;
1845                 }
1846                 break;
1847         }
1848 #endif /* INET6 */
1849         }
1850
             /*
              * Only look at the options when the header is longer than the
              * fixed TCP header, at least one peer has PFSS_TIMESTAMP set,
              * and the full header could be copied into hdr[].
              */
1851         if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
1852             ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
1853             (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
1854             pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1855                 /* Diddle with TCP options */
1856                 int hlen;
1857                 opt = hdr + sizeof(struct tcphdr);
1858                 hlen = (th->th_off << 2) - sizeof(struct tcphdr);
                     /*
                      * Walk the options while at least a full timestamp
                      * option's worth of bytes remains.  EOL/NOP advance by
                      * one byte; every other option advances by its length
                      * byte, but never by less than 2.
                      */
1859                 while (hlen >= TCPOLEN_TIMESTAMP) {
1860                         switch (*opt) {
1861                         case TCPOPT_EOL:        /* FALLTHROUGH */
1862                         case TCPOPT_NOP:
1863                                 opt++;
1864                                 hlen--;
1865                                 break;
1866                         case TCPOPT_TIMESTAMP:
1867                                 /* Modulate the timestamps.  Can be used for
1868                                  * NAT detection, OS uptime determination or
1869                                  * reboot detection.
1870                                  */
1871
1872                                 if (got_ts) {
1873                                         /* Huh?  Multiple timestamps!? */
1874 #ifdef __FreeBSD__
1875                                         if (V_pf_status.debug >= PF_DEBUG_MISC) {
1876 #else
1877                                         if (pf_status.debug >= PF_DEBUG_MISC) {
1878 #endif
1879                                                 DPFPRINTF(("multiple TS??"));
1880                                                 pf_print_state(state);
1881                                                 printf("\n");
1882                                         }
1883                                         REASON_SET(reason, PFRES_TS);
1884                                         return (PF_DROP);
1885                                 }
1886                                 if (opt[1] >= TCPOLEN_TIMESTAMP) {
1887                                         memcpy(&tsval, &opt[2],
1888                                             sizeof(u_int32_t));
                                             /*
                                              * NOTE(review): tsval is byte-swapped
                                              * to host order only inside this
                                              * branch; otherwise it keeps the value
                                              * copied from the packet.
                                              */
1889                                         if (tsval && src->scrub &&
1890                                             (src->scrub->pfss_flags &
1891                                             PFSS_TIMESTAMP)) {
1892                                                 tsval = ntohl(tsval);
1893                                                 pf_change_proto_a(m, &opt[2],
1894                                                     &th->th_sum,
1895                                                     htonl(tsval +
1896                                                     src->scrub->pfss_ts_mod),
1897                                                     0);
1898                                                 copyback = 1;
1899                                         }
1900
1901                                         /* Modulate TS reply iff valid (!0) */
1902                                         memcpy(&tsecr, &opt[6],
1903                                             sizeof(u_int32_t));
                                             /*
                                              * NOTE(review): likewise, tsecr is
                                              * converted to host order and has
                                              * dst's modulation removed only
                                              * inside this branch.
                                              */
1904                                         if (tsecr && dst->scrub &&
1905                                             (dst->scrub->pfss_flags &
1906                                             PFSS_TIMESTAMP)) {
1907                                                 tsecr = ntohl(tsecr)
1908                                                     - dst->scrub->pfss_ts_mod;
1909                                                 pf_change_proto_a(m, &opt[6],
1910                                                     &th->th_sum, htonl(tsecr),
1911                                                     0);
1912                                                 copyback = 1;
1913                                         }
1914                                         got_ts = 1;
1915                                 }
1916                                 /* FALLTHROUGH */
1917                         default:
1918                                 hlen -= MAX(opt[1], 2);
1919                                 opt += MAX(opt[1], 2);
1920                                 break;
1921                         }
1922                 }
1923                 if (copyback) {
1924                         /* Copyback the options, caller copies back header */
1925                         *writeback = 1;
1926                         m_copyback(m, off + sizeof(struct tcphdr),
1927                             (th->th_off << 2) - sizeof(struct tcphdr), hdr +
1928                             sizeof(struct tcphdr));
1929                 }
1930         }
1931
1932
1933         /*
1934          * Must invalidate PAWS checks on connections idle for too long.
1935          * The fastest allowed timestamp clock is 1ms.  That turns out to
1936          * be about 24 days before it wraps.  XXX Right now our lowerbound
1937          * TS echo check only works for the first 12 days of a connection
1938          * when the TS has exhausted half its 32bit space
1939          */
1940 #define TS_MAX_IDLE     (24*24*60*60)
1941 #define TS_MAX_CONN     (12*24*60*60)   /* XXX remove when better tsecr check */
1942
1943         getmicrouptime(&uptime);
1944         if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
1945             (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
1946             time_second - state->creation > TS_MAX_CONN))  {
1947 #ifdef __FreeBSD__
1948                 if (V_pf_status.debug >= PF_DEBUG_MISC) {
1949 #else
1950                 if (pf_status.debug >= PF_DEBUG_MISC) {
1951 #endif
1952                         DPFPRINTF(("src idled out of PAWS\n"));
1953                         pf_print_state(state);
1954                         printf("\n");
1955                 }
1956                 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
1957                     | PFSS_PAWS_IDLED;
1958         }
             /* Same idle test for dst, but without the connection-age limit. */
1959         if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
1960             uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
1961 #ifdef __FreeBSD__
1962                 if (V_pf_status.debug >= PF_DEBUG_MISC) {
1963 #else
1964                 if (pf_status.debug >= PF_DEBUG_MISC) {
1965 #endif
1966                         DPFPRINTF(("dst idled out of PAWS\n"));
1967                         pf_print_state(state);
1968                         printf("\n");
1969                 }
1970                 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
1971                     | PFSS_PAWS_IDLED;
1972         }
1973
1974         if (got_ts && src->scrub && dst->scrub &&
1975             (src->scrub->pfss_flags & PFSS_PAWS) &&
1976             (dst->scrub->pfss_flags & PFSS_PAWS)) {
1977                 /* Validate that the timestamps are "in-window".
1978                  * RFC1323 describes TCP Timestamp options that allow
1979                  * measurement of RTT (round trip time) and PAWS
1980                  * (protection against wrapped sequence numbers).  PAWS
1981                  * gives us a set of rules for rejecting packets on
1982                  * long fat pipes (packets that were somehow delayed 
1983                  * in transit longer than the time it took to send the
1984                  * full TCP sequence space of 4Gb).  We can use these
1985                  * rules and infer a few others that will let us treat
1986                  * the 32bit timestamp and the 32bit echoed timestamp
1987                  * as sequence numbers to prevent a blind attacker from
1988                  * inserting packets into a connection.
1989                  *
1990                  * RFC1323 tells us:
1991                  *  - The timestamp on this packet must be greater than
1992                  *    or equal to the last value echoed by the other
1993                  *    endpoint.  The RFC says those will be discarded
1994                  *    since it is a dup that has already been acked.
1995                  *    This gives us a lowerbound on the timestamp.
1996                  *        timestamp >= other last echoed timestamp
1997                  *  - The timestamp will be less than or equal to
1998                  *    the last timestamp plus the time between the
1999                  *    last packet and now.  The RFC defines the max
2000                  *    clock rate as 1ms.  We will allow clocks to be
2001                  *    up to 10% fast and will allow a total difference
2002                  *    or 30 seconds due to a route change.  And this
2003                  *    gives us an upperbound on the timestamp.
2004                  *        timestamp <= last timestamp + max ticks
2005                  *    We have to be careful here.  Windows will send an
2006                  *    initial timestamp of zero and then initialize it
2007                  *    to a random value after the 3whs; presumably to
2008                  *    avoid a DoS by having to call an expensive RNG
2009                  *    during a SYN flood.  Proof MS has at least one
2010                  *    good security geek.
2011                  *
2012                  *  - The TCP timestamp option must also echo the other
2013                  *    endpoints timestamp.  The timestamp echoed is the
2014                  *    one carried on the earliest unacknowledged segment
2015                  *    on the left edge of the sequence window.  The RFC
2016                  *    states that the host will reject any echoed
2017                  *    timestamps that were larger than any ever sent.
2018                  *    This gives us an upperbound on the TS echo.
2019                  *        tescr <= largest_tsval
2020                  *  - The lowerbound on the TS echo is a little more
2021                  *    tricky to determine.  The other endpoint's echoed
2022                  *    values will not decrease.  But there may be
2023                  *    network conditions that re-order packets and
2024                  *    cause our view of them to decrease.  For now the
2025                  *    only lowerbound we can safely determine is that
2026                  *    the TS echo will never be less than the original
2027                  *    TS.  XXX There is probably a better lowerbound.
2028                  *    Remove TS_MAX_CONN with better lowerbound check.
2029                  *        tescr >= other original TS
2030                  *
2031                  * It is also important to note that the fastest
2032                  * timestamp clock of 1ms will wrap its 32bit space in
2033                  * 24 days.  So we just disable TS checking after 24
2034                  * days of idle time.  We actually must use a 12d
2035                  * connection limit until we can come up with a better
2036                  * lowerbound to the TS echo check.
2037                  */
2038                 struct timeval delta_ts;
2039                 int ts_fudge;
2040
2041
2042                 /*
2043                  * PFTM_TS_DIFF is how many seconds of leeway to allow
2044                  * a host's timestamp.  This can happen if the previous
2045                  * packet got delayed in transit for much longer than
2046                  * this packet.
2047                  */
2048                 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
2049 #ifdef __FreeBSD__
2050                         ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF];
2051 #else
2052                         ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
2053 #endif
2054
2055
2056                 /* Calculate max ticks since the last timestamp */
2057 #define TS_MAXFREQ      1100            /* RFC max TS freq of 1Khz + 10% skew */
2058 #define TS_MICROSECS    1000000         /* microseconds per second */
2059 #ifdef __FreeBSD__
2060 #ifndef timersub
2061 #define timersub(tvp, uvp, vvp)                                         \
2062         do {                                                            \
2063                 (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;          \
2064                 (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;       \
2065                 if ((vvp)->tv_usec < 0) {                               \
2066                         (vvp)->tv_sec--;                                \
2067                         (vvp)->tv_usec += 1000000;                      \
2068                 }                                                       \
2069         } while (0)
2070 #endif
2071 #endif
                     /*
                      * Upper bound on how far tsval may have advanced since
                      * src's last timestamped packet: whole seconds elapsed
                      * plus ts_fudge, times TS_MAXFREQ, plus the sub-second
                      * remainder scaled from microseconds.
                      */
2072                 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
2073                 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
2074                 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
2075
2076
2077                 if ((src->state >= TCPS_ESTABLISHED &&
2078                     dst->state >= TCPS_ESTABLISHED) &&
2079                     (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
2080                     SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
2081                     (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
2082                     SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
2083                         /* Bad RFC1323 implementation or an insertion attack.
2084                          *
2085                          * - Solaris 2.6 and 2.7 are known to send another ACK
2086                          *   after the FIN,FIN|ACK,ACK closing that carries
2087                          *   an old timestamp.
2088                          */
2089
2090                         DPFPRINTF(("Timestamp failed %c%c%c%c\n",
2091                             SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
2092                             SEQ_GT(tsval, src->scrub->pfss_tsval +
2093                             tsval_from_last) ? '1' : ' ',
2094                             SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
2095                             SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
2096 #ifdef __FreeBSD__
2097                         DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
2098                             "idle: %jus %lums\n",
2099                             tsval, tsecr, tsval_from_last,
2100                             (uintmax_t)delta_ts.tv_sec,
2101                             delta_ts.tv_usec / 1000));
2102                         DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
2103                             src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2104                         DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u"
2105                             "\n", dst->scrub->pfss_tsval,
2106                             dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
2107 #else
2108                         DPFPRINTF((" tsval: %lu  tsecr: %lu  +ticks: %lu  "
2109                             "idle: %lus %lums\n",
2110                             tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
2111                             delta_ts.tv_usec / 1000));
2112                         DPFPRINTF((" src->tsval: %lu  tsecr: %lu\n",
2113                             src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2114                         DPFPRINTF((" dst->tsval: %lu  tsecr: %lu  tsval0: %lu"
2115                             "\n", dst->scrub->pfss_tsval,
2116                             dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
2117 #endif
2118 #ifdef __FreeBSD__
2119                         if (V_pf_status.debug >= PF_DEBUG_MISC) {
2120 #else
2121                         if (pf_status.debug >= PF_DEBUG_MISC) {
2122 #endif
2123                                 pf_print_state(state);
2124                                 pf_print_flags(th->th_flags);
2125                                 printf("\n");
2126                         }
2127                         REASON_SET(reason, PFRES_TS);
2128                         return (PF_DROP);
2129                 }
2130
2131                 /* XXX I'd really like to require tsecr but it's optional */
2132
2133         } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
2134             ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
2135             || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
2136             src->scrub && dst->scrub &&
2137             (src->scrub->pfss_flags & PFSS_PAWS) &&
2138             (dst->scrub->pfss_flags & PFSS_PAWS)) {
2139                 /* Didn't send a timestamp.  Timestamps aren't really useful
2140                  * when:
2141                  *  - connection opening or closing (often not even sent).
2142                  *    but we must not let an attacker to put a FIN on a
2143                  *    data packet to sneak it through our ESTABLISHED check.
2144                  *  - on a TCP reset.  RFC suggests not even looking at TS.
2145                  *  - on an empty ACK.  The TS will not be echoed so it will
2146                  *    probably not help keep the RTT calculation in sync and
2147                  *    there isn't as much danger when the sequence numbers
2148                  *    got wrapped.  So some stacks don't include TS on empty
2149                  *    ACKs :-(
2150                  *
2151                  * To minimize the disruption to mostly RFC1323 conformant
2152                  * stacks, we will only require timestamps on data packets.
2153                  *
2154                  * And what do ya know, we cannot require timestamps on data
2155                  * packets.  There appear to be devices that do legitimate
2156                  * TCP connection hijacking.  There are HTTP devices that allow
2157                  * a 3whs (with timestamps) and then buffer the HTTP request.
2158                  * If the intermediate device has the HTTP response cache, it
2159                  * will spoof the response but not bother timestamping its
2160                  * packets.  So we can look for the presence of a timestamp in
2161                  * the first data packet and if there, require it in all future
2162                  * packets.
2163                  */
2164
2165                 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
2166                         /*
2167                          * Hey!  Someone tried to sneak a packet in.  Or the
2168                          * stack changed its RFC1323 behavior?!?!
2169                          */
2170 #ifdef __FreeBSD__
2171                         if (V_pf_status.debug >= PF_DEBUG_MISC) {
2172 #else
2173                         if (pf_status.debug >= PF_DEBUG_MISC) {
2174 #endif
2175                                 DPFPRINTF(("Did not receive expected RFC1323 "
2176                                     "timestamp\n"));
2177                                 pf_print_state(state);
2178                                 pf_print_flags(th->th_flags);
2179                                 printf("\n");
2180                         }
2181                         REASON_SET(reason, PFRES_TS);
2182                         return (PF_DROP);
2183                 }
2184         }
2185
2186
2187         /*
2188          * We will note if a host sends his data packets with or without
2189          * timestamps.  And require all data packets to contain a timestamp
2190          * if the first does.  PAWS implicitly requires that all data packets be
2191          * timestamped.  But I think there are middle-man devices that hijack
2192          * TCP streams immediately after the 3whs and don't timestamp their
2193          * packets (seen in a WWW accelerator or cache).
2194          */
             /*
              * The mask test below is true only while PFSS_TIMESTAMP is set
              * and neither PFSS_DATA_TS nor PFSS_DATA_NOTS has been recorded,
              * so the decision is taken once, on the first data packet.
              */
2195         if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
2196             (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
2197                 if (got_ts)
2198                         src->scrub->pfss_flags |= PFSS_DATA_TS;
2199                 else {
2200                         src->scrub->pfss_flags |= PFSS_DATA_NOTS;
2201 #ifdef __FreeBSD__
2202                         if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2203 #else
2204                         if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2205 #endif
2206                             (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
2207                                 /* Don't warn if other host rejected RFC1323 */
2208                                 DPFPRINTF(("Broken RFC1323 stack did not "
2209                                     "timestamp data packet. Disabled PAWS "
2210                                     "security.\n"));
2211                                 pf_print_state(state);
2212                                 pf_print_flags(th->th_flags);
2213                                 printf("\n");
2214                         }
2215                 }
2216         }
2217
2218
2219         /*
2220          * Update PAWS values
2221          */
             /*
              * Runs only for a timestamped packet whose sender has
              * PFSS_TIMESTAMP set and PFSS_PAWS_IDLED clear.  Until PFSS_PAWS
              * is set the stored values are overwritten unconditionally;
              * afterwards they only move forward (SEQ_GEQ).
              */
2222         if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
2223             (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
2224                 getmicrouptime(&src->scrub->pfss_last);
2225                 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
2226                     (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2227                         src->scrub->pfss_tsval = tsval;
2228
2229                 if (tsecr) {
2230                         if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
2231                             (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2232                                 src->scrub->pfss_tsecr = tsecr;
2233
2234                         if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
2235                             (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
2236                             src->scrub->pfss_tsval0 == 0)) {
2237                                 /* tsval0 MUST be the lowest timestamp */
2238                                 src->scrub->pfss_tsval0 = tsval;
2239                         }
2240
2241                         /* Only fully initialized after a TS gets echoed */
2242                         if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
2243                                 src->scrub->pfss_flags |= PFSS_PAWS;
2244                 }
2245         }
2246
2247         /* I have a dream....  TCP segment reassembly.... */
2248         return (0);
2249 }
2250
2251 int
2252 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
2253     int off, sa_family_t af)
2254 {
2255         u_int16_t       *mss;
2256         int              thoff;
2257         int              opt, cnt, optlen = 0;
2258         int              rewrite = 0;
2259 #ifdef __FreeBSD__
2260         u_char           opts[TCP_MAXOLEN];
2261 #else
2262         u_char           opts[MAX_TCPOPTLEN];
2263 #endif
2264         u_char          *optp = opts;
2265
2266         thoff = th->th_off << 2;
2267         cnt = thoff - sizeof(struct tcphdr);
2268
2269         if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
2270             NULL, NULL, af))
2271                 return (rewrite);
2272
2273         for (; cnt > 0; cnt -= optlen, optp += optlen) {
2274                 opt = optp[0];
2275                 if (opt == TCPOPT_EOL)
2276                         break;
2277                 if (opt == TCPOPT_NOP)
2278                         optlen = 1;
2279                 else {
2280                         if (cnt < 2)
2281                                 break;
2282                         optlen = optp[1];
2283                         if (optlen < 2 || optlen > cnt)
2284                                 break;
2285                 }
2286                 switch (opt) {
2287                 case TCPOPT_MAXSEG:
2288                         mss = (u_int16_t *)(optp + 2);
2289                         if ((ntohs(*mss)) > r->max_mss) {
2290                                 th->th_sum = pf_proto_cksum_fixup(m,
2291                                     th->th_sum, *mss, htons(r->max_mss), 0);
2292                                 *mss = htons(r->max_mss);
2293                                 rewrite = 1;
2294                         }
2295                         break;
2296                 default:
2297                         break;
2298                 }
2299         }
2300
2301         if (rewrite)
2302                 m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts);
2303
2304         return (rewrite);
2305 }
2306
2307 void
2308 pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos)
2309 {
2310         struct mbuf             *m = *m0;
2311         struct ip               *h = mtod(m, struct ip *);
2312
2313         /* Clear IP_DF if no-df was requested */
2314         if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
2315                 u_int16_t ip_off = h->ip_off;
2316
2317                 h->ip_off &= htons(~IP_DF);
2318                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
2319         }
2320
2321         /* Enforce a minimum ttl, may cause endless packet loops */
2322         if (min_ttl && h->ip_ttl < min_ttl) {
2323                 u_int16_t ip_ttl = h->ip_ttl;
2324
2325                 h->ip_ttl = min_ttl;
2326                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
2327         }
2328
2329         /* Enforce tos */
2330         if (flags & PFRULE_SET_TOS) {
2331                 u_int16_t       ov, nv;
2332
2333                 ov = *(u_int16_t *)h;
2334                 h->ip_tos = tos;
2335                 nv = *(u_int16_t *)h;
2336
2337                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
2338         }
2339
2340         /* random-id, but not for fragments */
2341         if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
2342                 u_int16_t ip_id = h->ip_id;
2343
2344                 h->ip_id = ip_randomid();
2345                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
2346         }
2347 }
2348
#ifdef INET6
/*
 * Raise the hop limit of the IPv6 packet in *m0 to min_ttl when it is
 * below that value.  A min_ttl of zero leaves the packet untouched.
 * Note that forcing a minimum hop limit may cause endless packet loops.
 */
void
pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl)
{
        struct ip6_hdr  *ip6 = mtod(*m0, struct ip6_hdr *);

        /* Nothing to do if disabled or the hop limit is already sufficient */
        if (min_ttl == 0 || ip6->ip6_hlim >= min_ttl)
                return;

        ip6->ip6_hlim = min_ttl;
}
#endif