]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/contrib/pf/net/pf_norm.c
fix a bug in the fragment cache (used for 'scrub fragment crop/drop-ovl',
[FreeBSD/FreeBSD.git] / sys / contrib / pf / net / pf_norm.c
1 /*      $FreeBSD$       */
2 /*      $OpenBSD: pf_norm.c,v 1.97 2004/09/21 16:59:12 aaron Exp $ */
3
4 /*
5  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #ifdef __FreeBSD__
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 #include "opt_pf.h"
33 #ifdef DEV_PFLOG
34 #define NPFLOG DEV_PFLOG
35 #else
36 #define NPFLOG 0
37 #endif
38 #else
39 #include "pflog.h"
40 #endif
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/filio.h>
46 #include <sys/fcntl.h>
47 #include <sys/socket.h>
48 #include <sys/kernel.h>
49 #include <sys/time.h>
50 #ifndef __FreeBSD__
51 #include <sys/pool.h>
52
53 #include <dev/rndvar.h>
54 #endif
55 #include <net/if.h>
56 #include <net/if_types.h>
57 #include <net/bpf.h>
58 #include <net/route.h>
59 #include <net/if_pflog.h>
60
61 #include <netinet/in.h>
62 #include <netinet/in_var.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/ip.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/tcp.h>
67 #include <netinet/tcp_seq.h>
68 #include <netinet/udp.h>
69 #include <netinet/ip_icmp.h>
70
71 #ifdef INET6
72 #include <netinet/ip6.h>
73 #endif /* INET6 */
74
75 #include <net/pfvar.h>
76
77 #ifndef __FreeBSD__
78 #include <inttypes.h>
79
/* One buffered IP fragment; entries are linked on a pf_fragment's fr_queue. */
80 struct pf_frent {
81         LIST_ENTRY(pf_frent) fr_next;   /* linkage within the fragment queue */
82         struct ip *fr_ip;               /* IP header of this fragment */
83         struct mbuf *fr_m;              /* mbuf chain holding this fragment */
84 };
85
/* One byte range recorded by the non-buffering fragment cache (fr_cache list). */
86 struct pf_frcache {
87         LIST_ENTRY(pf_frcache) fr_next; /* linkage within the range list */
88         uint16_t        fr_off;         /* start offset of the range */
89         uint16_t        fr_end;         /* end offset of the range */
90 };
91 #endif
92
/* Values for pf_fragment.fr_flags. */
93 #define PFFRAG_SEENLAST 0x0001          /* Seen the last fragment for this */
94 #define PFFRAG_NOBUFFER 0x0002          /* Non-buffering fragment cache */
95 #define PFFRAG_DROP     0x0004          /* Drop all fragments */
/* True when the descriptor does not have PFFRAG_NOBUFFER set (uses fr_queue). */
96 #define BUFFER_FRAGMENTS(fr)    (!((fr)->fr_flags & PFFRAG_NOBUFFER))
97
98 #ifndef __FreeBSD__
/*
 * Per-datagram fragment descriptor.  It is keyed by (fr_id, fr_p, fr_src,
 * fr_dst) and holds either a queue of buffered fragments or a list of
 * cached byte ranges, selected by PFFRAG_NOBUFFER in fr_flags.
 */
99 struct pf_fragment {
100         RB_ENTRY(pf_fragment) fr_entry;         /* red-black tree linkage */
101         TAILQ_ENTRY(pf_fragment) frag_next;     /* pf_fragqueue/pf_cachequeue linkage */
102         struct in_addr  fr_src;                 /* source address */
103         struct in_addr  fr_dst;                 /* destination address */
104         u_int8_t        fr_p;           /* protocol of this fragment */
105         u_int8_t        fr_flags;       /* status flags */
106         u_int16_t       fr_id;          /* fragment id for reassemble */
107         u_int16_t       fr_max;         /* fragment data max */
108         u_int32_t       fr_timeout;     /* time_second at creation or last lookup */
109 #define fr_queue        fr_u.fru_queue
110 #define fr_cache        fr_u.fru_cache
111         union {
112                 LIST_HEAD(pf_fragq, pf_frent) fru_queue;        /* buffering */
113                 LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;     /* non-buf */
114         } fr_u;
115 };
116 #endif
117
/*
 * Queues of fragment descriptors: pf_fragqueue holds buffering descriptors,
 * pf_cachequeue holds non-buffering ones.  Lookups move a descriptor to the
 * head, and purging/flushing takes entries from the tail.
 */
118 TAILQ_HEAD(pf_fragqueue, pf_fragment)   pf_fragqueue;
119 TAILQ_HEAD(pf_cachequeue, pf_fragment)  pf_cachequeue;
120
121 #ifndef __FreeBSD__
122 static __inline int      pf_frag_compare(struct pf_fragment *,
123                             struct pf_fragment *);
124 #else
125 static int       pf_frag_compare(struct pf_fragment *,
126                             struct pf_fragment *);
127 #endif
/* Both trees share one RB type and are ordered by pf_frag_compare(). */
128 RB_HEAD(pf_frag_tree, pf_fragment)      pf_frag_tree, pf_cache_tree;
129 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
130 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
131
132 /* Private prototypes */
/* Lookup-key setup and descriptor teardown helpers. */
133 void                     pf_ip2key(struct pf_fragment *, struct ip *);
134 void                     pf_remove_fragment(struct pf_fragment *);
135 void                     pf_flush_fragments(void);
136 void                     pf_free_fragment(struct pf_fragment *);
137 struct pf_fragment      *pf_find_fragment(struct ip *, struct pf_frag_tree *);
/* Buffering reassembly and non-buffering fragment cache. */
138 struct mbuf             *pf_reassemble(struct mbuf **, struct pf_fragment **,
139                             struct pf_frent *, int);
140 struct mbuf             *pf_fragcache(struct mbuf **, struct ip*,
141                             struct pf_fragment **, int, int, int *);
142 int                      pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
143                             struct tcphdr *, int);
144
/*
 * Debug print: when pf_status.debug is at least PF_DEBUG_MISC, print the
 * calling function's name followed by the message.  The argument must be a
 * complete, parenthesized printf argument list, e.g. DPFPRINTF(("x=%d\n", x)).
 */
145 #define DPFPRINTF(x) do {                               \
146         if (pf_status.debug >= PF_DEBUG_MISC) {         \
147                 printf("%s: ", __func__);               \
148                 printf x ;                              \
149         }                                               \
150 } while(0)
151
152 /* Globals */
/*
 * Allocators: pf_frent_pl (struct pf_frent), pf_frag_pl and pf_cache_pl
 * (struct pf_fragment, buffering and non-buffering), pf_cent_pl
 * (struct pf_frcache), pf_state_scrub_pl (struct pf_state_scrub).
 */
153 #ifdef __FreeBSD__
154 uma_zone_t               pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
155 uma_zone_t               pf_state_scrub_pl;
156 #else
157 struct pool              pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
158 struct pool              pf_state_scrub_pl;
159 #endif
/* Counts of outstanding pf_frent and pf_frcache entries, respectively. */
160 int                      pf_nfrents, pf_ncache;
161
162 void
163 pf_normalize_init(void)
164 {
165 #ifdef __FreeBSD__
166         /*
167          * XXX
168          * No high water mark support(It's hint not hard limit).
169          * uma_zone_set_max(pf_frag_pl, PFFRAG_FRAG_HIWAT);
170          */
171         uma_zone_set_max(pf_frent_pl, PFFRAG_FRENT_HIWAT);
172         uma_zone_set_max(pf_cache_pl, PFFRAG_FRCACHE_HIWAT);
173         uma_zone_set_max(pf_cent_pl, PFFRAG_FRCENT_HIWAT);
174 #else
175         pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
176             NULL);
177         pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
178             NULL);
179         pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
180             "pffrcache", NULL);
181         pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
182             NULL);
183         pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
184             "pfstscr", NULL);
185
186         pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
187         pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
188         pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
189         pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
190 #endif
191
192         TAILQ_INIT(&pf_fragqueue);
193         TAILQ_INIT(&pf_cachequeue);
194 }
195
196 #ifdef __FreeBSD__
197 static int
198 #else
199 static __inline int
200 #endif
201 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
202 {
203         int     diff;
204
205         if ((diff = a->fr_id - b->fr_id))
206                 return (diff);
207         else if ((diff = a->fr_p - b->fr_p))
208                 return (diff);
209         else if (a->fr_src.s_addr < b->fr_src.s_addr)
210                 return (-1);
211         else if (a->fr_src.s_addr > b->fr_src.s_addr)
212                 return (1);
213         else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
214                 return (-1);
215         else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
216                 return (1);
217         return (0);
218 }
219
220 void
221 pf_purge_expired_fragments(void)
222 {
223         struct pf_fragment      *frag;
224         u_int32_t                expire = time_second -
225                                     pf_default_rule.timeout[PFTM_FRAG];
226
227         while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
228 #ifdef __FreeBSD__
229                 KASSERT((BUFFER_FRAGMENTS(frag)),
230                         ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
231 #else
232                 KASSERT(BUFFER_FRAGMENTS(frag));
233 #endif
234                 if (frag->fr_timeout > expire)
235                         break;
236
237                 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
238                 pf_free_fragment(frag);
239         }
240
241         while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
242 #ifdef __FreeBSD__
243                 KASSERT((!BUFFER_FRAGMENTS(frag)),
244                         ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
245 #else
246                 KASSERT(!BUFFER_FRAGMENTS(frag));
247 #endif
248                 if (frag->fr_timeout > expire)
249                         break;
250
251                 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
252                 pf_free_fragment(frag);
253 #ifdef __FreeBSD__
254                 KASSERT((TAILQ_EMPTY(&pf_cachequeue) ||
255                     TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag),
256                     ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s",
257                     __FUNCTION__));
258 #else
259                 KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
260                     TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
261 #endif
262         }
263 }
264
265 /*
266  * Try to flush old fragments to make space for new ones
267  */
268
269 void
270 pf_flush_fragments(void)
271 {
272         struct pf_fragment      *frag;
273         int                      goal;
274
275         goal = pf_nfrents * 9 / 10;
276         DPFPRINTF(("trying to free > %d frents\n",
277             pf_nfrents - goal));
278         while (goal < pf_nfrents) {
279                 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
280                 if (frag == NULL)
281                         break;
282                 pf_free_fragment(frag);
283         }
284
285
286         goal = pf_ncache * 9 / 10;
287         DPFPRINTF(("trying to free > %d cache entries\n",
288             pf_ncache - goal));
289         while (goal < pf_ncache) {
290                 frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
291                 if (frag == NULL)
292                         break;
293                 pf_free_fragment(frag);
294         }
295 }
296
297 /* Frees the fragments and all associated entries */
298
299 void
300 pf_free_fragment(struct pf_fragment *frag)
301 {
302         struct pf_frent         *frent;
303         struct pf_frcache       *frcache;
304
305         /* Free all fragments */
306         if (BUFFER_FRAGMENTS(frag)) {
307                 for (frent = LIST_FIRST(&frag->fr_queue); frent;
308                     frent = LIST_FIRST(&frag->fr_queue)) {
309                         LIST_REMOVE(frent, fr_next);
310
311                         m_freem(frent->fr_m);
312                         pool_put(&pf_frent_pl, frent);
313                         pf_nfrents--;
314                 }
315         } else {
316                 for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
317                     frcache = LIST_FIRST(&frag->fr_cache)) {
318                         LIST_REMOVE(frcache, fr_next);
319
320 #ifdef __FreeBSD__
321                         KASSERT((LIST_EMPTY(&frag->fr_cache) ||
322                             LIST_FIRST(&frag->fr_cache)->fr_off >
323                             frcache->fr_end),
324                             ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >"
325                              " frcache->fr_end): %s", __FUNCTION__));
326 #else
327                         KASSERT(LIST_EMPTY(&frag->fr_cache) ||
328                             LIST_FIRST(&frag->fr_cache)->fr_off >
329                             frcache->fr_end);
330 #endif
331
332                         pool_put(&pf_cent_pl, frcache);
333                         pf_ncache--;
334                 }
335         }
336
337         pf_remove_fragment(frag);
338 }
339
340 void
341 pf_ip2key(struct pf_fragment *key, struct ip *ip)
342 {
343         key->fr_p = ip->ip_p;
344         key->fr_id = ip->ip_id;
345         key->fr_src.s_addr = ip->ip_src.s_addr;
346         key->fr_dst.s_addr = ip->ip_dst.s_addr;
347 }
348
349 struct pf_fragment *
350 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
351 {
352         struct pf_fragment       key;
353         struct pf_fragment      *frag;
354
355         pf_ip2key(&key, ip);
356
357         frag = RB_FIND(pf_frag_tree, tree, &key);
358         if (frag != NULL) {
359                 /* XXX Are we sure we want to update the timeout? */
360                 frag->fr_timeout = time_second;
361                 if (BUFFER_FRAGMENTS(frag)) {
362                         TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
363                         TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
364                 } else {
365                         TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
366                         TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
367                 }
368         }
369
370         return (frag);
371 }
372
373 /* Removes a fragment from the fragment queue and frees the fragment */
374
375 void
376 pf_remove_fragment(struct pf_fragment *frag)
377 {
378         if (BUFFER_FRAGMENTS(frag)) {
379                 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
380                 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
381                 pool_put(&pf_frag_pl, frag);
382         } else {
383                 RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
384                 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
385                 pool_put(&pf_cache_pl, frag);
386         }
387 }
388
/* Byte offset of a queued fragment: the ip_off offset field times 8. */
389 #define FR_IP_OFF(fr)   ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
/*
 * Buffering reassembly.
 *
 * Adds the fragment described by frent (whose mbuf is *m0) to the queue of
 * *frag, creating the descriptor first when *frag is NULL.  Data overlapping
 * already queued fragments is trimmed from the new fragment (front) or from
 * the queued ones (tail); queued fragments that are fully covered are freed.
 *
 * mff is non-zero when more fragments follow; a zero value marks the
 * descriptor with PFFRAG_SEENLAST.
 *
 * Returns the reassembled mbuf chain once the last fragment has been seen
 * and no gap remains, in which case the descriptor is released and *frag is
 * set to NULL.  Returns NULL otherwise: the packet is still incomplete, the
 * fragment was dropped (frent and its mbuf are freed), or the reassembled
 * size would exceed IP_MAXPACKET (the whole descriptor is freed).
 */
390 struct mbuf *
391 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
392     struct pf_frent *frent, int mff)
393 {
394         struct mbuf     *m = *m0, *m2;
395         struct pf_frent *frea, *next;
396         struct pf_frent *frep = NULL;
397         struct ip       *ip = frent->fr_ip;
398         int              hlen = ip->ip_hl << 2;
399         u_int16_t        off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
400         u_int16_t        ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
401         u_int16_t        max = ip_len + off;
402
403 #ifdef __FreeBSD__
404         KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)),
405             ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
406 #else
407         KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));
408 #endif
409
410         /* Strip off ip header */
411         m->m_data += hlen;
412         m->m_len -= hlen;
413
414         /* Create a new reassembly queue for this packet */
415         if (*frag == NULL) {
416                 *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
417                 if (*frag == NULL) {
                        /* Out of descriptors: flush old ones and retry once. */
418                         pf_flush_fragments();
419                         *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
420                         if (*frag == NULL)
421                                 goto drop_fragment;
422                 }
423
424                 (*frag)->fr_flags = 0;
425                 (*frag)->fr_max = 0;
426                 (*frag)->fr_src = frent->fr_ip->ip_src;
427                 (*frag)->fr_dst = frent->fr_ip->ip_dst;
428                 (*frag)->fr_p = frent->fr_ip->ip_p;
429                 (*frag)->fr_id = frent->fr_ip->ip_id;
430                 (*frag)->fr_timeout = time_second;
431                 LIST_INIT(&(*frag)->fr_queue);
432
433                 RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
434                 TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
435
436                 /* We do not have a previous fragment */
437                 frep = NULL;
438                 goto insert;
439         }
440
441         /*
442          * Find a fragment after the current one:
443          *  - off contains the real shifted offset.
444          */
        /* Afterwards frep is the predecessor (or NULL), frea the successor (or NULL). */
445         LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
446                 if (FR_IP_OFF(frea) > off)
447                         break;
448                 frep = frea;
449         }
450
451 #ifdef __FreeBSD__
452         KASSERT((frep != NULL || frea != NULL),
453             ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));;
454 #else
455         KASSERT(frep != NULL || frea != NULL);
456 #endif
457
        /* The predecessor extends past our start: trim the front of the new fragment. */
458         if (frep != NULL &&
459             FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
460             4 > off)
461         {
462                 u_int16_t       precut;
463
464                 precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
465                     frep->fr_ip->ip_hl * 4 - off;
                /* Nothing would remain of the new fragment after trimming. */
466                 if (precut >= ip_len)
467                         goto drop_fragment;
468                 m_adj(frent->fr_m, precut);
469                 DPFPRINTF(("overlap -%d\n", precut));
470                 /* Enforce 8 byte boundaries */
471                 ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
472                 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
473                 ip_len -= precut;
474                 ip->ip_len = htons(ip_len);
475         }
476
        /* The new fragment runs into its successors: trim or discard them. */
477         for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
478             frea = next)
479         {
480                 u_int16_t       aftercut;
481
482                 aftercut = ip_len + off - FR_IP_OFF(frea);
483                 DPFPRINTF(("adjust overlap %d\n", aftercut));
484                 if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
485                     * 4)
486                 {
                        /* Partial overlap: cut the front of the queued fragment and stop. */
487                         frea->fr_ip->ip_len =
488                             htons(ntohs(frea->fr_ip->ip_len) - aftercut);
489                         frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
490                             (aftercut >> 3));
491                         m_adj(frea->fr_m, aftercut);
492                         break;
493                 }
494
495                 /* This fragment is completely overlapped, lose it */
496                 next = LIST_NEXT(frea, fr_next);
497                 m_freem(frea->fr_m);
498                 LIST_REMOVE(frea, fr_next);
499                 pool_put(&pf_frent_pl, frea);
500                 pf_nfrents--;
501         }
502
503  insert:
504         /* Update maximum data size */
        /* Note: max was computed from the fragment's extent before any trimming. */
505         if ((*frag)->fr_max < max)
506                 (*frag)->fr_max = max;
507         /* This is the last segment */
508         if (!mff)
509                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
510
511         if (frep == NULL)
512                 LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
513         else
514                 LIST_INSERT_AFTER(frep, frent, fr_next);
515
516         /* Check if we are completely reassembled */
517         if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
518                 return (NULL);
519
520         /* Check if we have all the data */
        /* Walk the queue summing payload lengths; each fragment must start where the sum ends. */
521         off = 0;
522         for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
523                 next = LIST_NEXT(frep, fr_next);
524
525                 off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
526                 if (off < (*frag)->fr_max &&
527                     (next == NULL || FR_IP_OFF(next) != off))
528                 {
529                         DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
530                             off, next == NULL ? -1 : FR_IP_OFF(next),
531                             (*frag)->fr_max));
532                         return (NULL);
533                 }
534         }
535         DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
536         if (off < (*frag)->fr_max)
537                 return (NULL);
538
539         /* We have all the data */
540         frent = LIST_FIRST(&(*frag)->fr_queue);
541 #ifdef __FreeBSD__
542         KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__));
543 #else
544         KASSERT(frent != NULL);
545 #endif
        /* Refuse to build a packet larger than IP_MAXPACKET (header plus payload). */
546         if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
547                 DPFPRINTF(("drop: too big: %d\n", off));
548                 pf_free_fragment(*frag);
549                 *frag = NULL;
550                 return (NULL);
551         }
552         next = LIST_NEXT(frent, fr_next);
553
554         /* Magic from ip_input */
        /* The first fragment's mbuf and IP header become the head of the result. */
555         ip = frent->fr_ip;
556         m = frent->fr_m;
557         m2 = m->m_next;
558         m->m_next = NULL;
559         m_cat(m, m2);
560         pool_put(&pf_frent_pl, frent);
561         pf_nfrents--;
        /* Append the remaining fragments' mbufs in order, releasing each entry. */
562         for (frent = next; frent != NULL; frent = next) {
563                 next = LIST_NEXT(frent, fr_next);
564
565                 m2 = frent->fr_m;
566                 pool_put(&pf_frent_pl, frent);
567                 pf_nfrents--;
568                 m_cat(m, m2);
569         }
570
571         ip->ip_src = (*frag)->fr_src;
572         ip->ip_dst = (*frag)->fr_dst;
573
574         /* Remove from fragment queue */
575         pf_remove_fragment(*frag);
576         *frag = NULL;
577
        /* Re-expose the IP header that was stripped on entry and set the total length. */
578         hlen = ip->ip_hl << 2;
579         ip->ip_len = htons(off + hlen);
580         m->m_len += hlen;
581         m->m_data -= hlen;
582
583         /* some debugging cruft by sklower, below, will go away soon */
584         /* XXX this should be done elsewhere */
        /* Recompute the packet header length from the lengths of the mbufs in the chain. */
585         if (m->m_flags & M_PKTHDR) {
586                 int plen = 0;
587                 for (m2 = m; m2; m2 = m2->m_next)
588                         plen += m2->m_len;
589                 m->m_pkthdr.len = plen;
590         }
591
592         DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
593         return (m);
594
595  drop_fragment:
596         /* Oops - fail safe - drop packet */
597         pool_put(&pf_frent_pl, frent);
598         pf_nfrents--;
599         m_freem(m);
600         return (NULL);
601 }
602
603 struct mbuf *
604 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
605     int drop, int *nomem)
606 {
607         struct mbuf             *m = *m0;
608         struct pf_frcache       *frp, *fra, *cur = NULL;
609         int                      ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
610         u_int16_t                off = ntohs(h->ip_off) << 3;
611         u_int16_t                max = ip_len + off;
612         int                      hosed = 0;
613
614 #ifdef __FreeBSD__
615         KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)),
616             ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
617 #else
618         KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
619 #endif
620
621         /* Create a new range queue for this packet */
622         if (*frag == NULL) {
623                 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
624                 if (*frag == NULL) {
625                         pf_flush_fragments();
626                         *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
627                         if (*frag == NULL)
628                                 goto no_mem;
629                 }
630
631                 /* Get an entry for the queue */
632                 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
633                 if (cur == NULL) {
634                         pool_put(&pf_cache_pl, *frag);
635                         *frag = NULL;
636                         goto no_mem;
637                 }
638                 pf_ncache++;
639
640                 (*frag)->fr_flags = PFFRAG_NOBUFFER;
641                 (*frag)->fr_max = 0;
642                 (*frag)->fr_src = h->ip_src;
643                 (*frag)->fr_dst = h->ip_dst;
644                 (*frag)->fr_p = h->ip_p;
645                 (*frag)->fr_id = h->ip_id;
646                 (*frag)->fr_timeout = time_second;
647
648                 cur->fr_off = off;
649                 cur->fr_end = max;
650                 LIST_INIT(&(*frag)->fr_cache);
651                 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
652
653                 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
654                 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
655
656                 DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));
657
658                 goto pass;
659         }
660
661         /*
662          * Find a fragment after the current one:
663          *  - off contains the real shifted offset.
664          */
665         frp = NULL;
666         LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
667                 if (fra->fr_off > off)
668                         break;
669                 frp = fra;
670         }
671
672 #ifdef __FreeBSD__
673         KASSERT((frp != NULL || fra != NULL),
674             ("!(frp != NULL || fra != NULL): %s", __FUNCTION__));
675 #else
676         KASSERT(frp != NULL || fra != NULL);
677 #endif
678
679         if (frp != NULL) {
680                 int     precut;
681
682                 precut = frp->fr_end - off;
683                 if (precut >= ip_len) {
684                         /* Fragment is entirely a duplicate */
685                         DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
686                             h->ip_id, frp->fr_off, frp->fr_end, off, max));
687                         goto drop_fragment;
688                 }
689                 if (precut == 0) {
690                         /* They are adjacent.  Fixup cache entry */
691                         DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
692                             h->ip_id, frp->fr_off, frp->fr_end, off, max));
693                         frp->fr_end = max;
694                 } else if (precut > 0) {
695                         /* The first part of this payload overlaps with a
696                          * fragment that has already been passed.
697                          * Need to trim off the first part of the payload.
698                          * But to do so easily, we need to create another
699                          * mbuf to throw the original header into.
700                          */
701
702                         DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
703                             h->ip_id, precut, frp->fr_off, frp->fr_end, off,
704                             max));
705
706                         off += precut;
707                         max -= precut;
708                         /* Update the previous frag to encompass this one */
709                         frp->fr_end = max;
710
711                         if (!drop) {
712                                 /* XXX Optimization opportunity
713                                  * This is a very heavy way to trim the payload.
714                                  * we could do it much faster by diddling mbuf
715                                  * internals but that would be even less legible
716                                  * than this mbuf magic.  For my next trick,
717                                  * I'll pull a rabbit out of my laptop.
718                                  */
719 #ifdef __FreeBSD__
720                                 *m0 = m_dup(m, M_DONTWAIT);
721 #else
722                                 *m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT);
723 #endif
724                                 if (*m0 == NULL)
725                                         goto no_mem;
726 #ifdef __FreeBSD__
727                                 /* From KAME Project : We have missed this! */
728                                 m_adj(*m0, (h->ip_hl << 2) -
729                                     (*m0)->m_pkthdr.len);
730
731                                 KASSERT(((*m0)->m_next == NULL), 
732                                     ("(*m0)->m_next != NULL: %s", 
733                                     __FUNCTION__));
734 #else
735                                 KASSERT((*m0)->m_next == NULL);
736 #endif
737                                 m_adj(m, precut + (h->ip_hl << 2));
738                                 m_cat(*m0, m);
739                                 m = *m0;
740                                 if (m->m_flags & M_PKTHDR) {
741                                         int plen = 0;
742                                         struct mbuf *t;
743                                         for (t = m; t; t = t->m_next)
744                                                 plen += t->m_len;
745                                         m->m_pkthdr.len = plen;
746                                 }
747
748
749                                 h = mtod(m, struct ip *);
750
751 #ifdef __FreeBSD__
752                                 KASSERT(((int)m->m_len ==
753                                     ntohs(h->ip_len) - precut),
754                                     ("m->m_len != ntohs(h->ip_len) - precut: %s",
755                                     __FUNCTION__));
756 #else
757                                 KASSERT((int)m->m_len ==
758                                     ntohs(h->ip_len) - precut);
759 #endif
760                                 h->ip_off = htons(ntohs(h->ip_off) +
761                                     (precut >> 3));
762                                 h->ip_len = htons(ntohs(h->ip_len) - precut);
763                         } else {
764                                 hosed++;
765                         }
766                 } else {
767                         /* There is a gap between fragments */
768
769                         DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
770                             h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
771                             max));
772
773                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
774                         if (cur == NULL)
775                                 goto no_mem;
776                         pf_ncache++;
777
778                         cur->fr_off = off;
779                         cur->fr_end = max;
780                         LIST_INSERT_AFTER(frp, cur, fr_next);
781                 }
782         }
783
784         if (fra != NULL) {
785                 int     aftercut;
786                 int     merge = 0;
787
788                 aftercut = max - fra->fr_off;
789                 if (aftercut == 0) {
790                         /* Adjacent fragments */
791                         DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
792                             h->ip_id, off, max, fra->fr_off, fra->fr_end));
793                         fra->fr_off = off;
794                         merge = 1;
795                 } else if (aftercut > 0) {
796                         /* Need to chop off the tail of this fragment */
797                         DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
798                             h->ip_id, aftercut, off, max, fra->fr_off,
799                             fra->fr_end));
800                         fra->fr_off = off;
801                         max -= aftercut;
802
803                         merge = 1;
804
805                         if (!drop) {
806                                 m_adj(m, -aftercut);
807                                 if (m->m_flags & M_PKTHDR) {
808                                         int plen = 0;
809                                         struct mbuf *t;
810                                         for (t = m; t; t = t->m_next)
811                                                 plen += t->m_len;
812                                         m->m_pkthdr.len = plen;
813                                 }
814                                 h = mtod(m, struct ip *);
815 #ifdef __FreeBSD__
816                                 KASSERT(((int)m->m_len == ntohs(h->ip_len) - aftercut),
817                                     ("m->m_len != ntohs(h->ip_len) - aftercut: %s",
818                                     __FUNCTION__));
819 #else
820                                 KASSERT((int)m->m_len ==
821                                     ntohs(h->ip_len) - aftercut);
822 #endif
823                                 h->ip_len = htons(ntohs(h->ip_len) - aftercut);
824                         } else {
825                                 hosed++;
826                         }
827                 } else if (frp == NULL) {
828                         /* There is a gap between fragments */
829                         DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
830                             h->ip_id, -aftercut, off, max, fra->fr_off,
831                             fra->fr_end));
832
833                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
834                         if (cur == NULL)
835                                 goto no_mem;
836                         pf_ncache++;
837
838                         cur->fr_off = off;
839                         cur->fr_end = max;
840                         LIST_INSERT_BEFORE(fra, cur, fr_next);
841                 }
842
843
844                 /* Need to glue together two separate fragment descriptors */
845                 if (merge) {
846                         if (cur && fra->fr_off <= cur->fr_end) {
847                                 /* Need to merge in a previous 'cur' */
848                                 DPFPRINTF(("fragcache[%d]: adjacent(merge "
849                                     "%d-%d) %d-%d (%d-%d)\n",
850                                     h->ip_id, cur->fr_off, cur->fr_end, off,
851                                     max, fra->fr_off, fra->fr_end));
852                                 fra->fr_off = cur->fr_off;
853                                 LIST_REMOVE(cur, fr_next);
854                                 pool_put(&pf_cent_pl, cur);
855                                 pf_ncache--;
856                                 cur = NULL;
857
858                         } else if (frp && fra->fr_off <= frp->fr_end) {
859                                 /* Need to merge in a modified 'frp' */
860 #ifdef __FreeBSD__
861                                 KASSERT((cur == NULL), ("cur != NULL: %s",
862                                     __FUNCTION__));
863 #else
864                                 KASSERT(cur == NULL);
865 #endif
866                                 DPFPRINTF(("fragcache[%d]: adjacent(merge "
867                                     "%d-%d) %d-%d (%d-%d)\n",
868                                     h->ip_id, frp->fr_off, frp->fr_end, off,
869                                     max, fra->fr_off, fra->fr_end));
870                                 fra->fr_off = frp->fr_off;
871                                 LIST_REMOVE(frp, fr_next);
872                                 pool_put(&pf_cent_pl, frp);
873                                 pf_ncache--;
874                                 frp = NULL;
875
876                         }
877                 }
878         }
879
880         if (hosed) {
881                 /*
882                  * We must keep tracking the overall fragment even when
883                  * we're going to drop it anyway so that we know when to
884                  * free the overall descriptor.  Thus we drop the frag late.
885                  */
886                 goto drop_fragment;
887         }
888
889
890  pass:
891         /* Update maximum data size */
892         if ((*frag)->fr_max < max)
893                 (*frag)->fr_max = max;
894
895         /* This is the last segment */
896         if (!mff)
897                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
898
899         /* Check if we are completely reassembled */
900         if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
901             LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
902             LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
903                 /* Remove from fragment queue */
904                 DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
905                     (*frag)->fr_max));
906                 pf_free_fragment(*frag);
907                 *frag = NULL;
908         }
909
910         return (m);
911
912  no_mem:
913         *nomem = 1;
914
915         /* Still need to pay attention to !IP_MF */
916         if (!mff && *frag != NULL)
917                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
918
919         m_freem(m);
920         return (NULL);
921
922  drop_fragment:
923
924         /* Still need to pay attention to !IP_MF */
925         if (!mff && *frag != NULL)
926                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
927
928         if (drop) {
929                 /* This fragment has been deemed bad.  Don't reass */
930                 if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
931                         DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
932                             h->ip_id));
933                 (*frag)->fr_flags |= PFFRAG_DROP;
934         }
935
936         m_freem(m);
937         return (NULL);
938 }
939
940 int
941 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
942     struct pf_pdesc *pd)
943 {
944         struct mbuf             *m = *m0;
945         struct pf_rule          *r;
946         struct pf_frent         *frent;
947         struct pf_fragment      *frag = NULL;
948         struct ip               *h = mtod(m, struct ip *);
949         int                      mff = (ntohs(h->ip_off) & IP_MF);
950         int                      hlen = h->ip_hl << 2;
951         u_int16_t                fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
952         u_int16_t                max;
953         int                      ip_len;
954         int                      ip_off;
955
956         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
957         while (r != NULL) {
958                 r->evaluations++;
959                 if (r->kif != NULL &&
960                     (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
961                         r = r->skip[PF_SKIP_IFP].ptr;
962                 else if (r->direction && r->direction != dir)
963                         r = r->skip[PF_SKIP_DIR].ptr;
964                 else if (r->af && r->af != AF_INET)
965                         r = r->skip[PF_SKIP_AF].ptr;
966                 else if (r->proto && r->proto != h->ip_p)
967                         r = r->skip[PF_SKIP_PROTO].ptr;
968                 else if (PF_MISMATCHAW(&r->src.addr,
969                     (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.neg))
970                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
971                 else if (PF_MISMATCHAW(&r->dst.addr,
972                     (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg))
973                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
974                 else
975                         break;
976         }
977
978         if (r == NULL)
979                 return (PF_PASS);
980         else
981                 r->packets++;
982
983         /* Check for illegal packets */
984         if (hlen < (int)sizeof(struct ip))
985                 goto drop;
986
987         if (hlen > ntohs(h->ip_len))
988                 goto drop;
989
990         /* Clear IP_DF if the rule uses the no-df option */
991         if (r->rule_flag & PFRULE_NODF)
992                 h->ip_off &= htons(~IP_DF);
993
994         /* We will need other tests here */
995         if (!fragoff && !mff)
996                 goto no_fragment;
997
998         /* We're dealing with a fragment now. Don't allow fragments
999          * with IP_DF to enter the cache. If the flag was cleared by
1000          * no-df above, fine. Otherwise drop it.
1001          */
1002         if (h->ip_off & htons(IP_DF)) {
1003                 DPFPRINTF(("IP_DF\n"));
1004                 goto bad;
1005         }
1006
1007         ip_len = ntohs(h->ip_len) - hlen;
1008         ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1009
1010         /* All fragments are 8 byte aligned */
1011         if (mff && (ip_len & 0x7)) {
1012                 DPFPRINTF(("mff and %d\n", ip_len));
1013                 goto bad;
1014         }
1015
1016         /* Respect maximum length */
1017         if (fragoff + ip_len > IP_MAXPACKET) {
1018                 DPFPRINTF(("max packet %d\n", fragoff + ip_len));
1019                 goto bad;
1020         }
1021         max = fragoff + ip_len;
1022
1023         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
1024                 /* Fully buffer all of the fragments */
1025
1026                 frag = pf_find_fragment(h, &pf_frag_tree);
1027
1028                 /* Check if we saw the last fragment already */
1029                 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1030                     max > frag->fr_max)
1031                         goto bad;
1032
1033                 /* Get an entry for the fragment queue */
1034                 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
1035                 if (frent == NULL) {
1036                         REASON_SET(reason, PFRES_MEMORY);
1037                         return (PF_DROP);
1038                 }
1039                 pf_nfrents++;
1040                 frent->fr_ip = h;
1041                 frent->fr_m = m;
1042
1043                 /* Might return a completely reassembled mbuf, or NULL */
1044                 DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
1045                 *m0 = m = pf_reassemble(m0, &frag, frent, mff);
1046
1047                 if (m == NULL)
1048                         return (PF_DROP);
1049
1050                 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1051                         goto drop;
1052
1053                 h = mtod(m, struct ip *);
1054         } else {
1055                 /* non-buffering fragment cache (drops or masks overlaps) */
1056                 int     nomem = 0;
1057
1058                 if (dir == PF_OUT) {
1059                         if (m_tag_find(m, PACKET_TAG_PF_FRAGCACHE, NULL) !=
1060                             NULL) {
1061                                 /* Already passed the fragment cache in the
1062                                  * input direction.  If we continued, it would
1063                                  * appear to be a dup and would be dropped.
1064                                  */
1065                                 goto fragment_pass;
1066                         }
1067                 }
1068
1069                 frag = pf_find_fragment(h, &pf_cache_tree);
1070
1071                 /* Check if we saw the last fragment already */
1072                 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1073                     max > frag->fr_max) {
1074                         if (r->rule_flag & PFRULE_FRAGDROP)
1075                                 frag->fr_flags |= PFFRAG_DROP;
1076                         goto bad;
1077                 }
1078
1079                 *m0 = m = pf_fragcache(m0, h, &frag, mff,
1080                     (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
1081                 if (m == NULL) {
1082                         if (nomem)
1083                                 goto no_mem;
1084                         goto drop;
1085                 }
1086
1087                 if (dir == PF_IN) {
1088                         struct m_tag    *mtag;
1089
1090                         mtag = m_tag_get(PACKET_TAG_PF_FRAGCACHE, 0, M_NOWAIT);
1091                         if (mtag == NULL)
1092                                 goto no_mem;
1093                         m_tag_prepend(m, mtag);
1094                 }
1095                 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1096                         goto drop;
1097                 goto fragment_pass;
1098         }
1099
1100  no_fragment:
1101         /* At this point, only IP_DF is allowed in ip_off */
1102         h->ip_off &= htons(IP_DF);
1103
1104         /* Enforce a minimum ttl, may cause endless packet loops */
1105         if (r->min_ttl && h->ip_ttl < r->min_ttl)
1106                 h->ip_ttl = r->min_ttl;
1107
1108         if (r->rule_flag & PFRULE_RANDOMID) {
1109                 u_int16_t ip_id = h->ip_id;
1110
1111                 h->ip_id = ip_randomid();
1112                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
1113         }
1114         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1115                 pd->flags |= PFDESC_IP_REAS;
1116
1117         return (PF_PASS);
1118
1119  fragment_pass:
1120         /* Enforce a minimum ttl, may cause endless packet loops */
1121         if (r->min_ttl && h->ip_ttl < r->min_ttl)
1122                 h->ip_ttl = r->min_ttl;
1123         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1124                 pd->flags |= PFDESC_IP_REAS;
1125         return (PF_PASS);
1126
1127  no_mem:
1128         REASON_SET(reason, PFRES_MEMORY);
1129         if (r != NULL && r->log)
1130                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL);
1131         return (PF_DROP);
1132
1133  drop:
1134         REASON_SET(reason, PFRES_NORM);
1135         if (r != NULL && r->log)
1136                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL);
1137         return (PF_DROP);
1138
1139  bad:
1140         DPFPRINTF(("dropping bad fragment\n"));
1141
1142         /* Free associated fragments */
1143         if (frag != NULL)
1144                 pf_free_fragment(frag);
1145
1146         REASON_SET(reason, PFRES_FRAG);
1147         if (r != NULL && r->log)
1148                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL);
1149
1150         return (PF_DROP);
1151 }
1152
#ifdef INET6
/*
 * Sanity-check an IPv6 packet against the first matching scrub rule.
 *
 * The extension header chain is walked starting right after the fixed
 * header.  Hop-by-hop options are parsed to validate a jumbo payload
 * option; a fragment header diverts to the fragment checks.  No
 * reassembly is performed here.
 *
 * m0     - pointer to the packet (only read through *m0).
 * dir    - packet direction, matched against the rule's direction.
 * kif    - interface the packet was seen on (rule matching and logging).
 * reason - set to a PFRES_* code on every PF_DROP return.
 * pd     - packet descriptor; not modified by this function.
 *
 * Returns PF_PASS when no scrub rule matches or all checks succeed,
 * PF_DROP otherwise.
 */
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
        struct mbuf             *m = *m0;
        struct pf_rule          *r;
        struct ip6_hdr          *h = mtod(m, struct ip6_hdr *);
        int                      off;
        struct ip6_ext           ext;
        struct ip6_opt           opt;
        struct ip6_opt_jumbo     jumbo;
        struct ip6_frag          frag;
        u_int32_t                jumbolen = 0, plen;
        u_int32_t                pktlen, fragdata;
        u_int16_t                fragoff = 0;
        int                      optend;
        int                      ooff;
        u_int8_t                 proto;
        int                      terminal;

        /* Walk the active scrub rules, using the skip steps on mismatch */
        r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
        while (r != NULL) {
                r->evaluations++;
                if (r->kif != NULL &&
                    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
                        r = r->skip[PF_SKIP_IFP].ptr;
                else if (r->direction && r->direction != dir)
                        r = r->skip[PF_SKIP_DIR].ptr;
                else if (r->af && r->af != AF_INET6)
                        r = r->skip[PF_SKIP_AF].ptr;
#if 0 /* header chain! */
                else if (r->proto && r->proto != h->ip6_nxt)
                        r = r->skip[PF_SKIP_PROTO].ptr;
#endif
                else if (PF_MISMATCHAW(&r->src.addr,
                    (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.neg))
                        r = r->skip[PF_SKIP_SRC_ADDR].ptr;
                else if (PF_MISMATCHAW(&r->dst.addr,
                    (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.neg))
                        r = r->skip[PF_SKIP_DST_ADDR].ptr;
                else
                        break;
        }

        if (r == NULL)
                return (PF_PASS);
        else
                r->packets++;

        /* Check for illegal packets */
        if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
                goto drop;

        /* Follow the extension header chain until a non-extension proto */
        off = sizeof(struct ip6_hdr);
        proto = h->ip6_nxt;
        terminal = 0;
        do {
                switch (proto) {
                case IPPROTO_FRAGMENT:
                        goto fragment;
                case IPPROTO_AH:
                case IPPROTO_ROUTING:
                case IPPROTO_DSTOPTS:
                        if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
                            NULL, AF_INET6))
                                goto shortpkt;
                        /* AH counts its length in 4-byte units, others in 8 */
                        if (proto == IPPROTO_AH)
                                off += (ext.ip6e_len + 2) * 4;
                        else
                                off += (ext.ip6e_len + 1) * 8;
                        proto = ext.ip6e_nxt;
                        break;
                case IPPROTO_HOPOPTS:
                        if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
                            NULL, AF_INET6))
                                goto shortpkt;
                        optend = off + (ext.ip6e_len + 1) * 8;
                        ooff = off + sizeof(ext);
                        do {
                                /* Pad1 is a lone type byte without a length */
                                if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
                                    sizeof(opt.ip6o_type), NULL, NULL,
                                    AF_INET6))
                                        goto shortpkt;
                                if (opt.ip6o_type == IP6OPT_PAD1) {
                                        ooff++;
                                        continue;
                                }
                                if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
                                    NULL, NULL, AF_INET6))
                                        goto shortpkt;
                                if (ooff + sizeof(opt) + opt.ip6o_len > optend)
                                        goto drop;
                                switch (opt.ip6o_type) {
                                case IP6OPT_JUMBO:
                                        /*
                                         * A jumbo option requires a zero
                                         * payload length in the fixed header,
                                         * a length above IPV6_MAXPACKET and an
                                         * exact match with the mbuf length.
                                         */
                                        if (h->ip6_plen != 0)
                                                goto drop;
                                        if (!pf_pull_hdr(m, ooff, &jumbo,
                                            sizeof(jumbo), NULL, NULL,
                                            AF_INET6))
                                                goto shortpkt;
                                        memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
                                            sizeof(jumbolen));
                                        jumbolen = ntohl(jumbolen);
                                        if (jumbolen <= IPV6_MAXPACKET)
                                                goto drop;
                                        if (sizeof(struct ip6_hdr) + jumbolen !=
                                            m->m_pkthdr.len)
                                                goto drop;
                                        break;
                                default:
                                        break;
                                }
                                ooff += sizeof(opt) + opt.ip6o_len;
                        } while (ooff < optend);

                        off = optend;
                        proto = ext.ip6e_nxt;
                        break;
                default:
                        terminal = 1;
                        break;
                }
        } while (!terminal);

        /* jumbo payload option must be present, or plen > 0 */
        if (ntohs(h->ip6_plen) == 0)
                plen = jumbolen;
        else
                plen = ntohs(h->ip6_plen);
        if (plen == 0)
                goto drop;
        if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
                goto shortpkt;

        /* Enforce a minimum ttl, may cause endless packet loops */
        if (r->min_ttl && h->ip6_hlim < r->min_ttl)
                h->ip6_hlim = r->min_ttl;

        return (PF_PASS);

 fragment:
        /* Fragments cannot be jumbograms and need a real payload length */
        if (ntohs(h->ip6_plen) == 0 || jumbolen)
                goto drop;
        plen = ntohs(h->ip6_plen);

        if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
                goto shortpkt;
        fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);

        /*
         * ip6_plen counts the bytes after the fixed header, whereas 'off'
         * is measured from the start of the packet.  Compare both against
         * the declared total length so the subtraction neither comes out
         * sizeof(struct ip6_hdr) bytes short nor wraps around for small
         * fragments.
         */
        pktlen = sizeof(struct ip6_hdr) + plen;
        fragdata = off + sizeof(frag);
        if (fragdata > pktlen)
                goto badfrag;
        if (fragoff + (pktlen - fragdata) > IPV6_MAXPACKET)
                goto badfrag;

        /* do something about it */
        /* remember to set pd->flags |= PFDESC_IP_REAS */
        return (PF_PASS);

 shortpkt:
        REASON_SET(reason, PFRES_SHORT);
        if (r != NULL && r->log)
                PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL);
        return (PF_DROP);

 drop:
        REASON_SET(reason, PFRES_NORM);
        if (r != NULL && r->log)
                PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL);
        return (PF_DROP);

 badfrag:
        REASON_SET(reason, PFRES_FRAG);
        if (r != NULL && r->log)
                PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL);
        return (PF_DROP);
}
#endif /* INET6 */
1328
1329 int
1330 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
1331     int off, void *h, struct pf_pdesc *pd)
1332 {
1333         struct pf_rule  *r, *rm = NULL;
1334         struct tcphdr   *th = pd->hdr.tcp;
1335         int              rewrite = 0;
1336         u_short          reason;
1337         u_int8_t         flags;
1338         sa_family_t      af = pd->af;
1339
1340         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1341         while (r != NULL) {
1342                 r->evaluations++;
1343                 if (r->kif != NULL &&
1344                     (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
1345                         r = r->skip[PF_SKIP_IFP].ptr;
1346                 else if (r->direction && r->direction != dir)
1347                         r = r->skip[PF_SKIP_DIR].ptr;
1348                 else if (r->af && r->af != af)
1349                         r = r->skip[PF_SKIP_AF].ptr;
1350                 else if (r->proto && r->proto != pd->proto)
1351                         r = r->skip[PF_SKIP_PROTO].ptr;
1352                 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg))
1353                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1354                 else if (r->src.port_op && !pf_match_port(r->src.port_op,
1355                             r->src.port[0], r->src.port[1], th->th_sport))
1356                         r = r->skip[PF_SKIP_SRC_PORT].ptr;
1357                 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg))
1358                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
1359                 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
1360                             r->dst.port[0], r->dst.port[1], th->th_dport))
1361                         r = r->skip[PF_SKIP_DST_PORT].ptr;
1362                 else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
1363                             pf_osfp_fingerprint(pd, m, off, th),
1364                             r->os_fingerprint))
1365                         r = TAILQ_NEXT(r, entries);
1366                 else {
1367                         rm = r;
1368                         break;
1369                 }
1370         }
1371
1372         if (rm == NULL || rm->action == PF_NOSCRUB)
1373                 return (PF_PASS);
1374         else
1375                 r->packets++;
1376
1377         if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
1378                 pd->flags |= PFDESC_TCP_NORM;
1379
1380         flags = th->th_flags;
1381         if (flags & TH_SYN) {
1382                 /* Illegal packet */
1383                 if (flags & TH_RST)
1384                         goto tcp_drop;
1385
1386                 if (flags & TH_FIN)
1387                         flags &= ~TH_FIN;
1388         } else {
1389                 /* Illegal packet */
1390                 if (!(flags & (TH_ACK|TH_RST)))
1391                         goto tcp_drop;
1392         }
1393
1394         if (!(flags & TH_ACK)) {
1395                 /* These flags are only valid if ACK is set */
1396                 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
1397                         goto tcp_drop;
1398         }
1399
1400         /* Check for illegal header length */
1401         if (th->th_off < (sizeof(struct tcphdr) >> 2))
1402                 goto tcp_drop;
1403
1404         /* If flags changed, or reserved data set, then adjust */
1405         if (flags != th->th_flags || th->th_x2 != 0) {
1406                 u_int16_t       ov, nv;
1407
1408                 ov = *(u_int16_t *)(&th->th_ack + 1);
1409                 th->th_flags = flags;
1410                 th->th_x2 = 0;
1411                 nv = *(u_int16_t *)(&th->th_ack + 1);
1412
1413                 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
1414                 rewrite = 1;
1415         }
1416
1417         /* Remove urgent pointer, if TH_URG is not set */
1418         if (!(flags & TH_URG) && th->th_urp) {
1419                 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
1420                 th->th_urp = 0;
1421                 rewrite = 1;
1422         }
1423
1424         /* Process options */
1425         if (r->max_mss && pf_normalize_tcpopt(r, m, th, off))
1426                 rewrite = 1;
1427
1428         /* copy back packet headers if we sanitized */
1429         if (rewrite)
1430                 m_copyback(m, off, sizeof(*th), (caddr_t)th);
1431
1432         return (PF_PASS);
1433
1434  tcp_drop:
1435         REASON_SET(&reason, PFRES_NORM);
1436         if (rm != NULL && r->log)
1437                 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL);
1438         return (PF_DROP);
1439 }
1440
1441 int
1442 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
1443     struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
1444 {
1445         u_int32_t tsval, tsecr;
1446         u_int8_t hdr[60];
1447         u_int8_t *opt;
1448
1449 #ifdef __FreeBSD__
1450         KASSERT((src->scrub == NULL), 
1451             ("pf_normalize_tcp_init: src->scrub != NULL"));
1452 #else
1453         KASSERT(src->scrub == NULL);
1454 #endif
1455
1456         src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
1457         if (src->scrub == NULL)
1458                 return (1);
1459         bzero(src->scrub, sizeof(*src->scrub));
1460
1461         switch (pd->af) {
1462 #ifdef INET
1463         case AF_INET: {
1464                 struct ip *h = mtod(m, struct ip *);
1465                 src->scrub->pfss_ttl = h->ip_ttl;
1466                 break;
1467         }
1468 #endif /* INET */
1469 #ifdef INET6
1470         case AF_INET6: {
1471                 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1472                 src->scrub->pfss_ttl = h->ip6_hlim;
1473                 break;
1474         }
1475 #endif /* INET6 */
1476         }
1477
1478
1479         /*
1480          * All normalizations below are only begun if we see the start of
1481          * the connections.  They must all set an enabled bit in pfss_flags
1482          */
1483         if ((th->th_flags & TH_SYN) == 0)
1484                 return (0);
1485
1486
1487         if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
1488             pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1489                 /* Diddle with TCP options */
1490                 int hlen;
1491                 opt = hdr + sizeof(struct tcphdr);
1492                 hlen = (th->th_off << 2) - sizeof(struct tcphdr);
1493                 while (hlen >= TCPOLEN_TIMESTAMP) {
1494                         switch (*opt) {
1495                         case TCPOPT_EOL:        /* FALLTHROUGH */
1496                         case TCPOPT_NOP:
1497                                 opt++;
1498                                 hlen--;
1499                                 break;
1500                         case TCPOPT_TIMESTAMP:
1501                                 if (opt[1] >= TCPOLEN_TIMESTAMP) {
1502                                         src->scrub->pfss_flags |=
1503                                             PFSS_TIMESTAMP;
1504                                         src->scrub->pfss_ts_mod =
1505                                             htonl(arc4random());
1506
1507                                         /* note PFSS_PAWS not set yet */
1508                                         memcpy(&tsval, &opt[2],
1509                                             sizeof(u_int32_t));
1510                                         memcpy(&tsecr, &opt[6],
1511                                             sizeof(u_int32_t));
1512                                         src->scrub->pfss_tsval0 = ntohl(tsval);
1513                                         src->scrub->pfss_tsval = ntohl(tsval);
1514                                         src->scrub->pfss_tsecr = ntohl(tsecr);
1515                                         getmicrouptime(&src->scrub->pfss_last);
1516                                 }
1517                                 /* FALLTHROUGH */
1518                         default:
1519                                 hlen -= MAX(opt[1], 2);
1520                                 opt += MAX(opt[1], 2);
1521                                 break;
1522                         }
1523                 }
1524         }
1525
1526         return (0);
1527 }
1528
1529 void
1530 pf_normalize_tcp_cleanup(struct pf_state *state)
1531 {
1532         if (state->src.scrub)
1533                 pool_put(&pf_state_scrub_pl, state->src.scrub);
1534         if (state->dst.scrub)
1535                 pool_put(&pf_state_scrub_pl, state->dst.scrub);
1536
1537         /* Someday... flush the TCP segment reassembly descriptors. */
1538 }
1539
1540 int
1541 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
1542     u_short *reason, struct tcphdr *th, struct pf_state *state,
1543     struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
1544 {       /* returns 0 to pass the packet, PF_DROP (reason PFRES_TS) to reject */
1545         struct timeval uptime;          /* uptime snapshot for idle checks */
1546         u_int32_t tsval, tsecr;         /* TS option fields; set iff got_ts */
1547         u_int tsval_from_last;          /* max TS ticks since pfss_last */
1548         u_int8_t hdr[60];               /* scratch copy of TCP hdr + options */
1549         u_int8_t *opt;                  /* cursor into the options in hdr[] */
1550         int copyback = 0;               /* options in hdr[] were modified */
1551         int got_ts = 0;                 /* a full-length TS option was seen */
1552         /* Stateful scrub: rewrite TTL, modulate TS options, validate PAWS. */
1553 #ifdef __FreeBSD__
1554         KASSERT((src->scrub || dst->scrub), 
1555             ("pf_normalize_tcp_statefull: src->scrub && dst->scrub!"));
1556 #else
1557         KASSERT(src->scrub || dst->scrub);
1558 #endif
1559
1560         /*
1561          * Raise every packet to the largest TTL seen from this peer.  Negates
1562          * a common technique to evade an intrusion detection system and
1563          * confuse firewall state code.
1564          */
1565         switch (pd->af) {
1566 #ifdef INET
1567         case AF_INET: {
1568                 if (src->scrub) {
1569                         struct ip *h = mtod(m, struct ip *);
1570                         if (h->ip_ttl > src->scrub->pfss_ttl)
1571                                 src->scrub->pfss_ttl = h->ip_ttl;
1572                         h->ip_ttl = src->scrub->pfss_ttl;
1573                 }
1574                 break;
1575         }
1576 #endif /* INET */
1577 #ifdef INET6
1578         case AF_INET6: {
1579                 if (src->scrub) {
1580                         struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1581                         if (h->ip6_hlim > src->scrub->pfss_ttl)
1582                                 src->scrub->pfss_ttl = h->ip6_hlim;
1583                         h->ip6_hlim = src->scrub->pfss_ttl;
1584                 }
1585                 break;
1586         }
1587 #endif /* INET6 */
1588         }
1589         /* Walk the options only if either peer's scrub has PFSS_TIMESTAMP. */
1590         if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
1591             ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
1592             (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
1593             pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1594                 /* Diddle with TCP options */
1595                 int hlen;
1596                 opt = hdr + sizeof(struct tcphdr);
1597                 hlen = (th->th_off << 2) - sizeof(struct tcphdr);
1598                 while (hlen >= TCPOLEN_TIMESTAMP) {
1599                         switch (*opt) {
1600                         case TCPOPT_EOL:        /* FALLTHROUGH */
1601                         case TCPOPT_NOP:
1602                                 opt++;
1603                                 hlen--;
1604                                 break;
1605                         case TCPOPT_TIMESTAMP:
1606                                 /* Modulate the timestamps.  Can be used for
1607                                  * NAT detection, OS uptime determination or
1608                                  * reboot detection.
1609                                  */
1610
1611                                 if (got_ts) {
1612                                         /* Huh?  Multiple timestamps!? */
1613                                         if (pf_status.debug >= PF_DEBUG_MISC) {
1614                                                 DPFPRINTF(("multiple TS??"));
1615                                                 pf_print_state(state);
1616                                                 printf("\n");
1617                                         }
1618                                         REASON_SET(reason, PFRES_TS);
1619                                         return (PF_DROP);
1620                                 }
1621                                 if (opt[1] >= TCPOLEN_TIMESTAMP) {
1622                                         memcpy(&tsval, &opt[2],
1623                                             sizeof(u_int32_t));
1624                                         if (tsval && src->scrub &&
1625                                             (src->scrub->pfss_flags &
1626                                             PFSS_TIMESTAMP)) {
1627                                                 tsval = ntohl(tsval);
1628                                                 pf_change_a(&opt[2],
1629                                                     &th->th_sum,
1630                                                     htonl(tsval +
1631                                                     src->scrub->pfss_ts_mod),
1632                                                     0);
1633                                                 copyback = 1;
1634                                         }
1635                                         /* NOTE(review): tsval/tsecr keep wire byte order when their modulation branch is skipped */
1636                                         /* Modulate TS reply iff valid (!0) */
1637                                         memcpy(&tsecr, &opt[6],
1638                                             sizeof(u_int32_t));
1639                                         if (tsecr && dst->scrub &&
1640                                             (dst->scrub->pfss_flags &
1641                                             PFSS_TIMESTAMP)) {
1642                                                 tsecr = ntohl(tsecr)
1643                                                     - dst->scrub->pfss_ts_mod;
1644                                                 pf_change_a(&opt[6],
1645                                                     &th->th_sum, htonl(tsecr),
1646                                                     0);
1647                                                 copyback = 1;
1648                                         }
1649                                         got_ts = 1;
1650                                 }
1651                                 /* FALLTHROUGH */
1652                         default:
1653                                 hlen -= MAX(opt[1], 2);
1654                                 opt += MAX(opt[1], 2);
1655                                 break;
1656                         }
1657                 }
1658                 if (copyback) {
1659                         /* Copy back the options; the caller copies back the header */
1660                         *writeback = 1;
1661                         m_copyback(m, off + sizeof(struct tcphdr),
1662                             (th->th_off << 2) - sizeof(struct tcphdr), hdr +
1663                             sizeof(struct tcphdr));
1664                 }
1665         }
1666
1667
1668         /*
1669          * Must invalidate PAWS checks on connections idle for too long.
1670          * The fastest allowed timestamp clock is 1ms.  That turns out to
1671          * be about 24 days before it wraps.  XXX Right now our lowerbound
1672          * TS echo check only works for the first 12 days of a connection
1673          * when the TS has exhausted half its 32bit space
1674          */
1675 #define TS_MAX_IDLE     (24*24*60*60)
1676 #define TS_MAX_CONN     (12*24*60*60)   /* XXX remove when better tsecr check */
1677
1678         getmicrouptime(&uptime);
1679         if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
1680             (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
1681             time_second - state->creation > TS_MAX_CONN))  {
1682                 if (pf_status.debug >= PF_DEBUG_MISC) {
1683                         DPFPRINTF(("src idled out of PAWS\n"));
1684                         pf_print_state(state);
1685                         printf("\n");
1686                 }
1687                 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
1688                     | PFSS_PAWS_IDLED;
1689         }
1690         if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
1691             uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
1692                 if (pf_status.debug >= PF_DEBUG_MISC) {
1693                         DPFPRINTF(("dst idled out of PAWS\n"));
1694                         pf_print_state(state);
1695                         printf("\n");
1696                 }
1697                 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
1698                     | PFSS_PAWS_IDLED;
1699         }
1700
1701         if (got_ts && src->scrub && dst->scrub &&
1702             (src->scrub->pfss_flags & PFSS_PAWS) &&
1703             (dst->scrub->pfss_flags & PFSS_PAWS)) {
1704                 /* Validate that the timestamps are "in-window".
1705                  * RFC1323 describes TCP Timestamp options that allow
1706                  * measurement of RTT (round trip time) and PAWS
1707                  * (protection against wrapped sequence numbers).  PAWS
1708                  * gives us a set of rules for rejecting packets on
1709                  * long fat pipes (packets that were somehow delayed 
1710                  * in transit longer than the time it took to send the
1711                  * full TCP sequence space of 4Gb).  We can use these
1712                  * rules and infer a few others that will let us treat
1713                  * the 32bit timestamp and the 32bit echoed timestamp
1714                  * as sequence numbers to prevent a blind attacker from
1715                  * inserting packets into a connection.
1716                  *
1717                  * RFC1323 tells us:
1718                  *  - The timestamp on this packet must be greater than
1719                  *    or equal to the last value echoed by the other
1720                  *    endpoint.  The RFC says those will be discarded
1721                  *    since it is a dup that has already been acked.
1722                  *    This gives us a lowerbound on the timestamp.
1723                  *        timestamp >= other last echoed timestamp
1724                  *  - The timestamp will be less than or equal to
1725                  *    the last timestamp plus the time between the
1726                  *    last packet and now.  The RFC defines the max
1727                  *    clock rate as 1ms.  We will allow clocks to be
1728                  *    up to 10% fast and will allow a total difference
1729                  *    of 30 seconds due to a route change.  And this
1730                  *    gives us an upperbound on the timestamp.
1731                  *        timestamp <= last timestamp + max ticks
1732                  *    We have to be careful here.  Windows will send an
1733                  *    initial timestamp of zero and then initialize it
1734                  *    to a random value after the 3whs; presumably to
1735                  *    avoid a DoS by having to call an expensive RNG
1736                  *    during a SYN flood.  Proof MS has at least one
1737                  *    good security geek.
1738                  *
1739                  *  - The TCP timestamp option must also echo the other
1740                  *    endpoint's timestamp.  The timestamp echoed is the
1741                  *    one carried on the earliest unacknowledged segment
1742                  *    on the left edge of the sequence window.  The RFC
1743                  *    states that the host will reject any echoed
1744                  *    timestamps that were larger than any ever sent.
1745                  *    This gives us an upperbound on the TS echo.
1746                  *        tsecr <= largest_tsval
1747                  *  - The lowerbound on the TS echo is a little more
1748                  *    tricky to determine.  The other endpoint's echoed
1749                  *    values will not decrease.  But there may be
1750                  *    network conditions that re-order packets and
1751                  *    cause our view of them to decrease.  For now the
1752                  *    only lowerbound we can safely determine is that
1753                  *    the TS echo will never be less than the original
1754                  *    TS.  XXX There is probably a better lowerbound.
1755                  *    Remove TS_MAX_CONN with better lowerbound check.
1756                  *        tsecr >= other original TS
1757                  *
1758                  * It is also important to note that the fastest
1759                  * timestamp clock of 1ms will wrap its 32bit space in
1760                  * 24 days.  So we just disable TS checking after 24
1761                  * days of idle time.  We actually must use a 12d
1762                  * connection limit until we can come up with a better
1763                  * lowerbound to the TS echo check.
1764                  */
1765                 struct timeval delta_ts;
1766                 int ts_fudge;
1767
1768
1769                 /*
1770                  * PFTM_TS_DIFF is how many seconds of leeway to allow
1771                  * a host's timestamp.  This can happen if the previous
1772                  * packet got delayed in transit for much longer than
1773                  * this packet.
1774                  */
1775                 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
1776                         ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
1777
1778
1779                 /* Calculate max ticks since the last timestamp */
1780 #define TS_MAXFREQ      1100            /* RFC max TS freq of 1Khz + 10% skew */
1781 #define TS_MICROSECS    1000000         /* microseconds per second */
1782 #ifdef __FreeBSD__
1783 #ifndef timersub
1784 #define timersub(tvp, uvp, vvp)                                         \
1785         do {                                                            \
1786                 (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;          \
1787                 (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;       \
1788                 if ((vvp)->tv_usec < 0) {                               \
1789                         (vvp)->tv_sec--;                                \
1790                         (vvp)->tv_usec += 1000000;                      \
1791                 }                                                       \
1792         } while (0)
1793 #endif
1794 #endif
1795                 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
1796                 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
1797                 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
1798
1799                 /* Enforced only once both peers are at least ESTABLISHED. */
1800                 if ((src->state >= TCPS_ESTABLISHED &&
1801                     dst->state >= TCPS_ESTABLISHED) &&
1802                     (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
1803                     SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
1804                     (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
1805                     SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
1806                         /* Bad RFC1323 implementation or an insertion attack.
1807                          *
1808                          * - Solaris 2.6 and 2.7 are known to send another ACK
1809                          *   after the FIN,FIN|ACK,ACK closing that carries
1810                          *   an old timestamp.
1811                          */
1812
1813                         DPFPRINTF(("Timestamp failed %c%c%c%c\n",
1814                             SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
1815                             SEQ_GT(tsval, src->scrub->pfss_tsval +
1816                             tsval_from_last) ? '1' : ' ',
1817                             SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
1818                             SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
1819 #ifdef __FreeBSD__
1820                         DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
1821                             "idle: %jus %lums\n",
1822                             tsval, tsecr, tsval_from_last,
1823                             (uintmax_t)delta_ts.tv_sec,
1824                             delta_ts.tv_usec / 1000));
1825                         DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
1826                             src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
1827                         DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u"
1828                             "\n", dst->scrub->pfss_tsval,
1829                             dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
1830 #else
1831                         DPFPRINTF((" tsval: %lu  tsecr: %lu  +ticks: %lu  "
1832                             "idle: %lus %lums\n",
1833                             tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
1834                             delta_ts.tv_usec / 1000));
1835                         DPFPRINTF((" src->tsval: %lu  tsecr: %lu\n",
1836                             src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
1837                         DPFPRINTF((" dst->tsval: %lu  tsecr: %lu  tsval0: %lu"
1838                             "\n", dst->scrub->pfss_tsval,
1839                             dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
1840 #endif
1841                         if (pf_status.debug >= PF_DEBUG_MISC) {
1842                                 pf_print_state(state);
1843                                 pf_print_flags(th->th_flags);
1844                                 printf("\n");
1845                         }
1846                         REASON_SET(reason, PFRES_TS);
1847                         return (PF_DROP);
1848                 }
1849
1850                 /* XXX I'd really like to require tsecr but it's optional */
1851
1852         } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
1853             ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
1854             || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
1855             src->scrub && dst->scrub &&
1856             (src->scrub->pfss_flags & PFSS_PAWS) &&
1857             (dst->scrub->pfss_flags & PFSS_PAWS)) {
1858                 /* Didn't send a timestamp.  Timestamps aren't really useful
1859                  * when:
1860                  *  - connection opening or closing (often not even sent).
1861                  *    but we must not let an attacker put a FIN on a
1862                  *    data packet to sneak it through our ESTABLISHED check.
1863                  *  - on a TCP reset.  RFC suggests not even looking at TS.
1864                  *  - on an empty ACK.  The TS will not be echoed so it will
1865                  *    probably not help keep the RTT calculation in sync and
1866                  *    there isn't as much danger when the sequence numbers
1867                  *    got wrapped.  So some stacks don't include TS on empty
1868                  *    ACKs :-(
1869                  *
1870                  * To minimize the disruption to mostly RFC1323 conformant
1871                  * stacks, we will only require timestamps on data packets.
1872                  *
1873                  * And what do ya know, we cannot require timestamps on data
1874                  * packets.  There appear to be devices that do legitimate
1875                  * TCP connection hijacking.  There are HTTP devices that allow
1876                  * a 3whs (with timestamps) and then buffer the HTTP request.
1877                  * If the intermediate device has the HTTP response cache, it
1878                  * will spoof the response but not bother timestamping its
1879                  * packets.  So we can look for the presence of a timestamp in
1880                  * the first data packet and if there, require it in all future
1881                  * packets.
1882                  */
1883
1884                 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
1885                         /*
1886                          * Hey!  Someone tried to sneak a packet in.  Or the
1887                          * stack changed its RFC1323 behavior?!?!
1888                          */
1889                         if (pf_status.debug >= PF_DEBUG_MISC) {
1890                                 DPFPRINTF(("Did not receive expected RFC1323 "
1891                                     "timestamp\n"));
1892                                 pf_print_state(state);
1893                                 pf_print_flags(th->th_flags);
1894                                 printf("\n");
1895                         }
1896                         REASON_SET(reason, PFRES_TS);
1897                         return (PF_DROP);
1898                 }
1899         }
1900
1901
1902         /*
1903          * We will note if a host sends his data packets with or without
1904          * timestamps.  And require all data packets to contain a timestamp
1905          * if the first does.  PAWS implicitly requires that all data packets be
1906          * timestamped.  But I think there are middle-man devices that hijack
1907          * TCP streams immediately after the 3whs and don't timestamp their
1908          * packets (seen in a WWW accelerator or cache).
1909          */
1910         if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
1911             (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
1912                 if (got_ts)
1913                         src->scrub->pfss_flags |= PFSS_DATA_TS;
1914                 else {
1915                         src->scrub->pfss_flags |= PFSS_DATA_NOTS;
1916                         if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
1917                             (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
1918                                 /* Don't warn if other host rejected RFC1323 */
1919                                 DPFPRINTF(("Broken RFC1323 stack did not "
1920                                     "timestamp data packet. Disabled PAWS "
1921                                     "security.\n"));
1922                                 pf_print_state(state);
1923                                 pf_print_flags(th->th_flags);
1924                                 printf("\n");
1925                         }
1926                 }
1927         }
1928
1929
1930         /*
1931          * Update PAWS values
1932          */
1933         if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
1934             (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
1935                 getmicrouptime(&src->scrub->pfss_last);
1936                 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
1937                     (src->scrub->pfss_flags & PFSS_PAWS) == 0)
1938                         src->scrub->pfss_tsval = tsval;
1939
1940                 if (tsecr) {
1941                         if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
1942                             (src->scrub->pfss_flags & PFSS_PAWS) == 0)
1943                                 src->scrub->pfss_tsecr = tsecr;
1944
1945                         if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
1946                             (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
1947                             src->scrub->pfss_tsval0 == 0)) {
1948                                 /* tsval0 MUST be the lowest timestamp */
1949                                 src->scrub->pfss_tsval0 = tsval;
1950                         }
1951
1952                         /* Only fully initialized after a TS gets echoed */
1953                         if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
1954                                 src->scrub->pfss_flags |= PFSS_PAWS;
1955                 }
1956         }
1957
1958         /* I have a dream....  TCP segment reassembly.... */
1959         return (0);
1960 }
1961
1962 /*
1963  * Clamp the TCP Maximum Segment Size option to the rule's max_mss.
1964  *
1965  * Walks the TCP options that follow the TCP header at offset `off' in the
1966  * mbuf.  When a MAXSEG option advertises more than r->max_mss the value is
1967  * rewritten in place and th->th_sum is patched to match.  Parsing stops at
1968  * EOL, or at the first option whose length byte is missing, below 2, or
1969  * larger than the remaining option space.
1970  *
1971  * Returns 1 if option data was modified, 0 otherwise.
1972  */
1973 int
1974 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
1975     int off)
1976 {
1977         u_int16_t        mss;
1978         int              thoff;
1979         int              opt, cnt, optlen = 0;
1980         int              rewrite = 0;
1981         u_char          *optp;
1982
1983         thoff = th->th_off << 2;
1984         cnt = thoff - sizeof(struct tcphdr);
1985         /* NOTE(review): assumes the options are contiguous in the first mbuf */
1986         optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr);
1987
1988         for (; cnt > 0; cnt -= optlen, optp += optlen) {
1989                 opt = optp[0];
1990                 if (opt == TCPOPT_EOL)
1991                         break;
1992                 if (opt == TCPOPT_NOP)
1993                         optlen = 1;
1994                 else {
1995                         if (cnt < 2)
1996                                 break;
1997                         optlen = optp[1];
1998                         if (optlen < 2 || optlen > cnt)
1999                                 break;
2000                 }
2001                 switch (opt) {
2002                 case TCPOPT_MAXSEG:
2003                         /*
2004                          * The option must hold kind + length + a 16-bit
2005                          * value; a shorter one would make us read and
2006                          * rewrite bytes that do not belong to it.
2007                          */
2008                         if (optlen < 2 + (int)sizeof(mss))
2009                                 break;
2010                         /* The value may be unaligned; copy it out and back. */
2011                         memcpy(&mss, optp + 2, sizeof(mss));
2012                         if (ntohs(mss) > r->max_mss) {
2013                                 th->th_sum = pf_cksum_fixup(th->th_sum,
2014                                     mss, htons(r->max_mss), 0);
2015                                 mss = htons(r->max_mss);
2016                                 memcpy(optp + 2, &mss, sizeof(mss));
2017                                 rewrite = 1;
2018                         }
2019                         break;
2020                 default:
2021                         break;
2022                 }
2023         }
2024
2025         return (rewrite);
2026 }