sys/contrib/pf/net/pf_norm.c

   1 /*      $OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $ */
   2
   3 /*
   4  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26  */
  27
  28 #ifdef __FreeBSD__
  29 #include "opt_inet.h"
  30 #include "opt_inet6.h"
  31 #include "opt_pf.h"
  32
  33 #include <sys/cdefs.h>
  34 __FBSDID("$FreeBSD$");
  35
  36 #ifdef DEV_PFLOG
  37 #define NPFLOG  DEV_PFLOG
  38 #else
  39 #define NPFLOG  0
  40 #endif
  41 #else
  42 #include "pflog.h"
  43 #endif
  44
  45 #include <sys/param.h>
  46 #include <sys/systm.h>
  47 #include <sys/mbuf.h>
  48 #include <sys/filio.h>
  49 #include <sys/fcntl.h>
  50 #include <sys/socket.h>
  51 #include <sys/kernel.h>
  52 #include <sys/time.h>
  53 #ifndef __FreeBSD__
  54 #include <sys/pool.h>
  55
  56 #include <dev/rndvar.h>
  57 #endif
  58 #include <net/if.h>
  59 #include <net/if_types.h>
  60 #include <net/bpf.h>
  61 #include <net/route.h>
  62 #include <net/if_pflog.h>
  63
  64 #include <netinet/in.h>
  65 #include <netinet/in_var.h>
  66 #include <netinet/in_systm.h>
  67 #include <netinet/ip.h>
  68 #include <netinet/ip_var.h>
  69 #include <netinet/tcp.h>
  70 #include <netinet/tcp_seq.h>
  71 #include <netinet/udp.h>
  72 #include <netinet/ip_icmp.h>
  73
  74 #ifdef INET6
  75 #include <netinet/ip6.h>
  76 #endif /* INET6 */
  77
  78 #include <net/pfvar.h>
  79
   80 #ifndef __FreeBSD__
     /*
      * One buffered IP fragment, linked into pf_fragment.fr_queue.
      * fr_ip is the fragment's IP header and fr_m the mbuf chain holding it;
      * the reassembly code reads offsets/lengths through fr_ip and frees or
      * concatenates fr_m.
      */
   81 struct pf_frent {
   82         LIST_ENTRY(pf_frent) fr_next;
   83         struct ip *fr_ip;
   84         struct mbuf *fr_m;
   85 };
   86
     /*
      * One byte range recorded by the non-buffering fragment cache, linked
      * into pf_fragment.fr_cache.  pf_fragcache() stores the payload start
      * offset in fr_off and the payload end offset in fr_end.
      */
   87 struct pf_frcache {
   88         LIST_ENTRY(pf_frcache) fr_next;
   89         uint16_t        fr_off;
   90         uint16_t        fr_end;
   91 };
   92 #endif
  93
     /*
      * Bits kept in pf_fragment.fr_flags.  BUFFER_FRAGMENTS() is true for a
      * buffering (full reassembly) entry and false for an entry that belongs
      * to the non-buffering fragment cache.
      */
   94 #define PFFRAG_SEENLAST 0x0001          /* Seen the last fragment for this */
   95 #define PFFRAG_NOBUFFER 0x0002          /* Non-buffering fragment cache */
   96 #define PFFRAG_DROP     0x0004          /* Drop all fragments */
   97 #define BUFFER_FRAGMENTS(fr)    (!((fr)->fr_flags & PFFRAG_NOBUFFER))
  98
   99 #ifndef __FreeBSD__
     /*
      * Per-packet reassembly state.  An entry is keyed by (fr_id, fr_p,
      * fr_src, fr_dst) -- see pf_frag_compare() -- and lives both in a
      * red-black tree (fr_entry) and in an LRU queue (frag_next).
      * Depending on PFFRAG_NOBUFFER in fr_flags the union holds either the
      * list of buffered fragments (fr_queue) or the list of cached byte
      * ranges (fr_cache).
      */
  100 struct pf_fragment {
  101         RB_ENTRY(pf_fragment) fr_entry;
  102         TAILQ_ENTRY(pf_fragment) frag_next;
  103         struct in_addr  fr_src;
  104         struct in_addr  fr_dst;
  105         u_int8_t        fr_p;           /* protocol of this fragment */
  106         u_int8_t        fr_flags;       /* status flags */
  107         u_int16_t       fr_id;          /* fragment id for reassemble */
  108         u_int16_t       fr_max;         /* fragment data max */
             /* time_second at creation / last successful lookup */
  109         u_int32_t       fr_timeout;
  110 #define fr_queue        fr_u.fru_queue
  111 #define fr_cache        fr_u.fru_cache
  112         union {
  113                 LIST_HEAD(pf_fragq, pf_frent) fru_queue;        /* buffering */
  114                 LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;     /* non-buf */
  115         } fr_u;
  116 };
  117 #endif
 118
     /*
      * LRU queues of reassembly state: pf_fragqueue for buffering entries,
      * pf_cachequeue for non-buffering cache entries.  Entries are inserted
      * (and re-inserted on lookup) at the head; purge and flush take
      * victims from the tail.  On FreeBSD the heads are per-vnet and are
      * reached through the V_ accessors.
      */
  119 #ifdef __FreeBSD__
  120 TAILQ_HEAD(pf_fragqueue, pf_fragment);
  121 TAILQ_HEAD(pf_cachequeue, pf_fragment);
  122 VNET_DEFINE(struct pf_fragqueue,        pf_fragqueue);
  123 #define V_pf_fragqueue                  VNET(pf_fragqueue)
  124 VNET_DEFINE(struct pf_cachequeue,       pf_cachequeue);
  125 #define V_pf_cachequeue                 VNET(pf_cachequeue)
  126 #else
  127 TAILQ_HEAD(pf_fragqueue, pf_fragment)   pf_fragqueue;
  128 TAILQ_HEAD(pf_cachequeue, pf_fragment)  pf_cachequeue;
  129 #endif
 130
     /* Ordering function for the fragment trees declared below. */
  131 #ifndef __FreeBSD__
  132 static __inline int      pf_frag_compare(struct pf_fragment *,
  133                             struct pf_fragment *);
  134 #else
  135 static int               pf_frag_compare(struct pf_fragment *,
  136                             struct pf_fragment *);
  137 #endif
  138
     /*
      * Two lookup trees share one tree type: pf_frag_tree indexes buffering
      * entries and pf_cache_tree indexes non-buffering cache entries.
      */
  139 #ifdef __FreeBSD__
  140 RB_HEAD(pf_frag_tree, pf_fragment);
  141 VNET_DEFINE(struct pf_frag_tree,        pf_frag_tree);
  142 #define V_pf_frag_tree                  VNET(pf_frag_tree)
  143 VNET_DEFINE(struct pf_frag_tree,        pf_cache_tree);
  144 #define V_pf_cache_tree                 VNET(pf_cache_tree)
  145 #else
  146 RB_HEAD(pf_frag_tree, pf_fragment)      pf_frag_tree, pf_cache_tree;
  147 #endif
  148 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
  149 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
 150
  151 /* Private prototypes */
     /* Fragment bookkeeping: key construction, lookup, removal, freeing. */
  152 void                     pf_ip2key(struct pf_fragment *, struct ip *);
  153 void                     pf_remove_fragment(struct pf_fragment *);
  154 void                     pf_flush_fragments(void);
  155 void                     pf_free_fragment(struct pf_fragment *);
  156 struct pf_fragment      *pf_find_fragment(struct ip *, struct pf_frag_tree *);
     /* Buffering reassembly and non-buffering fragment cache. */
  157 struct mbuf             *pf_reassemble(struct mbuf **, struct pf_fragment **,
  158                             struct pf_frent *, int);
  159 struct mbuf             *pf_fragcache(struct mbuf **, struct ip*,
  160                             struct pf_fragment **, int, int, int *);
     /* Helpers whose definitions are not part of this section of the file. */
  161 int                      pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
  162                             struct tcphdr *, int, sa_family_t);
  163 void                     pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t,
  164                             u_int8_t);
  165 #ifdef INET6
  166 void                     pf_scrub_ip6(struct mbuf **, u_int8_t);
  167 #endif
     /*
      * Debug printf.  The argument is a complete, parenthesised printf()
      * argument list, e.g. DPFPRINTF(("x %d\n", x)).  Output is prefixed
      * with the calling function's name and is emitted only when the pf
      * debug level is at least PF_DEBUG_MISC.  The two variants differ only
      * in how the status structure is reached (V_pf_status vs. pf_status).
      */
  168 #ifdef __FreeBSD__
  169 #define DPFPRINTF(x) do {                               \
  170         if (V_pf_status.debug >= PF_DEBUG_MISC) {       \
  171                 printf("%s: ", __func__);               \
  172                 printf x ;                              \
  173         }                                               \
  174 } while(0)
  175 #else
  176 #define DPFPRINTF(x) do {                               \
  177         if (pf_status.debug >= PF_DEBUG_MISC) {         \
  178                 printf("%s: ", __func__);               \
  179                 printf x ;                              \
  180         }                                               \
  181 } while(0)
  182 #endif
 183
  184 /* Globals */
     /*
      * Allocators used by this file:
      *   pf_frent_pl  - struct pf_frent (buffered fragments)
      *   pf_frag_pl   - struct pf_fragment for buffering entries
      *   pf_cache_pl  - struct pf_fragment for non-buffering cache entries
      *   pf_cent_pl   - struct pf_frcache (cached byte ranges)
      *   pf_state_scrub_pl - struct pf_state_scrub (not used in the part of
      *                  the file shown here)
      * pf_nfrents and pf_ncache count the live pf_frent and pf_frcache
      * objects; pf_flush_fragments() uses them to decide how much to free.
      */
  185 #ifdef __FreeBSD__
  186 VNET_DEFINE(uma_zone_t,         pf_frent_pl);
  187 VNET_DEFINE(uma_zone_t,         pf_frag_pl);
  188 VNET_DEFINE(uma_zone_t,         pf_cache_pl);
  189 VNET_DEFINE(uma_zone_t,         pf_cent_pl);
  190 VNET_DEFINE(uma_zone_t,         pf_state_scrub_pl);
  191
  192 VNET_DEFINE(int,                pf_nfrents);
  193 #define V_pf_nfrents            VNET(pf_nfrents)
  194 VNET_DEFINE(int,                pf_ncache);
  195 #define V_pf_ncache             VNET(pf_ncache)
  196 #else
  197 struct pool              pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
  198 struct pool              pf_state_scrub_pl;
  199 int                      pf_nfrents, pf_ncache;
  200 #endif
 201
 202 void
 203 pf_normalize_init(void)
 204 {
 205 #ifdef __FreeBSD__
 206         /*
 207          * XXX
 208          * No high water mark support(It's hint not hard limit).
 209          * uma_zone_set_max(pf_frag_pl, PFFRAG_FRAG_HIWAT);
 210          */
 211         uma_zone_set_max(V_pf_frent_pl, PFFRAG_FRENT_HIWAT);
 212         uma_zone_set_max(V_pf_cache_pl, PFFRAG_FRCACHE_HIWAT);
 213         uma_zone_set_max(V_pf_cent_pl, PFFRAG_FRCENT_HIWAT);
 214 #else
 215         pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
 216             NULL);
 217         pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
 218             NULL);
 219         pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
 220             "pffrcache", NULL);
 221         pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
 222             NULL);
 223         pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
 224             "pfstscr", NULL);
 225
 226         pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
 227         pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
 228         pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
 229         pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
 230 #endif
 231
 232 #ifdef __FreeBSD__
 233         TAILQ_INIT(&V_pf_fragqueue);
 234         TAILQ_INIT(&V_pf_cachequeue);
 235 #else
 236         TAILQ_INIT(&pf_fragqueue);
 237         TAILQ_INIT(&pf_cachequeue);
 238 #endif
 239 }
 240
 241 #ifdef __FreeBSD__
 242 static int
 243 #else
 244 static __inline int
 245 #endif
 246 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
 247 {
 248         int     diff;
 249
 250         if ((diff = a->fr_id - b->fr_id))
 251                 return (diff);
 252         else if ((diff = a->fr_p - b->fr_p))
 253                 return (diff);
 254         else if (a->fr_src.s_addr < b->fr_src.s_addr)
 255                 return (-1);
 256         else if (a->fr_src.s_addr > b->fr_src.s_addr)
 257                 return (1);
 258         else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
 259                 return (-1);
 260         else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
 261                 return (1);
 262         return (0);
 263 }
 264
 265 void
 266 pf_purge_expired_fragments(void)
 267 {
 268         struct pf_fragment      *frag;
 269 #ifdef __FreeBSD__
 270         u_int32_t                expire = time_second -
 271                                     V_pf_default_rule.timeout[PFTM_FRAG];
 272 #else
 273         u_int32_t                expire = time_second -
 274                                     pf_default_rule.timeout[PFTM_FRAG];
 275 #endif
 276
 277 #ifdef __FreeBSD__
 278         while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) {
 279                 KASSERT((BUFFER_FRAGMENTS(frag)),
 280                     ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
 281 #else
 282         while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
 283                 KASSERT(BUFFER_FRAGMENTS(frag));
 284 #endif
 285                 if (frag->fr_timeout > expire)
 286                         break;
 287
 288                 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
 289                 pf_free_fragment(frag);
 290         }
 291
 292 #ifdef __FreeBSD__
 293         while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) {
 294                 KASSERT((!BUFFER_FRAGMENTS(frag)),
 295                     ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
 296 #else
 297         while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
 298                 KASSERT(!BUFFER_FRAGMENTS(frag));
 299 #endif
 300                 if (frag->fr_timeout > expire)
 301                         break;
 302
 303                 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
 304                 pf_free_fragment(frag);
 305 #ifdef __FreeBSD__
 306                 KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) ||
 307                     TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag),
 308                     ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s",
 309                     __FUNCTION__));
 310 #else
 311                 KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
 312                     TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
 313 #endif
 314         }
 315 }
 316
 317 /*
 318  * Try to flush old fragments to make space for new ones
 319  */
 320
 321 void
 322 pf_flush_fragments(void)
 323 {
 324         struct pf_fragment      *frag;
 325         int                      goal;
 326
 327 #ifdef __FreeBSD__
 328         goal = V_pf_nfrents * 9 / 10;
 329         DPFPRINTF(("trying to free > %d frents\n",
 330             V_pf_nfrents - goal));
 331         while (goal < V_pf_nfrents) {
 332 #else
 333         goal = pf_nfrents * 9 / 10;
 334         DPFPRINTF(("trying to free > %d frents\n",
 335             pf_nfrents - goal));
 336         while (goal < pf_nfrents) {
 337 #endif
 338 #ifdef __FreeBSD__
 339                 frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue);
 340 #else
 341                 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
 342 #endif
 343                 if (frag == NULL)
 344                         break;
 345                 pf_free_fragment(frag);
 346         }
 347
 348
 349 #ifdef __FreeBSD__
 350         goal = V_pf_ncache * 9 / 10;
 351         DPFPRINTF(("trying to free > %d cache entries\n",
 352             V_pf_ncache - goal));
 353         while (goal < V_pf_ncache) {
 354 #else
 355         goal = pf_ncache * 9 / 10;
 356         DPFPRINTF(("trying to free > %d cache entries\n",
 357             pf_ncache - goal));
 358         while (goal < pf_ncache) {
 359 #endif
 360 #ifdef __FreeBSD__
 361                 frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue);
 362 #else
 363                 frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
 364 #endif
 365                 if (frag == NULL)
 366                         break;
 367                 pf_free_fragment(frag);
 368         }
 369 }
 370
 371 /* Frees the fragments and all associated entries */
 372
 373 void
 374 pf_free_fragment(struct pf_fragment *frag)
 375 {
 376         struct pf_frent         *frent;
 377         struct pf_frcache       *frcache;
 378
 379         /* Free all fragments */
 380         if (BUFFER_FRAGMENTS(frag)) {
 381                 for (frent = LIST_FIRST(&frag->fr_queue); frent;
 382                     frent = LIST_FIRST(&frag->fr_queue)) {
 383                         LIST_REMOVE(frent, fr_next);
 384
 385                         m_freem(frent->fr_m);
 386 #ifdef __FreeBSD__
 387                         pool_put(&V_pf_frent_pl, frent);
 388                         V_pf_nfrents--;
 389 #else
 390                         pool_put(&pf_frent_pl, frent);
 391                         pf_nfrents--;
 392 #endif
 393                 }
 394         } else {
 395                 for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
 396                     frcache = LIST_FIRST(&frag->fr_cache)) {
 397                         LIST_REMOVE(frcache, fr_next);
 398
 399 #ifdef __FreeBSD__
 400                         KASSERT((LIST_EMPTY(&frag->fr_cache) ||
 401                             LIST_FIRST(&frag->fr_cache)->fr_off >
 402                             frcache->fr_end),
 403                             ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >"
 404                               " frcache->fr_end): %s", __FUNCTION__));
 405
 406                         pool_put(&V_pf_cent_pl, frcache);
 407                         V_pf_ncache--;
 408 #else
 409                         KASSERT(LIST_EMPTY(&frag->fr_cache) ||
 410                             LIST_FIRST(&frag->fr_cache)->fr_off >
 411                             frcache->fr_end);
 412
 413                         pool_put(&pf_cent_pl, frcache);
 414                         pf_ncache--;
 415 #endif
 416                 }
 417         }
 418
 419         pf_remove_fragment(frag);
 420 }
 421
 422 void
 423 pf_ip2key(struct pf_fragment *key, struct ip *ip)
 424 {
 425         key->fr_p = ip->ip_p;
 426         key->fr_id = ip->ip_id;
 427         key->fr_src.s_addr = ip->ip_src.s_addr;
 428         key->fr_dst.s_addr = ip->ip_dst.s_addr;
 429 }
 430
 431 struct pf_fragment *
 432 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
 433 {
 434         struct pf_fragment       key;
 435         struct pf_fragment      *frag;
 436
 437         pf_ip2key(&key, ip);
 438
 439         frag = RB_FIND(pf_frag_tree, tree, &key);
 440         if (frag != NULL) {
 441                 /* XXX Are we sure we want to update the timeout? */
 442                 frag->fr_timeout = time_second;
 443                 if (BUFFER_FRAGMENTS(frag)) {
 444 #ifdef __FreeBSD__
 445                         TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
 446                         TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
 447 #else
 448                         TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
 449                         TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
 450 #endif
 451                 } else {
 452 #ifdef __FreeBSD__
 453                         TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
 454                         TAILQ_INSERT_HEAD(&V_pf_cachequeue, frag, frag_next);
 455 #else
 456                         TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
 457                         TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
 458 #endif
 459                 }
 460         }
 461
 462         return (frag);
 463 }
 464
 465 /* Removes a fragment from the fragment queue and frees the fragment */
 466
 467 void
 468 pf_remove_fragment(struct pf_fragment *frag)
 469 {
 470         if (BUFFER_FRAGMENTS(frag)) {
 471 #ifdef __FreeBSD__
 472                 RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
 473                 TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
 474                 pool_put(&V_pf_frag_pl, frag);
 475 #else
 476                 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
 477                 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
 478                 pool_put(&pf_frag_pl, frag);
 479 #endif
 480         } else {
 481 #ifdef __FreeBSD__
 482                 RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag);
 483                 TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
 484                 pool_put(&V_pf_cache_pl, frag);
 485 #else
 486                 RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
 487                 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
 488                 pool_put(&pf_cache_pl, frag);
 489 #endif
 490         }
 491 }
 492
     /* Payload byte offset of a queued fragment: masked ip_off times 8. */
  493 #define FR_IP_OFF(fr)   ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
     /*
      * Buffering reassembly: add one fragment to a reassembly queue and, if
      * that completes the packet, return the reassembled mbuf chain.
      *
      *   m0    - *m0 is the mbuf of the fragment being added
      *   frag  - in/out: the reassembly entry for this packet, or *frag ==
      *           NULL to have one created.  *frag is set to NULL when the
      *           entry is consumed (packet complete) or freed (too big).
      *   frent - queue record for this fragment; fr_ip/fr_m are read here,
      *           so they must already be set by the caller.  The record is
      *           either linked into the queue or returned to the pool.
      *   mff   - zero when this is the last fragment of the packet
      *
      * Returns the complete packet, or NULL when the fragment was queued,
      * dropped, or the whole queue was discarded.
      *
      * NOTE(review): the drop path decrements the frent counter, so the
      * caller presumably incremented it when allocating frent -- confirm
      * against the caller.
      */
  494 struct mbuf *
  495 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
  496     struct pf_frent *frent, int mff)
  497 {
  498         struct mbuf     *m = *m0, *m2;
  499         struct pf_frent *frea, *next;
  500         struct pf_frent *frep = NULL;
  501         struct ip       *ip = frent->fr_ip;
  502         int              hlen = ip->ip_hl << 2;
             /* off: payload start, ip_len: payload length, max: payload end */
  503         u_int16_t        off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
  504         u_int16_t        ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
  505         u_int16_t        max = ip_len + off;
  506
  507 #ifdef __FreeBSD__
  508         KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)),
  509             ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
  510 #else
  511         KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));
  512 #endif
  513
  514         /* Strip off ip header */
             /* (undone for the first fragment once the packet is complete) */
  515         m->m_data += hlen;
  516         m->m_len -= hlen;
  517
  518         /* Create a new reassembly queue for this packet */
  519         if (*frag == NULL) {
  520 #ifdef __FreeBSD__
  521                 *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT);
  522 #else
  523                 *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
  524 #endif
                     /* On failure flush old state and retry exactly once. */
  525                 if (*frag == NULL) {
  526                         pf_flush_fragments();
  527 #ifdef __FreeBSD__
  528                         *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT);
  529 #else
  530                         *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
  531 #endif
  532                         if (*frag == NULL)
  533                                 goto drop_fragment;
  534                 }
  535
  536                 (*frag)->fr_flags = 0;
  537                 (*frag)->fr_max = 0;
  538                 (*frag)->fr_src = frent->fr_ip->ip_src;
  539                 (*frag)->fr_dst = frent->fr_ip->ip_dst;
  540                 (*frag)->fr_p = frent->fr_ip->ip_p;
  541                 (*frag)->fr_id = frent->fr_ip->ip_id;
  542                 (*frag)->fr_timeout = time_second;
  543                 LIST_INIT(&(*frag)->fr_queue);
  544
  545 #ifdef __FreeBSD__
  546                 RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag);
  547                 TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next);
  548 #else
  549                 RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
  550                 TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
  551 #endif
  552
  553                 /* We do not have a previous fragment */
  554                 frep = NULL;
  555                 goto insert;
  556         }
  557
  558         /*
  559          * Find a fragment after the current one:
  560          *  - off contains the real shifted offset.
  561          */
             /*
              * After the loop frep is the last queued entry starting at or
              * before off (or NULL) and frea the first one starting after
              * off (or NULL).
              */
  562         LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
  563                 if (FR_IP_OFF(frea) > off)
  564                         break;
  565                 frep = frea;
  566         }
  567
  568 #ifdef __FreeBSD__
  569         KASSERT((frep != NULL || frea != NULL),
  570             ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));;
  571 #else
  572         KASSERT(frep != NULL || frea != NULL);
  573 #endif
  574
             /*
              * The predecessor's payload extends past our start: cut the
              * overlapping bytes off the front of the new fragment, or drop
              * it when nothing would remain.
              */
  575         if (frep != NULL &&
  576             FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
  577             4 > off)
  578         {
  579                 u_int16_t       precut;
  580
  581                 precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
  582                     frep->fr_ip->ip_hl * 4 - off;
  583                 if (precut >= ip_len)
  584                         goto drop_fragment;
  585                 m_adj(frent->fr_m, precut);
  586                 DPFPRINTF(("overlap -%d\n", precut));
  587                 /* Enforce 8 byte boundaries */
  588                 ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
  589                 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
  590                 ip_len -= precut;
  591                 ip->ip_len = htons(ip_len);
  592         }
  593
             /*
              * Successors that start before our end: trim the front of a
              * partially covered one and stop; remove and free the ones that
              * are covered completely.
              */
  594         for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
  595             frea = next)
  596         {
  597                 u_int16_t       aftercut;
  598
  599                 aftercut = ip_len + off - FR_IP_OFF(frea);
  600                 DPFPRINTF(("adjust overlap %d\n", aftercut));
  601                 if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
  602                     * 4)
  603                 {
  604                         frea->fr_ip->ip_len =
  605                             htons(ntohs(frea->fr_ip->ip_len) - aftercut);
  606                         frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
  607                             (aftercut >> 3));
  608                         m_adj(frea->fr_m, aftercut);
  609                         break;
  610                 }
  611
  612                 /* This fragment is completely overlapped, lose it */
  613                 next = LIST_NEXT(frea, fr_next);
  614                 m_freem(frea->fr_m);
  615                 LIST_REMOVE(frea, fr_next);
  616 #ifdef __FreeBSD__
  617                 pool_put(&V_pf_frent_pl, frea);
  618                 V_pf_nfrents--;
  619 #else
  620                 pool_put(&pf_frent_pl, frea);
  621                 pf_nfrents--;
  622 #endif
  623         }
  624
  625  insert:
  626         /* Update maximum data size */
  627         if ((*frag)->fr_max < max)
  628                 (*frag)->fr_max = max;
  629         /* This is the last segment */
  630         if (!mff)
  631                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
  632
             /* Link the new record in offset order, right after frep. */
  633         if (frep == NULL)
  634                 LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
  635         else
  636                 LIST_INSERT_AFTER(frep, frent, fr_next);
  637
  638         /* Check if we are completely reassembled */
  639         if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
  640                 return (NULL);
  641
  642         /* Check if we have all the data */
             /*
              * off becomes the running sum of payload lengths; a successor
              * whose offset differs from that sum (or a missing successor
              * before fr_max is reached) means a hole.
              */
  643         off = 0;
  644         for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
  645                 next = LIST_NEXT(frep, fr_next);
  646
  647                 off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
  648                 if (off < (*frag)->fr_max &&
  649                     (next == NULL || FR_IP_OFF(next) != off))
  650                 {
  651                         DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
  652                             off, next == NULL ? -1 : FR_IP_OFF(next),
  653                             (*frag)->fr_max));
  654                         return (NULL);
  655                 }
  656         }
  657         DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
  658         if (off < (*frag)->fr_max)
  659                 return (NULL);
  660
  661         /* We have all the data */
  662         frent = LIST_FIRST(&(*frag)->fr_queue);
  663 #ifdef __FreeBSD__
  664         KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__));
  665 #else
  666         KASSERT(frent != NULL);
  667 #endif
             /* Header of the first fragment plus all payload must fit. */
  668         if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
  669                 DPFPRINTF(("drop: too big: %d\n", off));
  670                 pf_free_fragment(*frag);
  671                 *frag = NULL;
  672                 return (NULL);
  673         }
  674         next = LIST_NEXT(frent, fr_next);
  675
  676         /* Magic from ip_input */
             /*
              * The first fragment's mbuf becomes the packet; every other
              * fragment's chain is appended with m_cat() and its queue
              * record is returned to the pool.
              */
  677         ip = frent->fr_ip;
  678         m = frent->fr_m;
  679         m2 = m->m_next;
  680         m->m_next = NULL;
  681         m_cat(m, m2);
  682 #ifdef __FreeBSD__
  683         pool_put(&V_pf_frent_pl, frent);
  684         V_pf_nfrents--;
  685 #else
  686         pool_put(&pf_frent_pl, frent);
  687         pf_nfrents--;
  688 #endif
  689         for (frent = next; frent != NULL; frent = next) {
  690                 next = LIST_NEXT(frent, fr_next);
  691
  692                 m2 = frent->fr_m;
  693 #ifdef __FreeBSD__
  694                 pool_put(&V_pf_frent_pl, frent);
  695                 V_pf_nfrents--;
  696 #else
  697                 pool_put(&pf_frent_pl, frent);
  698                 pf_nfrents--;
  699 #endif
  700 #ifdef __FreeBSD__
                     /* Keep only flags common to all parts; sum the data. */
  701                 m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags;
  702                 m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data;
  703 #endif
  704                 m_cat(m, m2);
  705         }
  706
  707 #ifdef __FreeBSD__
             /* Fold the accumulated csum_data back into 16 bits. */
  708         while (m->m_pkthdr.csum_data & 0xffff0000)
  709                 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
  710                     (m->m_pkthdr.csum_data >> 16);
  711 #endif
             /* Copy the addresses stored in the entry into the header. */
  712         ip->ip_src = (*frag)->fr_src;
  713         ip->ip_dst = (*frag)->fr_dst;
  714
  715         /* Remove from fragment queue */
  716         pf_remove_fragment(*frag);
  717         *frag = NULL;
  718
             /* Re-expose the IP header that was stripped on entry. */
  719         hlen = ip->ip_hl << 2;
  720         ip->ip_len = htons(off + hlen);
  721         m->m_len += hlen;
  722         m->m_data -= hlen;
  723
  724         /* some debugging cruft by sklower, below, will go away soon */
  725         /* XXX this should be done elsewhere */
             /* Recompute the packet header length from the whole chain. */
  726         if (m->m_flags & M_PKTHDR) {
  727                 int plen = 0;
  728                 for (m2 = m; m2; m2 = m2->m_next)
  729                         plen += m2->m_len;
  730                 m->m_pkthdr.len = plen;
  731         }
  732
  733         DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
  734         return (m);
  735
  736  drop_fragment:
  737         /* Oops - fail safe - drop packet */
             /* Only the new fragment is released; the queue is untouched. */
  738 #ifdef __FreeBSD__
  739         pool_put(&V_pf_frent_pl, frent);
  740         V_pf_nfrents--;
  741 #else
  742         pool_put(&pf_frent_pl, frent);
  743         pf_nfrents--;
  744 #endif
  745         m_freem(m);
  746         return (NULL);
  747 }
 748
 749 struct mbuf *
 750 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
 751     int drop, int *nomem)
 752 {
 753         struct mbuf             *m = *m0;
 754         struct pf_frcache       *frp, *fra, *cur = NULL;
 755         int                      ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
 756         u_int16_t                off = ntohs(h->ip_off) << 3;
 757         u_int16_t                max = ip_len + off;
 758         int                      hosed = 0;
 759
 760 #ifdef __FreeBSD__
 761         KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)),
 762             ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
 763 #else
 764         KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
 765 #endif
 766
 767         /* Create a new range queue for this packet */
 768         if (*frag == NULL) {
 769 #ifdef __FreeBSD__
 770                 *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT);
 771 #else
 772                 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
 773 #endif
 774                 if (*frag == NULL) {
 775                         pf_flush_fragments();
 776 #ifdef __FreeBSD__
 777                         *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT);
 778 #else
 779                         *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
 780 #endif
 781                         if (*frag == NULL)
 782                                 goto no_mem;
 783                 }
 784
 785                 /* Get an entry for the queue */
 786 #ifdef __FreeBSD__
 787                 cur = pool_get(&V_pf_cent_pl, PR_NOWAIT);
 788                 if (cur == NULL) {
 789                         pool_put(&V_pf_cache_pl, *frag);
 790 #else
 791                 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
 792                 if (cur == NULL) {
 793                         pool_put(&pf_cache_pl, *frag);
 794 #endif
 795                         *frag = NULL;
 796                         goto no_mem;
 797                 }
 798 #ifdef __FreeBSD__
 799                 V_pf_ncache++;
 800 #else
 801                 pf_ncache++;
 802 #endif
 803
 804                 (*frag)->fr_flags = PFFRAG_NOBUFFER;
 805                 (*frag)->fr_max = 0;
 806                 (*frag)->fr_src = h->ip_src;
 807                 (*frag)->fr_dst = h->ip_dst;
 808                 (*frag)->fr_p = h->ip_p;
 809                 (*frag)->fr_id = h->ip_id;
 810                 (*frag)->fr_timeout = time_second;
 811
 812                 cur->fr_off = off;
 813                 cur->fr_end = max;
 814                 LIST_INIT(&(*frag)->fr_cache);
 815                 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
 816
 817 #ifdef __FreeBSD__
 818                 RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag);
 819                 TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next);
 820 #else
 821                 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
 822                 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
 823 #endif
 824
 825                 DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));
 826
 827                 goto pass;
 828         }
 829
 830         /*
 831          * Find a fragment after the current one:
 832          *  - off contains the real shifted offset.
 833          */
 834         frp = NULL;
 835         LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
 836                 if (fra->fr_off > off)
 837                         break;
 838                 frp = fra;
 839         }
 840
 841 #ifdef __FreeBSD__
 842         KASSERT((frp != NULL || fra != NULL),
 843             ("!(frp != NULL || fra != NULL): %s", __FUNCTION__));
 844 #else
 845         KASSERT(frp != NULL || fra != NULL);
 846 #endif
 847
 848         if (frp != NULL) {
 849                 int     precut;
 850
 851                 precut = frp->fr_end - off;
 852                 if (precut >= ip_len) {
 853                         /* Fragment is entirely a duplicate */
 854                         DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
 855                             h->ip_id, frp->fr_off, frp->fr_end, off, max));
 856                         goto drop_fragment;
 857                 }
 858                 if (precut == 0) {
 859                         /* They are adjacent.  Fixup cache entry */
 860                         DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
 861                             h->ip_id, frp->fr_off, frp->fr_end, off, max));
 862                         frp->fr_end = max;
 863                 } else if (precut > 0) {
 864                         /* The first part of this payload overlaps with a
 865                          * fragment that has already been passed.
 866                          * Need to trim off the first part of the payload.
 867                          * But to do so easily, we need to create another
 868                          * mbuf to throw the original header into.
 869                          */
 870
 871                         DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
 872                             h->ip_id, precut, frp->fr_off, frp->fr_end, off,
 873                             max));
 874
 875                         off += precut;
 876                         max -= precut;
 877                         /* Update the previous frag to encompass this one */
 878                         frp->fr_end = max;
 879
 880                         if (!drop) {
 881                                 /* XXX Optimization opportunity
 882                                  * This is a very heavy way to trim the payload.
 883                                  * we could do it much faster by diddling mbuf
 884                                  * internals but that would be even less legible
 885                                  * than this mbuf magic.  For my next trick,
 886                                  * I'll pull a rabbit out of my laptop.
 887                                  */
 888 #ifdef __FreeBSD__
 889                                 *m0 = m_dup(m, M_DONTWAIT);
 890 #else
 891                                 *m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT);
 892 #endif
 893                                 if (*m0 == NULL)
 894                                         goto no_mem;
 895 #ifdef __FreeBSD__
 896                                 /* From KAME Project : We have missed this! */
 897                                 m_adj(*m0, (h->ip_hl << 2) -
 898                                     (*m0)->m_pkthdr.len);
 899
 900                                 KASSERT(((*m0)->m_next == NULL),
 901                                     ("(*m0)->m_next != NULL: %s",
 902                                     __FUNCTION__));
 903 #else
 904                                 KASSERT((*m0)->m_next == NULL);
 905 #endif
 906                                 m_adj(m, precut + (h->ip_hl << 2));
 907                                 m_cat(*m0, m);
 908                                 m = *m0;
 909                                 if (m->m_flags & M_PKTHDR) {
 910                                         int plen = 0;
 911                                         struct mbuf *t;
 912                                         for (t = m; t; t = t->m_next)
 913                                                 plen += t->m_len;
 914                                         m->m_pkthdr.len = plen;
 915                                 }
 916
 917
 918                                 h = mtod(m, struct ip *);
 919
 920 #ifdef __FreeBSD__
 921                                 KASSERT(((int)m->m_len ==
 922                                     ntohs(h->ip_len) - precut),
 923                                     ("m->m_len != ntohs(h->ip_len) - precut: %s",
 924                                     __FUNCTION__));
 925 #else
 926                                 KASSERT((int)m->m_len ==
 927                                     ntohs(h->ip_len) - precut);
 928 #endif
 929                                 h->ip_off = htons(ntohs(h->ip_off) +
 930                                     (precut >> 3));
 931                                 h->ip_len = htons(ntohs(h->ip_len) - precut);
 932                         } else {
 933                                 hosed++;
 934                         }
 935                 } else {
 936                         /* There is a gap between fragments */
 937
 938                         DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
 939                             h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
 940                             max));
 941
 942 #ifdef __FreeBSD__
 943                         cur = pool_get(&V_pf_cent_pl, PR_NOWAIT);
 944 #else
 945                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
 946 #endif
 947                         if (cur == NULL)
 948                                 goto no_mem;
 949 #ifdef __FreeBSD__
 950                         V_pf_ncache++;
 951 #else
 952                         pf_ncache++;
 953 #endif
 954
 955                         cur->fr_off = off;
 956                         cur->fr_end = max;
 957                         LIST_INSERT_AFTER(frp, cur, fr_next);
 958                 }
 959         }
 960
 961         if (fra != NULL) {
 962                 int     aftercut;
 963                 int     merge = 0;
 964
 965                 aftercut = max - fra->fr_off;
 966                 if (aftercut == 0) {
 967                         /* Adjacent fragments */
 968                         DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
 969                             h->ip_id, off, max, fra->fr_off, fra->fr_end));
 970                         fra->fr_off = off;
 971                         merge = 1;
 972                 } else if (aftercut > 0) {
 973                         /* Need to chop off the tail of this fragment */
 974                         DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
 975                             h->ip_id, aftercut, off, max, fra->fr_off,
 976                             fra->fr_end));
 977                         fra->fr_off = off;
 978                         max -= aftercut;
 979
 980                         merge = 1;
 981
 982                         if (!drop) {
 983                                 m_adj(m, -aftercut);
 984                                 if (m->m_flags & M_PKTHDR) {
 985                                         int plen = 0;
 986                                         struct mbuf *t;
 987                                         for (t = m; t; t = t->m_next)
 988                                                 plen += t->m_len;
 989                                         m->m_pkthdr.len = plen;
 990                                 }
 991                                 h = mtod(m, struct ip *);
 992 #ifdef __FreeBSD__
 993                                 KASSERT(((int)m->m_len == ntohs(h->ip_len) - aftercut),
 994                                     ("m->m_len != ntohs(h->ip_len) - aftercut: %s",
 995                                     __FUNCTION__));
 996 #else
 997                                 KASSERT((int)m->m_len ==
 998                                     ntohs(h->ip_len) - aftercut);
 999 #endif
1000                                 h->ip_len = htons(ntohs(h->ip_len) - aftercut);
1001                         } else {
1002                                 hosed++;
1003                         }
1004                 } else if (frp == NULL) {
1005                         /* There is a gap between fragments */
1006                         DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
1007                             h->ip_id, -aftercut, off, max, fra->fr_off,
1008                             fra->fr_end));
1009
1010 #ifdef __FreeBSD__
1011                         cur = pool_get(&V_pf_cent_pl, PR_NOWAIT);
1012 #else
1013                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1014 #endif
1015                         if (cur == NULL)
1016                                 goto no_mem;
1017 #ifdef __FreeBSD__
1018                         V_pf_ncache++;
1019 #else
1020                         pf_ncache++;
1021 #endif
1022
1023                         cur->fr_off = off;
1024                         cur->fr_end = max;
1025                         LIST_INSERT_BEFORE(fra, cur, fr_next);
1026                 }
1027
1028
1029                 /* Need to glue together two separate fragment descriptors */
1030                 if (merge) {
1031                         if (cur && fra->fr_off <= cur->fr_end) {
1032                                 /* Need to merge in a previous 'cur' */
1033                                 DPFPRINTF(("fragcache[%d]: adjacent(merge "
1034                                     "%d-%d) %d-%d (%d-%d)\n",
1035                                     h->ip_id, cur->fr_off, cur->fr_end, off,
1036                                     max, fra->fr_off, fra->fr_end));
1037                                 fra->fr_off = cur->fr_off;
1038                                 LIST_REMOVE(cur, fr_next);
1039 #ifdef __FreeBSD__
1040                                 pool_put(&V_pf_cent_pl, cur);
1041                                 V_pf_ncache--;
1042 #else
1043                                 pool_put(&pf_cent_pl, cur);
1044                                 pf_ncache--;
1045 #endif
1046                                 cur = NULL;
1047
1048                         } else if (frp && fra->fr_off <= frp->fr_end) {
1049                                 /* Need to merge in a modified 'frp' */
1050 #ifdef __FreeBSD__
1051                                 KASSERT((cur == NULL), ("cur != NULL: %s",
1052                                     __FUNCTION__));
1053 #else
1054                                 KASSERT(cur == NULL);
1055 #endif
1056                                 DPFPRINTF(("fragcache[%d]: adjacent(merge "
1057                                     "%d-%d) %d-%d (%d-%d)\n",
1058                                     h->ip_id, frp->fr_off, frp->fr_end, off,
1059                                     max, fra->fr_off, fra->fr_end));
1060                                 fra->fr_off = frp->fr_off;
1061                                 LIST_REMOVE(frp, fr_next);
1062 #ifdef __FreeBSD__
1063                                 pool_put(&V_pf_cent_pl, frp);
1064                                 V_pf_ncache--;
1065 #else
1066                                 pool_put(&pf_cent_pl, frp);
1067                                 pf_ncache--;
1068 #endif
1069                                 frp = NULL;
1070
1071                         }
1072                 }
1073         }
1074
1075         if (hosed) {
1076                 /*
1077                  * We must keep tracking the overall fragment even when
1078                  * we're going to drop it anyway so that we know when to
1079                  * free the overall descriptor.  Thus we drop the frag late.
1080                  */
1081                 goto drop_fragment;
1082         }
1083
1084
1085  pass:
1086         /* Update maximum data size */
1087         if ((*frag)->fr_max < max)
1088                 (*frag)->fr_max = max;
1089
1090         /* This is the last segment */
1091         if (!mff)
1092                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1093
1094         /* Check if we are completely reassembled */
1095         if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
1096             LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
1097             LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
1098                 /* Remove from fragment queue */
1099                 DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
1100                     (*frag)->fr_max));
1101                 pf_free_fragment(*frag);
1102                 *frag = NULL;
1103         }
1104
1105         return (m);
1106
1107  no_mem:
1108         *nomem = 1;
1109
1110         /* Still need to pay attention to !IP_MF */
1111         if (!mff && *frag != NULL)
1112                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1113
1114         m_freem(m);
1115         return (NULL);
1116
1117  drop_fragment:
1118
1119         /* Still need to pay attention to !IP_MF */
1120         if (!mff && *frag != NULL)
1121                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1122
1123         if (drop) {
1124                 /* This fragment has been deemed bad.  Don't reass */
1125                 if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
1126                         DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
1127                             h->ip_id));
1128                 (*frag)->fr_flags |= PFFRAG_DROP;
1129         }
1130
1131         m_freem(m);
1132         return (NULL);
1133 }
1134
1135 #ifdef INET
1136 int
1137 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
1138     struct pf_pdesc *pd)
1139 {
1140         struct mbuf             *m = *m0;
1141         struct pf_rule          *r;
1142         struct pf_frent         *frent;
1143         struct pf_fragment      *frag = NULL;
1144         struct ip               *h = mtod(m, struct ip *);
1145         int                      mff = (ntohs(h->ip_off) & IP_MF);
1146         int                      hlen = h->ip_hl << 2;
1147         u_int16_t                fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1148         u_int16_t                max;
1149         int                      ip_len;
1150         int                      ip_off;
1151         int                      tag = -1;
1152
1153         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1154         while (r != NULL) {
1155                 r->evaluations++;
1156                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1157                         r = r->skip[PF_SKIP_IFP].ptr;
1158                 else if (r->direction && r->direction != dir)
1159                         r = r->skip[PF_SKIP_DIR].ptr;
1160                 else if (r->af && r->af != AF_INET)
1161                         r = r->skip[PF_SKIP_AF].ptr;
1162                 else if (r->proto && r->proto != h->ip_p)
1163                         r = r->skip[PF_SKIP_PROTO].ptr;
1164                 else if (PF_MISMATCHAW(&r->src.addr,
1165                     (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
1166                     r->src.neg, kif, M_GETFIB(m)))
1167                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1168                 else if (PF_MISMATCHAW(&r->dst.addr,
1169                     (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
1170                     r->dst.neg, NULL, M_GETFIB(m)))
1171                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
1172 #ifdef __FreeBSD__
1173                 else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
1174 #else
1175                 else if (r->match_tag && !pf_match_tag(m, r, &tag))
1176 #endif
1177                         r = TAILQ_NEXT(r, entries);
1178                 else
1179                         break;
1180         }
1181
1182         if (r == NULL || r->action == PF_NOSCRUB)
1183                 return (PF_PASS);
1184         else {
1185                 r->packets[dir == PF_OUT]++;
1186                 r->bytes[dir == PF_OUT] += pd->tot_len;
1187         }
1188
1189         /* Check for illegal packets */
1190         if (hlen < (int)sizeof(struct ip))
1191                 goto drop;
1192
1193         if (hlen > ntohs(h->ip_len))
1194                 goto drop;
1195
1196         /* Clear IP_DF if the rule uses the no-df option */
1197         if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
1198                 u_int16_t ip_off = h->ip_off;
1199
1200                 h->ip_off &= htons(~IP_DF);
1201                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
1202         }
1203
1204         /* We will need other tests here */
1205         if (!fragoff && !mff)
1206                 goto no_fragment;
1207
1208         /* We're dealing with a fragment now. Don't allow fragments
1209          * with IP_DF to enter the cache. If the flag was cleared by
1210          * no-df above, fine. Otherwise drop it.
1211          */
1212         if (h->ip_off & htons(IP_DF)) {
1213                 DPFPRINTF(("IP_DF\n"));
1214                 goto bad;
1215         }
1216
1217         ip_len = ntohs(h->ip_len) - hlen;
1218         ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1219
1220         /* All fragments are 8 byte aligned */
1221         if (mff && (ip_len & 0x7)) {
1222                 DPFPRINTF(("mff and %d\n", ip_len));
1223                 goto bad;
1224         }
1225
1226         /* Respect maximum length */
1227         if (fragoff + ip_len > IP_MAXPACKET) {
1228                 DPFPRINTF(("max packet %d\n", fragoff + ip_len));
1229                 goto bad;
1230         }
1231         max = fragoff + ip_len;
1232
1233         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
1234                 /* Fully buffer all of the fragments */
1235
1236 #ifdef __FreeBSD__
1237                 frag = pf_find_fragment(h, &V_pf_frag_tree);
1238 #else
1239                 frag = pf_find_fragment(h, &pf_frag_tree);
1240 #endif
1241
1242                 /* Check if we saw the last fragment already */
1243                 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1244                     max > frag->fr_max)
1245                         goto bad;
1246
1247                 /* Get an entry for the fragment queue */
1248 #ifdef __FreeBSD__
1249                 frent = pool_get(&V_pf_frent_pl, PR_NOWAIT);
1250 #else
1251                 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
1252 #endif
1253                 if (frent == NULL) {
1254                         REASON_SET(reason, PFRES_MEMORY);
1255                         return (PF_DROP);
1256                 }
1257 #ifdef __FreeBSD__
1258                 V_pf_nfrents++;
1259 #else
1260                 pf_nfrents++;
1261 #endif
1262                 frent->fr_ip = h;
1263                 frent->fr_m = m;
1264
1265                 /* Might return a completely reassembled mbuf, or NULL */
1266                 DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
1267                 *m0 = m = pf_reassemble(m0, &frag, frent, mff);
1268
1269                 if (m == NULL)
1270                         return (PF_DROP);
1271
1272                 /* use mtag from concatenated mbuf chain */
1273                 pd->pf_mtag = pf_find_mtag(m);
1274 #ifdef DIAGNOSTIC
1275                 if (pd->pf_mtag == NULL) {
1276                         printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
1277                         if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
1278                                 m_freem(m);
1279                                 *m0 = NULL;
1280                                 goto no_mem;
1281                         }
1282                 }
1283 #endif
1284                 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1285                         goto drop;
1286
1287                 h = mtod(m, struct ip *);
1288         } else {
1289                 /* non-buffering fragment cache (drops or masks overlaps) */
1290                 int     nomem = 0;
1291
1292 #ifdef __FreeBSD__
1293                 if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) {
1294 #else
1295                 if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) {
1296 #endif
1297                         /*
1298                          * Already passed the fragment cache in the
1299                          * input direction.  If we continued, it would
1300                          * appear to be a dup and would be dropped.
1301                          */
1302                         goto fragment_pass;
1303                 }
1304
1305 #ifdef __FreeBSD__
1306                 frag = pf_find_fragment(h, &V_pf_cache_tree);
1307 #else
1308                 frag = pf_find_fragment(h, &pf_cache_tree);
1309 #endif
1310
1311                 /* Check if we saw the last fragment already */
1312                 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1313                     max > frag->fr_max) {
1314                         if (r->rule_flag & PFRULE_FRAGDROP)
1315                                 frag->fr_flags |= PFFRAG_DROP;
1316                         goto bad;
1317                 }
1318
1319                 *m0 = m = pf_fragcache(m0, h, &frag, mff,
1320                     (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
1321                 if (m == NULL) {
1322                         if (nomem)
1323                                 goto no_mem;
1324                         goto drop;
1325                 }
1326
1327                 /* use mtag from copied and trimmed mbuf chain */
1328                 pd->pf_mtag = pf_find_mtag(m);
1329 #ifdef DIAGNOSTIC
1330                 if (pd->pf_mtag == NULL) {
1331                         printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
1332                         if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
1333                                 m_freem(m);
1334                                 *m0 = NULL;
1335                                 goto no_mem;
1336                         }
1337                 }
1338 #endif
1339                 if (dir == PF_IN)
1340 #ifdef __FreeBSD__
1341                         pd->pf_mtag->flags |= PF_TAG_FRAGCACHE;
1342 #else
1343                         m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE;
1344 #endif
1345
1346                 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1347                         goto drop;
1348                 goto fragment_pass;
1349         }
1350
1351  no_fragment:
1352         /* At this point, only IP_DF is allowed in ip_off */
1353         if (h->ip_off & ~htons(IP_DF)) {
1354                 u_int16_t ip_off = h->ip_off;
1355
1356                 h->ip_off &= htons(IP_DF);
1357                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
1358         }
1359
1360         /* not missing a return here */
1361
1362  fragment_pass:
1363         pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos);
1364
1365         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1366                 pd->flags |= PFDESC_IP_REAS;
1367         return (PF_PASS);
1368
1369  no_mem:
1370         REASON_SET(reason, PFRES_MEMORY);
1371         if (r != NULL && r->log)
1372                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1373         return (PF_DROP);
1374
1375  drop:
1376         REASON_SET(reason, PFRES_NORM);
1377         if (r != NULL && r->log)
1378                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1379         return (PF_DROP);
1380
1381  bad:
1382         DPFPRINTF(("dropping bad fragment\n"));
1383
1384         /* Free associated fragments */
1385         if (frag != NULL)
1386                 pf_free_fragment(frag);
1387
1388         REASON_SET(reason, PFRES_FRAG);
1389         if (r != NULL && r->log)
1390                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1391
1392         return (PF_DROP);
1393 }
1394 #endif
1395
1396 #ifdef INET6
1397 int
1398 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
1399     u_short *reason, struct pf_pdesc *pd)
1400 {
1401         struct mbuf             *m = *m0;
1402         struct pf_rule          *r;
1403         struct ip6_hdr          *h = mtod(m, struct ip6_hdr *);
1404         int                      off;
1405         struct ip6_ext           ext;
1406         struct ip6_opt           opt;
1407         struct ip6_opt_jumbo     jumbo;
1408         struct ip6_frag          frag;
1409         u_int32_t                jumbolen = 0, plen;
1410         u_int16_t                fragoff = 0;
1411         int                      optend;
1412         int                      ooff;
1413         u_int8_t                 proto;
1414         int                      terminal;
1415
1416         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1417         while (r != NULL) {
1418                 r->evaluations++;
1419                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1420                         r = r->skip[PF_SKIP_IFP].ptr;
1421                 else if (r->direction && r->direction != dir)
1422                         r = r->skip[PF_SKIP_DIR].ptr;
1423                 else if (r->af && r->af != AF_INET6)
1424                         r = r->skip[PF_SKIP_AF].ptr;
1425 #if 0 /* header chain! */
1426                 else if (r->proto && r->proto != h->ip6_nxt)
1427                         r = r->skip[PF_SKIP_PROTO].ptr;
1428 #endif
1429                 else if (PF_MISMATCHAW(&r->src.addr,
1430                     (struct pf_addr *)&h->ip6_src, AF_INET6,
1431                     r->src.neg, kif, M_GETFIB(m)))
1432                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1433                 else if (PF_MISMATCHAW(&r->dst.addr,
1434                     (struct pf_addr *)&h->ip6_dst, AF_INET6,
1435                     r->dst.neg, NULL, M_GETFIB(m)))
1436                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
1437                 else
1438                         break;
1439         }
1440
1441         if (r == NULL || r->action == PF_NOSCRUB)
1442                 return (PF_PASS);
1443         else {
1444                 r->packets[dir == PF_OUT]++;
1445                 r->bytes[dir == PF_OUT] += pd->tot_len;
1446         }
1447
1448         /* Check for illegal packets */
1449         if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
1450                 goto drop;
1451
1452         off = sizeof(struct ip6_hdr);
1453         proto = h->ip6_nxt;
1454         terminal = 0;
1455         do {
1456                 switch (proto) {
1457                 case IPPROTO_FRAGMENT:
1458                         goto fragment;
1459                         break;
1460                 case IPPROTO_AH:
1461                 case IPPROTO_ROUTING:
1462                 case IPPROTO_DSTOPTS:
1463                         if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1464                             NULL, AF_INET6))
1465                                 goto shortpkt;
1466                         if (proto == IPPROTO_AH)
1467                                 off += (ext.ip6e_len + 2) * 4;
1468                         else
1469                                 off += (ext.ip6e_len + 1) * 8;
1470                         proto = ext.ip6e_nxt;
1471                         break;
1472                 case IPPROTO_HOPOPTS:
1473                         if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1474                             NULL, AF_INET6))
1475                                 goto shortpkt;
1476                         optend = off + (ext.ip6e_len + 1) * 8;
1477                         ooff = off + sizeof(ext);
1478                         do {
1479                                 if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
1480                                     sizeof(opt.ip6o_type), NULL, NULL,
1481                                     AF_INET6))
1482                                         goto shortpkt;
1483                                 if (opt.ip6o_type == IP6OPT_PAD1) {
1484                                         ooff++;
1485                                         continue;
1486                                 }
1487                                 if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
1488                                     NULL, NULL, AF_INET6))
1489                                         goto shortpkt;
1490                                 if (ooff + sizeof(opt) + opt.ip6o_len > optend)
1491                                         goto drop;
1492                                 switch (opt.ip6o_type) {
1493                                 case IP6OPT_JUMBO:
1494                                         if (h->ip6_plen != 0)
1495                                                 goto drop;
1496                                         if (!pf_pull_hdr(m, ooff, &jumbo,
1497                                             sizeof(jumbo), NULL, NULL,
1498                                             AF_INET6))
1499                                                 goto shortpkt;
1500                                         memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
1501                                             sizeof(jumbolen));
1502                                         jumbolen = ntohl(jumbolen);
1503                                         if (jumbolen <= IPV6_MAXPACKET)
1504                                                 goto drop;
1505                                         if (sizeof(struct ip6_hdr) + jumbolen !=
1506                                             m->m_pkthdr.len)
1507                                                 goto drop;
1508                                         break;
1509                                 default:
1510                                         break;
1511                                 }
1512                                 ooff += sizeof(opt) + opt.ip6o_len;
1513                         } while (ooff < optend);
1514
1515                         off = optend;
1516                         proto = ext.ip6e_nxt;
1517                         break;
1518                 default:
1519                         terminal = 1;
1520                         break;
1521                 }
1522         } while (!terminal);
1523
1524         /* jumbo payload option must be present, or plen > 0 */
1525         if (ntohs(h->ip6_plen) == 0)
1526                 plen = jumbolen;
1527         else
1528                 plen = ntohs(h->ip6_plen);
1529         if (plen == 0)
1530                 goto drop;
1531         if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
1532                 goto shortpkt;
1533
1534         pf_scrub_ip6(&m, r->min_ttl);
1535
1536         return (PF_PASS);
1537
1538  fragment:
1539         if (ntohs(h->ip6_plen) == 0 || jumbolen)
1540                 goto drop;
1541         plen = ntohs(h->ip6_plen);
1542
1543         if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
1544                 goto shortpkt;
1545         fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
1546         if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
1547                 goto badfrag;
1548
1549         /* do something about it */
1550         /* remember to set pd->flags |= PFDESC_IP_REAS */
1551         return (PF_PASS);
1552
1553  shortpkt:
1554         REASON_SET(reason, PFRES_SHORT);
1555         if (r != NULL && r->log)
1556                 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1557         return (PF_DROP);
1558
1559  drop:
1560         REASON_SET(reason, PFRES_NORM);
1561         if (r != NULL && r->log)
1562                 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1563         return (PF_DROP);
1564
1565  badfrag:
1566         REASON_SET(reason, PFRES_FRAG);
1567         if (r != NULL && r->log)
1568                 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1569         return (PF_DROP);
1570 }
1571 #endif /* INET6 */
1572
1573 int
1574 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
1575     int off, void *h, struct pf_pdesc *pd)
1576 {
1577         struct pf_rule  *r, *rm = NULL;
1578         struct tcphdr   *th = pd->hdr.tcp;
1579         int              rewrite = 0;
1580         u_short          reason;
1581         u_int8_t         flags;
1582         sa_family_t      af = pd->af;
1583
1584         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1585         while (r != NULL) {
1586                 r->evaluations++;
1587                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1588                         r = r->skip[PF_SKIP_IFP].ptr;
1589                 else if (r->direction && r->direction != dir)
1590                         r = r->skip[PF_SKIP_DIR].ptr;
1591                 else if (r->af && r->af != af)
1592                         r = r->skip[PF_SKIP_AF].ptr;
1593                 else if (r->proto && r->proto != pd->proto)
1594                         r = r->skip[PF_SKIP_PROTO].ptr;
1595                 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
1596                     r->src.neg, kif, M_GETFIB(m)))
1597                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1598                 else if (r->src.port_op && !pf_match_port(r->src.port_op,
1599                             r->src.port[0], r->src.port[1], th->th_sport))
1600                         r = r->skip[PF_SKIP_SRC_PORT].ptr;
1601                 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
1602                     r->dst.neg, NULL, M_GETFIB(m)))
1603                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
1604                 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
1605                             r->dst.port[0], r->dst.port[1], th->th_dport))
1606                         r = r->skip[PF_SKIP_DST_PORT].ptr;
1607                 else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
1608                             pf_osfp_fingerprint(pd, m, off, th),
1609                             r->os_fingerprint))
1610                         r = TAILQ_NEXT(r, entries);
1611                 else {
1612                         rm = r;
1613                         break;
1614                 }
1615         }
1616
1617         if (rm == NULL || rm->action == PF_NOSCRUB)
1618                 return (PF_PASS);
1619         else {
1620                 r->packets[dir == PF_OUT]++;
1621                 r->bytes[dir == PF_OUT] += pd->tot_len;
1622         }
1623
1624         if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
1625                 pd->flags |= PFDESC_TCP_NORM;
1626
1627         flags = th->th_flags;
1628         if (flags & TH_SYN) {
1629                 /* Illegal packet */
1630                 if (flags & TH_RST)
1631                         goto tcp_drop;
1632
1633                 if (flags & TH_FIN)
1634                         flags &= ~TH_FIN;
1635         } else {
1636                 /* Illegal packet */
1637                 if (!(flags & (TH_ACK|TH_RST)))
1638                         goto tcp_drop;
1639         }
1640
1641         if (!(flags & TH_ACK)) {
1642                 /* These flags are only valid if ACK is set */
1643                 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
1644                         goto tcp_drop;
1645         }
1646
1647         /* Check for illegal header length */
1648         if (th->th_off < (sizeof(struct tcphdr) >> 2))
1649                 goto tcp_drop;
1650
1651         /* If flags changed, or reserved data set, then adjust */
1652         if (flags != th->th_flags || th->th_x2 != 0) {
1653                 u_int16_t       ov, nv;
1654
1655                 ov = *(u_int16_t *)(&th->th_ack + 1);
1656                 th->th_flags = flags;
1657                 th->th_x2 = 0;
1658                 nv = *(u_int16_t *)(&th->th_ack + 1);
1659
1660                 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
1661                 rewrite = 1;
1662         }
1663
1664         /* Remove urgent pointer, if TH_URG is not set */
1665         if (!(flags & TH_URG) && th->th_urp) {
1666                 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
1667                 th->th_urp = 0;
1668                 rewrite = 1;
1669         }
1670
1671         /* Process options */
1672         if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af))
1673                 rewrite = 1;
1674
1675         /* copy back packet headers if we sanitized */
1676         if (rewrite)
1677 #ifdef __FreeBSD__
1678                 m_copyback(m, off, sizeof(*th), (caddr_t)th);
1679 #else
1680                 m_copyback(m, off, sizeof(*th), th);
1681 #endif
1682
1683         return (PF_PASS);
1684
1685  tcp_drop:
1686         REASON_SET(&reason, PFRES_NORM);
1687         if (rm != NULL && r->log)
1688                 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
1689         return (PF_DROP);
1690 }
1691
/*
 * Allocate and seed the scrub record of peer `src'.
 *
 * A record is taken from the scrub pool, zeroed, and given the TTL (IPv4) or
 * hop limit (IPv6) found in the packet's IP header.  If the segment has SYN
 * set and carries TCP options, the options are scanned; a timestamp option
 * sets PFSS_TIMESTAMP, picks a random modulation value and records the
 * timestamp value and echo together with the current uptime.
 *
 * Returns 1 when the pool allocation fails and 0 in every other case.
 * `dst' is not referenced in this body.
 */
1692 int
1693 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
1694     struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
1695 {
1696         u_int8_t         tcpbuf[60];
1697         u_int8_t        *cp;
1698         u_int32_t        val, ecr;
1699         int              left, step;
1700
1701         /* The peer must not own a scrub record yet. */
1702 #ifdef __FreeBSD__
1703         KASSERT((src->scrub == NULL),
1704             ("pf_normalize_tcp_init: src->scrub != NULL"));
1705         src->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT);
1706 #else
1707         KASSERT(src->scrub == NULL);
1708         src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
1709 #endif
1710         if (src->scrub == NULL)
1711                 return (1);
1712         bzero(src->scrub, sizeof(*src->scrub));
1713
1714         /* Remember the TTL / hop limit carried by this packet. */
1715         switch (pd->af) {
1716 #ifdef INET
1717         case AF_INET:
1718                 src->scrub->pfss_ttl = mtod(m, struct ip *)->ip_ttl;
1719                 break;
1720 #endif /* INET */
1721 #ifdef INET6
1722         case AF_INET6:
1723                 src->scrub->pfss_ttl = mtod(m, struct ip6_hdr *)->ip6_hlim;
1724                 break;
1725 #endif /* INET6 */
1726         }
1727
1728         /*
1729          * Everything below is only started on the opening segment of a
1730          * connection, and each step must flag itself in pfss_flags.
1731          */
1732         if (!(th->th_flags & TH_SYN))
1733                 return (0);
1734
1735         /* Done unless there are options and they can be pulled out. */
1736         if (th->th_off <= (sizeof(struct tcphdr) >> 2) ||
1737             src->scrub == NULL ||
1738             !pf_pull_hdr(m, off, tcpbuf, th->th_off << 2, NULL, NULL, pd->af))
1739                 return (0);
1740
1741         /* Walk the option bytes that follow the fixed TCP header. */
1742         cp = tcpbuf + sizeof(struct tcphdr);
1743         left = (th->th_off << 2) - sizeof(struct tcphdr);
1744         while (left >= TCPOLEN_TIMESTAMP) {
1745                 if (*cp == TCPOPT_EOL || *cp == TCPOPT_NOP) {
1746                         /* EOL and NOP are stepped over one byte at a time. */
1747                         cp++;
1748                         left--;
1749                         continue;
1750                 }
1751
1752                 if (*cp == TCPOPT_TIMESTAMP && cp[1] >= TCPOLEN_TIMESTAMP) {
1753                         src->scrub->pfss_flags |= PFSS_TIMESTAMP;
1754                         src->scrub->pfss_ts_mod = htonl(arc4random());
1755
1756                         /* note PFSS_PAWS not set yet */
1757                         memcpy(&val, cp + 2, sizeof(u_int32_t));
1758                         memcpy(&ecr, cp + 6, sizeof(u_int32_t));
1759                         src->scrub->pfss_tsval0 = ntohl(val);
1760                         src->scrub->pfss_tsval = ntohl(val);
1761                         src->scrub->pfss_tsecr = ntohl(ecr);
1762                         getmicrouptime(&src->scrub->pfss_last);
1763                 }
1764
1765                 /* Advance by the option's second byte, but by at least two. */
1766                 step = MAX(cp[1], 2);
1767                 left -= step;
1768                 cp += step;
1769         }
1770
1771         return (0);
1772 }
1781
/*
 * Hand the scrub records of both peers of `state' back to the scrub pool.
 * A peer whose scrub pointer is NULL is left alone.
 */
1782 void
1783 pf_normalize_tcp_cleanup(struct pf_state *state)
1784 {
1785         if (state->src.scrub != NULL)
1786 #ifdef __FreeBSD__
1787                 pool_put(&V_pf_state_scrub_pl, state->src.scrub);
1788 #else
1789                 pool_put(&pf_state_scrub_pl, state->src.scrub);
1790 #endif
1791         if (state->dst.scrub != NULL)
1792 #ifdef __FreeBSD__
1793                 pool_put(&V_pf_state_scrub_pl, state->dst.scrub);
1794 #else
1795                 pool_put(&pf_state_scrub_pl, state->dst.scrub);
1796 #endif
1797         /* Still to do: flush the TCP segment reassembly descriptors. */
1798 }
1799
/*
 * Stateful TCP normalization of one segment belonging to a tracked state.
 *
 * Using the scrub records hanging off the two state peers, this routine:
 *   - rewrites the packet's TTL / hop limit to the largest value recorded in
 *     src->scrub->pfss_ttl;
 *   - if either peer has PFSS_TIMESTAMP set, walks the TCP options,
 *     modulates the timestamp value and echo with the peers' pfss_ts_mod and
 *     copies the changed options back into the mbuf (setting *writeback);
 *   - retires PAWS checking (PFSS_PAWS -> PFSS_PAWS_IDLED) on peers that have
 *     been idle, or connections that have lived, for too long;
 *   - validates the timestamps against the recorded window while both peers
 *     have PFSS_PAWS set;
 *   - tracks whether data segments carry timestamps (PFSS_DATA_TS /
 *     PFSS_DATA_NOTS) and finally updates the recorded timestamp values.
 *
 * Returns PF_DROP with *reason set to PFRES_TS when a timestamp check fails,
 * and 0 otherwise.
 */
1800 int
1801 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
1802     u_short *reason, struct tcphdr *th, struct pf_state *state,
1803     struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
1804 {
1805         struct timeval uptime;
1806         u_int32_t tsval, tsecr;
1807         u_int tsval_from_last;
1808         u_int8_t hdr[60];
1809         u_int8_t *opt;
1810         int copyback = 0;
1811         int got_ts = 0;
1812
             /* At least one of the two peers must already have a scrub record. */
1813 #ifdef __FreeBSD__
1814         KASSERT((src->scrub || dst->scrub),
1815             ("pf_normalize_tcp_statefull: src->scrub && dst->scrub!"));
1816 #else
1817         KASSERT(src->scrub || dst->scrub);
1818 #endif
1819
1820         /*
1821          * Enforce the minimum TTL seen for this connection.  Negate a common
1822          * technique to evade an intrusion detection system and confuse
1823          * firewall state code.
1824          */
             /*
              * As written, pfss_ttl only ever grows, and every packet is stamped
              * with that recorded value.
              * NOTE(review): ip_ttl is rewritten below with no ip_sum adjustment
              * visible in this function -- confirm the IPv4 header checksum is
              * repaired elsewhere.
              */
1825         switch (pd->af) {
1826 #ifdef INET
1827         case AF_INET: {
1828                 if (src->scrub) {
1829                         struct ip *h = mtod(m, struct ip *);
1830                         if (h->ip_ttl > src->scrub->pfss_ttl)
1831                                 src->scrub->pfss_ttl = h->ip_ttl;
1832                         h->ip_ttl = src->scrub->pfss_ttl;
1833                 }
1834                 break;
1835         }
1836 #endif /* INET */
1837 #ifdef INET6
1838         case AF_INET6: {
1839                 if (src->scrub) {
1840                         struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1841                         if (h->ip6_hlim > src->scrub->pfss_ttl)
1842                                 src->scrub->pfss_ttl = h->ip6_hlim;
1843                         h->ip6_hlim = src->scrub->pfss_ttl;
1844                 }
1845                 break;
1846         }
1847 #endif /* INET6 */
1848         }
1849
             /*
              * Only look at the options when the header is longer than the fixed
              * part, at least one peer negotiated timestamps, and the whole
              * header could be copied into hdr[].
              */
1850         if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
1851             ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
1852             (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
1853             pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1854                 /* Diddle with TCP options */
1855                 int hlen;
1856                 opt = hdr + sizeof(struct tcphdr);
1857                 hlen = (th->th_off << 2) - sizeof(struct tcphdr);
                     /*
                      * The walk stops as soon as fewer than TCPOLEN_TIMESTAMP
                      * bytes remain, so opt[1]..opt[9] are always inside hdr[].
                      */
1858                 while (hlen >= TCPOLEN_TIMESTAMP) {
1859                         switch (*opt) {
1860                         case TCPOPT_EOL:        /* FALLTHROUGH */
1861                         case TCPOPT_NOP:
1862                                 opt++;
1863                                 hlen--;
1864                                 break;
1865                         case TCPOPT_TIMESTAMP:
1866                                 /* Modulate the timestamps.  Can be used for
1867                                  * NAT detection, OS uptime determination or
1868                                  * reboot detection.
1869                                  */
1870
1871                                 if (got_ts) {
1872                                         /* Huh?  Multiple timestamps!? */
1873 #ifdef __FreeBSD__
1874                                         if (V_pf_status.debug >= PF_DEBUG_MISC) {
1875 #else
1876                                         if (pf_status.debug >= PF_DEBUG_MISC) {
1877 #endif
1878                                                 DPFPRINTF(("multiple TS??"));
1879                                                 pf_print_state(state);
1880                                                 printf("\n");
1881                                         }
1882                                         REASON_SET(reason, PFRES_TS);
1883                                         return (PF_DROP);
1884                                 }
1885                                 if (opt[1] >= TCPOLEN_TIMESTAMP) {
                                             /*
                                              * tsval is only converted to host
                                              * order inside the branch below; the
                                              * value written into the option is
                                              * tsval plus the sender's modulator.
                                              */
1886                                         memcpy(&tsval, &opt[2],
1887                                             sizeof(u_int32_t));
1888                                         if (tsval && src->scrub &&
1889                                             (src->scrub->pfss_flags &
1890                                             PFSS_TIMESTAMP)) {
1891                                                 tsval = ntohl(tsval);
1892                                                 pf_change_a(&opt[2],
1893                                                     &th->th_sum,
1894                                                     htonl(tsval +
1895                                                     src->scrub->pfss_ts_mod),
1896                                                     0);
1897                                                 copyback = 1;
1898                                         }
1899
1900                                         /* Modulate TS reply iff valid (!0) */
1901                                         memcpy(&tsecr, &opt[6],
1902                                             sizeof(u_int32_t));
1903                                         if (tsecr && dst->scrub &&
1904                                             (dst->scrub->pfss_flags &
1905                                             PFSS_TIMESTAMP)) {
1906                                                 tsecr = ntohl(tsecr)
1907                                                     - dst->scrub->pfss_ts_mod;
1908                                                 pf_change_a(&opt[6],
1909                                                     &th->th_sum, htonl(tsecr),
1910                                                     0);
1911                                                 copyback = 1;
1912                                         }
1913                                         got_ts = 1;
1914                                 }
1915                                 /* FALLTHROUGH */
1916                         default:
                                     /* Advance by opt[1], but always by at least 2. */
1917                                 hlen -= MAX(opt[1], 2);
1918                                 opt += MAX(opt[1], 2);
1919                                 break;
1920                         }
1921                 }
1922                 if (copyback) {
1923                         /* Copyback the options, caller copies back header */
1924                         *writeback = 1;
1925                         m_copyback(m, off + sizeof(struct tcphdr),
1926                             (th->th_off << 2) - sizeof(struct tcphdr), hdr +
1927                             sizeof(struct tcphdr));
1928                 }
1929         }
1930
1931
1932         /*
1933          * Must invalidate PAWS checks on connections idle for too long.
1934          * The fastest allowed timestamp clock is 1ms.  That turns out to
1935          * be about 24 days before it wraps.  XXX Right now our lowerbound
1936          * TS echo check only works for the first 12 days of a connection
1937          * when the TS has exhausted half its 32bit space
1938          */
1939 #define TS_MAX_IDLE     (24*24*60*60)
1940 #define TS_MAX_CONN     (12*24*60*60)   /* XXX remove when better tsecr check */
1941
             /*
              * The source peer is retired on either idle time or total
              * connection age; the destination peer on idle time only.
              */
1942         getmicrouptime(&uptime);
1943         if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
1944             (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
1945             time_second - state->creation > TS_MAX_CONN))  {
1946 #ifdef __FreeBSD__
1947                 if (V_pf_status.debug >= PF_DEBUG_MISC) {
1948 #else
1949                 if (pf_status.debug >= PF_DEBUG_MISC) {
1950 #endif
1951                         DPFPRINTF(("src idled out of PAWS\n"));
1952                         pf_print_state(state);
1953                         printf("\n");
1954                 }
1955                 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
1956                     | PFSS_PAWS_IDLED;
1957         }
1958         if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
1959             uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
1960 #ifdef __FreeBSD__
1961                 if (V_pf_status.debug >= PF_DEBUG_MISC) {
1962 #else
1963                 if (pf_status.debug >= PF_DEBUG_MISC) {
1964 #endif
1965                         DPFPRINTF(("dst idled out of PAWS\n"));
1966                         pf_print_state(state);
1967                         printf("\n");
1968                 }
1969                 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
1970                     | PFSS_PAWS_IDLED;
1971         }
1972
1973         if (got_ts && src->scrub && dst->scrub &&
1974             (src->scrub->pfss_flags & PFSS_PAWS) &&
1975             (dst->scrub->pfss_flags & PFSS_PAWS)) {
1976                 /* Validate that the timestamps are "in-window".
1977                  * RFC1323 describes TCP Timestamp options that allow
1978                  * measurement of RTT (round trip time) and PAWS
1979                  * (protection against wrapped sequence numbers).  PAWS
1980                  * gives us a set of rules for rejecting packets on
1981                  * long fat pipes (packets that were somehow delayed
1982                  * in transit longer than the time it took to send the
1983                  * full TCP sequence space of 4Gb).  We can use these
1984                  * rules and infer a few others that will let us treat
1985                  * the 32bit timestamp and the 32bit echoed timestamp
1986                  * as sequence numbers to prevent a blind attacker from
1987                  * inserting packets into a connection.
1988                  *
1989                  * RFC1323 tells us:
1990                  *  - The timestamp on this packet must be greater than
1991                  *    or equal to the last value echoed by the other
1992                  *    endpoint.  The RFC says those will be discarded
1993                  *    since it is a dup that has already been acked.
1994                  *    This gives us a lowerbound on the timestamp.
1995                  *        timestamp >= other last echoed timestamp
1996                  *  - The timestamp will be less than or equal to
1997                  *    the last timestamp plus the time between the
1998                  *    last packet and now.  The RFC defines the max
1999                  *    clock rate as 1ms.  We will allow clocks to be
2000                  *    up to 10% fast and will allow a total difference
2001                  *    of 30 seconds due to a route change.  And this
2002                  *    gives us an upperbound on the timestamp.
2003                  *        timestamp <= last timestamp + max ticks
2004                  *    We have to be careful here.  Windows will send an
2005                  *    initial timestamp of zero and then initialize it
2006                  *    to a random value after the 3whs; presumably to
2007                  *    avoid a DoS by having to call an expensive RNG
2008                  *    during a SYN flood.  Proof MS has at least one
2009                  *    good security geek.
2010                  *
2011                  *  - The TCP timestamp option must also echo the other
2012                  *    endpoints timestamp.  The timestamp echoed is the
2013                  *    one carried on the earliest unacknowledged segment
2014                  *    on the left edge of the sequence window.  The RFC
2015                  *    states that the host will reject any echoed
2016                  *    timestamps that were larger than any ever sent.
2017                  *    This gives us an upperbound on the TS echo.
2018                  *        tsecr <= largest_tsval
2019                  *  - The lowerbound on the TS echo is a little more
2020                  *    tricky to determine.  The other endpoint's echoed
2021                  *    values will not decrease.  But there may be
2022                  *    network conditions that re-order packets and
2023                  *    cause our view of them to decrease.  For now the
2024                  *    only lowerbound we can safely determine is that
2025                  *    the TS echo will never be less than the original
2026                  *    TS.  XXX There is probably a better lowerbound.
2027                  *    Remove TS_MAX_CONN with better lowerbound check.
2028                  *        tsecr >= other original TS
2029                  *
2030                  * It is also important to note that the fastest
2031                  * timestamp clock of 1ms will wrap its 32bit space in
2032                  * 24 days.  So we just disable TS checking after 24
2033                  * days of idle time.  We actually must use a 12d
2034                  * connection limit until we can come up with a better
2035                  * lowerbound to the TS echo check.
2036                  */
2037                 struct timeval delta_ts;
2038                 int ts_fudge;
2039
2040
2041                 /*
2042                  * PFTM_TS_DIFF is how many seconds of leeway to allow
2043                  * a host's timestamp.  This can happen if the previous
2044                  * packet got delayed in transit for much longer than
2045                  * this packet.
2046                  */
                     /* A zero per-rule timeout falls back to the default rule's. */
2047                 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
2048 #ifdef __FreeBSD__
2049                         ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF];
2050 #else
2051                         ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
2052 #endif
2053
2054
2055                 /* Calculate max ticks since the last timestamp */
2056 #define TS_MAXFREQ      1100            /* RFC max TS freq of 1Khz + 10% skew */
2057 #define TS_MICROSECS    1000000         /* microseconds per second */
2058 #ifdef __FreeBSD__
2059 #ifndef timersub
2060 #define timersub(tvp, uvp, vvp)                                         \
2061         do {                                                            \
2062                 (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;          \
2063                 (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;       \
2064                 if ((vvp)->tv_usec < 0) {                               \
2065                         (vvp)->tv_sec--;                                \
2066                         (vvp)->tv_usec += 1000000;                      \
2067                 }                                                       \
2068         } while (0)
2069 #endif
2070 #endif
                     /*
                      * tsval_from_last = (seconds since src's last timestamp +
                      * ts_fudge) * TS_MAXFREQ, plus the sub-second remainder
                      * scaled to the same tick rate.
                      */
2071                 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
2072                 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
2073                 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
2074
2075
                     /*
                      * The window is only enforced once both peers have reached
                      * TCPS_ESTABLISHED or later.  The four SEQ_* tests are the
                      * bounds listed in the long comment above, in order; the
                      * two tsecr tests are skipped when tsecr is zero.
                      */
2076                 if ((src->state >= TCPS_ESTABLISHED &&
2077                     dst->state >= TCPS_ESTABLISHED) &&
2078                     (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
2079                     SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
2080                     (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
2081                     SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
2082                         /* Bad RFC1323 implementation or an insertion attack.
2083                          *
2084                          * - Solaris 2.6 and 2.7 are known to send another ACK
2085                          *   after the FIN,FIN|ACK,ACK closing that carries
2086                          *   an old timestamp.
2087                          */
2088
                             /* Digits '0'..'3' mark which of the four tests failed. */
2089                         DPFPRINTF(("Timestamp failed %c%c%c%c\n",
2090                             SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
2091                             SEQ_GT(tsval, src->scrub->pfss_tsval +
2092                             tsval_from_last) ? '1' : ' ',
2093                             SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
2094                             SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
2095 #ifdef __FreeBSD__
2096                         DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
2097                             "idle: %jus %lums\n",
2098                             tsval, tsecr, tsval_from_last,
2099                             (uintmax_t)delta_ts.tv_sec,
2100                             delta_ts.tv_usec / 1000));
2101                         DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
2102                             src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2103                         DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u"
2104                             "\n", dst->scrub->pfss_tsval,
2105                             dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
2106 #else
                             /*
                              * NOTE(review): %lu is used here for tsval/tsecr
                              * (u_int32_t) and tsval_from_last (u_int) -- confirm
                              * this matches the argument widths on the target.
                              */
2107                         DPFPRINTF((" tsval: %lu  tsecr: %lu  +ticks: %lu  "
2108                             "idle: %lus %lums\n",
2109                             tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
2110                             delta_ts.tv_usec / 1000));
2111                         DPFPRINTF((" src->tsval: %lu  tsecr: %lu\n",
2112                             src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2113                         DPFPRINTF((" dst->tsval: %lu  tsecr: %lu  tsval0: %lu"
2114                             "\n", dst->scrub->pfss_tsval,
2115                             dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
2116 #endif
2117 #ifdef __FreeBSD__
2118                         if (V_pf_status.debug >= PF_DEBUG_MISC) {
2119 #else
2120                         if (pf_status.debug >= PF_DEBUG_MISC) {
2121 #endif
2122                                 pf_print_state(state);
2123                                 pf_print_flags(th->th_flags);
2124                                 printf("\n");
2125                         }
2126                         REASON_SET(reason, PFRES_TS);
2127                         return (PF_DROP);
2128                 }
2129
2130                 /* XXX I'd really like to require tsecr but it's optional */
2131
2132         } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
2133             ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
2134             || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
2135             src->scrub && dst->scrub &&
2136             (src->scrub->pfss_flags & PFSS_PAWS) &&
2137             (dst->scrub->pfss_flags & PFSS_PAWS)) {
2138                 /* Didn't send a timestamp.  Timestamps aren't really useful
2139                  * when:
2140                  *  - connection opening or closing (often not even sent).
2141                  *    but we must not let an attacker put a FIN on a
2142                  *    data packet to sneak it through our ESTABLISHED check.
2143                  *  - on a TCP reset.  RFC suggests not even looking at TS.
2144                  *  - on an empty ACK.  The TS will not be echoed so it will
2145                  *    probably not help keep the RTT calculation in sync and
2146                  *    there isn't as much danger when the sequence numbers
2147                  *    got wrapped.  So some stacks don't include TS on empty
2148                  *    ACKs :-(
2149                  *
2150                  * To minimize the disruption to mostly RFC1323 conformant
2151                  * stacks, we will only require timestamps on data packets.
2152                  *
2153                  * And what do ya know, we cannot require timestamps on data
2154                  * packets.  There appear to be devices that do legitimate
2155                  * TCP connection hijacking.  There are HTTP devices that allow
2156                  * a 3whs (with timestamps) and then buffer the HTTP request.
2157                  * If the intermediate device has the HTTP response cache, it
2158                  * will spoof the response but not bother timestamping its
2159                  * packets.  So we can look for the presence of a timestamp in
2160                  * the first data packet and if there, require it in all future
2161                  * packets.
2162                  */
2163
2164                 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
2165                         /*
2166                          * Hey!  Someone tried to sneak a packet in.  Or the
2167                          * stack changed its RFC1323 behavior?!?!
2168                          */
2169 #ifdef __FreeBSD__
2170                         if (V_pf_status.debug >= PF_DEBUG_MISC) {
2171 #else
2172                         if (pf_status.debug >= PF_DEBUG_MISC) {
2173 #endif
2174                                 DPFPRINTF(("Did not receive expected RFC1323 "
2175                                     "timestamp\n"));
2176                                 pf_print_state(state);
2177                                 pf_print_flags(th->th_flags);
2178                                 printf("\n");
2179                         }
2180                         REASON_SET(reason, PFRES_TS);
2181                         return (PF_DROP);
2182                 }
2183         }
2184
2185
2186         /*
2187          * We will note if a host sends his data packets with or without
2188          * timestamps.  And require all data packets to contain a timestamp
2189          * if the first does.  PAWS implicitly requires that all data packets be
2190          * timestamped.  But I think there are middle-man devices that hijack
2191          * TCP streams immediately after the 3whs and don't timestamp their
2192          * packets (seen in a WWW accelerator or cache).
2193          */
             /*
              * The mask test is true only while PFSS_TIMESTAMP is set and neither
              * PFSS_DATA_TS nor PFSS_DATA_NOTS has been recorded yet, so the
              * decision below is taken once per peer.
              */
2194         if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
2195             (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
2196                 if (got_ts)
2197                         src->scrub->pfss_flags |= PFSS_DATA_TS;
2198                 else {
2199                         src->scrub->pfss_flags |= PFSS_DATA_NOTS;
2200 #ifdef __FreeBSD__
2201                         if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2202 #else
2203                         if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2204 #endif
2205                             (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
2206                                 /* Don't warn if other host rejected RFC1323 */
2207                                 DPFPRINTF(("Broken RFC1323 stack did not "
2208                                     "timestamp data packet. Disabled PAWS "
2209                                     "security.\n"));
2210                                 pf_print_state(state);
2211                                 pf_print_flags(th->th_flags);
2212                                 printf("\n");
2213                         }
2214                 }
2215         }
2216
2217
2218         /*
2219          * Update PAWS values
2220          */
             /*
              * Done only for a timestamped segment from a peer that has
              * PFSS_TIMESTAMP set and has not been marked PFSS_PAWS_IDLED.
              * Until PFSS_PAWS is set the recorded values are overwritten
              * unconditionally; afterwards they only move forward.
              */
2221         if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
2222             (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
2223                 getmicrouptime(&src->scrub->pfss_last);
2224                 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
2225                     (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2226                         src->scrub->pfss_tsval = tsval;
2227
2228                 if (tsecr) {
2229                         if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
2230                             (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2231                                 src->scrub->pfss_tsecr = tsecr;
2232
2233                         if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
2234                             (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
2235                             src->scrub->pfss_tsval0 == 0)) {
2236                                 /* tsval0 MUST be the lowest timestamp */
2237                                 src->scrub->pfss_tsval0 = tsval;
2238                         }
2239
2240                         /* Only fully initialized after a TS gets echoed */
2241                         if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
2242                                 src->scrub->pfss_flags |= PFSS_PAWS;
2243                 }
2244         }
2245
2246         /* I have a dream....  TCP segment reassembly.... */
2247         return (0);
2248 }
2249
2250 int
2251 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
2252     int off, sa_family_t af)
2253 {
2254         u_int16_t       *mss;
2255         int              thoff;
2256         int              opt, cnt, optlen = 0;
2257         int              rewrite = 0;
2258 #ifdef __FreeBSD__
2259         u_char           opts[TCP_MAXOLEN];
2260 #else
2261         u_char           opts[MAX_TCPOPTLEN];
2262 #endif
2263         u_char          *optp = opts;
2264
2265         thoff = th->th_off << 2;
2266         cnt = thoff - sizeof(struct tcphdr);
2267
2268         if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
2269             NULL, NULL, af))
2270                 return (rewrite);
2271
2272         for (; cnt > 0; cnt -= optlen, optp += optlen) {
2273                 opt = optp[0];
2274                 if (opt == TCPOPT_EOL)
2275                         break;
2276                 if (opt == TCPOPT_NOP)
2277                         optlen = 1;
2278                 else {
2279                         if (cnt < 2)
2280                                 break;
2281                         optlen = optp[1];
2282                         if (optlen < 2 || optlen > cnt)
2283                                 break;
2284                 }
2285                 switch (opt) {
2286                 case TCPOPT_MAXSEG:
2287                         mss = (u_int16_t *)(optp + 2);
2288                         if ((ntohs(*mss)) > r->max_mss) {
2289                                 th->th_sum = pf_cksum_fixup(th->th_sum,
2290                                     *mss, htons(r->max_mss), 0);
2291                                 *mss = htons(r->max_mss);
2292                                 rewrite = 1;
2293                         }
2294                         break;
2295                 default:
2296                         break;
2297                 }
2298         }
2299
2300         if (rewrite)
2301                 m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts);
2302
2303         return (rewrite);
2304 }
2305
2306 void
2307 pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos)
2308 {
2309         struct mbuf             *m = *m0;
2310         struct ip               *h = mtod(m, struct ip *);
2311
2312         /* Clear IP_DF if no-df was requested */
2313         if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
2314                 u_int16_t ip_off = h->ip_off;
2315
2316                 h->ip_off &= htons(~IP_DF);
2317                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
2318         }
2319
2320         /* Enforce a minimum ttl, may cause endless packet loops */
2321         if (min_ttl && h->ip_ttl < min_ttl) {
2322                 u_int16_t ip_ttl = h->ip_ttl;
2323
2324                 h->ip_ttl = min_ttl;
2325                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
2326         }
2327
2328         /* Enforce tos */
2329         if (flags & PFRULE_SET_TOS) {
2330                 u_int16_t       ov, nv;
2331
2332                 ov = *(u_int16_t *)h;
2333                 h->ip_tos = tos;
2334                 nv = *(u_int16_t *)h;
2335
2336                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
2337         }
2338
2339         /* random-id, but not for fragments */
2340         if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
2341                 u_int16_t ip_id = h->ip_id;
2342
2343                 h->ip_id = ip_randomid();
2344                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
2345         }
2346 }
2347
2348 #ifdef INET6
2349 void
2350 pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl)
2351 {
2352         struct mbuf             *m = *m0;
2353         struct ip6_hdr          *h = mtod(m, struct ip6_hdr *);
2354
2355         /* Enforce a minimum ttl, may cause endless packet loops */
2356         if (min_ttl && h->ip6_hlim < min_ttl)
2357                 h->ip6_hlim = min_ttl;
2358 }
2359 #endif