/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2008 Henning Brauer
 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 *	$OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/gsb_crc32.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/random.h>
#include <sys/refcount.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/ucred.h>

#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <net/route/nhop.h>

#include <net/pfvar.h>
#include <net/if_pflog.h>
#include <net/if_pfsync.h>

#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/in_fib.h>
#include <netinet/ip.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

/* dummynet */
#include <netinet/ip_dummynet.h>
#include <netinet/ip_fw.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/ip_dn_private.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/in6_fib.h>
#include <netinet6/scope6_var.h>

#if defined(SCTP) || defined(SCTP_SUPPORT)
#include <netinet/sctp_crc32.h>
#endif

#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
#define	DPFPRINTF(n, x)	if (V_pf_status.debug >= (n)) printf x

SDT_PROVIDER_DEFINE(pf);
SDT_PROBE_DEFINE4(pf, ip, test, done, "int", "int", "struct pf_krule *",
    "struct pf_kstate *");
SDT_PROBE_DEFINE4(pf, ip, test6, done, "int", "int", "struct pf_krule *",
    "struct pf_kstate *");
SDT_PROBE_DEFINE5(pf, ip, state, lookup, "struct pfi_kkif *",
    "struct pf_state_key_cmp *", "int", "struct pf_pdesc *",
    "struct pf_kstate *");

SDT_PROBE_DEFINE3(pf, eth, test_rule, entry, "int", "struct ifnet *",
    "struct mbuf *");
SDT_PROBE_DEFINE2(pf, eth, test_rule, test, "int", "struct pf_keth_rule *");
SDT_PROBE_DEFINE3(pf, eth, test_rule, mismatch,
    "int", "struct pf_keth_rule *", "char *");
SDT_PROBE_DEFINE2(pf, eth, test_rule, match, "int", "struct pf_keth_rule *");
SDT_PROBE_DEFINE3(pf, eth, test_rule, final_match,
    "int", "struct pf_keth_rule *");
VNET_DEFINE(struct pf_altqqueue, pf_altqs[4]);
VNET_DEFINE(struct pf_kpalist, pf_pabuf);
VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active);
VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_active);
VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive);
VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_inactive);
VNET_DEFINE(struct pf_kstatus, pf_status);

VNET_DEFINE(u_int32_t, ticket_altqs_active);
VNET_DEFINE(u_int32_t, ticket_altqs_inactive);
VNET_DEFINE(int, altqs_inactive_open);
VNET_DEFINE(u_int32_t, ticket_pabuf);

VNET_DEFINE(MD5_CTX, pf_tcp_secret_ctx);
#define	V_pf_tcp_secret_ctx	VNET(pf_tcp_secret_ctx)
VNET_DEFINE(u_char, pf_tcp_secret[16]);
#define	V_pf_tcp_secret		VNET(pf_tcp_secret)
VNET_DEFINE(int, pf_tcp_secret_init);
#define	V_pf_tcp_secret_init	VNET(pf_tcp_secret_init)
VNET_DEFINE(int, pf_tcp_iss_off);
#define	V_pf_tcp_iss_off	VNET(pf_tcp_iss_off)
VNET_DECLARE(int, pf_vnet_active);
#define	V_pf_vnet_active	VNET(pf_vnet_active)

VNET_DEFINE_STATIC(uint32_t, pf_purge_idx);
#define	V_pf_purge_idx	VNET(pf_purge_idx)

#ifdef PF_WANT_32_TO_64_COUNTER
VNET_DEFINE_STATIC(uint32_t, pf_counter_periodic_iter);
#define	V_pf_counter_periodic_iter	VNET(pf_counter_periodic_iter)

VNET_DEFINE(struct allrulelist_head, pf_allrulelist);
VNET_DEFINE(size_t, pf_allrulecount);
VNET_DEFINE(struct pf_krule *, pf_rulemarker);
#endif
/*
 * Queue for pf_intr() sends.
 */
static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations");
struct pf_send_entry {
	STAILQ_ENTRY(pf_send_entry)	pfse_next;
	struct mbuf			*pfse_m;
	enum {
		PFSE_IP,
		PFSE_IP6,
		PFSE_ICMP,
		PFSE_ICMP6,
	}				pfse_type;
	struct {
		int		type;
		int		code;
		int		mtu;
	} icmpopts;
};

STAILQ_HEAD(pf_send_head, pf_send_entry);
VNET_DEFINE_STATIC(struct pf_send_head, pf_sendqueue);
#define	V_pf_sendqueue	VNET(pf_sendqueue)

static struct mtx_padalign pf_sendqueue_mtx;
MTX_SYSINIT(pf_sendqueue_mtx, &pf_sendqueue_mtx, "pf send queue", MTX_DEF);
#define	PF_SENDQ_LOCK()		mtx_lock(&pf_sendqueue_mtx)
#define	PF_SENDQ_UNLOCK()	mtx_unlock(&pf_sendqueue_mtx)
/*
 * Queue for pf_overload_task() tasks.
 */
struct pf_overload_entry {
	SLIST_ENTRY(pf_overload_entry)	next;
	struct pf_addr			addr;
	sa_family_t			af;
	uint8_t				dir;
	struct pf_krule			*rule;
};

SLIST_HEAD(pf_overload_head, pf_overload_entry);
VNET_DEFINE_STATIC(struct pf_overload_head, pf_overloadqueue);
#define	V_pf_overloadqueue	VNET(pf_overloadqueue)
VNET_DEFINE_STATIC(struct task, pf_overloadtask);
#define	V_pf_overloadtask	VNET(pf_overloadtask)

static struct mtx_padalign pf_overloadqueue_mtx;
MTX_SYSINIT(pf_overloadqueue_mtx, &pf_overloadqueue_mtx,
    "pf overload/flush queue", MTX_DEF);
#define	PF_OVERLOADQ_LOCK()	mtx_lock(&pf_overloadqueue_mtx)
#define	PF_OVERLOADQ_UNLOCK()	mtx_unlock(&pf_overloadqueue_mtx)

VNET_DEFINE(struct pf_krulequeue, pf_unlinked_rules);
struct mtx_padalign pf_unlnkdrules_mtx;
MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules",
    MTX_DEF);

struct sx pf_config_lock;
SX_SYSINIT(pf_config_lock, &pf_config_lock, "pf config");

struct mtx_padalign pf_table_stats_lock;
MTX_SYSINIT(pf_table_stats_lock, &pf_table_stats_lock, "pf table stats",
    MTX_DEF);

VNET_DEFINE_STATIC(uma_zone_t, pf_sources_z);
#define	V_pf_sources_z	VNET(pf_sources_z)
uma_zone_t	pf_mtag_z;
VNET_DEFINE(uma_zone_t, pf_state_z);
VNET_DEFINE(uma_zone_t, pf_state_key_z);

VNET_DEFINE(uint64_t, pf_stateid[MAXCPU]);
#define	PFID_CPUBITS	8
#define	PFID_CPUSHIFT	(sizeof(uint64_t) * NBBY - PFID_CPUBITS)
#define	PFID_CPUMASK	((uint64_t)((1 << PFID_CPUBITS) - 1) << PFID_CPUSHIFT)
#define	PFID_MAXID	(~PFID_CPUMASK)
CTASSERT((1 << PFID_CPUBITS) >= MAXCPU);
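
/*
 * Illustrative sketch (not part of the source): with PFID_CPUBITS == 8,
 * PFID_CPUSHIFT is 64 - 8 = 56, so a state ID carries the CPU number in
 * its top byte and a per-CPU counter in the low 56 bits, roughly:
 *
 *	id = V_pf_stateid[curcpu]++;			// low 56 bits
 *	id |= (uint64_t)curcpu << PFID_CPUSHIFT;	// top 8 bits
 *
 * Two CPUs can therefore generate IDs concurrently without sharing a
 * counter; the CTASSERT above checks that 8 bits suffice for MAXCPU.
 */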
static void		 pf_src_tree_remove_state(struct pf_kstate *);
static void		 pf_init_threshold(struct pf_threshold *, u_int32_t,
			    u_int32_t);
static void		 pf_add_threshold(struct pf_threshold *);
static int		 pf_check_threshold(struct pf_threshold *);

static void		 pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *,
			    u_int16_t *, u_int16_t *, struct pf_addr *,
			    u_int16_t, u_int8_t, sa_family_t);
static int		 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
			    struct tcphdr *, struct pf_state_peer *);
static void		 pf_change_icmp(struct pf_addr *, u_int16_t *,
			    struct pf_addr *, struct pf_addr *, u_int16_t,
			    u_int16_t *, u_int16_t *, u_int16_t *,
			    u_int16_t *, u_int8_t, sa_family_t);
static void		 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
			    sa_family_t, struct pf_krule *);
static void		 pf_detach_state(struct pf_kstate *);
static int		 pf_state_key_attach(struct pf_state_key *,
			    struct pf_state_key *, struct pf_kstate *);
static void		 pf_state_key_detach(struct pf_kstate *, int);
static int		 pf_state_key_ctor(void *, int, void *, int);
static u_int32_t	 pf_tcp_iss(struct pf_pdesc *);
void			 pf_rule_to_actions(struct pf_krule *,
			    struct pf_rule_actions *);
static int		 pf_test_eth_rule(int, struct pfi_kkif *,
			    struct mbuf **);
static int		 pf_test_rule(struct pf_krule **, struct pf_kstate **,
			    int, struct pfi_kkif *, struct mbuf *, int,
			    struct pf_pdesc *, struct pf_krule **,
			    struct pf_kruleset **, struct inpcb *);
static int		 pf_create_state(struct pf_krule *, struct pf_krule *,
			    struct pf_krule *, struct pf_pdesc *,
			    struct pf_ksrc_node *, struct pf_state_key *,
			    struct pf_state_key *, struct mbuf *, int,
			    u_int16_t, u_int16_t, int *, struct pfi_kkif *,
			    struct pf_kstate **, int, u_int16_t, u_int16_t,
			    int);
static int		 pf_test_fragment(struct pf_krule **, int,
			    struct pfi_kkif *, struct mbuf *, void *,
			    struct pf_pdesc *, struct pf_krule **,
			    struct pf_kruleset **);
static int		 pf_tcp_track_full(struct pf_kstate **,
			    struct pfi_kkif *, struct mbuf *, int,
			    struct pf_pdesc *, u_short *, int *);
static int		 pf_tcp_track_sloppy(struct pf_kstate **,
			    struct pf_pdesc *, u_short *);
static int		 pf_test_state_tcp(struct pf_kstate **, int,
			    struct pfi_kkif *, struct mbuf *, int,
			    void *, struct pf_pdesc *, u_short *);
static int		 pf_test_state_udp(struct pf_kstate **, int,
			    struct pfi_kkif *, struct mbuf *, int,
			    void *, struct pf_pdesc *);
static int		 pf_test_state_icmp(struct pf_kstate **, int,
			    struct pfi_kkif *, struct mbuf *, int,
			    void *, struct pf_pdesc *, u_short *);
static int		 pf_test_state_other(struct pf_kstate **, int,
			    struct pfi_kkif *, struct mbuf *, struct pf_pdesc *);
static u_int16_t	 pf_calc_mss(struct pf_addr *, sa_family_t,
			    int, u_int16_t);
static int		 pf_check_proto_cksum(struct mbuf *, int, int,
			    u_int8_t, sa_family_t);
static void		 pf_print_state_parts(struct pf_kstate *,
			    struct pf_state_key *, struct pf_state_key *);
static int		 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
static void		 pf_patch_8(struct mbuf *, u_int16_t *, u_int8_t *, u_int8_t,
			    bool, u_int8_t);
static struct pf_kstate	*pf_find_state(struct pfi_kkif *,
			    struct pf_state_key_cmp *, u_int);
static int		 pf_src_connlimit(struct pf_kstate **);
static void		 pf_overload_task(void *v, int pending);
static int		 pf_insert_src_node(struct pf_ksrc_node **,
			    struct pf_krule *, struct pf_addr *, sa_family_t);
static u_int		 pf_purge_expired_states(u_int, int);
static void		 pf_purge_unlinked_rules(void);
static int		 pf_mtag_uminit(void *, int, int);
static void		 pf_mtag_free(struct m_tag *);
static void		 pf_packet_rework_nat(struct mbuf *, struct pf_pdesc *,
			    int, struct pf_state_key *);
#ifdef INET
static void		 pf_route(struct mbuf **, struct pf_krule *, int,
			    struct ifnet *, struct pf_kstate *,
			    struct pf_pdesc *, struct inpcb *);
#endif /* INET */
#ifdef INET6
static void		 pf_change_a6(struct pf_addr *, u_int16_t *,
			    struct pf_addr *, u_int8_t);
static void		 pf_route6(struct mbuf **, struct pf_krule *, int,
			    struct ifnet *, struct pf_kstate *,
			    struct pf_pdesc *, struct inpcb *);
#endif /* INET6 */

static __inline void	 pf_set_protostate(struct pf_kstate *, int, u_int8_t);

int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);

extern int pf_end_threads;
extern struct proc *pf_purge_proc;

VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
#define	PACKET_UNDO_NAT(_m, _pd, _off, _s, _dir)		\
	do {							\
		struct pf_state_key *nk;			\
		if ((_dir) == PF_OUT)				\
			nk = (_s)->key[PF_SK_STACK];		\
		else						\
			nk = (_s)->key[PF_SK_WIRE];		\
		pf_packet_rework_nat(_m, _pd, _off, nk);	\
	} while (0)

#define	PACKET_LOOPED(pd)	((pd)->pf_mtag &&			\
				 (pd)->pf_mtag->flags & PF_PACKET_LOOPED)

#define	STATE_LOOKUP(i, k, d, s, pd)					\
	do {								\
		(s) = pf_find_state((i), (k), (d));			\
		SDT_PROBE5(pf, ip, state, lookup, i, k, d, pd, (s));	\
		if ((s) == NULL)					\
			return (PF_DROP);				\
		if (PACKET_LOOPED(pd))					\
			return (PF_PASS);				\
	} while (0)

#define	BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all

#define	STATE_INC_COUNTERS(s)						\
	do {								\
		counter_u64_add(s->rule.ptr->states_cur, 1);		\
		counter_u64_add(s->rule.ptr->states_tot, 1);		\
		if (s->anchor.ptr != NULL) {				\
			counter_u64_add(s->anchor.ptr->states_cur, 1);	\
			counter_u64_add(s->anchor.ptr->states_tot, 1);	\
		}							\
		if (s->nat_rule.ptr != NULL) {				\
			counter_u64_add(s->nat_rule.ptr->states_cur, 1);\
			counter_u64_add(s->nat_rule.ptr->states_tot, 1);\
		}							\
	} while (0)

#define	STATE_DEC_COUNTERS(s)						\
	do {								\
		if (s->nat_rule.ptr != NULL)				\
			counter_u64_add(s->nat_rule.ptr->states_cur, -1);\
		if (s->anchor.ptr != NULL)				\
			counter_u64_add(s->anchor.ptr->states_cur, -1);	\
		counter_u64_add(s->rule.ptr->states_cur, -1);		\
	} while (0)

MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
VNET_DEFINE(struct pf_idhash *, pf_idhash);
VNET_DEFINE(struct pf_srchash *, pf_srchash);

SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "pf(4)");

u_long	pf_hashmask;
u_long	pf_srchashmask;
static u_long	pf_hashsize;
static u_long	pf_srchashsize;
u_long	pf_ioctl_maxcount = 65535;

SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN,
    &pf_hashsize, 0, "Size of pf(4) states hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN,
    &pf_srchashsize, 0, "Size of pf(4) source nodes hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, request_maxcount, CTLFLAG_RWTUN,
    &pf_ioctl_maxcount, 0, "Maximum number of tables, addresses, ... in a single ioctl() call");

VNET_DEFINE(void *, pf_swi_cookie);
VNET_DEFINE(struct intr_event *, pf_swi_ie);

VNET_DEFINE(uint32_t, pf_hashseed);
#define	V_pf_hashseed	VNET(pf_hashseed)
#ifdef __LP64__
static int
pf_bcmp_state_key(struct pf_state_key *k1_orig, struct pf_state_key_cmp *k2_orig)
{
	unsigned long *k1 = (unsigned long *)k1_orig;
	unsigned long *k2 = (unsigned long *)k2_orig;

	if (k1[0] != k2[0])
		return (1);
	if (k1[1] != k2[1])
		return (1);
	if (k1[2] != k2[2])
		return (1);
	if (k1[3] != k2[3])
		return (1);
	if (k1[4] != k2[4])
		return (1);

	return (0);
}
_Static_assert(sizeof(struct pf_state_key_cmp) == 40, "bad size of pf_state_key_cmp");
#else
static inline int
pf_bcmp_state_key(struct pf_state_key *k1_orig, struct pf_state_key_cmp *k2_orig)
{
	return (bcmp(k1_orig, k2_orig, sizeof(struct pf_state_key_cmp)));
}
#endif
static __inline int
pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	default:
		panic("%s: unknown address family %u", __func__, af);
	}

	return (0);
}
static void
pf_packet_rework_nat(struct mbuf *m, struct pf_pdesc *pd, int off,
    struct pf_state_key *nk)
{
	switch (pd->proto) {
	case IPPROTO_TCP: {
		struct tcphdr *th = &pd->hdr.tcp;

		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
			pf_change_ap(m, pd->src, &th->th_sport, pd->ip_sum,
			    &th->th_sum, &nk->addr[pd->sidx],
			    nk->port[pd->sidx], 0, pd->af);
		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
			pf_change_ap(m, pd->dst, &th->th_dport, pd->ip_sum,
			    &th->th_sum, &nk->addr[pd->didx],
			    nk->port[pd->didx], 0, pd->af);
		m_copyback(m, off, sizeof(*th), (caddr_t)th);
		break;
	}
	case IPPROTO_UDP: {
		struct udphdr *uh = &pd->hdr.udp;

		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
			pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum,
			    &uh->uh_sum, &nk->addr[pd->sidx],
			    nk->port[pd->sidx], 1, pd->af);
		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
			pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum,
			    &uh->uh_sum, &nk->addr[pd->didx],
			    nk->port[pd->didx], 1, pd->af);
		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
		break;
	}
	case IPPROTO_ICMP: {
		struct icmp *ih = &pd->hdr.icmp;

		if (nk->port[pd->sidx] != ih->icmp_id) {
			pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
			    ih->icmp_cksum, ih->icmp_id,
			    nk->port[pd->sidx], 0);
			ih->icmp_id = nk->port[pd->sidx];
			pd->sport = &ih->icmp_id;

			m_copyback(m, off, ICMP_MINLEN, (caddr_t)ih);
		}
		/* FALLTHROUGH */
	}
	default:
		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) {
			switch (pd->af) {
			case AF_INET:
				pf_change_a(&pd->src->v4.s_addr,
				    pd->ip_sum, nk->addr[pd->sidx].v4.s_addr,
				    0);
				break;
			case AF_INET6:
				PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
				break;
			}
		}
		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) {
			switch (pd->af) {
			case AF_INET:
				pf_change_a(&pd->dst->v4.s_addr,
				    pd->ip_sum, nk->addr[pd->didx].v4.s_addr,
				    0);
				break;
			case AF_INET6:
				PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
				break;
			}
		}
		break;
	}
}
static __inline uint32_t
pf_hashkey(struct pf_state_key *sk)
{
	uint32_t h;

	h = murmur3_32_hash32((uint32_t *)sk,
	    sizeof(struct pf_state_key_cmp)/sizeof(uint32_t),
	    V_pf_hashseed);

	return (h & pf_hashmask);
}
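
/*
 * Illustrative note (assumption: pf_initialize() below forces pf_hashsize
 * to a power of two): masking with pf_hashmask == pf_hashsize - 1 selects
 * a slot without the cost of a modulo, e.g. for pf_hashsize == 1024:
 *
 *	h & 0x3ff  ==  h % 1024
 *
 * murmur3_32_hash32() consumes the key as 32-bit words, which is why only
 * the pf_state_key_cmp prefix of the key is hashed (and compared).
 */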
static __inline uint32_t
pf_hashsrc(struct pf_addr *addr, sa_family_t af)
{
	uint32_t h;

	switch (af) {
	case AF_INET:
		h = murmur3_32_hash32((uint32_t *)&addr->v4,
		    sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed);
		break;
	case AF_INET6:
		h = murmur3_32_hash32((uint32_t *)&addr->v6,
		    sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed);
		break;
	default:
		panic("%s: unknown address family %u", __func__, af);
	}

	return (h & pf_srchashmask);
}
#ifdef ALTQ
static int
pf_state_hash(struct pf_kstate *s)
{
	u_int32_t hv = (intptr_t)s / sizeof(*s);

	hv ^= crc32(&s->src, sizeof(s->src));
	hv ^= crc32(&s->dst, sizeof(s->dst));
	if (hv == 0)
		hv = 1;
	return (hv);
}
#endif
static __inline void
pf_set_protostate(struct pf_kstate *s, int which, u_int8_t newstate)
{
	if (which == PF_PEER_DST || which == PF_PEER_BOTH)
		s->dst.state = newstate;
	if (which == PF_PEER_DST)
		return;
	if (s->src.state == newstate)
		return;
	if (s->creatorid == V_pf_status.hostid &&
	    s->key[PF_SK_STACK] != NULL &&
	    s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
	    !(TCPS_HAVEESTABLISHED(s->src.state) ||
	    s->src.state == TCPS_CLOSED) &&
	    (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
		atomic_add_32(&V_pf_status.states_halfopen, -1);

	s->src.state = newstate;
}
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#endif /* INET */
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
	}
}
static void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = time_uptime;
}

static void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = time_uptime, diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

static int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}
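
/*
 * Worked example (PF_THRESHOLD_MULT is 1000; rule values hypothetical):
 * for "max-src-conn-rate 10/5", limit = 10 * 1000 and seconds = 5.  Each
 * new connection first decays the counter linearly over the window and
 * then adds one fixed-point unit.  With count == 8000 and diff == 2:
 *
 *	count -= 8000 * 2 / 5;		// -> 4800
 *	count += PF_THRESHOLD_MULT;	// -> 5800
 *
 * pf_check_threshold() trips once count exceeds 10000, i.e. once the
 * decayed connection rate exceeds 10 per 5 seconds.
 */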
static int
pf_src_connlimit(struct pf_kstate **state)
{
	struct pf_overload_entry *pfoe;
	int bad = 0;

	PF_STATE_LOCK_ASSERT(*state);

	(*state)->src_node->conn++;
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&(*state)->src_node->conn_rate);

	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn <
	    (*state)->src_node->conn) {
		counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1);
		bad++;
	}

	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
		counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1);
		bad++;
	}

	if (!bad)
		return (0);

	/* Kill this state. */
	(*state)->timeout = PFTM_PURGE;
	pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED);

	if ((*state)->rule.ptr->overload_tbl == NULL)
		return (1);

	/* Schedule overloading and flushing task. */
	pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
	if (pfoe == NULL)
		return (1);	/* too bad :( */

	bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
	pfoe->af = (*state)->key[PF_SK_WIRE]->af;
	pfoe->rule = (*state)->rule.ptr;
	pfoe->dir = (*state)->direction;
	PF_OVERLOADQ_LOCK();
	SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
	PF_OVERLOADQ_UNLOCK();
	taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);

	return (1);
}
static void
pf_overload_task(void *v, int pending)
{
	struct pf_overload_head queue;
	struct pfr_addr p;
	struct pf_overload_entry *pfoe, *pfoe1;
	uint32_t killed = 0;

	CURVNET_SET((struct vnet *)v);

	PF_OVERLOADQ_LOCK();
	queue = V_pf_overloadqueue;
	SLIST_INIT(&V_pf_overloadqueue);
	PF_OVERLOADQ_UNLOCK();

	bzero(&p, sizeof(p));
	SLIST_FOREACH(pfoe, &queue, next) {
		counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1);
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			printf("%s: blocking address ", __func__);
			pf_print_host(&pfoe->addr, 0, pfoe->af);
			printf("\n");
		}

		p.pfra_af = pfoe->af;
		switch (pfoe->af) {
#ifdef INET
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = pfoe->addr.v4;
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = pfoe->addr.v6;
			break;
#endif /* INET6 */
		}

		PF_RULES_WLOCK();
		pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
		PF_RULES_WUNLOCK();
	}

	/*
	 * Remove those entries that don't need flushing.
	 */
	SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
		if (pfoe->rule->flush == 0) {
			SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next);
			free(pfoe, M_PFTEMP);
		} else
			counter_u64_add(
			    V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1);

	/* If nothing to flush, return. */
	if (SLIST_EMPTY(&queue)) {
		CURVNET_RESTORE();
		return;
	}

	for (int i = 0; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];
		struct pf_state_key *sk;
		struct pf_kstate *s;

		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			sk = s->key[PF_SK_WIRE];
			SLIST_FOREACH(pfoe, &queue, next)
				if (sk->af == pfoe->af &&
				    ((pfoe->rule->flush & PF_FLUSH_GLOBAL) ||
				    pfoe->rule == s->rule.ptr) &&
				    ((pfoe->dir == PF_OUT &&
				    PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) ||
				    (pfoe->dir == PF_IN &&
				    PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) {
					s->timeout = PFTM_PURGE;
					pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED);
					killed++;
				}
		}
		PF_HASHROW_UNLOCK(ih);
	}
	SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
		free(pfoe, M_PFTEMP);
	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("%s: %u states killed", __func__, killed);

	CURVNET_RESTORE();
}
/*
 * Can return locked on failure, so that we can consistently
 * allocate and insert a new one.
 */
struct pf_ksrc_node *
pf_find_src_node(struct pf_addr *src, struct pf_krule *rule, sa_family_t af,
	int returnlocked)
{
	struct pf_srchash *sh;
	struct pf_ksrc_node *n;

	counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);

	sh = &V_pf_srchash[pf_hashsrc(src, af)];
	PF_HASHROW_LOCK(sh);
	LIST_FOREACH(n, &sh->nodes, entry)
		if (n->rule.ptr == rule && n->af == af &&
		    ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) ||
		    (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0)))
			break;

	if (n != NULL) {
		n->states++;
		PF_HASHROW_UNLOCK(sh);
	} else if (returnlocked == 0)
		PF_HASHROW_UNLOCK(sh);

	return (n);
}
static void
pf_free_src_node(struct pf_ksrc_node *sn)
{
	for (int i = 0; i < 2; i++) {
		counter_u64_free(sn->bytes[i]);
		counter_u64_free(sn->packets[i]);
	}
	uma_zfree(V_pf_sources_z, sn);
}
static int
pf_insert_src_node(struct pf_ksrc_node **sn, struct pf_krule *rule,
    struct pf_addr *src, sa_family_t af)
{

	KASSERT((rule->rule_flag & PFRULE_SRCTRACK ||
	    rule->rpool.opts & PF_POOL_STICKYADDR),
	    ("%s for non-tracking rule %p", __func__, rule));

	if (*sn == NULL)
		*sn = pf_find_src_node(src, rule, af, 1);

	if (*sn == NULL) {
		struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)];

		PF_HASHROW_ASSERT(sh);

		if (!rule->max_src_nodes ||
		    counter_u64_fetch(rule->src_nodes) < rule->max_src_nodes)
			(*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
		else
			counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES],
			    1);
		if ((*sn) == NULL) {
			PF_HASHROW_UNLOCK(sh);
			return (-1);
		}

		for (int i = 0; i < 2; i++) {
			(*sn)->bytes[i] = counter_u64_alloc(M_NOWAIT);
			(*sn)->packets[i] = counter_u64_alloc(M_NOWAIT);

			if ((*sn)->bytes[i] == NULL || (*sn)->packets[i] == NULL) {
				pf_free_src_node(*sn);
				PF_HASHROW_UNLOCK(sh);
				return (-1);
			}
		}

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->af = af;
		(*sn)->rule.ptr = rule;
		PF_ACPY(&(*sn)->addr, src, af);
		LIST_INSERT_HEAD(&sh->nodes, *sn, entry);
		(*sn)->creation = time_uptime;
		(*sn)->ruletype = rule->action;
		(*sn)->states = 1;
		if ((*sn)->rule.ptr != NULL)
			counter_u64_add((*sn)->rule.ptr->src_nodes, 1);
		PF_HASHROW_UNLOCK(sh);
		counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1);
	} else {
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES],
			    1);
			return (-1);
		}
	}
	return (0);
}
void
pf_unlink_src_node(struct pf_ksrc_node *src)
{

	PF_HASHROW_ASSERT(&V_pf_srchash[pf_hashsrc(&src->addr, src->af)]);
	LIST_REMOVE(src, entry);
	if (src->rule.ptr)
		counter_u64_add(src->rule.ptr->src_nodes, -1);
}

u_int
pf_free_src_nodes(struct pf_ksrc_node_list *head)
{
	struct pf_ksrc_node *sn, *tmp;
	u_int count = 0;

	LIST_FOREACH_SAFE(sn, head, entry, tmp) {
		pf_free_src_node(sn);
		count++;
	}

	counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count);

	return (count);
}
static void
pf_mtag_initialize()
{

	pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) +
	    sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL,
	    UMA_ALIGN_PTR, 0);
}

/* Per-vnet data storage structures initialization. */
void
pf_initialize()
{
	struct pf_keyhash	*kh;
	struct pf_idhash	*ih;
	struct pf_srchash	*sh;
	u_int i;

	if (pf_hashsize == 0 || !powerof2(pf_hashsize))
		pf_hashsize = PF_HASHSIZ;
	if (pf_srchashsize == 0 || !powerof2(pf_srchashsize))
		pf_srchashsize = PF_SRCHASHSIZ;

	V_pf_hashseed = arc4random();

	/* States and state keys storage. */
	V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_kstate),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z;
	uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT);
	uma_zone_set_warning(V_pf_state_z, "PF states limit reached");

	V_pf_state_key_z = uma_zcreate("pf state keys",
	    sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);

	V_pf_keyhash = mallocarray(pf_hashsize, sizeof(struct pf_keyhash),
	    M_PFHASH, M_NOWAIT | M_ZERO);
	V_pf_idhash = mallocarray(pf_hashsize, sizeof(struct pf_idhash),
	    M_PFHASH, M_NOWAIT | M_ZERO);
	if (V_pf_keyhash == NULL || V_pf_idhash == NULL) {
		printf("pf: Unable to allocate memory for "
		    "state_hashsize %lu.\n", pf_hashsize);

		free(V_pf_keyhash, M_PFHASH);
		free(V_pf_idhash, M_PFHASH);

		pf_hashsize = PF_HASHSIZ;
		V_pf_keyhash = mallocarray(pf_hashsize,
		    sizeof(struct pf_keyhash), M_PFHASH, M_WAITOK | M_ZERO);
		V_pf_idhash = mallocarray(pf_hashsize,
		    sizeof(struct pf_idhash), M_PFHASH, M_WAITOK | M_ZERO);
	}

	pf_hashmask = pf_hashsize - 1;
	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask;
	    i++, kh++, ih++) {
		mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK);
		mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF);
	}

	/* Source nodes. */
	V_pf_sources_z = uma_zcreate("pf source nodes",
	    sizeof(struct pf_ksrc_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
	    0);
	V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z;
	uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
	uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached");

	V_pf_srchash = mallocarray(pf_srchashsize,
	    sizeof(struct pf_srchash), M_PFHASH, M_NOWAIT | M_ZERO);
	if (V_pf_srchash == NULL) {
		printf("pf: Unable to allocate memory for "
		    "source_hashsize %lu.\n", pf_srchashsize);

		pf_srchashsize = PF_SRCHASHSIZ;
		V_pf_srchash = mallocarray(pf_srchashsize,
		    sizeof(struct pf_srchash), M_PFHASH, M_WAITOK | M_ZERO);
	}

	pf_srchashmask = pf_srchashsize - 1;
	for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++)
		mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);

	/* ALTQ */
	TAILQ_INIT(&V_pf_altqs[0]);
	TAILQ_INIT(&V_pf_altqs[1]);
	TAILQ_INIT(&V_pf_altqs[2]);
	TAILQ_INIT(&V_pf_altqs[3]);
	TAILQ_INIT(&V_pf_pabuf);
	V_pf_altqs_active = &V_pf_altqs[0];
	V_pf_altq_ifs_active = &V_pf_altqs[1];
	V_pf_altqs_inactive = &V_pf_altqs[2];
	V_pf_altq_ifs_inactive = &V_pf_altqs[3];

	/* Send & overload+flush queues. */
	STAILQ_INIT(&V_pf_sendqueue);
	SLIST_INIT(&V_pf_overloadqueue);
	TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet);

	/* Unlinked, but may be referenced rules. */
	TAILQ_INIT(&V_pf_unlinked_rules);
}

static void
pf_mtag_cleanup()
{

	uma_zdestroy(pf_mtag_z);
}
void
pf_cleanup()
{
	struct pf_keyhash	*kh;
	struct pf_idhash	*ih;
	struct pf_srchash	*sh;
	struct pf_send_entry	*pfse, *next;
	u_int i;

	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask;
	    i++, kh++, ih++) {
		KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty",
		    __func__));
		KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty",
		    __func__));
		mtx_destroy(&kh->lock);
		mtx_destroy(&ih->lock);
	}
	free(V_pf_keyhash, M_PFHASH);
	free(V_pf_idhash, M_PFHASH);

	for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) {
		KASSERT(LIST_EMPTY(&sh->nodes),
		    ("%s: source node hash not empty", __func__));
		mtx_destroy(&sh->lock);
	}
	free(V_pf_srchash, M_PFHASH);

	STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
		m_freem(pfse->pfse_m);
		free(pfse, M_PFTEMP);
	}

	uma_zdestroy(V_pf_sources_z);
	uma_zdestroy(V_pf_state_z);
	uma_zdestroy(V_pf_state_key_z);
}
static int
pf_mtag_uminit(void *mem, int size, int how)
{
	struct m_tag *t;

	t = (struct m_tag *)mem;
	t->m_tag_cookie = MTAG_ABI_COMPAT;
	t->m_tag_id = PACKET_TAG_PF;
	t->m_tag_len = sizeof(struct pf_mtag);
	t->m_tag_free = pf_mtag_free;

	return (0);
}

static void
pf_mtag_free(struct m_tag *t)
{

	uma_zfree(pf_mtag_z, t);
}

struct pf_mtag *
pf_get_mtag(struct mbuf *m)
{
	struct m_tag *mtag;

	if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL)
		return ((struct pf_mtag *)(mtag + 1));

	mtag = uma_zalloc(pf_mtag_z, M_NOWAIT);
	if (mtag == NULL)
		return (NULL);
	bzero(mtag + 1, sizeof(struct pf_mtag));
	m_tag_prepend(m, mtag);

	return ((struct pf_mtag *)(mtag + 1));
}
static int
pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks,
    struct pf_kstate *s)
{
	struct pf_keyhash	*khs, *khw, *kh;
	struct pf_state_key	*sk, *cur;
	struct pf_kstate	*si, *olds = NULL;
	int idx;

	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
	KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__));
	KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__));

	/*
	 * We need to lock hash slots of both keys. To avoid deadlock
	 * we always lock the slot with lower address first. Unlock order
	 * isn't important.
	 *
	 * We also need to lock ID hash slot before dropping key
	 * locks. On success we return with ID hash slot locked.
	 */

	if (skw == sks) {
		khs = khw = &V_pf_keyhash[pf_hashkey(skw)];
		PF_HASHROW_LOCK(khs);
	} else {
		khs = &V_pf_keyhash[pf_hashkey(sks)];
		khw = &V_pf_keyhash[pf_hashkey(skw)];
		if (khs == khw) {
			PF_HASHROW_LOCK(khs);
		} else if (khs < khw) {
			PF_HASHROW_LOCK(khs);
			PF_HASHROW_LOCK(khw);
		} else {
			PF_HASHROW_LOCK(khw);
			PF_HASHROW_LOCK(khs);
		}
	}
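
	/*
	 * Illustrative sketch (not from the source): ordering the two lock
	 * acquisitions by address, as done above, is the standard defence
	 * against an ABBA deadlock when two threads need the same pair of
	 * rows:
	 *
	 *	if (&row_a->lock < &row_b->lock) {
	 *		mtx_lock(&row_a->lock);
	 *		mtx_lock(&row_b->lock);
	 *	} else {
	 *		mtx_lock(&row_b->lock);
	 *		mtx_lock(&row_a->lock);
	 *	}
	 *
	 * Whichever thread loses the race blocks before taking its second
	 * lock, so the pair can never be held in opposite orders.
	 */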
#define	KEYS_UNLOCK()	do {			\
	if (khs != khw) {			\
		PF_HASHROW_UNLOCK(khs);		\
		PF_HASHROW_UNLOCK(khw);		\
	} else					\
		PF_HASHROW_UNLOCK(khs);		\
} while (0)

	/*
	 * First run: start with wire key.
	 */
	sk = skw;
	kh = khw;
	idx = PF_SK_WIRE;

	MPASS(s->lock == NULL);
	s->lock = &V_pf_idhash[PF_IDHASH(s)].lock;

keyattach:
	LIST_FOREACH(cur, &kh->keys, entry)
		if (pf_bcmp_state_key(cur, (struct pf_state_key_cmp *)sk) == 0)
			break;

	if (cur != NULL) {
		/* Key exists. Check for same kif, if none, add to key. */
		TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) {
			struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)];

			PF_HASHROW_LOCK(ih);
			if (si->kif == s->kif &&
			    si->direction == s->direction) {
				if (sk->proto == IPPROTO_TCP &&
				    si->src.state >= TCPS_FIN_WAIT_2 &&
				    si->dst.state >= TCPS_FIN_WAIT_2) {
					/*
					 * New state matches an old >FIN_WAIT_2
					 * state. We can't drop key hash locks,
					 * thus we can't unlink it properly.
					 *
					 * As a workaround we drop it into
					 * TCPS_CLOSED state, schedule purge
					 * ASAP and push it into the very end
					 * of the slot TAILQ, so that it won't
					 * conflict with our new state.
					 */
					pf_set_protostate(si, PF_PEER_BOTH,
					    TCPS_CLOSED);
					si->timeout = PFTM_PURGE;
					olds = si;
				} else {
					if (V_pf_status.debug >= PF_DEBUG_MISC) {
						printf("pf: %s key attach "
						    "failed on %s: ",
						    (idx == PF_SK_WIRE) ?
						    "wire" : "stack",
						    s->kif->pfik_name);
						pf_print_state_parts(s,
						    (idx == PF_SK_WIRE) ?
						    sk : NULL,
						    (idx == PF_SK_STACK) ?
						    sk : NULL);
						printf(", existing: ");
						pf_print_state_parts(si,
						    (idx == PF_SK_WIRE) ?
						    sk : NULL,
						    (idx == PF_SK_STACK) ?
						    sk : NULL);
						printf("\n");
					}
					PF_HASHROW_UNLOCK(ih);
					KEYS_UNLOCK();
					uma_zfree(V_pf_state_key_z, sk);
					if (idx == PF_SK_STACK)
						pf_detach_state(s);
					return (EEXIST); /* collision! */
				}
			}
			PF_HASHROW_UNLOCK(ih);
		}
		uma_zfree(V_pf_state_key_z, sk);
		s->key[idx] = cur;
	} else {
		LIST_INSERT_HEAD(&kh->keys, sk, entry);
		s->key[idx] = sk;
	}

stateattach:
	/* List is sorted, if-bound states before floating. */
	if (s->kif == V_pfi_all)
		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]);
	else
		TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]);

	if (olds) {
		TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]);
		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds,
		    key_list[idx]);
		olds = NULL;
	}

	/*
	 * Attach done. Now decide how (or whether) to attach a second key.
	 */
	if (sks == skw) {
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
		idx = PF_SK_STACK;
		sks = NULL;
		goto stateattach;
	} else if (sks != NULL) {
		/*
		 * Continue attaching with stack key.
		 */
		sk = sks;
		kh = khs;
		idx = PF_SK_STACK;
		sks = NULL;
		goto keyattach;
	}

	KEYS_UNLOCK();

	KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL,
	    ("%s failure", __func__));

	return (0);
}
static void
pf_detach_state(struct pf_kstate *s)
{
	struct pf_state_key *sks = s->key[PF_SK_STACK];
	struct pf_keyhash *kh;

	if (sks != NULL) {
		kh = &V_pf_keyhash[pf_hashkey(sks)];
		PF_HASHROW_LOCK(kh);
		if (s->key[PF_SK_STACK] != NULL)
			pf_state_key_detach(s, PF_SK_STACK);
		/*
		 * If both point to same key, then we are done.
		 */
		if (sks == s->key[PF_SK_WIRE]) {
			pf_state_key_detach(s, PF_SK_WIRE);
			PF_HASHROW_UNLOCK(kh);
			return;
		}
		PF_HASHROW_UNLOCK(kh);
	}

	if (s->key[PF_SK_WIRE] != NULL) {
		kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])];
		PF_HASHROW_LOCK(kh);
		if (s->key[PF_SK_WIRE] != NULL)
			pf_state_key_detach(s, PF_SK_WIRE);
		PF_HASHROW_UNLOCK(kh);
	}
}

static void
pf_state_key_detach(struct pf_kstate *s, int idx)
{
	struct pf_state_key *sk = s->key[idx];
#ifdef INVARIANTS
	struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)];

	PF_HASHROW_ASSERT(kh);
#endif
	TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]);
	s->key[idx] = NULL;

	if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) {
		LIST_REMOVE(sk, entry);
		uma_zfree(V_pf_state_key_z, sk);
	}
}
static int
pf_state_key_ctor(void *mem, int size, void *arg, int flags)
{
	struct pf_state_key *sk = mem;

	bzero(sk, sizeof(struct pf_state_key_cmp));
	TAILQ_INIT(&sk->states[PF_SK_WIRE]);
	TAILQ_INIT(&sk->states[PF_SK_STACK]);

	return (0);
}

struct pf_state_key *
pf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr,
	struct pf_addr *daddr, u_int16_t sport, u_int16_t dport)
{
	struct pf_state_key *sk;

	sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
	if (sk == NULL)
		return (NULL);

	PF_ACPY(&sk->addr[pd->sidx], saddr, pd->af);
	PF_ACPY(&sk->addr[pd->didx], daddr, pd->af);
	sk->port[pd->sidx] = sport;
	sk->port[pd->didx] = dport;
	sk->proto = pd->proto;
	sk->af = pd->af;

	return (sk);
}

struct pf_state_key *
pf_state_key_clone(struct pf_state_key *orig)
{
	struct pf_state_key *sk;

	sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
	if (sk == NULL)
		return (NULL);

	bcopy(orig, sk, sizeof(struct pf_state_key_cmp));

	return (sk);
}
int
pf_state_insert(struct pfi_kkif *kif, struct pfi_kkif *orig_kif,
    struct pf_state_key *skw, struct pf_state_key *sks, struct pf_kstate *s)
{
	struct pf_idhash *ih;
	struct pf_kstate *cur;
	int error;

	KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]),
	    ("%s: sks not pristine", __func__));
	KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]),
	    ("%s: skw not pristine", __func__));
	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));

	s->kif = kif;
	s->orig_kif = orig_kif;

	if (s->id == 0 && s->creatorid == 0) {
		/* XXX: should be atomic, but probability of collision low */
		if ((s->id = V_pf_stateid[curcpu]++) == PFID_MAXID)
			V_pf_stateid[curcpu] = 1;
		s->id |= (uint64_t )curcpu << PFID_CPUSHIFT;
		s->id = htobe64(s->id);
		s->creatorid = V_pf_status.hostid;
	}

	/* Returns with ID locked on success. */
	if ((error = pf_state_key_attach(skw, sks, s)) != 0)
		return (error);

	ih = &V_pf_idhash[PF_IDHASH(s)];
	PF_HASHROW_ASSERT(ih);
	LIST_FOREACH(cur, &ih->states, entry)
		if (cur->id == s->id && cur->creatorid == s->creatorid)
			break;

	if (cur != NULL) {
		PF_HASHROW_UNLOCK(ih);
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state ID collision: "
			    "id: %016llx creatorid: %08x\n",
			    (unsigned long long)be64toh(s->id),
			    ntohl(s->creatorid));
		}
		pf_detach_state(s);
		return (EEXIST);
	}

	LIST_INSERT_HEAD(&ih->states, s, entry);
	/* One for keys, one for ID hash. */
	refcount_init(&s->refs, 2);

	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_INSERT], 1);
	if (V_pfsync_insert_state_ptr != NULL)
		V_pfsync_insert_state_ptr(s);

	/* Returns locked. */
	return (0);
}
/*
 * Find state by ID: returns with locked row on success.
 */
struct pf_kstate *
pf_find_state_byid(uint64_t id, uint32_t creatorid)
{
	struct pf_idhash *ih;
	struct pf_kstate *s;

	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);

	ih = &V_pf_idhash[(be64toh(id) % (pf_hashmask + 1))];

	PF_HASHROW_LOCK(ih);
	LIST_FOREACH(s, &ih->states, entry)
		if (s->id == id && s->creatorid == creatorid)
			break;

	if (s == NULL)
		PF_HASHROW_UNLOCK(ih);

	return (s);
}

/*
 * Find state by key.
 * Returns with ID hash slot locked on success.
 */
static struct pf_kstate *
pf_find_state(struct pfi_kkif *kif, struct pf_state_key_cmp *key, u_int dir)
{
	struct pf_keyhash	*kh;
	struct pf_state_key	*sk;
	struct pf_kstate	*s;
	int idx;

	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);

	kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)];

	PF_HASHROW_LOCK(kh);
	LIST_FOREACH(sk, &kh->keys, entry)
		if (pf_bcmp_state_key(sk, key) == 0)
			break;
	if (sk == NULL) {
		PF_HASHROW_UNLOCK(kh);
		return (NULL);
	}

	idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK);

	/* List is sorted, if-bound states before floating ones. */
	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx])
		if (s->kif == V_pfi_all || s->kif == kif) {
			PF_STATE_LOCK(s);
			PF_HASHROW_UNLOCK(kh);
			if (__predict_false(s->timeout >= PFTM_MAX)) {
				/*
				 * State is either being processed by
				 * pf_unlink_state() in another thread, or
				 * is scheduled for immediate expiry.
				 */
				PF_STATE_UNLOCK(s);
				return (NULL);
			}
			return (s);
		}
	PF_HASHROW_UNLOCK(kh);

	return (NULL);
}

/*
 * Returns with ID hash slot locked on success.
 */
struct pf_kstate *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_keyhash	*kh;
	struct pf_state_key	*sk;
	struct pf_kstate	*s, *ret = NULL;
	int			 idx, inout = 0;

	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);

	kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)];

	PF_HASHROW_LOCK(kh);
	LIST_FOREACH(sk, &kh->keys, entry)
		if (pf_bcmp_state_key(sk, key) == 0)
			break;
	if (sk == NULL) {
		PF_HASHROW_UNLOCK(kh);
		return (NULL);
	}
	switch (dir) {
	case PF_IN:
		idx = PF_SK_WIRE;
		break;
	case PF_OUT:
		idx = PF_SK_STACK;
		break;
	case PF_OUT | PF_IN:
		idx = PF_SK_WIRE;
		inout = 1;
		break;
	default:
		panic("%s: dir %u", __func__, dir);
	}
second_run:
	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
		if (more == NULL) {
			PF_STATE_LOCK(s);
			PF_HASHROW_UNLOCK(kh);
			return (s);
		}

		if (ret)
			(*more)++;
		else {
			ret = s;
			PF_STATE_LOCK(s);
		}
	}
	if (inout == 1) {
		inout = 0;
		idx = PF_SK_STACK;
		goto second_run;
	}
	PF_HASHROW_UNLOCK(kh);

	return (ret);
}

/*
 * This routine is inefficient -- locks the state only to unlock immediately on
 * return.
 * It is racy -- after the state is unlocked nothing stops other threads from
 * removing it.
 */
bool
pf_find_state_all_exists(struct pf_state_key_cmp *key, u_int dir)
{
	struct pf_kstate *s;

	s = pf_find_state_all(key, dir, NULL);
	if (s != NULL) {
		PF_STATE_UNLOCK(s);
		return (true);
	}

	return (false);
}

/* END state table stuff */
static void
pf_send(struct pf_send_entry *pfse)
{

	PF_SENDQ_LOCK();
	STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next);
	PF_SENDQ_UNLOCK();
	swi_sched(V_pf_swi_cookie, 0);
}

static bool
pf_isforlocal(struct mbuf *m, int af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		struct ip *ip = mtod(m, struct ip *);

		return (in_localip(ip->ip_dst));
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		struct in6_ifaddr *ia;
		ip6 = mtod(m, struct ip6_hdr *);
		ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false);
		if (ia == NULL)
			return (false);
		return (! (ia->ia6_flags & IN6_IFF_NOTREADY));
	}
#endif /* INET6 */
	default:
		panic("Unsupported af %d", af);
	}

	return (false);
}

void
pf_intr(void *v)
{
	struct epoch_tracker et;
	struct pf_send_head queue;
	struct pf_send_entry *pfse, *next;

	CURVNET_SET((struct vnet *)v);

	PF_SENDQ_LOCK();
	queue = V_pf_sendqueue;
	STAILQ_INIT(&V_pf_sendqueue);
	PF_SENDQ_UNLOCK();

	NET_EPOCH_ENTER(et);

	STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) {
		switch (pfse->pfse_type) {
#ifdef INET
		case PFSE_IP:
			if (pf_isforlocal(pfse->pfse_m, AF_INET)) {
				pfse->pfse_m->m_flags |= M_SKIP_FIREWALL;
				pfse->pfse_m->m_pkthdr.csum_flags |=
				    CSUM_IP_VALID | CSUM_IP_CHECKED;
				ip_input(pfse->pfse_m);
			} else {
				ip_output(pfse->pfse_m, NULL, NULL, 0, NULL,
				    NULL);
			}
			break;
		case PFSE_ICMP:
			icmp_error(pfse->pfse_m, pfse->icmpopts.type,
			    pfse->icmpopts.code, 0, pfse->icmpopts.mtu);
			break;
#endif /* INET */
#ifdef INET6
		case PFSE_IP6:
			if (pf_isforlocal(pfse->pfse_m, AF_INET6)) {
				pfse->pfse_m->m_flags |= M_SKIP_FIREWALL;
				ip6_input(pfse->pfse_m);
			} else {
				ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL,
				    NULL, NULL);
			}
			break;
		case PFSE_ICMP6:
			icmp6_error(pfse->pfse_m, pfse->icmpopts.type,
			    pfse->icmpopts.code, pfse->icmpopts.mtu);
			break;
#endif /* INET6 */
		default:
			panic("%s: unknown type", __func__);
		}
		free(pfse, M_PFTEMP);
	}
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
}
#define	pf_purge_thread_period	(hz / 10)

#ifdef PF_WANT_32_TO_64_COUNTER
static void
pf_status_counter_u64_periodic(void)
{

	PF_RULES_RASSERT();

	if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 60)) != 0) {
		return;
	}

	for (int i = 0; i < FCNT_MAX; i++) {
		pf_counter_u64_periodic(&V_pf_status.fcounters[i]);
	}
}

static void
pf_kif_counter_u64_periodic(void)
{
	struct pfi_kkif *kif;
	size_t r, run;

	PF_RULES_RASSERT();

	if (__predict_false(V_pf_allkifcount == 0)) {
		return;
	}

	if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) {
		return;
	}

	run = V_pf_allkifcount / 10;
	if (run < 5)
		run = 5;

	for (r = 0; r < run; r++) {
		kif = LIST_NEXT(V_pf_kifmarker, pfik_allkiflist);
		if (kif == NULL) {
			LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
			LIST_INSERT_HEAD(&V_pf_allkiflist, V_pf_kifmarker, pfik_allkiflist);
			break;
		}

		LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
		LIST_INSERT_AFTER(kif, V_pf_kifmarker, pfik_allkiflist);

		for (int i = 0; i < 2; i++) {
			for (int j = 0; j < 2; j++) {
				for (int k = 0; k < 2; k++) {
					pf_counter_u64_periodic(&kif->pfik_packets[i][j][k]);
					pf_counter_u64_periodic(&kif->pfik_bytes[i][j][k]);
				}
			}
		}
	}
}

static void
pf_rule_counter_u64_periodic(void)
{
	struct pf_krule *rule;
	size_t r, run;

	PF_RULES_RASSERT();

	if (__predict_false(V_pf_allrulecount == 0)) {
		return;
	}

	if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) {
		return;
	}

	run = V_pf_allrulecount / 10;
	if (run < 5)
		run = 5;

	for (r = 0; r < run; r++) {
		rule = LIST_NEXT(V_pf_rulemarker, allrulelist);
		if (rule == NULL) {
			LIST_REMOVE(V_pf_rulemarker, allrulelist);
			LIST_INSERT_HEAD(&V_pf_allrulelist, V_pf_rulemarker, allrulelist);
			break;
		}

		LIST_REMOVE(V_pf_rulemarker, allrulelist);
		LIST_INSERT_AFTER(rule, V_pf_rulemarker, allrulelist);

		pf_counter_u64_periodic(&rule->evaluations);
		for (int i = 0; i < 2; i++) {
			pf_counter_u64_periodic(&rule->packets[i]);
			pf_counter_u64_periodic(&rule->bytes[i]);
		}
	}
}

static void
pf_counter_u64_periodic_main(void)
{
	PF_RULES_RLOCK_TRACKER;

	V_pf_counter_periodic_iter++;

	PF_RULES_RLOCK();
	pf_counter_u64_critical_enter();
	pf_status_counter_u64_periodic();
	pf_kif_counter_u64_periodic();
	pf_rule_counter_u64_periodic();
	pf_counter_u64_critical_exit();
	PF_RULES_RUNLOCK();
}
#else
#define	pf_counter_u64_periodic_main()	do { } while (0)
#endif
void
pf_purge_thread(void *unused __unused)
{
	VNET_ITERATOR_DECL(vnet_iter);

	sx_xlock(&pf_end_lock);
	while (pf_end_threads == 0) {
		sx_sleep(pf_purge_thread, &pf_end_lock, 0, "pftm", pf_purge_thread_period);

		VNET_LIST_RLOCK();
		VNET_FOREACH(vnet_iter) {
			CURVNET_SET(vnet_iter);

			/* Wait until V_pf_default_rule is initialized. */
			if (V_pf_vnet_active == 0) {
				CURVNET_RESTORE();
				continue;
			}

			pf_counter_u64_periodic_main();

			/*
			 * Process 1/interval fraction of the state
			 * table every run.
			 */
			V_pf_purge_idx =
			    pf_purge_expired_states(V_pf_purge_idx, pf_hashmask /
			    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));

			/*
			 * Purge other expired types every
			 * PFTM_INTERVAL seconds.
			 */
			if (V_pf_purge_idx == 0) {
				/*
				 * Order is important:
				 * - states and src nodes reference rules
				 * - states and rules reference kifs
				 */
				pf_purge_expired_fragments();
				pf_purge_expired_src_nodes();
				pf_purge_unlinked_rules();
				pfi_kkif_purge();
			}
			CURVNET_RESTORE();
		}
		VNET_LIST_RUNLOCK();
	}

	pf_end_threads++;
	sx_xunlock(&pf_end_lock);
	kproc_exit(0);
}

void
pf_unload_vnet_purge(void)
{

	/*
	 * To clean up all kifs and rules we need two runs: the first one
	 * clears reference flags, then pf_purge_expired_states() doesn't
	 * raise them again, and the second run frees.
	 */
	pf_purge_unlinked_rules();
	pfi_kkif_purge();

	/*
	 * Now purge everything.
	 */
	pf_purge_expired_states(0, pf_hashmask);
	pf_purge_fragments(UINT_MAX);
	pf_purge_expired_src_nodes();

	/*
	 * Now all kifs & rules should be unreferenced,
	 * thus should be successfully freed.
	 */
	pf_purge_unlinked_rules();
	pfi_kkif_purge();
}
u_int32_t
pf_state_expires(const struct pf_kstate *state)
{
	u_int32_t	timeout;
	u_int32_t	start;
	u_int32_t	end;
	u_int32_t	states;

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE)
		return (time_uptime);

	KASSERT(state->timeout != PFTM_UNLINKED,
	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
	KASSERT((state->timeout < PFTM_MAX),
	    ("pf_state_expires: timeout > PFTM_MAX"));

	timeout = state->rule.ptr->timeout[state->timeout];
	if (!timeout)
		timeout = V_pf_default_rule.timeout[state->timeout];

	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start && state->rule.ptr != &V_pf_default_rule) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = counter_u64_fetch(state->rule.ptr->states_cur);
	} else {
		start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = V_pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states < end) {
			timeout = (u_int64_t)timeout * (end - states) /
			    (end - start);
			return (state->expire + timeout);
		} else
			return (time_uptime);
	}
	return (state->expire + timeout);
}
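
/*
 * Worked example (hypothetical values, not defaults): with timeout == 60,
 * adaptive.start == 6000, adaptive.end == 12000 and states == 9000, the
 * scaling above yields
 *
 *	timeout = 60 * (12000 - 9000) / (12000 - 6000) = 30 seconds,
 *
 * so expiry shrinks linearly as the state count climbs from start toward
 * end, and a state table at or beyond adaptive.end expires states at once.
 */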
void
pf_purge_expired_src_nodes()
{
	struct pf_ksrc_node_list	 freelist;
	struct pf_srchash	*sh;
	struct pf_ksrc_node	*cur, *next;
	int i;

	LIST_INIT(&freelist);
	for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) {
		PF_HASHROW_LOCK(sh);
		LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
			if (cur->states == 0 && cur->expire <= time_uptime) {
				pf_unlink_src_node(cur);
				LIST_INSERT_HEAD(&freelist, cur, entry);
			} else if (cur->rule.ptr != NULL)
				cur->rule.ptr->rule_ref |= PFRULE_REFS;
		PF_HASHROW_UNLOCK(sh);
	}

	pf_free_src_nodes(&freelist);

	V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z);
}

static void
pf_src_tree_remove_state(struct pf_kstate *s)
{
	struct pf_ksrc_node *sn;
	struct pf_srchash *sh;
	uint32_t timeout;

	timeout = s->rule.ptr->timeout[PFTM_SRC_NODE] ?
	    s->rule.ptr->timeout[PFTM_SRC_NODE] :
	    V_pf_default_rule.timeout[PFTM_SRC_NODE];

	if (s->src_node != NULL) {
		sn = s->src_node;
		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
		PF_HASHROW_LOCK(sh);
		if (s->src.tcp_est)
			--sn->conn;
		if (--sn->states == 0)
			sn->expire = time_uptime + timeout;
		PF_HASHROW_UNLOCK(sh);
	}

	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
		sn = s->nat_src_node;
		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
		PF_HASHROW_LOCK(sh);
		if (--sn->states == 0)
			sn->expire = time_uptime + timeout;
		PF_HASHROW_UNLOCK(sh);
	}

	s->src_node = s->nat_src_node = NULL;
}
/*
 * Unlink and potentially free a state. Function may be
 * called with ID hash row locked, but always returns
 * unlocked, since it needs to go through key hash locking.
 */
int
pf_unlink_state(struct pf_kstate *s)
{
	struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];

	PF_HASHROW_ASSERT(ih);

	if (s->timeout == PFTM_UNLINKED) {
		/*
		 * State is being processed
		 * by pf_unlink_state() in
		 * another thread.
		 */
		PF_HASHROW_UNLOCK(ih);
		return (0);	/* XXXGL: undefined actually */
	}

	if (s->src.state == PF_TCPS_PROXY_DST) {
		/* XXX wire key the right one? */
		pf_send_tcp(s->rule.ptr, s->key[PF_SK_WIRE]->af,
		    &s->key[PF_SK_WIRE]->addr[1],
		    &s->key[PF_SK_WIRE]->addr[0],
		    s->key[PF_SK_WIRE]->port[1],
		    s->key[PF_SK_WIRE]->port[0],
		    s->src.seqhi, s->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, s->tag);
	}

	LIST_REMOVE(s, entry);
	pf_src_tree_remove_state(s);

	if (V_pfsync_delete_state_ptr != NULL)
		V_pfsync_delete_state_ptr(s);

	STATE_DEC_COUNTERS(s);

	s->timeout = PFTM_UNLINKED;

	/* Ensure we remove it from the list of halfopen states, if needed. */
	if (s->key[PF_SK_STACK] != NULL &&
	    s->key[PF_SK_STACK]->proto == IPPROTO_TCP)
		pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED);

	PF_HASHROW_UNLOCK(ih);

	pf_detach_state(s);
	/* pf_state_insert() initialises refs to 2 */
	return (pf_release_staten(s, 2));
}
struct pf_kstate *
pf_alloc_state(int flags)
{

	return (uma_zalloc(V_pf_state_z, flags | M_ZERO));
}

void
pf_free_state(struct pf_kstate *cur)
{

	KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
	KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
	    cur->timeout));

	pf_normalize_tcp_cleanup(cur);
	uma_zfree(V_pf_state_z, cur);
	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1);
}

/*
 * Called only from pf_purge_thread(), thus serialized.
 */
static u_int
pf_purge_expired_states(u_int i, int maxcheck)
{
	struct pf_idhash *ih;
	struct pf_kstate *s;

	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);

	/*
	 * Go through hash and unlink states that expire now.
	 */
	while (maxcheck > 0) {
		ih = &V_pf_idhash[i];

		/* only take the lock if we expect to do work */
		if (!LIST_EMPTY(&ih->states)) {
relock:
			PF_HASHROW_LOCK(ih);
			LIST_FOREACH(s, &ih->states, entry) {
				if (pf_state_expires(s) <= time_uptime) {
					V_pf_status.states -=
					    pf_unlink_state(s);
					goto relock;
				}
				s->rule.ptr->rule_ref |= PFRULE_REFS;
				if (s->nat_rule.ptr != NULL)
					s->nat_rule.ptr->rule_ref |= PFRULE_REFS;
				if (s->anchor.ptr != NULL)
					s->anchor.ptr->rule_ref |= PFRULE_REFS;
				s->kif->pfik_flags |= PFI_IFLAG_REFS;
				if (s->rt_kif)
					s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
			}
			PF_HASHROW_UNLOCK(ih);
		}

		/* Return when we hit end of hash. */
		if (++i > pf_hashmask) {
			V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
			return (0);
		}

		maxcheck--;
	}

	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);

	return (i);
}
static void
pf_purge_unlinked_rules()
{
	struct pf_krulequeue tmpq;
	struct pf_krule *r, *r1;

	/*
	 * If we have an overload task pending, then we'd
	 * better skip purging this time. There is a tiny
	 * probability that the overload task references
	 * an already unlinked rule.
	 */
	PF_OVERLOADQ_LOCK();
	if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
		PF_OVERLOADQ_UNLOCK();
		return;
	}
	PF_OVERLOADQ_UNLOCK();

	/*
	 * Do naive mark-and-sweep garbage collecting of old rules.
	 * Reference flag is raised by pf_purge_expired_states()
	 * and pf_purge_expired_src_nodes().
	 *
	 * To avoid a LOR between PF_UNLNKDRULES_LOCK and PF_RULES_WLOCK,
	 * use a temporary queue.
	 */
	TAILQ_INIT(&tmpq);
	PF_UNLNKDRULES_LOCK();
	TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) {
		if (!(r->rule_ref & PFRULE_REFS)) {
			TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries);
			TAILQ_INSERT_TAIL(&tmpq, r, entries);
		} else
			r->rule_ref &= ~PFRULE_REFS;
	}
	PF_UNLNKDRULES_UNLOCK();

	if (!TAILQ_EMPTY(&tmpq)) {
		PF_RULES_WLOCK();
		TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
			TAILQ_REMOVE(&tmpq, r, entries);
			pf_free_rule(r);
		}
		PF_RULES_WUNLOCK();
	}
}
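
/*
 * Illustrative recap (not from the source) of the mark-and-sweep contract:
 * the purge functions above act as the "mark" phase, tagging every rule a
 * live state or source node still points at:
 *
 *	s->rule.ptr->rule_ref |= PFRULE_REFS;	// mark during state scan
 *
 * pf_purge_unlinked_rules() is the "sweep": an unlinked rule that arrives
 * here still unmarked was not referenced during the whole previous interval
 * and can be freed; marked rules merely have the flag cleared so the next
 * cycle starts fresh.
 */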
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p)
			printf(":%u", ntohs(p));
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (i == 0)
					printf(":");
				if (i == maxend)
					printf(":");
			} else {
				b = ntohs(addr->addr16[i]);
				printf("%x", b);
				if (i < 7)
					printf(":");
			}
		}
		if (p)
			printf("[%u]", ntohs(p));
		break;
	}
#endif /* INET6 */
	}
}
void
pf_print_state(struct pf_kstate *s)
{
	pf_print_state_parts(s, NULL, NULL);
}

static void
pf_print_state_parts(struct pf_kstate *s,
    struct pf_state_key *skwp, struct pf_state_key *sksp)
{
	struct pf_state_key *skw, *sks;
	u_int8_t proto, dir;

	/* Do our best to fill these, but they're skipped if NULL */
	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
	proto = skw ? skw->proto : (sks ? sks->proto : 0);
	dir = s ? s->direction : 0;

	switch (proto) {
	case IPPROTO_IPV4:
		printf("IPv4");
		break;
	case IPPROTO_IPV6:
		printf("IPv6");
		break;
	case IPPROTO_TCP:
		printf("TCP");
		break;
	case IPPROTO_UDP:
		printf("UDP");
		break;
	case IPPROTO_ICMP:
		printf("ICMP");
		break;
	case IPPROTO_ICMPV6:
		printf("ICMPv6");
		break;
	default:
		printf("%u", proto);
		break;
	}
	switch (dir) {
	case PF_IN:
		printf(" in");
		break;
	case PF_OUT:
		printf(" out");
		break;
	}
	if (skw) {
		printf(" wire: ");
		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
		printf(" ");
		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
	}
	if (sks) {
		printf(" stack: ");
		if (sks != skw) {
			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
			printf(" ");
			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
		} else
			printf("-");
	}
	if (s) {
		if (proto == IPPROTO_TCP) {
			printf(" [lo=%u high=%u win=%u modulator=%u",
			    s->src.seqlo, s->src.seqhi,
			    s->src.max_win, s->src.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				printf(" wscale=%u",
				    s->src.wscale & PF_WSCALE_MASK);
			printf("]");
			printf(" [lo=%u high=%u win=%u modulator=%u",
			    s->dst.seqlo, s->dst.seqhi,
			    s->dst.max_win, s->dst.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				printf(" wscale=%u",
				    s->dst.wscale & PF_WSCALE_MASK);
			printf("]");
		}
		printf(" %u:%u", s->src.state, s->dst.state);
	}
}

void
pf_print_flags(u_int8_t f)
{
	if (f)
		printf(" ");
	if (f & TH_FIN)
		printf("F");
	if (f & TH_SYN)
		printf("S");
	if (f & TH_RST)
		printf("R");
	if (f & TH_PUSH)
		printf("P");
	if (f & TH_ACK)
		printf("A");
	if (f & TH_URG)
		printf("U");
	if (f & TH_ECE)
		printf("E");
	if (f & TH_CWR)
		printf("W");
}
#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

void
pf_calc_skip_steps(struct pf_krulequeue *rules)
{
	struct pf_krule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}
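
/*
 * Illustrative example (hypothetical ruleset): skip steps let evaluation
 * jump over whole runs of rules sharing a criterion.  Given
 *
 *	r1: pass in on em0 proto tcp port 80
 *	r2: pass in on em0 proto tcp port 443
 *	r3: pass in on em1 proto udp
 *
 * the loop above sets r1->skip[PF_SKIP_IFP].ptr = r3 (and likewise for
 * r2), so a packet that fails the interface test at r1 resumes directly
 * at r3 instead of re-testing em0 against every rule in the run.
 */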
static int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	default:
		printf("invalid address type: %d\n", aw1->type);
		return (1);
	}
}
/*
 * Checksum updates are a little complicated because the checksum in the TCP/UDP
 * header isn't always a full checksum. In some cases (i.e. output) it's a
 * pseudo-header checksum, which is a partial checksum over src/dst IP
 * addresses, protocol number and length.
 *
 * That means we have the following cases:
 *  * Input or forwarding: we don't have TSO, the checksum fields are full
 *  	checksums, we need to update the checksum whenever we change anything.
 *  * Output (i.e. the checksum is a pseudo-header checksum):
 *  	x The field being updated is src/dst address or affects the length of
 *  	the packet. We need to update the pseudo-header checksum (note that this
 *  	checksum is not ones' complement).
 *  	x Some other field is being modified (e.g. src/dst port numbers): We
 *  	don't have to update anything.
 **/

u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t x;

	x = cksum + old - new;
	x = (x + (x >> 16)) & 0xffff;

	/* optimise: eliminate a branch when not udp */
	if (udp && cksum == 0x0000)
		return cksum;
	if (udp && x == 0x0000)
		x = 0xffff;

	return (u_int16_t)(x);
}
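
/*
 * Worked example (RFC 1624 style): because the Internet checksum is the
 * ones' complement of a 16-bit ones'-complement sum, replacing one word
 * needs only the old and new values, not a full recompute.  Rewriting a
 * port from 80 (0x0050) to 8080 (0x1f90):
 *
 *	x = cksum + 0x0050 - 0x1f90;	// may "borrow" past 16 bits
 *	x = (x + (x >> 16)) & 0xffff;	// fold the carry/borrow back in
 *
 * The udp argument handles UDP's special zero: a transmitted checksum of
 * 0x0000 means "no checksum", so a recomputed zero is sent as 0xffff.
 */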
2499 pf_patch_8(struct mbuf *m, u_int16_t *cksum, u_int8_t *f, u_int8_t v, bool hi,
2502 u_int16_t old = htons(hi ? (*f << 8) : *f);
2503 u_int16_t new = htons(hi ? ( v << 8) : v);
2510 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
2513 *cksum = pf_cksum_fixup(*cksum, old, new, udp);
2517 pf_patch_16_unaligned(struct mbuf *m, u_int16_t *cksum, void *f, u_int16_t v,
2518 bool hi, u_int8_t udp)
2520 u_int8_t *fb = (u_int8_t *)f;
2521 u_int8_t *vb = (u_int8_t *)&v;
2523 pf_patch_8(m, cksum, fb++, *vb++, hi, udp);
2524 pf_patch_8(m, cksum, fb++, *vb++, !hi, udp);
2528 pf_patch_32_unaligned(struct mbuf *m, u_int16_t *cksum, void *f, u_int32_t v,
2529 bool hi, u_int8_t udp)
2531 u_int8_t *fb = (u_int8_t *)f;
2532 u_int8_t *vb = (u_int8_t *)&v;
2534 pf_patch_8(m, cksum, fb++, *vb++, hi, udp);
2535 pf_patch_8(m, cksum, fb++, *vb++, !hi, udp);
2536 pf_patch_8(m, cksum, fb++, *vb++, hi, udp);
2537 pf_patch_8(m, cksum, fb++, *vb++, !hi, udp);
2541 pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
2542 u_int16_t new, u_int8_t udp)
2544 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
2547 return (pf_cksum_fixup(cksum, old, new, udp));
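/*
 * Rewrite an address and port in place and repair every checksum that
 * covers them: *ic is the IPv4 header checksum and *pc the TCP/UDP
 * checksum, which for IPv6 needs all eight 16-bit address words folded
 * through pf_cksum_fixup().
 */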
2551 pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic,
2552 u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u,
2558 PF_ACPY(&ao, a, af);
2561 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
2569 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
2570 ao.addr16[0], an->addr16[0], 0),
2571 ao.addr16[1], an->addr16[1], 0);
2574 *pc = pf_cksum_fixup(pf_cksum_fixup(*pc,
2575 ao.addr16[0], an->addr16[0], u),
2576 ao.addr16[1], an->addr16[1], u);
2578 *pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
2583 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2584 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2585 pf_cksum_fixup(pf_cksum_fixup(*pc,
2586 ao.addr16[0], an->addr16[0], u),
2587 ao.addr16[1], an->addr16[1], u),
2588 ao.addr16[2], an->addr16[2], u),
2589 ao.addr16[3], an->addr16[3], u),
2590 ao.addr16[4], an->addr16[4], u),
2591 ao.addr16[5], an->addr16[5], u),
2592 ao.addr16[6], an->addr16[6], u),
2593 ao.addr16[7], an->addr16[7], u);
2595 *pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
2600 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
2601 CSUM_DELAY_DATA_IPV6)) {
/* Changes a u_int32_t. Uses a void * so there are no alignment restrictions */
2610 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
2614 memcpy(&ao, a, sizeof(ao));
2615 memcpy(a, &an, sizeof(u_int32_t));
2616 *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
2617 ao % 65536, an % 65536, u);
2621 pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp)
2625 memcpy(&ao, a, sizeof(ao));
2626 memcpy(a, &an, sizeof(u_int32_t));
2628 *c = pf_proto_cksum_fixup(m,
2629 pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp),
2630 ao % 65536, an % 65536, udp);
2635 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
2639 PF_ACPY(&ao, a, AF_INET6);
2640 PF_ACPY(a, an, AF_INET6);
2642 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2643 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2644 pf_cksum_fixup(pf_cksum_fixup(*c,
2645 ao.addr16[0], an->addr16[0], u),
2646 ao.addr16[1], an->addr16[1], u),
2647 ao.addr16[2], an->addr16[2], u),
2648 ao.addr16[3], an->addr16[3], u),
2649 ao.addr16[4], an->addr16[4], u),
2650 ao.addr16[5], an->addr16[5], u),
2651 ao.addr16[6], an->addr16[6], u),
2652 ao.addr16[7], an->addr16[7], u);
2657 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
2658 struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
2659 u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
2661 struct pf_addr oia, ooa;
2663 PF_ACPY(&oia, ia, af);
2665 PF_ACPY(&ooa, oa, af);
2667 /* Change inner protocol port, fix inner protocol checksum. */
2669 u_int16_t oip = *ip;
2676 *pc = pf_cksum_fixup(*pc, oip, *ip, u);
2677 *ic = pf_cksum_fixup(*ic, oip, *ip, 0);
2679 *ic = pf_cksum_fixup(*ic, opc, *pc, 0);
2681 /* Change inner ip address, fix inner ip and icmp checksums. */
2682 PF_ACPY(ia, na, af);
2686 u_int32_t oh2c = *h2c;
2688 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
2689 oia.addr16[0], ia->addr16[0], 0),
2690 oia.addr16[1], ia->addr16[1], 0);
2691 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
2692 oia.addr16[0], ia->addr16[0], 0),
2693 oia.addr16[1], ia->addr16[1], 0);
2694 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
2700 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2701 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2702 pf_cksum_fixup(pf_cksum_fixup(*ic,
2703 oia.addr16[0], ia->addr16[0], u),
2704 oia.addr16[1], ia->addr16[1], u),
2705 oia.addr16[2], ia->addr16[2], u),
2706 oia.addr16[3], ia->addr16[3], u),
2707 oia.addr16[4], ia->addr16[4], u),
2708 oia.addr16[5], ia->addr16[5], u),
2709 oia.addr16[6], ia->addr16[6], u),
2710 oia.addr16[7], ia->addr16[7], u);
2714 /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
2716 PF_ACPY(oa, na, af);
2720 *hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
2721 ooa.addr16[0], oa->addr16[0], 0),
2722 ooa.addr16[1], oa->addr16[1], 0);
2727 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2728 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2729 pf_cksum_fixup(pf_cksum_fixup(*ic,
2730 ooa.addr16[0], oa->addr16[0], u),
2731 ooa.addr16[1], oa->addr16[1], u),
2732 ooa.addr16[2], oa->addr16[2], u),
2733 ooa.addr16[3], oa->addr16[3], u),
2734 ooa.addr16[4], oa->addr16[4], u),
2735 ooa.addr16[5], oa->addr16[5], u),
2736 ooa.addr16[6], oa->addr16[6], u),
2737 ooa.addr16[7], oa->addr16[7], u);
2745 * Need to modulate the sequence numbers in the TCP SACK option
2746 * (credits to Krzysztof Pfaff for report and patch)
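/*
 * A SACK option is laid out as kind (5), length, and then pairs of
 * 32-bit left/right sequence edges.  Because those edges are sequence
 * numbers, a state that modulates th_seq/th_ack must shift them by the
 * same seqdiff, which is what the loop below does block by block.
 */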
2749 pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
2750 struct tcphdr *th, struct pf_state_peer *dst)
2752 int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
2753 u_int8_t opts[TCP_MAXOLEN], *opt = opts;
2754 int copyback = 0, i, olen;
2755 struct sackblk sack;
2757 #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
2758 if (hlen < TCPOLEN_SACKLEN ||
2759 !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
2762 while (hlen >= TCPOLEN_SACKLEN) {
2763 size_t startoff = opt - opts;
2766 case TCPOPT_EOL: /* FALLTHROUGH */
2774 if (olen >= TCPOLEN_SACKLEN) {
2775 for (i = 2; i + TCPOLEN_SACK <= olen;
2776 i += TCPOLEN_SACK) {
2777 memcpy(&sack, &opt[i], sizeof(sack));
2778 pf_patch_32_unaligned(m,
2779 &th->th_sum, &sack.start,
2780 htonl(ntohl(sack.start) - dst->seqdiff),
2781 PF_ALGNMNT(startoff),
2783 pf_patch_32_unaligned(m, &th->th_sum,
2785 htonl(ntohl(sack.end) - dst->seqdiff),
2786 PF_ALGNMNT(startoff),
2788 memcpy(&opt[i], &sack, sizeof(sack));
2802 m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts);
2807 pf_build_tcp(const struct pf_krule *r, sa_family_t af,
2808 const struct pf_addr *saddr, const struct pf_addr *daddr,
2809 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
2810 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
2816 struct ip *h = NULL;
2819 struct ip6_hdr *h6 = NULL;
2823 struct pf_mtag *pf_mtag;
2828 /* maximum segment size tcp option */
2829 tlen = sizeof(struct tcphdr);
2836 len = sizeof(struct ip) + tlen;
2841 len = sizeof(struct ip6_hdr) + tlen;
2845 panic("%s: unsupported af %d", __func__, af);
2848 m = m_gethdr(M_NOWAIT, MT_DATA);
2853 mac_netinet_firewall_send(m);
2855 if ((pf_mtag = pf_get_mtag(m)) == NULL) {
2860 m->m_flags |= M_SKIP_FIREWALL;
2861 pf_mtag->tag = rtag;
2863 if (r != NULL && r->rtableid >= 0)
2864 M_SETFIB(m, r->rtableid);
2867 if (r != NULL && r->qid) {
2868 pf_mtag->qid = r->qid;
2870 /* add hints for ecn */
2871 pf_mtag->hdr = mtod(m, struct ip *);
2874 m->m_data += max_linkhdr;
2875 m->m_pkthdr.len = m->m_len = len;
2876 /* The rest of the stack assumes a rcvif, so provide one.
2877 * This is a locally generated packet, so .. close enough. */
2878 m->m_pkthdr.rcvif = V_loif;
2879 bzero(m->m_data, len);
2883 h = mtod(m, struct ip *);
2885 /* IP header fields included in the TCP checksum */
2886 h->ip_p = IPPROTO_TCP;
2887 h->ip_len = htons(tlen);
2888 h->ip_src.s_addr = saddr->v4.s_addr;
2889 h->ip_dst.s_addr = daddr->v4.s_addr;
2891 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
2896 h6 = mtod(m, struct ip6_hdr *);
2898 /* IP header fields included in the TCP checksum */
2899 h6->ip6_nxt = IPPROTO_TCP;
2900 h6->ip6_plen = htons(tlen);
2901 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
2902 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
2904 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
2910 th->th_sport = sport;
2911 th->th_dport = dport;
2912 th->th_seq = htonl(seq);
2913 th->th_ack = htonl(ack);
2914 th->th_off = tlen >> 2;
2915 th->th_flags = flags;
2916 th->th_win = htons(win);
2919 opt = (char *)(th + 1);
opt[0] = TCPOPT_MAXSEG;
opt[1] = 4;
/* kind, len, then the 16-bit MSS in network byte order */
mss = htons(mss);
bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
2930 th->th_sum = in_cksum(m, len);
2932 /* Finish the IP header */
2934 h->ip_hl = sizeof(*h) >> 2;
2935 h->ip_tos = IPTOS_LOWDELAY;
2936 h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0);
2937 h->ip_len = htons(len);
2938 h->ip_ttl = ttl ? ttl : V_ip_defttl;
2945 th->th_sum = in6_cksum(m, IPPROTO_TCP,
2946 sizeof(struct ip6_hdr), tlen);
2948 h6->ip6_vfc |= IPV6_VERSION;
2949 h6->ip6_hlim = IPV6_DEFHLIM;
2958 pf_send_tcp(const struct pf_krule *r, sa_family_t af,
2959 const struct pf_addr *saddr, const struct pf_addr *daddr,
2960 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
2961 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
2964 struct pf_send_entry *pfse;
2967 m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, flags,
2968 win, mss, ttl, tag, rtag);
2972 /* Allocate outgoing queue entry, mbuf and mbuf tag. */
2973 pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
2982 pfse->pfse_type = PFSE_IP;
2987 pfse->pfse_type = PFSE_IP6;
2997 pf_return(struct pf_krule *r, struct pf_krule *nr, struct pf_pdesc *pd,
2998 struct pf_state_key *sk, int off, struct mbuf *m, struct tcphdr *th,
2999 struct pfi_kkif *kif, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen,
3002 struct pf_addr * const saddr = pd->src;
3003 struct pf_addr * const daddr = pd->dst;
3004 sa_family_t af = pd->af;
3006 /* undo NAT changes, if they have taken place */
3008 PF_ACPY(saddr, &sk->addr[pd->sidx], af);
3009 PF_ACPY(daddr, &sk->addr[pd->didx], af);
3011 *pd->sport = sk->port[pd->sidx];
3013 *pd->dport = sk->port[pd->didx];
3015 *pd->proto_sum = bproto_sum;
3017 *pd->ip_sum = bip_sum;
3018 m_copyback(m, off, hdrlen, pd->hdr.any);
3020 if (pd->proto == IPPROTO_TCP &&
3021 ((r->rule_flag & PFRULE_RETURNRST) ||
3022 (r->rule_flag & PFRULE_RETURN)) &&
3023 !(th->th_flags & TH_RST)) {
3024 u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
3036 h4 = mtod(m, struct ip *);
3037 len = ntohs(h4->ip_len) - off;
3042 h6 = mtod(m, struct ip6_hdr *);
3043 len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
3048 if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
3049 REASON_SET(reason, PFRES_PROTCKSUM);
3051 if (th->th_flags & TH_SYN)
3053 if (th->th_flags & TH_FIN)
3055 pf_send_tcp(r, af, pd->dst,
3056 pd->src, th->th_dport, th->th_sport,
3057 ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3058 r->return_ttl, 1, 0);
3060 } else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
3062 pf_send_icmp(m, r->return_icmp >> 8,
3063 r->return_icmp & 255, af, r);
3064 else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
3066 pf_send_icmp(m, r->return_icmp6 >> 8,
3067 r->return_icmp6 & 255, af, r);
3071 pf_match_ieee8021q_pcp(u_int8_t prio, struct mbuf *m)
3076 mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL);
3080 if (prio == PF_PRIO_ZERO)
3083 mpcp = *(uint8_t *)(mtag + 1);
3085 return (mpcp == prio);
3089 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
3092 struct pf_send_entry *pfse;
3094 struct pf_mtag *pf_mtag;
3096 /* Allocate outgoing queue entry, mbuf and mbuf tag. */
3097 pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
3101 if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) {
3102 free(pfse, M_PFTEMP);
3106 if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
3107 free(pfse, M_PFTEMP);
3111 m0->m_flags |= M_SKIP_FIREWALL;
3113 if (r->rtableid >= 0)
3114 M_SETFIB(m0, r->rtableid);
3118 pf_mtag->qid = r->qid;
3119 /* add hints for ecn */
3120 pf_mtag->hdr = mtod(m0, struct ip *);
3127 pfse->pfse_type = PFSE_ICMP;
3132 pfse->pfse_type = PFSE_ICMP6;
3137 pfse->icmpopts.type = type;
3138 pfse->icmpopts.code = code;
3143 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
* If n is 0, they match if they are equal. If n is != 0, they match if they
* are different.
*/
3148 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
3149 struct pf_addr *b, sa_family_t af)
3156 if ((a->addr32[0] & m->addr32[0]) ==
3157 (b->addr32[0] & m->addr32[0]))
3163 if (((a->addr32[0] & m->addr32[0]) ==
3164 (b->addr32[0] & m->addr32[0])) &&
3165 ((a->addr32[1] & m->addr32[1]) ==
3166 (b->addr32[1] & m->addr32[1])) &&
3167 ((a->addr32[2] & m->addr32[2]) ==
3168 (b->addr32[2] & m->addr32[2])) &&
3169 ((a->addr32[3] & m->addr32[3]) ==
3170 (b->addr32[3] & m->addr32[3])))
3189 * Return 1 if b <= a <= e, otherwise return 0.
3192 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
3193 struct pf_addr *a, sa_family_t af)
3198 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
3199 (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
3208 for (i = 0; i < 4; ++i)
3209 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
3211 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
3214 for (i = 0; i < 4; ++i)
3215 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
3217 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
3227 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
3231 return ((p > a1) && (p < a2));
3233 return ((p < a1) || (p > a2));
3235 return ((p >= a1) && (p <= a2));
3249 return (0); /* never reached */
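/*
 * Example: a "port 2000 >< 3000" rule compiles to op PF_OP_IRG with
 * a1 = 2000 and a2 = 3000, matching only 2000 < p < 3000; the inclusive
 * "2000:3000" form maps to PF_OP_RRG, and "<>" to PF_OP_XRG.
 */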
3253 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
3258 return (pf_match(op, a1, a2, p));
3262 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
3264 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
3266 return (pf_match(op, a1, a2, u));
3270 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
3272 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
3274 return (pf_match(op, a1, a2, g));
3278 pf_match_tag(struct mbuf *m, struct pf_krule *r, int *tag, int mtag)
3283 return ((!r->match_tag_not && r->match_tag == *tag) ||
3284 (r->match_tag_not && r->match_tag != *tag));
3288 pf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag)
3291 KASSERT(tag > 0, ("%s: tag %d", __func__, tag));
3293 if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(m)) == NULL))
3296 pd->pf_mtag->tag = tag;
3301 #define PF_ANCHOR_STACKSIZE 32
3302 struct pf_kanchor_stackframe {
3303 struct pf_kruleset *rs;
3304 struct pf_krule *r; /* XXX: + match bit */
3305 struct pf_kanchor *child;
* XXX: We rely on malloc(9) returning pointer-aligned addresses.
3311 #define PF_ANCHORSTACK_MATCH 0x00000001
3312 #define PF_ANCHORSTACK_MASK (PF_ANCHORSTACK_MATCH)
3314 #define PF_ANCHOR_MATCH(f) ((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
3315 #define PF_ANCHOR_RULE(f) (struct pf_krule *) \
3316 ((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
3317 #define PF_ANCHOR_SET_MATCH(f) do { (f)->r = (void *) \
3318 ((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \
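/*
 * The per-frame "match" flag lives in the low bit of the rule pointer,
 * which is safe only while that pointer is at least 2-byte aligned (the
 * XXX above).  The round trip, with hypothetical locals:
 *
 *	f->r = rule;			- stored with low bit clear
 *	PF_ANCHOR_SET_MATCH(f);		- OR in the match bit
 *	rule = PF_ANCHOR_RULE(f);	- mask the bit back off
 *	if (PF_ANCHOR_MATCH(f)) ...	- test it
 */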
3322 pf_step_into_anchor(struct pf_kanchor_stackframe *stack, int *depth,
3323 struct pf_kruleset **rs, int n, struct pf_krule **r, struct pf_krule **a,
3326 struct pf_kanchor_stackframe *f;
3332 if (*depth >= PF_ANCHOR_STACKSIZE) {
3333 printf("%s: anchor stack overflow on %s\n",
3334 __func__, (*r)->anchor->name);
3335 *r = TAILQ_NEXT(*r, entries);
3337 } else if (*depth == 0 && a != NULL)
3339 f = stack + (*depth)++;
3342 if ((*r)->anchor_wildcard) {
3343 struct pf_kanchor_node *parent = &(*r)->anchor->children;
3345 if ((f->child = RB_MIN(pf_kanchor_node, parent)) == NULL) {
3349 *rs = &f->child->ruleset;
3352 *rs = &(*r)->anchor->ruleset;
3354 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
3358 pf_step_out_of_anchor(struct pf_kanchor_stackframe *stack, int *depth,
3359 struct pf_kruleset **rs, int n, struct pf_krule **r, struct pf_krule **a,
3362 struct pf_kanchor_stackframe *f;
3363 struct pf_krule *fr;
3371 f = stack + *depth - 1;
3372 fr = PF_ANCHOR_RULE(f);
3373 if (f->child != NULL) {
3375 * This block traverses through
3376 * a wildcard anchor.
3378 if (match != NULL && *match) {
3380 * If any of "*" matched, then
* "foo/ *" matched, mark frame
* appropriately.
*/
3384 PF_ANCHOR_SET_MATCH(f);
3387 f->child = RB_NEXT(pf_kanchor_node,
3388 &fr->anchor->children, f->child);
3389 if (f->child != NULL) {
3390 *rs = &f->child->ruleset;
3391 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
3399 if (*depth == 0 && a != NULL)
3402 if (PF_ANCHOR_MATCH(f) || (match != NULL && *match))
3404 *r = TAILQ_NEXT(fr, entries);
3405 } while (*r == NULL);
3410 struct pf_keth_anchor_stackframe {
3411 struct pf_keth_ruleset *rs;
3412 struct pf_keth_rule *r; /* XXX: + match bit */
3413 struct pf_keth_anchor *child;
3416 #define PF_ETH_ANCHOR_MATCH(f) ((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
3417 #define PF_ETH_ANCHOR_RULE(f) (struct pf_keth_rule *) \
3418 ((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
3419 #define PF_ETH_ANCHOR_SET_MATCH(f) do { (f)->r = (void *) \
3420 ((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \
3424 pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
3425 struct pf_keth_ruleset **rs, struct pf_keth_rule **r,
3426 struct pf_keth_rule **a, int *match)
3428 struct pf_keth_anchor_stackframe *f;
3434 if (*depth >= PF_ANCHOR_STACKSIZE) {
3435 printf("%s: anchor stack overflow on %s\n",
3436 __func__, (*r)->anchor->name);
3437 *r = TAILQ_NEXT(*r, entries);
3439 } else if (*depth == 0 && a != NULL)
3441 f = stack + (*depth)++;
3444 if ((*r)->anchor_wildcard) {
3445 struct pf_keth_anchor_node *parent = &(*r)->anchor->children;
3447 if ((f->child = RB_MIN(pf_keth_anchor_node, parent)) == NULL) {
3451 *rs = &f->child->ruleset;
3454 *rs = &(*r)->anchor->ruleset;
3456 *r = TAILQ_FIRST((*rs)->active.rules);
3460 pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
3461 struct pf_keth_ruleset **rs, struct pf_keth_rule **r,
3462 struct pf_keth_rule **a, int *match)
3464 struct pf_keth_anchor_stackframe *f;
3465 struct pf_keth_rule *fr;
3473 f = stack + *depth - 1;
3474 fr = PF_ETH_ANCHOR_RULE(f);
3475 if (f->child != NULL) {
3477 * This block traverses through
3478 * a wildcard anchor.
3480 if (match != NULL && *match) {
3482 * If any of "*" matched, then
* "foo/ *" matched, mark frame
* appropriately.
*/
3486 PF_ETH_ANCHOR_SET_MATCH(f);
3489 f->child = RB_NEXT(pf_keth_anchor_node,
3490 &fr->anchor->children, f->child);
3491 if (f->child != NULL) {
3492 *rs = &f->child->ruleset;
3493 *r = TAILQ_FIRST((*rs)->active.rules);
3501 if (*depth == 0 && a != NULL)
3504 if (PF_ETH_ANCHOR_MATCH(f) || (match != NULL && *match))
3506 *r = TAILQ_NEXT(fr, entries);
3507 } while (*r == NULL);
3514 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
3515 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
3520 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3521 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
3525 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3526 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
3527 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
3528 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
3529 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
3530 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
3531 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
3532 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
3538 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
3543 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
3547 if (addr->addr32[3] == 0xffffffff) {
3548 addr->addr32[3] = 0;
3549 if (addr->addr32[2] == 0xffffffff) {
3550 addr->addr32[2] = 0;
3551 if (addr->addr32[1] == 0xffffffff) {
3552 addr->addr32[1] = 0;
3554 htonl(ntohl(addr->addr32[0]) + 1);
3557 htonl(ntohl(addr->addr32[1]) + 1);
3560 htonl(ntohl(addr->addr32[2]) + 1);
3563 htonl(ntohl(addr->addr32[3]) + 1);
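/*
 * Example: for AF_INET6, incrementing an address whose low 96 bits are
 * all ones zeroes addr32[3], addr32[2] and addr32[1] and bumps
 * addr32[0] by one: the address behaves like a 128-bit ripple-carry
 * counter kept in network byte order.
 */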
3570 pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a)
3577 a->dnpipe = r->dnpipe;
3579 a->dnpipe = r->dnrpipe;
3580 if (r->free_flags & PFRULE_DN_IS_PIPE)
3581 a->flags |= PFRULE_DN_IS_PIPE;
3585 pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m)
3587 struct pf_addr *saddr, *daddr;
3588 u_int16_t sport, dport;
3589 struct inpcbinfo *pi;
3592 pd->lookup.uid = UID_MAX;
3593 pd->lookup.gid = GID_MAX;
3595 switch (pd->proto) {
3597 sport = pd->hdr.tcp.th_sport;
3598 dport = pd->hdr.tcp.th_dport;
3602 sport = pd->hdr.udp.uh_sport;
3603 dport = pd->hdr.udp.uh_dport;
3609 if (direction == PF_IN) {
3624 inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4,
3625 dport, INPLOOKUP_RLOCKPCB, NULL, m);
3627 inp = in_pcblookup_mbuf(pi, saddr->v4, sport,
3628 daddr->v4, dport, INPLOOKUP_WILDCARD |
3629 INPLOOKUP_RLOCKPCB, NULL, m);
3637 inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6,
3638 dport, INPLOOKUP_RLOCKPCB, NULL, m);
3640 inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport,
3641 &daddr->v6, dport, INPLOOKUP_WILDCARD |
3642 INPLOOKUP_RLOCKPCB, NULL, m);
3652 INP_RLOCK_ASSERT(inp);
3653 pd->lookup.uid = inp->inp_cred->cr_uid;
3654 pd->lookup.gid = inp->inp_cred->cr_groups[0];
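/*
 * Parse the TCP options of a SYN for the window scale option.  The
 * result has PF_WSCALE_FLAG OR'd in, so callers can distinguish an
 * explicitly negotiated wscale of 0 from the option being absent.
 */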
3661 pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3665 u_int8_t *opt, optlen;
3666 u_int8_t wscale = 0;
3668 hlen = th_off << 2; /* hlen <= sizeof(hdr) */
3669 if (hlen <= sizeof(struct tcphdr))
3671 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3673 opt = hdr + sizeof(struct tcphdr);
3674 hlen -= sizeof(struct tcphdr);
3684 if (wscale > TCP_MAX_WINSHIFT)
3685 wscale = TCP_MAX_WINSHIFT;
3686 wscale |= PF_WSCALE_FLAG;
3701 pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3705 u_int8_t *opt, optlen;
3706 u_int16_t mss = V_tcp_mssdflt;
3708 hlen = th_off << 2; /* hlen <= sizeof(hdr) */
3709 if (hlen <= sizeof(struct tcphdr))
3711 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3713 opt = hdr + sizeof(struct tcphdr);
3714 hlen -= sizeof(struct tcphdr);
3715 while (hlen >= TCPOLEN_MAXSEG) {
3723 bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
3739 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
3741 struct nhop_object *nh;
3743 struct in6_addr dst6;
3754 hlen = sizeof(struct ip);
3755 nh = fib4_lookup(rtableid, addr->v4, 0, 0, 0);
3757 mss = nh->nh_mtu - hlen - sizeof(struct tcphdr);
3762 hlen = sizeof(struct ip6_hdr);
3763 in6_splitscope(&addr->v6, &dst6, &scopeid);
3764 nh = fib6_lookup(rtableid, &dst6, scopeid, 0, 0);
3766 mss = nh->nh_mtu - hlen - sizeof(struct tcphdr);
3771 mss = max(V_tcp_mssdflt, mss);
3772 mss = min(mss, offer);
3773 mss = max(mss, 64); /* sanity - at least max opt space */
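/*
 * Example: an IPv4 route with a 1500-byte MTU yields
 * 1500 - 20 (ip) - 20 (tcp) = 1460, leaving a 1460 offer untouched,
 * while a 1280-byte IPv6 path yields 1280 - 40 - 20 = 1220 and clamps
 * the offer down.
 */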
3778 pf_tcp_iss(struct pf_pdesc *pd)
3781 u_int32_t digest[4];
3783 if (V_pf_tcp_secret_init == 0) {
3784 arc4random_buf(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
3785 MD5Init(&V_pf_tcp_secret_ctx);
3786 MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
3787 sizeof(V_pf_tcp_secret));
3788 V_pf_tcp_secret_init = 1;
3791 ctx = V_pf_tcp_secret_ctx;
3793 MD5Update(&ctx, (char *)&pd->hdr.tcp.th_sport, sizeof(u_short));
3794 MD5Update(&ctx, (char *)&pd->hdr.tcp.th_dport, sizeof(u_short));
3795 if (pd->af == AF_INET6) {
3796 MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
3797 MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
3799 MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
3800 MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
3802 MD5Final((u_char *)digest, &ctx);
3803 V_pf_tcp_iss_off += 4096;
3804 #define ISN_RANDOM_INCREMENT (4096 - 1)
3805 return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
3807 #undef ISN_RANDOM_INCREMENT
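/*
 * This mirrors the RFC 6528 scheme: a keyed hash (MD5 over a boot-time
 * random secret plus the connection 4-tuple) separates the sequence
 * spaces of distinct connections, while the 4096 step and the bounded
 * random increment keep the ISN advancing monotonically.
 */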
3811 pf_match_eth_addr(const uint8_t *a, const struct pf_keth_rule_addr *r)
3815 /* Always matches if not set */
3819 for (int i = 0; i < ETHER_ADDR_LEN; i++) {
3820 if ((a[i] & r->mask[i]) != (r->addr[i] & r->mask[i])) {
3826 return (match ^ r->neg);
3830 pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0)
3832 struct mbuf *m = *m0;
3833 struct ether_header *e;
3834 struct pf_keth_rule *r, *rm, *a = NULL;
3835 struct pf_keth_ruleset *ruleset = NULL;
3836 struct pf_mtag *mtag;
3837 struct pf_keth_ruleq *rules;
3838 struct pf_addr *src, *dst;
3841 int asd = 0, match = 0;
3843 struct pf_keth_anchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE];
3845 MPASS(kif->pfik_ifp->if_vnet == curvnet);
3848 PF_RULES_RLOCK_TRACKER;
3850 SDT_PROBE3(pf, eth, test_rule, entry, dir, kif->pfik_ifp, m);
3852 ruleset = V_pf_keth;
3853 rules = ck_pr_load_ptr(&ruleset->active.rules);
3854 r = TAILQ_FIRST(rules);
3857 e = mtod(m, struct ether_header *);
3858 proto = ntohs(e->ether_type);
3862 case ETHERTYPE_IP: {
3864 m = m_pullup(m, sizeof(struct ether_header) +
3871 ip = mtodo(m, sizeof(struct ether_header));
3872 src = (struct pf_addr *)&ip->ip_src;
3873 dst = (struct pf_addr *)&ip->ip_dst;
3878 case ETHERTYPE_IPV6: {
3879 struct ip6_hdr *ip6;
3880 m = m_pullup(m, sizeof(struct ether_header) +
3881 sizeof(struct ip6_hdr));
3887 ip6 = mtodo(m, sizeof(struct ether_header));
3888 src = (struct pf_addr *)&ip6->ip6_src;
3889 dst = (struct pf_addr *)&ip6->ip6_dst;
3894 e = mtod(m, struct ether_header *);
3900 counter_u64_add(r->evaluations, 1);
3901 SDT_PROBE2(pf, eth, test_rule, test, r->nr, r);
3903 if (pfi_kkif_match(r->kif, kif) == r->ifnot) {
3904 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
3906 r = r->skip[PFE_SKIP_IFP].ptr;
3908 else if (r->direction && r->direction != dir) {
3909 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
3911 r = r->skip[PFE_SKIP_DIR].ptr;
3913 else if (r->proto && r->proto != proto) {
3914 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
3916 r = r->skip[PFE_SKIP_PROTO].ptr;
3918 else if (! pf_match_eth_addr(e->ether_shost, &r->src)) {
3919 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
3921 r = r->skip[PFE_SKIP_SRC_ADDR].ptr;
3923 else if (! pf_match_eth_addr(e->ether_dhost, &r->dst)) {
3924 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
3926 r = TAILQ_NEXT(r, entries);
3928 else if (af != 0 && PF_MISMATCHAW(&r->ipsrc.addr, src, af,
3929 r->ipsrc.neg, kif, M_GETFIB(m))) {
3930 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
3932 r = TAILQ_NEXT(r, entries);
3934 else if (af != 0 && PF_MISMATCHAW(&r->ipdst.addr, dst, af,
3935 r->ipdst.neg, kif, M_GETFIB(m))) {
3936 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
3938 r = TAILQ_NEXT(r, entries);
3941 if (r->anchor == NULL) {
3945 SDT_PROBE2(pf, eth, test_rule, match, r->nr, r);
3950 r = TAILQ_NEXT(r, entries);
3952 pf_step_into_keth_anchor(anchor_stack, &asd,
3953 &ruleset, &r, &a, &match);
3956 if (r == NULL && pf_step_out_of_keth_anchor(anchor_stack, &asd,
3957 &ruleset, &r, &a, &match))
3963 SDT_PROBE2(pf, eth, test_rule, final_match, (r != NULL ? r->nr : -1), r);
3965 /* Default to pass. */
3971 /* Execute action. */
3972 counter_u64_add(r->packets[dir == PF_OUT], 1);
3973 counter_u64_add(r->bytes[dir == PF_OUT], m_length(m, NULL));
3974 pf_update_timestamp(r);
3976 /* Shortcut. Don't tag if we're just going to drop anyway. */
3977 if (r->action == PF_DROP) {
3983 mtag = pf_get_mtag(m);
3986 counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
3993 mtag = pf_get_mtag(m);
3996 counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
/*
 * While dummynet supports handling Ethernet packets directly
 * it still wants some L3/L4 information, and we're not set up
 * to provide that here. Instead we'll do what we do for ALTQ
 * and merely mark the packet with the dummynet queue/pipe number.
 */
4009 mtag = pf_get_mtag(m);
4012 counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
4015 mtag->dnpipe = r->dnpipe;
4016 mtag->dnflags = r->dnflags;
4027 pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, int direction,
4028 struct pfi_kkif *kif, struct mbuf *m, int off, struct pf_pdesc *pd,
4029 struct pf_krule **am, struct pf_kruleset **rsm, struct inpcb *inp)
4031 struct pf_krule *nr = NULL;
4032 struct pf_addr * const saddr = pd->src;
4033 struct pf_addr * const daddr = pd->dst;
4034 sa_family_t af = pd->af;
4035 struct pf_krule *r, *a = NULL;
4036 struct pf_kruleset *ruleset = NULL;
4037 struct pf_ksrc_node *nsn = NULL;
4038 struct tcphdr *th = &pd->hdr.tcp;
4039 struct pf_state_key *sk = NULL, *nk = NULL;
4041 int rewrite = 0, hdrlen = 0;
4042 int tag = -1, rtableid = -1;
4046 u_int16_t sport = 0, dport = 0;
4047 u_int16_t bproto_sum = 0, bip_sum = 0;
4048 u_int8_t icmptype = 0, icmpcode = 0;
4049 struct pf_kanchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE];
4054 INP_LOCK_ASSERT(inp);
4055 pd->lookup.uid = inp->inp_cred->cr_uid;
4056 pd->lookup.gid = inp->inp_cred->cr_groups[0];
4057 pd->lookup.done = 1;
4060 switch (pd->proto) {
4062 sport = th->th_sport;
4063 dport = th->th_dport;
4064 hdrlen = sizeof(*th);
4067 sport = pd->hdr.udp.uh_sport;
4068 dport = pd->hdr.udp.uh_dport;
4069 hdrlen = sizeof(pd->hdr.udp);
4073 if (pd->af != AF_INET)
4075 sport = dport = pd->hdr.icmp.icmp_id;
4076 hdrlen = sizeof(pd->hdr.icmp);
4077 icmptype = pd->hdr.icmp.icmp_type;
4078 icmpcode = pd->hdr.icmp.icmp_code;
4080 if (icmptype == ICMP_UNREACH ||
4081 icmptype == ICMP_SOURCEQUENCH ||
4082 icmptype == ICMP_REDIRECT ||
4083 icmptype == ICMP_TIMXCEED ||
4084 icmptype == ICMP_PARAMPROB)
4089 case IPPROTO_ICMPV6:
4092 sport = dport = pd->hdr.icmp6.icmp6_id;
4093 hdrlen = sizeof(pd->hdr.icmp6);
4094 icmptype = pd->hdr.icmp6.icmp6_type;
4095 icmpcode = pd->hdr.icmp6.icmp6_code;
4097 if (icmptype == ICMP6_DST_UNREACH ||
4098 icmptype == ICMP6_PACKET_TOO_BIG ||
4099 icmptype == ICMP6_TIME_EXCEEDED ||
4100 icmptype == ICMP6_PARAM_PROB)
4105 sport = dport = hdrlen = 0;
4109 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
4111 /* check packet for BINAT/NAT/RDR */
4112 if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk,
4113 &nk, saddr, daddr, sport, dport, anchor_stack)) != NULL) {
4114 KASSERT(sk != NULL, ("%s: null sk", __func__));
4115 KASSERT(nk != NULL, ("%s: null nk", __func__));
4118 PFLOG_PACKET(kif, m, af, direction, PFRES_MATCH, nr, a,
4123 bip_sum = *pd->ip_sum;
4125 switch (pd->proto) {
4127 bproto_sum = th->th_sum;
4128 pd->proto_sum = &th->th_sum;
4130 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
4131 nk->port[pd->sidx] != sport) {
4132 pf_change_ap(m, saddr, &th->th_sport, pd->ip_sum,
4133 &th->th_sum, &nk->addr[pd->sidx],
4134 nk->port[pd->sidx], 0, af);
4135 pd->sport = &th->th_sport;
4136 sport = th->th_sport;
4139 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
4140 nk->port[pd->didx] != dport) {
4141 pf_change_ap(m, daddr, &th->th_dport, pd->ip_sum,
4142 &th->th_sum, &nk->addr[pd->didx],
4143 nk->port[pd->didx], 0, af);
4144 dport = th->th_dport;
4145 pd->dport = &th->th_dport;
4150 bproto_sum = pd->hdr.udp.uh_sum;
4151 pd->proto_sum = &pd->hdr.udp.uh_sum;
4153 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
4154 nk->port[pd->sidx] != sport) {
4155 pf_change_ap(m, saddr, &pd->hdr.udp.uh_sport,
4156 pd->ip_sum, &pd->hdr.udp.uh_sum,
4157 &nk->addr[pd->sidx],
4158 nk->port[pd->sidx], 1, af);
4159 sport = pd->hdr.udp.uh_sport;
4160 pd->sport = &pd->hdr.udp.uh_sport;
4163 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
4164 nk->port[pd->didx] != dport) {
4165 pf_change_ap(m, daddr, &pd->hdr.udp.uh_dport,
4166 pd->ip_sum, &pd->hdr.udp.uh_sum,
4167 &nk->addr[pd->didx],
4168 nk->port[pd->didx], 1, af);
4169 dport = pd->hdr.udp.uh_dport;
4170 pd->dport = &pd->hdr.udp.uh_dport;
4176 nk->port[0] = nk->port[1];
4177 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
4178 pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
4179 nk->addr[pd->sidx].v4.s_addr, 0);
4181 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
4182 pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
4183 nk->addr[pd->didx].v4.s_addr, 0);
4185 if (nk->port[1] != pd->hdr.icmp.icmp_id) {
4186 pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
4187 pd->hdr.icmp.icmp_cksum, sport,
4189 pd->hdr.icmp.icmp_id = nk->port[1];
4190 pd->sport = &pd->hdr.icmp.icmp_id;
4192 m_copyback(m, off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp);
4196 case IPPROTO_ICMPV6:
4197 nk->port[0] = nk->port[1];
4198 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
4199 pf_change_a6(saddr, &pd->hdr.icmp6.icmp6_cksum,
4200 &nk->addr[pd->sidx], 0);
4202 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
4203 pf_change_a6(daddr, &pd->hdr.icmp6.icmp6_cksum,
4204 &nk->addr[pd->didx], 0);
4213 &nk->addr[pd->sidx], AF_INET))
4214 pf_change_a(&saddr->v4.s_addr,
4216 nk->addr[pd->sidx].v4.s_addr, 0);
4219 &nk->addr[pd->didx], AF_INET))
4220 pf_change_a(&daddr->v4.s_addr,
4222 nk->addr[pd->didx].v4.s_addr, 0);
4228 &nk->addr[pd->sidx], AF_INET6))
4229 PF_ACPY(saddr, &nk->addr[pd->sidx], af);
4232 &nk->addr[pd->didx], AF_INET6))
4233 PF_ACPY(daddr, &nk->addr[pd->didx], af);
4245 pf_counter_u64_add(&r->evaluations, 1);
4246 if (pfi_kkif_match(r->kif, kif) == r->ifnot)
4247 r = r->skip[PF_SKIP_IFP].ptr;
4248 else if (r->direction && r->direction != direction)
4249 r = r->skip[PF_SKIP_DIR].ptr;
4250 else if (r->af && r->af != af)
4251 r = r->skip[PF_SKIP_AF].ptr;
4252 else if (r->proto && r->proto != pd->proto)
4253 r = r->skip[PF_SKIP_PROTO].ptr;
4254 else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
4255 r->src.neg, kif, M_GETFIB(m)))
4256 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
4257 /* tcp/udp only. port_op always 0 in other cases */
4258 else if (r->src.port_op && !pf_match_port(r->src.port_op,
4259 r->src.port[0], r->src.port[1], sport))
4260 r = r->skip[PF_SKIP_SRC_PORT].ptr;
4261 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
4262 r->dst.neg, NULL, M_GETFIB(m)))
4263 r = r->skip[PF_SKIP_DST_ADDR].ptr;
4264 /* tcp/udp only. port_op always 0 in other cases */
4265 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
4266 r->dst.port[0], r->dst.port[1], dport))
4267 r = r->skip[PF_SKIP_DST_PORT].ptr;
4268 /* icmp only. type always 0 in other cases */
4269 else if (r->type && r->type != icmptype + 1)
4270 r = TAILQ_NEXT(r, entries);
/* icmp only. code always 0 in other cases */
4272 else if (r->code && r->code != icmpcode + 1)
4273 r = TAILQ_NEXT(r, entries);
4274 else if (r->tos && !(r->tos == pd->tos))
4275 r = TAILQ_NEXT(r, entries);
4276 else if (r->rule_flag & PFRULE_FRAGMENT)
4277 r = TAILQ_NEXT(r, entries);
4278 else if (pd->proto == IPPROTO_TCP &&
4279 (r->flagset & th->th_flags) != r->flags)
4280 r = TAILQ_NEXT(r, entries);
4281 /* tcp/udp only. uid.op always 0 in other cases */
4282 else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
4283 pf_socket_lookup(direction, pd, m), 1)) &&
4284 !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
4286 r = TAILQ_NEXT(r, entries);
4287 /* tcp/udp only. gid.op always 0 in other cases */
4288 else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
4289 pf_socket_lookup(direction, pd, m), 1)) &&
4290 !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
4292 r = TAILQ_NEXT(r, entries);
4294 !pf_match_ieee8021q_pcp(r->prio, m))
4295 r = TAILQ_NEXT(r, entries);
4297 r->prob <= arc4random())
4298 r = TAILQ_NEXT(r, entries);
4299 else if (r->match_tag && !pf_match_tag(m, r, &tag,
4300 pd->pf_mtag ? pd->pf_mtag->tag : 0))
4301 r = TAILQ_NEXT(r, entries);
4302 else if (r->os_fingerprint != PF_OSFP_ANY &&
4303 (pd->proto != IPPROTO_TCP || !pf_osfp_match(
4304 pf_osfp_fingerprint(pd, m, off, th),
4305 r->os_fingerprint)))
4306 r = TAILQ_NEXT(r, entries);
4310 if (r->rtableid >= 0)
4311 rtableid = r->rtableid;
4312 if (r->anchor == NULL) {
4313 if (r->action == PF_MATCH) {
4314 pf_counter_u64_critical_enter();
4315 pf_counter_u64_add_protected(&r->packets[direction == PF_OUT], 1);
4316 pf_counter_u64_add_protected(&r->bytes[direction == PF_OUT], pd->tot_len);
4317 pf_counter_u64_critical_exit();
4318 pf_rule_to_actions(r, &pd->act);
4320 PFLOG_PACKET(kif, m, af,
4321 direction, PFRES_MATCH, r,
4331 r = TAILQ_NEXT(r, entries);
4333 pf_step_into_anchor(anchor_stack, &asd,
4334 &ruleset, PF_RULESET_FILTER, &r, &a,
4337 if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd,
4338 &ruleset, PF_RULESET_FILTER, &r, &a, &match))
4345 REASON_SET(&reason, PFRES_MATCH);
4347 /* apply actions for last matching pass/block rule */
4348 pf_rule_to_actions(r, &pd->act);
4352 m_copyback(m, off, hdrlen, pd->hdr.any);
4353 PFLOG_PACKET(kif, m, af, direction, reason, r, a,
4357 if ((r->action == PF_DROP) &&
4358 ((r->rule_flag & PFRULE_RETURNRST) ||
4359 (r->rule_flag & PFRULE_RETURNICMP) ||
4360 (r->rule_flag & PFRULE_RETURN))) {
4361 pf_return(r, nr, pd, sk, off, m, th, kif, bproto_sum,
4362 bip_sum, hdrlen, &reason);
4365 if (r->action == PF_DROP)
4368 if (tag > 0 && pf_tag_packet(m, pd, tag)) {
4369 REASON_SET(&reason, PFRES_MEMORY);
4373 M_SETFIB(m, rtableid);
4375 if (!state_icmp && (r->keep_state || nr != NULL ||
4376 (pd->flags & PFDESC_TCP_NORM))) {
4378 action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off,
4379 sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum,
4381 if (action != PF_PASS) {
4382 if (action == PF_DROP &&
4383 (r->rule_flag & PFRULE_RETURN))
4384 pf_return(r, nr, pd, sk, off, m, th, kif,
4385 bproto_sum, bip_sum, hdrlen, &reason);
4390 uma_zfree(V_pf_state_key_z, sk);
4392 uma_zfree(V_pf_state_key_z, nk);
4395 /* copy back packet headers if we performed NAT operations */
4397 m_copyback(m, off, hdrlen, pd->hdr.any);
4399 if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
4400 direction == PF_OUT &&
4401 V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, m))
* We want the state created, but we don't
* want to send this in case a partner
* firewall has to know about it to allow
* replies through it.
4414 uma_zfree(V_pf_state_key_z, sk);
4416 uma_zfree(V_pf_state_key_z, nk);
4421 pf_create_state(struct pf_krule *r, struct pf_krule *nr, struct pf_krule *a,
4422 struct pf_pdesc *pd, struct pf_ksrc_node *nsn, struct pf_state_key *nk,
4423 struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport,
4424 u_int16_t dport, int *rewrite, struct pfi_kkif *kif, struct pf_kstate **sm,
4425 int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen)
4427 struct pf_kstate *s = NULL;
4428 struct pf_ksrc_node *sn = NULL;
4429 struct tcphdr *th = &pd->hdr.tcp;
4430 u_int16_t mss = V_tcp_mssdflt;
4433 /* check maximums */
4434 if (r->max_states &&
4435 (counter_u64_fetch(r->states_cur) >= r->max_states)) {
4436 counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1);
4437 REASON_SET(&reason, PFRES_MAXSTATES);
4440 /* src node for filter rule */
4441 if ((r->rule_flag & PFRULE_SRCTRACK ||
4442 r->rpool.opts & PF_POOL_STICKYADDR) &&
4443 pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
4444 REASON_SET(&reason, PFRES_SRCLIMIT);
4447 /* src node for translation rule */
4448 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
4449 pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
4450 REASON_SET(&reason, PFRES_SRCLIMIT);
4453 s = pf_alloc_state(M_NOWAIT);
4455 REASON_SET(&reason, PFRES_MEMORY);
4459 s->nat_rule.ptr = nr;
4461 STATE_INC_COUNTERS(s);
4463 s->state_flags |= PFSTATE_ALLOWOPTS;
4464 if (r->rule_flag & PFRULE_STATESLOPPY)
4465 s->state_flags |= PFSTATE_SLOPPY;
4466 s->log = r->log & PF_LOG_ALL;
4467 s->sync_state = PFSYNC_S_NONE;
4468 s->qid = pd->act.qid;
4469 s->pqid = pd->act.pqid;
4470 s->dnpipe = pd->act.dnpipe;
4471 s->dnrpipe = pd->act.dnrpipe;
4472 s->state_flags |= pd->act.flags;
4474 s->log |= nr->log & PF_LOG_ALL;
4475 switch (pd->proto) {
4477 s->src.seqlo = ntohl(th->th_seq);
4478 s->src.seqhi = s->src.seqlo + pd->p_len + 1;
4479 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
4480 r->keep_state == PF_STATE_MODULATE) {
4481 /* Generate sequence number modulator */
4482 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
4485 pf_change_proto_a(m, &th->th_seq, &th->th_sum,
4486 htonl(s->src.seqlo + s->src.seqdiff), 0);
4490 if (th->th_flags & TH_SYN) {
4492 s->src.wscale = pf_get_wscale(m, off,
4493 th->th_off, pd->af);
4495 s->src.max_win = MAX(ntohs(th->th_win), 1);
4496 if (s->src.wscale & PF_WSCALE_MASK) {
4497 /* Remove scale factor from initial window */
4498 int win = s->src.max_win;
4499 win += 1 << (s->src.wscale & PF_WSCALE_MASK);
4500 s->src.max_win = (win - 1) >>
4501 (s->src.wscale & PF_WSCALE_MASK);
4503 if (th->th_flags & TH_FIN)
4507 pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT);
4508 pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED);
4509 s->timeout = PFTM_TCP_FIRST_PACKET;
4510 atomic_add_32(&V_pf_status.states_halfopen, 1);
4513 pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE);
4514 pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC);
4515 s->timeout = PFTM_UDP_FIRST_PACKET;
4519 case IPPROTO_ICMPV6:
4521 s->timeout = PFTM_ICMP_FIRST_PACKET;
4524 pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE);
4525 pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC);
4526 s->timeout = PFTM_OTHER_FIRST_PACKET;
4530 if (pf_map_addr(pd->af, r, pd->src, &s->rt_addr, NULL, &sn)) {
4531 REASON_SET(&reason, PFRES_MAPFAILED);
4532 pf_src_tree_remove_state(s);
4533 s->timeout = PFTM_UNLINKED;
4534 STATE_DEC_COUNTERS(s);
4538 s->rt_kif = r->rpool.cur->kif;
4541 s->creation = time_uptime;
4542 s->expire = time_uptime;
4547 /* XXX We only modify one side for now. */
4548 PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
4549 s->nat_src_node = nsn;
4551 if (pd->proto == IPPROTO_TCP) {
4552 if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
4553 off, pd, th, &s->src, &s->dst)) {
4554 REASON_SET(&reason, PFRES_MEMORY);
4555 pf_src_tree_remove_state(s);
4556 s->timeout = PFTM_UNLINKED;
4557 STATE_DEC_COUNTERS(s);
4561 if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
4562 pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
4563 &s->src, &s->dst, rewrite)) {
4564 /* This really shouldn't happen!!! */
4565 DPFPRINTF(PF_DEBUG_URGENT,
4566 ("pf_normalize_tcp_stateful failed on first "
4568 pf_src_tree_remove_state(s);
4569 s->timeout = PFTM_UNLINKED;
4570 STATE_DEC_COUNTERS(s);
4575 s->direction = pd->dir;
* sk/nk could already have been set up by pf_get_translation().
4581 KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p",
4582 __func__, nr, sk, nk));
4583 sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport);
4588 KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p",
4589 __func__, nr, sk, nk));
4591 /* Swap sk/nk for PF_OUT. */
4592 if (pf_state_insert(BOUND_IFACE(r, kif), kif,
4593 (pd->dir == PF_IN) ? sk : nk,
4594 (pd->dir == PF_IN) ? nk : sk, s)) {
4595 REASON_SET(&reason, PFRES_STATEINS);
4596 pf_src_tree_remove_state(s);
4597 s->timeout = PFTM_UNLINKED;
4598 STATE_DEC_COUNTERS(s);
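/*
 * SYN proxy: instead of forwarding the client's SYN, pf completes the
 * handshake itself with the SYN|ACK built below, and only opens the
 * connection to the real destination once the client's final ACK shows
 * the attempt is genuine.  The initial SYN is therefore dropped with
 * PFRES_SYNPROXY after the reply is queued.
 */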
4606 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
4607 TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
4608 pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
4609 /* undo NAT changes, if they have taken place */
4611 struct pf_state_key *skt = s->key[PF_SK_WIRE];
4612 if (pd->dir == PF_OUT)
4613 skt = s->key[PF_SK_STACK];
4614 PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
4615 PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
4617 *pd->sport = skt->port[pd->sidx];
4619 *pd->dport = skt->port[pd->didx];
4621 *pd->proto_sum = bproto_sum;
4623 *pd->ip_sum = bip_sum;
4624 m_copyback(m, off, hdrlen, pd->hdr.any);
4626 s->src.seqhi = htonl(arc4random());
4627 /* Find mss option */
4628 int rtid = M_GETFIB(m);
4629 mss = pf_get_mss(m, off, th->th_off, pd->af);
4630 mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
4631 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
4633 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
4634 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
4635 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0);
4636 REASON_SET(&reason, PFRES_SYNPROXY);
4637 return (PF_SYNPROXY_DROP);
4644 uma_zfree(V_pf_state_key_z, sk);
4646 uma_zfree(V_pf_state_key_z, nk);
4649 struct pf_srchash *sh;
4651 sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
4652 PF_HASHROW_LOCK(sh);
4653 if (--sn->states == 0 && sn->expire == 0) {
4654 pf_unlink_src_node(sn);
4655 uma_zfree(V_pf_sources_z, sn);
4657 V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
4659 PF_HASHROW_UNLOCK(sh);
4662 if (nsn != sn && nsn != NULL) {
4663 struct pf_srchash *sh;
4665 sh = &V_pf_srchash[pf_hashsrc(&nsn->addr, nsn->af)];
4666 PF_HASHROW_LOCK(sh);
4667 if (--nsn->states == 0 && nsn->expire == 0) {
4668 pf_unlink_src_node(nsn);
4669 uma_zfree(V_pf_sources_z, nsn);
4671 V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
4673 PF_HASHROW_UNLOCK(sh);
4680 pf_test_fragment(struct pf_krule **rm, int direction, struct pfi_kkif *kif,
4681 struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_krule **am,
4682 struct pf_kruleset **rsm)
4684 struct pf_krule *r, *a = NULL;
4685 struct pf_kruleset *ruleset = NULL;
4686 sa_family_t af = pd->af;
4691 struct pf_kanchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE];
4695 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
4697 pf_counter_u64_add(&r->evaluations, 1);
4698 if (pfi_kkif_match(r->kif, kif) == r->ifnot)
4699 r = r->skip[PF_SKIP_IFP].ptr;
4700 else if (r->direction && r->direction != direction)
4701 r = r->skip[PF_SKIP_DIR].ptr;
4702 else if (r->af && r->af != af)
4703 r = r->skip[PF_SKIP_AF].ptr;
4704 else if (r->proto && r->proto != pd->proto)
4705 r = r->skip[PF_SKIP_PROTO].ptr;
4706 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
4707 r->src.neg, kif, M_GETFIB(m)))
4708 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
4709 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
4710 r->dst.neg, NULL, M_GETFIB(m)))
4711 r = r->skip[PF_SKIP_DST_ADDR].ptr;
4712 else if (r->tos && !(r->tos == pd->tos))
4713 r = TAILQ_NEXT(r, entries);
4714 else if (r->os_fingerprint != PF_OSFP_ANY)
4715 r = TAILQ_NEXT(r, entries);
4716 else if (pd->proto == IPPROTO_UDP &&
4717 (r->src.port_op || r->dst.port_op))
4718 r = TAILQ_NEXT(r, entries);
4719 else if (pd->proto == IPPROTO_TCP &&
4720 (r->src.port_op || r->dst.port_op || r->flagset))
4721 r = TAILQ_NEXT(r, entries);
4722 else if ((pd->proto == IPPROTO_ICMP ||
4723 pd->proto == IPPROTO_ICMPV6) &&
4724 (r->type || r->code))
4725 r = TAILQ_NEXT(r, entries);
4727 !pf_match_ieee8021q_pcp(r->prio, m))
4728 r = TAILQ_NEXT(r, entries);
4729 else if (r->prob && r->prob <=
4730 (arc4random() % (UINT_MAX - 1) + 1))
4731 r = TAILQ_NEXT(r, entries);
4732 else if (r->match_tag && !pf_match_tag(m, r, &tag,
4733 pd->pf_mtag ? pd->pf_mtag->tag : 0))
4734 r = TAILQ_NEXT(r, entries);
4736 if (r->anchor == NULL) {
4737 if (r->action == PF_MATCH) {
4738 pf_counter_u64_critical_enter();
4739 pf_counter_u64_add_protected(&r->packets[direction == PF_OUT], 1);
4740 pf_counter_u64_add_protected(&r->bytes[direction == PF_OUT], pd->tot_len);
4741 pf_counter_u64_critical_exit();
4742 pf_rule_to_actions(r, &pd->act);
4744 PFLOG_PACKET(kif, m, af,
4745 direction, PFRES_MATCH, r,
4755 r = TAILQ_NEXT(r, entries);
4757 pf_step_into_anchor(anchor_stack, &asd,
4758 &ruleset, PF_RULESET_FILTER, &r, &a,
4761 if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd,
4762 &ruleset, PF_RULESET_FILTER, &r, &a, &match))
4769 REASON_SET(&reason, PFRES_MATCH);
4771 /* apply actions for last matching pass/block rule */
4772 pf_rule_to_actions(r, &pd->act);
4775 PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd,
4778 if (r->action != PF_PASS)
4781 if (tag > 0 && pf_tag_packet(m, pd, tag)) {
4782 REASON_SET(&reason, PFRES_MEMORY);
4790 pf_tcp_track_full(struct pf_kstate **state, struct pfi_kkif *kif,
4791 struct mbuf *m, int off, struct pf_pdesc *pd, u_short *reason,
4794 struct tcphdr *th = &pd->hdr.tcp;
4795 struct pf_state_peer *src, *dst;
4796 u_int16_t win = ntohs(th->th_win);
4797 u_int32_t ack, end, seq, orig_seq;
4798 u_int8_t sws, dws, psrc, pdst;
4801 if (pd->dir == (*state)->direction) {
4802 src = &(*state)->src;
4803 dst = &(*state)->dst;
4807 src = &(*state)->dst;
4808 dst = &(*state)->src;
4813 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
4814 sws = src->wscale & PF_WSCALE_MASK;
4815 dws = dst->wscale & PF_WSCALE_MASK;
4820 * Sequence tracking algorithm from Guido van Rooij's paper:
4821 * http://www.madison-gurkha.com/publications/tcp_filtering/
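* The idea, roughly: for each peer we track the edge of the sequence
* space it has used (seqlo) and the far edge of the window its packets
* may extend into (seqhi); a packet is acceptable only if its data and
* its ACK both fall within those bounds, give or take MAXACKWINDOW.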
4825 orig_seq = seq = ntohl(th->th_seq);
4826 if (src->seqlo == 0) {
4827 /* First packet from this end. Set its state */
4829 if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
4830 src->scrub == NULL) {
4831 if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
4832 REASON_SET(reason, PFRES_MEMORY);
4837 /* Deferred generation of sequence number modulator */
4838 if (dst->seqdiff && !src->seqdiff) {
4839 /* use random iss for the TCP server */
4840 while ((src->seqdiff = arc4random() - seq) == 0)
4842 ack = ntohl(th->th_ack) - dst->seqdiff;
4843 pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
4845 pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
4848 ack = ntohl(th->th_ack);
4851 end = seq + pd->p_len;
4852 if (th->th_flags & TH_SYN) {
4854 if (dst->wscale & PF_WSCALE_FLAG) {
4855 src->wscale = pf_get_wscale(m, off, th->th_off,
4857 if (src->wscale & PF_WSCALE_FLAG) {
/* Remove scale factor from initial window */
4860 sws = src->wscale & PF_WSCALE_MASK;
4861 win = ((u_int32_t)win + (1 << sws) - 1)
4863 dws = dst->wscale & PF_WSCALE_MASK;
4865 /* fixup other window */
4866 dst->max_win <<= dst->wscale &
4868 /* in case of a retrans SYN|ACK */
4873 if (th->th_flags & TH_FIN)
4877 if (src->state < TCPS_SYN_SENT)
4878 pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
4881 * May need to slide the window (seqhi may have been set by
4882 * the crappy stack check or if we picked up the connection
4883 * after establishment)
4885 if (src->seqhi == 1 ||
4886 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4887 src->seqhi = end + MAX(1, dst->max_win << dws);
4888 if (win > src->max_win)
4892 ack = ntohl(th->th_ack) - dst->seqdiff;
4894 /* Modulate sequence numbers */
4895 pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
4897 pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
4900 end = seq + pd->p_len;
4901 if (th->th_flags & TH_SYN)
4903 if (th->th_flags & TH_FIN)
4907 if ((th->th_flags & TH_ACK) == 0) {
4908 /* Let it pass through the ack skew check */
4910 } else if ((ack == 0 &&
4911 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4912 /* broken tcp stacks do not set ack */
4913 (dst->state < TCPS_SYN_SENT)) {
4915 * Many stacks (ours included) will set the ACK number in an
4916 * FIN|ACK if the SYN times out -- no sequence to ACK.
/* Ease sequencing restrictions on no-data packets */
4927 ackskew = dst->seqlo - ack;
4930 * Need to demodulate the sequence numbers in any TCP SACK options
4931 * (Selective ACK). We could optionally validate the SACK values
4932 * against the current ACK window, either forwards or backwards, but
4933 * I'm not confident that SACK has been implemented properly
4934 * everywhere. It wouldn't surprise me if several stacks accidentally
4935 * SACK too far backwards of previously ACKed data. There really aren't
4936 * any security implications of bad SACKing unless the target stack
4937 * doesn't validate the option length correctly. Someone trying to
* spoof into a TCP connection won't bother blindly sending SACK
* options anyway.
*/
4941 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4942 if (pf_modulate_sack(m, off, pd, th, dst))
4946 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
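/*
 * The checks below correspond, in order, to the digits printed by the
 * "State failure" diagnostic at the bottom of this function: 1-4 are
 * the strict window tests in the first branch, 5-6 the looser bounds
 * applied when picking up a connection mid-stream.
 */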
4947 if (SEQ_GEQ(src->seqhi, end) &&
4948 /* Last octet inside other's window space */
4949 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4950 /* Retrans: not more than one window back */
4951 (ackskew >= -MAXACKWINDOW) &&
4952 /* Acking not more than one reassembled fragment backwards */
4953 (ackskew <= (MAXACKWINDOW << sws)) &&
4954 /* Acking not more than one window forward */
4955 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4956 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
4957 (pd->flags & PFDESC_IP_REAS) == 0)) {
4958 /* Require an exact/+1 sequence match on resets when possible */
4960 if (dst->scrub || src->scrub) {
4961 if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4962 *state, src, dst, copyback))
4966 /* update max window */
4967 if (src->max_win < win)
4969 /* synchronize sequencing */
4970 if (SEQ_GT(end, src->seqlo))
4972 /* slide the window of what the other end can send */
4973 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4974 dst->seqhi = ack + MAX((win << sws), 1);
4977 if (th->th_flags & TH_SYN)
4978 if (src->state < TCPS_SYN_SENT)
4979 pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
4980 if (th->th_flags & TH_FIN)
4981 if (src->state < TCPS_CLOSING)
4982 pf_set_protostate(*state, psrc, TCPS_CLOSING);
4983 if (th->th_flags & TH_ACK) {
4984 if (dst->state == TCPS_SYN_SENT) {
4985 pf_set_protostate(*state, pdst,
4987 if (src->state == TCPS_ESTABLISHED &&
4988 (*state)->src_node != NULL &&
4989 pf_src_connlimit(state)) {
4990 REASON_SET(reason, PFRES_SRCLIMIT);
4993 } else if (dst->state == TCPS_CLOSING)
4994 pf_set_protostate(*state, pdst,
4997 if (th->th_flags & TH_RST)
4998 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);
5000 /* update expire time */
5001 (*state)->expire = time_uptime;
5002 if (src->state >= TCPS_FIN_WAIT_2 &&
5003 dst->state >= TCPS_FIN_WAIT_2)
5004 (*state)->timeout = PFTM_TCP_CLOSED;
5005 else if (src->state >= TCPS_CLOSING &&
5006 dst->state >= TCPS_CLOSING)
5007 (*state)->timeout = PFTM_TCP_FIN_WAIT;
5008 else if (src->state < TCPS_ESTABLISHED ||
5009 dst->state < TCPS_ESTABLISHED)
5010 (*state)->timeout = PFTM_TCP_OPENING;
5011 else if (src->state >= TCPS_CLOSING ||
5012 dst->state >= TCPS_CLOSING)
5013 (*state)->timeout = PFTM_TCP_CLOSING;
5015 (*state)->timeout = PFTM_TCP_ESTABLISHED;
5017 /* Fall through to PASS packet */
5019 } else if ((dst->state < TCPS_SYN_SENT ||
5020 dst->state >= TCPS_FIN_WAIT_2 ||
5021 src->state >= TCPS_FIN_WAIT_2) &&
5022 SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
5023 /* Within a window forward of the originating packet */
5024 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
5025 /* Within a window backward of the originating packet */
5028 * This currently handles three situations:
* 1) Stupid stacks will shotgun SYNs before their peer
*    replies.
* 2) When PF catches an already established stream (the
*    firewall rebooted, the state table was flushed, routes
*    changed...)
5034 * 3) Packets get funky immediately after the connection
5035 * closes (this should catch Solaris spurious ACK|FINs
5036 * that web servers like to spew after a close)
5038 * This must be a little more careful than the above code
5039 * since packet floods will also be caught here. We don't
5040 * update the TTL here to mitigate the damage of a packet
5041 * flood and so the same code can handle awkward establishment
5042 * and a loosened connection close.
5043 * In the establishment case, a correct peer response will
5044 * validate the connection, go through the normal state code
5045 * and keep updating the state TTL.
5048 if (V_pf_status.debug >= PF_DEBUG_MISC) {
5049 printf("pf: loose state match: ");
5050 pf_print_state(*state);
5051 pf_print_flags(th->th_flags);
5052 printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
5053 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
5054 pd->p_len, ackskew, (unsigned long long)(*state)->packets[0],
5055 (unsigned long long)(*state)->packets[1],
5056 pd->dir == PF_IN ? "in" : "out",
5057 pd->dir == (*state)->direction ? "fwd" : "rev");
5060 if (dst->scrub || src->scrub) {
5061 if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
5062 *state, src, dst, copyback))
5066 /* update max window */
5067 if (src->max_win < win)
5069 /* synchronize sequencing */
5070 if (SEQ_GT(end, src->seqlo))
5072 /* slide the window of what the other end can send */
5073 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
5074 dst->seqhi = ack + MAX((win << sws), 1);
5077 * Cannot set dst->seqhi here since this could be a shotgunned
5078 * SYN and not an already established connection.
5081 if (th->th_flags & TH_FIN)
5082 if (src->state < TCPS_CLOSING)
5083 pf_set_protostate(*state, psrc, TCPS_CLOSING);
5084 if (th->th_flags & TH_RST)
5085 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);
5087 /* Fall through to PASS packet */
5090 if ((*state)->dst.state == TCPS_SYN_SENT &&
5091 (*state)->src.state == TCPS_SYN_SENT) {
5092 /* Send RST for state mismatches during handshake */
5093 if (!(th->th_flags & TH_RST))
5094 pf_send_tcp((*state)->rule.ptr, pd->af,
5095 pd->dst, pd->src, th->th_dport,
5096 th->th_sport, ntohl(th->th_ack), 0,
5098 (*state)->rule.ptr->return_ttl, 1, 0);
5102 } else if (V_pf_status.debug >= PF_DEBUG_MISC) {
5103 printf("pf: BAD state: ");
5104 pf_print_state(*state);
5105 pf_print_flags(th->th_flags);
5106 printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
5107 "pkts=%llu:%llu dir=%s,%s\n",
5108 seq, orig_seq, ack, pd->p_len, ackskew,
5109 (unsigned long long)(*state)->packets[0],
5110 (unsigned long long)(*state)->packets[1],
5111 pd->dir == PF_IN ? "in" : "out",
5112 pd->dir == (*state)->direction ? "fwd" : "rev");
5113 printf("pf: State failure on: %c %c %c %c | %c %c\n",
5114 SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
5115 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
5116 ' ': '2',
5117 (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
5118 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
5119 SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
5120 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
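/*
 * The digits printed above identify the failed check: 1/2 are
 * the strict forward and backward window tests, 3/4 the ACK
 * skew limits, 5/6 the looser window tests used by the
 * loose-match path.
 */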
5122 REASON_SET(reason, PFRES_BADSTATE);
5123 return (PF_DROP);
5124 }
5126 return (PF_PASS);
5127 }
5129 static int
5130 pf_tcp_track_sloppy(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason)
5131 {
5132 struct tcphdr *th = &pd->hdr.tcp;
5133 struct pf_state_peer *src, *dst;
5134 u_int8_t psrc, pdst;
5136 if (pd->dir == (*state)->direction) {
5137 src = &(*state)->src;
5138 dst = &(*state)->dst;
5139 psrc = PF_PEER_SRC;
5140 pdst = PF_PEER_DST;
5141 } else {
5142 src = &(*state)->dst;
5143 dst = &(*state)->src;
5144 psrc = PF_PEER_DST;
5145 pdst = PF_PEER_SRC;
5146 }
5148 if (th->th_flags & TH_SYN)
5149 if (src->state < TCPS_SYN_SENT)
5150 pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
5151 if (th->th_flags & TH_FIN)
5152 if (src->state < TCPS_CLOSING)
5153 pf_set_protostate(*state, psrc, TCPS_CLOSING);
5154 if (th->th_flags & TH_ACK) {
5155 if (dst->state == TCPS_SYN_SENT) {
5156 pf_set_protostate(*state, pdst, TCPS_ESTABLISHED);
5157 if (src->state == TCPS_ESTABLISHED &&
5158 (*state)->src_node != NULL &&
5159 pf_src_connlimit(state)) {
5160 REASON_SET(reason, PFRES_SRCLIMIT);
5161 return (PF_DROP);
5162 }
5163 } else if (dst->state == TCPS_CLOSING) {
5164 pf_set_protostate(*state, pdst, TCPS_FIN_WAIT_2);
5165 } else if (src->state == TCPS_SYN_SENT &&
5166 dst->state < TCPS_SYN_SENT) {
5167 /*
5168 * Handle a special sloppy case where we only see one
5169 * half of the connection. If there is an ACK after
5170 * the initial SYN without ever seeing a packet from
5171 * the destination, set the connection to established.
5172 */
5173 pf_set_protostate(*state, PF_PEER_BOTH,
5174 TCPS_ESTABLISHED);
5175 dst->state = src->state = TCPS_ESTABLISHED;
5176 if ((*state)->src_node != NULL &&
5177 pf_src_connlimit(state)) {
5178 REASON_SET(reason, PFRES_SRCLIMIT);
5179 return (PF_DROP);
5180 }
5181 } else if (src->state == TCPS_CLOSING &&
5182 dst->state == TCPS_ESTABLISHED &&
5183 dst->seqlo == 0) {
5184 /*
5185 * Handle the closing of half connections where we
5186 * don't see the full bidirectional FIN/ACK+ACK
5187 * handshake.
5188 */
5189 pf_set_protostate(*state, pdst, TCPS_CLOSING);
5190 }
5191 }
5192 if (th->th_flags & TH_RST)
5193 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);
5195 /* update expire time */
5196 (*state)->expire = time_uptime;
5197 if (src->state >= TCPS_FIN_WAIT_2 &&
5198 dst->state >= TCPS_FIN_WAIT_2)
5199 (*state)->timeout = PFTM_TCP_CLOSED;
5200 else if (src->state >= TCPS_CLOSING &&
5201 dst->state >= TCPS_CLOSING)
5202 (*state)->timeout = PFTM_TCP_FIN_WAIT;
5203 else if (src->state < TCPS_ESTABLISHED ||
5204 dst->state < TCPS_ESTABLISHED)
5205 (*state)->timeout = PFTM_TCP_OPENING;
5206 else if (src->state >= TCPS_CLOSING ||
5207 dst->state >= TCPS_CLOSING)
5208 (*state)->timeout = PFTM_TCP_CLOSING;
5209 else
5210 (*state)->timeout = PFTM_TCP_ESTABLISHED;
5212 return (PF_PASS);
5213 }
5215 static int
5216 pf_synproxy(struct pf_pdesc *pd, struct pf_kstate **state, u_short *reason)
5217 {
5218 struct pf_state_key *sk = (*state)->key[pd->didx];
5219 struct tcphdr *th = &pd->hdr.tcp;
5221 if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
5222 if (pd->dir != (*state)->direction) {
5223 REASON_SET(reason, PFRES_SYNPROXY);
5224 return (PF_SYNPROXY_DROP);
5225 }
5226 if (th->th_flags & TH_SYN) {
5227 if (ntohl(th->th_seq) != (*state)->src.seqlo) {
5228 REASON_SET(reason, PFRES_SYNPROXY);
5229 return (PF_DROP);
5230 }
5231 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
5232 pd->src, th->th_dport, th->th_sport,
5233 (*state)->src.seqhi, ntohl(th->th_seq) + 1,
5234 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0);
5235 REASON_SET(reason, PFRES_SYNPROXY);
5236 return (PF_SYNPROXY_DROP);
5237 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK ||
5238 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
5239 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
5240 REASON_SET(reason, PFRES_SYNPROXY);
5241 return (PF_DROP);
5242 } else if ((*state)->src_node != NULL &&
5243 pf_src_connlimit(state)) {
5244 REASON_SET(reason, PFRES_SRCLIMIT);
5245 return (PF_DROP);
5246 } else
5247 pf_set_protostate(*state, PF_PEER_SRC,
5248 PF_TCPS_PROXY_DST);
5249 }
5250 if ((*state)->src.state == PF_TCPS_PROXY_DST) {
5251 if (pd->dir == (*state)->direction) {
5252 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
5253 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
5254 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
5255 REASON_SET(reason, PFRES_SYNPROXY);
5256 return (PF_DROP);
5257 }
5258 (*state)->src.max_win = MAX(ntohs(th->th_win), 1);
5259 if ((*state)->dst.seqhi == 1)
5260 (*state)->dst.seqhi = htonl(arc4random());
5261 pf_send_tcp((*state)->rule.ptr, pd->af,
5262 &sk->addr[pd->sidx], &sk->addr[pd->didx],
5263 sk->port[pd->sidx], sk->port[pd->didx],
5264 (*state)->dst.seqhi, 0, TH_SYN, 0,
5265 (*state)->src.mss, 0, 0, (*state)->tag);
5266 REASON_SET(reason, PFRES_SYNPROXY);
5267 return (PF_SYNPROXY_DROP);
5268 } else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
5269 TH_ACK) ||
5270 (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
5271 REASON_SET(reason, PFRES_SYNPROXY);
5272 return (PF_DROP);
5273 } else {
5274 (*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
5275 (*state)->dst.seqlo = ntohl(th->th_seq);
5276 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
5277 pd->src, th->th_dport, th->th_sport,
5278 ntohl(th->th_ack), ntohl(th->th_seq) + 1,
5279 TH_ACK, (*state)->src.max_win, 0, 0, 0,
5280 (*state)->tag);
5281 pf_send_tcp((*state)->rule.ptr, pd->af,
5282 &sk->addr[pd->sidx], &sk->addr[pd->didx],
5283 sk->port[pd->sidx], sk->port[pd->didx],
5284 (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
5285 TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0);
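/*
 * Both halves of the proxied handshake are now complete: the
 * first pf_send_tcp() above ACKed the client, the second ACKed
 * the server. The seqdiff values computed below let the normal
 * state code translate between the two independent sequence
 * spaces from here on.
 */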
5286 (*state)->src.seqdiff = (*state)->dst.seqhi -
5287 (*state)->src.seqlo;
5288 (*state)->dst.seqdiff = (*state)->src.seqhi -
5289 (*state)->dst.seqlo;
5290 (*state)->src.seqhi = (*state)->src.seqlo +
5291 (*state)->dst.max_win;
5292 (*state)->dst.seqhi = (*state)->dst.seqlo +
5293 (*state)->src.max_win;
5294 (*state)->src.wscale = (*state)->dst.wscale = 0;
5295 pf_set_protostate(*state, PF_PEER_BOTH,
5296 TCPS_ESTABLISHED);
5297 REASON_SET(reason, PFRES_SYNPROXY);
5298 return (PF_SYNPROXY_DROP);
5299 }
5300 }
5302 return (PF_PASS);
5303 }
5305 static int
5306 pf_test_state_tcp(struct pf_kstate **state, int direction, struct pfi_kkif *kif,
5307 struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
5308 u_short *reason)
5309 {
5310 struct pf_state_key_cmp key;
5311 struct tcphdr *th = &pd->hdr.tcp;
5312 int copyback = 0;
5313 int action;
5314 struct pf_state_peer *src, *dst;
5316 bzero(&key, sizeof(key));
5317 key.af = pd->af;
5318 key.proto = IPPROTO_TCP;
5319 if (direction == PF_IN) { /* wire side, straight */
5320 PF_ACPY(&key.addr[0], pd->src, key.af);
5321 PF_ACPY(&key.addr[1], pd->dst, key.af);
5322 key.port[0] = th->th_sport;
5323 key.port[1] = th->th_dport;
5324 } else { /* stack side, reverse */
5325 PF_ACPY(&key.addr[1], pd->src, key.af);
5326 PF_ACPY(&key.addr[0], pd->dst, key.af);
5327 key.port[1] = th->th_sport;
5328 key.port[0] = th->th_dport;
5331 STATE_LOOKUP(kif, &key, direction, *state, pd);
5333 if (direction == (*state)->direction) {
5334 src = &(*state)->src;
5335 dst = &(*state)->dst;
5336 } else {
5337 src = &(*state)->dst;
5338 dst = &(*state)->src;
5339 }
5341 if ((action = pf_synproxy(pd, state, reason)) != PF_PASS)
5342 return (action);
5344 if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
5345 dst->state >= TCPS_FIN_WAIT_2 &&
5346 src->state >= TCPS_FIN_WAIT_2) {
5347 if (V_pf_status.debug >= PF_DEBUG_MISC) {
5348 printf("pf: state reuse ");
5349 pf_print_state(*state);
5350 pf_print_flags(th->th_flags);
5351 printf("\n");
5352 }
5353 /* XXX make sure it's the same direction ?? */
5354 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED);
5355 pf_unlink_state(*state);
5356 *state = NULL;
5357 return (PF_DROP);
5358 }
5360 if ((*state)->state_flags & PFSTATE_SLOPPY) {
5361 if (pf_tcp_track_sloppy(state, pd, reason) == PF_DROP)
5362 return (PF_DROP);
5363 } else {
5364 if (pf_tcp_track_full(state, kif, m, off, pd, reason,
5365 &copyback) == PF_DROP)
5366 return (PF_DROP);
5367 }
5369 /* translate source/destination address, if necessary */
5370 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
5371 struct pf_state_key *nk = (*state)->key[pd->didx];
5373 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
5374 nk->port[pd->sidx] != th->th_sport)
5375 pf_change_ap(m, pd->src, &th->th_sport,
5376 pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx],
5377 nk->port[pd->sidx], 0, pd->af);
5379 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
5380 nk->port[pd->didx] != th->th_dport)
5381 pf_change_ap(m, pd->dst, &th->th_dport,
5382 pd->ip_sum, &th->th_sum, &nk->addr[pd->didx],
5383 nk->port[pd->didx], 0, pd->af);
5384 copyback = 1;
5385 }
5387 /* Copyback sequence modulation or stateful scrub changes if needed */
5388 if (copyback)
5389 m_copyback(m, off, sizeof(*th), (caddr_t)th);
5391 return (PF_PASS);
5392 }
5394 static int
5395 pf_test_state_udp(struct pf_kstate **state, int direction, struct pfi_kkif *kif,
5396 struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
5397 {
5398 struct pf_state_peer *src, *dst;
5399 struct pf_state_key_cmp key;
5400 struct udphdr *uh = &pd->hdr.udp;
5401 uint8_t psrc, pdst;
5403 bzero(&key, sizeof(key));
5404 key.af = pd->af;
5405 key.proto = IPPROTO_UDP;
5406 if (direction == PF_IN) { /* wire side, straight */
5407 PF_ACPY(&key.addr[0], pd->src, key.af);
5408 PF_ACPY(&key.addr[1], pd->dst, key.af);
5409 key.port[0] = uh->uh_sport;
5410 key.port[1] = uh->uh_dport;
5411 } else { /* stack side, reverse */
5412 PF_ACPY(&key.addr[1], pd->src, key.af);
5413 PF_ACPY(&key.addr[0], pd->dst, key.af);
5414 key.port[1] = uh->uh_sport;
5415 key.port[0] = uh->uh_dport;
5418 STATE_LOOKUP(kif, &key, direction, *state, pd);
5420 if (direction == (*state)->direction) {
5421 src = &(*state)->src;
5422 dst = &(*state)->dst;
5423 psrc = PF_PEER_SRC;
5424 pdst = PF_PEER_DST;
5425 } else {
5426 src = &(*state)->dst;
5427 dst = &(*state)->src;
5428 psrc = PF_PEER_DST;
5429 pdst = PF_PEER_SRC;
5430 }
5432 /* update states */
5433 if (src->state < PFUDPS_SINGLE)
5434 pf_set_protostate(*state, psrc, PFUDPS_SINGLE);
5435 if (dst->state == PFUDPS_SINGLE)
5436 pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE);
5438 /* update expire time */
5439 (*state)->expire = time_uptime;
5440 if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
5441 (*state)->timeout = PFTM_UDP_MULTIPLE;
5442 else
5443 (*state)->timeout = PFTM_UDP_SINGLE;
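/*
 * UDP states stay in the short SINGLE timeout until traffic has
 * been seen in both directions; only bidirectional flows earn
 * the longer PFTM_UDP_MULTIPLE timeout.
 */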
5445 /* translate source/destination address, if necessary */
5446 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
5447 struct pf_state_key *nk = (*state)->key[pd->didx];
5449 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
5450 nk->port[pd->sidx] != uh->uh_sport)
5451 pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum,
5452 &uh->uh_sum, &nk->addr[pd->sidx],
5453 nk->port[pd->sidx], 1, pd->af);
5455 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
5456 nk->port[pd->didx] != uh->uh_dport)
5457 pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum,
5458 &uh->uh_sum, &nk->addr[pd->didx],
5459 nk->port[pd->didx], 1, pd->af);
5460 m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
5461 }
5463 return (PF_PASS);
5464 }
5466 static int
5467 pf_test_state_icmp(struct pf_kstate **state, int direction, struct pfi_kkif *kif,
5468 struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
5469 {
5470 struct pf_addr *saddr = pd->src, *daddr = pd->dst;
5471 u_int16_t icmpid = 0, *icmpsum;
5472 u_int8_t icmptype, icmpcode;
5473 int state_icmp = 0;
5474 struct pf_state_key_cmp key;
5476 bzero(&key, sizeof(key));
5477 switch (pd->proto) {
5478 #ifdef INET
5479 case IPPROTO_ICMP:
5480 icmptype = pd->hdr.icmp.icmp_type;
5481 icmpcode = pd->hdr.icmp.icmp_code;
5482 icmpid = pd->hdr.icmp.icmp_id;
5483 icmpsum = &pd->hdr.icmp.icmp_cksum;
5485 if (icmptype == ICMP_UNREACH ||
5486 icmptype == ICMP_SOURCEQUENCH ||
5487 icmptype == ICMP_REDIRECT ||
5488 icmptype == ICMP_TIMXCEED ||
5489 icmptype == ICMP_PARAMPROB)
5490 state_icmp++;
5491 break;
5492 #endif /* INET */
5493 #ifdef INET6
5494 case IPPROTO_ICMPV6:
5495 icmptype = pd->hdr.icmp6.icmp6_type;
5496 icmpcode = pd->hdr.icmp6.icmp6_code;
5497 icmpid = pd->hdr.icmp6.icmp6_id;
5498 icmpsum = &pd->hdr.icmp6.icmp6_cksum;
5500 if (icmptype == ICMP6_DST_UNREACH ||
5501 icmptype == ICMP6_PACKET_TOO_BIG ||
5502 icmptype == ICMP6_TIME_EXCEEDED ||
5503 icmptype == ICMP6_PARAM_PROB)
5504 state_icmp++;
5505 break;
5506 #endif /* INET6 */
5507 }
5509 if (!state_icmp) {
5510 /*
5511 * ICMP query/reply message not related to a TCP/UDP packet.
5512 * Search for an ICMP state.
5513 */
5514 key.af = pd->af;
5515 key.proto = pd->proto;
5516 key.port[0] = key.port[1] = icmpid;
5517 if (direction == PF_IN) { /* wire side, straight */
5518 PF_ACPY(&key.addr[0], pd->src, key.af);
5519 PF_ACPY(&key.addr[1], pd->dst, key.af);
5520 } else { /* stack side, reverse */
5521 PF_ACPY(&key.addr[1], pd->src, key.af);
5522 PF_ACPY(&key.addr[0], pd->dst, key.af);
5523 }
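/*
 * Query/reply states are keyed on the ICMP id in both port
 * slots, so a reply with the same id and reversed addresses
 * matches the state created by the query.
 */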
5525 STATE_LOOKUP(kif, &key, direction, *state, pd);
5527 (*state)->expire = time_uptime;
5528 (*state)->timeout = PFTM_ICMP_ERROR_REPLY;
5530 /* translate source/destination address, if necessary */
5531 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
5532 struct pf_state_key *nk = (*state)->key[pd->didx];
5534 switch (pd->af) {
5535 #ifdef INET
5536 case AF_INET:
5537 if (PF_ANEQ(pd->src,
5538 &nk->addr[pd->sidx], AF_INET))
5539 pf_change_a(&saddr->v4.s_addr,
5540 pd->ip_sum,
5541 nk->addr[pd->sidx].v4.s_addr, 0);
5543 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx],
5544 AF_INET))
5545 pf_change_a(&daddr->v4.s_addr,
5546 pd->ip_sum,
5547 nk->addr[pd->didx].v4.s_addr, 0);
5549 if (nk->port[0] !=
5550 pd->hdr.icmp.icmp_id) {
5551 pd->hdr.icmp.icmp_cksum =
5552 pf_cksum_fixup(
5553 pd->hdr.icmp.icmp_cksum, icmpid,
5554 nk->port[pd->sidx], 0);
5555 pd->hdr.icmp.icmp_id =
5556 nk->port[pd->sidx];
5557 }
5559 m_copyback(m, off, ICMP_MINLEN,
5560 (caddr_t )&pd->hdr.icmp);
5561 break;
5562 #endif /* INET */
5563 #ifdef INET6
5564 case AF_INET6:
5565 if (PF_ANEQ(pd->src,
5566 &nk->addr[pd->sidx], AF_INET6))
5567 pf_change_a6(saddr,
5568 &pd->hdr.icmp6.icmp6_cksum,
5569 &nk->addr[pd->sidx], 0);
5571 if (PF_ANEQ(pd->dst,
5572 &nk->addr[pd->didx], AF_INET6))
5573 pf_change_a6(daddr,
5574 &pd->hdr.icmp6.icmp6_cksum,
5575 &nk->addr[pd->didx], 0);
5577 m_copyback(m, off, sizeof(struct icmp6_hdr),
5578 (caddr_t )&pd->hdr.icmp6);
5579 break;
5580 #endif /* INET6 */
5581 }
5582 }
5583 return (PF_PASS);
5585 } else {
5586 /*
5587 * ICMP error message in response to a TCP/UDP packet.
5588 * Extract the inner TCP/UDP header and search for that state.
5589 */
5591 struct pf_pdesc pd2;
5592 bzero(&pd2, sizeof pd2);
5593 #ifdef INET
5594 struct ip h2;
5595 #endif /* INET */
5596 #ifdef INET6
5597 struct ip6_hdr h2_6;
5598 int terminal = 0;
5599 #endif /* INET6 */
5600 int ipoff2 = 0;
5601 int off2 = 0;
5603 pd2.af = pd->af;
5604 /* Payload packet is from the opposite direction. */
5605 pd2.sidx = (direction == PF_IN) ? 1 : 0;
5606 pd2.didx = (direction == PF_IN) ? 0 : 1;
5607 switch (pd->af) {
5608 #ifdef INET
5609 case AF_INET:
5610 /* offset of h2 in mbuf chain */
5611 ipoff2 = off + ICMP_MINLEN;
5613 if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
5614 NULL, reason, pd2.af)) {
5615 DPFPRINTF(PF_DEBUG_MISC,
5616 ("pf: ICMP error message too short "
5617 "(ip)\n"));
5618 return (PF_DROP);
5619 }
5620 /*
5621 * ICMP error messages don't refer to non-first
5622 * fragments
5623 */
5624 if (h2.ip_off & htons(IP_OFFMASK)) {
5625 REASON_SET(reason, PFRES_FRAG);
5626 return (PF_DROP);
5627 }
5629 /* offset of protocol header that follows h2 */
5630 off2 = ipoff2 + (h2.ip_hl << 2);
5632 pd2.proto = h2.ip_p;
5633 pd2.src = (struct pf_addr *)&h2.ip_src;
5634 pd2.dst = (struct pf_addr *)&h2.ip_dst;
5635 pd2.ip_sum = &h2.ip_sum;
5636 break;
5637 #endif /* INET */
5638 #ifdef INET6
5639 case AF_INET6:
5640 ipoff2 = off + sizeof(struct icmp6_hdr);
5642 if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
5643 NULL, reason, pd2.af)) {
5644 DPFPRINTF(PF_DEBUG_MISC,
5645 ("pf: ICMP error message too short "
5646 "(ip6)\n"));
5647 return (PF_DROP);
5648 }
5649 pd2.proto = h2_6.ip6_nxt;
5650 pd2.src = (struct pf_addr *)&h2_6.ip6_src;
5651 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
5652 pd2.ip_sum = NULL;
5653 off2 = ipoff2 + sizeof(h2_6);
5654 do {
5655 switch (pd2.proto) {
5656 case IPPROTO_FRAGMENT:
5657 /*
5658 * ICMPv6 error messages for
5659 * non-first fragments
5660 */
5661 REASON_SET(reason, PFRES_FRAG);
5662 return (PF_DROP);
5664 case IPPROTO_HOPOPTS:
5665 case IPPROTO_ROUTING:
5666 case IPPROTO_DSTOPTS: {
5667 /* get next header and header length */
5668 struct ip6_ext opt6;
5670 if (!pf_pull_hdr(m, off2, &opt6,
5671 sizeof(opt6), NULL, reason,
5672 pd2.af)) {
5673 DPFPRINTF(PF_DEBUG_MISC,
5674 ("pf: ICMPv6 short opt\n"));
5675 return (PF_DROP);
5676 }
5677 if (pd2.proto == IPPROTO_AH)
5678 off2 += (opt6.ip6e_len + 2) * 4;
5679 else
5680 off2 += (opt6.ip6e_len + 1) * 8;
5681 pd2.proto = opt6.ip6e_nxt;
5682 /* goto the next header */
5683 break;
5684 }
5685 default:
5686 terminal++;
5687 break;
5688 }
5689 } while (!terminal);
5690 break;
5691 #endif /* INET6 */
5692 }
5694 if (PF_ANEQ(pd->dst, pd2.src, pd->af)) {
5695 if (V_pf_status.debug >= PF_DEBUG_MISC) {
5696 printf("pf: BAD ICMP %d:%d outer dst: ",
5697 icmptype, icmpcode);
5698 pf_print_host(pd->src, 0, pd->af);
5699 printf(" -> ");
5700 pf_print_host(pd->dst, 0, pd->af);
5701 printf(" inner src: ");
5702 pf_print_host(pd2.src, 0, pd2.af);
5703 printf(" -> ");
5704 pf_print_host(pd2.dst, 0, pd2.af);
5705 printf("\n");
5706 }
5707 REASON_SET(reason, PFRES_BADSTATE);
5708 return (PF_DROP);
5709 }
5711 switch (pd2.proto) {
5712 case IPPROTO_TCP: {
5713 struct tcphdr th;
5714 u_int32_t seq;
5715 struct pf_state_peer *src, *dst;
5716 u_int8_t dws;
5717 int copyback = 0;
5719 /*
5720 * Only the first 8 bytes of the TCP header can be
5721 * expected. Don't access any TCP header fields after
5722 * th_seq, an ackskew test is not possible.
5723 */
5724 if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
5725 pd2.af)) {
5726 DPFPRINTF(PF_DEBUG_MISC,
5727 ("pf: ICMP error message too short "
5728 "(tcp)\n"));
5729 return (PF_DROP);
5730 }
5732 key.af = pd2.af;
5733 key.proto = IPPROTO_TCP;
5734 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5735 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5736 key.port[pd2.sidx] = th.th_sport;
5737 key.port[pd2.didx] = th.th_dport;
5739 STATE_LOOKUP(kif, &key, direction, *state, pd);
5741 if (direction == (*state)->direction) {
5742 src = &(*state)->dst;
5743 dst = &(*state)->src;
5744 } else {
5745 src = &(*state)->src;
5746 dst = &(*state)->dst;
5747 }
5749 if (src->wscale && dst->wscale)
5750 dws = dst->wscale & PF_WSCALE_MASK;
5751 else
5752 dws = 0;
5754 /* Demodulate sequence number */
5755 seq = ntohl(th.th_seq) - src->seqdiff;
5756 if (src->seqdiff) {
5757 pf_change_a(&th.th_seq, icmpsum,
5758 htonl(seq), 0);
5759 copyback = 1;
5760 }
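/*
 * With the embedded sequence number demodulated it can be
 * checked against the same window bounds the TCP tracker
 * maintains; only 8 header bytes are available, so no
 * ACK-based checks are possible here.
 */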
5762 if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
5763 (!SEQ_GEQ(src->seqhi, seq) ||
5764 !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
5765 if (V_pf_status.debug >= PF_DEBUG_MISC) {
5766 printf("pf: BAD ICMP %d:%d ",
5767 icmptype, icmpcode);
5768 pf_print_host(pd->src, 0, pd->af);
5769 printf(" -> ");
5770 pf_print_host(pd->dst, 0, pd->af);
5771 printf(" state: ");
5772 pf_print_state(*state);
5773 printf(" seq=%u\n", seq);
5774 }
5775 REASON_SET(reason, PFRES_BADSTATE);
5776 return (PF_DROP);
5777 } else {
5778 if (V_pf_status.debug >= PF_DEBUG_MISC) {
5779 printf("pf: OK ICMP %d:%d ",
5780 icmptype, icmpcode);
5781 pf_print_host(pd->src, 0, pd->af);
5782 printf(" -> ");
5783 pf_print_host(pd->dst, 0, pd->af);
5784 printf(" state: ");
5785 pf_print_state(*state);
5786 printf(" seq=%u\n", seq);
5787 }
5788 }
5790 /* translate source/destination address, if necessary */
5791 if ((*state)->key[PF_SK_WIRE] !=
5792 (*state)->key[PF_SK_STACK]) {
5793 struct pf_state_key *nk =
5794 (*state)->key[pd->didx];
5796 if (PF_ANEQ(pd2.src,
5797 &nk->addr[pd2.sidx], pd2.af) ||
5798 nk->port[pd2.sidx] != th.th_sport)
5799 pf_change_icmp(pd2.src, &th.th_sport,
5800 daddr, &nk->addr[pd2.sidx],
5801 nk->port[pd2.sidx], NULL,
5802 pd2.ip_sum, icmpsum,
5803 pd->ip_sum, 0, pd2.af);
5805 if (PF_ANEQ(pd2.dst,
5806 &nk->addr[pd2.didx], pd2.af) ||
5807 nk->port[pd2.didx] != th.th_dport)
5808 pf_change_icmp(pd2.dst, &th.th_dport,
5809 saddr, &nk->addr[pd2.didx],
5810 nk->port[pd2.didx], NULL,
5811 pd2.ip_sum, icmpsum,
5812 pd->ip_sum, 0, pd2.af);
5813 copyback = 1;
5814 }
5816 if (copyback) {
5817 switch (pd2.af) {
5818 #ifdef INET
5819 case AF_INET:
5820 m_copyback(m, off, ICMP_MINLEN,
5821 (caddr_t )&pd->hdr.icmp);
5822 m_copyback(m, ipoff2, sizeof(h2),
5823 (caddr_t )&h2);
5824 break;
5825 #endif /* INET */
5826 #ifdef INET6
5827 case AF_INET6:
5828 m_copyback(m, off,
5829 sizeof(struct icmp6_hdr),
5830 (caddr_t )&pd->hdr.icmp6);
5831 m_copyback(m, ipoff2, sizeof(h2_6),
5832 (caddr_t )&h2_6);
5833 break;
5834 #endif /* INET6 */
5835 }
5836 m_copyback(m, off2, 8, (caddr_t)&th);
5837 }
5839 return (PF_PASS);
5840 break;
5841 }
5842 case IPPROTO_UDP: {
5843 struct udphdr uh;
5845 if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
5846 NULL, reason, pd2.af)) {
5847 DPFPRINTF(PF_DEBUG_MISC,
5848 ("pf: ICMP error message too short "
5849 "(udp)\n"));
5850 return (PF_DROP);
5851 }
5853 key.af = pd2.af;
5854 key.proto = IPPROTO_UDP;
5855 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5856 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5857 key.port[pd2.sidx] = uh.uh_sport;
5858 key.port[pd2.didx] = uh.uh_dport;
5860 STATE_LOOKUP(kif, &key, direction, *state, pd);
5862 /* translate source/destination address, if necessary */
5863 if ((*state)->key[PF_SK_WIRE] !=
5864 (*state)->key[PF_SK_STACK]) {
5865 struct pf_state_key *nk =
5866 (*state)->key[pd->didx];
5868 if (PF_ANEQ(pd2.src,
5869 &nk->addr[pd2.sidx], pd2.af) ||
5870 nk->port[pd2.sidx] != uh.uh_sport)
5871 pf_change_icmp(pd2.src, &uh.uh_sport,
5872 daddr, &nk->addr[pd2.sidx],
5873 nk->port[pd2.sidx], &uh.uh_sum,
5874 pd2.ip_sum, icmpsum,
5875 pd->ip_sum, 1, pd2.af);
5877 if (PF_ANEQ(pd2.dst,
5878 &nk->addr[pd2.didx], pd2.af) ||
5879 nk->port[pd2.didx] != uh.uh_dport)
5880 pf_change_icmp(pd2.dst, &uh.uh_dport,
5881 saddr, &nk->addr[pd2.didx],
5882 nk->port[pd2.didx], &uh.uh_sum,
5883 pd2.ip_sum, icmpsum,
5884 pd->ip_sum, 1, pd2.af);
5886 switch (pd2.af) {
5887 #ifdef INET
5888 case AF_INET:
5889 m_copyback(m, off, ICMP_MINLEN,
5890 (caddr_t )&pd->hdr.icmp);
5891 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
5892 break;
5893 #endif /* INET */
5894 #ifdef INET6
5895 case AF_INET6:
5896 m_copyback(m, off,
5897 sizeof(struct icmp6_hdr),
5898 (caddr_t )&pd->hdr.icmp6);
5899 m_copyback(m, ipoff2, sizeof(h2_6),
5900 (caddr_t )&h2_6);
5901 break;
5902 #endif /* INET6 */
5903 }
5904 m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
5905 }
5906 return (PF_PASS);
5907 break;
5908 }
5909 #ifdef INET
5910 case IPPROTO_ICMP: {
5911 struct icmp iih;
5913 if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
5914 NULL, reason, pd2.af)) {
5915 DPFPRINTF(PF_DEBUG_MISC,
5916 ("pf: ICMP error message too short i"
5917 "cmp\n"));
5918 return (PF_DROP);
5919 }
5921 key.af = pd2.af;
5922 key.proto = IPPROTO_ICMP;
5923 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5924 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5925 key.port[0] = key.port[1] = iih.icmp_id;
5927 STATE_LOOKUP(kif, &key, direction, *state, pd);
5929 /* translate source/destination address, if necessary */
5930 if ((*state)->key[PF_SK_WIRE] !=
5931 (*state)->key[PF_SK_STACK]) {
5932 struct pf_state_key *nk =
5933 (*state)->key[pd->didx];
5935 if (PF_ANEQ(pd2.src,
5936 &nk->addr[pd2.sidx], pd2.af) ||
5937 nk->port[pd2.sidx] != iih.icmp_id)
5938 pf_change_icmp(pd2.src, &iih.icmp_id,
5939 daddr, &nk->addr[pd2.sidx],
5940 nk->port[pd2.sidx], NULL,
5941 pd2.ip_sum, icmpsum,
5942 pd->ip_sum, 0, AF_INET);
5944 if (PF_ANEQ(pd2.dst,
5945 &nk->addr[pd2.didx], pd2.af) ||
5946 nk->port[pd2.didx] != iih.icmp_id)
5947 pf_change_icmp(pd2.dst, &iih.icmp_id,
5948 saddr, &nk->addr[pd2.didx],
5949 nk->port[pd2.didx], NULL,
5950 pd2.ip_sum, icmpsum,
5951 pd->ip_sum, 0, AF_INET);
5953 m_copyback(m, off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp);
5954 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
5955 m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
5956 }
5957 return (PF_PASS);
5958 break;
5959 }
5960 #endif /* INET */
5961 #ifdef INET6
5962 case IPPROTO_ICMPV6: {
5963 struct icmp6_hdr iih;
5965 if (!pf_pull_hdr(m, off2, &iih,
5966 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
5967 DPFPRINTF(PF_DEBUG_MISC,
5968 ("pf: ICMP error message too short "
5969 "(icmp6)\n"));
5970 return (PF_DROP);
5971 }
5973 key.af = pd2.af;
5974 key.proto = IPPROTO_ICMPV6;
5975 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5976 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5977 key.port[0] = key.port[1] = iih.icmp6_id;
5979 STATE_LOOKUP(kif, &key, direction, *state, pd);
5981 /* translate source/destination address, if necessary */
5982 if ((*state)->key[PF_SK_WIRE] !=
5983 (*state)->key[PF_SK_STACK]) {
5984 struct pf_state_key *nk =
5985 (*state)->key[pd->didx];
5987 if (PF_ANEQ(pd2.src,
5988 &nk->addr[pd2.sidx], pd2.af) ||
5989 nk->port[pd2.sidx] != iih.icmp6_id)
5990 pf_change_icmp(pd2.src, &iih.icmp6_id,
5991 daddr, &nk->addr[pd2.sidx],
5992 nk->port[pd2.sidx], NULL,
5993 pd2.ip_sum, icmpsum,
5994 pd->ip_sum, 0, AF_INET6);
5996 if (PF_ANEQ(pd2.dst,
5997 &nk->addr[pd2.didx], pd2.af) ||
5998 nk->port[pd2.didx] != iih.icmp6_id)
5999 pf_change_icmp(pd2.dst, &iih.icmp6_id,
6000 saddr, &nk->addr[pd2.didx],
6001 nk->port[pd2.didx], NULL,
6002 pd2.ip_sum, icmpsum,
6003 pd->ip_sum, 0, AF_INET6);
6005 m_copyback(m, off, sizeof(struct icmp6_hdr),
6006 (caddr_t)&pd->hdr.icmp6);
6007 m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
6008 m_copyback(m, off2, sizeof(struct icmp6_hdr),
6009 (caddr_t)&iih);
6010 }
6011 return (PF_PASS);
6012 break;
6013 }
6014 #endif /* INET6 */
6015 default: {
6016 key.af = pd2.af;
6017 key.proto = pd2.proto;
6018 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
6019 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
6020 key.port[0] = key.port[1] = 0;
6022 STATE_LOOKUP(kif, &key, direction, *state, pd);
6024 /* translate source/destination address, if necessary */
6025 if ((*state)->key[PF_SK_WIRE] !=
6026 (*state)->key[PF_SK_STACK]) {
6027 struct pf_state_key *nk =
6028 (*state)->key[pd->didx];
6030 if (PF_ANEQ(pd2.src,
6031 &nk->addr[pd2.sidx], pd2.af))
6032 pf_change_icmp(pd2.src, NULL, daddr,
6033 &nk->addr[pd2.sidx], 0, NULL,
6034 pd2.ip_sum, icmpsum,
6035 pd->ip_sum, 0, pd2.af);
6037 if (PF_ANEQ(pd2.dst,
6038 &nk->addr[pd2.didx], pd2.af))
6039 pf_change_icmp(pd2.dst, NULL, saddr,
6040 &nk->addr[pd2.didx], 0, NULL,
6041 pd2.ip_sum, icmpsum,
6042 pd->ip_sum, 0, pd2.af);
6044 switch (pd2.af) {
6045 #ifdef INET
6046 case AF_INET:
6047 m_copyback(m, off, ICMP_MINLEN,
6048 (caddr_t)&pd->hdr.icmp);
6049 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
6050 break;
6051 #endif /* INET */
6052 #ifdef INET6
6053 case AF_INET6:
6054 m_copyback(m, off,
6055 sizeof(struct icmp6_hdr),
6056 (caddr_t )&pd->hdr.icmp6);
6057 m_copyback(m, ipoff2, sizeof(h2_6),
6058 (caddr_t )&h2_6);
6059 break;
6060 #endif /* INET6 */
6061 }
6062 }
6063 return (PF_PASS);
6064 break;
6065 }
6066 }
6067 }
6068 }
6070 static int
6071 pf_test_state_other(struct pf_kstate **state, int direction, struct pfi_kkif *kif,
6072 struct mbuf *m, struct pf_pdesc *pd)
6073 {
6074 struct pf_state_peer *src, *dst;
6075 struct pf_state_key_cmp key;
6076 uint8_t psrc, pdst;
6078 bzero(&key, sizeof(key));
6079 key.af = pd->af;
6080 key.proto = pd->proto;
6081 if (direction == PF_IN) {
6082 PF_ACPY(&key.addr[0], pd->src, key.af);
6083 PF_ACPY(&key.addr[1], pd->dst, key.af);
6084 key.port[0] = key.port[1] = 0;
6085 } else {
6086 PF_ACPY(&key.addr[1], pd->src, key.af);
6087 PF_ACPY(&key.addr[0], pd->dst, key.af);
6088 key.port[1] = key.port[0] = 0;
6089 }
6091 STATE_LOOKUP(kif, &key, direction, *state, pd);
6093 if (direction == (*state)->direction) {
6094 src = &(*state)->src;
6095 dst = &(*state)->dst;
6096 psrc = PF_PEER_SRC;
6097 pdst = PF_PEER_DST;
6098 } else {
6099 src = &(*state)->dst;
6100 dst = &(*state)->src;
6101 psrc = PF_PEER_DST;
6102 pdst = PF_PEER_SRC;
6103 }
6105 /* update states */
6106 if (src->state < PFOTHERS_SINGLE)
6107 pf_set_protostate(*state, psrc, PFOTHERS_SINGLE);
6108 if (dst->state == PFOTHERS_SINGLE)
6109 pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE);
6111 /* update expire time */
6112 (*state)->expire = time_uptime;
6113 if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
6114 (*state)->timeout = PFTM_OTHER_MULTIPLE;
6115 else
6116 (*state)->timeout = PFTM_OTHER_SINGLE;
6118 /* translate source/destination address, if necessary */
6119 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
6120 struct pf_state_key *nk = (*state)->key[pd->didx];
6122 KASSERT(nk, ("%s: nk is null", __func__));
6123 KASSERT(pd, ("%s: pd is null", __func__));
6124 KASSERT(pd->src, ("%s: pd->src is null", __func__));
6125 KASSERT(pd->dst, ("%s: pd->dst is null", __func__));
6126 switch (pd->af) {
6127 #ifdef INET
6128 case AF_INET:
6129 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
6130 pf_change_a(&pd->src->v4.s_addr,
6131 pd->ip_sum,
6132 nk->addr[pd->sidx].v4.s_addr,
6133 0);
6135 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
6136 pf_change_a(&pd->dst->v4.s_addr,
6137 pd->ip_sum,
6138 nk->addr[pd->didx].v4.s_addr,
6139 0);
6140 break;
6141 #endif /* INET */
6142 #ifdef INET6
6143 case AF_INET6:
6145 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6))
6146 PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
6148 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6))
6149 PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
6150 break;
6151 #endif /* INET6 */
6152 }
6153 }
6154 return (PF_PASS);
6155 }
6156 /*
6157 * ipoff and off are measured from the start of the mbuf chain.
6158 * h must be at "ipoff" on the mbuf chain.
6159 */
6160 void *
6161 pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
6162 u_short *actionp, u_short *reasonp, sa_family_t af)
6163 {
6164 switch (af) {
6165 #ifdef INET
6166 case AF_INET: {
6167 struct ip *h = mtod(m, struct ip *);
6168 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
6170 if (fragoff) {
6171 if (fragoff >= len)
6172 ACTION_SET(actionp, PF_PASS);
6173 else {
6174 ACTION_SET(actionp, PF_DROP);
6175 REASON_SET(reasonp, PFRES_FRAG);
6176 }
6177 return (NULL);
6178 }
6179 if (m->m_pkthdr.len < off + len ||
6180 ntohs(h->ip_len) < off + len) {
6181 ACTION_SET(actionp, PF_DROP);
6182 REASON_SET(reasonp, PFRES_SHORT);
6183 return (NULL);
6184 }
6185 break;
6186 }
6187 #endif /* INET */
6188 #ifdef INET6
6189 case AF_INET6: {
6190 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
6192 if (m->m_pkthdr.len < off + len ||
6193 (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
6194 (unsigned)(off + len)) {
6195 ACTION_SET(actionp, PF_DROP);
6196 REASON_SET(reasonp, PFRES_SHORT);
6197 return (NULL);
6198 }
6199 break;
6200 }
6201 #endif /* INET6 */
6202 }
6203 m_copydata(m, off, len, p);
6204 return (p);
6205 }
6207 int
6208 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kkif *kif,
6209 int rtableid)
6210 {
6211 struct ifnet *ifp;
6213 /*
6214 * Skip check for addresses with embedded interface scope,
6215 * as they would always match anyway.
6216 */
6217 if (af == AF_INET6 && IN6_IS_SCOPE_EMBED(&addr->v6))
6218 return (1);
6220 if (af != AF_INET && af != AF_INET6)
6221 return (0);
6223 /* Skip checks for ipsec interfaces */
6224 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
6225 return (1);
6227 ifp = (kif != NULL) ? kif->pfik_ifp : NULL;
6229 switch (af) {
6230 #ifdef INET6
6231 case AF_INET6:
6232 return (fib6_check_urpf(rtableid, &addr->v6, 0, NHR_NONE,
6233 ifp));
6234 #endif
6235 #ifdef INET
6236 case AF_INET:
6237 return (fib4_check_urpf(rtableid, addr->v4, 0, NHR_NONE,
6238 ifp));
6239 #endif
6240 }
6242 return (0);
6243 }
6245 #ifdef INET
6246 static void
6247 pf_route(struct mbuf **m, struct pf_krule *r, int dir, struct ifnet *oifp,
6248 struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
6249 {
6250 struct mbuf *m0, *m1;
6251 struct sockaddr_in dst;
6252 struct ip *ip;
6253 struct ifnet *ifp = NULL;
6254 struct pf_addr naddr;
6255 struct pf_ksrc_node *sn = NULL;
6256 int error = 0;
6257 uint16_t ip_len, ip_off;
6259 KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
6260 KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
6263 if ((pd->pf_mtag == NULL &&
6264 ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
6265 pd->pf_mtag->routed++ > 3) {
6266 m0 = *m;
6267 *m = NULL;
6268 goto bad_locked;
6269 }
6271 if (r->rt == PF_DUPTO) {
6272 if ((pd->pf_mtag->flags & PF_DUPLICATED)) {
6273 if (s == NULL) {
6274 ifp = r->rpool.cur->kif ?
6275 r->rpool.cur->kif->pfik_ifp : NULL;
6276 } else {
6277 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
6278 PF_STATE_UNLOCK(s);
6279 }
6280 if (ifp == oifp) {
6281 /* When the 2nd interface is not skipped */
6282 return;
6283 } else {
6284 m0 = *m;
6285 *m = NULL;
6286 goto bad;
6287 }
6288 } else {
6289 pd->pf_mtag->flags |= PF_DUPLICATED;
6290 if (((m0 = m_dup(*m, M_NOWAIT)) == NULL)) {
6291 if (s)
6292 PF_STATE_UNLOCK(s);
6293 return;
6294 }
6295 }
6296 } else {
6297 if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
6298 if (s)
6299 PF_STATE_UNLOCK(s);
6300 return;
6301 }
6302 m0 = *m;
6303 }
6305 ip = mtod(m0, struct ip *);
6307 bzero(&dst, sizeof(dst));
6308 dst.sin_family = AF_INET;
6309 dst.sin_len = sizeof(dst);
6310 dst.sin_addr = ip->ip_dst;
6312 bzero(&naddr, sizeof(naddr));
6314 if (s == NULL) {
6315 if (TAILQ_EMPTY(&r->rpool.list)) {
6316 DPFPRINTF(PF_DEBUG_URGENT,
6317 ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
6318 goto bad_locked;
6319 }
6320 pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
6321 &naddr, NULL, &sn);
6322 if (!PF_AZERO(&naddr, AF_INET))
6323 dst.sin_addr.s_addr = naddr.v4.s_addr;
6324 ifp = r->rpool.cur->kif ?
6325 r->rpool.cur->kif->pfik_ifp : NULL;
6326 } else {
6327 if (!PF_AZERO(&s->rt_addr, AF_INET))
6328 dst.sin_addr.s_addr =
6329 s->rt_addr.v4.s_addr;
6330 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
6331 PF_STATE_UNLOCK(s);
6332 }
6333 if (ifp == NULL)
6334 goto bad;
6336 if (dir == PF_IN) {
6337 if (pf_test(PF_OUT, 0, ifp, &m0, inp) != PF_PASS)
6338 goto bad;
6339 else if (m0 == NULL)
6340 goto done;
6341 if (m0->m_len < sizeof(struct ip)) {
6342 DPFPRINTF(PF_DEBUG_URGENT,
6343 ("%s: m0->m_len < sizeof(struct ip)\n", __func__));
6344 goto bad;
6345 }
6346 ip = mtod(m0, struct ip *);
6347 }
6349 if (ifp->if_flags & IFF_LOOPBACK)
6350 m0->m_flags |= M_SKIP_FIREWALL;
6352 ip_len = ntohs(ip->ip_len);
6353 ip_off = ntohs(ip->ip_off);
6355 /* Copied from FreeBSD 10.0-CURRENT ip_output. */
6356 m0->m_pkthdr.csum_flags |= CSUM_IP;
6357 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
6358 in_delayed_cksum(m0);
6359 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
6360 }
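/*
 * Delayed checksums must be finalized here: the packet may be
 * fragmented below or handed to an interface that lacks the
 * corresponding hardware assist.
 */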
6361 #if defined(SCTP) || defined(SCTP_SUPPORT)
6362 if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
6363 sctp_delayed_cksum(m0, (uint32_t)(ip->ip_hl << 2));
6364 m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
6365 }
6366 #endif /* SCTP */
6368 /*
6369 * If small enough for interface, or the interface will take
6370 * care of the fragmentation for us, we can just send directly.
6371 */
6372 if (ip_len <= ifp->if_mtu ||
6373 (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
6374 ip->ip_sum = 0;
6375 if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
6376 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
6377 m0->m_pkthdr.csum_flags &= ~CSUM_IP;
6378 }
6379 m_clrprotoflags(m0); /* Avoid confusing lower layers. */
6380 error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
6381 goto done;
6382 }
6384 /* Balk when DF bit is set or the interface didn't support TSO. */
6385 if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
6386 error = EMSGSIZE;
6387 KMOD_IPSTAT_INC(ips_cantfrag);
6388 if (r->rt != PF_DUPTO) {
6389 if (s && pd->nat_rule != NULL)
6390 PACKET_UNDO_NAT(m0, pd,
6391 (ip->ip_hl << 2) + (ip_off & IP_OFFMASK),
6392 s, dir);
6394 icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
6395 ifp->if_mtu);
6396 goto done;
6397 } else
6398 goto bad;
6399 }
6401 error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist);
6402 if (error)
6403 goto bad;
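/* Send each fragment of the resulting chain; once an output
 * error occurs the remaining fragments are freed instead. */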
6405 for (; m0; m0 = m1) {
6406 m1 = m0->m_nextpkt;
6407 m0->m_nextpkt = NULL;
6408 if (error == 0) {
6409 m_clrprotoflags(m0);
6410 error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
6411 } else
6412 m_freem(m0);
6413 }
6415 if (error == 0)
6416 KMOD_IPSTAT_INC(ips_fragmented);
6418 done:
6419 if (r->rt != PF_DUPTO)
6420 *m = NULL;
6421 return;
6423 bad_locked:
6424 if (s)
6425 PF_STATE_UNLOCK(s);
6426 bad:
6427 m_freem(m0);
6428 goto done;
6429 }
6430 #endif /* INET */
6432 #ifdef INET6
6433 static void
6434 pf_route6(struct mbuf **m, struct pf_krule *r, int dir, struct ifnet *oifp,
6435 struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
6436 {
6437 struct mbuf *m0;
6438 struct sockaddr_in6 dst;
6439 struct ip6_hdr *ip6;
6440 struct ifnet *ifp = NULL;
6441 struct pf_addr naddr;
6442 struct pf_ksrc_node *sn = NULL;
6444 KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
6445 KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
6448 if ((pd->pf_mtag == NULL &&
6449 ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
6450 pd->pf_mtag->routed++ > 3) {
6451 m0 = *m;
6452 *m = NULL;
6453 goto bad_locked;
6454 }
6456 if (r->rt == PF_DUPTO) {
6457 if ((pd->pf_mtag->flags & PF_DUPLICATED)) {
6458 if (s == NULL) {
6459 ifp = r->rpool.cur->kif ?
6460 r->rpool.cur->kif->pfik_ifp : NULL;
6461 } else {
6462 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
6463 PF_STATE_UNLOCK(s);
6464 }
6465 if (ifp == oifp) {
6466 /* When the 2nd interface is not skipped */
6467 return;
6468 } else {
6469 m0 = *m;
6470 *m = NULL;
6471 goto bad;
6472 }
6473 } else {
6474 pd->pf_mtag->flags |= PF_DUPLICATED;
6475 if (((m0 = m_dup(*m, M_NOWAIT)) == NULL)) {
6476 if (s)
6477 PF_STATE_UNLOCK(s);
6478 return;
6479 }
6480 }
6481 } else {
6482 if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
6483 if (s)
6484 PF_STATE_UNLOCK(s);
6485 return;
6486 }
6487 m0 = *m;
6488 }
6490 ip6 = mtod(m0, struct ip6_hdr *);
6492 bzero(&dst, sizeof(dst));
6493 dst.sin6_family = AF_INET6;
6494 dst.sin6_len = sizeof(dst);
6495 dst.sin6_addr = ip6->ip6_dst;
6497 bzero(&naddr, sizeof(naddr));
6499 if (s == NULL) {
6500 if (TAILQ_EMPTY(&r->rpool.list)) {
6501 DPFPRINTF(PF_DEBUG_URGENT,
6502 ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
6503 goto bad_locked;
6504 }
6505 pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
6506 &naddr, NULL, &sn);
6507 if (!PF_AZERO(&naddr, AF_INET6))
6508 PF_ACPY((struct pf_addr *)&dst.sin6_addr,
6509 &naddr, AF_INET6);
6510 ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
6511 } else {
6512 if (!PF_AZERO(&s->rt_addr, AF_INET6))
6513 PF_ACPY((struct pf_addr *)&dst.sin6_addr,
6514 &s->rt_addr, AF_INET6);
6515 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
6516 PF_STATE_UNLOCK(s);
6517 }
6519 if (ifp == NULL)
6520 goto bad;
6522 if (dir == PF_IN) {
6525 if (pf_test6(PF_OUT, PFIL_FWD, ifp, &m0, inp) != PF_PASS)
6526 goto bad;
6527 else if (m0 == NULL)
6528 goto done;
6529 if (m0->m_len < sizeof(struct ip6_hdr)) {
6530 DPFPRINTF(PF_DEBUG_URGENT,
6531 ("%s: m0->m_len < sizeof(struct ip6_hdr)\n",
6532 __func__));
6533 goto bad;
6534 }
6535 ip6 = mtod(m0, struct ip6_hdr *);
6536 }
6538 if (ifp->if_flags & IFF_LOOPBACK)
6539 m0->m_flags |= M_SKIP_FIREWALL;
6541 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
6542 ~ifp->if_hwassist) {
6543 uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6);
6544 in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr));
6545 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
6546 }
6548 /*
6549 * If the packet is too large for the outgoing interface,
6550 * send back an icmp6 error.
6551 */
6552 if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
6553 dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
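/*
 * KAME-style scoped addresses carry the zone in s6_addr16[1];
 * filling in the outgoing interface index points the neighbor
 * lookup at the correct link.
 */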
6554 if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu)
6555 nd6_output_ifp(ifp, ifp, m0, &dst, NULL);
6556 else {
6557 in6_ifstat_inc(ifp, ifs6_in_toobig);
6558 if (r->rt != PF_DUPTO) {
6559 if (s && pd->nat_rule != NULL)
6560 PACKET_UNDO_NAT(m0, pd,
6561 ((caddr_t)ip6 - m0->m_data) +
6562 sizeof(struct ip6_hdr), s, dir);
6564 icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
6565 } else
6566 goto bad;
6567 }
6569 done:
6570 if (r->rt != PF_DUPTO)
6571 *m = NULL;
6572 return;
6574 bad_locked:
6575 if (s)
6576 PF_STATE_UNLOCK(s);
6577 bad:
6578 m_freem(m0);
6579 goto done;
6580 }
6581 #endif /* INET6 */
6583 /*
6584 * FreeBSD supports cksum offloads for the following drivers.
6585 * em(4), fxp(4), lge(4), nge(4), re(4), ti(4), txp(4), xl(4)
6586 *
6587 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
6588 * network driver performed cksum including pseudo header, need to verify
6589 * csum_data
6590 * CSUM_DATA_VALID :
6591 * network driver performed cksum, needs additional pseudo header
6592 * cksum computation with partial csum_data (i.e. lack of H/W support for
6593 * pseudo header, for instance sk(4) and possibly gem(4))
6594 *
6595 * After validating the cksum of the packet, set both flag CSUM_DATA_VALID and
6596 * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
6597 * protocol layers.
6598 * Also, set csum_data to 0xffff to force cksum validation.
6599 */
6600 static int
6601 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
6602 {
6603 u_int16_t sum = 0;
6604 int hw_assist = 0;
6605 struct ip *ip;
6607 if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
6608 return (1);
6609 if (m->m_pkthdr.len < off + len)
6610 return (1);
6612 switch (p) {
6613 case IPPROTO_TCP:
6614 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
6615 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
6616 sum = m->m_pkthdr.csum_data;
6617 } else {
6618 ip = mtod(m, struct ip *);
6619 sum = in_pseudo(ip->ip_src.s_addr,
6620 ip->ip_dst.s_addr, htonl((u_short)len +
6621 m->m_pkthdr.csum_data + IPPROTO_TCP));
6622 }
6623 sum ^= 0xffff;
6624 ++hw_assist;
6625 }
6626 break;
6627 case IPPROTO_UDP:
6628 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
6629 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
6630 sum = m->m_pkthdr.csum_data;
6631 } else {
6632 ip = mtod(m, struct ip *);
6633 sum = in_pseudo(ip->ip_src.s_addr,
6634 ip->ip_dst.s_addr, htonl((u_short)len +
6635 m->m_pkthdr.csum_data + IPPROTO_UDP));
6636 }
6637 sum ^= 0xffff;
6638 ++hw_assist;
6639 }
6640 break;
6641 case IPPROTO_ICMP:
6642 #ifdef INET6
6643 case IPPROTO_ICMPV6:
6644 #endif /* INET6 */
6645 break;
6646 default:
6647 return (1);
6648 }
6650 if (!hw_assist) {
6651 switch (af) {
6652 case AF_INET:
6653 if (p == IPPROTO_ICMP) {
6654 if (m->m_len < off)
6655 return (1);
6656 m->m_data += off;
6657 m->m_len -= off;
6658 sum = in_cksum(m, len);
6659 m->m_data -= off;
6660 m->m_len += off;
6661 } else {
6662 if (m->m_len < sizeof(struct ip))
6663 return (1);
6664 sum = in4_cksum(m, p, off, len);
6665 }
6666 break;
6667 #ifdef INET6
6668 case AF_INET6:
6669 if (m->m_len < sizeof(struct ip6_hdr))
6670 return (1);
6671 sum = in6_cksum(m, p, off, len);
6672 break;
6673 #endif /* INET6 */
6674 default:
6675 return (1);
6676 }
6677 }
6678 if (sum) {
6679 switch (p) {
6680 case IPPROTO_TCP:
6681 {
6682 KMOD_TCPSTAT_INC(tcps_rcvbadsum);
6683 break;
6684 }
6685 case IPPROTO_UDP:
6686 {
6687 KMOD_UDPSTAT_INC(udps_badsum);
6688 break;
6689 }
6690 #ifdef INET
6691 case IPPROTO_ICMP:
6692 {
6693 KMOD_ICMPSTAT_INC(icps_checksum);
6694 break;
6695 }
6696 #endif /* INET */
6697 #ifdef INET6
6698 case IPPROTO_ICMPV6:
6699 {
6700 KMOD_ICMP6STAT_INC(icp6s_checksum);
6701 break;
6702 }
6703 #endif /* INET6 */
6704 }
6705 return (1);
6706 } else {
6707 if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
6708 m->m_pkthdr.csum_flags |=
6709 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
6710 m->m_pkthdr.csum_data = 0xffff;
6711 }
6712 }
6713 return (0);
6714 }
6716 static bool
6717 pf_pdesc_to_dnflow(int dir, const struct pf_pdesc *pd,
6718 const struct pf_krule *r, const struct pf_kstate *s,
6719 struct ip_fw_args *dnflow)
6720 {
6721 int dndir = r->direction;
6723 if (s && dndir == PF_INOUT) {
6724 dndir = s->direction;
6725 } else if (dndir == PF_INOUT) {
6726 /* Assume primary direction. Happens when we've set dnpipe in
6727 * the ethernet level code. */
6728 dndir = dir;
6729 }
6731 memset(dnflow, 0, sizeof(*dnflow));
6733 if (pd->dport != NULL)
6734 dnflow->f_id.dst_port = ntohs(*pd->dport);
6735 if (pd->sport != NULL)
6736 dnflow->f_id.src_port = ntohs(*pd->sport);
6738 if (dir == PF_IN)
6739 dnflow->flags |= IPFW_ARGS_IN;
6740 else
6741 dnflow->flags |= IPFW_ARGS_OUT;
6743 if (dir != dndir && pd->act.dnrpipe) {
6744 dnflow->rule.info = pd->act.dnrpipe;
6745 }
6746 else if (dir == dndir) {
6747 dnflow->rule.info = pd->act.dnpipe;
6748 }
6749 else {
6750 return (false);
6751 }
6753 dnflow->rule.info |= IPFW_IS_DUMMYNET;
6754 if (r->free_flags & PFRULE_DN_IS_PIPE || pd->act.flags & PFRULE_DN_IS_PIPE)
6755 dnflow->rule.info |= IPFW_IS_PIPE;
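/*
 * rule.info names either a dummynet pipe (its own bandwidth and
 * delay emulation) or a queue that feeds a pipe; IPFW_IS_PIPE
 * tells dummynet which kind of object to look up.
 */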
6757 dnflow->f_id.proto = pd->proto;
6758 dnflow->f_id.extra = dnflow->rule.info;
6759 switch (pd->af) {
6760 case AF_INET:
6761 dnflow->f_id.addr_type = 4;
6762 dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr);
6763 dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr);
6764 break;
6765 case AF_INET6:
6766 dnflow->flags |= IPFW_ARGS_IP6;
6767 dnflow->f_id.addr_type = 6;
6768 dnflow->f_id.src_ip6 = pd->src->v6;
6769 dnflow->f_id.dst_ip6 = pd->dst->v6;
6770 break;
6771 default:
6772 panic("Invalid AF");
6773 break;
6774 }
6776 return (true);
6777 }
6779 int
6780 pf_test_eth(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
6781 struct inpcb *inp)
6782 {
6783 struct pfi_kkif *kif;
6784 struct mbuf *m = *m0;
6787 MPASS(ifp->if_vnet == curvnet);
6790 if (!V_pf_status.running)
6791 return (PF_PASS);
6793 kif = (struct pfi_kkif *)ifp->if_pf_kif;
6795 if (kif == NULL) {
6796 DPFPRINTF(PF_DEBUG_URGENT,
6797 ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
6798 return (PF_DROP);
6799 }
6800 if (kif->pfik_flags & PFI_IFLAG_SKIP)
6801 return (PF_PASS);
6803 if (m->m_flags & M_SKIP_FIREWALL)
6804 return (PF_PASS);
6806 /* Stateless! */
6807 return (pf_test_eth_rule(dir, kif, m0));
6808 }
6810 #ifdef INET
6811 int
6812 pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
6813 {
6814 struct pfi_kkif *kif;
6815 u_short action, reason = 0, log = 0;
6816 struct mbuf *m = *m0;
6817 struct ip *h = NULL;
6818 struct m_tag *ipfwtag;
6819 struct pf_krule *a = NULL, *r = &V_pf_default_rule, *tr, *nr;
6820 struct pf_kstate *s = NULL;
6821 struct pf_kruleset *ruleset = NULL;
6822 struct pf_pdesc pd;
6823 int off, dirndx, pqid = 0;
6825 PF_RULES_RLOCK_TRACKER;
6826 KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: bad direction %d\n", __func__, dir));
6827 M_ASSERTPKTHDR(m);
6829 if (!V_pf_status.running)
6830 return (PF_PASS);
6832 kif = (struct pfi_kkif *)ifp->if_pf_kif;
6834 if (kif == NULL) {
6835 DPFPRINTF(PF_DEBUG_URGENT,
6836 ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
6837 return (PF_DROP);
6838 }
6839 if (kif->pfik_flags & PFI_IFLAG_SKIP)
6840 return (PF_PASS);
6842 if (m->m_flags & M_SKIP_FIREWALL)
6843 return (PF_PASS);
6845 memset(&pd, 0, sizeof(pd));
6846 pd.pf_mtag = pf_find_mtag(m);
6848 if (pd.pf_mtag && pd.pf_mtag->dnpipe) {
6849 pd.act.dnpipe = pd.pf_mtag->dnpipe;
6850 pd.act.flags = pd.pf_mtag->dnflags;
6851 }
6853 if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL &&
6854 pd.pf_mtag->flags & PF_TAG_DUMMYNET) {
6855 /* Dummynet re-injects packets after they've
6856 * completed their delay. We've already
6857 * processed them, so pass unconditionally. */
6859 /* But only once. We may see the packet multiple times (e.g.
6860 * PFIL_IN/PFIL_OUT). */
6861 pd.pf_mtag->flags &= ~PF_TAG_DUMMYNET;
6863 return (PF_PASS);
6864 }
6866 PF_RULES_RLOCK();
6868 if (__predict_false(ip_divert_ptr != NULL) &&
6869 ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) {
6870 struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1);
6871 if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) {
6872 if (pd.pf_mtag == NULL &&
6873 ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
6874 action = PF_DROP;
6875 goto done;
6876 }
6877 pd.pf_mtag->flags |= PF_PACKET_LOOPED;
6878 m_tag_delete(m, ipfwtag);
6879 }
6880 if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) {
6881 m->m_flags |= M_FASTFWD_OURS;
6882 pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT;
6883 }
6884 } else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
6885 /* We do IP header normalization and packet reassembly here */
6886 action = PF_DROP;
6887 goto done;
6888 }
6889 m = *m0; /* pf_normalize messes with m0 */
6890 h = mtod(m, struct ip *);
6892 off = h->ip_hl << 2;
6893 if (off < (int)sizeof(struct ip)) {
6894 action = PF_DROP;
6895 REASON_SET(&reason, PFRES_SHORT);
6896 log = 1;
6897 goto done;
6898 }
6900 pd.src = (struct pf_addr *)&h->ip_src;
6901 pd.dst = (struct pf_addr *)&h->ip_dst;
6902 pd.sport = pd.dport = NULL;
6903 pd.ip_sum = &h->ip_sum;
6904 pd.proto_sum = NULL;
6905 pd.proto = h->ip_p;
6906 pd.dir = dir;
6907 pd.sidx = (dir == PF_IN) ? 0 : 1;
6908 pd.didx = (dir == PF_IN) ? 1 : 0;
6909 pd.af = AF_INET;
6910 pd.tos = h->ip_tos & ~IPTOS_ECN_MASK;
6911 pd.tot_len = ntohs(h->ip_len);
6913 /* handle fragments that didn't get reassembled by normalization */
6914 if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
6915 action = pf_test_fragment(&r, dir, kif, m, h,
6916 &pd, &a, &ruleset);
6917 goto done;
6918 }
6920 switch (h->ip_p) {
6921 case IPPROTO_TCP: {
6922 if (!pf_pull_hdr(m, off, &pd.hdr.tcp, sizeof(pd.hdr.tcp),
6923 &action, &reason, AF_INET)) {
6924 log = action != PF_PASS;
6925 goto done;
6926 }
6927 pd.p_len = pd.tot_len - off - (pd.hdr.tcp.th_off << 2);
6929 pd.sport = &pd.hdr.tcp.th_sport;
6930 pd.dport = &pd.hdr.tcp.th_dport;
6932 /* Respond to SYN with a syncookie. */
6933 if ((pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_SYN &&
6934 pd.dir == PF_IN && pf_synflood_check(&pd)) {
6935 pf_syncookie_send(m, off, &pd);
6936 action = PF_DROP;
6937 break;
6938 }
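/*
 * The SYN was answered with a syncookie SYN|ACK and then
 * dropped; no state was created. State is only set up later,
 * when the client's cookie-carrying ACK validates below.
 */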
6940 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0)
6941 pqid = 1;
6942 action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6943 if (action == PF_DROP)
6944 goto done;
6945 action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6946 &reason);
6947 if (action == PF_PASS) {
6948 if (V_pfsync_update_state_ptr != NULL)
6949 V_pfsync_update_state_ptr(s);
6950 r = s->rule.ptr;
6951 a = s->anchor.ptr;
6952 log = s->log;
6953 } else if (s == NULL) {
6954 /* Validate remote SYN|ACK, re-create original SYN if
6955 * valid. */
6956 if ((pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) ==
6957 TH_ACK && pf_syncookie_validate(&pd) &&
6958 pd.dir == PF_IN) {
6959 struct mbuf *msyn;
6961 msyn = pf_syncookie_recreate_syn(h->ip_ttl,
6962 off, &pd);
6963 if (msyn == NULL) {
6964 action = PF_DROP;
6965 break;
6966 }
6968 action = pf_test(dir, pflags, ifp, &msyn, inp);
6969 m_freem(msyn);
6971 if (action == PF_PASS) {
6972 action = pf_test_state_tcp(&s, dir,
6973 kif, m, off, h, &pd, &reason);
6974 if (action != PF_PASS || s == NULL) {
6975 action = PF_DROP;
6976 break;
6977 }
6979 s->src.seqhi = ntohl(pd.hdr.tcp.th_ack)
6980 - 1;
6981 s->src.seqlo = ntohl(pd.hdr.tcp.th_seq)
6982 - 1;
6983 pf_set_protostate(s, PF_PEER_SRC,
6984 PF_TCPS_PROXY_DST);
6986 action = pf_synproxy(&pd, &s, &reason);
6987 if (action != PF_PASS)
6988 break;
6992 } else {
6993 action = pf_test_rule(&r, &s, dir, kif, m, off,
6994 &pd, &a, &ruleset, inp);
6995 }
6996 }
6997 break;
6998 }
7000 case IPPROTO_UDP: {
7001 if (!pf_pull_hdr(m, off, &pd.hdr.udp, sizeof(pd.hdr.udp),
7002 &action, &reason, AF_INET)) {
7003 log = action != PF_PASS;
7004 goto done;
7005 }
7006 pd.sport = &pd.hdr.udp.uh_sport;
7007 pd.dport = &pd.hdr.udp.uh_dport;
7008 if (pd.hdr.udp.uh_dport == 0 ||
7009 ntohs(pd.hdr.udp.uh_ulen) > m->m_pkthdr.len - off ||
7010 ntohs(pd.hdr.udp.uh_ulen) < sizeof(struct udphdr)) {
7011 action = PF_DROP;
7012 REASON_SET(&reason, PFRES_SHORT);
7013 goto done;
7014 }
7015 action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
7016 if (action == PF_PASS) {
7017 if (V_pfsync_update_state_ptr != NULL)
7018 V_pfsync_update_state_ptr(s);
7019 r = s->rule.ptr;
7020 a = s->anchor.ptr;
7021 log = s->log;
7022 } else if (s == NULL)
7023 action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
7024 &a, &ruleset, inp);
7025 break;
7026 }
7028 case IPPROTO_ICMP: {
7029 if (!pf_pull_hdr(m, off, &pd.hdr.icmp, ICMP_MINLEN,
7030 &action, &reason, AF_INET)) {
7031 log = action != PF_PASS;
7032 goto done;
7033 }
7034 action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
7035 &reason);
7036 if (action == PF_PASS) {
7037 if (V_pfsync_update_state_ptr != NULL)
7038 V_pfsync_update_state_ptr(s);
7039 r = s->rule.ptr;
7040 a = s->anchor.ptr;
7041 log = s->log;
7042 } else if (s == NULL)
7043 action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
7044 &a, &ruleset, inp);
7045 break;
7046 }
7048 #ifdef INET6
7049 case IPPROTO_ICMPV6: {
7050 action = PF_DROP;
7051 DPFPRINTF(PF_DEBUG_MISC,
7052 ("pf: dropping IPv4 packet with ICMPv6 payload\n"));
7053 goto done;
7054 }
7055 #endif /* INET6 */
7057 default:
7058 action = pf_test_state_other(&s, dir, kif, m, &pd);
7059 if (action == PF_PASS) {
7060 if (V_pfsync_update_state_ptr != NULL)
7061 V_pfsync_update_state_ptr(s);
7062 r = s->rule.ptr;
7063 a = s->anchor.ptr;
7064 log = s->log;
7065 } else if (s == NULL)
7066 action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
7067 &a, &ruleset, inp);
7068 break;
7069 }
7071 done:
7073 if (action == PF_PASS && h->ip_hl > 5 &&
7074 !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
7075 action = PF_DROP;
7076 REASON_SET(&reason, PFRES_IPOPTIONS);
7077 log = r->log;
7078 DPFPRINTF(PF_DEBUG_MISC,
7079 ("pf: dropping packet with ip options\n"));
7080 }
7082 if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) {
7083 action = PF_DROP;
7084 REASON_SET(&reason, PFRES_MEMORY);
7085 }
7086 if (r->rtableid >= 0)
7087 M_SETFIB(m, r->rtableid);
7089 if (r->scrub_flags & PFSTATE_SETPRIO) {
7090 if (pd.tos & IPTOS_LOWDELAY)
7091 pqid = 1;
7092 if (vlan_set_pcp(m, r->set_prio[pqid])) {
7093 action = PF_DROP;
7094 REASON_SET(&reason, PFRES_MEMORY);
7095 log = 1;
7096 DPFPRINTF(PF_DEBUG_MISC,
7097 ("pf: failed to allocate 802.1q mtag\n"));
7098 }
7099 }
7102 if (s) {
7103 pd.act.pqid = s->pqid;
7104 pd.act.qid = s->qid;
7105 } else if (r->qid) {
7106 pd.act.pqid = r->pqid;
7107 pd.act.qid = r->qid;
7108 }
7109 if (action == PF_PASS && pd.act.qid) {
7110 if (pd.pf_mtag == NULL &&
7111 ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
7112 action = PF_DROP;
7113 REASON_SET(&reason, PFRES_MEMORY);
7114 } else {
7116 pd.pf_mtag->qid_hash = pf_state_hash(s);
7117 if (pqid || (pd.tos & IPTOS_LOWDELAY))
7118 pd.pf_mtag->qid = pd.act.pqid;
7119 else
7120 pd.pf_mtag->qid = pd.act.qid;
7121 /* Add hints for ecn. */
7122 pd.pf_mtag->hdr = h;
7123 }
7124 }
7126 /*
7128 * connections redirected to loopback should not match sockets
7129 * bound specifically to loopback due to security implications,
7130 * see tcp_input() and in_pcblookup_listen().
7131 */
7132 if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
7133 pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
7134 (s->nat_rule.ptr->action == PF_RDR ||
7135 s->nat_rule.ptr->action == PF_BINAT) &&
7136 IN_LOOPBACK(ntohl(pd.dst->v4.s_addr)))
7137 m->m_flags |= M_SKIP_FIREWALL;
7139 if (__predict_false(ip_divert_ptr != NULL) && action == PF_PASS &&
7140 r->divert.port && !PACKET_LOOPED(&pd)) {
7141 ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0,
7142 sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO);
7143 if (ipfwtag != NULL) {
7144 ((struct ipfw_rule_ref *)(ipfwtag+1))->info =
7145 ntohs(r->divert.port);
7146 ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir;
7148 if (s)
7149 PF_STATE_UNLOCK(s);
7151 m_tag_prepend(m, ipfwtag);
7152 if (m->m_flags & M_FASTFWD_OURS) {
7153 if (pd.pf_mtag == NULL &&
7154 ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
7155 action = PF_DROP;
7156 REASON_SET(&reason, PFRES_MEMORY);
7157 log = 1;
7158 DPFPRINTF(PF_DEBUG_MISC,
7159 ("pf: failed to allocate tag\n"));
7160 } else {
7161 pd.pf_mtag->flags |=
7162 PF_FASTFWD_OURS_PRESENT;
7163 m->m_flags &= ~M_FASTFWD_OURS;
7164 }
7165 }
7166 ip_divert_ptr(*m0, dir == PF_IN);
7167 *m0 = NULL;
7169 return (action);
7170 } else {
7171 /* XXX: ipfw has the same behaviour! */
7172 action = PF_DROP;
7173 REASON_SET(&reason, PFRES_MEMORY);
7174 log = 1;
7175 DPFPRINTF(PF_DEBUG_MISC,
7176 ("pf: failed to allocate divert tag\n"));
7177 }
7178 }
7180 if (log) {
7181 struct pf_krule *lr;
7183 if (s != NULL && s->nat_rule.ptr != NULL &&
7184 s->nat_rule.ptr->log & PF_LOG_ALL)
7185 lr = s->nat_rule.ptr;
7186 else
7187 lr = r;
7188 PFLOG_PACKET(kif, m, AF_INET, dir, reason, lr, a, ruleset, &pd,
7189 (s == NULL));
7190 }
7192 pf_counter_u64_critical_enter();
7193 pf_counter_u64_add_protected(&kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS],
7194 pd.tot_len);
7195 pf_counter_u64_add_protected(&kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS],
7196 1);
7198 if (action == PF_PASS || r->action == PF_DROP) {
7199 dirndx = (dir == PF_OUT);
7200 pf_counter_u64_add_protected(&r->packets[dirndx], 1);
7201 pf_counter_u64_add_protected(&r->bytes[dirndx], pd.tot_len);
7202 pf_update_timestamp(r);
7204 if (a != NULL) {
7205 pf_counter_u64_add_protected(&a->packets[dirndx], 1);
7206 pf_counter_u64_add_protected(&a->bytes[dirndx], pd.tot_len);
7207 }
7208 if (s != NULL) {
7209 if (s->nat_rule.ptr != NULL) {
7210 pf_counter_u64_add_protected(&s->nat_rule.ptr->packets[dirndx],
7211 1);
7212 pf_counter_u64_add_protected(&s->nat_rule.ptr->bytes[dirndx],
7213 pd.tot_len);
7214 }
7215 if (s->src_node != NULL) {
7216 counter_u64_add(s->src_node->packets[dirndx],
7217 1);
7218 counter_u64_add(s->src_node->bytes[dirndx],
7219 pd.tot_len);
7220 }
7221 if (s->nat_src_node != NULL) {
7222 counter_u64_add(s->nat_src_node->packets[dirndx],
7223 1);
7224 counter_u64_add(s->nat_src_node->bytes[dirndx],
7225 pd.tot_len);
7226 }
7227 dirndx = (dir == s->direction) ? 0 : 1;
7228 s->packets[dirndx]++;
7229 s->bytes[dirndx] += pd.tot_len;
7230 }
7231 tr = r;
7232 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
7233 if (nr != NULL && r == &V_pf_default_rule)
7234 tr = nr;
7235 if (tr->src.addr.type == PF_ADDR_TABLE)
7236 pfr_update_stats(tr->src.addr.p.tbl,
7237 (s == NULL) ? pd.src :
7238 &s->key[(s->direction == PF_IN)]->
7239 addr[(s->direction == PF_OUT)],
7240 pd.af, pd.tot_len, dir == PF_OUT,
7241 r->action == PF_PASS, tr->src.neg);
7242 if (tr->dst.addr.type == PF_ADDR_TABLE)
7243 pfr_update_stats(tr->dst.addr.p.tbl,
7244 (s == NULL) ? pd.dst :
7245 &s->key[(s->direction == PF_IN)]->
7246 addr[(s->direction == PF_IN)],
7247 pd.af, pd.tot_len, dir == PF_OUT,
7248 r->action == PF_PASS, tr->dst.neg);
7249 }
7250 pf_counter_u64_critical_exit();
7252 switch (action) {
7253 case PF_SYNPROXY_DROP:
7254 m_freem(*m0);
7255 case PF_DEFER:
7256 *m0 = NULL;
7257 action = PF_PASS;
7258 break;
7259 case PF_DROP:
7260 m_freem(*m0);
7261 *m0 = NULL;
7262 break;
7263 default:
7264 /* pf_route() returns unlocked. */
7265 if (r->rt) {
7266 pf_route(m0, r, dir, kif->pfik_ifp, s, &pd, inp);
7267 return (action);
7268 }
7269 /* Dummynet processing. */
7270 if (s && (s->dnpipe || s->dnrpipe)) {
7271 pd.act.dnpipe = s->dnpipe;
7272 pd.act.dnrpipe = s->dnrpipe;
7273 pd.act.flags = s->state_flags;
7274 } else if (r->dnpipe || r->dnrpipe) {
7275 pd.act.dnpipe = r->dnpipe;
7276 pd.act.dnrpipe = r->dnrpipe;
7277 pd.act.flags = r->free_flags;
7278 }
7279 if (pd.act.dnpipe || pd.act.dnrpipe) {
7280 struct ip_fw_args dnflow;
7281 if (ip_dn_io_ptr == NULL) {
7282 m_freem(*m0);
7283 *m0 = NULL;
7284 action = PF_DROP;
7285 REASON_SET(&reason, PFRES_MEMORY);
7286 break;
7287 }
7289 if (pd.pf_mtag == NULL &&
7290 ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
7291 m_freem(*m0);
7292 *m0 = NULL;
7293 action = PF_DROP;
7294 REASON_SET(&reason, PFRES_MEMORY);
7295 break;
7296 }
7298 if (pf_pdesc_to_dnflow(dir, &pd, r, s, &dnflow)) {
7299 pd.pf_mtag->flags |= PF_TAG_DUMMYNET;
7300 ip_dn_io_ptr(m0, &dnflow);
7301 if (*m0 == NULL)
7302 action = PF_DROP;
7303 }
7304 }
7305 break;
7306 }
7308 SDT_PROBE4(pf, ip, test, done, action, reason, r, s);
7310 if (s)
7311 PF_STATE_UNLOCK(s);
7313 return (action);
7314 }
7315 #endif /* INET */
7317 #ifdef INET6
7318 int
7319 pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
7320 {
7321 struct pfi_kkif *kif;
7322 u_short action, reason = 0, log = 0;
7323 struct mbuf *m = *m0, *n = NULL;
7324 struct m_tag *mtag;
7325 struct ip6_hdr *h = NULL;
7326 struct pf_krule *a = NULL, *r = &V_pf_default_rule, *tr, *nr;
7327 struct pf_kstate *s = NULL;
7328 struct pf_kruleset *ruleset = NULL;
7329 struct pf_pdesc pd;
7330 int off, terminal = 0, dirndx, rh_cnt = 0, pqid = 0;
7332 PF_RULES_RLOCK_TRACKER;
7333 KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: bad direction %d\n", __func__, dir));
7334 M_ASSERTPKTHDR(m);
7336 if (!V_pf_status.running)
7337 return (PF_PASS);
7339 kif = (struct pfi_kkif *)ifp->if_pf_kif;
7340 if (kif == NULL) {
7341 DPFPRINTF(PF_DEBUG_URGENT,
7342 ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
7343 return (PF_DROP);
7344 }
7345 if (kif->pfik_flags & PFI_IFLAG_SKIP)
7346 return (PF_PASS);
7348 if (m->m_flags & M_SKIP_FIREWALL)
7349 return (PF_PASS);
7351 memset(&pd, 0, sizeof(pd));
7352 pd.pf_mtag = pf_find_mtag(m);
7354 if (pd.pf_mtag && pd.pf_mtag->dnpipe) {
7355 pd.act.dnpipe = pd.pf_mtag->dnpipe;
7356 pd.act.flags = pd.pf_mtag->dnflags;
7357 }
7359 if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL &&
7360 pd.pf_mtag->flags & PF_TAG_DUMMYNET) {
7361 pd.pf_mtag->flags &= ~PF_TAG_DUMMYNET;
7362 /* Dummynet re-injects packets after they've
7363 * completed their delay. We've already
7364 * processed them, so pass unconditionally. */
7365 return (PF_PASS);
7366 }
7368 PF_RULES_RLOCK();
7370 /* We do IP header normalization and packet reassembly here */
7371 if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
7372 action = PF_DROP;
7373 goto done;
7374 }
7375 m = *m0; /* pf_normalize messes with m0 */
7376 h = mtod(m, struct ip6_hdr *);
7378 /*
7379 * we do not support jumbogram. if we keep going, zero ip6_plen
7380 * will do something bad, so drop the packet for now.
7381 */
7382 if (htons(h->ip6_plen) == 0) {
7383 action = PF_DROP;
7384 REASON_SET(&reason, PFRES_NORM); /*XXX*/
7385 goto done;
7386 }
7388 pd.src = (struct pf_addr *)&h->ip6_src;
7389 pd.dst = (struct pf_addr *)&h->ip6_dst;
7390 pd.sport = pd.dport = NULL;
7391 pd.ip_sum = NULL;
7392 pd.proto_sum = NULL;
7393 pd.dir = dir;
7394 pd.sidx = (dir == PF_IN) ? 0 : 1;
7395 pd.didx = (dir == PF_IN) ? 1 : 0;
7396 pd.af = AF_INET6;
7397 pd.tos = IPV6_DSCP(h);
7398 pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
7400 off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
7401 pd.proto = h->ip6_nxt;
7402 do {
7403 switch (pd.proto) {
7404 case IPPROTO_FRAGMENT:
7405 action = pf_test_fragment(&r, dir, kif, m, h,
7406 &pd, &a, &ruleset);
7407 if (action == PF_DROP)
7408 REASON_SET(&reason, PFRES_FRAG);
7409 goto done;
7410 case IPPROTO_ROUTING: {
7411 struct ip6_rthdr rthdr;
7413 if (rh_cnt++) {
7414 DPFPRINTF(PF_DEBUG_MISC,
7415 ("pf: IPv6 more than one rthdr\n"));
7416 action = PF_DROP;
7417 REASON_SET(&reason, PFRES_IPOPTIONS);
7418 log = 1;
7419 goto done;
7420 }
7421 if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
7422 &reason, pd.af)) {
7423 DPFPRINTF(PF_DEBUG_MISC,
7424 ("pf: IPv6 short rthdr\n"));
7425 action = PF_DROP;
7426 REASON_SET(&reason, PFRES_SHORT);
7427 log = 1;
7428 goto done;
7429 }
7430 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
7431 DPFPRINTF(PF_DEBUG_MISC,
7432 ("pf: IPv6 rthdr0\n"));
7433 action = PF_DROP;
7434 REASON_SET(&reason, PFRES_IPOPTIONS);
7435 log = 1;
7436 goto done;
7437 }
7438 /* FALLTHROUGH */
7439 }
7440 case IPPROTO_AH:
7441 case IPPROTO_HOPOPTS:
7442 case IPPROTO_DSTOPTS: {
7443 /* get next header and header length */
7444 struct ip6_ext opt6;
7446 if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
7447 NULL, &reason, pd.af)) {
7448 DPFPRINTF(PF_DEBUG_MISC,
7449 ("pf: IPv6 short opt\n"));
7450 action = PF_DROP;
7451 log = 1;
7452 goto done;
7453 }
7454 if (pd.proto == IPPROTO_AH)
7455 off += (opt6.ip6e_len + 2) * 4;
7457 off += (opt6.ip6e_len + 1) * 8;
7458 pd.proto = opt6.ip6e_nxt;
7459 /* goto the next header */
7460 break;
7461 }
7462 default:
7463 terminal++;
7464 break;
7465 }
7466 } while (!terminal);
7468 /* if there's no routing header, use unmodified mbuf for checksumming */
7469 if (!n)
7470 n = m;
7472 switch (pd.proto) {
7473 case IPPROTO_TCP: {
7474 if (!pf_pull_hdr(m, off, &pd.hdr.tcp, sizeof(pd.hdr.tcp),
7475 &action, &reason, AF_INET6)) {
7476 log = action != PF_PASS;
7477 goto done;
7478 }
7479 pd.p_len = pd.tot_len - off - (pd.hdr.tcp.th_off << 2);
7480 pd.sport = &pd.hdr.tcp.th_sport;
7481 pd.dport = &pd.hdr.tcp.th_dport;
7482 action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
7483 if (action == PF_DROP)
7485 action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
7486 &reason);
7487 if (action == PF_PASS) {
7488 if (V_pfsync_update_state_ptr != NULL)
7489 V_pfsync_update_state_ptr(s);
7490 r = s->rule.ptr;
7491 a = s->anchor.ptr;
7492 log = s->log;
7493 } else if (s == NULL)
7494 action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
7495 &a, &ruleset, inp);
7496 break;
7497 }
7499 case IPPROTO_UDP: {
7500 if (!pf_pull_hdr(m, off, &pd.hdr.udp, sizeof(pd.hdr.udp),
7501 &action, &reason, AF_INET6)) {
7502 log = action != PF_PASS;
7503 goto done;
7504 }
7505 pd.sport = &pd.hdr.udp.uh_sport;
7506 pd.dport = &pd.hdr.udp.uh_dport;
7507 if (pd.hdr.udp.uh_dport == 0 ||
7508 ntohs(pd.hdr.udp.uh_ulen) > m->m_pkthdr.len - off ||
7509 ntohs(pd.hdr.udp.uh_ulen) < sizeof(struct udphdr)) {
7510 action = PF_DROP;
7511 REASON_SET(&reason, PFRES_SHORT);
7512 goto done;
7513 }
7514 action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
7515 if (action == PF_PASS) {
7516 if (V_pfsync_update_state_ptr != NULL)
7517 V_pfsync_update_state_ptr(s);
7518 r = s->rule.ptr;
7519 a = s->anchor.ptr;
7520 log = s->log;
7521 } else if (s == NULL)
7522 action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
7523 &a, &ruleset, inp);
7524 break;
7525 }
7527 case IPPROTO_ICMP: {
7528 action = PF_DROP;
7529 DPFPRINTF(PF_DEBUG_MISC,
7530 ("pf: dropping IPv6 packet with ICMPv4 payload\n"));
7531 goto done;
7532 }
7534 case IPPROTO_ICMPV6: {
7535 if (!pf_pull_hdr(m, off, &pd.hdr.icmp6, sizeof(pd.hdr.icmp6),
7536 &action, &reason, AF_INET6)) {
7537 log = action != PF_PASS;
7540 action = pf_test_state_icmp(&s, dir, kif,
7541 m, off, h, &pd, &reason);
7542 if (action == PF_PASS) {
7543 if (V_pfsync_update_state_ptr != NULL)
7544 V_pfsync_update_state_ptr(s);
7545 r = s->rule.ptr;
7546 a = s->anchor.ptr;
7547 log = s->log;
7548 } else if (s == NULL)
7549 action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
7550 &a, &ruleset, inp);
7551 break;
7552 }
7554 default:
7555 action = pf_test_state_other(&s, dir, kif, m, &pd);
7556 if (action == PF_PASS) {
7557 if (V_pfsync_update_state_ptr != NULL)
7558 V_pfsync_update_state_ptr(s);
7559 r = s->rule.ptr;
7560 a = s->anchor.ptr;
7561 log = s->log;
7562 } else if (s == NULL)
7563 action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
7564 &a, &ruleset, inp);
7565 break;
7566 }
7568 done:
7570 if (n != m) {
7571 m_freem(n);
7572 n = NULL;
7573 }
7575 /* handle dangerous IPv6 extension headers. */
7576 if (action == PF_PASS && rh_cnt &&
7577 !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
7578 action = PF_DROP;
7579 REASON_SET(&reason, PFRES_IPOPTIONS);
7580 log = r->log;
7581 DPFPRINTF(PF_DEBUG_MISC,
7582 ("pf: dropping packet with dangerous v6 headers\n"));
7583 }
7585 if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) {
7586 action = PF_DROP;
7587 REASON_SET(&reason, PFRES_MEMORY);
7588 }
7589 if (r->rtableid >= 0)
7590 M_SETFIB(m, r->rtableid);
7592 if (r->scrub_flags & PFSTATE_SETPRIO) {
7593 if (pd.tos & IPTOS_LOWDELAY)
7594 pqid = 1;
7595 if (vlan_set_pcp(m, r->set_prio[pqid])) {
7596 action = PF_DROP;
7597 REASON_SET(&reason, PFRES_MEMORY);
7598 log = 1;
7599 DPFPRINTF(PF_DEBUG_MISC,
7600 ("pf: failed to allocate 802.1q mtag\n"));
7601 }
7602 }
7605 if (s) {
7606 pd.act.pqid = s->pqid;
7607 pd.act.qid = s->qid;
7608 } else if (r->qid) {
7609 pd.act.pqid = r->pqid;
7610 pd.act.qid = r->qid;
7611 }
7612 if (action == PF_PASS && pd.act.qid) {
7613 if (pd.pf_mtag == NULL &&
7614 ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
7615 action = PF_DROP;
7616 REASON_SET(&reason, PFRES_MEMORY);
7617 } else {
7619 pd.pf_mtag->qid_hash = pf_state_hash(s);
7620 if (pd.tos & IPTOS_LOWDELAY)
7621 pd.pf_mtag->qid = pd.act.pqid;
7622 else
7623 pd.pf_mtag->qid = pd.act.qid;
7624 /* Add hints for ecn. */
7625 pd.pf_mtag->hdr = h;
7626 }
7627 }
7630 if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
7631 pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
7632 (s->nat_rule.ptr->action == PF_RDR ||
7633 s->nat_rule.ptr->action == PF_BINAT) &&
7634 IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
7635 m->m_flags |= M_SKIP_FIREWALL;
7637 /* XXX: Anybody working on it?! */
7638 if (r->divert.port)
7639 printf("pf: divert(9) is not supported for IPv6\n");
7641 if (log) {
7642 struct pf_krule *lr;
7644 if (s != NULL && s->nat_rule.ptr != NULL &&
7645 s->nat_rule.ptr->log & PF_LOG_ALL)
7646 lr = s->nat_rule.ptr;
7647 else
7648 lr = r;
7649 PFLOG_PACKET(kif, m, AF_INET6, dir, reason, lr, a, ruleset,
7650 &pd, (s == NULL));
7651 }
7653 pf_counter_u64_critical_enter();
7654 pf_counter_u64_add_protected(&kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS],
7655 pd.tot_len);
7656 pf_counter_u64_add_protected(&kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS],
7657 1);
7659 if (action == PF_PASS || r->action == PF_DROP) {
7660 dirndx = (dir == PF_OUT);
7661 pf_counter_u64_add_protected(&r->packets[dirndx], 1);
7662 pf_counter_u64_add_protected(&r->bytes[dirndx], pd.tot_len);
7663 if (a != NULL) {
7664 pf_counter_u64_add_protected(&a->packets[dirndx], 1);
7665 pf_counter_u64_add_protected(&a->bytes[dirndx], pd.tot_len);
7666 }
7667 if (s != NULL) {
7668 if (s->nat_rule.ptr != NULL) {
7669 pf_counter_u64_add_protected(&s->nat_rule.ptr->packets[dirndx],
7670 1);
7671 pf_counter_u64_add_protected(&s->nat_rule.ptr->bytes[dirndx],
7672 pd.tot_len);
7673 }
7674 if (s->src_node != NULL) {
7675 counter_u64_add(s->src_node->packets[dirndx],
7676 1);
7677 counter_u64_add(s->src_node->bytes[dirndx],
7678 pd.tot_len);
7679 }
7680 if (s->nat_src_node != NULL) {
7681 counter_u64_add(s->nat_src_node->packets[dirndx],
7682 1);
7683 counter_u64_add(s->nat_src_node->bytes[dirndx],
7684 pd.tot_len);
7685 }
7686 dirndx = (dir == s->direction) ? 0 : 1;
7687 s->packets[dirndx]++;
7688 s->bytes[dirndx] += pd.tot_len;
7689 }
7690 tr = r;
7691 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
7692 if (nr != NULL && r == &V_pf_default_rule)
7693 tr = nr;
7694 if (tr->src.addr.type == PF_ADDR_TABLE)
7695 pfr_update_stats(tr->src.addr.p.tbl,
7696 (s == NULL) ? pd.src :
7697 &s->key[(s->direction == PF_IN)]->addr[0],
7698 pd.af, pd.tot_len, dir == PF_OUT,
7699 r->action == PF_PASS, tr->src.neg);
7700 if (tr->dst.addr.type == PF_ADDR_TABLE)
7701 pfr_update_stats(tr->dst.addr.p.tbl,
7702 (s == NULL) ? pd.dst :
7703 &s->key[(s->direction == PF_IN)]->addr[1],
7704 pd.af, pd.tot_len, dir == PF_OUT,
7705 r->action == PF_PASS, tr->dst.neg);
7706 }
7707 pf_counter_u64_critical_exit();
7709 switch (action) {
7710 case PF_SYNPROXY_DROP:
7711 m_freem(*m0);
7712 case PF_DEFER:
7713 *m0 = NULL;
7714 action = PF_PASS;
7715 break;
7716 case PF_DROP:
7717 m_freem(*m0);
7718 *m0 = NULL;
7719 break;
7720 default:
7721 /* pf_route6() returns unlocked. */
7722 if (r->rt) {
7723 pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd, inp);
7724 return (action);
7725 }
7726 /* Dummynet processing. */
7727 if (s && (s->dnpipe || s->dnrpipe)) {
7728 pd.act.dnpipe = s->dnpipe;
7729 pd.act.dnrpipe = s->dnrpipe;
7730 pd.act.flags = s->state_flags;
7731 } else if (r->dnpipe || r->dnrpipe) {
7732 pd.act.dnpipe = r->dnpipe;
7733 pd.act.dnrpipe = r->dnrpipe;
7734 pd.act.flags = r->free_flags;
7735 }
7736 if (pd.act.dnpipe || pd.act.dnrpipe) {
7737 struct ip_fw_args dnflow;
7739 if (ip_dn_io_ptr == NULL) {
7740 m_freem(*m0);
7741 *m0 = NULL;
7742 action = PF_DROP;
7743 REASON_SET(&reason, PFRES_MEMORY);
7744 break;
7745 }
7747 if (pd.pf_mtag == NULL &&
7748 ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
7749 m_freem(*m0);
7750 *m0 = NULL;
7751 action = PF_DROP;
7752 REASON_SET(&reason, PFRES_MEMORY);
7753 break;
7754 }
7756 if (pf_pdesc_to_dnflow(dir, &pd, r, s, &dnflow)) {
7757 pd.pf_mtag->flags |= PF_TAG_DUMMYNET;
7758 ip_dn_io_ptr(m0, &dnflow);
7759 if (*m0 == NULL)
7760 action = PF_DROP;
7761 }
7762 }
7763 break;
7764 }
7769 /* If reassembled packet passed, create new fragments. */
7770 if (action == PF_PASS && *m0 && (pflags & PFIL_FWD) &&
7771 (mtag = m_tag_find(m, PF_REASSEMBLED, NULL)) != NULL)
7772 action = pf_refragment6(ifp, m0, mtag);
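/*
 * pf reassembled the fragment chain for inspection; when
 * forwarding, the packet is split back into fragments so the
 * original on-wire fragmentation is preserved for the receiver.
 */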
7774 SDT_PROBE4(pf, ip, test6, done, action, reason, r, s);
7776 if (s)
7777 PF_STATE_UNLOCK(s);
7779 return (action);
7780 }
7781 #endif /* INET6 */