1 /* $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ */
4 * Copyright (c) 2002 Michael Shalayeff
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
30 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
32 * Permission to use, copy, modify, and distribute this software for any
33 * purpose with or without fee is hereby granted, provided that the above
34 * copyright notice and this permission notice appear in all copies.
36 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
47 #include "opt_inet6.h"
51 #include <sys/cdefs.h>
52 __FBSDID("$FreeBSD$");
55 #define NBPFILTER DEV_BPF
61 #define NPFSYNC DEV_PFSYNC
67 #define NCARP DEV_CARP
71 #endif /* __FreeBSD__ */
73 #include <sys/param.h>
74 #include <sys/kernel.h>
77 #include <sys/interrupt.h>
81 #include <sys/systm.h>
84 #include <sys/socket.h>
86 #include <sys/endian.h>
87 #include <sys/malloc.h>
88 #include <sys/module.h>
89 #include <sys/sockio.h>
90 #include <sys/taskqueue.h>
92 #include <sys/mutex.h>
94 #include <sys/ioctl.h>
95 #include <sys/timeout.h>
97 #include <sys/sysctl.h>
104 #include <net/if_clone.h>
106 #include <net/if_types.h>
107 #include <net/route.h>
109 #include <net/netisr.h>
111 #include <net/vnet.h>
114 #include <netinet/in.h>
115 #include <netinet/if_ether.h>
116 #include <netinet/tcp.h>
117 #include <netinet/tcp_seq.h>
120 #include <netinet/in_systm.h>
121 #include <netinet/in_var.h>
122 #include <netinet/ip.h>
123 #include <netinet/ip_var.h>
127 #include <netinet6/nd6.h>
134 #include <netinet/ip_carp.h>
137 #include <net/pfvar.h>
138 #include <net/if_pfsync.h>
141 #include "bpfilter.h"
145 #define PFSYNC_MINPKT ( \
146 sizeof(struct ip) + \
147 sizeof(struct pfsync_header) + \
148 sizeof(struct pfsync_subheader) + \
149 sizeof(struct pfsync_eof))
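/*
 * PFSYNC_MINPKT is the size of an "empty" pfsync packet: an IP header,
 * the pfsync header, one subheader and the EOF record.  sc_len starts
 * at this value and is reset to it whenever nothing is queued for
 * transmission (see pfsync_drop() and pfsync_sendout()).
 */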
157 int pfsync_input_hmac(struct mbuf *, int);
159 int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
160 struct pfsync_state_peer *);
162 int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
163 int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
164 int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
165 int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
166 int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
167 int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
168 int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
169 int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
170 int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
171 int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
172 int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
174 int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);
176 int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
177 pfsync_in_clr, /* PFSYNC_ACT_CLR */
178 pfsync_in_ins, /* PFSYNC_ACT_INS */
179 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */
180 pfsync_in_upd, /* PFSYNC_ACT_UPD */
181 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */
182 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */
183 pfsync_in_del, /* PFSYNC_ACT_DEL */
184 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */
185 pfsync_in_error, /* PFSYNC_ACT_INS_F */
186 pfsync_in_error, /* PFSYNC_ACT_DEL_F */
187 pfsync_in_bus, /* PFSYNC_ACT_BUS */
188 pfsync_in_tdb, /* PFSYNC_ACT_TDB */
189 pfsync_in_eof /* PFSYNC_ACT_EOF */
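/*
 * Incoming packets are dispatched through pfsync_acts[], indexed by the
 * action field of each subheader; unsupported actions map to
 * pfsync_in_error() and out-of-range actions are counted as
 * pfsyncs_badact in pfsync_input().
 */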
193 int (*write)(struct pf_state *, struct mbuf *, int);
198 /* we have one of these for every PFSYNC_S_ */
199 int pfsync_out_state(struct pf_state *, struct mbuf *, int);
200 int pfsync_out_iack(struct pf_state *, struct mbuf *, int);
201 int pfsync_out_upd_c(struct pf_state *, struct mbuf *, int);
202 int pfsync_out_del(struct pf_state *, struct mbuf *, int);
204 struct pfsync_q pfsync_qs[] = {
205 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS },
206 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
207 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD },
208 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C },
209 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C }
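/*
 * pfsync_qs[] ties each local output queue (PFSYNC_S_*) to the routine
 * that serialises a state into the outgoing mbuf, the size of one such
 * record, and the wire action placed in the subheader when the queue
 * is flushed by pfsync_sendout().
 */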
212 void pfsync_q_ins(struct pf_state *, int);
213 void pfsync_q_del(struct pf_state *);
215 struct pfsync_upd_req_item {
216 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry;
217 struct pfsync_upd_req ur_msg;
219 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
221 struct pfsync_deferral {
222 TAILQ_ENTRY(pfsync_deferral) pd_entry;
223 struct pf_state *pd_st;
226 struct callout pd_tmo;
228 struct timeout pd_tmo;
231 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
233 #define PFSYNC_PLSIZE MAX(sizeof(struct pfsync_upd_req_item), \
234 sizeof(struct pfsync_deferral))
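/*
 * The per-softc pool backs both update request items and deferrals,
 * so its item size must be the larger of the two structures.
 */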
237 int pfsync_out_tdb(struct tdb *, struct mbuf *, int);
240 struct pfsync_softc {
242 struct ifnet *sc_ifp;
246 struct ifnet *sc_sync_if;
254 struct ip_moptions sc_imo;
256 struct in_addr sc_sync_peer;
257 u_int8_t sc_maxupdates;
262 struct ip sc_template;
264 struct pf_state_queue sc_qs[PFSYNC_S_COUNT];
267 struct pfsync_upd_reqs sc_upd_req_list;
269 struct pfsync_deferrals sc_deferrals;
275 u_int32_t sc_ureq_sent;
278 struct callout sc_bulkfail_tmo;
280 struct timeout sc_bulkfail_tmo;
283 u_int32_t sc_ureq_received;
284 struct pf_state *sc_bulk_next;
285 struct pf_state *sc_bulk_last;
287 struct callout sc_bulk_tmo;
289 struct timeout sc_bulk_tmo;
292 TAILQ_HEAD(, tdb) sc_tdb_q;
295 struct callout sc_tmo;
297 struct timeout sc_tmo;
300 eventhandler_tag sc_detachtag;
306 static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL;
307 #define V_pfsyncif VNET(pfsyncif)
309 static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
310 #define V_pfsyncstats VNET(pfsyncstats)
312 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
313 SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW,
314 &VNET_NAME(pfsyncstats), pfsyncstats,
315 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
317 struct pfsync_softc *pfsyncif = NULL;
318 struct pfsyncstats pfsyncstats;
319 #define V_pfsyncstats pfsyncstats
323 static void pfsyncintr(void *);
325 void * pfsync_swi_cookie;
327 static struct pfsync_swi pfsync_swi;
328 #define schednetisr(p) swi_sched(pfsync_swi.pfsync_swi_cookie, 0)
329 #define NETISR_PFSYNC
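/*
 * FreeBSD has no pfsync netisr, so OpenBSD's schednetisr(NETISR_PFSYNC)
 * calls are mapped onto a software interrupt that runs pfsyncintr()
 * (the cookie is registered in vnet_pfsync_init()).
 */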
332 void pfsyncattach(int);
334 int pfsync_clone_create(struct if_clone *, int, caddr_t);
335 void pfsync_clone_destroy(struct ifnet *);
337 int pfsync_clone_create(struct if_clone *, int);
338 int pfsync_clone_destroy(struct ifnet *);
340 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
341 struct pf_state_peer *);
342 void pfsync_update_net_tdb(struct pfsync_tdb *);
343 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
349 int pfsyncioctl(struct ifnet *, u_long, caddr_t);
350 void pfsyncstart(struct ifnet *);
352 struct mbuf *pfsync_if_dequeue(struct ifnet *);
353 struct mbuf *pfsync_get_mbuf(struct pfsync_softc *);
355 void pfsync_deferred(struct pf_state *, int);
356 void pfsync_undefer(struct pfsync_deferral *, int);
357 void pfsync_defer_tmo(void *);
359 void pfsync_request_update(u_int32_t, u_int64_t);
360 void pfsync_update_state_req(struct pf_state *);
362 void pfsync_drop(struct pfsync_softc *);
363 void pfsync_sendout(void);
364 void pfsync_send_plus(void *, size_t);
365 int pfsync_tdb_sendout(struct pfsync_softc *);
366 int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
367 void pfsync_timeout(void *);
368 void pfsync_tdb_timeout(void *);
369 void pfsync_send_bus(struct pfsync_softc *, u_int8_t);
371 void pfsync_bulk_start(void);
372 void pfsync_bulk_status(u_int8_t);
373 void pfsync_bulk_update(void *);
374 void pfsync_bulk_fail(void *);
377 void pfsync_ifdetach(void *, struct ifnet *);
380 #define betoh64 (unsigned long long)be64toh
381 #define timeout_del callout_stop
384 #define PFSYNC_MAX_BULKTRIES 12
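/*
 * pfsync_bulk_fail() re-requests a bulk update up to
 * PFSYNC_MAX_BULKTRIES times; after that the transfer is treated as
 * successful, presumably so the node does not stay demoted forever.
 */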
390 IFC_SIMPLE_DECLARE(pfsync, 1);
392 struct if_clone pfsync_cloner =
393 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
397 pfsyncattach(int npfsync)
399 if_clone_attach(&pfsync_cloner);
403 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
405 pfsync_clone_create(struct if_clone *ifc, int unit)
408 struct pfsync_softc *sc;
419 sc = malloc(sizeof(struct pfsync_softc), M_DEVBUF, M_NOWAIT | M_ZERO);
423 for (q = 0; q < PFSYNC_S_COUNT; q++)
424 TAILQ_INIT(&sc->sc_qs[q]);
427 sc->pfsync_sync_ok = 1;
428 sc->sc_pool = uma_zcreate("pfsync", PFSYNC_PLSIZE,
429 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
430 if (sc->sc_pool == NULL) {
435 pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
437 TAILQ_INIT(&sc->sc_upd_req_list);
438 TAILQ_INIT(&sc->sc_deferrals);
441 TAILQ_INIT(&sc->sc_tdb_q);
443 sc->sc_len = PFSYNC_MINPKT;
444 sc->sc_maxupdates = 128;
447 sc->sc_imo.imo_membership = (struct in_multi **)malloc(
448 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_DEVBUF,
450 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
451 sc->sc_imo.imo_multicast_vif = -1;
453 sc->sc_imo.imo_membership = (struct in_multi **)malloc(
454 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
456 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
460 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
462 free(sc->sc_imo.imo_membership, M_DEVBUF);
463 uma_zdestroy(sc->sc_pool);
467 if_initname(ifp, ifc->ifc_name, unit);
469 sc->sc_detachtag = EVENTHANDLER_REGISTER(ifnet_departure_event,
471 pfsync_ifdetach, V_pfsyncif, EVENTHANDLER_PRI_ANY);
473 pfsync_ifdetach, pfsyncif, EVENTHANDLER_PRI_ANY);
475 if (sc->sc_detachtag == NULL) {
477 free(sc->sc_imo.imo_membership, M_DEVBUF);
478 uma_zdestroy(sc->sc_pool);
484 snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
487 ifp->if_ioctl = pfsyncioctl;
488 ifp->if_output = pfsyncoutput;
489 ifp->if_start = pfsyncstart;
490 ifp->if_type = IFT_PFSYNC;
491 ifp->if_snd.ifq_maxlen = ifqmaxlen;
492 ifp->if_hdrlen = sizeof(struct pfsync_header);
493 ifp->if_mtu = 1500; /* XXX */
495 callout_init(&sc->sc_tmo, CALLOUT_MPSAFE);
496 callout_init(&sc->sc_bulk_tmo, CALLOUT_MPSAFE);
497 callout_init(&sc->sc_bulkfail_tmo, CALLOUT_MPSAFE);
499 ifp->if_hardmtu = MCLBYTES; /* XXX */
500 timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
501 timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
502 timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
511 if_addgroup(ifp, "carp");
516 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
518 bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
536 pfsync_clone_destroy(struct ifnet *ifp)
538 struct pfsync_softc *sc = ifp->if_softc;
541 EVENTHANDLER_DEREGISTER(ifnet_departure_event, sc->sc_detachtag);
543 timeout_del(&sc->sc_bulk_tmo);
544 timeout_del(&sc->sc_tmo);
548 if (!sc->pfsync_sync_ok)
552 carp_group_demote_adj(&sc->sc_if, -1);
562 while (sc->sc_deferred > 0)
563 pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
566 UMA_DESTROY(sc->sc_pool);
568 pool_destroy(&sc->sc_pool);
572 free(sc->sc_imo.imo_membership, M_DEVBUF);
574 free(sc->sc_imo.imo_membership, M_IPMOPTS);
590 pfsync_if_dequeue(struct ifnet *ifp)
598 IF_LOCK(&ifp->if_snd);
599 _IF_DROP(&ifp->if_snd);
600 _IF_DEQUEUE(&ifp->if_snd, m);
601 IF_UNLOCK(&ifp->if_snd);
604 IF_DEQUEUE(&ifp->if_snd, m);
612 * Start output on the pfsync interface.
615 pfsyncstart(struct ifnet *ifp)
619 while ((m = pfsync_if_dequeue(ifp)) != NULL) {
621 IF_DROP(&ifp->if_snd);
628 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
629 struct pf_state_peer *d)
631 if (s->scrub.scrub_flag && d->scrub == NULL) {
633 d->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
635 d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
637 if (d->scrub == NULL)
646 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
648 bzero(sp, sizeof(struct pfsync_state));
650 /* copy from state key */
651 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
652 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
653 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
654 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
655 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
656 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
657 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
658 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
659 sp->proto = st->key[PF_SK_WIRE]->proto;
660 sp->af = st->key[PF_SK_WIRE]->af;
662 /* copy from state */
663 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
664 bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
665 sp->creation = htonl(time_second - st->creation);
666 sp->expire = pf_state_expires(st);
667 if (sp->expire <= time_second)
668 sp->expire = htonl(0);
670 sp->expire = htonl(sp->expire - time_second);
672 sp->direction = st->direction;
674 sp->timeout = st->timeout;
675 sp->state_flags = st->state_flags;
677 sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
678 if (st->nat_src_node)
679 sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
681 bcopy(&st->id, &sp->id, sizeof(sp->id));
682 sp->creatorid = st->creatorid;
683 pf_state_peer_hton(&st->src, &sp->src);
684 pf_state_peer_hton(&st->dst, &sp->dst);
686 if (st->rule.ptr == NULL)
687 sp->rule = htonl(-1);
689 sp->rule = htonl(st->rule.ptr->nr);
690 if (st->anchor.ptr == NULL)
691 sp->anchor = htonl(-1);
693 sp->anchor = htonl(st->anchor.ptr->nr);
694 if (st->nat_rule.ptr == NULL)
695 sp->nat_rule = htonl(-1);
697 sp->nat_rule = htonl(st->nat_rule.ptr->nr);
699 pf_state_counter_hton(st->packets[0], sp->packets[0]);
700 pf_state_counter_hton(st->packets[1], sp->packets[1]);
701 pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
702 pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
708 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
710 struct pf_state *st = NULL;
711 struct pf_state_key *skw = NULL, *sks = NULL;
712 struct pf_rule *r = NULL;
718 if (sp->creatorid == 0 && V_pf_status.debug >= PF_DEBUG_MISC) {
720 if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
722 printf("pfsync_state_import: invalid creator id:"
723 " %08x\n", ntohl(sp->creatorid));
727 if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
729 if (V_pf_status.debug >= PF_DEBUG_MISC)
731 if (pf_status.debug >= PF_DEBUG_MISC)
733 printf("pfsync_state_import: "
734 "unknown interface: %s\n", sp->ifname);
735 if (flags & PFSYNC_SI_IOCTL)
737 return (0); /* skip this state */
741 * If the ruleset checksums match or the state is coming from the ioctl,
742 * it's safe to associate the state with the rule of that number.
744 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
745 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
746 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
747 r = pf_main_ruleset.rules[
748 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
751 r = &V_pf_default_rule;
753 r = &pf_default_rule;
756 if ((r->max_states && r->states_cur >= r->max_states))
760 if (flags & PFSYNC_SI_IOCTL)
761 pool_flags = PR_WAITOK | PR_ZERO;
763 pool_flags = PR_ZERO;
765 if ((st = pool_get(&V_pf_state_pl, pool_flags)) == NULL)
768 if (flags & PFSYNC_SI_IOCTL)
769 pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
771 pool_flags = PR_LIMITFAIL | PR_ZERO;
773 if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
777 if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
780 if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
781 &sp->key[PF_SK_STACK].addr[0], sp->af) ||
782 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
783 &sp->key[PF_SK_STACK].addr[1], sp->af) ||
784 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
785 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) {
786 if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
791 /* allocate memory for scrub info */
792 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
793 pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
796 /* copy to state key(s) */
797 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
798 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
799 skw->port[0] = sp->key[PF_SK_WIRE].port[0];
800 skw->port[1] = sp->key[PF_SK_WIRE].port[1];
801 skw->proto = sp->proto;
804 sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
805 sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
806 sks->port[0] = sp->key[PF_SK_STACK].port[0];
807 sks->port[1] = sp->key[PF_SK_STACK].port[1];
808 sks->proto = sp->proto;
813 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
814 st->creation = time_second - ntohl(sp->creation);
815 st->expire = time_second;
817 /* XXX No adaptive scaling. */
818 st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
821 st->expire = ntohl(sp->expire) + time_second;
822 st->direction = sp->direction;
824 st->timeout = sp->timeout;
825 st->state_flags = sp->state_flags;
827 bcopy(sp->id, &st->id, sizeof(st->id));
828 st->creatorid = sp->creatorid;
829 pf_state_peer_ntoh(&sp->src, &st->src);
830 pf_state_peer_ntoh(&sp->dst, &st->dst);
833 st->nat_rule.ptr = NULL;
834 st->anchor.ptr = NULL;
837 st->pfsync_time = time_second;
838 st->sync_state = PFSYNC_S_NONE;
840 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
844 if (!ISSET(flags, PFSYNC_SI_IOCTL))
845 SET(st->state_flags, PFSTATE_NOSYNC);
847 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
848 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
853 if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
854 CLR(st->state_flags, PFSTATE_NOSYNC);
855 if (ISSET(st->state_flags, PFSTATE_ACK)) {
856 pfsync_q_ins(st, PFSYNC_S_IACK);
857 schednetisr(NETISR_PFSYNC);
860 CLR(st->state_flags, PFSTATE_ACK);
870 pool_put(&V_pf_state_key_pl, skw);
872 pool_put(&V_pf_state_key_pl, sks);
875 pool_put(&pf_state_key_pl, skw);
877 pool_put(&pf_state_key_pl, sks);
880 cleanup_state: /* pf_state_insert frees the state keys */
884 pool_put(&V_pf_state_scrub_pl, st->dst.scrub);
886 pool_put(&V_pf_state_scrub_pl, st->src.scrub);
887 pool_put(&V_pf_state_pl, st);
890 pool_put(&pf_state_scrub_pl, st->dst.scrub);
892 pool_put(&pf_state_scrub_pl, st->src.scrub);
893 pool_put(&pf_state_pl, st);
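/*
 * pfsync_input() validates a packet before dispatching its subheaders:
 * it must arrive on the configured sync interface, carry an IP TTL of
 * PFSYNC_DFLTTL, be long enough for the pfsync header and match
 * PFSYNC_VERSION.  A matching pf checksum additionally sets
 * PFSYNC_SI_CKSUM so imported states may be bound to rules by number.
 */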
901 pfsync_input(struct mbuf *m, __unused int off)
903 pfsync_input(struct mbuf *m, ...)
907 struct pfsync_softc *sc = V_pfsyncif;
909 struct pfsync_softc *sc = pfsyncif;
911 struct pfsync_pkt pkt;
912 struct ip *ip = mtod(m, struct ip *);
913 struct pfsync_header *ph;
914 struct pfsync_subheader subh;
919 V_pfsyncstats.pfsyncs_ipackets++;
921 /* verify that we have a sync interface configured */
923 if (!sc || !sc->sc_sync_if || !V_pf_status.running)
925 if (!sc || !sc->sc_sync_if || !pf_status.running)
929 /* verify that the packet came in on the right interface */
930 if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
931 V_pfsyncstats.pfsyncs_badif++;
936 sc->sc_ifp->if_ipackets++;
937 sc->sc_ifp->if_ibytes += m->m_pkthdr.len;
939 sc->sc_if.if_ipackets++;
940 sc->sc_if.if_ibytes += m->m_pkthdr.len;
942 /* verify that the IP TTL is 255. */
943 if (ip->ip_ttl != PFSYNC_DFLTTL) {
944 V_pfsyncstats.pfsyncs_badttl++;
948 offset = ip->ip_hl << 2;
949 if (m->m_pkthdr.len < offset + sizeof(*ph)) {
950 V_pfsyncstats.pfsyncs_hdrops++;
954 if (offset + sizeof(*ph) > m->m_len) {
955 if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
956 V_pfsyncstats.pfsyncs_hdrops++;
959 ip = mtod(m, struct ip *);
961 ph = (struct pfsync_header *)((char *)ip + offset);
963 /* verify the version */
964 if (ph->version != PFSYNC_VERSION) {
965 V_pfsyncstats.pfsyncs_badver++;
970 if (pfsync_input_hmac(m, offset) != 0) {
976 /* Cheaper to grab this now than having to mess with mbufs later */
978 pkt.src = ip->ip_src;
982 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
984 if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
986 pkt.flags |= PFSYNC_SI_CKSUM;
988 offset += sizeof(*ph);
990 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
991 offset += sizeof(subh);
993 if (subh.action >= PFSYNC_ACT_MAX) {
994 V_pfsyncstats.pfsyncs_badact++;
998 rv = (*pfsync_acts[subh.action])(&pkt, m, offset,
1011 pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1013 struct pfsync_clr *clr;
1015 int len = sizeof(*clr) * count;
1018 struct pf_state *st, *nexts;
1019 struct pf_state_key *sk, *nextsk;
1020 struct pf_state_item *si;
1021 u_int32_t creatorid;
1024 mp = m_pulldown(m, offset, len, &offp);
1026 V_pfsyncstats.pfsyncs_badlen++;
1029 clr = (struct pfsync_clr *)(mp->m_data + offp);
1035 for (i = 0; i < count; i++) {
1036 creatorid = clr[i].creatorid;
1038 if (clr[i].ifname[0] == '\0') {
1040 for (st = RB_MIN(pf_state_tree_id, &V_tree_id);
1042 nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, st);
1044 for (st = RB_MIN(pf_state_tree_id, &tree_id);
1046 nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
1048 if (st->creatorid == creatorid) {
1049 SET(st->state_flags, PFSTATE_NOSYNC);
1050 pf_unlink_state(st);
1054 if (pfi_kif_get(clr[i].ifname) == NULL)
1059 for (sk = RB_MIN(pf_state_tree, &V_pf_statetbl);
1061 for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
1064 nextsk = RB_NEXT(pf_state_tree,
1066 &V_pf_statetbl, sk);
1070 TAILQ_FOREACH(si, &sk->states, entry) {
1071 if (si->s->creatorid == creatorid) {
1072 SET(si->s->state_flags,
1074 pf_unlink_state(si->s);
1089 pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1092 struct pfsync_state *sa, *sp;
1093 int len = sizeof(*sp) * count;
1098 mp = m_pulldown(m, offset, len, &offp);
1100 V_pfsyncstats.pfsyncs_badlen++;
1103 sa = (struct pfsync_state *)(mp->m_data + offp);
1109 for (i = 0; i < count; i++) {
1112 /* check for invalid values */
1113 if (sp->timeout >= PFTM_MAX ||
1114 sp->src.state > PF_TCPS_PROXY_DST ||
1115 sp->dst.state > PF_TCPS_PROXY_DST ||
1116 sp->direction > PF_OUT ||
1117 (sp->af != AF_INET && sp->af != AF_INET6)) {
1119 if (V_pf_status.debug >= PF_DEBUG_MISC) {
1121 if (pf_status.debug >= PF_DEBUG_MISC) {
1123 printf("pfsync_input: PFSYNC5_ACT_INS: "
1126 V_pfsyncstats.pfsyncs_badval++;
1130 if (pfsync_state_import(sp, pkt->flags) == ENOMEM) {
1131 /* drop out, but process the rest of the actions */
1144 pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1146 struct pfsync_ins_ack *ia, *iaa;
1147 struct pf_state_cmp id_key;
1148 struct pf_state *st;
1151 int len = count * sizeof(*ia);
1155 mp = m_pulldown(m, offset, len, &offp);
1157 V_pfsyncstats.pfsyncs_badlen++;
1160 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
1166 for (i = 0; i < count; i++) {
1169 bcopy(&ia->id, &id_key.id, sizeof(id_key.id));
1170 id_key.creatorid = ia->creatorid;
1172 st = pf_find_state_byid(&id_key);
1176 if (ISSET(st->state_flags, PFSTATE_ACK))
1177 pfsync_deferred(st, 0);
1184 * XXX this is not yet implemented, but we know the size of the
1185 * message so we can skip it.
1188 return (count * sizeof(struct pfsync_ins_ack));
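/*
 * pfsync_upd_tcp() sanity-checks a peer's update against our local TCP
 * state: neither state machine (syn-proxy states excepted) nor the
 * sequence windows may move backwards.  A non-zero return marks the
 * update as stale, and the caller responds by queueing its own copy of
 * the state for transmission back to the peer.
 */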
1192 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
1193 struct pfsync_state_peer *dst)
1198 * The state should never go backwards except
1199 * for syn-proxy states. Neither should the
1200 * sequence window slide backwards.
1202 if (st->src.state > src->state &&
1203 (st->src.state < PF_TCPS_PROXY_SRC ||
1204 src->state >= PF_TCPS_PROXY_SRC))
1206 else if (SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))
1208 else if (st->dst.state > dst->state) {
1209 /* There might still be useful
1210 * information about the src state here,
1211 * so import that part of the update,
1212 * then "fail" so we send the updated
1213 * state back to the peer who is missing
1214 * what we know. */
1215 pf_state_peer_ntoh(src, &st->src);
1216 /* XXX do anything with timeouts? */
1218 } else if (st->dst.state >= TCPS_SYN_SENT &&
1219 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))
1226 pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1228 struct pfsync_state *sa, *sp;
1229 struct pf_state_cmp id_key;
1230 struct pf_state_key *sk;
1231 struct pf_state *st;
1235 int len = count * sizeof(*sp);
1239 mp = m_pulldown(m, offset, len, &offp);
1241 V_pfsyncstats.pfsyncs_badlen++;
1244 sa = (struct pfsync_state *)(mp->m_data + offp);
1250 for (i = 0; i < count; i++) {
1253 /* check for invalid values */
1254 if (sp->timeout >= PFTM_MAX ||
1255 sp->src.state > PF_TCPS_PROXY_DST ||
1256 sp->dst.state > PF_TCPS_PROXY_DST) {
1258 if (V_pf_status.debug >= PF_DEBUG_MISC) {
1260 if (pf_status.debug >= PF_DEBUG_MISC) {
1262 printf("pfsync_input: PFSYNC_ACT_UPD: "
1265 V_pfsyncstats.pfsyncs_badval++;
1269 bcopy(sp->id, &id_key.id, sizeof(id_key.id));
1270 id_key.creatorid = sp->creatorid;
1272 st = pf_find_state_byid(&id_key);
1274 /* insert the update */
1275 if (pfsync_state_import(sp, 0))
1276 V_pfsyncstats.pfsyncs_badstate++;
1280 if (ISSET(st->state_flags, PFSTATE_ACK))
1281 pfsync_deferred(st, 1);
1283 sk = st->key[PF_SK_WIRE]; /* XXX right one? */
1285 if (sk->proto == IPPROTO_TCP)
1286 sfail = pfsync_upd_tcp(st, &sp->src, &sp->dst);
1289 * Non-TCP protocol state machines always go
1292 if (st->src.state > sp->src.state)
1294 else if (st->dst.state > sp->dst.state)
1300 if (V_pf_status.debug >= PF_DEBUG_MISC) {
1302 if (pf_status.debug >= PF_DEBUG_MISC) {
1304 printf("pfsync: %s stale update (%d)"
1305 " id: %016llx creatorid: %08x\n",
1306 (sfail < 7 ? "ignoring" : "partial"),
1307 sfail, betoh64(st->id),
1308 ntohl(st->creatorid));
1310 V_pfsyncstats.pfsyncs_stale++;
1312 pfsync_update_state(st);
1313 schednetisr(NETISR_PFSYNC);
1316 pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
1317 pf_state_peer_ntoh(&sp->src, &st->src);
1318 pf_state_peer_ntoh(&sp->dst, &st->dst);
1319 st->expire = ntohl(sp->expire) + time_second;
1320 st->timeout = sp->timeout;
1321 st->pfsync_time = time_second;
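/*
 * PFSYNC_ACT_UPD_C is the compressed update: it carries only the
 * id/creatorid key, the peer data and the expiry, not a full
 * pfsync_state.  If the state is unknown here it cannot be imported,
 * so pfsync_request_update() asks the peer for the full version.
 */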
1332 pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1334 struct pfsync_upd_c *ua, *up;
1335 struct pf_state_key *sk;
1336 struct pf_state_cmp id_key;
1337 struct pf_state *st;
1339 int len = count * sizeof(*up);
1346 mp = m_pulldown(m, offset, len, &offp);
1348 V_pfsyncstats.pfsyncs_badlen++;
1351 ua = (struct pfsync_upd_c *)(mp->m_data + offp);
1357 for (i = 0; i < count; i++) {
1360 /* check for invalid values */
1361 if (up->timeout >= PFTM_MAX ||
1362 up->src.state > PF_TCPS_PROXY_DST ||
1363 up->dst.state > PF_TCPS_PROXY_DST) {
1365 if (V_pf_status.debug >= PF_DEBUG_MISC) {
1367 if (pf_status.debug >= PF_DEBUG_MISC) {
1369 printf("pfsync_input: "
1370 "PFSYNC_ACT_UPD_C: "
1373 V_pfsyncstats.pfsyncs_badval++;
1377 bcopy(&up->id, &id_key.id, sizeof(id_key.id));
1378 id_key.creatorid = up->creatorid;
1380 st = pf_find_state_byid(&id_key);
1382 /* We don't have this state. Ask for it. */
1383 pfsync_request_update(id_key.creatorid, id_key.id);
1387 if (ISSET(st->state_flags, PFSTATE_ACK))
1388 pfsync_deferred(st, 1);
1390 sk = st->key[PF_SK_WIRE]; /* XXX right one? */
1392 if (sk->proto == IPPROTO_TCP)
1393 sfail = pfsync_upd_tcp(st, &up->src, &up->dst);
1396 * Non-TCP protocol state machines always go forwards
1398 if (st->src.state > up->src.state)
1400 else if (st->dst.state > up->dst.state)
1406 if (V_pf_status.debug >= PF_DEBUG_MISC) {
1408 if (pf_status.debug >= PF_DEBUG_MISC) {
1410 printf("pfsync: ignoring stale update "
1412 "creatorid: %08x\n", sfail,
1414 ntohl(st->creatorid));
1416 V_pfsyncstats.pfsyncs_stale++;
1418 pfsync_update_state(st);
1419 schednetisr(NETISR_PFSYNC);
1422 pfsync_alloc_scrub_memory(&up->dst, &st->dst);
1423 pf_state_peer_ntoh(&up->src, &st->src);
1424 pf_state_peer_ntoh(&up->dst, &st->dst);
1425 st->expire = ntohl(up->expire) + time_second;
1426 st->timeout = up->timeout;
1427 st->pfsync_time = time_second;
1438 pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1440 struct pfsync_upd_req *ur, *ura;
1442 int len = count * sizeof(*ur);
1445 struct pf_state_cmp id_key;
1446 struct pf_state *st;
1448 mp = m_pulldown(m, offset, len, &offp);
1450 V_pfsyncstats.pfsyncs_badlen++;
1453 ura = (struct pfsync_upd_req *)(mp->m_data + offp);
1455 for (i = 0; i < count; i++) {
1458 bcopy(&ur->id, &id_key.id, sizeof(id_key.id));
1459 id_key.creatorid = ur->creatorid;
1461 if (id_key.id == 0 && id_key.creatorid == 0)
1462 pfsync_bulk_start();
1464 st = pf_find_state_byid(&id_key);
1466 V_pfsyncstats.pfsyncs_badstate++;
1469 if (ISSET(st->state_flags, PFSTATE_NOSYNC))
1472 pfsync_update_state_req(st);
1480 pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1483 struct pfsync_state *sa, *sp;
1484 struct pf_state_cmp id_key;
1485 struct pf_state *st;
1486 int len = count * sizeof(*sp);
1490 mp = m_pulldown(m, offset, len, &offp);
1492 V_pfsyncstats.pfsyncs_badlen++;
1495 sa = (struct pfsync_state *)(mp->m_data + offp);
1501 for (i = 0; i < count; i++) {
1504 bcopy(sp->id, &id_key.id, sizeof(id_key.id));
1505 id_key.creatorid = sp->creatorid;
1507 st = pf_find_state_byid(&id_key);
1509 V_pfsyncstats.pfsyncs_badstate++;
1512 SET(st->state_flags, PFSTATE_NOSYNC);
1513 pf_unlink_state(st);
1524 pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1527 struct pfsync_del_c *sa, *sp;
1528 struct pf_state_cmp id_key;
1529 struct pf_state *st;
1530 int len = count * sizeof(*sp);
1534 mp = m_pulldown(m, offset, len, &offp);
1536 V_pfsyncstats.pfsyncs_badlen++;
1539 sa = (struct pfsync_del_c *)(mp->m_data + offp);
1545 for (i = 0; i < count; i++) {
1548 bcopy(&sp->id, &id_key.id, sizeof(id_key.id));
1549 id_key.creatorid = sp->creatorid;
1551 st = pf_find_state_byid(&id_key);
1553 V_pfsyncstats.pfsyncs_badstate++;
1557 SET(st->state_flags, PFSTATE_NOSYNC);
1558 pf_unlink_state(st);
1569 pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1572 struct pfsync_softc *sc = V_pfsyncif;
1574 struct pfsync_softc *sc = pfsyncif;
1576 struct pfsync_bus *bus;
1578 int len = count * sizeof(*bus);
1581 /* If we're not waiting for a bulk update, who cares. */
1582 if (sc->sc_ureq_sent == 0)
1585 mp = m_pulldown(m, offset, len, &offp);
1587 V_pfsyncstats.pfsyncs_badlen++;
1590 bus = (struct pfsync_bus *)(mp->m_data + offp);
1592 switch (bus->status) {
1593 case PFSYNC_BUS_START:
1595 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
1598 timeout_add_sec(&sc->sc_bulkfail_tmo, 5); /* XXX magic */
1601 pf_pool_limits[PF_LIMIT_STATES].limit /
1602 (PFSYNC_BULKPACKETS * sc->sc_maxcount));
1605 if (V_pf_status.debug >= PF_DEBUG_MISC)
1607 if (pf_status.debug >= PF_DEBUG_MISC)
1609 printf("pfsync: received bulk update start\n");
1612 case PFSYNC_BUS_END:
1613 if (time_uptime - ntohl(bus->endtime) >=
1615 /* that's it, we're happy */
1616 sc->sc_ureq_sent = 0;
1617 sc->sc_bulk_tries = 0;
1618 timeout_del(&sc->sc_bulkfail_tmo);
1622 if (!sc->pfsync_sync_ok)
1624 if (!pfsync_sync_ok)
1626 carp_group_demote_adj(&sc->sc_if, -1);
1630 sc->pfsync_sync_ok = 1;
1635 if (V_pf_status.debug >= PF_DEBUG_MISC)
1637 if (pf_status.debug >= PF_DEBUG_MISC)
1639 printf("pfsync: received valid "
1640 "bulk update end\n");
1643 if (V_pf_status.debug >= PF_DEBUG_MISC)
1645 if (pf_status.debug >= PF_DEBUG_MISC)
1647 printf("pfsync: received invalid "
1648 "bulk update end: bad timestamp\n");
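/*
 * PFSYNC_BUS messages bracket a bulk transfer.  BUS_START (re)arms the
 * five second bulk-fail timeout; BUS_END is accepted only when its
 * endtime lines up with the time we sent the request, in which case
 * the request state is cleared and, if this node had demoted itself
 * while waiting, pfsync_sync_ok is restored.
 */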
1657 pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1659 int len = count * sizeof(struct pfsync_tdb);
1662 struct pfsync_tdb *tp;
1668 mp = m_pulldown(m, offset, len, &offp);
1670 V_pfsyncstats.pfsyncs_badlen++;
1673 tp = (struct pfsync_tdb *)(mp->m_data + offp);
1679 for (i = 0; i < count; i++)
1680 pfsync_update_net_tdb(&tp[i]);
1691 /* Update an in-kernel tdb. Silently fail if no tdb is found. */
1693 pfsync_update_net_tdb(struct pfsync_tdb *pt)
1698 /* check for invalid values */
1699 if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
1700 (pt->dst.sa.sa_family != AF_INET &&
1701 pt->dst.sa.sa_family != AF_INET6))
1705 tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
1707 pt->rpl = ntohl(pt->rpl);
1708 pt->cur_bytes = betoh64(pt->cur_bytes);
1710 /* Neither replay nor byte counter should ever decrease. */
1711 if (pt->rpl < tdb->tdb_rpl ||
1712 pt->cur_bytes < tdb->tdb_cur_bytes) {
1717 tdb->tdb_rpl = pt->rpl;
1718 tdb->tdb_cur_bytes = pt->cur_bytes;
1725 if (V_pf_status.debug >= PF_DEBUG_MISC)
1727 if (pf_status.debug >= PF_DEBUG_MISC)
1729 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
1731 V_pfsyncstats.pfsyncs_badstate++;
1738 pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1740 /* check if we are at the right place in the packet */
1741 if (offset != m->m_pkthdr.len - sizeof(struct pfsync_eof))
1742 V_pfsyncstats.pfsyncs_badact++;
1744 /* we're done. free and let the caller return */
1750 pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1752 V_pfsyncstats.pfsyncs_badact++;
1759 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1772 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1775 struct proc *p = curproc;
1777 struct pfsync_softc *sc = ifp->if_softc;
1778 struct ifreq *ifr = (struct ifreq *)data;
1779 struct ip_moptions *imo = &sc->sc_imo;
1780 struct pfsyncreq pfsyncr;
1789 case SIOCSIFDSTADDR:
1793 if (ifp->if_flags & IFF_UP)
1794 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1796 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1798 if (ifp->if_flags & IFF_UP)
1799 ifp->if_flags |= IFF_RUNNING;
1801 ifp->if_flags &= ~IFF_RUNNING;
1805 if (ifr->ifr_mtu <= PFSYNC_MINPKT)
1807 if (ifr->ifr_mtu > MCLBYTES) /* XXX could be bigger */
1808 ifr->ifr_mtu = MCLBYTES;
1809 if (ifr->ifr_mtu < ifp->if_mtu) {
1820 ifp->if_mtu = ifr->ifr_mtu;
1823 bzero(&pfsyncr, sizeof(pfsyncr));
1824 if (sc->sc_sync_if) {
1825 strlcpy(pfsyncr.pfsyncr_syncdev,
1826 sc->sc_sync_if->if_xname, IFNAMSIZ);
1828 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1829 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1830 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
1834 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1836 if ((error = suser(p, p->p_acflag)) != 0)
1839 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1845 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1847 sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
1849 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1852 sc->sc_sync_peer.s_addr =
1853 pfsyncr.pfsyncr_syncpeer.s_addr;
1855 if (pfsyncr.pfsyncr_maxupdates > 255)
1864 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1866 if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1867 sc->sc_sync_if = NULL;
1871 if (imo->imo_num_memberships > 0) {
1872 in_delmulti(imo->imo_membership[
1873 --imo->imo_num_memberships]);
1874 imo->imo_multicast_ifp = NULL;
1882 if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
1890 if (sifp->if_mtu < sc->sc_ifp->if_mtu ||
1892 if (sifp->if_mtu < sc->sc_if.if_mtu ||
1894 (sc->sc_sync_if != NULL &&
1895 sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
1896 sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1898 sc->sc_sync_if = sifp;
1900 if (imo->imo_num_memberships > 0) {
1904 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1908 imo->imo_multicast_ifp = NULL;
1911 if (sc->sc_sync_if &&
1913 sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
1915 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1917 struct in_addr addr;
1919 if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
1920 sc->sc_sync_if = NULL;
1925 return (EADDRNOTAVAIL);
1929 addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
1931 addr.s_addr = INADDR_PFSYNC_GROUP;
1937 if ((imo->imo_membership[0] =
1938 in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
1939 sc->sc_sync_if = NULL;
1946 imo->imo_num_memberships++;
1947 imo->imo_multicast_ifp = sc->sc_sync_if;
1948 imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1949 imo->imo_multicast_loop = 0;
1952 ip = &sc->sc_template;
1953 bzero(ip, sizeof(*ip));
1954 ip->ip_v = IPVERSION;
1955 ip->ip_hl = sizeof(sc->sc_template) >> 2;
1956 ip->ip_tos = IPTOS_LOWDELAY;
1957 /* len and id are set later */
1958 ip->ip_off = htons(IP_DF);
1959 ip->ip_ttl = PFSYNC_DFLTTL;
1960 ip->ip_p = IPPROTO_PFSYNC;
1961 ip->ip_src.s_addr = INADDR_ANY;
1962 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
1964 if (sc->sc_sync_if) {
1965 /* Request a full state table update. */
1966 sc->sc_ureq_sent = time_uptime;
1970 if (sc->pfsync_sync_ok)
1974 carp_group_demote_adj(&sc->sc_if, 1);
1978 sc->pfsync_sync_ok = 0;
1983 if (V_pf_status.debug >= PF_DEBUG_MISC)
1985 if (pf_status.debug >= PF_DEBUG_MISC)
1987 printf("pfsync: requesting bulk update\n");
1989 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
1990 pfsync_bulk_fail, V_pfsyncif);
1992 timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
1994 pfsync_request_update(0, 0);
2011 pfsync_out_state(struct pf_state *st, struct mbuf *m, int offset)
2013 struct pfsync_state *sp = (struct pfsync_state *)(m->m_data + offset);
2015 pfsync_state_export(sp, st);
2017 return (sizeof(*sp));
2021 pfsync_out_iack(struct pf_state *st, struct mbuf *m, int offset)
2023 struct pfsync_ins_ack *iack =
2024 (struct pfsync_ins_ack *)(m->m_data + offset);
2027 iack->creatorid = st->creatorid;
2029 return (sizeof(*iack));
2033 pfsync_out_upd_c(struct pf_state *st, struct mbuf *m, int offset)
2035 struct pfsync_upd_c *up = (struct pfsync_upd_c *)(m->m_data + offset);
2038 pf_state_peer_hton(&st->src, &up->src);
2039 pf_state_peer_hton(&st->dst, &up->dst);
2040 up->creatorid = st->creatorid;
2042 up->expire = pf_state_expires(st);
2043 if (up->expire <= time_second)
2044 up->expire = htonl(0);
2046 up->expire = htonl(up->expire - time_second);
2047 up->timeout = st->timeout;
2049 bzero(up->_pad, sizeof(up->_pad)); /* XXX */
2051 return (sizeof(*up));
2055 pfsync_out_del(struct pf_state *st, struct mbuf *m, int offset)
2057 struct pfsync_del_c *dp = (struct pfsync_del_c *)(m->m_data + offset);
2060 dp->creatorid = st->creatorid;
2062 SET(st->state_flags, PFSTATE_NOSYNC);
2064 return (sizeof(*dp));
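/*
 * pfsync_drop() discards everything queued for the next packet: the
 * per-queue state lists, any pending update requests and, where TDB
 * syncing is compiled in, queued TDBs, then resets sc_len to
 * PFSYNC_MINPKT.
 */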
2068 pfsync_drop(struct pfsync_softc *sc)
2070 struct pf_state *st;
2071 struct pfsync_upd_req_item *ur;
2077 for (q = 0; q < PFSYNC_S_COUNT; q++) {
2078 if (TAILQ_EMPTY(&sc->sc_qs[q]))
2081 TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
2084 KASSERT(st->sync_state == q,
2085 ("%s: st->sync_state == q",
2088 KASSERT(st->sync_state == q);
2091 st->sync_state = PFSYNC_S_NONE;
2093 TAILQ_INIT(&sc->sc_qs[q]);
2096 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
2097 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
2098 pool_put(&sc->sc_pool, ur);
2104 if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
2105 TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
2106 CLR(t->tdb_flags, TDBF_PFSYNC);
2108 TAILQ_INIT(&sc->sc_tdb_q);
2112 sc->sc_len = PFSYNC_MINPKT;
2116 pfsync_sendout(void)
2119 struct pfsync_softc *sc = V_pfsyncif;
2121 struct pfsync_softc *sc = pfsyncif;
2125 struct ifnet *ifp = sc->sc_ifp;
2127 struct ifnet *ifp = &sc->sc_if;
2132 struct pfsync_header *ph;
2133 struct pfsync_subheader *subh;
2134 struct pf_state *st;
2135 struct pfsync_upd_req_item *ur;
2146 PF_ASSERT(MA_OWNED);
2151 if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
2155 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
2157 if (sc->sc_sync_if == NULL) {
2163 MGETHDR(m, M_DONTWAIT, MT_DATA);
2166 sc->sc_ifp->if_oerrors++;
2168 sc->sc_if.if_oerrors++;
2170 V_pfsyncstats.pfsyncs_onomem++;
2176 pktlen = max_linkhdr + sc->sc_len;
2177 if (pktlen > MHLEN) {
2178 /* Find the right pool to allocate from. */
2179 /* XXX: This is ugly. */
2180 m_cljget(m, M_DONTWAIT, pktlen <= MSIZE ? MSIZE :
2181 pktlen <= MCLBYTES ? MCLBYTES :
2182 #if MJUMPAGESIZE != MCLBYTES
2183 pktlen <= MJUMPAGESIZE ? MJUMPAGESIZE :
2185 pktlen <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES);
2187 if (max_linkhdr + sc->sc_len > MHLEN) {
2188 MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
2190 if (!ISSET(m->m_flags, M_EXT)) {
2193 sc->sc_ifp->if_oerrors++;
2195 sc->sc_if.if_oerrors++;
2197 V_pfsyncstats.pfsyncs_onomem++;
2202 m->m_data += max_linkhdr;
2203 m->m_len = m->m_pkthdr.len = sc->sc_len;
2205 /* build the ip header */
2206 ip = (struct ip *)m->m_data;
2207 bcopy(&sc->sc_template, ip, sizeof(*ip));
2208 offset = sizeof(*ip);
2210 ip->ip_len = htons(m->m_pkthdr.len);
2211 ip->ip_id = htons(ip_randomid());
2213 /* build the pfsync header */
2214 ph = (struct pfsync_header *)(m->m_data + offset);
2215 bzero(ph, sizeof(*ph));
2216 offset += sizeof(*ph);
2218 ph->version = PFSYNC_VERSION;
2219 ph->len = htons(sc->sc_len - sizeof(*ip));
2221 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
2223 bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
2226 /* walk the queues */
2227 for (q = 0; q < PFSYNC_S_COUNT; q++) {
2228 if (TAILQ_EMPTY(&sc->sc_qs[q]))
2231 subh = (struct pfsync_subheader *)(m->m_data + offset);
2232 offset += sizeof(*subh);
2235 TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
2238 KASSERT(st->sync_state == q,
2239 ("%s: st->sync_state == q",
2242 KASSERT(st->sync_state == q);
2246 offset += pfsync_qs[q].write(st, m, offset);
2247 st->sync_state = PFSYNC_S_NONE;
2250 TAILQ_INIT(&sc->sc_qs[q]);
2252 bzero(subh, sizeof(*subh));
2253 subh->action = pfsync_qs[q].action;
2254 subh->count = htons(count);
2257 if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
2258 subh = (struct pfsync_subheader *)(m->m_data + offset);
2259 offset += sizeof(*subh);
2262 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
2263 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
2265 bcopy(&ur->ur_msg, m->m_data + offset,
2266 sizeof(ur->ur_msg));
2267 offset += sizeof(ur->ur_msg);
2269 pool_put(&sc->sc_pool, ur);
2274 bzero(subh, sizeof(*subh));
2275 subh->action = PFSYNC_ACT_UPD_REQ;
2276 subh->count = htons(count);
2279 /* has someone built a custom region for us to add? */
2280 if (sc->sc_plus != NULL) {
2281 bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
2282 offset += sc->sc_pluslen;
2288 if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
2289 subh = (struct pfsync_subheader *)(m->m_data + offset);
2290 offset += sizeof(*subh);
2293 TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
2294 offset += pfsync_out_tdb(t, m, offset);
2295 CLR(t->tdb_flags, TDBF_PFSYNC);
2299 TAILQ_INIT(&sc->sc_tdb_q);
2301 bzero(subh, sizeof(*subh));
2302 subh->action = PFSYNC_ACT_TDB;
2303 subh->count = htons(count);
2307 subh = (struct pfsync_subheader *)(m->m_data + offset);
2308 offset += sizeof(*subh);
2310 bzero(subh, sizeof(*subh));
2311 subh->action = PFSYNC_ACT_EOF;
2312 subh->count = htons(1);
2314 /* XXX write checksum in EOF here */
2316 /* we're done, let's put it on the wire */
2319 m->m_data += sizeof(*ip);
2320 m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
2324 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
2326 m->m_data -= sizeof(*ip);
2327 m->m_len = m->m_pkthdr.len = sc->sc_len;
2330 if (sc->sc_sync_if == NULL) {
2331 sc->sc_len = PFSYNC_MINPKT;
2338 sc->sc_ifp->if_opackets++;
2339 sc->sc_ifp->if_obytes += m->m_pkthdr.len;
2341 sc->sc_if.if_opackets++;
2342 sc->sc_if.if_obytes += m->m_pkthdr.len;
2348 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
2353 V_pfsyncstats.pfsyncs_opackets++;
2362 V_pfsyncstats.pfsyncs_oerrors++;
2368 sc->sc_len = PFSYNC_MINPKT;
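/*
 * The packet built by pfsync_sendout() is laid out as: an IP header
 * copied from sc_template, the pfsync header (version, length, pf
 * checksum), one subheader plus records for each non-empty queue, any
 * queued update requests, an optional "plus" region from
 * pfsync_send_plus(), queued TDBs, and a terminating EOF subheader.
 * The finished mbuf is handed to ip_output() with sc_imo.
 */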
2372 pfsync_insert_state(struct pf_state *st)
2375 struct pfsync_softc *sc = V_pfsyncif;
2377 struct pfsync_softc *sc = pfsyncif;
2381 PF_ASSERT(MA_OWNED);
2383 splassert(IPL_SOFTNET);
2386 if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
2387 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
2388 SET(st->state_flags, PFSTATE_NOSYNC);
2392 if (sc == NULL || ISSET(st->state_flags, PFSTATE_NOSYNC))
2397 KASSERT(st->sync_state == PFSYNC_S_NONE,
2398 ("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__));
2400 KASSERT(st->sync_state == PFSYNC_S_NONE);
2404 if (sc->sc_len == PFSYNC_MINPKT)
2406 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
2409 timeout_add_sec(&sc->sc_tmo, 1);
2412 pfsync_q_ins(st, PFSYNC_S_INS);
2414 if (ISSET(st->state_flags, PFSTATE_ACK))
2415 schednetisr(NETISR_PFSYNC);
2417 st->sync_updates = 0;
2423 pfsync_defer(struct pf_state *st, struct mbuf *m)
2426 struct pfsync_softc *sc = V_pfsyncif;
2428 struct pfsync_softc *sc = pfsyncif;
2430 struct pfsync_deferral *pd;
2433 PF_ASSERT(MA_OWNED);
2435 splassert(IPL_SOFTNET);
2438 if (sc->sc_deferred >= 128)
2439 pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
2441 pd = pool_get(&sc->sc_pool, M_NOWAIT);
2447 m->m_flags |= M_SKIP_FIREWALL;
2449 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
2451 SET(st->state_flags, PFSTATE_ACK);
2456 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
2458 callout_init(&pd->pd_tmo, CALLOUT_MPSAFE);
2459 callout_reset(&pd->pd_tmo, defer, pfsync_defer_tmo,
2462 timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
2463 timeout_add(&pd->pd_tmo, defer);
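/*
 * Deferral holds back the packet that created a state (tagged so the
 * firewall will not process it again) until the insert has been acked
 * by the peer or the deferral timeout fires; pfsync_undefer() then
 * either drops the held mbuf or re-injects it with ip_output().
 */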
2470 pfsync_undefer(struct pfsync_deferral *pd, int drop)
2473 struct pfsync_softc *sc = V_pfsyncif;
2475 struct pfsync_softc *sc = pfsyncif;
2480 PF_ASSERT(MA_OWNED);
2482 splassert(IPL_SOFTNET);
2485 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
2488 CLR(pd->pd_st->state_flags, PFSTATE_ACK);
2489 timeout_del(&pd->pd_tmo); /* bah */
2495 /* XXX: use pf_deferred?! */
2498 ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0,
2499 (void *)NULL, (void *)NULL);
2506 pool_put(&sc->sc_pool, pd);
2510 pfsync_defer_tmo(void *arg)
2512 #if defined(__FreeBSD__) && defined(VIMAGE)
2513 struct pfsync_deferral *pd = arg;
2519 CURVNET_SET(pd->pd_m->m_pkthdr.rcvif->if_vnet); /* XXX */
2522 pfsync_undefer(arg, 0);
2531 pfsync_deferred(struct pf_state *st, int drop)
2534 struct pfsync_softc *sc = V_pfsyncif;
2536 struct pfsync_softc *sc = pfsyncif;
2538 struct pfsync_deferral *pd;
2540 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
2541 if (pd->pd_st == st) {
2542 pfsync_undefer(pd, drop);
2547 panic("pfsync_deferred: unable to find deferred state");
2550 u_int pfsync_upds = 0;
2553 pfsync_update_state(struct pf_state *st)
2556 struct pfsync_softc *sc = V_pfsyncif;
2558 struct pfsync_softc *sc = pfsyncif;
2563 PF_ASSERT(MA_OWNED);
2565 splassert(IPL_SOFTNET);
2571 if (ISSET(st->state_flags, PFSTATE_ACK))
2572 pfsync_deferred(st, 0);
2573 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2574 if (st->sync_state != PFSYNC_S_NONE)
2579 if (sc->sc_len == PFSYNC_MINPKT)
2581 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
2584 timeout_add_sec(&sc->sc_tmo, 1);
2587 switch (st->sync_state) {
2588 case PFSYNC_S_UPD_C:
2591 /* we're already handling it */
2594 if (st->sync_updates >= sc->sc_maxupdates)
2601 pfsync_q_ins(st, PFSYNC_S_UPD_C);
2602 st->sync_updates = 0;
2606 panic("pfsync_update_state: unexpected sync state %d",
2610 if (sync || (time_second - st->pfsync_time) < 2) {
2612 schednetisr(NETISR_PFSYNC);
2617 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
2620 struct pfsync_softc *sc = V_pfsyncif;
2622 struct pfsync_softc *sc = pfsyncif;
2624 struct pfsync_upd_req_item *item;
2625 size_t nlen = sizeof(struct pfsync_upd_req);
2629 * this code does nothing to prevent multiple update requests for the
2630 * same state being generated.
2633 item = pool_get(&sc->sc_pool, PR_NOWAIT);
2639 item->ur_msg.id = id;
2640 item->ur_msg.creatorid = creatorid;
2642 if (TAILQ_EMPTY(&sc->sc_upd_req_list))
2643 nlen += sizeof(struct pfsync_subheader);
2646 if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
2648 if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2654 nlen = sizeof(struct pfsync_subheader) +
2655 sizeof(struct pfsync_upd_req);
2658 TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
2661 schednetisr(NETISR_PFSYNC);
2665 pfsync_update_state_req(struct pf_state *st)
2668 struct pfsync_softc *sc = V_pfsyncif;
2670 struct pfsync_softc *sc = pfsyncif;
2674 panic("pfsync_update_state_req: nonexistent instance");
2676 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2677 if (st->sync_state != PFSYNC_S_NONE)
2682 switch (st->sync_state) {
2683 case PFSYNC_S_UPD_C:
2687 pfsync_q_ins(st, PFSYNC_S_UPD);
2688 schednetisr(NETISR_PFSYNC);
2694 /* we're already handling it */
2698 panic("pfsync_update_state_req: unexpected sync state %d",
2704 pfsync_delete_state(struct pf_state *st)
2707 struct pfsync_softc *sc = V_pfsyncif;
2709 struct pfsync_softc *sc = pfsyncif;
2713 PF_ASSERT(MA_OWNED);
2715 splassert(IPL_SOFTNET);
2721 if (ISSET(st->state_flags, PFSTATE_ACK))
2722 pfsync_deferred(st, 1);
2723 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2724 if (st->sync_state != PFSYNC_S_NONE)
2729 if (sc->sc_len == PFSYNC_MINPKT)
2731 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
2734 timeout_add_sec(&sc->sc_tmo, 1);
2737 switch (st->sync_state) {
2739 /* we never got to tell the world so just forget about it */
2743 case PFSYNC_S_UPD_C:
2747 /* FALLTHROUGH to putting it on the del list */
2750 pfsync_q_ins(st, PFSYNC_S_DEL);
2754 panic("pfsync_delete_state: unexpected sync state %d",
2760 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2763 struct pfsync_subheader subh;
2764 struct pfsync_clr clr;
2768 struct pfsync_softc *sc = V_pfsyncif;
2770 struct pfsync_softc *sc = pfsyncif;
2774 PF_ASSERT(MA_OWNED);
2776 splassert(IPL_SOFTNET);
2782 bzero(&r, sizeof(r));
2784 r.subh.action = PFSYNC_ACT_CLR;
2785 r.subh.count = htons(1);
2787 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2788 r.clr.creatorid = creatorid;
2790 pfsync_send_plus(&r, sizeof(r));
2794 pfsync_q_ins(struct pf_state *st, int q)
2797 struct pfsync_softc *sc = V_pfsyncif;
2799 struct pfsync_softc *sc = pfsyncif;
2801 size_t nlen = pfsync_qs[q].len;
2805 KASSERT(st->sync_state == PFSYNC_S_NONE,
2806 ("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__));
2808 KASSERT(st->sync_state == PFSYNC_S_NONE);
2811 #if 1 || defined(PFSYNC_DEBUG)
2812 if (sc->sc_len < PFSYNC_MINPKT)
2814 panic("pfsync pkt len is too low %zu", sc->sc_len);
2816 panic("pfsync pkt len is too low %d", sc->sc_len);
2819 if (TAILQ_EMPTY(&sc->sc_qs[q]))
2820 nlen += sizeof(struct pfsync_subheader);
2823 if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
2825 if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2837 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2841 TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
2846 pfsync_q_del(struct pf_state *st)
2849 struct pfsync_softc *sc = V_pfsyncif;
2851 struct pfsync_softc *sc = pfsyncif;
2853 int q = st->sync_state;
2856 KASSERT(st->sync_state != PFSYNC_S_NONE,
2857 ("%s: st->sync_state != PFSYNC_S_NONE", __FUNCTION__));
2859 KASSERT(st->sync_state != PFSYNC_S_NONE);
2862 sc->sc_len -= pfsync_qs[q].len;
2863 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
2864 st->sync_state = PFSYNC_S_NONE;
2866 if (TAILQ_EMPTY(&sc->sc_qs[q]))
2867 sc->sc_len -= sizeof(struct pfsync_subheader);
2872 pfsync_update_tdb(struct tdb *t, int output)
2875 struct pfsync_softc *sc = V_pfsyncif;
2877 struct pfsync_softc *sc = pfsyncif;
2879 size_t nlen = sizeof(struct pfsync_tdb);
2885 if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
2886 if (TAILQ_EMPTY(&sc->sc_tdb_q))
2887 nlen += sizeof(struct pfsync_subheader);
2889 if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2894 nlen = sizeof(struct pfsync_subheader) +
2895 sizeof(struct pfsync_tdb);
2899 TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
2900 SET(t->tdb_flags, TDBF_PFSYNC);
2903 if (++t->tdb_updates >= sc->sc_maxupdates)
2904 schednetisr(NETISR_PFSYNC);
2908 SET(t->tdb_flags, TDBF_PFSYNC_RPL);
2910 CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
2914 pfsync_delete_tdb(struct tdb *t)
2917 struct pfsync_softc *sc = V_pfsyncif;
2919 struct pfsync_softc *sc = pfsyncif;
2922 if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
2925 sc->sc_len -= sizeof(struct pfsync_tdb);
2926 TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
2927 CLR(t->tdb_flags, TDBF_PFSYNC);
2929 if (TAILQ_EMPTY(&sc->sc_tdb_q))
2930 sc->sc_len -= sizeof(struct pfsync_subheader);
2934 pfsync_out_tdb(struct tdb *t, struct mbuf *m, int offset)
2936 struct pfsync_tdb *ut = (struct pfsync_tdb *)(m->m_data + offset);
2938 bzero(ut, sizeof(*ut));
2939 ut->spi = t->tdb_spi;
2940 bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
2942 * When a failover happens, the master's rpl is probably above
2943 * what we see here (we may be up to a second late), so
2944 * increase it a bit for outbound tdbs to manage most such
2947 * For now, just add an offset that is likely to be larger
2948 * than the number of packets we can see in one second. The RFC
2949 * just says the next packet must have a higher seq value.
2951 * XXX What is a good algorithm for this? We could use
2952 * a rate-determined increase, but to know it, we would have
2953 * to extend struct tdb.
2954 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
2955 * will soon be replaced anyway. For now, just don't handle
2958 #define RPL_INCR 16384
2959 ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
2961 ut->cur_bytes = htobe64(t->tdb_cur_bytes);
2962 ut->sproto = t->tdb_sproto;
2964 return (sizeof(*ut));
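/*
 * Bulk transfers: an update request with id == 0 and creatorid == 0
 * (see pfsync_in_ureq()) starts one.  pfsync_bulk_start() records the
 * request time and points sc_bulk_next at the head of the state list;
 * pfsync_bulk_update() walks the list a chunk at a time, re-arming
 * sc_bulk_tmo between chunks, queueing updates for states not touched
 * since the request, and finishes with a PFSYNC_BUS_END status.
 */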
2969 pfsync_bulk_start(void)
2972 struct pfsync_softc *sc = V_pfsyncif;
2974 struct pfsync_softc *sc = pfsyncif;
2977 sc->sc_ureq_received = time_uptime;
2979 if (sc->sc_bulk_next == NULL)
2981 sc->sc_bulk_next = TAILQ_FIRST(&V_state_list);
2983 sc->sc_bulk_next = TAILQ_FIRST(&state_list);
2985 sc->sc_bulk_last = sc->sc_bulk_next;
2988 if (V_pf_status.debug >= PF_DEBUG_MISC)
2990 if (pf_status.debug >= PF_DEBUG_MISC)
2992 printf("pfsync: received bulk update request\n");
2994 pfsync_bulk_status(PFSYNC_BUS_START);
2995 pfsync_bulk_update(sc);
2999 pfsync_bulk_update(void *arg)
3001 struct pfsync_softc *sc = arg;
3002 struct pf_state *st = sc->sc_bulk_next;
3008 CURVNET_SET(sc->sc_ifp->if_vnet);
3012 if (st->sync_state == PFSYNC_S_NONE &&
3013 st->timeout < PFTM_MAX &&
3014 st->pfsync_time <= sc->sc_ureq_received) {
3015 pfsync_update_state_req(st);
3019 st = TAILQ_NEXT(st, entry_list);
3022 st = TAILQ_FIRST(&V_state_list);
3024 st = TAILQ_FIRST(&state_list);
3027 if (i > 0 && TAILQ_EMPTY(&sc->sc_qs[PFSYNC_S_UPD])) {
3028 sc->sc_bulk_next = st;
3030 callout_reset(&sc->sc_bulk_tmo, 1,
3031 pfsync_bulk_fail, sc);
3033 timeout_add(&sc->sc_bulk_tmo, 1);
3037 } while (st != sc->sc_bulk_last);
3040 sc->sc_bulk_next = NULL;
3041 sc->sc_bulk_last = NULL;
3042 pfsync_bulk_status(PFSYNC_BUS_END);
3053 pfsync_bulk_status(u_int8_t status)
3056 struct pfsync_subheader subh;
3057 struct pfsync_bus bus;
3061 struct pfsync_softc *sc = V_pfsyncif;
3063 struct pfsync_softc *sc = pfsyncif;
3066 bzero(&r, sizeof(r));
3068 r.subh.action = PFSYNC_ACT_BUS;
3069 r.subh.count = htons(1);
3072 r.bus.creatorid = V_pf_status.hostid;
3074 r.bus.creatorid = pf_status.hostid;
3076 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
3077 r.bus.status = status;
3079 pfsync_send_plus(&r, sizeof(r));
3083 pfsync_bulk_fail(void *arg)
3085 struct pfsync_softc *sc = arg;
3088 CURVNET_SET(sc->sc_ifp->if_vnet);
3091 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
3094 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
3095 pfsync_bulk_fail, V_pfsyncif);
3097 timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
3099 pfsync_request_update(0, 0);
3101 /* Pretend like the transfer was ok */
3102 sc->sc_ureq_sent = 0;
3103 sc->sc_bulk_tries = 0;
3107 if (!sc->pfsync_sync_ok)
3109 if (!pfsync_sync_ok)
3111 carp_group_demote_adj(&sc->sc_if, -1);
3115 sc->pfsync_sync_ok = 1;
3120 if (V_pf_status.debug >= PF_DEBUG_MISC)
3122 if (pf_status.debug >= PF_DEBUG_MISC)
3124 printf("pfsync: failed to receive bulk update\n");
3133 pfsync_send_plus(void *plus, size_t pluslen)
3136 struct pfsync_softc *sc = V_pfsyncif;
3138 struct pfsync_softc *sc = pfsyncif;
3143 if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) {
3145 if (sc->sc_len + pluslen > sc->sc_if.if_mtu) {
3159 sc->sc_len += (sc->sc_pluslen = pluslen);
3176 struct pfsync_softc *sc = V_pfsyncif;
3178 struct pfsync_softc *sc = pfsyncif;
3182 if (sc == NULL || !ISSET(sc->sc_ifp->if_flags, IFF_DRV_RUNNING))
3184 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
3192 pfsync_state_in_use(struct pf_state *st)
3195 struct pfsync_softc *sc = V_pfsyncif;
3197 struct pfsync_softc *sc = pfsyncif;
3203 if (st->sync_state != PFSYNC_S_NONE)
3206 if (sc->sc_bulk_next == NULL && sc->sc_bulk_last == NULL)
3216 pfsync_timeout(void *arg)
3218 #if defined(__FreeBSD__) && defined(VIMAGE)
3219 struct pfsync_softc *sc = arg;
3224 CURVNET_SET(sc->sc_ifp->if_vnet);
3244 /* this is a softnet/netisr handler */
3247 pfsyncintr(void *arg)
3253 struct pfsync_softc *sc = arg;
3261 CURVNET_SET(sc->sc_ifp->if_vnet);
3281 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
3286 /* All sysctl names at this level are terminal. */
3291 case PFSYNCCTL_STATS:
3294 return (sysctl_struct(oldp, oldlenp, newp, newlen,
3295 &V_pfsyncstats, sizeof(V_pfsyncstats)));
3298 return (ENOPROTOOPT);
3303 pfsync_ifdetach(void *arg, struct ifnet *ifp)
3305 struct pfsync_softc *sc = (struct pfsync_softc *)arg;
3306 struct ip_moptions *imo;
3308 if (sc == NULL || sc->sc_sync_if != ifp)
3309 return; /* not for us; unlocked read */
3311 CURVNET_SET(sc->sc_ifp->if_vnet);
3315 /* Deal with a member interface going away from under us. */
3316 sc->sc_sync_if = NULL;
3318 if (imo->imo_num_memberships > 0) {
3319 KASSERT(imo->imo_num_memberships == 1,
3320 ("%s: imo_num_memberships != 1", __func__));
3322 * Our event handler is always called after protocol
3323 * domains have been detached from the underlying ifnet.
3324 * Do not call in_delmulti(); we held a single reference
3325 * which the protocol domain has purged in in_purgemaddrs().
3328 imo->imo_membership[--imo->imo_num_memberships] = NULL;
3330 imo->imo_multicast_ifp = NULL;
3339 vnet_pfsync_init(const void *unused)
3345 error = swi_add(NULL, "pfsync", pfsyncintr, V_pfsyncif,
3346 SWI_NET, INTR_MPSAFE, &pfsync_swi.pfsync_swi_cookie);
3348 panic("%s: swi_add %d", __func__, error);
3350 pfsync_state_import_ptr = pfsync_state_import;
3351 pfsync_up_ptr = pfsync_up;
3352 pfsync_insert_state_ptr = pfsync_insert_state;
3353 pfsync_update_state_ptr = pfsync_update_state;
3354 pfsync_delete_state_ptr = pfsync_delete_state;
3355 pfsync_clear_states_ptr = pfsync_clear_states;
3356 pfsync_state_in_use_ptr = pfsync_state_in_use;
3357 pfsync_defer_ptr = pfsync_defer;
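/*
 * pf(4) reaches pfsync through these function pointers, presumably to
 * avoid a hard link-time dependency between the two modules;
 * vnet_pfsync_init() installs them and vnet_pfsync_uninit() clears
 * them when the vnet goes away.
 */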
3363 vnet_pfsync_uninit(const void *unused)
3366 swi_remove(pfsync_swi.pfsync_swi_cookie);
3368 pfsync_state_import_ptr = NULL;
3369 pfsync_up_ptr = NULL;
3370 pfsync_insert_state_ptr = NULL;
3371 pfsync_update_state_ptr = NULL;
3372 pfsync_delete_state_ptr = NULL;
3373 pfsync_clear_states_ptr = NULL;
3374 pfsync_state_in_use_ptr = NULL;
3375 pfsync_defer_ptr = NULL;
3377 if_clone_detach(&pfsync_cloner);
3382 /* Define startup order. */
3383 #define PFSYNC_SYSINIT_ORDER SI_SUB_PROTO_BEGIN
3384 #define PFSYNC_MODEVENT_ORDER (SI_ORDER_FIRST) /* On boot slot in here. */
3385 #define PFSYNC_VNET_ORDER (PFSYNC_MODEVENT_ORDER + 2) /* Later still. */
3389 * VNET_SYSINIT is called for each existing vnet and each new vnet.
3391 VNET_SYSINIT(vnet_pfsync_init, PFSYNC_SYSINIT_ORDER, PFSYNC_VNET_ORDER,
3392 vnet_pfsync_init, NULL);
3395 * Closing up shop. These are done in REVERSE ORDER.
3396 * Not called on reboot.
3397 * VNET_SYSUNINIT is called for each exiting vnet as it exits.
3399 VNET_SYSUNINIT(vnet_pfsync_uninit, PFSYNC_SYSINIT_ORDER, PFSYNC_VNET_ORDER,
3400 vnet_pfsync_uninit, NULL);
3402 pfsync_modevent(module_t mod, int type, void *data)
3414 if_clone_detach(&pfsync_cloner);
3425 static moduledata_t pfsync_mod = {
3431 #define PFSYNC_MODVER 1
3433 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
3434 MODULE_VERSION(pfsync, PFSYNC_MODVER);
3435 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
3436 #endif /* __FreeBSD__ */