1 /* $OpenBSD: if_pfsync.c,v 1.73 2006/11/16 13:13:38 henning Exp $ */
4 * Copyright (c) 2002 Michael Shalayeff
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
31 #include "opt_inet6.h"
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
40 #define NBPFILTER DEV_BPF
46 #define NPFSYNC DEV_PFSYNC
52 #define NCARP DEV_CARP
56 #endif /* __FreeBSD__ */
58 #include <sys/param.h>
63 #include <sys/systm.h>
66 #include <sys/socket.h>
68 #include <sys/endian.h>
69 #include <sys/malloc.h>
70 #include <sys/module.h>
71 #include <sys/sockio.h>
72 #include <sys/taskqueue.h>
74 #include <sys/mutex.h>
75 #include <sys/sysctl.h>
77 #include <sys/ioctl.h>
78 #include <sys/timeout.h>
80 #include <sys/kernel.h>
84 #include <net/if_clone.h>
86 #include <net/if_types.h>
87 #include <net/route.h>
89 #include <netinet/in.h>
90 #include <netinet/if_ether.h>
91 #include <netinet/tcp.h>
92 #include <netinet/tcp_seq.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/in_var.h>
97 #include <netinet/ip.h>
98 #include <netinet/ip_var.h>
102 #include <netinet6/nd6.h>
109 #include <netinet/ip_carp.h>
112 #include <net/pfvar.h>
113 #include <net/if_pfsync.h>
116 #include "bpfilter.h"
/*
 * NOTE(review): this listing is a non-contiguous excerpt -- the embedded
 * line numbers jump (120, 121, 124, 130, ...), so declarations are
 * missing between the lines shown.  Code left byte-identical.
 */
/* Smallest MTU that still fits one full state record after the header. */
120 #define PFSYNC_MINMTU \
121 (sizeof(struct pfsync_header) + sizeof(struct pf_state))
/* Debug printf gated on pfsyncdebug; do/while(0) keeps it statement-safe. */
124 #define DPRINTF(x) do { if (pfsyncdebug) printf x ; } while (0)
/* Singleton softc for the (single) pfsync interface; NULL until cloned. */
130 struct pfsync_softc *pfsyncif = NULL;
131 struct pfsyncstats pfsyncstats;
/* Export the statistics struct read-write under net.inet.pfsync. */
133 SYSCTL_DECL(_net_inet_pfsync);
134 SYSCTL_STRUCT(_net_inet_pfsync, 0, stats, CTLFLAG_RW,
135 &pfsyncstats, pfsyncstats,
136 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
/*
 * Forward declarations for this file.
 * NOTE(review): the excerpt flattened #ifdef branches, which is why two
 * alternate prototypes for pfsync_clone_create/_destroy appear (the
 * 3-argument FreeBSD cloner form vs. the 2-argument OpenBSD form) --
 * confirm against the full file which branch is active.
 */
139 void pfsyncattach(int);
141 int pfsync_clone_create(struct if_clone *, int, caddr_t);
142 void pfsync_clone_destroy(struct ifnet *);
144 int pfsync_clone_create(struct if_clone *, int);
145 int pfsync_clone_destroy(struct ifnet *);
147 void pfsync_setmtu(struct pfsync_softc *, int);
148 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
149 struct pf_state_peer *);
150 int pfsync_insert_net_state(struct pfsync_state *, u_int8_t);
152 void pfsync_update_net_tdb(struct pfsync_tdb *);
/* NOTE(review): pfsyncoutput prototype is truncated mid-argument-list here. */
154 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
156 int pfsyncioctl(struct ifnet *, u_long, caddr_t);
157 void pfsyncstart(struct ifnet *);
159 struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
160 int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
161 int pfsync_sendout(struct pfsync_softc *);
163 int pfsync_tdb_sendout(struct pfsync_softc *);
165 int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
166 void pfsync_timeout(void *);
168 void pfsync_tdb_timeout(void *);
170 void pfsync_send_bus(struct pfsync_softc *, u_int8_t);
171 void pfsync_bulk_update(void *);
172 void pfsync_bulkfail(void *);
175 void pfsync_ifdetach(void *, struct ifnet *);
176 void pfsync_senddef(void *, int);
/* Compatibility shims mapping OpenBSD names onto FreeBSD primitives. */
179 #define betoh64 (unsigned long long)be64toh
180 #define timeout_del callout_stop
185 extern int ifqmaxlen;
/* Register "pfsync" as a cloneable interface (at most 1 unit). */
189 IFC_SIMPLE_DECLARE(pfsync, 1);
191 struct if_clone pfsync_cloner =
192 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
/*
 * pfsyncattach() -- hook the pfsync cloner into the interface-clone
 * framework.  The npfsync count argument is unused by the visible body.
 * NOTE(review): excerpt is truncated (braces/return not shown).
 */
196 pfsyncattach(int npfsync)
198 if_clone_attach(&pfsync_cloner);
/*
 * pfsync_clone_create() -- allocate and initialize the singleton pfsync
 * softc and its ifnet, register the detach event handler, set defaults
 * (multicast sync peer, maxupdates=128), arm the callouts and attach bpf.
 * NOTE(review): the excerpt shows BOTH the FreeBSD (3-arg, if_alloc,
 * callout_init) and OpenBSD (2-arg, sc_if, timeout_set) variants
 * back-to-back because #ifdef lines were stripped; error-return lines
 * between the cleanup frees are also missing.
 */
203 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
205 pfsync_clone_create(struct if_clone *ifc, int unit)
214 if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT)) == NULL)
216 bzero(pfsyncif, sizeof(*pfsyncif));
/* Multicast membership array for the sync group; freed on any later failure. */
218 if ((pfsyncif->sc_imo.imo_membership = (struct in_multi **)malloc(
219 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_DEVBUF,
220 M_NOWAIT)) == NULL) {
221 free(pfsyncif, M_DEVBUF);
224 pfsyncif->sc_imo.imo_mfilters = NULL;
225 pfsyncif->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
226 pfsyncif->sc_imo.imo_multicast_vif = -1;
228 ifp = pfsyncif->sc_ifp = if_alloc(IFT_PFSYNC);
230 free(pfsyncif->sc_imo.imo_membership, M_DEVBUF);
231 free(pfsyncif, M_DEVBUF);
234 if_initname(ifp, ifc->ifc_name, unit);
/* Tear down cleanly if the sync interface disappears underneath us. */
236 pfsyncif->sc_detachtag = EVENTHANDLER_REGISTER(ifnet_departure_event,
237 pfsync_ifdetach, pfsyncif, EVENTHANDLER_PRI_ANY);
238 if (pfsyncif->sc_detachtag == NULL) {
240 free(pfsyncif->sc_imo.imo_membership, M_DEVBUF);
241 free(pfsyncif, M_DEVBUF);
245 pfsyncif->sc_ifq.ifq_maxlen = ifqmaxlen;
246 mtx_init(&pfsyncif->sc_ifq.ifq_mtx, ifp->if_xname,
247 "pfsync send queue", MTX_DEF);
248 TASK_INIT(&pfsyncif->sc_send_task, 0, pfsync_senddef, pfsyncif);
/* No packet under construction yet for any of the three builders. */
250 pfsyncif->sc_mbuf = NULL;
251 pfsyncif->sc_mbuf_net = NULL;
253 pfsyncif->sc_mbuf_tdb = NULL;
255 pfsyncif->sc_statep.s = NULL;
256 pfsyncif->sc_statep_net.s = NULL;
258 pfsyncif->sc_statep_tdb.t = NULL;
260 pfsyncif->sc_maxupdates = 128;
/* Default peer/destination is the pfsync multicast group (both variants). */
262 pfsyncif->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
263 pfsyncif->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
265 pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
266 pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP;
268 pfsyncif->sc_ureq_received = 0;
269 pfsyncif->sc_ureq_sent = 0;
270 pfsyncif->sc_bulk_send_next = NULL;
271 pfsyncif->sc_bulk_terminator = NULL;
273 ifp = &pfsyncif->sc_if;
274 snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
276 ifp->if_softc = pfsyncif;
277 ifp->if_ioctl = pfsyncioctl;
278 ifp->if_output = pfsyncoutput;
279 ifp->if_start = pfsyncstart;
280 ifp->if_type = IFT_PFSYNC;
281 ifp->if_snd.ifq_maxlen = ifqmaxlen;
282 ifp->if_hdrlen = PFSYNC_HDRLEN;
283 pfsync_setmtu(pfsyncif, ETHERMTU);
285 callout_init(&pfsyncif->sc_tmo, CALLOUT_MPSAFE);
287 callout_init(&pfsyncif->sc_tdb_tmo, CALLOUT_MPSAFE);
289 callout_init(&pfsyncif->sc_bulk_tmo, CALLOUT_MPSAFE);
290 callout_init(&pfsyncif->sc_bulkfail_tmo, CALLOUT_MPSAFE);
292 timeout_set(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif);
293 timeout_set(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif);
294 timeout_set(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif);
295 timeout_set(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif);
303 if_addgroup(ifp, "carp");
/* Two bpfattach variants (FreeBSD vs. OpenBSD signatures) from #ifdefs. */
308 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
310 bpfattach(&pfsyncif->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
/*
 * pfsync_clone_destroy() -- undo pfsync_clone_create(): deregister the
 * departure handler, stop all four callouts, then free the membership
 * array and the softc.  NOTE(review): lines between the stops and the
 * frees (bpfdetach/if_detach etc.) are missing from this excerpt.
 */
322 pfsync_clone_destroy(struct ifnet *ifp)
325 EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfsyncif->sc_detachtag);
326 callout_stop(&pfsyncif->sc_tmo);
328 callout_stop(&pfsyncif->sc_tdb_tmo);
330 callout_stop(&pfsyncif->sc_bulk_tmo);
331 callout_stop(&pfsyncif->sc_bulkfail_tmo);
341 free(pfsyncif->sc_imo.imo_membership, M_DEVBUF);
343 free(pfsyncif, M_DEVBUF);
/*
 * Start output on the pfsync interface.
 * pfsync never transmits from its own send queue; anything queued here is
 * simply dropped and dequeued (locked FreeBSD variant vs. plain OpenBSD
 * macros -- both #ifdef branches are visible in this excerpt).
 */
354 pfsyncstart(struct ifnet *ifp)
363 IF_LOCK(&ifp->if_snd);
364 _IF_DROP(&ifp->if_snd);
365 _IF_DEQUEUE(&ifp->if_snd, m);
366 IF_UNLOCK(&ifp->if_snd);
369 IF_DROP(&ifp->if_snd);
370 IF_DEQUEUE(&ifp->if_snd, m);
/*
 * pfsync_alloc_scrub_memory() -- if the wire-format peer carries scrub
 * state and the local peer has none yet, allocate and zero a scrub
 * record from pf_state_scrub_pl.  NOTE(review): the return statements
 * (ENOMEM on failure / 0 on success, presumably) are not in this excerpt.
 */
382 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
383 struct pf_state_peer *d)
385 if (s->scrub.scrub_flag && d->scrub == NULL) {
386 d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
387 if (d->scrub == NULL)
389 bzero(d->scrub, sizeof(*d->scrub));
/*
 * pfsync_insert_net_state() -- create a local pf_state from a state
 * record received over the wire: validate the creator id, resolve the
 * interface, bind the rule (by number only when the ruleset checksums
 * matched, else pf_default_rule), allocate and fill the state, then
 * insert it with PFSTATE_FROMSYNC set so it is not echoed back.
 * NOTE(review): error returns and several cleanup lines are missing
 * between the visible statements of this excerpt.
 */
396 pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag)
398 struct pf_state *st = NULL;
399 struct pf_rule *r = NULL;
402 if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
403 printf("pfsync_insert_net_state: invalid creator id:"
404 " %08x\n", ntohl(sp->creatorid));
408 kif = pfi_kif_get(sp->ifname);
410 if (pf_status.debug >= PF_DEBUG_MISC)
411 printf("pfsync_insert_net_state: "
412 "unknown interface: %s\n", sp->ifname);
413 /* skip this state */
/*
 * If the ruleset checksums match, it's safe to associate the state
 * with the rule of that number.
 */
421 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag)
422 r = pf_main_ruleset.rules[
423 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
425 r = &pf_default_rule;
/* Respect the bound rule's max-states limit before allocating. */
427 if (!r->max_states || r->states < r->max_states)
428 st = pool_get(&pf_state_pl, PR_NOWAIT)
430 pfi_kif_unref(kif, PFI_KIF_REF_NONE);
433 bzero(st, sizeof(*st));
435 /* allocate memory for scrub info */
436 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
437 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) {
438 pfi_kif_unref(kif, PFI_KIF_REF_NONE);
440 pool_put(&pf_state_scrub_pl, st->src.scrub);
441 pool_put(&pf_state_pl, st);
446 /* XXX get pointers to nat_rule and anchor */
448 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
451 /* fill in the rest of the state entry */
452 pf_state_host_ntoh(&sp->lan, &st->lan);
453 pf_state_host_ntoh(&sp->gwy, &st->gwy);
454 pf_state_host_ntoh(&sp->ext, &st->ext);
456 pf_state_peer_ntoh(&sp->src, &st->src);
457 pf_state_peer_ntoh(&sp->dst, &st->dst);
459 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
/* Wire times are relative; rebase creation/expiry onto local clock. */
460 st->creation = time_second - ntohl(sp->creation);
461 st->expire = ntohl(sp->expire) + time_second;
464 st->proto = sp->proto;
465 st->direction = sp->direction;
467 st->timeout = sp->timeout;
468 st->allow_opts = sp->allow_opts;
470 bcopy(sp->id, &st->id, sizeof(st->id));
471 st->creatorid = sp->creatorid;
/* Mark as learned via pfsync so deletion/updates aren't re-broadcast. */
472 st->sync_flags = PFSTATE_FROMSYNC;
474 if (pf_insert_state(kif, st)) {
475 pfi_kif_unref(kif, PFI_KIF_REF_NONE);
476 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
479 pool_put(&pf_state_scrub_pl, st->dst.scrub);
481 pool_put(&pf_state_scrub_pl, st->src.scrub);
482 pool_put(&pf_state_pl, st);
/*
 * pfsync_input() -- protocol input routine for received pfsync packets.
 * Validates interface/TTL/header/version/action, then dispatches on the
 * action code: CLR (flush states by creator id), INS, UPD, DEL, the
 * compressed UPD_C/DEL_C forms, UREQ (peer requests state records, or a
 * full bulk update when id/creatorid are both zero), BUS (bulk-update
 * status markers), and TDB_UPD (IPsec SA counters).
 * NOTE(review): this excerpt is heavily truncated -- goto done/return
 * paths, splsoftnet/PF_LOCK brackets, case labels for INS/UPD/DEL and
 * many closing braces are missing between the visible lines; both
 * FreeBSD and OpenBSD #ifdef variants of the signature appear.
 */
491 pfsync_input(struct mbuf *m, __unused int off)
493 pfsync_input(struct mbuf *m, ...)
496 struct ip *ip = mtod(m, struct ip *);
497 struct pfsync_header *ph;
498 struct pfsync_softc *sc = pfsyncif;
500 struct pf_state_cmp key;
501 struct pfsync_state *sp;
502 struct pfsync_state_upd *up;
503 struct pfsync_state_del *dp;
504 struct pfsync_state_clr *cp;
505 struct pfsync_state_upd_req *rup;
506 struct pfsync_state_bus *bus;
508 struct pfsync_tdb *pt;
512 int iplen, action, error, i, s, count, offp, sfail, stale = 0;
513 u_int8_t chksum_flag = 0;
515 pfsyncstats.pfsyncs_ipackets++;
517 /* verify that we have a sync interface configured */
518 if (!sc || !sc->sc_sync_ifp || !pf_status.running)
521 /* verify that the packet came in on the right interface */
522 if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
523 pfsyncstats.pfsyncs_badif++;
527 /* verify that the IP TTL is 255. */
528 if (ip->ip_ttl != PFSYNC_DFLTTL) {
529 pfsyncstats.pfsyncs_badttl++;
533 iplen = ip->ip_hl << 2;
535 if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
536 pfsyncstats.pfsyncs_hdrops++;
/* Make the pfsync header contiguous before touching its fields. */
540 if (iplen + sizeof(*ph) > m->m_len) {
541 if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
542 pfsyncstats.pfsyncs_hdrops++;
545 ip = mtod(m, struct ip *);
547 ph = (struct pfsync_header *)((char *)ip + iplen);
549 /* verify the version */
550 if (ph->version != PFSYNC_VERSION) {
551 pfsyncstats.pfsyncs_badver++;
558 /* make sure it's a valid action code */
559 if (action >= PFSYNC_ACT_MAX) {
560 pfsyncstats.pfsyncs_badact++;
564 /* Cheaper to grab this now than having to mess with mbufs later */
/* chksum_flag records whether the peer runs an identical ruleset. */
567 if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
571 case PFSYNC_ACT_CLR: {
572 struct pf_state *nexts;
575 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
576 sizeof(*cp), &offp)) == NULL) {
577 pfsyncstats.pfsyncs_badlen++;
580 cp = (struct pfsync_state_clr *)(mp->m_data + offp);
581 creatorid = cp->creatorid;
/* Empty ifname: clear matching states across ALL interfaces. */
587 if (cp->ifname[0] == '\0') {
588 for (st = RB_MIN(pf_state_tree_id, &tree_id);
590 nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
591 if (st->creatorid == creatorid) {
592 st->sync_flags |= PFSTATE_FROMSYNC;
597 if ((kif = pfi_kif_get(cp->ifname)) == NULL) {
604 for (st = RB_MIN(pf_state_tree_lan_ext,
605 &kif->pfik_lan_ext); st; st = nexts) {
606 nexts = RB_NEXT(pf_state_tree_lan_ext,
607 &kif->pfik_lan_ext, st);
608 if (st->creatorid == creatorid) {
609 st->sync_flags |= PFSTATE_FROMSYNC;
/* (INS) pull down the array of full state records, then insert each. */
622 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
623 count * sizeof(*sp), &offp)) == NULL) {
624 pfsyncstats.pfsyncs_badlen++;
632 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
633 i < count; i++, sp++) {
634 /* check for invalid values */
635 if (sp->timeout >= PFTM_MAX ||
636 sp->src.state > PF_TCPS_PROXY_DST ||
637 sp->dst.state > PF_TCPS_PROXY_DST ||
638 sp->direction > PF_OUT ||
639 (sp->af != AF_INET && sp->af != AF_INET6)) {
640 if (pf_status.debug >= PF_DEBUG_MISC)
641 printf("pfsync_insert: PFSYNC_ACT_INS: "
643 pfsyncstats.pfsyncs_badstate++;
647 if ((error = pfsync_insert_net_state(sp,
649 if (error == ENOMEM) {
/* (UPD) full-state updates: merge into existing state or insert anew. */
665 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
666 count * sizeof(*sp), &offp)) == NULL) {
667 pfsyncstats.pfsyncs_badlen++;
675 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
676 i < count; i++, sp++) {
677 int flags = PFSYNC_FLAG_STALE;
679 /* check for invalid values */
680 if (sp->timeout >= PFTM_MAX ||
681 sp->src.state > PF_TCPS_PROXY_DST ||
682 sp->dst.state > PF_TCPS_PROXY_DST) {
683 if (pf_status.debug >= PF_DEBUG_MISC)
684 printf("pfsync_insert: PFSYNC_ACT_UPD: "
686 pfsyncstats.pfsyncs_badstate++;
690 bcopy(sp->id, &key.id, sizeof(key.id));
691 key.creatorid = sp->creatorid;
693 st = pf_find_state_byid(&key);
695 /* insert the update */
696 if (pfsync_insert_net_state(sp, chksum_flag))
697 pfsyncstats.pfsyncs_badstate++;
701 if (st->proto == IPPROTO_TCP) {
/*
 * The state should never go backwards except
 * for syn-proxy states. Neither should the
 * sequence window slide backwards.
 */
707 if (st->src.state > sp->src.state &&
708 (st->src.state < PF_TCPS_PROXY_SRC ||
709 sp->src.state >= PF_TCPS_PROXY_SRC))
711 else if (SEQ_GT(st->src.seqlo,
712 ntohl(sp->src.seqlo)))
714 else if (st->dst.state > sp->dst.state) {
715 /* There might still be useful
716 * information about the src state here,
717 * so import that part of the update,
718 * then "fail" so we send the updated
719 * state back to the peer who is missing
720 * our what we know. */
721 pf_state_peer_ntoh(&sp->src, &st->src);
722 /* XXX do anything with timeouts? */
725 } else if (st->dst.state >= TCPS_SYN_SENT &&
726 SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
/*
 * Non-TCP protocol state machine always go
 * forwards (only the comparison below is kept).
 */
733 if (st->src.state > sp->src.state)
735 else if (st->dst.state > sp->dst.state)
/* sfail nonzero => the update is stale; tell the peer our version. */
739 if (pf_status.debug >= PF_DEBUG_MISC)
740 printf("pfsync: %s stale update "
743 (sfail < 7 ? "ignoring"
746 ntohl(st->creatorid));
747 pfsyncstats.pfsyncs_badstate++;
749 if (!(sp->sync_flags & PFSTATE_STALE)) {
750 /* we have a better state, send it */
751 if (sc->sc_mbuf != NULL && !stale)
756 PFSYNC_ACT_UPD, st, flags);
760 pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
761 pf_state_peer_ntoh(&sp->src, &st->src);
762 pf_state_peer_ntoh(&sp->dst, &st->dst);
763 st->expire = ntohl(sp->expire) + time_second;
764 st->timeout = sp->timeout;
766 if (stale && sc->sc_mbuf != NULL)
/*
 * It's not strictly necessary for us to support the "uncompressed"
 * delete action, but it's relatively simple and maintains consistency.
 */
778 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
779 count * sizeof(*sp), &offp)) == NULL) {
780 pfsyncstats.pfsyncs_badlen++;
788 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
789 i < count; i++, sp++) {
790 bcopy(sp->id, &key.id, sizeof(key.id));
791 key.creatorid = sp->creatorid;
793 st = pf_find_state_byid(&key);
795 pfsyncstats.pfsyncs_badstate++;
798 st->sync_flags |= PFSTATE_FROMSYNC;
806 case PFSYNC_ACT_UPD_C: {
807 int update_requested = 0;
809 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
810 count * sizeof(*up), &offp)) == NULL) {
811 pfsyncstats.pfsyncs_badlen++;
819 for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
820 i < count; i++, up++) {
821 /* check for invalid values */
822 if (up->timeout >= PFTM_MAX ||
823 up->src.state > PF_TCPS_PROXY_DST ||
824 up->dst.state > PF_TCPS_PROXY_DST) {
825 if (pf_status.debug >= PF_DEBUG_MISC)
826 printf("pfsync_insert: "
829 pfsyncstats.pfsyncs_badstate++;
833 bcopy(up->id, &key.id, sizeof(key.id));
834 key.creatorid = up->creatorid;
836 st = pf_find_state_byid(&key);
838 /* We don't have this state. Ask for it. */
839 error = pfsync_request_update(up, &src);
840 if (error == ENOMEM) {
847 update_requested = 1;
848 pfsyncstats.pfsyncs_badstate++;
852 if (st->proto == IPPROTO_TCP) {
/*
 * The state should never go backwards except
 * for syn-proxy states. Neither should the
 * sequence window slide backwards.
 */
858 if (st->src.state > up->src.state &&
859 (st->src.state < PF_TCPS_PROXY_SRC ||
860 up->src.state >= PF_TCPS_PROXY_SRC))
862 else if (st->dst.state > up->dst.state)
864 else if (SEQ_GT(st->src.seqlo,
865 ntohl(up->src.seqlo)))
867 else if (st->dst.state >= TCPS_SYN_SENT &&
868 SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
/*
 * Non-TCP protocol state machine always go
 * forwards (comparison retained below).
 */
875 if (st->src.state > up->src.state)
877 else if (st->dst.state > up->dst.state)
881 if (pf_status.debug >= PF_DEBUG_MISC)
882 printf("pfsync: ignoring stale update "
884 "creatorid: %08x\n", sfail,
886 ntohl(st->creatorid));
887 pfsyncstats.pfsyncs_badstate++;
889 /* we have a better state, send it out */
890 if ((!stale || update_requested) &&
891 sc->sc_mbuf != NULL) {
893 update_requested = 0;
897 pfsync_pack_state(PFSYNC_ACT_UPD, st,
901 pfsync_alloc_scrub_memory(&up->dst, &st->dst);
902 pf_state_peer_ntoh(&up->src, &st->src);
903 pf_state_peer_ntoh(&up->dst, &st->dst);
904 st->expire = ntohl(up->expire) + time_second;
905 st->timeout = up->timeout;
907 if ((update_requested || stale) && sc->sc_mbuf)
915 case PFSYNC_ACT_DEL_C:
916 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
917 count * sizeof(*dp), &offp)) == NULL) {
918 pfsyncstats.pfsyncs_badlen++;
926 for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
927 i < count; i++, dp++) {
928 bcopy(dp->id, &key.id, sizeof(key.id));
929 key.creatorid = dp->creatorid;
931 st = pf_find_state_byid(&key);
933 pfsyncstats.pfsyncs_badstate++;
936 st->sync_flags |= PFSTATE_FROMSYNC;
944 case PFSYNC_ACT_INS_F:
945 case PFSYNC_ACT_DEL_F:
946 /* not implemented */
948 case PFSYNC_ACT_UREQ:
949 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
950 count * sizeof(*rup), &offp)) == NULL) {
951 pfsyncstats.pfsyncs_badlen++;
959 if (sc->sc_mbuf != NULL)
962 rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
963 i < count; i++, rup++) {
964 bcopy(rup->id, &key.id, sizeof(key.id));
965 key.creatorid = rup->creatorid;
/* id==0 && creatorid==0 is the "send me everything" bulk request. */
967 if (key.id == 0 && key.creatorid == 0) {
968 sc->sc_ureq_received = time_uptime;
969 if (sc->sc_bulk_send_next == NULL)
970 sc->sc_bulk_send_next =
971 TAILQ_FIRST(&state_list);
972 sc->sc_bulk_terminator = sc->sc_bulk_send_next;
973 if (pf_status.debug >= PF_DEBUG_MISC)
974 printf("pfsync: received "
975 "bulk update request\n");
976 pfsync_send_bus(sc, PFSYNC_BUS_START);
978 callout_reset(&sc->sc_bulk_tmo, 1 * hz,
979 pfsync_bulk_update, pfsyncif);
981 timeout_add(&sc->sc_bulk_tmo, 1 * hz);
984 st = pf_find_state_byid(&key);
986 pfsyncstats.pfsyncs_badstate++;
990 pfsync_pack_state(PFSYNC_ACT_UPD,
994 if (sc->sc_mbuf != NULL)
1001 case PFSYNC_ACT_BUS:
1002 /* If we're not waiting for a bulk update, who cares. */
1003 if (sc->sc_ureq_sent == 0)
1006 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
1007 sizeof(*bus), &offp)) == NULL) {
1008 pfsyncstats.pfsyncs_badlen++;
1011 bus = (struct pfsync_state_bus *)(mp->m_data + offp);
1012 switch (bus->status) {
1013 case PFSYNC_BUS_START:
1015 callout_reset(&sc->sc_bulkfail_tmo,
1016 pf_pool_limits[PF_LIMIT_STATES].limit /
1017 (PFSYNC_BULKPACKETS * sc->sc_maxcount),
1018 pfsync_bulkfail, pfsyncif);
1020 timeout_add(&sc->sc_bulkfail_tmo,
1021 pf_pool_limits[PF_LIMIT_STATES].limit /
1022 (PFSYNC_BULKPACKETS * sc->sc_maxcount));
1024 if (pf_status.debug >= PF_DEBUG_MISC)
1025 printf("pfsync: received bulk "
1028 case PFSYNC_BUS_END:
1029 if (time_uptime - ntohl(bus->endtime) >=
/* Bulk update completed in time: drop the carp demotion we took. */
1031 /* that's it, we're happy */
1032 sc->sc_ureq_sent = 0;
1033 sc->sc_bulk_tries = 0;
1034 timeout_del(&sc->sc_bulkfail_tmo);
1036 if (!pfsync_sync_ok)
1038 #ifdef CARP_ADVANCED
1039 carp_group_demote_adj(sc->sc_ifp, -1);
1042 carp_group_demote_adj(&sc->sc_if, -1);
1046 if (pf_status.debug >= PF_DEBUG_MISC)
1047 printf("pfsync: received valid "
1048 "bulk update end\n");
1050 if (pf_status.debug >= PF_DEBUG_MISC)
1051 printf("pfsync: received invalid "
1052 "bulk update end: bad timestamp\n");
1058 case PFSYNC_ACT_TDB_UPD:
1059 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
1060 count * sizeof(*pt), &offp)) == NULL) {
1061 pfsyncstats.pfsyncs_badlen++;
1068 for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp);
1069 i < count; i++, pt++)
1070 pfsync_update_net_tdb(pt);
/*
 * pfsyncoutput() -- if_output handler for the pfsync interface.
 * NOTE(review): only the (truncated) signature line survives in this
 * excerpt; the body is missing entirely.
 */
1085 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
/*
 * pfsyncioctl() -- ioctl handler: interface flags/MTU, SIOCGETPFSYNC
 * (copy current syncdev/syncpeer/maxupdates out to userland) and
 * SIOCSETPFSYNC (privileged: set peer address, maxupdates and sync
 * device, join/leave the pfsync multicast group, and kick off a bulk
 * update request on the new device).
 * NOTE(review): truncated excerpt -- switch labels other than
 * SIOCSIFDSTADDR, error returns, splnet/PF_LOCK brackets and closing
 * braces are missing; FreeBSD/OpenBSD #ifdef pairs appear back-to-back
 * (if_drv_flags vs. if_flags, priv_check vs. suser, sc_ifp vs. sc_if,
 * htonl(INADDR_PFSYNC_GROUP) vs. the pre-converted constant).
 */
1094 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1097 struct proc *p = curproc;
1099 struct pfsync_softc *sc = ifp->if_softc;
1100 struct ifreq *ifr = (struct ifreq *)data;
1101 struct ip_moptions *imo = &sc->sc_imo;
1102 struct pfsyncreq pfsyncr;
1109 case SIOCSIFDSTADDR:
1112 if (ifp->if_flags & IFF_UP)
1113 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1115 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1117 if (ifp->if_flags & IFF_UP)
1118 ifp->if_flags |= IFF_RUNNING;
1120 ifp->if_flags &= ~IFF_RUNNING;
/* Clamp requested MTU to [PFSYNC_MINMTU, MCLBYTES]. */
1124 if (ifr->ifr_mtu < PFSYNC_MINMTU)
1126 if (ifr->ifr_mtu > MCLBYTES)
1127 ifr->ifr_mtu = MCLBYTES;
1132 if (ifr->ifr_mtu < ifp->if_mtu)
1134 pfsync_setmtu(sc, ifr->ifr_mtu);
/* SIOCGETPFSYNC: report current configuration to userland. */
1141 bzero(&pfsyncr, sizeof(pfsyncr));
1142 if (sc->sc_sync_ifp)
1143 strlcpy(pfsyncr.pfsyncr_syncdev,
1144 sc->sc_sync_ifp->if_xname, IFNAMSIZ);
1145 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1146 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1147 if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
/* SIOCSETPFSYNC: privileged reconfiguration. */
1152 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1154 if ((error = suser(p, p->p_acflag)) != 0)
1157 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
/* Zero peer address means "use the pfsync multicast group". */
1163 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1165 sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
1167 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1170 sc->sc_sync_peer.s_addr =
1171 pfsyncr.pfsyncr_syncpeer.s_addr;
1173 if (pfsyncr.pfsyncr_maxupdates > 255)
1182 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
/* Empty syncdev: detach from the sync interface and flush pending pkts. */
1184 if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1185 sc->sc_sync_ifp = NULL;
1186 if (sc->sc_mbuf_net != NULL) {
1187 /* Don't keep stale pfsync packets around. */
1189 m_freem(sc->sc_mbuf_net);
1190 sc->sc_mbuf_net = NULL;
1191 sc->sc_statep_net.s = NULL;
1197 if (imo->imo_num_memberships > 0) {
1198 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1199 imo->imo_multicast_ifp = NULL;
1207 if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
/* Refuse a sync device whose MTU cannot carry our packets. */
1215 if (sifp->if_mtu < sc->sc_ifp->if_mtu ||
1217 if (sifp->if_mtu < sc->sc_if.if_mtu ||
1219 (sc->sc_sync_ifp != NULL &&
1220 sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
1221 sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1223 sc->sc_sync_ifp = sifp;
1226 pfsync_setmtu(sc, sc->sc_ifp->if_mtu);
1228 pfsync_setmtu(sc, sc->sc_if.if_mtu);
/* Drop any old multicast membership before (re)joining. */
1231 if (imo->imo_num_memberships > 0) {
1235 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1239 imo->imo_multicast_ifp = NULL;
1242 if (sc->sc_sync_ifp &&
1244 sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
1246 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1248 struct in_addr addr;
1250 if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
1251 sc->sc_sync_ifp = NULL;
1256 return (EADDRNOTAVAIL);
1260 addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
1262 addr.s_addr = INADDR_PFSYNC_GROUP;
1268 if ((imo->imo_membership[0] =
1269 in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
1270 sc->sc_sync_ifp = NULL;
1277 imo->imo_num_memberships++;
1278 imo->imo_multicast_ifp = sc->sc_sync_ifp;
1279 imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1280 imo->imo_multicast_loop = 0;
1283 if (sc->sc_sync_ifp ||
1285 sc->sc_sendaddr.s_addr != htonl(INADDR_PFSYNC_GROUP)) {
1287 sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
1289 /* Request a full state table update. */
1290 sc->sc_ureq_sent = time_uptime;
/* Demote our carp group until the bulk update completes. */
1294 #ifdef CARP_ADVANCED
1295 carp_group_demote_adj(sc->sc_ifp, 1);
1298 carp_group_demote_adj(&sc->sc_if, 1);
1302 if (pf_status.debug >= PF_DEBUG_MISC)
1303 printf("pfsync: requesting bulk update\n");
1305 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
1306 pfsync_bulkfail, pfsyncif);
1308 timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
1310 error = pfsync_request_update(NULL, NULL);
1311 if (error == ENOMEM) {
/*
 * pfsync_setmtu() -- derive sc_maxcount (max state records per packet,
 * capped at 254) from the requested MTU, bounded above by the sync
 * interface's own MTU, then set the pfsync ifnet MTU to exactly
 * header + maxcount records.  NOTE(review): truncated; the sc_ifp vs.
 * sc_if assignments are the two #ifdef variants of one statement.
 */
1335 pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
1339 if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
1340 mtu = sc->sc_sync_ifp->if_mtu;
1344 sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
1345 sizeof(struct pfsync_state);
1346 if (sc->sc_maxcount > 254)
1347 sc->sc_maxcount = 254;
1349 sc->sc_ifp->if_mtu = sizeof(struct pfsync_header) +
1351 sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
1353 sc->sc_maxcount * sizeof(struct pfsync_state);
/*
 * pfsync_get_mbuf() -- allocate an mbuf (cluster) sized for a packet of
 * the given action type, initialize the pfsync header, point *sp at the
 * payload area, and arm the flush callout so a partially filled packet
 * still goes out.  NOTE(review): truncated excerpt -- m_freem/return
 * paths and the header-count initialization lines are missing.
 */
1357 pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
1359 struct pfsync_header *h;
1363 MGETHDR(m, M_DONTWAIT, MT_DATA);
1366 sc->sc_ifp->if_oerrors++;
1368 sc->sc_if.if_oerrors++;
/* Payload size depends on the action's record type. */
1374 case PFSYNC_ACT_CLR:
1375 len = sizeof(struct pfsync_header) +
1376 sizeof(struct pfsync_state_clr);
1378 case PFSYNC_ACT_UPD_C:
1379 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
1380 sizeof(struct pfsync_header);
1382 case PFSYNC_ACT_DEL_C:
1383 len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
1384 sizeof(struct pfsync_header);
1386 case PFSYNC_ACT_UREQ:
1387 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
1388 sizeof(struct pfsync_header);
1390 case PFSYNC_ACT_BUS:
1391 len = sizeof(struct pfsync_header) +
1392 sizeof(struct pfsync_state_bus);
1395 case PFSYNC_ACT_TDB_UPD:
1396 len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) +
1397 sizeof(struct pfsync_header);
/* default: full state records */
1401 len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
1402 sizeof(struct pfsync_header);
1407 MCLGET(m, M_DONTWAIT);
1408 if ((m->m_flags & M_EXT) == 0) {
1411 sc->sc_ifp->if_oerrors++;
1413 sc->sc_if.if_oerrors++;
/* Align the payload to the end of the cluster, long-aligned. */
1417 m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1);
1421 m->m_pkthdr.rcvif = NULL;
1422 m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
1423 h = mtod(m, struct pfsync_header *);
1424 h->version = PFSYNC_VERSION;
/* TDB updates carry no ruleset checksum; all other actions do. */
1429 if (action != PFSYNC_ACT_TDB_UPD)
1431 bcopy(&pf_status.pf_chksum, &h->pf_chksum,
1432 PF_MD5_DIGEST_LENGTH);
1434 *sp = (void *)((char *)h + PFSYNC_HDRLEN);
1436 if (action == PFSYNC_ACT_TDB_UPD)
1438 callout_reset(&sc->sc_tdb_tmo, hz, pfsync_tdb_timeout,
1441 timeout_add(&sc->sc_tdb_tmo, hz);
1446 callout_reset(&sc->sc_tmo, hz, pfsync_timeout, pfsyncif);
1448 timeout_add(&sc->sc_tmo, hz);
/*
 * pfsync_pack_state() -- append (or merge) one pf_state into the packet
 * currently under construction for `action`.  For updates it first scans
 * the pending packet for an existing record with the same id/creatorid
 * so repeated updates are coalesced (bounded by sc_maxupdates).  It then
 * converts the state to wire format and, when a sync interface is
 * configured and PFSYNC_FLAG_COMPRESS is set, also queues a compressed
 * UPD_C/DEL_C record in the network mbuf.  Sends when the packet fills
 * or the per-state update budget is exhausted.
 * NOTE(review): truncated excerpt -- returns, some braces and the
 * "secs = time_second" style setup lines are missing.
 */
1454 pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
1456 struct ifnet *ifp = NULL;
1457 struct pfsync_softc *sc = pfsyncif;
1458 struct pfsync_header *h, *h_net;
1459 struct pfsync_state *sp = NULL;
1460 struct pfsync_state_upd *up = NULL;
1461 struct pfsync_state_del *dp = NULL;
/* i == 255 is the "no duplicate found" sentinel for the merge scan. */
1465 u_int8_t i = 255, newaction = 0;
/*
 * If a packet falls in the forest and there's nobody around to
 * hear, does it make a sound?
 */
/* i.e. no bpf listener, no sync interface, default peer: drop the work. */
1479 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
1481 sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
1483 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1485 /* Don't leave any stale pfsync packets hanging around. */
1486 if (sc->sc_mbuf != NULL) {
1487 m_freem(sc->sc_mbuf);
1489 sc->sc_statep.s = NULL;
1494 if (action >= PFSYNC_ACT_MAX)
1499 PF_ASSERT(MA_OWNED);
/* Start a new packet, or flush-and-restart if the action changed. */
1501 if (sc->sc_mbuf == NULL) {
1502 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1503 (void *)&sc->sc_statep.s)) == NULL) {
1507 h = mtod(sc->sc_mbuf, struct pfsync_header *);
1509 h = mtod(sc->sc_mbuf, struct pfsync_header *);
1510 if (h->action != action) {
1512 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1513 (void *)&sc->sc_statep.s)) == NULL) {
1517 h = mtod(sc->sc_mbuf, struct pfsync_header *);
/*
 * If it's an update, look in the packet to see if
 * we already have an update for the state.
 */
1523 if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
1524 struct pfsync_state *usp =
1525 (void *)((char *)h + PFSYNC_HDRLEN);
1527 for (i = 0; i < h->count; i++) {
1528 if (!memcmp(usp->id, &st->id,
1530 usp->creatorid == st->creatorid) {
1543 st->pfsync_time = time_uptime;
1546 /* not a "duplicate" update */
1548 sp = sc->sc_statep.s++;
1549 sc->sc_mbuf->m_pkthdr.len =
1550 sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
1552 bzero(sp, sizeof(*sp));
1554 bcopy(&st->id, sp->id, sizeof(sp->id));
1555 sp->creatorid = st->creatorid;
1557 strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname));
1558 pf_state_host_hton(&st->lan, &sp->lan);
1559 pf_state_host_hton(&st->gwy, &sp->gwy);
1560 pf_state_host_hton(&st->ext, &sp->ext);
1562 bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
/* Times go on the wire relative to now (secs). */
1564 sp->creation = htonl(secs - st->creation);
1565 pf_state_counter_hton(st->packets[0], sp->packets[0]);
1566 pf_state_counter_hton(st->packets[1], sp->packets[1]);
1567 pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
1568 pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
/* -1 on the wire means "no rule/anchor bound". */
1569 if ((r = st->rule.ptr) == NULL)
1570 sp->rule = htonl(-1);
1572 sp->rule = htonl(r->nr);
1573 if ((r = st->anchor.ptr) == NULL)
1574 sp->anchor = htonl(-1);
1576 sp->anchor = htonl(r->nr);
1578 sp->proto = st->proto;
1579 sp->direction = st->direction;
1581 sp->allow_opts = st->allow_opts;
1582 sp->timeout = st->timeout;
1584 if (flags & PFSYNC_FLAG_STALE)
1585 sp->sync_flags |= PFSTATE_STALE;
1588 pf_state_peer_hton(&st->src, &sp->src);
1589 pf_state_peer_hton(&st->dst, &sp->dst);
1591 if (st->expire <= secs)
1592 sp->expire = htonl(0);
1594 sp->expire = htonl(st->expire - secs);
1596 /* do we need to build "compressed" actions for network transfer? */
1597 if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
1599 case PFSYNC_ACT_UPD:
1600 newaction = PFSYNC_ACT_UPD_C;
1602 case PFSYNC_ACT_DEL:
1603 newaction = PFSYNC_ACT_DEL_C;
1606 /* by default we just send the uncompressed states */
1612 if (sc->sc_mbuf_net == NULL) {
1613 if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
1614 (void *)&sc->sc_statep_net.s)) == NULL) {
1619 h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);
1621 switch (newaction) {
1622 case PFSYNC_ACT_UPD_C:
/* i < 255: reuse the already-queued compressed record at slot i. */
1624 up = (void *)((char *)h_net +
1625 PFSYNC_HDRLEN + (i * sizeof(*up)));
1629 sc->sc_mbuf_net->m_pkthdr.len =
1630 sc->sc_mbuf_net->m_len += sizeof(*up);
1631 up = sc->sc_statep_net.u++;
1633 bzero(up, sizeof(*up));
1634 bcopy(&st->id, up->id, sizeof(up->id));
1635 up->creatorid = st->creatorid;
1637 up->timeout = st->timeout;
1638 up->expire = sp->expire;
1642 case PFSYNC_ACT_DEL_C:
1643 sc->sc_mbuf_net->m_pkthdr.len =
1644 sc->sc_mbuf_net->m_len += sizeof(*dp);
1645 dp = sc->sc_statep_net.d++;
1648 bzero(dp, sizeof(*dp));
1649 bcopy(&st->id, dp->id, sizeof(dp->id));
1650 dp->creatorid = st->creatorid;
/* Flush when the packet is full or this state hit its update budget. */
1655 if (h->count == sc->sc_maxcount ||
1656 (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
1657 ret = pfsync_sendout(sc);
1663 /* This must be called in splnet() */
/*
 * Queue a PFSYNC_ACT_UREQ (update request) record asking the peer at
 * 'src' to resend the state identified by up->id / up->creatorid.
 * The staging mbuf sc->sc_mbuf is allocated lazily and flushed with
 * pfsync_sendout() once it holds sc_maxcount records.
 * NOTE(review): pfsync_bulkfail() calls this with up == NULL/src == NULL
 * (bulk-update request); the NULL-handling path is in lines elided from
 * this excerpt — the visible code dereferences both pointers. Confirm
 * against the full source.
 */
1665 pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
1667 struct ifnet *ifp = NULL;
1668 struct pfsync_header *h;
1669 struct pfsync_softc *sc = pfsyncif;
1670 struct pfsync_state_upd_req *rup;
/* First record: allocate a fresh UREQ packet. */
1681 if (sc->sc_mbuf == NULL) {
1682 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1683 (void *)&sc->sc_statep.s)) == NULL)
1685 h = mtod(sc->sc_mbuf, struct pfsync_header *);
1687 h = mtod(sc->sc_mbuf, struct pfsync_header *);
/* A packet with a different action is pending: start a new UREQ one. */
1688 if (h->action != PFSYNC_ACT_UREQ) {
1690 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1691 (void *)&sc->sc_statep.s)) == NULL)
1693 h = mtod(sc->sc_mbuf, struct pfsync_header *);
/* Aim the next transmission at the requesting peer. */
1698 sc->sc_sendaddr = *src;
/* Account for the appended record in both packet and mbuf lengths. */
1699 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
1701 rup = sc->sc_statep.r++;
1702 bzero(rup, sizeof(*rup));
1704 bcopy(up->id, rup->id, sizeof(rup->id));
1705 rup->creatorid = up->creatorid;
/* Packet is full: push it out now. */
1708 if (h->count == sc->sc_maxcount)
1709 ret = pfsync_sendout(sc);
/*
 * Broadcast a PFSYNC_ACT_CLR message telling peers to remove all states
 * created by 'creatorid' (optionally restricted to interface 'ifname').
 * The message is sent immediately via pfsync_sendout().
 * Caller must hold the pf lock (PF_ASSERT below).
 */
1715 pfsync_clear_states(u_int32_t creatorid, char *ifname)
1717 struct ifnet *ifp = NULL;
1718 struct pfsync_softc *sc = pfsyncif;
1719 struct pfsync_state_clr *cp;
1731 PF_ASSERT(MA_OWNED);
/* Flush any pending packet first (flush call elided in this excerpt). */
1734 if (sc->sc_mbuf != NULL)
1736 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
1737 (void *)&sc->sc_statep.c)) == NULL) {
1741 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
1742 cp = sc->sc_statep.c;
1743 cp->creatorid = creatorid;
/* Copy is bounded and NUL-terminated by strlcpy. */
1745 strlcpy(cp->ifname, ifname, IFNAMSIZ);
1747 ret = (pfsync_sendout(sc));
/*
 * Timer callback for the state-update staging packet (sc_tmo).
 * Body is elided in this excerpt; presumably flushes sc->sc_mbuf via
 * pfsync_sendout() — confirm against the full source.
 */
1753 pfsync_timeout(void *v)
1755 struct pfsync_softc *sc = v;
/*
 * Timer callback for the TDB staging packet (sc_tdb_tmo): flushes the
 * pending TDB updates with pfsync_tdb_sendout().
 */
1771 pfsync_tdb_timeout(void *v)
1773 struct pfsync_softc *sc = v;
1780 pfsync_tdb_sendout(sc);
1788 /* This must be called in splnet() */
/*
 * Send a PFSYNC_ACT_BUS (bulk-update status) message with the given
 * status code (e.g. PFSYNC_BUS_END).  endtime carries the number of
 * seconds elapsed since the bulk request was received, so peers can
 * judge the transfer's duration.  Only sent when pfsync_sync_ok.
 */
1790 pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
1792 struct pfsync_state_bus *bus;
1795 PF_ASSERT(MA_OWNED);
/* Flush any pending packet first (flush call elided in this excerpt). */
1797 if (sc->sc_mbuf != NULL)
1800 if (pfsync_sync_ok &&
1801 (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
1802 (void *)&sc->sc_statep.b)) != NULL) {
1803 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
1804 bus = sc->sc_statep.b;
1805 bus->creatorid = pf_status.hostid;
1806 bus->status = status;
/* Seconds since the bulk request arrived, in network byte order. */
1807 bus->endtime = htonl(time_uptime - sc->sc_ureq_received);
/*
 * Bulk-update worker, driven by the sc_bulk_tmo callout.  Walks the
 * global state list from sc_bulk_send_next, sending at most
 * sc_maxcount * PFSYNC_BULKPACKETS states per invocation, then either
 * signals completion (PFSYNC_BUS_END) or reschedules itself one tick
 * later to continue where it left off.
 */
1813 pfsync_bulk_update(void *v)
1815 struct pfsync_softc *sc = v;
1817 struct pf_state *state;
/* Flush any pending packet first (flush call elided in this excerpt). */
1823 if (sc->sc_mbuf != NULL)
1827 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
1828 * been sent since the latest request was made.
1830 state = sc->sc_bulk_send_next;
1833 /* send state update if syncable and not already sent */
1834 if (!state->sync_flags
1835 && state->timeout < PFTM_MAX
1836 && state->pfsync_time <= sc->sc_ureq_received) {
1837 pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
1841 /* figure next state to send */
1842 state = TAILQ_NEXT(state, u.s.entry_list);
1844 /* wrap to start of list if we hit the end */
1846 state = TAILQ_FIRST(&state_list);
1847 } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
1848 state != sc->sc_bulk_terminator);
/* Either we ran out of states or came back around to the terminator:
 * the bulk transfer is complete. */
1850 if (!state || state == sc->sc_bulk_terminator) {
1852 pfsync_send_bus(sc, PFSYNC_BUS_END);
1853 sc->sc_ureq_received = 0;
1854 sc->sc_bulk_send_next = NULL;
1855 sc->sc_bulk_terminator = NULL;
1856 timeout_del(&sc->sc_bulk_tmo);
1857 if (pf_status.debug >= PF_DEBUG_MISC)
1858 printf("pfsync: bulk update complete\n");
1860 /* look again for more in a bit */
/* FreeBSD uses callout_reset(); the OpenBSD branch uses timeout_add(). */
1862 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update,
1865 timeout_add(&sc->sc_bulk_tmo, 1);
/* Remember the resume point for the next invocation. */
1867 sc->sc_bulk_send_next = state;
1869 if (sc->sc_mbuf != NULL)
/*
 * Bulk-update failure timer (sc_bulkfail_tmo): if the expected bulk
 * update did not complete, re-issue the update request up to
 * PFSYNC_MAX_BULKTRIES times.  After that, give up, pretend the
 * transfer succeeded and (if we had demoted ourselves for carp)
 * un-demote the group.
 */
1878 pfsync_bulkfail(void *v)
1880 struct pfsync_softc *sc = v;
1886 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
1887 /* Try again in a bit */
/* FreeBSD uses callout_reset(); the OpenBSD branch uses timeout_add(). */
1889 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
1892 timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
/* NULL/NULL requests a full bulk update from the peer. */
1895 error = pfsync_request_update(NULL, NULL);
1896 if (error == ENOMEM) {
1897 if (pf_status.debug >= PF_DEBUG_MISC)
1898 printf("pfsync: cannot allocate mbufs for "
1904 /* Pretend like the transfer was ok */
1905 sc->sc_ureq_sent = 0;
1906 sc->sc_bulk_tries = 0;
1908 if (!pfsync_sync_ok)
1910 #ifdef CARP_ADVANCED
/* Undo the carp demotion applied while we were out of sync. */
1911 carp_group_demote_adj(sc->sc_ifp, -1);
1914 carp_group_demote_adj(&sc->sc_if, -1);
1918 if (pf_status.debug >= PF_DEBUG_MISC)
1919 printf("pfsync: failed to receive "
1920 "bulk update status\n");
1921 timeout_del(&sc->sc_bulkfail_tmo);
1928 /* This must be called in splnet() */
/*
 * Flush the pending state-update packet(s): hand sc->sc_mbuf (and the
 * "compressed" sc->sc_mbuf_net, if any) to pfsync_sendout_mbuf(),
 * tapping bpf listeners on the way out, and reset the staging pointers.
 */
1930 pfsync_sendout(struct pfsync_softc *sc)
/* FreeBSD keeps an ifnet pointer; OpenBSD embeds the ifnet in softc. */
1934 struct ifnet *ifp = sc->sc_ifp;
1936 struct ifnet *ifp = &sc->sc_if;
1942 PF_ASSERT(MA_OWNED);
/* Packet is going out now: cancel the flush timer. */
1944 timeout_del(&sc->sc_tmo);
1946 if (sc->sc_mbuf == NULL)
1950 sc->sc_statep.s = NULL;
1957 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
/* Send the compressed-actions packet first, if one was staged. */
1961 if (sc->sc_mbuf_net) {
1963 m = sc->sc_mbuf_net;
1964 sc->sc_mbuf_net = NULL;
1965 sc->sc_statep_net.s = NULL;
1968 return pfsync_sendout_mbuf(sc, m);
/*
 * Flush the pending TDB-update packet: cancel the TDB flush timer,
 * detach sc->sc_mbuf_tdb, tap bpf listeners and transmit the mbuf via
 * pfsync_sendout_mbuf().
 */
1973 pfsync_tdb_sendout(struct pfsync_softc *sc)
/* FreeBSD keeps an ifnet pointer; OpenBSD embeds the ifnet in softc. */
1977 struct ifnet *ifp = sc->sc_ifp;
1979 struct ifnet *ifp = &sc->sc_if;
1985 PF_ASSERT(MA_OWNED);
1987 timeout_del(&sc->sc_tdb_tmo);
1989 if (sc->sc_mbuf_tdb == NULL)
1991 m = sc->sc_mbuf_tdb;
1992 sc->sc_mbuf_tdb = NULL;
1993 sc->sc_statep_tdb.t = NULL;
2000 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
2004 return pfsync_sendout_mbuf(sc, m);
/*
 * Prepend an IPv4 header to a finished pfsync packet and transmit it.
 * Destination is either the configured unicast syncpeer or the pfsync
 * multicast group; on FreeBSD the actual ip_output() happens later from
 * the taskqueue (pfsync_senddef) to avoid recursion into the stack,
 * while the OpenBSD branch sends inline.
 */
2009 pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m)
2015 PF_ASSERT(MA_OWNED);
/* Only transmit if a sync interface or an explicit peer is set up. */
2017 if (sc->sc_sync_ifp ||
2019 sc->sc_sync_peer.s_addr != htonl(INADDR_PFSYNC_GROUP) {
2021 sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
2023 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
2025 pfsyncstats.pfsyncs_onomem++;
/* Fill in the IPv4 header by hand. */
2028 ip = mtod(m, struct ip *);
2029 ip->ip_v = IPVERSION;
2030 ip->ip_hl = sizeof(*ip) >> 2;
2031 ip->ip_tos = IPTOS_LOWDELAY;
/* OpenBSD keeps ip_len in host order at this layer; FreeBSD (of this
 * era) wants network order here. */
2033 ip->ip_len = m->m_pkthdr.len;
2035 ip->ip_len = htons(m->m_pkthdr.len);
2037 ip->ip_id = htons(ip_randomid());
2041 ip->ip_off = htons(IP_DF);
2043 ip->ip_ttl = PFSYNC_DFLTTL;
2044 ip->ip_p = IPPROTO_PFSYNC;
2047 bzero(&sa, sizeof(sa));
/* Source is chosen by the routing code. */
2048 ip->ip_src.s_addr = INADDR_ANY;
2051 if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP))
2053 if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
2055 m->m_flags |= M_MCAST;
2056 ip->ip_dst = sc->sc_sendaddr;
/* Reset the scratch destination back to the configured peer. */
2057 sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;
2059 pfsyncstats.pfsyncs_opackets++;
/* FreeBSD: queue for deferred transmission by the send task. */
2062 if (!IF_HANDOFF(&sc->sc_ifq, m, NULL))
2063 pfsyncstats.pfsyncs_oerrors++;
2064 taskqueue_enqueue(taskqueue_thread, &pfsyncif->sc_send_task);
/* OpenBSD: transmit directly. */
2066 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
2067 pfsyncstats.pfsyncs_oerrors++;
2076 /* Update an in-kernel tdb. Silently fail if no tdb is found. */
/*
 * Apply a peer's pfsync_tdb record to the matching local IPsec TDB.
 * Replay counter and byte counter are monotonically increasing; a
 * decrease indicates a stale/bogus update and is rejected (counted in
 * pfsyncs_badstate).
 */
2078 pfsync_update_net_tdb(struct pfsync_tdb *pt)
2083 /* check for invalid values */
2084 if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
2085 (pt->dst.sa.sa_family != AF_INET &&
2086 pt->dst.sa.sa_family != AF_INET6))
/* Look up the TDB by (SPI, destination, security protocol). */
2090 tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
/* Convert wire (big-endian) counters to host order in place. */
2092 pt->rpl = ntohl(pt->rpl);
2093 pt->cur_bytes = betoh64(pt->cur_bytes);
2095 /* Neither replay nor byte counter should ever decrease. */
2096 if (pt->rpl < tdb->tdb_rpl ||
2097 pt->cur_bytes < tdb->tdb_cur_bytes) {
2102 tdb->tdb_rpl = pt->rpl;
2103 tdb->tdb_cur_bytes = pt->cur_bytes;
2109 if (pf_status.debug >= PF_DEBUG_MISC)
2110 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
2112 pfsyncstats.pfsyncs_badstate++;
2116 /* One of our local tdbs have been updated, need to sync rpl with others */
/*
 * Queue a PFSYNC_ACT_TDB_UPD record advertising the local TDB's replay
 * and byte counters to peers.  Mirrors pfsync_pack_state(): allocate
 * the staging mbuf lazily, coalesce duplicate updates for the same TDB
 * when sc_maxupdates allows, and flush when the packet fills up.
 * 'output' is non-zero for outbound TDBs, where the advertised replay
 * counter is padded (RPL_INCR) to stay ahead of the master after a
 * failover.
 */
2118 pfsync_update_tdb(struct tdb *tdb, int output)
2120 struct ifnet *ifp = NULL;
2121 struct pfsync_softc *sc = pfsyncif;
2122 struct pfsync_header *h;
2123 struct pfsync_tdb *pt = NULL;
/* Nobody is listening (no bpf tap, no sync interface, default peer):
 * drop any staged packet and do nothing. */
2134 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
2136 sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
2138 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
2140 /* Don't leave any stale pfsync packets hanging around. */
2141 if (sc->sc_mbuf_tdb != NULL) {
2142 m_freem(sc->sc_mbuf_tdb);
2143 sc->sc_mbuf_tdb = NULL;
2144 sc->sc_statep_tdb.t = NULL;
2150 PF_ASSERT(MA_OWNED);
/* First record: allocate a fresh TDB_UPD packet. */
2153 if (sc->sc_mbuf_tdb == NULL) {
2154 if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD,
2155 (void *)&sc->sc_statep_tdb.t)) == NULL) {
2159 h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
2161 h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
2162 if (h->action != PFSYNC_ACT_TDB_UPD) {
2164 * XXX will never happen as long as there's
2165 * only one "TDB action".
2167 pfsync_tdb_sendout(sc);
2168 sc->sc_mbuf_tdb = pfsync_get_mbuf(sc,
2169 PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t);
2170 if (sc->sc_mbuf_tdb == NULL) {
2174 h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
2175 } else if (sc->sc_maxupdates) {
2177 * If it's an update, look in the packet to see if
2178 * we already have an update for the state.
2180 struct pfsync_tdb *u =
2181 (void *)((char *)h + PFSYNC_HDRLEN);
/* Match on (SPI, security protocol, destination address). */
2183 for (i = 0; !pt && i < h->count; i++) {
2184 if (tdb->tdb_spi == u->spi &&
2185 tdb->tdb_sproto == u->sproto &&
2186 !bcmp(&tdb->tdb_dst, &u->dst,
2187 SA_LEN(&u->dst.sa))) {
2197 /* not a "duplicate" update */
2198 pt = sc->sc_statep_tdb.t++;
2199 sc->sc_mbuf_tdb->m_pkthdr.len =
2200 sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb);
2202 bzero(pt, sizeof(*pt));
2204 pt->spi = tdb->tdb_spi;
2205 memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst);
2206 pt->sproto = tdb->tdb_sproto;
2210 * When a failover happens, the master's rpl is probably above
2211 * what we see here (we may be up to a second late), so
2212 * increase it a bit for outbound tdbs to manage most such
2215 * For now, just add an offset that is likely to be larger
2216 * than the number of packets we can see in one second. The RFC
2217 * just says the next packet must have a higher seq value.
2219 * XXX What is a good algorithm for this? We could use
2220 * a rate-determined increase, but to know it, we would have
2221 * to extend struct tdb.
2222 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
2223 * will soon be replaced anyway. For now, just don't handle
2226 #define RPL_INCR 16384
2227 pt->rpl = htonl(tdb->tdb_rpl + (output ? RPL_INCR : 0));
2228 pt->cur_bytes = htobe64(tdb->tdb_cur_bytes);
/* Packet full, or this TDB has hit the per-state update cap: flush. */
2230 if (h->count == sc->sc_maxcount ||
2231 (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates)))
2232 ret = pfsync_tdb_sendout(sc);
/*
 * FreeBSD ifnet-departure event handler: the interface we were syncing
 * over is going away.  Detach from it, drop any staged "network" mbuf,
 * and clear the multicast-membership bookkeeping without calling
 * in_delmulti() (the protocol domain has already purged the reference).
 */
2241 pfsync_ifdetach(void *arg, struct ifnet *ifp)
2243 struct pfsync_softc *sc = (struct pfsync_softc *)arg;
2244 struct ip_moptions *imo;
2246 if (sc == NULL || sc->sc_sync_ifp != ifp)
2247 return; /* not for us; unlocked read */
2251 /* Deal with a member interface going away from under us. */
2252 sc->sc_sync_ifp = NULL;
2253 if (sc->sc_mbuf_net != NULL) {
2254 m_freem(sc->sc_mbuf_net);
2255 sc->sc_mbuf_net = NULL;
2256 sc->sc_statep_net.s = NULL;
2259 if (imo->imo_num_memberships > 0) {
2260 KASSERT(imo->imo_num_memberships == 1,
2261 ("%s: imo_num_memberships != 1", __func__));
2263 * Our event handler is always called after protocol
2264 * domains have been detached from the underlying ifnet.
2265 * Do not call in_delmulti(); we held a single reference
2266 * which the protocol domain has purged in in_purgemaddrs().
2269 imo->imo_membership[--imo->imo_num_memberships] = NULL;
2271 imo->imo_multicast_ifp = NULL;
2278 pfsync_senddef(void *arg, __unused int pending)
2280 struct pfsync_softc *sc = (struct pfsync_softc *)arg;
2284 IF_DEQUEUE(&sc->sc_ifq, m);
2287 /* Deal with a member interface going away from under us. */
2288 if (sc->sc_sync_ifp == NULL) {
2289 pfsyncstats.pfsyncs_oerrors++;
2293 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
2294 pfsyncstats.pfsyncs_oerrors++;
/*
 * FreeBSD module event handler.  Body largely elided in this excerpt;
 * the visible unload path detaches the interface cloner.
 */
2299 pfsync_modevent(module_t mod, int type, void *data)
2308 if_clone_detach(&pfsync_cloner);
/* Kernel-loader glue for the pfsync module (initializer elided in this
 * excerpt; presumably names "pfsync" and pfsync_modevent — confirm). */
2318 static moduledata_t pfsync_mod = {
2324 #define PFSYNC_MODVER 1
2326 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
2327 MODULE_VERSION(pfsync, PFSYNC_MODVER);
2328 MODULE_DEPEND(pflog, pf, PF_MODVER, PF_MODVER, PF_MODVER);
2329 #endif /* __FreeBSD__ */