CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netpfil/pf/if_pfsync.c
Merge ^/head r327341 through r327623.
[FreeBSD/FreeBSD.git] / sys / netpfil / pf / if_pfsync.c
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND ISC)
3  *
4  * Copyright (c) 2002 Michael Shalayeff
5  * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /*-
31  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
32  *
33  * Permission to use, copy, modify, and distribute this software for any
34  * purpose with or without fee is hereby granted, provided that the above
35  * copyright notice and this permission notice appear in all copies.
36  *
37  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
38  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
39  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
40  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
41  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
42  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
43  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44  */
45
46 /*
47  * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $
48  *
49  * Revisions picked from OpenBSD after revision 1.110 import:
50  * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input()
51  * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
52  * 1.120, 1.175 - use monotonic time_uptime
53  * 1.122 - reduce number of updates for non-TCP sessions
54  * 1.125, 1.127 - rewrite merge or stale processing
55  * 1.128 - cleanups
56  * 1.146 - bzero() mbuf before sparsely filling it with data
57  * 1.170 - SIOCSIFMTU checks
58  * 1.126, 1.142 - deferred packets processing
59  * 1.173 - correct expire time processing
60  */
61
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD$");
64
65 #include "opt_inet.h"
66 #include "opt_inet6.h"
67 #include "opt_pf.h"
68
69 #include <sys/param.h>
70 #include <sys/bus.h>
71 #include <sys/endian.h>
72 #include <sys/interrupt.h>
73 #include <sys/kernel.h>
74 #include <sys/lock.h>
75 #include <sys/mbuf.h>
76 #include <sys/module.h>
77 #include <sys/mutex.h>
78 #include <sys/priv.h>
79 #include <sys/protosw.h>
80 #include <sys/socket.h>
81 #include <sys/sockio.h>
82 #include <sys/sysctl.h>
83 #include <sys/syslog.h>
84
85 #include <net/bpf.h>
86 #include <net/if.h>
87 #include <net/if_var.h>
88 #include <net/if_clone.h>
89 #include <net/if_types.h>
90 #include <net/vnet.h>
91 #include <net/pfvar.h>
92 #include <net/if_pfsync.h>
93
94 #include <netinet/if_ether.h>
95 #include <netinet/in.h>
96 #include <netinet/in_var.h>
97 #include <netinet/ip.h>
98 #include <netinet/ip_carp.h>
99 #include <netinet/ip_var.h>
100 #include <netinet/tcp.h>
101 #include <netinet/tcp_fsm.h>
102 #include <netinet/tcp_seq.h>
103
/*
 * Size of an empty pfsync packet: an IP header, the pfsync header and one
 * subheader.  pfsync_clone_create() initialises sc_len to this value.
 */
104 #define PFSYNC_MINPKT ( \
105         sizeof(struct ip) + \
106         sizeof(struct pfsync_header) + \
107         sizeof(struct pfsync_subheader) )
108
/*
 * Per-packet context filled in by pfsync_input() and passed to every
 * action handler.
 */
109 struct pfsync_pkt {
110         struct ip *ip;          /* IP header of the received packet */
111         struct in_addr src;     /* copy of ip->ip_src */
112         u_int8_t flags;         /* PFSYNC_SI_* flags, e.g. PFSYNC_SI_CKSUM */
113 };
114
115 static int      pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
116                     struct pfsync_state_peer *);
/*
 * Input handlers, one per pfsync action.  pfsync_input() invokes them as
 * handler(pkt, m, offset, count): a return value of -1 makes pfsync_input()
 * stop processing the packet, any other value is added to the running
 * offset (the handlers return the number of bytes they covered).
 */
117 static int      pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
118 static int      pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
119 static int      pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
120 static int      pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
121 static int      pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
122 static int      pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
123 static int      pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
124 static int      pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
125 static int      pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
126 static int      pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
127 static int      pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
128 static int      pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);
129
/*
 * Dispatch table indexed by the subheader's action field.  The slot order
 * must follow the PFSYNC_ACT_* numbering named in the trailing comments;
 * pfsync_input() rejects actions >= PFSYNC_ACT_MAX before indexing.
 */
130 static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
131         pfsync_in_clr,                  /* PFSYNC_ACT_CLR */
132         pfsync_in_ins,                  /* PFSYNC_ACT_INS */
133         pfsync_in_iack,                 /* PFSYNC_ACT_INS_ACK */
134         pfsync_in_upd,                  /* PFSYNC_ACT_UPD */
135         pfsync_in_upd_c,                /* PFSYNC_ACT_UPD_C */
136         pfsync_in_ureq,                 /* PFSYNC_ACT_UPD_REQ */
137         pfsync_in_del,                  /* PFSYNC_ACT_DEL */
138         pfsync_in_del_c,                /* PFSYNC_ACT_DEL_C */
139         pfsync_in_error,                /* PFSYNC_ACT_INS_F */
140         pfsync_in_error,                /* PFSYNC_ACT_DEL_F */
141         pfsync_in_bus,                  /* PFSYNC_ACT_BUS */
142         pfsync_in_tdb,                  /* PFSYNC_ACT_TDB */
143         pfsync_in_eof                   /* PFSYNC_ACT_EOF */
144 };
145
/*
 * Description of one outgoing queue type: the routine that serialises a
 * state into a message buffer, the size of that message, and the
 * PFSYNC_ACT_* action it is associated with.
 */
146 struct pfsync_q {
147         void            (*write)(struct pf_state *, void *);
148         size_t          len;
149         u_int8_t        action;
150 };
151
152 /* we have one of these for every PFSYNC_S_ */
153 static void     pfsync_out_state(struct pf_state *, void *);
154 static void     pfsync_out_iack(struct pf_state *, void *);
155 static void     pfsync_out_upd_c(struct pf_state *, void *);
156 static void     pfsync_out_del(struct pf_state *, void *);
157
/*
 * Queue descriptors.  NOTE(review): presumably indexed by the PFSYNC_S_*
 * queue ids (the softc has PFSYNC_S_COUNT queues) - confirm against
 * net/if_pfsync.h before reordering.
 */
158 static struct pfsync_q pfsync_qs[] = {
159         { pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
160         { pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
161         { pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
162         { pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
163         { pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
164 };
165
/* Queue manipulation and state update helpers (defined later in the file). */
166 static void     pfsync_q_ins(struct pf_state *, int, bool);
167 static void     pfsync_q_del(struct pf_state *, bool);
168
169 static void     pfsync_update_state(struct pf_state *);
170
/* An update-request message together with its list linkage (sc_upd_req_list). */
171 struct pfsync_upd_req_item {
172         TAILQ_ENTRY(pfsync_upd_req_item)        ur_entry;
173         struct pfsync_upd_req                   ur_msg;
174 };
175
/*
 * A deferred packet, linked on the softc's sc_deferrals list.  It carries
 * a state (pd_st), an mbuf (pd_m) and a timeout callout (pd_tmo);
 * pfsync_clone_destroy() releases pd_st and frees pd_m when it manages to
 * cancel the callout.
 */
176 struct pfsync_deferral {
177         struct pfsync_softc             *pd_sc;
178         TAILQ_ENTRY(pfsync_deferral)    pd_entry;
179         u_int                           pd_refs;
180         struct callout                  pd_tmo;
181
182         struct pf_state                 *pd_st;
183         struct mbuf                     *pd_m;
184 };
185
/*
 * Software context of the pfsync interface.  pfsync_clone_create() only
 * accepts unit 0 and publishes the resulting softc in V_pfsyncif.
 */
186 struct pfsync_softc {
187         /* Configuration */
188         struct ifnet            *sc_ifp;        /* the pfsync ifnet itself */
189         struct ifnet            *sc_sync_if;    /* pfsync_input() only accepts packets received on it */
190         struct ip_moptions      sc_imo;
191         struct in_addr          sc_sync_peer;
192         uint32_t                sc_flags;       /* PFSYNCF_* below */
193 #define PFSYNCF_OK              0x00000001
194 #define PFSYNCF_DEFER           0x00000002
195 #define PFSYNCF_PUSH            0x00000004
196         uint8_t                 sc_maxupdates;  /* set to 128 at creation */
197         struct ip               sc_template;
198         struct callout          sc_tmo;
199         struct mtx              sc_mtx;         /* taken by PFSYNC_LOCK() */
200
201         /* Queued data */
202         size_t                  sc_len;         /* starts at PFSYNC_MINPKT */
203         TAILQ_HEAD(, pf_state)                  sc_qs[PFSYNC_S_COUNT];
204         TAILQ_HEAD(, pfsync_upd_req_item)       sc_upd_req_list;
205         TAILQ_HEAD(, pfsync_deferral)           sc_deferrals;
206         u_int                   sc_deferred;    /* loop bound when pfsync_clone_destroy() empties sc_deferrals */
207         void                    *sc_plus;
208         size_t                  sc_pluslen;
209
210         /* Bulk update info */
211         struct mtx              sc_bulk_mtx;    /* taken by PFSYNC_BLOCK(); bulk callouts are bound to it */
212         uint32_t                sc_ureq_sent;
213         int                     sc_bulk_tries;
214         uint32_t                sc_ureq_received;
215         int                     sc_bulk_hashid;
216         uint64_t                sc_bulk_stateid;
217         uint32_t                sc_bulk_creatorid;
218         struct callout          sc_bulk_tmo;
219         struct callout          sc_bulkfail_tmo;
220 };
221
/* Lock, unlock and ownership assertion for the softc's main mutex (sc_mtx). */
222 #define PFSYNC_LOCK(sc)         mtx_lock(&(sc)->sc_mtx)
223 #define PFSYNC_UNLOCK(sc)       mtx_unlock(&(sc)->sc_mtx)
224 #define PFSYNC_LOCK_ASSERT(sc)  mtx_assert(&(sc)->sc_mtx, MA_OWNED)
225
/*
 * Same trio for the bulk-update mutex (sc_bulk_mtx), which is also the
 * mutex sc_bulk_tmo and sc_bulkfail_tmo are initialised with.
 */
226 #define PFSYNC_BLOCK(sc)        mtx_lock(&(sc)->sc_bulk_mtx)
227 #define PFSYNC_BUNLOCK(sc)      mtx_unlock(&(sc)->sc_bulk_mtx)
228 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)
229
/* Interface name; also reused as malloc type name and sc_mtx name. */
230 static const char pfsyncname[] = "pfsync";
231 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
/* Per-vnet softc pointer: set by clone_create, cleared by clone_destroy. */
232 static VNET_DEFINE(struct pfsync_softc  *, pfsyncif) = NULL;
233 #define V_pfsyncif              VNET(pfsyncif)
234 static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
235 #define V_pfsync_swi_cookie     VNET(pfsync_swi_cookie)
/* Per-vnet counters, exported through the net.pfsync.stats sysctl. */
236 static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
237 #define V_pfsyncstats           VNET(pfsyncstats)
/* CARP demotion adjustment, exported as net.pfsync.carp_demotion_factor. */
238 static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW;
239 #define V_pfsync_carp_adj       VNET(pfsync_carp_adj)
240
/* Forward declarations: transmit path, multicast setup and module glue. */
241 static void     pfsync_timeout(void *);
242 static void     pfsync_push(struct pfsync_softc *);
243 static void     pfsyncintr(void *);
244 static int      pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
245                     void *);
246 static void     pfsync_multicast_cleanup(struct pfsync_softc *);
247 static void     pfsync_pointers_init(void);
248 static void     pfsync_pointers_uninit(void);
249 static int      pfsync_init(void);
250 static void     pfsync_uninit(void);
251
252 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
253 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
254     &VNET_NAME(pfsyncstats), pfsyncstats,
255     "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
256 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
257     &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
258
/* Interface cloning and ifnet entry points. */
259 static int      pfsync_clone_create(struct if_clone *, int, caddr_t);
260 static void     pfsync_clone_destroy(struct ifnet *);
261 static int      pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
262                     struct pf_state_peer *);
263 static int      pfsyncoutput(struct ifnet *, struct mbuf *,
264                     const struct sockaddr *, struct route *);
265 static int      pfsyncioctl(struct ifnet *, u_long, caddr_t);
266
/* Deferred packet handling (see struct pfsync_deferral). */
267 static int      pfsync_defer(struct pf_state *, struct mbuf *);
268 static void     pfsync_undefer(struct pfsync_deferral *, int);
269 static void     pfsync_undefer_state(struct pf_state *, int);
270 static void     pfsync_defer_tmo(void *);
271
272 static void     pfsync_request_update(u_int32_t, u_int64_t);
273 static void     pfsync_update_state_req(struct pf_state *);
274
275 static void     pfsync_drop(struct pfsync_softc *);
276 static void     pfsync_sendout(int);
277 static void     pfsync_send_plus(void *, size_t);
278
/* Bulk update machinery. */
279 static void     pfsync_bulk_start(void);
280 static void     pfsync_bulk_status(u_int8_t);
281 static void     pfsync_bulk_update(void *);
282 static void     pfsync_bulk_fail(void *);
283
284 #ifdef IPSEC
285 static void     pfsync_update_net_tdb(struct pfsync_tdb *);
286 #endif
287
/* NOTE(review): presumably the cap on sc_bulk_tries - its use is not visible here. */
288 #define PFSYNC_MAX_BULKTRIES    12
289
/* Per-vnet interface cloner handle. */
290 VNET_DEFINE(struct if_clone *, pfsync_cloner);
291 #define V_pfsync_cloner VNET(pfsync_cloner)
292
/*
 * Cloner "create" callback: allocate and attach the pfsync interface.
 *
 * Only unit 0 is accepted (EINVAL otherwise).  Returns ENOSPC when the
 * ifnet cannot be allocated and 0 on success, after which the new softc is
 * published in V_pfsyncif.  The param argument is not used.
 */
293 static int
294 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
295 {
296         struct pfsync_softc *sc;
297         struct ifnet *ifp;
298         int q;
299
300         if (unit != 0)
301                 return (EINVAL);
302
        /* M_WAITOK: the allocation is allowed to sleep, so sc is not checked. */
303         sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
304         sc->sc_flags |= PFSYNCF_OK;
305
306         for (q = 0; q < PFSYNC_S_COUNT; q++)
307                 TAILQ_INIT(&sc->sc_qs[q]);
308
309         TAILQ_INIT(&sc->sc_upd_req_list);
310         TAILQ_INIT(&sc->sc_deferrals);
311
        /* An empty packet consists of the headers only. */
312         sc->sc_len = PFSYNC_MINPKT;
313         sc->sc_maxupdates = 128;
314
315         ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
316         if (ifp == NULL) {
317                 free(sc, M_PFSYNC);
318                 return (ENOSPC);
319         }
320         if_initname(ifp, pfsyncname, unit);
321         ifp->if_softc = sc;
322         ifp->if_ioctl = pfsyncioctl;
323         ifp->if_output = pfsyncoutput;
324         ifp->if_type = IFT_PFSYNC;
325         ifp->if_snd.ifq_maxlen = ifqmaxlen;
326         ifp->if_hdrlen = sizeof(struct pfsync_header);
327         ifp->if_mtu = ETHERMTU;
328         mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
329         mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
        /* The bulk callouts are tied to sc_bulk_mtx; sc_tmo has no associated mutex. */
330         callout_init(&sc->sc_tmo, 1);
331         callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
332         callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);
333
334         if_attach(ifp);
335
336         bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
337
        /* Publish the softc only once it is fully set up. */
338         V_pfsyncif = sc;
339
340         return (0);
341 }
342
/*
 * Cloner "destroy" callback: tear down the pfsync interface.
 *
 * Empties the deferral list, drains the softc callouts, undoes the CARP
 * demotion if PFSYNCF_OK is not set, detaches from bpf and the interface
 * list, drops queued data and finally frees the softc.
 */
343 static void
344 pfsync_clone_destroy(struct ifnet *ifp)
345 {
346         struct pfsync_softc *sc = ifp->if_softc;
347
348         /*
349          * At this stage, everything should have already been
350          * cleared by pfsync_uninit(), and we have only to
351          * drain callouts.
352          */
353         while (sc->sc_deferred > 0) {
354                 struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals);
355
356                 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
357                 sc->sc_deferred--;
                /*
                 * callout_stop() > 0: the timeout was cancelled before it
                 * ran, so the state reference and the mbuf are released
                 * here.  Otherwise wait for the handler to finish; the
                 * state and mbuf are not touched on that path.
                 * NOTE(review): pd_refs++ presumably tells
                 * pfsync_defer_tmo() not to free pd itself - confirm there.
                 */
358                 if (callout_stop(&pd->pd_tmo) > 0) {
359                         pf_release_state(pd->pd_st);
360                         m_freem(pd->pd_m);
361                         free(pd, M_PFSYNC);
362                 } else {
363                         pd->pd_refs++;
364                         callout_drain(&pd->pd_tmo);
365                         free(pd, M_PFSYNC);
366                 }
367         }
368
369         callout_drain(&sc->sc_tmo);
370         callout_drain(&sc->sc_bulkfail_tmo);
371         callout_drain(&sc->sc_bulk_tmo);
372
        /* PFSYNCF_OK cleared: undo the CARP demotion, if the CARP hook is set. */
373         if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
374                 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
375         bpfdetach(ifp);
376         if_detach(ifp);
377
378         pfsync_drop(sc);
379
380         if_free(ifp);
381         if (sc->sc_imo.imo_membership)
382                 pfsync_multicast_cleanup(sc);
383         mtx_destroy(&sc->sc_mtx);
384         mtx_destroy(&sc->sc_bulk_mtx);
385         free(sc, M_PFSYNC);
386
        /*
         * NOTE(review): V_pfsyncif still points at the freed softc until
         * this line; check that no reader can run in that window.
         */
387         V_pfsyncif = NULL;
388 }
389
390 static int
391 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
392     struct pf_state_peer *d)
393 {
394         if (s->scrub.scrub_flag && d->scrub == NULL) {
395                 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
396                 if (d->scrub == NULL)
397                         return (ENOMEM);
398         }
399
400         return (0);
401 }
402
403
/*
 * Build a local pf state from a wire-format pfsync state and insert it
 * into the state table.
 *
 * sp    - the state as received; multi-byte fields are converted here.
 * flags - PFSYNC_SI_* flags.  PFSYNC_SI_IOCTL or PFSYNC_SI_CKSUM allow
 *         sp->rule to be mapped onto the active filter ruleset;
 *         PFSYNC_SI_IOCTL also turns an unknown interface into an error
 *         and skips the NOSYNC/insert-ack handling.
 *
 * Returns 0 on success, and also when the state is skipped because its
 * interface is unknown on the non-ioctl path.  Returns EINVAL for a zero
 * creator id or for an unknown interface on the ioctl path, ENOMEM when
 * an allocation fails or the rule's max_states limit is reached, or the
 * error reported by pf_state_insert().
 *
 * The pf rules read lock must be held (PF_RULES_RASSERT()).
 */
404 static int
405 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
406 {
407         struct pfsync_softc *sc = V_pfsyncif;
408 #ifndef __NO_STRICT_ALIGNMENT
409         struct pfsync_state_key key[2];
410 #endif
411         struct pfsync_state_key *kw, *ks;
412         struct pf_state *st = NULL;
413         struct pf_state_key *skw = NULL, *sks = NULL;
414         struct pf_rule *r = NULL;
415         struct pfi_kif  *kif;
416         int error;
417
418         PF_RULES_RASSERT();
419
420         if (sp->creatorid == 0) {
421                 if (V_pf_status.debug >= PF_DEBUG_MISC)
422                         printf("%s: invalid creator id: %08x\n", __func__,
423                             ntohl(sp->creatorid));
424                 return (EINVAL);
425         }
426
427         if ((kif = pfi_kif_find(sp->ifname)) == NULL) {
428                 if (V_pf_status.debug >= PF_DEBUG_MISC)
429                         printf("%s: unknown interface: %s\n", __func__,
430                             sp->ifname);
431                 if (flags & PFSYNC_SI_IOCTL)
432                         return (EINVAL);
433                 return (0);     /* skip this state */
434         }
435
436         /*
437          * If the ruleset checksums match or the state is coming from the ioctl,
438          * it's safe to associate the state with the rule of that number.
439          */
440         if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
441             (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
442             pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
443                 r = pf_main_ruleset.rules[
444                     PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
445         else
446                 r = &V_pf_default_rule;
447
        /* Respect the rule's state limit; reported as ENOMEM via cleanup. */
448         if ((r->max_states &&
449             counter_u64_fetch(r->states_cur) >= r->max_states))
450                 goto cleanup;
451
452         /*
453          * XXXGL: consider M_WAITOK in ioctl path after.
454          */
455         if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL)
456                 goto cleanup;
457
458         if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
459                 goto cleanup;
460
        /*
         * On strict-alignment platforms work on an aligned local copy of
         * the two wire keys instead of pointing into the packet.
         */
461 #ifndef __NO_STRICT_ALIGNMENT
462         bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2);
463         kw = &key[PF_SK_WIRE];
464         ks = &key[PF_SK_STACK];
465 #else
466         kw = &sp->key[PF_SK_WIRE];
467         ks = &sp->key[PF_SK_STACK];
468 #endif
469
        /*
         * A second state key is only needed when the wire and stack sides
         * differ in address or port; otherwise both sides share skw.
         */
470         if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) ||
471             PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) ||
472             kw->port[0] != ks->port[0] ||
473             kw->port[1] != ks->port[1]) {
474                 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
475                 if (sks == NULL)
476                         goto cleanup;
477         } else
478                 sks = skw;
479
480         /* allocate memory for scrub info */
481         if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
482             pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
483                 goto cleanup;
484
485         /* Copy to state key(s). */
486         skw->addr[0] = kw->addr[0];
487         skw->addr[1] = kw->addr[1];
488         skw->port[0] = kw->port[0];
489         skw->port[1] = kw->port[1];
490         skw->proto = sp->proto;
491         skw->af = sp->af;
492         if (sks != skw) {
493                 sks->addr[0] = ks->addr[0];
494                 sks->addr[1] = ks->addr[1];
495                 sks->port[0] = ks->port[0];
496                 sks->port[1] = ks->port[1];
497                 sks->proto = sp->proto;
498                 sks->af = sp->af;
499         }
500
501         /* copy to state */
502         bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
        /* sp->creation is an age; convert it to a local uptime timestamp. */
503         st->creation = time_uptime - ntohl(sp->creation);
504         st->expire = time_uptime;
505         if (sp->expire) {
506                 uint32_t timeout;
507
                /*
                 * sp->timeout indexes the timeout tables; the callers
                 * visible here reject values >= PFTM_MAX beforehand.
                 */
508                 timeout = r->timeout[sp->timeout];
509                 if (!timeout)
510                         timeout = V_pf_default_rule.timeout[sp->timeout];
511
512                 /* sp->expire may have been adaptively scaled by export. */
513                 st->expire -= timeout - ntohl(sp->expire);
514         }
515
516         st->direction = sp->direction;
517         st->log = sp->log;
518         st->timeout = sp->timeout;
519         st->state_flags = sp->state_flags;
520
521         st->id = sp->id;
522         st->creatorid = sp->creatorid;
523         pf_state_peer_ntoh(&sp->src, &st->src);
524         pf_state_peer_ntoh(&sp->dst, &st->dst);
525
526         st->rule.ptr = r;
527         st->nat_rule.ptr = NULL;
528         st->anchor.ptr = NULL;
529         st->rt_kif = NULL;
530
531         st->pfsync_time = time_uptime;
532         st->sync_state = PFSYNC_S_NONE;
533
        /*
         * States that did not come from the ioctl carry PFSTATE_NOSYNC
         * across the insert; the flag is cleared again below.
         * NOTE(review): presumably this keeps the insert from being
         * advertised back to the peers - confirm in pf_state_insert().
         */
534         if (!(flags & PFSYNC_SI_IOCTL))
535                 st->state_flags |= PFSTATE_NOSYNC;
536
537         if ((error = pf_state_insert(kif, skw, sks, st)) != 0)
538                 goto cleanup_state;
539
540         /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
541         counter_u64_add(r->states_cur, 1);
542         counter_u64_add(r->states_tot, 1);
543
544         if (!(flags & PFSYNC_SI_IOCTL)) {
545                 st->state_flags &= ~PFSTATE_NOSYNC;
                /* The sender asked for an acknowledgement: queue it and push. */
546                 if (st->state_flags & PFSTATE_ACK) {
547                         pfsync_q_ins(st, PFSYNC_S_IACK, true);
548                         pfsync_push(sc);
549                 }
550         }
551         st->state_flags &= ~PFSTATE_ACK;
        /* NOTE(review): implies pf_state_insert() returns with the state locked. */
552         PF_STATE_UNLOCK(st);
553
554         return (0);
555
        /*
         * Reached for allocation failures and for the max_states limit;
         * all of them are reported as ENOMEM.  A key shared between both
         * sides must only be freed once.
         */
556 cleanup:
557         error = ENOMEM;
558         if (skw == sks)
559                 sks = NULL;
560         if (skw != NULL)
561                 uma_zfree(V_pf_state_key_z, skw);
562         if (sks != NULL)
563                 uma_zfree(V_pf_state_key_z, sks);
564
565 cleanup_state:  /* pf_state_insert() frees the state keys. */
566         if (st) {
567                 if (st->dst.scrub)
568                         uma_zfree(V_pf_state_scrub_z, st->dst.scrub);
569                 if (st->src.scrub)
570                         uma_zfree(V_pf_state_scrub_z, st->src.scrub);
571                 uma_zfree(V_pf_state_z, st);
572         }
573         return (error);
574 }
575
576 static int
577 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused)
578 {
579         struct pfsync_softc *sc = V_pfsyncif;
580         struct pfsync_pkt pkt;
581         struct mbuf *m = *mp;
582         struct ip *ip = mtod(m, struct ip *);
583         struct pfsync_header *ph;
584         struct pfsync_subheader subh;
585
586         int offset, len;
587         int rv;
588         uint16_t count;
589
590         *mp = NULL;
591         V_pfsyncstats.pfsyncs_ipackets++;
592
593         /* Verify that we have a sync interface configured. */
594         if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
595             (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
596                 goto done;
597
598         /* verify that the packet came in on the right interface */
599         if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
600                 V_pfsyncstats.pfsyncs_badif++;
601                 goto done;
602         }
603
604         if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
605         if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
606         /* verify that the IP TTL is 255. */
607         if (ip->ip_ttl != PFSYNC_DFLTTL) {
608                 V_pfsyncstats.pfsyncs_badttl++;
609                 goto done;
610         }
611
612         offset = ip->ip_hl << 2;
613         if (m->m_pkthdr.len < offset + sizeof(*ph)) {
614                 V_pfsyncstats.pfsyncs_hdrops++;
615                 goto done;
616         }
617
618         if (offset + sizeof(*ph) > m->m_len) {
619                 if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
620                         V_pfsyncstats.pfsyncs_hdrops++;
621                         return (IPPROTO_DONE);
622                 }
623                 ip = mtod(m, struct ip *);
624         }
625         ph = (struct pfsync_header *)((char *)ip + offset);
626
627         /* verify the version */
628         if (ph->version != PFSYNC_VERSION) {
629                 V_pfsyncstats.pfsyncs_badver++;
630                 goto done;
631         }
632
633         len = ntohs(ph->len) + offset;
634         if (m->m_pkthdr.len < len) {
635                 V_pfsyncstats.pfsyncs_badlen++;
636                 goto done;
637         }
638
639         /* Cheaper to grab this now than having to mess with mbufs later */
640         pkt.ip = ip;
641         pkt.src = ip->ip_src;
642         pkt.flags = 0;
643
644         /*
645          * Trusting pf_chksum during packet processing, as well as seeking
646          * in interface name tree, require holding PF_RULES_RLOCK().
647          */
648         PF_RULES_RLOCK();
649         if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
650                 pkt.flags |= PFSYNC_SI_CKSUM;
651
652         offset += sizeof(*ph);
653         while (offset <= len - sizeof(subh)) {
654                 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
655                 offset += sizeof(subh);
656
657                 if (subh.action >= PFSYNC_ACT_MAX) {
658                         V_pfsyncstats.pfsyncs_badact++;
659                         PF_RULES_RUNLOCK();
660                         goto done;
661                 }
662
663                 count = ntohs(subh.count);
664                 V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
665                 rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count);
666                 if (rv == -1) {
667                         PF_RULES_RUNLOCK();
668                         return (IPPROTO_DONE);
669                 }
670
671                 offset += rv;
672         }
673         PF_RULES_RUNLOCK();
674
675 done:
676         m_freem(m);
677         return (IPPROTO_DONE);
678 }
679
680 static int
681 pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
682 {
683         struct pfsync_clr *clr;
684         struct mbuf *mp;
685         int len = sizeof(*clr) * count;
686         int i, offp;
687         u_int32_t creatorid;
688
689         mp = m_pulldown(m, offset, len, &offp);
690         if (mp == NULL) {
691                 V_pfsyncstats.pfsyncs_badlen++;
692                 return (-1);
693         }
694         clr = (struct pfsync_clr *)(mp->m_data + offp);
695
696         for (i = 0; i < count; i++) {
697                 creatorid = clr[i].creatorid;
698
699                 if (clr[i].ifname[0] != '\0' &&
700                     pfi_kif_find(clr[i].ifname) == NULL)
701                         continue;
702
703                 for (int i = 0; i <= pf_hashmask; i++) {
704                         struct pf_idhash *ih = &V_pf_idhash[i];
705                         struct pf_state *s;
706 relock:
707                         PF_HASHROW_LOCK(ih);
708                         LIST_FOREACH(s, &ih->states, entry) {
709                                 if (s->creatorid == creatorid) {
710                                         s->state_flags |= PFSTATE_NOSYNC;
711                                         pf_unlink_state(s, PF_ENTER_LOCKED);
712                                         goto relock;
713                                 }
714                         }
715                         PF_HASHROW_UNLOCK(ih);
716                 }
717         }
718
719         return (len);
720 }
721
722 static int
723 pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
724 {
725         struct mbuf *mp;
726         struct pfsync_state *sa, *sp;
727         int len = sizeof(*sp) * count;
728         int i, offp;
729
730         mp = m_pulldown(m, offset, len, &offp);
731         if (mp == NULL) {
732                 V_pfsyncstats.pfsyncs_badlen++;
733                 return (-1);
734         }
735         sa = (struct pfsync_state *)(mp->m_data + offp);
736
737         for (i = 0; i < count; i++) {
738                 sp = &sa[i];
739
740                 /* Check for invalid values. */
741                 if (sp->timeout >= PFTM_MAX ||
742                     sp->src.state > PF_TCPS_PROXY_DST ||
743                     sp->dst.state > PF_TCPS_PROXY_DST ||
744                     sp->direction > PF_OUT ||
745                     (sp->af != AF_INET && sp->af != AF_INET6)) {
746                         if (V_pf_status.debug >= PF_DEBUG_MISC)
747                                 printf("%s: invalid value\n", __func__);
748                         V_pfsyncstats.pfsyncs_badval++;
749                         continue;
750                 }
751
752                 if (pfsync_state_import(sp, pkt->flags) == ENOMEM)
753                         /* Drop out, but process the rest of the actions. */
754                         break;
755         }
756
757         return (len);
758 }
759
760 static int
761 pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
762 {
763         struct pfsync_ins_ack *ia, *iaa;
764         struct pf_state *st;
765
766         struct mbuf *mp;
767         int len = count * sizeof(*ia);
768         int offp, i;
769
770         mp = m_pulldown(m, offset, len, &offp);
771         if (mp == NULL) {
772                 V_pfsyncstats.pfsyncs_badlen++;
773                 return (-1);
774         }
775         iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
776
777         for (i = 0; i < count; i++) {
778                 ia = &iaa[i];
779
780                 st = pf_find_state_byid(ia->id, ia->creatorid);
781                 if (st == NULL)
782                         continue;
783
784                 if (st->state_flags & PFSTATE_ACK) {
785                         PFSYNC_LOCK(V_pfsyncif);
786                         pfsync_undefer_state(st, 0);
787                         PFSYNC_UNLOCK(V_pfsyncif);
788                 }
789                 PF_STATE_UNLOCK(st);
790         }
791         /*
792          * XXX this is not yet implemented, but we know the size of the
793          * message so we can skip it.
794          */
795
796         return (count * sizeof(struct pfsync_ins_ack));
797 }
798
799 static int
800 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
801     struct pfsync_state_peer *dst)
802 {
803         int sync = 0;
804
805         PF_STATE_LOCK_ASSERT(st);
806
807         /*
808          * The state should never go backwards except
809          * for syn-proxy states.  Neither should the
810          * sequence window slide backwards.
811          */
812         if ((st->src.state > src->state &&
813             (st->src.state < PF_TCPS_PROXY_SRC ||
814             src->state >= PF_TCPS_PROXY_SRC)) ||
815
816             (st->src.state == src->state &&
817             SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
818                 sync++;
819         else
820                 pf_state_peer_ntoh(src, &st->src);
821
822         if ((st->dst.state > dst->state) ||
823
824             (st->dst.state >= TCPS_SYN_SENT &&
825             SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
826                 sync++;
827         else
828                 pf_state_peer_ntoh(dst, &st->dst);
829
830         return (sync);
831 }
832
833 static int
834 pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
835 {
836         struct pfsync_softc *sc = V_pfsyncif;
837         struct pfsync_state *sa, *sp;
838         struct pf_state *st;
839         int sync;
840
841         struct mbuf *mp;
842         int len = count * sizeof(*sp);
843         int offp, i;
844
845         mp = m_pulldown(m, offset, len, &offp);
846         if (mp == NULL) {
847                 V_pfsyncstats.pfsyncs_badlen++;
848                 return (-1);
849         }
850         sa = (struct pfsync_state *)(mp->m_data + offp);
851
852         for (i = 0; i < count; i++) {
853                 sp = &sa[i];
854
855                 /* check for invalid values */
856                 if (sp->timeout >= PFTM_MAX ||
857                     sp->src.state > PF_TCPS_PROXY_DST ||
858                     sp->dst.state > PF_TCPS_PROXY_DST) {
859                         if (V_pf_status.debug >= PF_DEBUG_MISC) {
860                                 printf("pfsync_input: PFSYNC_ACT_UPD: "
861                                     "invalid value\n");
862                         }
863                         V_pfsyncstats.pfsyncs_badval++;
864                         continue;
865                 }
866
867                 st = pf_find_state_byid(sp->id, sp->creatorid);
868                 if (st == NULL) {
869                         /* insert the update */
870                         if (pfsync_state_import(sp, 0))
871                                 V_pfsyncstats.pfsyncs_badstate++;
872                         continue;
873                 }
874
875                 if (st->state_flags & PFSTATE_ACK) {
876                         PFSYNC_LOCK(sc);
877                         pfsync_undefer_state(st, 1);
878                         PFSYNC_UNLOCK(sc);
879                 }
880
881                 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
882                         sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
883                 else {
884                         sync = 0;
885
886                         /*
887                          * Non-TCP protocol state machine always go
888                          * forwards
889                          */
890                         if (st->src.state > sp->src.state)
891                                 sync++;
892                         else
893                                 pf_state_peer_ntoh(&sp->src, &st->src);
894                         if (st->dst.state > sp->dst.state)
895                                 sync++;
896                         else
897                                 pf_state_peer_ntoh(&sp->dst, &st->dst);
898                 }
899                 if (sync < 2) {
900                         pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
901                         pf_state_peer_ntoh(&sp->dst, &st->dst);
902                         st->expire = time_uptime;
903                         st->timeout = sp->timeout;
904                 }
905                 st->pfsync_time = time_uptime;
906
907                 if (sync) {
908                         V_pfsyncstats.pfsyncs_stale++;
909
910                         pfsync_update_state(st);
911                         PF_STATE_UNLOCK(st);
912                         PFSYNC_LOCK(sc);
913                         pfsync_push(sc);
914                         PFSYNC_UNLOCK(sc);
915                         continue;
916                 }
917                 PF_STATE_UNLOCK(st);
918         }
919
920         return (len);
921 }
922
923 static int
924 pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
925 {
926         struct pfsync_softc *sc = V_pfsyncif;
927         struct pfsync_upd_c *ua, *up;
928         struct pf_state *st;
929         int len = count * sizeof(*up);
930         int sync;
931         struct mbuf *mp;
932         int offp, i;
933
934         mp = m_pulldown(m, offset, len, &offp);
935         if (mp == NULL) {
936                 V_pfsyncstats.pfsyncs_badlen++;
937                 return (-1);
938         }
939         ua = (struct pfsync_upd_c *)(mp->m_data + offp);
940
941         for (i = 0; i < count; i++) {
942                 up = &ua[i];
943
944                 /* check for invalid values */
945                 if (up->timeout >= PFTM_MAX ||
946                     up->src.state > PF_TCPS_PROXY_DST ||
947                     up->dst.state > PF_TCPS_PROXY_DST) {
948                         if (V_pf_status.debug >= PF_DEBUG_MISC) {
949                                 printf("pfsync_input: "
950                                     "PFSYNC_ACT_UPD_C: "
951                                     "invalid value\n");
952                         }
953                         V_pfsyncstats.pfsyncs_badval++;
954                         continue;
955                 }
956
957                 st = pf_find_state_byid(up->id, up->creatorid);
958                 if (st == NULL) {
959                         /* We don't have this state. Ask for it. */
960                         PFSYNC_LOCK(sc);
961                         pfsync_request_update(up->creatorid, up->id);
962                         PFSYNC_UNLOCK(sc);
963                         continue;
964                 }
965
966                 if (st->state_flags & PFSTATE_ACK) {
967                         PFSYNC_LOCK(sc);
968                         pfsync_undefer_state(st, 1);
969                         PFSYNC_UNLOCK(sc);
970                 }
971
972                 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
973                         sync = pfsync_upd_tcp(st, &up->src, &up->dst);
974                 else {
975                         sync = 0;
976
977                         /*
978                          * Non-TCP protocol state machine always go
979                          * forwards
980                          */
981                         if (st->src.state > up->src.state)
982                                 sync++;
983                         else
984                                 pf_state_peer_ntoh(&up->src, &st->src);
985                         if (st->dst.state > up->dst.state)
986                                 sync++;
987                         else
988                                 pf_state_peer_ntoh(&up->dst, &st->dst);
989                 }
990                 if (sync < 2) {
991                         pfsync_alloc_scrub_memory(&up->dst, &st->dst);
992                         pf_state_peer_ntoh(&up->dst, &st->dst);
993                         st->expire = time_uptime;
994                         st->timeout = up->timeout;
995                 }
996                 st->pfsync_time = time_uptime;
997
998                 if (sync) {
999                         V_pfsyncstats.pfsyncs_stale++;
1000
1001                         pfsync_update_state(st);
1002                         PF_STATE_UNLOCK(st);
1003                         PFSYNC_LOCK(sc);
1004                         pfsync_push(sc);
1005                         PFSYNC_UNLOCK(sc);
1006                         continue;
1007                 }
1008                 PF_STATE_UNLOCK(st);
1009         }
1010
1011         return (len);
1012 }
1013
1014 static int
1015 pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1016 {
1017         struct pfsync_upd_req *ur, *ura;
1018         struct mbuf *mp;
1019         int len = count * sizeof(*ur);
1020         int i, offp;
1021
1022         struct pf_state *st;
1023
1024         mp = m_pulldown(m, offset, len, &offp);
1025         if (mp == NULL) {
1026                 V_pfsyncstats.pfsyncs_badlen++;
1027                 return (-1);
1028         }
1029         ura = (struct pfsync_upd_req *)(mp->m_data + offp);
1030
1031         for (i = 0; i < count; i++) {
1032                 ur = &ura[i];
1033
1034                 if (ur->id == 0 && ur->creatorid == 0)
1035                         pfsync_bulk_start();
1036                 else {
1037                         st = pf_find_state_byid(ur->id, ur->creatorid);
1038                         if (st == NULL) {
1039                                 V_pfsyncstats.pfsyncs_badstate++;
1040                                 continue;
1041                         }
1042                         if (st->state_flags & PFSTATE_NOSYNC) {
1043                                 PF_STATE_UNLOCK(st);
1044                                 continue;
1045                         }
1046
1047                         pfsync_update_state_req(st);
1048                         PF_STATE_UNLOCK(st);
1049                 }
1050         }
1051
1052         return (len);
1053 }
1054
1055 static int
1056 pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1057 {
1058         struct mbuf *mp;
1059         struct pfsync_state *sa, *sp;
1060         struct pf_state *st;
1061         int len = count * sizeof(*sp);
1062         int offp, i;
1063
1064         mp = m_pulldown(m, offset, len, &offp);
1065         if (mp == NULL) {
1066                 V_pfsyncstats.pfsyncs_badlen++;
1067                 return (-1);
1068         }
1069         sa = (struct pfsync_state *)(mp->m_data + offp);
1070
1071         for (i = 0; i < count; i++) {
1072                 sp = &sa[i];
1073
1074                 st = pf_find_state_byid(sp->id, sp->creatorid);
1075                 if (st == NULL) {
1076                         V_pfsyncstats.pfsyncs_badstate++;
1077                         continue;
1078                 }
1079                 st->state_flags |= PFSTATE_NOSYNC;
1080                 pf_unlink_state(st, PF_ENTER_LOCKED);
1081         }
1082
1083         return (len);
1084 }
1085
1086 static int
1087 pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1088 {
1089         struct mbuf *mp;
1090         struct pfsync_del_c *sa, *sp;
1091         struct pf_state *st;
1092         int len = count * sizeof(*sp);
1093         int offp, i;
1094
1095         mp = m_pulldown(m, offset, len, &offp);
1096         if (mp == NULL) {
1097                 V_pfsyncstats.pfsyncs_badlen++;
1098                 return (-1);
1099         }
1100         sa = (struct pfsync_del_c *)(mp->m_data + offp);
1101
1102         for (i = 0; i < count; i++) {
1103                 sp = &sa[i];
1104
1105                 st = pf_find_state_byid(sp->id, sp->creatorid);
1106                 if (st == NULL) {
1107                         V_pfsyncstats.pfsyncs_badstate++;
1108                         continue;
1109                 }
1110
1111                 st->state_flags |= PFSTATE_NOSYNC;
1112                 pf_unlink_state(st, PF_ENTER_LOCKED);
1113         }
1114
1115         return (len);
1116 }
1117
1118 static int
1119 pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1120 {
1121         struct pfsync_softc *sc = V_pfsyncif;
1122         struct pfsync_bus *bus;
1123         struct mbuf *mp;
1124         int len = count * sizeof(*bus);
1125         int offp;
1126
1127         PFSYNC_BLOCK(sc);
1128
1129         /* If we're not waiting for a bulk update, who cares. */
1130         if (sc->sc_ureq_sent == 0) {
1131                 PFSYNC_BUNLOCK(sc);
1132                 return (len);
1133         }
1134
1135         mp = m_pulldown(m, offset, len, &offp);
1136         if (mp == NULL) {
1137                 PFSYNC_BUNLOCK(sc);
1138                 V_pfsyncstats.pfsyncs_badlen++;
1139                 return (-1);
1140         }
1141         bus = (struct pfsync_bus *)(mp->m_data + offp);
1142
1143         switch (bus->status) {
1144         case PFSYNC_BUS_START:
1145                 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
1146                     V_pf_limits[PF_LIMIT_STATES].limit /
1147                     ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
1148                     sizeof(struct pfsync_state)),
1149                     pfsync_bulk_fail, sc);
1150                 if (V_pf_status.debug >= PF_DEBUG_MISC)
1151                         printf("pfsync: received bulk update start\n");
1152                 break;
1153
1154         case PFSYNC_BUS_END:
1155                 if (time_uptime - ntohl(bus->endtime) >=
1156                     sc->sc_ureq_sent) {
1157                         /* that's it, we're happy */
1158                         sc->sc_ureq_sent = 0;
1159                         sc->sc_bulk_tries = 0;
1160                         callout_stop(&sc->sc_bulkfail_tmo);
1161                         if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
1162                                 (*carp_demote_adj_p)(-V_pfsync_carp_adj,
1163                                     "pfsync bulk done");
1164                         sc->sc_flags |= PFSYNCF_OK;
1165                         if (V_pf_status.debug >= PF_DEBUG_MISC)
1166                                 printf("pfsync: received valid "
1167                                     "bulk update end\n");
1168                 } else {
1169                         if (V_pf_status.debug >= PF_DEBUG_MISC)
1170                                 printf("pfsync: received invalid "
1171                                     "bulk update end: bad timestamp\n");
1172                 }
1173                 break;
1174         }
1175         PFSYNC_BUNLOCK(sc);
1176
1177         return (len);
1178 }
1179
1180 static int
1181 pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1182 {
1183         int len = count * sizeof(struct pfsync_tdb);
1184
1185 #if defined(IPSEC)
1186         struct pfsync_tdb *tp;
1187         struct mbuf *mp;
1188         int offp;
1189         int i;
1190         int s;
1191
1192         mp = m_pulldown(m, offset, len, &offp);
1193         if (mp == NULL) {
1194                 V_pfsyncstats.pfsyncs_badlen++;
1195                 return (-1);
1196         }
1197         tp = (struct pfsync_tdb *)(mp->m_data + offp);
1198
1199         for (i = 0; i < count; i++)
1200                 pfsync_update_net_tdb(&tp[i]);
1201 #endif
1202
1203         return (len);
1204 }
1205
#if defined(IPSEC)
/*
 * Update an in-kernel tdb from a received record.  Silently does nothing
 * if no matching tdb is found.
 *
 * A record with a reserved SPI or an address family other than AF_INET /
 * AF_INET6, or one whose replay or byte counter is lower than what the
 * tdb already holds, is rejected and counted in pfsyncs_badstate.
 *
 * Note that the rpl and cur_bytes fields of *pt are converted to host
 * byte order in place.
 */
static void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
        struct tdb              *tdb;

        /* check for invalid values */
        if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
            (pt->dst.sa.sa_family != AF_INET &&
            pt->dst.sa.sa_family != AF_INET6))
                goto bad;

        tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
        if (tdb) {
                pt->rpl = ntohl(pt->rpl);
                pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes);

                /* Neither replay nor byte counter should ever decrease. */
                if (pt->rpl < tdb->tdb_rpl ||
                    pt->cur_bytes < tdb->tdb_cur_bytes) {
                        goto bad;
                }

                tdb->tdb_rpl = pt->rpl;
                tdb->tdb_cur_bytes = pt->cur_bytes;
        }
        return;

bad:
        if (V_pf_status.debug >= PF_DEBUG_MISC)
                printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
                    "invalid value\n");
        V_pfsyncstats.pfsyncs_badstate++;
        return;
}
#endif
1244
1245
1246 static int
1247 pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1248 {
1249         /* check if we are at the right place in the packet */
1250         if (offset != m->m_pkthdr.len)
1251                 V_pfsyncstats.pfsyncs_badlen++;
1252
1253         /* we're done. free and let the caller return */
1254         m_freem(m);
1255         return (-1);
1256 }
1257
1258 static int
1259 pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1260 {
1261         V_pfsyncstats.pfsyncs_badact++;
1262
1263         m_freem(m);
1264         return (-1);
1265 }
1266
/*
 * Output routine of the pfsync interface: every packet handed to it is
 * freed and success (0) is reported.
 */
static int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
        struct route *rt)
{
        /* Discard the packet unconditionally. */
        m_freem(m);

        return (0);
}
1274
1275 /* ARGSUSED */
1276 static int
1277 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1278 {
1279         struct pfsync_softc *sc = ifp->if_softc;
1280         struct ifreq *ifr = (struct ifreq *)data;
1281         struct pfsyncreq pfsyncr;
1282         int error;
1283
1284         switch (cmd) {
1285         case SIOCSIFFLAGS:
1286                 PFSYNC_LOCK(sc);
1287                 if (ifp->if_flags & IFF_UP) {
1288                         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1289                         PFSYNC_UNLOCK(sc);
1290                         pfsync_pointers_init();
1291                 } else {
1292                         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1293                         PFSYNC_UNLOCK(sc);
1294                         pfsync_pointers_uninit();
1295                 }
1296                 break;
1297         case SIOCSIFMTU:
1298                 if (!sc->sc_sync_if ||
1299                     ifr->ifr_mtu <= PFSYNC_MINPKT ||
1300                     ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
1301                         return (EINVAL);
1302                 if (ifr->ifr_mtu < ifp->if_mtu) {
1303                         PFSYNC_LOCK(sc);
1304                         if (sc->sc_len > PFSYNC_MINPKT)
1305                                 pfsync_sendout(1);
1306                         PFSYNC_UNLOCK(sc);
1307                 }
1308                 ifp->if_mtu = ifr->ifr_mtu;
1309                 break;
1310         case SIOCGETPFSYNC:
1311                 bzero(&pfsyncr, sizeof(pfsyncr));
1312                 PFSYNC_LOCK(sc);
1313                 if (sc->sc_sync_if) {
1314                         strlcpy(pfsyncr.pfsyncr_syncdev,
1315                             sc->sc_sync_if->if_xname, IFNAMSIZ);
1316                 }
1317                 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1318                 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1319                 pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER ==
1320                     (sc->sc_flags & PFSYNCF_DEFER));
1321                 PFSYNC_UNLOCK(sc);
1322                 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
1323
1324         case SIOCSETPFSYNC:
1325             {
1326                 struct ip_moptions *imo = &sc->sc_imo;
1327                 struct ifnet *sifp;
1328                 struct ip *ip;
1329                 void *mship = NULL;
1330
1331                 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1332                         return (error);
1333                 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1334                         return (error);
1335
1336                 if (pfsyncr.pfsyncr_maxupdates > 255)
1337                         return (EINVAL);
1338
1339                 if (pfsyncr.pfsyncr_syncdev[0] == 0)
1340                         sifp = NULL;
1341                 else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL)
1342                         return (EINVAL);
1343
1344                 if (sifp != NULL && (
1345                     pfsyncr.pfsyncr_syncpeer.s_addr == 0 ||
1346                     pfsyncr.pfsyncr_syncpeer.s_addr ==
1347                     htonl(INADDR_PFSYNC_GROUP)))
1348                         mship = malloc((sizeof(struct in_multi *) *
1349                             IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO);
1350
1351                 PFSYNC_LOCK(sc);
1352                 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1353                         sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
1354                 else
1355                         sc->sc_sync_peer.s_addr =
1356                             pfsyncr.pfsyncr_syncpeer.s_addr;
1357
1358                 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1359                 if (pfsyncr.pfsyncr_defer) {
1360                         sc->sc_flags |= PFSYNCF_DEFER;
1361                         pfsync_defer_ptr = pfsync_defer;
1362                 } else {
1363                         sc->sc_flags &= ~PFSYNCF_DEFER;
1364                         pfsync_defer_ptr = NULL;
1365                 }
1366
1367                 if (sifp == NULL) {
1368                         if (sc->sc_sync_if)
1369                                 if_rele(sc->sc_sync_if);
1370                         sc->sc_sync_if = NULL;
1371                         if (imo->imo_membership)
1372                                 pfsync_multicast_cleanup(sc);
1373                         PFSYNC_UNLOCK(sc);
1374                         break;
1375                 }
1376
1377                 if (sc->sc_len > PFSYNC_MINPKT &&
1378                     (sifp->if_mtu < sc->sc_ifp->if_mtu ||
1379                     (sc->sc_sync_if != NULL &&
1380                     sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
1381                     sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
1382                         pfsync_sendout(1);
1383
1384                 if (imo->imo_membership)
1385                         pfsync_multicast_cleanup(sc);
1386
1387                 if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
1388                         error = pfsync_multicast_setup(sc, sifp, mship);
1389                         if (error) {
1390                                 if_rele(sifp);
1391                                 free(mship, M_PFSYNC);
1392                                 return (error);
1393                         }
1394                 }
1395                 if (sc->sc_sync_if)
1396                         if_rele(sc->sc_sync_if);
1397                 sc->sc_sync_if = sifp;
1398
1399                 ip = &sc->sc_template;
1400                 bzero(ip, sizeof(*ip));
1401                 ip->ip_v = IPVERSION;
1402                 ip->ip_hl = sizeof(sc->sc_template) >> 2;
1403                 ip->ip_tos = IPTOS_LOWDELAY;
1404                 /* len and id are set later. */
1405                 ip->ip_off = htons(IP_DF);
1406                 ip->ip_ttl = PFSYNC_DFLTTL;
1407                 ip->ip_p = IPPROTO_PFSYNC;
1408                 ip->ip_src.s_addr = INADDR_ANY;
1409                 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
1410
1411                 /* Request a full state table update. */
1412                 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
1413                         (*carp_demote_adj_p)(V_pfsync_carp_adj,
1414                             "pfsync bulk start");
1415                 sc->sc_flags &= ~PFSYNCF_OK;
1416                 if (V_pf_status.debug >= PF_DEBUG_MISC)
1417                         printf("pfsync: requesting bulk update\n");
1418                 pfsync_request_update(0, 0);
1419                 PFSYNC_UNLOCK(sc);
1420                 PFSYNC_BLOCK(sc);
1421                 sc->sc_ureq_sent = time_uptime;
1422                 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
1423                     sc);
1424                 PFSYNC_BUNLOCK(sc);
1425
1426                 break;
1427             }
1428         default:
1429                 return (ENOTTY);
1430         }
1431
1432         return (0);
1433 }
1434
/*
 * Queue write method: export the state into "buf", which is treated as
 * a struct pfsync_state.
 */
static void
pfsync_out_state(struct pf_state *st, void *buf)
{
        pfsync_state_export((struct pfsync_state *)buf, st);
}
1442
1443 static void
1444 pfsync_out_iack(struct pf_state *st, void *buf)
1445 {
1446         struct pfsync_ins_ack *iack = buf;
1447
1448         iack->id = st->id;
1449         iack->creatorid = st->creatorid;
1450 }
1451
1452 static void
1453 pfsync_out_upd_c(struct pf_state *st, void *buf)
1454 {
1455         struct pfsync_upd_c *up = buf;
1456
1457         bzero(up, sizeof(*up));
1458         up->id = st->id;
1459         pf_state_peer_hton(&st->src, &up->src);
1460         pf_state_peer_hton(&st->dst, &up->dst);
1461         up->creatorid = st->creatorid;
1462         up->timeout = st->timeout;
1463 }
1464
1465 static void
1466 pfsync_out_del(struct pf_state *st, void *buf)
1467 {
1468         struct pfsync_del_c *dp = buf;
1469
1470         dp->id = st->id;
1471         dp->creatorid = st->creatorid;
1472         st->state_flags |= PFSTATE_NOSYNC;
1473 }
1474
1475 static void
1476 pfsync_drop(struct pfsync_softc *sc)
1477 {
1478         struct pf_state *st, *next;
1479         struct pfsync_upd_req_item *ur;
1480         int q;
1481
1482         for (q = 0; q < PFSYNC_S_COUNT; q++) {
1483                 if (TAILQ_EMPTY(&sc->sc_qs[q]))
1484                         continue;
1485
1486                 TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) {
1487                         KASSERT(st->sync_state == q,
1488                                 ("%s: st->sync_state == q",
1489                                         __func__));
1490                         st->sync_state = PFSYNC_S_NONE;
1491                         pf_release_state(st);
1492                 }
1493                 TAILQ_INIT(&sc->sc_qs[q]);
1494         }
1495
1496         while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1497                 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1498                 free(ur, M_PFSYNC);
1499         }
1500
1501         sc->sc_plus = NULL;
1502         sc->sc_len = PFSYNC_MINPKT;
1503 }
1504
1505 static void
1506 pfsync_sendout(int schedswi)
1507 {
1508         struct pfsync_softc *sc = V_pfsyncif;
1509         struct ifnet *ifp = sc->sc_ifp;
1510         struct mbuf *m;
1511         struct ip *ip;
1512         struct pfsync_header *ph;
1513         struct pfsync_subheader *subh;
1514         struct pf_state *st, *st_next;
1515         struct pfsync_upd_req_item *ur;
1516         int offset;
1517         int q, count = 0;
1518
1519         KASSERT(sc != NULL, ("%s: null sc", __func__));
1520         KASSERT(sc->sc_len > PFSYNC_MINPKT,
1521             ("%s: sc_len %zu", __func__, sc->sc_len));
1522         PFSYNC_LOCK_ASSERT(sc);
1523
1524         if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
1525                 pfsync_drop(sc);
1526                 return;
1527         }
1528
1529         m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR);
1530         if (m == NULL) {
1531                 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
1532                 V_pfsyncstats.pfsyncs_onomem++;
1533                 return;
1534         }
1535         m->m_data += max_linkhdr;
1536         m->m_len = m->m_pkthdr.len = sc->sc_len;
1537
1538         /* build the ip header */
1539         ip = (struct ip *)m->m_data;
1540         bcopy(&sc->sc_template, ip, sizeof(*ip));
1541         offset = sizeof(*ip);
1542
1543         ip->ip_len = htons(m->m_pkthdr.len);
1544         ip_fillid(ip);
1545
1546         /* build the pfsync header */
1547         ph = (struct pfsync_header *)(m->m_data + offset);
1548         bzero(ph, sizeof(*ph));
1549         offset += sizeof(*ph);
1550
1551         ph->version = PFSYNC_VERSION;
1552         ph->len = htons(sc->sc_len - sizeof(*ip));
1553         bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
1554
1555         /* walk the queues */
1556         for (q = 0; q < PFSYNC_S_COUNT; q++) {
1557                 if (TAILQ_EMPTY(&sc->sc_qs[q]))
1558                         continue;
1559
1560                 subh = (struct pfsync_subheader *)(m->m_data + offset);
1561                 offset += sizeof(*subh);
1562
1563                 count = 0;
1564                 TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, st_next) {
1565                         KASSERT(st->sync_state == q,
1566                                 ("%s: st->sync_state == q",
1567                                         __func__));
1568                         /*
1569                          * XXXGL: some of write methods do unlocked reads
1570                          * of state data :(
1571                          */
1572                         pfsync_qs[q].write(st, m->m_data + offset);
1573                         offset += pfsync_qs[q].len;
1574                         st->sync_state = PFSYNC_S_NONE;
1575                         pf_release_state(st);
1576                         count++;
1577                 }
1578                 TAILQ_INIT(&sc->sc_qs[q]);
1579
1580                 bzero(subh, sizeof(*subh));
1581                 subh->action = pfsync_qs[q].action;
1582                 subh->count = htons(count);
1583                 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
1584         }
1585
1586         if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
1587                 subh = (struct pfsync_subheader *)(m->m_data + offset);
1588                 offset += sizeof(*subh);
1589
1590                 count = 0;
1591                 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1592                         TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1593
1594                         bcopy(&ur->ur_msg, m->m_data + offset,
1595                             sizeof(ur->ur_msg));
1596                         offset += sizeof(ur->ur_msg);
1597                         free(ur, M_PFSYNC);
1598                         count++;
1599                 }
1600
1601                 bzero(subh, sizeof(*subh));
1602                 subh->action = PFSYNC_ACT_UPD_REQ;
1603                 subh->count = htons(count);
1604                 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
1605         }
1606
1607         /* has someone built a custom region for us to add? */
1608         if (sc->sc_plus != NULL) {
1609                 bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
1610                 offset += sc->sc_pluslen;
1611
1612                 sc->sc_plus = NULL;
1613         }
1614
1615         subh = (struct pfsync_subheader *)(m->m_data + offset);
1616         offset += sizeof(*subh);
1617
1618         bzero(subh, sizeof(*subh));
1619         subh->action = PFSYNC_ACT_EOF;
1620         subh->count = htons(1);
1621         V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;
1622
1623         /* we're done, let's put it on the wire */
1624         if (ifp->if_bpf) {
1625                 m->m_data += sizeof(*ip);
1626                 m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
1627                 BPF_MTAP(ifp, m);
1628                 m->m_data -= sizeof(*ip);
1629                 m->m_len = m->m_pkthdr.len = sc->sc_len;
1630         }
1631
1632         if (sc->sc_sync_if == NULL) {
1633                 sc->sc_len = PFSYNC_MINPKT;
1634                 m_freem(m);
1635                 return;
1636         }
1637
1638         if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
1639         if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
1640         sc->sc_len = PFSYNC_MINPKT;
1641
1642         if (!_IF_QFULL(&sc->sc_ifp->if_snd))
1643                 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
1644         else {
1645                 m_freem(m);
1646                 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
1647         }
1648         if (schedswi)
1649                 swi_sched(V_pfsync_swi_cookie, 0);
1650 }
1651
1652 static void
1653 pfsync_insert_state(struct pf_state *st)
1654 {
1655         struct pfsync_softc *sc = V_pfsyncif;
1656
1657         if (st->state_flags & PFSTATE_NOSYNC)
1658                 return;
1659
1660         if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||
1661             st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
1662                 st->state_flags |= PFSTATE_NOSYNC;
1663                 return;
1664         }
1665
1666         KASSERT(st->sync_state == PFSYNC_S_NONE,
1667                 ("%s: st->sync_state %u", __func__, st->sync_state));
1668
1669         PFSYNC_LOCK(sc);
1670         if (sc->sc_len == PFSYNC_MINPKT)
1671                 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
1672
1673         pfsync_q_ins(st, PFSYNC_S_INS, true);
1674         PFSYNC_UNLOCK(sc);
1675
1676         st->sync_updates = 0;
1677 }
1678
1679 static int
1680 pfsync_defer(struct pf_state *st, struct mbuf *m)
1681 {
1682         struct pfsync_softc *sc = V_pfsyncif;
1683         struct pfsync_deferral *pd;
1684
1685         if (m->m_flags & (M_BCAST|M_MCAST))
1686                 return (0);
1687
1688         PFSYNC_LOCK(sc);
1689
1690         if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) ||
1691             !(sc->sc_flags & PFSYNCF_DEFER)) {
1692                 PFSYNC_UNLOCK(sc);
1693                 return (0);
1694         }
1695
1696          if (sc->sc_deferred >= 128)
1697                 pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
1698
1699         pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
1700         if (pd == NULL)
1701                 return (0);
1702         sc->sc_deferred++;
1703
1704         m->m_flags |= M_SKIP_FIREWALL;
1705         st->state_flags |= PFSTATE_ACK;
1706
1707         pd->pd_sc = sc;
1708         pd->pd_refs = 0;
1709         pd->pd_st = st;
1710         pf_ref_state(st);
1711         pd->pd_m = m;
1712
1713         TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
1714         callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1715         callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd);
1716
1717         pfsync_push(sc);
1718
1719         return (1);
1720 }
1721
1722 static void
1723 pfsync_undefer(struct pfsync_deferral *pd, int drop)
1724 {
1725         struct pfsync_softc *sc = pd->pd_sc;
1726         struct mbuf *m = pd->pd_m;
1727         struct pf_state *st = pd->pd_st;
1728
1729         PFSYNC_LOCK_ASSERT(sc);
1730
1731         TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
1732         sc->sc_deferred--;
1733         pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */
1734         free(pd, M_PFSYNC);
1735         pf_release_state(st);
1736
1737         if (drop)
1738                 m_freem(m);
1739         else {
1740                 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
1741                 pfsync_push(sc);
1742         }
1743 }
1744
1745 static void
1746 pfsync_defer_tmo(void *arg)
1747 {
1748         struct pfsync_deferral *pd = arg;
1749         struct pfsync_softc *sc = pd->pd_sc;
1750         struct mbuf *m = pd->pd_m;
1751         struct pf_state *st = pd->pd_st;
1752
1753         PFSYNC_LOCK_ASSERT(sc);
1754
1755         CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
1756
1757         TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
1758         sc->sc_deferred--;
1759         pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */
1760         if (pd->pd_refs == 0)
1761                 free(pd, M_PFSYNC);
1762         PFSYNC_UNLOCK(sc);
1763
1764         ip_output(m, NULL, NULL, 0, NULL, NULL);
1765
1766         pf_release_state(st);
1767
1768         CURVNET_RESTORE();
1769 }
1770
1771 static void
1772 pfsync_undefer_state(struct pf_state *st, int drop)
1773 {
1774         struct pfsync_softc *sc = V_pfsyncif;
1775         struct pfsync_deferral *pd;
1776
1777         PFSYNC_LOCK_ASSERT(sc);
1778
1779         TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
1780                  if (pd->pd_st == st) {
1781                         if (callout_stop(&pd->pd_tmo) > 0)
1782                                 pfsync_undefer(pd, drop);
1783                         return;
1784                 }
1785         }
1786
1787         panic("%s: unable to find deferred state", __func__);
1788 }
1789
1790 static void
1791 pfsync_update_state(struct pf_state *st)
1792 {
1793         struct pfsync_softc *sc = V_pfsyncif;
1794         bool sync = false, ref = true;
1795
1796         PF_STATE_LOCK_ASSERT(st);
1797         PFSYNC_LOCK(sc);
1798
1799         if (st->state_flags & PFSTATE_ACK)
1800                 pfsync_undefer_state(st, 0);
1801         if (st->state_flags & PFSTATE_NOSYNC) {
1802                 if (st->sync_state != PFSYNC_S_NONE)
1803                         pfsync_q_del(st, true);
1804                 PFSYNC_UNLOCK(sc);
1805                 return;
1806         }
1807
1808         if (sc->sc_len == PFSYNC_MINPKT)
1809                 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
1810
1811         switch (st->sync_state) {
1812         case PFSYNC_S_UPD_C:
1813         case PFSYNC_S_UPD:
1814         case PFSYNC_S_INS:
1815                 /* we're already handling it */
1816
1817                 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
1818                         st->sync_updates++;
1819                         if (st->sync_updates >= sc->sc_maxupdates)
1820                                 sync = true;
1821                 }
1822                 break;
1823
1824         case PFSYNC_S_IACK:
1825                 pfsync_q_del(st, false);
1826                 ref = false;
1827                 /* FALLTHROUGH */
1828
1829         case PFSYNC_S_NONE:
1830                 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref);
1831                 st->sync_updates = 0;
1832                 break;
1833
1834         default:
1835                 panic("%s: unexpected sync state %d", __func__, st->sync_state);
1836         }
1837
1838         if (sync || (time_uptime - st->pfsync_time) < 2)
1839                 pfsync_push(sc);
1840
1841         PFSYNC_UNLOCK(sc);
1842 }
1843
1844 static void
1845 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
1846 {
1847         struct pfsync_softc *sc = V_pfsyncif;
1848         struct pfsync_upd_req_item *item;
1849         size_t nlen = sizeof(struct pfsync_upd_req);
1850
1851         PFSYNC_LOCK_ASSERT(sc);
1852
1853         /*
1854          * This code does a bit to prevent multiple update requests for the
1855          * same state being generated. It searches current subheader queue,
1856          * but it doesn't lookup into queue of already packed datagrams.
1857          */
1858         TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry)
1859                 if (item->ur_msg.id == id &&
1860                     item->ur_msg.creatorid == creatorid)
1861                         return;
1862
1863         item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT);
1864         if (item == NULL)
1865                 return; /* XXX stats */
1866
1867         item->ur_msg.id = id;
1868         item->ur_msg.creatorid = creatorid;
1869
1870         if (TAILQ_EMPTY(&sc->sc_upd_req_list))
1871                 nlen += sizeof(struct pfsync_subheader);
1872
1873         if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
1874                 pfsync_sendout(1);
1875
1876                 nlen = sizeof(struct pfsync_subheader) +
1877                     sizeof(struct pfsync_upd_req);
1878         }
1879
1880         TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
1881         sc->sc_len += nlen;
1882 }
1883
1884 static void
1885 pfsync_update_state_req(struct pf_state *st)
1886 {
1887         struct pfsync_softc *sc = V_pfsyncif;
1888         bool ref = true;
1889
1890         PF_STATE_LOCK_ASSERT(st);
1891         PFSYNC_LOCK(sc);
1892
1893         if (st->state_flags & PFSTATE_NOSYNC) {
1894                 if (st->sync_state != PFSYNC_S_NONE)
1895                         pfsync_q_del(st, true);
1896                 PFSYNC_UNLOCK(sc);
1897                 return;
1898         }
1899
1900         switch (st->sync_state) {
1901         case PFSYNC_S_UPD_C:
1902         case PFSYNC_S_IACK:
1903                 pfsync_q_del(st, false);
1904                 ref = false;
1905                 /* FALLTHROUGH */
1906
1907         case PFSYNC_S_NONE:
1908                 pfsync_q_ins(st, PFSYNC_S_UPD, ref);
1909                 pfsync_push(sc);
1910                 break;
1911
1912         case PFSYNC_S_INS:
1913         case PFSYNC_S_UPD:
1914         case PFSYNC_S_DEL:
1915                 /* we're already handling it */
1916                 break;
1917
1918         default:
1919                 panic("%s: unexpected sync state %d", __func__, st->sync_state);
1920         }
1921
1922         PFSYNC_UNLOCK(sc);
1923 }
1924
1925 static void
1926 pfsync_delete_state(struct pf_state *st)
1927 {
1928         struct pfsync_softc *sc = V_pfsyncif;
1929         bool ref = true;
1930
1931         PFSYNC_LOCK(sc);
1932         if (st->state_flags & PFSTATE_ACK)
1933                 pfsync_undefer_state(st, 1);
1934         if (st->state_flags & PFSTATE_NOSYNC) {
1935                 if (st->sync_state != PFSYNC_S_NONE)
1936                         pfsync_q_del(st, true);
1937                 PFSYNC_UNLOCK(sc);
1938                 return;
1939         }
1940
1941         if (sc->sc_len == PFSYNC_MINPKT)
1942                 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
1943
1944         switch (st->sync_state) {
1945         case PFSYNC_S_INS:
1946                 /* We never got to tell the world so just forget about it. */
1947                 pfsync_q_del(st, true);
1948                 break;
1949
1950         case PFSYNC_S_UPD_C:
1951         case PFSYNC_S_UPD:
1952         case PFSYNC_S_IACK:
1953                 pfsync_q_del(st, false);
1954                 ref = false;
1955                 /* FALLTHROUGH */
1956
1957         case PFSYNC_S_NONE:
1958                 pfsync_q_ins(st, PFSYNC_S_DEL, ref);
1959                 break;
1960
1961         default:
1962                 panic("%s: unexpected sync state %d", __func__, st->sync_state);
1963         }
1964
1965         PFSYNC_UNLOCK(sc);
1966 }
1967
1968 static void
1969 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
1970 {
1971         struct pfsync_softc *sc = V_pfsyncif;
1972         struct {
1973                 struct pfsync_subheader subh;
1974                 struct pfsync_clr clr;
1975         } __packed r;
1976
1977         bzero(&r, sizeof(r));
1978
1979         r.subh.action = PFSYNC_ACT_CLR;
1980         r.subh.count = htons(1);
1981         V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;
1982
1983         strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
1984         r.clr.creatorid = creatorid;
1985
1986         PFSYNC_LOCK(sc);
1987         pfsync_send_plus(&r, sizeof(r));
1988         PFSYNC_UNLOCK(sc);
1989 }
1990
1991 static void
1992 pfsync_q_ins(struct pf_state *st, int q, bool ref)
1993 {
1994         struct pfsync_softc *sc = V_pfsyncif;
1995         size_t nlen = pfsync_qs[q].len;
1996
1997         PFSYNC_LOCK_ASSERT(sc);
1998
1999         KASSERT(st->sync_state == PFSYNC_S_NONE,
2000                 ("%s: st->sync_state %u", __func__, st->sync_state));
2001         KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
2002             sc->sc_len));
2003
2004         if (TAILQ_EMPTY(&sc->sc_qs[q]))
2005                 nlen += sizeof(struct pfsync_subheader);
2006
2007         if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
2008                 pfsync_sendout(1);
2009
2010                 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2011         }
2012
2013         sc->sc_len += nlen;
2014         TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
2015         st->sync_state = q;
2016         if (ref)
2017                 pf_ref_state(st);
2018 }
2019
2020 static void
2021 pfsync_q_del(struct pf_state *st, bool unref)
2022 {
2023         struct pfsync_softc *sc = V_pfsyncif;
2024         int q = st->sync_state;
2025
2026         PFSYNC_LOCK_ASSERT(sc);
2027         KASSERT(st->sync_state != PFSYNC_S_NONE,
2028                 ("%s: st->sync_state != PFSYNC_S_NONE", __func__));
2029
2030         sc->sc_len -= pfsync_qs[q].len;
2031         TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
2032         st->sync_state = PFSYNC_S_NONE;
2033         if (unref)
2034                 pf_release_state(st);
2035
2036         if (TAILQ_EMPTY(&sc->sc_qs[q]))
2037                 sc->sc_len -= sizeof(struct pfsync_subheader);
2038 }
2039
2040 static void
2041 pfsync_bulk_start(void)
2042 {
2043         struct pfsync_softc *sc = V_pfsyncif;
2044
2045         if (V_pf_status.debug >= PF_DEBUG_MISC)
2046                 printf("pfsync: received bulk update request\n");
2047
2048         PFSYNC_BLOCK(sc);
2049
2050         sc->sc_ureq_received = time_uptime;
2051         sc->sc_bulk_hashid = 0;
2052         sc->sc_bulk_stateid = 0;
2053         pfsync_bulk_status(PFSYNC_BUS_START);
2054         callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
2055         PFSYNC_BUNLOCK(sc);
2056 }
2057
2058 static void
2059 pfsync_bulk_update(void *arg)
2060 {
2061         struct pfsync_softc *sc = arg;
2062         struct pf_state *s;
2063         int i, sent = 0;
2064
2065         PFSYNC_BLOCK_ASSERT(sc);
2066         CURVNET_SET(sc->sc_ifp->if_vnet);
2067
2068         /*
2069          * Start with last state from previous invocation.
2070          * It may had gone, in this case start from the
2071          * hash slot.
2072          */
2073         s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);
2074
2075         if (s != NULL)
2076                 i = PF_IDHASH(s);
2077         else
2078                 i = sc->sc_bulk_hashid;
2079
2080         for (; i <= pf_hashmask; i++) {
2081                 struct pf_idhash *ih = &V_pf_idhash[i];
2082
2083                 if (s != NULL)
2084                         PF_HASHROW_ASSERT(ih);
2085                 else {
2086                         PF_HASHROW_LOCK(ih);
2087                         s = LIST_FIRST(&ih->states);
2088                 }
2089
2090                 for (; s; s = LIST_NEXT(s, entry)) {
2091
2092                         if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
2093                             sizeof(struct pfsync_state)) {
2094                                 /* We've filled a packet. */
2095                                 sc->sc_bulk_hashid = i;
2096                                 sc->sc_bulk_stateid = s->id;
2097                                 sc->sc_bulk_creatorid = s->creatorid;
2098                                 PF_HASHROW_UNLOCK(ih);
2099                                 callout_reset(&sc->sc_bulk_tmo, 1,
2100                                     pfsync_bulk_update, sc);
2101                                 goto full;
2102                         }
2103
2104                         if (s->sync_state == PFSYNC_S_NONE &&
2105                             s->timeout < PFTM_MAX &&
2106                             s->pfsync_time <= sc->sc_ureq_received) {
2107                                 pfsync_update_state_req(s);
2108                                 sent++;
2109                         }
2110                 }
2111                 PF_HASHROW_UNLOCK(ih);
2112         }
2113
2114         /* We're done. */
2115         pfsync_bulk_status(PFSYNC_BUS_END);
2116
2117 full:
2118         CURVNET_RESTORE();
2119 }
2120
2121 static void
2122 pfsync_bulk_status(u_int8_t status)
2123 {
2124         struct {
2125                 struct pfsync_subheader subh;
2126                 struct pfsync_bus bus;
2127         } __packed r;
2128
2129         struct pfsync_softc *sc = V_pfsyncif;
2130
2131         bzero(&r, sizeof(r));
2132
2133         r.subh.action = PFSYNC_ACT_BUS;
2134         r.subh.count = htons(1);
2135         V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;
2136
2137         r.bus.creatorid = V_pf_status.hostid;
2138         r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
2139         r.bus.status = status;
2140
2141         PFSYNC_LOCK(sc);
2142         pfsync_send_plus(&r, sizeof(r));
2143         PFSYNC_UNLOCK(sc);
2144 }
2145
2146 static void
2147 pfsync_bulk_fail(void *arg)
2148 {
2149         struct pfsync_softc *sc = arg;
2150
2151         CURVNET_SET(sc->sc_ifp->if_vnet);
2152
2153         PFSYNC_BLOCK_ASSERT(sc);
2154
2155         if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
2156                 /* Try again */
2157                 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
2158                     pfsync_bulk_fail, V_pfsyncif);
2159                 PFSYNC_LOCK(sc);
2160                 pfsync_request_update(0, 0);
2161                 PFSYNC_UNLOCK(sc);
2162         } else {
2163                 /* Pretend like the transfer was ok. */
2164                 sc->sc_ureq_sent = 0;
2165                 sc->sc_bulk_tries = 0;
2166                 PFSYNC_LOCK(sc);
2167                 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
2168                         (*carp_demote_adj_p)(-V_pfsync_carp_adj,
2169                             "pfsync bulk fail");
2170                 sc->sc_flags |= PFSYNCF_OK;
2171                 PFSYNC_UNLOCK(sc);
2172                 if (V_pf_status.debug >= PF_DEBUG_MISC)
2173                         printf("pfsync: failed to receive bulk update\n");
2174         }
2175
2176         CURVNET_RESTORE();
2177 }
2178
2179 static void
2180 pfsync_send_plus(void *plus, size_t pluslen)
2181 {
2182         struct pfsync_softc *sc = V_pfsyncif;
2183
2184         PFSYNC_LOCK_ASSERT(sc);
2185
2186         if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu)
2187                 pfsync_sendout(1);
2188
2189         sc->sc_plus = plus;
2190         sc->sc_len += (sc->sc_pluslen = pluslen);
2191
2192         pfsync_sendout(1);
2193 }
2194
2195 static void
2196 pfsync_timeout(void *arg)
2197 {
2198         struct pfsync_softc *sc = arg;
2199
2200         CURVNET_SET(sc->sc_ifp->if_vnet);
2201         PFSYNC_LOCK(sc);
2202         pfsync_push(sc);
2203         PFSYNC_UNLOCK(sc);
2204         CURVNET_RESTORE();
2205 }
2206
2207 static void
2208 pfsync_push(struct pfsync_softc *sc)
2209 {
2210
2211         PFSYNC_LOCK_ASSERT(sc);
2212
2213         sc->sc_flags |= PFSYNCF_PUSH;
2214         swi_sched(V_pfsync_swi_cookie, 0);
2215 }
2216
2217 static void
2218 pfsyncintr(void *arg)
2219 {
2220         struct pfsync_softc *sc = arg;
2221         struct mbuf *m, *n;
2222
2223         CURVNET_SET(sc->sc_ifp->if_vnet);
2224
2225         PFSYNC_LOCK(sc);
2226         if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) {
2227                 pfsync_sendout(0);
2228                 sc->sc_flags &= ~PFSYNCF_PUSH;
2229         }
2230         _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m);
2231         PFSYNC_UNLOCK(sc);
2232
2233         for (; m != NULL; m = n) {
2234
2235                 n = m->m_nextpkt;
2236                 m->m_nextpkt = NULL;
2237
2238                 /*
2239                  * We distinguish between a deferral packet and our
2240                  * own pfsync packet based on M_SKIP_FIREWALL
2241                  * flag. This is XXX.
2242                  */
2243                 if (m->m_flags & M_SKIP_FIREWALL)
2244                         ip_output(m, NULL, NULL, 0, NULL, NULL);
2245                 else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
2246                     NULL) == 0)
2247                         V_pfsyncstats.pfsyncs_opackets++;
2248                 else
2249                         V_pfsyncstats.pfsyncs_oerrors++;
2250         }
2251         CURVNET_RESTORE();
2252 }
2253
2254 static int
2255 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship)
2256 {
2257         struct ip_moptions *imo = &sc->sc_imo;
2258         int error;
2259
2260         if (!(ifp->if_flags & IFF_MULTICAST))
2261                 return (EADDRNOTAVAIL);
2262
2263         imo->imo_membership = (struct in_multi **)mship;
2264         imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
2265         imo->imo_multicast_vif = -1;
2266
2267         if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL,
2268             &imo->imo_membership[0])) != 0) {
2269                 imo->imo_membership = NULL;
2270                 return (error);
2271         }
2272         imo->imo_num_memberships++;
2273         imo->imo_multicast_ifp = ifp;
2274         imo->imo_multicast_ttl = PFSYNC_DFLTTL;
2275         imo->imo_multicast_loop = 0;
2276
2277         return (0);
2278 }
2279
2280 static void
2281 pfsync_multicast_cleanup(struct pfsync_softc *sc)
2282 {
2283         struct ip_moptions *imo = &sc->sc_imo;
2284
2285         in_leavegroup(imo->imo_membership[0], NULL);
2286         free(imo->imo_membership, M_PFSYNC);
2287         imo->imo_membership = NULL;
2288         imo->imo_multicast_ifp = NULL;
2289 }
2290
#ifdef INET
extern struct domain inetdomain;

/*
 * Protocol switch entry for IPPROTO_PFSYNC in the inet domain.  Input is
 * handled by pfsync_input(); output, control output and user requests
 * use the raw IP implementations.  Registered in pfsync_init().
 */
static struct protosw in_pfsync_protosw = {
        .pr_type =              SOCK_RAW,
        .pr_domain =            &inetdomain,
        .pr_protocol =          IPPROTO_PFSYNC,
        .pr_flags =             PR_ADDR|PR_ATOMIC,
        .pr_input =             pfsync_input,
        .pr_output =            rip_output,
        .pr_ctloutput =         rip_ctloutput,
        .pr_usrreqs =           &rip_usrreqs,
};
#endif
2304
2305 static void
2306 pfsync_pointers_init()
2307 {
2308
2309         PF_RULES_WLOCK();
2310         pfsync_state_import_ptr = pfsync_state_import;
2311         pfsync_insert_state_ptr = pfsync_insert_state;
2312         pfsync_update_state_ptr = pfsync_update_state;
2313         pfsync_delete_state_ptr = pfsync_delete_state;
2314         pfsync_clear_states_ptr = pfsync_clear_states;
2315         pfsync_defer_ptr = pfsync_defer;
2316         PF_RULES_WUNLOCK();
2317 }
2318
2319 static void
2320 pfsync_pointers_uninit()
2321 {
2322
2323         PF_RULES_WLOCK();
2324         pfsync_state_import_ptr = NULL;
2325         pfsync_insert_state_ptr = NULL;
2326         pfsync_update_state_ptr = NULL;
2327         pfsync_delete_state_ptr = NULL;
2328         pfsync_clear_states_ptr = NULL;
2329         pfsync_defer_ptr = NULL;
2330         PF_RULES_WUNLOCK();
2331 }
2332
2333 static void
2334 vnet_pfsync_init(const void *unused __unused)
2335 {
2336         int error;
2337
2338         V_pfsync_cloner = if_clone_simple(pfsyncname,
2339             pfsync_clone_create, pfsync_clone_destroy, 1);
2340         error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif,
2341             SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
2342         if (error) {
2343                 if_clone_detach(V_pfsync_cloner);
2344                 log(LOG_INFO, "swi_add() failed in %s\n", __func__);
2345         }
2346 }
2347 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
2348     vnet_pfsync_init, NULL);
2349
2350 static void
2351 vnet_pfsync_uninit(const void *unused __unused)
2352 {
2353
2354         if_clone_detach(V_pfsync_cloner);
2355         swi_remove(V_pfsync_swi_cookie);
2356 }
2357 /*
2358  * Detach after pf is gone; otherwise we might touch pfsync memory
2359  * from within pf after freeing pfsync.
2360  */
2361 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND,
2362     vnet_pfsync_uninit, NULL);
2363
/*
 * Module-wide initialisation.
 *
 * With INET, registers the pfsync protocol switch entry and the
 * IPPROTO_PFSYNC protocol number; if the second step fails the first is
 * rolled back.  Finally the pfsync hook pointers are installed.
 *
 * Returns 0 on success or the error from the failing registration.
 */
static int
pfsync_init(void)
{
#ifdef INET
        int error;

        error = pf_proto_register(PF_INET, &in_pfsync_protosw);
        if (error)
                return (error);
        error = ipproto_register(IPPROTO_PFSYNC);
        if (error) {
                pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
                return (error);
        }
#endif
        pfsync_pointers_init();

        return (0);
}
2383
/*
 * Module-wide teardown, mirroring pfsync_init() in reverse order: the
 * hook pointers are cleared first, then (with INET) the protocol number
 * and the protocol switch entry are unregistered.
 */
static void
pfsync_uninit(void)
{

        pfsync_pointers_uninit();

#ifdef INET
        ipproto_unregister(IPPROTO_PFSYNC);
        pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
#endif
}
2395
2396 static int
2397 pfsync_modevent(module_t mod, int type, void *data)
2398 {
2399         int error = 0;
2400
2401         switch (type) {
2402         case MOD_LOAD:
2403                 error = pfsync_init();
2404                 break;
2405         case MOD_QUIESCE:
2406                 /*
2407                  * Module should not be unloaded due to race conditions.
2408                  */
2409                 error = EBUSY;
2410                 break;
2411         case MOD_UNLOAD:
2412                 pfsync_uninit();
2413                 break;
2414         default:
2415                 error = EINVAL;
2416                 break;
2417         }
2418
2419         return (error);
2420 }
2421
2422 static moduledata_t pfsync_mod = {
2423         pfsyncname,
2424         pfsync_modevent,
2425         0
2426 };
2427
2428 #define PFSYNC_MODVER 1
2429
2430 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
2431 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
2432 MODULE_VERSION(pfsync, PFSYNC_MODVER);
2433 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);