]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/ip_carp.c
ssh: Update to OpenSSH 9.3p2
[FreeBSD/FreeBSD.git] / sys / netinet / ip_carp.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2002 Michael Shalayeff.
5  * Copyright (c) 2003 Ryan McBride.
6  * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
22  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28  * THE POSSIBILITY OF SUCH DAMAGE.
29  */
30
31 #include "opt_netlink.h"
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include "opt_bpf.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/devctl.h>
43 #include <sys/jail.h>
44 #include <sys/kernel.h>
45 #include <sys/limits.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/module.h>
49 #include <sys/priv.h>
50 #include <sys/proc.h>
51 #include <sys/socket.h>
52 #include <sys/sockio.h>
53 #include <sys/sysctl.h>
54 #include <sys/syslog.h>
55 #include <sys/taskqueue.h>
56 #include <sys/counter.h>
57
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_dl.h>
62 #include <net/if_llatbl.h>
63 #include <net/if_private.h>
64 #include <net/if_types.h>
65 #include <net/route.h>
66 #include <net/vnet.h>
67
68 #if defined(INET) || defined(INET6)
69 #include <netinet/in.h>
70 #include <netinet/in_var.h>
71 #include <netinet/ip_carp.h>
72 #include <netinet/ip_carp_nl.h>
73 #include <netinet/ip.h>
74 #include <machine/in_cksum.h>
75 #endif
76 #ifdef INET
77 #include <netinet/ip_var.h>
78 #include <netinet/if_ether.h>
79 #endif
80
81 #ifdef INET6
82 #include <netinet/icmp6.h>
83 #include <netinet/ip6.h>
84 #include <netinet6/in6_var.h>
85 #include <netinet6/ip6_var.h>
86 #include <netinet6/scope6_var.h>
87 #include <netinet6/nd6.h>
88 #endif
89
90 #include <netlink/netlink.h>
91 #include <netlink/netlink_ctl.h>
92 #include <netlink/netlink_generic.h>
93 #include <netlink/netlink_message_parser.h>
94
95 #include <crypto/sha1.h>
96
97 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
98
/*
 * Per-vhid CARP state.  Each softc represents one virtual host id on a
 * parent interface and is linked both on that interface's carp_if list and
 * on the global carp_list.
 */
99 struct carp_softc {
100         struct ifnet            *sc_carpdev;    /* Pointer to parent ifnet. */
101         struct ifaddr           **sc_ifas;      /* Our ifaddrs. */
102         struct sockaddr_dl      sc_addr;        /* Our link level address. */
103         struct callout          sc_ad_tmo;      /* Advertising timeout. */
104 #ifdef INET
105         struct callout          sc_md_tmo;      /* Master down timeout. */
106 #endif
107 #ifdef INET6
108         struct callout          sc_md6_tmo;     /* XXX: Master down timeout. */
109 #endif
110         struct mtx              sc_mtx;         /* Softc lock, see CARP_LOCK(). */
111
112         int                     sc_vhid;        /* Virtual host id. */
113         int                     sc_advskew;     /* Advertisement skew, 1/256 s units. */
114         int                     sc_advbase;     /* Advertisement base, seconds. */
115         struct in_addr          sc_carpaddr;    /* IPv4 CARP address; tested for multicast on input. */
116         struct in6_addr         sc_carpaddr6;   /* IPv6 CARP address; tested for multicast on input. */
117
118         int                     sc_naddrs;      /* With sc_naddrs6: bound on valid sc_ifas entries. */
119         int                     sc_naddrs6;
120         int                     sc_ifasiz;      /* presumably the allocated size of sc_ifas -- confirm */
121         enum { INIT = 0, BACKUP, MASTER }       sc_state;
122         int                     sc_suppress;
123         int                     sc_sendad_errors;
124 #define CARP_SENDAD_MAX_ERRORS  3
125         int                     sc_sendad_success;
126 #define CARP_SENDAD_MIN_SUCCESS 3
127
128         int                     sc_init_counter;        /* Nonzero: carp_prepare_ad() picks a random counter. */
129         uint64_t                sc_counter;     /* 64-bit advertisement counter. */
130
131         /* authentication */
132 #define CARP_HMAC_PAD   64
133         unsigned char sc_key[CARP_KEY_LEN];     /* Shared key the HMAC pad is built from. */
134         unsigned char sc_pad[CARP_HMAC_PAD];    /* Key XORed with the HMAC ipad/opad byte. */
135         SHA1_CTX sc_sha1;                       /* Precomputed start of the inner hash. */
136
137         TAILQ_ENTRY(carp_softc) sc_list;        /* On the carp_if list. */
138         LIST_ENTRY(carp_softc)  sc_next;        /* On the global list. */
139 };
140
/*
 * Per-interface CARP state, reached through ifp->if_carp.  It anchors the
 * list of softcs (one per vhid) configured on that interface.
 */
141 struct carp_if {
142 #ifdef INET
143         int     cif_naddrs;
144 #endif
145 #ifdef INET6
146         int     cif_naddrs6;
147 #endif
148         TAILQ_HEAD(, carp_softc) cif_vrs;       /* Softcs on this ifnet, see IFNET_FOREACH_CARP(). */
149 #ifdef INET
150         struct ip_moptions       cif_imo;
151 #endif
152 #ifdef INET6
153         struct ip6_moptions      cif_im6o;
154 #endif
155         struct ifnet    *cif_ifp;       /* presumably the owning ifnet -- confirm */
156         struct mtx      cif_mtx;        /* Held (or carp_sx) while traversing cif_vrs. */
157         uint32_t        cif_flags;
158 #define CIF_PROMISC     0x00000001
159 };
160
161 /* Kernel equivalent of struct carpreq, but with more fields for new features.
162  * */
163 struct carpkreq {
164         int             carpr_count;
165         int             carpr_vhid;
166         int             carpr_state;
167         int             carpr_advskew;
168         int             carpr_advbase;
169         unsigned char   carpr_key[CARP_KEY_LEN];
170         /* Everything above this is identical to carpreq */
171         struct in_addr  carpr_addr;
172         struct in6_addr carpr_addr6;
173 };
174
175 /*
176  * Brief design of carp(4).
177  *
178  * Any carp-capable ifnet may have a list of carp softcs hanging off
179  * its ifp->if_carp pointer. Each softc represents one unique virtual
180  * host id, or vhid. The softc has a back pointer to the ifnet. All
181  * softcs are joined in a global list, which has quite limited use.
182  *
183  * Any interface address that takes part in CARP negotiation has a
184  * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
185  * AF_INET or AF_INET6 address.
186  *
187  * Although, one can get the softc's backpointer to ifnet and traverse
188  * through its ifp->if_addrhead queue to find all interface addresses
189  * involved in CARP, we keep a growable array of ifaddr pointers. This
190  * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
191  * do calls into the network stack, thus avoiding LORs.
192  *
193  * Locking:
194  *
195  * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
196  * callout-driven events and ioctl()s.
197  *
198  * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx.
199  * To traverse the global list we use the mutex carp_mtx.
200  *
201  * Known issues with locking:
202  *
203  * - Sending ad, we put the pointer to the softc in an mtag, and no reference
204  *   counting is done on the softc.
205  * - On module unload we may race (?) with packet processing thread
206  *   dereferencing our function pointers.
207  */
208
209 /* Accept incoming CARP packets. */
210 VNET_DEFINE_STATIC(int, carp_allow) = 1;
211 #define V_carp_allow    VNET(carp_allow)
212
213 /* Set DSCP in outgoing CARP packets. */
214 VNET_DEFINE_STATIC(int, carp_dscp) = 56;
215 #define V_carp_dscp     VNET(carp_dscp)
216
217 /* Preempt slower nodes. */
218 VNET_DEFINE_STATIC(int, carp_preempt) = 0;
219 #define V_carp_preempt  VNET(carp_preempt)
220
221 /* Log level. */
222 VNET_DEFINE_STATIC(int, carp_log) = 1;
223 #define V_carp_log      VNET(carp_log)
224
225 /* Global advskew demotion. */
226 VNET_DEFINE_STATIC(int, carp_demotion) = 0;
227 #define V_carp_demotion VNET(carp_demotion)
228
229 /* Send error demotion factor. */
230 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW;
231 #define V_carp_senderr_adj      VNET(carp_senderr_adj)
232
233 /* Iface down demotion factor. */
234 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW;
235 #define V_carp_ifdown_adj       VNET(carp_ifdown_adj)
236
237 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS);
238 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS);
239 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
240
241 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
242     "CARP");
243 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow,
244     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
245     &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I",
246     "Accept incoming CARP packets");
247 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp,
248     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
249     0, 0, carp_dscp_sysctl, "I",
250     "DSCP value for carp packets");
251 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
252     &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
253 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
254     &VNET_NAME(carp_log), 0, "CARP log level");
255 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion,
256     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
257     0, 0, carp_demote_adj_sysctl, "I",
258     "Adjust demotion factor (skew of advskew)");
259 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor,
260     CTLFLAG_VNET | CTLFLAG_RW,
261     &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
262 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor,
263     CTLFLAG_VNET | CTLFLAG_RW,
264     &VNET_NAME(carp_ifdown_adj), 0,
265     "Interface down demotion factor adjustment");
266
267 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
268 VNET_PCPUSTAT_SYSINIT(carpstats);
269 VNET_PCPUSTAT_SYSUNINIT(carpstats);
270
271 #define CARPSTATS_ADD(name, val)        \
272     counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
273         sizeof(uint64_t)], (val))
274 #define CARPSTATS_INC(name)             CARPSTATS_ADD(name, 1)
275
276 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
277     carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
278
279 #define CARP_LOCK_INIT(sc)      mtx_init(&(sc)->sc_mtx, "carp_softc",   \
280         NULL, MTX_DEF)
281 #define CARP_LOCK_DESTROY(sc)   mtx_destroy(&(sc)->sc_mtx)
282 #define CARP_LOCK_ASSERT(sc)    mtx_assert(&(sc)->sc_mtx, MA_OWNED)
283 #define CARP_LOCK(sc)           mtx_lock(&(sc)->sc_mtx)
284 #define CARP_UNLOCK(sc)         mtx_unlock(&(sc)->sc_mtx)
285 #define CIF_LOCK_INIT(cif)      mtx_init(&(cif)->cif_mtx, "carp_if",   \
286         NULL, MTX_DEF)
287 #define CIF_LOCK_DESTROY(cif)   mtx_destroy(&(cif)->cif_mtx)
288 #define CIF_LOCK_ASSERT(cif)    mtx_assert(&(cif)->cif_mtx, MA_OWNED)
289 #define CIF_LOCK(cif)           mtx_lock(&(cif)->cif_mtx)
290 #define CIF_UNLOCK(cif)         mtx_unlock(&(cif)->cif_mtx)
291 #define CIF_FREE(cif)   do {                            \
292                 CIF_LOCK(cif);                          \
293                 if (TAILQ_EMPTY(&(cif)->cif_vrs))       \
294                         carp_free_if(cif);              \
295                 else                                    \
296                         CIF_UNLOCK(cif);                \
297 } while (0)
298
299 #define CARP_LOG(...)   do {                            \
300         if (V_carp_log > 0)                             \
301                 log(LOG_INFO, "carp: " __VA_ARGS__);    \
302 } while (0)
303
304 #define CARP_DEBUG(...) do {                            \
305         if (V_carp_log > 1)                             \
306                 log(LOG_DEBUG, __VA_ARGS__);            \
307 } while (0)
308
309 #define IFNET_FOREACH_IFA(ifp, ifa)                                     \
310         CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \
311                 if ((ifa)->ifa_carp != NULL)
312
/*
 * Iterate over the ifaddrs stored in (sc)->sc_ifas, stopping at the first
 * NULL slot or after sc_naddrs + sc_naddrs6 entries.  The softc lock must
 * be held.
 *
 * NB: this expands to two statements (the lock assertion followed by the
 * for loop), so it must not be used as the unbraced body of an if/else or
 * of another loop.
 */
#define CARP_FOREACH_IFA(sc, ifa)					\
	CARP_LOCK_ASSERT(sc);						\
	for (int _i = 0;						\
		_i < (sc)->sc_naddrs + (sc)->sc_naddrs6 &&		\
		((ifa) = (sc)->sc_ifas[_i]) != NULL;			\
		++_i)
319
/*
 * Iterate over every softc attached to (ifp).  Either the interface's
 * cif_mtx or the global carp_sx (exclusively) must be held.
 *
 * NB: this expands to two statements (the assertion followed by the loop),
 * so it must not be used as the unbraced body of an if/else or of another
 * loop.
 */
#define IFNET_FOREACH_CARP(ifp, sc)					\
	KASSERT(mtx_owned(&(ifp)->if_carp->cif_mtx) ||			\
	    sx_xlocked(&carp_sx), ("cif_vrs not locked"));		\
	TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)
324
325 #define DEMOTE_ADVSKEW(sc)                                      \
326     (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ?      \
327     CARP_MAXSKEW :                                              \
328         (((sc)->sc_advskew + V_carp_demotion < 0) ?             \
329         0 : ((sc)->sc_advskew + V_carp_demotion)))
330
331 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int);
332 static struct carp_softc
333                 *carp_alloc(struct ifnet *);
334 static void     carp_destroy(struct carp_softc *);
335 static struct carp_if
336                 *carp_alloc_if(struct ifnet *);
337 static void     carp_free_if(struct carp_if *);
338 static void     carp_set_state(struct carp_softc *, int, const char* reason);
339 static void     carp_sc_state(struct carp_softc *);
340 static void     carp_setrun(struct carp_softc *, sa_family_t);
341 static void     carp_master_down(void *);
342 static void     carp_master_down_locked(struct carp_softc *,
343                     const char* reason);
344 static void     carp_send_ad(void *);
345 static void     carp_send_ad_locked(struct carp_softc *);
346 static void     carp_addroute(struct carp_softc *);
347 static void     carp_ifa_addroute(struct ifaddr *);
348 static void     carp_delroute(struct carp_softc *);
349 static void     carp_ifa_delroute(struct ifaddr *);
350 static void     carp_send_ad_all(void *, int);
351 static void     carp_demote_adj(int, char *);
352
353 static LIST_HEAD(, carp_softc) carp_list;
354 static struct mtx carp_mtx;
355 static struct sx carp_sx;
356 static struct task carp_sendall_task =
357     TASK_INITIALIZER(0, carp_send_ad_all, NULL);
358
359 static int
360 carp_is_supported_if(if_t ifp)
361 {
362         if (ifp == NULL)
363                 return (ENXIO);
364
365         switch (ifp->if_type) {
366         case IFT_ETHER:
367         case IFT_L2VLAN:
368         case IFT_BRIDGE:
369                 break;
370         default:
371                 return (EOPNOTSUPP);
372         }
373
374         return (0);
375 }
376
377 static void
378 carp_hmac_prepare(struct carp_softc *sc)
379 {
380         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
381         uint8_t vhid = sc->sc_vhid & 0xff;
382         struct ifaddr *ifa;
383         int i, found;
384 #ifdef INET
385         struct in_addr last, cur, in;
386 #endif
387 #ifdef INET6
388         struct in6_addr last6, cur6, in6;
389 #endif
390
391         CARP_LOCK_ASSERT(sc);
392
393         /* Compute ipad from key. */
394         bzero(sc->sc_pad, sizeof(sc->sc_pad));
395         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
396         for (i = 0; i < sizeof(sc->sc_pad); i++)
397                 sc->sc_pad[i] ^= 0x36;
398
399         /* Precompute first part of inner hash. */
400         SHA1Init(&sc->sc_sha1);
401         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
402         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
403         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
404         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
405 #ifdef INET
406         cur.s_addr = 0;
407         do {
408                 found = 0;
409                 last = cur;
410                 cur.s_addr = 0xffffffff;
411                 CARP_FOREACH_IFA(sc, ifa) {
412                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
413                         if (ifa->ifa_addr->sa_family == AF_INET &&
414                             ntohl(in.s_addr) > ntohl(last.s_addr) &&
415                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
416                                 cur.s_addr = in.s_addr;
417                                 found++;
418                         }
419                 }
420                 if (found)
421                         SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
422         } while (found);
423 #endif /* INET */
424 #ifdef INET6
425         memset(&cur6, 0, sizeof(cur6));
426         do {
427                 found = 0;
428                 last6 = cur6;
429                 memset(&cur6, 0xff, sizeof(cur6));
430                 CARP_FOREACH_IFA(sc, ifa) {
431                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
432                         if (IN6_IS_SCOPE_EMBED(&in6))
433                                 in6.s6_addr16[1] = 0;
434                         if (ifa->ifa_addr->sa_family == AF_INET6 &&
435                             memcmp(&in6, &last6, sizeof(in6)) > 0 &&
436                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
437                                 cur6 = in6;
438                                 found++;
439                         }
440                 }
441                 if (found)
442                         SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
443         } while (found);
444 #endif /* INET6 */
445
446         /* convert ipad to opad */
447         for (i = 0; i < sizeof(sc->sc_pad); i++)
448                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
449 }
450
451 static void
452 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
453     unsigned char md[20])
454 {
455         SHA1_CTX sha1ctx;
456
457         CARP_LOCK_ASSERT(sc);
458
459         /* fetch first half of inner hash */
460         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
461
462         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
463         SHA1Final(md, &sha1ctx);
464
465         /* outer hash */
466         SHA1Init(&sha1ctx);
467         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
468         SHA1Update(&sha1ctx, md, 20);
469         SHA1Final(md, &sha1ctx);
470 }
471
472 static int
473 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
474     unsigned char md[20])
475 {
476         unsigned char md2[20];
477
478         CARP_LOCK_ASSERT(sc);
479
480         carp_hmac_generate(sc, counter, md2);
481
482         return (bcmp(md, md2, sizeof(md2)));
483 }
484
485 /*
486  * Process an input packet.
487  * The order of the checks has been rearranged compared to the RFC,
488  * because it is more efficient this way or not possible otherwise.
489  */
490 #ifdef INET
491 static int
492 carp_input(struct mbuf **mp, int *offp, int proto)
493 {
494         struct mbuf *m = *mp;
495         struct ip *ip = mtod(m, struct ip *);
496         struct carp_header *ch;
497         int iplen, len;
498
499         iplen = *offp;
500         *mp = NULL;
501
502         CARPSTATS_INC(carps_ipackets);
503
504         if (!V_carp_allow) {
505                 m_freem(m);
506                 return (IPPROTO_DONE);
507         }
508
509         iplen = ip->ip_hl << 2;
510
511         if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
512                 CARPSTATS_INC(carps_badlen);
513                 CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
514                     "on %s\n", __func__, m->m_len - sizeof(struct ip),
515                     if_name(m->m_pkthdr.rcvif));
516                 m_freem(m);
517                 return (IPPROTO_DONE);
518         }
519
520         if (iplen + sizeof(*ch) < m->m_len) {
521                 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
522                         CARPSTATS_INC(carps_hdrops);
523                         CARP_DEBUG("%s: pullup failed\n", __func__);
524                         return (IPPROTO_DONE);
525                 }
526                 ip = mtod(m, struct ip *);
527         }
528         ch = (struct carp_header *)((char *)ip + iplen);
529
530         /*
531          * verify that the received packet length is
532          * equal to the CARP header
533          */
534         len = iplen + sizeof(*ch);
535         if (len > m->m_pkthdr.len) {
536                 CARPSTATS_INC(carps_badlen);
537                 CARP_DEBUG("%s: packet too short %d on %s\n", __func__,
538                     m->m_pkthdr.len,
539                     if_name(m->m_pkthdr.rcvif));
540                 m_freem(m);
541                 return (IPPROTO_DONE);
542         }
543
544         if ((m = m_pullup(m, len)) == NULL) {
545                 CARPSTATS_INC(carps_hdrops);
546                 return (IPPROTO_DONE);
547         }
548         ip = mtod(m, struct ip *);
549         ch = (struct carp_header *)((char *)ip + iplen);
550
551         /* verify the CARP checksum */
552         m->m_data += iplen;
553         if (in_cksum(m, len - iplen)) {
554                 CARPSTATS_INC(carps_badsum);
555                 CARP_DEBUG("%s: checksum failed on %s\n", __func__,
556                     if_name(m->m_pkthdr.rcvif));
557                 m_freem(m);
558                 return (IPPROTO_DONE);
559         }
560         m->m_data -= iplen;
561
562         carp_input_c(m, ch, AF_INET, ip->ip_ttl);
563         return (IPPROTO_DONE);
564 }
565 #endif
566
567 #ifdef INET6
568 static int
569 carp6_input(struct mbuf **mp, int *offp, int proto)
570 {
571         struct mbuf *m = *mp;
572         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
573         struct carp_header *ch;
574         u_int len;
575
576         CARPSTATS_INC(carps_ipackets6);
577
578         if (!V_carp_allow) {
579                 m_freem(m);
580                 return (IPPROTO_DONE);
581         }
582
583         /* check if received on a valid carp interface */
584         if (m->m_pkthdr.rcvif->if_carp == NULL) {
585                 CARPSTATS_INC(carps_badif);
586                 CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
587                     __func__, if_name(m->m_pkthdr.rcvif));
588                 m_freem(m);
589                 return (IPPROTO_DONE);
590         }
591
592         /* verify that we have a complete carp packet */
593         if (m->m_len < *offp + sizeof(*ch)) {
594                 len = m->m_len;
595                 m = m_pullup(m, *offp + sizeof(*ch));
596                 if (m == NULL) {
597                         CARPSTATS_INC(carps_badlen);
598                         CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
599                         return (IPPROTO_DONE);
600                 }
601                 ip6 = mtod(m, struct ip6_hdr *);
602         }
603         ch = (struct carp_header *)(mtod(m, char *) + *offp);
604
605         /* verify the CARP checksum */
606         m->m_data += *offp;
607         if (in_cksum(m, sizeof(*ch))) {
608                 CARPSTATS_INC(carps_badsum);
609                 CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
610                     if_name(m->m_pkthdr.rcvif));
611                 m_freem(m);
612                 return (IPPROTO_DONE);
613         }
614         m->m_data -= *offp;
615
616         carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim);
617         return (IPPROTO_DONE);
618 }
619 #endif /* INET6 */
620
621 /*
622  * This routine should not be necessary at all, but some switches
623  * (VMWare ESX vswitches) can echo our own packets back at us,
624  * and we must ignore them or they will cause us to drop out of
625  * MASTER mode.
626  *
627  * We cannot catch all cases of network loops.  Instead, what we
628  * do here is catch any packet that arrives with a carp header
629  * with a VHID of 0, that comes from an address that is our own.
630  * These packets are by definition "from us" (even if they are from
631  * a misconfigured host that is pretending to be us).
632  *
633  * The VHID test is outside this mini-function.
634  */
635 static int
636 carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af)
637 {
638 #ifdef INET
639         struct ip *ip4;
640         struct in_addr in4;
641 #endif
642 #ifdef INET6
643         struct ip6_hdr *ip6;
644         struct in6_addr in6;
645 #endif
646
647         switch (af) {
648 #ifdef INET
649         case AF_INET:
650                 ip4 = mtod(m, struct ip *);
651                 in4 = ifatoia(ifa)->ia_addr.sin_addr;
652                 return (in4.s_addr == ip4->ip_src.s_addr);
653 #endif
654 #ifdef INET6
655         case AF_INET6:
656                 ip6 = mtod(m, struct ip6_hdr *);
657                 in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
658                 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0);
659 #endif
660         default:
661                 break;
662         }
663         return (0);
664 }
665
666 static void
667 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl)
668 {
669         struct ifnet *ifp = m->m_pkthdr.rcvif;
670         struct ifaddr *ifa, *match;
671         struct carp_softc *sc;
672         uint64_t tmp_counter;
673         struct timeval sc_tv, ch_tv;
674         int error;
675         bool multicast = false;
676
677         NET_EPOCH_ASSERT();
678
679         /*
680          * Verify that the VHID is valid on the receiving interface.
681          *
682          * There should be just one match.  If there are none
683          * the VHID is not valid and we drop the packet.  If
684          * there are multiple VHID matches, take just the first
685          * one, for compatibility with previous code.  While we're
686          * scanning, check for obvious loops in the network topology
687          * (these should never happen, and as noted above, we may
688          * miss real loops; this is just a double-check).
689          */
690         error = 0;
691         match = NULL;
692         IFNET_FOREACH_IFA(ifp, ifa) {
693                 if (match == NULL && ifa->ifa_carp != NULL &&
694                     ifa->ifa_addr->sa_family == af &&
695                     ifa->ifa_carp->sc_vhid == ch->carp_vhid)
696                         match = ifa;
697                 if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af))
698                         error = ELOOP;
699         }
700         ifa = error ? NULL : match;
701         if (ifa != NULL)
702                 ifa_ref(ifa);
703
704         if (ifa == NULL) {
705                 if (error == ELOOP) {
706                         CARP_DEBUG("dropping looped packet on interface %s\n",
707                             if_name(ifp));
708                         CARPSTATS_INC(carps_badif);     /* ??? */
709                 } else {
710                         CARPSTATS_INC(carps_badvhid);
711                 }
712                 m_freem(m);
713                 return;
714         }
715
716         /* verify the CARP version. */
717         if (ch->carp_version != CARP_VERSION) {
718                 CARPSTATS_INC(carps_badver);
719                 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp),
720                     ch->carp_version);
721                 ifa_free(ifa);
722                 m_freem(m);
723                 return;
724         }
725
726         sc = ifa->ifa_carp;
727         CARP_LOCK(sc);
728         if (ifa->ifa_addr->sa_family == AF_INET) {
729                 multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr);
730         } else {
731                 multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6);
732         }
733         ifa_free(ifa);
734
735         /* verify that the IP TTL is 255, but only if we're not in unicast mode. */
736         if (multicast && ttl != CARP_DFLTTL) {
737                 CARPSTATS_INC(carps_badttl);
738                 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
739                     ttl, if_name(m->m_pkthdr.rcvif));
740                 goto out;
741         }
742
743         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
744                 CARPSTATS_INC(carps_badauth);
745                 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
746                     sc->sc_vhid, if_name(ifp));
747                 goto out;
748         }
749
750         tmp_counter = ntohl(ch->carp_counter[0]);
751         tmp_counter = tmp_counter<<32;
752         tmp_counter += ntohl(ch->carp_counter[1]);
753
754         /* XXX Replay protection goes here */
755
756         sc->sc_init_counter = 0;
757         sc->sc_counter = tmp_counter;
758
759         sc_tv.tv_sec = sc->sc_advbase;
760         sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
761         ch_tv.tv_sec = ch->carp_advbase;
762         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
763
764         switch (sc->sc_state) {
765         case INIT:
766                 break;
767         case MASTER:
768                 /*
769                  * If we receive an advertisement from a master who's going to
770                  * be more frequent than us, go into BACKUP state.
771                  */
772                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
773                     timevalcmp(&sc_tv, &ch_tv, ==)) {
774                         callout_stop(&sc->sc_ad_tmo);
775                         carp_set_state(sc, BACKUP,
776                             "more frequent advertisement received");
777                         carp_setrun(sc, 0);
778                         carp_delroute(sc);
779                 }
780                 break;
781         case BACKUP:
782                 /*
783                  * If we're pre-empting masters who advertise slower than us,
784                  * and this one claims to be slower, treat him as down.
785                  */
786                 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
787                         carp_master_down_locked(sc,
788                             "preempting a slower master");
789                         break;
790                 }
791
792                 /*
793                  *  If the master is going to advertise at such a low frequency
794                  *  that he's guaranteed to time out, we'd might as well just
795                  *  treat him as timed out now.
796                  */
797                 sc_tv.tv_sec = sc->sc_advbase * 3;
798                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
799                         carp_master_down_locked(sc, "master will time out");
800                         break;
801                 }
802
803                 /*
804                  * Otherwise, we reset the counter and wait for the next
805                  * advertisement.
806                  */
807                 carp_setrun(sc, af);
808                 break;
809         }
810
811 out:
812         CARP_UNLOCK(sc);
813         m_freem(m);
814 }
815
816 static int
817 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
818 {
819         struct m_tag *mtag;
820
821         if (sc->sc_init_counter) {
822                 /* this could also be seconds since unix epoch */
823                 sc->sc_counter = arc4random();
824                 sc->sc_counter = sc->sc_counter << 32;
825                 sc->sc_counter += arc4random();
826         } else
827                 sc->sc_counter++;
828
829         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
830         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
831
832         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
833
834         /* Tag packet for carp_output */
835         if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
836             M_NOWAIT)) == NULL) {
837                 m_freem(m);
838                 CARPSTATS_INC(carps_onomem);
839                 return (ENOMEM);
840         }
841         bcopy(&sc, mtag + 1, sizeof(sc));
842         m_tag_prepend(m, mtag);
843
844         return (0);
845 }
846
847 /*
848  * To avoid LORs and possible recursions this function shouldn't
849  * be called directly, but scheduled via taskqueue.
850  */
851 static void
852 carp_send_ad_all(void *ctx __unused, int pending __unused)
853 {
854         struct carp_softc *sc;
855         struct epoch_tracker et;
856
857         NET_EPOCH_ENTER(et);
858         mtx_lock(&carp_mtx);
859         LIST_FOREACH(sc, &carp_list, sc_next)
860                 if (sc->sc_state == MASTER) {
861                         CARP_LOCK(sc);
862                         CURVNET_SET(sc->sc_carpdev->if_vnet);
863                         carp_send_ad_locked(sc);
864                         CURVNET_RESTORE();
865                         CARP_UNLOCK(sc);
866                 }
867         mtx_unlock(&carp_mtx);
868         NET_EPOCH_EXIT(et);
869 }
870
871 /* Send a periodic advertisement, executed in callout context. */
872 static void
873 carp_send_ad(void *v)
874 {
875         struct carp_softc *sc = v;
876         struct epoch_tracker et;
877
878         NET_EPOCH_ENTER(et);
879         CARP_LOCK_ASSERT(sc);
880         CURVNET_SET(sc->sc_carpdev->if_vnet);
881         carp_send_ad_locked(sc);
882         CURVNET_RESTORE();
883         CARP_UNLOCK(sc);
884         NET_EPOCH_EXIT(et);
885 }
886
887 static void
888 carp_send_ad_error(struct carp_softc *sc, int error)
889 {
890
891         /*
892          * We track errors and successfull sends with this logic:
893          * - Any error resets success counter to 0.
894          * - MAX_ERRORS triggers demotion.
895          * - MIN_SUCCESS successes resets error counter to 0.
896          * - MIN_SUCCESS reverts demotion, if it was triggered before.
897          */
898         if (error) {
899                 if (sc->sc_sendad_errors < INT_MAX)
900                         sc->sc_sendad_errors++;
901                 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
902                         static const char fmt[] = "send error %d on %s";
903                         char msg[sizeof(fmt) + IFNAMSIZ];
904
905                         sprintf(msg, fmt, error, if_name(sc->sc_carpdev));
906                         carp_demote_adj(V_carp_senderr_adj, msg);
907                 }
908                 sc->sc_sendad_success = 0;
909         } else if (sc->sc_sendad_errors > 0) {
910                 if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) {
911                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
912                                 static const char fmt[] = "send ok on %s";
913                                 char msg[sizeof(fmt) + IFNAMSIZ];
914
915                                 sprintf(msg, fmt, if_name(sc->sc_carpdev));
916                                 carp_demote_adj(-V_carp_senderr_adj, msg);
917                         }
918                         sc->sc_sendad_errors = 0;
919                 }
920         }
921 }
922
923 /*
924  * Pick the best ifaddr on the given ifp for sending CARP
925  * advertisements.
926  *
927  * "Best" here is defined by ifa_preferred().  This function is much
928  * much like ifaof_ifpforaddr() except that we just use ifa_preferred().
929  *
930  * (This could be simplified to return the actual address, except that
931  * it has a different format in AF_INET and AF_INET6.)
932  */
933 static struct ifaddr *
934 carp_best_ifa(int af, struct ifnet *ifp)
935 {
936         struct ifaddr *ifa, *best;
937
938         NET_EPOCH_ASSERT();
939
940         if (af >= AF_MAX)
941                 return (NULL);
942         best = NULL;
943         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
944                 if (ifa->ifa_addr->sa_family == af &&
945                     (best == NULL || ifa_preferred(best, ifa)))
946                         best = ifa;
947         }
948         if (best != NULL)
949                 ifa_ref(best);
950         return (best);
951 }
952
953 static void
954 carp_send_ad_locked(struct carp_softc *sc)
955 {
956         struct carp_header ch;
957         struct timeval tv;
958         struct ifaddr *ifa;
959         struct carp_header *ch_ptr;
960         struct mbuf *m;
961         int len, advskew;
962
963         NET_EPOCH_ASSERT();
964         CARP_LOCK_ASSERT(sc);
965
966         advskew = DEMOTE_ADVSKEW(sc);
967         tv.tv_sec = sc->sc_advbase;
968         tv.tv_usec = advskew * 1000000 / 256;
969
970         ch.carp_version = CARP_VERSION;
971         ch.carp_type = CARP_ADVERTISEMENT;
972         ch.carp_vhid = sc->sc_vhid;
973         ch.carp_advbase = sc->sc_advbase;
974         ch.carp_advskew = advskew;
975         ch.carp_authlen = 7;    /* XXX DEFINE */
976         ch.carp_pad1 = 0;       /* must be zero */
977         ch.carp_cksum = 0;
978
979         /* XXXGL: OpenBSD picks first ifaddr with needed family. */
980
981 #ifdef INET
982         if (sc->sc_naddrs) {
983                 struct ip *ip;
984
985                 m = m_gethdr(M_NOWAIT, MT_DATA);
986                 if (m == NULL) {
987                         CARPSTATS_INC(carps_onomem);
988                         goto resched;
989                 }
990                 len = sizeof(*ip) + sizeof(ch);
991                 m->m_pkthdr.len = len;
992                 m->m_pkthdr.rcvif = NULL;
993                 m->m_len = len;
994                 M_ALIGN(m, m->m_len);
995                 if (IN_MULTICAST(sc->sc_carpaddr.s_addr))
996                         m->m_flags |= M_MCAST;
997                 ip = mtod(m, struct ip *);
998                 ip->ip_v = IPVERSION;
999                 ip->ip_hl = sizeof(*ip) >> 2;
1000                 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET;
1001                 ip->ip_len = htons(len);
1002                 ip->ip_off = htons(IP_DF);
1003                 ip->ip_ttl = CARP_DFLTTL;
1004                 ip->ip_p = IPPROTO_CARP;
1005                 ip->ip_sum = 0;
1006                 ip_fillid(ip);
1007
1008                 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev);
1009                 if (ifa != NULL) {
1010                         ip->ip_src.s_addr =
1011                             ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1012                         ifa_free(ifa);
1013                 } else
1014                         ip->ip_src.s_addr = 0;
1015                 ip->ip_dst = sc->sc_carpaddr;
1016
1017                 ch_ptr = (struct carp_header *)(&ip[1]);
1018                 bcopy(&ch, ch_ptr, sizeof(ch));
1019                 if (carp_prepare_ad(m, sc, ch_ptr))
1020                         goto resched;
1021
1022                 m->m_data += sizeof(*ip);
1023                 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
1024                 m->m_data -= sizeof(*ip);
1025
1026                 CARPSTATS_INC(carps_opackets);
1027
1028                 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
1029                     &sc->sc_carpdev->if_carp->cif_imo, NULL));
1030         }
1031 #endif /* INET */
1032 #ifdef INET6
1033         if (sc->sc_naddrs6) {
1034                 struct ip6_hdr *ip6;
1035
1036                 m = m_gethdr(M_NOWAIT, MT_DATA);
1037                 if (m == NULL) {
1038                         CARPSTATS_INC(carps_onomem);
1039                         goto resched;
1040                 }
1041                 len = sizeof(*ip6) + sizeof(ch);
1042                 m->m_pkthdr.len = len;
1043                 m->m_pkthdr.rcvif = NULL;
1044                 m->m_len = len;
1045                 M_ALIGN(m, m->m_len);
1046                 ip6 = mtod(m, struct ip6_hdr *);
1047                 bzero(ip6, sizeof(*ip6));
1048                 ip6->ip6_vfc |= IPV6_VERSION;
1049                 /* Traffic class isn't defined in ip6 struct instead
1050                  * it gets offset into flowid field */
1051                 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN +
1052                     IPTOS_DSCP_OFFSET));
1053                 ip6->ip6_hlim = CARP_DFLTTL;
1054                 ip6->ip6_nxt = IPPROTO_CARP;
1055
1056                 /* set the source address */
1057                 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev);
1058                 if (ifa != NULL) {
1059                         bcopy(IFA_IN6(ifa), &ip6->ip6_src,
1060                             sizeof(struct in6_addr));
1061                         ifa_free(ifa);
1062                 } else
1063                         /* This should never happen with IPv6. */
1064                         bzero(&ip6->ip6_src, sizeof(struct in6_addr));
1065
1066                 /* Set the multicast destination. */
1067                 memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst));
1068                 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
1069                     IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) {
1070                         if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1071                                 m_freem(m);
1072                                 CARP_DEBUG("%s: in6_setscope failed\n", __func__);
1073                                 goto resched;
1074                         }
1075                 }
1076
1077                 ch_ptr = (struct carp_header *)(&ip6[1]);
1078                 bcopy(&ch, ch_ptr, sizeof(ch));
1079                 if (carp_prepare_ad(m, sc, ch_ptr))
1080                         goto resched;
1081
1082                 m->m_data += sizeof(*ip6);
1083                 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
1084                 m->m_data -= sizeof(*ip6);
1085
1086                 CARPSTATS_INC(carps_opackets6);
1087
1088                 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
1089                     &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
1090         }
1091 #endif /* INET6 */
1092
1093 resched:
1094         callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
1095 }
1096
1097 static void
1098 carp_addroute(struct carp_softc *sc)
1099 {
1100         struct ifaddr *ifa;
1101
1102         CARP_FOREACH_IFA(sc, ifa)
1103                 carp_ifa_addroute(ifa);
1104 }
1105
1106 static void
1107 carp_ifa_addroute(struct ifaddr *ifa)
1108 {
1109
1110         switch (ifa->ifa_addr->sa_family) {
1111 #ifdef INET
1112         case AF_INET:
1113                 in_addprefix(ifatoia(ifa));
1114                 ifa_add_loopback_route(ifa,
1115                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
1116                 break;
1117 #endif
1118 #ifdef INET6
1119         case AF_INET6:
1120                 ifa_add_loopback_route(ifa,
1121                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
1122                 nd6_add_ifa_lle(ifatoia6(ifa));
1123                 break;
1124 #endif
1125         }
1126 }
1127
1128 static void
1129 carp_delroute(struct carp_softc *sc)
1130 {
1131         struct ifaddr *ifa;
1132
1133         CARP_FOREACH_IFA(sc, ifa)
1134                 carp_ifa_delroute(ifa);
1135 }
1136
1137 static void
1138 carp_ifa_delroute(struct ifaddr *ifa)
1139 {
1140
1141         switch (ifa->ifa_addr->sa_family) {
1142 #ifdef INET
1143         case AF_INET:
1144                 ifa_del_loopback_route(ifa,
1145                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
1146                 in_scrubprefix(ifatoia(ifa), LLE_STATIC);
1147                 break;
1148 #endif
1149 #ifdef INET6
1150         case AF_INET6:
1151                 ifa_del_loopback_route(ifa,
1152                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
1153                 nd6_rem_ifa_lle(ifatoia6(ifa), 1);
1154                 break;
1155 #endif
1156         }
1157 }
1158
1159 int
1160 carp_master(struct ifaddr *ifa)
1161 {
1162         struct carp_softc *sc = ifa->ifa_carp;
1163
1164         return (sc->sc_state == MASTER);
1165 }
1166
1167 #ifdef INET
1168 /*
1169  * Broadcast a gratuitous ARP request containing
1170  * the virtual router MAC address for each IP address
1171  * associated with the virtual router.
1172  */
1173 static void
1174 carp_send_arp(struct carp_softc *sc)
1175 {
1176         struct ifaddr *ifa;
1177         struct in_addr addr;
1178
1179         NET_EPOCH_ASSERT();
1180
1181         CARP_FOREACH_IFA(sc, ifa) {
1182                 if (ifa->ifa_addr->sa_family != AF_INET)
1183                         continue;
1184                 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
1185                 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr));
1186         }
1187 }
1188
1189 int
1190 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
1191 {
1192         struct carp_softc *sc = ifa->ifa_carp;
1193
1194         if (sc->sc_state == MASTER) {
1195                 *enaddr = LLADDR(&sc->sc_addr);
1196                 return (1);
1197         }
1198
1199         return (0);
1200 }
1201 #endif
1202
1203 #ifdef INET6
1204 static void
1205 carp_send_na(struct carp_softc *sc)
1206 {
1207         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1208         struct ifaddr *ifa;
1209         struct in6_addr *in6;
1210
1211         CARP_FOREACH_IFA(sc, ifa) {
1212                 if (ifa->ifa_addr->sa_family != AF_INET6)
1213                         continue;
1214
1215                 in6 = IFA_IN6(ifa);
1216                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1217                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1218                 DELAY(1000);    /* XXX */
1219         }
1220 }
1221
1222 /*
1223  * Returns ifa in case it's a carp address and it is MASTER, or if the address
1224  * matches and is not a carp address.  Returns NULL otherwise.
1225  */
1226 struct ifaddr *
1227 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
1228 {
1229         struct ifaddr *ifa;
1230
1231         NET_EPOCH_ASSERT();
1232
1233         ifa = NULL;
1234         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1235                 if (ifa->ifa_addr->sa_family != AF_INET6)
1236                         continue;
1237                 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
1238                         continue;
1239                 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
1240                         ifa = NULL;
1241                 else
1242                         ifa_ref(ifa);
1243                 break;
1244         }
1245
1246         return (ifa);
1247 }
1248
1249 char *
1250 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
1251 {
1252         struct ifaddr *ifa;
1253
1254         NET_EPOCH_ASSERT();
1255
1256         IFNET_FOREACH_IFA(ifp, ifa)
1257                 if (ifa->ifa_addr->sa_family == AF_INET6 &&
1258                     IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
1259                         struct carp_softc *sc = ifa->ifa_carp;
1260                         struct m_tag *mtag;
1261
1262                         mtag = m_tag_get(PACKET_TAG_CARP,
1263                             sizeof(struct carp_softc *), M_NOWAIT);
1264                         if (mtag == NULL)
1265                                 /* Better a bit than nothing. */
1266                                 return (LLADDR(&sc->sc_addr));
1267
1268                         bcopy(&sc, mtag + 1, sizeof(sc));
1269                         m_tag_prepend(m, mtag);
1270
1271                         return (LLADDR(&sc->sc_addr));
1272                 }
1273
1274         return (NULL);
1275 }
1276 #endif /* INET6 */
1277
1278 int
1279 carp_forus(struct ifnet *ifp, u_char *dhost)
1280 {
1281         struct carp_softc *sc;
1282         uint8_t *ena = dhost;
1283
1284         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1285                 return (0);
1286
1287         CIF_LOCK(ifp->if_carp);
1288         IFNET_FOREACH_CARP(ifp, sc) {
1289                 /*
1290                  * CARP_LOCK() is not here, since would protect nothing, but
1291                  * cause deadlock with if_bridge, calling this under its lock.
1292                  */
1293                 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
1294                     ETHER_ADDR_LEN)) {
1295                         CIF_UNLOCK(ifp->if_carp);
1296                         return (1);
1297                 }
1298         }
1299         CIF_UNLOCK(ifp->if_carp);
1300
1301         return (0);
1302 }
1303
1304 /* Master down timeout event, executed in callout context. */
1305 static void
1306 carp_master_down(void *v)
1307 {
1308         struct carp_softc *sc = v;
1309         struct epoch_tracker et;
1310
1311         NET_EPOCH_ENTER(et);
1312         CARP_LOCK_ASSERT(sc);
1313
1314         CURVNET_SET(sc->sc_carpdev->if_vnet);
1315         if (sc->sc_state == BACKUP) {
1316                 carp_master_down_locked(sc, "master timed out");
1317         }
1318         CURVNET_RESTORE();
1319
1320         CARP_UNLOCK(sc);
1321         NET_EPOCH_EXIT(et);
1322 }
1323
/*
 * Act on a "master is down" event for a vhid.
 *
 * Must be called inside the network epoch with the softc lock held.
 * 'reason' is passed on to carp_set_state() for the state change.
 *
 * In BACKUP state the vhid takes over: it switches to MASTER, sends an
 * advertisement immediately, announces its addresses via ARP and/or NA,
 * re-arms its timers and installs its routes.  In INIT or MASTER state
 * the event is unexpected: kernels built with INVARIANTS panic, others
 * silently ignore it.
 */
static void
carp_master_down_locked(struct carp_softc *sc, const char *reason)
{

        NET_EPOCH_ASSERT();
        CARP_LOCK_ASSERT(sc);

        switch (sc->sc_state) {
        case BACKUP:
                carp_set_state(sc, MASTER, reason);
                carp_send_ad_locked(sc);
#ifdef INET
                carp_send_arp(sc);
#endif
#ifdef INET6
                carp_send_na(sc);
#endif
                /* State is MASTER now, so this arms the advertisement timer. */
                carp_setrun(sc, 0);
                carp_addroute(sc);
                break;
        case INIT:
        case MASTER:
#ifdef INVARIANTS
                /*
                 * NOTE(review): the state name printed below treats any
                 * non-zero state as MASTER, i.e. it presumes INIT is the
                 * zero-valued state -- confirm against the state enum.
                 */
                panic("carp: VHID %u@%s: master_down event in %s state\n",
                    sc->sc_vhid,
                    if_name(sc->sc_carpdev),
                    sc->sc_state ? "MASTER" : "INIT");
#endif
                break;
        }
}
1355
/*
 * (Re)arm the timers appropriate for the vhid's current state.
 *
 * When in backup state, af indicates whether to reset the master down timer
 * for v4 or v6. If it's set to zero, reset the ones which are already pending.
 *
 * Nothing is done if the parent interface is not up, its link is not
 * up, the vhid has no addresses at all, or CARP is administratively
 * disallowed (V_carp_allow is zero).  The softc lock must be held.
 */
static void
carp_setrun(struct carp_softc *sc, sa_family_t af)
{
        struct timeval tv;

        CARP_LOCK_ASSERT(sc);

        if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
            sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
            (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) ||
            !V_carp_allow)
                return;

        switch (sc->sc_state) {
        case INIT:
                /* Leave INIT for BACKUP, then arm the BACKUP timers. */
                carp_set_state(sc, BACKUP, "initialization complete");
                carp_setrun(sc, 0);
                break;
        case BACKUP:
                /* A backup does not advertise. */
                callout_stop(&sc->sc_ad_tmo);
                /* Master-down timeout: 3 * advbase + advskew/256 seconds. */
                tv.tv_sec = 3 * sc->sc_advbase;
                tv.tv_usec = sc->sc_advskew * 1000000 / 256;
                switch (af) {
#ifdef INET
                case AF_INET:
                        callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
                            carp_master_down, sc);
                        break;
#endif
#ifdef INET6
                case AF_INET6:
                        callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
                            carp_master_down, sc);
                        break;
#endif
                default:
                        /*
                         * No specific family requested: arm the timer of
                         * each family the vhid has addresses for.
                         */
#ifdef INET
                        if (sc->sc_naddrs)
                                callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
                                    carp_master_down, sc);
#endif
#ifdef INET6
                        if (sc->sc_naddrs6)
                                callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
                                    carp_master_down, sc);
#endif
                        break;
                }
                break;
        case MASTER:
                /* Next advertisement in advbase + advskew/256 seconds. */
                tv.tv_sec = sc->sc_advbase;
                tv.tv_usec = sc->sc_advskew * 1000000 / 256;
                callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
                    carp_send_ad, sc);
                break;
        }
}
1417
1418 /*
1419  * Setup multicast structures.
1420  */
1421 static int
1422 carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
1423 {
1424         struct ifnet *ifp = cif->cif_ifp;
1425         int error = 0;
1426
1427         switch (sa) {
1428 #ifdef INET
1429         case AF_INET:
1430             {
1431                 struct ip_moptions *imo = &cif->cif_imo;
1432                 struct in_mfilter *imf;
1433                 struct in_addr addr;
1434
1435                 if (ip_mfilter_first(&imo->imo_head) != NULL)
1436                         return (0);
1437
1438                 imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
1439                 ip_mfilter_init(&imo->imo_head);
1440                 imo->imo_multicast_vif = -1;
1441
1442                 addr.s_addr = htonl(INADDR_CARP_GROUP);
1443                 if ((error = in_joingroup(ifp, &addr, NULL,
1444                     &imf->imf_inm)) != 0) {
1445                         ip_mfilter_free(imf);
1446                         break;
1447                 }
1448
1449                 ip_mfilter_insert(&imo->imo_head, imf);
1450                 imo->imo_multicast_ifp = ifp;
1451                 imo->imo_multicast_ttl = CARP_DFLTTL;
1452                 imo->imo_multicast_loop = 0;
1453                 break;
1454            }
1455 #endif
1456 #ifdef INET6
1457         case AF_INET6:
1458             {
1459                 struct ip6_moptions *im6o = &cif->cif_im6o;
1460                 struct in6_mfilter *im6f[2];
1461                 struct in6_addr in6;
1462
1463                 if (ip6_mfilter_first(&im6o->im6o_head))
1464                         return (0);
1465
1466                 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
1467                 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
1468
1469                 ip6_mfilter_init(&im6o->im6o_head);
1470                 im6o->im6o_multicast_hlim = CARP_DFLTTL;
1471                 im6o->im6o_multicast_ifp = ifp;
1472
1473                 /* Join IPv6 CARP multicast group. */
1474                 bzero(&in6, sizeof(in6));
1475                 in6.s6_addr16[0] = htons(0xff02);
1476                 in6.s6_addr8[15] = 0x12;
1477                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1478                         ip6_mfilter_free(im6f[0]);
1479                         ip6_mfilter_free(im6f[1]);
1480                         break;
1481                 }
1482                 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) {
1483                         ip6_mfilter_free(im6f[0]);
1484                         ip6_mfilter_free(im6f[1]);
1485                         break;
1486                 }
1487
1488                 /* Join solicited multicast address. */
1489                 bzero(&in6, sizeof(in6));
1490                 in6.s6_addr16[0] = htons(0xff02);
1491                 in6.s6_addr32[1] = 0;
1492                 in6.s6_addr32[2] = htonl(1);
1493                 in6.s6_addr32[3] = 0;
1494                 in6.s6_addr8[12] = 0xff;
1495
1496                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1497                         ip6_mfilter_free(im6f[0]);
1498                         ip6_mfilter_free(im6f[1]);
1499                         break;
1500                 }
1501
1502                 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) {
1503                         in6_leavegroup(im6f[0]->im6f_in6m, NULL);
1504                         ip6_mfilter_free(im6f[0]);
1505                         ip6_mfilter_free(im6f[1]);
1506                         break;
1507                 }
1508                 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]);
1509                 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]);
1510                 break;
1511             }
1512 #endif
1513         }
1514
1515         return (error);
1516 }
1517
1518 /*
1519  * Free multicast structures.
1520  */
1521 static void
1522 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
1523 {
1524 #ifdef INET
1525         struct ip_moptions *imo = &cif->cif_imo;
1526         struct in_mfilter *imf;
1527 #endif
1528 #ifdef INET6
1529         struct ip6_moptions *im6o = &cif->cif_im6o;
1530         struct in6_mfilter *im6f;
1531 #endif
1532         sx_assert(&carp_sx, SA_XLOCKED);
1533
1534         switch (sa) {
1535 #ifdef INET
1536         case AF_INET:
1537                 if (cif->cif_naddrs != 0)
1538                         break;
1539
1540                 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
1541                         ip_mfilter_remove(&imo->imo_head, imf);
1542                         in_leavegroup(imf->imf_inm, NULL);
1543                         ip_mfilter_free(imf);
1544                 }
1545                 break;
1546 #endif
1547 #ifdef INET6
1548         case AF_INET6:
1549                 if (cif->cif_naddrs6 != 0)
1550                         break;
1551
1552                 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) {
1553                         ip6_mfilter_remove(&im6o->im6o_head, im6f);
1554                         in6_leavegroup(im6f->im6f_in6m, NULL);
1555                         ip6_mfilter_free(im6f);
1556                 }
1557                 break;
1558 #endif
1559         }
1560 }
1561
1562 int
1563 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
1564 {
1565         struct m_tag *mtag;
1566         struct carp_softc *sc;
1567
1568         if (!sa)
1569                 return (0);
1570
1571         switch (sa->sa_family) {
1572 #ifdef INET
1573         case AF_INET:
1574                 break;
1575 #endif
1576 #ifdef INET6
1577         case AF_INET6:
1578                 break;
1579 #endif
1580         default:
1581                 return (0);
1582         }
1583
1584         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
1585         if (mtag == NULL)
1586                 return (0);
1587
1588         bcopy(mtag + 1, &sc, sizeof(sc));
1589
1590         switch (sa->sa_family) {
1591         case AF_INET:
1592                 if (! IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)))
1593                         return (0);
1594                 break;
1595         case AF_INET6:
1596                 if (! IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6))
1597                         return (0);
1598                 break;
1599         default:
1600                 panic("Unknown af");
1601         }
1602
1603         /* Set the source MAC address to the Virtual Router MAC Address. */
1604         switch (ifp->if_type) {
1605         case IFT_ETHER:
1606         case IFT_BRIDGE:
1607         case IFT_L2VLAN: {
1608                         struct ether_header *eh;
1609
1610                         eh = mtod(m, struct ether_header *);
1611                         eh->ether_shost[0] = 0;
1612                         eh->ether_shost[1] = 0;
1613                         eh->ether_shost[2] = 0x5e;
1614                         eh->ether_shost[3] = 0;
1615                         eh->ether_shost[4] = 1;
1616                         eh->ether_shost[5] = sc->sc_vhid;
1617                 }
1618                 break;
1619         default:
1620                 printf("%s: carp is not supported for the %d interface type\n",
1621                     if_name(ifp), ifp->if_type);
1622                 return (EOPNOTSUPP);
1623         }
1624
1625         return (0);
1626 }
1627
1628 static struct carp_softc*
1629 carp_alloc(struct ifnet *ifp)
1630 {
1631         struct carp_softc *sc;
1632         struct carp_if *cif;
1633
1634         sx_assert(&carp_sx, SA_XLOCKED);
1635
1636         if ((cif = ifp->if_carp) == NULL)
1637                 cif = carp_alloc_if(ifp);
1638
1639         sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
1640
1641         sc->sc_advbase = CARP_DFLTINTV;
1642         sc->sc_vhid = -1;       /* required setting */
1643         sc->sc_init_counter = 1;
1644         sc->sc_state = INIT;
1645
1646         sc->sc_ifasiz = sizeof(struct ifaddr *);
1647         sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
1648         sc->sc_carpdev = ifp;
1649
1650         sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP);
1651         sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
1652         sc->sc_carpaddr6.s6_addr8[15] = 0x12;
1653
1654         CARP_LOCK_INIT(sc);
1655 #ifdef INET
1656         callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1657 #endif
1658 #ifdef INET6
1659         callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1660 #endif
1661         callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1662
1663         CIF_LOCK(cif);
1664         TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
1665         CIF_UNLOCK(cif);
1666
1667         mtx_lock(&carp_mtx);
1668         LIST_INSERT_HEAD(&carp_list, sc, sc_next);
1669         mtx_unlock(&carp_mtx);
1670
1671         return (sc);
1672 }
1673
1674 static void
1675 carp_grow_ifas(struct carp_softc *sc)
1676 {
1677         struct ifaddr **new;
1678
1679         new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO);
1680         CARP_LOCK(sc);
1681         bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
1682         free(sc->sc_ifas, M_CARP);
1683         sc->sc_ifas = new;
1684         sc->sc_ifasiz *= 2;
1685         CARP_UNLOCK(sc);
1686 }
1687
/*
 * Tear down a vhid: undo its demotion contribution, unlink it from the
 * per-interface and global lists, drain its timers and free it.
 *
 * Requires carp_sx to be held exclusively.  The softc lock is released
 * here before unlinking, so the caller presumably enters with it held
 * -- NOTE(review): confirm against callers.
 */
static void
carp_destroy(struct carp_softc *sc)
{
        struct ifnet *ifp = sc->sc_carpdev;
        struct carp_if *cif = ifp->if_carp;

        sx_assert(&carp_sx, SA_XLOCKED);

        /* Revert the interface-down demotion this vhid had contributed. */
        if (sc->sc_suppress)
                carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
        CARP_UNLOCK(sc);

        CIF_LOCK(cif);
        TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
        CIF_UNLOCK(cif);

        mtx_lock(&carp_mtx);
        LIST_REMOVE(sc, sc_next);
        mtx_unlock(&carp_mtx);

        /*
         * The timers were initialised on the softc mutex (see
         * carp_alloc()), so drain them before that mutex is destroyed.
         */
        callout_drain(&sc->sc_ad_tmo);
#ifdef INET
        callout_drain(&sc->sc_md_tmo);
#endif
#ifdef INET6
        callout_drain(&sc->sc_md6_tmo);
#endif
        CARP_LOCK_DESTROY(sc);

        free(sc->sc_ifas, M_CARP);
        free(sc, M_CARP);
}
1720
1721 static struct carp_if*
1722 carp_alloc_if(struct ifnet *ifp)
1723 {
1724         struct carp_if *cif;
1725         int error;
1726
1727         cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
1728
1729         if ((error = ifpromisc(ifp, 1)) != 0)
1730                 printf("%s: ifpromisc(%s) failed: %d\n",
1731                     __func__, if_name(ifp), error);
1732         else
1733                 cif->cif_flags |= CIF_PROMISC;
1734
1735         CIF_LOCK_INIT(cif);
1736         cif->cif_ifp = ifp;
1737         TAILQ_INIT(&cif->cif_vrs);
1738
1739         IF_ADDR_WLOCK(ifp);
1740         ifp->if_carp = cif;
1741         if_ref(ifp);
1742         IF_ADDR_WUNLOCK(ifp);
1743
1744         return (cif);
1745 }
1746
1747 static void
1748 carp_free_if(struct carp_if *cif)
1749 {
1750         struct ifnet *ifp = cif->cif_ifp;
1751
1752         CIF_LOCK_ASSERT(cif);
1753         KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
1754             __func__));
1755
1756         IF_ADDR_WLOCK(ifp);
1757         ifp->if_carp = NULL;
1758         IF_ADDR_WUNLOCK(ifp);
1759
1760         CIF_LOCK_DESTROY(cif);
1761
1762         if (cif->cif_flags & CIF_PROMISC)
1763                 ifpromisc(ifp, 0);
1764         if_rele(ifp);
1765
1766         free(cif, M_CARP);
1767 }
1768
1769 static bool
1770 carp_carprcp(void *arg, struct carp_softc *sc, int priv)
1771 {
1772         struct carpreq *carpr = arg;
1773
1774         CARP_LOCK(sc);
1775         carpr->carpr_state = sc->sc_state;
1776         carpr->carpr_vhid = sc->sc_vhid;
1777         carpr->carpr_advbase = sc->sc_advbase;
1778         carpr->carpr_advskew = sc->sc_advskew;
1779         if (priv)
1780                 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
1781         else
1782                 bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
1783         CARP_UNLOCK(sc);
1784
1785         return (true);
1786 }
1787
1788 static int
1789 carp_ioctl_set(if_t ifp, struct carpkreq *carpr)
1790 {
1791         struct epoch_tracker et;
1792         struct carp_softc *sc = NULL;
1793         int error = 0;
1794
1795
1796         if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID ||
1797             carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0) {
1798                 return (EINVAL);
1799         }
1800
1801         if (ifp->if_carp) {
1802                 IFNET_FOREACH_CARP(ifp, sc)
1803                         if (sc->sc_vhid == carpr->carpr_vhid)
1804                                 break;
1805         }
1806         if (sc == NULL) {
1807                 sc = carp_alloc(ifp);
1808                 CARP_LOCK(sc);
1809                 sc->sc_vhid = carpr->carpr_vhid;
1810                 LLADDR(&sc->sc_addr)[0] = 0;
1811                 LLADDR(&sc->sc_addr)[1] = 0;
1812                 LLADDR(&sc->sc_addr)[2] = 0x5e;
1813                 LLADDR(&sc->sc_addr)[3] = 0;
1814                 LLADDR(&sc->sc_addr)[4] = 1;
1815                 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
1816         } else
1817                 CARP_LOCK(sc);
1818         if (carpr->carpr_advbase > 0) {
1819                 if (carpr->carpr_advbase > 255 ||
1820                     carpr->carpr_advbase < CARP_DFLTINTV) {
1821                         error = EINVAL;
1822                         goto out;
1823                 }
1824                 sc->sc_advbase = carpr->carpr_advbase;
1825         }
1826         if (carpr->carpr_advskew >= 255) {
1827                 error = EINVAL;
1828                 goto out;
1829         }
1830         sc->sc_advskew = carpr->carpr_advskew;
1831         if (carpr->carpr_addr.s_addr != INADDR_ANY)
1832                 sc->sc_carpaddr = carpr->carpr_addr;
1833         if (! IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) {
1834                 memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6,
1835                     sizeof(sc->sc_carpaddr6));
1836         }
1837         if (carpr->carpr_key[0] != '\0') {
1838                 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
1839                 carp_hmac_prepare(sc);
1840         }
1841         if (sc->sc_state != INIT &&
1842             carpr->carpr_state != sc->sc_state) {
1843                 switch (carpr->carpr_state) {
1844                 case BACKUP:
1845                         callout_stop(&sc->sc_ad_tmo);
1846                         carp_set_state(sc, BACKUP,
1847                             "user requested via ifconfig");
1848                         carp_setrun(sc, 0);
1849                         carp_delroute(sc);
1850                         break;
1851                 case MASTER:
1852                         NET_EPOCH_ENTER(et);
1853                         carp_master_down_locked(sc,
1854                             "user requested via ifconfig");
1855                         NET_EPOCH_EXIT(et);
1856                         break;
1857                 default:
1858                         break;
1859                 }
1860         }
1861
1862 out:
1863         CARP_UNLOCK(sc);
1864
1865         return (error);
1866 }
1867
1868 static int
1869 carp_ioctl_get(if_t ifp, struct ucred *cred, struct carpreq *carpr,
1870     bool (*outfn)(void *, struct carp_softc *, int), void *arg)
1871 {
1872         int priveleged;
1873         struct carp_softc *sc;
1874
1875         if (carpr->carpr_vhid < 0 || carpr->carpr_vhid > CARP_MAXVHID)
1876                 return (EINVAL);
1877         if (carpr->carpr_count < 1)
1878                 return (EMSGSIZE);
1879         if (ifp->if_carp == NULL)
1880                 return (ENOENT);
1881
1882         priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0);
1883         if (carpr->carpr_vhid != 0) {
1884                 IFNET_FOREACH_CARP(ifp, sc)
1885                         if (sc->sc_vhid == carpr->carpr_vhid)
1886                                 break;
1887                 if (sc == NULL)
1888                         return (ENOENT);
1889
1890                 if (! outfn(arg, sc, priveleged))
1891                         return (ENOMEM);
1892                 carpr->carpr_count = 1;
1893         } else  {
1894                 int count;
1895
1896                 count = 0;
1897                 IFNET_FOREACH_CARP(ifp, sc)
1898                         count++;
1899
1900                 if (count > carpr->carpr_count)
1901                         return (EMSGSIZE);
1902
1903                 IFNET_FOREACH_CARP(ifp, sc) {
1904                         if (! outfn(arg, sc, priveleged))
1905                                 return (ENOMEM);
1906                         carpr->carpr_count = count;
1907                 }
1908         }
1909
1910         return (0);
1911 }
1912
1913 int
1914 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
1915 {
1916         struct carpreq carpr;
1917         struct carpkreq carprk = { };
1918         struct ifnet *ifp;
1919         int error = 0;
1920
1921         if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr)))
1922                 return (error);
1923
1924         ifp = ifunit_ref(ifr->ifr_name);
1925         if ((error = carp_is_supported_if(ifp)) != 0)
1926                 goto out;
1927
1928         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1929                 error = EADDRNOTAVAIL;
1930                 goto out;
1931         }
1932
1933         sx_xlock(&carp_sx);
1934         switch (cmd) {
1935         case SIOCSVH:
1936                 if ((error = priv_check(td, PRIV_NETINET_CARP)))
1937                         break;
1938
1939                 memcpy(&carprk, &carpr, sizeof(carpr));
1940                 error = carp_ioctl_set(ifp, &carprk);
1941                 break;
1942
1943         case SIOCGVH:
1944                 error = carp_ioctl_get(ifp, td->td_ucred, &carpr,
1945                     carp_carprcp, &carpr);
1946                 if (error == 0) {
1947                         error = copyout(&carpr,
1948                             (char *)ifr_data_get_ptr(ifr),
1949                             carpr.carpr_count * sizeof(carpr));
1950                 }
1951                 break;
1952         default:
1953                 error = EINVAL;
1954         }
1955         sx_xunlock(&carp_sx);
1956
1957 out:
1958         if (ifp != NULL)
1959                 if_rele(ifp);
1960
1961         return (error);
1962 }
1963
1964 static int
1965 carp_get_vhid(struct ifaddr *ifa)
1966 {
1967
1968         if (ifa == NULL || ifa->ifa_carp == NULL)
1969                 return (0);
1970
1971         return (ifa->ifa_carp->sc_vhid);
1972 }
1973
1974 int
1975 carp_attach(struct ifaddr *ifa, int vhid)
1976 {
1977         struct ifnet *ifp = ifa->ifa_ifp;
1978         struct carp_if *cif = ifp->if_carp;
1979         struct carp_softc *sc;
1980         int index, error;
1981
1982         KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa));
1983
1984         switch (ifa->ifa_addr->sa_family) {
1985 #ifdef INET
1986         case AF_INET:
1987 #endif
1988 #ifdef INET6
1989         case AF_INET6:
1990 #endif
1991                 break;
1992         default:
1993                 return (EPROTOTYPE);
1994         }
1995
1996         sx_xlock(&carp_sx);
1997         if (ifp->if_carp == NULL) {
1998                 sx_xunlock(&carp_sx);
1999                 return (ENOPROTOOPT);
2000         }
2001
2002         IFNET_FOREACH_CARP(ifp, sc)
2003                 if (sc->sc_vhid == vhid)
2004                         break;
2005         if (sc == NULL) {
2006                 sx_xunlock(&carp_sx);
2007                 return (ENOENT);
2008         }
2009
2010         error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
2011         if (error) {
2012                 CIF_FREE(cif);
2013                 sx_xunlock(&carp_sx);
2014                 return (error);
2015         }
2016
2017         index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
2018         if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
2019                 carp_grow_ifas(sc);
2020
2021         switch (ifa->ifa_addr->sa_family) {
2022 #ifdef INET
2023         case AF_INET:
2024                 cif->cif_naddrs++;
2025                 sc->sc_naddrs++;
2026                 break;
2027 #endif
2028 #ifdef INET6
2029         case AF_INET6:
2030                 cif->cif_naddrs6++;
2031                 sc->sc_naddrs6++;
2032                 break;
2033 #endif
2034         }
2035
2036         ifa_ref(ifa);
2037
2038         CARP_LOCK(sc);
2039         sc->sc_ifas[index - 1] = ifa;
2040         ifa->ifa_carp = sc;
2041         carp_hmac_prepare(sc);
2042         carp_sc_state(sc);
2043         CARP_UNLOCK(sc);
2044
2045         sx_xunlock(&carp_sx);
2046
2047         return (0);
2048 }
2049
2050 void
2051 carp_detach(struct ifaddr *ifa, bool keep_cif)
2052 {
2053         struct ifnet *ifp = ifa->ifa_ifp;
2054         struct carp_if *cif = ifp->if_carp;
2055         struct carp_softc *sc = ifa->ifa_carp;
2056         int i, index;
2057
2058         KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
2059
2060         sx_xlock(&carp_sx);
2061
2062         CARP_LOCK(sc);
2063         /* Shift array. */
2064         index = sc->sc_naddrs + sc->sc_naddrs6;
2065         for (i = 0; i < index; i++)
2066                 if (sc->sc_ifas[i] == ifa)
2067                         break;
2068         KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
2069         for (; i < index - 1; i++)
2070                 sc->sc_ifas[i] = sc->sc_ifas[i+1];
2071         sc->sc_ifas[index - 1] = NULL;
2072
2073         switch (ifa->ifa_addr->sa_family) {
2074 #ifdef INET
2075         case AF_INET:
2076                 cif->cif_naddrs--;
2077                 sc->sc_naddrs--;
2078                 break;
2079 #endif
2080 #ifdef INET6
2081         case AF_INET6:
2082                 cif->cif_naddrs6--;
2083                 sc->sc_naddrs6--;
2084                 break;
2085 #endif
2086         }
2087
2088         carp_ifa_delroute(ifa);
2089         carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
2090
2091         ifa->ifa_carp = NULL;
2092         ifa_free(ifa);
2093
2094         carp_hmac_prepare(sc);
2095         carp_sc_state(sc);
2096
2097         if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)
2098                 carp_destroy(sc);
2099         else
2100                 CARP_UNLOCK(sc);
2101
2102         if (!keep_cif)
2103                 CIF_FREE(cif);
2104
2105         sx_xunlock(&carp_sx);
2106 }
2107
2108 static void
2109 carp_set_state(struct carp_softc *sc, int state, const char *reason)
2110 {
2111
2112         CARP_LOCK_ASSERT(sc);
2113
2114         if (sc->sc_state != state) {
2115                 const char *carp_states[] = { CARP_STATES };
2116                 char subsys[IFNAMSIZ+5];
2117
2118                 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
2119                     if_name(sc->sc_carpdev));
2120
2121                 CARP_LOG("%s: %s -> %s (%s)\n", subsys,
2122                     carp_states[sc->sc_state], carp_states[state], reason);
2123
2124                 sc->sc_state = state;
2125
2126                 devctl_notify("CARP", subsys, carp_states[state], NULL);
2127         }
2128 }
2129
2130 static void
2131 carp_linkstate(struct ifnet *ifp)
2132 {
2133         struct carp_softc *sc;
2134
2135         CIF_LOCK(ifp->if_carp);
2136         IFNET_FOREACH_CARP(ifp, sc) {
2137                 CARP_LOCK(sc);
2138                 carp_sc_state(sc);
2139                 CARP_UNLOCK(sc);
2140         }
2141         CIF_UNLOCK(ifp->if_carp);
2142 }
2143
2144 static void
2145 carp_sc_state(struct carp_softc *sc)
2146 {
2147
2148         CARP_LOCK_ASSERT(sc);
2149
2150         if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
2151             !(sc->sc_carpdev->if_flags & IFF_UP) ||
2152             !V_carp_allow) {
2153                 callout_stop(&sc->sc_ad_tmo);
2154 #ifdef INET
2155                 callout_stop(&sc->sc_md_tmo);
2156 #endif
2157 #ifdef INET6
2158                 callout_stop(&sc->sc_md6_tmo);
2159 #endif
2160                 carp_set_state(sc, INIT, "hardware interface down");
2161                 carp_setrun(sc, 0);
2162                 if (!sc->sc_suppress)
2163                         carp_demote_adj(V_carp_ifdown_adj, "interface down");
2164                 sc->sc_suppress = 1;
2165         } else {
2166                 carp_set_state(sc, INIT, "hardware interface up");
2167                 carp_setrun(sc, 0);
2168                 if (sc->sc_suppress)
2169                         carp_demote_adj(-V_carp_ifdown_adj, "interface up");
2170                 sc->sc_suppress = 0;
2171         }
2172 }
2173
2174 static void
2175 carp_demote_adj(int adj, char *reason)
2176 {
2177         atomic_add_int(&V_carp_demotion, adj);
2178         CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
2179         taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
2180 }
2181
2182 static int
2183 carp_allow_sysctl(SYSCTL_HANDLER_ARGS)
2184 {
2185         int new, error;
2186         struct carp_softc *sc;
2187
2188         new = V_carp_allow;
2189         error = sysctl_handle_int(oidp, &new, 0, req);
2190         if (error || !req->newptr)
2191                 return (error);
2192
2193         if (V_carp_allow != new) {
2194                 V_carp_allow = new;
2195
2196                 mtx_lock(&carp_mtx);
2197                 LIST_FOREACH(sc, &carp_list, sc_next) {
2198                         CARP_LOCK(sc);
2199                         if (curvnet == sc->sc_carpdev->if_vnet)
2200                                 carp_sc_state(sc);
2201                         CARP_UNLOCK(sc);
2202                 }
2203                 mtx_unlock(&carp_mtx);
2204         }
2205
2206         return (0);
2207 }
2208
2209 static int
2210 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS)
2211 {
2212         int new, error;
2213
2214         new = V_carp_dscp;
2215         error = sysctl_handle_int(oidp, &new, 0, req);
2216         if (error || !req->newptr)
2217                 return (error);
2218
2219         if (new < 0 || new > 63)
2220                 return (EINVAL);
2221
2222         V_carp_dscp = new;
2223
2224         return (0);
2225 }
2226
2227 static int
2228 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
2229 {
2230         int new, error;
2231
2232         new = V_carp_demotion;
2233         error = sysctl_handle_int(oidp, &new, 0, req);
2234         if (error || !req->newptr)
2235                 return (error);
2236
2237         carp_demote_adj(new, "sysctl");
2238
2239         return (0);
2240 }
2241
2242 static int
2243 nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target)
2244 {
2245         if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN))
2246                 return (EINVAL);
2247
2248         memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla));
2249         return (0);
2250 }
2251
/* Context handed to carp_nl_send() through carp_ioctl_get()'s arg. */
struct carp_nl_send_args {
        struct nlmsghdr         *hdr;   /* request header being answered */
        struct nl_pstate        *npt;   /* parser state; supplies the writer */
};
2256
2257 static bool
2258 carp_nl_send(void *arg, struct carp_softc *sc, int priv)
2259 {
2260         struct carp_nl_send_args *nlsa = arg;
2261         struct nlmsghdr *hdr = nlsa->hdr;
2262         struct nl_pstate *npt = nlsa->npt;
2263         struct nl_writer *nw = npt->nw;
2264         struct genlmsghdr *ghdr_new;
2265
2266         if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
2267                 nlmsg_abort(nw);
2268                 return (false);
2269         }
2270
2271         ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
2272         if (ghdr_new == NULL) {
2273                 nlmsg_abort(nw);
2274                 return (false);
2275         }
2276
2277         ghdr_new->cmd = CARP_NL_CMD_GET;
2278         ghdr_new->version = 0;
2279         ghdr_new->reserved = 0;
2280
2281         CARP_LOCK(sc);
2282
2283         nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid);
2284         nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state);
2285         nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase);
2286         nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew);
2287         nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr);
2288         nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6);
2289
2290         if (priv)
2291                 nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), sc->sc_key);
2292
2293         CARP_UNLOCK(sc);
2294
2295         if (! nlmsg_end(nw)) {
2296                 nlmsg_abort(nw);
2297                 return (false);
2298         }
2299
2300         return (true);
2301 }
2302
2303 struct nl_carp_parsed {
2304         unsigned int    ifindex;
2305         char            *ifname;
2306         uint32_t        state;
2307         uint32_t        vhid;
2308         int32_t         advbase;
2309         int32_t         advskew;
2310         char            key[CARP_KEY_LEN];
2311         struct in_addr  addr;
2312         struct in6_addr addr6;
2313 };
2314
2315 #define _IN(_field)     offsetof(struct genlmsghdr, _field)
2316 #define _OUT(_field)    offsetof(struct nl_carp_parsed, _field)
2317
2318 static const struct nlattr_parser nla_p_set[] = {
2319         { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 },
2320         { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 },
2321         { .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 },
2322         { .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 },
2323         { .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key },
2324         { .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 },
2325         { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr },
2326         { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr },
2327         { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string },
2328 };
2329 static const struct nlfield_parser nlf_p_set[] = {
2330 };
2331 NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_set, nla_p_set);
2332 #undef _IN
2333 #undef _OUT
2334
2335
2336 static int
2337 carp_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt)
2338 {
2339         struct nl_carp_parsed attrs = { };
2340         struct carp_nl_send_args args;
2341         struct carpreq carpr = { };
2342         struct epoch_tracker et;
2343         if_t ifp = NULL;
2344         int error;
2345
2346         error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs);
2347         if (error != 0)
2348                 return (error);
2349
2350         NET_EPOCH_ENTER(et);
2351         if (attrs.ifname != NULL)
2352                 ifp = ifunit_ref(attrs.ifname);
2353         else if (attrs.ifindex != 0)
2354                 ifp = ifnet_byindex_ref(attrs.ifindex);
2355         NET_EPOCH_EXIT(et);
2356
2357         if ((error = carp_is_supported_if(ifp)) != 0)
2358                 goto out;
2359
2360         hdr->nlmsg_flags |= NLM_F_MULTI;
2361         args.hdr = hdr;
2362         args.npt = npt;
2363
2364         carpr.carpr_vhid = attrs.vhid;
2365         carpr.carpr_count = CARP_MAXVHID;
2366
2367         sx_xlock(&carp_sx);
2368         error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr,
2369             carp_nl_send, &args);
2370         sx_xunlock(&carp_sx);
2371
2372         if (! nlmsg_end_dump(npt->nw, error, hdr))
2373                 error = ENOMEM;
2374
2375 out:
2376         if (ifp != NULL)
2377                 if_rele(ifp);
2378
2379         return (error);
2380 }
2381
2382 static int
2383 carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
2384 {
2385         struct nl_carp_parsed attrs = { };
2386         struct carpkreq carpr;
2387         struct epoch_tracker et;
2388         if_t ifp = NULL;
2389         int error;
2390
2391         error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs);
2392         if (error != 0)
2393                 return (error);
2394
2395         if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID)
2396                 return (EINVAL);
2397         if (attrs.state > CARP_MAXSTATE)
2398                 return (EINVAL);
2399         if (attrs.advbase < 0 || attrs.advskew < 0)
2400                 return (EINVAL);
2401         if (attrs.advbase > 255)
2402                 return (EINVAL);
2403         if (attrs.advskew >= 255)
2404                 return (EINVAL);
2405
2406         NET_EPOCH_ENTER(et);
2407         if (attrs.ifname != NULL)
2408                 ifp = ifunit_ref(attrs.ifname);
2409         else if (attrs.ifindex != 0)
2410                 ifp = ifnet_byindex_ref(attrs.ifindex);
2411         NET_EPOCH_EXIT(et);
2412
2413         if ((error = carp_is_supported_if(ifp)) != 0)
2414                 goto out;
2415
2416         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2417                 error = EADDRNOTAVAIL;
2418                 goto out;
2419         }
2420
2421         carpr.carpr_count = 1;
2422         carpr.carpr_vhid = attrs.vhid;
2423         carpr.carpr_state = attrs.state;
2424         carpr.carpr_advbase = attrs.advbase;
2425         carpr.carpr_advskew = attrs.advskew;
2426         carpr.carpr_addr = attrs.addr;
2427         carpr.carpr_addr6 = attrs.addr6;
2428
2429         memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key));
2430
2431         sx_xlock(&carp_sx);
2432         error = carp_ioctl_set(ifp, &carpr);
2433         sx_xunlock(&carp_sx);
2434
2435 out:
2436         if (ifp != NULL)
2437                 if_rele(ifp);
2438
2439         return (error);
2440 }
2441
2442 static const struct nlhdr_parser *all_parsers[] = {
2443         &carp_parser
2444 };
2445
2446 static const struct genl_cmd carp_cmds[] = {
2447         {
2448                 .cmd_num = CARP_NL_CMD_GET,
2449                 .cmd_name = "SIOCGVH",
2450                 .cmd_cb = carp_nl_get,
2451                 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP |
2452                     GENL_CMD_CAP_HASPOL,
2453         },
2454         {
2455                 .cmd_num = CARP_NL_CMD_SET,
2456                 .cmd_name = "SIOCSVH",
2457                 .cmd_cb = carp_nl_set,
2458                 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
2459                 .cmd_priv = PRIV_NETINET_CARP,
2460         },
2461 };
2462
2463 static void
2464 carp_nl_register(void)
2465 {
2466         bool ret __diagused;
2467         int family_id __diagused;
2468
2469         NL_VERIFY_PARSERS(all_parsers);
2470         family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2,
2471             CARP_NL_CMD_MAX);
2472         MPASS(family_id != 0);
2473
2474         ret = genl_register_cmds(CARP_NL_FAMILY_NAME, carp_cmds,
2475             NL_ARRAY_LEN(carp_cmds));
2476         MPASS(ret);
2477 }
2478
2479 static void
2480 carp_nl_unregister(void)
2481 {
2482         genl_unregister_family(CARP_NL_FAMILY_NAME);
2483 }
2484
2485 static void
2486 carp_mod_cleanup(void)
2487 {
2488
2489         carp_nl_unregister();
2490
2491 #ifdef INET
2492         (void)ipproto_unregister(IPPROTO_CARP);
2493         carp_iamatch_p = NULL;
2494 #endif
2495 #ifdef INET6
2496         (void)ip6proto_unregister(IPPROTO_CARP);
2497         carp_iamatch6_p = NULL;
2498         carp_macmatch6_p = NULL;
2499 #endif
2500         carp_ioctl_p = NULL;
2501         carp_attach_p = NULL;
2502         carp_detach_p = NULL;
2503         carp_get_vhid_p = NULL;
2504         carp_linkstate_p = NULL;
2505         carp_forus_p = NULL;
2506         carp_output_p = NULL;
2507         carp_demote_adj_p = NULL;
2508         carp_master_p = NULL;
2509         mtx_unlock(&carp_mtx);
2510         taskqueue_drain(taskqueue_swi, &carp_sendall_task);
2511         mtx_destroy(&carp_mtx);
2512         sx_destroy(&carp_sx);
2513 }
2514
2515 static void
2516 ipcarp_sysinit(void)
2517 {
2518
2519         /* Load allow as tunable so to postpone carp start after module load */
2520         TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow);
2521 }
2522 VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL);
2523
2524 static int
2525 carp_mod_load(void)
2526 {
2527         int err;
2528
2529         mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
2530         sx_init(&carp_sx, "carp_sx");
2531         LIST_INIT(&carp_list);
2532         carp_get_vhid_p = carp_get_vhid;
2533         carp_forus_p = carp_forus;
2534         carp_output_p = carp_output;
2535         carp_linkstate_p = carp_linkstate;
2536         carp_ioctl_p = carp_ioctl;
2537         carp_attach_p = carp_attach;
2538         carp_detach_p = carp_detach;
2539         carp_demote_adj_p = carp_demote_adj;
2540         carp_master_p = carp_master;
2541 #ifdef INET6
2542         carp_iamatch6_p = carp_iamatch6;
2543         carp_macmatch6_p = carp_macmatch6;
2544         err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL);
2545         if (err) {
2546                 printf("carp: error %d registering with INET6\n", err);
2547                 carp_mod_cleanup();
2548                 return (err);
2549         }
2550 #endif
2551 #ifdef INET
2552         carp_iamatch_p = carp_iamatch;
2553         err = ipproto_register(IPPROTO_CARP, carp_input, NULL);
2554         if (err) {
2555                 printf("carp: error %d registering with INET\n", err);
2556                 carp_mod_cleanup();
2557                 return (err);
2558         }
2559 #endif
2560
2561         carp_nl_register();
2562
2563         return (0);
2564 }
2565
2566 static int
2567 carp_modevent(module_t mod, int type, void *data)
2568 {
2569         switch (type) {
2570         case MOD_LOAD:
2571                 return carp_mod_load();
2572                 /* NOTREACHED */
2573         case MOD_UNLOAD:
2574                 mtx_lock(&carp_mtx);
2575                 if (LIST_EMPTY(&carp_list))
2576                         carp_mod_cleanup();
2577                 else {
2578                         mtx_unlock(&carp_mtx);
2579                         return (EBUSY);
2580                 }
2581                 break;
2582
2583         default:
2584                 return (EINVAL);
2585         }
2586
2587         return (0);
2588 }
2589
2590 static moduledata_t carp_mod = {
2591         "carp",
2592         carp_modevent,
2593         0
2594 };
2595
2596 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);