]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/ip_carp.c
Merge bmake-20230909
[FreeBSD/FreeBSD.git] / sys / netinet / ip_carp.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2002 Michael Shalayeff.
5  * Copyright (c) 2003 Ryan McBride.
6  * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
22  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28  * THE POSSIBILITY OF SUCH DAMAGE.
29  */
30
31 #include "opt_netlink.h"
32
33 #include <sys/cdefs.h>
34 #include "opt_bpf.h"
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/devctl.h>
41 #include <sys/jail.h>
42 #include <sys/kernel.h>
43 #include <sys/limits.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/module.h>
47 #include <sys/priv.h>
48 #include <sys/proc.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/syslog.h>
53 #include <sys/taskqueue.h>
54 #include <sys/counter.h>
55
56 #include <net/ethernet.h>
57 #include <net/if.h>
58 #include <net/if_var.h>
59 #include <net/if_dl.h>
60 #include <net/if_llatbl.h>
61 #include <net/if_private.h>
62 #include <net/if_types.h>
63 #include <net/route.h>
64 #include <net/vnet.h>
65
66 #if defined(INET) || defined(INET6)
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/ip_carp.h>
70 #include <netinet/ip_carp_nl.h>
71 #include <netinet/ip.h>
72 #include <machine/in_cksum.h>
73 #endif
74 #ifdef INET
75 #include <netinet/ip_var.h>
76 #include <netinet/if_ether.h>
77 #endif
78
79 #ifdef INET6
80 #include <netinet/icmp6.h>
81 #include <netinet/ip6.h>
82 #include <netinet6/in6_var.h>
83 #include <netinet6/ip6_var.h>
84 #include <netinet6/scope6_var.h>
85 #include <netinet6/nd6.h>
86 #endif
87
88 #include <netlink/netlink.h>
89 #include <netlink/netlink_ctl.h>
90 #include <netlink/netlink_generic.h>
91 #include <netlink/netlink_message_parser.h>
92
93 #include <crypto/sha1.h>
94
/* malloc(9) type used to tag CARP allocations (described as "CARP addresses"). */
95 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
96
/*
 * Per-vhid CARP state.  One softc exists for each virtual host id configured
 * on a parent interface; it is linked both on that interface's carp_if list
 * (sc_list) and on the global list (sc_next).
 */
97 struct carp_softc {
98         struct ifnet            *sc_carpdev;    /* Pointer to parent ifnet. */
99         struct ifaddr           **sc_ifas;      /* Our ifaddrs. */
100         struct sockaddr_dl      sc_addr;        /* Our link level address. */
101         struct callout          sc_ad_tmo;      /* Advertising timeout. */
102 #ifdef INET
103         struct callout          sc_md_tmo;      /* Master down timeout. */
104 #endif
105 #ifdef INET6
106         struct callout          sc_md6_tmo;     /* XXX: Master down timeout. */
107 #endif
108         struct mtx              sc_mtx;         /* Serialises input, callouts and ioctls. */
109
110         int                     sc_vhid;        /* Virtual host id; low 8 bits feed the HMAC. */
111         int                     sc_advskew;     /* Advertisement skew, in 1/256 s units. */
112         int                     sc_advbase;     /* Advertisement base interval, in seconds. */
        /*
         * Peer addresses.  On input the TTL is only enforced when the
         * address for the packet's family is a multicast address.
         */
113         struct in_addr          sc_carpaddr;
114         struct in6_addr         sc_carpaddr6;
115
        /* sc_naddrs + sc_naddrs6 bounds the walk over sc_ifas. */
116         int                     sc_naddrs;
117         int                     sc_naddrs6;
118         int                     sc_ifasiz;      /* Presumably the capacity of sc_ifas -- confirm. */
119         enum { INIT = 0, BACKUP, MASTER }       sc_state;
120         int                     sc_suppress;
121         int                     sc_sendad_errors;
122 #define CARP_SENDAD_MAX_ERRORS  3
123         int                     sc_sendad_success;
124 #define CARP_SENDAD_MIN_SUCCESS 3
125
        /*
         * Advertisement counter.  While sc_init_counter is non-zero the next
         * advertisement seeds sc_counter randomly; otherwise it is incremented.
         */
126         int                     sc_init_counter;
127         uint64_t                sc_counter;
128
129         /*
         * Authentication: the shared key, the HMAC pad buffer (left holding
         * the outer pad by carp_hmac_prepare()) and the precomputed prefix
         * of the inner SHA1 hash.
         */
130 #define CARP_HMAC_PAD   64
131         unsigned char sc_key[CARP_KEY_LEN];
132         unsigned char sc_pad[CARP_HMAC_PAD];
133         SHA1_CTX sc_sha1;
134
135         TAILQ_ENTRY(carp_softc) sc_list;        /* On the carp_if list. */
136         LIST_ENTRY(carp_softc)  sc_next;        /* On the global list. */
137 };
138
/*
 * Per-interface CARP state.  It heads the list of softcs (vhids) configured
 * on one ifnet; cif_vrs is traversed under cif_mtx or carp_sx.
 */
139 struct carp_if {
140 #ifdef INET
141         int     cif_naddrs;             /* Presumably the IPv4 address count -- confirm. */
142 #endif
143 #ifdef INET6
144         int     cif_naddrs6;            /* Presumably the IPv6 address count -- confirm. */
145 #endif
146         TAILQ_HEAD(, carp_softc) cif_vrs;       /* Softcs, linked via sc_list. */
147 #ifdef INET
148         struct ip_moptions       cif_imo;       /* IPv4 multicast options. */
149 #endif
150 #ifdef INET6
151         struct ip6_moptions      cif_im6o;      /* IPv6 multicast options. */
152 #endif
153         struct ifnet    *cif_ifp;       /* The interface this state belongs to. */
154         struct mtx      cif_mtx;        /* Protects cif_vrs. */
155         uint32_t        cif_flags;      /* CIF_* flags below. */
156 #define CIF_PROMISC     0x00000001
157 };
158
159 /*
 * Kernel equivalent of struct carpreq, but with more fields for new features.
 * The leading members must stay in step with struct carpreq; only the
 * trailing address fields are kernel-only additions.
160  */
161 struct carpkreq {
162         int             carpr_count;
163         int             carpr_vhid;
164         int             carpr_state;
165         int             carpr_advskew;
166         int             carpr_advbase;
167         unsigned char   carpr_key[CARP_KEY_LEN];
168         /* Everything above this is identical to carpreq */
169         struct in_addr  carpr_addr;
170         struct in6_addr carpr_addr6;
171 };
172
173 /*
174  * Brief design of carp(4).
175  *
176  * Any carp-capable ifnet may have a list of carp softcs hanging off
177  * its ifp->if_carp pointer. Each softc represents one unique virtual
178  * host id, or vhid. The softc has a back pointer to the ifnet. All
179  * softcs are joined in a global list, which has quite limited use.
180  *
181  * Any interface address that takes part in CARP negotiation has a
182  * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
183  * AF_INET or AF_INET6 address.
184  *
185  * Although one can follow the softc's back pointer to the ifnet and traverse
186  * its ifp->if_addrhead queue to find all interface addresses
187  * involved in CARP, we keep a growable array of ifaddr pointers. This
188  * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
189  * do calls into the network stack, thus avoiding LORs.
190  *
191  * Locking:
192  *
193  * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
194  * callout-driven events and ioctl()s.
195  *
196  * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx.
197  * To traverse the global list we use the mutex carp_mtx.
198  *
199  * Known issues with locking:
200  *
201  * - Sending ad, we put the pointer to the softc in an mtag, and no reference
202  *   counting is done on the softc.
203  * - On module unload we may race (?) with packet processing thread
204  *   dereferencing our function pointers.
205  */
206
207 /* Accept incoming CARP packets (net.inet.carp.allow); input drops everything when 0. */
208 VNET_DEFINE_STATIC(int, carp_allow) = 1;
209 #define V_carp_allow    VNET(carp_allow)
210
211 /* Set DSCP in outgoing CARP packets (net.inet.carp.dscp), default 56. */
212 VNET_DEFINE_STATIC(int, carp_dscp) = 56;
213 #define V_carp_dscp     VNET(carp_dscp)
214
215 /* Preempt slower nodes (net.inet.carp.preempt), off by default. */
216 VNET_DEFINE_STATIC(int, carp_preempt) = 0;
217 #define V_carp_preempt  VNET(carp_preempt)
218
219 /* Log level: > 0 enables CARP_LOG(), > 1 additionally enables CARP_DEBUG(). */
220 VNET_DEFINE_STATIC(int, carp_log) = 1;
221 #define V_carp_log      VNET(carp_log)
222
223 /* Global advskew demotion, added to each softc's advskew by DEMOTE_ADVSKEW(). */
224 VNET_DEFINE_STATIC(int, carp_demotion) = 0;
225 #define V_carp_demotion VNET(carp_demotion)
226
227 /* Send error demotion factor. */
228 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW;
229 #define V_carp_senderr_adj      VNET(carp_senderr_adj)
230
231 /* Iface down demotion factor. */
232 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW;
233 #define V_carp_ifdown_adj       VNET(carp_ifdown_adj)
234
/*
 * net.inet.carp sysctl tree.  "allow", "dscp" and "demotion" go through
 * handler functions; the remaining knobs are plain per-vnet integers.
 */
235 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS);
236 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS);
237 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
238
239 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
240     "CARP");
241 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow,
242     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
243     &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I",
244     "Accept incoming CARP packets");
245 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp,
246     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
247     0, 0, carp_dscp_sysctl, "I",
248     "DSCP value for carp packets");
249 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
250     &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
251 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
252     &VNET_NAME(carp_log), 0, "CARP log level");
253 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion,
254     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
255     0, 0, carp_demote_adj_sysctl, "I",
256     "Adjust demotion factor (skew of advskew)");
257 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor,
258     CTLFLAG_VNET | CTLFLAG_RW,
259     &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
260 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor,
261     CTLFLAG_VNET | CTLFLAG_RW,
262     &VNET_NAME(carp_ifdown_adj), 0,
263     "Interface down demotion factor adjustment");
264
/*
 * Per-vnet CARP statistics, exported as net.inet.carp.stats.
 * CARPSTATS_ADD()/CARPSTATS_INC() bump the counter that corresponds to the
 * named member of struct carpstats; the member offset divided by
 * sizeof(uint64_t) selects the counter.
 */
265 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
266 VNET_PCPUSTAT_SYSINIT(carpstats);
267 VNET_PCPUSTAT_SYSUNINIT(carpstats);
268
269 #define CARPSTATS_ADD(name, val)        \
270     counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
271         sizeof(uint64_t)], (val))
272 #define CARPSTATS_INC(name)             CARPSTATS_ADD(name, 1)
273
274 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
275     carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
276
/*
 * Locking helpers.  CARP_* operate on the per-softc mutex (sc_mtx) and CIF_*
 * on the per-interface mutex (cif_mtx).
 *
 * CIF_FREE() takes the interface lock and hands the carp_if to
 * carp_free_if() when no softc is left on it; otherwise it only drops the
 * lock again.  NOTE(review): carp_free_if() is presumably responsible for
 * releasing/destroying the lock on the free path -- confirm.
 */
277 #define CARP_LOCK_INIT(sc)      mtx_init(&(sc)->sc_mtx, "carp_softc",   \
278         NULL, MTX_DEF)
279 #define CARP_LOCK_DESTROY(sc)   mtx_destroy(&(sc)->sc_mtx)
280 #define CARP_LOCK_ASSERT(sc)    mtx_assert(&(sc)->sc_mtx, MA_OWNED)
281 #define CARP_LOCK(sc)           mtx_lock(&(sc)->sc_mtx)
282 #define CARP_UNLOCK(sc)         mtx_unlock(&(sc)->sc_mtx)
283 #define CIF_LOCK_INIT(cif)      mtx_init(&(cif)->cif_mtx, "carp_if",   \
284         NULL, MTX_DEF)
285 #define CIF_LOCK_DESTROY(cif)   mtx_destroy(&(cif)->cif_mtx)
286 #define CIF_LOCK_ASSERT(cif)    mtx_assert(&(cif)->cif_mtx, MA_OWNED)
287 #define CIF_LOCK(cif)           mtx_lock(&(cif)->cif_mtx)
288 #define CIF_UNLOCK(cif)         mtx_unlock(&(cif)->cif_mtx)
289 #define CIF_FREE(cif)   do {                            \
290                 CIF_LOCK(cif);                          \
291                 if (TAILQ_EMPTY(&(cif)->cif_vrs))       \
292                         carp_free_if(cif);              \
293                 else                                    \
294                         CIF_UNLOCK(cif);                \
295 } while (0)
296
/*
 * Logging helpers gated by the per-vnet log level.  CARP_LOG() emits at
 * LOG_INFO with a "carp: " prefix when the level is above 0; CARP_DEBUG()
 * emits at LOG_DEBUG, without a prefix, when the level is above 1.
 */
297 #define CARP_LOG(...)   do {                            \
298         if (V_carp_log > 0)                             \
299                 log(LOG_INFO, "carp: " __VA_ARGS__);    \
300 } while (0)
301
302 #define CARP_DEBUG(...) do {                            \
303         if (V_carp_log > 1)                             \
304                 log(LOG_DEBUG, __VA_ARGS__);            \
305 } while (0)
306
/*
 * Iteration helpers.  Each expands to a loop header and must be followed by
 * the loop body.  CARP_FOREACH_IFA() and IFNET_FOREACH_CARP() expand to an
 * assertion statement followed by the loop, so they must not be used as the
 * sole body of an unbraced if/else.
 *
 * Every macro argument is parenthesised so that arbitrary expressions
 * (e.g. "base + 1") can be passed safely.
 */

/* Visit every address on ifp's address list that has a carp softc attached. */
#define IFNET_FOREACH_IFA(ifp, ifa)                                     \
        CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \
                if ((ifa)->ifa_carp != NULL)

/*
 * Visit the softc's own ifaddr array; requires the softc lock.  The walk
 * stops at sc_naddrs + sc_naddrs6 entries or at the first NULL slot.
 */
#define CARP_FOREACH_IFA(sc, ifa)                                       \
        CARP_LOCK_ASSERT(sc);                                           \
        for (int _i = 0;                                                \
                _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 &&              \
                ((ifa) = (sc)->sc_ifas[_i]) != NULL;                    \
                ++_i)

/*
 * Visit every softc configured on ifp; the caller must hold either the
 * interface's cif_mtx or carp_sx exclusively.
 */
#define IFNET_FOREACH_CARP(ifp, sc)                                     \
        KASSERT(mtx_owned(&(ifp)->if_carp->cif_mtx) ||                  \
            sx_xlocked(&carp_sx), ("cif_vrs not locked"));              \
        TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)
322
/*
 * Effective advertisement skew of a softc: its configured advskew plus the
 * global demotion counter, clamped to the range [0, CARP_MAXSKEW].
 */
323 #define DEMOTE_ADVSKEW(sc)                                      \
324     (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ?      \
325     CARP_MAXSKEW :                                              \
326         (((sc)->sc_advskew + V_carp_demotion < 0) ?             \
327         0 : ((sc)->sc_advskew + V_carp_demotion)))
328
/* Forward declarations of file-local functions. */
329 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int);
330 static struct carp_softc
331                 *carp_alloc(struct ifnet *);
332 static void     carp_destroy(struct carp_softc *);
333 static struct carp_if
334                 *carp_alloc_if(struct ifnet *);
335 static void     carp_free_if(struct carp_if *);
336 static void     carp_set_state(struct carp_softc *, int, const char* reason);
337 static void     carp_sc_state(struct carp_softc *);
338 static void     carp_setrun(struct carp_softc *, sa_family_t);
339 static void     carp_master_down(void *);
340 static void     carp_master_down_locked(struct carp_softc *,
341                     const char* reason);
342 static void     carp_send_ad(void *);
343 static void     carp_send_ad_locked(struct carp_softc *);
344 static void     carp_addroute(struct carp_softc *);
345 static void     carp_ifa_addroute(struct ifaddr *);
346 static void     carp_delroute(struct carp_softc *);
347 static void     carp_ifa_delroute(struct ifaddr *);
348 static void     carp_send_ad_all(void *, int);
349 static void     carp_demote_adj(int, char *);
350
/*
 * Global state: the list of all softcs (linked via sc_next) with the mutex
 * used to traverse it, the sx lock that may be held instead of a per-interface
 * CIF lock, and the task that runs carp_send_ad_all() from a taskqueue.
 */
351 static LIST_HEAD(, carp_softc) carp_list;
352 static struct mtx carp_mtx;
353 static struct sx carp_sx;
354 static struct task carp_sendall_task =
355     TASK_INITIALIZER(0, carp_send_ad_all, NULL);
356
357 static int
358 carp_is_supported_if(if_t ifp)
359 {
360         if (ifp == NULL)
361                 return (ENXIO);
362
363         switch (ifp->if_type) {
364         case IFT_ETHER:
365         case IFT_L2VLAN:
366         case IFT_BRIDGE:
367                 break;
368         default:
369                 return (EOPNOTSUPP);
370         }
371
372         return (0);
373 }
374
375 static void
376 carp_hmac_prepare(struct carp_softc *sc)
377 {
378         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
379         uint8_t vhid = sc->sc_vhid & 0xff;
380         struct ifaddr *ifa;
381         int i, found;
382 #ifdef INET
383         struct in_addr last, cur, in;
384 #endif
385 #ifdef INET6
386         struct in6_addr last6, cur6, in6;
387 #endif
388
389         CARP_LOCK_ASSERT(sc);
390
391         /* Compute ipad from key. */
392         bzero(sc->sc_pad, sizeof(sc->sc_pad));
393         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
394         for (i = 0; i < sizeof(sc->sc_pad); i++)
395                 sc->sc_pad[i] ^= 0x36;
396
397         /* Precompute first part of inner hash. */
398         SHA1Init(&sc->sc_sha1);
399         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
400         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
401         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
402         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
403 #ifdef INET
404         cur.s_addr = 0;
405         do {
406                 found = 0;
407                 last = cur;
408                 cur.s_addr = 0xffffffff;
409                 CARP_FOREACH_IFA(sc, ifa) {
410                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
411                         if (ifa->ifa_addr->sa_family == AF_INET &&
412                             ntohl(in.s_addr) > ntohl(last.s_addr) &&
413                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
414                                 cur.s_addr = in.s_addr;
415                                 found++;
416                         }
417                 }
418                 if (found)
419                         SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
420         } while (found);
421 #endif /* INET */
422 #ifdef INET6
423         memset(&cur6, 0, sizeof(cur6));
424         do {
425                 found = 0;
426                 last6 = cur6;
427                 memset(&cur6, 0xff, sizeof(cur6));
428                 CARP_FOREACH_IFA(sc, ifa) {
429                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
430                         if (IN6_IS_SCOPE_EMBED(&in6))
431                                 in6.s6_addr16[1] = 0;
432                         if (ifa->ifa_addr->sa_family == AF_INET6 &&
433                             memcmp(&in6, &last6, sizeof(in6)) > 0 &&
434                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
435                                 cur6 = in6;
436                                 found++;
437                         }
438                 }
439                 if (found)
440                         SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
441         } while (found);
442 #endif /* INET6 */
443
444         /* convert ipad to opad */
445         for (i = 0; i < sizeof(sc->sc_pad); i++)
446                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
447 }
448
449 static void
450 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
451     unsigned char md[20])
452 {
453         SHA1_CTX sha1ctx;
454
455         CARP_LOCK_ASSERT(sc);
456
457         /* fetch first half of inner hash */
458         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
459
460         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
461         SHA1Final(md, &sha1ctx);
462
463         /* outer hash */
464         SHA1Init(&sha1ctx);
465         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
466         SHA1Update(&sha1ctx, md, 20);
467         SHA1Final(md, &sha1ctx);
468 }
469
/*
 * Check a received digest against the one computed locally for the given
 * counter.
 *
 * Returns 0 when the digests match and a non-zero value otherwise.  The
 * caller must hold the softc lock.
 */
static int
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
    unsigned char md[20])
{
        unsigned char md2[20];
        unsigned char diff = 0;
        size_t i;

        CARP_LOCK_ASSERT(sc);

        carp_hmac_generate(sc, counter, md2);

        /*
         * Compare in constant time: md comes straight off the wire, and an
         * early-exit comparison would leak how many leading bytes of a
         * forged digest were correct.
         */
        for (i = 0; i < sizeof(md2); i++)
                diff |= md[i] ^ md2[i];

        return (diff);
}
482
/*
 * process input packet.
 * we have rearranged checks order compared to the rfc,
 * but it seems more efficient this way or not possible otherwise.
 *
 * IPv4 entry point.  Takes ownership of the mbuf (*mp is cleared), makes
 * sure the IP header and the CARP header are contiguous, verifies the CARP
 * checksum and hands the packet to carp_input_c().  Always returns
 * IPPROTO_DONE; the mbuf is consumed on every path.
 */
#ifdef INET
static int
carp_input(struct mbuf **mp, int *offp, int proto)
{
        struct mbuf *m = *mp;
        struct ip *ip = mtod(m, struct ip *);
        struct carp_header *ch;
        int iplen, len;

        *mp = NULL;

        CARPSTATS_INC(carps_ipackets);

        if (!V_carp_allow) {
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /* Bytes needed up front: the IP header plus one CARP header. */
        iplen = ip->ip_hl << 2;
        len = iplen + sizeof(*ch);

        /* The packet as a whole must be able to hold a CARP header. */
        if (m->m_pkthdr.len < len) {
                CARPSTATS_INC(carps_badlen);
                CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
                    "on %s\n", __func__, m->m_len - sizeof(struct ip),
                    if_name(m->m_pkthdr.rcvif));
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /*
         * Make the headers contiguous, but only when the first mbuf is too
         * short to hold them already.
         */
        if (m->m_len < len) {
                if ((m = m_pullup(m, len)) == NULL) {
                        CARPSTATS_INC(carps_hdrops);
                        CARP_DEBUG("%s: pullup failed\n", __func__);
                        return (IPPROTO_DONE);
                }
                ip = mtod(m, struct ip *);
        }
        ch = (struct carp_header *)((char *)ip + iplen);

        /*
         * verify the CARP checksum; the data pointer is temporarily
         * advanced past the IP header so that only the CARP header is summed.
         */
        m->m_data += iplen;
        if (in_cksum(m, len - iplen)) {
                CARPSTATS_INC(carps_badsum);
                CARP_DEBUG("%s: checksum failed on %s\n", __func__,
                    if_name(m->m_pkthdr.rcvif));
                m_freem(m);
                return (IPPROTO_DONE);
        }
        m->m_data -= iplen;

        carp_input_c(m, ch, AF_INET, ip->ip_ttl);
        return (IPPROTO_DONE);
}
#endif
564
#ifdef INET6
/*
 * IPv6 entry point.  Drops the packet unless CARP input is allowed and the
 * receiving interface has CARP state, makes the CARP header found at *offp
 * contiguous, verifies its checksum and passes the packet, together with
 * the hop limit, to carp_input_c().  Always returns IPPROTO_DONE.
 */
static int
carp6_input(struct mbuf **mp, int *offp, int proto)
{
        struct mbuf *m = *mp;
        struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
        struct carp_header *ch;
        const int off = *offp;
        u_int oldlen;

        CARPSTATS_INC(carps_ipackets6);

        if (!V_carp_allow) {
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /* Only interfaces with CARP state attached may receive CARP. */
        if (m->m_pkthdr.rcvif->if_carp == NULL) {
                CARPSTATS_INC(carps_badif);
                CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
                    __func__, if_name(m->m_pkthdr.rcvif));
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /* Pull the whole CARP header into the first mbuf if necessary. */
        if (m->m_len < off + sizeof(*ch)) {
                /* Remember the length for the log; m is gone on failure. */
                oldlen = m->m_len;
                m = m_pullup(m, off + sizeof(*ch));
                if (m == NULL) {
                        CARPSTATS_INC(carps_badlen);
                        CARP_DEBUG("%s: packet size %u too small\n", __func__, oldlen);
                        return (IPPROTO_DONE);
                }
                ip6 = mtod(m, struct ip6_hdr *);
        }
        ch = (struct carp_header *)(mtod(m, char *) + off);

        /* Checksum the CARP header alone by skipping past the IPv6 headers. */
        m->m_data += off;
        if (in_cksum(m, sizeof(*ch))) {
                CARPSTATS_INC(carps_badsum);
                CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
                    if_name(m->m_pkthdr.rcvif));
                m_freem(m);
                return (IPPROTO_DONE);
        }
        m->m_data -= off;

        carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim);
        return (IPPROTO_DONE);
}
#endif /* INET6 */
618
619 /*
620  * This routine should not be necessary at all, but some switches
621  * (VMWare ESX vswitches) can echo our own packets back at us,
622  * and we must ignore them or they will cause us to drop out of
623  * MASTER mode.
624  *
625  * We cannot catch all cases of network loops.  Instead, what we
626  * do here is catch any packet that arrives with a carp header
627  * with a VHID of 0, that comes from an address that is our own.
628  * These packets are by definition "from us" (even if they are from
629  * a misconfigured host that is pretending to be us).
630  *
631  * The VHID test is outside this mini-function.
632  */
633 static int
634 carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af)
635 {
636 #ifdef INET
637         struct ip *ip4;
638         struct in_addr in4;
639 #endif
640 #ifdef INET6
641         struct ip6_hdr *ip6;
642         struct in6_addr in6;
643 #endif
644
645         switch (af) {
646 #ifdef INET
647         case AF_INET:
648                 ip4 = mtod(m, struct ip *);
649                 in4 = ifatoia(ifa)->ia_addr.sin_addr;
650                 return (in4.s_addr == ip4->ip_src.s_addr);
651 #endif
652 #ifdef INET6
653         case AF_INET6:
654                 ip6 = mtod(m, struct ip6_hdr *);
655                 in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
656                 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0);
657 #endif
658         default:
659                 break;
660         }
661         return (0);
662 }
663
664 static void
665 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl)
666 {
667         struct ifnet *ifp = m->m_pkthdr.rcvif;
668         struct ifaddr *ifa, *match;
669         struct carp_softc *sc;
670         uint64_t tmp_counter;
671         struct timeval sc_tv, ch_tv;
672         int error;
673         bool multicast = false;
674
675         NET_EPOCH_ASSERT();
676
677         /*
678          * Verify that the VHID is valid on the receiving interface.
679          *
680          * There should be just one match.  If there are none
681          * the VHID is not valid and we drop the packet.  If
682          * there are multiple VHID matches, take just the first
683          * one, for compatibility with previous code.  While we're
684          * scanning, check for obvious loops in the network topology
685          * (these should never happen, and as noted above, we may
686          * miss real loops; this is just a double-check).
687          */
688         error = 0;
689         match = NULL;
690         IFNET_FOREACH_IFA(ifp, ifa) {
691                 if (match == NULL && ifa->ifa_carp != NULL &&
692                     ifa->ifa_addr->sa_family == af &&
693                     ifa->ifa_carp->sc_vhid == ch->carp_vhid)
694                         match = ifa;
695                 if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af))
696                         error = ELOOP;
697         }
698         ifa = error ? NULL : match;
699         if (ifa != NULL)
700                 ifa_ref(ifa);
701
702         if (ifa == NULL) {
703                 if (error == ELOOP) {
704                         CARP_DEBUG("dropping looped packet on interface %s\n",
705                             if_name(ifp));
706                         CARPSTATS_INC(carps_badif);     /* ??? */
707                 } else {
708                         CARPSTATS_INC(carps_badvhid);
709                 }
710                 m_freem(m);
711                 return;
712         }
713
714         /* verify the CARP version. */
715         if (ch->carp_version != CARP_VERSION) {
716                 CARPSTATS_INC(carps_badver);
717                 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp),
718                     ch->carp_version);
719                 ifa_free(ifa);
720                 m_freem(m);
721                 return;
722         }
723
724         sc = ifa->ifa_carp;
725         CARP_LOCK(sc);
726         if (ifa->ifa_addr->sa_family == AF_INET) {
727                 multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr);
728         } else {
729                 multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6);
730         }
731         ifa_free(ifa);
732
733         /* verify that the IP TTL is 255, but only if we're not in unicast mode. */
734         if (multicast && ttl != CARP_DFLTTL) {
735                 CARPSTATS_INC(carps_badttl);
736                 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
737                     ttl, if_name(m->m_pkthdr.rcvif));
738                 goto out;
739         }
740
741         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
742                 CARPSTATS_INC(carps_badauth);
743                 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
744                     sc->sc_vhid, if_name(ifp));
745                 goto out;
746         }
747
748         tmp_counter = ntohl(ch->carp_counter[0]);
749         tmp_counter = tmp_counter<<32;
750         tmp_counter += ntohl(ch->carp_counter[1]);
751
752         /* XXX Replay protection goes here */
753
754         sc->sc_init_counter = 0;
755         sc->sc_counter = tmp_counter;
756
757         sc_tv.tv_sec = sc->sc_advbase;
758         sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
759         ch_tv.tv_sec = ch->carp_advbase;
760         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
761
762         switch (sc->sc_state) {
763         case INIT:
764                 break;
765         case MASTER:
766                 /*
767                  * If we receive an advertisement from a master who's going to
768                  * be more frequent than us, go into BACKUP state.
769                  */
770                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
771                     timevalcmp(&sc_tv, &ch_tv, ==)) {
772                         callout_stop(&sc->sc_ad_tmo);
773                         carp_set_state(sc, BACKUP,
774                             "more frequent advertisement received");
775                         carp_setrun(sc, 0);
776                         carp_delroute(sc);
777                 }
778                 break;
779         case BACKUP:
780                 /*
781                  * If we're pre-empting masters who advertise slower than us,
782                  * and this one claims to be slower, treat him as down.
783                  */
784                 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
785                         carp_master_down_locked(sc,
786                             "preempting a slower master");
787                         break;
788                 }
789
790                 /*
791                  *  If the master is going to advertise at such a low frequency
792                  *  that he's guaranteed to time out, we'd might as well just
793                  *  treat him as timed out now.
794                  */
795                 sc_tv.tv_sec = sc->sc_advbase * 3;
796                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
797                         carp_master_down_locked(sc, "master will time out");
798                         break;
799                 }
800
801                 /*
802                  * Otherwise, we reset the counter and wait for the next
803                  * advertisement.
804                  */
805                 carp_setrun(sc, af);
806                 break;
807         }
808
809 out:
810         CARP_UNLOCK(sc);
811         m_freem(m);
812 }
813
814 static int
815 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
816 {
817         struct m_tag *mtag;
818
819         if (sc->sc_init_counter) {
820                 /* this could also be seconds since unix epoch */
821                 sc->sc_counter = arc4random();
822                 sc->sc_counter = sc->sc_counter << 32;
823                 sc->sc_counter += arc4random();
824         } else
825                 sc->sc_counter++;
826
827         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
828         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
829
830         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
831
832         /* Tag packet for carp_output */
833         if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
834             M_NOWAIT)) == NULL) {
835                 m_freem(m);
836                 CARPSTATS_INC(carps_onomem);
837                 return (ENOMEM);
838         }
839         bcopy(&sc, mtag + 1, sizeof(sc));
840         m_tag_prepend(m, mtag);
841
842         return (0);
843 }
844
845 /*
846  * To avoid LORs and possible recursions this function shouldn't
847  * be called directly, but scheduled via taskqueue.
848  */
849 static void
850 carp_send_ad_all(void *ctx __unused, int pending __unused)
851 {
852         struct carp_softc *sc;
853         struct epoch_tracker et;
854
855         NET_EPOCH_ENTER(et);
856         mtx_lock(&carp_mtx);
857         LIST_FOREACH(sc, &carp_list, sc_next)
858                 if (sc->sc_state == MASTER) {
859                         CARP_LOCK(sc);
860                         CURVNET_SET(sc->sc_carpdev->if_vnet);
861                         carp_send_ad_locked(sc);
862                         CURVNET_RESTORE();
863                         CARP_UNLOCK(sc);
864                 }
865         mtx_unlock(&carp_mtx);
866         NET_EPOCH_EXIT(et);
867 }
868
869 /* Send a periodic advertisement, executed in callout context. */
870 static void
871 carp_send_ad(void *v)
872 {
873         struct carp_softc *sc = v;
874         struct epoch_tracker et;
875
876         NET_EPOCH_ENTER(et);
877         CARP_LOCK_ASSERT(sc);
878         CURVNET_SET(sc->sc_carpdev->if_vnet);
879         carp_send_ad_locked(sc);
880         CURVNET_RESTORE();
881         CARP_UNLOCK(sc);
882         NET_EPOCH_EXIT(et);
883 }
884
885 static void
886 carp_send_ad_error(struct carp_softc *sc, int error)
887 {
888
889         /*
890          * We track errors and successfull sends with this logic:
891          * - Any error resets success counter to 0.
892          * - MAX_ERRORS triggers demotion.
893          * - MIN_SUCCESS successes resets error counter to 0.
894          * - MIN_SUCCESS reverts demotion, if it was triggered before.
895          */
896         if (error) {
897                 if (sc->sc_sendad_errors < INT_MAX)
898                         sc->sc_sendad_errors++;
899                 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
900                         static const char fmt[] = "send error %d on %s";
901                         char msg[sizeof(fmt) + IFNAMSIZ];
902
903                         sprintf(msg, fmt, error, if_name(sc->sc_carpdev));
904                         carp_demote_adj(V_carp_senderr_adj, msg);
905                 }
906                 sc->sc_sendad_success = 0;
907         } else if (sc->sc_sendad_errors > 0) {
908                 if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) {
909                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
910                                 static const char fmt[] = "send ok on %s";
911                                 char msg[sizeof(fmt) + IFNAMSIZ];
912
913                                 sprintf(msg, fmt, if_name(sc->sc_carpdev));
914                                 carp_demote_adj(-V_carp_senderr_adj, msg);
915                         }
916                         sc->sc_sendad_errors = 0;
917                 }
918         }
919 }
920
921 /*
922  * Pick the best ifaddr on the given ifp for sending CARP
923  * advertisements.
924  *
925  * "Best" here is defined by ifa_preferred().  This function is very
926  * much like ifaof_ifpforaddr() except that we just use ifa_preferred().
927  *
928  * (This could be simplified to return the actual address, except that
929  * it has a different format in AF_INET and AF_INET6.)
930  */
931 static struct ifaddr *
932 carp_best_ifa(int af, struct ifnet *ifp)
933 {
934         struct ifaddr *ifa, *best;
935
936         NET_EPOCH_ASSERT();
937
938         if (af >= AF_MAX)
939                 return (NULL);
940         best = NULL;
941         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
942                 if (ifa->ifa_addr->sa_family == af &&
943                     (best == NULL || ifa_preferred(best, ifa)))
944                         best = ifa;
945         }
946         if (best != NULL)
947                 ifa_ref(best);
948         return (best);
949 }
950
951 static void
952 carp_send_ad_locked(struct carp_softc *sc)
953 {
954         struct carp_header ch;
955         struct timeval tv;
956         struct ifaddr *ifa;
957         struct carp_header *ch_ptr;
958         struct mbuf *m;
959         int len, advskew;
960
961         NET_EPOCH_ASSERT();
962         CARP_LOCK_ASSERT(sc);
963
964         advskew = DEMOTE_ADVSKEW(sc);
965         tv.tv_sec = sc->sc_advbase;
966         tv.tv_usec = advskew * 1000000 / 256;
967
968         ch.carp_version = CARP_VERSION;
969         ch.carp_type = CARP_ADVERTISEMENT;
970         ch.carp_vhid = sc->sc_vhid;
971         ch.carp_advbase = sc->sc_advbase;
972         ch.carp_advskew = advskew;
973         ch.carp_authlen = 7;    /* XXX DEFINE */
974         ch.carp_pad1 = 0;       /* must be zero */
975         ch.carp_cksum = 0;
976
977         /* XXXGL: OpenBSD picks first ifaddr with needed family. */
978
979 #ifdef INET
980         if (sc->sc_naddrs) {
981                 struct ip *ip;
982
983                 m = m_gethdr(M_NOWAIT, MT_DATA);
984                 if (m == NULL) {
985                         CARPSTATS_INC(carps_onomem);
986                         goto resched;
987                 }
988                 len = sizeof(*ip) + sizeof(ch);
989                 m->m_pkthdr.len = len;
990                 m->m_pkthdr.rcvif = NULL;
991                 m->m_len = len;
992                 M_ALIGN(m, m->m_len);
993                 if (IN_MULTICAST(sc->sc_carpaddr.s_addr))
994                         m->m_flags |= M_MCAST;
995                 ip = mtod(m, struct ip *);
996                 ip->ip_v = IPVERSION;
997                 ip->ip_hl = sizeof(*ip) >> 2;
998                 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET;
999                 ip->ip_len = htons(len);
1000                 ip->ip_off = htons(IP_DF);
1001                 ip->ip_ttl = CARP_DFLTTL;
1002                 ip->ip_p = IPPROTO_CARP;
1003                 ip->ip_sum = 0;
1004                 ip_fillid(ip);
1005
1006                 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev);
1007                 if (ifa != NULL) {
1008                         ip->ip_src.s_addr =
1009                             ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1010                         ifa_free(ifa);
1011                 } else
1012                         ip->ip_src.s_addr = 0;
1013                 ip->ip_dst = sc->sc_carpaddr;
1014
1015                 ch_ptr = (struct carp_header *)(&ip[1]);
1016                 bcopy(&ch, ch_ptr, sizeof(ch));
1017                 if (carp_prepare_ad(m, sc, ch_ptr))
1018                         goto resched;
1019
1020                 m->m_data += sizeof(*ip);
1021                 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
1022                 m->m_data -= sizeof(*ip);
1023
1024                 CARPSTATS_INC(carps_opackets);
1025
1026                 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
1027                     &sc->sc_carpdev->if_carp->cif_imo, NULL));
1028         }
1029 #endif /* INET */
1030 #ifdef INET6
1031         if (sc->sc_naddrs6) {
1032                 struct ip6_hdr *ip6;
1033
1034                 m = m_gethdr(M_NOWAIT, MT_DATA);
1035                 if (m == NULL) {
1036                         CARPSTATS_INC(carps_onomem);
1037                         goto resched;
1038                 }
1039                 len = sizeof(*ip6) + sizeof(ch);
1040                 m->m_pkthdr.len = len;
1041                 m->m_pkthdr.rcvif = NULL;
1042                 m->m_len = len;
1043                 M_ALIGN(m, m->m_len);
1044                 ip6 = mtod(m, struct ip6_hdr *);
1045                 bzero(ip6, sizeof(*ip6));
1046                 ip6->ip6_vfc |= IPV6_VERSION;
1047                 /* Traffic class isn't defined in ip6 struct instead
1048                  * it gets offset into flowid field */
1049                 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN +
1050                     IPTOS_DSCP_OFFSET));
1051                 ip6->ip6_hlim = CARP_DFLTTL;
1052                 ip6->ip6_nxt = IPPROTO_CARP;
1053
1054                 /* set the source address */
1055                 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev);
1056                 if (ifa != NULL) {
1057                         bcopy(IFA_IN6(ifa), &ip6->ip6_src,
1058                             sizeof(struct in6_addr));
1059                         ifa_free(ifa);
1060                 } else
1061                         /* This should never happen with IPv6. */
1062                         bzero(&ip6->ip6_src, sizeof(struct in6_addr));
1063
1064                 /* Set the multicast destination. */
1065                 memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst));
1066                 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
1067                     IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) {
1068                         if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1069                                 m_freem(m);
1070                                 CARP_DEBUG("%s: in6_setscope failed\n", __func__);
1071                                 goto resched;
1072                         }
1073                 }
1074
1075                 ch_ptr = (struct carp_header *)(&ip6[1]);
1076                 bcopy(&ch, ch_ptr, sizeof(ch));
1077                 if (carp_prepare_ad(m, sc, ch_ptr))
1078                         goto resched;
1079
1080                 m->m_data += sizeof(*ip6);
1081                 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
1082                 m->m_data -= sizeof(*ip6);
1083
1084                 CARPSTATS_INC(carps_opackets6);
1085
1086                 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
1087                     &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
1088         }
1089 #endif /* INET6 */
1090
1091 resched:
1092         callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
1093 }
1094
1095 static void
1096 carp_addroute(struct carp_softc *sc)
1097 {
1098         struct ifaddr *ifa;
1099
1100         CARP_FOREACH_IFA(sc, ifa)
1101                 carp_ifa_addroute(ifa);
1102 }
1103
1104 static void
1105 carp_ifa_addroute(struct ifaddr *ifa)
1106 {
1107
1108         switch (ifa->ifa_addr->sa_family) {
1109 #ifdef INET
1110         case AF_INET:
1111                 in_addprefix(ifatoia(ifa));
1112                 ifa_add_loopback_route(ifa,
1113                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
1114                 break;
1115 #endif
1116 #ifdef INET6
1117         case AF_INET6:
1118                 ifa_add_loopback_route(ifa,
1119                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
1120                 nd6_add_ifa_lle(ifatoia6(ifa));
1121                 break;
1122 #endif
1123         }
1124 }
1125
1126 static void
1127 carp_delroute(struct carp_softc *sc)
1128 {
1129         struct ifaddr *ifa;
1130
1131         CARP_FOREACH_IFA(sc, ifa)
1132                 carp_ifa_delroute(ifa);
1133 }
1134
1135 static void
1136 carp_ifa_delroute(struct ifaddr *ifa)
1137 {
1138
1139         switch (ifa->ifa_addr->sa_family) {
1140 #ifdef INET
1141         case AF_INET:
1142                 ifa_del_loopback_route(ifa,
1143                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
1144                 in_scrubprefix(ifatoia(ifa), LLE_STATIC);
1145                 break;
1146 #endif
1147 #ifdef INET6
1148         case AF_INET6:
1149                 ifa_del_loopback_route(ifa,
1150                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
1151                 nd6_rem_ifa_lle(ifatoia6(ifa), 1);
1152                 break;
1153 #endif
1154         }
1155 }
1156
1157 int
1158 carp_master(struct ifaddr *ifa)
1159 {
1160         struct carp_softc *sc = ifa->ifa_carp;
1161
1162         return (sc->sc_state == MASTER);
1163 }
1164
1165 #ifdef INET
1166 /*
1167  * Broadcast a gratuitous ARP request containing
1168  * the virtual router MAC address for each IP address
1169  * associated with the virtual router.
1170  */
1171 static void
1172 carp_send_arp(struct carp_softc *sc)
1173 {
1174         struct ifaddr *ifa;
1175         struct in_addr addr;
1176
1177         NET_EPOCH_ASSERT();
1178
1179         CARP_FOREACH_IFA(sc, ifa) {
1180                 if (ifa->ifa_addr->sa_family != AF_INET)
1181                         continue;
1182                 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
1183                 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr));
1184         }
1185 }
1186
1187 int
1188 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
1189 {
1190         struct carp_softc *sc = ifa->ifa_carp;
1191
1192         if (sc->sc_state == MASTER) {
1193                 *enaddr = LLADDR(&sc->sc_addr);
1194                 return (1);
1195         }
1196
1197         return (0);
1198 }
1199 #endif
1200
1201 #ifdef INET6
1202 static void
1203 carp_send_na(struct carp_softc *sc)
1204 {
1205         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1206         struct ifaddr *ifa;
1207         struct in6_addr *in6;
1208
1209         CARP_FOREACH_IFA(sc, ifa) {
1210                 if (ifa->ifa_addr->sa_family != AF_INET6)
1211                         continue;
1212
1213                 in6 = IFA_IN6(ifa);
1214                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1215                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1216                 DELAY(1000);    /* XXX */
1217         }
1218 }
1219
1220 /*
1221  * Returns ifa in case it's a carp address and it is MASTER, or if the address
1222  * matches and is not a carp address.  Returns NULL otherwise.
1223  */
1224 struct ifaddr *
1225 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
1226 {
1227         struct ifaddr *ifa;
1228
1229         NET_EPOCH_ASSERT();
1230
1231         ifa = NULL;
1232         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1233                 if (ifa->ifa_addr->sa_family != AF_INET6)
1234                         continue;
1235                 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
1236                         continue;
1237                 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
1238                         ifa = NULL;
1239                 else
1240                         ifa_ref(ifa);
1241                 break;
1242         }
1243
1244         return (ifa);
1245 }
1246
1247 char *
1248 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
1249 {
1250         struct ifaddr *ifa;
1251
1252         NET_EPOCH_ASSERT();
1253
1254         IFNET_FOREACH_IFA(ifp, ifa)
1255                 if (ifa->ifa_addr->sa_family == AF_INET6 &&
1256                     IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
1257                         struct carp_softc *sc = ifa->ifa_carp;
1258                         struct m_tag *mtag;
1259
1260                         mtag = m_tag_get(PACKET_TAG_CARP,
1261                             sizeof(struct carp_softc *), M_NOWAIT);
1262                         if (mtag == NULL)
1263                                 /* Better a bit than nothing. */
1264                                 return (LLADDR(&sc->sc_addr));
1265
1266                         bcopy(&sc, mtag + 1, sizeof(sc));
1267                         m_tag_prepend(m, mtag);
1268
1269                         return (LLADDR(&sc->sc_addr));
1270                 }
1271
1272         return (NULL);
1273 }
1274 #endif /* INET6 */
1275
1276 int
1277 carp_forus(struct ifnet *ifp, u_char *dhost)
1278 {
1279         struct carp_softc *sc;
1280         uint8_t *ena = dhost;
1281
1282         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1283                 return (0);
1284
1285         CIF_LOCK(ifp->if_carp);
1286         IFNET_FOREACH_CARP(ifp, sc) {
1287                 /*
1288                  * CARP_LOCK() is not here, since would protect nothing, but
1289                  * cause deadlock with if_bridge, calling this under its lock.
1290                  */
1291                 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
1292                     ETHER_ADDR_LEN)) {
1293                         CIF_UNLOCK(ifp->if_carp);
1294                         return (1);
1295                 }
1296         }
1297         CIF_UNLOCK(ifp->if_carp);
1298
1299         return (0);
1300 }
1301
1302 /* Master down timeout event, executed in callout context. */
1303 static void
1304 carp_master_down(void *v)
1305 {
1306         struct carp_softc *sc = v;
1307         struct epoch_tracker et;
1308
1309         NET_EPOCH_ENTER(et);
1310         CARP_LOCK_ASSERT(sc);
1311
1312         CURVNET_SET(sc->sc_carpdev->if_vnet);
1313         if (sc->sc_state == BACKUP) {
1314                 carp_master_down_locked(sc, "master timed out");
1315         }
1316         CURVNET_RESTORE();
1317
1318         CARP_UNLOCK(sc);
1319         NET_EPOCH_EXIT(et);
1320 }
1321
1322 static void
1323 carp_master_down_locked(struct carp_softc *sc, const char *reason)
1324 {
1325
1326         NET_EPOCH_ASSERT();
1327         CARP_LOCK_ASSERT(sc);
1328
1329         switch (sc->sc_state) {
1330         case BACKUP:
1331                 carp_set_state(sc, MASTER, reason);
1332                 carp_send_ad_locked(sc);
1333 #ifdef INET
1334                 carp_send_arp(sc);
1335 #endif
1336 #ifdef INET6
1337                 carp_send_na(sc);
1338 #endif
1339                 carp_setrun(sc, 0);
1340                 carp_addroute(sc);
1341                 break;
1342         case INIT:
1343         case MASTER:
1344 #ifdef INVARIANTS
1345                 panic("carp: VHID %u@%s: master_down event in %s state\n",
1346                     sc->sc_vhid,
1347                     if_name(sc->sc_carpdev),
1348                     sc->sc_state ? "MASTER" : "INIT");
1349 #endif
1350                 break;
1351         }
1352 }
1353
1354 /*
1355  * When in backup state, af indicates whether to reset the master down timer
1356  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1357  */
1358 static void
1359 carp_setrun(struct carp_softc *sc, sa_family_t af)
1360 {
1361         struct timeval tv;
1362
1363         CARP_LOCK_ASSERT(sc);
1364
1365         if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
1366             sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
1367             (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) ||
1368             !V_carp_allow)
1369                 return;
1370
1371         switch (sc->sc_state) {
1372         case INIT:
1373                 carp_set_state(sc, BACKUP, "initialization complete");
1374                 carp_setrun(sc, 0);
1375                 break;
1376         case BACKUP:
1377                 callout_stop(&sc->sc_ad_tmo);
1378                 tv.tv_sec = 3 * sc->sc_advbase;
1379                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1380                 switch (af) {
1381 #ifdef INET
1382                 case AF_INET:
1383                         callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1384                             carp_master_down, sc);
1385                         break;
1386 #endif
1387 #ifdef INET6
1388                 case AF_INET6:
1389                         callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1390                             carp_master_down, sc);
1391                         break;
1392 #endif
1393                 default:
1394 #ifdef INET
1395                         if (sc->sc_naddrs)
1396                                 callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1397                                     carp_master_down, sc);
1398 #endif
1399 #ifdef INET6
1400                         if (sc->sc_naddrs6)
1401                                 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1402                                     carp_master_down, sc);
1403 #endif
1404                         break;
1405                 }
1406                 break;
1407         case MASTER:
1408                 tv.tv_sec = sc->sc_advbase;
1409                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1410                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
1411                     carp_send_ad, sc);
1412                 break;
1413         }
1414 }
1415
1416 /*
1417  * Setup multicast structures.
1418  */
1419 static int
1420 carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
1421 {
1422         struct ifnet *ifp = cif->cif_ifp;
1423         int error = 0;
1424
1425         switch (sa) {
1426 #ifdef INET
1427         case AF_INET:
1428             {
1429                 struct ip_moptions *imo = &cif->cif_imo;
1430                 struct in_mfilter *imf;
1431                 struct in_addr addr;
1432
1433                 if (ip_mfilter_first(&imo->imo_head) != NULL)
1434                         return (0);
1435
1436                 imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
1437                 ip_mfilter_init(&imo->imo_head);
1438                 imo->imo_multicast_vif = -1;
1439
1440                 addr.s_addr = htonl(INADDR_CARP_GROUP);
1441                 if ((error = in_joingroup(ifp, &addr, NULL,
1442                     &imf->imf_inm)) != 0) {
1443                         ip_mfilter_free(imf);
1444                         break;
1445                 }
1446
1447                 ip_mfilter_insert(&imo->imo_head, imf);
1448                 imo->imo_multicast_ifp = ifp;
1449                 imo->imo_multicast_ttl = CARP_DFLTTL;
1450                 imo->imo_multicast_loop = 0;
1451                 break;
1452            }
1453 #endif
1454 #ifdef INET6
1455         case AF_INET6:
1456             {
1457                 struct ip6_moptions *im6o = &cif->cif_im6o;
1458                 struct in6_mfilter *im6f[2];
1459                 struct in6_addr in6;
1460
1461                 if (ip6_mfilter_first(&im6o->im6o_head))
1462                         return (0);
1463
1464                 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
1465                 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
1466
1467                 ip6_mfilter_init(&im6o->im6o_head);
1468                 im6o->im6o_multicast_hlim = CARP_DFLTTL;
1469                 im6o->im6o_multicast_ifp = ifp;
1470
1471                 /* Join IPv6 CARP multicast group. */
1472                 bzero(&in6, sizeof(in6));
1473                 in6.s6_addr16[0] = htons(0xff02);
1474                 in6.s6_addr8[15] = 0x12;
1475                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1476                         ip6_mfilter_free(im6f[0]);
1477                         ip6_mfilter_free(im6f[1]);
1478                         break;
1479                 }
1480                 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) {
1481                         ip6_mfilter_free(im6f[0]);
1482                         ip6_mfilter_free(im6f[1]);
1483                         break;
1484                 }
1485
1486                 /* Join solicited multicast address. */
1487                 bzero(&in6, sizeof(in6));
1488                 in6.s6_addr16[0] = htons(0xff02);
1489                 in6.s6_addr32[1] = 0;
1490                 in6.s6_addr32[2] = htonl(1);
1491                 in6.s6_addr32[3] = 0;
1492                 in6.s6_addr8[12] = 0xff;
1493
1494                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1495                         ip6_mfilter_free(im6f[0]);
1496                         ip6_mfilter_free(im6f[1]);
1497                         break;
1498                 }
1499
1500                 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) {
1501                         in6_leavegroup(im6f[0]->im6f_in6m, NULL);
1502                         ip6_mfilter_free(im6f[0]);
1503                         ip6_mfilter_free(im6f[1]);
1504                         break;
1505                 }
1506                 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]);
1507                 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]);
1508                 break;
1509             }
1510 #endif
1511         }
1512
1513         return (error);
1514 }
1515
1516 /*
1517  * Free multicast structures.
1518  */
1519 static void
1520 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
1521 {
1522 #ifdef INET
1523         struct ip_moptions *imo = &cif->cif_imo;
1524         struct in_mfilter *imf;
1525 #endif
1526 #ifdef INET6
1527         struct ip6_moptions *im6o = &cif->cif_im6o;
1528         struct in6_mfilter *im6f;
1529 #endif
1530         sx_assert(&carp_sx, SA_XLOCKED);
1531
1532         switch (sa) {
1533 #ifdef INET
1534         case AF_INET:
1535                 if (cif->cif_naddrs != 0)
1536                         break;
1537
1538                 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
1539                         ip_mfilter_remove(&imo->imo_head, imf);
1540                         in_leavegroup(imf->imf_inm, NULL);
1541                         ip_mfilter_free(imf);
1542                 }
1543                 break;
1544 #endif
1545 #ifdef INET6
1546         case AF_INET6:
1547                 if (cif->cif_naddrs6 != 0)
1548                         break;
1549
1550                 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) {
1551                         ip6_mfilter_remove(&im6o->im6o_head, im6f);
1552                         in6_leavegroup(im6f->im6f_in6m, NULL);
1553                         ip6_mfilter_free(im6f);
1554                 }
1555                 break;
1556 #endif
1557         }
1558 }
1559
1560 int
1561 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
1562 {
1563         struct m_tag *mtag;
1564         struct carp_softc *sc;
1565
1566         if (!sa)
1567                 return (0);
1568
1569         switch (sa->sa_family) {
1570 #ifdef INET
1571         case AF_INET:
1572                 break;
1573 #endif
1574 #ifdef INET6
1575         case AF_INET6:
1576                 break;
1577 #endif
1578         default:
1579                 return (0);
1580         }
1581
1582         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
1583         if (mtag == NULL)
1584                 return (0);
1585
1586         bcopy(mtag + 1, &sc, sizeof(sc));
1587
1588         switch (sa->sa_family) {
1589         case AF_INET:
1590                 if (! IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)))
1591                         return (0);
1592                 break;
1593         case AF_INET6:
1594                 if (! IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6))
1595                         return (0);
1596                 break;
1597         default:
1598                 panic("Unknown af");
1599         }
1600
1601         /* Set the source MAC address to the Virtual Router MAC Address. */
1602         switch (ifp->if_type) {
1603         case IFT_ETHER:
1604         case IFT_BRIDGE:
1605         case IFT_L2VLAN: {
1606                         struct ether_header *eh;
1607
1608                         eh = mtod(m, struct ether_header *);
1609                         eh->ether_shost[0] = 0;
1610                         eh->ether_shost[1] = 0;
1611                         eh->ether_shost[2] = 0x5e;
1612                         eh->ether_shost[3] = 0;
1613                         eh->ether_shost[4] = 1;
1614                         eh->ether_shost[5] = sc->sc_vhid;
1615                 }
1616                 break;
1617         default:
1618                 printf("%s: carp is not supported for the %d interface type\n",
1619                     if_name(ifp), ifp->if_type);
1620                 return (EOPNOTSUPP);
1621         }
1622
1623         return (0);
1624 }
1625
1626 static struct carp_softc*
1627 carp_alloc(struct ifnet *ifp)
1628 {
1629         struct carp_softc *sc;
1630         struct carp_if *cif;
1631
1632         sx_assert(&carp_sx, SA_XLOCKED);
1633
1634         if ((cif = ifp->if_carp) == NULL)
1635                 cif = carp_alloc_if(ifp);
1636
1637         sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
1638
1639         sc->sc_advbase = CARP_DFLTINTV;
1640         sc->sc_vhid = -1;       /* required setting */
1641         sc->sc_init_counter = 1;
1642         sc->sc_state = INIT;
1643
1644         sc->sc_ifasiz = sizeof(struct ifaddr *);
1645         sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
1646         sc->sc_carpdev = ifp;
1647
1648         sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP);
1649         sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
1650         sc->sc_carpaddr6.s6_addr8[15] = 0x12;
1651
1652         CARP_LOCK_INIT(sc);
1653 #ifdef INET
1654         callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1655 #endif
1656 #ifdef INET6
1657         callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1658 #endif
1659         callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1660
1661         CIF_LOCK(cif);
1662         TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
1663         CIF_UNLOCK(cif);
1664
1665         mtx_lock(&carp_mtx);
1666         LIST_INSERT_HEAD(&carp_list, sc, sc_next);
1667         mtx_unlock(&carp_mtx);
1668
1669         return (sc);
1670 }
1671
1672 static void
1673 carp_grow_ifas(struct carp_softc *sc)
1674 {
1675         struct ifaddr **new;
1676
1677         new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO);
1678         CARP_LOCK(sc);
1679         bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
1680         free(sc->sc_ifas, M_CARP);
1681         sc->sc_ifas = new;
1682         sc->sc_ifasiz *= 2;
1683         CARP_UNLOCK(sc);
1684 }
1685
1686 static void
1687 carp_destroy(struct carp_softc *sc)
1688 {
1689         struct ifnet *ifp = sc->sc_carpdev;
1690         struct carp_if *cif = ifp->if_carp;
1691
1692         sx_assert(&carp_sx, SA_XLOCKED);
1693
1694         if (sc->sc_suppress)
1695                 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
1696         CARP_UNLOCK(sc);
1697
1698         CIF_LOCK(cif);
1699         TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
1700         CIF_UNLOCK(cif);
1701
1702         mtx_lock(&carp_mtx);
1703         LIST_REMOVE(sc, sc_next);
1704         mtx_unlock(&carp_mtx);
1705
1706         callout_drain(&sc->sc_ad_tmo);
1707 #ifdef INET
1708         callout_drain(&sc->sc_md_tmo);
1709 #endif
1710 #ifdef INET6
1711         callout_drain(&sc->sc_md6_tmo);
1712 #endif
1713         CARP_LOCK_DESTROY(sc);
1714
1715         free(sc->sc_ifas, M_CARP);
1716         free(sc, M_CARP);
1717 }
1718
1719 static struct carp_if*
1720 carp_alloc_if(struct ifnet *ifp)
1721 {
1722         struct carp_if *cif;
1723         int error;
1724
1725         cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
1726
1727         if ((error = ifpromisc(ifp, 1)) != 0)
1728                 printf("%s: ifpromisc(%s) failed: %d\n",
1729                     __func__, if_name(ifp), error);
1730         else
1731                 cif->cif_flags |= CIF_PROMISC;
1732
1733         CIF_LOCK_INIT(cif);
1734         cif->cif_ifp = ifp;
1735         TAILQ_INIT(&cif->cif_vrs);
1736
1737         IF_ADDR_WLOCK(ifp);
1738         ifp->if_carp = cif;
1739         if_ref(ifp);
1740         IF_ADDR_WUNLOCK(ifp);
1741
1742         return (cif);
1743 }
1744
1745 static void
1746 carp_free_if(struct carp_if *cif)
1747 {
1748         struct ifnet *ifp = cif->cif_ifp;
1749
1750         CIF_LOCK_ASSERT(cif);
1751         KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
1752             __func__));
1753
1754         IF_ADDR_WLOCK(ifp);
1755         ifp->if_carp = NULL;
1756         IF_ADDR_WUNLOCK(ifp);
1757
1758         CIF_LOCK_DESTROY(cif);
1759
1760         if (cif->cif_flags & CIF_PROMISC)
1761                 ifpromisc(ifp, 0);
1762         if_rele(ifp);
1763
1764         free(cif, M_CARP);
1765 }
1766
1767 static bool
1768 carp_carprcp(void *arg, struct carp_softc *sc, int priv)
1769 {
1770         struct carpreq *carpr = arg;
1771
1772         CARP_LOCK(sc);
1773         carpr->carpr_state = sc->sc_state;
1774         carpr->carpr_vhid = sc->sc_vhid;
1775         carpr->carpr_advbase = sc->sc_advbase;
1776         carpr->carpr_advskew = sc->sc_advskew;
1777         if (priv)
1778                 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
1779         else
1780                 bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
1781         CARP_UNLOCK(sc);
1782
1783         return (true);
1784 }
1785
1786 static int
1787 carp_ioctl_set(if_t ifp, struct carpkreq *carpr)
1788 {
1789         struct epoch_tracker et;
1790         struct carp_softc *sc = NULL;
1791         int error = 0;
1792
1793
1794         if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID ||
1795             carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0) {
1796                 return (EINVAL);
1797         }
1798
1799         if (ifp->if_carp) {
1800                 IFNET_FOREACH_CARP(ifp, sc)
1801                         if (sc->sc_vhid == carpr->carpr_vhid)
1802                                 break;
1803         }
1804         if (sc == NULL) {
1805                 sc = carp_alloc(ifp);
1806                 CARP_LOCK(sc);
1807                 sc->sc_vhid = carpr->carpr_vhid;
1808                 LLADDR(&sc->sc_addr)[0] = 0;
1809                 LLADDR(&sc->sc_addr)[1] = 0;
1810                 LLADDR(&sc->sc_addr)[2] = 0x5e;
1811                 LLADDR(&sc->sc_addr)[3] = 0;
1812                 LLADDR(&sc->sc_addr)[4] = 1;
1813                 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
1814         } else
1815                 CARP_LOCK(sc);
1816         if (carpr->carpr_advbase > 0) {
1817                 if (carpr->carpr_advbase > 255 ||
1818                     carpr->carpr_advbase < CARP_DFLTINTV) {
1819                         error = EINVAL;
1820                         goto out;
1821                 }
1822                 sc->sc_advbase = carpr->carpr_advbase;
1823         }
1824         if (carpr->carpr_advskew >= 255) {
1825                 error = EINVAL;
1826                 goto out;
1827         }
1828         sc->sc_advskew = carpr->carpr_advskew;
1829         if (carpr->carpr_addr.s_addr != INADDR_ANY)
1830                 sc->sc_carpaddr = carpr->carpr_addr;
1831         if (! IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) {
1832                 memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6,
1833                     sizeof(sc->sc_carpaddr6));
1834         }
1835         if (carpr->carpr_key[0] != '\0') {
1836                 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
1837                 carp_hmac_prepare(sc);
1838         }
1839         if (sc->sc_state != INIT &&
1840             carpr->carpr_state != sc->sc_state) {
1841                 switch (carpr->carpr_state) {
1842                 case BACKUP:
1843                         callout_stop(&sc->sc_ad_tmo);
1844                         carp_set_state(sc, BACKUP,
1845                             "user requested via ifconfig");
1846                         carp_setrun(sc, 0);
1847                         carp_delroute(sc);
1848                         break;
1849                 case MASTER:
1850                         NET_EPOCH_ENTER(et);
1851                         carp_master_down_locked(sc,
1852                             "user requested via ifconfig");
1853                         NET_EPOCH_EXIT(et);
1854                         break;
1855                 default:
1856                         break;
1857                 }
1858         }
1859
1860 out:
1861         CARP_UNLOCK(sc);
1862
1863         return (error);
1864 }
1865
1866 static int
1867 carp_ioctl_get(if_t ifp, struct ucred *cred, struct carpreq *carpr,
1868     bool (*outfn)(void *, struct carp_softc *, int), void *arg)
1869 {
1870         int priveleged;
1871         struct carp_softc *sc;
1872
1873         if (carpr->carpr_vhid < 0 || carpr->carpr_vhid > CARP_MAXVHID)
1874                 return (EINVAL);
1875         if (carpr->carpr_count < 1)
1876                 return (EMSGSIZE);
1877         if (ifp->if_carp == NULL)
1878                 return (ENOENT);
1879
1880         priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0);
1881         if (carpr->carpr_vhid != 0) {
1882                 IFNET_FOREACH_CARP(ifp, sc)
1883                         if (sc->sc_vhid == carpr->carpr_vhid)
1884                                 break;
1885                 if (sc == NULL)
1886                         return (ENOENT);
1887
1888                 if (! outfn(arg, sc, priveleged))
1889                         return (ENOMEM);
1890                 carpr->carpr_count = 1;
1891         } else  {
1892                 int count;
1893
1894                 count = 0;
1895                 IFNET_FOREACH_CARP(ifp, sc)
1896                         count++;
1897
1898                 if (count > carpr->carpr_count)
1899                         return (EMSGSIZE);
1900
1901                 IFNET_FOREACH_CARP(ifp, sc) {
1902                         if (! outfn(arg, sc, priveleged))
1903                                 return (ENOMEM);
1904                         carpr->carpr_count = count;
1905                 }
1906         }
1907
1908         return (0);
1909 }
1910
1911 int
1912 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
1913 {
1914         struct carpreq carpr;
1915         struct carpkreq carprk = { };
1916         struct ifnet *ifp;
1917         int error = 0;
1918
1919         if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr)))
1920                 return (error);
1921
1922         ifp = ifunit_ref(ifr->ifr_name);
1923         if ((error = carp_is_supported_if(ifp)) != 0)
1924                 goto out;
1925
1926         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1927                 error = EADDRNOTAVAIL;
1928                 goto out;
1929         }
1930
1931         sx_xlock(&carp_sx);
1932         switch (cmd) {
1933         case SIOCSVH:
1934                 if ((error = priv_check(td, PRIV_NETINET_CARP)))
1935                         break;
1936
1937                 memcpy(&carprk, &carpr, sizeof(carpr));
1938                 error = carp_ioctl_set(ifp, &carprk);
1939                 break;
1940
1941         case SIOCGVH:
1942                 error = carp_ioctl_get(ifp, td->td_ucred, &carpr,
1943                     carp_carprcp, &carpr);
1944                 if (error == 0) {
1945                         error = copyout(&carpr,
1946                             (char *)ifr_data_get_ptr(ifr),
1947                             carpr.carpr_count * sizeof(carpr));
1948                 }
1949                 break;
1950         default:
1951                 error = EINVAL;
1952         }
1953         sx_xunlock(&carp_sx);
1954
1955 out:
1956         if (ifp != NULL)
1957                 if_rele(ifp);
1958
1959         return (error);
1960 }
1961
1962 static int
1963 carp_get_vhid(struct ifaddr *ifa)
1964 {
1965
1966         if (ifa == NULL || ifa->ifa_carp == NULL)
1967                 return (0);
1968
1969         return (ifa->ifa_carp->sc_vhid);
1970 }
1971
1972 int
1973 carp_attach(struct ifaddr *ifa, int vhid)
1974 {
1975         struct ifnet *ifp = ifa->ifa_ifp;
1976         struct carp_if *cif = ifp->if_carp;
1977         struct carp_softc *sc;
1978         int index, error;
1979
1980         KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa));
1981
1982         switch (ifa->ifa_addr->sa_family) {
1983 #ifdef INET
1984         case AF_INET:
1985 #endif
1986 #ifdef INET6
1987         case AF_INET6:
1988 #endif
1989                 break;
1990         default:
1991                 return (EPROTOTYPE);
1992         }
1993
1994         sx_xlock(&carp_sx);
1995         if (ifp->if_carp == NULL) {
1996                 sx_xunlock(&carp_sx);
1997                 return (ENOPROTOOPT);
1998         }
1999
2000         IFNET_FOREACH_CARP(ifp, sc)
2001                 if (sc->sc_vhid == vhid)
2002                         break;
2003         if (sc == NULL) {
2004                 sx_xunlock(&carp_sx);
2005                 return (ENOENT);
2006         }
2007
2008         error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
2009         if (error) {
2010                 CIF_FREE(cif);
2011                 sx_xunlock(&carp_sx);
2012                 return (error);
2013         }
2014
2015         index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
2016         if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
2017                 carp_grow_ifas(sc);
2018
2019         switch (ifa->ifa_addr->sa_family) {
2020 #ifdef INET
2021         case AF_INET:
2022                 cif->cif_naddrs++;
2023                 sc->sc_naddrs++;
2024                 break;
2025 #endif
2026 #ifdef INET6
2027         case AF_INET6:
2028                 cif->cif_naddrs6++;
2029                 sc->sc_naddrs6++;
2030                 break;
2031 #endif
2032         }
2033
2034         ifa_ref(ifa);
2035
2036         CARP_LOCK(sc);
2037         sc->sc_ifas[index - 1] = ifa;
2038         ifa->ifa_carp = sc;
2039         carp_hmac_prepare(sc);
2040         carp_sc_state(sc);
2041         CARP_UNLOCK(sc);
2042
2043         sx_xunlock(&carp_sx);
2044
2045         return (0);
2046 }
2047
2048 void
2049 carp_detach(struct ifaddr *ifa, bool keep_cif)
2050 {
2051         struct ifnet *ifp = ifa->ifa_ifp;
2052         struct carp_if *cif = ifp->if_carp;
2053         struct carp_softc *sc = ifa->ifa_carp;
2054         int i, index;
2055
2056         KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
2057
2058         sx_xlock(&carp_sx);
2059
2060         CARP_LOCK(sc);
2061         /* Shift array. */
2062         index = sc->sc_naddrs + sc->sc_naddrs6;
2063         for (i = 0; i < index; i++)
2064                 if (sc->sc_ifas[i] == ifa)
2065                         break;
2066         KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
2067         for (; i < index - 1; i++)
2068                 sc->sc_ifas[i] = sc->sc_ifas[i+1];
2069         sc->sc_ifas[index - 1] = NULL;
2070
2071         switch (ifa->ifa_addr->sa_family) {
2072 #ifdef INET
2073         case AF_INET:
2074                 cif->cif_naddrs--;
2075                 sc->sc_naddrs--;
2076                 break;
2077 #endif
2078 #ifdef INET6
2079         case AF_INET6:
2080                 cif->cif_naddrs6--;
2081                 sc->sc_naddrs6--;
2082                 break;
2083 #endif
2084         }
2085
2086         carp_ifa_delroute(ifa);
2087         carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
2088
2089         ifa->ifa_carp = NULL;
2090         ifa_free(ifa);
2091
2092         carp_hmac_prepare(sc);
2093         carp_sc_state(sc);
2094
2095         if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)
2096                 carp_destroy(sc);
2097         else
2098                 CARP_UNLOCK(sc);
2099
2100         if (!keep_cif)
2101                 CIF_FREE(cif);
2102
2103         sx_xunlock(&carp_sx);
2104 }
2105
2106 static void
2107 carp_set_state(struct carp_softc *sc, int state, const char *reason)
2108 {
2109
2110         CARP_LOCK_ASSERT(sc);
2111
2112         if (sc->sc_state != state) {
2113                 const char *carp_states[] = { CARP_STATES };
2114                 char subsys[IFNAMSIZ+5];
2115
2116                 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
2117                     if_name(sc->sc_carpdev));
2118
2119                 CARP_LOG("%s: %s -> %s (%s)\n", subsys,
2120                     carp_states[sc->sc_state], carp_states[state], reason);
2121
2122                 sc->sc_state = state;
2123
2124                 devctl_notify("CARP", subsys, carp_states[state], NULL);
2125         }
2126 }
2127
2128 static void
2129 carp_linkstate(struct ifnet *ifp)
2130 {
2131         struct carp_softc *sc;
2132
2133         CIF_LOCK(ifp->if_carp);
2134         IFNET_FOREACH_CARP(ifp, sc) {
2135                 CARP_LOCK(sc);
2136                 carp_sc_state(sc);
2137                 CARP_UNLOCK(sc);
2138         }
2139         CIF_UNLOCK(ifp->if_carp);
2140 }
2141
2142 static void
2143 carp_sc_state(struct carp_softc *sc)
2144 {
2145
2146         CARP_LOCK_ASSERT(sc);
2147
2148         if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
2149             !(sc->sc_carpdev->if_flags & IFF_UP) ||
2150             !V_carp_allow) {
2151                 callout_stop(&sc->sc_ad_tmo);
2152 #ifdef INET
2153                 callout_stop(&sc->sc_md_tmo);
2154 #endif
2155 #ifdef INET6
2156                 callout_stop(&sc->sc_md6_tmo);
2157 #endif
2158                 carp_set_state(sc, INIT, "hardware interface down");
2159                 carp_setrun(sc, 0);
2160                 carp_delroute(sc);
2161                 if (!sc->sc_suppress)
2162                         carp_demote_adj(V_carp_ifdown_adj, "interface down");
2163                 sc->sc_suppress = 1;
2164         } else {
2165                 carp_set_state(sc, INIT, "hardware interface up");
2166                 carp_setrun(sc, 0);
2167                 if (sc->sc_suppress)
2168                         carp_demote_adj(-V_carp_ifdown_adj, "interface up");
2169                 sc->sc_suppress = 0;
2170         }
2171 }
2172
2173 static void
2174 carp_demote_adj(int adj, char *reason)
2175 {
2176         atomic_add_int(&V_carp_demotion, adj);
2177         CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
2178         taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
2179 }
2180
2181 static int
2182 carp_allow_sysctl(SYSCTL_HANDLER_ARGS)
2183 {
2184         int new, error;
2185         struct carp_softc *sc;
2186
2187         new = V_carp_allow;
2188         error = sysctl_handle_int(oidp, &new, 0, req);
2189         if (error || !req->newptr)
2190                 return (error);
2191
2192         if (V_carp_allow != new) {
2193                 V_carp_allow = new;
2194
2195                 mtx_lock(&carp_mtx);
2196                 LIST_FOREACH(sc, &carp_list, sc_next) {
2197                         CARP_LOCK(sc);
2198                         if (curvnet == sc->sc_carpdev->if_vnet)
2199                                 carp_sc_state(sc);
2200                         CARP_UNLOCK(sc);
2201                 }
2202                 mtx_unlock(&carp_mtx);
2203         }
2204
2205         return (0);
2206 }
2207
2208 static int
2209 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS)
2210 {
2211         int new, error;
2212
2213         new = V_carp_dscp;
2214         error = sysctl_handle_int(oidp, &new, 0, req);
2215         if (error || !req->newptr)
2216                 return (error);
2217
2218         if (new < 0 || new > 63)
2219                 return (EINVAL);
2220
2221         V_carp_dscp = new;
2222
2223         return (0);
2224 }
2225
2226 static int
2227 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
2228 {
2229         int new, error;
2230
2231         new = V_carp_demotion;
2232         error = sysctl_handle_int(oidp, &new, 0, req);
2233         if (error || !req->newptr)
2234                 return (error);
2235
2236         carp_demote_adj(new, "sysctl");
2237
2238         return (0);
2239 }
2240
2241 static int
2242 nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target)
2243 {
2244         if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN))
2245                 return (EINVAL);
2246
2247         memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla));
2248         return (0);
2249 }
2250
/* Argument bundle handed to carp_nl_send() through carp_ioctl_get(). */
struct carp_nl_send_args {
	struct nlmsghdr		*hdr;	/* request header being replied to */
	struct nl_pstate	*npt;	/* parser state; supplies the writer */
};
2255
2256 static bool
2257 carp_nl_send(void *arg, struct carp_softc *sc, int priv)
2258 {
2259         struct carp_nl_send_args *nlsa = arg;
2260         struct nlmsghdr *hdr = nlsa->hdr;
2261         struct nl_pstate *npt = nlsa->npt;
2262         struct nl_writer *nw = npt->nw;
2263         struct genlmsghdr *ghdr_new;
2264
2265         if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
2266                 nlmsg_abort(nw);
2267                 return (false);
2268         }
2269
2270         ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
2271         if (ghdr_new == NULL) {
2272                 nlmsg_abort(nw);
2273                 return (false);
2274         }
2275
2276         ghdr_new->cmd = CARP_NL_CMD_GET;
2277         ghdr_new->version = 0;
2278         ghdr_new->reserved = 0;
2279
2280         CARP_LOCK(sc);
2281
2282         nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid);
2283         nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state);
2284         nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase);
2285         nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew);
2286         nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr);
2287         nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6);
2288
2289         if (priv)
2290                 nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), sc->sc_key);
2291
2292         CARP_UNLOCK(sc);
2293
2294         if (! nlmsg_end(nw)) {
2295                 nlmsg_abort(nw);
2296                 return (false);
2297         }
2298
2299         return (true);
2300 }
2301
2302 struct nl_carp_parsed {
2303         unsigned int    ifindex;
2304         char            *ifname;
2305         uint32_t        state;
2306         uint32_t        vhid;
2307         int32_t         advbase;
2308         int32_t         advskew;
2309         char            key[CARP_KEY_LEN];
2310         struct in_addr  addr;
2311         struct in6_addr addr6;
2312 };
2313
2314 #define _IN(_field)     offsetof(struct genlmsghdr, _field)
2315 #define _OUT(_field)    offsetof(struct nl_carp_parsed, _field)
2316
2317 static const struct nlattr_parser nla_p_set[] = {
2318         { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 },
2319         { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 },
2320         { .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 },
2321         { .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 },
2322         { .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key },
2323         { .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 },
2324         { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr },
2325         { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr },
2326         { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string },
2327 };
2328 static const struct nlfield_parser nlf_p_set[] = {
2329 };
2330 NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_set, nla_p_set);
2331 #undef _IN
2332 #undef _OUT
2333
2334
2335 static int
2336 carp_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt)
2337 {
2338         struct nl_carp_parsed attrs = { };
2339         struct carp_nl_send_args args;
2340         struct carpreq carpr = { };
2341         struct epoch_tracker et;
2342         if_t ifp = NULL;
2343         int error;
2344
2345         error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs);
2346         if (error != 0)
2347                 return (error);
2348
2349         NET_EPOCH_ENTER(et);
2350         if (attrs.ifname != NULL)
2351                 ifp = ifunit_ref(attrs.ifname);
2352         else if (attrs.ifindex != 0)
2353                 ifp = ifnet_byindex_ref(attrs.ifindex);
2354         NET_EPOCH_EXIT(et);
2355
2356         if ((error = carp_is_supported_if(ifp)) != 0)
2357                 goto out;
2358
2359         hdr->nlmsg_flags |= NLM_F_MULTI;
2360         args.hdr = hdr;
2361         args.npt = npt;
2362
2363         carpr.carpr_vhid = attrs.vhid;
2364         carpr.carpr_count = CARP_MAXVHID;
2365
2366         sx_xlock(&carp_sx);
2367         error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr,
2368             carp_nl_send, &args);
2369         sx_xunlock(&carp_sx);
2370
2371         if (! nlmsg_end_dump(npt->nw, error, hdr))
2372                 error = ENOMEM;
2373
2374 out:
2375         if (ifp != NULL)
2376                 if_rele(ifp);
2377
2378         return (error);
2379 }
2380
2381 static int
2382 carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
2383 {
2384         struct nl_carp_parsed attrs = { };
2385         struct carpkreq carpr;
2386         struct epoch_tracker et;
2387         if_t ifp = NULL;
2388         int error;
2389
2390         error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs);
2391         if (error != 0)
2392                 return (error);
2393
2394         if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID)
2395                 return (EINVAL);
2396         if (attrs.state > CARP_MAXSTATE)
2397                 return (EINVAL);
2398         if (attrs.advbase < 0 || attrs.advskew < 0)
2399                 return (EINVAL);
2400         if (attrs.advbase > 255)
2401                 return (EINVAL);
2402         if (attrs.advskew >= 255)
2403                 return (EINVAL);
2404
2405         NET_EPOCH_ENTER(et);
2406         if (attrs.ifname != NULL)
2407                 ifp = ifunit_ref(attrs.ifname);
2408         else if (attrs.ifindex != 0)
2409                 ifp = ifnet_byindex_ref(attrs.ifindex);
2410         NET_EPOCH_EXIT(et);
2411
2412         if ((error = carp_is_supported_if(ifp)) != 0)
2413                 goto out;
2414
2415         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2416                 error = EADDRNOTAVAIL;
2417                 goto out;
2418         }
2419
2420         carpr.carpr_count = 1;
2421         carpr.carpr_vhid = attrs.vhid;
2422         carpr.carpr_state = attrs.state;
2423         carpr.carpr_advbase = attrs.advbase;
2424         carpr.carpr_advskew = attrs.advskew;
2425         carpr.carpr_addr = attrs.addr;
2426         carpr.carpr_addr6 = attrs.addr6;
2427
2428         memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key));
2429
2430         sx_xlock(&carp_sx);
2431         error = carp_ioctl_set(ifp, &carpr);
2432         sx_xunlock(&carp_sx);
2433
2434 out:
2435         if (ifp != NULL)
2436                 if_rele(ifp);
2437
2438         return (error);
2439 }
2440
2441 static const struct nlhdr_parser *all_parsers[] = {
2442         &carp_parser
2443 };
2444
2445 static const struct genl_cmd carp_cmds[] = {
2446         {
2447                 .cmd_num = CARP_NL_CMD_GET,
2448                 .cmd_name = "SIOCGVH",
2449                 .cmd_cb = carp_nl_get,
2450                 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP |
2451                     GENL_CMD_CAP_HASPOL,
2452         },
2453         {
2454                 .cmd_num = CARP_NL_CMD_SET,
2455                 .cmd_name = "SIOCSVH",
2456                 .cmd_cb = carp_nl_set,
2457                 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
2458                 .cmd_priv = PRIV_NETINET_CARP,
2459         },
2460 };
2461
2462 static void
2463 carp_nl_register(void)
2464 {
2465         bool ret __diagused;
2466         int family_id __diagused;
2467
2468         NL_VERIFY_PARSERS(all_parsers);
2469         family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2,
2470             CARP_NL_CMD_MAX);
2471         MPASS(family_id != 0);
2472
2473         ret = genl_register_cmds(CARP_NL_FAMILY_NAME, carp_cmds,
2474             NL_ARRAY_LEN(carp_cmds));
2475         MPASS(ret);
2476 }
2477
2478 static void
2479 carp_nl_unregister(void)
2480 {
2481         genl_unregister_family(CARP_NL_FAMILY_NAME);
2482 }
2483
2484 static void
2485 carp_mod_cleanup(void)
2486 {
2487
2488         carp_nl_unregister();
2489
2490 #ifdef INET
2491         (void)ipproto_unregister(IPPROTO_CARP);
2492         carp_iamatch_p = NULL;
2493 #endif
2494 #ifdef INET6
2495         (void)ip6proto_unregister(IPPROTO_CARP);
2496         carp_iamatch6_p = NULL;
2497         carp_macmatch6_p = NULL;
2498 #endif
2499         carp_ioctl_p = NULL;
2500         carp_attach_p = NULL;
2501         carp_detach_p = NULL;
2502         carp_get_vhid_p = NULL;
2503         carp_linkstate_p = NULL;
2504         carp_forus_p = NULL;
2505         carp_output_p = NULL;
2506         carp_demote_adj_p = NULL;
2507         carp_master_p = NULL;
2508         mtx_unlock(&carp_mtx);
2509         taskqueue_drain(taskqueue_swi, &carp_sendall_task);
2510         mtx_destroy(&carp_mtx);
2511         sx_destroy(&carp_sx);
2512 }
2513
2514 static void
2515 ipcarp_sysinit(void)
2516 {
2517
2518         /* Load allow as tunable so to postpone carp start after module load */
2519         TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow);
2520 }
2521 VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL);
2522
2523 static int
2524 carp_mod_load(void)
2525 {
2526         int err;
2527
2528         mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
2529         sx_init(&carp_sx, "carp_sx");
2530         LIST_INIT(&carp_list);
2531         carp_get_vhid_p = carp_get_vhid;
2532         carp_forus_p = carp_forus;
2533         carp_output_p = carp_output;
2534         carp_linkstate_p = carp_linkstate;
2535         carp_ioctl_p = carp_ioctl;
2536         carp_attach_p = carp_attach;
2537         carp_detach_p = carp_detach;
2538         carp_demote_adj_p = carp_demote_adj;
2539         carp_master_p = carp_master;
2540 #ifdef INET6
2541         carp_iamatch6_p = carp_iamatch6;
2542         carp_macmatch6_p = carp_macmatch6;
2543         err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL);
2544         if (err) {
2545                 printf("carp: error %d registering with INET6\n", err);
2546                 carp_mod_cleanup();
2547                 return (err);
2548         }
2549 #endif
2550 #ifdef INET
2551         carp_iamatch_p = carp_iamatch;
2552         err = ipproto_register(IPPROTO_CARP, carp_input, NULL);
2553         if (err) {
2554                 printf("carp: error %d registering with INET\n", err);
2555                 carp_mod_cleanup();
2556                 return (err);
2557         }
2558 #endif
2559
2560         carp_nl_register();
2561
2562         return (0);
2563 }
2564
2565 static int
2566 carp_modevent(module_t mod, int type, void *data)
2567 {
2568         switch (type) {
2569         case MOD_LOAD:
2570                 return carp_mod_load();
2571                 /* NOTREACHED */
2572         case MOD_UNLOAD:
2573                 mtx_lock(&carp_mtx);
2574                 if (LIST_EMPTY(&carp_list))
2575                         carp_mod_cleanup();
2576                 else {
2577                         mtx_unlock(&carp_mtx);
2578                         return (EBUSY);
2579                 }
2580                 break;
2581
2582         default:
2583                 return (EINVAL);
2584         }
2585
2586         return (0);
2587 }
2588
2589 static moduledata_t carp_mod = {
2590         "carp",
2591         carp_modevent,
2592         0
2593 };
2594
2595 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);