]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/ip_carp.c
zfs: merge openzfs/zfs@688514e47
[FreeBSD/FreeBSD.git] / sys / netinet / ip_carp.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2002 Michael Shalayeff.
5  * Copyright (c) 2003 Ryan McBride.
6  * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
22  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28  * THE POSSIBILITY OF SUCH DAMAGE.
29  */
30
31 #include <sys/cdefs.h>
32 #include "opt_bpf.h"
33 #include "opt_inet.h"
34 #include "opt_inet6.h"
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/devctl.h>
39 #include <sys/jail.h>
40 #include <sys/kernel.h>
41 #include <sys/limits.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/module.h>
45 #include <sys/priv.h>
46 #include <sys/proc.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/sysctl.h>
50 #include <sys/syslog.h>
51 #include <sys/taskqueue.h>
52 #include <sys/counter.h>
53
54 #include <net/ethernet.h>
55 #include <net/if.h>
56 #include <net/if_var.h>
57 #include <net/if_dl.h>
58 #include <net/if_llatbl.h>
59 #include <net/if_private.h>
60 #include <net/if_types.h>
61 #include <net/route.h>
62 #include <net/vnet.h>
63
64 #if defined(INET) || defined(INET6)
65 #include <netinet/in.h>
66 #include <netinet/in_var.h>
67 #include <netinet/ip_carp.h>
68 #include <netinet/ip_carp_nl.h>
69 #include <netinet/ip.h>
70 #include <machine/in_cksum.h>
71 #endif
72 #ifdef INET
73 #include <netinet/ip_var.h>
74 #include <netinet/if_ether.h>
75 #endif
76
77 #ifdef INET6
78 #include <netinet/icmp6.h>
79 #include <netinet/ip6.h>
80 #include <netinet6/in6_var.h>
81 #include <netinet6/ip6_var.h>
82 #include <netinet6/scope6_var.h>
83 #include <netinet6/nd6.h>
84 #endif
85
86 #include <netlink/netlink.h>
87 #include <netlink/netlink_ctl.h>
88 #include <netlink/netlink_generic.h>
89 #include <netlink/netlink_message_parser.h>
90
91 #include <crypto/sha1.h>
92
93 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
94
95 struct carp_softc {     /* State of one virtual host id (vhid) on an ifnet. */
96         struct ifnet            *sc_carpdev;    /* Pointer to parent ifnet. */
97         struct ifaddr           **sc_ifas;      /* Our ifaddrs (growable array). */
98         struct sockaddr_dl      sc_addr;        /* Our link level address. */
99         struct callout          sc_ad_tmo;      /* Advertising timeout. */
100 #ifdef INET
101         struct callout          sc_md_tmo;      /* Master down timeout. */
102 #endif
103 #ifdef INET6
104         struct callout          sc_md6_tmo;     /* XXX: Master down timeout. */
105 #endif
106         struct mtx              sc_mtx;         /* Taken via CARP_LOCK(). */
107
108         int                     sc_vhid;        /* Virtual host id. */
109         int                     sc_advskew;     /* Skew, in 1/256 s units. */
110         int                     sc_advbase;     /* Base interval, in seconds. */
111         struct in_addr          sc_carpaddr;    /* IPv4 CARP address; multicast unless in unicast mode. */
112         struct in6_addr         sc_carpaddr6;   /* IPv6 CARP address; multicast unless in unicast mode. */
113
114         int                     sc_naddrs;      /* sc_naddrs + sc_naddrs6 bounds walks of sc_ifas. */
115         int                     sc_naddrs6;
116         int                     sc_ifasiz;      /* NOTE(review): presumably allocated size of sc_ifas - confirm. */
117         enum { INIT = 0, BACKUP, MASTER }       sc_state;
118         int                     sc_suppress;
119         int                     sc_sendad_errors;
120 #define CARP_SENDAD_MAX_ERRORS  3
121         int                     sc_sendad_success;
122 #define CARP_SENDAD_MIN_SUCCESS 3
123
124         int                     sc_init_counter;        /* Nonzero: next ad picks a random sc_counter. */
125         uint64_t                sc_counter;             /* Counter carried in each advertisement. */
126
127         /* Authentication: HMAC-SHA1 key, ipad/opad buffer and precomputed inner hash. */
128 #define CARP_HMAC_PAD   64
129         unsigned char sc_key[CARP_KEY_LEN];
130         unsigned char sc_pad[CARP_HMAC_PAD];
131         SHA1_CTX sc_sha1;
132
133         TAILQ_ENTRY(carp_softc) sc_list;        /* On the carp_if list. */
134         LIST_ENTRY(carp_softc)  sc_next;        /* On the global list. */
135 };
136
137 struct carp_if {        /* Per-ifnet CARP state, reached through ifp->if_carp. */
138 #ifdef INET
139         int     cif_naddrs;
140 #endif
141 #ifdef INET6
142         int     cif_naddrs6;
143 #endif
144         TAILQ_HEAD(, carp_softc) cif_vrs;       /* Softcs on this ifnet, linked by sc_list. */
145 #ifdef INET
146         struct ip_moptions       cif_imo;
147 #endif
148 #ifdef INET6
149         struct ip6_moptions      cif_im6o;
150 #endif
151         struct ifnet    *cif_ifp;       /* NOTE(review): presumably the owning ifnet - confirm. */
152         struct mtx      cif_mtx;        /* Taken via CIF_LOCK(). */
153         uint32_t        cif_flags;      /* CIF_* flags below. */
154 #define CIF_PROMISC     0x00000001
155 };
156
157 /* Kernel-internal counterpart of struct carpreq, extended with fields for
158  * newer features. */
159 struct carpkreq {
160         int             carpr_count;
161         int             carpr_vhid;
162         int             carpr_state;
163         int             carpr_advskew;
164         int             carpr_advbase;
165         unsigned char   carpr_key[CARP_KEY_LEN];
166         /* Everything above this line is identical to struct carpreq. */
167         struct in_addr  carpr_addr;
168         struct in6_addr carpr_addr6;
169 };
170
171 /*
172  * Brief design of carp(4).
173  *
174  * Any carp-capable ifnet may have a list of carp softcs hanging off
175  * its ifp->if_carp pointer. Each softc represents one unique virtual
176  * host id, or vhid. The softc has a back pointer to the ifnet. All
177  * softcs are joined in a global list, which has quite limited use.
178  *
179  * Any interface address that takes part in CARP negotiation has a
180  * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
181  * AF_INET or AF_INET6 address.
182  *
183  * Although one could follow the softc's back pointer to the ifnet and
184  * traverse its ifp->if_addrhead queue to find all interface addresses
185  * involved in CARP, we keep a growable array of ifaddr pointers. This
186  * lets us avoid grabbing the IF_ADDR_LOCK() in the many traversals that
187  * call into the network stack, thus avoiding lock order reversals (LORs).
188  *
189  * Locking:
190  *
191  * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
192  * callout-driven events and ioctl()s.
193  *
194  * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx.
195  * To traverse the global list we use the mutex carp_mtx.
196  *
197  * Known issues with locking:
198  *
199  * - Sending ad, we put the pointer to the softc in an mtag, and no reference
200  *   counting is done on the softc.
201  * - On module unload we may race (?) with packet processing thread
202  *   dereferencing our function pointers.
203  */
204
205 /* Accept incoming CARP packets (net.inet.carp.allow); on by default. */
206 VNET_DEFINE_STATIC(int, carp_allow) = 1;
207 #define V_carp_allow    VNET(carp_allow)
208
209 /* DSCP value to set in outgoing CARP packets; defaults to 56. */
210 VNET_DEFINE_STATIC(int, carp_dscp) = 56;
211 #define V_carp_dscp     VNET(carp_dscp)
212
213 /* Preempt slower nodes (net.inet.carp.preempt); off by default. */
214 VNET_DEFINE_STATIC(int, carp_preempt) = 0;
215 #define V_carp_preempt  VNET(carp_preempt)
216
217 /* Log level (net.inet.carp.log): > 0 enables CARP_LOG(), > 1 also CARP_DEBUG(). */
218 VNET_DEFINE_STATIC(int, carp_log) = 1;
219 #define V_carp_log      VNET(carp_log)
220
221 /* Global advskew demotion, added to sc_advskew by DEMOTE_ADVSKEW(). */
222 VNET_DEFINE_STATIC(int, carp_demotion) = 0;
223 #define V_carp_demotion VNET(carp_demotion)
224
225 /* Send error demotion factor (net.inet.carp.senderr_demotion_factor). */
226 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW;
227 #define V_carp_senderr_adj      VNET(carp_senderr_adj)
228
229 /* Interface down demotion factor (net.inet.carp.ifdown_demotion_factor). */
230 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW;
231 #define V_carp_ifdown_adj       VNET(carp_ifdown_adj)
232
233 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS);      /* Handler for "allow". */
234 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS);       /* Handler for "dscp". */
235 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); /* Handler for "demotion". */
236
237 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
238     "CARP");
239 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow,
240     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
241     &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I",
242     "Accept incoming CARP packets");
243 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp,
244     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
245     0, 0, carp_dscp_sysctl, "I",
246     "DSCP value for carp packets");
247 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
248     &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
249 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
250     &VNET_NAME(carp_log), 0, "CARP log level");
251 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion,
252     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
253     0, 0, carp_demote_adj_sysctl, "I",
254     "Adjust demotion factor (skew of advskew)");
255 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor,
256     CTLFLAG_VNET | CTLFLAG_RW,
257     &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
258 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor,
259     CTLFLAG_VNET | CTLFLAG_RW,
260     &VNET_NAME(carp_ifdown_adj), 0,
261     "Interface down demotion factor adjustment");
262
263 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
264 VNET_PCPUSTAT_SYSINIT(carpstats);
265 VNET_PCPUSTAT_SYSUNINIT(carpstats);
266 /* Bump the counter for field `name' of struct carpstats by `val' (or by 1). */
267 #define CARPSTATS_ADD(name, val)        \
268     counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
269         sizeof(uint64_t)], (val))
270 #define CARPSTATS_INC(name)             CARPSTATS_ADD(name, 1)
271
272 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
273     carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
274 /* Wrappers for the per-softc mutex (sc_mtx) and the per-interface mutex (cif_mtx). */
275 #define CARP_LOCK_INIT(sc)      mtx_init(&(sc)->sc_mtx, "carp_softc",   \
276         NULL, MTX_DEF)
277 #define CARP_LOCK_DESTROY(sc)   mtx_destroy(&(sc)->sc_mtx)
278 #define CARP_LOCK_ASSERT(sc)    mtx_assert(&(sc)->sc_mtx, MA_OWNED)
279 #define CARP_LOCK(sc)           mtx_lock(&(sc)->sc_mtx)
280 #define CARP_UNLOCK(sc)         mtx_unlock(&(sc)->sc_mtx)
281 #define CIF_LOCK_INIT(cif)      mtx_init(&(cif)->cif_mtx, "carp_if",   \
282         NULL, MTX_DEF)
283 #define CIF_LOCK_DESTROY(cif)   mtx_destroy(&(cif)->cif_mtx)
284 #define CIF_LOCK_ASSERT(cif)    mtx_assert(&(cif)->cif_mtx, MA_OWNED)
285 #define CIF_LOCK(cif)           mtx_lock(&(cif)->cif_mtx)
286 #define CIF_UNLOCK(cif)         mtx_unlock(&(cif)->cif_mtx)
287 #define CIF_FREE(cif)   do {                            \
288                 CIF_LOCK(cif);                          \
289                 if (TAILQ_EMPTY(&(cif)->cif_vrs))       \
290                         carp_free_if(cif);              \
291                 else                                    \
292                         CIF_UNLOCK(cif);                \
293 } while (0)
294 /* Log at LOG_INFO, prefixed with "carp: ", when the log level is above 0. */
295 #define CARP_LOG(...)   do {                            \
296         if (V_carp_log > 0)                             \
297                 log(LOG_INFO, "carp: " __VA_ARGS__);    \
298 } while (0)
299 /* Log at LOG_DEBUG (no prefix) when the log level is above 1. */
300 #define CARP_DEBUG(...) do {                            \
301         if (V_carp_log > 1)                             \
302                 log(LOG_DEBUG, __VA_ARGS__);            \
303 } while (0)
304
/*
 * Iterate over the addresses on `ifp', visiting only those that take part
 * in CARP (ifa_carp != NULL).  Expands to a loop header followed by an
 * `if'; the caller supplies the body.
 */
#define IFNET_FOREACH_IFA(ifp, ifa)                                     \
        CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link)         \
                if ((ifa)->ifa_carp != NULL)

/*
 * Iterate over the ifaddrs stored in the softc's sc_ifas array, stopping
 * at the first NULL slot.  The softc lock must be held.  Note that this
 * expands to two statements (assertion + loop), so it must not be used as
 * the sole body of an unbraced `if' or loop.
 */
#define CARP_FOREACH_IFA(sc, ifa)                                       \
        CARP_LOCK_ASSERT(sc);                                           \
        for (int _i = 0;                                                \
                _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 &&              \
                ((ifa) = (sc)->sc_ifas[_i]) != NULL;                    \
                ++_i)

/*
 * Iterate over the softcs attached to `ifp'.  Either the cif mutex or the
 * exclusive carp_sx lock must be held.  Like CARP_FOREACH_IFA() this
 * expands to two statements.
 */
#define IFNET_FOREACH_CARP(ifp, sc)                                     \
        KASSERT(mtx_owned(&(ifp)->if_carp->cif_mtx) ||                  \
            sx_xlocked(&carp_sx), ("cif_vrs not locked"));              \
        TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)

/*
 * Effective advertisement skew: the softc's own advskew plus the global
 * demotion counter, clamped to the range [0, CARP_MAXSKEW].
 */
#define DEMOTE_ADVSKEW(sc)                                      \
    (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ?      \
    CARP_MAXSKEW :                                              \
        (((sc)->sc_advskew + V_carp_demotion < 0) ?             \
        0 : ((sc)->sc_advskew + V_carp_demotion)))
326 /* Forward declarations of file-local functions, followed by global state. */
327 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int);
328 static struct carp_softc
329                 *carp_alloc(struct ifnet *);
330 static void     carp_destroy(struct carp_softc *);
331 static struct carp_if
332                 *carp_alloc_if(struct ifnet *);
333 static void     carp_free_if(struct carp_if *);
334 static void     carp_set_state(struct carp_softc *, int, const char* reason);
335 static void     carp_sc_state(struct carp_softc *);
336 static void     carp_setrun(struct carp_softc *, sa_family_t);
337 static void     carp_master_down(void *);
338 static void     carp_master_down_locked(struct carp_softc *,
339                     const char* reason);
340 static void     carp_send_ad(void *);
341 static void     carp_send_ad_locked(struct carp_softc *);
342 static void     carp_addroute(struct carp_softc *);
343 static void     carp_ifa_addroute(struct ifaddr *);
344 static void     carp_delroute(struct carp_softc *);
345 static void     carp_ifa_delroute(struct ifaddr *);
346 static void     carp_send_ad_all(void *, int);
347 static void     carp_demote_adj(int, char *);
348
349 static LIST_HEAD(, carp_softc) carp_list;       /* Global list of softcs (linked by sc_next). */
350 static struct mtx carp_mtx;                     /* Held while traversing carp_list. */
351 static struct sx carp_sx;                       /* Alternative to CIF_LOCK() for walking cif_vrs. */
352 static struct task carp_sendall_task =          /* Runs carp_send_ad_all(). */
353     TASK_INITIALIZER(0, carp_send_ad_all, NULL);
354
355 static int
356 carp_is_supported_if(if_t ifp)
357 {
358         if (ifp == NULL)
359                 return (ENXIO);
360
361         switch (ifp->if_type) {
362         case IFT_ETHER:
363         case IFT_L2VLAN:
364         case IFT_BRIDGE:
365                 break;
366         default:
367                 return (EOPNOTSUPP);
368         }
369
370         return (0);
371 }
372
373 static void
374 carp_hmac_prepare(struct carp_softc *sc)
375 {
376         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
377         uint8_t vhid = sc->sc_vhid & 0xff;
378         struct ifaddr *ifa;
379         int i, found;
380 #ifdef INET
381         struct in_addr last, cur, in;
382 #endif
383 #ifdef INET6
384         struct in6_addr last6, cur6, in6;
385 #endif
386
387         CARP_LOCK_ASSERT(sc);
388
389         /* Compute ipad from key. */
390         bzero(sc->sc_pad, sizeof(sc->sc_pad));
391         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
392         for (i = 0; i < sizeof(sc->sc_pad); i++)
393                 sc->sc_pad[i] ^= 0x36;
394
395         /* Precompute first part of inner hash. */
396         SHA1Init(&sc->sc_sha1);
397         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
398         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
399         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
400         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
401 #ifdef INET
402         cur.s_addr = 0;
403         do {
404                 found = 0;
405                 last = cur;
406                 cur.s_addr = 0xffffffff;
407                 CARP_FOREACH_IFA(sc, ifa) {
408                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
409                         if (ifa->ifa_addr->sa_family == AF_INET &&
410                             ntohl(in.s_addr) > ntohl(last.s_addr) &&
411                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
412                                 cur.s_addr = in.s_addr;
413                                 found++;
414                         }
415                 }
416                 if (found)
417                         SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
418         } while (found);
419 #endif /* INET */
420 #ifdef INET6
421         memset(&cur6, 0, sizeof(cur6));
422         do {
423                 found = 0;
424                 last6 = cur6;
425                 memset(&cur6, 0xff, sizeof(cur6));
426                 CARP_FOREACH_IFA(sc, ifa) {
427                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
428                         if (IN6_IS_SCOPE_EMBED(&in6))
429                                 in6.s6_addr16[1] = 0;
430                         if (ifa->ifa_addr->sa_family == AF_INET6 &&
431                             memcmp(&in6, &last6, sizeof(in6)) > 0 &&
432                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
433                                 cur6 = in6;
434                                 found++;
435                         }
436                 }
437                 if (found)
438                         SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
439         } while (found);
440 #endif /* INET6 */
441
442         /* convert ipad to opad */
443         for (i = 0; i < sizeof(sc->sc_pad); i++)
444                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
445 }
446
447 static void
448 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
449     unsigned char md[20])
450 {
451         SHA1_CTX sha1ctx;
452
453         CARP_LOCK_ASSERT(sc);
454
455         /* fetch first half of inner hash */
456         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
457
458         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
459         SHA1Final(md, &sha1ctx);
460
461         /* outer hash */
462         SHA1Init(&sha1ctx);
463         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
464         SHA1Update(&sha1ctx, md, 20);
465         SHA1Final(md, &sha1ctx);
466 }
467
468 static int
469 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
470     unsigned char md[20])
471 {
472         unsigned char md2[20];
473
474         CARP_LOCK_ASSERT(sc);
475
476         carp_hmac_generate(sc, counter, md2);
477
478         return (bcmp(md, md2, sizeof(md2)));
479 }
480
/*
 * process input packet.
 * we have rearranged checks order compared to the rfc,
 * but it seems more efficient this way or not possible otherwise.
 *
 * carp_input() is the IPv4 entry point.  It always consumes the mbuf
 * (*mp is cleared) and always returns IPPROTO_DONE.  The packet is dropped
 * when CARP input is disabled, when it is too short to hold a CARP header
 * after the IP header, when the header cannot be made contiguous, or when
 * the CARP checksum is wrong; otherwise it is handed to carp_input_c().
 */
#ifdef INET
static int
carp_input(struct mbuf **mp, int *offp __unused, int proto __unused)
{
        struct mbuf *m = *mp;
        struct ip *ip = mtod(m, struct ip *);
        struct carp_header *ch;
        int iplen, len;

        *mp = NULL;

        CARPSTATS_INC(carps_ipackets);

        if (!V_carp_allow) {
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /* Offset of the CARP header and total length we need to look at. */
        iplen = ip->ip_hl << 2;
        len = iplen + sizeof(*ch);

        /* The packet as a whole must be able to hold a CARP header. */
        if (m->m_pkthdr.len < len) {
                CARPSTATS_INC(carps_badlen);
                CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
                    "on %s\n", __func__, m->m_len - sizeof(struct ip),
                    if_name(m->m_pkthdr.rcvif));
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /*
         * Make the IP and CARP headers contiguous.  m_pullup() frees the
         * chain itself on failure, and may hand back a different mbuf, so
         * every pointer into the packet is recomputed afterwards.
         */
        if ((m = m_pullup(m, len)) == NULL) {
                CARPSTATS_INC(carps_hdrops);
                CARP_DEBUG("%s: pullup failed\n", __func__);
                return (IPPROTO_DONE);
        }
        ip = mtod(m, struct ip *);
        ch = (struct carp_header *)((char *)ip + iplen);

        /*
         * verify the CARP checksum; in_cksum() starts at m_data, so step
         * over the IP header for the duration of the call.
         */
        m->m_data += iplen;
        if (in_cksum(m, len - iplen)) {
                CARPSTATS_INC(carps_badsum);
                CARP_DEBUG("%s: checksum failed on %s\n", __func__,
                    if_name(m->m_pkthdr.rcvif));
                m_freem(m);
                return (IPPROTO_DONE);
        }
        m->m_data -= iplen;

        carp_input_c(m, ch, AF_INET, ip->ip_ttl);
        return (IPPROTO_DONE);
}
#endif
562
563 #ifdef INET6
564 static int
565 carp6_input(struct mbuf **mp, int *offp, int proto)
566 {
567         struct mbuf *m = *mp;
568         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
569         struct carp_header *ch;
570         u_int len;
571
572         CARPSTATS_INC(carps_ipackets6);
573
574         if (!V_carp_allow) {
575                 m_freem(m);
576                 return (IPPROTO_DONE);
577         }
578
579         /* check if received on a valid carp interface */
580         if (m->m_pkthdr.rcvif->if_carp == NULL) {
581                 CARPSTATS_INC(carps_badif);
582                 CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
583                     __func__, if_name(m->m_pkthdr.rcvif));
584                 m_freem(m);
585                 return (IPPROTO_DONE);
586         }
587
588         /* verify that we have a complete carp packet */
589         if (m->m_len < *offp + sizeof(*ch)) {
590                 len = m->m_len;
591                 m = m_pullup(m, *offp + sizeof(*ch));
592                 if (m == NULL) {
593                         CARPSTATS_INC(carps_badlen);
594                         CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
595                         return (IPPROTO_DONE);
596                 }
597                 ip6 = mtod(m, struct ip6_hdr *);
598         }
599         ch = (struct carp_header *)(mtod(m, char *) + *offp);
600
601         /* verify the CARP checksum */
602         m->m_data += *offp;
603         if (in_cksum(m, sizeof(*ch))) {
604                 CARPSTATS_INC(carps_badsum);
605                 CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
606                     if_name(m->m_pkthdr.rcvif));
607                 m_freem(m);
608                 return (IPPROTO_DONE);
609         }
610         m->m_data -= *offp;
611
612         carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim);
613         return (IPPROTO_DONE);
614 }
615 #endif /* INET6 */
616
617 /*
618  * This routine should not be necessary at all, but some switches
619  * (VMWare ESX vswitches) can echo our own packets back at us,
620  * and we must ignore them or they will cause us to drop out of
621  * MASTER mode.
622  *
623  * We cannot catch all cases of network loops.  Instead, what we
624  * do here is catch any packet that arrives with a carp header
625  * with a VHID of 0, that comes from an address that is our own.
626  * These packets are by definition "from us" (even if they are from
627  * a misconfigured host that is pretending to be us).
628  *
629  * The VHID test is outside this mini-function.
630  */
631 static int
632 carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af)
633 {
634 #ifdef INET
635         struct ip *ip4;
636         struct in_addr in4;
637 #endif
638 #ifdef INET6
639         struct ip6_hdr *ip6;
640         struct in6_addr in6;
641 #endif
642
643         switch (af) {
644 #ifdef INET
645         case AF_INET:
646                 ip4 = mtod(m, struct ip *);
647                 in4 = ifatoia(ifa)->ia_addr.sin_addr;
648                 return (in4.s_addr == ip4->ip_src.s_addr);
649 #endif
650 #ifdef INET6
651         case AF_INET6:
652                 ip6 = mtod(m, struct ip6_hdr *);
653                 in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
654                 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0);
655 #endif
656         default:
657                 break;
658         }
659         return (0);
660 }
661
662 static void
663 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl)
664 {
665         struct ifnet *ifp = m->m_pkthdr.rcvif;
666         struct ifaddr *ifa, *match;
667         struct carp_softc *sc;
668         uint64_t tmp_counter;
669         struct timeval sc_tv, ch_tv;
670         int error;
671         bool multicast = false;
672
673         NET_EPOCH_ASSERT();
674
675         /*
676          * Verify that the VHID is valid on the receiving interface.
677          *
678          * There should be just one match.  If there are none
679          * the VHID is not valid and we drop the packet.  If
680          * there are multiple VHID matches, take just the first
681          * one, for compatibility with previous code.  While we're
682          * scanning, check for obvious loops in the network topology
683          * (these should never happen, and as noted above, we may
684          * miss real loops; this is just a double-check).
685          */
686         error = 0;
687         match = NULL;
688         IFNET_FOREACH_IFA(ifp, ifa) {
689                 if (match == NULL && ifa->ifa_carp != NULL &&
690                     ifa->ifa_addr->sa_family == af &&
691                     ifa->ifa_carp->sc_vhid == ch->carp_vhid)
692                         match = ifa;
693                 if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af))
694                         error = ELOOP;
695         }
696         ifa = error ? NULL : match;
697         if (ifa != NULL)
698                 ifa_ref(ifa);
699
700         if (ifa == NULL) {
701                 if (error == ELOOP) {
702                         CARP_DEBUG("dropping looped packet on interface %s\n",
703                             if_name(ifp));
704                         CARPSTATS_INC(carps_badif);     /* ??? */
705                 } else {
706                         CARPSTATS_INC(carps_badvhid);
707                 }
708                 m_freem(m);
709                 return;
710         }
711
712         /* verify the CARP version. */
713         if (ch->carp_version != CARP_VERSION) {
714                 CARPSTATS_INC(carps_badver);
715                 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp),
716                     ch->carp_version);
717                 ifa_free(ifa);
718                 m_freem(m);
719                 return;
720         }
721
722         sc = ifa->ifa_carp;
723         CARP_LOCK(sc);
724         if (ifa->ifa_addr->sa_family == AF_INET) {
725                 multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr);
726         } else {
727                 multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6);
728         }
729         ifa_free(ifa);
730
731         /* verify that the IP TTL is 255, but only if we're not in unicast mode. */
732         if (multicast && ttl != CARP_DFLTTL) {
733                 CARPSTATS_INC(carps_badttl);
734                 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
735                     ttl, if_name(m->m_pkthdr.rcvif));
736                 goto out;
737         }
738
739         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
740                 CARPSTATS_INC(carps_badauth);
741                 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
742                     sc->sc_vhid, if_name(ifp));
743                 goto out;
744         }
745
746         tmp_counter = ntohl(ch->carp_counter[0]);
747         tmp_counter = tmp_counter<<32;
748         tmp_counter += ntohl(ch->carp_counter[1]);
749
750         /* XXX Replay protection goes here */
751
752         sc->sc_init_counter = 0;
753         sc->sc_counter = tmp_counter;
754
755         sc_tv.tv_sec = sc->sc_advbase;
756         sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
757         ch_tv.tv_sec = ch->carp_advbase;
758         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
759
760         switch (sc->sc_state) {
761         case INIT:
762                 break;
763         case MASTER:
764                 /*
765                  * If we receive an advertisement from a master who's going to
766                  * be more frequent than us, go into BACKUP state.
767                  */
768                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
769                     timevalcmp(&sc_tv, &ch_tv, ==)) {
770                         callout_stop(&sc->sc_ad_tmo);
771                         carp_set_state(sc, BACKUP,
772                             "more frequent advertisement received");
773                         carp_setrun(sc, 0);
774                         carp_delroute(sc);
775                 }
776                 break;
777         case BACKUP:
778                 /*
779                  * If we're pre-empting masters who advertise slower than us,
780                  * and this one claims to be slower, treat him as down.
781                  */
782                 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
783                         carp_master_down_locked(sc,
784                             "preempting a slower master");
785                         break;
786                 }
787
788                 /*
789                  *  If the master is going to advertise at such a low frequency
790                  *  that he's guaranteed to time out, we'd might as well just
791                  *  treat him as timed out now.
792                  */
793                 sc_tv.tv_sec = sc->sc_advbase * 3;
794                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
795                         carp_master_down_locked(sc, "master will time out");
796                         break;
797                 }
798
799                 /*
800                  * Otherwise, we reset the counter and wait for the next
801                  * advertisement.
802                  */
803                 carp_setrun(sc, af);
804                 break;
805         }
806
807 out:
808         CARP_UNLOCK(sc);
809         m_freem(m);
810 }
811
812 static int
813 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
814 {
815         struct m_tag *mtag;
816
817         if (sc->sc_init_counter) {
818                 /* this could also be seconds since unix epoch */
819                 sc->sc_counter = arc4random();
820                 sc->sc_counter = sc->sc_counter << 32;
821                 sc->sc_counter += arc4random();
822         } else
823                 sc->sc_counter++;
824
825         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
826         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
827
828         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
829
830         /* Tag packet for carp_output */
831         if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
832             M_NOWAIT)) == NULL) {
833                 m_freem(m);
834                 CARPSTATS_INC(carps_onomem);
835                 return (ENOMEM);
836         }
837         bcopy(&sc, mtag + 1, sizeof(sc));
838         m_tag_prepend(m, mtag);
839
840         return (0);
841 }
842
843 /*
844  * To avoid LORs and possible recursions this function shouldn't
845  * be called directly, but scheduled via taskqueue.
846  */
847 static void
848 carp_send_ad_all(void *ctx __unused, int pending __unused)
849 {
850         struct carp_softc *sc;
851         struct epoch_tracker et;
852
853         NET_EPOCH_ENTER(et);
854         mtx_lock(&carp_mtx);
855         LIST_FOREACH(sc, &carp_list, sc_next)
856                 if (sc->sc_state == MASTER) {
857                         CARP_LOCK(sc);
858                         CURVNET_SET(sc->sc_carpdev->if_vnet);
859                         carp_send_ad_locked(sc);
860                         CURVNET_RESTORE();
861                         CARP_UNLOCK(sc);
862                 }
863         mtx_unlock(&carp_mtx);
864         NET_EPOCH_EXIT(et);
865 }
866
867 /* Send a periodic advertisement, executed in callout context. */
868 static void
869 carp_send_ad(void *v)
870 {
871         struct carp_softc *sc = v;
872         struct epoch_tracker et;
873
874         NET_EPOCH_ENTER(et);
875         CARP_LOCK_ASSERT(sc);
876         CURVNET_SET(sc->sc_carpdev->if_vnet);
877         carp_send_ad_locked(sc);
878         CURVNET_RESTORE();
879         CARP_UNLOCK(sc);
880         NET_EPOCH_EXIT(et);
881 }
882
883 static void
884 carp_send_ad_error(struct carp_softc *sc, int error)
885 {
886
887         /*
888          * We track errors and successfull sends with this logic:
889          * - Any error resets success counter to 0.
890          * - MAX_ERRORS triggers demotion.
891          * - MIN_SUCCESS successes resets error counter to 0.
892          * - MIN_SUCCESS reverts demotion, if it was triggered before.
893          */
894         if (error) {
895                 if (sc->sc_sendad_errors < INT_MAX)
896                         sc->sc_sendad_errors++;
897                 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
898                         static const char fmt[] = "send error %d on %s";
899                         char msg[sizeof(fmt) + IFNAMSIZ];
900
901                         sprintf(msg, fmt, error, if_name(sc->sc_carpdev));
902                         carp_demote_adj(V_carp_senderr_adj, msg);
903                 }
904                 sc->sc_sendad_success = 0;
905         } else if (sc->sc_sendad_errors > 0) {
906                 if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) {
907                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
908                                 static const char fmt[] = "send ok on %s";
909                                 char msg[sizeof(fmt) + IFNAMSIZ];
910
911                                 sprintf(msg, fmt, if_name(sc->sc_carpdev));
912                                 carp_demote_adj(-V_carp_senderr_adj, msg);
913                         }
914                         sc->sc_sendad_errors = 0;
915                 }
916         }
917 }
918
919 /*
920  * Pick the best ifaddr on the given ifp for sending CARP
921  * advertisements.
922  *
923  * "Best" here is defined by ifa_preferred().  This function is much
924  * much like ifaof_ifpforaddr() except that we just use ifa_preferred().
925  *
926  * (This could be simplified to return the actual address, except that
927  * it has a different format in AF_INET and AF_INET6.)
928  */
929 static struct ifaddr *
930 carp_best_ifa(int af, struct ifnet *ifp)
931 {
932         struct ifaddr *ifa, *best;
933
934         NET_EPOCH_ASSERT();
935
936         if (af >= AF_MAX)
937                 return (NULL);
938         best = NULL;
939         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
940                 if (ifa->ifa_addr->sa_family == af &&
941                     (best == NULL || ifa_preferred(best, ifa)))
942                         best = ifa;
943         }
944         if (best != NULL)
945                 ifa_ref(best);
946         return (best);
947 }
948
949 static void
950 carp_send_ad_locked(struct carp_softc *sc)
951 {
952         struct carp_header ch;
953         struct timeval tv;
954         struct ifaddr *ifa;
955         struct carp_header *ch_ptr;
956         struct mbuf *m;
957         int len, advskew;
958
959         NET_EPOCH_ASSERT();
960         CARP_LOCK_ASSERT(sc);
961
962         advskew = DEMOTE_ADVSKEW(sc);
963         tv.tv_sec = sc->sc_advbase;
964         tv.tv_usec = advskew * 1000000 / 256;
965
966         ch.carp_version = CARP_VERSION;
967         ch.carp_type = CARP_ADVERTISEMENT;
968         ch.carp_vhid = sc->sc_vhid;
969         ch.carp_advbase = sc->sc_advbase;
970         ch.carp_advskew = advskew;
971         ch.carp_authlen = 7;    /* XXX DEFINE */
972         ch.carp_pad1 = 0;       /* must be zero */
973         ch.carp_cksum = 0;
974
975         /* XXXGL: OpenBSD picks first ifaddr with needed family. */
976
977 #ifdef INET
978         if (sc->sc_naddrs) {
979                 struct ip *ip;
980
981                 m = m_gethdr(M_NOWAIT, MT_DATA);
982                 if (m == NULL) {
983                         CARPSTATS_INC(carps_onomem);
984                         goto resched;
985                 }
986                 len = sizeof(*ip) + sizeof(ch);
987                 m->m_pkthdr.len = len;
988                 m->m_pkthdr.rcvif = NULL;
989                 m->m_len = len;
990                 M_ALIGN(m, m->m_len);
991                 if (IN_MULTICAST(sc->sc_carpaddr.s_addr))
992                         m->m_flags |= M_MCAST;
993                 ip = mtod(m, struct ip *);
994                 ip->ip_v = IPVERSION;
995                 ip->ip_hl = sizeof(*ip) >> 2;
996                 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET;
997                 ip->ip_len = htons(len);
998                 ip->ip_off = htons(IP_DF);
999                 ip->ip_ttl = CARP_DFLTTL;
1000                 ip->ip_p = IPPROTO_CARP;
1001                 ip->ip_sum = 0;
1002                 ip_fillid(ip);
1003
1004                 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev);
1005                 if (ifa != NULL) {
1006                         ip->ip_src.s_addr =
1007                             ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1008                         ifa_free(ifa);
1009                 } else
1010                         ip->ip_src.s_addr = 0;
1011                 ip->ip_dst = sc->sc_carpaddr;
1012
1013                 ch_ptr = (struct carp_header *)(&ip[1]);
1014                 bcopy(&ch, ch_ptr, sizeof(ch));
1015                 if (carp_prepare_ad(m, sc, ch_ptr))
1016                         goto resched;
1017
1018                 m->m_data += sizeof(*ip);
1019                 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
1020                 m->m_data -= sizeof(*ip);
1021
1022                 CARPSTATS_INC(carps_opackets);
1023
1024                 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
1025                     &sc->sc_carpdev->if_carp->cif_imo, NULL));
1026         }
1027 #endif /* INET */
1028 #ifdef INET6
1029         if (sc->sc_naddrs6) {
1030                 struct ip6_hdr *ip6;
1031
1032                 m = m_gethdr(M_NOWAIT, MT_DATA);
1033                 if (m == NULL) {
1034                         CARPSTATS_INC(carps_onomem);
1035                         goto resched;
1036                 }
1037                 len = sizeof(*ip6) + sizeof(ch);
1038                 m->m_pkthdr.len = len;
1039                 m->m_pkthdr.rcvif = NULL;
1040                 m->m_len = len;
1041                 M_ALIGN(m, m->m_len);
1042                 ip6 = mtod(m, struct ip6_hdr *);
1043                 bzero(ip6, sizeof(*ip6));
1044                 ip6->ip6_vfc |= IPV6_VERSION;
1045                 /* Traffic class isn't defined in ip6 struct instead
1046                  * it gets offset into flowid field */
1047                 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN +
1048                     IPTOS_DSCP_OFFSET));
1049                 ip6->ip6_hlim = CARP_DFLTTL;
1050                 ip6->ip6_nxt = IPPROTO_CARP;
1051
1052                 /* set the source address */
1053                 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev);
1054                 if (ifa != NULL) {
1055                         bcopy(IFA_IN6(ifa), &ip6->ip6_src,
1056                             sizeof(struct in6_addr));
1057                         ifa_free(ifa);
1058                 } else
1059                         /* This should never happen with IPv6. */
1060                         bzero(&ip6->ip6_src, sizeof(struct in6_addr));
1061
1062                 /* Set the multicast destination. */
1063                 memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst));
1064                 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
1065                     IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) {
1066                         if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1067                                 m_freem(m);
1068                                 CARP_DEBUG("%s: in6_setscope failed\n", __func__);
1069                                 goto resched;
1070                         }
1071                 }
1072
1073                 ch_ptr = (struct carp_header *)(&ip6[1]);
1074                 bcopy(&ch, ch_ptr, sizeof(ch));
1075                 if (carp_prepare_ad(m, sc, ch_ptr))
1076                         goto resched;
1077
1078                 m->m_data += sizeof(*ip6);
1079                 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
1080                 m->m_data -= sizeof(*ip6);
1081
1082                 CARPSTATS_INC(carps_opackets6);
1083
1084                 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
1085                     &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
1086         }
1087 #endif /* INET6 */
1088
1089 resched:
1090         callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
1091 }
1092
1093 static void
1094 carp_addroute(struct carp_softc *sc)
1095 {
1096         struct ifaddr *ifa;
1097
1098         CARP_FOREACH_IFA(sc, ifa)
1099                 carp_ifa_addroute(ifa);
1100 }
1101
1102 static void
1103 carp_ifa_addroute(struct ifaddr *ifa)
1104 {
1105
1106         switch (ifa->ifa_addr->sa_family) {
1107 #ifdef INET
1108         case AF_INET:
1109                 in_addprefix(ifatoia(ifa));
1110                 ifa_add_loopback_route(ifa,
1111                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
1112                 break;
1113 #endif
1114 #ifdef INET6
1115         case AF_INET6:
1116                 ifa_add_loopback_route(ifa,
1117                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
1118                 nd6_add_ifa_lle(ifatoia6(ifa));
1119                 break;
1120 #endif
1121         }
1122 }
1123
1124 static void
1125 carp_delroute(struct carp_softc *sc)
1126 {
1127         struct ifaddr *ifa;
1128
1129         CARP_FOREACH_IFA(sc, ifa)
1130                 carp_ifa_delroute(ifa);
1131 }
1132
1133 static void
1134 carp_ifa_delroute(struct ifaddr *ifa)
1135 {
1136
1137         switch (ifa->ifa_addr->sa_family) {
1138 #ifdef INET
1139         case AF_INET:
1140                 ifa_del_loopback_route(ifa,
1141                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
1142                 in_scrubprefix(ifatoia(ifa), LLE_STATIC);
1143                 break;
1144 #endif
1145 #ifdef INET6
1146         case AF_INET6:
1147                 ifa_del_loopback_route(ifa,
1148                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
1149                 nd6_rem_ifa_lle(ifatoia6(ifa), 1);
1150                 break;
1151 #endif
1152         }
1153 }
1154
1155 int
1156 carp_master(struct ifaddr *ifa)
1157 {
1158         struct carp_softc *sc = ifa->ifa_carp;
1159
1160         return (sc->sc_state == MASTER);
1161 }
1162
1163 #ifdef INET
1164 /*
1165  * Broadcast a gratuitous ARP request containing
1166  * the virtual router MAC address for each IP address
1167  * associated with the virtual router.
1168  */
1169 static void
1170 carp_send_arp(struct carp_softc *sc)
1171 {
1172         struct ifaddr *ifa;
1173         struct in_addr addr;
1174
1175         NET_EPOCH_ASSERT();
1176
1177         CARP_FOREACH_IFA(sc, ifa) {
1178                 if (ifa->ifa_addr->sa_family != AF_INET)
1179                         continue;
1180                 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
1181                 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr));
1182         }
1183 }
1184
1185 int
1186 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
1187 {
1188         struct carp_softc *sc = ifa->ifa_carp;
1189
1190         if (sc->sc_state == MASTER) {
1191                 *enaddr = LLADDR(&sc->sc_addr);
1192                 return (1);
1193         }
1194
1195         return (0);
1196 }
1197 #endif
1198
1199 #ifdef INET6
1200 static void
1201 carp_send_na(struct carp_softc *sc)
1202 {
1203         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1204         struct ifaddr *ifa;
1205         struct in6_addr *in6;
1206
1207         CARP_FOREACH_IFA(sc, ifa) {
1208                 if (ifa->ifa_addr->sa_family != AF_INET6)
1209                         continue;
1210
1211                 in6 = IFA_IN6(ifa);
1212                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1213                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1214                 DELAY(1000);    /* XXX */
1215         }
1216 }
1217
1218 /*
1219  * Returns ifa in case it's a carp address and it is MASTER, or if the address
1220  * matches and is not a carp address.  Returns NULL otherwise.
1221  */
1222 struct ifaddr *
1223 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
1224 {
1225         struct ifaddr *ifa;
1226
1227         NET_EPOCH_ASSERT();
1228
1229         ifa = NULL;
1230         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1231                 if (ifa->ifa_addr->sa_family != AF_INET6)
1232                         continue;
1233                 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
1234                         continue;
1235                 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
1236                         ifa = NULL;
1237                 else
1238                         ifa_ref(ifa);
1239                 break;
1240         }
1241
1242         return (ifa);
1243 }
1244
1245 char *
1246 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
1247 {
1248         struct ifaddr *ifa;
1249
1250         NET_EPOCH_ASSERT();
1251
1252         IFNET_FOREACH_IFA(ifp, ifa)
1253                 if (ifa->ifa_addr->sa_family == AF_INET6 &&
1254                     IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
1255                         struct carp_softc *sc = ifa->ifa_carp;
1256                         struct m_tag *mtag;
1257
1258                         mtag = m_tag_get(PACKET_TAG_CARP,
1259                             sizeof(struct carp_softc *), M_NOWAIT);
1260                         if (mtag == NULL)
1261                                 /* Better a bit than nothing. */
1262                                 return (LLADDR(&sc->sc_addr));
1263
1264                         bcopy(&sc, mtag + 1, sizeof(sc));
1265                         m_tag_prepend(m, mtag);
1266
1267                         return (LLADDR(&sc->sc_addr));
1268                 }
1269
1270         return (NULL);
1271 }
1272 #endif /* INET6 */
1273
1274 int
1275 carp_forus(struct ifnet *ifp, u_char *dhost)
1276 {
1277         struct carp_softc *sc;
1278         uint8_t *ena = dhost;
1279
1280         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1281                 return (0);
1282
1283         CIF_LOCK(ifp->if_carp);
1284         IFNET_FOREACH_CARP(ifp, sc) {
1285                 /*
1286                  * CARP_LOCK() is not here, since would protect nothing, but
1287                  * cause deadlock with if_bridge, calling this under its lock.
1288                  */
1289                 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
1290                     ETHER_ADDR_LEN)) {
1291                         CIF_UNLOCK(ifp->if_carp);
1292                         return (1);
1293                 }
1294         }
1295         CIF_UNLOCK(ifp->if_carp);
1296
1297         return (0);
1298 }
1299
1300 /* Master down timeout event, executed in callout context. */
1301 static void
1302 carp_master_down(void *v)
1303 {
1304         struct carp_softc *sc = v;
1305         struct epoch_tracker et;
1306
1307         NET_EPOCH_ENTER(et);
1308         CARP_LOCK_ASSERT(sc);
1309
1310         CURVNET_SET(sc->sc_carpdev->if_vnet);
1311         if (sc->sc_state == BACKUP) {
1312                 carp_master_down_locked(sc, "master timed out");
1313         }
1314         CURVNET_RESTORE();
1315
1316         CARP_UNLOCK(sc);
1317         NET_EPOCH_EXIT(et);
1318 }
1319
1320 static void
1321 carp_master_down_locked(struct carp_softc *sc, const char *reason)
1322 {
1323
1324         NET_EPOCH_ASSERT();
1325         CARP_LOCK_ASSERT(sc);
1326
1327         switch (sc->sc_state) {
1328         case BACKUP:
1329                 carp_set_state(sc, MASTER, reason);
1330                 carp_send_ad_locked(sc);
1331 #ifdef INET
1332                 carp_send_arp(sc);
1333 #endif
1334 #ifdef INET6
1335                 carp_send_na(sc);
1336 #endif
1337                 carp_setrun(sc, 0);
1338                 carp_addroute(sc);
1339                 break;
1340         case INIT:
1341         case MASTER:
1342 #ifdef INVARIANTS
1343                 panic("carp: VHID %u@%s: master_down event in %s state\n",
1344                     sc->sc_vhid,
1345                     if_name(sc->sc_carpdev),
1346                     sc->sc_state ? "MASTER" : "INIT");
1347 #endif
1348                 break;
1349         }
1350 }
1351
1352 /*
1353  * When in backup state, af indicates whether to reset the master down timer
1354  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1355  */
1356 static void
1357 carp_setrun(struct carp_softc *sc, sa_family_t af)
1358 {
1359         struct timeval tv;
1360
1361         CARP_LOCK_ASSERT(sc);
1362
1363         if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
1364             sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
1365             (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) ||
1366             !V_carp_allow)
1367                 return;
1368
1369         switch (sc->sc_state) {
1370         case INIT:
1371                 carp_set_state(sc, BACKUP, "initialization complete");
1372                 carp_setrun(sc, 0);
1373                 break;
1374         case BACKUP:
1375                 callout_stop(&sc->sc_ad_tmo);
1376                 tv.tv_sec = 3 * sc->sc_advbase;
1377                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1378                 switch (af) {
1379 #ifdef INET
1380                 case AF_INET:
1381                         callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1382                             carp_master_down, sc);
1383                         break;
1384 #endif
1385 #ifdef INET6
1386                 case AF_INET6:
1387                         callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1388                             carp_master_down, sc);
1389                         break;
1390 #endif
1391                 default:
1392 #ifdef INET
1393                         if (sc->sc_naddrs)
1394                                 callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1395                                     carp_master_down, sc);
1396 #endif
1397 #ifdef INET6
1398                         if (sc->sc_naddrs6)
1399                                 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1400                                     carp_master_down, sc);
1401 #endif
1402                         break;
1403                 }
1404                 break;
1405         case MASTER:
1406                 tv.tv_sec = sc->sc_advbase;
1407                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1408                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
1409                     carp_send_ad, sc);
1410                 break;
1411         }
1412 }
1413
1414 /*
1415  * Setup multicast structures.
1416  */
1417 static int
1418 carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
1419 {
1420         struct ifnet *ifp = cif->cif_ifp;
1421         int error = 0;
1422
1423         switch (sa) {
1424 #ifdef INET
1425         case AF_INET:
1426             {
1427                 struct ip_moptions *imo = &cif->cif_imo;
1428                 struct in_mfilter *imf;
1429                 struct in_addr addr;
1430
1431                 if (ip_mfilter_first(&imo->imo_head) != NULL)
1432                         return (0);
1433
1434                 imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
1435                 ip_mfilter_init(&imo->imo_head);
1436                 imo->imo_multicast_vif = -1;
1437
1438                 addr.s_addr = htonl(INADDR_CARP_GROUP);
1439                 if ((error = in_joingroup(ifp, &addr, NULL,
1440                     &imf->imf_inm)) != 0) {
1441                         ip_mfilter_free(imf);
1442                         break;
1443                 }
1444
1445                 ip_mfilter_insert(&imo->imo_head, imf);
1446                 imo->imo_multicast_ifp = ifp;
1447                 imo->imo_multicast_ttl = CARP_DFLTTL;
1448                 imo->imo_multicast_loop = 0;
1449                 break;
1450            }
1451 #endif
1452 #ifdef INET6
1453         case AF_INET6:
1454             {
1455                 struct ip6_moptions *im6o = &cif->cif_im6o;
1456                 struct in6_mfilter *im6f[2];
1457                 struct in6_addr in6;
1458
1459                 if (ip6_mfilter_first(&im6o->im6o_head))
1460                         return (0);
1461
1462                 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
1463                 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
1464
1465                 ip6_mfilter_init(&im6o->im6o_head);
1466                 im6o->im6o_multicast_hlim = CARP_DFLTTL;
1467                 im6o->im6o_multicast_ifp = ifp;
1468
1469                 /* Join IPv6 CARP multicast group. */
1470                 bzero(&in6, sizeof(in6));
1471                 in6.s6_addr16[0] = htons(0xff02);
1472                 in6.s6_addr8[15] = 0x12;
1473                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1474                         ip6_mfilter_free(im6f[0]);
1475                         ip6_mfilter_free(im6f[1]);
1476                         break;
1477                 }
1478                 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) {
1479                         ip6_mfilter_free(im6f[0]);
1480                         ip6_mfilter_free(im6f[1]);
1481                         break;
1482                 }
1483
1484                 /* Join solicited multicast address. */
1485                 bzero(&in6, sizeof(in6));
1486                 in6.s6_addr16[0] = htons(0xff02);
1487                 in6.s6_addr32[1] = 0;
1488                 in6.s6_addr32[2] = htonl(1);
1489                 in6.s6_addr32[3] = 0;
1490                 in6.s6_addr8[12] = 0xff;
1491
1492                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1493                         ip6_mfilter_free(im6f[0]);
1494                         ip6_mfilter_free(im6f[1]);
1495                         break;
1496                 }
1497
1498                 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) {
1499                         in6_leavegroup(im6f[0]->im6f_in6m, NULL);
1500                         ip6_mfilter_free(im6f[0]);
1501                         ip6_mfilter_free(im6f[1]);
1502                         break;
1503                 }
1504                 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]);
1505                 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]);
1506                 break;
1507             }
1508 #endif
1509         }
1510
1511         return (error);
1512 }
1513
1514 /*
1515  * Free multicast structures.
1516  */
1517 static void
1518 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
1519 {
1520 #ifdef INET
1521         struct ip_moptions *imo = &cif->cif_imo;
1522         struct in_mfilter *imf;
1523 #endif
1524 #ifdef INET6
1525         struct ip6_moptions *im6o = &cif->cif_im6o;
1526         struct in6_mfilter *im6f;
1527 #endif
1528         sx_assert(&carp_sx, SA_XLOCKED);
1529
1530         switch (sa) {
1531 #ifdef INET
1532         case AF_INET:
1533                 if (cif->cif_naddrs != 0)
1534                         break;
1535
1536                 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
1537                         ip_mfilter_remove(&imo->imo_head, imf);
1538                         in_leavegroup(imf->imf_inm, NULL);
1539                         ip_mfilter_free(imf);
1540                 }
1541                 break;
1542 #endif
1543 #ifdef INET6
1544         case AF_INET6:
1545                 if (cif->cif_naddrs6 != 0)
1546                         break;
1547
1548                 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) {
1549                         ip6_mfilter_remove(&im6o->im6o_head, im6f);
1550                         in6_leavegroup(im6f->im6f_in6m, NULL);
1551                         ip6_mfilter_free(im6f);
1552                 }
1553                 break;
1554 #endif
1555         }
1556 }
1557
1558 int
1559 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
1560 {
1561         struct m_tag *mtag;
1562         struct carp_softc *sc;
1563
1564         if (!sa)
1565                 return (0);
1566
1567         switch (sa->sa_family) {
1568 #ifdef INET
1569         case AF_INET:
1570                 break;
1571 #endif
1572 #ifdef INET6
1573         case AF_INET6:
1574                 break;
1575 #endif
1576         default:
1577                 return (0);
1578         }
1579
1580         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
1581         if (mtag == NULL)
1582                 return (0);
1583
1584         bcopy(mtag + 1, &sc, sizeof(sc));
1585
1586         switch (sa->sa_family) {
1587         case AF_INET:
1588                 if (! IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)))
1589                         return (0);
1590                 break;
1591         case AF_INET6:
1592                 if (! IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6))
1593                         return (0);
1594                 break;
1595         default:
1596                 panic("Unknown af");
1597         }
1598
1599         /* Set the source MAC address to the Virtual Router MAC Address. */
1600         switch (ifp->if_type) {
1601         case IFT_ETHER:
1602         case IFT_BRIDGE:
1603         case IFT_L2VLAN: {
1604                         struct ether_header *eh;
1605
1606                         eh = mtod(m, struct ether_header *);
1607                         eh->ether_shost[0] = 0;
1608                         eh->ether_shost[1] = 0;
1609                         eh->ether_shost[2] = 0x5e;
1610                         eh->ether_shost[3] = 0;
1611                         eh->ether_shost[4] = 1;
1612                         eh->ether_shost[5] = sc->sc_vhid;
1613                 }
1614                 break;
1615         default:
1616                 printf("%s: carp is not supported for the %d interface type\n",
1617                     if_name(ifp), ifp->if_type);
1618                 return (EOPNOTSUPP);
1619         }
1620
1621         return (0);
1622 }
1623
1624 static struct carp_softc*
1625 carp_alloc(struct ifnet *ifp)
1626 {
1627         struct carp_softc *sc;
1628         struct carp_if *cif;
1629
1630         sx_assert(&carp_sx, SA_XLOCKED);
1631
1632         if ((cif = ifp->if_carp) == NULL)
1633                 cif = carp_alloc_if(ifp);
1634
1635         sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
1636
1637         sc->sc_advbase = CARP_DFLTINTV;
1638         sc->sc_vhid = -1;       /* required setting */
1639         sc->sc_init_counter = 1;
1640         sc->sc_state = INIT;
1641
1642         sc->sc_ifasiz = sizeof(struct ifaddr *);
1643         sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
1644         sc->sc_carpdev = ifp;
1645
1646         sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP);
1647         sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
1648         sc->sc_carpaddr6.s6_addr8[15] = 0x12;
1649
1650         CARP_LOCK_INIT(sc);
1651 #ifdef INET
1652         callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1653 #endif
1654 #ifdef INET6
1655         callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1656 #endif
1657         callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1658
1659         CIF_LOCK(cif);
1660         TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
1661         CIF_UNLOCK(cif);
1662
1663         mtx_lock(&carp_mtx);
1664         LIST_INSERT_HEAD(&carp_list, sc, sc_next);
1665         mtx_unlock(&carp_mtx);
1666
1667         return (sc);
1668 }
1669
1670 static void
1671 carp_grow_ifas(struct carp_softc *sc)
1672 {
1673         struct ifaddr **new;
1674
1675         new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO);
1676         CARP_LOCK(sc);
1677         bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
1678         free(sc->sc_ifas, M_CARP);
1679         sc->sc_ifas = new;
1680         sc->sc_ifasiz *= 2;
1681         CARP_UNLOCK(sc);
1682 }
1683
1684 static void
1685 carp_destroy(struct carp_softc *sc)
1686 {
1687         struct ifnet *ifp = sc->sc_carpdev;
1688         struct carp_if *cif = ifp->if_carp;
1689
1690         sx_assert(&carp_sx, SA_XLOCKED);
1691
1692         if (sc->sc_suppress)
1693                 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
1694         CARP_UNLOCK(sc);
1695
1696         CIF_LOCK(cif);
1697         TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
1698         CIF_UNLOCK(cif);
1699
1700         mtx_lock(&carp_mtx);
1701         LIST_REMOVE(sc, sc_next);
1702         mtx_unlock(&carp_mtx);
1703
1704         callout_drain(&sc->sc_ad_tmo);
1705 #ifdef INET
1706         callout_drain(&sc->sc_md_tmo);
1707 #endif
1708 #ifdef INET6
1709         callout_drain(&sc->sc_md6_tmo);
1710 #endif
1711         CARP_LOCK_DESTROY(sc);
1712
1713         free(sc->sc_ifas, M_CARP);
1714         free(sc, M_CARP);
1715 }
1716
1717 static struct carp_if*
1718 carp_alloc_if(struct ifnet *ifp)
1719 {
1720         struct carp_if *cif;
1721         int error;
1722
1723         cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
1724
1725         if ((error = ifpromisc(ifp, 1)) != 0)
1726                 printf("%s: ifpromisc(%s) failed: %d\n",
1727                     __func__, if_name(ifp), error);
1728         else
1729                 cif->cif_flags |= CIF_PROMISC;
1730
1731         CIF_LOCK_INIT(cif);
1732         cif->cif_ifp = ifp;
1733         TAILQ_INIT(&cif->cif_vrs);
1734
1735         IF_ADDR_WLOCK(ifp);
1736         ifp->if_carp = cif;
1737         if_ref(ifp);
1738         IF_ADDR_WUNLOCK(ifp);
1739
1740         return (cif);
1741 }
1742
1743 static void
1744 carp_free_if(struct carp_if *cif)
1745 {
1746         struct ifnet *ifp = cif->cif_ifp;
1747
1748         CIF_LOCK_ASSERT(cif);
1749         KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
1750             __func__));
1751
1752         IF_ADDR_WLOCK(ifp);
1753         ifp->if_carp = NULL;
1754         IF_ADDR_WUNLOCK(ifp);
1755
1756         CIF_LOCK_DESTROY(cif);
1757
1758         if (cif->cif_flags & CIF_PROMISC)
1759                 ifpromisc(ifp, 0);
1760         if_rele(ifp);
1761
1762         free(cif, M_CARP);
1763 }
1764
1765 static bool
1766 carp_carprcp(void *arg, struct carp_softc *sc, int priv)
1767 {
1768         struct carpreq *carpr = arg;
1769
1770         CARP_LOCK(sc);
1771         carpr->carpr_state = sc->sc_state;
1772         carpr->carpr_vhid = sc->sc_vhid;
1773         carpr->carpr_advbase = sc->sc_advbase;
1774         carpr->carpr_advskew = sc->sc_advskew;
1775         if (priv)
1776                 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
1777         else
1778                 bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
1779         CARP_UNLOCK(sc);
1780
1781         return (true);
1782 }
1783
1784 static int
1785 carp_ioctl_set(if_t ifp, struct carpkreq *carpr)
1786 {
1787         struct epoch_tracker et;
1788         struct carp_softc *sc = NULL;
1789         int error = 0;
1790
1791
1792         if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID ||
1793             carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0) {
1794                 return (EINVAL);
1795         }
1796
1797         if (ifp->if_carp) {
1798                 IFNET_FOREACH_CARP(ifp, sc)
1799                         if (sc->sc_vhid == carpr->carpr_vhid)
1800                                 break;
1801         }
1802         if (sc == NULL) {
1803                 sc = carp_alloc(ifp);
1804                 CARP_LOCK(sc);
1805                 sc->sc_vhid = carpr->carpr_vhid;
1806                 LLADDR(&sc->sc_addr)[0] = 0;
1807                 LLADDR(&sc->sc_addr)[1] = 0;
1808                 LLADDR(&sc->sc_addr)[2] = 0x5e;
1809                 LLADDR(&sc->sc_addr)[3] = 0;
1810                 LLADDR(&sc->sc_addr)[4] = 1;
1811                 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
1812         } else
1813                 CARP_LOCK(sc);
1814         if (carpr->carpr_advbase > 0) {
1815                 if (carpr->carpr_advbase > 255 ||
1816                     carpr->carpr_advbase < CARP_DFLTINTV) {
1817                         error = EINVAL;
1818                         goto out;
1819                 }
1820                 sc->sc_advbase = carpr->carpr_advbase;
1821         }
1822         if (carpr->carpr_advskew >= 255) {
1823                 error = EINVAL;
1824                 goto out;
1825         }
1826         sc->sc_advskew = carpr->carpr_advskew;
1827         if (carpr->carpr_addr.s_addr != INADDR_ANY)
1828                 sc->sc_carpaddr = carpr->carpr_addr;
1829         if (! IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) {
1830                 memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6,
1831                     sizeof(sc->sc_carpaddr6));
1832         }
1833         if (carpr->carpr_key[0] != '\0') {
1834                 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
1835                 carp_hmac_prepare(sc);
1836         }
1837         if (sc->sc_state != INIT &&
1838             carpr->carpr_state != sc->sc_state) {
1839                 switch (carpr->carpr_state) {
1840                 case BACKUP:
1841                         callout_stop(&sc->sc_ad_tmo);
1842                         carp_set_state(sc, BACKUP,
1843                             "user requested via ifconfig");
1844                         carp_setrun(sc, 0);
1845                         carp_delroute(sc);
1846                         break;
1847                 case MASTER:
1848                         NET_EPOCH_ENTER(et);
1849                         carp_master_down_locked(sc,
1850                             "user requested via ifconfig");
1851                         NET_EPOCH_EXIT(et);
1852                         break;
1853                 default:
1854                         break;
1855                 }
1856         }
1857
1858 out:
1859         CARP_UNLOCK(sc);
1860
1861         return (error);
1862 }
1863
1864 static int
1865 carp_ioctl_get(if_t ifp, struct ucred *cred, struct carpreq *carpr,
1866     bool (*outfn)(void *, struct carp_softc *, int), void *arg)
1867 {
1868         int priveleged;
1869         struct carp_softc *sc;
1870
1871         if (carpr->carpr_vhid < 0 || carpr->carpr_vhid > CARP_MAXVHID)
1872                 return (EINVAL);
1873         if (carpr->carpr_count < 1)
1874                 return (EMSGSIZE);
1875         if (ifp->if_carp == NULL)
1876                 return (ENOENT);
1877
1878         priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0);
1879         if (carpr->carpr_vhid != 0) {
1880                 IFNET_FOREACH_CARP(ifp, sc)
1881                         if (sc->sc_vhid == carpr->carpr_vhid)
1882                                 break;
1883                 if (sc == NULL)
1884                         return (ENOENT);
1885
1886                 if (! outfn(arg, sc, priveleged))
1887                         return (ENOMEM);
1888                 carpr->carpr_count = 1;
1889         } else  {
1890                 int count;
1891
1892                 count = 0;
1893                 IFNET_FOREACH_CARP(ifp, sc)
1894                         count++;
1895
1896                 if (count > carpr->carpr_count)
1897                         return (EMSGSIZE);
1898
1899                 IFNET_FOREACH_CARP(ifp, sc) {
1900                         if (! outfn(arg, sc, priveleged))
1901                                 return (ENOMEM);
1902                         carpr->carpr_count = count;
1903                 }
1904         }
1905
1906         return (0);
1907 }
1908
1909 int
1910 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
1911 {
1912         struct carpreq carpr;
1913         struct carpkreq carprk = { };
1914         struct ifnet *ifp;
1915         int error = 0;
1916
1917         if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr)))
1918                 return (error);
1919
1920         ifp = ifunit_ref(ifr->ifr_name);
1921         if ((error = carp_is_supported_if(ifp)) != 0)
1922                 goto out;
1923
1924         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1925                 error = EADDRNOTAVAIL;
1926                 goto out;
1927         }
1928
1929         sx_xlock(&carp_sx);
1930         switch (cmd) {
1931         case SIOCSVH:
1932                 if ((error = priv_check(td, PRIV_NETINET_CARP)))
1933                         break;
1934
1935                 memcpy(&carprk, &carpr, sizeof(carpr));
1936                 error = carp_ioctl_set(ifp, &carprk);
1937                 break;
1938
1939         case SIOCGVH:
1940                 error = carp_ioctl_get(ifp, td->td_ucred, &carpr,
1941                     carp_carprcp, &carpr);
1942                 if (error == 0) {
1943                         error = copyout(&carpr,
1944                             (char *)ifr_data_get_ptr(ifr),
1945                             carpr.carpr_count * sizeof(carpr));
1946                 }
1947                 break;
1948         default:
1949                 error = EINVAL;
1950         }
1951         sx_xunlock(&carp_sx);
1952
1953 out:
1954         if (ifp != NULL)
1955                 if_rele(ifp);
1956
1957         return (error);
1958 }
1959
1960 static int
1961 carp_get_vhid(struct ifaddr *ifa)
1962 {
1963
1964         if (ifa == NULL || ifa->ifa_carp == NULL)
1965                 return (0);
1966
1967         return (ifa->ifa_carp->sc_vhid);
1968 }
1969
1970 int
1971 carp_attach(struct ifaddr *ifa, int vhid)
1972 {
1973         struct ifnet *ifp = ifa->ifa_ifp;
1974         struct carp_if *cif = ifp->if_carp;
1975         struct carp_softc *sc;
1976         int index, error;
1977
1978         KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa));
1979
1980         switch (ifa->ifa_addr->sa_family) {
1981 #ifdef INET
1982         case AF_INET:
1983 #endif
1984 #ifdef INET6
1985         case AF_INET6:
1986 #endif
1987                 break;
1988         default:
1989                 return (EPROTOTYPE);
1990         }
1991
1992         sx_xlock(&carp_sx);
1993         if (ifp->if_carp == NULL) {
1994                 sx_xunlock(&carp_sx);
1995                 return (ENOPROTOOPT);
1996         }
1997
1998         IFNET_FOREACH_CARP(ifp, sc)
1999                 if (sc->sc_vhid == vhid)
2000                         break;
2001         if (sc == NULL) {
2002                 sx_xunlock(&carp_sx);
2003                 return (ENOENT);
2004         }
2005
2006         error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
2007         if (error) {
2008                 CIF_FREE(cif);
2009                 sx_xunlock(&carp_sx);
2010                 return (error);
2011         }
2012
2013         index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
2014         if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
2015                 carp_grow_ifas(sc);
2016
2017         switch (ifa->ifa_addr->sa_family) {
2018 #ifdef INET
2019         case AF_INET:
2020                 cif->cif_naddrs++;
2021                 sc->sc_naddrs++;
2022                 break;
2023 #endif
2024 #ifdef INET6
2025         case AF_INET6:
2026                 cif->cif_naddrs6++;
2027                 sc->sc_naddrs6++;
2028                 break;
2029 #endif
2030         }
2031
2032         ifa_ref(ifa);
2033
2034         CARP_LOCK(sc);
2035         sc->sc_ifas[index - 1] = ifa;
2036         ifa->ifa_carp = sc;
2037         carp_hmac_prepare(sc);
2038         carp_sc_state(sc);
2039         CARP_UNLOCK(sc);
2040
2041         sx_xunlock(&carp_sx);
2042
2043         return (0);
2044 }
2045
2046 void
2047 carp_detach(struct ifaddr *ifa, bool keep_cif)
2048 {
2049         struct ifnet *ifp = ifa->ifa_ifp;
2050         struct carp_if *cif = ifp->if_carp;
2051         struct carp_softc *sc = ifa->ifa_carp;
2052         int i, index;
2053
2054         KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
2055
2056         sx_xlock(&carp_sx);
2057
2058         CARP_LOCK(sc);
2059         /* Shift array. */
2060         index = sc->sc_naddrs + sc->sc_naddrs6;
2061         for (i = 0; i < index; i++)
2062                 if (sc->sc_ifas[i] == ifa)
2063                         break;
2064         KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
2065         for (; i < index - 1; i++)
2066                 sc->sc_ifas[i] = sc->sc_ifas[i+1];
2067         sc->sc_ifas[index - 1] = NULL;
2068
2069         switch (ifa->ifa_addr->sa_family) {
2070 #ifdef INET
2071         case AF_INET:
2072                 cif->cif_naddrs--;
2073                 sc->sc_naddrs--;
2074                 break;
2075 #endif
2076 #ifdef INET6
2077         case AF_INET6:
2078                 cif->cif_naddrs6--;
2079                 sc->sc_naddrs6--;
2080                 break;
2081 #endif
2082         }
2083
2084         carp_ifa_delroute(ifa);
2085         carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
2086
2087         ifa->ifa_carp = NULL;
2088         ifa_free(ifa);
2089
2090         carp_hmac_prepare(sc);
2091         carp_sc_state(sc);
2092
2093         if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)
2094                 carp_destroy(sc);
2095         else
2096                 CARP_UNLOCK(sc);
2097
2098         if (!keep_cif)
2099                 CIF_FREE(cif);
2100
2101         sx_xunlock(&carp_sx);
2102 }
2103
2104 static void
2105 carp_set_state(struct carp_softc *sc, int state, const char *reason)
2106 {
2107
2108         CARP_LOCK_ASSERT(sc);
2109
2110         if (sc->sc_state != state) {
2111                 const char *carp_states[] = { CARP_STATES };
2112                 char subsys[IFNAMSIZ+5];
2113
2114                 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
2115                     if_name(sc->sc_carpdev));
2116
2117                 CARP_LOG("%s: %s -> %s (%s)\n", subsys,
2118                     carp_states[sc->sc_state], carp_states[state], reason);
2119
2120                 sc->sc_state = state;
2121
2122                 devctl_notify("CARP", subsys, carp_states[state], NULL);
2123         }
2124 }
2125
2126 static void
2127 carp_linkstate(struct ifnet *ifp)
2128 {
2129         struct carp_softc *sc;
2130
2131         CIF_LOCK(ifp->if_carp);
2132         IFNET_FOREACH_CARP(ifp, sc) {
2133                 CARP_LOCK(sc);
2134                 carp_sc_state(sc);
2135                 CARP_UNLOCK(sc);
2136         }
2137         CIF_UNLOCK(ifp->if_carp);
2138 }
2139
2140 static void
2141 carp_sc_state(struct carp_softc *sc)
2142 {
2143
2144         CARP_LOCK_ASSERT(sc);
2145
2146         if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
2147             !(sc->sc_carpdev->if_flags & IFF_UP) ||
2148             !V_carp_allow) {
2149                 callout_stop(&sc->sc_ad_tmo);
2150 #ifdef INET
2151                 callout_stop(&sc->sc_md_tmo);
2152 #endif
2153 #ifdef INET6
2154                 callout_stop(&sc->sc_md6_tmo);
2155 #endif
2156                 carp_set_state(sc, INIT, "hardware interface down");
2157                 carp_setrun(sc, 0);
2158                 carp_delroute(sc);
2159                 if (!sc->sc_suppress)
2160                         carp_demote_adj(V_carp_ifdown_adj, "interface down");
2161                 sc->sc_suppress = 1;
2162         } else {
2163                 carp_set_state(sc, INIT, "hardware interface up");
2164                 carp_setrun(sc, 0);
2165                 if (sc->sc_suppress)
2166                         carp_demote_adj(-V_carp_ifdown_adj, "interface up");
2167                 sc->sc_suppress = 0;
2168         }
2169 }
2170
2171 static void
2172 carp_demote_adj(int adj, char *reason)
2173 {
2174         atomic_add_int(&V_carp_demotion, adj);
2175         CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
2176         taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
2177 }
2178
2179 static int
2180 carp_allow_sysctl(SYSCTL_HANDLER_ARGS)
2181 {
2182         int new, error;
2183         struct carp_softc *sc;
2184
2185         new = V_carp_allow;
2186         error = sysctl_handle_int(oidp, &new, 0, req);
2187         if (error || !req->newptr)
2188                 return (error);
2189
2190         if (V_carp_allow != new) {
2191                 V_carp_allow = new;
2192
2193                 mtx_lock(&carp_mtx);
2194                 LIST_FOREACH(sc, &carp_list, sc_next) {
2195                         CARP_LOCK(sc);
2196                         if (curvnet == sc->sc_carpdev->if_vnet)
2197                                 carp_sc_state(sc);
2198                         CARP_UNLOCK(sc);
2199                 }
2200                 mtx_unlock(&carp_mtx);
2201         }
2202
2203         return (0);
2204 }
2205
2206 static int
2207 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS)
2208 {
2209         int new, error;
2210
2211         new = V_carp_dscp;
2212         error = sysctl_handle_int(oidp, &new, 0, req);
2213         if (error || !req->newptr)
2214                 return (error);
2215
2216         if (new < 0 || new > 63)
2217                 return (EINVAL);
2218
2219         V_carp_dscp = new;
2220
2221         return (0);
2222 }
2223
2224 static int
2225 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
2226 {
2227         int new, error;
2228
2229         new = V_carp_demotion;
2230         error = sysctl_handle_int(oidp, &new, 0, req);
2231         if (error || !req->newptr)
2232                 return (error);
2233
2234         carp_demote_adj(new, "sysctl");
2235
2236         return (0);
2237 }
2238
2239 static int
2240 nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target)
2241 {
2242         if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN))
2243                 return (EINVAL);
2244
2245         memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla));
2246         return (0);
2247 }
2248
/* Argument bundle handed to carp_nl_send() through its opaque 'arg'. */
struct carp_nl_send_args {
	struct nlmsghdr *hdr;		/* request header being replied to */
	struct nl_pstate *npt;		/* parser state; supplies the writer */
};
2253
2254 static bool
2255 carp_nl_send(void *arg, struct carp_softc *sc, int priv)
2256 {
2257         struct carp_nl_send_args *nlsa = arg;
2258         struct nlmsghdr *hdr = nlsa->hdr;
2259         struct nl_pstate *npt = nlsa->npt;
2260         struct nl_writer *nw = npt->nw;
2261         struct genlmsghdr *ghdr_new;
2262
2263         if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
2264                 nlmsg_abort(nw);
2265                 return (false);
2266         }
2267
2268         ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
2269         if (ghdr_new == NULL) {
2270                 nlmsg_abort(nw);
2271                 return (false);
2272         }
2273
2274         ghdr_new->cmd = CARP_NL_CMD_GET;
2275         ghdr_new->version = 0;
2276         ghdr_new->reserved = 0;
2277
2278         CARP_LOCK(sc);
2279
2280         nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid);
2281         nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state);
2282         nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase);
2283         nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew);
2284         nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr);
2285         nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6);
2286
2287         if (priv)
2288                 nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), sc->sc_key);
2289
2290         CARP_UNLOCK(sc);
2291
2292         if (! nlmsg_end(nw)) {
2293                 nlmsg_abort(nw);
2294                 return (false);
2295         }
2296
2297         return (true);
2298 }
2299
2300 struct nl_carp_parsed {
2301         unsigned int    ifindex;
2302         char            *ifname;
2303         uint32_t        state;
2304         uint32_t        vhid;
2305         int32_t         advbase;
2306         int32_t         advskew;
2307         char            key[CARP_KEY_LEN];
2308         struct in_addr  addr;
2309         struct in6_addr addr6;
2310 };
2311
2312 #define _IN(_field)     offsetof(struct genlmsghdr, _field)
2313 #define _OUT(_field)    offsetof(struct nl_carp_parsed, _field)
2314
2315 static const struct nlattr_parser nla_p_set[] = {
2316         { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 },
2317         { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 },
2318         { .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 },
2319         { .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 },
2320         { .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key },
2321         { .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 },
2322         { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr },
2323         { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr },
2324         { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string },
2325 };
2326 static const struct nlfield_parser nlf_p_set[] = {
2327 };
2328 NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_set, nla_p_set);
2329 #undef _IN
2330 #undef _OUT
2331
2332
2333 static int
2334 carp_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt)
2335 {
2336         struct nl_carp_parsed attrs = { };
2337         struct carp_nl_send_args args;
2338         struct carpreq carpr = { };
2339         struct epoch_tracker et;
2340         if_t ifp = NULL;
2341         int error;
2342
2343         error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs);
2344         if (error != 0)
2345                 return (error);
2346
2347         NET_EPOCH_ENTER(et);
2348         if (attrs.ifname != NULL)
2349                 ifp = ifunit_ref(attrs.ifname);
2350         else if (attrs.ifindex != 0)
2351                 ifp = ifnet_byindex_ref(attrs.ifindex);
2352         NET_EPOCH_EXIT(et);
2353
2354         if ((error = carp_is_supported_if(ifp)) != 0)
2355                 goto out;
2356
2357         hdr->nlmsg_flags |= NLM_F_MULTI;
2358         args.hdr = hdr;
2359         args.npt = npt;
2360
2361         carpr.carpr_vhid = attrs.vhid;
2362         carpr.carpr_count = CARP_MAXVHID;
2363
2364         sx_xlock(&carp_sx);
2365         error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr,
2366             carp_nl_send, &args);
2367         sx_xunlock(&carp_sx);
2368
2369         if (! nlmsg_end_dump(npt->nw, error, hdr))
2370                 error = ENOMEM;
2371
2372 out:
2373         if (ifp != NULL)
2374                 if_rele(ifp);
2375
2376         return (error);
2377 }
2378
2379 static int
2380 carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
2381 {
2382         struct nl_carp_parsed attrs = { };
2383         struct carpkreq carpr;
2384         struct epoch_tracker et;
2385         if_t ifp = NULL;
2386         int error;
2387
2388         error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs);
2389         if (error != 0)
2390                 return (error);
2391
2392         if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID)
2393                 return (EINVAL);
2394         if (attrs.state > CARP_MAXSTATE)
2395                 return (EINVAL);
2396         if (attrs.advbase < 0 || attrs.advskew < 0)
2397                 return (EINVAL);
2398         if (attrs.advbase > 255)
2399                 return (EINVAL);
2400         if (attrs.advskew >= 255)
2401                 return (EINVAL);
2402
2403         NET_EPOCH_ENTER(et);
2404         if (attrs.ifname != NULL)
2405                 ifp = ifunit_ref(attrs.ifname);
2406         else if (attrs.ifindex != 0)
2407                 ifp = ifnet_byindex_ref(attrs.ifindex);
2408         NET_EPOCH_EXIT(et);
2409
2410         if ((error = carp_is_supported_if(ifp)) != 0)
2411                 goto out;
2412
2413         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2414                 error = EADDRNOTAVAIL;
2415                 goto out;
2416         }
2417
2418         carpr.carpr_count = 1;
2419         carpr.carpr_vhid = attrs.vhid;
2420         carpr.carpr_state = attrs.state;
2421         carpr.carpr_advbase = attrs.advbase;
2422         carpr.carpr_advskew = attrs.advskew;
2423         carpr.carpr_addr = attrs.addr;
2424         carpr.carpr_addr6 = attrs.addr6;
2425
2426         memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key));
2427
2428         sx_xlock(&carp_sx);
2429         error = carp_ioctl_set(ifp, &carpr);
2430         sx_xunlock(&carp_sx);
2431
2432 out:
2433         if (ifp != NULL)
2434                 if_rele(ifp);
2435
2436         return (error);
2437 }
2438
2439 static const struct nlhdr_parser *all_parsers[] = {
2440         &carp_parser
2441 };
2442
2443 static const struct genl_cmd carp_cmds[] = {
2444         {
2445                 .cmd_num = CARP_NL_CMD_GET,
2446                 .cmd_name = "SIOCGVH",
2447                 .cmd_cb = carp_nl_get,
2448                 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP |
2449                     GENL_CMD_CAP_HASPOL,
2450         },
2451         {
2452                 .cmd_num = CARP_NL_CMD_SET,
2453                 .cmd_name = "SIOCSVH",
2454                 .cmd_cb = carp_nl_set,
2455                 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
2456                 .cmd_priv = PRIV_NETINET_CARP,
2457         },
2458 };
2459
2460 static void
2461 carp_nl_register(void)
2462 {
2463         bool ret __diagused;
2464         int family_id __diagused;
2465
2466         NL_VERIFY_PARSERS(all_parsers);
2467         family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2,
2468             CARP_NL_CMD_MAX);
2469         MPASS(family_id != 0);
2470
2471         ret = genl_register_cmds(CARP_NL_FAMILY_NAME, carp_cmds,
2472             NL_ARRAY_LEN(carp_cmds));
2473         MPASS(ret);
2474 }
2475
2476 static void
2477 carp_nl_unregister(void)
2478 {
2479         genl_unregister_family(CARP_NL_FAMILY_NAME);
2480 }
2481
2482 static void
2483 carp_mod_cleanup(void)
2484 {
2485
2486         carp_nl_unregister();
2487
2488 #ifdef INET
2489         (void)ipproto_unregister(IPPROTO_CARP);
2490         carp_iamatch_p = NULL;
2491 #endif
2492 #ifdef INET6
2493         (void)ip6proto_unregister(IPPROTO_CARP);
2494         carp_iamatch6_p = NULL;
2495         carp_macmatch6_p = NULL;
2496 #endif
2497         carp_ioctl_p = NULL;
2498         carp_attach_p = NULL;
2499         carp_detach_p = NULL;
2500         carp_get_vhid_p = NULL;
2501         carp_linkstate_p = NULL;
2502         carp_forus_p = NULL;
2503         carp_output_p = NULL;
2504         carp_demote_adj_p = NULL;
2505         carp_master_p = NULL;
2506         mtx_unlock(&carp_mtx);
2507         taskqueue_drain(taskqueue_swi, &carp_sendall_task);
2508         mtx_destroy(&carp_mtx);
2509         sx_destroy(&carp_sx);
2510 }
2511
2512 static void
2513 ipcarp_sysinit(void)
2514 {
2515
2516         /* Load allow as tunable so to postpone carp start after module load */
2517         TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow);
2518 }
2519 VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL);
2520
2521 static int
2522 carp_mod_load(void)
2523 {
2524         int err;
2525
2526         mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
2527         sx_init(&carp_sx, "carp_sx");
2528         LIST_INIT(&carp_list);
2529         carp_get_vhid_p = carp_get_vhid;
2530         carp_forus_p = carp_forus;
2531         carp_output_p = carp_output;
2532         carp_linkstate_p = carp_linkstate;
2533         carp_ioctl_p = carp_ioctl;
2534         carp_attach_p = carp_attach;
2535         carp_detach_p = carp_detach;
2536         carp_demote_adj_p = carp_demote_adj;
2537         carp_master_p = carp_master;
2538 #ifdef INET6
2539         carp_iamatch6_p = carp_iamatch6;
2540         carp_macmatch6_p = carp_macmatch6;
2541         err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL);
2542         if (err) {
2543                 printf("carp: error %d registering with INET6\n", err);
2544                 carp_mod_cleanup();
2545                 return (err);
2546         }
2547 #endif
2548 #ifdef INET
2549         carp_iamatch_p = carp_iamatch;
2550         err = ipproto_register(IPPROTO_CARP, carp_input, NULL);
2551         if (err) {
2552                 printf("carp: error %d registering with INET\n", err);
2553                 carp_mod_cleanup();
2554                 return (err);
2555         }
2556 #endif
2557
2558         carp_nl_register();
2559
2560         return (0);
2561 }
2562
2563 static int
2564 carp_modevent(module_t mod, int type, void *data)
2565 {
2566         switch (type) {
2567         case MOD_LOAD:
2568                 return carp_mod_load();
2569                 /* NOTREACHED */
2570         case MOD_UNLOAD:
2571                 mtx_lock(&carp_mtx);
2572                 if (LIST_EMPTY(&carp_list))
2573                         carp_mod_cleanup();
2574                 else {
2575                         mtx_unlock(&carp_mtx);
2576                         return (EBUSY);
2577                 }
2578                 break;
2579
2580         default:
2581                 return (EINVAL);
2582         }
2583
2584         return (0);
2585 }
2586
2587 static moduledata_t carp_mod = {
2588         "carp",
2589         carp_modevent,
2590         0
2591 };
2592
2593 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);