]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/ip_carp.c
carp: fix source MAC
[FreeBSD/FreeBSD.git] / sys / netinet / ip_carp.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2002 Michael Shalayeff.
5  * Copyright (c) 2003 Ryan McBride.
6  * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
22  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28  * THE POSSIBILITY OF SUCH DAMAGE.
29  */
30
31 #include "opt_netlink.h"
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include "opt_bpf.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/devctl.h>
43 #include <sys/jail.h>
44 #include <sys/kernel.h>
45 #include <sys/limits.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/module.h>
49 #include <sys/priv.h>
50 #include <sys/proc.h>
51 #include <sys/socket.h>
52 #include <sys/sockio.h>
53 #include <sys/sysctl.h>
54 #include <sys/syslog.h>
55 #include <sys/taskqueue.h>
56 #include <sys/counter.h>
57
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_dl.h>
62 #include <net/if_llatbl.h>
63 #include <net/if_private.h>
64 #include <net/if_types.h>
65 #include <net/route.h>
66 #include <net/vnet.h>
67
68 #if defined(INET) || defined(INET6)
69 #include <netinet/in.h>
70 #include <netinet/in_var.h>
71 #include <netinet/ip_carp.h>
72 #include <netinet/ip_carp_nl.h>
73 #include <netinet/ip.h>
74 #include <machine/in_cksum.h>
75 #endif
76 #ifdef INET
77 #include <netinet/ip_var.h>
78 #include <netinet/if_ether.h>
79 #endif
80
81 #ifdef INET6
82 #include <netinet/icmp6.h>
83 #include <netinet/ip6.h>
84 #include <netinet6/in6_var.h>
85 #include <netinet6/ip6_var.h>
86 #include <netinet6/scope6_var.h>
87 #include <netinet6/nd6.h>
88 #endif
89
90 #include <netlink/netlink.h>
91 #include <netlink/netlink_ctl.h>
92 #include <netlink/netlink_generic.h>
93 #include <netlink/netlink_message_parser.h>
94
95 #include <crypto/sha1.h>
96
97 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
98
/*
 * Per-vhid CARP state.  One softc exists for every virtual host id
 * configured on a parent interface; it is linked both on that interface's
 * carp_if list (sc_list) and on the global carp_list (sc_next).
 */
99 struct carp_softc {
100         struct ifnet            *sc_carpdev;    /* Pointer to parent ifnet. */
101         struct ifaddr           **sc_ifas;      /* Our ifaddrs (array walked by CARP_FOREACH_IFA). */
102         struct sockaddr_dl      sc_addr;        /* Our link level address. */
103         struct callout          sc_ad_tmo;      /* Advertising timeout. */
104 #ifdef INET
105         struct callout          sc_md_tmo;      /* Master down timeout. */
106 #endif
107 #ifdef INET6
108         struct callout          sc_md6_tmo;     /* XXX: Master down timeout. */
109 #endif
110         struct mtx              sc_mtx;         /* Softc lock, see CARP_LOCK(). */
111
112         int                     sc_vhid;        /* Virtual host id; low 8 bits are hashed. */
113         int                     sc_advskew;     /* Advertisement skew, in 1/256 s units. */
114         int                     sc_advbase;     /* Advertisement base interval, in seconds. */
115         struct in_addr          sc_carpaddr;    /* IPv4 CARP address; multicast => TTL is checked. */
116         struct in6_addr         sc_carpaddr6;   /* IPv6 counterpart of sc_carpaddr. */
117
118         int                     sc_naddrs;      /* With sc_naddrs6: number of used sc_ifas slots. */
119         int                     sc_naddrs6;     /* Presumably the IPv6 share of that count. */
120         int                     sc_ifasiz;      /* Presumably the allocated size of sc_ifas - confirm. */
121         enum { INIT = 0, BACKUP, MASTER }       sc_state;
122         int                     sc_suppress;
123         int                     sc_sendad_errors;
124 #define CARP_SENDAD_MAX_ERRORS  3
125         int                     sc_sendad_success;
126 #define CARP_SENDAD_MIN_SUCCESS 3
127
128         int                     sc_init_counter;        /* Non-zero: next ad picks a random sc_counter. */
129         uint64_t                sc_counter;             /* 64-bit counter carried in advertisements. */
130
131         /* authentication */
132 #define CARP_HMAC_PAD   64
133         unsigned char sc_key[CARP_KEY_LEN];     /* Shared secret. */
134         unsigned char sc_pad[CARP_HMAC_PAD];    /* HMAC pad; left as the outer pad by carp_hmac_prepare(). */
135         SHA1_CTX sc_sha1;                       /* Precomputed start of the inner hash. */
136
137         TAILQ_ENTRY(carp_softc) sc_list;        /* On the carp_if list. */
138         LIST_ENTRY(carp_softc)  sc_next;        /* On the global list. */
139 };
140
/*
 * Per-interface CARP state, reached through ifp->if_carp.  It anchors the
 * list of softcs (one per vhid) configured on the interface.
 */
141 struct carp_if {
142 #ifdef INET
143         int     cif_naddrs;
144 #endif
145 #ifdef INET6
146         int     cif_naddrs6;
147 #endif
148         TAILQ_HEAD(, carp_softc) cif_vrs;       /* Softcs on this ifnet (linked via sc_list). */
149 #ifdef INET
150         struct ip_moptions       cif_imo;       /* IPv4 multicast options. */
151 #endif
152 #ifdef INET6
153         struct ip6_moptions      cif_im6o;      /* IPv6 multicast options. */
154 #endif
155         struct ifnet    *cif_ifp;               /* Presumably the owning interface - confirm. */
156         struct mtx      cif_mtx;                /* Guards cif_vrs, see CIF_LOCK(). */
157         uint32_t        cif_flags;
158 #define CIF_PROMISC     0x00000001
159 };
160
161 /*
 * Kernel equivalent of struct carpreq, but with more fields for new features.
 * The leading members must stay in step with struct carpreq; kernel-only
 * members go after the marker comment below.
162  */
163 struct carpkreq {
164         int             carpr_count;
165         int             carpr_vhid;
166         int             carpr_state;
167         int             carpr_advskew;
168         int             carpr_advbase;
169         unsigned char   carpr_key[CARP_KEY_LEN];
170         /* Everything above this is identical to carpreq */
171         struct in_addr  carpr_addr;
172         struct in6_addr carpr_addr6;
173 };
174
175 /*
176  * Brief design of carp(4).
177  *
178  * Any carp-capable ifnet may have a list of carp softcs hanging off
179  * its ifp->if_carp pointer. Each softc represents one unique virtual
180  * host id, or vhid. The softc has a back pointer to the ifnet. All
181  * softcs are joined in a global list, which has quite limited use.
182  *
183  * Any interface address that takes part in CARP negotiation has a
184  * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
185  * AF_INET or AF_INET6 address.
186  *
187  * Although, one can get the softc's backpointer to ifnet and traverse
188  * through its ifp->if_addrhead queue to find all interface addresses
189  * involved in CARP, we keep a growable array of ifaddr pointers. This
190  * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
191  * do calls into the network stack, thus avoiding LORs.
192  *
193  * Locking:
194  *
195  * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
196  * callout-driven events and ioctl()s.
197  *
198  * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx.
199  * To traverse the global list we use the mutex carp_mtx.
200  *
201  * Known issues with locking:
202  *
203  * - Sending ad, we put the pointer to the softc in an mtag, and no reference
204  *   counting is done on the softc.
205  * - On module unload we may race (?) with packet processing thread
206  *   dereferencing our function pointers.
207  */
208
209 /* Accept incoming CARP packets. */
210 VNET_DEFINE_STATIC(int, carp_allow) = 1;
211 #define V_carp_allow    VNET(carp_allow)
212
213 /* Set DSCP in outgoing CARP packets. */
214 VNET_DEFINE_STATIC(int, carp_dscp) = 56;
215 #define V_carp_dscp     VNET(carp_dscp)
216
217 /* Preempt slower nodes. */
218 VNET_DEFINE_STATIC(int, carp_preempt) = 0;
219 #define V_carp_preempt  VNET(carp_preempt)
220
221 /* Log level. */
222 VNET_DEFINE_STATIC(int, carp_log) = 1;
223 #define V_carp_log      VNET(carp_log)
224
225 /* Global advskew demotion. */
226 VNET_DEFINE_STATIC(int, carp_demotion) = 0;
227 #define V_carp_demotion VNET(carp_demotion)
228
229 /* Send error demotion factor. */
230 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW;
231 #define V_carp_senderr_adj      VNET(carp_senderr_adj)
232
233 /* Iface down demotion factor. */
234 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW;
235 #define V_carp_ifdown_adj       VNET(carp_ifdown_adj)
236
237 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS);
238 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS);
239 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
240
241 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
242     "CARP");
243 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow,
244     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
245     &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I",
246     "Accept incoming CARP packets");
247 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp,
248     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
249     0, 0, carp_dscp_sysctl, "I",
250     "DSCP value for carp packets");
251 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
252     &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
253 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
254     &VNET_NAME(carp_log), 0, "CARP log level");
255 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion,
256     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
257     0, 0, carp_demote_adj_sysctl, "I",
258     "Adjust demotion factor (skew of advskew)");
259 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor,
260     CTLFLAG_VNET | CTLFLAG_RW,
261     &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
262 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor,
263     CTLFLAG_VNET | CTLFLAG_RW,
264     &VNET_NAME(carp_ifdown_adj), 0,
265     "Interface down demotion factor adjustment");
266
267 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
268 VNET_PCPUSTAT_SYSINIT(carpstats);
269 VNET_PCPUSTAT_SYSUNINIT(carpstats);
270
271 #define CARPSTATS_ADD(name, val)        \
272     counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
273         sizeof(uint64_t)], (val))
274 #define CARPSTATS_INC(name)             CARPSTATS_ADD(name, 1)
275
276 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
277     carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
278
/*
 * Locking helpers.  CARP_* operate on the per-softc mutex (sc_mtx), CIF_* on
 * the per-interface mutex (cif_mtx).
 */
279 #define CARP_LOCK_INIT(sc)      mtx_init(&(sc)->sc_mtx, "carp_softc",   \
280         NULL, MTX_DEF)
281 #define CARP_LOCK_DESTROY(sc)   mtx_destroy(&(sc)->sc_mtx)
282 #define CARP_LOCK_ASSERT(sc)    mtx_assert(&(sc)->sc_mtx, MA_OWNED)
283 #define CARP_LOCK(sc)           mtx_lock(&(sc)->sc_mtx)
284 #define CARP_UNLOCK(sc)         mtx_unlock(&(sc)->sc_mtx)
285 #define CIF_LOCK_INIT(cif)      mtx_init(&(cif)->cif_mtx, "carp_if",   \
286         NULL, MTX_DEF)
287 #define CIF_LOCK_DESTROY(cif)   mtx_destroy(&(cif)->cif_mtx)
288 #define CIF_LOCK_ASSERT(cif)    mtx_assert(&(cif)->cif_mtx, MA_OWNED)
289 #define CIF_LOCK(cif)           mtx_lock(&(cif)->cif_mtx)
290 #define CIF_UNLOCK(cif)         mtx_unlock(&(cif)->cif_mtx)
/*
 * Release a carp_if once no softc is left on it.  Note that carp_free_if()
 * is entered with the CIF lock still held; only the "list not empty" branch
 * unlocks here.  NOTE(review): carp_free_if() presumably disposes of the
 * lock itself - confirm against its definition.
 */
291 #define CIF_FREE(cif)   do {                            \
292                 CIF_LOCK(cif);                          \
293                 if (TAILQ_EMPTY(&(cif)->cif_vrs))       \
294                         carp_free_if(cif);              \
295                 else                                    \
296                         CIF_UNLOCK(cif);                \
297 } while (0)
298
/*
 * Logging helpers gated by the carp_log tunable: CARP_LOG() emits LOG_INFO
 * messages (prefixed with "carp: ") when the level is above 0, CARP_DEBUG()
 * emits LOG_DEBUG messages (no prefix) when it is above 1.  The first
 * argument of CARP_LOG() must be a string literal, since it is concatenated
 * with the prefix.
 */
299 #define CARP_LOG(...)   do {                            \
300         if (V_carp_log > 0)                             \
301                 log(LOG_INFO, "carp: " __VA_ARGS__);    \
302 } while (0)
303
304 #define CARP_DEBUG(...) do {                            \
305         if (V_carp_log > 1)                             \
306                 log(LOG_DEBUG, __VA_ARGS__);            \
307 } while (0)
308
/*
 * Visit every address of 'ifp' that takes part in CARP, i.e. whose
 * ifa_carp pointer is set.  The expansion ends in an unbraced 'if', so do
 * not hang an 'else' off the loop body.
 */
#define IFNET_FOREACH_IFA(ifp, ifa)                                     \
        CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link)         \
                if ((ifa)->ifa_carp != NULL)

/*
 * Visit the ifaddrs stored in (sc)->sc_ifas[], stopping after
 * sc_naddrs + sc_naddrs6 entries or at the first NULL slot.  The softc lock
 * must be held.  This expands to TWO statements (assertion + loop header),
 * so it must not be the sole body of an unbraced if/else/loop.
 */
#define CARP_FOREACH_IFA(sc, ifa)                                       \
        CARP_LOCK_ASSERT(sc);                                           \
        for (int _i = 0;                                                \
                _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 &&              \
                ((ifa) = (sc)->sc_ifas[_i]) != NULL;                    \
                ++_i)

/*
 * Visit every softc configured on 'ifp'.  Either the interface's CIF lock
 * or carp_sx (exclusively) must be held.  Like CARP_FOREACH_IFA this is an
 * assertion followed by a loop header, i.e. two statements.
 */
#define IFNET_FOREACH_CARP(ifp, sc)                                     \
        KASSERT(mtx_owned(&(ifp)->if_carp->cif_mtx) ||                  \
            sx_xlocked(&carp_sx), ("cif_vrs not locked"));              \
        TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)
324
/*
 * Effective advertisement skew of a softc: its configured sc_advskew plus
 * the global demotion counter, clamped to the range [0, CARP_MAXSKEW].
 */
#define DEMOTE_ADVSKEW(sc)                                              \
    ((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW ? CARP_MAXSKEW : \
    (sc)->sc_advskew + V_carp_demotion < 0 ? 0 :                        \
    (sc)->sc_advskew + V_carp_demotion)
330
331 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int);
332 static struct carp_softc
333                 *carp_alloc(struct ifnet *);
334 static void     carp_destroy(struct carp_softc *);
335 static struct carp_if
336                 *carp_alloc_if(struct ifnet *);
337 static void     carp_free_if(struct carp_if *);
338 static void     carp_set_state(struct carp_softc *, int, const char* reason);
339 static void     carp_sc_state(struct carp_softc *);
340 static void     carp_setrun(struct carp_softc *, sa_family_t);
341 static void     carp_master_down(void *);
342 static void     carp_master_down_locked(struct carp_softc *,
343                     const char* reason);
344 static void     carp_send_ad(void *);
345 static void     carp_send_ad_locked(struct carp_softc *);
346 static void     carp_addroute(struct carp_softc *);
347 static void     carp_ifa_addroute(struct ifaddr *);
348 static void     carp_delroute(struct carp_softc *);
349 static void     carp_ifa_delroute(struct ifaddr *);
350 static void     carp_send_ad_all(void *, int);
351 static void     carp_demote_adj(int, char *);
352
353 static LIST_HEAD(, carp_softc) carp_list;
354 static struct mtx carp_mtx;
355 static struct sx carp_sx;
356 static struct task carp_sendall_task =
357     TASK_INITIALIZER(0, carp_send_ad_all, NULL);
358
359 static int
360 carp_is_supported_if(if_t ifp)
361 {
362         if (ifp == NULL)
363                 return (ENXIO);
364
365         switch (ifp->if_type) {
366         case IFT_ETHER:
367         case IFT_L2VLAN:
368         case IFT_BRIDGE:
369                 break;
370         default:
371                 return (EOPNOTSUPP);
372         }
373
374         return (0);
375 }
376
377 static void
378 carp_hmac_prepare(struct carp_softc *sc)
379 {
380         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
381         uint8_t vhid = sc->sc_vhid & 0xff;
382         struct ifaddr *ifa;
383         int i, found;
384 #ifdef INET
385         struct in_addr last, cur, in;
386 #endif
387 #ifdef INET6
388         struct in6_addr last6, cur6, in6;
389 #endif
390
391         CARP_LOCK_ASSERT(sc);
392
393         /* Compute ipad from key. */
394         bzero(sc->sc_pad, sizeof(sc->sc_pad));
395         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
396         for (i = 0; i < sizeof(sc->sc_pad); i++)
397                 sc->sc_pad[i] ^= 0x36;
398
399         /* Precompute first part of inner hash. */
400         SHA1Init(&sc->sc_sha1);
401         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
402         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
403         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
404         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
405 #ifdef INET
406         cur.s_addr = 0;
407         do {
408                 found = 0;
409                 last = cur;
410                 cur.s_addr = 0xffffffff;
411                 CARP_FOREACH_IFA(sc, ifa) {
412                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
413                         if (ifa->ifa_addr->sa_family == AF_INET &&
414                             ntohl(in.s_addr) > ntohl(last.s_addr) &&
415                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
416                                 cur.s_addr = in.s_addr;
417                                 found++;
418                         }
419                 }
420                 if (found)
421                         SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
422         } while (found);
423 #endif /* INET */
424 #ifdef INET6
425         memset(&cur6, 0, sizeof(cur6));
426         do {
427                 found = 0;
428                 last6 = cur6;
429                 memset(&cur6, 0xff, sizeof(cur6));
430                 CARP_FOREACH_IFA(sc, ifa) {
431                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
432                         if (IN6_IS_SCOPE_EMBED(&in6))
433                                 in6.s6_addr16[1] = 0;
434                         if (ifa->ifa_addr->sa_family == AF_INET6 &&
435                             memcmp(&in6, &last6, sizeof(in6)) > 0 &&
436                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
437                                 cur6 = in6;
438                                 found++;
439                         }
440                 }
441                 if (found)
442                         SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
443         } while (found);
444 #endif /* INET6 */
445
446         /* convert ipad to opad */
447         for (i = 0; i < sizeof(sc->sc_pad); i++)
448                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
449 }
450
451 static void
452 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
453     unsigned char md[20])
454 {
455         SHA1_CTX sha1ctx;
456
457         CARP_LOCK_ASSERT(sc);
458
459         /* fetch first half of inner hash */
460         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
461
462         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
463         SHA1Final(md, &sha1ctx);
464
465         /* outer hash */
466         SHA1Init(&sha1ctx);
467         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
468         SHA1Update(&sha1ctx, md, 20);
469         SHA1Final(md, &sha1ctx);
470 }
471
472 static int
473 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
474     unsigned char md[20])
475 {
476         unsigned char md2[20];
477
478         CARP_LOCK_ASSERT(sc);
479
480         carp_hmac_generate(sc, counter, md2);
481
482         return (bcmp(md, md2, sizeof(md2)));
483 }
484
/*
 * process input packet.
 * we have rearranged checks order compared to the rfc,
 * but it seems more efficient this way or not possible otherwise.
 */
#ifdef INET
/*
 * IPv4 protocol input for CARP.
 *
 * Consumes the mbuf in *mp (which is cleared) and always returns
 * IPPROTO_DONE.  After the length and checksum checks the packet is handed
 * to carp_input_c() together with the TTL of the IP header.
 */
static int
carp_input(struct mbuf **mp, int *offp, int proto)
{
        struct mbuf *m = *mp;
        struct ip *ip = mtod(m, struct ip *);
        struct carp_header *ch;
        int iplen, len;

        *mp = NULL;

        CARPSTATS_INC(carps_ipackets);

        if (!V_carp_allow) {
                m_freem(m);
                return (IPPROTO_DONE);
        }

        iplen = ip->ip_hl << 2;
        /* Bytes that must be present: IP header plus CARP header. */
        len = iplen + sizeof(*ch);

        if (m->m_pkthdr.len < len) {
                CARPSTATS_INC(carps_badlen);
                CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
                    "on %s\n", __func__, m->m_len - sizeof(struct ip),
                    if_name(m->m_pkthdr.rcvif));
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /*
         * Make both headers contiguous, but only when the first mbuf is
         * too short to hold them.
         */
        if (m->m_len < len) {
                if ((m = m_pullup(m, len)) == NULL) {
                        CARPSTATS_INC(carps_hdrops);
                        CARP_DEBUG("%s: pullup failed\n", __func__);
                        return (IPPROTO_DONE);
                }
                ip = mtod(m, struct ip *);
        }
        ch = (struct carp_header *)((char *)ip + iplen);

        /*
         * verify the CARP checksum; m_data is temporarily advanced so that
         * in_cksum() starts at the CARP header.
         */
        m->m_data += iplen;
        if (in_cksum(m, len - iplen)) {
                CARPSTATS_INC(carps_badsum);
                CARP_DEBUG("%s: checksum failed on %s\n", __func__,
                    if_name(m->m_pkthdr.rcvif));
                m_freem(m);
                return (IPPROTO_DONE);
        }
        m->m_data -= iplen;

        carp_input_c(m, ch, AF_INET, ip->ip_ttl);
        return (IPPROTO_DONE);
}
#endif
566
567 #ifdef INET6
568 static int
569 carp6_input(struct mbuf **mp, int *offp, int proto)
570 {
571         struct mbuf *m = *mp;
572         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
573         struct carp_header *ch;
574         u_int len;
575
576         CARPSTATS_INC(carps_ipackets6);
577
578         if (!V_carp_allow) {
579                 m_freem(m);
580                 return (IPPROTO_DONE);
581         }
582
583         /* check if received on a valid carp interface */
584         if (m->m_pkthdr.rcvif->if_carp == NULL) {
585                 CARPSTATS_INC(carps_badif);
586                 CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
587                     __func__, if_name(m->m_pkthdr.rcvif));
588                 m_freem(m);
589                 return (IPPROTO_DONE);
590         }
591
592         /* verify that we have a complete carp packet */
593         if (m->m_len < *offp + sizeof(*ch)) {
594                 len = m->m_len;
595                 m = m_pullup(m, *offp + sizeof(*ch));
596                 if (m == NULL) {
597                         CARPSTATS_INC(carps_badlen);
598                         CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
599                         return (IPPROTO_DONE);
600                 }
601                 ip6 = mtod(m, struct ip6_hdr *);
602         }
603         ch = (struct carp_header *)(mtod(m, char *) + *offp);
604
605         /* verify the CARP checksum */
606         m->m_data += *offp;
607         if (in_cksum(m, sizeof(*ch))) {
608                 CARPSTATS_INC(carps_badsum);
609                 CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
610                     if_name(m->m_pkthdr.rcvif));
611                 m_freem(m);
612                 return (IPPROTO_DONE);
613         }
614         m->m_data -= *offp;
615
616         carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim);
617         return (IPPROTO_DONE);
618 }
619 #endif /* INET6 */
620
621 /*
622  * This routine should not be necessary at all, but some switches
623  * (VMWare ESX vswitches) can echo our own packets back at us,
624  * and we must ignore them or they will cause us to drop out of
625  * MASTER mode.
626  *
627  * We cannot catch all cases of network loops.  Instead, what we
628  * do here is catch any packet that arrives with a carp header
629  * with a VHID of 0, that comes from an address that is our own.
630  * These packets are by definition "from us" (even if they are from
631  * a misconfigured host that is pretending to be us).
632  *
633  * The VHID test is outside this mini-function.
634  */
635 static int
636 carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af)
637 {
638 #ifdef INET
639         struct ip *ip4;
640         struct in_addr in4;
641 #endif
642 #ifdef INET6
643         struct ip6_hdr *ip6;
644         struct in6_addr in6;
645 #endif
646
647         switch (af) {
648 #ifdef INET
649         case AF_INET:
650                 ip4 = mtod(m, struct ip *);
651                 in4 = ifatoia(ifa)->ia_addr.sin_addr;
652                 return (in4.s_addr == ip4->ip_src.s_addr);
653 #endif
654 #ifdef INET6
655         case AF_INET6:
656                 ip6 = mtod(m, struct ip6_hdr *);
657                 in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
658                 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0);
659 #endif
660         default:
661                 break;
662         }
663         return (0);
664 }
665
666 static void
667 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl)
668 {
669         struct ifnet *ifp = m->m_pkthdr.rcvif;
670         struct ifaddr *ifa, *match;
671         struct carp_softc *sc;
672         uint64_t tmp_counter;
673         struct timeval sc_tv, ch_tv;
674         int error;
675         bool multicast = false;
676
677         NET_EPOCH_ASSERT();
678
679         /*
680          * Verify that the VHID is valid on the receiving interface.
681          *
682          * There should be just one match.  If there are none
683          * the VHID is not valid and we drop the packet.  If
684          * there are multiple VHID matches, take just the first
685          * one, for compatibility with previous code.  While we're
686          * scanning, check for obvious loops in the network topology
687          * (these should never happen, and as noted above, we may
688          * miss real loops; this is just a double-check).
689          */
690         error = 0;
691         match = NULL;
692         IFNET_FOREACH_IFA(ifp, ifa) {
693                 if (match == NULL && ifa->ifa_carp != NULL &&
694                     ifa->ifa_addr->sa_family == af &&
695                     ifa->ifa_carp->sc_vhid == ch->carp_vhid)
696                         match = ifa;
697                 if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af))
698                         error = ELOOP;
699         }
700         ifa = error ? NULL : match;
701         if (ifa != NULL)
702                 ifa_ref(ifa);
703
704         if (ifa == NULL) {
705                 if (error == ELOOP) {
706                         CARP_DEBUG("dropping looped packet on interface %s\n",
707                             if_name(ifp));
708                         CARPSTATS_INC(carps_badif);     /* ??? */
709                 } else {
710                         CARPSTATS_INC(carps_badvhid);
711                 }
712                 m_freem(m);
713                 return;
714         }
715
716         /* verify the CARP version. */
717         if (ch->carp_version != CARP_VERSION) {
718                 CARPSTATS_INC(carps_badver);
719                 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp),
720                     ch->carp_version);
721                 ifa_free(ifa);
722                 m_freem(m);
723                 return;
724         }
725
726         sc = ifa->ifa_carp;
727         CARP_LOCK(sc);
728         if (ifa->ifa_addr->sa_family == AF_INET) {
729                 multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr);
730         } else {
731                 multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6);
732         }
733         ifa_free(ifa);
734
735         /* verify that the IP TTL is 255, but only if we're not in unicast mode. */
736         if (multicast && ttl != CARP_DFLTTL) {
737                 CARPSTATS_INC(carps_badttl);
738                 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
739                     ttl, if_name(m->m_pkthdr.rcvif));
740                 goto out;
741         }
742
743         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
744                 CARPSTATS_INC(carps_badauth);
745                 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
746                     sc->sc_vhid, if_name(ifp));
747                 goto out;
748         }
749
750         tmp_counter = ntohl(ch->carp_counter[0]);
751         tmp_counter = tmp_counter<<32;
752         tmp_counter += ntohl(ch->carp_counter[1]);
753
754         /* XXX Replay protection goes here */
755
756         sc->sc_init_counter = 0;
757         sc->sc_counter = tmp_counter;
758
759         sc_tv.tv_sec = sc->sc_advbase;
760         sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
761         ch_tv.tv_sec = ch->carp_advbase;
762         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
763
764         switch (sc->sc_state) {
765         case INIT:
766                 break;
767         case MASTER:
768                 /*
769                  * If we receive an advertisement from a master who's going to
770                  * be more frequent than us, go into BACKUP state.
771                  */
772                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
773                     timevalcmp(&sc_tv, &ch_tv, ==)) {
774                         callout_stop(&sc->sc_ad_tmo);
775                         carp_set_state(sc, BACKUP,
776                             "more frequent advertisement received");
777                         carp_setrun(sc, 0);
778                         carp_delroute(sc);
779                 }
780                 break;
781         case BACKUP:
782                 /*
783                  * If we're pre-empting masters who advertise slower than us,
784                  * and this one claims to be slower, treat him as down.
785                  */
786                 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
787                         carp_master_down_locked(sc,
788                             "preempting a slower master");
789                         break;
790                 }
791
792                 /*
793                  *  If the master is going to advertise at such a low frequency
794                  *  that he's guaranteed to time out, we'd might as well just
795                  *  treat him as timed out now.
796                  */
797                 sc_tv.tv_sec = sc->sc_advbase * 3;
798                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
799                         carp_master_down_locked(sc, "master will time out");
800                         break;
801                 }
802
803                 /*
804                  * Otherwise, we reset the counter and wait for the next
805                  * advertisement.
806                  */
807                 carp_setrun(sc, af);
808                 break;
809         }
810
811 out:
812         CARP_UNLOCK(sc);
813         m_freem(m);
814 }
815
816 static int
817 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
818 {
819         struct m_tag *mtag;
820
821         if (sc->sc_init_counter) {
822                 /* this could also be seconds since unix epoch */
823                 sc->sc_counter = arc4random();
824                 sc->sc_counter = sc->sc_counter << 32;
825                 sc->sc_counter += arc4random();
826         } else
827                 sc->sc_counter++;
828
829         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
830         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
831
832         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
833
834         /* Tag packet for carp_output */
835         if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
836             M_NOWAIT)) == NULL) {
837                 m_freem(m);
838                 CARPSTATS_INC(carps_onomem);
839                 return (ENOMEM);
840         }
841         bcopy(&sc, mtag + 1, sizeof(sc));
842         m_tag_prepend(m, mtag);
843
844         return (0);
845 }
846
847 /*
848  * To avoid LORs and possible recursions this function shouldn't
849  * be called directly, but scheduled via taskqueue.
850  */
851 static void
852 carp_send_ad_all(void *ctx __unused, int pending __unused)
853 {
854         struct carp_softc *sc;
855         struct epoch_tracker et;
856
857         NET_EPOCH_ENTER(et);
858         mtx_lock(&carp_mtx);
859         LIST_FOREACH(sc, &carp_list, sc_next)
860                 if (sc->sc_state == MASTER) {
861                         CARP_LOCK(sc);
862                         CURVNET_SET(sc->sc_carpdev->if_vnet);
863                         carp_send_ad_locked(sc);
864                         CURVNET_RESTORE();
865                         CARP_UNLOCK(sc);
866                 }
867         mtx_unlock(&carp_mtx);
868         NET_EPOCH_EXIT(et);
869 }
870
871 /* Send a periodic advertisement, executed in callout context. */
872 static void
873 carp_send_ad(void *v)
874 {
875         struct carp_softc *sc = v;
876         struct epoch_tracker et;
877
878         NET_EPOCH_ENTER(et);
879         CARP_LOCK_ASSERT(sc);
880         CURVNET_SET(sc->sc_carpdev->if_vnet);
881         carp_send_ad_locked(sc);
882         CURVNET_RESTORE();
883         CARP_UNLOCK(sc);
884         NET_EPOCH_EXIT(et);
885 }
886
887 static void
888 carp_send_ad_error(struct carp_softc *sc, int error)
889 {
890
891         /*
892          * We track errors and successfull sends with this logic:
893          * - Any error resets success counter to 0.
894          * - MAX_ERRORS triggers demotion.
895          * - MIN_SUCCESS successes resets error counter to 0.
896          * - MIN_SUCCESS reverts demotion, if it was triggered before.
897          */
898         if (error) {
899                 if (sc->sc_sendad_errors < INT_MAX)
900                         sc->sc_sendad_errors++;
901                 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
902                         static const char fmt[] = "send error %d on %s";
903                         char msg[sizeof(fmt) + IFNAMSIZ];
904
905                         sprintf(msg, fmt, error, if_name(sc->sc_carpdev));
906                         carp_demote_adj(V_carp_senderr_adj, msg);
907                 }
908                 sc->sc_sendad_success = 0;
909         } else if (sc->sc_sendad_errors > 0) {
910                 if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) {
911                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
912                                 static const char fmt[] = "send ok on %s";
913                                 char msg[sizeof(fmt) + IFNAMSIZ];
914
915                                 sprintf(msg, fmt, if_name(sc->sc_carpdev));
916                                 carp_demote_adj(-V_carp_senderr_adj, msg);
917                         }
918                         sc->sc_sendad_errors = 0;
919                 }
920         }
921 }
922
923 /*
924  * Pick the best ifaddr on the given ifp for sending CARP
925  * advertisements.
926  *
927  * "Best" here is defined by ifa_preferred().  This function is much
928  * much like ifaof_ifpforaddr() except that we just use ifa_preferred().
929  *
930  * (This could be simplified to return the actual address, except that
931  * it has a different format in AF_INET and AF_INET6.)
932  */
933 static struct ifaddr *
934 carp_best_ifa(int af, struct ifnet *ifp)
935 {
936         struct ifaddr *ifa, *best;
937
938         NET_EPOCH_ASSERT();
939
940         if (af >= AF_MAX)
941                 return (NULL);
942         best = NULL;
943         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
944                 if (ifa->ifa_addr->sa_family == af &&
945                     (best == NULL || ifa_preferred(best, ifa)))
946                         best = ifa;
947         }
948         if (best != NULL)
949                 ifa_ref(best);
950         return (best);
951 }
952
953 static void
954 carp_send_ad_locked(struct carp_softc *sc)
955 {
956         struct carp_header ch;
957         struct timeval tv;
958         struct ifaddr *ifa;
959         struct carp_header *ch_ptr;
960         struct mbuf *m;
961         int len, advskew;
962
963         NET_EPOCH_ASSERT();
964         CARP_LOCK_ASSERT(sc);
965
966         advskew = DEMOTE_ADVSKEW(sc);
967         tv.tv_sec = sc->sc_advbase;
968         tv.tv_usec = advskew * 1000000 / 256;
969
970         ch.carp_version = CARP_VERSION;
971         ch.carp_type = CARP_ADVERTISEMENT;
972         ch.carp_vhid = sc->sc_vhid;
973         ch.carp_advbase = sc->sc_advbase;
974         ch.carp_advskew = advskew;
975         ch.carp_authlen = 7;    /* XXX DEFINE */
976         ch.carp_pad1 = 0;       /* must be zero */
977         ch.carp_cksum = 0;
978
979         /* XXXGL: OpenBSD picks first ifaddr with needed family. */
980
981 #ifdef INET
982         if (sc->sc_naddrs) {
983                 struct ip *ip;
984
985                 m = m_gethdr(M_NOWAIT, MT_DATA);
986                 if (m == NULL) {
987                         CARPSTATS_INC(carps_onomem);
988                         goto resched;
989                 }
990                 len = sizeof(*ip) + sizeof(ch);
991                 m->m_pkthdr.len = len;
992                 m->m_pkthdr.rcvif = NULL;
993                 m->m_len = len;
994                 M_ALIGN(m, m->m_len);
995                 if (IN_MULTICAST(sc->sc_carpaddr.s_addr))
996                         m->m_flags |= M_MCAST;
997                 ip = mtod(m, struct ip *);
998                 ip->ip_v = IPVERSION;
999                 ip->ip_hl = sizeof(*ip) >> 2;
1000                 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET;
1001                 ip->ip_len = htons(len);
1002                 ip->ip_off = htons(IP_DF);
1003                 ip->ip_ttl = CARP_DFLTTL;
1004                 ip->ip_p = IPPROTO_CARP;
1005                 ip->ip_sum = 0;
1006                 ip_fillid(ip);
1007
1008                 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev);
1009                 if (ifa != NULL) {
1010                         ip->ip_src.s_addr =
1011                             ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1012                         ifa_free(ifa);
1013                 } else
1014                         ip->ip_src.s_addr = 0;
1015                 ip->ip_dst = sc->sc_carpaddr;
1016
1017                 ch_ptr = (struct carp_header *)(&ip[1]);
1018                 bcopy(&ch, ch_ptr, sizeof(ch));
1019                 if (carp_prepare_ad(m, sc, ch_ptr))
1020                         goto resched;
1021
1022                 m->m_data += sizeof(*ip);
1023                 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
1024                 m->m_data -= sizeof(*ip);
1025
1026                 CARPSTATS_INC(carps_opackets);
1027
1028                 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
1029                     &sc->sc_carpdev->if_carp->cif_imo, NULL));
1030         }
1031 #endif /* INET */
1032 #ifdef INET6
1033         if (sc->sc_naddrs6) {
1034                 struct ip6_hdr *ip6;
1035
1036                 m = m_gethdr(M_NOWAIT, MT_DATA);
1037                 if (m == NULL) {
1038                         CARPSTATS_INC(carps_onomem);
1039                         goto resched;
1040                 }
1041                 len = sizeof(*ip6) + sizeof(ch);
1042                 m->m_pkthdr.len = len;
1043                 m->m_pkthdr.rcvif = NULL;
1044                 m->m_len = len;
1045                 M_ALIGN(m, m->m_len);
1046                 ip6 = mtod(m, struct ip6_hdr *);
1047                 bzero(ip6, sizeof(*ip6));
1048                 ip6->ip6_vfc |= IPV6_VERSION;
1049                 /* Traffic class isn't defined in ip6 struct instead
1050                  * it gets offset into flowid field */
1051                 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN +
1052                     IPTOS_DSCP_OFFSET));
1053                 ip6->ip6_hlim = CARP_DFLTTL;
1054                 ip6->ip6_nxt = IPPROTO_CARP;
1055
1056                 /* set the source address */
1057                 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev);
1058                 if (ifa != NULL) {
1059                         bcopy(IFA_IN6(ifa), &ip6->ip6_src,
1060                             sizeof(struct in6_addr));
1061                         ifa_free(ifa);
1062                 } else
1063                         /* This should never happen with IPv6. */
1064                         bzero(&ip6->ip6_src, sizeof(struct in6_addr));
1065
1066                 /* Set the multicast destination. */
1067                 memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst));
1068                 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1069                         if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1070                                 m_freem(m);
1071                                 CARP_DEBUG("%s: in6_setscope failed\n", __func__);
1072                                 goto resched;
1073                         }
1074                 }
1075
1076                 ch_ptr = (struct carp_header *)(&ip6[1]);
1077                 bcopy(&ch, ch_ptr, sizeof(ch));
1078                 if (carp_prepare_ad(m, sc, ch_ptr))
1079                         goto resched;
1080
1081                 m->m_data += sizeof(*ip6);
1082                 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
1083                 m->m_data -= sizeof(*ip6);
1084
1085                 CARPSTATS_INC(carps_opackets6);
1086
1087                 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
1088                     &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
1089         }
1090 #endif /* INET6 */
1091
1092 resched:
1093         callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
1094 }
1095
1096 static void
1097 carp_addroute(struct carp_softc *sc)
1098 {
1099         struct ifaddr *ifa;
1100
1101         CARP_FOREACH_IFA(sc, ifa)
1102                 carp_ifa_addroute(ifa);
1103 }
1104
1105 static void
1106 carp_ifa_addroute(struct ifaddr *ifa)
1107 {
1108
1109         switch (ifa->ifa_addr->sa_family) {
1110 #ifdef INET
1111         case AF_INET:
1112                 in_addprefix(ifatoia(ifa));
1113                 ifa_add_loopback_route(ifa,
1114                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
1115                 break;
1116 #endif
1117 #ifdef INET6
1118         case AF_INET6:
1119                 ifa_add_loopback_route(ifa,
1120                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
1121                 nd6_add_ifa_lle(ifatoia6(ifa));
1122                 break;
1123 #endif
1124         }
1125 }
1126
1127 static void
1128 carp_delroute(struct carp_softc *sc)
1129 {
1130         struct ifaddr *ifa;
1131
1132         CARP_FOREACH_IFA(sc, ifa)
1133                 carp_ifa_delroute(ifa);
1134 }
1135
1136 static void
1137 carp_ifa_delroute(struct ifaddr *ifa)
1138 {
1139
1140         switch (ifa->ifa_addr->sa_family) {
1141 #ifdef INET
1142         case AF_INET:
1143                 ifa_del_loopback_route(ifa,
1144                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
1145                 in_scrubprefix(ifatoia(ifa), LLE_STATIC);
1146                 break;
1147 #endif
1148 #ifdef INET6
1149         case AF_INET6:
1150                 ifa_del_loopback_route(ifa,
1151                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
1152                 nd6_rem_ifa_lle(ifatoia6(ifa), 1);
1153                 break;
1154 #endif
1155         }
1156 }
1157
1158 int
1159 carp_master(struct ifaddr *ifa)
1160 {
1161         struct carp_softc *sc = ifa->ifa_carp;
1162
1163         return (sc->sc_state == MASTER);
1164 }
1165
1166 #ifdef INET
1167 /*
1168  * Broadcast a gratuitous ARP request containing
1169  * the virtual router MAC address for each IP address
1170  * associated with the virtual router.
1171  */
1172 static void
1173 carp_send_arp(struct carp_softc *sc)
1174 {
1175         struct ifaddr *ifa;
1176         struct in_addr addr;
1177
1178         NET_EPOCH_ASSERT();
1179
1180         CARP_FOREACH_IFA(sc, ifa) {
1181                 if (ifa->ifa_addr->sa_family != AF_INET)
1182                         continue;
1183                 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
1184                 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr));
1185         }
1186 }
1187
1188 int
1189 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
1190 {
1191         struct carp_softc *sc = ifa->ifa_carp;
1192
1193         if (sc->sc_state == MASTER) {
1194                 *enaddr = LLADDR(&sc->sc_addr);
1195                 return (1);
1196         }
1197
1198         return (0);
1199 }
1200 #endif
1201
1202 #ifdef INET6
1203 static void
1204 carp_send_na(struct carp_softc *sc)
1205 {
1206         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1207         struct ifaddr *ifa;
1208         struct in6_addr *in6;
1209
1210         CARP_FOREACH_IFA(sc, ifa) {
1211                 if (ifa->ifa_addr->sa_family != AF_INET6)
1212                         continue;
1213
1214                 in6 = IFA_IN6(ifa);
1215                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1216                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1217                 DELAY(1000);    /* XXX */
1218         }
1219 }
1220
1221 /*
1222  * Returns ifa in case it's a carp address and it is MASTER, or if the address
1223  * matches and is not a carp address.  Returns NULL otherwise.
1224  */
1225 struct ifaddr *
1226 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
1227 {
1228         struct ifaddr *ifa;
1229
1230         NET_EPOCH_ASSERT();
1231
1232         ifa = NULL;
1233         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1234                 if (ifa->ifa_addr->sa_family != AF_INET6)
1235                         continue;
1236                 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
1237                         continue;
1238                 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
1239                         ifa = NULL;
1240                 else
1241                         ifa_ref(ifa);
1242                 break;
1243         }
1244
1245         return (ifa);
1246 }
1247
1248 char *
1249 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
1250 {
1251         struct ifaddr *ifa;
1252
1253         NET_EPOCH_ASSERT();
1254
1255         IFNET_FOREACH_IFA(ifp, ifa)
1256                 if (ifa->ifa_addr->sa_family == AF_INET6 &&
1257                     IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
1258                         struct carp_softc *sc = ifa->ifa_carp;
1259                         struct m_tag *mtag;
1260
1261                         mtag = m_tag_get(PACKET_TAG_CARP,
1262                             sizeof(struct carp_softc *), M_NOWAIT);
1263                         if (mtag == NULL)
1264                                 /* Better a bit than nothing. */
1265                                 return (LLADDR(&sc->sc_addr));
1266
1267                         bcopy(&sc, mtag + 1, sizeof(sc));
1268                         m_tag_prepend(m, mtag);
1269
1270                         return (LLADDR(&sc->sc_addr));
1271                 }
1272
1273         return (NULL);
1274 }
1275 #endif /* INET6 */
1276
1277 int
1278 carp_forus(struct ifnet *ifp, u_char *dhost)
1279 {
1280         struct carp_softc *sc;
1281         uint8_t *ena = dhost;
1282
1283         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1284                 return (0);
1285
1286         CIF_LOCK(ifp->if_carp);
1287         IFNET_FOREACH_CARP(ifp, sc) {
1288                 /*
1289                  * CARP_LOCK() is not here, since would protect nothing, but
1290                  * cause deadlock with if_bridge, calling this under its lock.
1291                  */
1292                 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
1293                     ETHER_ADDR_LEN)) {
1294                         CIF_UNLOCK(ifp->if_carp);
1295                         return (1);
1296                 }
1297         }
1298         CIF_UNLOCK(ifp->if_carp);
1299
1300         return (0);
1301 }
1302
1303 /* Master down timeout event, executed in callout context. */
1304 static void
1305 carp_master_down(void *v)
1306 {
1307         struct carp_softc *sc = v;
1308         struct epoch_tracker et;
1309
1310         NET_EPOCH_ENTER(et);
1311         CARP_LOCK_ASSERT(sc);
1312
1313         CURVNET_SET(sc->sc_carpdev->if_vnet);
1314         if (sc->sc_state == BACKUP) {
1315                 carp_master_down_locked(sc, "master timed out");
1316         }
1317         CURVNET_RESTORE();
1318
1319         CARP_UNLOCK(sc);
1320         NET_EPOCH_EXIT(et);
1321 }
1322
1323 static void
1324 carp_master_down_locked(struct carp_softc *sc, const char *reason)
1325 {
1326
1327         NET_EPOCH_ASSERT();
1328         CARP_LOCK_ASSERT(sc);
1329
1330         switch (sc->sc_state) {
1331         case BACKUP:
1332                 carp_set_state(sc, MASTER, reason);
1333                 carp_send_ad_locked(sc);
1334 #ifdef INET
1335                 carp_send_arp(sc);
1336 #endif
1337 #ifdef INET6
1338                 carp_send_na(sc);
1339 #endif
1340                 carp_setrun(sc, 0);
1341                 carp_addroute(sc);
1342                 break;
1343         case INIT:
1344         case MASTER:
1345 #ifdef INVARIANTS
1346                 panic("carp: VHID %u@%s: master_down event in %s state\n",
1347                     sc->sc_vhid,
1348                     if_name(sc->sc_carpdev),
1349                     sc->sc_state ? "MASTER" : "INIT");
1350 #endif
1351                 break;
1352         }
1353 }
1354
1355 /*
1356  * When in backup state, af indicates whether to reset the master down timer
1357  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1358  */
1359 static void
1360 carp_setrun(struct carp_softc *sc, sa_family_t af)
1361 {
1362         struct timeval tv;
1363
1364         CARP_LOCK_ASSERT(sc);
1365
1366         if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
1367             sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
1368             (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) ||
1369             !V_carp_allow)
1370                 return;
1371
1372         switch (sc->sc_state) {
1373         case INIT:
1374                 carp_set_state(sc, BACKUP, "initialization complete");
1375                 carp_setrun(sc, 0);
1376                 break;
1377         case BACKUP:
1378                 callout_stop(&sc->sc_ad_tmo);
1379                 tv.tv_sec = 3 * sc->sc_advbase;
1380                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1381                 switch (af) {
1382 #ifdef INET
1383                 case AF_INET:
1384                         callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1385                             carp_master_down, sc);
1386                         break;
1387 #endif
1388 #ifdef INET6
1389                 case AF_INET6:
1390                         callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1391                             carp_master_down, sc);
1392                         break;
1393 #endif
1394                 default:
1395 #ifdef INET
1396                         if (sc->sc_naddrs)
1397                                 callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1398                                     carp_master_down, sc);
1399 #endif
1400 #ifdef INET6
1401                         if (sc->sc_naddrs6)
1402                                 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1403                                     carp_master_down, sc);
1404 #endif
1405                         break;
1406                 }
1407                 break;
1408         case MASTER:
1409                 tv.tv_sec = sc->sc_advbase;
1410                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1411                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
1412                     carp_send_ad, sc);
1413                 break;
1414         }
1415 }
1416
1417 /*
1418  * Setup multicast structures.
1419  */
1420 static int
1421 carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
1422 {
1423         struct ifnet *ifp = cif->cif_ifp;
1424         int error = 0;
1425
1426         switch (sa) {
1427 #ifdef INET
1428         case AF_INET:
1429             {
1430                 struct ip_moptions *imo = &cif->cif_imo;
1431                 struct in_mfilter *imf;
1432                 struct in_addr addr;
1433
1434                 if (ip_mfilter_first(&imo->imo_head) != NULL)
1435                         return (0);
1436
1437                 imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
1438                 ip_mfilter_init(&imo->imo_head);
1439                 imo->imo_multicast_vif = -1;
1440
1441                 addr.s_addr = htonl(INADDR_CARP_GROUP);
1442                 if ((error = in_joingroup(ifp, &addr, NULL,
1443                     &imf->imf_inm)) != 0) {
1444                         ip_mfilter_free(imf);
1445                         break;
1446                 }
1447
1448                 ip_mfilter_insert(&imo->imo_head, imf);
1449                 imo->imo_multicast_ifp = ifp;
1450                 imo->imo_multicast_ttl = CARP_DFLTTL;
1451                 imo->imo_multicast_loop = 0;
1452                 break;
1453            }
1454 #endif
1455 #ifdef INET6
1456         case AF_INET6:
1457             {
1458                 struct ip6_moptions *im6o = &cif->cif_im6o;
1459                 struct in6_mfilter *im6f[2];
1460                 struct in6_addr in6;
1461
1462                 if (ip6_mfilter_first(&im6o->im6o_head))
1463                         return (0);
1464
1465                 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
1466                 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
1467
1468                 ip6_mfilter_init(&im6o->im6o_head);
1469                 im6o->im6o_multicast_hlim = CARP_DFLTTL;
1470                 im6o->im6o_multicast_ifp = ifp;
1471
1472                 /* Join IPv6 CARP multicast group. */
1473                 bzero(&in6, sizeof(in6));
1474                 in6.s6_addr16[0] = htons(0xff02);
1475                 in6.s6_addr8[15] = 0x12;
1476                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1477                         ip6_mfilter_free(im6f[0]);
1478                         ip6_mfilter_free(im6f[1]);
1479                         break;
1480                 }
1481                 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) {
1482                         ip6_mfilter_free(im6f[0]);
1483                         ip6_mfilter_free(im6f[1]);
1484                         break;
1485                 }
1486
1487                 /* Join solicited multicast address. */
1488                 bzero(&in6, sizeof(in6));
1489                 in6.s6_addr16[0] = htons(0xff02);
1490                 in6.s6_addr32[1] = 0;
1491                 in6.s6_addr32[2] = htonl(1);
1492                 in6.s6_addr32[3] = 0;
1493                 in6.s6_addr8[12] = 0xff;
1494
1495                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1496                         ip6_mfilter_free(im6f[0]);
1497                         ip6_mfilter_free(im6f[1]);
1498                         break;
1499                 }
1500
1501                 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) {
1502                         in6_leavegroup(im6f[0]->im6f_in6m, NULL);
1503                         ip6_mfilter_free(im6f[0]);
1504                         ip6_mfilter_free(im6f[1]);
1505                         break;
1506                 }
1507                 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]);
1508                 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]);
1509                 break;
1510             }
1511 #endif
1512         }
1513
1514         return (error);
1515 }
1516
1517 /*
1518  * Free multicast structures.
1519  */
1520 static void
1521 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
1522 {
1523 #ifdef INET
1524         struct ip_moptions *imo = &cif->cif_imo;
1525         struct in_mfilter *imf;
1526 #endif
1527 #ifdef INET6
1528         struct ip6_moptions *im6o = &cif->cif_im6o;
1529         struct in6_mfilter *im6f;
1530 #endif
1531         sx_assert(&carp_sx, SA_XLOCKED);
1532
1533         switch (sa) {
1534 #ifdef INET
1535         case AF_INET:
1536                 if (cif->cif_naddrs != 0)
1537                         break;
1538
1539                 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
1540                         ip_mfilter_remove(&imo->imo_head, imf);
1541                         in_leavegroup(imf->imf_inm, NULL);
1542                         ip_mfilter_free(imf);
1543                 }
1544                 break;
1545 #endif
1546 #ifdef INET6
1547         case AF_INET6:
1548                 if (cif->cif_naddrs6 != 0)
1549                         break;
1550
1551                 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) {
1552                         ip6_mfilter_remove(&im6o->im6o_head, im6f);
1553                         in6_leavegroup(im6f->im6f_in6m, NULL);
1554                         ip6_mfilter_free(im6f);
1555                 }
1556                 break;
1557 #endif
1558         }
1559 }
1560
1561 int
1562 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
1563 {
1564         struct m_tag *mtag;
1565         struct carp_softc *sc;
1566
1567         if (!sa)
1568                 return (0);
1569
1570         switch (sa->sa_family) {
1571 #ifdef INET
1572         case AF_INET:
1573                 break;
1574 #endif
1575 #ifdef INET6
1576         case AF_INET6:
1577                 break;
1578 #endif
1579         default:
1580                 return (0);
1581         }
1582
1583         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
1584         if (mtag == NULL)
1585                 return (0);
1586
1587         bcopy(mtag + 1, &sc, sizeof(sc));
1588
1589         switch (sa->sa_family) {
1590         case AF_INET:
1591                 if (! IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)))
1592                         return (0);
1593                 break;
1594         case AF_INET6:
1595                 if (! IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6))
1596                         return (0);
1597                 break;
1598         default:
1599                 panic("Unknown af");
1600         }
1601
1602         /* Set the source MAC address to the Virtual Router MAC Address. */
1603         switch (ifp->if_type) {
1604         case IFT_ETHER:
1605         case IFT_BRIDGE:
1606         case IFT_L2VLAN: {
1607                         struct ether_header *eh;
1608
1609                         eh = mtod(m, struct ether_header *);
1610                         eh->ether_shost[0] = 0;
1611                         eh->ether_shost[1] = 0;
1612                         eh->ether_shost[2] = 0x5e;
1613                         eh->ether_shost[3] = 0;
1614                         eh->ether_shost[4] = 1;
1615                         eh->ether_shost[5] = sc->sc_vhid;
1616                 }
1617                 break;
1618         default:
1619                 printf("%s: carp is not supported for the %d interface type\n",
1620                     if_name(ifp), ifp->if_type);
1621                 return (EOPNOTSUPP);
1622         }
1623
1624         return (0);
1625 }
1626
1627 static struct carp_softc*
1628 carp_alloc(struct ifnet *ifp)
1629 {
1630         struct carp_softc *sc;
1631         struct carp_if *cif;
1632
1633         sx_assert(&carp_sx, SA_XLOCKED);
1634
1635         if ((cif = ifp->if_carp) == NULL)
1636                 cif = carp_alloc_if(ifp);
1637
1638         sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
1639
1640         sc->sc_advbase = CARP_DFLTINTV;
1641         sc->sc_vhid = -1;       /* required setting */
1642         sc->sc_init_counter = 1;
1643         sc->sc_state = INIT;
1644
1645         sc->sc_ifasiz = sizeof(struct ifaddr *);
1646         sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
1647         sc->sc_carpdev = ifp;
1648
1649         sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP);
1650         sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
1651         sc->sc_carpaddr6.s6_addr8[15] = 0x12;
1652
1653         CARP_LOCK_INIT(sc);
1654 #ifdef INET
1655         callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1656 #endif
1657 #ifdef INET6
1658         callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1659 #endif
1660         callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1661
1662         CIF_LOCK(cif);
1663         TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
1664         CIF_UNLOCK(cif);
1665
1666         mtx_lock(&carp_mtx);
1667         LIST_INSERT_HEAD(&carp_list, sc, sc_next);
1668         mtx_unlock(&carp_mtx);
1669
1670         return (sc);
1671 }
1672
1673 static void
1674 carp_grow_ifas(struct carp_softc *sc)
1675 {
1676         struct ifaddr **new;
1677
1678         new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO);
1679         CARP_LOCK(sc);
1680         bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
1681         free(sc->sc_ifas, M_CARP);
1682         sc->sc_ifas = new;
1683         sc->sc_ifasiz *= 2;
1684         CARP_UNLOCK(sc);
1685 }
1686
1687 static void
1688 carp_destroy(struct carp_softc *sc)
1689 {
1690         struct ifnet *ifp = sc->sc_carpdev;
1691         struct carp_if *cif = ifp->if_carp;
1692
1693         sx_assert(&carp_sx, SA_XLOCKED);
1694
1695         if (sc->sc_suppress)
1696                 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
1697         CARP_UNLOCK(sc);
1698
1699         CIF_LOCK(cif);
1700         TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
1701         CIF_UNLOCK(cif);
1702
1703         mtx_lock(&carp_mtx);
1704         LIST_REMOVE(sc, sc_next);
1705         mtx_unlock(&carp_mtx);
1706
1707         callout_drain(&sc->sc_ad_tmo);
1708 #ifdef INET
1709         callout_drain(&sc->sc_md_tmo);
1710 #endif
1711 #ifdef INET6
1712         callout_drain(&sc->sc_md6_tmo);
1713 #endif
1714         CARP_LOCK_DESTROY(sc);
1715
1716         free(sc->sc_ifas, M_CARP);
1717         free(sc, M_CARP);
1718 }
1719
1720 static struct carp_if*
1721 carp_alloc_if(struct ifnet *ifp)
1722 {
1723         struct carp_if *cif;
1724         int error;
1725
1726         cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
1727
1728         if ((error = ifpromisc(ifp, 1)) != 0)
1729                 printf("%s: ifpromisc(%s) failed: %d\n",
1730                     __func__, if_name(ifp), error);
1731         else
1732                 cif->cif_flags |= CIF_PROMISC;
1733
1734         CIF_LOCK_INIT(cif);
1735         cif->cif_ifp = ifp;
1736         TAILQ_INIT(&cif->cif_vrs);
1737
1738         IF_ADDR_WLOCK(ifp);
1739         ifp->if_carp = cif;
1740         if_ref(ifp);
1741         IF_ADDR_WUNLOCK(ifp);
1742
1743         return (cif);
1744 }
1745
1746 static void
1747 carp_free_if(struct carp_if *cif)
1748 {
1749         struct ifnet *ifp = cif->cif_ifp;
1750
1751         CIF_LOCK_ASSERT(cif);
1752         KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
1753             __func__));
1754
1755         IF_ADDR_WLOCK(ifp);
1756         ifp->if_carp = NULL;
1757         IF_ADDR_WUNLOCK(ifp);
1758
1759         CIF_LOCK_DESTROY(cif);
1760
1761         if (cif->cif_flags & CIF_PROMISC)
1762                 ifpromisc(ifp, 0);
1763         if_rele(ifp);
1764
1765         free(cif, M_CARP);
1766 }
1767
1768 static bool
1769 carp_carprcp(void *arg, struct carp_softc *sc, int priv)
1770 {
1771         struct carpreq *carpr = arg;
1772
1773         CARP_LOCK(sc);
1774         carpr->carpr_state = sc->sc_state;
1775         carpr->carpr_vhid = sc->sc_vhid;
1776         carpr->carpr_advbase = sc->sc_advbase;
1777         carpr->carpr_advskew = sc->sc_advskew;
1778         if (priv)
1779                 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
1780         else
1781                 bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
1782         CARP_UNLOCK(sc);
1783
1784         return (true);
1785 }
1786
1787 static int
1788 carp_ioctl_set(if_t ifp, struct carpkreq *carpr)
1789 {
1790         struct epoch_tracker et;
1791         struct carp_softc *sc = NULL;
1792         int error = 0;
1793
1794
1795         if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID ||
1796             carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0) {
1797                 return (EINVAL);
1798         }
1799
1800         if (ifp->if_carp) {
1801                 IFNET_FOREACH_CARP(ifp, sc)
1802                         if (sc->sc_vhid == carpr->carpr_vhid)
1803                                 break;
1804         }
1805         if (sc == NULL) {
1806                 sc = carp_alloc(ifp);
1807                 CARP_LOCK(sc);
1808                 sc->sc_vhid = carpr->carpr_vhid;
1809                 LLADDR(&sc->sc_addr)[0] = 0;
1810                 LLADDR(&sc->sc_addr)[1] = 0;
1811                 LLADDR(&sc->sc_addr)[2] = 0x5e;
1812                 LLADDR(&sc->sc_addr)[3] = 0;
1813                 LLADDR(&sc->sc_addr)[4] = 1;
1814                 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
1815         } else
1816                 CARP_LOCK(sc);
1817         if (carpr->carpr_advbase > 0) {
1818                 if (carpr->carpr_advbase > 255 ||
1819                     carpr->carpr_advbase < CARP_DFLTINTV) {
1820                         error = EINVAL;
1821                         goto out;
1822                 }
1823                 sc->sc_advbase = carpr->carpr_advbase;
1824         }
1825         if (carpr->carpr_advskew >= 255) {
1826                 error = EINVAL;
1827                 goto out;
1828         }
1829         sc->sc_advskew = carpr->carpr_advskew;
1830         if (carpr->carpr_addr.s_addr != INADDR_ANY)
1831                 sc->sc_carpaddr = carpr->carpr_addr;
1832         if (! IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) {
1833                 memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6,
1834                     sizeof(sc->sc_carpaddr6));
1835         }
1836         if (carpr->carpr_key[0] != '\0') {
1837                 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
1838                 carp_hmac_prepare(sc);
1839         }
1840         if (sc->sc_state != INIT &&
1841             carpr->carpr_state != sc->sc_state) {
1842                 switch (carpr->carpr_state) {
1843                 case BACKUP:
1844                         callout_stop(&sc->sc_ad_tmo);
1845                         carp_set_state(sc, BACKUP,
1846                             "user requested via ifconfig");
1847                         carp_setrun(sc, 0);
1848                         carp_delroute(sc);
1849                         break;
1850                 case MASTER:
1851                         NET_EPOCH_ENTER(et);
1852                         carp_master_down_locked(sc,
1853                             "user requested via ifconfig");
1854                         NET_EPOCH_EXIT(et);
1855                         break;
1856                 default:
1857                         break;
1858                 }
1859         }
1860
1861 out:
1862         CARP_UNLOCK(sc);
1863
1864         return (error);
1865 }
1866
1867 static int
1868 carp_ioctl_get(if_t ifp, struct ucred *cred, struct carpreq *carpr,
1869     bool (*outfn)(void *, struct carp_softc *, int), void *arg)
1870 {
1871         int priveleged;
1872         struct carp_softc *sc;
1873
1874         if (carpr->carpr_vhid < 0 || carpr->carpr_vhid > CARP_MAXVHID)
1875                 return (EINVAL);
1876         if (carpr->carpr_count < 1)
1877                 return (EMSGSIZE);
1878         if (ifp->if_carp == NULL)
1879                 return (ENOENT);
1880
1881         priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0);
1882         if (carpr->carpr_vhid != 0) {
1883                 IFNET_FOREACH_CARP(ifp, sc)
1884                         if (sc->sc_vhid == carpr->carpr_vhid)
1885                                 break;
1886                 if (sc == NULL)
1887                         return (ENOENT);
1888
1889                 if (! outfn(arg, sc, priveleged))
1890                         return (ENOMEM);
1891                 carpr->carpr_count = 1;
1892         } else  {
1893                 int count;
1894
1895                 count = 0;
1896                 IFNET_FOREACH_CARP(ifp, sc)
1897                         count++;
1898
1899                 if (count > carpr->carpr_count)
1900                         return (EMSGSIZE);
1901
1902                 IFNET_FOREACH_CARP(ifp, sc) {
1903                         if (! outfn(arg, sc, priveleged))
1904                                 return (ENOMEM);
1905                         carpr->carpr_count = count;
1906                 }
1907         }
1908
1909         return (0);
1910 }
1911
1912 int
1913 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
1914 {
1915         struct carpreq carpr;
1916         struct carpkreq carprk = { };
1917         struct ifnet *ifp;
1918         int error = 0;
1919
1920         if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr)))
1921                 return (error);
1922
1923         ifp = ifunit_ref(ifr->ifr_name);
1924         if ((error = carp_is_supported_if(ifp)) != 0)
1925                 goto out;
1926
1927         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1928                 error = EADDRNOTAVAIL;
1929                 goto out;
1930         }
1931
1932         sx_xlock(&carp_sx);
1933         switch (cmd) {
1934         case SIOCSVH:
1935                 if ((error = priv_check(td, PRIV_NETINET_CARP)))
1936                         break;
1937
1938                 memcpy(&carprk, &carpr, sizeof(carpr));
1939                 error = carp_ioctl_set(ifp, &carprk);
1940                 break;
1941
1942         case SIOCGVH:
1943                 error = carp_ioctl_get(ifp, td->td_ucred, &carpr,
1944                     carp_carprcp, &carpr);
1945                 if (error == 0) {
1946                         error = copyout(&carpr,
1947                             (char *)ifr_data_get_ptr(ifr),
1948                             carpr.carpr_count * sizeof(carpr));
1949                 }
1950                 break;
1951         default:
1952                 error = EINVAL;
1953         }
1954         sx_xunlock(&carp_sx);
1955
1956 out:
1957         if (ifp != NULL)
1958                 if_rele(ifp);
1959
1960         return (error);
1961 }
1962
1963 static int
1964 carp_get_vhid(struct ifaddr *ifa)
1965 {
1966
1967         if (ifa == NULL || ifa->ifa_carp == NULL)
1968                 return (0);
1969
1970         return (ifa->ifa_carp->sc_vhid);
1971 }
1972
1973 int
1974 carp_attach(struct ifaddr *ifa, int vhid)
1975 {
1976         struct ifnet *ifp = ifa->ifa_ifp;
1977         struct carp_if *cif = ifp->if_carp;
1978         struct carp_softc *sc;
1979         int index, error;
1980
1981         KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa));
1982
1983         switch (ifa->ifa_addr->sa_family) {
1984 #ifdef INET
1985         case AF_INET:
1986 #endif
1987 #ifdef INET6
1988         case AF_INET6:
1989 #endif
1990                 break;
1991         default:
1992                 return (EPROTOTYPE);
1993         }
1994
1995         sx_xlock(&carp_sx);
1996         if (ifp->if_carp == NULL) {
1997                 sx_xunlock(&carp_sx);
1998                 return (ENOPROTOOPT);
1999         }
2000
2001         IFNET_FOREACH_CARP(ifp, sc)
2002                 if (sc->sc_vhid == vhid)
2003                         break;
2004         if (sc == NULL) {
2005                 sx_xunlock(&carp_sx);
2006                 return (ENOENT);
2007         }
2008
2009         error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
2010         if (error) {
2011                 CIF_FREE(cif);
2012                 sx_xunlock(&carp_sx);
2013                 return (error);
2014         }
2015
2016         index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
2017         if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
2018                 carp_grow_ifas(sc);
2019
2020         switch (ifa->ifa_addr->sa_family) {
2021 #ifdef INET
2022         case AF_INET:
2023                 cif->cif_naddrs++;
2024                 sc->sc_naddrs++;
2025                 break;
2026 #endif
2027 #ifdef INET6
2028         case AF_INET6:
2029                 cif->cif_naddrs6++;
2030                 sc->sc_naddrs6++;
2031                 break;
2032 #endif
2033         }
2034
2035         ifa_ref(ifa);
2036
2037         CARP_LOCK(sc);
2038         sc->sc_ifas[index - 1] = ifa;
2039         ifa->ifa_carp = sc;
2040         carp_hmac_prepare(sc);
2041         carp_sc_state(sc);
2042         CARP_UNLOCK(sc);
2043
2044         sx_xunlock(&carp_sx);
2045
2046         return (0);
2047 }
2048
2049 void
2050 carp_detach(struct ifaddr *ifa, bool keep_cif)
2051 {
2052         struct ifnet *ifp = ifa->ifa_ifp;
2053         struct carp_if *cif = ifp->if_carp;
2054         struct carp_softc *sc = ifa->ifa_carp;
2055         int i, index;
2056
2057         KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
2058
2059         sx_xlock(&carp_sx);
2060
2061         CARP_LOCK(sc);
2062         /* Shift array. */
2063         index = sc->sc_naddrs + sc->sc_naddrs6;
2064         for (i = 0; i < index; i++)
2065                 if (sc->sc_ifas[i] == ifa)
2066                         break;
2067         KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
2068         for (; i < index - 1; i++)
2069                 sc->sc_ifas[i] = sc->sc_ifas[i+1];
2070         sc->sc_ifas[index - 1] = NULL;
2071
2072         switch (ifa->ifa_addr->sa_family) {
2073 #ifdef INET
2074         case AF_INET:
2075                 cif->cif_naddrs--;
2076                 sc->sc_naddrs--;
2077                 break;
2078 #endif
2079 #ifdef INET6
2080         case AF_INET6:
2081                 cif->cif_naddrs6--;
2082                 sc->sc_naddrs6--;
2083                 break;
2084 #endif
2085         }
2086
2087         carp_ifa_delroute(ifa);
2088         carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
2089
2090         ifa->ifa_carp = NULL;
2091         ifa_free(ifa);
2092
2093         carp_hmac_prepare(sc);
2094         carp_sc_state(sc);
2095
2096         if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)
2097                 carp_destroy(sc);
2098         else
2099                 CARP_UNLOCK(sc);
2100
2101         if (!keep_cif)
2102                 CIF_FREE(cif);
2103
2104         sx_xunlock(&carp_sx);
2105 }
2106
2107 static void
2108 carp_set_state(struct carp_softc *sc, int state, const char *reason)
2109 {
2110
2111         CARP_LOCK_ASSERT(sc);
2112
2113         if (sc->sc_state != state) {
2114                 const char *carp_states[] = { CARP_STATES };
2115                 char subsys[IFNAMSIZ+5];
2116
2117                 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
2118                     if_name(sc->sc_carpdev));
2119
2120                 CARP_LOG("%s: %s -> %s (%s)\n", subsys,
2121                     carp_states[sc->sc_state], carp_states[state], reason);
2122
2123                 sc->sc_state = state;
2124
2125                 devctl_notify("CARP", subsys, carp_states[state], NULL);
2126         }
2127 }
2128
2129 static void
2130 carp_linkstate(struct ifnet *ifp)
2131 {
2132         struct carp_softc *sc;
2133
2134         CIF_LOCK(ifp->if_carp);
2135         IFNET_FOREACH_CARP(ifp, sc) {
2136                 CARP_LOCK(sc);
2137                 carp_sc_state(sc);
2138                 CARP_UNLOCK(sc);
2139         }
2140         CIF_UNLOCK(ifp->if_carp);
2141 }
2142
2143 static void
2144 carp_sc_state(struct carp_softc *sc)
2145 {
2146
2147         CARP_LOCK_ASSERT(sc);
2148
2149         if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
2150             !(sc->sc_carpdev->if_flags & IFF_UP) ||
2151             !V_carp_allow) {
2152                 callout_stop(&sc->sc_ad_tmo);
2153 #ifdef INET
2154                 callout_stop(&sc->sc_md_tmo);
2155 #endif
2156 #ifdef INET6
2157                 callout_stop(&sc->sc_md6_tmo);
2158 #endif
2159                 carp_set_state(sc, INIT, "hardware interface down");
2160                 carp_setrun(sc, 0);
2161                 if (!sc->sc_suppress)
2162                         carp_demote_adj(V_carp_ifdown_adj, "interface down");
2163                 sc->sc_suppress = 1;
2164         } else {
2165                 carp_set_state(sc, INIT, "hardware interface up");
2166                 carp_setrun(sc, 0);
2167                 if (sc->sc_suppress)
2168                         carp_demote_adj(-V_carp_ifdown_adj, "interface up");
2169                 sc->sc_suppress = 0;
2170         }
2171 }
2172
2173 static void
2174 carp_demote_adj(int adj, char *reason)
2175 {
2176         atomic_add_int(&V_carp_demotion, adj);
2177         CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
2178         taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
2179 }
2180
2181 static int
2182 carp_allow_sysctl(SYSCTL_HANDLER_ARGS)
2183 {
2184         int new, error;
2185         struct carp_softc *sc;
2186
2187         new = V_carp_allow;
2188         error = sysctl_handle_int(oidp, &new, 0, req);
2189         if (error || !req->newptr)
2190                 return (error);
2191
2192         if (V_carp_allow != new) {
2193                 V_carp_allow = new;
2194
2195                 mtx_lock(&carp_mtx);
2196                 LIST_FOREACH(sc, &carp_list, sc_next) {
2197                         CARP_LOCK(sc);
2198                         if (curvnet == sc->sc_carpdev->if_vnet)
2199                                 carp_sc_state(sc);
2200                         CARP_UNLOCK(sc);
2201                 }
2202                 mtx_unlock(&carp_mtx);
2203         }
2204
2205         return (0);
2206 }
2207
2208 static int
2209 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS)
2210 {
2211         int new, error;
2212
2213         new = V_carp_dscp;
2214         error = sysctl_handle_int(oidp, &new, 0, req);
2215         if (error || !req->newptr)
2216                 return (error);
2217
2218         if (new < 0 || new > 63)
2219                 return (EINVAL);
2220
2221         V_carp_dscp = new;
2222
2223         return (0);
2224 }
2225
2226 static int
2227 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
2228 {
2229         int new, error;
2230
2231         new = V_carp_demotion;
2232         error = sysctl_handle_int(oidp, &new, 0, req);
2233         if (error || !req->newptr)
2234                 return (error);
2235
2236         carp_demote_adj(new, "sysctl");
2237
2238         return (0);
2239 }
2240
2241 static int
2242 nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target)
2243 {
2244         if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN))
2245                 return (EINVAL);
2246
2247         memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla));
2248         return (0);
2249 }
2250
2251 struct carp_nl_send_args {
2252         struct nlmsghdr *hdr;
2253         struct nl_pstate *npt;
2254 };
2255
2256 static bool
2257 carp_nl_send(void *arg, struct carp_softc *sc, int priv)
2258 {
2259         struct carp_nl_send_args *nlsa = arg;
2260         struct nlmsghdr *hdr = nlsa->hdr;
2261         struct nl_pstate *npt = nlsa->npt;
2262         struct nl_writer *nw = npt->nw;
2263         struct genlmsghdr *ghdr_new;
2264
2265         if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
2266                 nlmsg_abort(nw);
2267                 return (false);
2268         }
2269
2270         ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
2271         if (ghdr_new == NULL) {
2272                 nlmsg_abort(nw);
2273                 return (false);
2274         }
2275
2276         ghdr_new->cmd = CARP_NL_CMD_GET;
2277         ghdr_new->version = 0;
2278         ghdr_new->reserved = 0;
2279
2280         CARP_LOCK(sc);
2281
2282         nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid);
2283         nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state);
2284         nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase);
2285         nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew);
2286         nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr);
2287         nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6);
2288
2289         if (priv)
2290                 nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), sc->sc_key);
2291
2292         CARP_UNLOCK(sc);
2293
2294         if (! nlmsg_end(nw)) {
2295                 nlmsg_abort(nw);
2296                 return (false);
2297         }
2298
2299         return (true);
2300 }
2301
2302 struct nl_carp_parsed {
2303         unsigned int    ifindex;
2304         uint32_t        state;
2305         uint32_t        vhid;
2306         int32_t         advbase;
2307         int32_t         advskew;
2308         char            key[CARP_KEY_LEN];
2309         struct in_addr  addr;
2310         struct in6_addr addr6;
2311 };
2312
2313 #define _IN(_field)     offsetof(struct genlmsghdr, _field)
2314 #define _OUT(_field)    offsetof(struct nl_carp_parsed, _field)
2315
2316 static const struct nlattr_parser nla_p_set[] = {
2317         { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 },
2318         { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 },
2319         { .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 },
2320         { .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 },
2321         { .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key },
2322         { .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 },
2323         { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr },
2324         { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr },
2325 };
2326 static const struct nlfield_parser nlf_p_set[] = {
2327 };
2328 NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_set, nla_p_set);
2329 #undef _IN
2330 #undef _OUT
2331
2332
2333 static int
2334 carp_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt)
2335 {
2336         struct nl_carp_parsed attrs = { };
2337         struct carp_nl_send_args args;
2338         struct carpreq carpr = { };
2339         struct epoch_tracker et;
2340         if_t ifp;
2341         int error;
2342
2343         error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs);
2344         if (error != 0)
2345                 return (error);
2346
2347         NET_EPOCH_ENTER(et);
2348         ifp = ifnet_byindex_ref(attrs.ifindex);
2349         NET_EPOCH_EXIT(et);
2350
2351         if ((error = carp_is_supported_if(ifp)) != 0)
2352                 goto out;
2353
2354         hdr->nlmsg_flags |= NLM_F_MULTI;
2355         args.hdr = hdr;
2356         args.npt = npt;
2357
2358         carpr.carpr_vhid = attrs.vhid;
2359         carpr.carpr_count = CARP_MAXVHID;
2360
2361         sx_xlock(&carp_sx);
2362         error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr,
2363             carp_nl_send, &args);
2364         sx_xunlock(&carp_sx);
2365
2366         if (! nlmsg_end_dump(npt->nw, error, hdr))
2367                 error = ENOMEM;
2368
2369 out:
2370         if (ifp != NULL)
2371                 if_rele(ifp);
2372
2373         return (error);
2374 }
2375
2376 static int
2377 carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
2378 {
2379         struct nl_carp_parsed attrs = { };
2380         struct carpkreq carpr;
2381         struct epoch_tracker et;
2382         if_t ifp;
2383         int error;
2384
2385         error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs);
2386         if (error != 0)
2387                 return (error);
2388
2389         if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID)
2390                 return (EINVAL);
2391         if (attrs.state > CARP_MAXSTATE)
2392                 return (EINVAL);
2393         if (attrs.advbase < 0 || attrs.advskew < 0)
2394                 return (EINVAL);
2395         if (attrs.advbase > 255)
2396                 return (EINVAL);
2397         if (attrs.advskew >= 255)
2398                 return (EINVAL);
2399
2400         NET_EPOCH_ENTER(et);
2401         ifp = ifnet_byindex_ref(attrs.ifindex);
2402         NET_EPOCH_EXIT(et);
2403
2404         if ((error = carp_is_supported_if(ifp)) != 0)
2405                 goto out;
2406
2407         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2408                 error = EADDRNOTAVAIL;
2409                 goto out;
2410         }
2411
2412         carpr.carpr_count = 1;
2413         carpr.carpr_vhid = attrs.vhid;
2414         carpr.carpr_state = attrs.state;
2415         carpr.carpr_advbase = attrs.advbase;
2416         carpr.carpr_advskew = attrs.advskew;
2417         carpr.carpr_addr = attrs.addr;
2418         carpr.carpr_addr6 = attrs.addr6;
2419
2420         memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key));
2421
2422         sx_xlock(&carp_sx);
2423         error = carp_ioctl_set(ifp, &carpr);
2424         sx_xunlock(&carp_sx);
2425
2426 out:
2427         if (ifp != NULL)
2428                 if_rele(ifp);
2429
2430         return (error);
2431 }
2432
2433 static const struct nlhdr_parser *all_parsers[] = {
2434         &carp_parser
2435 };
2436
2437 static const struct genl_cmd carp_cmds[] = {
2438         {
2439                 .cmd_num = CARP_NL_CMD_GET,
2440                 .cmd_name = "SIOCGVH",
2441                 .cmd_cb = carp_nl_get,
2442                 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP |
2443                     GENL_CMD_CAP_HASPOL,
2444         },
2445         {
2446                 .cmd_num = CARP_NL_CMD_SET,
2447                 .cmd_name = "SIOCSVH",
2448                 .cmd_cb = carp_nl_set,
2449                 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
2450                 .cmd_priv = PRIV_NETINET_CARP,
2451         },
2452 };
2453
2454 static void
2455 carp_nl_register(void)
2456 {
2457         bool ret __diagused;
2458         int family_id __diagused;
2459
2460         NL_VERIFY_PARSERS(all_parsers);
2461         family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2,
2462             CARP_NL_CMD_MAX);
2463         MPASS(family_id != 0);
2464
2465         ret = genl_register_cmds(CARP_NL_FAMILY_NAME, carp_cmds,
2466             NL_ARRAY_LEN(carp_cmds));
2467         MPASS(ret);
2468 }
2469
2470 static void
2471 carp_nl_unregister(void)
2472 {
2473         genl_unregister_family(CARP_NL_FAMILY_NAME);
2474 }
2475
2476 static void
2477 carp_mod_cleanup(void)
2478 {
2479
2480         carp_nl_unregister();
2481
2482 #ifdef INET
2483         (void)ipproto_unregister(IPPROTO_CARP);
2484         carp_iamatch_p = NULL;
2485 #endif
2486 #ifdef INET6
2487         (void)ip6proto_unregister(IPPROTO_CARP);
2488         carp_iamatch6_p = NULL;
2489         carp_macmatch6_p = NULL;
2490 #endif
2491         carp_ioctl_p = NULL;
2492         carp_attach_p = NULL;
2493         carp_detach_p = NULL;
2494         carp_get_vhid_p = NULL;
2495         carp_linkstate_p = NULL;
2496         carp_forus_p = NULL;
2497         carp_output_p = NULL;
2498         carp_demote_adj_p = NULL;
2499         carp_master_p = NULL;
2500         mtx_unlock(&carp_mtx);
2501         taskqueue_drain(taskqueue_swi, &carp_sendall_task);
2502         mtx_destroy(&carp_mtx);
2503         sx_destroy(&carp_sx);
2504 }
2505
2506 static void
2507 ipcarp_sysinit(void)
2508 {
2509
2510         /* Load allow as tunable so to postpone carp start after module load */
2511         TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow);
2512 }
2513 VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL);
2514
2515 static int
2516 carp_mod_load(void)
2517 {
2518         int err;
2519
2520         mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
2521         sx_init(&carp_sx, "carp_sx");
2522         LIST_INIT(&carp_list);
2523         carp_get_vhid_p = carp_get_vhid;
2524         carp_forus_p = carp_forus;
2525         carp_output_p = carp_output;
2526         carp_linkstate_p = carp_linkstate;
2527         carp_ioctl_p = carp_ioctl;
2528         carp_attach_p = carp_attach;
2529         carp_detach_p = carp_detach;
2530         carp_demote_adj_p = carp_demote_adj;
2531         carp_master_p = carp_master;
2532 #ifdef INET6
2533         carp_iamatch6_p = carp_iamatch6;
2534         carp_macmatch6_p = carp_macmatch6;
2535         err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL);
2536         if (err) {
2537                 printf("carp: error %d registering with INET6\n", err);
2538                 carp_mod_cleanup();
2539                 return (err);
2540         }
2541 #endif
2542 #ifdef INET
2543         carp_iamatch_p = carp_iamatch;
2544         err = ipproto_register(IPPROTO_CARP, carp_input, NULL);
2545         if (err) {
2546                 printf("carp: error %d registering with INET\n", err);
2547                 carp_mod_cleanup();
2548                 return (err);
2549         }
2550 #endif
2551
2552         carp_nl_register();
2553
2554         return (0);
2555 }
2556
2557 static int
2558 carp_modevent(module_t mod, int type, void *data)
2559 {
2560         switch (type) {
2561         case MOD_LOAD:
2562                 return carp_mod_load();
2563                 /* NOTREACHED */
2564         case MOD_UNLOAD:
2565                 mtx_lock(&carp_mtx);
2566                 if (LIST_EMPTY(&carp_list))
2567                         carp_mod_cleanup();
2568                 else {
2569                         mtx_unlock(&carp_mtx);
2570                         return (EBUSY);
2571                 }
2572                 break;
2573
2574         default:
2575                 return (EINVAL);
2576         }
2577
2578         return (0);
2579 }
2580
2581 static moduledata_t carp_mod = {
2582         "carp",
2583         carp_modevent,
2584         0
2585 };
2586
2587 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);