]> CyberLeo.Net >> Repos - FreeBSD/releng/8.2.git/blob - sys/netinet/ip_carp.c
MFC r216919:
[FreeBSD/releng/8.2.git] / sys / netinet / ip_carp.c
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include "opt_bpf.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/conf.h>
38 #include <sys/kernel.h>
39 #include <sys/limits.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/module.h>
43 #include <sys/time.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/protosw.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/signalvar.h>
50 #include <sys/filio.h>
51 #include <sys/sockio.h>
52
53 #include <sys/socket.h>
54 #include <sys/vnode.h>
55
56 #include <machine/stdarg.h>
57
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/fddi.h>
61 #include <net/iso88025.h>
62 #include <net/if.h>
63 #include <net/if_clone.h>
64 #include <net/if_dl.h>
65 #include <net/if_types.h>
66 #include <net/route.h>
67 #include <net/vnet.h>
68
69 #ifdef INET
70 #include <netinet/in.h>
71 #include <netinet/in_var.h>
72 #include <netinet/in_systm.h>
73 #include <netinet/ip.h>
74 #include <netinet/ip_var.h>
75 #include <netinet/if_ether.h>
76 #include <machine/in_cksum.h>
77 #endif
78
79 #ifdef INET6
80 #include <netinet/icmp6.h>
81 #include <netinet/ip6.h>
82 #include <netinet6/ip6protosw.h>
83 #include <netinet6/ip6_var.h>
84 #include <netinet6/scope6_var.h>
85 #include <netinet6/nd6.h>
86 #endif
87
88 #include <crypto/sha1.h>
89 #include <netinet/ip_carp.h>
90
91 #define CARP_IFNAME     "carp"
92 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
93 SYSCTL_DECL(_net_inet_carp);
94
95 struct carp_softc {
96         struct ifnet            *sc_ifp;        /* Interface clue */
97         struct ifnet            *sc_carpdev;    /* Pointer to parent interface */
98         struct in_ifaddr        *sc_ia;         /* primary iface address */
99         struct ip_moptions       sc_imo;
100 #ifdef INET6
101         struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
102         struct ip6_moptions      sc_im6o;
103 #endif /* INET6 */
104         TAILQ_ENTRY(carp_softc)  sc_list;
105
106         enum { INIT = 0, BACKUP, MASTER }       sc_state;
107
108         int                      sc_flags_backup;
109         int                      sc_suppress;
110
111         int                      sc_sendad_errors;
112 #define CARP_SENDAD_MAX_ERRORS  3
113         int                      sc_sendad_success;
114 #define CARP_SENDAD_MIN_SUCCESS 3
115
116         int                      sc_vhid;
117         int                      sc_advskew;
118         int                      sc_naddrs;
119         int                      sc_naddrs6;
120         int                      sc_advbase;    /* seconds */
121         int                      sc_init_counter;
122         u_int64_t                sc_counter;
123
124         /* authentication */
125 #define CARP_HMAC_PAD   64
126         unsigned char sc_key[CARP_KEY_LEN];
127         unsigned char sc_pad[CARP_HMAC_PAD];
128         SHA1_CTX sc_sha1;
129
130         struct callout           sc_ad_tmo;     /* advertisement timeout */
131         struct callout           sc_md_tmo;     /* master down timeout */
132         struct callout           sc_md6_tmo;    /* master down timeout */
133         
134         LIST_ENTRY(carp_softc)   sc_next;       /* Interface clue */
135 };
136 #define SC2IFP(sc)      ((sc)->sc_ifp)
137
138 int carp_suppress_preempt = 0;
139 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 };    /* XXX for now */
140 SYSCTL_NODE(_net_inet, IPPROTO_CARP,    carp,   CTLFLAG_RW, 0,  "CARP");
141 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
142     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
143 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
144     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
145 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
146     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
147 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
148     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
149 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
150     &carp_suppress_preempt, 0, "Preemption is suppressed");
151
152 struct carpstats carpstats;
153 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
154     &carpstats, carpstats,
155     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
156
157 struct carp_if {
158         TAILQ_HEAD(, carp_softc) vhif_vrs;
159         int vhif_nvrs;
160
161         struct ifnet    *vhif_ifp;
162         struct mtx       vhif_mtx;
163 };
164
/* Indices into proto_reg[] (presumably one slot per address family). */
#define	CARP_INET	0
#define	CARP_INET6	1
static int proto_reg[] = {-1, -1};

/* Get carp_if from softc. Valid after carp_set_addr{,6}. */
#define	SC2CIF(sc)	((struct carp_if *)(sc)->sc_carpdev->if_carp)

/* One mutex per carp_if; it covers that carp_if's softc queue. */
#define	CARP_LOCK_INIT(cif)					\
	mtx_init(&(cif)->vhif_mtx, "carp_if", NULL, MTX_DEF)
#define	CARP_LOCK_DESTROY(cif)	mtx_destroy(&(cif)->vhif_mtx)
#define	CARP_LOCK_ASSERT(cif)	mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
#define	CARP_LOCK(cif)		mtx_lock(&(cif)->vhif_mtx)
#define	CARP_UNLOCK(cif)	mtx_unlock(&(cif)->vhif_mtx)

/*
 * Same operations, starting from a softc.  These dereference sc_carpdev,
 * so the softc must be attached to a parent interface.
 */
#define	CARP_SCLOCK(sc)		CARP_LOCK(SC2CIF(sc))
#define	CARP_SCUNLOCK(sc)	CARP_UNLOCK(SC2CIF(sc))
#define	CARP_SCLOCK_ASSERT(sc)	CARP_LOCK_ASSERT(SC2CIF(sc))

/* Emitted at LOG_INFO when net.inet.carp.log is at least 1. */
#define	CARP_LOG(...)	do {					\
	if (carp_opts[CARPCTL_LOG] > 0)				\
		log(LOG_INFO, __VA_ARGS__);			\
} while (0)

/* Emitted at LOG_DEBUG when net.inet.carp.log is at least 2. */
#define	CARP_DEBUG(...)	do {					\
	if (carp_opts[CARPCTL_LOG] > 1)				\
		log(LOG_DEBUG, __VA_ARGS__);			\
} while (0)
193
194 static void     carp_hmac_prepare(struct carp_softc *);
195 static void     carp_hmac_generate(struct carp_softc *, u_int32_t *,
196                     unsigned char *);
197 static int      carp_hmac_verify(struct carp_softc *, u_int32_t *,
198                     unsigned char *);
199 static void     carp_setroute(struct carp_softc *, int);
200 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
201 static int      carp_clone_create(struct if_clone *, int, caddr_t);
202 static void     carp_clone_destroy(struct ifnet *);
203 static void     carpdetach(struct carp_softc *, int);
204 static int      carp_prepare_ad(struct mbuf *, struct carp_softc *,
205                     struct carp_header *);
206 static void     carp_send_ad_all(void);
207 static void     carp_send_ad(void *);
208 static void     carp_send_ad_locked(struct carp_softc *);
209 static void     carp_send_arp(struct carp_softc *);
210 static void     carp_master_down(void *);
211 static void     carp_master_down_locked(struct carp_softc *);
212 static int      carp_ioctl(struct ifnet *, u_long, caddr_t);
213 static int      carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
214                     struct route *);
215 static void     carp_start(struct ifnet *);
216 static void     carp_setrun(struct carp_softc *, sa_family_t);
217 static void     carp_set_state(struct carp_softc *, int);
218 static int      carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
219 enum    { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
220
221 static void     carp_multicast_cleanup(struct carp_softc *);
222 static int      carp_set_addr(struct carp_softc *, struct sockaddr_in *);
223 static int      carp_del_addr(struct carp_softc *, struct sockaddr_in *);
224 static void     carp_carpdev_state_locked(struct carp_if *);
225 static void     carp_sc_state_locked(struct carp_softc *);
226 #ifdef INET6
227 static void     carp_send_na(struct carp_softc *);
228 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
229 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
230 static void     carp_multicast6_cleanup(struct carp_softc *);
231 #endif
232
233 static LIST_HEAD(, carp_softc) carpif_list;
234 static struct mtx carp_mtx;
235 IFC_SIMPLE_DECLARE(carp, 0);
236
237 static eventhandler_tag if_detach_event_tag;
238
239 static __inline u_int16_t
240 carp_cksum(struct mbuf *m, int len)
241 {
242         return (in_cksum(m, len));
243 }
244
245 static void
246 carp_hmac_prepare(struct carp_softc *sc)
247 {
248         u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
249         u_int8_t vhid = sc->sc_vhid & 0xff;
250         struct ifaddr *ifa;
251         int i, found;
252 #ifdef INET
253         struct in_addr last, cur, in;
254 #endif
255 #ifdef INET6
256         struct in6_addr last6, cur6, in6;
257 #endif
258
259         if (sc->sc_carpdev)
260                 CARP_SCLOCK(sc);
261
262         /* XXX: possible race here */
263
264         /* compute ipad from key */
265         bzero(sc->sc_pad, sizeof(sc->sc_pad));
266         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
267         for (i = 0; i < sizeof(sc->sc_pad); i++)
268                 sc->sc_pad[i] ^= 0x36;
269
270         /* precompute first part of inner hash */
271         SHA1Init(&sc->sc_sha1);
272         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
273         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
274         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
275         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
276 #ifdef INET
277         cur.s_addr = 0;
278         do {
279                 found = 0;
280                 last = cur;
281                 cur.s_addr = 0xffffffff;
282                 IF_ADDR_LOCK(SC2IFP(sc));
283                 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
284                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
285                         if (ifa->ifa_addr->sa_family == AF_INET &&
286                             ntohl(in.s_addr) > ntohl(last.s_addr) &&
287                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
288                                 cur.s_addr = in.s_addr;
289                                 found++;
290                         }
291                 }
292                 IF_ADDR_UNLOCK(SC2IFP(sc));
293                 if (found)
294                         SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
295         } while (found);
296 #endif /* INET */
297 #ifdef INET6
298         memset(&cur6, 0, sizeof(cur6));
299         do {
300                 found = 0;
301                 last6 = cur6;
302                 memset(&cur6, 0xff, sizeof(cur6));
303                 IF_ADDR_LOCK(SC2IFP(sc));
304                 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
305                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
306                         if (IN6_IS_SCOPE_EMBED(&in6))
307                                 in6.s6_addr16[1] = 0;
308                         if (ifa->ifa_addr->sa_family == AF_INET6 &&
309                             memcmp(&in6, &last6, sizeof(in6)) > 0 &&
310                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
311                                 cur6 = in6;
312                                 found++;
313                         }
314                 }
315                 IF_ADDR_UNLOCK(SC2IFP(sc));
316                 if (found)
317                         SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
318         } while (found);
319 #endif /* INET6 */
320
321         /* convert ipad to opad */
322         for (i = 0; i < sizeof(sc->sc_pad); i++)
323                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
324
325         if (sc->sc_carpdev)
326                 CARP_SCUNLOCK(sc);
327 }
328
329 static void
330 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
331     unsigned char md[20])
332 {
333         SHA1_CTX sha1ctx;
334
335         /* fetch first half of inner hash */
336         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
337
338         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
339         SHA1Final(md, &sha1ctx);
340
341         /* outer hash */
342         SHA1Init(&sha1ctx);
343         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
344         SHA1Update(&sha1ctx, md, 20);
345         SHA1Final(md, &sha1ctx);
346 }
347
348 static int
349 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
350     unsigned char md[20])
351 {
352         unsigned char md2[20];
353
354         CARP_SCLOCK_ASSERT(sc);
355
356         carp_hmac_generate(sc, counter, md2);
357
358         return (bcmp(md, md2, sizeof(md2)));
359 }
360
361 static void
362 carp_setroute(struct carp_softc *sc, int cmd)
363 {
364         struct ifaddr *ifa;
365         int s;
366
367         if (sc->sc_carpdev)
368                 CARP_SCLOCK_ASSERT(sc);
369
370         s = splnet();
371         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
372                 if (ifa->ifa_addr->sa_family == AF_INET &&
373                     sc->sc_carpdev != NULL) {
374                         int count = carp_addrcount(
375                             (struct carp_if *)sc->sc_carpdev->if_carp,
376                             ifatoia(ifa), CARP_COUNT_MASTER);
377
378                         if ((cmd == RTM_ADD && count == 1) ||
379                             (cmd == RTM_DELETE && count == 0))
380                                 rtinit(ifa, cmd, RTF_UP | RTF_HOST);
381                 }
382         }
383         splx(s);
384 }
385
386 static int
387 carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
388 {
389
390         struct carp_softc *sc;
391         struct ifnet *ifp;
392
393         sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
394         ifp = SC2IFP(sc) = if_alloc(IFT_ETHER);
395         if (ifp == NULL) {
396                 free(sc, M_CARP);
397                 return (ENOSPC);
398         }
399         
400         sc->sc_flags_backup = 0;
401         sc->sc_suppress = 0;
402         sc->sc_advbase = CARP_DFLTINTV;
403         sc->sc_vhid = -1;       /* required setting */
404         sc->sc_advskew = 0;
405         sc->sc_init_counter = 1;
406         sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
407         sc->sc_imo.imo_membership = (struct in_multi **)malloc(
408             (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
409             M_WAITOK);
410         sc->sc_imo.imo_mfilters = NULL;
411         sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
412         sc->sc_imo.imo_multicast_vif = -1;
413 #ifdef INET6
414         sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc(
415             (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
416             M_WAITOK);
417         sc->sc_im6o.im6o_mfilters = NULL;
418         sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
419         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
420 #endif
421
422         callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
423         callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
424         callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);
425         
426         ifp->if_softc = sc;
427         if_initname(ifp, CARP_IFNAME, unit);
428         ifp->if_mtu = ETHERMTU;
429         ifp->if_flags = IFF_LOOPBACK;
430         ifp->if_ioctl = carp_ioctl;
431         ifp->if_output = carp_looutput;
432         ifp->if_start = carp_start;
433         ifp->if_type = IFT_CARP;
434         ifp->if_snd.ifq_maxlen = ifqmaxlen;
435         ifp->if_hdrlen = 0;
436         if_attach(ifp);
437         bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t));
438         mtx_lock(&carp_mtx);
439         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
440         mtx_unlock(&carp_mtx);
441         return (0);
442 }
443
444 static void
445 carp_clone_destroy(struct ifnet *ifp)
446 {
447         struct carp_softc *sc = ifp->if_softc;
448
449         if (sc->sc_carpdev)
450                 CARP_SCLOCK(sc);
451         carpdetach(sc, 1);      /* Returns unlocked. */
452
453         mtx_lock(&carp_mtx);
454         LIST_REMOVE(sc, sc_next);
455         mtx_unlock(&carp_mtx);
456         bpfdetach(ifp);
457         if_detach(ifp);
458         if_free_type(ifp, IFT_ETHER);
459         free(sc->sc_imo.imo_membership, M_CARP);
460 #ifdef INET6
461         free(sc->sc_im6o.im6o_membership, M_CARP);
462 #endif
463         free(sc, M_CARP);
464 }
465
466 /*
467  * This function can be called on CARP interface destroy path,
468  * and in case of the removal of the underlying interface as
469  * well. We differentiate these two cases. In the latter case
470  * we do not cleanup our multicast memberships, since they
471  * are already freed. Also, in the latter case we do not
472  * release the lock on return, because the function will be
473  * called once more, for another CARP instance on the same
474  * interface.
475  */
476 static void
477 carpdetach(struct carp_softc *sc, int unlock)
478 {
479         struct carp_if *cif;
480
481         callout_stop(&sc->sc_ad_tmo);
482         callout_stop(&sc->sc_md_tmo);
483         callout_stop(&sc->sc_md6_tmo);
484
485         if (sc->sc_suppress)
486                 carp_suppress_preempt--;
487         sc->sc_suppress = 0;
488
489         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
490                 carp_suppress_preempt--;
491         sc->sc_sendad_errors = 0;
492
493         carp_set_state(sc, INIT);
494         SC2IFP(sc)->if_flags &= ~IFF_UP;
495         carp_setrun(sc, 0);
496         if (unlock)
497                 carp_multicast_cleanup(sc);
498 #ifdef INET6
499         carp_multicast6_cleanup(sc);
500 #endif
501
502         if (sc->sc_carpdev != NULL) {
503                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
504                 CARP_LOCK_ASSERT(cif);
505                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
506                 if (!--cif->vhif_nvrs) {
507                         ifpromisc(sc->sc_carpdev, 0);
508                         sc->sc_carpdev->if_carp = NULL;
509                         CARP_LOCK_DESTROY(cif);
510                         free(cif, M_CARP);
511                 } else if (unlock)
512                         CARP_UNLOCK(cif);
513                 sc->sc_carpdev = NULL;
514         }
515 }
516
517 /* Detach an interface from the carp. */
518 static void
519 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
520 {
521         struct carp_if *cif = (struct carp_if *)ifp->if_carp;
522         struct carp_softc *sc, *nextsc;
523
524         if (cif == NULL)
525                 return;
526
527         /*
528          * XXX: At the end of for() cycle the lock will be destroyed.
529          */
530         CARP_LOCK(cif);
531         for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
532                 nextsc = TAILQ_NEXT(sc, sc_list);
533                 carpdetach(sc, 0);
534         }
535 }
536
537 /*
538  * process input packet.
539  * we have rearranged checks order compared to the rfc,
540  * but it seems more efficient this way or not possible otherwise.
541  */
542 void
543 carp_input(struct mbuf *m, int hlen)
544 {
545         struct ip *ip = mtod(m, struct ip *);
546         struct carp_header *ch;
547         int iplen, len;
548
549         CARPSTATS_INC(carps_ipackets);
550
551         if (!carp_opts[CARPCTL_ALLOW]) {
552                 m_freem(m);
553                 return;
554         }
555
556         /* check if received on a valid carp interface */
557         if (m->m_pkthdr.rcvif->if_carp == NULL) {
558                 CARPSTATS_INC(carps_badif);
559                 CARP_DEBUG("carp_input: packet received on non-carp "
560                     "interface: %s\n",
561                     m->m_pkthdr.rcvif->if_xname);
562                 m_freem(m);
563                 return;
564         }
565
566         /* verify that the IP TTL is 255.  */
567         if (ip->ip_ttl != CARP_DFLTTL) {
568                 CARPSTATS_INC(carps_badttl);
569                 CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n",
570                     ip->ip_ttl,
571                     m->m_pkthdr.rcvif->if_xname);
572                 m_freem(m);
573                 return;
574         }
575
576         iplen = ip->ip_hl << 2;
577
578         if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
579                 CARPSTATS_INC(carps_badlen);
580                 CARP_DEBUG("carp_input: received len %zd < "
581                     "sizeof(struct carp_header) on %s\n",
582                     m->m_len - sizeof(struct ip),
583                     m->m_pkthdr.rcvif->if_xname);
584                 m_freem(m);
585                 return;
586         }
587
588         if (iplen + sizeof(*ch) < m->m_len) {
589                 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
590                         CARPSTATS_INC(carps_hdrops);
591                         CARP_DEBUG("carp_input: pullup failed\n");
592                         return;
593                 }
594                 ip = mtod(m, struct ip *);
595         }
596         ch = (struct carp_header *)((char *)ip + iplen);
597
598         /*
599          * verify that the received packet length is
600          * equal to the CARP header
601          */
602         len = iplen + sizeof(*ch);
603         if (len > m->m_pkthdr.len) {
604                 CARPSTATS_INC(carps_badlen);
605                 CARP_DEBUG("carp_input: packet too short %d on %s\n",
606                     m->m_pkthdr.len,
607                     m->m_pkthdr.rcvif->if_xname);
608                 m_freem(m);
609                 return;
610         }
611
612         if ((m = m_pullup(m, len)) == NULL) {
613                 CARPSTATS_INC(carps_hdrops);
614                 return;
615         }
616         ip = mtod(m, struct ip *);
617         ch = (struct carp_header *)((char *)ip + iplen);
618
619         /* verify the CARP checksum */
620         m->m_data += iplen;
621         if (carp_cksum(m, len - iplen)) {
622                 CARPSTATS_INC(carps_badsum);
623                 CARP_DEBUG("carp_input: checksum failed on %s\n",
624                     m->m_pkthdr.rcvif->if_xname);
625                 m_freem(m);
626                 return;
627         }
628         m->m_data -= iplen;
629
630         carp_input_c(m, ch, AF_INET);
631 }
632
#ifdef INET6
/*
 * IPv6 entry point for received CARP packets.
 *
 * Applies the same admission checks as the IPv4 path (input enabled,
 * receiving interface has CARP state, hop limit equals CARP_DFLTTL, a
 * complete CARP header is present, checksum is valid) and then hands the
 * packet to carp_input_c().  Always returns IPPROTO_DONE; the mbuf is
 * consumed on every path.
 */
int
carp6_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct carp_header *ch;
	u_int len;

	m = *mp;
	ip6 = mtod(m, struct ip6_hdr *);

	CARPSTATS_INC(carps_ipackets6);

	if (!carp_opts[CARPCTL_ALLOW])
		goto drop;

	/* check if received on a valid carp interface */
	if (m->m_pkthdr.rcvif->if_carp == NULL) {
		CARPSTATS_INC(carps_badif);
		CARP_DEBUG("carp6_input: packet received on non-carp "
		    "interface: %s\n",
		    m->m_pkthdr.rcvif->if_xname);
		goto drop;
	}

	/* verify that the IP TTL is 255 */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		CARPSTATS_INC(carps_badttl);
		CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n",
		    ip6->ip6_hlim,
		    m->m_pkthdr.rcvif->if_xname);
		goto drop;
	}

	/*
	 * verify that we have a complete carp packet.  No m_freem() on
	 * failure here: the original code returns without freeing, so
	 * IP6_EXTHDR_GET is assumed to have disposed of the mbuf.
	 */
	len = m->m_len;
	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
	if (ch == NULL) {
		CARPSTATS_INC(carps_badlen);
		CARP_DEBUG("carp6_input: packet size %u too small\n", len);
		return (IPPROTO_DONE);
	}

	/* verify the CARP checksum, temporarily skipping to the CARP header */
	m->m_data += *offp;
	if (carp_cksum(m, sizeof(*ch))) {
		CARPSTATS_INC(carps_badsum);
		CARP_DEBUG("carp6_input: checksum failed, on %s\n",
		    m->m_pkthdr.rcvif->if_xname);
		goto drop;
	}
	m->m_data -= *offp;

	carp_input_c(m, ch, AF_INET6);
	return (IPPROTO_DONE);

drop:
	m_freem(m);
	return (IPPROTO_DONE);
}
#endif /* INET6 */
694
695 static void
696 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
697 {
698         struct ifnet *ifp = m->m_pkthdr.rcvif;
699         struct carp_softc *sc;
700         u_int64_t tmp_counter;
701         struct timeval sc_tv, ch_tv;
702
703         /* verify that the VHID is valid on the receiving interface */
704         CARP_LOCK(ifp->if_carp);
705         TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
706                 if (sc->sc_vhid == ch->carp_vhid)
707                         break;
708
709         if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) &&
710             (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
711                 CARPSTATS_INC(carps_badvhid);
712                 CARP_UNLOCK(ifp->if_carp);
713                 m_freem(m);
714                 return;
715         }
716
717         getmicrotime(&SC2IFP(sc)->if_lastchange);
718         SC2IFP(sc)->if_ipackets++;
719         SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
720
721         if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
722                 struct ip *ip = mtod(m, struct ip *);
723                 uint32_t af1 = af;
724
725                 /* BPF wants net byte order */
726                 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
727                 ip->ip_off = htons(ip->ip_off);
728                 bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
729         }
730
731         /* verify the CARP version. */
732         if (ch->carp_version != CARP_VERSION) {
733                 CARPSTATS_INC(carps_badver);
734                 SC2IFP(sc)->if_ierrors++;
735                 CARP_UNLOCK(ifp->if_carp);
736                 CARP_DEBUG("%s; invalid version %d\n",
737                     SC2IFP(sc)->if_xname,
738                     ch->carp_version);
739                 m_freem(m);
740                 return;
741         }
742
743         /* verify the hash */
744         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
745                 CARPSTATS_INC(carps_badauth);
746                 SC2IFP(sc)->if_ierrors++;
747                 CARP_UNLOCK(ifp->if_carp);
748                 CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
749                 m_freem(m);
750                 return;
751         }
752
753         tmp_counter = ntohl(ch->carp_counter[0]);
754         tmp_counter = tmp_counter<<32;
755         tmp_counter += ntohl(ch->carp_counter[1]);
756
757         /* XXX Replay protection goes here */
758
759         sc->sc_init_counter = 0;
760         sc->sc_counter = tmp_counter;
761
762         sc_tv.tv_sec = sc->sc_advbase;
763         if (carp_suppress_preempt && sc->sc_advskew <  240)
764                 sc_tv.tv_usec = 240 * 1000000 / 256;
765         else
766                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
767         ch_tv.tv_sec = ch->carp_advbase;
768         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
769
770         switch (sc->sc_state) {
771         case INIT:
772                 break;
773         case MASTER:
774                 /*
775                  * If we receive an advertisement from a master who's going to
776                  * be more frequent than us, go into BACKUP state.
777                  */
778                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
779                     timevalcmp(&sc_tv, &ch_tv, ==)) {
780                         callout_stop(&sc->sc_ad_tmo);
781                         CARP_LOG("%s: MASTER -> BACKUP "
782                            "(more frequent advertisement received)\n",
783                            SC2IFP(sc)->if_xname);
784                         carp_set_state(sc, BACKUP);
785                         carp_setrun(sc, 0);
786                         carp_setroute(sc, RTM_DELETE);
787                 }
788                 break;
789         case BACKUP:
790                 /*
791                  * If we're pre-empting masters who advertise slower than us,
792                  * and this one claims to be slower, treat him as down.
793                  */
794                 if (carp_opts[CARPCTL_PREEMPT] &&
795                     timevalcmp(&sc_tv, &ch_tv, <)) {
796                         CARP_LOG("%s: BACKUP -> MASTER "
797                             "(preempting a slower master)\n",
798                             SC2IFP(sc)->if_xname);
799                         carp_master_down_locked(sc);
800                         break;
801                 }
802
803                 /*
804                  *  If the master is going to advertise at such a low frequency
805                  *  that he's guaranteed to time out, we'd might as well just
806                  *  treat him as timed out now.
807                  */
808                 sc_tv.tv_sec = sc->sc_advbase * 3;
809                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
810                         CARP_LOG("%s: BACKUP -> MASTER "
811                             "(master timed out)\n",
812                             SC2IFP(sc)->if_xname);
813                         carp_master_down_locked(sc);
814                         break;
815                 }
816
817                 /*
818                  * Otherwise, we reset the counter and wait for the next
819                  * advertisement.
820                  */
821                 carp_setrun(sc, af);
822                 break;
823         }
824
825         CARP_UNLOCK(ifp->if_carp);
826
827         m_freem(m);
828         return;
829 }
830
831 static int
832 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
833 {
834         struct m_tag *mtag;
835         struct ifnet *ifp = SC2IFP(sc);
836
837         if (sc->sc_init_counter) {
838                 /* this could also be seconds since unix epoch */
839                 sc->sc_counter = arc4random();
840                 sc->sc_counter = sc->sc_counter << 32;
841                 sc->sc_counter += arc4random();
842         } else
843                 sc->sc_counter++;
844
845         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
846         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
847
848         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
849
850         /* Tag packet for carp_output */
851         mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
852         if (mtag == NULL) {
853                 m_freem(m);
854                 SC2IFP(sc)->if_oerrors++;
855                 return (ENOMEM);
856         }
857         bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
858         m_tag_prepend(m, mtag);
859
860         return (0);
861 }
862
863 static void
864 carp_send_ad_all(void)
865 {
866         struct carp_softc *sc;
867
868         mtx_lock(&carp_mtx);
869         LIST_FOREACH(sc, &carpif_list, sc_next) {
870                 if (sc->sc_carpdev == NULL)
871                         continue;
872                 CARP_SCLOCK(sc);
873                 if ((SC2IFP(sc)->if_flags & IFF_UP) &&
874                     (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) &&
875                      sc->sc_state == MASTER)
876                         carp_send_ad_locked(sc);
877                 CARP_SCUNLOCK(sc);
878         }
879         mtx_unlock(&carp_mtx);
880 }
881
/*
 * Callout entry point for the advertisement timer (sc_ad_tmo): take the
 * softc lock and hand over to carp_send_ad_locked().  The argument is the
 * struct carp_softc that was passed to callout_reset().
 */
static void
carp_send_ad(void *v)
{
        struct carp_softc *softc;

        softc = (struct carp_softc *)v;

        CARP_SCLOCK(softc);
        carp_send_ad_locked(softc);
        CARP_SCUNLOCK(softc);
}
891
892 static void
893 carp_send_ad_locked(struct carp_softc *sc)
894 {
895         struct carp_header ch;
896         struct timeval tv;
897         struct carp_header *ch_ptr;
898         struct mbuf *m;
899         int len, advbase, advskew;
900
901         CARP_SCLOCK_ASSERT(sc);
902
903         /* bow out if we've lost our UPness or RUNNINGuiness */
904         if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
905             (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
906                 advbase = 255;
907                 advskew = 255;
908         } else {
909                 advbase = sc->sc_advbase;
910                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
911                         advskew = sc->sc_advskew;
912                 else
913                         advskew = 240;
914                 tv.tv_sec = advbase;
915                 tv.tv_usec = advskew * 1000000 / 256;
916         }
917
918         ch.carp_version = CARP_VERSION;
919         ch.carp_type = CARP_ADVERTISEMENT;
920         ch.carp_vhid = sc->sc_vhid;
921         ch.carp_advbase = advbase;
922         ch.carp_advskew = advskew;
923         ch.carp_authlen = 7;    /* XXX DEFINE */
924         ch.carp_pad1 = 0;       /* must be zero */
925         ch.carp_cksum = 0;
926
927 #ifdef INET
928         if (sc->sc_ia) {
929                 struct ip *ip;
930
931                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
932                 if (m == NULL) {
933                         SC2IFP(sc)->if_oerrors++;
934                         CARPSTATS_INC(carps_onomem);
935                         /* XXX maybe less ? */
936                         if (advbase != 255 || advskew != 255)
937                                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
938                                     carp_send_ad, sc);
939                         return;
940                 }
941                 len = sizeof(*ip) + sizeof(ch);
942                 m->m_pkthdr.len = len;
943                 m->m_pkthdr.rcvif = NULL;
944                 m->m_len = len;
945                 MH_ALIGN(m, m->m_len);
946                 m->m_flags |= M_MCAST;
947                 ip = mtod(m, struct ip *);
948                 ip->ip_v = IPVERSION;
949                 ip->ip_hl = sizeof(*ip) >> 2;
950                 ip->ip_tos = IPTOS_LOWDELAY;
951                 ip->ip_len = len;
952                 ip->ip_id = ip_newid();
953                 ip->ip_off = IP_DF;
954                 ip->ip_ttl = CARP_DFLTTL;
955                 ip->ip_p = IPPROTO_CARP;
956                 ip->ip_sum = 0;
957                 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
958                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
959
960                 ch_ptr = (struct carp_header *)(&ip[1]);
961                 bcopy(&ch, ch_ptr, sizeof(ch));
962                 if (carp_prepare_ad(m, sc, ch_ptr))
963                         return;
964
965                 m->m_data += sizeof(*ip);
966                 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
967                 m->m_data -= sizeof(*ip);
968
969                 getmicrotime(&SC2IFP(sc)->if_lastchange);
970                 SC2IFP(sc)->if_opackets++;
971                 SC2IFP(sc)->if_obytes += len;
972                 CARPSTATS_INC(carps_opackets);
973
974                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
975                         SC2IFP(sc)->if_oerrors++;
976                         if (sc->sc_sendad_errors < INT_MAX)
977                                 sc->sc_sendad_errors++;
978                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
979                                 carp_suppress_preempt++;
980                                 if (carp_suppress_preempt == 1) {
981                                         CARP_SCUNLOCK(sc);
982                                         carp_send_ad_all();
983                                         CARP_SCLOCK(sc);
984                                 }
985                         }
986                         sc->sc_sendad_success = 0;
987                 } else {
988                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
989                                 if (++sc->sc_sendad_success >=
990                                     CARP_SENDAD_MIN_SUCCESS) {
991                                         carp_suppress_preempt--;
992                                         sc->sc_sendad_errors = 0;
993                                 }
994                         } else
995                                 sc->sc_sendad_errors = 0;
996                 }
997         }
998 #endif /* INET */
999 #ifdef INET6
1000         if (sc->sc_ia6) {
1001                 struct ip6_hdr *ip6;
1002
1003                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1004                 if (m == NULL) {
1005                         SC2IFP(sc)->if_oerrors++;
1006                         CARPSTATS_INC(carps_onomem);
1007                         /* XXX maybe less ? */
1008                         if (advbase != 255 || advskew != 255)
1009                                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
1010                                     carp_send_ad, sc);
1011                         return;
1012                 }
1013                 len = sizeof(*ip6) + sizeof(ch);
1014                 m->m_pkthdr.len = len;
1015                 m->m_pkthdr.rcvif = NULL;
1016                 m->m_len = len;
1017                 MH_ALIGN(m, m->m_len);
1018                 m->m_flags |= M_MCAST;
1019                 ip6 = mtod(m, struct ip6_hdr *);
1020                 bzero(ip6, sizeof(*ip6));
1021                 ip6->ip6_vfc |= IPV6_VERSION;
1022                 ip6->ip6_hlim = CARP_DFLTTL;
1023                 ip6->ip6_nxt = IPPROTO_CARP;
1024                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
1025                     sizeof(struct in6_addr));
1026                 /* set the multicast destination */
1027
1028                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1029                 ip6->ip6_dst.s6_addr8[15] = 0x12;
1030                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1031                         SC2IFP(sc)->if_oerrors++;
1032                         m_freem(m);
1033                         CARP_DEBUG("%s: in6_setscope failed\n", __func__);
1034                         return;
1035                 }
1036
1037                 ch_ptr = (struct carp_header *)(&ip6[1]);
1038                 bcopy(&ch, ch_ptr, sizeof(ch));
1039                 if (carp_prepare_ad(m, sc, ch_ptr))
1040                         return;
1041
1042                 m->m_data += sizeof(*ip6);
1043                 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
1044                 m->m_data -= sizeof(*ip6);
1045
1046                 getmicrotime(&SC2IFP(sc)->if_lastchange);
1047                 SC2IFP(sc)->if_opackets++;
1048                 SC2IFP(sc)->if_obytes += len;
1049                 CARPSTATS_INC(carps_opackets6);
1050
1051                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1052                         SC2IFP(sc)->if_oerrors++;
1053                         if (sc->sc_sendad_errors < INT_MAX)
1054                                 sc->sc_sendad_errors++;
1055                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1056                                 carp_suppress_preempt++;
1057                                 if (carp_suppress_preempt == 1) {
1058                                         CARP_SCUNLOCK(sc);
1059                                         carp_send_ad_all();
1060                                         CARP_SCLOCK(sc);
1061                                 }
1062                         }
1063                         sc->sc_sendad_success = 0;
1064                 } else {
1065                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1066                                 if (++sc->sc_sendad_success >=
1067                                     CARP_SENDAD_MIN_SUCCESS) {
1068                                         carp_suppress_preempt--;
1069                                         sc->sc_sendad_errors = 0;
1070                                 }
1071                         } else
1072                                 sc->sc_sendad_errors = 0;
1073                 }
1074         }
1075 #endif /* INET6 */
1076
1077         if (advbase != 255 || advskew != 255)
1078                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
1079                     carp_send_ad, sc);
1080
1081 }
1082
1083 /*
1084  * Broadcast a gratuitous ARP request containing
1085  * the virtual router MAC address for each IP address
1086  * associated with the virtual router.
1087  */
1088 static void
1089 carp_send_arp(struct carp_softc *sc)
1090 {
1091         struct ifaddr *ifa;
1092
1093         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
1094
1095                 if (ifa->ifa_addr->sa_family != AF_INET)
1096                         continue;
1097
1098 /*              arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */
1099                 arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));
1100
1101                 DELAY(1000);    /* XXX */
1102         }
1103 }
1104
1105 #ifdef INET6
1106 static void
1107 carp_send_na(struct carp_softc *sc)
1108 {
1109         struct ifaddr *ifa;
1110         struct in6_addr *in6;
1111         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1112
1113         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
1114
1115                 if (ifa->ifa_addr->sa_family != AF_INET6)
1116                         continue;
1117
1118                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1119                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1120                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1121                 DELAY(1000);    /* XXX */
1122         }
1123 }
1124 #endif /* INET6 */
1125
1126 static int
1127 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
1128 {
1129         struct carp_softc *vh;
1130         struct ifaddr *ifa;
1131         int count = 0;
1132
1133         CARP_LOCK_ASSERT(cif);
1134
1135         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1136                 if ((type == CARP_COUNT_RUNNING &&
1137                     (SC2IFP(vh)->if_flags & IFF_UP) &&
1138                     (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) ||
1139                     (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1140                         IF_ADDR_LOCK(SC2IFP(vh));
1141                         TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
1142                             ifa_list) {
1143                                 if (ifa->ifa_addr->sa_family == AF_INET &&
1144                                     ia->ia_addr.sin_addr.s_addr ==
1145                                     ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1146                                         count++;
1147                         }
1148                         IF_ADDR_UNLOCK(SC2IFP(vh));
1149                 }
1150         }
1151         return (count);
1152 }
1153
1154 int
1155 carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia,
1156     struct in_addr *isaddr, u_int8_t **enaddr)
1157 {
1158         struct carp_if *cif;
1159         struct carp_softc *vh;
1160         int index, count = 0;
1161         struct ifaddr *ifa;
1162
1163         cif = ifp->if_carp;
1164         CARP_LOCK(cif);
1165
1166         if (carp_opts[CARPCTL_ARPBALANCE]) {
1167                 /*
1168                  * XXX proof of concept implementation.
1169                  * We use the source ip to decide which virtual host should
1170                  * handle the request. If we're master of that virtual host,
1171                  * then we respond, otherwise, just drop the arp packet on
1172                  * the floor.
1173                  */
1174                 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
1175                 if (count == 0) {
1176                         /* should never reach this */
1177                         CARP_UNLOCK(cif);
1178                         return (0);
1179                 }
1180
1181                 /* this should be a hash, like pf_hash() */
1182                 index = ntohl(isaddr->s_addr) % count;
1183                 count = 0;
1184
1185                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1186                         if ((SC2IFP(vh)->if_flags & IFF_UP) &&
1187                             (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) {
1188                                 IF_ADDR_LOCK(SC2IFP(vh));
1189                                 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
1190                                     ifa_list) {
1191                                         if (ifa->ifa_addr->sa_family ==
1192                                             AF_INET &&
1193                                             ia->ia_addr.sin_addr.s_addr ==
1194                                             ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
1195                                                 if (count == index) {
1196                                                         if (vh->sc_state ==
1197                                                             MASTER) {
1198                                                                 *enaddr = IF_LLADDR(vh->sc_ifp);
1199                                                                 IF_ADDR_UNLOCK(SC2IFP(vh));
1200                                                                 CARP_UNLOCK(cif);
1201                                                                 return (1);
1202                                                         } else {
1203                                                                 IF_ADDR_UNLOCK(SC2IFP(vh));
1204                                                                 CARP_UNLOCK(cif);
1205                                                                 return (0);
1206                                                         }
1207                                                 }
1208                                                 count++;
1209                                         }
1210                                 }
1211                                 IF_ADDR_UNLOCK(SC2IFP(vh));
1212                         }
1213                 }
1214         } else {
1215                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1216                         if ((SC2IFP(vh)->if_flags & IFF_UP) &&
1217                             (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
1218                             ia->ia_ifp == SC2IFP(vh) &&
1219                             vh->sc_state == MASTER) {
1220                                 *enaddr = IF_LLADDR(vh->sc_ifp);
1221                                 CARP_UNLOCK(cif);
1222                                 return (1);
1223                         }
1224                 }
1225         }
1226         CARP_UNLOCK(cif);
1227         return (0);
1228 }
1229
1230 #ifdef INET6
1231 struct ifaddr *
1232 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
1233 {
1234         struct carp_if *cif;
1235         struct carp_softc *vh;
1236         struct ifaddr *ifa;
1237
1238         cif = ifp->if_carp;
1239         CARP_LOCK(cif);
1240         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1241                 IF_ADDR_LOCK(SC2IFP(vh));
1242                 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) {
1243                         if (IN6_ARE_ADDR_EQUAL(taddr,
1244                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1245                             (SC2IFP(vh)->if_flags & IFF_UP) &&
1246                             (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
1247                             vh->sc_state == MASTER) {
1248                                 ifa_ref(ifa);
1249                                 IF_ADDR_UNLOCK(SC2IFP(vh));
1250                                 CARP_UNLOCK(cif);
1251                                 return (ifa);
1252                         }
1253                 }
1254                 IF_ADDR_UNLOCK(SC2IFP(vh));
1255         }
1256         CARP_UNLOCK(cif);
1257         
1258         return (NULL);
1259 }
1260
1261 caddr_t
1262 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
1263 {
1264         struct m_tag *mtag;
1265         struct carp_if *cif;
1266         struct carp_softc *sc;
1267         struct ifaddr *ifa;
1268
1269         cif = ifp->if_carp;
1270         CARP_LOCK(cif);
1271         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1272                 IF_ADDR_LOCK(SC2IFP(sc));
1273                 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
1274                         if (IN6_ARE_ADDR_EQUAL(taddr,
1275                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1276                             (SC2IFP(sc)->if_flags & IFF_UP) &&
1277                             (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) {
1278                                 struct ifnet *ifp = SC2IFP(sc);
1279                                 mtag = m_tag_get(PACKET_TAG_CARP,
1280                                     sizeof(struct ifnet *), M_NOWAIT);
1281                                 if (mtag == NULL) {
1282                                         /* better a bit than nothing */
1283                                         IF_ADDR_UNLOCK(SC2IFP(sc));
1284                                         CARP_UNLOCK(cif);
1285                                         return (IF_LLADDR(sc->sc_ifp));
1286                                 }
1287                                 bcopy(&ifp, (caddr_t)(mtag + 1),
1288                                     sizeof(struct ifnet *));
1289                                 m_tag_prepend(m, mtag);
1290
1291                                 IF_ADDR_UNLOCK(SC2IFP(sc));
1292                                 CARP_UNLOCK(cif);
1293                                 return (IF_LLADDR(sc->sc_ifp));
1294                         }
1295                 }
1296                 IF_ADDR_UNLOCK(SC2IFP(sc));
1297         }
1298         CARP_UNLOCK(cif);
1299
1300         return (NULL);
1301 }
1302 #endif
1303
1304 struct ifnet *
1305 carp_forus(struct ifnet *ifp, u_char *dhost)
1306 {
1307         struct carp_if *cif;
1308         struct carp_softc *vh;
1309         u_int8_t *ena = dhost;
1310
1311         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1312                 return (NULL);
1313
1314         cif = ifp->if_carp;
1315         CARP_LOCK(cif);
1316         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
1317                 if ((SC2IFP(vh)->if_flags & IFF_UP) &&
1318                     (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
1319                     vh->sc_state == MASTER &&
1320                     !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
1321                         CARP_UNLOCK(cif);
1322                         return (SC2IFP(vh));
1323                 }
1324
1325         CARP_UNLOCK(cif);
1326         return (NULL);
1327 }
1328
/*
 * Callout entry point for the master-down timers (sc_md_tmo/sc_md6_tmo):
 * take the softc lock and run carp_master_down_locked().  The argument is
 * the struct carp_softc that was passed to callout_reset().
 */
static void
carp_master_down(void *v)
{
        struct carp_softc *softc;

        softc = (struct carp_softc *)v;

        CARP_SCLOCK(softc);
        carp_master_down_locked(softc);
        CARP_SCUNLOCK(softc);
}
1338
1339 static void
1340 carp_master_down_locked(struct carp_softc *sc)
1341 {
1342         if (sc->sc_carpdev)
1343                 CARP_SCLOCK_ASSERT(sc);
1344
1345         switch (sc->sc_state) {
1346         case INIT:
1347                 printf("%s: master_down event in INIT state\n",
1348                     SC2IFP(sc)->if_xname);
1349                 break;
1350         case MASTER:
1351                 break;
1352         case BACKUP:
1353                 carp_set_state(sc, MASTER);
1354                 carp_send_ad_locked(sc);
1355                 carp_send_arp(sc);
1356 #ifdef INET6
1357                 carp_send_na(sc);
1358 #endif /* INET6 */
1359                 carp_setrun(sc, 0);
1360                 carp_setroute(sc, RTM_ADD);
1361                 break;
1362         }
1363 }
1364
1365 /*
1366  * When in backup state, af indicates whether to reset the master down timer
1367  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1368  */
1369 static void
1370 carp_setrun(struct carp_softc *sc, sa_family_t af)
1371 {
1372         struct timeval tv;
1373
1374         if (sc->sc_carpdev == NULL) {
1375                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
1376                 carp_set_state(sc, INIT);
1377                 return;
1378         } else
1379                 CARP_SCLOCK_ASSERT(sc);
1380
1381         if (SC2IFP(sc)->if_flags & IFF_UP &&
1382             sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6) &&
1383             sc->sc_carpdev->if_link_state == LINK_STATE_UP)
1384                 SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
1385         else {
1386                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
1387                 carp_setroute(sc, RTM_DELETE);
1388                 return;
1389         }
1390
1391         switch (sc->sc_state) {
1392         case INIT:
1393                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1394                         carp_send_ad_locked(sc);
1395                         carp_send_arp(sc);
1396 #ifdef INET6
1397                         carp_send_na(sc);
1398 #endif /* INET6 */
1399                         CARP_LOG("%s: INIT -> MASTER (preempting)\n",
1400                             SC2IFP(sc)->if_xname);
1401                         carp_set_state(sc, MASTER);
1402                         carp_setroute(sc, RTM_ADD);
1403                 } else {
1404                         CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname);
1405                         carp_set_state(sc, BACKUP);
1406                         carp_setroute(sc, RTM_DELETE);
1407                         carp_setrun(sc, 0);
1408                 }
1409                 break;
1410         case BACKUP:
1411                 callout_stop(&sc->sc_ad_tmo);
1412                 tv.tv_sec = 3 * sc->sc_advbase;
1413                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1414                 switch (af) {
1415 #ifdef INET
1416                 case AF_INET:
1417                         callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1418                             carp_master_down, sc);
1419                         break;
1420 #endif /* INET */
1421 #ifdef INET6
1422                 case AF_INET6:
1423                         callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1424                             carp_master_down, sc);
1425                         break;
1426 #endif /* INET6 */
1427                 default:
1428                         if (sc->sc_naddrs)
1429                                 callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1430                                     carp_master_down, sc);
1431                         if (sc->sc_naddrs6)
1432                                 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1433                                     carp_master_down, sc);
1434                         break;
1435                 }
1436                 break;
1437         case MASTER:
1438                 tv.tv_sec = sc->sc_advbase;
1439                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1440                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
1441                     carp_send_ad, sc);
1442                 break;
1443         }
1444 }
1445
1446 static void
1447 carp_multicast_cleanup(struct carp_softc *sc)
1448 {
1449         struct ip_moptions *imo = &sc->sc_imo;
1450         u_int16_t n = imo->imo_num_memberships;
1451
1452         /* Clean up our own multicast memberships */
1453         while (n-- > 0) {
1454                 if (imo->imo_membership[n] != NULL) {
1455                         in_delmulti(imo->imo_membership[n]);
1456                         imo->imo_membership[n] = NULL;
1457                 }
1458         }
1459         KASSERT(imo->imo_mfilters == NULL,
1460            ("%s: imo_mfilters != NULL", __func__));
1461         imo->imo_num_memberships = 0;
1462         imo->imo_multicast_ifp = NULL;
1463 }
1464
1465 #ifdef INET6
1466 static void
1467 carp_multicast6_cleanup(struct carp_softc *sc)
1468 {
1469         struct ip6_moptions *im6o = &sc->sc_im6o;
1470         u_int16_t n = im6o->im6o_num_memberships;
1471
1472         while (n-- > 0) {
1473                 if (im6o->im6o_membership[n] != NULL) {
1474                         in6_mc_leave(im6o->im6o_membership[n], NULL);
1475                         im6o->im6o_membership[n] = NULL;
1476                 }
1477         }
1478         KASSERT(im6o->im6o_mfilters == NULL,
1479            ("%s: im6o_mfilters != NULL", __func__));
1480         im6o->im6o_num_memberships = 0;
1481         im6o->im6o_multicast_ifp = NULL;
1482 }
1483 #endif
1484
1485 static int
1486 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1487 {
1488         struct ifnet *ifp;
1489         struct carp_if *cif;
1490         struct in_ifaddr *ia, *ia_if;
1491         struct ip_moptions *imo = &sc->sc_imo;
1492         struct in_addr addr;
1493         u_long iaddr = htonl(sin->sin_addr.s_addr);
1494         int own, error;
1495
1496         if (sin->sin_addr.s_addr == 0) {
1497                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
1498                         carp_set_state(sc, INIT);
1499                 if (sc->sc_naddrs)
1500                         SC2IFP(sc)->if_flags |= IFF_UP;
1501                 if (sc->sc_carpdev)
1502                         CARP_SCLOCK(sc);
1503                 carp_setrun(sc, 0);
1504                 if (sc->sc_carpdev)
1505                         CARP_SCUNLOCK(sc);
1506                 return (0);
1507         }
1508
1509         /* we have to do it by hands to check we won't match on us */
1510         ia_if = NULL; own = 0;
1511         IN_IFADDR_RLOCK();
1512         TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1513                 /* and, yeah, we need a multicast-capable iface too */
1514                 if (ia->ia_ifp != SC2IFP(sc) &&
1515                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1516                     (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
1517                         if (!ia_if)
1518                                 ia_if = ia;
1519                         if (sin->sin_addr.s_addr ==
1520                             ia->ia_addr.sin_addr.s_addr)
1521                                 own++;
1522                 }
1523         }
1524
1525         if (!ia_if) {
1526                 IN_IFADDR_RUNLOCK();
1527                 return (EADDRNOTAVAIL);
1528         }
1529
1530         ia = ia_if;
1531         ifa_ref(&ia->ia_ifa);
1532         IN_IFADDR_RUNLOCK();
1533
1534         ifp = ia->ia_ifp;
1535
1536         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
1537             (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) {
1538                 ifa_free(&ia->ia_ifa);
1539                 return (EADDRNOTAVAIL);
1540         }
1541
1542         if (imo->imo_num_memberships == 0) {
1543                 addr.s_addr = htonl(INADDR_CARP_GROUP);
1544                 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) ==
1545                     NULL) {
1546                         ifa_free(&ia->ia_ifa);
1547                         return (ENOBUFS);
1548                 }
1549                 imo->imo_num_memberships++;
1550                 imo->imo_multicast_ifp = ifp;
1551                 imo->imo_multicast_ttl = CARP_DFLTTL;
1552                 imo->imo_multicast_loop = 0;
1553         }
1554
1555         if (!ifp->if_carp) {
1556
1557                 cif = malloc(sizeof(*cif), M_CARP,
1558                     M_WAITOK|M_ZERO);
1559                 if (!cif) {
1560                         error = ENOBUFS;
1561                         goto cleanup;
1562                 }
1563                 if ((error = ifpromisc(ifp, 1))) {
1564                         free(cif, M_CARP);
1565                         goto cleanup;
1566                 }
1567                 
1568                 CARP_LOCK_INIT(cif);
1569                 CARP_LOCK(cif);
1570                 cif->vhif_ifp = ifp;
1571                 TAILQ_INIT(&cif->vhif_vrs);
1572                 ifp->if_carp = cif;
1573
1574         } else {
1575                 struct carp_softc *vr;
1576
1577                 cif = (struct carp_if *)ifp->if_carp;
1578                 CARP_LOCK(cif);
1579                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1580                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
1581                                 CARP_UNLOCK(cif);
1582                                 error = EEXIST;
1583                                 goto cleanup;
1584                         }
1585         }
1586         sc->sc_ia = ia;
1587         sc->sc_carpdev = ifp;
1588
1589         { /* XXX prevent endless loop if already in queue */
1590         struct carp_softc *vr, *after = NULL;
1591         int myself = 0;
1592         cif = (struct carp_if *)ifp->if_carp;
1593
1594         /* XXX: cif should not change, right? So we still hold the lock */
1595         CARP_LOCK_ASSERT(cif);
1596
1597         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1598                 if (vr == sc)
1599                         myself = 1;
1600                 if (vr->sc_vhid < sc->sc_vhid)
1601                         after = vr;
1602         }
1603
1604         if (!myself) {
1605                 /* We're trying to keep things in order */
1606                 if (after == NULL) {
1607                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1608                 } else {
1609                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
1610                 }
1611                 cif->vhif_nvrs++;
1612         }
1613         }
1614
1615         sc->sc_naddrs++;
1616         SC2IFP(sc)->if_flags |= IFF_UP;
1617         if (own)
1618                 sc->sc_advskew = 0;
1619         carp_sc_state_locked(sc);
1620         carp_setrun(sc, 0);
1621
1622         CARP_UNLOCK(cif);
1623         ifa_free(&ia->ia_ifa);  /* XXXRW: should hold reference for softc. */
1624
1625         return (0);
1626
1627 cleanup:
1628         in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1629         ifa_free(&ia->ia_ifa);
1630         return (error);
1631 }
1632
1633 static int
1634 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1635 {
1636         int error = 0;
1637
1638         if (!--sc->sc_naddrs) {
1639                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1640                 struct ip_moptions *imo = &sc->sc_imo;
1641
1642                 CARP_LOCK(cif);
1643                 callout_stop(&sc->sc_ad_tmo);
1644                 SC2IFP(sc)->if_flags &= ~IFF_UP;
1645                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
1646                 sc->sc_vhid = -1;
1647                 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1648                 imo->imo_multicast_ifp = NULL;
1649                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1650                 if (!--cif->vhif_nvrs) {
1651                         sc->sc_carpdev->if_carp = NULL;
1652                         CARP_LOCK_DESTROY(cif);
1653                         free(cif, M_CARP);
1654                 } else {
1655                         CARP_UNLOCK(cif);
1656                 }
1657         }
1658
1659         return (error);
1660 }
1661
1662 #ifdef INET6
1663 static int
1664 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1665 {
1666         struct ifnet *ifp;
1667         struct carp_if *cif;
1668         struct in6_ifaddr *ia, *ia_if;
1669         struct ip6_moptions *im6o = &sc->sc_im6o;
1670         struct in6_addr in6;
1671         int own, error;
1672
1673         error = 0;
1674
1675         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1676                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
1677                         carp_set_state(sc, INIT);
1678                 if (sc->sc_naddrs6)
1679                         SC2IFP(sc)->if_flags |= IFF_UP;
1680                 if (sc->sc_carpdev)
1681                         CARP_SCLOCK(sc);
1682                 carp_setrun(sc, 0);
1683                 if (sc->sc_carpdev)
1684                         CARP_SCUNLOCK(sc);
1685                 return (0);
1686         }
1687
1688         /* we have to do it by hands to check we won't match on us */
1689         ia_if = NULL; own = 0;
1690         IN6_IFADDR_RLOCK();
1691         TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
1692                 int i;
1693
1694                 for (i = 0; i < 4; i++) {
1695                         if ((sin6->sin6_addr.s6_addr32[i] &
1696                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1697                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
1698                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1699                                 break;
1700                 }
1701                 /* and, yeah, we need a multicast-capable iface too */
1702                 if (ia->ia_ifp != SC2IFP(sc) &&
1703                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1704                     (i == 4)) {
1705                         if (!ia_if)
1706                                 ia_if = ia;
1707                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1708                             &ia->ia_addr.sin6_addr))
1709                                 own++;
1710                 }
1711         }
1712
1713         if (!ia_if) {
1714                 IN6_IFADDR_RUNLOCK();
1715                 return (EADDRNOTAVAIL);
1716         }
1717         ia = ia_if;
1718         ifa_ref(&ia->ia_ifa);
1719         IN6_IFADDR_RUNLOCK();
1720         ifp = ia->ia_ifp;
1721
1722         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
1723             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) {
1724                 ifa_free(&ia->ia_ifa);
1725                 return (EADDRNOTAVAIL);
1726         }
1727
1728         if (!sc->sc_naddrs6) {
1729                 struct in6_multi *in6m;
1730
1731                 im6o->im6o_multicast_ifp = ifp;
1732
1733                 /* join CARP multicast address */
1734                 bzero(&in6, sizeof(in6));
1735                 in6.s6_addr16[0] = htons(0xff02);
1736                 in6.s6_addr8[15] = 0x12;
1737                 if (in6_setscope(&in6, ifp, NULL) != 0)
1738                         goto cleanup;
1739                 in6m = NULL;
1740                 error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
1741                 if (error)
1742                         goto cleanup;
1743                 im6o->im6o_membership[0] = in6m;
1744                 im6o->im6o_num_memberships++;
1745
1746                 /* join solicited multicast address */
1747                 bzero(&in6, sizeof(in6));
1748                 in6.s6_addr16[0] = htons(0xff02);
1749                 in6.s6_addr32[1] = 0;
1750                 in6.s6_addr32[2] = htonl(1);
1751                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
1752                 in6.s6_addr8[12] = 0xff;
1753                 if (in6_setscope(&in6, ifp, NULL) != 0)
1754                         goto cleanup;
1755                 in6m = NULL;
1756                 error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
1757                 if (error)
1758                         goto cleanup;
1759                 im6o->im6o_membership[1] = in6m;
1760                 im6o->im6o_num_memberships++;
1761         }
1762
1763         if (!ifp->if_carp) {
1764                 cif = malloc(sizeof(*cif), M_CARP,
1765                     M_WAITOK|M_ZERO);
1766                 if (!cif) {
1767                         error = ENOBUFS;
1768                         goto cleanup;
1769                 }
1770                 if ((error = ifpromisc(ifp, 1))) {
1771                         free(cif, M_CARP);
1772                         goto cleanup;
1773                 }
1774
1775                 CARP_LOCK_INIT(cif);
1776                 CARP_LOCK(cif);
1777                 cif->vhif_ifp = ifp;
1778                 TAILQ_INIT(&cif->vhif_vrs);
1779                 ifp->if_carp = cif;
1780
1781         } else {
1782                 struct carp_softc *vr;
1783
1784                 cif = (struct carp_if *)ifp->if_carp;
1785                 CARP_LOCK(cif);
1786                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1787                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
1788                                 CARP_UNLOCK(cif);
1789                                 error = EINVAL;
1790                                 goto cleanup;
1791                         }
1792         }
1793         sc->sc_ia6 = ia;
1794         sc->sc_carpdev = ifp;
1795
1796         { /* XXX prevent endless loop if already in queue */
1797         struct carp_softc *vr, *after = NULL;
1798         int myself = 0;
1799         cif = (struct carp_if *)ifp->if_carp;
1800         CARP_LOCK_ASSERT(cif);
1801
1802         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1803                 if (vr == sc)
1804                         myself = 1;
1805                 if (vr->sc_vhid < sc->sc_vhid)
1806                         after = vr;
1807         }
1808
1809         if (!myself) {
1810                 /* We're trying to keep things in order */
1811                 if (after == NULL) {
1812                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1813                 } else {
1814                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
1815                 }
1816                 cif->vhif_nvrs++;
1817         }
1818         }
1819
1820         sc->sc_naddrs6++;
1821         SC2IFP(sc)->if_flags |= IFF_UP;
1822         if (own)
1823                 sc->sc_advskew = 0;
1824         carp_sc_state_locked(sc);
1825         carp_setrun(sc, 0);
1826
1827         CARP_UNLOCK(cif);
1828         ifa_free(&ia->ia_ifa);  /* XXXRW: should hold reference for softc. */
1829
1830         return (0);
1831
1832 cleanup:
1833         if (!sc->sc_naddrs6)
1834                 carp_multicast6_cleanup(sc);
1835         ifa_free(&ia->ia_ifa);
1836         return (error);
1837 }
1838
1839 static int
1840 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1841 {
1842         int error = 0;
1843
1844         if (!--sc->sc_naddrs6) {
1845                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1846
1847                 CARP_LOCK(cif);
1848                 callout_stop(&sc->sc_ad_tmo);
1849                 SC2IFP(sc)->if_flags &= ~IFF_UP;
1850                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
1851                 sc->sc_vhid = -1;
1852                 carp_multicast6_cleanup(sc);
1853                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1854                 if (!--cif->vhif_nvrs) {
1855                         CARP_LOCK_DESTROY(cif);
1856                         sc->sc_carpdev->if_carp = NULL;
1857                         free(cif, M_CARP);
1858                 } else
1859                         CARP_UNLOCK(cif);
1860         }
1861
1862         return (error);
1863 }
1864 #endif /* INET6 */
1865
1866 static int
1867 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
1868 {
1869         struct carp_softc *sc = ifp->if_softc, *vr;
1870         struct carpreq carpr;
1871         struct ifaddr *ifa;
1872         struct ifreq *ifr;
1873         struct ifaliasreq *ifra;
1874         int locked = 0, error = 0;
1875
1876         ifa = (struct ifaddr *)addr;
1877         ifra = (struct ifaliasreq *)addr;
1878         ifr = (struct ifreq *)addr;
1879
1880         switch (cmd) {
1881         case SIOCSIFADDR:
1882                 switch (ifa->ifa_addr->sa_family) {
1883 #ifdef INET
1884                 case AF_INET:
1885                         SC2IFP(sc)->if_flags |= IFF_UP;
1886                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
1887                             sizeof(struct sockaddr));
1888                         error = carp_set_addr(sc, satosin(ifa->ifa_addr));
1889                         break;
1890 #endif /* INET */
1891 #ifdef INET6
1892                 case AF_INET6:
1893                         SC2IFP(sc)->if_flags |= IFF_UP;
1894                         error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
1895                         break;
1896 #endif /* INET6 */
1897                 default:
1898                         error = EAFNOSUPPORT;
1899                         break;
1900                 }
1901                 break;
1902
1903         case SIOCAIFADDR:
1904                 switch (ifa->ifa_addr->sa_family) {
1905 #ifdef INET
1906                 case AF_INET:
1907                         SC2IFP(sc)->if_flags |= IFF_UP;
1908                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
1909                             sizeof(struct sockaddr));
1910                         error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
1911                         break;
1912 #endif /* INET */
1913 #ifdef INET6
1914                 case AF_INET6:
1915                         SC2IFP(sc)->if_flags |= IFF_UP;
1916                         error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
1917                         break;
1918 #endif /* INET6 */
1919                 default:
1920                         error = EAFNOSUPPORT;
1921                         break;
1922                 }
1923                 break;
1924
1925         case SIOCDIFADDR:
1926                 switch (ifa->ifa_addr->sa_family) {
1927 #ifdef INET
1928                 case AF_INET:
1929                         error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
1930                         break;
1931 #endif /* INET */
1932 #ifdef INET6
1933                 case AF_INET6:
1934                         error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
1935                         break;
1936 #endif /* INET6 */
1937                 default:
1938                         error = EAFNOSUPPORT;
1939                         break;
1940                 }
1941                 break;
1942
1943         case SIOCSIFFLAGS:
1944                 if (sc->sc_carpdev) {
1945                         locked = 1;
1946                         CARP_SCLOCK(sc);
1947                 }
1948                 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
1949                         callout_stop(&sc->sc_ad_tmo);
1950                         callout_stop(&sc->sc_md_tmo);
1951                         callout_stop(&sc->sc_md6_tmo);
1952                         if (sc->sc_state == MASTER)
1953                                 carp_send_ad_locked(sc);
1954                         carp_set_state(sc, INIT);
1955                         carp_setrun(sc, 0);
1956                 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
1957                         SC2IFP(sc)->if_flags |= IFF_UP;
1958                         carp_setrun(sc, 0);
1959                 }
1960                 break;
1961
1962         case SIOCSVH:
1963                 error = priv_check(curthread, PRIV_NETINET_CARP);
1964                 if (error)
1965                         break;
1966                 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
1967                         break;
1968                 error = 1;
1969                 if (sc->sc_carpdev) {
1970                         locked = 1;
1971                         CARP_SCLOCK(sc);
1972                 }
1973                 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
1974                         switch (carpr.carpr_state) {
1975                         case BACKUP:
1976                                 callout_stop(&sc->sc_ad_tmo);
1977                                 carp_set_state(sc, BACKUP);
1978                                 carp_setrun(sc, 0);
1979                                 carp_setroute(sc, RTM_DELETE);
1980                                 break;
1981                         case MASTER:
1982                                 carp_master_down_locked(sc);
1983                                 break;
1984                         default:
1985                                 break;
1986                         }
1987                 }
1988                 if (carpr.carpr_vhid > 0) {
1989                         if (carpr.carpr_vhid > 255) {
1990                                 error = EINVAL;
1991                                 break;
1992                         }
1993                         if (sc->sc_carpdev) {
1994                                 struct carp_if *cif;
1995                                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1996                                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1997                                         if (vr != sc &&
1998                                             vr->sc_vhid == carpr.carpr_vhid) {
1999                                                 error = EEXIST;
2000                                                 break;
2001                                         }
2002                                 if (error == EEXIST)
2003                                         break;
2004                         }
2005                         sc->sc_vhid = carpr.carpr_vhid;
2006                         IF_LLADDR(sc->sc_ifp)[0] = 0;
2007                         IF_LLADDR(sc->sc_ifp)[1] = 0;
2008                         IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
2009                         IF_LLADDR(sc->sc_ifp)[3] = 0;
2010                         IF_LLADDR(sc->sc_ifp)[4] = 1;
2011                         IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
2012                         error--;
2013                 }
2014                 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
2015                         if (carpr.carpr_advskew >= 255) {
2016                                 error = EINVAL;
2017                                 break;
2018                         }
2019                         if (carpr.carpr_advbase > 255) {
2020                                 error = EINVAL;
2021                                 break;
2022                         }
2023                         sc->sc_advbase = carpr.carpr_advbase;
2024                         sc->sc_advskew = carpr.carpr_advskew;
2025                         error--;
2026                 }
2027                 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
2028                 if (error > 0)
2029                         error = EINVAL;
2030                 else {
2031                         error = 0;
2032                         carp_setrun(sc, 0);
2033                 }
2034                 break;
2035
2036         case SIOCGVH:
2037                 /* XXX: lockless read */
2038                 bzero(&carpr, sizeof(carpr));
2039                 carpr.carpr_state = sc->sc_state;
2040                 carpr.carpr_vhid = sc->sc_vhid;
2041                 carpr.carpr_advbase = sc->sc_advbase;
2042                 carpr.carpr_advskew = sc->sc_advskew;
2043                 error = priv_check(curthread, PRIV_NETINET_CARP);
2044                 if (error == 0)
2045                         bcopy(sc->sc_key, carpr.carpr_key,
2046                             sizeof(carpr.carpr_key));
2047                 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2048                 break;
2049
2050         default:
2051                 error = EINVAL;
2052         }
2053
2054         if (locked)
2055                 CARP_SCUNLOCK(sc);
2056
2057         carp_hmac_prepare(sc);
2058
2059         return (error);
2060 }
2061
2062 /*
2063  * XXX: this is looutput. We should eventually use it from there.
2064  */
2065 static int
2066 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2067     struct route *ro)
2068 {
2069         u_int32_t af;
2070         struct rtentry *rt = NULL;
2071
2072         M_ASSERTPKTHDR(m); /* check if we have the packet header */
2073
2074         if (ro != NULL)
2075                 rt = ro->ro_rt;
2076         if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
2077                 m_freem(m);
2078                 return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
2079                         rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
2080         }
2081
2082         ifp->if_opackets++;
2083         ifp->if_obytes += m->m_pkthdr.len;
2084
2085         /* BPF writes need to be handled specially. */
2086         if (dst->sa_family == AF_UNSPEC) {
2087                 bcopy(dst->sa_data, &af, sizeof(af));
2088                 dst->sa_family = af;
2089         }
2090
2091 #if 1   /* XXX */
2092         switch (dst->sa_family) {
2093         case AF_INET:
2094         case AF_INET6:
2095         case AF_IPX:
2096         case AF_APPLETALK:
2097                 break;
2098         default:
2099                 printf("carp_looutput: af=%d unexpected\n", dst->sa_family);
2100                 m_freem(m);
2101                 return (EAFNOSUPPORT);
2102         }
2103 #endif
2104         return(if_simloop(ifp, m, dst->sa_family, 0));
2105 }
2106
/*
 * Output start routine for the carp interface.  It is not expected to be
 * called at all; kernels built with DEBUG log the event, otherwise it is
 * a no-op.
 */
static void
carp_start(struct ifnet *ifp)
{
#ifdef DEBUG
	printf("%s: start called\n", ifp->if_xname);
#endif
}
2117
2118 int
2119 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2120     struct rtentry *rt)
2121 {
2122         struct m_tag *mtag;
2123         struct carp_softc *sc;
2124         struct ifnet *carp_ifp;
2125
2126         if (!sa)
2127                 return (0);
2128
2129         switch (sa->sa_family) {
2130 #ifdef INET
2131         case AF_INET:
2132                 break;
2133 #endif /* INET */
2134 #ifdef INET6
2135         case AF_INET6:
2136                 break;
2137 #endif /* INET6 */
2138         default:
2139                 return (0);
2140         }
2141
2142         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
2143         if (mtag == NULL)
2144                 return (0);
2145
2146         bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
2147         sc = carp_ifp->if_softc;
2148
2149         /* Set the source MAC address to Virtual Router MAC Address */
2150         switch (ifp->if_type) {
2151         case IFT_ETHER:
2152         case IFT_L2VLAN: {
2153                         struct ether_header *eh;
2154
2155                         eh = mtod(m, struct ether_header *);
2156                         eh->ether_shost[0] = 0;
2157                         eh->ether_shost[1] = 0;
2158                         eh->ether_shost[2] = 0x5e;
2159                         eh->ether_shost[3] = 0;
2160                         eh->ether_shost[4] = 1;
2161                         eh->ether_shost[5] = sc->sc_vhid;
2162                 }
2163                 break;
2164         case IFT_FDDI: {
2165                         struct fddi_header *fh;
2166
2167                         fh = mtod(m, struct fddi_header *);
2168                         fh->fddi_shost[0] = 0;
2169                         fh->fddi_shost[1] = 0;
2170                         fh->fddi_shost[2] = 0x5e;
2171                         fh->fddi_shost[3] = 0;
2172                         fh->fddi_shost[4] = 1;
2173                         fh->fddi_shost[5] = sc->sc_vhid;
2174                 }
2175                 break;
2176         case IFT_ISO88025: {
2177                         struct iso88025_header *th;
2178                         th = mtod(m, struct iso88025_header *);
2179                         th->iso88025_shost[0] = 3;
2180                         th->iso88025_shost[1] = 0;
2181                         th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
2182                         th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
2183                         th->iso88025_shost[4] = 0;
2184                         th->iso88025_shost[5] = 0;
2185                 }
2186                 break;
2187         default:
2188                 printf("%s: carp is not supported for this interface type\n",
2189                     ifp->if_xname);
2190                 return (EOPNOTSUPP);
2191         }
2192
2193         return (0);
2194 }
2195
2196 static void
2197 carp_set_state(struct carp_softc *sc, int state)
2198 {
2199         int link_state;
2200
2201         if (sc->sc_carpdev)
2202                 CARP_SCLOCK_ASSERT(sc);
2203
2204         if (sc->sc_state == state)
2205                 return;
2206
2207         sc->sc_state = state;
2208         switch (state) {
2209         case BACKUP:
2210                 link_state = LINK_STATE_DOWN;
2211                 break;
2212         case MASTER:
2213                 link_state = LINK_STATE_UP;
2214                 break;
2215         default:
2216                 link_state = LINK_STATE_UNKNOWN;
2217                 break;
2218         }
2219         if_link_state_change(SC2IFP(sc), link_state);
2220 }
2221
2222 void
2223 carp_carpdev_state(struct ifnet *ifp)
2224 {
2225         struct carp_if *cif;
2226
2227         cif = ifp->if_carp;
2228         CARP_LOCK(cif);
2229         carp_carpdev_state_locked(cif);
2230         CARP_UNLOCK(cif);
2231 }
2232
2233 static void
2234 carp_carpdev_state_locked(struct carp_if *cif)
2235 {
2236         struct carp_softc *sc;
2237
2238         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
2239                 carp_sc_state_locked(sc);
2240 }
2241
2242 static void
2243 carp_sc_state_locked(struct carp_softc *sc)
2244 {
2245         CARP_SCLOCK_ASSERT(sc);
2246
2247         if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
2248             !(sc->sc_carpdev->if_flags & IFF_UP)) {
2249                 sc->sc_flags_backup = SC2IFP(sc)->if_flags;
2250                 SC2IFP(sc)->if_flags &= ~IFF_UP;
2251                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
2252                 callout_stop(&sc->sc_ad_tmo);
2253                 callout_stop(&sc->sc_md_tmo);
2254                 callout_stop(&sc->sc_md6_tmo);
2255                 carp_set_state(sc, INIT);
2256                 carp_setrun(sc, 0);
2257                 if (!sc->sc_suppress) {
2258                         carp_suppress_preempt++;
2259                         if (carp_suppress_preempt == 1) {
2260                                 CARP_SCUNLOCK(sc);
2261                                 carp_send_ad_all();
2262                                 CARP_SCLOCK(sc);
2263                         }
2264                 }
2265                 sc->sc_suppress = 1;
2266         } else {
2267                 SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
2268                 carp_set_state(sc, INIT);
2269                 carp_setrun(sc, 0);
2270                 if (sc->sc_suppress)
2271                         carp_suppress_preempt--;
2272                 sc->sc_suppress = 0;
2273         }
2274
2275         return;
2276 }
2277
#ifdef INET
/*
 * Protocol switch entry for CARP over IPv4: input goes to carp_input(),
 * everything else is delegated to the raw IP handlers.
 */
extern struct domain inetdomain;
static struct protosw in_carp_protosw = {
	.pr_type = SOCK_RAW,
	.pr_domain = &inetdomain,
	.pr_protocol = IPPROTO_CARP,
	.pr_flags = PR_ATOMIC | PR_ADDR,
	.pr_input = carp_input,
	.pr_output = (pr_output_t *)rip_output,
	.pr_ctloutput = rip_ctloutput,
	.pr_usrreqs = &rip_usrreqs,
};
#endif
2291
#ifdef INET6
/*
 * Protocol switch entry for CARP over IPv6: input goes to carp6_input(),
 * everything else is delegated to the raw IPv6 handlers.
 */
extern struct domain inet6domain;
static struct ip6protosw in6_carp_protosw = {
	.pr_type = SOCK_RAW,
	.pr_domain = &inet6domain,
	.pr_protocol = IPPROTO_CARP,
	.pr_flags = PR_ATOMIC | PR_ADDR,
	.pr_input = carp6_input,
	.pr_output = rip6_output,
	.pr_ctloutput = rip6_ctloutput,
	.pr_usrreqs = &rip6_usrreqs,
};
#endif
2305
2306 static void
2307 carp_mod_cleanup(void)
2308 {
2309
2310         if (if_detach_event_tag == NULL)
2311                 return;
2312         EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
2313         if_clone_detach(&carp_cloner);
2314 #ifdef INET
2315         if (proto_reg[CARP_INET] == 0) {
2316                 (void)ipproto_unregister(IPPROTO_CARP);
2317                 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW);
2318                 proto_reg[CARP_INET] = -1;
2319         }
2320         carp_iamatch_p = NULL;
2321 #endif
2322 #ifdef INET6
2323         if (proto_reg[CARP_INET6] == 0) {
2324                 (void)ip6proto_unregister(IPPROTO_CARP);
2325                 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW);
2326                 proto_reg[CARP_INET6] = -1;
2327         }
2328         carp_iamatch6_p = NULL;
2329         carp_macmatch6_p = NULL;
2330 #endif
2331         carp_linkstate_p = NULL;
2332         carp_forus_p = NULL;
2333         carp_output_p = NULL;
2334         mtx_destroy(&carp_mtx);
2335 }
2336
2337 static int
2338 carp_mod_load(void)
2339 {
2340         int err;
2341
2342         if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
2343                 carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
2344         if (if_detach_event_tag == NULL)
2345                 return (ENOMEM);
2346         mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
2347         LIST_INIT(&carpif_list);
2348         if_clone_attach(&carp_cloner);
2349         carp_linkstate_p = carp_carpdev_state;
2350         carp_forus_p = carp_forus;
2351         carp_output_p = carp_output;
2352 #ifdef INET6
2353         carp_iamatch6_p = carp_iamatch6;
2354         carp_macmatch6_p = carp_macmatch6;
2355         proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
2356             (struct protosw *)&in6_carp_protosw);
2357         if (proto_reg[CARP_INET6] != 0) {
2358                 printf("carp: error %d attaching to PF_INET6\n",
2359                     proto_reg[CARP_INET6]);
2360                 carp_mod_cleanup();
2361                 return (EINVAL);
2362         }
2363         err = ip6proto_register(IPPROTO_CARP);
2364         if (err) {
2365                 printf("carp: error %d registering with INET6\n", err);
2366                 carp_mod_cleanup();
2367                 return (EINVAL);
2368         }
2369 #endif
2370 #ifdef INET
2371         carp_iamatch_p = carp_iamatch;
2372         proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
2373         if (proto_reg[CARP_INET] != 0) {
2374                 printf("carp: error %d attaching to PF_INET\n",
2375                     proto_reg[CARP_INET]);
2376                 carp_mod_cleanup();
2377                 return (EINVAL);
2378         }
2379         err = ipproto_register(IPPROTO_CARP);
2380         if (err) {
2381                 printf("carp: error %d registering with INET\n", err);
2382                 carp_mod_cleanup();
2383                 return (EINVAL);
2384         }
2385 #endif
2386         return 0;
2387 }
2388
2389 static int
2390 carp_modevent(module_t mod, int type, void *data)
2391 {
2392         switch (type) {
2393         case MOD_LOAD:
2394                 return carp_mod_load();
2395                 /* NOTREACHED */
2396         case MOD_UNLOAD:
2397                 /*
2398                  * XXX: For now, disallow module unloading by default due to
2399                  * a race condition where a thread may dereference one of the
2400                  * function pointer hooks after the module has been
2401                  * unloaded, during processing of a packet, causing a panic.
2402                  */
2403 #ifdef CARPMOD_CAN_UNLOAD
2404                 carp_mod_cleanup();
2405 #else
2406                 return (EBUSY);
2407 #endif
2408                 break;
2409
2410         default:
2411                 return (EINVAL);
2412         }
2413
2414         return (0);
2415 }
2416
2417 static moduledata_t carp_mod = {
2418         "carp",
2419         carp_modevent,
2420         0
2421 };
2422
2423 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);