]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/ip_carp.c
MFV r251624:
[FreeBSD/FreeBSD.git] / sys / netinet / ip_carp.c
1 /*-
2  * Copyright (c) 2002 Michael Shalayeff.
3  * Copyright (c) 2003 Ryan McBride.
4  * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include "opt_bpf.h"
33 #include "opt_inet.h"
34 #include "opt_inet6.h"
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/bus.h>
39 #include <sys/jail.h>
40 #include <sys/kernel.h>
41 #include <sys/limits.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/module.h>
45 #include <sys/priv.h>
46 #include <sys/proc.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/sockio.h>
50 #include <sys/sysctl.h>
51 #include <sys/syslog.h>
52 #include <sys/taskqueue.h>
53
54 #include <net/ethernet.h>
55 #include <net/fddi.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_llatbl.h>
59 #include <net/if_types.h>
60 #include <net/iso88025.h>
61 #include <net/route.h>
62 #include <net/vnet.h>
63
64 #if defined(INET) || defined(INET6)
65 #include <netinet/in.h>
66 #include <netinet/in_var.h>
67 #include <netinet/ip_carp.h>
68 #include <netinet/ip.h>
69 #include <machine/in_cksum.h>
70 #endif
71 #ifdef INET
72 #include <netinet/ip_var.h>
73 #include <netinet/if_ether.h>
74 #endif
75
76 #ifdef INET6
77 #include <netinet/icmp6.h>
78 #include <netinet/ip6.h>
79 #include <netinet6/ip6protosw.h>
80 #include <netinet6/in6_var.h>
81 #include <netinet6/ip6_var.h>
82 #include <netinet6/scope6_var.h>
83 #include <netinet6/nd6.h>
84 #endif
85
86 #include <crypto/sha1.h>
87
88 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
89
90 struct carp_softc {
91         struct ifnet            *sc_carpdev;    /* Pointer to parent ifnet. */
92         struct ifaddr           **sc_ifas;      /* Our ifaddrs. */
93         struct sockaddr_dl      sc_addr;        /* Our link level address. */
94         struct callout          sc_ad_tmo;      /* Advertising timeout. */
95 #ifdef INET
96         struct callout          sc_md_tmo;      /* Master down timeout. */
97 #endif
98 #ifdef INET6
99         struct callout          sc_md6_tmo;     /* XXX: Master down timeout. */
100 #endif
101         struct mtx              sc_mtx;
102
103         int                     sc_vhid;
104         int                     sc_advskew;
105         int                     sc_advbase;
106
107         int                     sc_naddrs;
108         int                     sc_naddrs6;
109         int                     sc_ifasiz;
110         enum { INIT = 0, BACKUP, MASTER }       sc_state;
111         int                     sc_suppress;
112         int                     sc_sendad_errors;
113 #define CARP_SENDAD_MAX_ERRORS  3
114         int                     sc_sendad_success;
115 #define CARP_SENDAD_MIN_SUCCESS 3
116
117         int                     sc_init_counter;
118         uint64_t                sc_counter;
119
120         /* authentication */
121 #define CARP_HMAC_PAD   64
122         unsigned char sc_key[CARP_KEY_LEN];
123         unsigned char sc_pad[CARP_HMAC_PAD];
124         SHA1_CTX sc_sha1;
125
126         TAILQ_ENTRY(carp_softc) sc_list;        /* On the carp_if list. */
127         LIST_ENTRY(carp_softc)  sc_next;        /* On the global list. */
128 };
129
130 struct carp_if {
131 #ifdef INET
132         int     cif_naddrs;
133 #endif
134 #ifdef INET6
135         int     cif_naddrs6;
136 #endif
137         TAILQ_HEAD(, carp_softc) cif_vrs;
138 #ifdef INET
139         struct ip_moptions       cif_imo;
140 #endif
141 #ifdef INET6
142         struct ip6_moptions      cif_im6o;
143 #endif
144         struct ifnet    *cif_ifp;
145         struct mtx      cif_mtx;
146 };
147
/* Indices into proto_reg[], one slot per address family. */
#define CARP_INET       0
#define CARP_INET6      1
static int proto_reg[] = {
        [CARP_INET] = -1,
        [CARP_INET6] = -1,
};
151
152 /*
153  * Brief design of carp(4).
154  *
155  * Any carp-capable ifnet may have a list of carp softcs hanging off
156  * its ifp->if_carp pointer. Each softc represents one unique virtual
157  * host id, or vhid. The softc has a back pointer to the ifnet. All
158  * softcs are joined in a global list, which has quite limited use.
159  *
160  * Any interface address that takes part in CARP negotiation has a
161  * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
162  * AF_INET or AF_INET6 address.
163  *
 * Although one could follow the softc's back pointer to the ifnet and
 * traverse its ifp->if_addrhead queue to find all interface addresses
 * involved in CARP, we keep a growable array of ifaddr pointers instead.
 * This lets us avoid grabbing the IF_ADDR_LOCK() in the many traversals
 * that call into the network stack, thus avoiding LORs (lock order
 * reversals).
169  *
170  * Locking:
171  *
172  * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
173  * callout-driven events and ioctl()s.
174  *
175  * To traverse the list of softcs on an ifnet we use CIF_LOCK(), to
176  * traverse the global list we use the mutex carp_mtx.
177  *
178  * Known issues with locking:
179  *
180  * - There is no protection for races between two ioctl() requests,
181  *   neither SIOCSVH, nor SIOCAIFADDR & SIOCAIFADDR_IN6. I think that all
182  *   interface ioctl()s should be serialized right in net/if.c.
183  * - Sending ad, we put the pointer to the softc in an mtag, and no reference
184  *   counting is done on the softc.
185  * - On module unload we may race (?) with packet processing thread
186  *   dereferencing our function pointers.
187  */
188
189 static int carp_allow = 1;              /* Accept incoming CARP packets. */
190 static int carp_preempt = 0;            /* Preempt slower nodes. */
191 static int carp_log = 1;                /* Log level. */
192 static int carp_demotion = 0;           /* Global advskew demotion. */
193 static int carp_senderr_adj = CARP_MAXSKEW;     /* Send error demotion factor */
194 static int carp_ifdown_adj = CARP_MAXSKEW;      /* Iface down demotion factor */
195 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
196
197 SYSCTL_NODE(_net_inet, IPPROTO_CARP,    carp,   CTLFLAG_RW, 0,  "CARP");
198 SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, &carp_allow, 0,
199     "Accept incoming CARP packets");
200 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, &carp_preempt, 0,
201     "High-priority backup preemption mode");
202 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, &carp_log, 0,
203     "CARP log level");
204 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW,
205     0, 0, carp_demote_adj_sysctl, "I",
206     "Adjust demotion factor (skew of advskew)");
207 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW,
208     &carp_senderr_adj, 0, "Send error demotion factor adjustment");
209 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW,
210     &carp_ifdown_adj, 0, "Interface down demotion factor adjustment");
211
212 static struct carpstats carpstats;
213 SYSCTL_STRUCT(_net_inet_carp, OID_AUTO, stats, CTLFLAG_RW, &carpstats,
214     carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
215
/* Per-softc mutex (struct carp_softc, sc_mtx). */
#define CARP_LOCK_INIT(sc)      mtx_init(&(sc)->sc_mtx, "carp_softc",   \
        NULL, MTX_DEF)
#define CARP_LOCK_DESTROY(sc)   mtx_destroy(&(sc)->sc_mtx)
#define CARP_LOCK_ASSERT(sc)    mtx_assert(&(sc)->sc_mtx, MA_OWNED)
#define CARP_LOCK(sc)           mtx_lock(&(sc)->sc_mtx)
#define CARP_UNLOCK(sc)         mtx_unlock(&(sc)->sc_mtx)

/* Per-interface mutex (struct carp_if, cif_mtx). */
#define CIF_LOCK_INIT(cif)      mtx_init(&(cif)->cif_mtx, "carp_if",   \
        NULL, MTX_DEF)
#define CIF_LOCK_DESTROY(cif)   mtx_destroy(&(cif)->cif_mtx)
#define CIF_LOCK_ASSERT(cif)    mtx_assert(&(cif)->cif_mtx, MA_OWNED)
#define CIF_LOCK(cif)           mtx_lock(&(cif)->cif_mtx)
#define CIF_UNLOCK(cif)         mtx_unlock(&(cif)->cif_mtx)

/*
 * Called with the cif lock held.  If no softc is left on the list the
 * carp_if is handed to carp_free_if(); otherwise the lock is dropped.
 */
#define CIF_FREE(cif)   do {                            \
                CIF_LOCK_ASSERT(cif);                   \
                if (TAILQ_EMPTY(&(cif)->cif_vrs))       \
                        carp_free_if(cif);              \
                else                                    \
                        CIF_UNLOCK(cif);                \
} while (0)

/* Informational messages, emitted when carp_log > 0. */
#define CARP_LOG(...)   do {                            \
        if (carp_log > 0)                               \
                log(LOG_INFO, "carp: " __VA_ARGS__);    \
} while (0)

/* Debug messages, emitted when carp_log > 1. */
#define CARP_DEBUG(...) do {                            \
        if (carp_log > 1)                               \
                log(LOG_DEBUG, __VA_ARGS__);            \
} while (0)

/*
 * Iteration helpers.  Each one expands to a lock assertion statement
 * followed by a loop header, so they must be used as a statement of their
 * own (never as the unbraced body of an if/else).  All macro arguments are
 * parenthesised so that arbitrary expressions may be passed.
 */

/* Visit every address on ifp that has a softc attached (ifa_carp != NULL). */
#define IFNET_FOREACH_IFA(ifp, ifa)                                     \
        IF_ADDR_LOCK_ASSERT(ifp);                                       \
        TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link)             \
                if ((ifa)->ifa_carp != NULL)

/* Visit the softc's ifaddr array, stopping at the first NULL slot. */
#define CARP_FOREACH_IFA(sc, ifa)                                       \
        CARP_LOCK_ASSERT(sc);                                           \
        for (int _i = 0;                                                \
                _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 &&              \
                ((ifa) = (sc)->sc_ifas[_i]) != NULL;                    \
                ++_i)

/* Visit every softc configured on ifp. */
#define IFNET_FOREACH_CARP(ifp, sc)                                     \
        CIF_LOCK_ASSERT((ifp)->if_carp);                                \
        TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)

/* Configured advskew plus the global demotion, capped at CARP_MAXSKEW. */
#define DEMOTE_ADVSKEW(sc)                                      \
    (((sc)->sc_advskew + carp_demotion > CARP_MAXSKEW) ?        \
    CARP_MAXSKEW : ((sc)->sc_advskew + carp_demotion))
265
266 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
267 static struct carp_softc
268                 *carp_alloc(struct ifnet *);
269 static void     carp_detach_locked(struct ifaddr *);
270 static void     carp_destroy(struct carp_softc *);
271 static struct carp_if
272                 *carp_alloc_if(struct ifnet *);
273 static void     carp_free_if(struct carp_if *);
274 static void     carp_set_state(struct carp_softc *, int);
275 static void     carp_sc_state(struct carp_softc *);
276 static void     carp_setrun(struct carp_softc *, sa_family_t);
277 static void     carp_master_down(void *);
278 static void     carp_master_down_locked(struct carp_softc *);
279 static void     carp_send_ad(void *);
280 static void     carp_send_ad_locked(struct carp_softc *);
281 static void     carp_addroute(struct carp_softc *);
282 static void     carp_ifa_addroute(struct ifaddr *);
283 static void     carp_delroute(struct carp_softc *);
284 static void     carp_ifa_delroute(struct ifaddr *);
285 static void     carp_send_ad_all(void *, int);
286 static void     carp_demote_adj(int, char *);
287
288 static LIST_HEAD(, carp_softc) carp_list;
289 static struct mtx carp_mtx;
290 static struct task carp_sendall_task =
291     TASK_INITIALIZER(0, carp_send_ad_all, NULL);
292
293 static void
294 carp_hmac_prepare(struct carp_softc *sc)
295 {
296         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
297         uint8_t vhid = sc->sc_vhid & 0xff;
298         struct ifaddr *ifa;
299         int i, found;
300 #ifdef INET
301         struct in_addr last, cur, in;
302 #endif
303 #ifdef INET6
304         struct in6_addr last6, cur6, in6;
305 #endif
306
307         CARP_LOCK_ASSERT(sc);
308
309         /* Compute ipad from key. */
310         bzero(sc->sc_pad, sizeof(sc->sc_pad));
311         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
312         for (i = 0; i < sizeof(sc->sc_pad); i++)
313                 sc->sc_pad[i] ^= 0x36;
314
315         /* Precompute first part of inner hash. */
316         SHA1Init(&sc->sc_sha1);
317         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
318         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
319         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
320         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
321 #ifdef INET
322         cur.s_addr = 0;
323         do {
324                 found = 0;
325                 last = cur;
326                 cur.s_addr = 0xffffffff;
327                 CARP_FOREACH_IFA(sc, ifa) {
328                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
329                         if (ifa->ifa_addr->sa_family == AF_INET &&
330                             ntohl(in.s_addr) > ntohl(last.s_addr) &&
331                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
332                                 cur.s_addr = in.s_addr;
333                                 found++;
334                         }
335                 }
336                 if (found)
337                         SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
338         } while (found);
339 #endif /* INET */
340 #ifdef INET6
341         memset(&cur6, 0, sizeof(cur6));
342         do {
343                 found = 0;
344                 last6 = cur6;
345                 memset(&cur6, 0xff, sizeof(cur6));
346                 CARP_FOREACH_IFA(sc, ifa) {
347                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
348                         if (IN6_IS_SCOPE_EMBED(&in6))
349                                 in6.s6_addr16[1] = 0;
350                         if (ifa->ifa_addr->sa_family == AF_INET6 &&
351                             memcmp(&in6, &last6, sizeof(in6)) > 0 &&
352                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
353                                 cur6 = in6;
354                                 found++;
355                         }
356                 }
357                 if (found)
358                         SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
359         } while (found);
360 #endif /* INET6 */
361
362         /* convert ipad to opad */
363         for (i = 0; i < sizeof(sc->sc_pad); i++)
364                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
365 }
366
367 static void
368 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
369     unsigned char md[20])
370 {
371         SHA1_CTX sha1ctx;
372
373         CARP_LOCK_ASSERT(sc);
374
375         /* fetch first half of inner hash */
376         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
377
378         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
379         SHA1Final(md, &sha1ctx);
380
381         /* outer hash */
382         SHA1Init(&sha1ctx);
383         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
384         SHA1Update(&sha1ctx, md, 20);
385         SHA1Final(md, &sha1ctx);
386 }
387
/*
 * Check a received digest against the one we compute for the same counter.
 * Returns 0 when the digests match and non-zero otherwise (bcmp()
 * semantics).  The softc lock must be held.
 */
static int
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
    unsigned char md[20])
{
        unsigned char expected[20];

        CARP_LOCK_ASSERT(sc);

        carp_hmac_generate(sc, counter, expected);

        /* NOTE(review): bcmp() is not a constant-time comparison. */
        return (bcmp(md, expected, sizeof(expected)));
}
400
/*
 * Process an input IPv4 packet.
 * We have rearranged the order of the checks compared to the RFC, since
 * it seems more efficient this way, or not possible otherwise.
 *
 * Validates the TTL, the length and the CARP checksum, and then hands the
 * packet to carp_input_c().  The mbuf is consumed on every path.  The hlen
 * argument is not used; the header length is taken from the IP header.
 */
#ifdef INET
void
carp_input(struct mbuf *m, int hlen)
{
        struct ip *ip = mtod(m, struct ip *);
        struct carp_header *ch;
        int iplen, len;

        CARPSTATS_INC(carps_ipackets);

        if (!carp_allow) {
                m_freem(m);
                return;
        }

        /* verify that the IP TTL is 255.  */
        if (ip->ip_ttl != CARP_DFLTTL) {
                CARPSTATS_INC(carps_badttl);
                CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
                    ip->ip_ttl,
                    m->m_pkthdr.rcvif->if_xname);
                m_freem(m);
                return;
        }

        iplen = ip->ip_hl << 2;
        len = iplen + sizeof(*ch);

        /* The packet must hold at least the IP and the CARP header. */
        if (m->m_pkthdr.len < len) {
                CARPSTATS_INC(carps_badlen);
                CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
                    "on %s\n", __func__, m->m_len - sizeof(struct ip),
                    m->m_pkthdr.rcvif->if_xname);
                m_freem(m);
                return;
        }

        /*
         * Make both headers contiguous, but only when the first mbuf is
         * too short to hold them.  m_pullup() frees the chain on failure.
         */
        if (m->m_len < len) {
                if ((m = m_pullup(m, len)) == NULL) {
                        CARPSTATS_INC(carps_hdrops);
                        CARP_DEBUG("%s: pullup failed\n", __func__);
                        return;
                }
                ip = mtod(m, struct ip *);
        }
        ch = (struct carp_header *)((char *)ip + iplen);

        /*
         * verify the CARP checksum; the data pointer is temporarily moved
         * past the IP header so that only the CARP header is summed.
         */
        m->m_data += iplen;
        if (in_cksum(m, len - iplen)) {
                CARPSTATS_INC(carps_badsum);
                CARP_DEBUG("%s: checksum failed on %s\n", __func__,
                    m->m_pkthdr.rcvif->if_xname);
                m_freem(m);
                return;
        }
        m->m_data -= iplen;

        carp_input_c(m, ch, AF_INET);
}
#endif
487
#ifdef INET6
/*
 * IPv6 entry point for CARP packets.  Checks the receiving interface, the
 * hop limit and the CARP checksum before passing the packet on to
 * carp_input_c().  Always returns IPPROTO_DONE.
 */
int
carp6_input(struct mbuf **mp, int *offp, int proto)
{
        struct mbuf *m = *mp;
        struct ifnet *rcvif;
        struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
        struct carp_header *ch;
        u_int len;

        CARPSTATS_INC(carps_ipackets6);

        if (!carp_allow) {
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /* The receiving interface must have CARP configured. */
        rcvif = m->m_pkthdr.rcvif;
        if (rcvif->if_carp == NULL) {
                CARPSTATS_INC(carps_badif);
                CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
                    __func__, rcvif->if_xname);
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /* The hop limit must still be 255. */
        if (ip6->ip6_hlim != CARP_DFLTTL) {
                CARPSTATS_INC(carps_badttl);
                CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
                    ip6->ip6_hlim, rcvif->if_xname);
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /*
         * Fetch the complete CARP header.  The length is saved first as
         * it is only needed for the debug message on failure, where the
         * mbuf is not touched any more.
         */
        len = m->m_len;
        IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
        if (ch == NULL) {
                CARPSTATS_INC(carps_badlen);
                CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
                return (IPPROTO_DONE);
        }

        /* Checksum the CARP header with the data pointer moved to it. */
        m->m_data += *offp;
        if (in_cksum(m, sizeof(*ch))) {
                CARPSTATS_INC(carps_badsum);
                CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
                    rcvif->if_xname);
                m_freem(m);
                return (IPPROTO_DONE);
        }
        m->m_data -= *offp;

        carp_input_c(m, ch, AF_INET6);
        return (IPPROTO_DONE);
}
#endif /* INET6 */
547
548 static void
549 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
550 {
551         struct ifnet *ifp = m->m_pkthdr.rcvif;
552         struct ifaddr *ifa;
553         struct carp_softc *sc;
554         uint64_t tmp_counter;
555         struct timeval sc_tv, ch_tv;
556
557         /* verify that the VHID is valid on the receiving interface */
558         IF_ADDR_RLOCK(ifp);
559         IFNET_FOREACH_IFA(ifp, ifa)
560                 if (ifa->ifa_addr->sa_family == af &&
561                     ifa->ifa_carp->sc_vhid == ch->carp_vhid) {
562                         ifa_ref(ifa);
563                         break;
564                 }
565         IF_ADDR_RUNLOCK(ifp);
566
567         if (ifa == NULL) {
568                 CARPSTATS_INC(carps_badvhid);
569                 m_freem(m);
570                 return;
571         }
572
573         /* verify the CARP version. */
574         if (ch->carp_version != CARP_VERSION) {
575                 CARPSTATS_INC(carps_badver);
576                 CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
577                     ch->carp_version);
578                 ifa_free(ifa);
579                 m_freem(m);
580                 return;
581         }
582
583         sc = ifa->ifa_carp;
584         CARP_LOCK(sc);
585         ifa_free(ifa);
586
587         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
588                 CARPSTATS_INC(carps_badauth);
589                 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
590                     sc->sc_vhid, ifp->if_xname);
591                 goto out;
592         }
593
594         tmp_counter = ntohl(ch->carp_counter[0]);
595         tmp_counter = tmp_counter<<32;
596         tmp_counter += ntohl(ch->carp_counter[1]);
597
598         /* XXX Replay protection goes here */
599
600         sc->sc_init_counter = 0;
601         sc->sc_counter = tmp_counter;
602
603         sc_tv.tv_sec = sc->sc_advbase;
604         sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
605         ch_tv.tv_sec = ch->carp_advbase;
606         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
607
608         switch (sc->sc_state) {
609         case INIT:
610                 break;
611         case MASTER:
612                 /*
613                  * If we receive an advertisement from a master who's going to
614                  * be more frequent than us, go into BACKUP state.
615                  */
616                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
617                     timevalcmp(&sc_tv, &ch_tv, ==)) {
618                         callout_stop(&sc->sc_ad_tmo);
619                         CARP_LOG("VHID %u@%s: MASTER -> BACKUP "
620                             "(more frequent advertisement received)\n",
621                             sc->sc_vhid,
622                             sc->sc_carpdev->if_xname);
623                         carp_set_state(sc, BACKUP);
624                         carp_setrun(sc, 0);
625                         carp_delroute(sc);
626                 }
627                 break;
628         case BACKUP:
629                 /*
630                  * If we're pre-empting masters who advertise slower than us,
631                  * and this one claims to be slower, treat him as down.
632                  */
633                 if (carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
634                         CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
635                             "(preempting a slower master)\n",
636                             sc->sc_vhid,
637                             sc->sc_carpdev->if_xname);
638                         carp_master_down_locked(sc);
639                         break;
640                 }
641
642                 /*
643                  *  If the master is going to advertise at such a low frequency
644                  *  that he's guaranteed to time out, we'd might as well just
645                  *  treat him as timed out now.
646                  */
647                 sc_tv.tv_sec = sc->sc_advbase * 3;
648                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
649                         CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
650                             "(master timed out)\n",
651                             sc->sc_vhid,
652                             sc->sc_carpdev->if_xname);
653                         carp_master_down_locked(sc);
654                         break;
655                 }
656
657                 /*
658                  * Otherwise, we reset the counter and wait for the next
659                  * advertisement.
660                  */
661                 carp_setrun(sc, af);
662                 break;
663         }
664
665 out:
666         CARP_UNLOCK(sc);
667         m_freem(m);
668 }
669
670 static int
671 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
672 {
673         struct m_tag *mtag;
674
675         if (sc->sc_init_counter) {
676                 /* this could also be seconds since unix epoch */
677                 sc->sc_counter = arc4random();
678                 sc->sc_counter = sc->sc_counter << 32;
679                 sc->sc_counter += arc4random();
680         } else
681                 sc->sc_counter++;
682
683         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
684         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
685
686         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
687
688         /* Tag packet for carp_output */
689         if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
690             M_NOWAIT)) == NULL) {
691                 m_freem(m);
692                 CARPSTATS_INC(carps_onomem);
693                 return (ENOMEM);
694         }
695         bcopy(&sc, mtag + 1, sizeof(sc));
696         m_tag_prepend(m, mtag);
697
698         return (0);
699 }
700
701 /*
702  * To avoid LORs and possible recursions this function shouldn't
703  * be called directly, but scheduled via taskqueue.
704  */
705 static void
706 carp_send_ad_all(void *ctx __unused, int pending __unused)
707 {
708         struct carp_softc *sc;
709
710         mtx_lock(&carp_mtx);
711         LIST_FOREACH(sc, &carp_list, sc_next)
712                 if (sc->sc_state == MASTER) {
713                         CARP_LOCK(sc);
714                         CURVNET_SET(sc->sc_carpdev->if_vnet);
715                         carp_send_ad_locked(sc);
716                         CURVNET_RESTORE();
717                         CARP_UNLOCK(sc);
718                 }
719         mtx_unlock(&carp_mtx);
720 }
721
722 /* Send a periodic advertisement, executed in callout context. */
723 static void
724 carp_send_ad(void *v)
725 {
726         struct carp_softc *sc = v;
727
728         CARP_LOCK_ASSERT(sc);
729         CURVNET_SET(sc->sc_carpdev->if_vnet);
730         carp_send_ad_locked(sc);
731         CURVNET_RESTORE();
732         CARP_UNLOCK(sc);
733 }
734
735 static void
736 carp_send_ad_locked(struct carp_softc *sc)
737 {
738         struct carp_header ch;
739         struct timeval tv;
740         struct sockaddr sa;
741         struct ifaddr *ifa;
742         struct carp_header *ch_ptr;
743         struct mbuf *m;
744         int len, advskew;
745
746         CARP_LOCK_ASSERT(sc);
747
748         advskew = DEMOTE_ADVSKEW(sc);
749         tv.tv_sec = sc->sc_advbase;
750         tv.tv_usec = advskew * 1000000 / 256;
751
752         ch.carp_version = CARP_VERSION;
753         ch.carp_type = CARP_ADVERTISEMENT;
754         ch.carp_vhid = sc->sc_vhid;
755         ch.carp_advbase = sc->sc_advbase;
756         ch.carp_advskew = advskew;
757         ch.carp_authlen = 7;    /* XXX DEFINE */
758         ch.carp_pad1 = 0;       /* must be zero */
759         ch.carp_cksum = 0;
760
761         /* XXXGL: OpenBSD picks first ifaddr with needed family. */
762
763 #ifdef INET
764         if (sc->sc_naddrs) {
765                 struct ip *ip;
766
767                 m = m_gethdr(M_NOWAIT, MT_DATA);
768                 if (m == NULL) {
769                         CARPSTATS_INC(carps_onomem);
770                         goto resched;
771                 }
772                 len = sizeof(*ip) + sizeof(ch);
773                 m->m_pkthdr.len = len;
774                 m->m_pkthdr.rcvif = NULL;
775                 m->m_len = len;
776                 MH_ALIGN(m, m->m_len);
777                 m->m_flags |= M_MCAST;
778                 ip = mtod(m, struct ip *);
779                 ip->ip_v = IPVERSION;
780                 ip->ip_hl = sizeof(*ip) >> 2;
781                 ip->ip_tos = IPTOS_LOWDELAY;
782                 ip->ip_len = htons(len);
783                 ip->ip_id = ip_newid();
784                 ip->ip_off = htons(IP_DF);
785                 ip->ip_ttl = CARP_DFLTTL;
786                 ip->ip_p = IPPROTO_CARP;
787                 ip->ip_sum = 0;
788
789                 bzero(&sa, sizeof(sa));
790                 sa.sa_family = AF_INET;
791                 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
792                 if (ifa != NULL) {
793                         ip->ip_src.s_addr =
794                             ifatoia(ifa)->ia_addr.sin_addr.s_addr;
795                         ifa_free(ifa);
796                 } else
797                         ip->ip_src.s_addr = 0;
798                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
799
800                 ch_ptr = (struct carp_header *)(&ip[1]);
801                 bcopy(&ch, ch_ptr, sizeof(ch));
802                 if (carp_prepare_ad(m, sc, ch_ptr))
803                         goto resched;
804
805                 m->m_data += sizeof(*ip);
806                 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
807                 m->m_data -= sizeof(*ip);
808
809                 CARPSTATS_INC(carps_opackets);
810
811                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT,
812                     &sc->sc_carpdev->if_carp->cif_imo, NULL)) {
813                         if (sc->sc_sendad_errors < INT_MAX)
814                                 sc->sc_sendad_errors++;
815                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS)
816                                 carp_demote_adj(carp_senderr_adj, "send error");
817                         sc->sc_sendad_success = 0;
818                 } else {
819                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
820                                 if (++sc->sc_sendad_success >=
821                                     CARP_SENDAD_MIN_SUCCESS) {
822                                         carp_demote_adj(-carp_senderr_adj,
823                                             "send ok");
824                                         sc->sc_sendad_errors = 0;
825                                 }
826                         } else
827                                 sc->sc_sendad_errors = 0;
828                 }
829         }
830 #endif /* INET */
831 #ifdef INET6
832         if (sc->sc_naddrs6) {
833                 struct ip6_hdr *ip6;
834
835                 m = m_gethdr(M_NOWAIT, MT_DATA);
836                 if (m == NULL) {
837                         CARPSTATS_INC(carps_onomem);
838                         goto resched;
839                 }
840                 len = sizeof(*ip6) + sizeof(ch);
841                 m->m_pkthdr.len = len;
842                 m->m_pkthdr.rcvif = NULL;
843                 m->m_len = len;
844                 MH_ALIGN(m, m->m_len);
845                 m->m_flags |= M_MCAST;
846                 ip6 = mtod(m, struct ip6_hdr *);
847                 bzero(ip6, sizeof(*ip6));
848                 ip6->ip6_vfc |= IPV6_VERSION;
849                 ip6->ip6_hlim = CARP_DFLTTL;
850                 ip6->ip6_nxt = IPPROTO_CARP;
851                 bzero(&sa, sizeof(sa));
852
853                 /* set the source address */
854                 sa.sa_family = AF_INET6;
855                 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
856                 if (ifa != NULL) {
857                         bcopy(IFA_IN6(ifa), &ip6->ip6_src,
858                             sizeof(struct in6_addr));
859                         ifa_free(ifa);
860                 } else
861                         /* This should never happen with IPv6. */
862                         bzero(&ip6->ip6_src, sizeof(struct in6_addr));
863
864                 /* Set the multicast destination. */
865                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
866                 ip6->ip6_dst.s6_addr8[15] = 0x12;
867                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
868                         m_freem(m);
869                         CARP_DEBUG("%s: in6_setscope failed\n", __func__);
870                         goto resched;
871                 }
872
873                 ch_ptr = (struct carp_header *)(&ip6[1]);
874                 bcopy(&ch, ch_ptr, sizeof(ch));
875                 if (carp_prepare_ad(m, sc, ch_ptr))
876                         goto resched;
877
878                 m->m_data += sizeof(*ip6);
879                 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
880                 m->m_data -= sizeof(*ip6);
881
882                 CARPSTATS_INC(carps_opackets6);
883
884                 if (ip6_output(m, NULL, NULL, 0,
885                     &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)) {
886                         if (sc->sc_sendad_errors < INT_MAX)
887                                 sc->sc_sendad_errors++;
888                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS)
889                                 carp_demote_adj(carp_senderr_adj,
890                                     "send6 error");
891                         sc->sc_sendad_success = 0;
892                 } else {
893                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
894                                 if (++sc->sc_sendad_success >=
895                                     CARP_SENDAD_MIN_SUCCESS) {
896                                         carp_demote_adj(-carp_senderr_adj,
897                                             "send6 ok");
898                                         sc->sc_sendad_errors = 0;
899                                 }
900                         } else
901                                 sc->sc_sendad_errors = 0;
902                 }
903         }
904 #endif /* INET6 */
905
906 resched:
907         callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
908 }
909
910 static void
911 carp_addroute(struct carp_softc *sc)
912 {
913         struct ifaddr *ifa;
914
915         CARP_FOREACH_IFA(sc, ifa)
916                 carp_ifa_addroute(ifa);
917 }
918
919 static void
920 carp_ifa_addroute(struct ifaddr *ifa)
921 {
922
923         switch (ifa->ifa_addr->sa_family) {
924 #ifdef INET
925         case AF_INET:
926                 in_addprefix(ifatoia(ifa), RTF_UP);
927                 ifa_add_loopback_route(ifa,
928                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
929                 break;
930 #endif
931 #ifdef INET6
932         case AF_INET6:
933                 ifa_add_loopback_route(ifa,
934                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
935                 in6_ifaddloop(ifa);
936                 break;
937 #endif
938         }
939 }
940
941 static void
942 carp_delroute(struct carp_softc *sc)
943 {
944         struct ifaddr *ifa;
945
946         CARP_FOREACH_IFA(sc, ifa)
947                 carp_ifa_delroute(ifa);
948 }
949
950 static void
951 carp_ifa_delroute(struct ifaddr *ifa)
952 {
953
954         switch (ifa->ifa_addr->sa_family) {
955 #ifdef INET
956         case AF_INET:
957                 ifa_del_loopback_route(ifa,
958                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
959                 in_scrubprefix(ifatoia(ifa), LLE_STATIC);
960                 break;
961 #endif
962 #ifdef INET6
963         case AF_INET6:
964                 ifa_del_loopback_route(ifa,
965                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
966                 in6_ifremloop(ifa);
967                 break;
968 #endif
969         }
970 }
971
972 int
973 carp_master(struct ifaddr *ifa)
974 {
975         struct carp_softc *sc = ifa->ifa_carp;
976
977         return (sc->sc_state == MASTER);
978 }
979
980 #ifdef INET
981 /*
982  * Broadcast a gratuitous ARP request containing
983  * the virtual router MAC address for each IP address
984  * associated with the virtual router.
985  */
986 static void
987 carp_send_arp(struct carp_softc *sc)
988 {
989         struct ifaddr *ifa;
990
991         CARP_FOREACH_IFA(sc, ifa)
992                 if (ifa->ifa_addr->sa_family == AF_INET)
993                         arp_ifinit2(sc->sc_carpdev, ifa, LLADDR(&sc->sc_addr));
994 }
995
996 int
997 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
998 {
999         struct carp_softc *sc = ifa->ifa_carp;
1000
1001         if (sc->sc_state == MASTER) {
1002                 *enaddr = LLADDR(&sc->sc_addr);
1003                 return (1);
1004         }
1005
1006         return (0);
1007 }
1008 #endif
1009
1010 #ifdef INET6
1011 static void
1012 carp_send_na(struct carp_softc *sc)
1013 {
1014         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1015         struct ifaddr *ifa;
1016         struct in6_addr *in6;
1017
1018         CARP_FOREACH_IFA(sc, ifa) {
1019                 if (ifa->ifa_addr->sa_family != AF_INET6)
1020                         continue;
1021
1022                 in6 = IFA_IN6(ifa);
1023                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1024                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1025                 DELAY(1000);    /* XXX */
1026         }
1027 }
1028
1029 /*
1030  * Returns ifa in case it's a carp address and it is MASTER, or if the address
1031  * matches and is not a carp address.  Returns NULL otherwise.
1032  */
1033 struct ifaddr *
1034 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
1035 {
1036         struct ifaddr *ifa;
1037
1038         ifa = NULL;
1039         IF_ADDR_RLOCK(ifp);
1040         TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1041                 if (ifa->ifa_addr->sa_family != AF_INET6)
1042                         continue;
1043                 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
1044                         continue;
1045                 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
1046                         ifa = NULL;
1047                 else
1048                         ifa_ref(ifa);
1049                 break;
1050         }
1051         IF_ADDR_RUNLOCK(ifp);
1052
1053         return (ifa);
1054 }
1055
1056 caddr_t
1057 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
1058 {
1059         struct ifaddr *ifa;
1060
1061         IF_ADDR_RLOCK(ifp);
1062         IFNET_FOREACH_IFA(ifp, ifa)
1063                 if (ifa->ifa_addr->sa_family == AF_INET6 &&
1064                     IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
1065                         struct carp_softc *sc = ifa->ifa_carp;
1066                         struct m_tag *mtag;
1067
1068                         IF_ADDR_RUNLOCK(ifp);
1069
1070                         mtag = m_tag_get(PACKET_TAG_CARP,
1071                             sizeof(struct carp_softc *), M_NOWAIT);
1072                         if (mtag == NULL)
1073                                 /* Better a bit than nothing. */
1074                                 return (LLADDR(&sc->sc_addr));
1075
1076                         bcopy(&sc, mtag + 1, sizeof(sc));
1077                         m_tag_prepend(m, mtag);
1078
1079                         return (LLADDR(&sc->sc_addr));
1080                 }
1081         IF_ADDR_RUNLOCK(ifp);
1082
1083         return (NULL);
1084 }
1085 #endif /* INET6 */
1086
1087 int
1088 carp_forus(struct ifnet *ifp, u_char *dhost)
1089 {
1090         struct carp_softc *sc;
1091         uint8_t *ena = dhost;
1092
1093         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1094                 return (0);
1095
1096         CIF_LOCK(ifp->if_carp);
1097         IFNET_FOREACH_CARP(ifp, sc) {
1098                 CARP_LOCK(sc);
1099                 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
1100                     ETHER_ADDR_LEN)) {
1101                         CARP_UNLOCK(sc);
1102                         CIF_UNLOCK(ifp->if_carp);
1103                         return (1);
1104                 }
1105                 CARP_UNLOCK(sc);
1106         }
1107         CIF_UNLOCK(ifp->if_carp);
1108
1109         return (0);
1110 }
1111
1112 /* Master down timeout event, executed in callout context. */
1113 static void
1114 carp_master_down(void *v)
1115 {
1116         struct carp_softc *sc = v;
1117
1118         CARP_LOCK_ASSERT(sc);
1119
1120         CURVNET_SET(sc->sc_carpdev->if_vnet);
1121         if (sc->sc_state == BACKUP) {
1122                 CARP_LOG("VHID %u@%s: BACKUP -> MASTER (master down)\n",
1123                     sc->sc_vhid,
1124                     sc->sc_carpdev->if_xname);
1125                 carp_master_down_locked(sc);
1126         }
1127         CURVNET_RESTORE();
1128
1129         CARP_UNLOCK(sc);
1130 }
1131
1132 static void
1133 carp_master_down_locked(struct carp_softc *sc)
1134 {
1135
1136         CARP_LOCK_ASSERT(sc);
1137
1138         switch (sc->sc_state) {
1139         case BACKUP:
1140                 carp_set_state(sc, MASTER);
1141                 carp_send_ad_locked(sc);
1142 #ifdef INET
1143                 carp_send_arp(sc);
1144 #endif
1145 #ifdef INET6
1146                 carp_send_na(sc);
1147 #endif
1148                 carp_setrun(sc, 0);
1149                 carp_addroute(sc);
1150                 break;
1151         case INIT:
1152         case MASTER:
1153 #ifdef INVARIANTS
1154                 panic("carp: VHID %u@%s: master_down event in %s state\n",
1155                     sc->sc_vhid,
1156                     sc->sc_carpdev->if_xname,
1157                     sc->sc_state ? "MASTER" : "INIT");
1158 #endif
1159                 break;
1160         }
1161 }
1162
1163 /*
1164  * When in backup state, af indicates whether to reset the master down timer
1165  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1166  */
1167 static void
1168 carp_setrun(struct carp_softc *sc, sa_family_t af)
1169 {
1170         struct timeval tv;
1171
1172         CARP_LOCK_ASSERT(sc);
1173
1174         if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
1175             sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
1176             (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0))
1177                 return;
1178
1179         switch (sc->sc_state) {
1180         case INIT:
1181                 CARP_LOG("VHID %u@%s: INIT -> BACKUP\n",
1182                     sc->sc_vhid,
1183                     sc->sc_carpdev->if_xname);
1184                 carp_set_state(sc, BACKUP);
1185                 carp_setrun(sc, 0);
1186                 break;
1187         case BACKUP:
1188                 callout_stop(&sc->sc_ad_tmo);
1189                 tv.tv_sec = 3 * sc->sc_advbase;
1190                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1191                 switch (af) {
1192 #ifdef INET
1193                 case AF_INET:
1194                         callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1195                             carp_master_down, sc);
1196                         break;
1197 #endif
1198 #ifdef INET6
1199                 case AF_INET6:
1200                         callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1201                             carp_master_down, sc);
1202                         break;
1203 #endif
1204                 default:
1205 #ifdef INET
1206                         if (sc->sc_naddrs)
1207                                 callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1208                                     carp_master_down, sc);
1209 #endif
1210 #ifdef INET6
1211                         if (sc->sc_naddrs6)
1212                                 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1213                                     carp_master_down, sc);
1214 #endif
1215                         break;
1216                 }
1217                 break;
1218         case MASTER:
1219                 tv.tv_sec = sc->sc_advbase;
1220                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1221                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
1222                     carp_send_ad, sc);
1223                 break;
1224         }
1225 }
1226
1227 /*
1228  * Setup multicast structures.
1229  */
1230 static int
1231 carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
1232 {
1233         struct ifnet *ifp = cif->cif_ifp;
1234         int error = 0;
1235
1236         CIF_LOCK_ASSERT(cif);
1237
1238         switch (sa) {
1239 #ifdef INET
1240         case AF_INET:
1241             {
1242                 struct ip_moptions *imo = &cif->cif_imo;
1243                 struct in_addr addr;
1244
1245                 if (imo->imo_membership)
1246                         return (0);
1247
1248                 imo->imo_membership = (struct in_multi **)malloc(
1249                     (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
1250                     M_NOWAIT);
1251                 if (imo->imo_membership == NULL)
1252                         return (ENOMEM);
1253                 imo->imo_mfilters = NULL;
1254                 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
1255                 imo->imo_multicast_vif = -1;
1256
1257                 addr.s_addr = htonl(INADDR_CARP_GROUP);
1258                 if ((error = in_joingroup(ifp, &addr, NULL,
1259                     &imo->imo_membership[0])) != 0) {
1260                         free(imo->imo_membership, M_CARP);
1261                         break;
1262                 }
1263                 imo->imo_num_memberships++;
1264                 imo->imo_multicast_ifp = ifp;
1265                 imo->imo_multicast_ttl = CARP_DFLTTL;
1266                 imo->imo_multicast_loop = 0;
1267                 break;
1268            }
1269 #endif
1270 #ifdef INET6
1271         case AF_INET6:
1272             {
1273                 struct ip6_moptions *im6o = &cif->cif_im6o;
1274                 struct in6_addr in6;
1275                 struct in6_multi *in6m;
1276
1277                 if (im6o->im6o_membership)
1278                         return (0);
1279
1280                 im6o->im6o_membership = (struct in6_multi **)malloc(
1281                     (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
1282                     M_ZERO | M_NOWAIT);
1283                 if (im6o->im6o_membership == NULL)
1284                         return (ENOMEM);
1285                 im6o->im6o_mfilters = NULL;
1286                 im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
1287                 im6o->im6o_multicast_hlim = CARP_DFLTTL;
1288                 im6o->im6o_multicast_ifp = ifp;
1289
1290                 /* Join IPv6 CARP multicast group. */
1291                 bzero(&in6, sizeof(in6));
1292                 in6.s6_addr16[0] = htons(0xff02);
1293                 in6.s6_addr8[15] = 0x12;
1294                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1295                         free(im6o->im6o_membership, M_CARP);
1296                         break;
1297                 }
1298                 in6m = NULL;
1299                 if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
1300                         free(im6o->im6o_membership, M_CARP);
1301                         break;
1302                 }
1303                 im6o->im6o_membership[0] = in6m;
1304                 im6o->im6o_num_memberships++;
1305
1306                 /* Join solicited multicast address. */
1307                 bzero(&in6, sizeof(in6));
1308                 in6.s6_addr16[0] = htons(0xff02);
1309                 in6.s6_addr32[1] = 0;
1310                 in6.s6_addr32[2] = htonl(1);
1311                 in6.s6_addr32[3] = 0;
1312                 in6.s6_addr8[12] = 0xff;
1313                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1314                         in6_mc_leave(im6o->im6o_membership[0], NULL);
1315                         free(im6o->im6o_membership, M_CARP);
1316                         break;
1317                 }
1318                 in6m = NULL;
1319                 if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
1320                         in6_mc_leave(im6o->im6o_membership[0], NULL);
1321                         free(im6o->im6o_membership, M_CARP);
1322                         break;
1323                 }
1324                 im6o->im6o_membership[1] = in6m;
1325                 im6o->im6o_num_memberships++;
1326                 break;
1327             }
1328 #endif
1329         }
1330
1331         return (error);
1332 }
1333
1334 /*
1335  * Free multicast structures.
1336  */
1337 static void
1338 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
1339 {
1340
1341         CIF_LOCK_ASSERT(cif);
1342         switch (sa) {
1343 #ifdef INET
1344         case AF_INET:
1345                 if (cif->cif_naddrs == 0) {
1346                         struct ip_moptions *imo = &cif->cif_imo;
1347
1348                         in_leavegroup(imo->imo_membership[0], NULL);
1349                         KASSERT(imo->imo_mfilters == NULL,
1350                             ("%s: imo_mfilters != NULL", __func__));
1351                         free(imo->imo_membership, M_CARP);
1352                         imo->imo_membership = NULL;
1353
1354                 }
1355                 break;
1356 #endif
1357 #ifdef INET6
1358         case AF_INET6:
1359                 if (cif->cif_naddrs6 == 0) {
1360                         struct ip6_moptions *im6o = &cif->cif_im6o;
1361
1362                         in6_mc_leave(im6o->im6o_membership[0], NULL);
1363                         in6_mc_leave(im6o->im6o_membership[1], NULL);
1364                         KASSERT(im6o->im6o_mfilters == NULL,
1365                             ("%s: im6o_mfilters != NULL", __func__));
1366                         free(im6o->im6o_membership, M_CARP);
1367                         im6o->im6o_membership = NULL;
1368                 }
1369                 break;
1370 #endif
1371         }
1372 }
1373
1374 int
1375 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
1376 {
1377         struct m_tag *mtag;
1378         struct carp_softc *sc;
1379
1380         if (!sa)
1381                 return (0);
1382
1383         switch (sa->sa_family) {
1384 #ifdef INET
1385         case AF_INET:
1386                 break;
1387 #endif
1388 #ifdef INET6
1389         case AF_INET6:
1390                 break;
1391 #endif
1392         default:
1393                 return (0);
1394         }
1395
1396         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
1397         if (mtag == NULL)
1398                 return (0);
1399
1400         bcopy(mtag + 1, &sc, sizeof(sc));
1401
1402         /* Set the source MAC address to the Virtual Router MAC Address. */
1403         switch (ifp->if_type) {
1404         case IFT_ETHER:
1405         case IFT_BRIDGE:
1406         case IFT_L2VLAN: {
1407                         struct ether_header *eh;
1408
1409                         eh = mtod(m, struct ether_header *);
1410                         eh->ether_shost[0] = 0;
1411                         eh->ether_shost[1] = 0;
1412                         eh->ether_shost[2] = 0x5e;
1413                         eh->ether_shost[3] = 0;
1414                         eh->ether_shost[4] = 1;
1415                         eh->ether_shost[5] = sc->sc_vhid;
1416                 }
1417                 break;
1418         case IFT_FDDI: {
1419                         struct fddi_header *fh;
1420
1421                         fh = mtod(m, struct fddi_header *);
1422                         fh->fddi_shost[0] = 0;
1423                         fh->fddi_shost[1] = 0;
1424                         fh->fddi_shost[2] = 0x5e;
1425                         fh->fddi_shost[3] = 0;
1426                         fh->fddi_shost[4] = 1;
1427                         fh->fddi_shost[5] = sc->sc_vhid;
1428                 }
1429                 break;
1430         case IFT_ISO88025: {
1431                         struct iso88025_header *th;
1432                         th = mtod(m, struct iso88025_header *);
1433                         th->iso88025_shost[0] = 3;
1434                         th->iso88025_shost[1] = 0;
1435                         th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
1436                         th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
1437                         th->iso88025_shost[4] = 0;
1438                         th->iso88025_shost[5] = 0;
1439                 }
1440                 break;
1441         default:
1442                 printf("%s: carp is not supported for the %d interface type\n",
1443                     ifp->if_xname, ifp->if_type);
1444                 return (EOPNOTSUPP);
1445         }
1446
1447         return (0);
1448 }
1449
1450 static struct carp_softc*
1451 carp_alloc(struct ifnet *ifp)
1452 {
1453         struct carp_softc *sc;
1454         struct carp_if *cif;
1455
1456         if ((cif = ifp->if_carp) == NULL) {
1457                 cif = carp_alloc_if(ifp);
1458                 if (cif == NULL)
1459                         return (NULL);
1460         }
1461
1462         sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
1463
1464         sc->sc_advbase = CARP_DFLTINTV;
1465         sc->sc_vhid = -1;       /* required setting */
1466         sc->sc_init_counter = 1;
1467         sc->sc_state = INIT;
1468
1469         sc->sc_ifasiz = sizeof(struct ifaddr *);
1470         sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
1471         sc->sc_carpdev = ifp;
1472
1473         CARP_LOCK_INIT(sc);
1474 #ifdef INET
1475         callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1476 #endif
1477 #ifdef INET6
1478         callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1479 #endif
1480         callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1481
1482         CIF_LOCK(cif);
1483         TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
1484         CIF_UNLOCK(cif);
1485
1486         mtx_lock(&carp_mtx);
1487         LIST_INSERT_HEAD(&carp_list, sc, sc_next);
1488         mtx_unlock(&carp_mtx);
1489
1490         return (sc);
1491 }
1492
1493 static int
1494 carp_grow_ifas(struct carp_softc *sc)
1495 {
1496         struct ifaddr **new;
1497
1498         CARP_LOCK_ASSERT(sc);
1499
1500         new = malloc(sc->sc_ifasiz * 2, M_CARP, M_NOWAIT|M_ZERO);
1501         if (new == NULL)
1502                 return (ENOMEM);
1503         bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
1504         free(sc->sc_ifas, M_CARP);
1505         sc->sc_ifas = new;
1506         sc->sc_ifasiz *= 2;
1507
1508         return (0);
1509 }
1510
1511 static void
1512 carp_destroy(struct carp_softc *sc)
1513 {
1514         struct ifnet *ifp = sc->sc_carpdev;
1515         struct carp_if *cif = ifp->if_carp;
1516
1517         CIF_LOCK_ASSERT(cif);
1518
1519         TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
1520
1521         mtx_lock(&carp_mtx);
1522         LIST_REMOVE(sc, sc_next);
1523         mtx_unlock(&carp_mtx);
1524
1525         CARP_LOCK(sc);
1526         if (sc->sc_suppress)
1527                 carp_demote_adj(-carp_ifdown_adj, "vhid removed");
1528         callout_drain(&sc->sc_ad_tmo);
1529 #ifdef INET
1530         callout_drain(&sc->sc_md_tmo);
1531 #endif
1532 #ifdef INET6
1533         callout_drain(&sc->sc_md6_tmo);
1534 #endif
1535         CARP_LOCK_DESTROY(sc);
1536
1537         free(sc->sc_ifas, M_CARP);
1538         free(sc, M_CARP);
1539 }
1540
1541 static struct carp_if*
1542 carp_alloc_if(struct ifnet *ifp)
1543 {
1544         struct carp_if *cif;
1545
1546         cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
1547
1548         if (ifpromisc(ifp, 1) != 0)
1549                 goto cleanup;
1550
1551         CIF_LOCK_INIT(cif);
1552         cif->cif_ifp = ifp;
1553         TAILQ_INIT(&cif->cif_vrs);
1554
1555         IF_ADDR_WLOCK(ifp);
1556         ifp->if_carp = cif;
1557         if_ref(ifp);
1558         IF_ADDR_WUNLOCK(ifp);
1559
1560         return (cif);
1561
1562 cleanup:
1563         free(cif, M_CARP);
1564
1565         return (NULL);
1566 }
1567
1568 static void
1569 carp_free_if(struct carp_if *cif)
1570 {
1571         struct ifnet *ifp = cif->cif_ifp;
1572
1573         CIF_LOCK_ASSERT(cif);
1574         KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
1575             __func__));
1576
1577         IF_ADDR_WLOCK(ifp);
1578         ifp->if_carp = NULL;
1579         if_rele(ifp);
1580         IF_ADDR_WUNLOCK(ifp);
1581
1582         CIF_LOCK_DESTROY(cif);
1583
1584         ifpromisc(ifp, 0);
1585
1586         free(cif, M_CARP);
1587 }
1588
1589 static void
1590 carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv)
1591 {
1592
1593         CARP_LOCK(sc);
1594         carpr->carpr_state = sc->sc_state;
1595         carpr->carpr_vhid = sc->sc_vhid;
1596         carpr->carpr_advbase = sc->sc_advbase;
1597         carpr->carpr_advskew = sc->sc_advskew;
1598         if (priv)
1599                 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
1600         else
1601                 bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
1602         CARP_UNLOCK(sc);
1603 }
1604
1605 int
1606 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
1607 {
1608         struct carpreq carpr;
1609         struct ifnet *ifp;
1610         struct carp_softc *sc = NULL;
1611         int error = 0, locked = 0;
1612
1613         if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
1614                 return (error);
1615
1616         ifp = ifunit_ref(ifr->ifr_name);
1617         if (ifp == NULL)
1618                 return (ENXIO);
1619
1620         switch (ifp->if_type) {
1621         case IFT_ETHER:
1622         case IFT_L2VLAN:
1623         case IFT_BRIDGE:
1624         case IFT_FDDI:
1625         case IFT_ISO88025:
1626                 break;
1627         default:
1628                 error = EOPNOTSUPP;
1629                 goto out;
1630         }
1631
1632         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1633                 error = EADDRNOTAVAIL;
1634                 goto out;
1635         }
1636
1637         switch (cmd) {
1638         case SIOCSVH:
1639                 if ((error = priv_check(td, PRIV_NETINET_CARP)))
1640                         break;
1641                 if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID ||
1642                     carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) {
1643                         error = EINVAL;
1644                         break;
1645                 }
1646
1647                 if (ifp->if_carp) {
1648                         CIF_LOCK(ifp->if_carp);
1649                         IFNET_FOREACH_CARP(ifp, sc)
1650                                 if (sc->sc_vhid == carpr.carpr_vhid)
1651                                         break;
1652                         CIF_UNLOCK(ifp->if_carp);
1653                 }
1654                 if (sc == NULL) {
1655                         sc = carp_alloc(ifp);
1656                         if (sc == NULL) {
1657                                 error = EINVAL; /* XXX: ifpromisc failed */
1658                                 break;
1659                         }
1660
1661                         CARP_LOCK(sc);
1662                         sc->sc_vhid = carpr.carpr_vhid;
1663                         LLADDR(&sc->sc_addr)[0] = 0;
1664                         LLADDR(&sc->sc_addr)[1] = 0;
1665                         LLADDR(&sc->sc_addr)[2] = 0x5e;
1666                         LLADDR(&sc->sc_addr)[3] = 0;
1667                         LLADDR(&sc->sc_addr)[4] = 1;
1668                         LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
1669                 } else
1670                         CARP_LOCK(sc);
1671                 locked = 1;
1672                 if (carpr.carpr_advbase > 0) {
1673                         if (carpr.carpr_advbase > 255 ||
1674                             carpr.carpr_advbase < CARP_DFLTINTV) {
1675                                 error = EINVAL;
1676                                 break;
1677                         }
1678                         sc->sc_advbase = carpr.carpr_advbase;
1679                 }
1680                 if (carpr.carpr_advskew > 0) {
1681                         if (carpr.carpr_advskew >= 255) {
1682                                 error = EINVAL;
1683                                 break;
1684                         }
1685                         sc->sc_advskew = carpr.carpr_advskew;
1686                 }
1687                 if (carpr.carpr_key[0] != '\0') {
1688                         bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
1689                         carp_hmac_prepare(sc);
1690                 }
1691                 if (sc->sc_state != INIT &&
1692                     carpr.carpr_state != sc->sc_state) {
1693                         switch (carpr.carpr_state) {
1694                         case BACKUP:
1695                                 callout_stop(&sc->sc_ad_tmo);
1696                                 carp_set_state(sc, BACKUP);
1697                                 carp_setrun(sc, 0);
1698                                 carp_delroute(sc);
1699                                 break;
1700                         case MASTER:
1701                                 carp_master_down_locked(sc);
1702                                 break;
1703                         default:
1704                                 break;
1705                         }
1706                 }
1707                 break;
1708
1709         case SIOCGVH:
1710             {
1711                 int priveleged;
1712
1713                 if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) {
1714                         error = EINVAL;
1715                         break;
1716                 }
1717                 if (carpr.carpr_count < 1) {
1718                         error = EMSGSIZE;
1719                         break;
1720                 }
1721                 if (ifp->if_carp == NULL) {
1722                         error = ENOENT;
1723                         break;
1724                 }
1725
1726                 priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0);
1727                 if (carpr.carpr_vhid != 0) {
1728                         CIF_LOCK(ifp->if_carp);
1729                         IFNET_FOREACH_CARP(ifp, sc)
1730                                 if (sc->sc_vhid == carpr.carpr_vhid)
1731                                         break;
1732                         CIF_UNLOCK(ifp->if_carp);
1733                         if (sc == NULL) {
1734                                 error = ENOENT;
1735                                 break;
1736                         }
1737                         carp_carprcp(&carpr, sc, priveleged);
1738                         error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
1739                 } else  {
1740                         int i, count;
1741
1742                         count = 0;
1743                         CIF_LOCK(ifp->if_carp);
1744                         IFNET_FOREACH_CARP(ifp, sc)
1745                                 count++;
1746
1747                         if (count > carpr.carpr_count) {
1748                                 CIF_UNLOCK(ifp->if_carp);
1749                                 error = EMSGSIZE;
1750                                 break;
1751                         }
1752
1753                         i = 0;
1754                         IFNET_FOREACH_CARP(ifp, sc) {
1755                                 carp_carprcp(&carpr, sc, priveleged);
1756                                 carpr.carpr_count = count;
1757                                 error = copyout(&carpr, ifr->ifr_data +
1758                                     (i * sizeof(carpr)), sizeof(carpr));
1759                                 if (error) {
1760                                         CIF_UNLOCK(ifp->if_carp);
1761                                         break;
1762                                 }
1763                                 i++;
1764                         }
1765                         CIF_UNLOCK(ifp->if_carp);
1766                 }
1767                 break;
1768             }
1769         default:
1770                 error = EINVAL;
1771         }
1772
1773 out:
1774         if (locked)
1775                 CARP_UNLOCK(sc);
1776         if_rele(ifp);
1777
1778         return (error);
1779 }
1780
1781 static int
1782 carp_get_vhid(struct ifaddr *ifa)
1783 {
1784
1785         if (ifa == NULL || ifa->ifa_carp == NULL)
1786                 return (0);
1787
1788         return (ifa->ifa_carp->sc_vhid);
1789 }
1790
1791 int
1792 carp_attach(struct ifaddr *ifa, int vhid)
1793 {
1794         struct ifnet *ifp = ifa->ifa_ifp;
1795         struct carp_if *cif = ifp->if_carp;
1796         struct carp_softc *sc;
1797         int index, error;
1798
1799         if (ifp->if_carp == NULL)
1800                 return (ENOPROTOOPT);
1801
1802         switch (ifa->ifa_addr->sa_family) {
1803 #ifdef INET
1804         case AF_INET:
1805 #endif
1806 #ifdef INET6
1807         case AF_INET6:
1808 #endif
1809                 break;
1810         default:
1811                 return (EPROTOTYPE);
1812         }
1813
1814         CIF_LOCK(cif);
1815         IFNET_FOREACH_CARP(ifp, sc)
1816                 if (sc->sc_vhid == vhid)
1817                         break;
1818         if (sc == NULL) {
1819                 CIF_UNLOCK(cif);
1820                 return (ENOENT);
1821         }
1822
1823         if (ifa->ifa_carp) {
1824                 if (ifa->ifa_carp->sc_vhid != vhid)
1825                         carp_detach_locked(ifa);
1826                 else {
1827                         CIF_UNLOCK(cif);
1828                         return (0);
1829                 }
1830         }
1831
1832         error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
1833         if (error) {
1834                 CIF_FREE(cif);
1835                 return (error);
1836         }
1837
1838         CARP_LOCK(sc);
1839         index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
1840         if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
1841                 if ((error = carp_grow_ifas(sc)) != 0) {
1842                         carp_multicast_cleanup(cif,
1843                             ifa->ifa_addr->sa_family);
1844                         CARP_UNLOCK(sc);
1845                         CIF_FREE(cif);
1846                         return (error);
1847                 }
1848
1849         switch (ifa->ifa_addr->sa_family) {
1850 #ifdef INET
1851         case AF_INET:
1852                 cif->cif_naddrs++;
1853                 sc->sc_naddrs++;
1854                 break;
1855 #endif
1856 #ifdef INET6
1857         case AF_INET6:
1858                 cif->cif_naddrs6++;
1859                 sc->sc_naddrs6++;
1860                 break;
1861 #endif
1862         }
1863
1864         ifa_ref(ifa);
1865         sc->sc_ifas[index - 1] = ifa;
1866         ifa->ifa_carp = sc;
1867
1868         carp_hmac_prepare(sc);
1869         carp_sc_state(sc);
1870
1871         CARP_UNLOCK(sc);
1872         CIF_UNLOCK(cif);
1873
1874         return (0);
1875 }
1876
1877 void
1878 carp_detach(struct ifaddr *ifa)
1879 {
1880         struct ifnet *ifp = ifa->ifa_ifp;
1881         struct carp_if *cif = ifp->if_carp;
1882
1883         CIF_LOCK(cif);
1884         carp_detach_locked(ifa);
1885         CIF_FREE(cif);
1886 }
1887
1888 static void
1889 carp_detach_locked(struct ifaddr *ifa)
1890 {
1891         struct ifnet *ifp = ifa->ifa_ifp;
1892         struct carp_if *cif = ifp->if_carp;
1893         struct carp_softc *sc = ifa->ifa_carp;
1894         int i, index;
1895
1896         KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
1897
1898         CIF_LOCK_ASSERT(cif);
1899         CARP_LOCK(sc);
1900
1901         /* Shift array. */
1902         index = sc->sc_naddrs + sc->sc_naddrs6;
1903         for (i = 0; i < index; i++)
1904                 if (sc->sc_ifas[i] == ifa)
1905                         break;
1906         KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
1907         for (; i < index - 1; i++)
1908                 sc->sc_ifas[i] = sc->sc_ifas[i+1];
1909         sc->sc_ifas[index - 1] = NULL;
1910
1911         switch (ifa->ifa_addr->sa_family) {
1912 #ifdef INET
1913         case AF_INET:
1914                 cif->cif_naddrs--;
1915                 sc->sc_naddrs--;
1916                 break;
1917 #endif
1918 #ifdef INET6
1919         case AF_INET6:
1920                 cif->cif_naddrs6--;
1921                 sc->sc_naddrs6--;
1922                 break;
1923 #endif
1924         }
1925
1926         carp_ifa_delroute(ifa);
1927         carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
1928
1929         ifa->ifa_carp = NULL;
1930         ifa_free(ifa);
1931
1932         carp_hmac_prepare(sc);
1933         carp_sc_state(sc);
1934
1935         if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1936                 CARP_UNLOCK(sc);
1937                 carp_destroy(sc);
1938         } else
1939                 CARP_UNLOCK(sc);
1940 }
1941
1942 static void
1943 carp_set_state(struct carp_softc *sc, int state)
1944 {
1945
1946         CARP_LOCK_ASSERT(sc);
1947
1948         if (sc->sc_state != state) {
1949                 const char *carp_states[] = { CARP_STATES };
1950                 char subsys[IFNAMSIZ+5];
1951
1952                 sc->sc_state = state;
1953
1954                 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
1955                     sc->sc_carpdev->if_xname);
1956                 devctl_notify("CARP", subsys, carp_states[state], NULL);
1957         }
1958 }
1959
1960 static void
1961 carp_linkstate(struct ifnet *ifp)
1962 {
1963         struct carp_softc *sc;
1964
1965         CIF_LOCK(ifp->if_carp);
1966         IFNET_FOREACH_CARP(ifp, sc) {
1967                 CARP_LOCK(sc);
1968                 carp_sc_state(sc);
1969                 CARP_UNLOCK(sc);
1970         }
1971         CIF_UNLOCK(ifp->if_carp);
1972 }
1973
1974 static void
1975 carp_sc_state(struct carp_softc *sc)
1976 {
1977
1978         CARP_LOCK_ASSERT(sc);
1979
1980         if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
1981             !(sc->sc_carpdev->if_flags & IFF_UP)) {
1982                 callout_stop(&sc->sc_ad_tmo);
1983 #ifdef INET
1984                 callout_stop(&sc->sc_md_tmo);
1985 #endif
1986 #ifdef INET6
1987                 callout_stop(&sc->sc_md6_tmo);
1988 #endif
1989                 carp_set_state(sc, INIT);
1990                 carp_setrun(sc, 0);
1991                 if (!sc->sc_suppress)
1992                         carp_demote_adj(carp_ifdown_adj, "interface down");
1993                 sc->sc_suppress = 1;
1994         } else {
1995                 carp_set_state(sc, INIT);
1996                 carp_setrun(sc, 0);
1997                 if (sc->sc_suppress)
1998                         carp_demote_adj(-carp_ifdown_adj, "interface up");
1999                 sc->sc_suppress = 0;
2000         }
2001 }
2002
2003 static void
2004 carp_demote_adj(int adj, char *reason)
2005 {
2006         atomic_add_int(&carp_demotion, adj);
2007         CARP_LOG("demoted by %d to %d (%s)\n", adj, carp_demotion, reason);
2008         taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
2009 }
2010
2011 static int
2012 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
2013 {
2014         int new, error;
2015
2016         new = carp_demotion;
2017         error = sysctl_handle_int(oidp, &new, 0, req);
2018         if (error || !req->newptr)
2019                 return (error);
2020
2021         carp_demote_adj(new, "sysctl");
2022
2023         return (0);
2024 }
2025
#ifdef INET
/*
 * Protocol switch entry for CARP over IPv4: a raw-socket protocol in the
 * inet domain whose input is handled by carp_input() and whose output,
 * control and user-request paths are the raw IP ones.
 */
extern	struct domain inetdomain;
static struct protosw in_carp_protosw = {
	/* Identity. */
	.pr_domain =		&inetdomain,
	.pr_type =		SOCK_RAW,
	.pr_protocol =		IPPROTO_CARP,
	.pr_flags =		PR_ATOMIC | PR_ADDR,
	/* Handlers. */
	.pr_input =		carp_input,
	.pr_output =		(pr_output_t *)rip_output,
	.pr_ctloutput =		rip_ctloutput,
	.pr_usrreqs =		&rip_usrreqs
};
#endif
2039
#ifdef INET6
/*
 * Protocol switch entry for CARP over IPv6: a raw-socket protocol in the
 * inet6 domain whose input is handled by carp6_input() and whose output,
 * control and user-request paths are the raw IPv6 ones.
 */
extern	struct domain inet6domain;
static struct ip6protosw in6_carp_protosw = {
	/* Identity. */
	.pr_domain =		&inet6domain,
	.pr_type =		SOCK_RAW,
	.pr_protocol =		IPPROTO_CARP,
	.pr_flags =		PR_ATOMIC | PR_ADDR,
	/* Handlers. */
	.pr_input =		carp6_input,
	.pr_output =		rip6_output,
	.pr_ctloutput =		rip6_ctloutput,
	.pr_usrreqs =		&rip6_usrreqs
};
#endif
2053
2054 static void
2055 carp_mod_cleanup(void)
2056 {
2057
2058 #ifdef INET
2059         if (proto_reg[CARP_INET] == 0) {
2060                 (void)ipproto_unregister(IPPROTO_CARP);
2061                 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW);
2062                 proto_reg[CARP_INET] = -1;
2063         }
2064         carp_iamatch_p = NULL;
2065 #endif
2066 #ifdef INET6
2067         if (proto_reg[CARP_INET6] == 0) {
2068                 (void)ip6proto_unregister(IPPROTO_CARP);
2069                 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW);
2070                 proto_reg[CARP_INET6] = -1;
2071         }
2072         carp_iamatch6_p = NULL;
2073         carp_macmatch6_p = NULL;
2074 #endif
2075         carp_ioctl_p = NULL;
2076         carp_attach_p = NULL;
2077         carp_detach_p = NULL;
2078         carp_get_vhid_p = NULL;
2079         carp_linkstate_p = NULL;
2080         carp_forus_p = NULL;
2081         carp_output_p = NULL;
2082         carp_demote_adj_p = NULL;
2083         carp_master_p = NULL;
2084         mtx_unlock(&carp_mtx);
2085         taskqueue_drain(taskqueue_swi, &carp_sendall_task);
2086         mtx_destroy(&carp_mtx);
2087 }
2088
2089 static int
2090 carp_mod_load(void)
2091 {
2092         int err;
2093
2094         mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
2095         LIST_INIT(&carp_list);
2096         carp_get_vhid_p = carp_get_vhid;
2097         carp_forus_p = carp_forus;
2098         carp_output_p = carp_output;
2099         carp_linkstate_p = carp_linkstate;
2100         carp_ioctl_p = carp_ioctl;
2101         carp_attach_p = carp_attach;
2102         carp_detach_p = carp_detach;
2103         carp_demote_adj_p = carp_demote_adj;
2104         carp_master_p = carp_master;
2105 #ifdef INET6
2106         carp_iamatch6_p = carp_iamatch6;
2107         carp_macmatch6_p = carp_macmatch6;
2108         proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
2109             (struct protosw *)&in6_carp_protosw);
2110         if (proto_reg[CARP_INET6]) {
2111                 printf("carp: error %d attaching to PF_INET6\n",
2112                     proto_reg[CARP_INET6]);
2113                 carp_mod_cleanup();
2114                 return (proto_reg[CARP_INET6]);
2115         }
2116         err = ip6proto_register(IPPROTO_CARP);
2117         if (err) {
2118                 printf("carp: error %d registering with INET6\n", err);
2119                 carp_mod_cleanup();
2120                 return (err);
2121         }
2122 #endif
2123 #ifdef INET
2124         carp_iamatch_p = carp_iamatch;
2125         proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
2126         if (proto_reg[CARP_INET]) {
2127                 printf("carp: error %d attaching to PF_INET\n",
2128                     proto_reg[CARP_INET]);
2129                 carp_mod_cleanup();
2130                 return (proto_reg[CARP_INET]);
2131         }
2132         err = ipproto_register(IPPROTO_CARP);
2133         if (err) {
2134                 printf("carp: error %d registering with INET\n", err);
2135                 carp_mod_cleanup();
2136                 return (err);
2137         }
2138 #endif
2139         return (0);
2140 }
2141
2142 static int
2143 carp_modevent(module_t mod, int type, void *data)
2144 {
2145         switch (type) {
2146         case MOD_LOAD:
2147                 return carp_mod_load();
2148                 /* NOTREACHED */
2149         case MOD_UNLOAD:
2150                 mtx_lock(&carp_mtx);
2151                 if (LIST_EMPTY(&carp_list))
2152                         carp_mod_cleanup();
2153                 else {
2154                         mtx_unlock(&carp_mtx);
2155                         return (EBUSY);
2156                 }
2157                 break;
2158
2159         default:
2160                 return (EINVAL);
2161         }
2162
2163         return (0);
2164 }
2165
2166 static moduledata_t carp_mod = {
2167         "carp",
2168         carp_modevent,
2169         0
2170 };
2171
2172 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);