]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/in.c
A major overhaul of the CARP implementation. The ip_carp.c was started
[FreeBSD/FreeBSD.git] / sys / netinet / in.c
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  * Copyright (C) 2001 WIDE Project.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 4. Neither the name of the University nor the names of its contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  *      @(#)in.c        8.4 (Berkeley) 1/9/95
31  */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include "opt_mpath.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/sockio.h>
41 #include <sys/malloc.h>
42 #include <sys/priv.h>
43 #include <sys/socket.h>
44 #include <sys/jail.h>
45 #include <sys/kernel.h>
46 #include <sys/proc.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49
50 #include <net/if.h>
51 #include <net/if_var.h>
52 #include <net/if_arp.h>
53 #include <net/if_dl.h>
54 #include <net/if_llatbl.h>
55 #include <net/if_types.h>
56 #include <net/route.h>
57 #include <net/vnet.h>
58
59 #include <netinet/if_ether.h>
60 #include <netinet/in.h>
61 #include <netinet/in_var.h>
62 #include <netinet/in_pcb.h>
63 #include <netinet/ip_var.h>
64 #include <netinet/ip_carp.h>
65 #include <netinet/igmp_var.h>
66 #include <netinet/udp.h>
67 #include <netinet/udp_var.h>
68
/* Prototypes for file-local (static) helpers used by the ioctl code below. */
69 static int in_mask2len(struct in_addr *);
70 static void in_len2mask(struct in_addr *, int);
71 static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
72         struct ifnet *, struct thread *);
73
/* Further file-local helpers. */
74 static void     in_socktrim(struct sockaddr_in *);
75 static int      in_ifinit(struct ifnet *, struct in_ifaddr *,
76                     struct sockaddr_in *, int, int, int);
77 static void     in_purgemaddrs(struct ifnet *);
78
/*
 * Per-VNET integer exported read-write as the "no_same_prefix" sysctl under
 * the _net_inet_ip node; see the description string for its meaning.
 */
79 static VNET_DEFINE(int, nosameprefix);
80 #define V_nosameprefix                  VNET(nosameprefix)
81 SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_RW,
82         &VNET_NAME(nosameprefix), 0,
83         "Refuse to create same prefixes on different interfaces");
84
/* Per-VNET variables that are only declared in this file. */
85 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
86 #define V_ripcbinfo                     VNET(ripcbinfo)
87
88 VNET_DECLARE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
89 #define V_arpstat               VNET(arpstat)
90
91 /*
92  * Return 1 if an internet address is for a ``local'' host
93  * (one to which we have a connection).
94  */
95 int
96 in_localaddr(struct in_addr in)
97 {
98         register u_long i = ntohl(in.s_addr);
99         register struct in_ifaddr *ia;
100
101         IN_IFADDR_RLOCK();
102         TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
103                 if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
104                         IN_IFADDR_RUNLOCK();
105                         return (1);
106                 }
107         }
108         IN_IFADDR_RUNLOCK();
109         return (0);
110 }
111
112 /*
113  * Return 1 if an internet address is for the local host and configured
114  * on one of its interfaces.
115  */
116 int
117 in_localip(struct in_addr in)
118 {
119         struct in_ifaddr *ia;
120
121         IN_IFADDR_RLOCK();
122         LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) {
123                 if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) {
124                         IN_IFADDR_RUNLOCK();
125                         return (1);
126                 }
127         }
128         IN_IFADDR_RUNLOCK();
129         return (0);
130 }
131
132 /*
133  * Determine whether an IP address is in a reserved set of addresses
134  * that may not be forwarded, or whether datagrams to that destination
135  * may be forwarded.
136  */
137 int
138 in_canforward(struct in_addr in)
139 {
140         register u_long i = ntohl(in.s_addr);
141         register u_long net;
142
143         if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
144                 return (0);
145         if (IN_CLASSA(i)) {
146                 net = i & IN_CLASSA_NET;
147                 if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
148                         return (0);
149         }
150         return (1);
151 }
152
153 /*
154  * Trim a mask in a sockaddr
155  */
156 static void
157 in_socktrim(struct sockaddr_in *ap)
158 {
159     register char *cplim = (char *) &ap->sin_addr;
160     register char *cp = (char *) (&ap->sin_addr + 1);
161
162     ap->sin_len = 0;
163     while (--cp >= cplim)
164         if (*cp) {
165             (ap)->sin_len = cp - (char *) (ap) + 1;
166             break;
167         }
168 }
169
/*
 * Convert a netmask to a prefix length.
 *
 * The mask is examined byte by byte in memory order.  The result is the
 * number of leading one bits: eight for every leading 0xff byte, plus the
 * leading one bits of the first byte that is not 0xff.  Bits after the
 * first zero bit are ignored, so a non-contiguous mask yields the length
 * of its leading run only.
 *
 * Returns a value in the range 0 .. sizeof(*mask) * 8.
 */
static int
in_mask2len(struct in_addr *mask)
{
        const unsigned char *p = (const unsigned char *)mask;
        size_t nbytes;
        int nbits;

        /* Count whole bytes that are fully set. */
        for (nbytes = 0; nbytes < sizeof(*mask); nbytes++) {
                if (p[nbytes] != 0xff)
                        break;
        }

        /* Count the leading one bits of the first partial byte, if any. */
        nbits = 0;
        if (nbytes < sizeof(*mask)) {
                for (nbits = 0; nbits < 8; nbits++) {
                        if ((p[nbytes] & (0x80 >> nbits)) == 0)
                                break;
                }
        }
        return ((int)nbytes * 8 + nbits);
}
191
/*
 * Convert a prefix length to a netmask.
 *
 * The first len bits of *mask, taken in memory byte order, are set to one
 * and all remaining bits are cleared.
 *
 * len is clamped to the range 0 .. sizeof(*mask) * 8.  An out-of-range
 * value can therefore neither write beyond the mask nor produce a
 * negative shift count.
 */
static void
in_len2mask(struct in_addr *mask, int len)
{
        unsigned char *p = (unsigned char *)mask;
        const int maxbits = (int)sizeof(*mask) * 8;
        size_t i;

        if (len < 0)
                len = 0;
        else if (len > maxbits)
                len = maxbits;

        /* Every byte is written, so no separate zeroing pass is needed. */
        for (i = 0; i < sizeof(*mask); i++) {
                if (len >= 8) {
                        p[i] = 0xff;
                        len -= 8;
                } else {
                        /* Partial byte (or zero when no bits remain). */
                        p[i] = (0xff00 >> len) & 0xff;
                        len = 0;
                }
        }
}
205
206 /*
207  * Generic internet control operations (ioctl's).
208  *
209  * ifp is NULL if not an interface-specific ioctl.
 *
 * The SIOC[AGD]LIFADDR commands are handed to in_lifaddr_ioctl(); commands
 * not listed in the first switch go to the interface's if_ioctl routine.
 * When td is non-NULL, the add/set commands require PRIV_NET_ADDIFADDR and
 * SIOCDIFADDR requires PRIV_NET_DELIFADDR (checked with priv_check()).
 *
 * Returns 0 on success or an errno value.
210  */
211 /* ARGSUSED */
212 int
213 in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
214     struct thread *td)
215 {
216         register struct ifreq *ifr = (struct ifreq *)data;
217         register struct in_ifaddr *ia, *iap;
218         register struct ifaddr *ifa;
219         struct in_addr allhosts_addr;
220         struct in_addr dst;
221         struct in_ifinfo *ii;
            /* data is viewed both as an ifreq (ifr) and as an in_aliasreq (ifra). */
222         struct in_aliasreq *ifra = (struct in_aliasreq *)data;
223         struct sockaddr_in oldaddr;
224         int error, hostIsNew, iaIsNew, maskIsNew;
225         int iaIsFirst;
226
227         ia = NULL;
228         iaIsFirst = 0;
229         iaIsNew = 0;
230         allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
231
232         /*
233          * Filter out ioctls we implement directly; forward the rest on to
234          * in_lifaddr_ioctl() and ifp->if_ioctl().
235          */
236         switch (cmd) {
237         case SIOCGIFADDR:
238         case SIOCGIFBRDADDR:
239         case SIOCGIFDSTADDR:
240         case SIOCGIFNETMASK:
241         case SIOCDIFADDR:
242                 break;
243         case SIOCAIFADDR:
244                 /*
245                  * ifra_addr must be present and be of INET family.
246                  * ifra_broadaddr and ifra_mask are optional.
247                  */
248                 if (ifra->ifra_addr.sin_len != sizeof(struct sockaddr_in) ||
249                     ifra->ifra_addr.sin_family != AF_INET)
250                         return (EINVAL);
251                 if (ifra->ifra_broadaddr.sin_len != 0 &&
252                     (ifra->ifra_broadaddr.sin_len !=
253                     sizeof(struct sockaddr_in) ||
254                     ifra->ifra_broadaddr.sin_family != AF_INET))
255                         return (EINVAL);
256 #if 0
257                 /*
258                  * ifconfig(8) historically doesn't set af_family for mask
259                  * for unknown reason.
260                  */
261                 if (ifra->ifra_mask.sin_len != 0 &&
262                     (ifra->ifra_mask.sin_len != sizeof(struct sockaddr_in) ||
263                     ifra->ifra_mask.sin_family != AF_INET))
264                         return (EINVAL);
265 #endif
266                 break;
267         case SIOCSIFADDR:
268         case SIOCSIFBRDADDR:
269         case SIOCSIFDSTADDR:
270         case SIOCSIFNETMASK:
271                 if (ifr->ifr_addr.sa_family != AF_INET ||
272                     ifr->ifr_addr.sa_len != sizeof(struct sockaddr_in))
273                         return (EINVAL);
274                 break;
275
276         case SIOCALIFADDR:
277                 if (td != NULL) {
278                         error = priv_check(td, PRIV_NET_ADDIFADDR);
279                         if (error)
280                                 return (error);
281                 }
282                 if (ifp == NULL)
283                         return (EINVAL);
284                 return in_lifaddr_ioctl(so, cmd, data, ifp, td);
285
286         case SIOCDLIFADDR:
287                 if (td != NULL) {
288                         error = priv_check(td, PRIV_NET_DELIFADDR);
289                         if (error)
290                                 return (error);
291                 }
292                 if (ifp == NULL)
293                         return (EINVAL);
294                 return in_lifaddr_ioctl(so, cmd, data, ifp, td);
295
296         case SIOCGLIFADDR:
297                 if (ifp == NULL)
298                         return (EINVAL);
299                 return in_lifaddr_ioctl(so, cmd, data, ifp, td);
300
301         default:
302                 if (ifp == NULL || ifp->if_ioctl == NULL)
303                         return (EOPNOTSUPP);
304                 return ((*ifp->if_ioctl)(ifp, cmd, data));
305         }
306
            /* Every command handled from here on needs an interface. */
307         if (ifp == NULL)
308                 return (EADDRNOTAVAIL);
309
310         /*
311          * Security checks before we get involved in any work.
312          */
313         switch (cmd) {
314         case SIOCAIFADDR:
315         case SIOCSIFADDR:
316         case SIOCSIFBRDADDR:
317         case SIOCSIFNETMASK:
318         case SIOCSIFDSTADDR:
319                 if (td != NULL) {
320                         error = priv_check(td, PRIV_NET_ADDIFADDR);
321                         if (error)
322                                 return (error);
323                 }
324                 break;
325
326         case SIOCDIFADDR:
327                 if (td != NULL) {
328                         error = priv_check(td, PRIV_NET_DELIFADDR);
329                         if (error)
330                                 return (error);
331                 }
332                 break;
333         }
334
335         /*
336          * Find address for this interface, if it exists.
337          *
338          * If an alias address was specified, find that one instead of the
339          * first one on the interface, if possible.
340          */
341         dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
342         IN_IFADDR_RLOCK();
343         LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash) {
344                 if (iap->ia_ifp == ifp &&
345                     iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
                            /*
                             * Exact match on this interface; it is only used
                             * if td is NULL or prison_check_ip4() returns 0.
                             * The search stops here either way.
                             */
346                         if (td == NULL || prison_check_ip4(td->td_ucred,
347                             &dst) == 0)
348                                 ia = iap;
349                         break;
350                 }
351         }
            /* Hold a reference on the entry found; it is dropped at "out". */
352         if (ia != NULL)
353                 ifa_ref(&ia->ia_ifa);
354         IN_IFADDR_RUNLOCK();
355         if (ia == NULL) {
                    /*
                     * No exact match: fall back to the first AF_INET address
                     * on ifp, skipping entries for which prison_check_ip4()
                     * fails when td is non-NULL.
                     */
356                 IF_ADDR_LOCK(ifp);
357                 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
358                         iap = ifatoia(ifa);
359                         if (iap->ia_addr.sin_family == AF_INET) {
360                                 if (td != NULL &&
361                                     prison_check_ip4(td->td_ucred,
362                                     &iap->ia_addr.sin_addr) != 0)
363                                         continue;
364                                 ia = iap;
365                                 break;
366                         }
367                 }
368                 if (ia != NULL)
369                         ifa_ref(&ia->ia_ifa);
370                 IF_ADDR_UNLOCK(ifp);
371         }
            /*
             * Nothing usable was found on ifp.  The flag is consulted after a
             * successful add/set to decide whether to join the all-hosts group.
             */
372         if (ia == NULL)
373                 iaIsFirst = 1;
374
375         error = 0;
376         switch (cmd) {
377         case SIOCAIFADDR:
378         case SIOCDIFADDR:
379                 if (ifra->ifra_addr.sin_family == AF_INET) {
380                         struct in_ifaddr *oia;
381
                            /*
                             * Starting at the entry found above, walk the
                             * global list for an entry on ifp carrying exactly
                             * the requested address.  The held reference is
                             * moved from the old candidate (oia) to the new
                             * one; if nothing matches, ia ends up NULL and the
                             * old reference is released.
                             */
382                         IN_IFADDR_RLOCK();
383                         for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
384                                 if (ia->ia_ifp == ifp  &&
385                                     ia->ia_addr.sin_addr.s_addr ==
386                                     ifra->ifra_addr.sin_addr.s_addr)
387                                         break;
388                         }
389                         if (ia != NULL && ia != oia)
390                                 ifa_ref(&ia->ia_ifa);
391                         if (oia != NULL && ia != oia)
392                                 ifa_free(&oia->ia_ifa);
393                         IN_IFADDR_RUNLOCK();
                            /* Adding to a point-to-point interface needs a destination. */
394                         if ((ifp->if_flags & IFF_POINTOPOINT)
395                             && (cmd == SIOCAIFADDR)
396                             && (ifra->ifra_dstaddr.sin_addr.s_addr
397                                 == INADDR_ANY)) {
398                                 error = EDESTADDRREQ;
399                                 goto out;
400                         }
401                 }
402                 if (cmd == SIOCDIFADDR && ia == NULL) {
403                         error = EADDRNOTAVAIL;
404                         goto out;
405                 }
406                 /* FALLTHROUGH */
407         case SIOCSIFADDR:
408         case SIOCSIFNETMASK:
409         case SIOCSIFDSTADDR:
410                 if (ia == NULL) {
                            /*
                             * No existing entry: allocate a zeroed in_ifaddr
                             * and link it on both the interface list and the
                             * global list.  Allocation failure is reported as
                             * ENOBUFS.
                             */
411                         ia = (struct in_ifaddr *)
412                                 malloc(sizeof *ia, M_IFADDR, M_NOWAIT |
413                                     M_ZERO);
414                         if (ia == NULL) {
415                                 error = ENOBUFS;
416                                 goto out;
417                         }
418
419                         ifa = &ia->ia_ifa;
420                         ifa_init(ifa);
421                         ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
422                         ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
423                         ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
424
425                         ia->ia_sockmask.sin_len = 8;
426                         ia->ia_sockmask.sin_family = AF_INET;
427                         if (ifp->if_flags & IFF_BROADCAST) {
428                                 ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
429                                 ia->ia_broadaddr.sin_family = AF_INET;
430                         }
431                         ia->ia_ifp = ifp;
432
433                         ifa_ref(ifa);                   /* if_addrhead */
434                         IF_ADDR_LOCK(ifp);
435                         TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
436                         IF_ADDR_UNLOCK(ifp);
437                         ifa_ref(ifa);                   /* in_ifaddrhead */
438                         IN_IFADDR_WLOCK();
439                         TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
440                         IN_IFADDR_WUNLOCK();
441                         iaIsNew = 1;
442                 }
443                 break;
444
445         case SIOCSIFBRDADDR:
446         case SIOCGIFADDR:
447         case SIOCGIFNETMASK:
448         case SIOCGIFDSTADDR:
449         case SIOCGIFBRDADDR:
                    /* These commands operate on an existing address only. */
450                 if (ia == NULL) {
451                         error = EADDRNOTAVAIL;
452                         goto out;
453                 }
454                 break;
455         }
456
457         /*
458          * Most paths in this switch return directly or via out.  Only paths
459          * that remove the address break in order to hit common removal code.
460          */
461         switch (cmd) {
462         case SIOCGIFADDR:
463                 *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
464                 goto out;
465
466         case SIOCGIFBRDADDR:
467                 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
468                         error = EINVAL;
469                         goto out;
470                 }
471                 *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
472                 goto out;
473
474         case SIOCGIFDSTADDR:
475                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
476                         error = EINVAL;
477                         goto out;
478                 }
479                 *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
480                 goto out;
481
482         case SIOCGIFNETMASK:
483                 *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
484                 goto out;
485
486         case SIOCSIFDSTADDR:
487                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
488                         error = EINVAL;
489                         goto out;
490                 }
                    /* Install the new destination; restore the old one if the driver refuses. */
491                 oldaddr = ia->ia_dstaddr;
492                 ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
493                 if (ifp->if_ioctl != NULL) {
494                         error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR,
495                             (caddr_t)ia);
496                         if (error) {
497                                 ia->ia_dstaddr = oldaddr;
498                                 goto out;
499                         }
500                 }
501                 if (ia->ia_flags & IFA_ROUTE) {
                            /*
                             * ifa_dstaddr is pointed at the saved old address
                             * for the RTM_DELETE call, then back at the new
                             * destination for the RTM_ADD call.
                             */
502                         ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
503                         rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
504                         ia->ia_ifa.ifa_dstaddr =
505                                         (struct sockaddr *)&ia->ia_dstaddr;
506                         rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
507                 }
508                 goto out;
509
510         case SIOCSIFBRDADDR:
511                 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
512                         error = EINVAL;
513                         goto out;
514                 }
515                 ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
516                 goto out;
517
518         case SIOCSIFADDR:
519                 error = in_ifinit(ifp, ia,
520                     (struct sockaddr_in *) &ifr->ifr_addr, 1, 0, 0);
                    /* A freshly created entry that failed to initialize is unlinked below. */
521                 if (error != 0 && iaIsNew)
522                         break;
523                 if (error == 0) {
524                         ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
525                         if (iaIsFirst &&
526                             (ifp->if_flags & IFF_MULTICAST) != 0) {
527                                 error = in_joingroup(ifp, &allhosts_addr,
528                                     NULL, &ii->ii_allhosts);
529                         }
530                         EVENTHANDLER_INVOKE(ifaddr_event, ifp);
531                 }
                    /*
                     * NOTE(review): error is cleared unconditionally here, so a
                     * failure of in_ifinit() on a pre-existing entry, or of
                     * in_joingroup(), is not reported to the caller.  The
                     * SIOCAIFADDR case below does report them; confirm this
                     * difference is intended.
                     */
532                 error = 0;
533                 goto out;
534
535         case SIOCSIFNETMASK:
536                 ia->ia_sockmask.sin_addr = ((struct sockaddr_in *)
537                     &ifr->ifr_addr)->sin_addr;
538                 ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
539                 goto out;
540
541         case SIOCAIFADDR:
542                 maskIsNew = 0;
543                 hostIsNew = 1;
544                 error = 0;
545                 if (ifra->ifra_addr.sin_addr.s_addr ==
546                             ia->ia_addr.sin_addr.s_addr)
547                         hostIsNew = 0;
548                 if (ifra->ifra_mask.sin_len) {
549                         /* 
550                          * QL: XXX
551                          * Need to scrub the prefix here in case
552                          * the issued command is SIOCAIFADDR with
553                          * the same address, but with a different
554                          * prefix length. And if the prefix length
555                          * is the same as before, then the call is 
556                          * un-necessarily executed here.
557                          */
558                         in_ifscrub(ifp, ia, LLE_STATIC);
559                         ia->ia_sockmask = ifra->ifra_mask;
560                         ia->ia_sockmask.sin_family = AF_INET;
561                         ia->ia_subnetmask =
562                              ntohl(ia->ia_sockmask.sin_addr.s_addr);
563                         maskIsNew = 1;
564                 }
565                 if ((ifp->if_flags & IFF_POINTOPOINT) &&
566                     (ifra->ifra_dstaddr.sin_family == AF_INET)) {
567                         in_ifscrub(ifp, ia, LLE_STATIC);
568                         ia->ia_dstaddr = ifra->ifra_dstaddr;
569                         maskIsNew  = 1; /* We lie; but the effect's the same */
570                 }
                    /* Re-initialize only when the address, mask or destination changed. */
571                 if (hostIsNew || maskIsNew)
572                         error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0,
573                             maskIsNew, ifra->ifra_vhid);
                    /* A freshly created entry that failed to initialize is unlinked below. */
574                 if (error != 0 && iaIsNew)
575                         break;
576
577                 if ((ifp->if_flags & IFF_BROADCAST) &&
578                     ifra->ifra_broadaddr.sin_len)
579                         ia->ia_broadaddr = ifra->ifra_broadaddr;
580                 if (error == 0) {
581                         ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
582                         if (iaIsFirst &&
583                             (ifp->if_flags & IFF_MULTICAST) != 0) {
584                                 error = in_joingroup(ifp, &allhosts_addr,
585                                     NULL, &ii->ii_allhosts);
586                         }
587                         EVENTHANDLER_INVOKE(ifaddr_event, ifp);
588                 }
589                 goto out;
590
591         case SIOCDIFADDR:
592                 /*
593                  * in_ifscrub kills the interface route.
594                  */
595                 in_ifscrub(ifp, ia, LLE_STATIC);
596
597                 /*
598                  * in_ifadown gets rid of all the rest of
599                  * the routes.  This is not quite the right
600                  * thing to do, but at least if we are running
601                  * a routing process they will come back.
602                  */
603                 in_ifadown(&ia->ia_ifa, 1);
604                 EVENTHANDLER_INVOKE(ifaddr_event, ifp);
605                 error = 0;
606                 break;
607
608         default:
609                 panic("in_control: unsupported ioctl");
610         }
611
            /*
             * Common removal code: reached for SIOCDIFADDR and when
             * initializing a newly created entry failed.
             */
612         if (ia->ia_ifa.ifa_carp)
613                 (*carp_detach_p)(&ia->ia_ifa);
614
615         IF_ADDR_LOCK(ifp);
616         /* Re-check that ia is still part of the list. */
617         TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
618                 if (ifa == &ia->ia_ifa)
619                         break;
620         }
621         if (ifa == NULL) {
622                 /*
623                  * If we lost the race with another thread, there is no need to
624                  * try it again for the next loop as there is no other exit
625                  * path between here and out.
626                  */
627                 IF_ADDR_UNLOCK(ifp);
628                 error = EADDRNOTAVAIL;
629                 goto out;
630         }
631         TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
632         IF_ADDR_UNLOCK(ifp);
633         ifa_free(&ia->ia_ifa);                          /* if_addrhead */
634
635         IN_IFADDR_WLOCK();
636         TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
637
638         LIST_REMOVE(ia, ia_hash);
639         IN_IFADDR_WUNLOCK();
640         /*
641          * If this is the last IPv4 address configured on this
642          * interface, leave the all-hosts group.
643          * No state-change report need be transmitted.
644          */
645         IFP_TO_IA(ifp, iap);
646         if (iap == NULL) {
647                 ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
648                 IN_MULTI_LOCK();
649                 if (ii->ii_allhosts) {
650                         (void)in_leavegroup_locked(ii->ii_allhosts, NULL);
651                         ii->ii_allhosts = NULL;
652                 }
653                 IN_MULTI_UNLOCK();
654         } else
                    /* presumably IFP_TO_IA() returned a referenced entry - TODO confirm */
655                 ifa_free(&iap->ia_ifa);
656
657         ifa_free(&ia->ia_ifa);                          /* in_ifaddrhead */
658 out:
            /*
             * Release the reference taken during lookup.  For an entry
             * allocated above this presumably drops the initial reference
             * from ifa_init() - TODO confirm.
             */
659         if (ia != NULL)
660                 ifa_free(&ia->ia_ifa);
661         return (error);
662 }
663
664 /*
665  * SIOC[GAD]LIFADDR.
666  *      SIOCGLIFADDR: get first address. (?!?)
667  *      SIOCGLIFADDR with IFLR_PREFIX:
668  *              get first address that matches the specified prefix.
669  *      SIOCALIFADDR: add the specified address.
670  *      SIOCALIFADDR with IFLR_PREFIX:
671  *              EINVAL since we can't deduce hostid part of the address.
672  *      SIOCDLIFADDR: delete the specified address.
673  *      SIOCDLIFADDR with IFLR_PREFIX:
674  *              delete the first address that matches the specified prefix.
675  * return values:
676  *      EINVAL on invalid parameters
677  *      EADDRNOTAVAIL on prefix match failed/specified address not found
678  *      other values may be returned from in_ioctl()
679  */
680 static int
681 in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
682     struct ifnet *ifp, struct thread *td)
683 {
684         struct if_laddrreq *iflr = (struct if_laddrreq *)data;
685         struct ifaddr *ifa;
686
687         /* sanity checks */
688         if (data == NULL || ifp == NULL) {
689                 panic("invalid argument to in_lifaddr_ioctl");
690                 /*NOTRECHED*/
691         }
692
693         switch (cmd) {
694         case SIOCGLIFADDR:
695                 /* address must be specified on GET with IFLR_PREFIX */
696                 if ((iflr->flags & IFLR_PREFIX) == 0)
697                         break;
698                 /*FALLTHROUGH*/
699         case SIOCALIFADDR:
700         case SIOCDLIFADDR:
701                 /* address must be specified on ADD and DELETE */
702                 if (iflr->addr.ss_family != AF_INET)
703                         return (EINVAL);
704                 if (iflr->addr.ss_len != sizeof(struct sockaddr_in))
705                         return (EINVAL);
706                 /* XXX need improvement */
707                 if (iflr->dstaddr.ss_family
708                  && iflr->dstaddr.ss_family != AF_INET)
709                         return (EINVAL);
710                 if (iflr->dstaddr.ss_family
711                  && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in))
712                         return (EINVAL);
713                 break;
714         default: /*shouldn't happen*/
715                 return (EOPNOTSUPP);
716         }
717         if (sizeof(struct in_addr) * 8 < iflr->prefixlen)
718                 return (EINVAL);
719
720         switch (cmd) {
721         case SIOCALIFADDR:
722             {
723                 struct in_aliasreq ifra;
724
725                 if (iflr->flags & IFLR_PREFIX)
726                         return (EINVAL);
727
728                 /* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */
729                 bzero(&ifra, sizeof(ifra));
730                 bcopy(iflr->iflr_name, ifra.ifra_name,
731                         sizeof(ifra.ifra_name));
732
733                 bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len);
734
735                 if (iflr->dstaddr.ss_family) {  /*XXX*/
736                         bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
737                                 iflr->dstaddr.ss_len);
738                 }
739
740                 ifra.ifra_mask.sin_family = AF_INET;
741                 ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
742                 in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);
743
744                 return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td));
745             }
746         case SIOCGLIFADDR:
747         case SIOCDLIFADDR:
748             {
749                 struct in_ifaddr *ia;
750                 struct in_addr mask, candidate, match;
751                 struct sockaddr_in *sin;
752
753                 bzero(&mask, sizeof(mask));
754                 bzero(&match, sizeof(match));
755                 if (iflr->flags & IFLR_PREFIX) {
756                         /* lookup a prefix rather than address. */
757                         in_len2mask(&mask, iflr->prefixlen);
758
759                         sin = (struct sockaddr_in *)&iflr->addr;
760                         match.s_addr = sin->sin_addr.s_addr;
761                         match.s_addr &= mask.s_addr;
762
763                         /* if you set extra bits, that's wrong */
764                         if (match.s_addr != sin->sin_addr.s_addr)
765                                 return (EINVAL);
766
767                 } else {
768                         /* on getting an address, take the 1st match */
769                         /* on deleting an address, do exact match */
770                         if (cmd != SIOCGLIFADDR) {
771                                 in_len2mask(&mask, 32);
772                                 sin = (struct sockaddr_in *)&iflr->addr;
773                                 match.s_addr = sin->sin_addr.s_addr;
774                         }
775                 }
776
777                 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
778                         if (ifa->ifa_addr->sa_family != AF_INET6)
779                                 continue;
780                         if (match.s_addr == 0)
781                                 break;
782                         candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr;
783                         candidate.s_addr &= mask.s_addr;
784                         if (candidate.s_addr == match.s_addr)
785                                 break;
786                 }
787                 if (ifa == NULL)
788                         return (EADDRNOTAVAIL);
789                 ia = (struct in_ifaddr *)ifa;
790
791                 if (cmd == SIOCGLIFADDR) {
792                         /* fill in the if_laddrreq structure */
793                         bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);
794
795                         if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
796                                 bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
797                                         ia->ia_dstaddr.sin_len);
798                         } else
799                                 bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
800
801                         iflr->prefixlen =
802                                 in_mask2len(&ia->ia_sockmask.sin_addr);
803
804                         iflr->flags = 0;        /*XXX*/
805
806                         return (0);
807                 } else {
808                         struct in_aliasreq ifra;
809
810                         /* fill in_aliasreq and do ioctl(SIOCDIFADDR_IN6) */
811                         bzero(&ifra, sizeof(ifra));
812                         bcopy(iflr->iflr_name, ifra.ifra_name,
813                                 sizeof(ifra.ifra_name));
814
815                         bcopy(&ia->ia_addr, &ifra.ifra_addr,
816                                 ia->ia_addr.sin_len);
817                         if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
818                                 bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
819                                         ia->ia_dstaddr.sin_len);
820                         }
821                         bcopy(&ia->ia_sockmask, &ifra.ifra_dstaddr,
822                                 ia->ia_sockmask.sin_len);
823
824                         return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
825                             ifp, td));
826                 }
827             }
828         }
829
830         return (EOPNOTSUPP);    /*just for safety*/
831 }
832
833 /*
834  * Delete any existing route for an interface.
835  */
836 void
837 in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, u_int flags)
838 {
839
840         in_scrubprefix(ia, flags);
841 }
842
843 /*
844  * Initialize an interface's internet address
845  * and routing table entry.
846  */
847 static int
848 in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
849     int scrub, int masksupplied, int vhid)
850 {
851         register u_long i = ntohl(sin->sin_addr.s_addr);
852         int flags = RTF_UP, error = 0;
853
854         if (scrub)
855                 in_scrubprefix(ia, LLE_STATIC);
856
857         IN_IFADDR_WLOCK();
858         if (ia->ia_addr.sin_family == AF_INET)
859                 LIST_REMOVE(ia, ia_hash);
860         ia->ia_addr = *sin;
861         LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
862             ia, ia_hash);
863         IN_IFADDR_WUNLOCK();
864
865         if (vhid > 0) {
866                 if (carp_attach_p != NULL)
867                         error = (*carp_attach_p)(&ia->ia_ifa, vhid);
868                 else
869                         error = EPROTONOSUPPORT;
870         }
871         if (error)
872                 return (error);
873
874         /*
875          * Give the interface a chance to initialize
876          * if this is its first address,
877          * and to validate the address if necessary.
878          */
879         if (ifp->if_ioctl != NULL &&
880             (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia)) != 0)
881                         /* LIST_REMOVE(ia, ia_hash) is done in in_control */
882                         return (error);
883
884         /*
885          * Be compatible with network classes, if netmask isn't supplied,
886          * guess it based on classes.
887          */
888         if (!masksupplied) {
889                 if (IN_CLASSA(i))
890                         ia->ia_subnetmask = IN_CLASSA_NET;
891                 else if (IN_CLASSB(i))
892                         ia->ia_subnetmask = IN_CLASSB_NET;
893                 else
894                         ia->ia_subnetmask = IN_CLASSC_NET;
895                 ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
896         }
897         ia->ia_subnet = i & ia->ia_subnetmask;
898         in_socktrim(&ia->ia_sockmask);
899         /*
900          * Add route for the network.
901          */
902         ia->ia_ifa.ifa_metric = ifp->if_metric;
903         if (ifp->if_flags & IFF_BROADCAST) {
904                 if (ia->ia_subnetmask == IN_RFC3021_MASK)
905                         ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
906                 else
907                         ia->ia_broadaddr.sin_addr.s_addr =
908                             htonl(ia->ia_subnet | ~ia->ia_subnetmask);
909         } else if (ifp->if_flags & IFF_LOOPBACK) {
910                 ia->ia_dstaddr = ia->ia_addr;
911                 flags |= RTF_HOST;
912         } else if (ifp->if_flags & IFF_POINTOPOINT) {
913                 if (ia->ia_dstaddr.sin_family != AF_INET)
914                         return (0);
915                 flags |= RTF_HOST;
916         }
917         if (!vhid && (error = in_addprefix(ia, flags)) != 0)
918                 return (error);
919
920         if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
921                 return (0);
922
923         if (ifp->if_flags & IFF_POINTOPOINT &&
924             ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
925                         return (0);
926
927         /*
928          * add a loopback route to self
929          */
930         if (V_useloopback && !vhid && !(ifp->if_flags & IFF_LOOPBACK)) {
931                 struct route ia_ro;
932
933                 bzero(&ia_ro, sizeof(ia_ro));
934                 *((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr;
935                 rtalloc_ign_fib(&ia_ro, 0, 0);
936                 if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
937                     (ia_ro.ro_rt->rt_ifp == V_loif)) {
938                         RT_LOCK(ia_ro.ro_rt);
939                         RT_ADDREF(ia_ro.ro_rt);
940                         RTFREE_LOCKED(ia_ro.ro_rt);
941                 } else
942                         error = ifa_add_loopback_route((struct ifaddr *)ia, 
943                                        (struct sockaddr *)&ia->ia_addr);
944                 if (error == 0)
945                         ia->ia_flags |= IFA_RTSELF;
946                 if (ia_ro.ro_rt != NULL)
947                         RTFREE(ia_ro.ro_rt);
948         }
949
950         return (error);
951 }
952
/*
 * Route flags implied by the interface that address `x' lives on:
 * RTF_HOST for loopback and point-to-point interfaces, 0 otherwise.
 */
#define rtinitflags(x)                                                  \
        (((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) ?   \
            RTF_HOST : 0)
956
957 /*
958  * Generate a routing message when inserting or deleting 
959  * an interface address alias.
960  */
961 static void in_addralias_rtmsg(int cmd, struct in_addr *prefix, 
962     struct in_ifaddr *target)
963 {
964         struct route pfx_ro;
965         struct sockaddr_in *pfx_addr;
966         struct rtentry msg_rt;
967
968         /* QL: XXX
969          * This is a bit questionable because there is no
970          * additional route entry added/deleted for an address
971          * alias. Therefore this route report is inaccurate.
972          */
973         bzero(&pfx_ro, sizeof(pfx_ro));
974         pfx_addr = (struct sockaddr_in *)(&pfx_ro.ro_dst);
975         pfx_addr->sin_len = sizeof(*pfx_addr);
976         pfx_addr->sin_family = AF_INET;
977         pfx_addr->sin_addr = *prefix;
978         rtalloc_ign_fib(&pfx_ro, 0, 0);
979         if (pfx_ro.ro_rt != NULL) {
980                 msg_rt = *pfx_ro.ro_rt;
981
982                 /* QL: XXX
983                  * Point the gateway to the new interface
984                  * address as if a new prefix route entry has 
985                  * been added through the new address alias. 
986                  * All other parts of the rtentry is accurate, 
987                  * e.g., rt_key, rt_mask, rt_ifp etc.
988                  */
989                 msg_rt.rt_gateway = 
990                         (struct sockaddr *)&target->ia_addr;
991                 rt_newaddrmsg(cmd, 
992                               (struct ifaddr *)target,
993                               0, &msg_rt);
994                 RTFREE(pfx_ro.ro_rt);
995         }
996         return;
997 }
998
999 /*
1000  * Check if we have a route for the given prefix already or add one accordingly.
1001  */
1002 int
1003 in_addprefix(struct in_ifaddr *target, int flags)
1004 {
1005         struct in_ifaddr *ia;
1006         struct in_addr prefix, mask, p, m;
1007         int error;
1008
1009         if ((flags & RTF_HOST) != 0) {
1010                 prefix = target->ia_dstaddr.sin_addr;
1011                 mask.s_addr = 0;
1012         } else {
1013                 prefix = target->ia_addr.sin_addr;
1014                 mask = target->ia_sockmask.sin_addr;
1015                 prefix.s_addr &= mask.s_addr;
1016         }
1017
1018         IN_IFADDR_RLOCK();
1019         TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1020                 if (rtinitflags(ia)) {
1021                         p = ia->ia_dstaddr.sin_addr;
1022
1023                         if (prefix.s_addr != p.s_addr)
1024                                 continue;
1025                 } else {
1026                         p = ia->ia_addr.sin_addr;
1027                         m = ia->ia_sockmask.sin_addr;
1028                         p.s_addr &= m.s_addr;
1029
1030                         if (prefix.s_addr != p.s_addr ||
1031                             mask.s_addr != m.s_addr)
1032                                 continue;
1033                 }
1034
1035                 /*
1036                  * If we got a matching prefix route inserted by other
1037                  * interface address, we are done here.
1038                  */
1039                 if (ia->ia_flags & IFA_ROUTE) {
1040 #ifdef RADIX_MPATH
1041                         if (ia->ia_addr.sin_addr.s_addr == 
1042                             target->ia_addr.sin_addr.s_addr) {
1043                                 IN_IFADDR_RUNLOCK();
1044                                 return (EEXIST);
1045                         } else
1046                                 break;
1047 #endif
1048                         if (V_nosameprefix) {
1049                                 IN_IFADDR_RUNLOCK();
1050                                 return (EEXIST);
1051                         } else {
1052                                 in_addralias_rtmsg(RTM_ADD, &prefix, target);
1053                                 IN_IFADDR_RUNLOCK();
1054                                 return (0);
1055                         }
1056                 }
1057         }
1058         IN_IFADDR_RUNLOCK();
1059
1060         /*
1061          * No-one seem to have this prefix route, so we try to insert it.
1062          */
1063         error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
1064         if (!error)
1065                 target->ia_flags |= IFA_ROUTE;
1066         return (error);
1067 }
1068
1069 /*
1070  * If there is no other address in the system that can serve a route to the
1071  * same prefix, remove the route.  Hand over the route to the new address
1072  * otherwise.
1073  */
1074 int
1075 in_scrubprefix(struct in_ifaddr *target, u_int flags)
1076 {
1077         struct in_ifaddr *ia;
1078         struct in_addr prefix, mask, p, m;
1079         int error = 0;
1080         struct sockaddr_in prefix0, mask0;
1081
1082         /*
1083          * Remove the loopback route to the interface address.
1084          * The "useloopback" setting is not consulted because if the
1085          * user configures an interface address, turns off this
1086          * setting, and then tries to delete that interface address,
1087          * checking the current setting of "useloopback" would leave
1088          * that interface address loopback route untouched, which
1089          * would be wrong. Therefore the interface address loopback route
1090          * deletion is unconditional.
1091          */
1092         if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) &&
1093             !(target->ia_ifp->if_flags & IFF_LOOPBACK) &&
1094             (target->ia_flags & IFA_RTSELF)) {
1095                 struct route ia_ro;
1096                 int freeit = 0;
1097
1098                 bzero(&ia_ro, sizeof(ia_ro));
1099                 *((struct sockaddr_in *)(&ia_ro.ro_dst)) = target->ia_addr;
1100                 rtalloc_ign_fib(&ia_ro, 0, 0);
1101                 if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
1102                     (ia_ro.ro_rt->rt_ifp == V_loif)) {
1103                         RT_LOCK(ia_ro.ro_rt);
1104                         if (ia_ro.ro_rt->rt_refcnt <= 1)
1105                                 freeit = 1;
1106                         else if (flags & LLE_STATIC) {
1107                                 RT_REMREF(ia_ro.ro_rt);
1108                                 target->ia_flags &= ~IFA_RTSELF;
1109                         }
1110                         RTFREE_LOCKED(ia_ro.ro_rt);
1111                 }
1112                 if (freeit && (flags & LLE_STATIC)) {
1113                         error = ifa_del_loopback_route((struct ifaddr *)target,
1114                                        (struct sockaddr *)&target->ia_addr);
1115                         if (error == 0)
1116                                 target->ia_flags &= ~IFA_RTSELF;
1117                 }
1118                 if ((flags & LLE_STATIC) &&
1119                         !(target->ia_ifp->if_flags & IFF_NOARP))
1120                         /* remove arp cache */
1121                         arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr);
1122         }
1123
1124         if (rtinitflags(target)) {
1125                 prefix = target->ia_dstaddr.sin_addr;
1126                 mask.s_addr = 0;
1127         } else {
1128                 prefix = target->ia_addr.sin_addr;
1129                 mask = target->ia_sockmask.sin_addr;
1130                 prefix.s_addr &= mask.s_addr;
1131         }
1132
1133         if ((target->ia_flags & IFA_ROUTE) == 0) {
1134                 in_addralias_rtmsg(RTM_DELETE, &prefix, target);
1135                 return (0);
1136         }
1137
1138         IN_IFADDR_RLOCK();
1139         TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1140                 if (rtinitflags(ia)) {
1141                         p = ia->ia_dstaddr.sin_addr;
1142
1143                         if (prefix.s_addr != p.s_addr)
1144                                 continue;
1145                 } else {
1146                         p = ia->ia_addr.sin_addr;
1147                         m = ia->ia_sockmask.sin_addr;
1148                         p.s_addr &= m.s_addr;
1149
1150                         if (prefix.s_addr != p.s_addr ||
1151                             mask.s_addr != m.s_addr)
1152                                 continue;
1153                 }
1154
1155                 if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1156                         continue;
1157
1158                 /*
1159                  * If we got a matching prefix address, move IFA_ROUTE and
1160                  * the route itself to it.  Make sure that routing daemons
1161                  * get a heads-up.
1162                  */
1163                 if ((ia->ia_flags & IFA_ROUTE) == 0) {
1164                         ifa_ref(&ia->ia_ifa);
1165                         IN_IFADDR_RUNLOCK();
1166                         error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
1167                             rtinitflags(target));
1168                         if (error == 0)
1169                                 target->ia_flags &= ~IFA_ROUTE;
1170                         else
1171                                 log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n",
1172                                         error);
1173                         error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
1174                             rtinitflags(ia) | RTF_UP);
1175                         if (error == 0)
1176                                 ia->ia_flags |= IFA_ROUTE;
1177                         else
1178                                 log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n",
1179                                         error);
1180                         ifa_free(&ia->ia_ifa);
1181                         return (error);
1182                 }
1183         }
1184         IN_IFADDR_RUNLOCK();
1185
1186         /*
1187          * remove all L2 entries on the given prefix
1188          */
1189         bzero(&prefix0, sizeof(prefix0));
1190         prefix0.sin_len = sizeof(prefix0);
1191         prefix0.sin_family = AF_INET;
1192         prefix0.sin_addr.s_addr = target->ia_subnet;
1193         bzero(&mask0, sizeof(mask0));
1194         mask0.sin_len = sizeof(mask0);
1195         mask0.sin_family = AF_INET;
1196         mask0.sin_addr.s_addr = target->ia_subnetmask;
1197         lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0, 
1198                             (struct sockaddr *)&mask0, flags);
1199
1200         /*
1201          * As no-one seem to have this prefix, we can remove the route.
1202          */
1203         error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
1204         if (error == 0)
1205                 target->ia_flags &= ~IFA_ROUTE;
1206         else
1207                 log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error);
1208         return (error);
1209 }
1210
1211 #undef rtinitflags
1212
1213 /*
1214  * Return 1 if the address might be a local broadcast address.
1215  */
1216 int
1217 in_broadcast(struct in_addr in, struct ifnet *ifp)
1218 {
1219         register struct ifaddr *ifa;
1220         u_long t;
1221
1222         if (in.s_addr == INADDR_BROADCAST ||
1223             in.s_addr == INADDR_ANY)
1224                 return (1);
1225         if ((ifp->if_flags & IFF_BROADCAST) == 0)
1226                 return (0);
1227         t = ntohl(in.s_addr);
1228         /*
1229          * Look through the list of addresses for a match
1230          * with a broadcast address.
1231          */
1232 #define ia ((struct in_ifaddr *)ifa)
1233         TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1234                 if (ifa->ifa_addr->sa_family == AF_INET &&
1235                     (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
1236                      /*
1237                       * Check for old-style (host 0) broadcast, but
1238                       * taking into account that RFC 3021 obsoletes it.
1239                       */
1240                      (ia->ia_subnetmask != IN_RFC3021_MASK &&
1241                      t == ia->ia_subnet)) &&
1242                      /*
1243                       * Check for an all one subnetmask. These
1244                       * only exist when an interface gets a secondary
1245                       * address.
1246                       */
1247                      ia->ia_subnetmask != (u_long)0xffffffff)
1248                             return (1);
1249         return (0);
1250 #undef ia
1251 }
1252
1253 /*
1254  * On interface removal, clean up IPv4 data structures hung off of the ifnet.
1255  */
1256 void
1257 in_ifdetach(struct ifnet *ifp)
1258 {
1259
1260         in_pcbpurgeif0(&V_ripcbinfo, ifp);
1261         in_pcbpurgeif0(&V_udbinfo, ifp);
1262         in_purgemaddrs(ifp);
1263 }
1264
1265 /*
1266  * Delete all IPv4 multicast address records, and associated link-layer
1267  * multicast address records, associated with ifp.
1268  * XXX It looks like domifdetach runs AFTER the link layer cleanup.
1269  * XXX This should not race with ifma_protospec being set during
1270  * a new allocation, if it does, we have bigger problems.
1271  */
1272 static void
1273 in_purgemaddrs(struct ifnet *ifp)
1274 {
1275         LIST_HEAD(,in_multi) purgeinms;
1276         struct in_multi         *inm, *tinm;
1277         struct ifmultiaddr      *ifma;
1278
1279         LIST_INIT(&purgeinms);
1280         IN_MULTI_LOCK();
1281
1282         /*
1283          * Extract list of in_multi associated with the detaching ifp
1284          * which the PF_INET layer is about to release.
1285          * We need to do this as IF_ADDR_LOCK() may be re-acquired
1286          * by code further down.
1287          */
1288         IF_ADDR_LOCK(ifp);
1289         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1290                 if (ifma->ifma_addr->sa_family != AF_INET ||
1291                     ifma->ifma_protospec == NULL)
1292                         continue;
1293 #if 0
1294                 KASSERT(ifma->ifma_protospec != NULL,
1295                     ("%s: ifma_protospec is NULL", __func__));
1296 #endif
1297                 inm = (struct in_multi *)ifma->ifma_protospec;
1298                 LIST_INSERT_HEAD(&purgeinms, inm, inm_link);
1299         }
1300         IF_ADDR_UNLOCK(ifp);
1301
1302         LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) {
1303                 LIST_REMOVE(inm, inm_link);
1304                 inm_release_locked(inm);
1305         }
1306         igmp_ifdetach(ifp);
1307
1308         IN_MULTI_UNLOCK();
1309 }
1310
1311 struct in_llentry {
1312         struct llentry          base;
1313         struct sockaddr_in      l3_addr4;
1314 };
1315
1316 static struct llentry *
1317 in_lltable_new(const struct sockaddr *l3addr, u_int flags)
1318 {
1319         struct in_llentry *lle;
1320
1321         lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO);
1322         if (lle == NULL)                /* NB: caller generates msg */
1323                 return NULL;
1324
1325         callout_init(&lle->base.la_timer, CALLOUT_MPSAFE);
1326         /*
1327          * For IPv4 this will trigger "arpresolve" to generate
1328          * an ARP request.
1329          */
1330         lle->base.la_expire = time_uptime; /* mark expired */
1331         lle->l3_addr4 = *(const struct sockaddr_in *)l3addr;
1332         lle->base.lle_refcnt = 1;
1333         LLE_LOCK_INIT(&lle->base);
1334         return &lle->base;
1335 }
1336
1337 /*
1338  * Deletes an address from the address table.
1339  * This function is called by the timer functions
1340  * such as arptimer() and nd6_llinfo_timer(), and
1341  * the caller does the locking.
1342  */
1343 static void
1344 in_lltable_free(struct lltable *llt, struct llentry *lle)
1345 {
1346         LLE_WUNLOCK(lle);
1347         LLE_LOCK_DESTROY(lle);
1348         free(lle, M_LLTABLE);
1349 }
1350
1351
/*
 * True when address `d' falls inside prefix `a' under mask `m'.
 * All three are pointers to sockaddr_in.  Only `d' is converted with
 * ntohl(); `a' and `m' are used as stored, so they must already be in
 * host byte order.
 */
#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m)                               \
        (((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) &       \
            (m)->sin_addr.s_addr) == 0)
1354
1355 static void
1356 in_lltable_prefix_free(struct lltable *llt, 
1357                        const struct sockaddr *prefix,
1358                        const struct sockaddr *mask,
1359                        u_int flags)
1360 {
1361         const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix;
1362         const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
1363         struct llentry *lle, *next;
1364         register int i;
1365         size_t pkts_dropped;
1366
1367         for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
1368                 LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
1369
1370                         /* 
1371                          * (flags & LLE_STATIC) means deleting all entries
1372                          * including static ARP entries
1373                          */
1374                         if (IN_ARE_MASKED_ADDR_EQUAL((struct sockaddr_in *)L3_ADDR(lle), 
1375                                                      pfx, msk) &&
1376                             ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) {
1377                                 int canceled;
1378
1379                                 canceled = callout_drain(&lle->la_timer);
1380                                 LLE_WLOCK(lle);
1381                                 if (canceled)
1382                                         LLE_REMREF(lle);
1383                                 pkts_dropped = llentry_free(lle);
1384                                 ARPSTAT_ADD(dropped, pkts_dropped);
1385                         }
1386                 }
1387         }
1388 }
1389
1390
1391 static int
1392 in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
1393 {
1394         struct rtentry *rt;
1395
1396         KASSERT(l3addr->sa_family == AF_INET,
1397             ("sin_family %d", l3addr->sa_family));
1398
1399         /* XXX rtalloc1 should take a const param */
1400         rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
1401
1402         if (rt == NULL)
1403                 return (EINVAL);
1404
1405         /*
1406          * If the gateway for an existing host route matches the target L3
1407          * address, which is a special route inserted by some implementation
1408          * such as MANET, and the interface is of the correct type, then
1409          * allow for ARP to proceed.
1410          */
1411         if (rt->rt_flags & RTF_GATEWAY) {
1412                 if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp ||
1413                         rt->rt_ifp->if_type != IFT_ETHER ||
1414                           (rt->rt_ifp->if_flags & 
1415                            (IFF_NOARP | IFF_STATICARP)) != 0 ||
1416                           memcmp(rt->rt_gateway->sa_data, l3addr->sa_data,
1417                                  sizeof(in_addr_t)) != 0) {
1418                         RTFREE_LOCKED(rt);
1419                         return (EINVAL);
1420                 }
1421         }
1422
1423         /*
1424          * Make sure that at least the destination address is covered 
1425          * by the route. This is for handling the case where 2 or more 
1426          * interfaces have the same prefix. An incoming packet arrives
1427          * on one interface and the corresponding outgoing packet leaves
1428          * another interface.
1429          */
1430         if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) {
1431                 const char *sa, *mask, *addr, *lim;
1432                 int len;
1433
1434                 mask = (const char *)rt_mask(rt);
1435                 /*
1436                  * Just being extra cautious to avoid some custom
1437                  * code getting into trouble.
1438                  */
1439                 if (mask == NULL) {
1440                         RTFREE_LOCKED(rt);
1441                         return (EINVAL);
1442                 }
1443
1444                 sa = (const char *)rt_key(rt);
1445                 addr = (const char *)l3addr;
1446                 len = ((const struct sockaddr_in *)l3addr)->sin_len;
1447                 lim = addr + len;
1448
1449                 for ( ; addr < lim; sa++, mask++, addr++) {
1450                         if ((*sa ^ *addr) & *mask) {
1451 #ifdef DIAGNOSTIC
1452                                 log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
1453                                     inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
1454 #endif
1455                                 RTFREE_LOCKED(rt);
1456                                 return (EINVAL);
1457                         }
1458                 }
1459         }
1460
1461         RTFREE_LOCKED(rt);
1462         return (0);
1463 }
1464
1465 /*
1466  * Return NULL if not found or marked for deletion.
1467  * If found return lle read locked.
1468  */
1469 static struct llentry *
1470 in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
1471 {
1472         const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
1473         struct ifnet *ifp = llt->llt_ifp;
1474         struct llentry *lle;
1475         struct llentries *lleh;
1476         u_int hashkey;
1477
1478         IF_AFDATA_LOCK_ASSERT(ifp);
1479         KASSERT(l3addr->sa_family == AF_INET,
1480             ("sin_family %d", l3addr->sa_family));
1481
1482         hashkey = sin->sin_addr.s_addr;
1483         lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
1484         LIST_FOREACH(lle, lleh, lle_next) {
1485                 struct sockaddr_in *sa2 = (struct sockaddr_in *)L3_ADDR(lle);
1486                 if (lle->la_flags & LLE_DELETED)
1487                         continue;
1488                 if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr)
1489                         break;
1490         }
1491         if (lle == NULL) {
1492 #ifdef DIAGNOSTIC
1493                 if (flags & LLE_DELETE)
1494                         log(LOG_INFO, "interface address is missing from cache = %p  in delete\n", lle);        
1495 #endif
1496                 if (!(flags & LLE_CREATE))
1497                         return (NULL);
1498                 /*
1499                  * A route that covers the given address must have
1500                  * been installed 1st because we are doing a resolution,
1501                  * verify this.
1502                  */
1503                 if (!(flags & LLE_IFADDR) &&
1504                     in_lltable_rtcheck(ifp, flags, l3addr) != 0)
1505                         goto done;
1506
1507                 lle = in_lltable_new(l3addr, flags);
1508                 if (lle == NULL) {
1509                         log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
1510                         goto done;
1511                 }
1512                 lle->la_flags = flags & ~LLE_CREATE;
1513                 if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
1514                         bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
1515                         lle->la_flags |= (LLE_VALID | LLE_STATIC);
1516                 }
1517
1518                 lle->lle_tbl  = llt;
1519                 lle->lle_head = lleh;
1520                 LIST_INSERT_HEAD(lleh, lle, lle_next);
1521         } else if (flags & LLE_DELETE) {
1522                 if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
1523                         LLE_WLOCK(lle);
1524                         lle->la_flags = LLE_DELETED;
1525                         EVENTHANDLER_INVOKE(arp_update_event, lle);
1526                         LLE_WUNLOCK(lle);
1527 #ifdef DIAGNOSTIC
1528                         log(LOG_INFO, "ifaddr cache = %p  is deleted\n", lle);  
1529 #endif
1530                 }
1531                 lle = (void *)-1;
1532                 
1533         }
1534         if (LLE_IS_VALID(lle)) {
1535                 if (flags & LLE_EXCLUSIVE)
1536                         LLE_WLOCK(lle);
1537                 else
1538                         LLE_RLOCK(lle);
1539         }
1540 done:
1541         return (lle);
1542 }
1543
1544 static int
1545 in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
1546 {
1547 #define SIN(lle)        ((struct sockaddr_in *) L3_ADDR(lle))
1548         struct ifnet *ifp = llt->llt_ifp;
1549         struct llentry *lle;
1550         /* XXX stack use */
1551         struct {
1552                 struct rt_msghdr        rtm;
1553                 struct sockaddr_inarp   sin;
1554                 struct sockaddr_dl      sdl;
1555         } arpc;
1556         int error, i;
1557
1558         LLTABLE_LOCK_ASSERT();
1559
1560         error = 0;
1561         for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
1562                 LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
1563                         struct sockaddr_dl *sdl;
1564                         
1565                         /* skip deleted entries */
1566                         if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
1567                                 continue;
1568                         /* Skip if jailed and not a valid IP of the prison. */
1569                         if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
1570                                 continue;
1571                         /*
1572                          * produce a msg made of:
1573                          *  struct rt_msghdr;
1574                          *  struct sockaddr_inarp; (IPv4)
1575                          *  struct sockaddr_dl;
1576                          */
1577                         bzero(&arpc, sizeof(arpc));
1578                         arpc.rtm.rtm_msglen = sizeof(arpc);
1579                         arpc.rtm.rtm_version = RTM_VERSION;
1580                         arpc.rtm.rtm_type = RTM_GET;
1581                         arpc.rtm.rtm_flags = RTF_UP;
1582                         arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
1583                         arpc.sin.sin_family = AF_INET;
1584                         arpc.sin.sin_len = sizeof(arpc.sin);
1585                         arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;
1586
1587                         /* publish */
1588                         if (lle->la_flags & LLE_PUB) {
1589                                 arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
1590                                 /* proxy only */
1591                                 if (lle->la_flags & LLE_PROXY)
1592                                         arpc.sin.sin_other = SIN_PROXY;
1593                         }
1594
1595                         sdl = &arpc.sdl;
1596                         sdl->sdl_family = AF_LINK;
1597                         sdl->sdl_len = sizeof(*sdl);
1598                         sdl->sdl_index = ifp->if_index;
1599                         sdl->sdl_type = ifp->if_type;
1600                         if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
1601                                 sdl->sdl_alen = ifp->if_addrlen;
1602                                 bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
1603                         } else {
1604                                 sdl->sdl_alen = 0;
1605                                 bzero(LLADDR(sdl), ifp->if_addrlen);
1606                         }
1607
1608                         arpc.rtm.rtm_rmx.rmx_expire =
1609                             lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
1610                         arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
1611                         if (lle->la_flags & LLE_STATIC)
1612                                 arpc.rtm.rtm_flags |= RTF_STATIC;
1613                         arpc.rtm.rtm_index = ifp->if_index;
1614                         error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
1615                         if (error)
1616                                 break;
1617                 }
1618         }
1619         return error;
1620 #undef SIN
1621 }
1622
1623 void *
1624 in_domifattach(struct ifnet *ifp)
1625 {
1626         struct in_ifinfo *ii;
1627         struct lltable *llt;
1628
1629         ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO);
1630
1631         llt = lltable_init(ifp, AF_INET);
1632         if (llt != NULL) {
1633                 llt->llt_free = in_lltable_free;
1634                 llt->llt_prefix_free = in_lltable_prefix_free;
1635                 llt->llt_lookup = in_lltable_lookup;
1636                 llt->llt_dump = in_lltable_dump;
1637         }
1638         ii->ii_llt = llt;
1639
1640         ii->ii_igmp = igmp_domifattach(ifp);
1641
1642         return ii;
1643 }
1644
1645 void
1646 in_domifdetach(struct ifnet *ifp, void *aux)
1647 {
1648         struct in_ifinfo *ii = (struct in_ifinfo *)aux;
1649
1650         igmp_domifdetach(ifp);
1651         lltable_free(ii->ii_llt);
1652         free(ii, M_IFADDR);
1653 }