CyberLeo.Net >> Repos - FreeBSD/releng/7.2.git/blob - sys/net/rtsock.c
Create releng/7.2 from stable/7 in preparation for 7.2-RELEASE.
[FreeBSD/releng/7.2.git] / sys / net / rtsock.c
1 /*-
2  * Copyright (c) 1988, 1991, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *      @(#)rtsock.c    8.7 (Berkeley) 10/12/95
30  * $FreeBSD$
31  */
32 #include "opt_sctp.h"
33 #include "opt_inet.h"
34 #include "opt_inet6.h"
35
36 #include <sys/param.h>
37 #include <sys/domain.h>
38 #include <sys/jail.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/priv.h>
43 #include <sys/proc.h>
44 #include <sys/protosw.h>
45 #include <sys/signalvar.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50
51 #include <net/if.h>
52 #include <net/netisr.h>
53 #include <net/raw_cb.h>
54 #include <net/route.h>
55
56 #include <netinet/in.h>
57 #ifdef INET6
58 #include <netinet6/scope6_var.h>
59 #endif
60
61 #ifdef SCTP
62 extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
63 #endif /* SCTP */
64
/* Defines the M_RTABLE malloc type (short name "routetbl"). */
65 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
66
67 /*
 * NB: these are not modified.
 *
 * route_src is handed to raw_input() by rts_input() as the source
 * address of every delivered message.  sa_zero is the stand-in that
 * rt_xaddrs() stores when userland supplies a zero-length sockaddr.
 */
68 static struct   sockaddr route_src = { 2, PF_ROUTE, };
69 static struct   sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
70
/*
 * Number of attached routing sockets, broken down by the protocol the
 * socket was attached with.  rts_attach() increments and rts_detach()
 * decrements these while holding rtsock_mtx; route_output() reads
 * any_count to decide whether anybody else is listening.
 */
71 static struct {
72         int     ip_count;       /* attached w/ AF_INET */
73         int     ip6_count;      /* attached w/ AF_INET6 */
74         int     ipx_count;      /* attached w/ AF_IPX */
75         int     any_count;      /* total attached */
76 } route_cb;
77
/*
 * rtsock_mtx is the mutex taken around updates of the route_cb counters
 * in rts_attach() and rts_detach().  The RTSOCK_* macros below are thin
 * wrappers for locking, unlocking and asserting ownership of it.
 */
78 struct mtx rtsock_mtx;
79 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
80
81 #define RTSOCK_LOCK()   mtx_lock(&rtsock_mtx)
82 #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
83 #define RTSOCK_LOCK_ASSERT()    mtx_assert(&rtsock_mtx, MA_OWNED)
84
/*
 * Queue registered with netisr for NETISR_ROUTE by rts_init().  Its
 * maximum length is set in rts_init() and exported read/write through
 * the net.route.netisr_maxqlen sysctl declared below.
 */
85 static struct   ifqueue rtsintrq;
86
87 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
88 SYSCTL_INT(_net_route, OID_AUTO, netisr_maxqlen, CTLFLAG_RW,
89     &rtsintrq.ifq_maxlen, 0, "maximum routing socket dispatch queue length");
90
/*
 * Argument bundle accepted by rt_msg2() and the sysctl_* helpers whose
 * prototypes follow (sysctl_dumpentry() receives it as an opaque
 * pointer).
 * NOTE(review): the fields are not used in the part of the file visible
 * here; presumably w_tmem/w_tmemsize are a scratch buffer and its size,
 * w_op/w_arg select what is being walked and w_req is the originating
 * sysctl request -- confirm against the sysctl handlers.
 */
91 struct walkarg {
92         int     w_tmemsize;
93         int     w_op, w_arg;
94         caddr_t w_tmem;
95         struct sysctl_req *w_req;
96 };
97
/* Forward declarations of the file-local (static) functions. */
98 static void     rts_input(struct mbuf *m);
99 static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
100 static int      rt_msg2(int type, struct rt_addrinfo *rtinfo,
101                         caddr_t cp, struct walkarg *w);
102 static int      rt_xaddrs(caddr_t cp, caddr_t cplim,
103                         struct rt_addrinfo *rtinfo);
104 static int      sysctl_dumpentry(struct radix_node *rn, void *vw);
105 static int      sysctl_iflist(int af, struct walkarg *w);
106 static int      sysctl_ifmalist(int af, struct walkarg *w);
107 static int      route_output(struct mbuf *m, struct socket *so);
108 static void     rt_setmetrics(u_long which, const struct rt_metrics *in,
109                         struct rt_metrics_lite *out);
110 static void     rt_getmetrics(const struct rt_metrics_lite *in,
111                         struct rt_metrics *out);
112 static void     rt_dispatch(struct mbuf *, const struct sockaddr *);
113
114 static void
115 rts_init(void)
116 {
117         int tmp;
118
119         rtsintrq.ifq_maxlen = 256;
120         if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
121                 rtsintrq.ifq_maxlen = tmp;
122         mtx_init(&rtsintrq.ifq_mtx, "rts_inq", NULL, MTX_DEF);
123         netisr_register(NETISR_ROUTE, rts_input, &rtsintrq, NETISR_MPSAFE);
124 }
125 SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
126
127 static void
128 rts_input(struct mbuf *m)
129 {
130         struct sockproto route_proto;
131         unsigned short *family;
132         struct m_tag *tag;
133
134         route_proto.sp_family = PF_ROUTE;
135         tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
136         if (tag != NULL) {
137                 family = (unsigned short *)(tag + 1);
138                 route_proto.sp_protocol = *family;
139                 m_tag_delete(m, tag);
140         } else
141                 route_proto.sp_protocol = 0;
142
143         raw_input(m, &route_proto, &route_src);
144 }
145
146 /*
147  * It really doesn't make any sense at all for this code to share much
148  * with raw_usrreq.c, since its functionality is so restricted.  XXX
149  */
150 static void
151 rts_abort(struct socket *so)
152 {
153
154         raw_usrreqs.pru_abort(so);
155 }
156
157 static void
158 rts_close(struct socket *so)
159 {
160
161         raw_usrreqs.pru_close(so);
162 }
163
164 /* pru_accept is EOPNOTSUPP */
165
166 static int
167 rts_attach(struct socket *so, int proto, struct thread *td)
168 {
169         struct rawcb *rp;
170         int s, error;
171
172         KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
173
174         /* XXX */
175         MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
176         if (rp == NULL)
177                 return ENOBUFS;
178
179         /*
180          * The splnet() is necessary to block protocols from sending
181          * error notifications (like RTM_REDIRECT or RTM_LOSING) while
182          * this PCB is extant but incompletely initialized.
183          * Probably we should try to do more of this work beforehand and
184          * eliminate the spl.
185          */
186         s = splnet();
187         so->so_pcb = (caddr_t)rp;
188         so->so_fibnum = td->td_proc->p_fibnum;
189         error = raw_attach(so, proto);
190         rp = sotorawcb(so);
191         if (error) {
192                 splx(s);
193                 so->so_pcb = NULL;
194                 free(rp, M_PCB);
195                 return error;
196         }
197         RTSOCK_LOCK();
198         switch(rp->rcb_proto.sp_protocol) {
199         case AF_INET:
200                 route_cb.ip_count++;
201                 break;
202         case AF_INET6:
203                 route_cb.ip6_count++;
204                 break;
205         case AF_IPX:
206                 route_cb.ipx_count++;
207                 break;
208         }
209         route_cb.any_count++;
210         RTSOCK_UNLOCK();
211         soisconnected(so);
212         so->so_options |= SO_USELOOPBACK;
213         splx(s);
214         return 0;
215 }
216
217 static int
218 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
219 {
220
221         return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
222 }
223
224 static int
225 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
226 {
227
228         return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
229 }
230
231 /* pru_connect2 is EOPNOTSUPP */
232 /* pru_control is EOPNOTSUPP */
233
234 static void
235 rts_detach(struct socket *so)
236 {
237         struct rawcb *rp = sotorawcb(so);
238
239         KASSERT(rp != NULL, ("rts_detach: rp == NULL"));
240
241         RTSOCK_LOCK();
242         switch(rp->rcb_proto.sp_protocol) {
243         case AF_INET:
244                 route_cb.ip_count--;
245                 break;
246         case AF_INET6:
247                 route_cb.ip6_count--;
248                 break;
249         case AF_IPX:
250                 route_cb.ipx_count--;
251                 break;
252         }
253         route_cb.any_count--;
254         RTSOCK_UNLOCK();
255         raw_usrreqs.pru_detach(so);
256 }
257
258 static int
259 rts_disconnect(struct socket *so)
260 {
261
262         return (raw_usrreqs.pru_disconnect(so));
263 }
264
265 /* pru_listen is EOPNOTSUPP */
266
267 static int
268 rts_peeraddr(struct socket *so, struct sockaddr **nam)
269 {
270
271         return (raw_usrreqs.pru_peeraddr(so, nam));
272 }
273
274 /* pru_rcvd is EOPNOTSUPP */
275 /* pru_rcvoob is EOPNOTSUPP */
276
277 static int
278 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
279          struct mbuf *control, struct thread *td)
280 {
281
282         return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
283 }
284
285 /* pru_sense is null */
286
287 static int
288 rts_shutdown(struct socket *so)
289 {
290
291         return (raw_usrreqs.pru_shutdown(so));
292 }
293
294 static int
295 rts_sockaddr(struct socket *so, struct sockaddr **nam)
296 {
297
298         return (raw_usrreqs.pru_sockaddr(so, nam));
299 }
300
301 static struct pr_usrreqs route_usrreqs = {
302         .pru_abort =            rts_abort,
303         .pru_attach =           rts_attach,
304         .pru_bind =             rts_bind,
305         .pru_connect =          rts_connect,
306         .pru_detach =           rts_detach,
307         .pru_disconnect =       rts_disconnect,
308         .pru_peeraddr =         rts_peeraddr,
309         .pru_send =             rts_send,
310         .pru_shutdown =         rts_shutdown,
311         .pru_sockaddr =         rts_sockaddr,
312         .pru_close =            rts_close,
313 };
314
/*
 * The _SOCKADDR_UNION_DEFINED guard skips this definition when the
 * macro is already defined, i.e. when the union has been provided
 * elsewhere.
 */
315 #ifndef _SOCKADDR_UNION_DEFINED
316 #define _SOCKADDR_UNION_DEFINED
317 /*
318  * The union of all possible address formats we handle.
 *
 * route_output() keeps one of these on its stack and rtm_get_jailed()
 * fills it in when it has to synthesise an RTAX_IFA address (IPv4 or
 * IPv6) for a jailed caller.
319  */
320 union sockaddr_union {
321         struct sockaddr         sa;
322         struct sockaddr_in      sin;
323         struct sockaddr_in6     sin6;
324 };
325 #endif /* _SOCKADDR_UNION_DEFINED */
326
327 static int
328 rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
329     struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
330 {
331
332         /* First, see if the returned address is part of the jail. */
333         if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
334                 info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
335                 return (0);
336         }
337
338         switch (info->rti_info[RTAX_DST]->sa_family) {
339 #ifdef INET
340         case AF_INET:
341         {
342                 struct in_addr ia;
343                 struct ifaddr *ifa;
344                 int found;
345
346                 found = 0;
347                 /*
348                  * Try to find an address on the given outgoing interface
349                  * that belongs to the jail.
350                  */
351                 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
352                         struct sockaddr *sa;
353                         sa = ifa->ifa_addr;
354                         if (sa->sa_family != AF_INET)
355                                 continue;
356                         ia = ((struct sockaddr_in *)sa)->sin_addr;
357                         if (prison_check_ip4(cred, &ia) == 0) {
358                                 found = 1;
359                                 break;
360                         }
361                 }
362                 if (!found) {
363                         /*
364                          * As a last resort return the 'default' jail address.
365                          */
366                         if (prison_get_ip4(cred, &ia) != 0)
367                                 return (ESRCH);
368                 }
369                 bzero(&saun->sin, sizeof(struct sockaddr_in));
370                 saun->sin.sin_len = sizeof(struct sockaddr_in);
371                 saun->sin.sin_family = AF_INET;
372                 saun->sin.sin_addr.s_addr = ia.s_addr;
373                 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
374                 break;
375         }
376 #endif
377 #ifdef INET6
378         case AF_INET6:
379         {
380                 struct in6_addr ia6;
381                 struct ifaddr *ifa;
382                 int found;
383
384                 found = 0;
385                 /*
386                  * Try to find an address on the given outgoing interface
387                  * that belongs to the jail.
388                  */
389                 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
390                         struct sockaddr *sa;
391                         sa = ifa->ifa_addr;
392                         if (sa->sa_family != AF_INET6)
393                                 continue;
394                         bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
395                             &ia6, sizeof(struct in6_addr));
396                         if (prison_check_ip6(cred, &ia6) == 0) {
397                                 found = 1;
398                                 break;
399                         }
400                 }
401                 if (!found) {
402                         /*
403                          * As a last resort return the 'default' jail address.
404                          */
405                         if (prison_get_ip6(cred, &ia6) != 0)
406                                 return (ESRCH);
407                 }
408                 bzero(&saun->sin6, sizeof(struct sockaddr_in6));
409                 saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
410                 saun->sin6.sin6_family = AF_INET6;
411                 bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
412                 if (sa6_recoverscope(&saun->sin6) != 0)
413                         return (ESRCH);
414                 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
415                 break;
416         }
417 #endif
418         default:
419                 return (ESRCH);
420         }
421         return (0);
422 }
423
/*
 * Process one routing message written to a routing socket.
 *
 * m  - mbuf chain holding the message (must carry a packet header).
 * so - the socket the message was written on; its so_fibnum selects the
 *      routing table and its SO_USELOOPBACK option decides whether the
 *      sender gets its own reply back.
 *
 * The message is length- and version-checked, copied into a contiguous
 * buffer, and its trailing sockaddrs are located with rt_xaddrs().  Every
 * request other than RTM_GET requires PRIV_NET_ROUTE.  The request is
 * then dispatched on rtm_type (RTM_ADD, RTM_DELETE, RTM_GET, RTM_CHANGE,
 * RTM_LOCK; anything else is EOPNOTSUPP).
 *
 * All exits after the initial mbuf check funnel through the "flush"
 * label, which stamps the result into the message (rtm_errno on failure,
 * RTF_DONE on success), drops the route reference, and hands the message
 * to rt_dispatch() so listeners see it.
 *
 * Returns 0 on success or an errno value.
 */
424 /*ARGSUSED*/
425 static int
426 route_output(struct mbuf *m, struct socket *so)
427 {
428 #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
429         struct rt_msghdr *rtm = NULL;
430         struct rtentry *rt = NULL;
431         struct radix_node_head *rnh;
432         struct rt_addrinfo info;
433         int len, error = 0;
434         struct ifnet *ifp = NULL;
435         union sockaddr_union saun;
436
            /* senderr(): record the error and jump to the common exit path. */
437 #define senderr(e) { error = e; goto flush;}
438         if (m == NULL || ((m->m_len < sizeof(long)) &&
439                        (m = m_pullup(m, sizeof(long))) == NULL))
440                 return (ENOBUFS);
441         if ((m->m_flags & M_PKTHDR) == 0)
442                 panic("route_output");
443         len = m->m_pkthdr.len;
            /*
             * On the early failure paths below 'info' has not been zeroed
             * yet, so RTAX_DST is cleared by hand: the flush code passes it
             * to rt_dispatch().
             */
444         if (len < sizeof(*rtm) ||
445             len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
446                 info.rti_info[RTAX_DST] = NULL;
447                 senderr(EINVAL);
448         }
449         R_Malloc(rtm, struct rt_msghdr *, len);
450         if (rtm == NULL) {
451                 info.rti_info[RTAX_DST] = NULL;
452                 senderr(ENOBUFS);
453         }
            /* Work on a contiguous private copy of the whole message. */
454         m_copydata(m, 0, len, (caddr_t)rtm);
455         if (rtm->rtm_version != RTM_VERSION) {
456                 info.rti_info[RTAX_DST] = NULL;
457                 senderr(EPROTONOSUPPORT);
458         }
            /* The pid supplied by userland is replaced with the caller's. */
459         rtm->rtm_pid = curproc->p_pid;
460         bzero(&info, sizeof(info));
461         info.rti_addrs = rtm->rtm_addrs;
462         if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
463                 info.rti_info[RTAX_DST] = NULL;
464                 senderr(EINVAL);
465         }
466         info.rti_flags = rtm->rtm_flags;
            /* A destination is mandatory; address families must be in range. */
467         if (info.rti_info[RTAX_DST] == NULL ||
468             info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
469             (info.rti_info[RTAX_GATEWAY] != NULL &&
470              info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
471                 senderr(EINVAL);
            /*
             * Replace a caller-supplied genmask with the key of the node
             * returned by rn_addmask(), provided the two compare equal past
             * their first byte; otherwise fail with ENOBUFS.
             */
472         if (info.rti_info[RTAX_GENMASK]) {
473                 struct radix_node *t;
474                 t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1);
475                 if (t != NULL &&
476                     bcmp((char *)(void *)info.rti_info[RTAX_GENMASK] + 1,
477                     (char *)(void *)t->rn_key + 1,
478                     ((struct sockaddr *)t->rn_key)->sa_len - 1) == 0)
479                         info.rti_info[RTAX_GENMASK] =
480                             (struct sockaddr *)t->rn_key;
481                 else
482                         senderr(ENOBUFS);
483         }
484
485         /*
486          * Verify that the caller has the appropriate privilege; RTM_GET
487          * is the only operation the non-superuser is allowed.
488          */
489         if (rtm->rtm_type != RTM_GET) {
490                 error = priv_check(curthread, PRIV_NET_ROUTE);
491                 if (error)
492                         senderr(error);
493         }
494
495         switch (rtm->rtm_type) {
496                 struct rtentry *saved_nrt;
497
498         case RTM_ADD:
                    /* Adding a route requires a gateway sockaddr. */
499                 if (info.rti_info[RTAX_GATEWAY] == NULL)
500                         senderr(EINVAL);
501                 saved_nrt = NULL;
502                 error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
503                     so->so_fibnum);
504                 if (error == 0 && saved_nrt) {
                            /*
                             * Apply the requested metrics to the new entry,
                             * report its interface index, and release the
                             * reference taken on it.
                             */
505                         RT_LOCK(saved_nrt);
506                         rt_setmetrics(rtm->rtm_inits,
507                                 &rtm->rtm_rmx, &saved_nrt->rt_rmx);
508                         rtm->rtm_index = saved_nrt->rt_ifp->if_index;
509                         RT_REMREF(saved_nrt);
510                         saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
511                         RT_UNLOCK(saved_nrt);
512                 }
513                 break;
514
515         case RTM_DELETE:
516                 saved_nrt = NULL;
517                 error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
518                     so->so_fibnum);
519                 if (error == 0) {
                            /*
                             * Describe the deleted route in the reply by
                             * reusing the RTM_GET code; "report" expects rt
                             * to be locked.
                             */
520                         RT_LOCK(saved_nrt);
521                         rt = saved_nrt;
522                         goto report;
523                 }
524                 break;
525
526         case RTM_GET:
527         case RTM_CHANGE:
528         case RTM_LOCK:
                    /* Look the destination up in this socket's FIB. */
529                 rnh = rt_tables[so->so_fibnum][info.rti_info[RTAX_DST]->sa_family];
530                 if (rnh == NULL)
531                         senderr(EAFNOSUPPORT);
532                 RADIX_NODE_HEAD_LOCK(rnh);
533                 rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
534                         info.rti_info[RTAX_NETMASK], rnh);
535                 if (rt == NULL) {       /* XXX looks bogus */
536                         RADIX_NODE_HEAD_UNLOCK(rnh);
537                         senderr(ESRCH);
538                 }
                    /* Lock and reference the entry before dropping the tree lock. */
539                 RT_LOCK(rt);
540                 RT_ADDREF(rt);
541                 RADIX_NODE_HEAD_UNLOCK(rnh);
542
543                 /* 
544                  * Fix for PR: 82974
545                  *
546                  * RTM_CHANGE/LOCK need a perfect match, rn_lookup()
547                  * returns a perfect match in case a netmask is
548                  * specified.  For host routes only a longest prefix
549                  * match is returned so it is necessary to compare the
550                  * existence of the netmask.  If both have a netmask
551                  * rnh_lookup() did a perfect match and if none of them
552                  * have a netmask both are host routes which is also a
553                  * perfect match.
554                  */
555
556                 if (rtm->rtm_type != RTM_GET && 
557                     (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
558                         RT_UNLOCK(rt);
559                         senderr(ESRCH);
560                 }
561
562                 switch(rtm->rtm_type) {
563
564                 case RTM_GET:
565                 report:
566                         RT_LOCK_ASSERT(rt);
                            /*
                             * Visibility check: for a non-host route the
                             * request fails if the caller is jailed; for a
                             * host route it fails if prison_if() rejects the
                             * route's key.
                             */
567                         if ((rt->rt_flags & RTF_HOST) == 0
568                             ? jailed(curthread->td_ucred)
569                             : prison_if(curthread->td_ucred,
570                             rt_key(rt)) != 0) {
571                                 RT_UNLOCK(rt);
572                                 senderr(ESRCH);
573                         }
                            /* From here on 'info' describes the route found. */
574                         info.rti_info[RTAX_DST] = rt_key(rt);
575                         info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
576                         info.rti_info[RTAX_NETMASK] = rt_mask(rt);
577                         info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
                            /* Interface details only if the caller asked for them. */
578                         if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
579                                 ifp = rt->rt_ifp;
580                                 if (ifp) {
581                                         info.rti_info[RTAX_IFP] =
582                                             ifp->if_addr->ifa_addr;
583                                         error = rtm_get_jailed(&info, ifp, rt,
584                                             &saun, curthread->td_ucred);
585                                         if (error != 0) {
586                                                 RT_UNLOCK(rt);
587                                                 senderr(error);
588                                         }
589                                         if (ifp->if_flags & IFF_POINTOPOINT)
590                                                 info.rti_info[RTAX_BRD] =
591                                                     rt->rt_ifa->ifa_dstaddr;
592                                         rtm->rtm_index = ifp->if_index;
593                                 } else {
594                                         info.rti_info[RTAX_IFP] = NULL;
595                                         info.rti_info[RTAX_IFA] = NULL;
596                                 }
597                         } else if ((ifp = rt->rt_ifp) != NULL) {
598                                 rtm->rtm_index = ifp->if_index;
599                         }
                            /*
                             * First rt_msg2() call (NULL buffer) only
                             * computes the reply length; grow the message
                             * buffer if the reply will not fit, then build
                             * the reply in place with the second call.
                             */
600                         len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
601                         if (len > rtm->rtm_msglen) {
602                                 struct rt_msghdr *new_rtm;
603                                 R_Malloc(new_rtm, struct rt_msghdr *, len);
604                                 if (new_rtm == NULL) {
605                                         RT_UNLOCK(rt);
606                                         senderr(ENOBUFS);
607                                 }
608                                 bcopy(rtm, new_rtm, rtm->rtm_msglen);
609                                 Free(rtm); rtm = new_rtm;
610                         }
611                         (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
612                         rtm->rtm_flags = rt->rt_flags;
613                         rtm->rtm_use = 0;
614                         rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
615                         rtm->rtm_addrs = info.rti_addrs;
616                         break;
617
618                 case RTM_CHANGE:
619                         /*
620                          * New gateway could require new ifaddr, ifp;
621                          * flags may also be different; ifp may be specified
622                          * by ll sockaddr when protocol address is ambiguous
623                          */
624                         if (((rt->rt_flags & RTF_GATEWAY) &&
625                              info.rti_info[RTAX_GATEWAY] != NULL) ||
626                             info.rti_info[RTAX_IFP] != NULL ||
627                             (info.rti_info[RTAX_IFA] != NULL &&
628                              !sa_equal(info.rti_info[RTAX_IFA],
629                                        rt->rt_ifa->ifa_addr))) {
                                    /*
                                     * The route lock is dropped while
                                     * rt_getifa_fib() runs under the tree
                                     * lock; on failure rt is therefore
                                     * already unlocked when we bail out.
                                     */
630                                 RT_UNLOCK(rt);
631                                 RADIX_NODE_HEAD_LOCK(rnh);
632                                 error = rt_getifa_fib(&info, rt->rt_fibnum);
633                                 RADIX_NODE_HEAD_UNLOCK(rnh);
634                                 if (error != 0)
635                                         senderr(error);
636                                 RT_LOCK(rt);
637                         }
                            /*
                             * The interface address is about to change: let
                             * the old one see an RTM_DELETE and drop the
                             * route's reference to it.
                             */
638                         if (info.rti_ifa != NULL &&
639                             info.rti_ifa != rt->rt_ifa &&
640                             rt->rt_ifa != NULL &&
641                             rt->rt_ifa->ifa_rtrequest != NULL) {
642                                 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
643                                     &info);
644                                 IFAFREE(rt->rt_ifa);
645                         }
646                         if (info.rti_info[RTAX_GATEWAY] != NULL) {
                                    /*
                                     * Re-acquire in tree-lock-then-route-lock
                                     * order before calling rt_setgate().
                                     */
647                                 RT_UNLOCK(rt);
648                                 RADIX_NODE_HEAD_LOCK(rnh);
649                                 RT_LOCK(rt);
650                                 
651                                 error = rt_setgate(rt, rt_key(rt),
652                                     info.rti_info[RTAX_GATEWAY]);
653                                 RADIX_NODE_HEAD_UNLOCK(rnh);
654                                 if (error != 0) {
655                                         RT_UNLOCK(rt);
656                                         senderr(error);
657                                 }
658                                 if (!(rt->rt_flags & RTF_LLINFO))
659                                         rt->rt_flags |= RTF_GATEWAY;
660                         }
                            /* Install the new interface address, with a reference. */
661                         if (info.rti_ifa != NULL &&
662                             info.rti_ifa != rt->rt_ifa) {
663                                 IFAREF(info.rti_ifa);
664                                 rt->rt_ifa = info.rti_ifa;
665                                 rt->rt_ifp = info.rti_ifp;
666                         }
667                         /* Allow some flags to be toggled on change. */
668                         if (rtm->rtm_fmask & RTF_FMASK)
669                                 rt->rt_flags = (rt->rt_flags &
670                                     ~rtm->rtm_fmask) |
671                                     (rtm->rtm_flags & rtm->rtm_fmask);
672                         rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
673                                         &rt->rt_rmx);
674                         rtm->rtm_index = rt->rt_ifp->if_index;
675                         if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
676                                rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
677                         if (info.rti_info[RTAX_GENMASK])
678                                 rt->rt_genmask = info.rti_info[RTAX_GENMASK];
679                         /* FALLTHROUGH */
680                 case RTM_LOCK:
681                         /* We don't support locks anymore */
682                         break;
683                 }
684                 RT_UNLOCK(rt);
685                 break;
686
687         default:
688                 senderr(EOPNOTSUPP);
689         }
690
    /*
     * Common exit.  rt, when set, is unlocked here but still carries a
     * reference, which RTFREE() drops.
     */
691 flush:
692         if (rtm) {
693                 if (error)
694                         rtm->rtm_errno = error;
695                 else
696                         rtm->rtm_flags |= RTF_DONE;
697         }
698         if (rt)         /* XXX can this be true? */
699                 RTFREE(rt);
700     {
701         struct rawcb *rp = NULL;
702         /*
703          * Check to see if we don't want our own messages.
704          */
705         if ((so->so_options & SO_USELOOPBACK) == 0) {
                    /* Sender is the only listener: nothing to deliver. */
706                 if (route_cb.any_count <= 1) {
707                         if (rtm)
708                                 Free(rtm);
709                         m_freem(m);
710                         return (error);
711                 }
712                 /* There is another listener, so construct message */
713                 rp = sotorawcb(so);
714         }
715         if (rtm) {
                    /*
                     * Copy the (possibly rewritten) message back into the mbuf
                     * chain.  If the chain is still shorter than the message it
                     * is dropped; if longer, the excess is removed (the count
                     * passed to m_adj() is negative -- per mbuf(9) that trims
                     * from the tail).
                     */
716                 m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
717                 if (m->m_pkthdr.len < rtm->rtm_msglen) {
718                         m_freem(m);
719                         m = NULL;
720                 } else if (m->m_pkthdr.len > rtm->rtm_msglen)
721                         m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
722                 Free(rtm);
723         }
724         if (m) {
725                 if (rp) {
726                         /*
727                          * XXX insure we don't get a copy by
728                          * invalidating our protocol
729                          */
730                         unsigned short family = rp->rcb_proto.sp_family;
731                         rp->rcb_proto.sp_family = 0;
732                         rt_dispatch(m, info.rti_info[RTAX_DST]);
733                         rp->rcb_proto.sp_family = family;
734                 } else
735                         rt_dispatch(m, info.rti_info[RTAX_DST]);
736         }
737     }
738         return (error);
739 #undef  sa_equal
740 }
741
742 static void
743 rt_setmetrics(u_long which, const struct rt_metrics *in,
744         struct rt_metrics_lite *out)
745 {
746 #define metric(f, e) if (which & (f)) out->e = in->e;
747         /*
748          * Only these are stored in the routing entry since introduction
749          * of tcp hostcache. The rest is ignored.
750          */
751         metric(RTV_MTU, rmx_mtu);
752         /* Userland -> kernel timebase conversion. */
753         if (which & RTV_EXPIRE)
754                 out->rmx_expire = in->rmx_expire ?
755                     in->rmx_expire - time_second + time_uptime : 0;
756 #undef metric
757 }
758
759 static void
760 rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
761 {
762 #define metric(e) out->e = in->e;
763         bzero(out, sizeof(*out));
764         metric(rmx_mtu);
765         /* Kernel -> userland timebase conversion. */
766         out->rmx_expire = in->rmx_expire ?
767             in->rmx_expire - time_uptime + time_second : 0;
768 #undef metric
769 }
770
771 /*
772  * Extract the addresses of the passed sockaddrs.
773  * Do a little sanity checking so as to avoid bad memory references.
774  * This data is derived straight from userland.
775  */
776 static int
777 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
778 {
779         struct sockaddr *sa;
780         int i;
781
782         for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
783                 if ((rtinfo->rti_addrs & (1 << i)) == 0)
784                         continue;
785                 sa = (struct sockaddr *)cp;
786                 /*
787                  * It won't fit.
788                  */
789                 if (cp + sa->sa_len > cplim)
790                         return (EINVAL);
791                 /*
792                  * there are no more.. quit now
793                  * If there are more bits, they are in error.
794                  * I've seen this. route(1) can evidently generate these. 
795                  * This causes kernel to core dump.
796                  * for compatibility, If we see this, point to a safe address.
797                  */
798                 if (sa->sa_len == 0) {
799                         rtinfo->rti_info[i] = &sa_zero;
800                         return (0); /* should be EINVAL but for compat */
801                 }
802                 /* accept it */
803                 rtinfo->rti_info[i] = sa;
804                 cp += SA_SIZE(sa);
805         }
806         return (0);
807 }
808
809 static struct mbuf *
810 rt_msg1(int type, struct rt_addrinfo *rtinfo)
811 {
812         struct rt_msghdr *rtm;
813         struct mbuf *m;
814         int i;
815         struct sockaddr *sa;
816         int len, dlen;
817
818         switch (type) {
819
820         case RTM_DELADDR:
821         case RTM_NEWADDR:
822                 len = sizeof(struct ifa_msghdr);
823                 break;
824
825         case RTM_DELMADDR:
826         case RTM_NEWMADDR:
827                 len = sizeof(struct ifma_msghdr);
828                 break;
829
830         case RTM_IFINFO:
831                 len = sizeof(struct if_msghdr);
832                 break;
833
834         case RTM_IFANNOUNCE:
835         case RTM_IEEE80211:
836                 len = sizeof(struct if_announcemsghdr);
837                 break;
838
839         default:
840                 len = sizeof(struct rt_msghdr);
841         }
842         if (len > MCLBYTES)
843                 panic("rt_msg1");
844         m = m_gethdr(M_DONTWAIT, MT_DATA);
845         if (m && len > MHLEN) {
846                 MCLGET(m, M_DONTWAIT);
847                 if ((m->m_flags & M_EXT) == 0) {
848                         m_free(m);
849                         m = NULL;
850                 }
851         }
852         if (m == NULL)
853                 return (m);
854         m->m_pkthdr.len = m->m_len = len;
855         m->m_pkthdr.rcvif = NULL;
856         rtm = mtod(m, struct rt_msghdr *);
857         bzero((caddr_t)rtm, len);
858         for (i = 0; i < RTAX_MAX; i++) {
859                 if ((sa = rtinfo->rti_info[i]) == NULL)
860                         continue;
861                 rtinfo->rti_addrs |= (1 << i);
862                 dlen = SA_SIZE(sa);
863                 m_copyback(m, len, dlen, (caddr_t)sa);
864                 len += dlen;
865         }
866         if (m->m_pkthdr.len != len) {
867                 m_freem(m);
868                 return (NULL);
869         }
870         rtm->rtm_msglen = len;
871         rtm->rtm_version = RTM_VERSION;
872         rtm->rtm_type = type;
873         return (m);
874 }
875
876 static int
877 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
878 {
879         int i;
880         int len, dlen, second_time = 0;
881         caddr_t cp0;
882
883         rtinfo->rti_addrs = 0;
884 again:
885         switch (type) {
886
887         case RTM_DELADDR:
888         case RTM_NEWADDR:
889                 len = sizeof(struct ifa_msghdr);
890                 break;
891
892         case RTM_IFINFO:
893                 len = sizeof(struct if_msghdr);
894                 break;
895
896         case RTM_NEWMADDR:
897                 len = sizeof(struct ifma_msghdr);
898                 break;
899
900         default:
901                 len = sizeof(struct rt_msghdr);
902         }
903         cp0 = cp;
904         if (cp0)
905                 cp += len;
906         for (i = 0; i < RTAX_MAX; i++) {
907                 struct sockaddr *sa;
908
909                 if ((sa = rtinfo->rti_info[i]) == NULL)
910                         continue;
911                 rtinfo->rti_addrs |= (1 << i);
912                 dlen = SA_SIZE(sa);
913                 if (cp) {
914                         bcopy((caddr_t)sa, cp, (unsigned)dlen);
915                         cp += dlen;
916                 }
917                 len += dlen;
918         }
919         len = ALIGN(len);
920         if (cp == NULL && w != NULL && !second_time) {
921                 struct walkarg *rw = w;
922
923                 if (rw->w_req) {
924                         if (rw->w_tmemsize < len) {
925                                 if (rw->w_tmem)
926                                         free(rw->w_tmem, M_RTABLE);
927                                 rw->w_tmem = (caddr_t)
928                                         malloc(len, M_RTABLE, M_NOWAIT);
929                                 if (rw->w_tmem)
930                                         rw->w_tmemsize = len;
931                         }
932                         if (rw->w_tmem) {
933                                 cp = rw->w_tmem;
934                                 second_time = 1;
935                                 goto again;
936                         }
937                 }
938         }
939         if (cp) {
940                 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
941
942                 rtm->rtm_version = RTM_VERSION;
943                 rtm->rtm_type = type;
944                 rtm->rtm_msglen = len;
945         }
946         return (len);
947 }
948
949 /*
950  * This routine is called to generate a message from the routing
951  * socket indicating that a redirect has occured, a routing lookup
952  * has failed, or that a protocol has detected timeouts to a particular
953  * destination.
954  */
955 void
956 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
957 {
958         struct rt_msghdr *rtm;
959         struct mbuf *m;
960         struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
961
962         if (route_cb.any_count == 0)
963                 return;
964         m = rt_msg1(type, rtinfo);
965         if (m == NULL)
966                 return;
967         rtm = mtod(m, struct rt_msghdr *);
968         rtm->rtm_flags = RTF_DONE | flags;
969         rtm->rtm_errno = error;
970         rtm->rtm_addrs = rtinfo->rti_addrs;
971         rt_dispatch(m, sa);
972 }
973
974 /*
975  * This routine is called to generate a message from the routing
976  * socket indicating that the status of a network interface has changed.
977  */
978 void
979 rt_ifmsg(struct ifnet *ifp)
980 {
981         struct if_msghdr *ifm;
982         struct mbuf *m;
983         struct rt_addrinfo info;
984
985         if (route_cb.any_count == 0)
986                 return;
987         bzero((caddr_t)&info, sizeof(info));
988         m = rt_msg1(RTM_IFINFO, &info);
989         if (m == NULL)
990                 return;
991         ifm = mtod(m, struct if_msghdr *);
992         ifm->ifm_index = ifp->if_index;
993         ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
994         ifm->ifm_data = ifp->if_data;
995         ifm->ifm_addrs = 0;
996         rt_dispatch(m, NULL);
997 }
998
999 /*
1000  * This is called to generate messages from the routing socket
1001  * indicating a network interface has had addresses associated with it.
1002  * if we ever reverse the logic and replace messages TO the routing
1003  * socket indicate a request to configure interfaces, then it will
1004  * be unnecessary as the routing socket will automatically generate
1005  * copies of it.
1006  */
1007 void
1008 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
1009 {
1010         struct rt_addrinfo info;
1011         struct sockaddr *sa = NULL;
1012         int pass;
1013         struct mbuf *m = NULL;
1014         struct ifnet *ifp = ifa->ifa_ifp;
1015
1016         KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
1017                 ("unexpected cmd %u", cmd));
1018 #ifdef SCTP
1019         /*
1020          * notify the SCTP stack
1021          * this will only get called when an address is added/deleted
1022          * XXX pass the ifaddr struct instead if ifa->ifa_addr...
1023          */
1024         sctp_addr_change(ifa, cmd);
1025 #endif /* SCTP */
1026         if (route_cb.any_count == 0)
1027                 return;
1028         for (pass = 1; pass < 3; pass++) {
1029                 bzero((caddr_t)&info, sizeof(info));
1030                 if ((cmd == RTM_ADD && pass == 1) ||
1031                     (cmd == RTM_DELETE && pass == 2)) {
1032                         struct ifa_msghdr *ifam;
1033                         int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
1034
1035                         info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
1036                         info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
1037                         info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1038                         info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1039                         if ((m = rt_msg1(ncmd, &info)) == NULL)
1040                                 continue;
1041                         ifam = mtod(m, struct ifa_msghdr *);
1042                         ifam->ifam_index = ifp->if_index;
1043                         ifam->ifam_metric = ifa->ifa_metric;
1044                         ifam->ifam_flags = ifa->ifa_flags;
1045                         ifam->ifam_addrs = info.rti_addrs;
1046                 }
1047                 if ((cmd == RTM_ADD && pass == 2) ||
1048                     (cmd == RTM_DELETE && pass == 1)) {
1049                         struct rt_msghdr *rtm;
1050
1051                         if (rt == NULL)
1052                                 continue;
1053                         info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1054                         info.rti_info[RTAX_DST] = sa = rt_key(rt);
1055                         info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1056                         if ((m = rt_msg1(cmd, &info)) == NULL)
1057                                 continue;
1058                         rtm = mtod(m, struct rt_msghdr *);
1059                         rtm->rtm_index = ifp->if_index;
1060                         rtm->rtm_flags |= rt->rt_flags;
1061                         rtm->rtm_errno = error;
1062                         rtm->rtm_addrs = info.rti_addrs;
1063                 }
1064                 rt_dispatch(m, sa);
1065         }
1066 }
1067
1068 /*
1069  * This is the analogue to the rt_newaddrmsg which performs the same
1070  * function but for multicast group memberhips.  This is easier since
1071  * there is no route state to worry about.
1072  */
1073 void
1074 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
1075 {
1076         struct rt_addrinfo info;
1077         struct mbuf *m = NULL;
1078         struct ifnet *ifp = ifma->ifma_ifp;
1079         struct ifma_msghdr *ifmam;
1080
1081         if (route_cb.any_count == 0)
1082                 return;
1083
1084         bzero((caddr_t)&info, sizeof(info));
1085         info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1086         info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL;
1087         /*
1088          * If a link-layer address is present, present it as a ``gateway''
1089          * (similarly to how ARP entries, e.g., are presented).
1090          */
1091         info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
1092         m = rt_msg1(cmd, &info);
1093         if (m == NULL)
1094                 return;
1095         ifmam = mtod(m, struct ifma_msghdr *);
1096         KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
1097             __func__));
1098         ifmam->ifmam_index = ifp->if_index;
1099         ifmam->ifmam_addrs = info.rti_addrs;
1100         rt_dispatch(m, ifma->ifma_addr);
1101 }
1102
1103 static struct mbuf *
1104 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
1105         struct rt_addrinfo *info)
1106 {
1107         struct if_announcemsghdr *ifan;
1108         struct mbuf *m;
1109
1110         if (route_cb.any_count == 0)
1111                 return NULL;
1112         bzero((caddr_t)info, sizeof(*info));
1113         m = rt_msg1(type, info);
1114         if (m != NULL) {
1115                 ifan = mtod(m, struct if_announcemsghdr *);
1116                 ifan->ifan_index = ifp->if_index;
1117                 strlcpy(ifan->ifan_name, ifp->if_xname,
1118                         sizeof(ifan->ifan_name));
1119                 ifan->ifan_what = what;
1120         }
1121         return m;
1122 }
1123
1124 /*
1125  * This is called to generate routing socket messages indicating
1126  * IEEE80211 wireless events.
1127  * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
1128  */
1129 void
1130 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
1131 {
1132         struct mbuf *m;
1133         struct rt_addrinfo info;
1134
1135         m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
1136         if (m != NULL) {
1137                 /*
1138                  * Append the ieee80211 data.  Try to stick it in the
1139                  * mbuf containing the ifannounce msg; otherwise allocate
1140                  * a new mbuf and append.
1141                  *
1142                  * NB: we assume m is a single mbuf.
1143                  */
1144                 if (data_len > M_TRAILINGSPACE(m)) {
1145                         struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
1146                         if (n == NULL) {
1147                                 m_freem(m);
1148                                 return;
1149                         }
1150                         bcopy(data, mtod(n, void *), data_len);
1151                         n->m_len = data_len;
1152                         m->m_next = n;
1153                 } else if (data_len > 0) {
1154                         bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
1155                         m->m_len += data_len;
1156                 }
1157                 if (m->m_flags & M_PKTHDR)
1158                         m->m_pkthdr.len += data_len;
1159                 mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
1160                 rt_dispatch(m, NULL);
1161         }
1162 }
1163
1164 /*
1165  * This is called to generate routing socket messages indicating
1166  * network interface arrival and departure.
1167  */
1168 void
1169 rt_ifannouncemsg(struct ifnet *ifp, int what)
1170 {
1171         struct mbuf *m;
1172         struct rt_addrinfo info;
1173
1174         m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
1175         if (m != NULL)
1176                 rt_dispatch(m, NULL);
1177 }
1178
1179 static void
1180 rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
1181 {
1182         struct m_tag *tag;
1183
1184         /*
1185          * Preserve the family from the sockaddr, if any, in an m_tag for
1186          * use when injecting the mbuf into the routing socket buffer from
1187          * the netisr.
1188          */
1189         if (sa != NULL) {
1190                 tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
1191                     M_NOWAIT);
1192                 if (tag == NULL) {
1193                         m_freem(m);
1194                         return;
1195                 }
1196                 *(unsigned short *)(tag + 1) = sa->sa_family;
1197                 m_tag_prepend(m, tag);
1198         }
1199         netisr_queue(NETISR_ROUTE, m);  /* mbuf is free'd on failure. */
1200 }
1201
1202 /*
1203  * This is used in dumping the kernel table via sysctl().
1204  */
1205 static int
1206 sysctl_dumpentry(struct radix_node *rn, void *vw)
1207 {
1208         struct walkarg *w = vw;
1209         struct rtentry *rt = (struct rtentry *)rn;
1210         int error = 0, size;
1211         struct rt_addrinfo info;
1212
1213         if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1214                 return 0;
1215         if ((rt->rt_flags & RTF_HOST) == 0
1216             ? jailed(w->w_req->td->td_ucred)
1217             : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0)
1218                 return (0);
1219         bzero((caddr_t)&info, sizeof(info));
1220         info.rti_info[RTAX_DST] = rt_key(rt);
1221         info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1222         info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1223         info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
1224         if (rt->rt_ifp) {
1225                 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
1226                 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1227                 if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1228                         info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1229         }
1230         size = rt_msg2(RTM_GET, &info, NULL, w);
1231         if (w->w_req && w->w_tmem) {
1232                 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1233
1234                 rtm->rtm_flags = rt->rt_flags;
1235                 rtm->rtm_use = rt->rt_rmx.rmx_pksent;
1236                 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1237                 rtm->rtm_index = rt->rt_ifp->if_index;
1238                 rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1239                 rtm->rtm_addrs = info.rti_addrs;
1240                 error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
1241                 return (error);
1242         }
1243         return (error);
1244 }
1245
1246 static int
1247 sysctl_iflist(int af, struct walkarg *w)
1248 {
1249         struct ifnet *ifp;
1250         struct ifaddr *ifa;
1251         struct rt_addrinfo info;
1252         int len, error = 0;
1253
1254         bzero((caddr_t)&info, sizeof(info));
1255         IFNET_RLOCK();
1256         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1257                 if (w->w_arg && w->w_arg != ifp->if_index)
1258                         continue;
1259                 ifa = ifp->if_addr;
1260                 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1261                 len = rt_msg2(RTM_IFINFO, &info, NULL, w);
1262                 info.rti_info[RTAX_IFP] = NULL;
1263                 if (w->w_req && w->w_tmem) {
1264                         struct if_msghdr *ifm;
1265
1266                         ifm = (struct if_msghdr *)w->w_tmem;
1267                         ifm->ifm_index = ifp->if_index;
1268                         ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1269                         ifm->ifm_data = ifp->if_data;
1270                         ifm->ifm_addrs = info.rti_addrs;
1271                         error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
1272                         if (error)
1273                                 goto done;
1274                 }
1275                 while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
1276                         if (af && af != ifa->ifa_addr->sa_family)
1277                                 continue;
1278                         if (prison_if(w->w_req->td->td_ucred,
1279                             ifa->ifa_addr) != 0)
1280                                 continue;
1281                         info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1282                         info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1283                         info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1284                         len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
1285                         if (w->w_req && w->w_tmem) {
1286                                 struct ifa_msghdr *ifam;
1287
1288                                 ifam = (struct ifa_msghdr *)w->w_tmem;
1289                                 ifam->ifam_index = ifa->ifa_ifp->if_index;
1290                                 ifam->ifam_flags = ifa->ifa_flags;
1291                                 ifam->ifam_metric = ifa->ifa_metric;
1292                                 ifam->ifam_addrs = info.rti_addrs;
1293                                 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1294                                 if (error)
1295                                         goto done;
1296                         }
1297                 }
1298                 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1299                         info.rti_info[RTAX_BRD] = NULL;
1300         }
1301 done:
1302         IFNET_RUNLOCK();
1303         return (error);
1304 }
1305
1306 static int
1307 sysctl_ifmalist(int af, struct walkarg *w)
1308 {
1309         struct ifnet *ifp;
1310         struct ifmultiaddr *ifma;
1311         struct  rt_addrinfo info;
1312         int     len, error = 0;
1313         struct ifaddr *ifa;
1314
1315         bzero((caddr_t)&info, sizeof(info));
1316         IFNET_RLOCK();
1317         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1318                 if (w->w_arg && w->w_arg != ifp->if_index)
1319                         continue;
1320                 ifa = ifp->if_addr;
1321                 info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
1322                 IF_ADDR_LOCK(ifp);
1323                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1324                         if (af && af != ifma->ifma_addr->sa_family)
1325                                 continue;
1326                         if (prison_if(w->w_req->td->td_ucred,
1327                             ifma->ifma_addr) != 0)
1328                                 continue;
1329                         info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1330                         info.rti_info[RTAX_GATEWAY] =
1331                             (ifma->ifma_addr->sa_family != AF_LINK) ?
1332                             ifma->ifma_lladdr : NULL;
1333                         len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
1334                         if (w->w_req && w->w_tmem) {
1335                                 struct ifma_msghdr *ifmam;
1336
1337                                 ifmam = (struct ifma_msghdr *)w->w_tmem;
1338                                 ifmam->ifmam_index = ifma->ifma_ifp->if_index;
1339                                 ifmam->ifmam_flags = 0;
1340                                 ifmam->ifmam_addrs = info.rti_addrs;
1341                                 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1342                                 if (error) {
1343                                         IF_ADDR_UNLOCK(ifp);
1344                                         goto done;
1345                                 }
1346                         }
1347                 }
1348                 IF_ADDR_UNLOCK(ifp);
1349         }
1350 done:
1351         IFNET_RUNLOCK();
1352         return (error);
1353 }
1354
1355 static int
1356 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1357 {
1358         int     *name = (int *)arg1;
1359         u_int   namelen = arg2;
1360         struct radix_node_head *rnh;
1361         int     i, lim, error = EINVAL;
1362         u_char  af;
1363         struct  walkarg w;
1364
1365         name ++;
1366         namelen--;
1367         if (req->newptr)
1368                 return (EPERM);
1369         if (namelen != 3)
1370                 return ((namelen < 3) ? EISDIR : ENOTDIR);
1371         af = name[0];
1372         if (af > AF_MAX)
1373                 return (EINVAL);
1374         bzero(&w, sizeof(w));
1375         w.w_op = name[1];
1376         w.w_arg = name[2];
1377         w.w_req = req;
1378
1379         error = sysctl_wire_old_buffer(req, 0);
1380         if (error)
1381                 return (error);
1382         switch (w.w_op) {
1383
1384         case NET_RT_DUMP:
1385         case NET_RT_FLAGS:
1386                 if (af == 0) {                  /* dump all tables */
1387                         i = 1;
1388                         lim = AF_MAX;
1389                 } else                          /* dump only one table */
1390                         i = lim = af;
1391                 for (error = 0; error == 0 && i <= lim; i++)
1392                         if ((rnh = rt_tables[req->td->td_proc->p_fibnum][i]) != NULL) {
1393                                 RADIX_NODE_HEAD_LOCK(rnh); 
1394                                 error = rnh->rnh_walktree(rnh,
1395                                     sysctl_dumpentry, &w);
1396                                 RADIX_NODE_HEAD_UNLOCK(rnh);
1397                         } else if (af != 0)
1398                                 error = EAFNOSUPPORT;
1399                 break;
1400
1401         case NET_RT_IFLIST:
1402                 error = sysctl_iflist(af, &w);
1403                 break;
1404
1405         case NET_RT_IFMALIST:
1406                 error = sysctl_ifmalist(af, &w);
1407                 break;
1408         }
1409         if (w.w_tmem)
1410                 free(w.w_tmem, M_RTABLE);
1411         return (error);
1412 }
1413
1414 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1415
1416 /*
1417  * Definitions of protocols supported in the ROUTE domain.
1418  */
1419
1420 static struct domain routedomain;               /* or at least forward */
1421
1422 static struct protosw routesw[] = {
1423 {
1424         .pr_type =              SOCK_RAW,
1425         .pr_domain =            &routedomain,
1426         .pr_flags =             PR_ATOMIC|PR_ADDR,
1427         .pr_output =            route_output,
1428         .pr_ctlinput =          raw_ctlinput,
1429         .pr_init =              raw_init,
1430         .pr_usrreqs =           &route_usrreqs
1431 }
1432 };
1433
1434 static struct domain routedomain = {
1435         .dom_family =           PF_ROUTE,
1436         .dom_name =              "route",
1437         .dom_protosw =          routesw,
1438         .dom_protoswNPROTOSW =  &routesw[sizeof(routesw)/sizeof(routesw[0])]
1439 };
1440
1441 DOMAIN_SET(route);