]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/net/rtsock.c
Upgrade Unbound to 1.7.0. More to follow.
[FreeBSD/FreeBSD.git] / sys / net / rtsock.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1988, 1991, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *      @(#)rtsock.c    8.7 (Berkeley) 10/12/95
32  * $FreeBSD$
33  */
34 #include "opt_mpath.h"
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37
38 #include <sys/param.h>
39 #include <sys/jail.h>
40 #include <sys/kernel.h>
41 #include <sys/domain.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/priv.h>
46 #include <sys/proc.h>
47 #include <sys/protosw.h>
48 #include <sys/rwlock.h>
49 #include <sys/signalvar.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54
55 #include <net/if.h>
56 #include <net/if_var.h>
57 #include <net/if_dl.h>
58 #include <net/if_llatbl.h>
59 #include <net/if_types.h>
60 #include <net/netisr.h>
61 #include <net/raw_cb.h>
62 #include <net/route.h>
63 #include <net/route_var.h>
64 #include <net/vnet.h>
65
66 #include <netinet/in.h>
67 #include <netinet/if_ether.h>
68 #include <netinet/ip_carp.h>
69 #ifdef INET6
70 #include <netinet6/ip6_var.h>
71 #include <netinet6/scope6_var.h>
72 #endif
73
74 #ifdef COMPAT_FREEBSD32
75 #include <sys/mount.h>
76 #include <compat/freebsd32/freebsd32.h>
77
78 struct if_msghdr32 {
79         uint16_t ifm_msglen;
80         uint8_t ifm_version;
81         uint8_t ifm_type;
82         int32_t ifm_addrs;
83         int32_t ifm_flags;
84         uint16_t ifm_index;
85         struct  if_data ifm_data;
86 };
87
88 struct if_msghdrl32 {
89         uint16_t ifm_msglen;
90         uint8_t ifm_version;
91         uint8_t ifm_type;
92         int32_t ifm_addrs;
93         int32_t ifm_flags;
94         uint16_t ifm_index;
95         uint16_t _ifm_spare1;
96         uint16_t ifm_len;
97         uint16_t ifm_data_off;
98         struct  if_data ifm_data;
99 };
100
101 struct ifa_msghdrl32 {
102         uint16_t ifam_msglen;
103         uint8_t ifam_version;
104         uint8_t ifam_type;
105         int32_t ifam_addrs;
106         int32_t ifam_flags;
107         uint16_t ifam_index;
108         uint16_t _ifam_spare1;
109         uint16_t ifam_len;
110         uint16_t ifam_data_off;
111         int32_t ifam_metric;
112         struct  if_data ifam_data;
113 };
114
/*
 * Storage size of a sockaddr when padded to a multiple of sizeof(int).
 * A zero sa_len still occupies one int; any other length is rounded up
 * to the next int boundary.  The argument is evaluated more than once.
 */
#define	SA_SIZE32(sa)							\
    ((((const struct sockaddr *)(sa))->sa_len != 0) ?			\
	(1 + ((((const struct sockaddr *)(sa))->sa_len - 1) |		\
	    (sizeof(int) - 1))) :					\
	sizeof(int))
119
120 #endif /* COMPAT_FREEBSD32 */
121
122 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
123
124 /* NB: these are not modified */
125 static struct   sockaddr route_src = { 2, PF_ROUTE, };
126 static struct   sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
127
128 /* These are external hooks for CARP. */
129 int     (*carp_get_vhid_p)(struct ifaddr *);
130
131 /*
132  * Used by rtsock/raw_input callback code to decide whether to filter the update
133  * notification to a socket bound to a particular FIB.
134  */
135 #define RTS_FILTER_FIB  M_PROTO8
136
137 typedef struct {
138         int     ip_count;       /* attached w/ AF_INET */
139         int     ip6_count;      /* attached w/ AF_INET6 */
140         int     any_count;      /* total attached */
141 } route_cb_t;
142 static VNET_DEFINE(route_cb_t, route_cb);
143 #define V_route_cb VNET(route_cb)
144
145 struct mtx rtsock_mtx;
146 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
147
148 #define RTSOCK_LOCK()   mtx_lock(&rtsock_mtx)
149 #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
150 #define RTSOCK_LOCK_ASSERT()    mtx_assert(&rtsock_mtx, MA_OWNED)
151
152 static SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
153
154 struct walkarg {
155         int     w_tmemsize;
156         int     w_op, w_arg;
157         caddr_t w_tmem;
158         struct sysctl_req *w_req;
159 };
160
161 static void     rts_input(struct mbuf *m);
162 static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo);
163 static int      rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo,
164                         struct walkarg *w, int *plen);
165 static int      rt_xaddrs(caddr_t cp, caddr_t cplim,
166                         struct rt_addrinfo *rtinfo);
167 static int      sysctl_dumpentry(struct radix_node *rn, void *vw);
168 static int      sysctl_iflist(int af, struct walkarg *w);
169 static int      sysctl_ifmalist(int af, struct walkarg *w);
170 static int      route_output(struct mbuf *m, struct socket *so, ...);
171 static void     rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out);
172 static void     rt_dispatch(struct mbuf *, sa_family_t);
173 static struct sockaddr  *rtsock_fix_netmask(struct sockaddr *dst,
174                         struct sockaddr *smask, struct sockaddr_storage *dmask);
175
176 static struct netisr_handler rtsock_nh = {
177         .nh_name = "rtsock",
178         .nh_handler = rts_input,
179         .nh_proto = NETISR_ROUTE,
180         .nh_policy = NETISR_POLICY_SOURCE,
181 };
182
183 static int
184 sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
185 {
186         int error, qlimit;
187
188         netisr_getqlimit(&rtsock_nh, &qlimit);
189         error = sysctl_handle_int(oidp, &qlimit, 0, req);
190         if (error || !req->newptr)
191                 return (error);
192         if (qlimit < 1)
193                 return (EINVAL);
194         return (netisr_setqlimit(&rtsock_nh, qlimit));
195 }
196 SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
197     0, 0, sysctl_route_netisr_maxqlen, "I",
198     "maximum routing socket dispatch queue length");
199
200 static void
201 vnet_rts_init(void)
202 {
203         int tmp;
204
205         if (IS_DEFAULT_VNET(curvnet)) {
206                 if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
207                         rtsock_nh.nh_qlimit = tmp;
208                 netisr_register(&rtsock_nh);
209         }
210 #ifdef VIMAGE
211          else
212                 netisr_register_vnet(&rtsock_nh);
213 #endif
214 }
215 VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
216     vnet_rts_init, 0);
217
218 #ifdef VIMAGE
219 static void
220 vnet_rts_uninit(void)
221 {
222
223         netisr_unregister_vnet(&rtsock_nh);
224 }
225 VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
226     vnet_rts_uninit, 0);
227 #endif
228
229 static int
230 raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src,
231     struct rawcb *rp)
232 {
233         int fibnum;
234
235         KASSERT(m != NULL, ("%s: m is NULL", __func__));
236         KASSERT(proto != NULL, ("%s: proto is NULL", __func__));
237         KASSERT(rp != NULL, ("%s: rp is NULL", __func__));
238
239         /* No filtering requested. */
240         if ((m->m_flags & RTS_FILTER_FIB) == 0)
241                 return (0);
242
243         /* Check if it is a rts and the fib matches the one of the socket. */
244         fibnum = M_GETFIB(m);
245         if (proto->sp_family != PF_ROUTE ||
246             rp->rcb_socket == NULL ||
247             rp->rcb_socket->so_fibnum == fibnum)
248                 return (0);
249
250         /* Filtering requested and no match, the socket shall be skipped. */
251         return (1);
252 }
253
254 static void
255 rts_input(struct mbuf *m)
256 {
257         struct sockproto route_proto;
258         unsigned short *family;
259         struct m_tag *tag;
260
261         route_proto.sp_family = PF_ROUTE;
262         tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
263         if (tag != NULL) {
264                 family = (unsigned short *)(tag + 1);
265                 route_proto.sp_protocol = *family;
266                 m_tag_delete(m, tag);
267         } else
268                 route_proto.sp_protocol = 0;
269
270         raw_input_ext(m, &route_proto, &route_src, raw_input_rts_cb);
271 }
272
273 /*
274  * It really doesn't make any sense at all for this code to share much
275  * with raw_usrreq.c, since its functionality is so restricted.  XXX
276  */
277 static void
278 rts_abort(struct socket *so)
279 {
280
281         raw_usrreqs.pru_abort(so);
282 }
283
284 static void
285 rts_close(struct socket *so)
286 {
287
288         raw_usrreqs.pru_close(so);
289 }
290
291 /* pru_accept is EOPNOTSUPP */
292
293 static int
294 rts_attach(struct socket *so, int proto, struct thread *td)
295 {
296         struct rawcb *rp;
297         int error;
298
299         KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
300
301         /* XXX */
302         rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
303
304         so->so_pcb = (caddr_t)rp;
305         so->so_fibnum = td->td_proc->p_fibnum;
306         error = raw_attach(so, proto);
307         rp = sotorawcb(so);
308         if (error) {
309                 so->so_pcb = NULL;
310                 free(rp, M_PCB);
311                 return error;
312         }
313         RTSOCK_LOCK();
314         switch(rp->rcb_proto.sp_protocol) {
315         case AF_INET:
316                 V_route_cb.ip_count++;
317                 break;
318         case AF_INET6:
319                 V_route_cb.ip6_count++;
320                 break;
321         }
322         V_route_cb.any_count++;
323         RTSOCK_UNLOCK();
324         soisconnected(so);
325         so->so_options |= SO_USELOOPBACK;
326         return 0;
327 }
328
329 static int
330 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
331 {
332
333         return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
334 }
335
336 static int
337 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
338 {
339
340         return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
341 }
342
343 /* pru_connect2 is EOPNOTSUPP */
344 /* pru_control is EOPNOTSUPP */
345
346 static void
347 rts_detach(struct socket *so)
348 {
349         struct rawcb *rp = sotorawcb(so);
350
351         KASSERT(rp != NULL, ("rts_detach: rp == NULL"));
352
353         RTSOCK_LOCK();
354         switch(rp->rcb_proto.sp_protocol) {
355         case AF_INET:
356                 V_route_cb.ip_count--;
357                 break;
358         case AF_INET6:
359                 V_route_cb.ip6_count--;
360                 break;
361         }
362         V_route_cb.any_count--;
363         RTSOCK_UNLOCK();
364         raw_usrreqs.pru_detach(so);
365 }
366
367 static int
368 rts_disconnect(struct socket *so)
369 {
370
371         return (raw_usrreqs.pru_disconnect(so));
372 }
373
374 /* pru_listen is EOPNOTSUPP */
375
376 static int
377 rts_peeraddr(struct socket *so, struct sockaddr **nam)
378 {
379
380         return (raw_usrreqs.pru_peeraddr(so, nam));
381 }
382
383 /* pru_rcvd is EOPNOTSUPP */
384 /* pru_rcvoob is EOPNOTSUPP */
385
386 static int
387 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
388          struct mbuf *control, struct thread *td)
389 {
390
391         return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
392 }
393
394 /* pru_sense is null */
395
396 static int
397 rts_shutdown(struct socket *so)
398 {
399
400         return (raw_usrreqs.pru_shutdown(so));
401 }
402
403 static int
404 rts_sockaddr(struct socket *so, struct sockaddr **nam)
405 {
406
407         return (raw_usrreqs.pru_sockaddr(so, nam));
408 }
409
410 static struct pr_usrreqs route_usrreqs = {
411         .pru_abort =            rts_abort,
412         .pru_attach =           rts_attach,
413         .pru_bind =             rts_bind,
414         .pru_connect =          rts_connect,
415         .pru_detach =           rts_detach,
416         .pru_disconnect =       rts_disconnect,
417         .pru_peeraddr =         rts_peeraddr,
418         .pru_send =             rts_send,
419         .pru_shutdown =         rts_shutdown,
420         .pru_sockaddr =         rts_sockaddr,
421         .pru_close =            rts_close,
422 };
423
424 #ifndef _SOCKADDR_UNION_DEFINED
425 #define _SOCKADDR_UNION_DEFINED
426 /*
427  * The union of all possible address formats we handle.
428  */
429 union sockaddr_union {
430         struct sockaddr         sa;
431         struct sockaddr_in      sin;
432         struct sockaddr_in6     sin6;
433 };
434 #endif /* _SOCKADDR_UNION_DEFINED */
435
436 static int
437 rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
438     struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
439 {
440
441         /* First, see if the returned address is part of the jail. */
442         if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
443                 info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
444                 return (0);
445         }
446
447         switch (info->rti_info[RTAX_DST]->sa_family) {
448 #ifdef INET
449         case AF_INET:
450         {
451                 struct in_addr ia;
452                 struct ifaddr *ifa;
453                 int found;
454
455                 found = 0;
456                 /*
457                  * Try to find an address on the given outgoing interface
458                  * that belongs to the jail.
459                  */
460                 IF_ADDR_RLOCK(ifp);
461                 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
462                         struct sockaddr *sa;
463                         sa = ifa->ifa_addr;
464                         if (sa->sa_family != AF_INET)
465                                 continue;
466                         ia = ((struct sockaddr_in *)sa)->sin_addr;
467                         if (prison_check_ip4(cred, &ia) == 0) {
468                                 found = 1;
469                                 break;
470                         }
471                 }
472                 IF_ADDR_RUNLOCK(ifp);
473                 if (!found) {
474                         /*
475                          * As a last resort return the 'default' jail address.
476                          */
477                         ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->
478                             sin_addr;
479                         if (prison_get_ip4(cred, &ia) != 0)
480                                 return (ESRCH);
481                 }
482                 bzero(&saun->sin, sizeof(struct sockaddr_in));
483                 saun->sin.sin_len = sizeof(struct sockaddr_in);
484                 saun->sin.sin_family = AF_INET;
485                 saun->sin.sin_addr.s_addr = ia.s_addr;
486                 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
487                 break;
488         }
489 #endif
490 #ifdef INET6
491         case AF_INET6:
492         {
493                 struct in6_addr ia6;
494                 struct ifaddr *ifa;
495                 int found;
496
497                 found = 0;
498                 /*
499                  * Try to find an address on the given outgoing interface
500                  * that belongs to the jail.
501                  */
502                 IF_ADDR_RLOCK(ifp);
503                 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
504                         struct sockaddr *sa;
505                         sa = ifa->ifa_addr;
506                         if (sa->sa_family != AF_INET6)
507                                 continue;
508                         bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
509                             &ia6, sizeof(struct in6_addr));
510                         if (prison_check_ip6(cred, &ia6) == 0) {
511                                 found = 1;
512                                 break;
513                         }
514                 }
515                 IF_ADDR_RUNLOCK(ifp);
516                 if (!found) {
517                         /*
518                          * As a last resort return the 'default' jail address.
519                          */
520                         ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->
521                             sin6_addr;
522                         if (prison_get_ip6(cred, &ia6) != 0)
523                                 return (ESRCH);
524                 }
525                 bzero(&saun->sin6, sizeof(struct sockaddr_in6));
526                 saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
527                 saun->sin6.sin6_family = AF_INET6;
528                 bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
529                 if (sa6_recoverscope(&saun->sin6) != 0)
530                         return (ESRCH);
531                 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
532                 break;
533         }
534 #endif
535         default:
536                 return (ESRCH);
537         }
538         return (0);
539 }
540
541 /*ARGSUSED*/
542 static int
543 route_output(struct mbuf *m, struct socket *so, ...)
544 {
545         struct rt_msghdr *rtm = NULL;
546         struct rtentry *rt = NULL;
547         struct rib_head *rnh;
548         struct rt_addrinfo info;
549         struct sockaddr_storage ss;
550 #ifdef INET6
551         struct sockaddr_in6 *sin6;
552         int i, rti_need_deembed = 0;
553 #endif
554         int alloc_len = 0, len, error = 0, fibnum;
555         struct ifnet *ifp = NULL;
556         union sockaddr_union saun;
557         sa_family_t saf = AF_UNSPEC;
558         struct rawcb *rp = NULL;
559         struct walkarg w;
560
561         fibnum = so->so_fibnum;
562
563 #define senderr(e) { error = e; goto flush;}
564         if (m == NULL || ((m->m_len < sizeof(long)) &&
565                        (m = m_pullup(m, sizeof(long))) == NULL))
566                 return (ENOBUFS);
567         if ((m->m_flags & M_PKTHDR) == 0)
568                 panic("route_output");
569         len = m->m_pkthdr.len;
570         if (len < sizeof(*rtm) ||
571             len != mtod(m, struct rt_msghdr *)->rtm_msglen)
572                 senderr(EINVAL);
573
574         /*
575          * Most of current messages are in range 200-240 bytes,
576          * minimize possible re-allocation on reply using larger size
577          * buffer aligned on 1k boundary.
578          */
579         alloc_len = roundup2(len, 1024);
580         if ((rtm = malloc(alloc_len, M_TEMP, M_NOWAIT)) == NULL)
581                 senderr(ENOBUFS);
582
583         m_copydata(m, 0, len, (caddr_t)rtm);
584         bzero(&info, sizeof(info));
585         bzero(&w, sizeof(w));
586
587         if (rtm->rtm_version != RTM_VERSION) {
588                 /* Do not touch message since format is unknown */
589                 free(rtm, M_TEMP);
590                 rtm = NULL;
591                 senderr(EPROTONOSUPPORT);
592         }
593
594         /*
595          * Starting from here, it is possible
596          * to alter original message and insert
597          * caller PID and error value.
598          */
599
600         rtm->rtm_pid = curproc->p_pid;
601         info.rti_addrs = rtm->rtm_addrs;
602
603         info.rti_mflags = rtm->rtm_inits;
604         info.rti_rmx = &rtm->rtm_rmx;
605
606         /*
607          * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
608          * link-local address because rtrequest requires addresses with
609          * embedded scope id.
610          */
611         if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info))
612                 senderr(EINVAL);
613
614         info.rti_flags = rtm->rtm_flags;
615         if (info.rti_info[RTAX_DST] == NULL ||
616             info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
617             (info.rti_info[RTAX_GATEWAY] != NULL &&
618              info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
619                 senderr(EINVAL);
620         saf = info.rti_info[RTAX_DST]->sa_family;
621         /*
622          * Verify that the caller has the appropriate privilege; RTM_GET
623          * is the only operation the non-superuser is allowed.
624          */
625         if (rtm->rtm_type != RTM_GET) {
626                 error = priv_check(curthread, PRIV_NET_ROUTE);
627                 if (error)
628                         senderr(error);
629         }
630
631         /*
632          * The given gateway address may be an interface address.
633          * For example, issuing a "route change" command on a route
634          * entry that was created from a tunnel, and the gateway
635          * address given is the local end point. In this case the 
636          * RTF_GATEWAY flag must be cleared or the destination will
637          * not be reachable even though there is no error message.
638          */
639         if (info.rti_info[RTAX_GATEWAY] != NULL &&
640             info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
641                 struct rt_addrinfo ginfo;
642                 struct sockaddr *gdst;
643
644                 bzero(&ginfo, sizeof(ginfo));
645                 bzero(&ss, sizeof(ss));
646                 ss.ss_len = sizeof(ss);
647
648                 ginfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&ss;
649                 gdst = info.rti_info[RTAX_GATEWAY];
650
651                 /* 
652                  * A host route through the loopback interface is 
653                  * installed for each interface address. In pre 8.0
654                  * releases the interface address of a PPP link type
655                  * is not reachable locally. This behavior is fixed as 
656                  * part of the new L2/L3 redesign and rewrite work. The
657                  * signature of this interface address route is the
658                  * AF_LINK sa_family type of the rt_gateway, and the
659                  * rt_ifp has the IFF_LOOPBACK flag set.
660                  */
661                 if (rib_lookup_info(fibnum, gdst, NHR_REF, 0, &ginfo) == 0) {
662                         if (ss.ss_family == AF_LINK &&
663                             ginfo.rti_ifp->if_flags & IFF_LOOPBACK) {
664                                 info.rti_flags &= ~RTF_GATEWAY;
665                                 info.rti_flags |= RTF_GWFLAG_COMPAT;
666                         }
667                         rib_free_info(&ginfo);
668                 }
669         }
670
671         switch (rtm->rtm_type) {
672                 struct rtentry *saved_nrt;
673
674         case RTM_ADD:
675         case RTM_CHANGE:
676                 if (rtm->rtm_type == RTM_ADD) {
677                         if (info.rti_info[RTAX_GATEWAY] == NULL)
678                                 senderr(EINVAL);
679                 }
680                 saved_nrt = NULL;
681
682                 /* support for new ARP code */
683                 if (info.rti_info[RTAX_GATEWAY] != NULL &&
684                     info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
685                     (rtm->rtm_flags & RTF_LLDATA) != 0) {
686                         error = lla_rt_output(rtm, &info);
687 #ifdef INET6
688                         if (error == 0)
689                                 rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
690 #endif
691                         break;
692                 }
693                 error = rtrequest1_fib(rtm->rtm_type, &info, &saved_nrt,
694                     fibnum);
695                 if (error == 0 && saved_nrt != NULL) {
696 #ifdef INET6
697                         rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
698 #endif
699                         RT_LOCK(saved_nrt);
700                         rtm->rtm_index = saved_nrt->rt_ifp->if_index;
701                         RT_REMREF(saved_nrt);
702                         RT_UNLOCK(saved_nrt);
703                 }
704                 break;
705
706         case RTM_DELETE:
707                 saved_nrt = NULL;
708                 /* support for new ARP code */
709                 if (info.rti_info[RTAX_GATEWAY] && 
710                     (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
711                     (rtm->rtm_flags & RTF_LLDATA) != 0) {
712                         error = lla_rt_output(rtm, &info);
713 #ifdef INET6
714                         if (error == 0)
715                                 rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
716 #endif
717                         break;
718                 }
719                 error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, fibnum);
720                 if (error == 0) {
721                         RT_LOCK(saved_nrt);
722                         rt = saved_nrt;
723                         goto report;
724                 }
725 #ifdef INET6
726                 /* rt_msg2() will not be used when RTM_DELETE fails. */
727                 rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
728 #endif
729                 break;
730
731         case RTM_GET:
732                 rnh = rt_tables_get_rnh(fibnum, saf);
733                 if (rnh == NULL)
734                         senderr(EAFNOSUPPORT);
735
736                 RIB_RLOCK(rnh);
737
738                 if (info.rti_info[RTAX_NETMASK] == NULL &&
739                     rtm->rtm_type == RTM_GET) {
740                         /*
741                          * Provide longest prefix match for
742                          * address lookup (no mask).
743                          * 'route -n get addr'
744                          */
745                         rt = (struct rtentry *) rnh->rnh_matchaddr(
746                             info.rti_info[RTAX_DST], &rnh->head);
747                 } else
748                         rt = (struct rtentry *) rnh->rnh_lookup(
749                             info.rti_info[RTAX_DST],
750                             info.rti_info[RTAX_NETMASK], &rnh->head);
751
752                 if (rt == NULL) {
753                         RIB_RUNLOCK(rnh);
754                         senderr(ESRCH);
755                 }
756 #ifdef RADIX_MPATH
757                 /*
758                  * for RTM_CHANGE/LOCK, if we got multipath routes,
759                  * we require users to specify a matching RTAX_GATEWAY.
760                  *
761                  * for RTM_GET, gate is optional even with multipath.
762                  * if gate == NULL the first match is returned.
763                  * (no need to call rt_mpath_matchgate if gate == NULL)
764                  */
765                 if (rt_mpath_capable(rnh) &&
766                     (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
767                         rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
768                         if (!rt) {
769                                 RIB_RUNLOCK(rnh);
770                                 senderr(ESRCH);
771                         }
772                 }
773 #endif
774                 /*
775                  * If performing proxied L2 entry insertion, and
776                  * the actual PPP host entry is found, perform
777                  * another search to retrieve the prefix route of
778                  * the local end point of the PPP link.
779                  */
780                 if (rtm->rtm_flags & RTF_ANNOUNCE) {
781                         struct sockaddr laddr;
782
783                         if (rt->rt_ifp != NULL && 
784                             rt->rt_ifp->if_type == IFT_PROPVIRTUAL) {
785                                 struct ifaddr *ifa;
786
787                                 ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1,
788                                                 RT_ALL_FIBS);
789                                 if (ifa != NULL)
790                                         rt_maskedcopy(ifa->ifa_addr,
791                                                       &laddr,
792                                                       ifa->ifa_netmask);
793                         } else
794                                 rt_maskedcopy(rt->rt_ifa->ifa_addr,
795                                               &laddr,
796                                               rt->rt_ifa->ifa_netmask);
797                         /* 
798                          * refactor rt and no lock operation necessary
799                          */
800                         rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr,
801                             &rnh->head);
802                         if (rt == NULL) {
803                                 RIB_RUNLOCK(rnh);
804                                 senderr(ESRCH);
805                         }
806                 } 
807                 RT_LOCK(rt);
808                 RT_ADDREF(rt);
809                 RIB_RUNLOCK(rnh);
810
811 report:
812                 RT_LOCK_ASSERT(rt);
813                 if ((rt->rt_flags & RTF_HOST) == 0
814                     ? jailed_without_vnet(curthread->td_ucred)
815                     : prison_if(curthread->td_ucred,
816                     rt_key(rt)) != 0) {
817                         RT_UNLOCK(rt);
818                         senderr(ESRCH);
819                 }
820                 info.rti_info[RTAX_DST] = rt_key(rt);
821                 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
822                 info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
823                     rt_mask(rt), &ss);
824                 info.rti_info[RTAX_GENMASK] = 0;
825                 if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
826                         ifp = rt->rt_ifp;
827                         if (ifp) {
828                                 info.rti_info[RTAX_IFP] =
829                                     ifp->if_addr->ifa_addr;
830                                 error = rtm_get_jailed(&info, ifp, rt,
831                                     &saun, curthread->td_ucred);
832                                 if (error != 0) {
833                                         RT_UNLOCK(rt);
834                                         senderr(error);
835                                 }
836                                 if (ifp->if_flags & IFF_POINTOPOINT)
837                                         info.rti_info[RTAX_BRD] =
838                                             rt->rt_ifa->ifa_dstaddr;
839                                 rtm->rtm_index = ifp->if_index;
840                         } else {
841                                 info.rti_info[RTAX_IFP] = NULL;
842                                 info.rti_info[RTAX_IFA] = NULL;
843                         }
844                 } else if ((ifp = rt->rt_ifp) != NULL) {
845                         rtm->rtm_index = ifp->if_index;
846                 }
847
848                 /* Check if we need to realloc storage */
849                 rtsock_msg_buffer(rtm->rtm_type, &info, NULL, &len);
850                 if (len > alloc_len) {
851                         struct rt_msghdr *new_rtm;
852                         new_rtm = malloc(len, M_TEMP, M_NOWAIT);
853                         if (new_rtm == NULL) {
854                                 RT_UNLOCK(rt);
855                                 senderr(ENOBUFS);
856                         }
857                         bcopy(rtm, new_rtm, rtm->rtm_msglen);
858                         free(rtm, M_TEMP);
859                         rtm = new_rtm;
860                         alloc_len = len;
861                 }
862
863                 w.w_tmem = (caddr_t)rtm;
864                 w.w_tmemsize = alloc_len;
865                 rtsock_msg_buffer(rtm->rtm_type, &info, &w, &len);
866
867                 if (rt->rt_flags & RTF_GWFLAG_COMPAT)
868                         rtm->rtm_flags = RTF_GATEWAY | 
869                                 (rt->rt_flags & ~RTF_GWFLAG_COMPAT);
870                 else
871                         rtm->rtm_flags = rt->rt_flags;
872                 rt_getmetrics(rt, &rtm->rtm_rmx);
873                 rtm->rtm_addrs = info.rti_addrs;
874
875                 RT_UNLOCK(rt);
876                 break;
877
878         default:
879                 senderr(EOPNOTSUPP);
880         }
881
882 flush:
883         if (rt != NULL)
884                 RTFREE(rt);
885         /*
886          * Check to see if we don't want our own messages.
887          */
888         if ((so->so_options & SO_USELOOPBACK) == 0) {
889                 if (V_route_cb.any_count <= 1) {
890                         if (rtm != NULL)
891                                 free(rtm, M_TEMP);
892                         m_freem(m);
893                         return (error);
894                 }
895                 /* There is another listener, so construct message */
896                 rp = sotorawcb(so);
897         }
898
899         if (rtm != NULL) {
900 #ifdef INET6
901                 if (rti_need_deembed) {
902                         /* sin6_scope_id is recovered before sending rtm. */
903                         sin6 = (struct sockaddr_in6 *)&ss;
904                         for (i = 0; i < RTAX_MAX; i++) {
905                                 if (info.rti_info[i] == NULL)
906                                         continue;
907                                 if (info.rti_info[i]->sa_family != AF_INET6)
908                                         continue;
909                                 bcopy(info.rti_info[i], sin6, sizeof(*sin6));
910                                 if (sa6_recoverscope(sin6) == 0)
911                                         bcopy(sin6, info.rti_info[i],
912                                                     sizeof(*sin6));
913                         }
914                 }
915 #endif
916                 if (error != 0)
917                         rtm->rtm_errno = error;
918                 else
919                         rtm->rtm_flags |= RTF_DONE;
920
921                 m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
922                 if (m->m_pkthdr.len < rtm->rtm_msglen) {
923                         m_freem(m);
924                         m = NULL;
925                 } else if (m->m_pkthdr.len > rtm->rtm_msglen)
926                         m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
927
928                 free(rtm, M_TEMP);
929         }
930         if (m != NULL) {
931                 M_SETFIB(m, fibnum);
932                 m->m_flags |= RTS_FILTER_FIB;
933                 if (rp) {
934                         /*
935                          * XXX insure we don't get a copy by
936                          * invalidating our protocol
937                          */
938                         unsigned short family = rp->rcb_proto.sp_family;
939                         rp->rcb_proto.sp_family = 0;
940                         rt_dispatch(m, saf);
941                         rp->rcb_proto.sp_family = family;
942                 } else
943                         rt_dispatch(m, saf);
944         }
945
946         return (error);
947 }
948
949 static void
950 rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
951 {
952
953         bzero(out, sizeof(*out));
954         out->rmx_mtu = rt->rt_mtu;
955         out->rmx_weight = rt->rt_weight;
956         out->rmx_pksent = counter_u64_fetch(rt->rt_pksent);
957         /* Kernel -> userland timebase conversion. */
958         out->rmx_expire = rt->rt_expire ?
959             rt->rt_expire - time_uptime + time_second : 0;
960 }
961
962 /*
963  * Extract the addresses of the passed sockaddrs.
964  * Do a little sanity checking so as to avoid bad memory references.
965  * This data is derived straight from userland.
966  */
967 static int
968 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
969 {
970         struct sockaddr *sa;
971         int i;
972
973         for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
974                 if ((rtinfo->rti_addrs & (1 << i)) == 0)
975                         continue;
976                 sa = (struct sockaddr *)cp;
977                 /*
978                  * It won't fit.
979                  */
980                 if (cp + sa->sa_len > cplim)
981                         return (EINVAL);
982                 /*
983                  * there are no more.. quit now
984                  * If there are more bits, they are in error.
985                  * I've seen this. route(1) can evidently generate these. 
986                  * This causes kernel to core dump.
987                  * for compatibility, If we see this, point to a safe address.
988                  */
989                 if (sa->sa_len == 0) {
990                         rtinfo->rti_info[i] = &sa_zero;
991                         return (0); /* should be EINVAL but for compat */
992                 }
993                 /* accept it */
994 #ifdef INET6
995                 if (sa->sa_family == AF_INET6)
996                         sa6_embedscope((struct sockaddr_in6 *)sa,
997                             V_ip6_use_defzone);
998 #endif
999                 rtinfo->rti_info[i] = sa;
1000                 cp += SA_SIZE(sa);
1001         }
1002         return (0);
1003 }
1004
1005 /*
1006  * Fill in @dmask with valid netmask leaving original @smask
1007  * intact. Mostly used with radix netmasks.
1008  */
1009 static struct sockaddr *
1010 rtsock_fix_netmask(struct sockaddr *dst, struct sockaddr *smask,
1011     struct sockaddr_storage *dmask)
1012 {
1013         if (dst == NULL || smask == NULL)
1014                 return (NULL);
1015
1016         memset(dmask, 0, dst->sa_len);
1017         memcpy(dmask, smask, smask->sa_len);
1018         dmask->ss_len = dst->sa_len;
1019         dmask->ss_family = dst->sa_family;
1020
1021         return ((struct sockaddr *)dmask);
1022 }
1023
1024 /*
1025  * Writes information related to @rtinfo object to newly-allocated mbuf.
1026  * Assumes MCLBYTES is enough to construct any message.
1027  * Used for OS notifications of vaious events (if/ifa announces,etc)
1028  *
1029  * Returns allocated mbuf or NULL on failure.
1030  */
1031 static struct mbuf *
1032 rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
1033 {
1034         struct rt_msghdr *rtm;
1035         struct mbuf *m;
1036         int i;
1037         struct sockaddr *sa;
1038 #ifdef INET6
1039         struct sockaddr_storage ss;
1040         struct sockaddr_in6 *sin6;
1041 #endif
1042         int len, dlen;
1043
1044         switch (type) {
1045
1046         case RTM_DELADDR:
1047         case RTM_NEWADDR:
1048                 len = sizeof(struct ifa_msghdr);
1049                 break;
1050
1051         case RTM_DELMADDR:
1052         case RTM_NEWMADDR:
1053                 len = sizeof(struct ifma_msghdr);
1054                 break;
1055
1056         case RTM_IFINFO:
1057                 len = sizeof(struct if_msghdr);
1058                 break;
1059
1060         case RTM_IFANNOUNCE:
1061         case RTM_IEEE80211:
1062                 len = sizeof(struct if_announcemsghdr);
1063                 break;
1064
1065         default:
1066                 len = sizeof(struct rt_msghdr);
1067         }
1068
1069         /* XXXGL: can we use MJUMPAGESIZE cluster here? */
1070         KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
1071         if (len > MHLEN)
1072                 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1073         else
1074                 m = m_gethdr(M_NOWAIT, MT_DATA);
1075         if (m == NULL)
1076                 return (m);
1077
1078         m->m_pkthdr.len = m->m_len = len;
1079         rtm = mtod(m, struct rt_msghdr *);
1080         bzero((caddr_t)rtm, len);
1081         for (i = 0; i < RTAX_MAX; i++) {
1082                 if ((sa = rtinfo->rti_info[i]) == NULL)
1083                         continue;
1084                 rtinfo->rti_addrs |= (1 << i);
1085                 dlen = SA_SIZE(sa);
1086 #ifdef INET6
1087                 if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
1088                         sin6 = (struct sockaddr_in6 *)&ss;
1089                         bcopy(sa, sin6, sizeof(*sin6));
1090                         if (sa6_recoverscope(sin6) == 0)
1091                                 sa = (struct sockaddr *)sin6;
1092                 }
1093 #endif
1094                 m_copyback(m, len, dlen, (caddr_t)sa);
1095                 len += dlen;
1096         }
1097         if (m->m_pkthdr.len != len) {
1098                 m_freem(m);
1099                 return (NULL);
1100         }
1101         rtm->rtm_msglen = len;
1102         rtm->rtm_version = RTM_VERSION;
1103         rtm->rtm_type = type;
1104         return (m);
1105 }
1106
1107 /*
1108  * Writes information related to @rtinfo object to preallocated buffer.
1109  * Stores needed size in @plen. If @w is NULL, calculates size without
1110  * writing.
1111  * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation.
1112  *
1113  * Returns 0 on success.
1114  *
1115  */
1116 static int
1117 rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen)
1118 {
1119         int i;
1120         int len, buflen = 0, dlen;
1121         caddr_t cp = NULL;
1122         struct rt_msghdr *rtm = NULL;
1123 #ifdef INET6
1124         struct sockaddr_storage ss;
1125         struct sockaddr_in6 *sin6;
1126 #endif
1127 #ifdef COMPAT_FREEBSD32
1128         bool compat32 = false;
1129 #endif
1130
1131         switch (type) {
1132
1133         case RTM_DELADDR:
1134         case RTM_NEWADDR:
1135                 if (w != NULL && w->w_op == NET_RT_IFLISTL) {
1136 #ifdef COMPAT_FREEBSD32
1137                         if (w->w_req->flags & SCTL_MASK32) {
1138                                 len = sizeof(struct ifa_msghdrl32);
1139                                 compat32 = true;
1140                         } else
1141 #endif
1142                                 len = sizeof(struct ifa_msghdrl);
1143                 } else
1144                         len = sizeof(struct ifa_msghdr);
1145                 break;
1146
1147         case RTM_IFINFO:
1148 #ifdef COMPAT_FREEBSD32
1149                 if (w != NULL && w->w_req->flags & SCTL_MASK32) {
1150                         if (w->w_op == NET_RT_IFLISTL)
1151                                 len = sizeof(struct if_msghdrl32);
1152                         else
1153                                 len = sizeof(struct if_msghdr32);
1154                         compat32 = true;
1155                         break;
1156                 }
1157 #endif
1158                 if (w != NULL && w->w_op == NET_RT_IFLISTL)
1159                         len = sizeof(struct if_msghdrl);
1160                 else
1161                         len = sizeof(struct if_msghdr);
1162                 break;
1163
1164         case RTM_NEWMADDR:
1165                 len = sizeof(struct ifma_msghdr);
1166                 break;
1167
1168         default:
1169                 len = sizeof(struct rt_msghdr);
1170         }
1171
1172         if (w != NULL) {
1173                 rtm = (struct rt_msghdr *)w->w_tmem;
1174                 buflen = w->w_tmemsize - len;
1175                 cp = (caddr_t)w->w_tmem + len;
1176         }
1177
1178         rtinfo->rti_addrs = 0;
1179         for (i = 0; i < RTAX_MAX; i++) {
1180                 struct sockaddr *sa;
1181
1182                 if ((sa = rtinfo->rti_info[i]) == NULL)
1183                         continue;
1184                 rtinfo->rti_addrs |= (1 << i);
1185 #ifdef COMPAT_FREEBSD32
1186                 if (compat32)
1187                         dlen = SA_SIZE32(sa);
1188                 else
1189 #endif
1190                         dlen = SA_SIZE(sa);
1191                 if (cp != NULL && buflen >= dlen) {
1192 #ifdef INET6
1193                         if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
1194                                 sin6 = (struct sockaddr_in6 *)&ss;
1195                                 bcopy(sa, sin6, sizeof(*sin6));
1196                                 if (sa6_recoverscope(sin6) == 0)
1197                                         sa = (struct sockaddr *)sin6;
1198                         }
1199 #endif
1200                         bcopy((caddr_t)sa, cp, (unsigned)dlen);
1201                         cp += dlen;
1202                         buflen -= dlen;
1203                 } else if (cp != NULL) {
1204                         /*
1205                          * Buffer too small. Count needed size
1206                          * and return with error.
1207                          */
1208                         cp = NULL;
1209                 }
1210
1211                 len += dlen;
1212         }
1213
1214         if (cp != NULL) {
1215                 dlen = ALIGN(len) - len;
1216                 if (buflen < dlen)
1217                         cp = NULL;
1218                 else
1219                         buflen -= dlen;
1220         }
1221         len = ALIGN(len);
1222
1223         if (cp != NULL) {
1224                 /* fill header iff buffer is large enough */
1225                 rtm->rtm_version = RTM_VERSION;
1226                 rtm->rtm_type = type;
1227                 rtm->rtm_msglen = len;
1228         }
1229
1230         *plen = len;
1231
1232         if (w != NULL && cp == NULL)
1233                 return (ENOBUFS);
1234
1235         return (0);
1236 }
1237
1238 /*
1239  * This routine is called to generate a message from the routing
1240  * socket indicating that a redirect has occurred, a routing lookup
1241  * has failed, or that a protocol has detected timeouts to a particular
1242  * destination.
1243  */
1244 void
1245 rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
1246     int fibnum)
1247 {
1248         struct rt_msghdr *rtm;
1249         struct mbuf *m;
1250         struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1251
1252         if (V_route_cb.any_count == 0)
1253                 return;
1254         m = rtsock_msg_mbuf(type, rtinfo);
1255         if (m == NULL)
1256                 return;
1257
1258         if (fibnum != RT_ALL_FIBS) {
1259                 KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
1260                     "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
1261                 M_SETFIB(m, fibnum);
1262                 m->m_flags |= RTS_FILTER_FIB;
1263         }
1264
1265         rtm = mtod(m, struct rt_msghdr *);
1266         rtm->rtm_flags = RTF_DONE | flags;
1267         rtm->rtm_errno = error;
1268         rtm->rtm_addrs = rtinfo->rti_addrs;
1269         rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1270 }
1271
1272 void
1273 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
1274 {
1275
1276         rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS);
1277 }
1278
1279 /*
1280  * This routine is called to generate a message from the routing
1281  * socket indicating that the status of a network interface has changed.
1282  */
1283 void
1284 rt_ifmsg(struct ifnet *ifp)
1285 {
1286         struct if_msghdr *ifm;
1287         struct mbuf *m;
1288         struct rt_addrinfo info;
1289
1290         if (V_route_cb.any_count == 0)
1291                 return;
1292         bzero((caddr_t)&info, sizeof(info));
1293         m = rtsock_msg_mbuf(RTM_IFINFO, &info);
1294         if (m == NULL)
1295                 return;
1296         ifm = mtod(m, struct if_msghdr *);
1297         ifm->ifm_index = ifp->if_index;
1298         ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1299         if_data_copy(ifp, &ifm->ifm_data);
1300         ifm->ifm_addrs = 0;
1301         rt_dispatch(m, AF_UNSPEC);
1302 }
1303
1304 /*
1305  * Announce interface address arrival/withdraw.
1306  * Please do not call directly, use rt_addrmsg().
1307  * Assume input data to be valid.
1308  * Returns 0 on success.
1309  */
1310 int
1311 rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
1312 {
1313         struct rt_addrinfo info;
1314         struct sockaddr *sa;
1315         int ncmd;
1316         struct mbuf *m;
1317         struct ifa_msghdr *ifam;
1318         struct ifnet *ifp = ifa->ifa_ifp;
1319         struct sockaddr_storage ss;
1320
1321         if (V_route_cb.any_count == 0)
1322                 return (0);
1323
1324         ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
1325
1326         bzero((caddr_t)&info, sizeof(info));
1327         info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
1328         info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
1329         info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
1330             info.rti_info[RTAX_IFP], ifa->ifa_netmask, &ss);
1331         info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1332         if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL)
1333                 return (ENOBUFS);
1334         ifam = mtod(m, struct ifa_msghdr *);
1335         ifam->ifam_index = ifp->if_index;
1336         ifam->ifam_metric = ifa->ifa_ifp->if_metric;
1337         ifam->ifam_flags = ifa->ifa_flags;
1338         ifam->ifam_addrs = info.rti_addrs;
1339
1340         if (fibnum != RT_ALL_FIBS) {
1341                 M_SETFIB(m, fibnum);
1342                 m->m_flags |= RTS_FILTER_FIB;
1343         }
1344
1345         rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1346
1347         return (0);
1348 }
1349
1350 /*
1351  * Announce route addition/removal.
1352  * Please do not call directly, use rt_routemsg().
1353  * Note that @rt data MAY be inconsistent/invalid:
1354  * if some userland app sends us "invalid" route message (invalid mask,
1355  * no dst, wrong address families, etc...) we need to pass it back
1356  * to app (and any other rtsock consumers) with rtm_errno field set to
1357  * non-zero value.
1358  *
1359  * Returns 0 on success.
1360  */
1361 int
1362 rtsock_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt,
1363     int fibnum)
1364 {
1365         struct rt_addrinfo info;
1366         struct sockaddr *sa;
1367         struct mbuf *m;
1368         struct rt_msghdr *rtm;
1369         struct sockaddr_storage ss;
1370
1371         if (V_route_cb.any_count == 0)
1372                 return (0);
1373
1374         bzero((caddr_t)&info, sizeof(info));
1375         info.rti_info[RTAX_DST] = sa = rt_key(rt);
1376         info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(sa, rt_mask(rt), &ss);
1377         info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1378         if ((m = rtsock_msg_mbuf(cmd, &info)) == NULL)
1379                 return (ENOBUFS);
1380         rtm = mtod(m, struct rt_msghdr *);
1381         rtm->rtm_index = ifp->if_index;
1382         rtm->rtm_flags |= rt->rt_flags;
1383         rtm->rtm_errno = error;
1384         rtm->rtm_addrs = info.rti_addrs;
1385
1386         if (fibnum != RT_ALL_FIBS) {
1387                 M_SETFIB(m, fibnum);
1388                 m->m_flags |= RTS_FILTER_FIB;
1389         }
1390
1391         rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1392
1393         return (0);
1394 }
1395
1396 /*
1397  * This is the analogue to the rt_newaddrmsg which performs the same
1398  * function but for multicast group memberhips.  This is easier since
1399  * there is no route state to worry about.
1400  */
1401 void
1402 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
1403 {
1404         struct rt_addrinfo info;
1405         struct mbuf *m = NULL;
1406         struct ifnet *ifp = ifma->ifma_ifp;
1407         struct ifma_msghdr *ifmam;
1408
1409         if (V_route_cb.any_count == 0)
1410                 return;
1411
1412         bzero((caddr_t)&info, sizeof(info));
1413         info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1414         if (ifp && ifp->if_addr)
1415                 info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
1416         else
1417                 info.rti_info[RTAX_IFP] = NULL;
1418         /*
1419          * If a link-layer address is present, present it as a ``gateway''
1420          * (similarly to how ARP entries, e.g., are presented).
1421          */
1422         info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
1423         m = rtsock_msg_mbuf(cmd, &info);
1424         if (m == NULL)
1425                 return;
1426         ifmam = mtod(m, struct ifma_msghdr *);
1427         KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
1428             __func__));
1429         ifmam->ifmam_index = ifp->if_index;
1430         ifmam->ifmam_addrs = info.rti_addrs;
1431         rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC);
1432 }
1433
1434 static struct mbuf *
1435 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
1436         struct rt_addrinfo *info)
1437 {
1438         struct if_announcemsghdr *ifan;
1439         struct mbuf *m;
1440
1441         if (V_route_cb.any_count == 0)
1442                 return NULL;
1443         bzero((caddr_t)info, sizeof(*info));
1444         m = rtsock_msg_mbuf(type, info);
1445         if (m != NULL) {
1446                 ifan = mtod(m, struct if_announcemsghdr *);
1447                 ifan->ifan_index = ifp->if_index;
1448                 strlcpy(ifan->ifan_name, ifp->if_xname,
1449                         sizeof(ifan->ifan_name));
1450                 ifan->ifan_what = what;
1451         }
1452         return m;
1453 }
1454
1455 /*
1456  * This is called to generate routing socket messages indicating
1457  * IEEE80211 wireless events.
1458  * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
1459  */
1460 void
1461 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
1462 {
1463         struct mbuf *m;
1464         struct rt_addrinfo info;
1465
1466         m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
1467         if (m != NULL) {
1468                 /*
1469                  * Append the ieee80211 data.  Try to stick it in the
1470                  * mbuf containing the ifannounce msg; otherwise allocate
1471                  * a new mbuf and append.
1472                  *
1473                  * NB: we assume m is a single mbuf.
1474                  */
1475                 if (data_len > M_TRAILINGSPACE(m)) {
1476                         struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
1477                         if (n == NULL) {
1478                                 m_freem(m);
1479                                 return;
1480                         }
1481                         bcopy(data, mtod(n, void *), data_len);
1482                         n->m_len = data_len;
1483                         m->m_next = n;
1484                 } else if (data_len > 0) {
1485                         bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
1486                         m->m_len += data_len;
1487                 }
1488                 if (m->m_flags & M_PKTHDR)
1489                         m->m_pkthdr.len += data_len;
1490                 mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
1491                 rt_dispatch(m, AF_UNSPEC);
1492         }
1493 }
1494
1495 /*
1496  * This is called to generate routing socket messages indicating
1497  * network interface arrival and departure.
1498  */
1499 void
1500 rt_ifannouncemsg(struct ifnet *ifp, int what)
1501 {
1502         struct mbuf *m;
1503         struct rt_addrinfo info;
1504
1505         m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
1506         if (m != NULL)
1507                 rt_dispatch(m, AF_UNSPEC);
1508 }
1509
1510 static void
1511 rt_dispatch(struct mbuf *m, sa_family_t saf)
1512 {
1513         struct m_tag *tag;
1514
1515         /*
1516          * Preserve the family from the sockaddr, if any, in an m_tag for
1517          * use when injecting the mbuf into the routing socket buffer from
1518          * the netisr.
1519          */
1520         if (saf != AF_UNSPEC) {
1521                 tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
1522                     M_NOWAIT);
1523                 if (tag == NULL) {
1524                         m_freem(m);
1525                         return;
1526                 }
1527                 *(unsigned short *)(tag + 1) = saf;
1528                 m_tag_prepend(m, tag);
1529         }
1530 #ifdef VIMAGE
1531         if (V_loif)
1532                 m->m_pkthdr.rcvif = V_loif;
1533         else {
1534                 m_freem(m);
1535                 return;
1536         }
1537 #endif
1538         netisr_queue(NETISR_ROUTE, m);  /* mbuf is free'd on failure. */
1539 }
1540
1541 /*
1542  * This is used in dumping the kernel table via sysctl().
1543  */
1544 static int
1545 sysctl_dumpentry(struct radix_node *rn, void *vw)
1546 {
1547         struct walkarg *w = vw;
1548         struct rtentry *rt = (struct rtentry *)rn;
1549         int error = 0, size;
1550         struct rt_addrinfo info;
1551         struct sockaddr_storage ss;
1552
1553         if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1554                 return 0;
1555         if ((rt->rt_flags & RTF_HOST) == 0
1556             ? jailed_without_vnet(w->w_req->td->td_ucred)
1557             : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0)
1558                 return (0);
1559         bzero((caddr_t)&info, sizeof(info));
1560         info.rti_info[RTAX_DST] = rt_key(rt);
1561         info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1562         info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
1563             rt_mask(rt), &ss);
1564         info.rti_info[RTAX_GENMASK] = 0;
1565         if (rt->rt_ifp) {
1566                 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
1567                 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1568                 if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1569                         info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1570         }
1571         if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0)
1572                 return (error);
1573         if (w->w_req && w->w_tmem) {
1574                 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1575
1576                 if (rt->rt_flags & RTF_GWFLAG_COMPAT)
1577                         rtm->rtm_flags = RTF_GATEWAY | 
1578                                 (rt->rt_flags & ~RTF_GWFLAG_COMPAT);
1579                 else
1580                         rtm->rtm_flags = rt->rt_flags;
1581                 rt_getmetrics(rt, &rtm->rtm_rmx);
1582                 rtm->rtm_index = rt->rt_ifp->if_index;
1583                 rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1584                 rtm->rtm_addrs = info.rti_addrs;
1585                 error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
1586                 return (error);
1587         }
1588         return (error);
1589 }
1590
1591 static int
1592 sysctl_iflist_ifml(struct ifnet *ifp, const struct if_data *src_ifd,
1593     struct rt_addrinfo *info, struct walkarg *w, int len)
1594 {
1595         struct if_msghdrl *ifm;
1596         struct if_data *ifd;
1597
1598         ifm = (struct if_msghdrl *)w->w_tmem;
1599
1600 #ifdef COMPAT_FREEBSD32
1601         if (w->w_req->flags & SCTL_MASK32) {
1602                 struct if_msghdrl32 *ifm32;
1603
1604                 ifm32 = (struct if_msghdrl32 *)ifm;
1605                 ifm32->ifm_addrs = info->rti_addrs;
1606                 ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1607                 ifm32->ifm_index = ifp->if_index;
1608                 ifm32->_ifm_spare1 = 0;
1609                 ifm32->ifm_len = sizeof(*ifm32);
1610                 ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
1611                 ifd = &ifm32->ifm_data;
1612         } else
1613 #endif
1614         {
1615                 ifm->ifm_addrs = info->rti_addrs;
1616                 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1617                 ifm->ifm_index = ifp->if_index;
1618                 ifm->_ifm_spare1 = 0;
1619                 ifm->ifm_len = sizeof(*ifm);
1620                 ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
1621                 ifd = &ifm->ifm_data;
1622         }
1623
1624         memcpy(ifd, src_ifd, sizeof(*ifd));
1625
1626         return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
1627 }
1628
1629 static int
1630 sysctl_iflist_ifm(struct ifnet *ifp, const struct if_data *src_ifd,
1631     struct rt_addrinfo *info, struct walkarg *w, int len)
1632 {
1633         struct if_msghdr *ifm;
1634         struct if_data *ifd;
1635
1636         ifm = (struct if_msghdr *)w->w_tmem;
1637
1638 #ifdef COMPAT_FREEBSD32
1639         if (w->w_req->flags & SCTL_MASK32) {
1640                 struct if_msghdr32 *ifm32;
1641
1642                 ifm32 = (struct if_msghdr32 *)ifm;
1643                 ifm32->ifm_addrs = info->rti_addrs;
1644                 ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1645                 ifm32->ifm_index = ifp->if_index;
1646                 ifd = &ifm32->ifm_data;
1647         } else
1648 #endif
1649         {
1650                 ifm->ifm_addrs = info->rti_addrs;
1651                 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1652                 ifm->ifm_index = ifp->if_index;
1653                 ifd = &ifm->ifm_data;
1654         }
1655
1656         memcpy(ifd, src_ifd, sizeof(*ifd));
1657
1658         return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
1659 }
1660
1661 static int
1662 sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
1663     struct walkarg *w, int len)
1664 {
1665         struct ifa_msghdrl *ifam;
1666         struct if_data *ifd;
1667
1668         ifam = (struct ifa_msghdrl *)w->w_tmem;
1669
1670 #ifdef COMPAT_FREEBSD32
1671         if (w->w_req->flags & SCTL_MASK32) {
1672                 struct ifa_msghdrl32 *ifam32;
1673
1674                 ifam32 = (struct ifa_msghdrl32 *)ifam;
1675                 ifam32->ifam_addrs = info->rti_addrs;
1676                 ifam32->ifam_flags = ifa->ifa_flags;
1677                 ifam32->ifam_index = ifa->ifa_ifp->if_index;
1678                 ifam32->_ifam_spare1 = 0;
1679                 ifam32->ifam_len = sizeof(*ifam32);
1680                 ifam32->ifam_data_off =
1681                     offsetof(struct ifa_msghdrl32, ifam_data);
1682                 ifam32->ifam_metric = ifa->ifa_ifp->if_metric;
1683                 ifd = &ifam32->ifam_data;
1684         } else
1685 #endif
1686         {
1687                 ifam->ifam_addrs = info->rti_addrs;
1688                 ifam->ifam_flags = ifa->ifa_flags;
1689                 ifam->ifam_index = ifa->ifa_ifp->if_index;
1690                 ifam->_ifam_spare1 = 0;
1691                 ifam->ifam_len = sizeof(*ifam);
1692                 ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
1693                 ifam->ifam_metric = ifa->ifa_ifp->if_metric;
1694                 ifd = &ifam->ifam_data;
1695         }
1696
1697         bzero(ifd, sizeof(*ifd));
1698         ifd->ifi_datalen = sizeof(struct if_data);
1699         ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets);
1700         ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets);
1701         ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes);
1702         ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes);
1703
1704         /* Fixup if_data carp(4) vhid. */
1705         if (carp_get_vhid_p != NULL)
1706                 ifd->ifi_vhid = (*carp_get_vhid_p)(ifa);
1707
1708         return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
1709 }
1710
1711 static int
1712 sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
1713     struct walkarg *w, int len)
1714 {
1715         struct ifa_msghdr *ifam;
1716
1717         ifam = (struct ifa_msghdr *)w->w_tmem;
1718         ifam->ifam_addrs = info->rti_addrs;
1719         ifam->ifam_flags = ifa->ifa_flags;
1720         ifam->ifam_index = ifa->ifa_ifp->if_index;
1721         ifam->ifam_metric = ifa->ifa_ifp->if_metric;
1722
1723         return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
1724 }
1725
1726 static int
1727 sysctl_iflist(int af, struct walkarg *w)
1728 {
1729         struct ifnet *ifp;
1730         struct ifaddr *ifa;
1731         struct if_data ifd;
1732         struct rt_addrinfo info;
1733         int len, error = 0;
1734         struct sockaddr_storage ss;
1735
1736         bzero((caddr_t)&info, sizeof(info));
1737         bzero(&ifd, sizeof(ifd));
1738         IFNET_RLOCK_NOSLEEP();
1739         TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1740                 if (w->w_arg && w->w_arg != ifp->if_index)
1741                         continue;
1742                 if_data_copy(ifp, &ifd);
1743                 IF_ADDR_RLOCK(ifp);
1744                 ifa = ifp->if_addr;
1745                 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1746                 error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
1747                 if (error != 0)
1748                         goto done;
1749                 info.rti_info[RTAX_IFP] = NULL;
1750                 if (w->w_req && w->w_tmem) {
1751                         if (w->w_op == NET_RT_IFLISTL)
1752                                 error = sysctl_iflist_ifml(ifp, &ifd, &info, w,
1753                                     len);
1754                         else
1755                                 error = sysctl_iflist_ifm(ifp, &ifd, &info, w,
1756                                     len);
1757                         if (error)
1758                                 goto done;
1759                 }
1760                 while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
1761                         if (af && af != ifa->ifa_addr->sa_family)
1762                                 continue;
1763                         if (prison_if(w->w_req->td->td_ucred,
1764                             ifa->ifa_addr) != 0)
1765                                 continue;
1766                         info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1767                         info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
1768                             ifa->ifa_addr, ifa->ifa_netmask, &ss);
1769                         info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1770                         error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len);
1771                         if (error != 0)
1772                                 goto done;
1773                         if (w->w_req && w->w_tmem) {
1774                                 if (w->w_op == NET_RT_IFLISTL)
1775                                         error = sysctl_iflist_ifaml(ifa, &info,
1776                                             w, len);
1777                                 else
1778                                         error = sysctl_iflist_ifam(ifa, &info,
1779                                             w, len);
1780                                 if (error)
1781                                         goto done;
1782                         }
1783                 }
1784                 IF_ADDR_RUNLOCK(ifp);
1785                 info.rti_info[RTAX_IFA] = NULL;
1786                 info.rti_info[RTAX_NETMASK] = NULL;
1787                 info.rti_info[RTAX_BRD] = NULL;
1788         }
1789 done:
1790         if (ifp != NULL)
1791                 IF_ADDR_RUNLOCK(ifp);
1792         IFNET_RUNLOCK_NOSLEEP();
1793         return (error);
1794 }
1795
1796 static int
1797 sysctl_ifmalist(int af, struct walkarg *w)
1798 {
1799         struct rt_addrinfo info;
1800         struct ifaddr *ifa;
1801         struct ifmultiaddr *ifma;
1802         struct ifnet *ifp;
1803         int error, len;
1804
1805         error = 0;
1806         bzero((caddr_t)&info, sizeof(info));
1807
1808         IFNET_RLOCK_NOSLEEP();
1809         TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1810                 if (w->w_arg && w->w_arg != ifp->if_index)
1811                         continue;
1812                 ifa = ifp->if_addr;
1813                 info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
1814                 IF_ADDR_RLOCK(ifp);
1815                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1816                         if (af && af != ifma->ifma_addr->sa_family)
1817                                 continue;
1818                         if (prison_if(w->w_req->td->td_ucred,
1819                             ifma->ifma_addr) != 0)
1820                                 continue;
1821                         info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1822                         info.rti_info[RTAX_GATEWAY] =
1823                             (ifma->ifma_addr->sa_family != AF_LINK) ?
1824                             ifma->ifma_lladdr : NULL;
1825                         error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len);
1826                         if (error != 0)
1827                                 break;
1828                         if (w->w_req && w->w_tmem) {
1829                                 struct ifma_msghdr *ifmam;
1830
1831                                 ifmam = (struct ifma_msghdr *)w->w_tmem;
1832                                 ifmam->ifmam_index = ifma->ifma_ifp->if_index;
1833                                 ifmam->ifmam_flags = 0;
1834                                 ifmam->ifmam_addrs = info.rti_addrs;
1835                                 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1836                                 if (error != 0)
1837                                         break;
1838                         }
1839                 }
1840                 IF_ADDR_RUNLOCK(ifp);
1841                 if (error != 0)
1842                         break;
1843         }
1844         IFNET_RUNLOCK_NOSLEEP();
1845         return (error);
1846 }
1847
1848 static int
1849 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1850 {
1851         int     *name = (int *)arg1;
1852         u_int   namelen = arg2;
1853         struct rib_head *rnh = NULL; /* silence compiler. */
1854         int     i, lim, error = EINVAL;
1855         int     fib = 0;
1856         u_char  af;
1857         struct  walkarg w;
1858
1859         name ++;
1860         namelen--;
1861         if (req->newptr)
1862                 return (EPERM);
1863         if (name[1] == NET_RT_DUMP) {
1864                 if (namelen == 3)
1865                         fib = req->td->td_proc->p_fibnum;
1866                 else if (namelen == 4)
1867                         fib = (name[3] == RT_ALL_FIBS) ?
1868                             req->td->td_proc->p_fibnum : name[3];
1869                 else
1870                         return ((namelen < 3) ? EISDIR : ENOTDIR);
1871                 if (fib < 0 || fib >= rt_numfibs)
1872                         return (EINVAL);
1873         } else if (namelen != 3)
1874                 return ((namelen < 3) ? EISDIR : ENOTDIR);
1875         af = name[0];
1876         if (af > AF_MAX)
1877                 return (EINVAL);
1878         bzero(&w, sizeof(w));
1879         w.w_op = name[1];
1880         w.w_arg = name[2];
1881         w.w_req = req;
1882
1883         error = sysctl_wire_old_buffer(req, 0);
1884         if (error)
1885                 return (error);
1886         
1887         /*
1888          * Allocate reply buffer in advance.
1889          * All rtsock messages has maximum length of u_short.
1890          */
1891         w.w_tmemsize = 65536;
1892         w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK);
1893
1894         switch (w.w_op) {
1895
1896         case NET_RT_DUMP:
1897         case NET_RT_FLAGS:
1898                 if (af == 0) {                  /* dump all tables */
1899                         i = 1;
1900                         lim = AF_MAX;
1901                 } else                          /* dump only one table */
1902                         i = lim = af;
1903
1904                 /*
1905                  * take care of llinfo entries, the caller must
1906                  * specify an AF
1907                  */
1908                 if (w.w_op == NET_RT_FLAGS &&
1909                     (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
1910                         if (af != 0)
1911                                 error = lltable_sysctl_dumparp(af, w.w_req);
1912                         else
1913                                 error = EINVAL;
1914                         break;
1915                 }
1916                 /*
1917                  * take care of routing entries
1918                  */
1919                 for (error = 0; error == 0 && i <= lim; i++) {
1920                         rnh = rt_tables_get_rnh(fib, i);
1921                         if (rnh != NULL) {
1922                                 RIB_RLOCK(rnh); 
1923                                 error = rnh->rnh_walktree(&rnh->head,
1924                                     sysctl_dumpentry, &w);
1925                                 RIB_RUNLOCK(rnh);
1926                         } else if (af != 0)
1927                                 error = EAFNOSUPPORT;
1928                 }
1929                 break;
1930
1931         case NET_RT_IFLIST:
1932         case NET_RT_IFLISTL:
1933                 error = sysctl_iflist(af, &w);
1934                 break;
1935
1936         case NET_RT_IFMALIST:
1937                 error = sysctl_ifmalist(af, &w);
1938                 break;
1939         }
1940
1941         free(w.w_tmem, M_TEMP);
1942         return (error);
1943 }
1944
1945 static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1946
1947 /*
1948  * Definitions of protocols supported in the ROUTE domain.
1949  */
1950
1951 static struct domain routedomain;               /* or at least forward */
1952
1953 static struct protosw routesw[] = {
1954 {
1955         .pr_type =              SOCK_RAW,
1956         .pr_domain =            &routedomain,
1957         .pr_flags =             PR_ATOMIC|PR_ADDR,
1958         .pr_output =            route_output,
1959         .pr_ctlinput =          raw_ctlinput,
1960         .pr_init =              raw_init,
1961         .pr_usrreqs =           &route_usrreqs
1962 }
1963 };
1964
1965 static struct domain routedomain = {
1966         .dom_family =           PF_ROUTE,
1967         .dom_name =              "route",
1968         .dom_protosw =          routesw,
1969         .dom_protoswNPROTOSW =  &routesw[nitems(routesw)]
1970 };
1971
1972 VNET_DOMAIN_SET(route);