sys/netinet/ip_mroute.c

   1 /*
   2  * IP multicast forwarding procedures
   3  *
   4  * Written by David Waitzman, BBN Labs, August 1988.
   5  * Modified by Steve Deering, Stanford, February 1989.
   6  * Modified by Mark J. Steiglitz, Stanford, May, 1991
   7  * Modified by Van Jacobson, LBL, January 1993
   8  * Modified by Ajit Thyagarajan, PARC, August 1993
   9  * Modified by Bill Fenner, PARC, April 1995
  10  *
  11  * MROUTING Revision: 3.5
  12  * $FreeBSD$
  13  */
  14
  15 #include "opt_mrouting.h"
  16 #include "opt_random_ip_id.h"
  17
  18 #include <sys/param.h>
  19 #include <sys/kernel.h>
  20 #include <sys/lock.h>
  21 #include <sys/malloc.h>
  22 #include <sys/mbuf.h>
  23 #include <sys/protosw.h>
  24 #include <sys/signalvar.h>
  25 #include <sys/socket.h>
  26 #include <sys/socketvar.h>
  27 #include <sys/sockio.h>
  28 #include <sys/sx.h>
  29 #include <sys/sysctl.h>
  30 #include <sys/syslog.h>
  31 #include <sys/systm.h>
  32 #include <sys/time.h>
  33 #include <net/if.h>
  34 #include <net/route.h>
  35 #include <netinet/in.h>
  36 #include <netinet/igmp.h>
  37 #include <netinet/in_systm.h>
  38 #include <netinet/in_var.h>
  39 #include <netinet/ip.h>
  40 #include <netinet/ip_encap.h>
  41 #include <netinet/ip_mroute.h>
  42 #include <netinet/ip_var.h>
  43 #include <netinet/udp.h>
  44 #include <machine/in_cksum.h>
  45
  46 #ifndef MROUTING
  47 extern u_long   _ip_mcast_src(int vifi);
  48 extern int      _ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
  49                     struct ip_moptions *imo);
  50 extern int      _ip_mrouter_done(void);
  51 extern int      _ip_mrouter_get(struct socket *so, struct sockopt *sopt);
  52 extern int      _ip_mrouter_set(struct socket *so, struct sockopt *sopt);
  53 extern int      _mrt_ioctl(int req, caddr_t data);
  54
  55 /*
  56  * Dummy routines and globals used when multicast routing is not compiled in.
  57  */
  58
  59 struct socket  *ip_mrouter  = NULL;
  60 u_int           rsvpdebug = 0;
  61
  62 int
  63 _ip_mrouter_set(so, sopt)
  64         struct socket *so;
  65         struct sockopt *sopt;
  66 {
  67         return(EOPNOTSUPP);
  68 }
  69
  70 int (*ip_mrouter_set)(struct socket *, struct sockopt *) = _ip_mrouter_set;
  71
  72
  73 int
  74 _ip_mrouter_get(so, sopt)
  75         struct socket *so;
  76         struct sockopt *sopt;
  77 {
  78         return(EOPNOTSUPP);
  79 }
  80
  81 int (*ip_mrouter_get)(struct socket *, struct sockopt *) = _ip_mrouter_get;
  82
  83 int
  84 _ip_mrouter_done()
  85 {
  86         return(0);
  87 }
  88
  89 int (*ip_mrouter_done)(void) = _ip_mrouter_done;
  90
  91 int
  92 _ip_mforward(ip, ifp, m, imo)
  93         struct ip *ip;
  94         struct ifnet *ifp;
  95         struct mbuf *m;
  96         struct ip_moptions *imo;
  97 {
  98         return(0);
  99 }
 100
 101 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 102                    struct ip_moptions *) = _ip_mforward;
 103
 104 int
 105 _mrt_ioctl(int req, caddr_t data)
 106 {
 107         return EOPNOTSUPP;
 108 }
 109
 110 int (*mrt_ioctl)(int, caddr_t) = _mrt_ioctl;
 111
 112 void
 113 rsvp_input(m, off)              /* XXX must fixup manually */
 114         struct mbuf *m;
 115         int off;
 116 {
 117     /* Can still get packets with rsvp_on = 0 if there is a local member
 118      * of the group to which the RSVP packet is addressed.  But in this
 119      * case we want to throw the packet away.
 120      */
 121     if (!rsvp_on) {
 122         m_freem(m);
 123         return;
 124     }
 125
 126     if (ip_rsvpd != NULL) {
 127         if (rsvpdebug)
 128             printf("rsvp_input: Sending packet up old-style socket\n");
 129         rip_input(m, off);
 130         return;
 131     }
 132     /* Drop the packet */
 133     m_freem(m);
 134 }
 135
 136 int (*legal_vif_num)(int) = 0;
 137
 138 /*
 139  * This should never be called, since IP_MULTICAST_VIF should fail, but
 140  * just in case it does get called, the code a little lower in ip_output
 141  * will assign the packet a local address.
 142  */
 143 u_long
 144 _ip_mcast_src(int vifi) { return INADDR_ANY; }
 145 u_long (*ip_mcast_src)(int) = _ip_mcast_src;
 146
 147 int
 148 ip_rsvp_vif_init(so, sopt)
 149     struct socket *so;
 150     struct sockopt *sopt;
 151 {
 152     return(EINVAL);
 153 }
 154
 155 int
 156 ip_rsvp_vif_done(so, sopt)
 157     struct socket *so;
 158     struct sockopt *sopt;
 159 {
 160     return(EINVAL);
 161 }
 162
 163 void
 164 ip_rsvp_force_done(so)
 165     struct socket *so;
 166 {
 167     return;
 168 }
 169
 170 #else /* MROUTING */
 171
 172 #define M_HASCL(m)      ((m)->m_flags & M_EXT)
 173
 174 static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables");
 175
 176 #ifndef MROUTE_KLD
 177 /* The socket used to communicate with the multicast routing daemon.  */
 178 struct socket  *ip_mrouter  = NULL;
 179 #endif
 180
 181 #if defined(MROUTING) || defined(MROUTE_KLD)
 182 static struct mrtstat   mrtstat;
 183 SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW,
 184     &mrtstat, mrtstat, "Multicast Routing Statistics (struct mrtstat, netinet/ip_mroute.h)");
 185 #endif
 186
 187 static struct mfc       *mfctable[MFCTBLSIZ];
 188 static u_char           nexpire[MFCTBLSIZ];
 189 static struct vif       viftable[MAXVIFS];
 190 static u_int    mrtdebug = 0;     /* debug level        */
 191 #define         DEBUG_MFC       0x02
 192 #define         DEBUG_FORWARD   0x04
 193 #define         DEBUG_EXPIRE    0x08
 194 #define         DEBUG_XMIT      0x10
 195 static u_int    tbfdebug = 0;     /* tbf debug level    */
 196 static u_int    rsvpdebug = 0;    /* rsvp debug level   */
 197
 198 static struct callout_handle expire_upcalls_ch;
 199
 200 #define         EXPIRE_TIMEOUT  (hz / 4)        /* 4x / second          */
 201 #define         UPCALL_EXPIRE   6               /* number of timeouts   */
 202
 203 /*
 204  * Define the token bucket filter structures
 205  * tbftable -> each vif has one of these for storing info
 206  */
 207
 208 static struct tbf tbftable[MAXVIFS];
 209 #define         TBF_REPROCESS   (hz / 100)      /* 100x / second */
 210
 211 /*
 212  * 'Interfaces' associated with decapsulator (so we can tell
 213  * packets that went through it from ones that get reflected
 214  * by a broken gateway).  These interfaces are never linked into
 215  * the system ifnet list & no routes point to them.  I.e., packets
 216  * can't be sent this way.  They only exist as a placeholder for
 217  * multicast source verification.
 218  */
 219 static struct ifnet multicast_decap_if[MAXVIFS];
 220
 221 #define ENCAP_TTL 64
 222 #define ENCAP_PROTO IPPROTO_IPIP        /* 4 */
 223
 224 /* prototype IP hdr for encapsulated packets */
 225 static struct ip multicast_encap_iphdr = {
 226 #if BYTE_ORDER == LITTLE_ENDIAN
 227         sizeof(struct ip) >> 2, IPVERSION,
 228 #else
 229         IPVERSION, sizeof(struct ip) >> 2,
 230 #endif
 231         0,                              /* tos */
 232         sizeof(struct ip),              /* total length */
 233         0,                              /* id */
 234         0,                              /* frag offset */
 235         ENCAP_TTL, ENCAP_PROTO,
 236         0,                              /* checksum */
 237 };
 238
 239 /*
 240  * Private variables.
 241  */
 242 static vifi_t      numvifs = 0;
 243 static const struct encaptab *encap_cookie = NULL;
 244
 245 /*
 246  * one-back cache used by mroute_encapcheck to locate a tunnel's vif
 247  * given a datagram's src ip address.
 248  */
 249 static u_long last_encap_src;
 250 static struct vif *last_encap_vif;
 251
 252 static u_long   X_ip_mcast_src(int vifi);
 253 static int      X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo);
 254 static int      X_ip_mrouter_done(void);
 255 static int      X_ip_mrouter_get(struct socket *so, struct sockopt *m);
 256 static int      X_ip_mrouter_set(struct socket *so, struct sockopt *m);
 257 static int      X_legal_vif_num(int vif);
 258 static int      X_mrt_ioctl(int cmd, caddr_t data);
 259
 260 static int get_sg_cnt(struct sioc_sg_req *);
 261 static int get_vif_cnt(struct sioc_vif_req *);
 262 static int ip_mrouter_init(struct socket *, int);
 263 static int add_vif(struct vifctl *);
 264 static int del_vif(vifi_t);
 265 static int add_mfc(struct mfcctl *);
 266 static int del_mfc(struct mfcctl *);
 267 static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
 268 static int set_assert(int);
 269 static void expire_upcalls(void *);
 270 static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *,
 271                   vifi_t);
 272 static void phyint_send(struct ip *, struct vif *, struct mbuf *);
 273 static void encap_send(struct ip *, struct vif *, struct mbuf *);
 274 static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long);
 275 static void tbf_queue(struct vif *, struct mbuf *);
 276 static void tbf_process_q(struct vif *);
 277 static void tbf_reprocess_q(void *);
 278 static int tbf_dq_sel(struct vif *, struct ip *);
 279 static void tbf_send_packet(struct vif *, struct mbuf *);
 280 static void tbf_update_tokens(struct vif *);
 281 static int priority(struct vif *, struct ip *);
 282
 283 /*
 284  * whether or not special PIM assert processing is enabled.
 285  */
 286 static int pim_assert;
 287 /*
 288  * Rate limit for assert notification messages, in usec
 289  */
 290 #define ASSERT_MSG_TIME         3000000
 291
 292 /*
 293  * Hash function for a source, group entry
 294  */
 295 #define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
 296                         ((g) >> 20) ^ ((g) >> 10) ^ (g))
 297
 298 /*
 299  * Find a route for a given origin IP address and Multicast group address
 300  * Type of service parameter to be added in the future!!!
 301  */
 302
 303 #define MFCFIND(o, g, rt) { \
 304         register struct mfc *_rt = mfctable[MFCHASH(o,g)]; \
 305         rt = NULL; \
 306         ++mrtstat.mrts_mfc_lookups; \
 307         while (_rt) { \
 308                 if ((_rt->mfc_origin.s_addr == o) && \
 309                     (_rt->mfc_mcastgrp.s_addr == g) && \
 310                     (_rt->mfc_stall == NULL)) { \
 311                         rt = _rt; \
 312                         break; \
 313                 } \
 314                 _rt = _rt->mfc_next; \
 315         } \
 316         if (rt == NULL) { \
 317                 ++mrtstat.mrts_mfc_misses; \
 318         } \
 319 }
 320
 321
 322 /*
 323  * Macros to compute elapsed time efficiently
 324  * Borrowed from Van Jacobson's scheduling code
 325  */
 326 #define TV_DELTA(a, b, delta) { \
 327             register int xxs; \
 328                 \
 329             delta = (a).tv_usec - (b).tv_usec; \
 330             if ((xxs = (a).tv_sec - (b).tv_sec)) { \
 331                switch (xxs) { \
 332                       case 2: \
 333                           delta += 1000000; \
 334                               /* fall through */ \
 335                       case 1: \
 336                           delta += 1000000; \
 337                           break; \
 338                       default: \
 339                           delta += (1000000 * xxs); \
 340                } \
 341             } \
 342 }
 343
 344 #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
 345               (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
 346
 347 #ifdef UPCALL_TIMING
 348 u_long upcall_data[51];
 349 static void collate(struct timeval *);
 350 #endif /* UPCALL_TIMING */
 351
 352
 353 /*
 354  * Handle MRT setsockopt commands to modify the multicast routing tables.
 355  */
 356 static int
 357 X_ip_mrouter_set(so, sopt)
 358         struct socket *so;
 359         struct sockopt *sopt;
 360 {
 361         int     error, optval;
 362         vifi_t  vifi;
 363         struct  vifctl vifc;
 364         struct  mfcctl mfc;
 365
 366         if (so != ip_mrouter && sopt->sopt_name != MRT_INIT)
 367                 return (EPERM);
 368
 369         error = 0;
 370         switch (sopt->sopt_name) {
 371         case MRT_INIT:
 372                 error = sooptcopyin(sopt, &optval, sizeof optval,
 373                                     sizeof optval);
 374                 if (error)
 375                         break;
 376                 error = ip_mrouter_init(so, optval);
 377                 break;
 378
 379         case MRT_DONE:
 380                 error = ip_mrouter_done();
 381                 break;
 382
 383         case MRT_ADD_VIF:
 384                 error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
 385                 if (error)
 386                         break;
 387                 error = add_vif(&vifc);
 388                 break;
 389
 390         case MRT_DEL_VIF:
 391                 error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
 392                 if (error)
 393                         break;
 394                 error = del_vif(vifi);
 395                 break;
 396
 397         case MRT_ADD_MFC:
 398         case MRT_DEL_MFC:
 399                 error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc);
 400                 if (error)
 401                         break;
 402                 if (sopt->sopt_name == MRT_ADD_MFC)
 403                         error = add_mfc(&mfc);
 404                 else
 405                         error = del_mfc(&mfc);
 406                 break;
 407
 408         case MRT_ASSERT:
 409                 error = sooptcopyin(sopt, &optval, sizeof optval,
 410                                     sizeof optval);
 411                 if (error)
 412                         break;
 413                 set_assert(optval);
 414                 break;
 415
 416         default:
 417                 error = EOPNOTSUPP;
 418                 break;
 419         }
 420         return (error);
 421 }
 422
 423 #ifndef MROUTE_KLD
 424 int (*ip_mrouter_set)(struct socket *, struct sockopt *) = X_ip_mrouter_set;
 425 #endif
 426
 427 /*
 428  * Handle MRT getsockopt commands
 429  */
 430 static int
 431 X_ip_mrouter_get(so, sopt)
 432         struct socket *so;
 433         struct sockopt *sopt;
 434 {
 435         int error;
 436         static int version = 0x0305; /* !!! why is this here? XXX */
 437
 438         switch (sopt->sopt_name) {
 439         case MRT_VERSION:
 440                 error = sooptcopyout(sopt, &version, sizeof version);
 441                 break;
 442
 443         case MRT_ASSERT:
 444                 error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert);
 445                 break;
 446         default:
 447                 error = EOPNOTSUPP;
 448                 break;
 449         }
 450         return (error);
 451 }
 452
 453 #ifndef MROUTE_KLD
 454 int (*ip_mrouter_get)(struct socket *, struct sockopt *) = X_ip_mrouter_get;
 455 #endif
 456
 457 /*
 458  * Handle ioctl commands to obtain information from the cache
 459  */
 460 static int
 461 X_mrt_ioctl(cmd, data)
 462     int cmd;
 463     caddr_t data;
 464 {
 465     int error = 0;
 466
 467     switch (cmd) {
 468         case (SIOCGETVIFCNT):
 469             return (get_vif_cnt((struct sioc_vif_req *)data));
 470             break;
 471         case (SIOCGETSGCNT):
 472             return (get_sg_cnt((struct sioc_sg_req *)data));
 473             break;
 474         default:
 475             return (EINVAL);
 476             break;
 477     }
 478     return error;
 479 }
 480
 481 #ifndef MROUTE_KLD
 482 int (*mrt_ioctl)(int, caddr_t) = X_mrt_ioctl;
 483 #endif
 484
 485 /*
 486  * returns the packet, byte, rpf-failure count for the source group provided
 487  */
 488 static int
 489 get_sg_cnt(req)
 490     register struct sioc_sg_req *req;
 491 {
 492     register struct mfc *rt;
 493     int s;
 494
 495     s = splnet();
 496     MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
 497     splx(s);
 498     if (rt != NULL) {
 499         req->pktcnt = rt->mfc_pkt_cnt;
 500         req->bytecnt = rt->mfc_byte_cnt;
 501         req->wrong_if = rt->mfc_wrong_if;
 502     } else
 503         req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
 504
 505     return 0;
 506 }
 507
 508 /*
 509  * returns the input and output packet and byte counts on the vif provided
 510  */
 511 static int
 512 get_vif_cnt(req)
 513     register struct sioc_vif_req *req;
 514 {
 515     register vifi_t vifi = req->vifi;
 516
 517     if (vifi >= numvifs) return EINVAL;
 518
 519     req->icount = viftable[vifi].v_pkt_in;
 520     req->ocount = viftable[vifi].v_pkt_out;
 521     req->ibytes = viftable[vifi].v_bytes_in;
 522     req->obytes = viftable[vifi].v_bytes_out;
 523
 524     return 0;
 525 }
 526
 527 /*
 528  * Enable multicast routing
 529  */
 530 static int
 531 ip_mrouter_init(so, version)
 532         struct socket *so;
 533         int version;
 534 {
 535     if (mrtdebug)
 536         log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
 537                 so->so_type, so->so_proto->pr_protocol);
 538
 539     if (so->so_type != SOCK_RAW ||
 540         so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;
 541
 542     if (version != 1)
 543         return ENOPROTOOPT;
 544
 545     if (ip_mrouter != NULL) return EADDRINUSE;
 546
 547     ip_mrouter = so;
 548
 549     bzero((caddr_t)mfctable, sizeof(mfctable));
 550     bzero((caddr_t)nexpire, sizeof(nexpire));
 551
 552     pim_assert = 0;
 553
 554     expire_upcalls_ch = timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
 555
 556     if (mrtdebug)
 557         log(LOG_DEBUG, "ip_mrouter_init\n");
 558
 559     return 0;
 560 }
 561
 562 /*
 563  * Disable multicast routing
 564  */
 565 static int
 566 X_ip_mrouter_done()
 567 {
 568     vifi_t vifi;
 569     int i;
 570     struct ifnet *ifp;
 571     struct ifreq ifr;
 572     struct mfc *rt;
 573     struct rtdetq *rte;
 574     int s;
 575
 576     s = splnet();
 577
 578     /*
 579      * For each phyint in use, disable promiscuous reception of all IP
 580      * multicasts.
 581      */
 582     for (vifi = 0; vifi < numvifs; vifi++) {
 583         if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
 584             !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
 585             ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
 586             ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr
 587                                                                 = INADDR_ANY;
 588             ifp = viftable[vifi].v_ifp;
 589             if_allmulti(ifp, 0);
 590         }
 591     }
 592     bzero((caddr_t)tbftable, sizeof(tbftable));
 593     bzero((caddr_t)viftable, sizeof(viftable));
 594     numvifs = 0;
 595     pim_assert = 0;
 596
 597     untimeout(expire_upcalls, (caddr_t)NULL, expire_upcalls_ch);
 598
 599     /*
 600      * Free all multicast forwarding cache entries.
 601      */
 602     for (i = 0; i < MFCTBLSIZ; i++) {
 603         for (rt = mfctable[i]; rt != NULL; ) {
 604             struct mfc *nr = rt->mfc_next;
 605
 606             for (rte = rt->mfc_stall; rte != NULL; ) {
 607                 struct rtdetq *n = rte->next;
 608
 609                 m_freem(rte->m);
 610                 free(rte, M_MRTABLE);
 611                 rte = n;
 612             }
 613             free(rt, M_MRTABLE);
 614             rt = nr;
 615         }
 616     }
 617
 618     bzero((caddr_t)mfctable, sizeof(mfctable));
 619
 620     /*
 621      * Reset de-encapsulation cache
 622      */
 623     last_encap_src = 0;
 624     last_encap_vif = NULL;
 625     if (encap_cookie) {
 626         encap_detach(encap_cookie);
 627         encap_cookie = NULL;
 628     }
 629
 630     ip_mrouter = NULL;
 631
 632     splx(s);
 633
 634     if (mrtdebug)
 635         log(LOG_DEBUG, "ip_mrouter_done\n");
 636
 637     return 0;
 638 }
 639
 640 #ifndef MROUTE_KLD
 641 int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
 642 #endif
 643
 644 /*
 645  * Set PIM assert processing global
 646  */
 647 static int
 648 set_assert(i)
 649         int i;
 650 {
 651     if ((i != 1) && (i != 0))
 652         return EINVAL;
 653
 654     pim_assert = i;
 655
 656     return 0;
 657 }
 658
 659 /*
 660  * Decide if a packet is from a tunnelled peer.
 661  * Return 0 if not, 64 if so.
 662  */
 663 static int
 664 mroute_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 665 {
 666     struct ip *ip = mtod(m, struct ip *);
 667     int hlen = ip->ip_hl << 2;
 668     register struct vif *vifp;
 669
 670     /*
 671      * don't claim the packet if it's not to a multicast destination or if
 672      * we don't have an encapsulating tunnel with the source.
 673      * Note:  This code assumes that the remote site IP address
 674      * uniquely identifies the tunnel (i.e., that this site has
 675      * at most one tunnel with the remote site).
 676      */
 677     if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
 678         return 0;
 679     }
 680     if (ip->ip_src.s_addr != last_encap_src) {
 681         register struct vif *vife;
 682
 683         vifp = viftable;
 684         vife = vifp + numvifs;
 685         last_encap_src = ip->ip_src.s_addr;
 686         last_encap_vif = 0;
 687         for ( ; vifp < vife; ++vifp)
 688             if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
 689                 if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
 690                     == VIFF_TUNNEL)
 691                     last_encap_vif = vifp;
 692                 break;
 693             }
 694     }
 695     if ((vifp = last_encap_vif) == 0) {
 696         last_encap_src = 0;
 697         return 0;
 698     }
 699     return 64;
 700 }
 701
 702 /*
 703  * De-encapsulate a packet and feed it back through ip input (this
 704  * routine is called whenever IP gets a packet that mroute_encap_func()
 705  * claimed).
 706  */
 707 static void
 708 mroute_encap_input(struct mbuf *m, int off)
 709 {
 710     struct ip *ip = mtod(m, struct ip *);
 711     int hlen = ip->ip_hl << 2;
 712
 713     if (hlen > sizeof(struct ip))
 714       ip_stripoptions(m, (struct mbuf *) 0);
 715     m->m_data += sizeof(struct ip);
 716     m->m_len -= sizeof(struct ip);
 717     m->m_pkthdr.len -= sizeof(struct ip);
 718
 719     m->m_pkthdr.rcvif = last_encap_vif->v_ifp;
 720
 721     (void) IF_HANDOFF(&ipintrq, m, NULL);
 722         /*
 723          * normally we would need a "schednetisr(NETISR_IP)"
 724          * here but we were called by ip_input and it is going
 725          * to loop back & try to dequeue the packet we just
 726          * queued as soon as we return so we avoid the
 727          * unnecessary software interrrupt.
 728          */
 729 }
 730
 731 extern struct domain inetdomain;
 732 static struct protosw mroute_encap_protosw =
 733 { SOCK_RAW,     &inetdomain,    IPPROTO_IPV4,   PR_ATOMIC|PR_ADDR,
 734   mroute_encap_input,   0,      0,              rip_ctloutput,
 735   0,
 736   0,            0,              0,              0,
 737   &rip_usrreqs
 738 };
 739
 740 /*
 741  * Add a vif to the vif table
 742  */
 743 static int
 744 add_vif(vifcp)
 745     register struct vifctl *vifcp;
 746 {
 747     register struct vif *vifp = viftable + vifcp->vifc_vifi;
 748     static struct sockaddr_in sin = {sizeof sin, AF_INET};
 749     struct ifaddr *ifa;
 750     struct ifnet *ifp;
 751     int error, s;
 752     struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
 753
 754     if (vifcp->vifc_vifi >= MAXVIFS)  return EINVAL;
 755     if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;
 756
 757     /* Find the interface with an address in AF_INET family */
 758     sin.sin_addr = vifcp->vifc_lcl_addr;
 759     ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
 760     if (ifa == 0) return EADDRNOTAVAIL;
 761     ifp = ifa->ifa_ifp;
 762
 763     if (vifcp->vifc_flags & VIFF_TUNNEL) {
 764         if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
 765                 /*
 766                  * An encapsulating tunnel is wanted.  Tell
 767                  * mroute_encap_input() to start paying attention
 768                  * to encapsulated packets.
 769                  */
 770                 if (encap_cookie == NULL) {
 771                         encap_cookie = encap_attach_func(AF_INET, -1,
 772                                 mroute_encapcheck,
 773                                 (struct protosw *)&mroute_encap_protosw, NULL);
 774
 775                         if (encap_cookie == NULL) {
 776                                 printf("ip_mroute: unable to attach encap\n");
 777                                 return (EIO);   /* XXX */
 778                         }
 779                         for (s = 0; s < MAXVIFS; ++s) {
 780                                 multicast_decap_if[s].if_name = "mdecap";
 781                                 multicast_decap_if[s].if_unit = s;
 782                         }
 783                 }
 784                 /*
 785                  * Set interface to fake encapsulator interface
 786                  */
 787                 ifp = &multicast_decap_if[vifcp->vifc_vifi];
 788                 /*
 789                  * Prepare cached route entry
 790                  */
 791                 bzero(&vifp->v_route, sizeof(vifp->v_route));
 792         } else {
 793             log(LOG_ERR, "source routed tunnels not supported\n");
 794             return EOPNOTSUPP;
 795         }
 796     } else {
 797         /* Make sure the interface supports multicast */
 798         if ((ifp->if_flags & IFF_MULTICAST) == 0)
 799             return EOPNOTSUPP;
 800
 801         /* Enable promiscuous reception of all IP multicasts from the if */
 802         s = splnet();
 803         error = if_allmulti(ifp, 1);
 804         splx(s);
 805         if (error)
 806             return error;
 807     }
 808
 809     s = splnet();
 810     /* define parameters for the tbf structure */
 811     vifp->v_tbf = v_tbf;
 812     GET_TIME(vifp->v_tbf->tbf_last_pkt_t);
 813     vifp->v_tbf->tbf_n_tok = 0;
 814     vifp->v_tbf->tbf_q_len = 0;
 815     vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
 816     vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
 817
 818     vifp->v_flags     = vifcp->vifc_flags;
 819     vifp->v_threshold = vifcp->vifc_threshold;
 820     vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
 821     vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
 822     vifp->v_ifp       = ifp;
 823     /* scaling up here allows division by 1024 in critical code */
 824     vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000;
 825     vifp->v_rsvp_on   = 0;
 826     vifp->v_rsvpd     = NULL;
 827     /* initialize per vif pkt counters */
 828     vifp->v_pkt_in    = 0;
 829     vifp->v_pkt_out   = 0;
 830     vifp->v_bytes_in  = 0;
 831     vifp->v_bytes_out = 0;
 832     splx(s);
 833
 834     /* Adjust numvifs up if the vifi is higher than numvifs */
 835     if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
 836
 837     if (mrtdebug)
 838         log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n",
 839             vifcp->vifc_vifi,
 840             (u_long)ntohl(vifcp->vifc_lcl_addr.s_addr),
 841             (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
 842             (u_long)ntohl(vifcp->vifc_rmt_addr.s_addr),
 843             vifcp->vifc_threshold,
 844             vifcp->vifc_rate_limit);
 845
 846     return 0;
 847 }
 848
 849 /*
 850  * Delete a vif from the vif table
 851  */
 852 static int
 853 del_vif(vifi)
 854         vifi_t vifi;
 855 {
 856     register struct vif *vifp = &viftable[vifi];
 857     register struct mbuf *m;
 858     struct ifnet *ifp;
 859     struct ifreq ifr;
 860     int s;
 861
 862     if (vifi >= numvifs) return EINVAL;
 863     if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;
 864
 865     s = splnet();
 866
 867     if (!(vifp->v_flags & VIFF_TUNNEL)) {
 868         ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
 869         ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
 870         ifp = vifp->v_ifp;
 871         if_allmulti(ifp, 0);
 872     }
 873
 874     if (vifp == last_encap_vif) {
 875         last_encap_vif = 0;
 876         last_encap_src = 0;
 877     }
 878
 879     /*
 880      * Free packets queued at the interface
 881      */
 882     while (vifp->v_tbf->tbf_q) {
 883         m = vifp->v_tbf->tbf_q;
 884         vifp->v_tbf->tbf_q = m->m_act;
 885         m_freem(m);
 886     }
 887
 888     bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
 889     bzero((caddr_t)vifp, sizeof (*vifp));
 890
 891     if (mrtdebug)
 892       log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs);
 893
 894     /* Adjust numvifs down */
 895     for (vifi = numvifs; vifi > 0; vifi--)
 896         if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
 897     numvifs = vifi;
 898
 899     splx(s);
 900
 901     return 0;
 902 }
 903
 904 /*
 905  * Add an mfc entry
 906  */
 907 static int
 908 add_mfc(mfccp)
 909     struct mfcctl *mfccp;
 910 {
 911     struct mfc *rt;
 912     u_long hash;
 913     struct rtdetq *rte;
 914     register u_short nstl;
 915     int s;
 916     int i;
 917
 918     MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
 919
 920     /* If an entry already exists, just update the fields */
 921     if (rt) {
 922         if (mrtdebug & DEBUG_MFC)
 923             log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n",
 924                 (u_long)ntohl(mfccp->mfcc_origin.s_addr),
 925                 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 926                 mfccp->mfcc_parent);
 927
 928         s = splnet();
 929         rt->mfc_parent = mfccp->mfcc_parent;
 930         for (i = 0; i < numvifs; i++)
 931             rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 932         splx(s);
 933         return 0;
 934     }
 935
 936     /*
 937      * Find the entry for which the upcall was made and update
 938      */
 939     s = splnet();
 940     hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
 941     for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) {
 942
 943         if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
 944             (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
 945             (rt->mfc_stall != NULL)) {
 946
 947             if (nstl++)
 948                 log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n",
 949                     "multiple kernel entries",
 950                     (u_long)ntohl(mfccp->mfcc_origin.s_addr),
 951                     (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 952                     mfccp->mfcc_parent, (void *)rt->mfc_stall);
 953
 954             if (mrtdebug & DEBUG_MFC)
 955                 log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n",
 956                     (u_long)ntohl(mfccp->mfcc_origin.s_addr),
 957                     (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 958                     mfccp->mfcc_parent, (void *)rt->mfc_stall);
 959
 960             rt->mfc_origin     = mfccp->mfcc_origin;
 961             rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
 962             rt->mfc_parent     = mfccp->mfcc_parent;
 963             for (i = 0; i < numvifs; i++)
 964                 rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 965             /* initialize pkt counters per src-grp */
 966             rt->mfc_pkt_cnt    = 0;
 967             rt->mfc_byte_cnt   = 0;
 968             rt->mfc_wrong_if   = 0;
 969             rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
 970
 971             rt->mfc_expire = 0; /* Don't clean this guy up */
 972             nexpire[hash]--;
 973
 974             /* free packets Qed at the end of this entry */
 975             for (rte = rt->mfc_stall; rte != NULL; ) {
 976                 struct rtdetq *n = rte->next;
 977
 978                 ip_mdq(rte->m, rte->ifp, rt, -1);
 979                 m_freem(rte->m);
 980 #ifdef UPCALL_TIMING
 981                 collate(&(rte->t));
 982 #endif /* UPCALL_TIMING */
 983                 free(rte, M_MRTABLE);
 984                 rte = n;
 985             }
 986             rt->mfc_stall = NULL;
 987         }
 988     }
 989
 990     /*
 991      * It is possible that an entry is being inserted without an upcall
 992      */
 993     if (nstl == 0) {
 994         if (mrtdebug & DEBUG_MFC)
 995             log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n",
 996                 hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr),
 997                 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 998                 mfccp->mfcc_parent);
 999
1000         for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) {
1001
1002             if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
1003                 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
1004
1005                 rt->mfc_origin     = mfccp->mfcc_origin;
1006                 rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
1007                 rt->mfc_parent     = mfccp->mfcc_parent;
1008                 for (i = 0; i < numvifs; i++)
1009                     rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
1010                 /* initialize pkt counters per src-grp */
1011                 rt->mfc_pkt_cnt    = 0;
1012                 rt->mfc_byte_cnt   = 0;
1013                 rt->mfc_wrong_if   = 0;
1014                 rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
1015                 if (rt->mfc_expire)
1016                     nexpire[hash]--;
1017                 rt->mfc_expire     = 0;
1018             }
1019         }
1020         if (rt == NULL) {
1021             /* no upcall, so make a new entry */
1022             rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
1023             if (rt == NULL) {
1024                 splx(s);
1025                 return ENOBUFS;
1026             }
1027
1028             /* insert new entry at head of hash chain */
1029             rt->mfc_origin     = mfccp->mfcc_origin;
1030             rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
1031             rt->mfc_parent     = mfccp->mfcc_parent;
1032             for (i = 0; i < numvifs; i++)
1033                     rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
1034             /* initialize pkt counters per src-grp */
1035             rt->mfc_pkt_cnt    = 0;
1036             rt->mfc_byte_cnt   = 0;
1037             rt->mfc_wrong_if   = 0;
1038             rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
1039             rt->mfc_expire     = 0;
1040             rt->mfc_stall      = NULL;
1041
1042             /* link into table */
1043             rt->mfc_next = mfctable[hash];
1044             mfctable[hash] = rt;
1045         }
1046     }
1047     splx(s);
1048     return 0;
1049 }
1050
1051 #ifdef UPCALL_TIMING
1052 /*
1053  * collect delay statistics on the upcalls
1054  */
1055 static void collate(t)
1056 register struct timeval *t;
1057 {
1058     register u_long d;
1059     register struct timeval tp;
1060     register u_long delta;
1061
1062     GET_TIME(tp);
1063
1064     if (TV_LT(*t, tp))
1065     {
1066         TV_DELTA(tp, *t, delta);
1067
1068         d = delta >> 10;
1069         if (d > 50)
1070             d = 50;
1071
1072         ++upcall_data[d];
1073     }
1074 }
1075 #endif /* UPCALL_TIMING */
1076
1077 /*
1078  * Delete an mfc entry
1079  */
1080 static int
1081 del_mfc(mfccp)
1082     struct mfcctl *mfccp;
1083 {
1084     struct in_addr      origin;
1085     struct in_addr      mcastgrp;
1086     struct mfc          *rt;
1087     struct mfc          **nptr;
1088     u_long              hash;
1089     int s;
1090
1091     origin = mfccp->mfcc_origin;
1092     mcastgrp = mfccp->mfcc_mcastgrp;
1093     hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
1094
1095     if (mrtdebug & DEBUG_MFC)
1096         log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n",
1097             (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr));
1098
1099     s = splnet();
1100
1101     nptr = &mfctable[hash];
1102     while ((rt = *nptr) != NULL) {
1103         if (origin.s_addr == rt->mfc_origin.s_addr &&
1104             mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
1105             rt->mfc_stall == NULL)
1106             break;
1107
1108         nptr = &rt->mfc_next;
1109     }
1110     if (rt == NULL) {
1111         splx(s);
1112         return EADDRNOTAVAIL;
1113     }
1114
1115     *nptr = rt->mfc_next;
1116     free(rt, M_MRTABLE);
1117
1118     splx(s);
1119
1120     return 0;
1121 }
1122
1123 /*
1124  * Send a message to mrouted on the multicast routing socket
1125  */
1126 static int
1127 socket_send(s, mm, src)
1128         struct socket *s;
1129         struct mbuf *mm;
1130         struct sockaddr_in *src;
1131 {
1132         if (s) {
1133                 if (sbappendaddr(&s->so_rcv,
1134                                  (struct sockaddr *)src,
1135                                  mm, (struct mbuf *)0) != 0) {
1136                         sorwakeup(s);
1137                         return 0;
1138                 }
1139         }
1140         m_freem(mm);
1141         return -1;
1142 }
1143
1144 /*
1145  * IP multicast forwarding function. This function assumes that the packet
1146  * pointed to by "ip" has arrived on (or is about to be sent to) the interface
1147  * pointed to by "ifp", and the packet is to be relayed to other networks
1148  * that have members of the packet's destination IP multicast group.
1149  *
1150  * The packet is returned unscathed to the caller, unless it is
1151  * erroneous, in which case a non-zero return value tells the caller to
1152  * discard it.
1153  */
1154
1155 #define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
1156
1157 static int
1158 X_ip_mforward(ip, ifp, m, imo)
1159     register struct ip *ip;
1160     struct ifnet *ifp;
1161     struct mbuf *m;
1162     struct ip_moptions *imo;
1163 {
1164     register struct mfc *rt;
1165     register u_char *ipoptions;
1166     static struct sockaddr_in   k_igmpsrc       = { sizeof k_igmpsrc, AF_INET };
1167     static int srctun = 0;
1168     register struct mbuf *mm;
1169     int s;
1170     vifi_t vifi;
1171     struct vif *vifp;
1172
1173     if (mrtdebug & DEBUG_FORWARD)
1174         log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n",
1175             (u_long)ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr),
1176             (void *)ifp);
1177
1178     if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 ||
1179         (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
1180         /*
1181          * Packet arrived via a physical interface or
1182          * an encapsulated tunnel.
1183          */
1184     } else {
1185         /*
1186          * Packet arrived through a source-route tunnel.
1187          * Source-route tunnels are no longer supported.
1188          */
1189         if ((srctun++ % 1000) == 0)
1190             log(LOG_ERR,
1191                 "ip_mforward: received source-routed packet from %lx\n",
1192                 (u_long)ntohl(ip->ip_src.s_addr));
1193
1194         return 1;
1195     }
1196
1197     if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) {
1198         if (ip->ip_ttl < 255)
1199                 ip->ip_ttl++;   /* compensate for -1 in *_send routines */
1200         if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1201             vifp = viftable + vifi;
1202             printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s%d)\n",
1203                 (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr),
1204                 vifi,
1205                 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
1206                 vifp->v_ifp->if_name, vifp->v_ifp->if_unit);
1207         }
1208         return (ip_mdq(m, ifp, NULL, vifi));
1209     }
1210     if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1211         printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n",
1212             (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr));
1213         if(!imo)
1214                 printf("In fact, no options were specified at all\n");
1215     }
1216
1217     /*
1218      * Don't forward a packet with time-to-live of zero or one,
1219      * or a packet destined to a local-only group.
1220      */
1221     if (ip->ip_ttl <= 1 ||
1222         ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
1223         return 0;
1224
1225     /*
1226      * Determine forwarding vifs from the forwarding cache table
1227      */
1228     s = splnet();
1229     MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
1230
1231     /* Entry exists, so forward if necessary */
1232     if (rt != NULL) {
1233         splx(s);
1234         return (ip_mdq(m, ifp, rt, -1));
1235     } else {
1236         /*
1237          * If we don't have a route for packet's origin,
1238          * Make a copy of the packet &
1239          * send message to routing daemon
1240          */
1241
1242         register struct mbuf *mb0;
1243         register struct rtdetq *rte;
1244         register u_long hash;
1245         int hlen = ip->ip_hl << 2;
1246 #ifdef UPCALL_TIMING
1247         struct timeval tp;
1248
1249         GET_TIME(tp);
1250 #endif
1251
1252         mrtstat.mrts_no_route++;
1253         if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
1254             log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n",
1255                 (u_long)ntohl(ip->ip_src.s_addr),
1256                 (u_long)ntohl(ip->ip_dst.s_addr));
1257
1258         /*
1259          * Allocate mbufs early so that we don't do extra work if we are
1260          * just going to fail anyway.  Make sure to pullup the header so
1261          * that other people can't step on it.
1262          */
1263         rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT);
1264         if (rte == NULL) {
1265             splx(s);
1266             return ENOBUFS;
1267         }
1268         mb0 = m_copy(m, 0, M_COPYALL);
1269         if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
1270             mb0 = m_pullup(mb0, hlen);
1271         if (mb0 == NULL) {
1272             free(rte, M_MRTABLE);
1273             splx(s);
1274             return ENOBUFS;
1275         }
1276
1277         /* is there an upcall waiting for this packet? */
1278         hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1279         for (rt = mfctable[hash]; rt; rt = rt->mfc_next) {
1280             if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) &&
1281                 (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
1282                 (rt->mfc_stall != NULL))
1283                 break;
1284         }
1285
1286         if (rt == NULL) {
1287             int i;
1288             struct igmpmsg *im;
1289
1290             /* no upcall, so make a new entry */
1291             rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
1292             if (rt == NULL) {
1293                 free(rte, M_MRTABLE);
1294                 m_freem(mb0);
1295                 splx(s);
1296                 return ENOBUFS;
1297             }
1298             /* Make a copy of the header to send to the user level process */
1299             mm = m_copy(mb0, 0, hlen);
1300             if (mm == NULL) {
1301                 free(rte, M_MRTABLE);
1302                 m_freem(mb0);
1303                 free(rt, M_MRTABLE);
1304                 splx(s);
1305                 return ENOBUFS;
1306             }
1307
1308             /*
1309              * Send message to routing daemon to install
1310              * a route into the kernel table
1311              */
1312             k_igmpsrc.sin_addr = ip->ip_src;
1313
1314             im = mtod(mm, struct igmpmsg *);
1315             im->im_msgtype      = IGMPMSG_NOCACHE;
1316             im->im_mbz          = 0;
1317
1318             mrtstat.mrts_upcalls++;
1319
1320             if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
1321                 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n");
1322                 ++mrtstat.mrts_upq_sockfull;
1323                 free(rte, M_MRTABLE);
1324                 m_freem(mb0);
1325                 free(rt, M_MRTABLE);
1326                 splx(s);
1327                 return ENOBUFS;
1328             }
1329
1330             /* insert new entry at head of hash chain */
1331             rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
1332             rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
1333             rt->mfc_expire            = UPCALL_EXPIRE;
1334             nexpire[hash]++;
1335             for (i = 0; i < numvifs; i++)
1336                 rt->mfc_ttls[i] = 0;
1337             rt->mfc_parent = -1;
1338
1339             /* link into table */
1340             rt->mfc_next   = mfctable[hash];
1341             mfctable[hash] = rt;
1342             rt->mfc_stall = rte;
1343
1344         } else {
1345             /* determine if q has overflowed */
1346             int npkts = 0;
1347             struct rtdetq **p;
1348
1349             for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next)
1350                 npkts++;
1351
1352             if (npkts > MAX_UPQ) {
1353                 mrtstat.mrts_upq_ovflw++;
1354                 free(rte, M_MRTABLE);
1355                 m_freem(mb0);
1356                 splx(s);
1357                 return 0;
1358             }
1359
1360             /* Add this entry to the end of the queue */
1361             *p = rte;
1362         }
1363
1364         rte->m                  = mb0;
1365         rte->ifp                = ifp;
1366 #ifdef UPCALL_TIMING
1367         rte->t                  = tp;
1368 #endif
1369         rte->next               = NULL;
1370
1371         splx(s);
1372
1373         return 0;
1374     }
1375 }
1376
1377 #ifndef MROUTE_KLD
1378 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
1379                    struct ip_moptions *) = X_ip_mforward;
1380 #endif
1381
1382 /*
1383  * Clean up the cache entry if upcall is not serviced
1384  */
1385 static void
1386 expire_upcalls(void *unused)
1387 {
1388     struct rtdetq *rte;
1389     struct mfc *mfc, **nptr;
1390     int i;
1391     int s;
1392
1393     s = splnet();
1394     for (i = 0; i < MFCTBLSIZ; i++) {
1395         if (nexpire[i] == 0)
1396             continue;
1397         nptr = &mfctable[i];
1398         for (mfc = *nptr; mfc != NULL; mfc = *nptr) {
1399             /*
1400              * Skip real cache entries
1401              * Make sure it wasn't marked to not expire (shouldn't happen)
1402              * If it expires now
1403              */
1404             if (mfc->mfc_stall != NULL &&
1405                 mfc->mfc_expire != 0 &&
1406                 --mfc->mfc_expire == 0) {
1407                 if (mrtdebug & DEBUG_EXPIRE)
1408                     log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n",
1409                         (u_long)ntohl(mfc->mfc_origin.s_addr),
1410                         (u_long)ntohl(mfc->mfc_mcastgrp.s_addr));
1411                 /*
1412                  * drop all the packets
1413                  * free the mbuf with the pkt, if, timing info
1414                  */
1415                 for (rte = mfc->mfc_stall; rte; ) {
1416                     struct rtdetq *n = rte->next;
1417
1418                     m_freem(rte->m);
1419                     free(rte, M_MRTABLE);
1420                     rte = n;
1421                 }
1422                 ++mrtstat.mrts_cache_cleanups;
1423                 nexpire[i]--;
1424
1425                 *nptr = mfc->mfc_next;
1426                 free(mfc, M_MRTABLE);
1427             } else {
1428                 nptr = &mfc->mfc_next;
1429             }
1430         }
1431     }
1432     splx(s);
1433     expire_upcalls_ch = timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
1434 }
1435
1436 /*
1437  * Packet forwarding routine once entry in the cache is made
1438  */
1439 static int
1440 ip_mdq(m, ifp, rt, xmt_vif)
1441     register struct mbuf *m;
1442     register struct ifnet *ifp;
1443     register struct mfc *rt;
1444     register vifi_t xmt_vif;
1445 {
1446     register struct ip  *ip = mtod(m, struct ip *);
1447     register vifi_t vifi;
1448     register struct vif *vifp;
1449     register int plen = ip->ip_len;
1450
1451 /*
1452  * Macro to send packet on vif.  Since RSVP packets don't get counted on
1453  * input, they shouldn't get counted on output, so statistics keeping is
1454  * separate.
1455  */
1456 #define MC_SEND(ip,vifp,m) {                             \
1457                 if ((vifp)->v_flags & VIFF_TUNNEL)       \
1458                     encap_send((ip), (vifp), (m));       \
1459                 else                                     \
1460                     phyint_send((ip), (vifp), (m));      \
1461 }
1462
1463     /*
1464      * If xmt_vif is not -1, send on only the requested vif.
1465      *
1466      * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
1467      */
1468     if (xmt_vif < numvifs) {
1469         MC_SEND(ip, viftable + xmt_vif, m);
1470         return 1;
1471     }
1472
1473     /*
1474      * Don't forward if it didn't arrive from the parent vif for its origin.
1475      */
1476     vifi = rt->mfc_parent;
1477     if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1478         /* came in the wrong interface */
1479         if (mrtdebug & DEBUG_FORWARD)
1480             log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n",
1481                 (void *)ifp, vifi, (void *)viftable[vifi].v_ifp);
1482         ++mrtstat.mrts_wrong_if;
1483         ++rt->mfc_wrong_if;
1484         /*
1485          * If we are doing PIM assert processing, and we are forwarding
1486          * packets on this interface, and it is a broadcast medium
1487          * interface (and not a tunnel), send a message to the routing daemon.
1488          */
1489         if (pim_assert && rt->mfc_ttls[vifi] &&
1490                 (ifp->if_flags & IFF_BROADCAST) &&
1491                 !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1492             struct sockaddr_in k_igmpsrc;
1493             struct mbuf *mm;
1494             struct igmpmsg *im;
1495             int hlen = ip->ip_hl << 2;
1496             struct timeval now;
1497             register u_long delta;
1498
1499             GET_TIME(now);
1500
1501             TV_DELTA(rt->mfc_last_assert, now, delta);
1502
1503             if (delta > ASSERT_MSG_TIME) {
1504                 mm = m_copy(m, 0, hlen);
1505                 if (mm && (M_HASCL(mm) || mm->m_len < hlen))
1506                     mm = m_pullup(mm, hlen);
1507                 if (mm == NULL) {
1508                     return ENOBUFS;
1509                 }
1510
1511                 rt->mfc_last_assert = now;
1512
1513                 im = mtod(mm, struct igmpmsg *);
1514                 im->im_msgtype  = IGMPMSG_WRONGVIF;
1515                 im->im_mbz              = 0;
1516                 im->im_vif              = vifi;
1517
1518                 k_igmpsrc.sin_addr = im->im_src;
1519
1520                 socket_send(ip_mrouter, mm, &k_igmpsrc);
1521             }
1522         }
1523         return 0;
1524     }
1525
1526     /* If I sourced this packet, it counts as output, else it was input. */
1527     if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
1528         viftable[vifi].v_pkt_out++;
1529         viftable[vifi].v_bytes_out += plen;
1530     } else {
1531         viftable[vifi].v_pkt_in++;
1532         viftable[vifi].v_bytes_in += plen;
1533     }
1534     rt->mfc_pkt_cnt++;
1535     rt->mfc_byte_cnt += plen;
1536
1537     /*
1538      * For each vif, decide if a copy of the packet should be forwarded.
1539      * Forward if:
1540      *          - the ttl exceeds the vif's threshold
1541      *          - there are group members downstream on interface
1542      */
1543     for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1544         if ((rt->mfc_ttls[vifi] > 0) &&
1545             (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1546             vifp->v_pkt_out++;
1547             vifp->v_bytes_out += plen;
1548             MC_SEND(ip, vifp, m);
1549         }
1550
1551     return 0;
1552 }
1553
1554 /*
1555  * check if a vif number is legal/ok. This is used by ip_output, to export
1556  * numvifs there,
1557  */
1558 static int
1559 X_legal_vif_num(vif)
1560     int vif;
1561 {
1562     if (vif >= 0 && vif < numvifs)
1563        return(1);
1564     else
1565        return(0);
1566 }
1567
1568 #ifndef MROUTE_KLD
1569 int (*legal_vif_num)(int) = X_legal_vif_num;
1570 #endif
1571
1572 /*
1573  * Return the local address used by this vif
1574  */
1575 static u_long
1576 X_ip_mcast_src(vifi)
1577     int vifi;
1578 {
1579     if (vifi >= 0 && vifi < numvifs)
1580         return viftable[vifi].v_lcl_addr.s_addr;
1581     else
1582         return INADDR_ANY;
1583 }
1584
1585 #ifndef MROUTE_KLD
1586 u_long (*ip_mcast_src)(int) = X_ip_mcast_src;
1587 #endif
1588
1589 static void
1590 phyint_send(ip, vifp, m)
1591     struct ip *ip;
1592     struct vif *vifp;
1593     struct mbuf *m;
1594 {
1595     register struct mbuf *mb_copy;
1596     register int hlen = ip->ip_hl << 2;
1597
1598     /*
1599      * Make a new reference to the packet; make sure that
1600      * the IP header is actually copied, not just referenced,
1601      * so that ip_output() only scribbles on the copy.
1602      */
1603     mb_copy = m_copy(m, 0, M_COPYALL);
1604     if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
1605         mb_copy = m_pullup(mb_copy, hlen);
1606     if (mb_copy == NULL)
1607         return;
1608
1609     if (vifp->v_rate_limit == 0)
1610         tbf_send_packet(vifp, mb_copy);
1611     else
1612         tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len);
1613 }
1614
1615 static void
1616 encap_send(ip, vifp, m)
1617     register struct ip *ip;
1618     register struct vif *vifp;
1619     register struct mbuf *m;
1620 {
1621     register struct mbuf *mb_copy;
1622     register struct ip *ip_copy;
1623     register int i, len = ip->ip_len;
1624
1625     /*
1626      * copy the old packet & pullup its IP header into the
1627      * new mbuf so we can modify it.  Try to fill the new
1628      * mbuf since if we don't the ethernet driver will.
1629      */
1630     MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER);
1631     if (mb_copy == NULL)
1632         return;
1633     mb_copy->m_data += max_linkhdr;
1634     mb_copy->m_len = sizeof(multicast_encap_iphdr);
1635
1636     if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
1637         m_freem(mb_copy);
1638         return;
1639     }
1640     i = MHLEN - M_LEADINGSPACE(mb_copy);
1641     if (i > len)
1642         i = len;
1643     mb_copy = m_pullup(mb_copy, i);
1644     if (mb_copy == NULL)
1645         return;
1646     mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);
1647
1648     /*
1649      * fill in the encapsulating IP header.
1650      */
1651     ip_copy = mtod(mb_copy, struct ip *);
1652     *ip_copy = multicast_encap_iphdr;
1653 #ifdef RANDOM_IP_ID
1654     ip_copy->ip_id = ip_randomid();
1655 #else
1656     ip_copy->ip_id = htons(ip_id++);
1657 #endif
1658     ip_copy->ip_len += len;
1659     ip_copy->ip_src = vifp->v_lcl_addr;
1660     ip_copy->ip_dst = vifp->v_rmt_addr;
1661
1662     /*
1663      * turn the encapsulated IP header back into a valid one.
1664      */
1665     ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1666     --ip->ip_ttl;
1667     ip->ip_len = htons(ip->ip_len);
1668     ip->ip_off = htons(ip->ip_off);
1669     ip->ip_sum = 0;
1670     mb_copy->m_data += sizeof(multicast_encap_iphdr);
1671     ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1672     mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1673
1674     if (vifp->v_rate_limit == 0)
1675         tbf_send_packet(vifp, mb_copy);
1676     else
1677         tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
1678 }
1679
1680 /*
1681  * Token bucket filter module
1682  */
1683
1684 static void
1685 tbf_control(vifp, m, ip, p_len)
1686         register struct vif *vifp;
1687         register struct mbuf *m;
1688         register struct ip *ip;
1689         register u_long p_len;
1690 {
1691     register struct tbf *t = vifp->v_tbf;
1692
1693     if (p_len > MAX_BKT_SIZE) {
1694         /* drop if packet is too large */
1695         mrtstat.mrts_pkt2large++;
1696         m_freem(m);
1697         return;
1698     }
1699
1700     tbf_update_tokens(vifp);
1701
1702     /* if there are enough tokens,
1703      * and the queue is empty,
1704      * send this packet out
1705      */
1706
1707     if (t->tbf_q_len == 0) {
1708         /* queue empty, send packet if enough tokens */
1709         if (p_len <= t->tbf_n_tok) {
1710             t->tbf_n_tok -= p_len;
1711             tbf_send_packet(vifp, m);
1712         } else {
1713             /* queue packet and timeout till later */
1714             tbf_queue(vifp, m);
1715             timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
1716         }
1717     } else if (t->tbf_q_len < t->tbf_max_q_len) {
1718         /* finite queue length, so queue pkts and process queue */
1719         tbf_queue(vifp, m);
1720         tbf_process_q(vifp);
1721     } else {
1722         /* queue length too much, try to dq and queue and process */
1723         if (!tbf_dq_sel(vifp, ip)) {
1724             mrtstat.mrts_q_overflow++;
1725             m_freem(m);
1726             return;
1727         } else {
1728             tbf_queue(vifp, m);
1729             tbf_process_q(vifp);
1730         }
1731     }
1732     return;
1733 }
1734
1735 /*
1736  * adds a packet to the queue at the interface
1737  */
1738 static void
1739 tbf_queue(vifp, m)
1740         register struct vif *vifp;
1741         register struct mbuf *m;
1742 {
1743     register int s = splnet();
1744     register struct tbf *t = vifp->v_tbf;
1745
1746     if (t->tbf_t == NULL) {
1747         /* Queue was empty */
1748         t->tbf_q = m;
1749     } else {
1750         /* Insert at tail */
1751         t->tbf_t->m_act = m;
1752     }
1753
1754     /* Set new tail pointer */
1755     t->tbf_t = m;
1756
1757 #ifdef DIAGNOSTIC
1758     /* Make sure we didn't get fed a bogus mbuf */
1759     if (m->m_act)
1760         panic("tbf_queue: m_act");
1761 #endif
1762     m->m_act = NULL;
1763
1764     t->tbf_q_len++;
1765
1766     splx(s);
1767 }
1768
1769
1770 /*
1771  * processes the queue at the interface
1772  */
1773 static void
1774 tbf_process_q(vifp)
1775     register struct vif *vifp;
1776 {
1777     register struct mbuf *m;
1778     register int len;
1779     register int s = splnet();
1780     register struct tbf *t = vifp->v_tbf;
1781
1782     /* loop through the queue at the interface and send as many packets
1783      * as possible
1784      */
1785     while (t->tbf_q_len > 0) {
1786         m = t->tbf_q;
1787
1788         len = mtod(m, struct ip *)->ip_len;
1789
1790         /* determine if the packet can be sent */
1791         if (len <= t->tbf_n_tok) {
1792             /* if so,
1793              * reduce no of tokens, dequeue the packet,
1794              * send the packet.
1795              */
1796             t->tbf_n_tok -= len;
1797
1798             t->tbf_q = m->m_act;
1799             if (--t->tbf_q_len == 0)
1800                 t->tbf_t = NULL;
1801
1802             m->m_act = NULL;
1803             tbf_send_packet(vifp, m);
1804
1805         } else break;
1806     }
1807     splx(s);
1808 }
1809
1810 static void
1811 tbf_reprocess_q(xvifp)
1812         void *xvifp;
1813 {
1814     register struct vif *vifp = xvifp;
1815     if (ip_mrouter == NULL)
1816         return;
1817
1818     tbf_update_tokens(vifp);
1819
1820     tbf_process_q(vifp);
1821
1822     if (vifp->v_tbf->tbf_q_len)
1823         timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
1824 }
1825
1826 /* function that will selectively discard a member of the queue
1827  * based on the precedence value and the priority
1828  */
1829 static int
1830 tbf_dq_sel(vifp, ip)
1831     register struct vif *vifp;
1832     register struct ip *ip;
1833 {
1834     register int s = splnet();
1835     register u_int p;
1836     register struct mbuf *m, *last;
1837     register struct mbuf **np;
1838     register struct tbf *t = vifp->v_tbf;
1839
1840     p = priority(vifp, ip);
1841
1842     np = &t->tbf_q;
1843     last = NULL;
1844     while ((m = *np) != NULL) {
1845         if (p > priority(vifp, mtod(m, struct ip *))) {
1846             *np = m->m_act;
1847             /* If we're removing the last packet, fix the tail pointer */
1848             if (m == t->tbf_t)
1849                 t->tbf_t = last;
1850             m_freem(m);
1851             /* it's impossible for the queue to be empty, but
1852              * we check anyway. */
1853             if (--t->tbf_q_len == 0)
1854                 t->tbf_t = NULL;
1855             splx(s);
1856             mrtstat.mrts_drop_sel++;
1857             return(1);
1858         }
1859         np = &m->m_act;
1860         last = m;
1861     }
1862     splx(s);
1863     return(0);
1864 }
1865
1866 static void
1867 tbf_send_packet(vifp, m)
1868     register struct vif *vifp;
1869     register struct mbuf *m;
1870 {
1871     struct ip_moptions imo;
1872     int error;
1873     static struct route ro;
1874     int s = splnet();
1875
1876     if (vifp->v_flags & VIFF_TUNNEL) {
1877         /* If tunnel options */
1878         ip_output(m, (struct mbuf *)0, &vifp->v_route,
1879                   IP_FORWARDING, (struct ip_moptions *)0);
1880     } else {
1881         imo.imo_multicast_ifp  = vifp->v_ifp;
1882         imo.imo_multicast_ttl  = mtod(m, struct ip *)->ip_ttl - 1;
1883         imo.imo_multicast_loop = 1;
1884         imo.imo_multicast_vif  = -1;
1885
1886         /*
1887          * Re-entrancy should not be a problem here, because
1888          * the packets that we send out and are looped back at us
1889          * should get rejected because they appear to come from
1890          * the loopback interface, thus preventing looping.
1891          */
1892         error = ip_output(m, (struct mbuf *)0, &ro,
1893                           IP_FORWARDING, &imo);
1894
1895         if (mrtdebug & DEBUG_XMIT)
1896             log(LOG_DEBUG, "phyint_send on vif %d err %d\n",
1897                 vifp - viftable, error);
1898     }
1899     splx(s);
1900 }
1901
1902 /* determine the current time and then
1903  * the elapsed time (between the last time and time now)
1904  * in milliseconds & update the no. of tokens in the bucket
1905  */
1906 static void
1907 tbf_update_tokens(vifp)
1908     register struct vif *vifp;
1909 {
1910     struct timeval tp;
1911     register u_long tm;
1912     register int s = splnet();
1913     register struct tbf *t = vifp->v_tbf;
1914
1915     GET_TIME(tp);
1916
1917     TV_DELTA(tp, t->tbf_last_pkt_t, tm);
1918
1919     /*
1920      * This formula is actually
1921      * "time in seconds" * "bytes/second".
1922      *
1923      * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
1924      *
1925      * The (1000/1024) was introduced in add_vif to optimize
1926      * this divide into a shift.
1927      */
1928     t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8;
1929     t->tbf_last_pkt_t = tp;
1930
1931     if (t->tbf_n_tok > MAX_BKT_SIZE)
1932         t->tbf_n_tok = MAX_BKT_SIZE;
1933
1934     splx(s);
1935 }
1936
1937 static int
1938 priority(vifp, ip)
1939     register struct vif *vifp;
1940     register struct ip *ip;
1941 {
1942     register int prio;
1943
1944     /* temporary hack; may add general packet classifier some day */
1945
1946     /*
1947      * The UDP port space is divided up into four priority ranges:
1948      * [0, 16384)     : unclassified - lowest priority
1949      * [16384, 32768) : audio - highest priority
1950      * [32768, 49152) : whiteboard - medium priority
1951      * [49152, 65536) : video - low priority
1952      */
1953     if (ip->ip_p == IPPROTO_UDP) {
1954         struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1955         switch (ntohs(udp->uh_dport) & 0xc000) {
1956             case 0x4000:
1957                 prio = 70;
1958                 break;
1959             case 0x8000:
1960                 prio = 60;
1961                 break;
1962             case 0xc000:
1963                 prio = 55;
1964                 break;
1965             default:
1966                 prio = 50;
1967                 break;
1968         }
1969         if (tbfdebug > 1)
1970                 log(LOG_DEBUG, "port %x prio%d\n", ntohs(udp->uh_dport), prio);
1971     } else {
1972             prio = 50;
1973     }
1974     return prio;
1975 }
1976
1977 /*
1978  * End of token bucket filter modifications
1979  */
1980
1981 int
1982 ip_rsvp_vif_init(so, sopt)
1983         struct socket *so;
1984         struct sockopt *sopt;
1985 {
1986     int error, i, s;
1987
1988     if (rsvpdebug)
1989         printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
1990                so->so_type, so->so_proto->pr_protocol);
1991
1992     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1993         return EOPNOTSUPP;
1994
1995     /* Check mbuf. */
1996     error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1997     if (error)
1998             return (error);
1999
2000     if (rsvpdebug)
2001         printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n", i, rsvp_on);
2002
2003     s = splnet();
2004
2005     /* Check vif. */
2006     if (!legal_vif_num(i)) {
2007         splx(s);
2008         return EADDRNOTAVAIL;
2009     }
2010
2011     /* Check if socket is available. */
2012     if (viftable[i].v_rsvpd != NULL) {
2013         splx(s);
2014         return EADDRINUSE;
2015     }
2016
2017     viftable[i].v_rsvpd = so;
2018     /* This may seem silly, but we need to be sure we don't over-increment
2019      * the RSVP counter, in case something slips up.
2020      */
2021     if (!viftable[i].v_rsvp_on) {
2022         viftable[i].v_rsvp_on = 1;
2023         rsvp_on++;
2024     }
2025
2026     splx(s);
2027     return 0;
2028 }
2029
2030 int
2031 ip_rsvp_vif_done(so, sopt)
2032         struct socket *so;
2033         struct sockopt *sopt;
2034 {
2035         int error, i, s;
2036
2037         if (rsvpdebug)
2038                 printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
2039                        so->so_type, so->so_proto->pr_protocol);
2040
2041         if (so->so_type != SOCK_RAW ||
2042             so->so_proto->pr_protocol != IPPROTO_RSVP)
2043                 return EOPNOTSUPP;
2044
2045         error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2046         if (error)
2047                 return (error);
2048
2049         s = splnet();
2050
2051         /* Check vif. */
2052         if (!legal_vif_num(i)) {
2053                 splx(s);
2054                 return EADDRNOTAVAIL;
2055         }
2056
2057         if (rsvpdebug)
2058                 printf("ip_rsvp_vif_done: v_rsvpd = %p so = %p\n",
2059                        viftable[i].v_rsvpd, so);
2060
2061         /*
2062          * XXX as an additional consistency check, one could make sure
2063          * that viftable[i].v_rsvpd == so, otherwise passing so as
2064          * first parameter is pretty useless.
2065          */
2066         viftable[i].v_rsvpd = NULL;
2067         /*
2068          * This may seem silly, but we need to be sure we don't over-decrement
2069          * the RSVP counter, in case something slips up.
2070          */
2071         if (viftable[i].v_rsvp_on) {
2072                 viftable[i].v_rsvp_on = 0;
2073                 rsvp_on--;
2074         }
2075
2076         splx(s);
2077         return 0;
2078 }
2079
2080 void
2081 ip_rsvp_force_done(so)
2082     struct socket *so;
2083 {
2084     int vifi;
2085     register int s;
2086
2087     /* Don't bother if it is not the right type of socket. */
2088     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
2089         return;
2090
2091     s = splnet();
2092
2093     /* The socket may be attached to more than one vif...this
2094      * is perfectly legal.
2095      */
2096     for (vifi = 0; vifi < numvifs; vifi++) {
2097         if (viftable[vifi].v_rsvpd == so) {
2098             viftable[vifi].v_rsvpd = NULL;
2099             /* This may seem silly, but we need to be sure we don't
2100              * over-decrement the RSVP counter, in case something slips up.
2101              */
2102             if (viftable[vifi].v_rsvp_on) {
2103                 viftable[vifi].v_rsvp_on = 0;
2104                 rsvp_on--;
2105             }
2106         }
2107     }
2108
2109     splx(s);
2110     return;
2111 }
2112
2113 void
2114 rsvp_input(m, off)
2115         struct mbuf *m;
2116         int off;
2117 {
2118     int vifi;
2119     register struct ip *ip = mtod(m, struct ip *);
2120     static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET };
2121     register int s;
2122     struct ifnet *ifp;
2123
2124     if (rsvpdebug)
2125         printf("rsvp_input: rsvp_on %d\n",rsvp_on);
2126
2127     /* Can still get packets with rsvp_on = 0 if there is a local member
2128      * of the group to which the RSVP packet is addressed.  But in this
2129      * case we want to throw the packet away.
2130      */
2131     if (!rsvp_on) {
2132         m_freem(m);
2133         return;
2134     }
2135
2136     s = splnet();
2137
2138     if (rsvpdebug)
2139         printf("rsvp_input: check vifs\n");
2140
2141 #ifdef DIAGNOSTIC
2142     if (!(m->m_flags & M_PKTHDR))
2143             panic("rsvp_input no hdr");
2144 #endif
2145
2146     ifp = m->m_pkthdr.rcvif;
2147     /* Find which vif the packet arrived on. */
2148     for (vifi = 0; vifi < numvifs; vifi++)
2149         if (viftable[vifi].v_ifp == ifp)
2150             break;
2151
2152     if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) {
2153         /*
2154          * If the old-style non-vif-associated socket is set,
2155          * then use it.  Otherwise, drop packet since there
2156          * is no specific socket for this vif.
2157          */
2158         if (ip_rsvpd != NULL) {
2159             if (rsvpdebug)
2160                 printf("rsvp_input: Sending packet up old-style socket\n");
2161             rip_input(m, off);  /* xxx */
2162         } else {
2163             if (rsvpdebug && vifi == numvifs)
2164                 printf("rsvp_input: Can't find vif for packet.\n");
2165             else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL)
2166                 printf("rsvp_input: No socket defined for vif %d\n",vifi);
2167             m_freem(m);
2168         }
2169         splx(s);
2170         return;
2171     }
2172     rsvp_src.sin_addr = ip->ip_src;
2173
2174     if (rsvpdebug && m)
2175         printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n",
2176                m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv)));
2177
2178     if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) {
2179         if (rsvpdebug)
2180             printf("rsvp_input: Failed to append to socket\n");
2181     } else {
2182         if (rsvpdebug)
2183             printf("rsvp_input: send packet up\n");
2184     }
2185
2186     splx(s);
2187 }
2188
2189 #ifdef MROUTE_KLD
2190
2191 static int
2192 ip_mroute_modevent(module_t mod, int type, void *unused)
2193 {
2194         int s;
2195
2196         switch (type) {
2197                 static u_long (*old_ip_mcast_src)(int);
2198                 static int (*old_ip_mrouter_set)(struct socket *,
2199                         struct sockopt *);
2200                 static int (*old_ip_mrouter_get)(struct socket *,
2201                         struct sockopt *);
2202                 static int (*old_ip_mrouter_done)(void);
2203                 static int (*old_ip_mforward)(struct ip *, struct ifnet *,
2204                         struct mbuf *, struct ip_moptions *);
2205                 static int (*old_mrt_ioctl)(int, caddr_t);
2206                 static int (*old_legal_vif_num)(int);
2207
2208         case MOD_LOAD:
2209                 s = splnet();
2210                 /* XXX Protect against multiple loading */
2211                 old_ip_mcast_src = ip_mcast_src;
2212                 ip_mcast_src = X_ip_mcast_src;
2213                 old_ip_mrouter_get = ip_mrouter_get;
2214                 ip_mrouter_get = X_ip_mrouter_get;
2215                 old_ip_mrouter_set = ip_mrouter_set;
2216                 ip_mrouter_set = X_ip_mrouter_set;
2217                 old_ip_mrouter_done = ip_mrouter_done;
2218                 ip_mrouter_done = X_ip_mrouter_done;
2219                 old_ip_mforward = ip_mforward;
2220                 ip_mforward = X_ip_mforward;
2221                 old_mrt_ioctl = mrt_ioctl;
2222                 mrt_ioctl = X_mrt_ioctl;
2223                 old_legal_vif_num = legal_vif_num;
2224                 legal_vif_num = X_legal_vif_num;
2225
2226                 splx(s);
2227                 return 0;
2228
2229         case MOD_UNLOAD:
2230                 if (ip_mrouter)
2231                   return EINVAL;
2232
2233                 s = splnet();
2234                 ip_mrouter_get = old_ip_mrouter_get;
2235                 ip_mrouter_set = old_ip_mrouter_set;
2236                 ip_mrouter_done = old_ip_mrouter_done;
2237                 ip_mforward = old_ip_mforward;
2238                 mrt_ioctl = old_mrt_ioctl;
2239                 legal_vif_num = old_legal_vif_num;
2240                 splx(s);
2241                 return 0;
2242
2243         default:
2244                 break;
2245         }
2246         return 0;
2247 }
2248
2249 static moduledata_t ip_mroutemod = {
2250         "ip_mroute",
2251         ip_mroute_modevent,
2252         0
2253 };
2254 DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY);
2255
2256 #endif /* MROUTE_KLD */
2257 #endif /* MROUTING */