sys/net/bpf.c

   1 /*
   2  * Copyright (c) 1990, 1991, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * This code is derived from the Stanford/CMU enet packet filter,
   6  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
   7  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
   8  * Berkeley Laboratory.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 3. All advertising materials mentioning features or use of this software
  19  *    must display the following acknowledgement:
  20  *      This product includes software developed by the University of
  21  *      California, Berkeley and its contributors.
  22  * 4. Neither the name of the University nor the names of its contributors
  23  *    may be used to endorse or promote products derived from this software
  24  *    without specific prior written permission.
  25  *
  26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  *
  38  *      @(#)bpf.c       8.4 (Berkeley) 1/9/95
  39  *
  40  * $FreeBSD$
  41  */
  42
  43 #include "opt_bpf.h"
  44 #include "opt_netgraph.h"
  45
  46 #include <sys/param.h>
  47 #include <sys/systm.h>
  48 #include <sys/conf.h>
  49 #include <sys/malloc.h>
  50 #include <sys/mbuf.h>
  51 #include <sys/time.h>
  52 #include <sys/proc.h>
  53 #include <sys/signalvar.h>
  54 #include <sys/filio.h>
  55 #include <sys/sockio.h>
  56 #include <sys/ttycom.h>
  57 #include <sys/filedesc.h>
  58
  59 #include <sys/poll.h>
  60
  61 #include <sys/socket.h>
  62 #include <sys/vnode.h>
  63
  64 #include <net/if.h>
  65 #include <net/bpf.h>
  66 #include <net/bpfdesc.h>
  67
  68 #include <netinet/in.h>
  69 #include <netinet/if_ether.h>
  70 #include <sys/kernel.h>
  71 #include <sys/sysctl.h>
  72
/*
 * Malloc type used for every allocation made by this file (descriptors
 * in bpfopen(), filter programs in bpf_setf()).  Short name "BPF",
 * description "BPF data".
 */
static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

/*
 * Everything below is compiled only when one of these options is set.
 * NOTE(review): presumably they come from opt_bpf.h / opt_netgraph.h,
 * included above -- confirm against the kernel config.
 */
#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

/*
 * Sleep priority handed to msleep() in bpfread(), where it is or'ed
 * with PCATCH.
 */
#define PRINET  26                      /* interruptible */
  78
  79 /*
  80  * The default read buffer size is patchable.
  81  */
  82 static int bpf_bufsize = 4096;
  83 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW,
  84         &bpf_bufsize, 0, "");
  85 static int bpf_maxbufsize = BPF_MAXBUFSIZE;
  86 SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW,
  87         &bpf_maxbufsize, 0, "");
  88
/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet.
 *  It is a singly linked list chained through bif_next and is walked
 *  by bpf_setif() while holding bpf_mtx.
 */
static struct bpf_if    *bpf_iflist;
/*
 * Besides the interface list walk, bpf_mtx also guards the "device in
 * use" check on si_drv1 in bpfopen() and is held around bpf_detachd()
 * in bpfclose().
 */
static struct mtx       bpf_mtx;                /* bpf global lock */
  94
/*
 * Forward declarations of the file-local helpers.  They are wrapped in
 * __P(()), matching the K&R style definitions used throughout this file.
 */
static int      bpf_allocbufs __P((struct bpf_d *));
static void     bpf_attachd __P((struct bpf_d *d, struct bpf_if *bp));
static void     bpf_detachd __P((struct bpf_d *d));
static void     bpf_freed __P((struct bpf_d *));
static void     bpf_mcopy __P((const void *, void *, size_t));
static int      bpf_movein __P((struct uio *, int,
                    struct mbuf **, struct sockaddr *, int *));
static int      bpf_setif __P((struct bpf_d *, struct ifreq *));
static __inline void
                bpf_wakeup __P((struct bpf_d *));
static void     catchpacket __P((struct bpf_d *, u_char *, u_int,
                    u_int, void (*)(const void *, void *, size_t)));
static void     reset_d __P((struct bpf_d *));
static int       bpf_setf __P((struct bpf_d *, struct bpf_program *));

/*
 * Character device entry points; they are installed in bpf_cdevsw below.
 */
static  d_open_t        bpfopen;
static  d_close_t       bpfclose;
static  d_read_t        bpfread;
static  d_write_t       bpfwrite;
static  d_ioctl_t       bpfioctl;
static  d_poll_t        bpfpoll;
 116
/*
 * Character device switch for bpf.  The initializer is positional; the
 * comment on each line names the slot being filled.  CDEV_MAJOR is the
 * value stored in the "maj" slot.
 * NOTE(review): the no*() entries are presumably the stock stubs for
 * operations bpf does not support -- confirm against <sys/conf.h>.
 */
#define CDEV_MAJOR 23
static struct cdevsw bpf_cdevsw = {
        /* open */      bpfopen,
        /* close */     bpfclose,
        /* read */      bpfread,
        /* write */     bpfwrite,
        /* ioctl */     bpfioctl,
        /* poll */      bpfpoll,
        /* mmap */      nommap,
        /* strategy */  nostrategy,
        /* name */      "bpf",
        /* maj */       CDEV_MAJOR,
        /* dump */      nodump,
        /* psize */     nopsize,
        /* flags */     0,
};
 133
 134
 135 static int
 136 bpf_movein(uio, linktype, mp, sockp, datlen)
 137         register struct uio *uio;
 138         int linktype, *datlen;
 139         register struct mbuf **mp;
 140         register struct sockaddr *sockp;
 141 {
 142         struct mbuf *m;
 143         int error;
 144         int len;
 145         int hlen;
 146
 147         /*
 148          * Build a sockaddr based on the data link layer type.
 149          * We do this at this level because the ethernet header
 150          * is copied directly into the data field of the sockaddr.
 151          * In the case of SLIP, there is no header and the packet
 152          * is forwarded as is.
 153          * Also, we are careful to leave room at the front of the mbuf
 154          * for the link level header.
 155          */
 156         switch (linktype) {
 157
 158         case DLT_SLIP:
 159                 sockp->sa_family = AF_INET;
 160                 hlen = 0;
 161                 break;
 162
 163         case DLT_EN10MB:
 164                 sockp->sa_family = AF_UNSPEC;
 165                 /* XXX Would MAXLINKHDR be better? */
 166                 hlen = sizeof(struct ether_header);
 167                 break;
 168
 169         case DLT_FDDI:
 170                 sockp->sa_family = AF_IMPLINK;
 171                 hlen = 0;
 172                 break;
 173
 174         case DLT_RAW:
 175         case DLT_NULL:
 176                 sockp->sa_family = AF_UNSPEC;
 177                 hlen = 0;
 178                 break;
 179
 180         case DLT_ATM_RFC1483:
 181                 /*
 182                  * en atm driver requires 4-byte atm pseudo header.
 183                  * though it isn't standard, vpi:vci needs to be
 184                  * specified anyway.
 185                  */
 186                 sockp->sa_family = AF_UNSPEC;
 187                 hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
 188                 break;
 189
 190         case DLT_PPP:
 191                 sockp->sa_family = AF_UNSPEC;
 192                 hlen = 4;       /* This should match PPP_HDRLEN */
 193                 break;
 194
 195         default:
 196                 return (EIO);
 197         }
 198
 199         len = uio->uio_resid;
 200         *datlen = len - hlen;
 201         if ((unsigned)len > MCLBYTES)
 202                 return (EIO);
 203
 204         MGETHDR(m, M_TRYWAIT, MT_DATA);
 205         if (m == 0)
 206                 return (ENOBUFS);
 207         if (len > MHLEN) {
 208                 MCLGET(m, M_TRYWAIT);
 209                 if ((m->m_flags & M_EXT) == 0) {
 210                         error = ENOBUFS;
 211                         goto bad;
 212                 }
 213         }
 214         m->m_pkthdr.len = m->m_len = len;
 215         m->m_pkthdr.rcvif = NULL;
 216         *mp = m;
 217         /*
 218          * Make room for link header.
 219          */
 220         if (hlen != 0) {
 221                 m->m_pkthdr.len -= hlen;
 222                 m->m_len -= hlen;
 223 #if BSD >= 199103
 224                 m->m_data += hlen; /* XXX */
 225 #else
 226                 m->m_off += hlen;
 227 #endif
 228                 error = uiomove((caddr_t)sockp->sa_data, hlen, uio);
 229                 if (error)
 230                         goto bad;
 231         }
 232         error = uiomove(mtod(m, caddr_t), len - hlen, uio);
 233         if (!error)
 234                 return (0);
 235  bad:
 236         m_freem(m);
 237         return (error);
 238 }
 239
 240 /*
 241  * Attach file to the bpf interface, i.e. make d listen on bp.
 242  */
 243 static void
 244 bpf_attachd(d, bp)
 245         struct bpf_d *d;
 246         struct bpf_if *bp;
 247 {
 248         /*
 249          * Point d at bp, and add d to the interface's list of listeners.
 250          * Finally, point the driver's bpf cookie at the interface so
 251          * it will divert packets to bpf.
 252          */
 253         BPFIF_LOCK(bp);
 254         d->bd_bif = bp;
 255         d->bd_next = bp->bif_dlist;
 256         bp->bif_dlist = d;
 257
 258         bp->bif_ifp->if_bpf = bp;
 259         BPFIF_UNLOCK(bp);
 260 }
 261
 262 /*
 263  * Detach a file from its interface.
 264  */
 265 static void
 266 bpf_detachd(d)
 267         struct bpf_d *d;
 268 {
 269         int error;
 270         struct bpf_d **p;
 271         struct bpf_if *bp;
 272
 273         bp = d->bd_bif;
 274         /*
 275          * Check if this descriptor had requested promiscuous mode.
 276          * If so, turn it off.
 277          */
 278         if (d->bd_promisc) {
 279                 d->bd_promisc = 0;
 280                 error = ifpromisc(bp->bif_ifp, 0);
 281                 if (error != 0 && error != ENXIO) {
 282                         /*
 283                          * ENXIO can happen if a pccard is unplugged
 284                          * Something is really wrong if we were able to put
 285                          * the driver into promiscuous mode, but can't
 286                          * take it out.
 287                          */
 288                         printf("%s%d: ifpromisc failed %d\n",
 289                             bp->bif_ifp->if_name, bp->bif_ifp->if_unit, error);
 290                 }
 291         }
 292         /* Remove d from the interface's descriptor list. */
 293         BPFIF_LOCK(bp);
 294         p = &bp->bif_dlist;
 295         while (*p != d) {
 296                 p = &(*p)->bd_next;
 297                 if (*p == 0)
 298                         panic("bpf_detachd: descriptor not in list");
 299         }
 300         *p = (*p)->bd_next;
 301         if (bp->bif_dlist == 0)
 302                 /*
 303                  * Let the driver know that there are no more listeners.
 304                  */
 305                 d->bd_bif->bif_ifp->if_bpf = 0;
 306         BPFIF_UNLOCK(bp);
 307         d->bd_bif = 0;
 308 }
 309
 310 /*
 311  * Open ethernet device.  Returns ENXIO for illegal minor device number,
 312  * EBUSY if file is open by another process.
 313  */
 314 /* ARGSUSED */
 315 static  int
 316 bpfopen(dev, flags, fmt, td)
 317         dev_t dev;
 318         int flags;
 319         int fmt;
 320         struct thread *td;
 321 {
 322         struct bpf_d *d;
 323
 324         mtx_lock(&bpf_mtx);
 325         d = dev->si_drv1;
 326         /*
 327          * Each minor can be opened by only one process.  If the requested
 328          * minor is in use, return EBUSY.
 329          */
 330         if (d) {
 331                 mtx_unlock(&bpf_mtx);
 332                 return (EBUSY);
 333         }
 334         dev->si_drv1 = (struct bpf_d *)~0;      /* mark device in use */
 335         mtx_unlock(&bpf_mtx);
 336
 337         if ((dev->si_flags & SI_NAMED) == 0)
 338                 make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
 339                     "bpf%d", dev2unit(dev));
 340         MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
 341         dev->si_drv1 = d;
 342         d->bd_bufsize = bpf_bufsize;
 343         d->bd_sig = SIGIO;
 344         d->bd_seesent = 1;
 345         mtx_init(&d->bd_mtx, devtoname(dev), MTX_DEF);
 346
 347         return (0);
 348 }
 349
 350 /*
 351  * Close the descriptor by detaching it from its interface,
 352  * deallocating its buffers, and marking it free.
 353  */
 354 /* ARGSUSED */
 355 static  int
 356 bpfclose(dev, flags, fmt, td)
 357         dev_t dev;
 358         int flags;
 359         int fmt;
 360         struct thread *td;
 361 {
 362         struct bpf_d *d = dev->si_drv1;
 363
 364         funsetown(d->bd_sigio);
 365         mtx_lock(&bpf_mtx);
 366         if (d->bd_bif)
 367                 bpf_detachd(d);
 368         mtx_unlock(&bpf_mtx);
 369         bpf_freed(d);
 370         dev->si_drv1 = 0;
 371         FREE(d, M_BPF);
 372
 373         return (0);
 374 }
 375
 376
 377 /*
 378  * Rotate the packet buffers in descriptor d.  Move the store buffer
 379  * into the hold slot, and the free buffer into the store slot.
 380  * Zero the length of the new store buffer.
 381  */
 382 #define ROTATE_BUFFERS(d) \
 383         (d)->bd_hbuf = (d)->bd_sbuf; \
 384         (d)->bd_hlen = (d)->bd_slen; \
 385         (d)->bd_sbuf = (d)->bd_fbuf; \
 386         (d)->bd_slen = 0; \
 387         (d)->bd_fbuf = 0;
 388 /*
 389  *  bpfread - read next chunk of packets from buffers
 390  */
 391 static  int
 392 bpfread(dev, uio, ioflag)
 393         dev_t dev;
 394         register struct uio *uio;
 395         int ioflag;
 396 {
 397         struct bpf_d *d = dev->si_drv1;
 398         int error;
 399
 400         /*
 401          * Restrict application to use a buffer the same size as
 402          * as kernel buffers.
 403          */
 404         if (uio->uio_resid != d->bd_bufsize)
 405                 return (EINVAL);
 406
 407         BPFD_LOCK(d);
 408         /*
 409          * If the hold buffer is empty, then do a timed sleep, which
 410          * ends when the timeout expires or when enough packets
 411          * have arrived to fill the store buffer.
 412          */
 413         while (d->bd_hbuf == 0) {
 414                 if (d->bd_immediate && d->bd_slen != 0) {
 415                         /*
 416                          * A packet(s) either arrived since the previous
 417                          * read or arrived while we were asleep.
 418                          * Rotate the buffers and return what's here.
 419                          */
 420                         ROTATE_BUFFERS(d);
 421                         break;
 422                 }
 423
 424                 /*
 425                  * No data is available, check to see if the bpf device
 426                  * is still pointed at a real interface.  If not, return
 427                  * ENXIO so that the userland process knows to rebind
 428                  * it before using it again.
 429                  */
 430                 if (d->bd_bif == NULL) {
 431                         BPFD_UNLOCK(d);
 432                         return (ENXIO);
 433                 }
 434
 435                 if (ioflag & IO_NDELAY) {
 436                         BPFD_UNLOCK(d);
 437                         return (EWOULDBLOCK);
 438                 }
 439                 error = msleep((caddr_t)d, &d->bd_mtx, PRINET|PCATCH,
 440                      "bpf", d->bd_rtout);
 441                 if (error == EINTR || error == ERESTART) {
 442                         BPFD_UNLOCK(d);
 443                         return (error);
 444                 }
 445                 if (error == EWOULDBLOCK) {
 446                         /*
 447                          * On a timeout, return what's in the buffer,
 448                          * which may be nothing.  If there is something
 449                          * in the store buffer, we can rotate the buffers.
 450                          */
 451                         if (d->bd_hbuf)
 452                                 /*
 453                                  * We filled up the buffer in between
 454                                  * getting the timeout and arriving
 455                                  * here, so we don't need to rotate.
 456                                  */
 457                                 break;
 458
 459                         if (d->bd_slen == 0) {
 460                                 BPFD_UNLOCK(d);
 461                                 return (0);
 462                         }
 463                         ROTATE_BUFFERS(d);
 464                         break;
 465                 }
 466         }
 467         /*
 468          * At this point, we know we have something in the hold slot.
 469          */
 470         BPFD_UNLOCK(d);
 471
 472         /*
 473          * Move data from hold buffer into user space.
 474          * We know the entire buffer is transferred since
 475          * we checked above that the read buffer is bpf_bufsize bytes.
 476          */
 477         error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
 478
 479         BPFD_LOCK(d);
 480         d->bd_fbuf = d->bd_hbuf;
 481         d->bd_hbuf = 0;
 482         d->bd_hlen = 0;
 483         BPFD_UNLOCK(d);
 484
 485         return (error);
 486 }
 487
 488
 489 /*
 490  * If there are processes sleeping on this descriptor, wake them up.
 491  */
 492 static __inline void
 493 bpf_wakeup(d)
 494         register struct bpf_d *d;
 495 {
 496         wakeup((caddr_t)d);
 497         if (d->bd_async && d->bd_sig && d->bd_sigio)
 498                 pgsigio(d->bd_sigio, d->bd_sig, 0);
 499
 500         selwakeup(&d->bd_sel);
 501         /* XXX */
 502         d->bd_sel.si_pid = 0;
 503 }
 504
 505 static  int
 506 bpfwrite(dev, uio, ioflag)
 507         dev_t dev;
 508         struct uio *uio;
 509         int ioflag;
 510 {
 511         struct bpf_d *d = dev->si_drv1;
 512         struct ifnet *ifp;
 513         struct mbuf *m;
 514         int error;
 515         static struct sockaddr dst;
 516         int datlen;
 517
 518         if (d->bd_bif == 0)
 519                 return (ENXIO);
 520
 521         ifp = d->bd_bif->bif_ifp;
 522
 523         if (uio->uio_resid == 0)
 524                 return (0);
 525
 526         error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
 527         if (error)
 528                 return (error);
 529
 530         if (datlen > ifp->if_mtu)
 531                 return (EMSGSIZE);
 532
 533         if (d->bd_hdrcmplt)
 534                 dst.sa_family = pseudo_AF_HDRCMPLT;
 535
 536         mtx_lock(&Giant);
 537         error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
 538         mtx_unlock(&Giant);
 539         /*
 540          * The driver frees the mbuf.
 541          */
 542         return (error);
 543 }
 544
 545 /*
 546  * Reset a descriptor by flushing its packet buffer and clearing the
 547  * receive and drop counts.
 548  */
 549 static void
 550 reset_d(d)
 551         struct bpf_d *d;
 552 {
 553
 554         mtx_assert(&d->bd_mtx, MA_OWNED);
 555         if (d->bd_hbuf) {
 556                 /* Free the hold buffer. */
 557                 d->bd_fbuf = d->bd_hbuf;
 558                 d->bd_hbuf = 0;
 559         }
 560         d->bd_slen = 0;
 561         d->bd_hlen = 0;
 562         d->bd_rcount = 0;
 563         d->bd_dcount = 0;
 564 }
 565
 566 /*
 567  *  FIONREAD            Check for read packet available.
 568  *  SIOCGIFADDR         Get interface address - convenient hook to driver.
 569  *  BIOCGBLEN           Get buffer len [for read()].
 570  *  BIOCSETF            Set ethernet read filter.
 571  *  BIOCFLUSH           Flush read packet buffer.
 572  *  BIOCPROMISC         Put interface into promiscuous mode.
 573  *  BIOCGDLT            Get link layer type.
 574  *  BIOCGETIF           Get interface name.
 575  *  BIOCSETIF           Set interface.
 576  *  BIOCSRTIMEOUT       Set read timeout.
 577  *  BIOCGRTIMEOUT       Get read timeout.
 578  *  BIOCGSTATS          Get packet stats.
 579  *  BIOCIMMEDIATE       Set immediate mode.
 580  *  BIOCVERSION         Get filter language version.
 581  *  BIOCGHDRCMPLT       Get "header already complete" flag
 582  *  BIOCSHDRCMPLT       Set "header already complete" flag
 583  *  BIOCGSEESENT        Get "see packets sent" flag
 584  *  BIOCSSEESENT        Set "see packets sent" flag
 585  */
 586 /* ARGSUSED */
 587 static  int
 588 bpfioctl(dev, cmd, addr, flags, td)
 589         dev_t dev;
 590         u_long cmd;
 591         caddr_t addr;
 592         int flags;
 593         struct thread *td;
 594 {
 595         struct bpf_d *d = dev->si_drv1;
 596         int error = 0;
 597
 598         switch (cmd) {
 599
 600         default:
 601                 error = EINVAL;
 602                 break;
 603
 604         /*
 605          * Check for read packet available.
 606          */
 607         case FIONREAD:
 608                 {
 609                         int n;
 610
 611                         BPFD_LOCK(d);
 612                         n = d->bd_slen;
 613                         if (d->bd_hbuf)
 614                                 n += d->bd_hlen;
 615                         BPFD_UNLOCK(d);
 616
 617                         *(int *)addr = n;
 618                         break;
 619                 }
 620
 621         case SIOCGIFADDR:
 622                 {
 623                         struct ifnet *ifp;
 624
 625                         if (d->bd_bif == 0)
 626                                 error = EINVAL;
 627                         else {
 628                                 ifp = d->bd_bif->bif_ifp;
 629                                 error = (*ifp->if_ioctl)(ifp, cmd, addr);
 630                         }
 631                         break;
 632                 }
 633
 634         /*
 635          * Get buffer len [for read()].
 636          */
 637         case BIOCGBLEN:
 638                 *(u_int *)addr = d->bd_bufsize;
 639                 break;
 640
 641         /*
 642          * Set buffer length.
 643          */
 644         case BIOCSBLEN:
 645                 if (d->bd_bif != 0)
 646                         error = EINVAL;
 647                 else {
 648                         register u_int size = *(u_int *)addr;
 649
 650                         if (size > bpf_maxbufsize)
 651                                 *(u_int *)addr = size = bpf_maxbufsize;
 652                         else if (size < BPF_MINBUFSIZE)
 653                                 *(u_int *)addr = size = BPF_MINBUFSIZE;
 654                         d->bd_bufsize = size;
 655                 }
 656                 break;
 657
 658         /*
 659          * Set link layer read filter.
 660          */
 661         case BIOCSETF:
 662                 error = bpf_setf(d, (struct bpf_program *)addr);
 663                 break;
 664
 665         /*
 666          * Flush read packet buffer.
 667          */
 668         case BIOCFLUSH:
 669                 BPFD_LOCK(d);
 670                 reset_d(d);
 671                 BPFD_UNLOCK(d);
 672                 break;
 673
 674         /*
 675          * Put interface into promiscuous mode.
 676          */
 677         case BIOCPROMISC:
 678                 if (d->bd_bif == 0) {
 679                         /*
 680                          * No interface attached yet.
 681                          */
 682                         error = EINVAL;
 683                         break;
 684                 }
 685                 if (d->bd_promisc == 0) {
 686                         mtx_lock(&Giant);
 687                         error = ifpromisc(d->bd_bif->bif_ifp, 1);
 688                         mtx_unlock(&Giant);
 689                         if (error == 0)
 690                                 d->bd_promisc = 1;
 691                 }
 692                 break;
 693
 694         /*
 695          * Get device parameters.
 696          */
 697         case BIOCGDLT:
 698                 if (d->bd_bif == 0)
 699                         error = EINVAL;
 700                 else
 701                         *(u_int *)addr = d->bd_bif->bif_dlt;
 702                 break;
 703
 704         /*
 705          * Get interface name.
 706          */
 707         case BIOCGETIF:
 708                 if (d->bd_bif == 0)
 709                         error = EINVAL;
 710                 else {
 711                         struct ifnet *const ifp = d->bd_bif->bif_ifp;
 712                         struct ifreq *const ifr = (struct ifreq *)addr;
 713
 714                         snprintf(ifr->ifr_name, sizeof(ifr->ifr_name),
 715                             "%s%d", ifp->if_name, ifp->if_unit);
 716                 }
 717                 break;
 718
 719         /*
 720          * Set interface.
 721          */
 722         case BIOCSETIF:
 723                 error = bpf_setif(d, (struct ifreq *)addr);
 724                 break;
 725
 726         /*
 727          * Set read timeout.
 728          */
 729         case BIOCSRTIMEOUT:
 730                 {
 731                         struct timeval *tv = (struct timeval *)addr;
 732
 733                         /*
 734                          * Subtract 1 tick from tvtohz() since this isn't
 735                          * a one-shot timer.
 736                          */
 737                         if ((error = itimerfix(tv)) == 0)
 738                                 d->bd_rtout = tvtohz(tv) - 1;
 739                         break;
 740                 }
 741
 742         /*
 743          * Get read timeout.
 744          */
 745         case BIOCGRTIMEOUT:
 746                 {
 747                         struct timeval *tv = (struct timeval *)addr;
 748
 749                         tv->tv_sec = d->bd_rtout / hz;
 750                         tv->tv_usec = (d->bd_rtout % hz) * tick;
 751                         break;
 752                 }
 753
 754         /*
 755          * Get packet stats.
 756          */
 757         case BIOCGSTATS:
 758                 {
 759                         struct bpf_stat *bs = (struct bpf_stat *)addr;
 760
 761                         bs->bs_recv = d->bd_rcount;
 762                         bs->bs_drop = d->bd_dcount;
 763                         break;
 764                 }
 765
 766         /*
 767          * Set immediate mode.
 768          */
 769         case BIOCIMMEDIATE:
 770                 d->bd_immediate = *(u_int *)addr;
 771                 break;
 772
 773         case BIOCVERSION:
 774                 {
 775                         struct bpf_version *bv = (struct bpf_version *)addr;
 776
 777                         bv->bv_major = BPF_MAJOR_VERSION;
 778                         bv->bv_minor = BPF_MINOR_VERSION;
 779                         break;
 780                 }
 781
 782         /*
 783          * Get "header already complete" flag
 784          */
 785         case BIOCGHDRCMPLT:
 786                 *(u_int *)addr = d->bd_hdrcmplt;
 787                 break;
 788
 789         /*
 790          * Set "header already complete" flag
 791          */
 792         case BIOCSHDRCMPLT:
 793                 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
 794                 break;
 795
 796         /*
 797          * Get "see sent packets" flag
 798          */
 799         case BIOCGSEESENT:
 800                 *(u_int *)addr = d->bd_seesent;
 801                 break;
 802
 803         /*
 804          * Set "see sent packets" flag
 805          */
 806         case BIOCSSEESENT:
 807                 d->bd_seesent = *(u_int *)addr;
 808                 break;
 809
 810         case FIONBIO:           /* Non-blocking I/O */
 811                 break;
 812
 813         case FIOASYNC:          /* Send signal on receive packets */
 814                 d->bd_async = *(int *)addr;
 815                 break;
 816
 817         case FIOSETOWN:
 818                 error = fsetown(*(int *)addr, &d->bd_sigio);
 819                 break;
 820
 821         case FIOGETOWN:
 822                 *(int *)addr = fgetown(d->bd_sigio);
 823                 break;
 824
 825         /* This is deprecated, FIOSETOWN should be used instead. */
 826         case TIOCSPGRP:
 827                 error = fsetown(-(*(int *)addr), &d->bd_sigio);
 828                 break;
 829
 830         /* This is deprecated, FIOGETOWN should be used instead. */
 831         case TIOCGPGRP:
 832                 *(int *)addr = -fgetown(d->bd_sigio);
 833                 break;
 834
 835         case BIOCSRSIG:         /* Set receive signal */
 836                 {
 837                         u_int sig;
 838
 839                         sig = *(u_int *)addr;
 840
 841                         if (sig >= NSIG)
 842                                 error = EINVAL;
 843                         else
 844                                 d->bd_sig = sig;
 845                         break;
 846                 }
 847         case BIOCGRSIG:
 848                 *(u_int *)addr = d->bd_sig;
 849                 break;
 850         }
 851         return (error);
 852 }
 853
 854 /*
 855  * Set d's packet filter program to fp.  If this file already has a filter,
 856  * free it and replace it.  Returns EINVAL for bogus requests.
 857  */
 858 static int
 859 bpf_setf(d, fp)
 860         struct bpf_d *d;
 861         struct bpf_program *fp;
 862 {
 863         struct bpf_insn *fcode, *old;
 864         u_int flen, size;
 865
 866         old = d->bd_filter;
 867         if (fp->bf_insns == 0) {
 868                 if (fp->bf_len != 0)
 869                         return (EINVAL);
 870                 BPFD_LOCK(d);
 871                 d->bd_filter = 0;
 872                 reset_d(d);
 873                 BPFD_UNLOCK(d);
 874                 if (old != 0)
 875                         free((caddr_t)old, M_BPF);
 876                 return (0);
 877         }
 878         flen = fp->bf_len;
 879         if (flen > BPF_MAXINSNS)
 880                 return (EINVAL);
 881
 882         size = flen * sizeof(*fp->bf_insns);
 883         fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
 884         if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
 885             bpf_validate(fcode, (int)flen)) {
 886                 BPFD_LOCK(d);
 887                 d->bd_filter = fcode;
 888                 reset_d(d);
 889                 BPFD_UNLOCK(d);
 890                 if (old != 0)
 891                         free((caddr_t)old, M_BPF);
 892
 893                 return (0);
 894         }
 895         free((caddr_t)fcode, M_BPF);
 896         return (EINVAL);
 897 }
 898
 899 /*
 900  * Detach a file from its current interface (if attached at all) and attach
 901  * to the interface indicated by the name stored in ifr.
 902  * Return an errno or 0.
 903  */
 904 static int
 905 bpf_setif(d, ifr)
 906         struct bpf_d *d;
 907         struct ifreq *ifr;
 908 {
 909         struct bpf_if *bp;
 910         int error;
 911         struct ifnet *theywant;
 912
 913         theywant = ifunit(ifr->ifr_name);
 914         if (theywant == 0)
 915                 return ENXIO;
 916
 917         /*
 918          * Look through attached interfaces for the named one.
 919          */
 920         mtx_lock(&bpf_mtx);
 921         for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
 922                 struct ifnet *ifp = bp->bif_ifp;
 923
 924                 if (ifp == 0 || ifp != theywant)
 925                         continue;
 926
 927                 mtx_unlock(&bpf_mtx);
 928                 /*
 929                  * We found the requested interface.
 930                  * If it's not up, return an error.
 931                  * Allocate the packet buffers if we need to.
 932                  * If we're already attached to requested interface,
 933                  * just flush the buffer.
 934                  */
 935                 if ((ifp->if_flags & IFF_UP) == 0)
 936                         return (ENETDOWN);
 937
 938                 if (d->bd_sbuf == 0) {
 939                         error = bpf_allocbufs(d);
 940                         if (error != 0)
 941                                 return (error);
 942                 }
 943                 if (bp != d->bd_bif) {
 944                         if (d->bd_bif)
 945                                 /*
 946                                  * Detach if attached to something else.
 947                                  */
 948                                 bpf_detachd(d);
 949
 950                         bpf_attachd(d, bp);
 951                 }
 952                 BPFD_LOCK(d);
 953                 reset_d(d);
 954                 BPFD_UNLOCK(d);
 955                 return (0);
 956         }
 957         mtx_unlock(&bpf_mtx);
 958         /* Not found. */
 959         return (ENXIO);
 960 }
 961
 962 /*
 963  * Support for select() and poll() system calls
 964  *
 965  * Return true iff the specific operation will not block indefinitely.
 966  * Otherwise, return false but make a note that a selwakeup() must be done.
 967  */
 968 int
 969 bpfpoll(dev, events, td)
 970         register dev_t dev;
 971         int events;
 972         struct thread *td;
 973 {
 974         struct bpf_d *d;
 975         int revents;
 976
 977         d = dev->si_drv1;
 978         if (d->bd_bif == NULL)
 979                 return (ENXIO);
 980
 981         revents = events & (POLLOUT | POLLWRNORM);
 982         BPFD_LOCK(d);
 983         if (events & (POLLIN | POLLRDNORM)) {
 984                 /*
 985                  * An imitation of the FIONREAD ioctl code.
 986                  * XXX not quite.  An exact imitation:
 987                  *      if (d->b_slen != 0 ||
 988                  *          (d->bd_hbuf != NULL && d->bd_hlen != 0)
 989                  */
 990                 if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
 991                         revents |= events & (POLLIN | POLLRDNORM);
 992                 else
 993                         selrecord(td, &d->bd_sel);
 994         }
 995         BPFD_UNLOCK(d);
 996         return (revents);
 997 }
 998
 999 /*
1000  * Incoming linkage from device drivers.  Process the packet pkt, of length
1001  * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1002  * by each process' filter, and if accepted, stashed into the corresponding
1003  * buffer.
1004  */
1005 void
1006 bpf_tap(ifp, pkt, pktlen)
1007         struct ifnet *ifp;
1008         register u_char *pkt;
1009         register u_int pktlen;
1010 {
1011         struct bpf_if *bp;
1012         register struct bpf_d *d;
1013         register u_int slen;
1014
1015         bp = ifp->if_bpf;
1016         BPFIF_LOCK(bp);
1017         for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1018                 BPFD_LOCK(d);
1019                 ++d->bd_rcount;
1020                 slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
1021                 if (slen != 0)
1022                         catchpacket(d, pkt, pktlen, slen, bcopy);
1023                 BPFD_UNLOCK(d);
1024         }
1025         BPFIF_UNLOCK(bp);
1026 }
1027
1028 /*
1029  * Copy data from an mbuf chain into a buffer.  This code is derived
1030  * from m_copydata in sys/uipc_mbuf.c.
1031  */
1032 static void
1033 bpf_mcopy(src_arg, dst_arg, len)
1034         const void *src_arg;
1035         void *dst_arg;
1036         register size_t len;
1037 {
1038         register const struct mbuf *m;
1039         register u_int count;
1040         u_char *dst;
1041
1042         m = src_arg;
1043         dst = dst_arg;
1044         while (len > 0) {
1045                 if (m == 0)
1046                         panic("bpf_mcopy");
1047                 count = min(m->m_len, len);
1048                 bcopy(mtod(m, void *), dst, count);
1049                 m = m->m_next;
1050                 dst += count;
1051                 len -= count;
1052         }
1053 }
1054
1055 /*
1056  * Incoming linkage from device drivers, when packet is in an mbuf chain.
1057  */
1058 void
1059 bpf_mtap(ifp, m)
1060         struct ifnet *ifp;
1061         struct mbuf *m;
1062 {
1063         struct bpf_if *bp = ifp->if_bpf;
1064         struct bpf_d *d;
1065         u_int pktlen, slen;
1066         struct mbuf *m0;
1067
1068         pktlen = 0;
1069         for (m0 = m; m0 != 0; m0 = m0->m_next)
1070                 pktlen += m0->m_len;
1071
1072         BPFIF_LOCK(bp);
1073         for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1074                 if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
1075                         continue;
1076                 BPFD_LOCK(d);
1077                 ++d->bd_rcount;
1078                 slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
1079                 if (slen != 0)
1080                         catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy);
1081                 BPFD_UNLOCK(d);
1082         }
1083         BPFIF_UNLOCK(bp);
1084 }
1085
1086 /*
1087  * Move the packet data from interface memory (pkt) into the
1088  * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
1089  * otherwise 0.  "copy" is the routine called to do the actual data
1090  * transfer.  bcopy is passed in to copy contiguous chunks, while
1091  * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1092  * pkt is really an mbuf.
1093  */
1094 static void
1095 catchpacket(d, pkt, pktlen, snaplen, cpfn)
1096         register struct bpf_d *d;
1097         register u_char *pkt;
1098         register u_int pktlen, snaplen;
1099         register void (*cpfn) __P((const void *, void *, size_t));
1100 {
1101         register struct bpf_hdr *hp;
1102         register int totlen, curlen;
1103         register int hdrlen = d->bd_bif->bif_hdrlen;
1104         /*
1105          * Figure out how many bytes to move.  If the packet is
1106          * greater or equal to the snapshot length, transfer that
1107          * much.  Otherwise, transfer the whole packet (unless
1108          * we hit the buffer size limit).
1109          */
1110         totlen = hdrlen + min(snaplen, pktlen);
1111         if (totlen > d->bd_bufsize)
1112                 totlen = d->bd_bufsize;
1113
1114         /*
1115          * Round up the end of the previous packet to the next longword.
1116          */
1117         curlen = BPF_WORDALIGN(d->bd_slen);
1118         if (curlen + totlen > d->bd_bufsize) {
1119                 /*
1120                  * This packet will overflow the storage buffer.
1121                  * Rotate the buffers if we can, then wakeup any
1122                  * pending reads.
1123                  */
1124                 if (d->bd_fbuf == 0) {
1125                         /*
1126                          * We haven't completed the previous read yet,
1127                          * so drop the packet.
1128                          */
1129                         ++d->bd_dcount;
1130                         return;
1131                 }
1132                 ROTATE_BUFFERS(d);
1133                 bpf_wakeup(d);
1134                 curlen = 0;
1135         }
1136         else if (d->bd_immediate)
1137                 /*
1138                  * Immediate mode is set.  A packet arrived so any
1139                  * reads should be woken up.
1140                  */
1141                 bpf_wakeup(d);
1142
1143         /*
1144          * Append the bpf header.
1145          */
1146         hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1147         microtime(&hp->bh_tstamp);
1148         hp->bh_datalen = pktlen;
1149         hp->bh_hdrlen = hdrlen;
1150         /*
1151          * Copy the packet data into the store buffer and update its length.
1152          */
1153         (*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
1154         d->bd_slen = curlen + totlen;
1155 }
1156
1157 /*
1158  * Initialize all nonzero fields of a descriptor.
1159  */
1160 static int
1161 bpf_allocbufs(d)
1162         register struct bpf_d *d;
1163 {
1164         d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1165         if (d->bd_fbuf == 0)
1166                 return (ENOBUFS);
1167
1168         d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1169         if (d->bd_sbuf == 0) {
1170                 free(d->bd_fbuf, M_BPF);
1171                 return (ENOBUFS);
1172         }
1173         d->bd_slen = 0;
1174         d->bd_hlen = 0;
1175         return (0);
1176 }
1177
1178 /*
1179  * Free buffers currently in use by a descriptor.
1180  * Called on close.
1181  */
1182 static void
1183 bpf_freed(d)
1184         register struct bpf_d *d;
1185 {
1186         /*
1187          * We don't need to lock out interrupts since this descriptor has
1188          * been detached from its interface and it yet hasn't been marked
1189          * free.
1190          */
1191         if (d->bd_sbuf != 0) {
1192                 free(d->bd_sbuf, M_BPF);
1193                 if (d->bd_hbuf != 0)
1194                         free(d->bd_hbuf, M_BPF);
1195                 if (d->bd_fbuf != 0)
1196                         free(d->bd_fbuf, M_BPF);
1197         }
1198         if (d->bd_filter)
1199                 free((caddr_t)d->bd_filter, M_BPF);
1200         mtx_destroy(&d->bd_mtx);
1201 }
1202
1203 /*
1204  * Attach an interface to bpf.  ifp is a pointer to the structure
1205  * defining the interface to be attached, dlt is the link layer type,
1206  * and hdrlen is the fixed size of the link header (variable length
1207  * headers are not yet supporrted).
1208  */
1209 void
1210 bpfattach(ifp, dlt, hdrlen)
1211         struct ifnet *ifp;
1212         u_int dlt, hdrlen;
1213 {
1214         struct bpf_if *bp;
1215         bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1216         if (bp == 0)
1217                 panic("bpfattach");
1218
1219         bp->bif_ifp = ifp;
1220         bp->bif_dlt = dlt;
1221         mtx_init(&bp->bif_mtx, "bpf interface lock", MTX_DEF);
1222
1223         mtx_lock(&bpf_mtx);
1224         bp->bif_next = bpf_iflist;
1225         bpf_iflist = bp;
1226         mtx_unlock(&bpf_mtx);
1227
1228         bp->bif_ifp->if_bpf = 0;
1229
1230         /*
1231          * Compute the length of the bpf header.  This is not necessarily
1232          * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1233          * that the network layer header begins on a longword boundary (for
1234          * performance reasons and to alleviate alignment restrictions).
1235          */
1236         bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1237
1238         if (bootverbose)
1239                 printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit);
1240 }
1241
1242 /*
1243  * Detach bpf from an interface.  This involves detaching each descriptor
1244  * associated with the interface, and leaving bd_bif NULL.  Notify each
1245  * descriptor as it's detached so that any sleepers wake up and get
1246  * ENXIO.
1247  */
1248 void
1249 bpfdetach(ifp)
1250         struct ifnet *ifp;
1251 {
1252         struct bpf_if   *bp, *bp_prev;
1253         struct bpf_d    *d;
1254
1255         mtx_lock(&bpf_mtx);
1256
1257         /* Locate BPF interface information */
1258         bp_prev = NULL;
1259         for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1260                 if (ifp == bp->bif_ifp)
1261                         break;
1262                 bp_prev = bp;
1263         }
1264
1265         /* Interface wasn't attached */
1266         if (bp->bif_ifp == NULL) {
1267                 mtx_unlock(&bpf_mtx);
1268                 printf("bpfdetach: %s%d was not attached\n", ifp->if_name,
1269                     ifp->if_unit);
1270                 return;
1271         }
1272
1273         if (bp_prev) {
1274                 bp_prev->bif_next = bp->bif_next;
1275         } else {
1276                 bpf_iflist = bp->bif_next;
1277         }
1278
1279         while ((d = bp->bif_dlist) != NULL) {
1280                 bpf_detachd(d);
1281                 BPFD_LOCK(d);
1282                 bpf_wakeup(d);
1283                 BPFD_UNLOCK(d);
1284         }
1285
1286         mtx_destroy(&bp->bif_mtx);
1287         free(bp, M_BPF);
1288
1289         mtx_unlock(&bpf_mtx);
1290 }
1291
1292 static void bpf_drvinit __P((void *unused));
1293
1294 static void bpf_clone __P((void *arg, char *name, int namelen, dev_t *dev));
1295
1296 static void
1297 bpf_clone(arg, name, namelen, dev)
1298         void *arg;
1299         char *name;
1300         int namelen;
1301         dev_t *dev;
1302 {
1303         int u;
1304
1305         if (*dev != NODEV)
1306                 return;
1307         if (dev_stdclone(name, NULL, "bpf", &u) != 1)
1308                 return;
1309         *dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
1310             "bpf%d", u);
1311         (*dev)->si_flags |= SI_CHEAPCLONE;
1312         return;
1313 }
1314
1315 static void
1316 bpf_drvinit(unused)
1317         void *unused;
1318 {
1319
1320         mtx_init(&bpf_mtx, "bpf global lock", MTX_DEF);
1321         EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
1322         cdevsw_add(&bpf_cdevsw);
1323 }
1324
1325 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
1326
1327 #else /* !DEV_BPF && !NETGRAPH_BPF */
1328 /*
1329  * NOP stubs to allow bpf-using drivers to load and function.
1330  *
1331  * A 'better' implementation would allow the core bpf functionality
1332  * to be loaded at runtime.
1333  */
1334
1335 void
1336 bpf_tap(ifp, pkt, pktlen)
1337         struct ifnet *ifp;
1338         register u_char *pkt;
1339         register u_int pktlen;
1340 {
1341 }
1342
/*
 * Stub used when bpf is not compiled in: the mbuf chain is ignored.
 */
void
bpf_mtap(ifp, m)
        struct ifnet *ifp;
        struct mbuf *m;
{
        /* Nothing to do. */
}
1349
1350 void
1351 bpfattach(ifp, dlt, hdrlen)
1352         struct ifnet *ifp;
1353         u_int dlt, hdrlen;
1354 {
1355 }
1356
/*
 * Stub used when bpf is not compiled in: detaching is a no-op.
 */
void
bpfdetach(ifp)
        struct ifnet *ifp;
{
        /* Nothing to do. */
}
1362
1363 u_int
1364 bpf_filter(pc, p, wirelen, buflen)
1365         register const struct bpf_insn *pc;
1366         register u_char *p;
1367         u_int wirelen;
1368         register u_int buflen;
1369 {
1370         return -1;      /* "no filter" behaviour */
1371 }
1372
/*
 * Stub used when bpf is not compiled in: every filter program is
 * reported as invalid (0, i.e. false).
 */
int
bpf_validate(f, len)
        const struct bpf_insn *f;
        int len;
{
        /* false */
        return (0);
}
1380
1381 #endif /* !DEV_BPF && !NETGRAPH_BPF */