sys/net/bpf.c

   1 /*-
   2  * Copyright (c) 1990, 1991, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * This code is derived from the Stanford/CMU enet packet filter,
   6  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
   7  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
   8  * Berkeley Laboratory.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 4. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  *
  34  *      @(#)bpf.c       8.4 (Berkeley) 1/9/95
  35  *
  36  * $FreeBSD$
  37  */
  38
  39 #include "opt_bpf.h"
  40 #include "opt_mac.h"
  41 #include "opt_netgraph.h"
  42
  43 #include <sys/types.h>
  44 #include <sys/param.h>
  45 #include <sys/systm.h>
  46 #include <sys/conf.h>
  47 #include <sys/fcntl.h>
  48 #include <sys/mac.h>
  49 #include <sys/malloc.h>
  50 #include <sys/mbuf.h>
  51 #include <sys/time.h>
  52 #include <sys/proc.h>
  53 #include <sys/signalvar.h>
  54 #include <sys/filio.h>
  55 #include <sys/sockio.h>
  56 #include <sys/ttycom.h>
  57 #include <sys/uio.h>
  58
  59 #include <sys/event.h>
  60 #include <sys/file.h>
  61 #include <sys/poll.h>
  62 #include <sys/proc.h>
  63
  64 #include <sys/socket.h>
  65
  66 #include <net/if.h>
  67 #include <net/bpf.h>
  68 #include <net/bpfdesc.h>
  69
  70 #include <netinet/in.h>
  71 #include <netinet/if_ether.h>
  72 #include <sys/kernel.h>
  73 #include <sys/sysctl.h>
  74
  75 static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
  76
  77 #if defined(DEV_BPF) || defined(NETGRAPH_BPF)
  78
  79 #define PRINET  26                      /* interruptible */
  80
  81 /*
  82  * bpf_iflist is a list of BPF interface structures, each corresponding to a
  83  * specific DLT.  The same network interface might have several BPF interface
  84  * structures registered by different layers in the stack (i.e., 802.11
  85  * frames, ethernet frames, etc).
  86  */
  87 static LIST_HEAD(, bpf_if)      bpf_iflist;
  88 static struct mtx       bpf_mtx;                /* bpf global lock */
  89 static int              bpf_bpfd_cnt;
  90
  91 static int      bpf_allocbufs(struct bpf_d *);
  92 static void     bpf_attachd(struct bpf_d *, struct bpf_if *);
  93 static void     bpf_detachd(struct bpf_d *);
  94 static void     bpf_freed(struct bpf_d *);
  95 static void     bpf_mcopy(const void *, void *, size_t);
  96 static int      bpf_movein(struct uio *, int, struct ifnet *,
  97                     struct mbuf **, struct sockaddr *, struct bpf_insn *);
  98 static int      bpf_setif(struct bpf_d *, struct ifreq *);
  99 static void     bpf_timed_out(void *);
 100 static __inline void
 101                 bpf_wakeup(struct bpf_d *);
 102 static void     catchpacket(struct bpf_d *, u_char *, u_int,
 103                     u_int, void (*)(const void *, void *, size_t),
 104                     struct timeval *);
 105 static void     reset_d(struct bpf_d *);
 106 static int       bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
 107 static int      bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
 108 static int      bpf_setdlt(struct bpf_d *, u_int);
 109 static void     filt_bpfdetach(struct knote *);
 110 static int      filt_bpfread(struct knote *, long);
 111 static void     bpf_drvinit(void *);
 112 static void     bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
 113 static int      bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
 114
 115 /*
 116  * The default read buffer size is patchable.
 117  */
 118 SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
 119 static int bpf_bufsize = 4096;
 120 SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
 121     &bpf_bufsize, 0, "");
 122 static int bpf_maxbufsize = BPF_MAXBUFSIZE;
 123 SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
 124     &bpf_maxbufsize, 0, "");
 125 static int bpf_maxinsns = BPF_MAXINSNS;
 126 SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
 127     &bpf_maxinsns, 0, "Maximum bpf program instructions");
 128 SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
 129     bpf_stats_sysctl, "bpf statistics portal");
 130
 131 static  d_open_t        bpfopen;
 132 static  d_close_t       bpfclose;
 133 static  d_read_t        bpfread;
 134 static  d_write_t       bpfwrite;
 135 static  d_ioctl_t       bpfioctl;
 136 static  d_poll_t        bpfpoll;
 137 static  d_kqfilter_t    bpfkqfilter;
 138
 139 static struct cdevsw bpf_cdevsw = {
 140         .d_version =    D_VERSION,
 141         .d_flags =      D_NEEDGIANT | D_TRACKCLOSE,
 142         .d_open =       bpfopen,
 143         .d_close =      bpfclose,
 144         .d_read =       bpfread,
 145         .d_write =      bpfwrite,
 146         .d_ioctl =      bpfioctl,
 147         .d_poll =       bpfpoll,
 148         .d_name =       "bpf",
 149         .d_kqfilter =   bpfkqfilter,
 150 };
 151
 152 static struct filterops bpfread_filtops =
 153         { 1, NULL, filt_bpfdetach, filt_bpfread };
 154
 155 static int
 156 bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
 157     struct sockaddr *sockp, struct bpf_insn *wfilter)
 158 {
 159         struct ether_header *eh;
 160         struct mbuf *m;
 161         int error;
 162         int len;
 163         int hlen;
 164         int slen;
 165
 166         /*
 167          * Build a sockaddr based on the data link layer type.
 168          * We do this at this level because the ethernet header
 169          * is copied directly into the data field of the sockaddr.
 170          * In the case of SLIP, there is no header and the packet
 171          * is forwarded as is.
 172          * Also, we are careful to leave room at the front of the mbuf
 173          * for the link level header.
 174          */
 175         switch (linktype) {
 176
 177         case DLT_SLIP:
 178                 sockp->sa_family = AF_INET;
 179                 hlen = 0;
 180                 break;
 181
 182         case DLT_EN10MB:
 183                 sockp->sa_family = AF_UNSPEC;
 184                 /* XXX Would MAXLINKHDR be better? */
 185                 hlen = ETHER_HDR_LEN;
 186                 break;
 187
 188         case DLT_FDDI:
 189                 sockp->sa_family = AF_IMPLINK;
 190                 hlen = 0;
 191                 break;
 192
 193         case DLT_RAW:
 194                 sockp->sa_family = AF_UNSPEC;
 195                 hlen = 0;
 196                 break;
 197
 198         case DLT_NULL:
 199                 /*
 200                  * null interface types require a 4 byte pseudo header which
 201                  * corresponds to the address family of the packet.
 202                  */
 203                 sockp->sa_family = AF_UNSPEC;
 204                 hlen = 4;
 205                 break;
 206
 207         case DLT_ATM_RFC1483:
 208                 /*
 209                  * en atm driver requires 4-byte atm pseudo header.
 210                  * though it isn't standard, vpi:vci needs to be
 211                  * specified anyway.
 212                  */
 213                 sockp->sa_family = AF_UNSPEC;
 214                 hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
 215                 break;
 216
 217         case DLT_PPP:
 218                 sockp->sa_family = AF_UNSPEC;
 219                 hlen = 4;       /* This should match PPP_HDRLEN */
 220                 break;
 221
 222         default:
 223                 return (EIO);
 224         }
 225
 226         len = uio->uio_resid;
 227
 228         if (len - hlen > ifp->if_mtu)
 229                 return (EMSGSIZE);
 230
 231         if ((unsigned)len > MJUM16BYTES)
 232                 return (EIO);
 233
 234         if (len <= MHLEN)
 235                 MGETHDR(m, M_TRYWAIT, MT_DATA);
 236         else if (len <= MCLBYTES)
 237                 m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
 238         else
 239                 m = m_getjcl(M_TRYWAIT, MT_DATA, M_PKTHDR,
 240 #if (MJUMPAGESIZE > MCLBYTES)
 241                     len <= MJUMPAGESIZE ? MJUMPAGESIZE :
 242 #endif
 243                     (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
 244         if (m == NULL)
 245                 return (ENOBUFS);
 246         m->m_pkthdr.len = m->m_len = len;
 247         m->m_pkthdr.rcvif = NULL;
 248         *mp = m;
 249
 250         if (m->m_len < hlen) {
 251                 error = EPERM;
 252                 goto bad;
 253         }
 254
 255         error = uiomove(mtod(m, u_char *), len, uio);
 256         if (error)
 257                 goto bad;
 258
 259         slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
 260         if (slen == 0) {
 261                 error = EPERM;
 262                 goto bad;
 263         }
 264
 265         /* Check for multicast destination */
 266         switch (linktype) {
 267         case DLT_EN10MB:
 268                 eh = mtod(m, struct ether_header *);
 269                 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 270                         if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
 271                             ETHER_ADDR_LEN) == 0)
 272                                 m->m_flags |= M_BCAST;
 273                         else
 274                                 m->m_flags |= M_MCAST;
 275                 }
 276                 break;
 277         }
 278
 279         /*
 280          * Make room for link header, and copy it to sockaddr
 281          */
 282         if (hlen != 0) {
 283                 bcopy(m->m_data, sockp->sa_data, hlen);
 284                 m->m_pkthdr.len -= hlen;
 285                 m->m_len -= hlen;
 286 #if BSD >= 199103
 287                 m->m_data += hlen; /* XXX */
 288 #else
 289                 m->m_off += hlen;
 290 #endif
 291         }
 292
 293         return (0);
 294 bad:
 295         m_freem(m);
 296         return (error);
 297 }
 298
 299 /*
 300  * Attach file to the bpf interface, i.e. make d listen on bp.
 301  */
 302 static void
 303 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 304 {
 305         /*
 306          * Point d at bp, and add d to the interface's list of listeners.
 307          * Finally, point the driver's bpf cookie at the interface so
 308          * it will divert packets to bpf.
 309          */
 310         BPFIF_LOCK(bp);
 311         d->bd_bif = bp;
 312         LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
 313
 314         bpf_bpfd_cnt++;
 315         BPFIF_UNLOCK(bp);
 316 }
 317
 318 /*
 319  * Detach a file from its interface.
 320  */
 321 static void
 322 bpf_detachd(struct bpf_d *d)
 323 {
 324         int error;
 325         struct bpf_if *bp;
 326         struct ifnet *ifp;
 327
 328         bp = d->bd_bif;
 329         BPFIF_LOCK(bp);
 330         BPFD_LOCK(d);
 331         ifp = d->bd_bif->bif_ifp;
 332
 333         /*
 334          * Remove d from the interface's descriptor list.
 335          */
 336         LIST_REMOVE(d, bd_next);
 337
 338         bpf_bpfd_cnt--;
 339         d->bd_bif = NULL;
 340         BPFD_UNLOCK(d);
 341         BPFIF_UNLOCK(bp);
 342
 343         /*
 344          * Check if this descriptor had requested promiscuous mode.
 345          * If so, turn it off.
 346          */
 347         if (d->bd_promisc) {
 348                 d->bd_promisc = 0;
 349                 error = ifpromisc(ifp, 0);
 350                 if (error != 0 && error != ENXIO) {
 351                         /*
 352                          * ENXIO can happen if a pccard is unplugged
 353                          * Something is really wrong if we were able to put
 354                          * the driver into promiscuous mode, but can't
 355                          * take it out.
 356                          */
 357                         if_printf(bp->bif_ifp,
 358                                 "bpf_detach: ifpromisc failed (%d)\n", error);
 359                 }
 360         }
 361 }
 362
 363 /*
 364  * Open ethernet device.  Returns ENXIO for illegal minor device number,
 365  * EBUSY if file is open by another process.
 366  */
 367 /* ARGSUSED */
 368 static  int
 369 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 370 {
 371         struct bpf_d *d;
 372
 373         mtx_lock(&bpf_mtx);
 374         d = dev->si_drv1;
 375         /*
 376          * Each minor can be opened by only one process.  If the requested
 377          * minor is in use, return EBUSY.
 378          */
 379         if (d != NULL) {
 380                 mtx_unlock(&bpf_mtx);
 381                 return (EBUSY);
 382         }
 383         dev->si_drv1 = (struct bpf_d *)~0;      /* mark device in use */
 384         mtx_unlock(&bpf_mtx);
 385
 386         if ((dev->si_flags & SI_NAMED) == 0)
 387                 make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
 388                     "bpf%d", dev2unit(dev));
 389         MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
 390         dev->si_drv1 = d;
 391         d->bd_bufsize = bpf_bufsize;
 392         d->bd_sig = SIGIO;
 393         d->bd_seesent = 1;
 394         d->bd_pid = td->td_proc->p_pid;
 395         strlcpy(d->bd_pcomm, td->td_proc->p_comm, MAXCOMLEN);
 396 #ifdef MAC
 397         mac_init_bpfdesc(d);
 398         mac_create_bpfdesc(td->td_ucred, d);
 399 #endif
 400         mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
 401         callout_init(&d->bd_callout, NET_CALLOUT_MPSAFE);
 402         knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);
 403
 404         return (0);
 405 }
 406
 407 /*
 408  * Close the descriptor by detaching it from its interface,
 409  * deallocating its buffers, and marking it free.
 410  */
 411 /* ARGSUSED */
 412 static  int
 413 bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td)
 414 {
 415         struct bpf_d *d = dev->si_drv1;
 416
 417         BPFD_LOCK(d);
 418         if (d->bd_state == BPF_WAITING)
 419                 callout_stop(&d->bd_callout);
 420         d->bd_state = BPF_IDLE;
 421         BPFD_UNLOCK(d);
 422         funsetown(&d->bd_sigio);
 423         mtx_lock(&bpf_mtx);
 424         if (d->bd_bif)
 425                 bpf_detachd(d);
 426         mtx_unlock(&bpf_mtx);
 427         selwakeuppri(&d->bd_sel, PRINET);
 428 #ifdef MAC
 429         mac_destroy_bpfdesc(d);
 430 #endif /* MAC */
 431         knlist_destroy(&d->bd_sel.si_note);
 432         bpf_freed(d);
 433         dev->si_drv1 = NULL;
 434         free(d, M_BPF);
 435
 436         return (0);
 437 }
 438
 439
 440 /*
 441  * Rotate the packet buffers in descriptor d.  Move the store buffer
 442  * into the hold slot, and the free buffer into the store slot.
 443  * Zero the length of the new store buffer.
 444  */
 445 #define ROTATE_BUFFERS(d) \
 446         (d)->bd_hbuf = (d)->bd_sbuf; \
 447         (d)->bd_hlen = (d)->bd_slen; \
 448         (d)->bd_sbuf = (d)->bd_fbuf; \
 449         (d)->bd_slen = 0; \
 450         (d)->bd_fbuf = NULL;
 451 /*
 452  *  bpfread - read next chunk of packets from buffers
 453  */
 454 static  int
 455 bpfread(struct cdev *dev, struct uio *uio, int ioflag)
 456 {
 457         struct bpf_d *d = dev->si_drv1;
 458         int timed_out;
 459         int error;
 460
 461         /*
 462          * Restrict application to use a buffer the same size as
 463          * as kernel buffers.
 464          */
 465         if (uio->uio_resid != d->bd_bufsize)
 466                 return (EINVAL);
 467
 468         BPFD_LOCK(d);
 469         d->bd_pid = curthread->td_proc->p_pid;
 470         if (d->bd_state == BPF_WAITING)
 471                 callout_stop(&d->bd_callout);
 472         timed_out = (d->bd_state == BPF_TIMED_OUT);
 473         d->bd_state = BPF_IDLE;
 474         /*
 475          * If the hold buffer is empty, then do a timed sleep, which
 476          * ends when the timeout expires or when enough packets
 477          * have arrived to fill the store buffer.
 478          */
 479         while (d->bd_hbuf == NULL) {
 480                 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
 481                         /*
 482                          * A packet(s) either arrived since the previous
 483                          * read or arrived while we were asleep.
 484                          * Rotate the buffers and return what's here.
 485                          */
 486                         ROTATE_BUFFERS(d);
 487                         break;
 488                 }
 489
 490                 /*
 491                  * No data is available, check to see if the bpf device
 492                  * is still pointed at a real interface.  If not, return
 493                  * ENXIO so that the userland process knows to rebind
 494                  * it before using it again.
 495                  */
 496                 if (d->bd_bif == NULL) {
 497                         BPFD_UNLOCK(d);
 498                         return (ENXIO);
 499                 }
 500
 501                 if (ioflag & O_NONBLOCK) {
 502                         BPFD_UNLOCK(d);
 503                         return (EWOULDBLOCK);
 504                 }
 505                 error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
 506                      "bpf", d->bd_rtout);
 507                 if (error == EINTR || error == ERESTART) {
 508                         BPFD_UNLOCK(d);
 509                         return (error);
 510                 }
 511                 if (error == EWOULDBLOCK) {
 512                         /*
 513                          * On a timeout, return what's in the buffer,
 514                          * which may be nothing.  If there is something
 515                          * in the store buffer, we can rotate the buffers.
 516                          */
 517                         if (d->bd_hbuf)
 518                                 /*
 519                                  * We filled up the buffer in between
 520                                  * getting the timeout and arriving
 521                                  * here, so we don't need to rotate.
 522                                  */
 523                                 break;
 524
 525                         if (d->bd_slen == 0) {
 526                                 BPFD_UNLOCK(d);
 527                                 return (0);
 528                         }
 529                         ROTATE_BUFFERS(d);
 530                         break;
 531                 }
 532         }
 533         /*
 534          * At this point, we know we have something in the hold slot.
 535          */
 536         BPFD_UNLOCK(d);
 537
 538         /*
 539          * Move data from hold buffer into user space.
 540          * We know the entire buffer is transferred since
 541          * we checked above that the read buffer is bpf_bufsize bytes.
 542          */
 543         error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
 544
 545         BPFD_LOCK(d);
 546         d->bd_fbuf = d->bd_hbuf;
 547         d->bd_hbuf = NULL;
 548         d->bd_hlen = 0;
 549         BPFD_UNLOCK(d);
 550
 551         return (error);
 552 }
 553
 554
 555 /*
 556  * If there are processes sleeping on this descriptor, wake them up.
 557  */
 558 static __inline void
 559 bpf_wakeup(struct bpf_d *d)
 560 {
 561
 562         BPFD_LOCK_ASSERT(d);
 563         if (d->bd_state == BPF_WAITING) {
 564                 callout_stop(&d->bd_callout);
 565                 d->bd_state = BPF_IDLE;
 566         }
 567         wakeup(d);
 568         if (d->bd_async && d->bd_sig && d->bd_sigio)
 569                 pgsigio(&d->bd_sigio, d->bd_sig, 0);
 570
 571         selwakeuppri(&d->bd_sel, PRINET);
 572         KNOTE_LOCKED(&d->bd_sel.si_note, 0);
 573 }
 574
 575 static void
 576 bpf_timed_out(void *arg)
 577 {
 578         struct bpf_d *d = (struct bpf_d *)arg;
 579
 580         BPFD_LOCK(d);
 581         if (d->bd_state == BPF_WAITING) {
 582                 d->bd_state = BPF_TIMED_OUT;
 583                 if (d->bd_slen != 0)
 584                         bpf_wakeup(d);
 585         }
 586         BPFD_UNLOCK(d);
 587 }
 588
 589 static int
 590 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
 591 {
 592         struct bpf_d *d = dev->si_drv1;
 593         struct ifnet *ifp;
 594         struct mbuf *m;
 595         int error;
 596         struct sockaddr dst;
 597
 598         d->bd_pid = curthread->td_proc->p_pid;
 599         if (d->bd_bif == NULL)
 600                 return (ENXIO);
 601
 602         ifp = d->bd_bif->bif_ifp;
 603
 604         if ((ifp->if_flags & IFF_UP) == 0)
 605                 return (ENETDOWN);
 606
 607         if (uio->uio_resid == 0)
 608                 return (0);
 609
 610         bzero(&dst, sizeof(dst));
 611         error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
 612             &m, &dst, d->bd_wfilter);
 613         if (error)
 614                 return (error);
 615
 616         if (d->bd_hdrcmplt)
 617                 dst.sa_family = pseudo_AF_HDRCMPLT;
 618
 619 #ifdef MAC
 620         BPFD_LOCK(d);
 621         mac_create_mbuf_from_bpfdesc(d, m);
 622         BPFD_UNLOCK(d);
 623 #endif
 624         NET_LOCK_GIANT();
 625         error = (*ifp->if_output)(ifp, m, &dst, NULL);
 626         NET_UNLOCK_GIANT();
 627         /*
 628          * The driver frees the mbuf.
 629          */
 630         return (error);
 631 }
 632
 633 /*
 634  * Reset a descriptor by flushing its packet buffer and clearing the
 635  * receive and drop counts.
 636  */
 637 static void
 638 reset_d(struct bpf_d *d)
 639 {
 640
 641         mtx_assert(&d->bd_mtx, MA_OWNED);
 642         if (d->bd_hbuf) {
 643                 /* Free the hold buffer. */
 644                 d->bd_fbuf = d->bd_hbuf;
 645                 d->bd_hbuf = NULL;
 646         }
 647         d->bd_slen = 0;
 648         d->bd_hlen = 0;
 649         d->bd_rcount = 0;
 650         d->bd_dcount = 0;
 651         d->bd_fcount = 0;
 652 }
 653
 654 /*
 655  *  FIONREAD            Check for read packet available.
 656  *  SIOCGIFADDR         Get interface address - convenient hook to driver.
 657  *  BIOCGBLEN           Get buffer len [for read()].
 658  *  BIOCSETF            Set ethernet read filter.
 659  *  BIOCSETWF           Set ethernet write filter.
 660  *  BIOCFLUSH           Flush read packet buffer.
 661  *  BIOCPROMISC         Put interface into promiscuous mode.
 662  *  BIOCGDLT            Get link layer type.
 663  *  BIOCGETIF           Get interface name.
 664  *  BIOCSETIF           Set interface.
 665  *  BIOCSRTIMEOUT       Set read timeout.
 666  *  BIOCGRTIMEOUT       Get read timeout.
 667  *  BIOCGSTATS          Get packet stats.
 668  *  BIOCIMMEDIATE       Set immediate mode.
 669  *  BIOCVERSION         Get filter language version.
 670  *  BIOCGHDRCMPLT       Get "header already complete" flag
 671  *  BIOCSHDRCMPLT       Set "header already complete" flag
 672  *  BIOCGSEESENT        Get "see packets sent" flag
 673  *  BIOCSSEESENT        Set "see packets sent" flag
 674  *  BIOCLOCK            Set "locked" flag
 675  */
 676 /* ARGSUSED */
 677 static  int
 678 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 679     struct thread *td)
 680 {
 681         struct bpf_d *d = dev->si_drv1;
 682         int error = 0;
 683
 684         /*
 685          * Refresh PID associated with this descriptor.
 686          */
 687         d->bd_pid = td->td_proc->p_pid;
 688         BPFD_LOCK(d);
 689         if (d->bd_state == BPF_WAITING)
 690                 callout_stop(&d->bd_callout);
 691         d->bd_state = BPF_IDLE;
 692         BPFD_UNLOCK(d);
 693
 694         if (d->bd_locked == 1) {
 695                 switch (cmd) {
 696                 case BIOCGBLEN:
 697                 case BIOCFLUSH:
 698                 case BIOCGDLT:
 699                 case BIOCGDLTLIST:
 700                 case BIOCGETIF:
 701                 case BIOCGRTIMEOUT:
 702                 case BIOCGSTATS:
 703                 case BIOCVERSION:
 704                 case BIOCGRSIG:
 705                 case BIOCGHDRCMPLT:
 706                 case FIONREAD:
 707                 case BIOCLOCK:
 708                 case BIOCSRTIMEOUT:
 709                 case BIOCIMMEDIATE:
 710                 case TIOCGPGRP:
 711                         break;
 712                 default:
 713                         return (EPERM);
 714                 }
 715         }
 716         switch (cmd) {
 717
 718         default:
 719                 error = EINVAL;
 720                 break;
 721
 722         /*
 723          * Check for read packet available.
 724          */
 725         case FIONREAD:
 726                 {
 727                         int n;
 728
 729                         BPFD_LOCK(d);
 730                         n = d->bd_slen;
 731                         if (d->bd_hbuf)
 732                                 n += d->bd_hlen;
 733                         BPFD_UNLOCK(d);
 734
 735                         *(int *)addr = n;
 736                         break;
 737                 }
 738
 739         case SIOCGIFADDR:
 740                 {
 741                         struct ifnet *ifp;
 742
 743                         if (d->bd_bif == NULL)
 744                                 error = EINVAL;
 745                         else {
 746                                 ifp = d->bd_bif->bif_ifp;
 747                                 error = (*ifp->if_ioctl)(ifp, cmd, addr);
 748                         }
 749                         break;
 750                 }
 751
 752         /*
 753          * Get buffer len [for read()].
 754          */
 755         case BIOCGBLEN:
 756                 *(u_int *)addr = d->bd_bufsize;
 757                 break;
 758
 759         /*
 760          * Set buffer length.
 761          */
 762         case BIOCSBLEN:
 763                 if (d->bd_bif != NULL)
 764                         error = EINVAL;
 765                 else {
 766                         u_int size = *(u_int *)addr;
 767
 768                         if (size > bpf_maxbufsize)
 769                                 *(u_int *)addr = size = bpf_maxbufsize;
 770                         else if (size < BPF_MINBUFSIZE)
 771                                 *(u_int *)addr = size = BPF_MINBUFSIZE;
 772                         d->bd_bufsize = size;
 773                 }
 774                 break;
 775
 776         /*
 777          * Set link layer read filter.
 778          */
 779         case BIOCSETF:
 780         case BIOCSETWF:
 781                 error = bpf_setf(d, (struct bpf_program *)addr, cmd);
 782                 break;
 783
 784         /*
 785          * Flush read packet buffer.
 786          */
 787         case BIOCFLUSH:
 788                 BPFD_LOCK(d);
 789                 reset_d(d);
 790                 BPFD_UNLOCK(d);
 791                 break;
 792
 793         /*
 794          * Put interface into promiscuous mode.
 795          */
 796         case BIOCPROMISC:
 797                 if (d->bd_bif == NULL) {
 798                         /*
 799                          * No interface attached yet.
 800                          */
 801                         error = EINVAL;
 802                         break;
 803                 }
 804                 if (d->bd_promisc == 0) {
 805                         mtx_lock(&Giant);
 806                         error = ifpromisc(d->bd_bif->bif_ifp, 1);
 807                         mtx_unlock(&Giant);
 808                         if (error == 0)
 809                                 d->bd_promisc = 1;
 810                 }
 811                 break;
 812
 813         /*
 814          * Get current data link type.
 815          */
 816         case BIOCGDLT:
 817                 if (d->bd_bif == NULL)
 818                         error = EINVAL;
 819                 else
 820                         *(u_int *)addr = d->bd_bif->bif_dlt;
 821                 break;
 822
 823         /*
 824          * Get a list of supported data link types.
 825          */
 826         case BIOCGDLTLIST:
 827                 if (d->bd_bif == NULL)
 828                         error = EINVAL;
 829                 else
 830                         error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
 831                 break;
 832
 833         /*
 834          * Set data link type.
 835          */
 836         case BIOCSDLT:
 837                 if (d->bd_bif == NULL)
 838                         error = EINVAL;
 839                 else
 840                         error = bpf_setdlt(d, *(u_int *)addr);
 841                 break;
 842
 843         /*
 844          * Get interface name.
 845          */
 846         case BIOCGETIF:
 847                 if (d->bd_bif == NULL)
 848                         error = EINVAL;
 849                 else {
 850                         struct ifnet *const ifp = d->bd_bif->bif_ifp;
 851                         struct ifreq *const ifr = (struct ifreq *)addr;
 852
 853                         strlcpy(ifr->ifr_name, ifp->if_xname,
 854                             sizeof(ifr->ifr_name));
 855                 }
 856                 break;
 857
 858         /*
 859          * Set interface.
 860          */
 861         case BIOCSETIF:
 862                 error = bpf_setif(d, (struct ifreq *)addr);
 863                 break;
 864
 865         /*
 866          * Set read timeout.
 867          */
 868         case BIOCSRTIMEOUT:
 869                 {
 870                         struct timeval *tv = (struct timeval *)addr;
 871
 872                         /*
 873                          * Subtract 1 tick from tvtohz() since this isn't
 874                          * a one-shot timer.
 875                          */
 876                         if ((error = itimerfix(tv)) == 0)
 877                                 d->bd_rtout = tvtohz(tv) - 1;
 878                         break;
 879                 }
 880
 881         /*
 882          * Get read timeout.
 883          */
 884         case BIOCGRTIMEOUT:
 885                 {
 886                         struct timeval *tv = (struct timeval *)addr;
 887
 888                         tv->tv_sec = d->bd_rtout / hz;
 889                         tv->tv_usec = (d->bd_rtout % hz) * tick;
 890                         break;
 891                 }
 892
 893         /*
 894          * Get packet stats.
 895          */
 896         case BIOCGSTATS:
 897                 {
 898                         struct bpf_stat *bs = (struct bpf_stat *)addr;
 899
 900                         bs->bs_recv = d->bd_rcount;
 901                         bs->bs_drop = d->bd_dcount;
 902                         break;
 903                 }
 904
 905         /*
 906          * Set immediate mode.
 907          */
 908         case BIOCIMMEDIATE:
 909                 d->bd_immediate = *(u_int *)addr;
 910                 break;
 911
 912         case BIOCVERSION:
 913                 {
 914                         struct bpf_version *bv = (struct bpf_version *)addr;
 915
 916                         bv->bv_major = BPF_MAJOR_VERSION;
 917                         bv->bv_minor = BPF_MINOR_VERSION;
 918                         break;
 919                 }
 920
 921         /*
 922          * Get "header already complete" flag
 923          */
 924         case BIOCGHDRCMPLT:
 925                 *(u_int *)addr = d->bd_hdrcmplt;
 926                 break;
 927
 928         case BIOCLOCK:
 929                 d->bd_locked = 1;
 930                 break;
 931         /*
 932          * Set "header already complete" flag
 933          */
 934         case BIOCSHDRCMPLT:
 935                 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
 936                 break;
 937
 938         /*
 939          * Get "see sent packets" flag
 940          */
 941         case BIOCGSEESENT:
 942                 *(u_int *)addr = d->bd_seesent;
 943                 break;
 944
 945         /*
 946          * Set "see sent packets" flag
 947          */
 948         case BIOCSSEESENT:
 949                 d->bd_seesent = *(u_int *)addr;
 950                 break;
 951
 952         case FIONBIO:           /* Non-blocking I/O */
 953                 break;
 954
 955         case FIOASYNC:          /* Send signal on receive packets */
 956                 d->bd_async = *(int *)addr;
 957                 break;
 958
 959         case FIOSETOWN:
 960                 error = fsetown(*(int *)addr, &d->bd_sigio);
 961                 break;
 962
 963         case FIOGETOWN:
 964                 *(int *)addr = fgetown(&d->bd_sigio);
 965                 break;
 966
 967         /* This is deprecated, FIOSETOWN should be used instead. */
 968         case TIOCSPGRP:
 969                 error = fsetown(-(*(int *)addr), &d->bd_sigio);
 970                 break;
 971
 972         /* This is deprecated, FIOGETOWN should be used instead. */
 973         case TIOCGPGRP:
 974                 *(int *)addr = -fgetown(&d->bd_sigio);
 975                 break;
 976
 977         case BIOCSRSIG:         /* Set receive signal */
 978                 {
 979                         u_int sig;
 980
 981                         sig = *(u_int *)addr;
 982
 983                         if (sig >= NSIG)
 984                                 error = EINVAL;
 985                         else
 986                                 d->bd_sig = sig;
 987                         break;
 988                 }
 989         case BIOCGRSIG:
 990                 *(u_int *)addr = d->bd_sig;
 991                 break;
 992         }
 993         return (error);
 994 }
 995
 996 /*
 997  * Set d's packet filter program to fp.  If this file already has a filter,
 998  * free it and replace it.  Returns EINVAL for bogus requests.
 999  */
1000 static int
1001 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1002 {
1003         struct bpf_insn *fcode, *old;
1004         u_int wfilter, flen, size;
1005
1006         if (cmd == BIOCSETWF) {
1007                 old = d->bd_wfilter;
1008                 wfilter = 1;
1009         } else {
1010                 wfilter = 0;
1011                 old = d->bd_rfilter;
1012         }
1013         if (fp->bf_insns == NULL) {
1014                 if (fp->bf_len != 0)
1015                         return (EINVAL);
1016                 BPFD_LOCK(d);
1017                 if (wfilter)
1018                         d->bd_wfilter = NULL;
1019                 else
1020                         d->bd_rfilter = NULL;
1021                 reset_d(d);
1022                 BPFD_UNLOCK(d);
1023                 if (old != NULL)
1024                         free((caddr_t)old, M_BPF);
1025                 return (0);
1026         }
1027         flen = fp->bf_len;
1028         if (flen > bpf_maxinsns)
1029                 return (EINVAL);
1030
1031         size = flen * sizeof(*fp->bf_insns);
1032         fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
1033         if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
1034             bpf_validate(fcode, (int)flen)) {
1035                 BPFD_LOCK(d);
1036                 if (wfilter)
1037                         d->bd_wfilter = fcode;
1038                 else
1039                         d->bd_rfilter = fcode;
1040                 reset_d(d);
1041                 BPFD_UNLOCK(d);
1042                 if (old != NULL)
1043                         free((caddr_t)old, M_BPF);
1044
1045                 return (0);
1046         }
1047         free((caddr_t)fcode, M_BPF);
1048         return (EINVAL);
1049 }
1050
1051 /*
1052  * Detach a file from its current interface (if attached at all) and attach
1053  * to the interface indicated by the name stored in ifr.
1054  * Return an errno or 0.
1055  */
1056 static int
1057 bpf_setif(struct bpf_d *d, struct ifreq *ifr)
1058 {
1059         struct bpf_if *bp;
1060         int error;
1061         struct ifnet *theywant;
1062
1063         theywant = ifunit(ifr->ifr_name);
1064         if (theywant == NULL || theywant->if_bpf == NULL)
1065                 return (ENXIO);
1066
1067         bp = theywant->if_bpf;
1068         /*
1069          * Allocate the packet buffers if we need to.
1070          * If we're already attached to requested interface,
1071          * just flush the buffer.
1072          */
1073         if (d->bd_sbuf == NULL) {
1074                 error = bpf_allocbufs(d);
1075                 if (error != 0)
1076                         return (error);
1077         }
1078         if (bp != d->bd_bif) {
1079                 if (d->bd_bif)
1080                         /*
1081                          * Detach if attached to something else.
1082                          */
1083                         bpf_detachd(d);
1084
1085                 bpf_attachd(d, bp);
1086         }
1087         BPFD_LOCK(d);
1088         reset_d(d);
1089         BPFD_UNLOCK(d);
1090         return (0);
1091 }
1092
1093 /*
1094  * Support for select() and poll() system calls
1095  *
1096  * Return true iff the specific operation will not block indefinitely.
1097  * Otherwise, return false but make a note that a selwakeup() must be done.
1098  */
1099 static int
1100 bpfpoll(struct cdev *dev, int events, struct thread *td)
1101 {
1102         struct bpf_d *d;
1103         int revents;
1104
1105         d = dev->si_drv1;
1106         if (d->bd_bif == NULL)
1107                 return (ENXIO);
1108
1109         /*
1110          * Refresh PID associated with this descriptor.
1111          */
1112         d->bd_pid = td->td_proc->p_pid;
1113         revents = events & (POLLOUT | POLLWRNORM);
1114         BPFD_LOCK(d);
1115         if (events & (POLLIN | POLLRDNORM)) {
1116                 if (bpf_ready(d))
1117                         revents |= events & (POLLIN | POLLRDNORM);
1118                 else {
1119                         selrecord(td, &d->bd_sel);
1120                         /* Start the read timeout if necessary. */
1121                         if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1122                                 callout_reset(&d->bd_callout, d->bd_rtout,
1123                                     bpf_timed_out, d);
1124                                 d->bd_state = BPF_WAITING;
1125                         }
1126                 }
1127         }
1128         BPFD_UNLOCK(d);
1129         return (revents);
1130 }
1131
1132 /*
1133  * Support for kevent() system call.  Register EVFILT_READ filters and
1134  * reject all others.
1135  */
1136 int
1137 bpfkqfilter(struct cdev *dev, struct knote *kn)
1138 {
1139         struct bpf_d *d = (struct bpf_d *)dev->si_drv1;
1140
1141         if (kn->kn_filter != EVFILT_READ)
1142                 return (1);
1143
1144         /*
1145          * Refresh PID associated with this descriptor.
1146          */
1147         d->bd_pid = curthread->td_proc->p_pid;
1148         kn->kn_fop = &bpfread_filtops;
1149         kn->kn_hook = d;
1150         knlist_add(&d->bd_sel.si_note, kn, 0);
1151
1152         return (0);
1153 }
1154
1155 static void
1156 filt_bpfdetach(struct knote *kn)
1157 {
1158         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1159
1160         knlist_remove(&d->bd_sel.si_note, kn, 0);
1161 }
1162
1163 static int
1164 filt_bpfread(struct knote *kn, long hint)
1165 {
1166         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1167         int ready;
1168
1169         BPFD_LOCK_ASSERT(d);
1170         ready = bpf_ready(d);
1171         if (ready) {
1172                 kn->kn_data = d->bd_slen;
1173                 if (d->bd_hbuf)
1174                         kn->kn_data += d->bd_hlen;
1175         }
1176         else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1177                 callout_reset(&d->bd_callout, d->bd_rtout,
1178                     bpf_timed_out, d);
1179                 d->bd_state = BPF_WAITING;
1180         }
1181
1182         return (ready);
1183 }
1184
1185 /*
1186  * Incoming linkage from device drivers.  Process the packet pkt, of length
1187  * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1188  * by each process' filter, and if accepted, stashed into the corresponding
1189  * buffer.
1190  */
1191 void
1192 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
1193 {
1194         struct bpf_d *d;
1195         u_int slen;
1196         int gottime;
1197         struct timeval tv;
1198
1199         gottime = 0;
1200         BPFIF_LOCK(bp);
1201         LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1202                 BPFD_LOCK(d);
1203                 ++d->bd_rcount;
1204                 slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
1205                 if (slen != 0) {
1206                         d->bd_fcount++;
1207                         if (!gottime) {
1208                                 microtime(&tv);
1209                                 gottime = 1;
1210                         }
1211 #ifdef MAC
1212                         if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
1213 #endif
1214                                 catchpacket(d, pkt, pktlen, slen, bcopy, &tv);
1215                 }
1216                 BPFD_UNLOCK(d);
1217         }
1218         BPFIF_UNLOCK(bp);
1219 }
1220
1221 /*
1222  * Copy data from an mbuf chain into a buffer.  This code is derived
1223  * from m_copydata in sys/uipc_mbuf.c.
1224  */
1225 static void
1226 bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
1227 {
1228         const struct mbuf *m;
1229         u_int count;
1230         u_char *dst;
1231
1232         m = src_arg;
1233         dst = dst_arg;
1234         while (len > 0) {
1235                 if (m == NULL)
1236                         panic("bpf_mcopy");
1237                 count = min(m->m_len, len);
1238                 bcopy(mtod(m, void *), dst, count);
1239                 m = m->m_next;
1240                 dst += count;
1241                 len -= count;
1242         }
1243 }
1244
1245 /*
1246  * Incoming linkage from device drivers, when packet is in an mbuf chain.
1247  */
1248 void
1249 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
1250 {
1251         struct bpf_d *d;
1252         u_int pktlen, slen;
1253         int gottime;
1254         struct timeval tv;
1255
1256         gottime = 0;
1257
1258         pktlen = m_length(m, NULL);
1259
1260         BPFIF_LOCK(bp);
1261         LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1262                 if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
1263                         continue;
1264                 BPFD_LOCK(d);
1265                 ++d->bd_rcount;
1266                 slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
1267                 if (slen != 0) {
1268                         d->bd_fcount++;
1269                         if (!gottime) {
1270                                 microtime(&tv);
1271                                 gottime = 1;
1272                         }
1273 #ifdef MAC
1274                         if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
1275 #endif
1276                                 catchpacket(d, (u_char *)m, pktlen, slen,
1277                                     bpf_mcopy, &tv);
1278                 }
1279                 BPFD_UNLOCK(d);
1280         }
1281         BPFIF_UNLOCK(bp);
1282 }
1283
1284 /*
1285  * Incoming linkage from device drivers, when packet is in
1286  * an mbuf chain and to be prepended by a contiguous header.
1287  */
1288 void
1289 bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
1290 {
1291         struct mbuf mb;
1292         struct bpf_d *d;
1293         u_int pktlen, slen;
1294         int gottime;
1295         struct timeval tv;
1296
1297         gottime = 0;
1298
1299         pktlen = m_length(m, NULL);
1300         /*
1301          * Craft on-stack mbuf suitable for passing to bpf_filter.
1302          * Note that we cut corners here; we only setup what's
1303          * absolutely needed--this mbuf should never go anywhere else.
1304          */
1305         mb.m_next = m;
1306         mb.m_data = data;
1307         mb.m_len = dlen;
1308         pktlen += dlen;
1309
1310         BPFIF_LOCK(bp);
1311         LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1312                 if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
1313                         continue;
1314                 BPFD_LOCK(d);
1315                 ++d->bd_rcount;
1316                 slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
1317                 if (slen != 0) {
1318                         d->bd_fcount++;
1319                         if (!gottime) {
1320                                 microtime(&tv);
1321                                 gottime = 1;
1322                         }
1323 #ifdef MAC
1324                         if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
1325 #endif
1326                                 catchpacket(d, (u_char *)&mb, pktlen, slen,
1327                                     bpf_mcopy, &tv);
1328                 }
1329                 BPFD_UNLOCK(d);
1330         }
1331         BPFIF_UNLOCK(bp);
1332 }
1333
1334 /*
1335  * Move the packet data from interface memory (pkt) into the
1336  * store buffer.  "cpfn" is the routine called to do the actual data
1337  * transfer.  bcopy is passed in to copy contiguous chunks, while
1338  * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1339  * pkt is really an mbuf.
1340  */
1341 static void
1342 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
1343     void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
1344 {
1345         struct bpf_hdr *hp;
1346         int totlen, curlen;
1347         int hdrlen = d->bd_bif->bif_hdrlen;
1348         int do_wakeup = 0;
1349
1350         BPFD_LOCK_ASSERT(d);
1351         /*
1352          * Figure out how many bytes to move.  If the packet is
1353          * greater or equal to the snapshot length, transfer that
1354          * much.  Otherwise, transfer the whole packet (unless
1355          * we hit the buffer size limit).
1356          */
1357         totlen = hdrlen + min(snaplen, pktlen);
1358         if (totlen > d->bd_bufsize)
1359                 totlen = d->bd_bufsize;
1360
1361         /*
1362          * Round up the end of the previous packet to the next longword.
1363          */
1364         curlen = BPF_WORDALIGN(d->bd_slen);
1365         if (curlen + totlen > d->bd_bufsize) {
1366                 /*
1367                  * This packet will overflow the storage buffer.
1368                  * Rotate the buffers if we can, then wakeup any
1369                  * pending reads.
1370                  */
1371                 if (d->bd_fbuf == NULL) {
1372                         /*
1373                          * We haven't completed the previous read yet,
1374                          * so drop the packet.
1375                          */
1376                         ++d->bd_dcount;
1377                         return;
1378                 }
1379                 ROTATE_BUFFERS(d);
1380                 do_wakeup = 1;
1381                 curlen = 0;
1382         }
1383         else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
1384                 /*
1385                  * Immediate mode is set, or the read timeout has
1386                  * already expired during a select call.  A packet
1387                  * arrived, so the reader should be woken up.
1388                  */
1389                 do_wakeup = 1;
1390
1391         /*
1392          * Append the bpf header.
1393          */
1394         hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1395         hp->bh_tstamp = *tv;
1396         hp->bh_datalen = pktlen;
1397         hp->bh_hdrlen = hdrlen;
1398         /*
1399          * Copy the packet data into the store buffer and update its length.
1400          */
1401         (*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
1402         d->bd_slen = curlen + totlen;
1403
1404         if (do_wakeup)
1405                 bpf_wakeup(d);
1406 }
1407
1408 /*
1409  * Initialize all nonzero fields of a descriptor.
1410  */
1411 static int
1412 bpf_allocbufs(struct bpf_d *d)
1413 {
1414         d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1415         if (d->bd_fbuf == NULL)
1416                 return (ENOBUFS);
1417
1418         d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1419         if (d->bd_sbuf == NULL) {
1420                 free(d->bd_fbuf, M_BPF);
1421                 return (ENOBUFS);
1422         }
1423         d->bd_slen = 0;
1424         d->bd_hlen = 0;
1425         return (0);
1426 }
1427
1428 /*
1429  * Free buffers currently in use by a descriptor.
1430  * Called on close.
1431  */
1432 static void
1433 bpf_freed(struct bpf_d *d)
1434 {
1435         /*
1436          * We don't need to lock out interrupts since this descriptor has
1437          * been detached from its interface and it yet hasn't been marked
1438          * free.
1439          */
1440         if (d->bd_sbuf != NULL) {
1441                 free(d->bd_sbuf, M_BPF);
1442                 if (d->bd_hbuf != NULL)
1443                         free(d->bd_hbuf, M_BPF);
1444                 if (d->bd_fbuf != NULL)
1445                         free(d->bd_fbuf, M_BPF);
1446         }
1447         if (d->bd_rfilter)
1448                 free((caddr_t)d->bd_rfilter, M_BPF);
1449         if (d->bd_wfilter)
1450                 free((caddr_t)d->bd_wfilter, M_BPF);
1451         mtx_destroy(&d->bd_mtx);
1452 }
1453
1454 /*
1455  * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
1456  * fixed size of the link header (variable length headers not yet supported).
1457  */
1458 void
1459 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1460 {
1461
1462         bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
1463 }
1464
1465 /*
1466  * Attach an interface to bpf.  ifp is a pointer to the structure
1467  * defining the interface to be attached, dlt is the link layer type,
1468  * and hdrlen is the fixed size of the link header (variable length
1469  * headers are not yet supporrted).
1470  */
1471 void
1472 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1473 {
1474         struct bpf_if *bp;
1475
1476         bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1477         if (bp == NULL)
1478                 panic("bpfattach");
1479
1480         LIST_INIT(&bp->bif_dlist);
1481         bp->bif_ifp = ifp;
1482         bp->bif_dlt = dlt;
1483         mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
1484         KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
1485         *driverp = bp;
1486
1487         mtx_lock(&bpf_mtx);
1488         LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
1489         mtx_unlock(&bpf_mtx);
1490
1491         /*
1492          * Compute the length of the bpf header.  This is not necessarily
1493          * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1494          * that the network layer header begins on a longword boundary (for
1495          * performance reasons and to alleviate alignment restrictions).
1496          */
1497         bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1498
1499         if (bootverbose)
1500                 if_printf(ifp, "bpf attached\n");
1501 }
1502
1503 /*
1504  * Detach bpf from an interface.  This involves detaching each descriptor
1505  * associated with the interface, and leaving bd_bif NULL.  Notify each
1506  * descriptor as it's detached so that any sleepers wake up and get
1507  * ENXIO.
1508  */
1509 void
1510 bpfdetach(struct ifnet *ifp)
1511 {
1512         struct bpf_if   *bp;
1513         struct bpf_d    *d;
1514
1515         /* Locate BPF interface information */
1516         mtx_lock(&bpf_mtx);
1517         LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1518                 if (ifp == bp->bif_ifp)
1519                         break;
1520         }
1521
1522         /* Interface wasn't attached */
1523         if ((bp == NULL) || (bp->bif_ifp == NULL)) {
1524                 mtx_unlock(&bpf_mtx);
1525                 printf("bpfdetach: %s was not attached\n", ifp->if_xname);
1526                 return;
1527         }
1528
1529         LIST_REMOVE(bp, bif_next);
1530         mtx_unlock(&bpf_mtx);
1531
1532         while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
1533                 bpf_detachd(d);
1534                 BPFD_LOCK(d);
1535                 bpf_wakeup(d);
1536                 BPFD_UNLOCK(d);
1537         }
1538
1539         mtx_destroy(&bp->bif_mtx);
1540         free(bp, M_BPF);
1541 }
1542
1543 /*
1544  * Get a list of available data link type of the interface.
1545  */
1546 static int
1547 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
1548 {
1549         int n, error;
1550         struct ifnet *ifp;
1551         struct bpf_if *bp;
1552
1553         ifp = d->bd_bif->bif_ifp;
1554         n = 0;
1555         error = 0;
1556         mtx_lock(&bpf_mtx);
1557         LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1558                 if (bp->bif_ifp != ifp)
1559                         continue;
1560                 if (bfl->bfl_list != NULL) {
1561                         if (n >= bfl->bfl_len) {
1562                                 mtx_unlock(&bpf_mtx);
1563                                 return (ENOMEM);
1564                         }
1565                         error = copyout(&bp->bif_dlt,
1566                             bfl->bfl_list + n, sizeof(u_int));
1567                 }
1568                 n++;
1569         }
1570         mtx_unlock(&bpf_mtx);
1571         bfl->bfl_len = n;
1572         return (error);
1573 }
1574
1575 /*
1576  * Set the data link type of a BPF instance.
1577  */
1578 static int
1579 bpf_setdlt(struct bpf_d *d, u_int dlt)
1580 {
1581         int error, opromisc;
1582         struct ifnet *ifp;
1583         struct bpf_if *bp;
1584
1585         if (d->bd_bif->bif_dlt == dlt)
1586                 return (0);
1587         ifp = d->bd_bif->bif_ifp;
1588         mtx_lock(&bpf_mtx);
1589         LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1590                 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
1591                         break;
1592         }
1593         mtx_unlock(&bpf_mtx);
1594         if (bp != NULL) {
1595                 opromisc = d->bd_promisc;
1596                 bpf_detachd(d);
1597                 bpf_attachd(d, bp);
1598                 BPFD_LOCK(d);
1599                 reset_d(d);
1600                 BPFD_UNLOCK(d);
1601                 if (opromisc) {
1602                         error = ifpromisc(bp->bif_ifp, 1);
1603                         if (error)
1604                                 if_printf(bp->bif_ifp,
1605                                         "bpf_setdlt: ifpromisc failed (%d)\n",
1606                                         error);
1607                         else
1608                                 d->bd_promisc = 1;
1609                 }
1610         }
1611         return (bp == NULL ? EINVAL : 0);
1612 }
1613
1614 static void
1615 bpf_clone(void *arg, struct ucred *cred, char *name, int namelen,
1616     struct cdev **dev)
1617 {
1618         int u;
1619
1620         if (*dev != NULL)
1621                 return;
1622         if (dev_stdclone(name, NULL, "bpf", &u) != 1)
1623                 return;
1624         *dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
1625             "bpf%d", u);
1626         dev_ref(*dev);
1627         (*dev)->si_flags |= SI_CHEAPCLONE;
1628         return;
1629 }
1630
1631 static void
1632 bpf_drvinit(void *unused)
1633 {
1634
1635         mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
1636         LIST_INIT(&bpf_iflist);
1637         EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
1638 }
1639
1640 static void
1641 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
1642 {
1643
1644         bzero(d, sizeof(*d));
1645         BPFD_LOCK_ASSERT(bd);
1646         d->bd_immediate = bd->bd_immediate;
1647         d->bd_promisc = bd->bd_promisc;
1648         d->bd_hdrcmplt = bd->bd_hdrcmplt;
1649         d->bd_seesent = bd->bd_seesent;
1650         d->bd_async = bd->bd_async;
1651         d->bd_rcount = bd->bd_rcount;
1652         d->bd_dcount = bd->bd_dcount;
1653         d->bd_fcount = bd->bd_fcount;
1654         d->bd_sig = bd->bd_sig;
1655         d->bd_slen = bd->bd_slen;
1656         d->bd_hlen = bd->bd_hlen;
1657         d->bd_bufsize = bd->bd_bufsize;
1658         d->bd_pid = bd->bd_pid;
1659         strlcpy(d->bd_ifname,
1660             bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
1661         strlcpy(d->bd_pcomm, bd->bd_pcomm, MAXCOMLEN);
1662         d->bd_locked = bd->bd_locked;
1663 }
1664
1665 static int
1666 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
1667 {
1668         struct xbpf_d *xbdbuf, *xbd;
1669         int index, error;
1670         struct bpf_if *bp;
1671         struct bpf_d *bd;
1672
1673         /*
1674          * XXX This is not technically correct. It is possible for non
1675          * privileged users to open bpf devices. It would make sense
1676          * if the users who opened the devices were able to retrieve
1677          * the statistics for them, too.
1678          */
1679         error = suser(req->td);
1680         if (error)
1681                 return (error);
1682         if (req->oldptr == NULL)
1683                 return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
1684         if (bpf_bpfd_cnt == 0)
1685                 return (SYSCTL_OUT(req, 0, 0));
1686         xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
1687         mtx_lock(&bpf_mtx);
1688         if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
1689                 mtx_unlock(&bpf_mtx);
1690                 free(xbdbuf, M_BPF);
1691                 return (ENOMEM);
1692         }
1693         index = 0;
1694         LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1695                 BPFIF_LOCK(bp);
1696                 LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
1697                         xbd = &xbdbuf[index++];
1698                         BPFD_LOCK(bd);
1699                         bpfstats_fill_xbpf(xbd, bd);
1700                         BPFD_UNLOCK(bd);
1701                 }
1702                 BPFIF_UNLOCK(bp);
1703         }
1704         mtx_unlock(&bpf_mtx);
1705         error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
1706         free(xbdbuf, M_BPF);
1707         return (error);
1708 }
1709
1710 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL)
1711
1712 #else /* !DEV_BPF && !NETGRAPH_BPF */
1713 /*
1714  * NOP stubs to allow bpf-using drivers to load and function.
1715  *
1716  * A 'better' implementation would allow the core bpf functionality
1717  * to be loaded at runtime.
1718  */
1719 static struct bpf_if bp_null;
1720
/*
 * Stub used when neither DEV_BPF nor NETGRAPH_BPF is configured:
 * the packet is ignored.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}
1725
/*
 * Stub used when neither DEV_BPF nor NETGRAPH_BPF is configured:
 * the mbuf chain is ignored.
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}
1730
/*
 * Stub used when neither DEV_BPF nor NETGRAPH_BPF is configured:
 * the header and mbuf chain are ignored.
 */
void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}
1735
1736 void
1737 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1738 {
1739
1740         bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
1741 }
1742
/*
 * Stub attach: point the driver's bpf_if pointer at the shared dummy
 * bp_null.  ifp, dlt and hdrlen are ignored.
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

        *driverp = &bp_null;
}
1749
/*
 * Stub detach: nothing was allocated by the stub attach, so there is
 * nothing to undo.
 */
void
bpfdetach(struct ifnet *ifp)
{
}
1754
1755 u_int
1756 bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
1757 {
1758         return -1;      /* "no filter" behaviour */
1759 }
1760
/*
 * Stub validator: reports every program as invalid (returns 0, i.e.
 * false).  Both arguments are ignored.
 */
int
bpf_validate(const struct bpf_insn *f, int len)
{

        return (0);
}
1766
1767 #endif /* !DEV_BPF && !NETGRAPH_BPF */
1768
1769 /*
1770  * ABI compatibility hacks.  Older drivers check if_bpf against NULL
1771  * to see if there are active listeners.  In the new ABI, if_bpf is
1772  * always non-NULL, so bpf_*tap() are always invoked.  We check for
1773  * listeners in these wrappers and call the real functions if needed.
1774  */
1775 #undef bpf_tap
1776 #undef bpf_mtap
1777 #undef bpf_mtap2
1778
1779 void     bpf_tap(struct bpf_if *, u_char *, u_int);
1780 void     bpf_mtap(struct bpf_if *, struct mbuf *);
1781 void     bpf_mtap2(struct bpf_if *, void *, u_int, struct mbuf *);
1782
1783 void
1784 bpf_tap(bp, pkt, pktlen)
1785         struct bpf_if *bp;
1786         u_char *pkt;
1787         u_int pktlen;
1788 {
1789
1790         if (bpf_peers_present(bp))
1791                 bpf_tap_new(bp, pkt, pktlen);
1792 }
1793
/*
 * Old-ABI entry point for mbuf chains.  Calls bpf_mtap_new() only when
 * bpf_peers_present() reports listeners on bp.
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{

        if (bpf_peers_present(bp))
                bpf_mtap_new(bp, m);
}
1803
1804 void
1805 bpf_mtap2(bp, d, l, m)
1806         struct bpf_if *bp;
1807         void *d;
1808         u_int l;
1809         struct mbuf *m;
1810 {
1811
1812         if (bpf_peers_present(bp))
1813                 bpf_mtap2_new(bp, d, l, m);
1814 }