sys/net/bpf.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-3-Clause
   3  *
   4  * Copyright (c) 1990, 1991, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * This code is derived from the Stanford/CMU enet packet filter,
   8  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
   9  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
  10  * Berkeley Laboratory.
  11  *
  12  * Redistribution and use in source and binary forms, with or without
  13  * modification, are permitted provided that the following conditions
  14  * are met:
  15  * 1. Redistributions of source code must retain the above copyright
  16  *    notice, this list of conditions and the following disclaimer.
  17  * 2. Redistributions in binary form must reproduce the above copyright
  18  *    notice, this list of conditions and the following disclaimer in the
  19  *    documentation and/or other materials provided with the distribution.
  20  * 3. Neither the name of the University nor the names of its contributors
  21  *    may be used to endorse or promote products derived from this software
  22  *    without specific prior written permission.
  23  *
  24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34  * SUCH DAMAGE.
  35  *
  36  *      @(#)bpf.c       8.4 (Berkeley) 1/9/95
  37  */
  38
  39 #include <sys/cdefs.h>
  40 __FBSDID("$FreeBSD$");
  41
  42 #include "opt_bpf.h"
  43 #include "opt_compat.h"
  44 #include "opt_ddb.h"
  45 #include "opt_netgraph.h"
  46
  47 #include <sys/types.h>
  48 #include <sys/param.h>
  49 #include <sys/lock.h>
  50 #include <sys/rwlock.h>
  51 #include <sys/systm.h>
  52 #include <sys/conf.h>
  53 #include <sys/fcntl.h>
  54 #include <sys/jail.h>
  55 #include <sys/malloc.h>
  56 #include <sys/mbuf.h>
  57 #include <sys/time.h>
  58 #include <sys/priv.h>
  59 #include <sys/proc.h>
  60 #include <sys/signalvar.h>
  61 #include <sys/filio.h>
  62 #include <sys/sockio.h>
  63 #include <sys/ttycom.h>
  64 #include <sys/uio.h>
  65
  66 #include <sys/event.h>
  67 #include <sys/file.h>
  68 #include <sys/poll.h>
  69 #include <sys/proc.h>
  70
  71 #include <sys/socket.h>
  72
  73 #ifdef DDB
  74 #include <ddb/ddb.h>
  75 #endif
  76
  77 #include <net/if.h>
  78 #include <net/if_var.h>
  79 #include <net/if_dl.h>
  80 #include <net/bpf.h>
  81 #include <net/bpf_buffer.h>
  82 #ifdef BPF_JITTER
  83 #include <net/bpf_jitter.h>
  84 #endif
  85 #include <net/bpf_zerocopy.h>
  86 #include <net/bpfdesc.h>
  87 #include <net/route.h>
  88 #include <net/vnet.h>
  89
  90 #include <netinet/in.h>
  91 #include <netinet/if_ether.h>
  92 #include <sys/kernel.h>
  93 #include <sys/sysctl.h>
  94
  95 #include <net80211/ieee80211_freebsd.h>
  96
  97 #include <security/mac/mac_framework.h>
  98
  99 MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
 100
 101 struct bpf_if {
 102 #define bif_next        bif_ext.bif_next
 103 #define bif_dlist       bif_ext.bif_dlist
 104         struct bpf_if_ext bif_ext;      /* public members */
 105         u_int           bif_dlt;        /* link layer type */
 106         u_int           bif_hdrlen;     /* length of link header */
 107         struct ifnet    *bif_ifp;       /* corresponding interface */
 108         struct rwlock   bif_lock;       /* interface lock */
 109         LIST_HEAD(, bpf_d) bif_wlist;   /* writer-only list */
 110         int             bif_flags;      /* Interface flags */
 111         struct bpf_if   **bif_bpf;      /* Pointer to pointer to us */
 112 };
 113
 114 CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);
 115
 116 #if defined(DEV_BPF) || defined(NETGRAPH_BPF)
 117
 118 #define PRINET  26                      /* interruptible */
 119
 120 #define SIZEOF_BPF_HDR(type)    \
 121     (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))
 122
 123 #ifdef COMPAT_FREEBSD32
 124 #include <sys/mount.h>
 125 #include <compat/freebsd32/freebsd32.h>
 126 #define BPF_ALIGNMENT32 sizeof(int32_t)
 127 #define BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32)
 128
 129 #ifndef BURN_BRIDGES
 130 /*
 131  * 32-bit version of structure prepended to each packet.  We use this header
 132  * instead of the standard one for 32-bit streams.  We mark the a stream as
 133  * 32-bit the first time we see a 32-bit compat ioctl request.
 134  */
 135 struct bpf_hdr32 {
 136         struct timeval32 bh_tstamp;     /* time stamp */
 137         uint32_t        bh_caplen;      /* length of captured portion */
 138         uint32_t        bh_datalen;     /* original length of packet */
 139         uint16_t        bh_hdrlen;      /* length of bpf header (this struct
 140                                            plus alignment padding) */
 141 };
 142 #endif
 143
 144 struct bpf_program32 {
 145         u_int bf_len;
 146         uint32_t bf_insns;
 147 };
 148
 149 struct bpf_dltlist32 {
 150         u_int   bfl_len;
 151         u_int   bfl_list;
 152 };
 153
 154 #define BIOCSETF32      _IOW('B', 103, struct bpf_program32)
 155 #define BIOCSRTIMEOUT32 _IOW('B', 109, struct timeval32)
 156 #define BIOCGRTIMEOUT32 _IOR('B', 110, struct timeval32)
 157 #define BIOCGDLTLIST32  _IOWR('B', 121, struct bpf_dltlist32)
 158 #define BIOCSETWF32     _IOW('B', 123, struct bpf_program32)
 159 #define BIOCSETFNR32    _IOW('B', 130, struct bpf_program32)
 160 #endif
 161
 162 /*
 163  * bpf_iflist is a list of BPF interface structures, each corresponding to a
 164  * specific DLT.  The same network interface might have several BPF interface
 165  * structures registered by different layers in the stack (i.e., 802.11
 166  * frames, ethernet frames, etc).
 167  */
 168 static LIST_HEAD(, bpf_if)      bpf_iflist, bpf_freelist;
 169 static struct mtx       bpf_mtx;                /* bpf global lock */
 170 static int              bpf_bpfd_cnt;
 171
 172 static void     bpf_attachd(struct bpf_d *, struct bpf_if *);
 173 static void     bpf_detachd(struct bpf_d *);
 174 static void     bpf_detachd_locked(struct bpf_d *);
 175 static void     bpf_freed(struct bpf_d *);
 176 static int      bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
 177                     struct sockaddr *, int *, struct bpf_d *);
 178 static int      bpf_setif(struct bpf_d *, struct ifreq *);
 179 static void     bpf_timed_out(void *);
 180 static __inline void
 181                 bpf_wakeup(struct bpf_d *);
 182 static void     catchpacket(struct bpf_d *, u_char *, u_int, u_int,
 183                     void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
 184                     struct bintime *);
 185 static void     reset_d(struct bpf_d *);
 186 static int      bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
 187 static int      bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
 188 static int      bpf_setdlt(struct bpf_d *, u_int);
 189 static void     filt_bpfdetach(struct knote *);
 190 static int      filt_bpfread(struct knote *, long);
 191 static void     bpf_drvinit(void *);
 192 static int      bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
 193
 194 SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
 195 int bpf_maxinsns = BPF_MAXINSNS;
 196 SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
 197     &bpf_maxinsns, 0, "Maximum bpf program instructions");
 198 static int bpf_zerocopy_enable = 0;
 199 SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
 200     &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
 201 static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
 202     bpf_stats_sysctl, "bpf statistics portal");
 203
 204 static VNET_DEFINE(int, bpf_optimize_writers) = 0;
 205 #define V_bpf_optimize_writers VNET(bpf_optimize_writers)
 206 SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RW,
 207     &VNET_NAME(bpf_optimize_writers), 0,
 208     "Do not send packets until BPF program is set");
 209
 210 static  d_open_t        bpfopen;
 211 static  d_read_t        bpfread;
 212 static  d_write_t       bpfwrite;
 213 static  d_ioctl_t       bpfioctl;
 214 static  d_poll_t        bpfpoll;
 215 static  d_kqfilter_t    bpfkqfilter;
 216
 217 static struct cdevsw bpf_cdevsw = {
 218         .d_version =    D_VERSION,
 219         .d_open =       bpfopen,
 220         .d_read =       bpfread,
 221         .d_write =      bpfwrite,
 222         .d_ioctl =      bpfioctl,
 223         .d_poll =       bpfpoll,
 224         .d_name =       "bpf",
 225         .d_kqfilter =   bpfkqfilter,
 226 };
 227
 228 static struct filterops bpfread_filtops = {
 229         .f_isfd = 1,
 230         .f_detach = filt_bpfdetach,
 231         .f_event = filt_bpfread,
 232 };
 233
 234 eventhandler_tag        bpf_ifdetach_cookie = NULL;
 235
 236 /*
 237  * LOCKING MODEL USED BY BPF:
 238  * Locks:
 239  * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
 240  * some global counters and every bpf_if reference.
 241  * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
 242  * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
 243  *   used by bpf_mtap code.
 244  *
 245  * Lock order:
 246  *
 247  * Global lock, interface lock, descriptor lock
 248  *
 249  * We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2]
 250  * working model. In many places (like bpf_detachd) we start with BPF descriptor
 251  * (and we need to at least rlock it to get reliable interface pointer). This
 252  * gives us potential LOR. As a result, we use global lock to protect from bpf_if
 253  * change in every such place.
 254  *
 255  * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
 256  * 3) descriptor main wlock.
 257  * Reading bd_bif can be protected by any of these locks, typically global lock.
 258  *
 259  * Changing read/write BPF filter is protected by the same three locks,
 260  * the same applies for reading.
 261  *
 262  * Sleeping in global lock is not allowed due to bpfdetach() using it.
 263  */
 264
 265 /*
 266  * Wrapper functions for various buffering methods.  If the set of buffer
 267  * modes expands, we will probably want to introduce a switch data structure
 268  * similar to protosw, et.
 269  */
 270 static void
 271 bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
 272     u_int len)
 273 {
 274
 275         BPFD_LOCK_ASSERT(d);
 276
 277         switch (d->bd_bufmode) {
 278         case BPF_BUFMODE_BUFFER:
 279                 return (bpf_buffer_append_bytes(d, buf, offset, src, len));
 280
 281         case BPF_BUFMODE_ZBUF:
 282                 d->bd_zcopy++;
 283                 return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
 284
 285         default:
 286                 panic("bpf_buf_append_bytes");
 287         }
 288 }
 289
 290 static void
 291 bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
 292     u_int len)
 293 {
 294
 295         BPFD_LOCK_ASSERT(d);
 296
 297         switch (d->bd_bufmode) {
 298         case BPF_BUFMODE_BUFFER:
 299                 return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
 300
 301         case BPF_BUFMODE_ZBUF:
 302                 d->bd_zcopy++;
 303                 return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
 304
 305         default:
 306                 panic("bpf_buf_append_mbuf");
 307         }
 308 }
 309
 310 /*
 311  * This function gets called when the free buffer is re-assigned.
 312  */
 313 static void
 314 bpf_buf_reclaimed(struct bpf_d *d)
 315 {
 316
 317         BPFD_LOCK_ASSERT(d);
 318
 319         switch (d->bd_bufmode) {
 320         case BPF_BUFMODE_BUFFER:
 321                 return;
 322
 323         case BPF_BUFMODE_ZBUF:
 324                 bpf_zerocopy_buf_reclaimed(d);
 325                 return;
 326
 327         default:
 328                 panic("bpf_buf_reclaimed");
 329         }
 330 }
 331
 332 /*
 333  * If the buffer mechanism has a way to decide that a held buffer can be made
 334  * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
 335  * returned if the buffer can be discarded, (0) is returned if it cannot.
 336  */
 337 static int
 338 bpf_canfreebuf(struct bpf_d *d)
 339 {
 340
 341         BPFD_LOCK_ASSERT(d);
 342
 343         switch (d->bd_bufmode) {
 344         case BPF_BUFMODE_ZBUF:
 345                 return (bpf_zerocopy_canfreebuf(d));
 346         }
 347         return (0);
 348 }
 349
 350 /*
 351  * Allow the buffer model to indicate that the current store buffer is
 352  * immutable, regardless of the appearance of space.  Return (1) if the
 353  * buffer is writable, and (0) if not.
 354  */
 355 static int
 356 bpf_canwritebuf(struct bpf_d *d)
 357 {
 358         BPFD_LOCK_ASSERT(d);
 359
 360         switch (d->bd_bufmode) {
 361         case BPF_BUFMODE_ZBUF:
 362                 return (bpf_zerocopy_canwritebuf(d));
 363         }
 364         return (1);
 365 }
 366
 367 /*
 368  * Notify buffer model that an attempt to write to the store buffer has
 369  * resulted in a dropped packet, in which case the buffer may be considered
 370  * full.
 371  */
 372 static void
 373 bpf_buffull(struct bpf_d *d)
 374 {
 375
 376         BPFD_LOCK_ASSERT(d);
 377
 378         switch (d->bd_bufmode) {
 379         case BPF_BUFMODE_ZBUF:
 380                 bpf_zerocopy_buffull(d);
 381                 break;
 382         }
 383 }
 384
 385 /*
 386  * Notify the buffer model that a buffer has moved into the hold position.
 387  */
 388 void
 389 bpf_bufheld(struct bpf_d *d)
 390 {
 391
 392         BPFD_LOCK_ASSERT(d);
 393
 394         switch (d->bd_bufmode) {
 395         case BPF_BUFMODE_ZBUF:
 396                 bpf_zerocopy_bufheld(d);
 397                 break;
 398         }
 399 }
 400
 401 static void
 402 bpf_free(struct bpf_d *d)
 403 {
 404
 405         switch (d->bd_bufmode) {
 406         case BPF_BUFMODE_BUFFER:
 407                 return (bpf_buffer_free(d));
 408
 409         case BPF_BUFMODE_ZBUF:
 410                 return (bpf_zerocopy_free(d));
 411
 412         default:
 413                 panic("bpf_buf_free");
 414         }
 415 }
 416
 417 static int
 418 bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
 419 {
 420
 421         if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
 422                 return (EOPNOTSUPP);
 423         return (bpf_buffer_uiomove(d, buf, len, uio));
 424 }
 425
 426 static int
 427 bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
 428 {
 429
 430         if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
 431                 return (EOPNOTSUPP);
 432         return (bpf_buffer_ioctl_sblen(d, i));
 433 }
 434
 435 static int
 436 bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
 437 {
 438
 439         if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
 440                 return (EOPNOTSUPP);
 441         return (bpf_zerocopy_ioctl_getzmax(td, d, i));
 442 }
 443
 444 static int
 445 bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
 446 {
 447
 448         if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
 449                 return (EOPNOTSUPP);
 450         return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
 451 }
 452
 453 static int
 454 bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
 455 {
 456
 457         if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
 458                 return (EOPNOTSUPP);
 459         return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
 460 }
 461
 462 /*
 463  * General BPF functions.
 464  */
 465 static int
 466 bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
 467     struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
 468 {
 469         const struct ieee80211_bpf_params *p;
 470         struct ether_header *eh;
 471         struct mbuf *m;
 472         int error;
 473         int len;
 474         int hlen;
 475         int slen;
 476
 477         /*
 478          * Build a sockaddr based on the data link layer type.
 479          * We do this at this level because the ethernet header
 480          * is copied directly into the data field of the sockaddr.
 481          * In the case of SLIP, there is no header and the packet
 482          * is forwarded as is.
 483          * Also, we are careful to leave room at the front of the mbuf
 484          * for the link level header.
 485          */
 486         switch (linktype) {
 487
 488         case DLT_SLIP:
 489                 sockp->sa_family = AF_INET;
 490                 hlen = 0;
 491                 break;
 492
 493         case DLT_EN10MB:
 494                 sockp->sa_family = AF_UNSPEC;
 495                 /* XXX Would MAXLINKHDR be better? */
 496                 hlen = ETHER_HDR_LEN;
 497                 break;
 498
 499         case DLT_FDDI:
 500                 sockp->sa_family = AF_IMPLINK;
 501                 hlen = 0;
 502                 break;
 503
 504         case DLT_RAW:
 505                 sockp->sa_family = AF_UNSPEC;
 506                 hlen = 0;
 507                 break;
 508
 509         case DLT_NULL:
 510                 /*
 511                  * null interface types require a 4 byte pseudo header which
 512                  * corresponds to the address family of the packet.
 513                  */
 514                 sockp->sa_family = AF_UNSPEC;
 515                 hlen = 4;
 516                 break;
 517
 518         case DLT_ATM_RFC1483:
 519                 /*
 520                  * en atm driver requires 4-byte atm pseudo header.
 521                  * though it isn't standard, vpi:vci needs to be
 522                  * specified anyway.
 523                  */
 524                 sockp->sa_family = AF_UNSPEC;
 525                 hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
 526                 break;
 527
 528         case DLT_PPP:
 529                 sockp->sa_family = AF_UNSPEC;
 530                 hlen = 4;       /* This should match PPP_HDRLEN */
 531                 break;
 532
 533         case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
 534                 sockp->sa_family = AF_IEEE80211;
 535                 hlen = 0;
 536                 break;
 537
 538         case DLT_IEEE802_11_RADIO:      /* IEEE 802.11 wireless w/ phy params */
 539                 sockp->sa_family = AF_IEEE80211;
 540                 sockp->sa_len = 12;     /* XXX != 0 */
 541                 hlen = sizeof(struct ieee80211_bpf_params);
 542                 break;
 543
 544         default:
 545                 return (EIO);
 546         }
 547
 548         len = uio->uio_resid;
 549         if (len < hlen || len - hlen > ifp->if_mtu)
 550                 return (EMSGSIZE);
 551
 552         m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR);
 553         if (m == NULL)
 554                 return (EIO);
 555         m->m_pkthdr.len = m->m_len = len;
 556         *mp = m;
 557
 558         error = uiomove(mtod(m, u_char *), len, uio);
 559         if (error)
 560                 goto bad;
 561
 562         slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
 563         if (slen == 0) {
 564                 error = EPERM;
 565                 goto bad;
 566         }
 567
 568         /* Check for multicast destination */
 569         switch (linktype) {
 570         case DLT_EN10MB:
 571                 eh = mtod(m, struct ether_header *);
 572                 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 573                         if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
 574                             ETHER_ADDR_LEN) == 0)
 575                                 m->m_flags |= M_BCAST;
 576                         else
 577                                 m->m_flags |= M_MCAST;
 578                 }
 579                 if (d->bd_hdrcmplt == 0) {
 580                         memcpy(eh->ether_shost, IF_LLADDR(ifp),
 581                             sizeof(eh->ether_shost));
 582                 }
 583                 break;
 584         }
 585
 586         /*
 587          * Make room for link header, and copy it to sockaddr
 588          */
 589         if (hlen != 0) {
 590                 if (sockp->sa_family == AF_IEEE80211) {
 591                         /*
 592                          * Collect true length from the parameter header
 593                          * NB: sockp is known to be zero'd so if we do a
 594                          *     short copy unspecified parameters will be
 595                          *     zero.
 596                          * NB: packet may not be aligned after stripping
 597                          *     bpf params
 598                          * XXX check ibp_vers
 599                          */
 600                         p = mtod(m, const struct ieee80211_bpf_params *);
 601                         hlen = p->ibp_len;
 602                         if (hlen > sizeof(sockp->sa_data)) {
 603                                 error = EINVAL;
 604                                 goto bad;
 605                         }
 606                 }
 607                 bcopy(mtod(m, const void *), sockp->sa_data, hlen);
 608         }
 609         *hdrlen = hlen;
 610
 611         return (0);
 612 bad:
 613         m_freem(m);
 614         return (error);
 615 }
 616
 617 /*
 618  * Attach file to the bpf interface, i.e. make d listen on bp.
 619  */
 620 static void
 621 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 622 {
 623         int op_w;
 624
 625         BPF_LOCK_ASSERT();
 626
 627         /*
 628          * Save sysctl value to protect from sysctl change
 629          * between reads
 630          */
 631         op_w = V_bpf_optimize_writers || d->bd_writer;
 632
 633         if (d->bd_bif != NULL)
 634                 bpf_detachd_locked(d);
 635         /*
 636          * Point d at bp, and add d to the interface's list.
 637          * Since there are many applications using BPF for
 638          * sending raw packets only (dhcpd, cdpd are good examples)
 639          * we can delay adding d to the list of active listeners until
 640          * some filter is configured.
 641          */
 642
 643         BPFIF_WLOCK(bp);
 644         BPFD_LOCK(d);
 645
 646         d->bd_bif = bp;
 647
 648         if (op_w != 0) {
 649                 /* Add to writers-only list */
 650                 LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
 651                 /*
 652                  * We decrement bd_writer on every filter set operation.
 653                  * First BIOCSETF is done by pcap_open_live() to set up
 654                  * snap length. After that appliation usually sets its own filter
 655                  */
 656                 d->bd_writer = 2;
 657         } else
 658                 LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
 659
 660         BPFD_UNLOCK(d);
 661         BPFIF_WUNLOCK(bp);
 662
 663         bpf_bpfd_cnt++;
 664
 665         CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
 666             __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
 667
 668         if (op_w == 0)
 669                 EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
 670 }
 671
 672 /*
 673  * Check if we need to upgrade our descriptor @d from write-only mode.
 674  */
 675 static int
 676 bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen)
 677 {
 678         int is_snap, need_upgrade;
 679
 680         /*
 681          * Check if we've already upgraded or new filter is empty.
 682          */
 683         if (d->bd_writer == 0 || fcode == NULL)
 684                 return (0);
 685
 686         need_upgrade = 0;
 687
 688         /*
 689          * Check if cmd looks like snaplen setting from
 690          * pcap_bpf.c:pcap_open_live().
 691          * Note we're not checking .k value here:
 692          * while pcap_open_live() definitely sets to non-zero value,
 693          * we'd prefer to treat k=0 (deny ALL) case the same way: e.g.
 694          * do not consider upgrading immediately
 695          */
 696         if (cmd == BIOCSETF && flen == 1 && fcode[0].code == (BPF_RET | BPF_K))
 697                 is_snap = 1;
 698         else
 699                 is_snap = 0;
 700
 701         if (is_snap == 0) {
 702                 /*
 703                  * We're setting first filter and it doesn't look like
 704                  * setting snaplen.  We're probably using bpf directly.
 705                  * Upgrade immediately.
 706                  */
 707                 need_upgrade = 1;
 708         } else {
 709                 /*
 710                  * Do not require upgrade by first BIOCSETF
 711                  * (used to set snaplen) by pcap_open_live().
 712                  */
 713
 714                 if (--d->bd_writer == 0) {
 715                         /*
 716                          * First snaplen filter has already
 717                          * been set. This is probably catch-all
 718                          * filter
 719                          */
 720                         need_upgrade = 1;
 721                 }
 722         }
 723
 724         CTR5(KTR_NET,
 725             "%s: filter function set by pid %d, "
 726             "bd_writer counter %d, snap %d upgrade %d",
 727             __func__, d->bd_pid, d->bd_writer,
 728             is_snap, need_upgrade);
 729
 730         return (need_upgrade);
 731 }
 732
 733 /*
 734  * Add d to the list of active bp filters.
 735  * Requires bpf_attachd() to be called before.
 736  */
 737 static void
 738 bpf_upgraded(struct bpf_d *d)
 739 {
 740         struct bpf_if *bp;
 741
 742         BPF_LOCK_ASSERT();
 743
 744         bp = d->bd_bif;
 745
 746         /*
 747          * Filter can be set several times without specifying interface.
 748          * Mark d as reader and exit.
 749          */
 750         if (bp == NULL) {
 751                 BPFD_LOCK(d);
 752                 d->bd_writer = 0;
 753                 BPFD_UNLOCK(d);
 754                 return;
 755         }
 756
 757         BPFIF_WLOCK(bp);
 758         BPFD_LOCK(d);
 759
 760         /* Remove from writers-only list */
 761         LIST_REMOVE(d, bd_next);
 762         LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
 763         /* Mark d as reader */
 764         d->bd_writer = 0;
 765
 766         BPFD_UNLOCK(d);
 767         BPFIF_WUNLOCK(bp);
 768
 769         CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);
 770
 771         EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
 772 }
 773
 774 /*
 775  * Detach a file from its interface.
 776  */
 777 static void
 778 bpf_detachd(struct bpf_d *d)
 779 {
 780         BPF_LOCK();
 781         bpf_detachd_locked(d);
 782         BPF_UNLOCK();
 783 }
 784
 785 static void
 786 bpf_detachd_locked(struct bpf_d *d)
 787 {
 788         int error;
 789         struct bpf_if *bp;
 790         struct ifnet *ifp;
 791
 792         CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
 793
 794         BPF_LOCK_ASSERT();
 795
 796         /* Check if descriptor is attached */
 797         if ((bp = d->bd_bif) == NULL)
 798                 return;
 799
 800         BPFIF_WLOCK(bp);
 801         BPFD_LOCK(d);
 802
 803         /* Save bd_writer value */
 804         error = d->bd_writer;
 805
 806         /*
 807          * Remove d from the interface's descriptor list.
 808          */
 809         LIST_REMOVE(d, bd_next);
 810
 811         ifp = bp->bif_ifp;
 812         d->bd_bif = NULL;
 813         BPFD_UNLOCK(d);
 814         BPFIF_WUNLOCK(bp);
 815
 816         bpf_bpfd_cnt--;
 817
 818         /* Call event handler iff d is attached */
 819         if (error == 0)
 820                 EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
 821
 822         /*
 823          * Check if this descriptor had requested promiscuous mode.
 824          * If so, turn it off.
 825          */
 826         if (d->bd_promisc) {
 827                 d->bd_promisc = 0;
 828                 CURVNET_SET(ifp->if_vnet);
 829                 error = ifpromisc(ifp, 0);
 830                 CURVNET_RESTORE();
 831                 if (error != 0 && error != ENXIO) {
 832                         /*
 833                          * ENXIO can happen if a pccard is unplugged
 834                          * Something is really wrong if we were able to put
 835                          * the driver into promiscuous mode, but can't
 836                          * take it out.
 837                          */
 838                         if_printf(bp->bif_ifp,
 839                                 "bpf_detach: ifpromisc failed (%d)\n", error);
 840                 }
 841         }
 842 }
 843
 844 /*
 845  * Close the descriptor by detaching it from its interface,
 846  * deallocating its buffers, and marking it free.
 847  */
 848 static void
 849 bpf_dtor(void *data)
 850 {
 851         struct bpf_d *d = data;
 852
 853         BPFD_LOCK(d);
 854         if (d->bd_state == BPF_WAITING)
 855                 callout_stop(&d->bd_callout);
 856         d->bd_state = BPF_IDLE;
 857         BPFD_UNLOCK(d);
 858         funsetown(&d->bd_sigio);
 859         bpf_detachd(d);
 860 #ifdef MAC
 861         mac_bpfdesc_destroy(d);
 862 #endif /* MAC */
 863         seldrain(&d->bd_sel);
 864         knlist_destroy(&d->bd_sel.si_note);
 865         callout_drain(&d->bd_callout);
 866         bpf_freed(d);
 867         free(d, M_BPF);
 868 }
 869
 870 /*
 871  * Open ethernet device.  Returns ENXIO for illegal minor device number,
 872  * EBUSY if file is open by another process.
 873  */
 874 /* ARGSUSED */
 875 static  int
 876 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 877 {
 878         struct bpf_d *d;
 879         int error;
 880
 881         d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
 882         error = devfs_set_cdevpriv(d, bpf_dtor);
 883         if (error != 0) {
 884                 free(d, M_BPF);
 885                 return (error);
 886         }
 887
 888         /*
 889          * For historical reasons, perform a one-time initialization call to
 890          * the buffer routines, even though we're not yet committed to a
 891          * particular buffer method.
 892          */
 893         bpf_buffer_init(d);
 894         if ((flags & FREAD) == 0)
 895                 d->bd_writer = 2;
 896         d->bd_hbuf_in_use = 0;
 897         d->bd_bufmode = BPF_BUFMODE_BUFFER;
 898         d->bd_sig = SIGIO;
 899         d->bd_direction = BPF_D_INOUT;
 900         BPF_PID_REFRESH(d, td);
 901 #ifdef MAC
 902         mac_bpfdesc_init(d);
 903         mac_bpfdesc_create(td->td_ucred, d);
 904 #endif
 905         mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
 906         callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
 907         knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
 908
 909         return (0);
 910 }
 911
 912 /*
 913  *  bpfread - read next chunk of packets from buffers
 914  */
 915 static  int
 916 bpfread(struct cdev *dev, struct uio *uio, int ioflag)
 917 {
 918         struct bpf_d *d;
 919         int error;
 920         int non_block;
 921         int timed_out;
 922
 923         error = devfs_get_cdevpriv((void **)&d);
 924         if (error != 0)
 925                 return (error);
 926
 927         /*
 928          * Restrict application to use a buffer the same size as
 929          * as kernel buffers.
 930          */
 931         if (uio->uio_resid != d->bd_bufsize)
 932                 return (EINVAL);
 933
 934         non_block = ((ioflag & O_NONBLOCK) != 0);
 935
 936         BPFD_LOCK(d);
 937         BPF_PID_REFRESH_CUR(d);
 938         if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
 939                 BPFD_UNLOCK(d);
 940                 return (EOPNOTSUPP);
 941         }
 942         if (d->bd_state == BPF_WAITING)
 943                 callout_stop(&d->bd_callout);
 944         timed_out = (d->bd_state == BPF_TIMED_OUT);
 945         d->bd_state = BPF_IDLE;
 946         while (d->bd_hbuf_in_use) {
 947                 error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
 948                     PRINET|PCATCH, "bd_hbuf", 0);
 949                 if (error != 0) {
 950                         BPFD_UNLOCK(d);
 951                         return (error);
 952                 }
 953         }
 954         /*
 955          * If the hold buffer is empty, then do a timed sleep, which
 956          * ends when the timeout expires or when enough packets
 957          * have arrived to fill the store buffer.
 958          */
 959         while (d->bd_hbuf == NULL) {
 960                 if (d->bd_slen != 0) {
 961                         /*
 962                          * A packet(s) either arrived since the previous
 963                          * read or arrived while we were asleep.
 964                          */
 965                         if (d->bd_immediate || non_block || timed_out) {
 966                                 /*
 967                                  * Rotate the buffers and return what's here
 968                                  * if we are in immediate mode, non-blocking
 969                                  * flag is set, or this descriptor timed out.
 970                                  */
 971                                 ROTATE_BUFFERS(d);
 972                                 break;
 973                         }
 974                 }
 975
 976                 /*
 977                  * No data is available, check to see if the bpf device
 978                  * is still pointed at a real interface.  If not, return
 979                  * ENXIO so that the userland process knows to rebind
 980                  * it before using it again.
 981                  */
 982                 if (d->bd_bif == NULL) {
 983                         BPFD_UNLOCK(d);
 984                         return (ENXIO);
 985                 }
 986
 987                 if (non_block) {
 988                         BPFD_UNLOCK(d);
 989                         return (EWOULDBLOCK);
 990                 }
 991                 error = msleep(d, &d->bd_lock, PRINET|PCATCH,
 992                      "bpf", d->bd_rtout);
 993                 if (error == EINTR || error == ERESTART) {
 994                         BPFD_UNLOCK(d);
 995                         return (error);
 996                 }
 997                 if (error == EWOULDBLOCK) {
 998                         /*
 999                          * On a timeout, return what's in the buffer,
1000                          * which may be nothing.  If there is something
1001                          * in the store buffer, we can rotate the buffers.
1002                          */
1003                         if (d->bd_hbuf)
1004                                 /*
1005                                  * We filled up the buffer in between
1006                                  * getting the timeout and arriving
1007                                  * here, so we don't need to rotate.
1008                                  */
1009                                 break;
1010
1011                         if (d->bd_slen == 0) {
1012                                 BPFD_UNLOCK(d);
1013                                 return (0);
1014                         }
1015                         ROTATE_BUFFERS(d);
1016                         break;
1017                 }
1018         }
1019         /*
1020          * At this point, we know we have something in the hold slot.
1021          */
1022         d->bd_hbuf_in_use = 1;
1023         BPFD_UNLOCK(d);
1024
1025         /*
1026          * Move data from hold buffer into user space.
1027          * We know the entire buffer is transferred since
1028          * we checked above that the read buffer is bpf_bufsize bytes.
1029          *
1030          * We do not have to worry about simultaneous reads because
1031          * we waited for sole access to the hold buffer above.
1032          */
1033         error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
1034
1035         BPFD_LOCK(d);
1036         KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
1037         d->bd_fbuf = d->bd_hbuf;
1038         d->bd_hbuf = NULL;
1039         d->bd_hlen = 0;
1040         bpf_buf_reclaimed(d);
1041         d->bd_hbuf_in_use = 0;
1042         wakeup(&d->bd_hbuf_in_use);
1043         BPFD_UNLOCK(d);
1044
1045         return (error);
1046 }
1047
1048 /*
1049  * If there are processes sleeping on this descriptor, wake them up.
1050  */
1051 static __inline void
1052 bpf_wakeup(struct bpf_d *d)
1053 {
1054
1055         BPFD_LOCK_ASSERT(d);
1056         if (d->bd_state == BPF_WAITING) {
1057                 callout_stop(&d->bd_callout);
1058                 d->bd_state = BPF_IDLE;
1059         }
1060         wakeup(d);
1061         if (d->bd_async && d->bd_sig && d->bd_sigio)
1062                 pgsigio(&d->bd_sigio, d->bd_sig, 0);
1063
1064         selwakeuppri(&d->bd_sel, PRINET);
1065         KNOTE_LOCKED(&d->bd_sel.si_note, 0);
1066 }
1067
1068 static void
1069 bpf_timed_out(void *arg)
1070 {
1071         struct bpf_d *d = (struct bpf_d *)arg;
1072
1073         BPFD_LOCK_ASSERT(d);
1074
1075         if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
1076                 return;
1077         if (d->bd_state == BPF_WAITING) {
1078                 d->bd_state = BPF_TIMED_OUT;
1079                 if (d->bd_slen != 0)
1080                         bpf_wakeup(d);
1081         }
1082 }
1083
1084 static int
1085 bpf_ready(struct bpf_d *d)
1086 {
1087
1088         BPFD_LOCK_ASSERT(d);
1089
1090         if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
1091                 return (1);
1092         if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1093             d->bd_slen != 0)
1094                 return (1);
1095         return (0);
1096 }
1097
1098 static int
1099 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
1100 {
1101         struct bpf_d *d;
1102         struct ifnet *ifp;
1103         struct mbuf *m, *mc;
1104         struct sockaddr dst;
1105         struct route ro;
1106         int error, hlen;
1107
1108         error = devfs_get_cdevpriv((void **)&d);
1109         if (error != 0)
1110                 return (error);
1111
1112         BPF_PID_REFRESH_CUR(d);
1113         d->bd_wcount++;
1114         /* XXX: locking required */
1115         if (d->bd_bif == NULL) {
1116                 d->bd_wdcount++;
1117                 return (ENXIO);
1118         }
1119
1120         ifp = d->bd_bif->bif_ifp;
1121
1122         if ((ifp->if_flags & IFF_UP) == 0) {
1123                 d->bd_wdcount++;
1124                 return (ENETDOWN);
1125         }
1126
1127         if (uio->uio_resid == 0) {
1128                 d->bd_wdcount++;
1129                 return (0);
1130         }
1131
1132         bzero(&dst, sizeof(dst));
1133         m = NULL;
1134         hlen = 0;
1135         /* XXX: bpf_movein() can sleep */
1136         error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
1137             &m, &dst, &hlen, d);
1138         if (error) {
1139                 d->bd_wdcount++;
1140                 return (error);
1141         }
1142         d->bd_wfcount++;
1143         if (d->bd_hdrcmplt)
1144                 dst.sa_family = pseudo_AF_HDRCMPLT;
1145
1146         if (d->bd_feedback) {
1147                 mc = m_dup(m, M_NOWAIT);
1148                 if (mc != NULL)
1149                         mc->m_pkthdr.rcvif = ifp;
1150                 /* Set M_PROMISC for outgoing packets to be discarded. */
1151                 if (d->bd_direction == BPF_D_INOUT)
1152                         m->m_flags |= M_PROMISC;
1153         } else
1154                 mc = NULL;
1155
1156         m->m_pkthdr.len -= hlen;
1157         m->m_len -= hlen;
1158         m->m_data += hlen;      /* XXX */
1159
1160         CURVNET_SET(ifp->if_vnet);
1161 #ifdef MAC
1162         BPFD_LOCK(d);
1163         mac_bpfdesc_create_mbuf(d, m);
1164         if (mc != NULL)
1165                 mac_bpfdesc_create_mbuf(d, mc);
1166         BPFD_UNLOCK(d);
1167 #endif
1168
1169         bzero(&ro, sizeof(ro));
1170         if (hlen != 0) {
1171                 ro.ro_prepend = (u_char *)&dst.sa_data;
1172                 ro.ro_plen = hlen;
1173                 ro.ro_flags = RT_HAS_HEADER;
1174         }
1175
1176         error = (*ifp->if_output)(ifp, m, &dst, &ro);
1177         if (error)
1178                 d->bd_wdcount++;
1179
1180         if (mc != NULL) {
1181                 if (error == 0)
1182                         (*ifp->if_input)(ifp, mc);
1183                 else
1184                         m_freem(mc);
1185         }
1186         CURVNET_RESTORE();
1187
1188         return (error);
1189 }
1190
1191 /*
1192  * Reset a descriptor by flushing its packet buffer and clearing the receive
1193  * and drop counts.  This is doable for kernel-only buffers, but with
1194  * zero-copy buffers, we can't write to (or rotate) buffers that are
1195  * currently owned by userspace.  It would be nice if we could encapsulate
1196  * this logic in the buffer code rather than here.
1197  */
1198 static void
1199 reset_d(struct bpf_d *d)
1200 {
1201
1202         BPFD_LOCK_ASSERT(d);
1203
1204         while (d->bd_hbuf_in_use)
1205                 mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
1206                     "bd_hbuf", 0);
1207         if ((d->bd_hbuf != NULL) &&
1208             (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
1209                 /* Free the hold buffer. */
1210                 d->bd_fbuf = d->bd_hbuf;
1211                 d->bd_hbuf = NULL;
1212                 d->bd_hlen = 0;
1213                 bpf_buf_reclaimed(d);
1214         }
1215         if (bpf_canwritebuf(d))
1216                 d->bd_slen = 0;
1217         d->bd_rcount = 0;
1218         d->bd_dcount = 0;
1219         d->bd_fcount = 0;
1220         d->bd_wcount = 0;
1221         d->bd_wfcount = 0;
1222         d->bd_wdcount = 0;
1223         d->bd_zcopy = 0;
1224 }
1225
1226 /*
1227  *  FIONREAD            Check for read packet available.
1228  *  BIOCGBLEN           Get buffer len [for read()].
1229  *  BIOCSETF            Set read filter.
1230  *  BIOCSETFNR          Set read filter without resetting descriptor.
1231  *  BIOCSETWF           Set write filter.
1232  *  BIOCFLUSH           Flush read packet buffer.
1233  *  BIOCPROMISC         Put interface into promiscuous mode.
1234  *  BIOCGDLT            Get link layer type.
1235  *  BIOCGETIF           Get interface name.
1236  *  BIOCSETIF           Set interface.
1237  *  BIOCSRTIMEOUT       Set read timeout.
1238  *  BIOCGRTIMEOUT       Get read timeout.
1239  *  BIOCGSTATS          Get packet stats.
1240  *  BIOCIMMEDIATE       Set immediate mode.
1241  *  BIOCVERSION         Get filter language version.
1242  *  BIOCGHDRCMPLT       Get "header already complete" flag
1243  *  BIOCSHDRCMPLT       Set "header already complete" flag
1244  *  BIOCGDIRECTION      Get packet direction flag
1245  *  BIOCSDIRECTION      Set packet direction flag
1246  *  BIOCGTSTAMP         Get time stamp format and resolution.
1247  *  BIOCSTSTAMP         Set time stamp format and resolution.
1248  *  BIOCLOCK            Set "locked" flag
1249  *  BIOCFEEDBACK        Set packet feedback mode.
1250  *  BIOCSETZBUF         Set current zero-copy buffer locations.
1251  *  BIOCGETZMAX         Get maximum zero-copy buffer size.
1252  *  BIOCROTZBUF         Force rotation of zero-copy buffer
1253  *  BIOCSETBUFMODE      Set buffer mode.
1254  *  BIOCGETBUFMODE      Get current buffer mode.
1255  */
1256 /* ARGSUSED */
1257 static  int
1258 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
1259     struct thread *td)
1260 {
1261         struct bpf_d *d;
1262         int error;
1263
1264         error = devfs_get_cdevpriv((void **)&d);
1265         if (error != 0)
1266                 return (error);
1267
1268         /*
1269          * Refresh PID associated with this descriptor.
1270          */
1271         BPFD_LOCK(d);
1272         BPF_PID_REFRESH(d, td);
1273         if (d->bd_state == BPF_WAITING)
1274                 callout_stop(&d->bd_callout);
1275         d->bd_state = BPF_IDLE;
1276         BPFD_UNLOCK(d);
1277
1278         if (d->bd_locked == 1) {
1279                 switch (cmd) {
1280                 case BIOCGBLEN:
1281                 case BIOCFLUSH:
1282                 case BIOCGDLT:
1283                 case BIOCGDLTLIST:
1284 #ifdef COMPAT_FREEBSD32
1285                 case BIOCGDLTLIST32:
1286 #endif
1287                 case BIOCGETIF:
1288                 case BIOCGRTIMEOUT:
1289 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1290                 case BIOCGRTIMEOUT32:
1291 #endif
1292                 case BIOCGSTATS:
1293                 case BIOCVERSION:
1294                 case BIOCGRSIG:
1295                 case BIOCGHDRCMPLT:
1296                 case BIOCSTSTAMP:
1297                 case BIOCFEEDBACK:
1298                 case FIONREAD:
1299                 case BIOCLOCK:
1300                 case BIOCSRTIMEOUT:
1301 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1302                 case BIOCSRTIMEOUT32:
1303 #endif
1304                 case BIOCIMMEDIATE:
1305                 case TIOCGPGRP:
1306                 case BIOCROTZBUF:
1307                         break;
1308                 default:
1309                         return (EPERM);
1310                 }
1311         }
1312 #ifdef COMPAT_FREEBSD32
1313         /*
1314          * If we see a 32-bit compat ioctl, mark the stream as 32-bit so
1315          * that it will get 32-bit packet headers.
1316          */
1317         switch (cmd) {
1318         case BIOCSETF32:
1319         case BIOCSETFNR32:
1320         case BIOCSETWF32:
1321         case BIOCGDLTLIST32:
1322         case BIOCGRTIMEOUT32:
1323         case BIOCSRTIMEOUT32:
1324                 BPFD_LOCK(d);
1325                 d->bd_compat32 = 1;
1326                 BPFD_UNLOCK(d);
1327         }
1328 #endif
1329
1330         CURVNET_SET(TD_TO_VNET(td));
1331         switch (cmd) {
1332
1333         default:
1334                 error = EINVAL;
1335                 break;
1336
1337         /*
1338          * Check for read packet available.
1339          */
1340         case FIONREAD:
1341                 {
1342                         int n;
1343
1344                         BPFD_LOCK(d);
1345                         n = d->bd_slen;
1346                         while (d->bd_hbuf_in_use)
1347                                 mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
1348                                     PRINET, "bd_hbuf", 0);
1349                         if (d->bd_hbuf)
1350                                 n += d->bd_hlen;
1351                         BPFD_UNLOCK(d);
1352
1353                         *(int *)addr = n;
1354                         break;
1355                 }
1356
1357         /*
1358          * Get buffer len [for read()].
1359          */
1360         case BIOCGBLEN:
1361                 BPFD_LOCK(d);
1362                 *(u_int *)addr = d->bd_bufsize;
1363                 BPFD_UNLOCK(d);
1364                 break;
1365
1366         /*
1367          * Set buffer length.
1368          */
1369         case BIOCSBLEN:
1370                 error = bpf_ioctl_sblen(d, (u_int *)addr);
1371                 break;
1372
1373         /*
1374          * Set link layer read filter.
1375          */
1376         case BIOCSETF:
1377         case BIOCSETFNR:
1378         case BIOCSETWF:
1379 #ifdef COMPAT_FREEBSD32
1380         case BIOCSETF32:
1381         case BIOCSETFNR32:
1382         case BIOCSETWF32:
1383 #endif
1384                 error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1385                 break;
1386
1387         /*
1388          * Flush read packet buffer.
1389          */
1390         case BIOCFLUSH:
1391                 BPFD_LOCK(d);
1392                 reset_d(d);
1393                 BPFD_UNLOCK(d);
1394                 break;
1395
1396         /*
1397          * Put interface into promiscuous mode.
1398          */
1399         case BIOCPROMISC:
1400                 if (d->bd_bif == NULL) {
1401                         /*
1402                          * No interface attached yet.
1403                          */
1404                         error = EINVAL;
1405                         break;
1406                 }
1407                 if (d->bd_promisc == 0) {
1408                         error = ifpromisc(d->bd_bif->bif_ifp, 1);
1409                         if (error == 0)
1410                                 d->bd_promisc = 1;
1411                 }
1412                 break;
1413
1414         /*
1415          * Get current data link type.
1416          */
1417         case BIOCGDLT:
1418                 BPF_LOCK();
1419                 if (d->bd_bif == NULL)
1420                         error = EINVAL;
1421                 else
1422                         *(u_int *)addr = d->bd_bif->bif_dlt;
1423                 BPF_UNLOCK();
1424                 break;
1425
1426         /*
1427          * Get a list of supported data link types.
1428          */
1429 #ifdef COMPAT_FREEBSD32
1430         case BIOCGDLTLIST32:
1431                 {
1432                         struct bpf_dltlist32 *list32;
1433                         struct bpf_dltlist dltlist;
1434
1435                         list32 = (struct bpf_dltlist32 *)addr;
1436                         dltlist.bfl_len = list32->bfl_len;
1437                         dltlist.bfl_list = PTRIN(list32->bfl_list);
1438                         BPF_LOCK();
1439                         if (d->bd_bif == NULL)
1440                                 error = EINVAL;
1441                         else {
1442                                 error = bpf_getdltlist(d, &dltlist);
1443                                 if (error == 0)
1444                                         list32->bfl_len = dltlist.bfl_len;
1445                         }
1446                         BPF_UNLOCK();
1447                         break;
1448                 }
1449 #endif
1450
1451         case BIOCGDLTLIST:
1452                 BPF_LOCK();
1453                 if (d->bd_bif == NULL)
1454                         error = EINVAL;
1455                 else
1456                         error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1457                 BPF_UNLOCK();
1458                 break;
1459
1460         /*
1461          * Set data link type.
1462          */
1463         case BIOCSDLT:
1464                 BPF_LOCK();
1465                 if (d->bd_bif == NULL)
1466                         error = EINVAL;
1467                 else
1468                         error = bpf_setdlt(d, *(u_int *)addr);
1469                 BPF_UNLOCK();
1470                 break;
1471
1472         /*
1473          * Get interface name.
1474          */
1475         case BIOCGETIF:
1476                 BPF_LOCK();
1477                 if (d->bd_bif == NULL)
1478                         error = EINVAL;
1479                 else {
1480                         struct ifnet *const ifp = d->bd_bif->bif_ifp;
1481                         struct ifreq *const ifr = (struct ifreq *)addr;
1482
1483                         strlcpy(ifr->ifr_name, ifp->if_xname,
1484                             sizeof(ifr->ifr_name));
1485                 }
1486                 BPF_UNLOCK();
1487                 break;
1488
1489         /*
1490          * Set interface.
1491          */
1492         case BIOCSETIF:
1493                 {
1494                         int alloc_buf, size;
1495
1496                         /*
1497                          * Behavior here depends on the buffering model.  If
1498                          * we're using kernel memory buffers, then we can
1499                          * allocate them here.  If we're using zero-copy,
1500                          * then the user process must have registered buffers
1501                          * by the time we get here.
1502                          */
1503                         alloc_buf = 0;
1504                         BPFD_LOCK(d);
1505                         if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
1506                             d->bd_sbuf == NULL)
1507                                 alloc_buf = 1;
1508                         BPFD_UNLOCK(d);
1509                         if (alloc_buf) {
1510                                 size = d->bd_bufsize;
1511                                 error = bpf_buffer_ioctl_sblen(d, &size);
1512                                 if (error != 0)
1513                                         break;
1514                         }
1515                         BPF_LOCK();
1516                         error = bpf_setif(d, (struct ifreq *)addr);
1517                         BPF_UNLOCK();
1518                         break;
1519                 }
1520
1521         /*
1522          * Set read timeout.
1523          */
1524         case BIOCSRTIMEOUT:
1525 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1526         case BIOCSRTIMEOUT32:
1527 #endif
1528                 {
1529                         struct timeval *tv = (struct timeval *)addr;
1530 #if defined(COMPAT_FREEBSD32) && !defined(__mips__)
1531                         struct timeval32 *tv32;
1532                         struct timeval tv64;
1533
1534                         if (cmd == BIOCSRTIMEOUT32) {
1535                                 tv32 = (struct timeval32 *)addr;
1536                                 tv = &tv64;
1537                                 tv->tv_sec = tv32->tv_sec;
1538                                 tv->tv_usec = tv32->tv_usec;
1539                         } else
1540 #endif
1541                                 tv = (struct timeval *)addr;
1542
1543                         /*
1544                          * Subtract 1 tick from tvtohz() since this isn't
1545                          * a one-shot timer.
1546                          */
1547                         if ((error = itimerfix(tv)) == 0)
1548                                 d->bd_rtout = tvtohz(tv) - 1;
1549                         break;
1550                 }
1551
1552         /*
1553          * Get read timeout.
1554          */
1555         case BIOCGRTIMEOUT:
1556 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1557         case BIOCGRTIMEOUT32:
1558 #endif
1559                 {
1560                         struct timeval *tv;
1561 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1562                         struct timeval32 *tv32;
1563                         struct timeval tv64;
1564
1565                         if (cmd == BIOCGRTIMEOUT32)
1566                                 tv = &tv64;
1567                         else
1568 #endif
1569                                 tv = (struct timeval *)addr;
1570
1571                         tv->tv_sec = d->bd_rtout / hz;
1572                         tv->tv_usec = (d->bd_rtout % hz) * tick;
1573 #if defined(COMPAT_FREEBSD32) && defined(__amd64__)
1574                         if (cmd == BIOCGRTIMEOUT32) {
1575                                 tv32 = (struct timeval32 *)addr;
1576                                 tv32->tv_sec = tv->tv_sec;
1577                                 tv32->tv_usec = tv->tv_usec;
1578                         }
1579 #endif
1580
1581                         break;
1582                 }
1583
1584         /*
1585          * Get packet stats.
1586          */
1587         case BIOCGSTATS:
1588                 {
1589                         struct bpf_stat *bs = (struct bpf_stat *)addr;
1590
1591                         /* XXXCSJP overflow */
1592                         bs->bs_recv = d->bd_rcount;
1593                         bs->bs_drop = d->bd_dcount;
1594                         break;
1595                 }
1596
1597         /*
1598          * Set immediate mode.
1599          */
1600         case BIOCIMMEDIATE:
1601                 BPFD_LOCK(d);
1602                 d->bd_immediate = *(u_int *)addr;
1603                 BPFD_UNLOCK(d);
1604                 break;
1605
1606         case BIOCVERSION:
1607                 {
1608                         struct bpf_version *bv = (struct bpf_version *)addr;
1609
1610                         bv->bv_major = BPF_MAJOR_VERSION;
1611                         bv->bv_minor = BPF_MINOR_VERSION;
1612                         break;
1613                 }
1614
1615         /*
1616          * Get "header already complete" flag
1617          */
1618         case BIOCGHDRCMPLT:
1619                 BPFD_LOCK(d);
1620                 *(u_int *)addr = d->bd_hdrcmplt;
1621                 BPFD_UNLOCK(d);
1622                 break;
1623
1624         /*
1625          * Set "header already complete" flag
1626          */
1627         case BIOCSHDRCMPLT:
1628                 BPFD_LOCK(d);
1629                 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
1630                 BPFD_UNLOCK(d);
1631                 break;
1632
1633         /*
1634          * Get packet direction flag
1635          */
1636         case BIOCGDIRECTION:
1637                 BPFD_LOCK(d);
1638                 *(u_int *)addr = d->bd_direction;
1639                 BPFD_UNLOCK(d);
1640                 break;
1641
1642         /*
1643          * Set packet direction flag
1644          */
1645         case BIOCSDIRECTION:
1646                 {
1647                         u_int   direction;
1648
1649                         direction = *(u_int *)addr;
1650                         switch (direction) {
1651                         case BPF_D_IN:
1652                         case BPF_D_INOUT:
1653                         case BPF_D_OUT:
1654                                 BPFD_LOCK(d);
1655                                 d->bd_direction = direction;
1656                                 BPFD_UNLOCK(d);
1657                                 break;
1658                         default:
1659                                 error = EINVAL;
1660                         }
1661                 }
1662                 break;
1663
1664         /*
1665          * Get packet timestamp format and resolution.
1666          */
1667         case BIOCGTSTAMP:
1668                 BPFD_LOCK(d);
1669                 *(u_int *)addr = d->bd_tstamp;
1670                 BPFD_UNLOCK(d);
1671                 break;
1672
1673         /*
1674          * Set packet timestamp format and resolution.
1675          */
1676         case BIOCSTSTAMP:
1677                 {
1678                         u_int   func;
1679
1680                         func = *(u_int *)addr;
1681                         if (BPF_T_VALID(func))
1682                                 d->bd_tstamp = func;
1683                         else
1684                                 error = EINVAL;
1685                 }
1686                 break;
1687
1688         case BIOCFEEDBACK:
1689                 BPFD_LOCK(d);
1690                 d->bd_feedback = *(u_int *)addr;
1691                 BPFD_UNLOCK(d);
1692                 break;
1693
1694         case BIOCLOCK:
1695                 BPFD_LOCK(d);
1696                 d->bd_locked = 1;
1697                 BPFD_UNLOCK(d);
1698                 break;
1699
1700         case FIONBIO:           /* Non-blocking I/O */
1701                 break;
1702
1703         case FIOASYNC:          /* Send signal on receive packets */
1704                 BPFD_LOCK(d);
1705                 d->bd_async = *(int *)addr;
1706                 BPFD_UNLOCK(d);
1707                 break;
1708
1709         case FIOSETOWN:
1710                 /*
1711                  * XXX: Add some sort of locking here?
1712                  * fsetown() can sleep.
1713                  */
1714                 error = fsetown(*(int *)addr, &d->bd_sigio);
1715                 break;
1716
1717         case FIOGETOWN:
1718                 BPFD_LOCK(d);
1719                 *(int *)addr = fgetown(&d->bd_sigio);
1720                 BPFD_UNLOCK(d);
1721                 break;
1722
1723         /* This is deprecated, FIOSETOWN should be used instead. */
1724         case TIOCSPGRP:
1725                 error = fsetown(-(*(int *)addr), &d->bd_sigio);
1726                 break;
1727
1728         /* This is deprecated, FIOGETOWN should be used instead. */
1729         case TIOCGPGRP:
1730                 *(int *)addr = -fgetown(&d->bd_sigio);
1731                 break;
1732
1733         case BIOCSRSIG:         /* Set receive signal */
1734                 {
1735                         u_int sig;
1736
1737                         sig = *(u_int *)addr;
1738
1739                         if (sig >= NSIG)
1740                                 error = EINVAL;
1741                         else {
1742                                 BPFD_LOCK(d);
1743                                 d->bd_sig = sig;
1744                                 BPFD_UNLOCK(d);
1745                         }
1746                         break;
1747                 }
1748         case BIOCGRSIG:
1749                 BPFD_LOCK(d);
1750                 *(u_int *)addr = d->bd_sig;
1751                 BPFD_UNLOCK(d);
1752                 break;
1753
1754         case BIOCGETBUFMODE:
1755                 BPFD_LOCK(d);
1756                 *(u_int *)addr = d->bd_bufmode;
1757                 BPFD_UNLOCK(d);
1758                 break;
1759
1760         case BIOCSETBUFMODE:
1761                 /*
1762                  * Allow the buffering mode to be changed as long as we
1763                  * haven't yet committed to a particular mode.  Our
1764                  * definition of commitment, for now, is whether or not a
1765                  * buffer has been allocated or an interface attached, since
1766                  * that's the point where things get tricky.
1767                  */
1768                 switch (*(u_int *)addr) {
1769                 case BPF_BUFMODE_BUFFER:
1770                         break;
1771
1772                 case BPF_BUFMODE_ZBUF:
1773                         if (bpf_zerocopy_enable)
1774                                 break;
1775                         /* FALLSTHROUGH */
1776
1777                 default:
1778                         CURVNET_RESTORE();
1779                         return (EINVAL);
1780                 }
1781
1782                 BPFD_LOCK(d);
1783                 if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
1784                     d->bd_fbuf != NULL || d->bd_bif != NULL) {
1785                         BPFD_UNLOCK(d);
1786                         CURVNET_RESTORE();
1787                         return (EBUSY);
1788                 }
1789                 d->bd_bufmode = *(u_int *)addr;
1790                 BPFD_UNLOCK(d);
1791                 break;
1792
1793         case BIOCGETZMAX:
1794                 error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
1795                 break;
1796
1797         case BIOCSETZBUF:
1798                 error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
1799                 break;
1800
1801         case BIOCROTZBUF:
1802                 error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
1803                 break;
1804         }
1805         CURVNET_RESTORE();
1806         return (error);
1807 }
1808
1809 /*
1810  * Set d's packet filter program to fp.  If this file already has a filter,
1811  * free it and replace it.  Returns EINVAL for bogus requests.
1812  *
1813  * Note we need global lock here to serialize bpf_setf() and bpf_setif() calls
1814  * since reading d->bd_bif can't be protected by d or interface lock due to
1815  * lock order.
1816  *
1817  * Additionally, we have to acquire interface write lock due to bpf_mtap() uses
1818  * interface read lock to read all filers.
1819  *
1820  */
1821 static int
1822 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1823 {
1824 #ifdef COMPAT_FREEBSD32
1825         struct bpf_program fp_swab;
1826         struct bpf_program32 *fp32;
1827 #endif
1828         struct bpf_insn *fcode, *old;
1829 #ifdef BPF_JITTER
1830         bpf_jit_filter *jfunc, *ofunc;
1831 #endif
1832         size_t size;
1833         u_int flen;
1834         int need_upgrade;
1835
1836 #ifdef COMPAT_FREEBSD32
1837         switch (cmd) {
1838         case BIOCSETF32:
1839         case BIOCSETWF32:
1840         case BIOCSETFNR32:
1841                 fp32 = (struct bpf_program32 *)fp;
1842                 fp_swab.bf_len = fp32->bf_len;
1843                 fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
1844                 fp = &fp_swab;
1845                 switch (cmd) {
1846                 case BIOCSETF32:
1847                         cmd = BIOCSETF;
1848                         break;
1849                 case BIOCSETWF32:
1850                         cmd = BIOCSETWF;
1851                         break;
1852                 }
1853                 break;
1854         }
1855 #endif
1856
1857         fcode = NULL;
1858 #ifdef BPF_JITTER
1859         jfunc = ofunc = NULL;
1860 #endif
1861         need_upgrade = 0;
1862
1863         /*
1864          * Check new filter validness before acquiring any locks.
1865          * Allocate memory for new filter, if needed.
1866          */
1867         flen = fp->bf_len;
1868         if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
1869                 return (EINVAL);
1870         size = flen * sizeof(*fp->bf_insns);
1871         if (size > 0) {
1872                 /* We're setting up new filter.  Copy and check actual data. */
1873                 fcode = malloc(size, M_BPF, M_WAITOK);
1874                 if (copyin(fp->bf_insns, fcode, size) != 0 ||
1875                     !bpf_validate(fcode, flen)) {
1876                         free(fcode, M_BPF);
1877                         return (EINVAL);
1878                 }
1879 #ifdef BPF_JITTER
1880                 /* Filter is copied inside fcode and is perfectly valid. */
1881                 jfunc = bpf_jitter(fcode, flen);
1882 #endif
1883         }
1884
1885         BPF_LOCK();
1886
1887         /*
1888          * Set up new filter.
1889          * Protect filter change by interface lock.
1890          * Additionally, we are protected by global lock here.
1891          */
1892         if (d->bd_bif != NULL)
1893                 BPFIF_WLOCK(d->bd_bif);
1894         BPFD_LOCK(d);
1895         if (cmd == BIOCSETWF) {
1896                 old = d->bd_wfilter;
1897                 d->bd_wfilter = fcode;
1898         } else {
1899                 old = d->bd_rfilter;
1900                 d->bd_rfilter = fcode;
1901 #ifdef BPF_JITTER
1902                 ofunc = d->bd_bfilter;
1903                 d->bd_bfilter = jfunc;
1904 #endif
1905                 if (cmd == BIOCSETF)
1906                         reset_d(d);
1907
1908                 need_upgrade = bpf_check_upgrade(cmd, d, fcode, flen);
1909         }
1910         BPFD_UNLOCK(d);
1911         if (d->bd_bif != NULL)
1912                 BPFIF_WUNLOCK(d->bd_bif);
1913         if (old != NULL)
1914                 free(old, M_BPF);
1915 #ifdef BPF_JITTER
1916         if (ofunc != NULL)
1917                 bpf_destroy_jit_filter(ofunc);
1918 #endif
1919
1920         /* Move d to active readers list. */
1921         if (need_upgrade != 0)
1922                 bpf_upgraded(d);
1923
1924         BPF_UNLOCK();
1925         return (0);
1926 }
1927
1928 /*
1929  * Detach a file from its current interface (if attached at all) and attach
1930  * to the interface indicated by the name stored in ifr.
1931  * Return an errno or 0.
1932  */
1933 static int
1934 bpf_setif(struct bpf_d *d, struct ifreq *ifr)
1935 {
1936         struct bpf_if *bp;
1937         struct ifnet *theywant;
1938
1939         BPF_LOCK_ASSERT();
1940
1941         theywant = ifunit(ifr->ifr_name);
1942         if (theywant == NULL || theywant->if_bpf == NULL)
1943                 return (ENXIO);
1944
1945         bp = theywant->if_bpf;
1946
1947         /* Check if interface is not being detached from BPF */
1948         BPFIF_RLOCK(bp);
1949         if (bp->bif_flags & BPFIF_FLAG_DYING) {
1950                 BPFIF_RUNLOCK(bp);
1951                 return (ENXIO);
1952         }
1953         BPFIF_RUNLOCK(bp);
1954
1955         /*
1956          * At this point, we expect the buffer is already allocated.  If not,
1957          * return an error.
1958          */
1959         switch (d->bd_bufmode) {
1960         case BPF_BUFMODE_BUFFER:
1961         case BPF_BUFMODE_ZBUF:
1962                 if (d->bd_sbuf == NULL)
1963                         return (EINVAL);
1964                 break;
1965
1966         default:
1967                 panic("bpf_setif: bufmode %d", d->bd_bufmode);
1968         }
1969         if (bp != d->bd_bif)
1970                 bpf_attachd(d, bp);
1971         BPFD_LOCK(d);
1972         reset_d(d);
1973         BPFD_UNLOCK(d);
1974         return (0);
1975 }
1976
1977 /*
1978  * Support for select() and poll() system calls
1979  *
1980  * Return true iff the specific operation will not block indefinitely.
1981  * Otherwise, return false but make a note that a selwakeup() must be done.
1982  */
1983 static int
1984 bpfpoll(struct cdev *dev, int events, struct thread *td)
1985 {
1986         struct bpf_d *d;
1987         int revents;
1988
1989         if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
1990                 return (events &
1991                     (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
1992
1993         /*
1994          * Refresh PID associated with this descriptor.
1995          */
1996         revents = events & (POLLOUT | POLLWRNORM);
1997         BPFD_LOCK(d);
1998         BPF_PID_REFRESH(d, td);
1999         if (events & (POLLIN | POLLRDNORM)) {
2000                 if (bpf_ready(d))
2001                         revents |= events & (POLLIN | POLLRDNORM);
2002                 else {
2003                         selrecord(td, &d->bd_sel);
2004                         /* Start the read timeout if necessary. */
2005                         if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
2006                                 callout_reset(&d->bd_callout, d->bd_rtout,
2007                                     bpf_timed_out, d);
2008                                 d->bd_state = BPF_WAITING;
2009                         }
2010                 }
2011         }
2012         BPFD_UNLOCK(d);
2013         return (revents);
2014 }
2015
2016 /*
2017  * Support for kevent() system call.  Register EVFILT_READ filters and
2018  * reject all others.
2019  */
2020 int
2021 bpfkqfilter(struct cdev *dev, struct knote *kn)
2022 {
2023         struct bpf_d *d;
2024
2025         if (devfs_get_cdevpriv((void **)&d) != 0 ||
2026             kn->kn_filter != EVFILT_READ)
2027                 return (1);
2028
2029         /*
2030          * Refresh PID associated with this descriptor.
2031          */
2032         BPFD_LOCK(d);
2033         BPF_PID_REFRESH_CUR(d);
2034         kn->kn_fop = &bpfread_filtops;
2035         kn->kn_hook = d;
2036         knlist_add(&d->bd_sel.si_note, kn, 1);
2037         BPFD_UNLOCK(d);
2038
2039         return (0);
2040 }
2041
2042 static void
2043 filt_bpfdetach(struct knote *kn)
2044 {
2045         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2046
2047         knlist_remove(&d->bd_sel.si_note, kn, 0);
2048 }
2049
2050 static int
2051 filt_bpfread(struct knote *kn, long hint)
2052 {
2053         struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2054         int ready;
2055
2056         BPFD_LOCK_ASSERT(d);
2057         ready = bpf_ready(d);
2058         if (ready) {
2059                 kn->kn_data = d->bd_slen;
2060                 /*
2061                  * Ignore the hold buffer if it is being copied to user space.
2062                  */
2063                 if (!d->bd_hbuf_in_use && d->bd_hbuf)
2064                         kn->kn_data += d->bd_hlen;
2065         } else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
2066                 callout_reset(&d->bd_callout, d->bd_rtout,
2067                     bpf_timed_out, d);
2068                 d->bd_state = BPF_WAITING;
2069         }
2070
2071         return (ready);
2072 }
2073
2074 #define BPF_TSTAMP_NONE         0
2075 #define BPF_TSTAMP_FAST         1
2076 #define BPF_TSTAMP_NORMAL       2
2077 #define BPF_TSTAMP_EXTERN       3
2078
2079 static int
2080 bpf_ts_quality(int tstype)
2081 {
2082
2083         if (tstype == BPF_T_NONE)
2084                 return (BPF_TSTAMP_NONE);
2085         if ((tstype & BPF_T_FAST) != 0)
2086                 return (BPF_TSTAMP_FAST);
2087
2088         return (BPF_TSTAMP_NORMAL);
2089 }
2090
2091 static int
2092 bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
2093 {
2094         struct m_tag *tag;
2095         int quality;
2096
2097         quality = bpf_ts_quality(tstype);
2098         if (quality == BPF_TSTAMP_NONE)
2099                 return (quality);
2100
2101         if (m != NULL) {
2102                 tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
2103                 if (tag != NULL) {
2104                         *bt = *(struct bintime *)(tag + 1);
2105                         return (BPF_TSTAMP_EXTERN);
2106                 }
2107         }
2108         if (quality == BPF_TSTAMP_NORMAL)
2109                 binuptime(bt);
2110         else
2111                 getbinuptime(bt);
2112
2113         return (quality);
2114 }
2115
2116 /*
2117  * Incoming linkage from device drivers.  Process the packet pkt, of length
2118  * pktlen, which is stored in a contiguous buffer.  The packet is parsed
2119  * by each process' filter, and if accepted, stashed into the corresponding
2120  * buffer.
2121  */
2122 void
2123 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
2124 {
2125         struct bintime bt;
2126         struct bpf_d *d;
2127 #ifdef BPF_JITTER
2128         bpf_jit_filter *bf;
2129 #endif
2130         u_int slen;
2131         int gottime;
2132
2133         gottime = BPF_TSTAMP_NONE;
2134
2135         BPFIF_RLOCK(bp);
2136
2137         LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2138                 /*
2139                  * We are not using any locks for d here because:
2140                  * 1) any filter change is protected by interface
2141                  * write lock
2142                  * 2) destroying/detaching d is protected by interface
2143                  * write lock, too
2144                  */
2145
2146                 /* XXX: Do not protect counter for the sake of performance. */
2147                 ++d->bd_rcount;
2148                 /*
2149                  * NB: We dont call BPF_CHECK_DIRECTION() here since there is no
2150                  * way for the caller to indiciate to us whether this packet
2151                  * is inbound or outbound.  In the bpf_mtap() routines, we use
2152                  * the interface pointers on the mbuf to figure it out.
2153                  */
2154 #ifdef BPF_JITTER
2155                 bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
2156                 if (bf != NULL)
2157                         slen = (*(bf->func))(pkt, pktlen, pktlen);
2158                 else
2159 #endif
2160                 slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
2161                 if (slen != 0) {
2162                         /*
2163                          * Filter matches. Let's to acquire write lock.
2164                          */
2165                         BPFD_LOCK(d);
2166
2167                         d->bd_fcount++;
2168                         if (gottime < bpf_ts_quality(d->bd_tstamp))
2169                                 gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
2170 #ifdef MAC
2171                         if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2172 #endif
2173                                 catchpacket(d, pkt, pktlen, slen,
2174                                     bpf_append_bytes, &bt);
2175                         BPFD_UNLOCK(d);
2176                 }
2177         }
2178         BPFIF_RUNLOCK(bp);
2179 }
2180
2181 #define BPF_CHECK_DIRECTION(d, r, i)                            \
2182             (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||   \
2183             ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
2184
2185 /*
2186  * Incoming linkage from device drivers, when packet is in an mbuf chain.
2187  * Locking model is explained in bpf_tap().
2188  */
2189 void
2190 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
2191 {
2192         struct bintime bt;
2193         struct bpf_d *d;
2194 #ifdef BPF_JITTER
2195         bpf_jit_filter *bf;
2196 #endif
2197         u_int pktlen, slen;
2198         int gottime;
2199
2200         /* Skip outgoing duplicate packets. */
2201         if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
2202                 m->m_flags &= ~M_PROMISC;
2203                 return;
2204         }
2205
2206         pktlen = m_length(m, NULL);
2207         gottime = BPF_TSTAMP_NONE;
2208
2209         BPFIF_RLOCK(bp);
2210
2211         LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2212                 if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
2213                         continue;
2214                 ++d->bd_rcount;
2215 #ifdef BPF_JITTER
2216                 bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
2217                 /* XXX We cannot handle multiple mbufs. */
2218                 if (bf != NULL && m->m_next == NULL)
2219                         slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
2220                 else
2221 #endif
2222                 slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
2223                 if (slen != 0) {
2224                         BPFD_LOCK(d);
2225
2226                         d->bd_fcount++;
2227                         if (gottime < bpf_ts_quality(d->bd_tstamp))
2228                                 gottime = bpf_gettime(&bt, d->bd_tstamp, m);
2229 #ifdef MAC
2230                         if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2231 #endif
2232                                 catchpacket(d, (u_char *)m, pktlen, slen,
2233                                     bpf_append_mbuf, &bt);
2234                         BPFD_UNLOCK(d);
2235                 }
2236         }
2237         BPFIF_RUNLOCK(bp);
2238 }
2239
2240 /*
2241  * Incoming linkage from device drivers, when packet is in
2242  * an mbuf chain and to be prepended by a contiguous header.
2243  */
2244 void
2245 bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
2246 {
2247         struct bintime bt;
2248         struct mbuf mb;
2249         struct bpf_d *d;
2250         u_int pktlen, slen;
2251         int gottime;
2252
2253         /* Skip outgoing duplicate packets. */
2254         if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
2255                 m->m_flags &= ~M_PROMISC;
2256                 return;
2257         }
2258
2259         pktlen = m_length(m, NULL);
2260         /*
2261          * Craft on-stack mbuf suitable for passing to bpf_filter.
2262          * Note that we cut corners here; we only setup what's
2263          * absolutely needed--this mbuf should never go anywhere else.
2264          */
2265         mb.m_next = m;
2266         mb.m_data = data;
2267         mb.m_len = dlen;
2268         pktlen += dlen;
2269
2270         gottime = BPF_TSTAMP_NONE;
2271
2272         BPFIF_RLOCK(bp);
2273
2274         LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2275                 if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
2276                         continue;
2277                 ++d->bd_rcount;
2278                 slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
2279                 if (slen != 0) {
2280                         BPFD_LOCK(d);
2281
2282                         d->bd_fcount++;
2283                         if (gottime < bpf_ts_quality(d->bd_tstamp))
2284                                 gottime = bpf_gettime(&bt, d->bd_tstamp, m);
2285 #ifdef MAC
2286                         if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2287 #endif
2288                                 catchpacket(d, (u_char *)&mb, pktlen, slen,
2289                                     bpf_append_mbuf, &bt);
2290                         BPFD_UNLOCK(d);
2291                 }
2292         }
2293         BPFIF_RUNLOCK(bp);
2294 }
2295
2296 #undef  BPF_CHECK_DIRECTION
2297
2298 #undef  BPF_TSTAMP_NONE
2299 #undef  BPF_TSTAMP_FAST
2300 #undef  BPF_TSTAMP_NORMAL
2301 #undef  BPF_TSTAMP_EXTERN
2302
2303 static int
2304 bpf_hdrlen(struct bpf_d *d)
2305 {
2306         int hdrlen;
2307
2308         hdrlen = d->bd_bif->bif_hdrlen;
2309 #ifndef BURN_BRIDGES
2310         if (d->bd_tstamp == BPF_T_NONE ||
2311             BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
2312 #ifdef COMPAT_FREEBSD32
2313                 if (d->bd_compat32)
2314                         hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
2315                 else
2316 #endif
2317                         hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
2318         else
2319 #endif
2320                 hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
2321 #ifdef COMPAT_FREEBSD32
2322         if (d->bd_compat32)
2323                 hdrlen = BPF_WORDALIGN32(hdrlen);
2324         else
2325 #endif
2326                 hdrlen = BPF_WORDALIGN(hdrlen);
2327
2328         return (hdrlen - d->bd_bif->bif_hdrlen);
2329 }
2330
2331 static void
2332 bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
2333 {
2334         struct bintime bt2, boottimebin;
2335         struct timeval tsm;
2336         struct timespec tsn;
2337
2338         if ((tstype & BPF_T_MONOTONIC) == 0) {
2339                 bt2 = *bt;
2340                 getboottimebin(&boottimebin);
2341                 bintime_add(&bt2, &boottimebin);
2342                 bt = &bt2;
2343         }
2344         switch (BPF_T_FORMAT(tstype)) {
2345         case BPF_T_MICROTIME:
2346                 bintime2timeval(bt, &tsm);
2347                 ts->bt_sec = tsm.tv_sec;
2348                 ts->bt_frac = tsm.tv_usec;
2349                 break;
2350         case BPF_T_NANOTIME:
2351                 bintime2timespec(bt, &tsn);
2352                 ts->bt_sec = tsn.tv_sec;
2353                 ts->bt_frac = tsn.tv_nsec;
2354                 break;
2355         case BPF_T_BINTIME:
2356                 ts->bt_sec = bt->sec;
2357                 ts->bt_frac = bt->frac;
2358                 break;
2359         }
2360 }
2361
2362 /*
2363  * Move the packet data from interface memory (pkt) into the
2364  * store buffer.  "cpfn" is the routine called to do the actual data
2365  * transfer.  bcopy is passed in to copy contiguous chunks, while
2366  * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
2367  * pkt is really an mbuf.
2368  */
2369 static void
2370 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
2371     void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
2372     struct bintime *bt)
2373 {
2374         struct bpf_xhdr hdr;
2375 #ifndef BURN_BRIDGES
2376         struct bpf_hdr hdr_old;
2377 #ifdef COMPAT_FREEBSD32
2378         struct bpf_hdr32 hdr32_old;
2379 #endif
2380 #endif
2381         int caplen, curlen, hdrlen, totlen;
2382         int do_wakeup = 0;
2383         int do_timestamp;
2384         int tstype;
2385
2386         BPFD_LOCK_ASSERT(d);
2387
2388         /*
2389          * Detect whether user space has released a buffer back to us, and if
2390          * so, move it from being a hold buffer to a free buffer.  This may
2391          * not be the best place to do it (for example, we might only want to
2392          * run this check if we need the space), but for now it's a reliable
2393          * spot to do it.
2394          */
2395         if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
2396                 d->bd_fbuf = d->bd_hbuf;
2397                 d->bd_hbuf = NULL;
2398                 d->bd_hlen = 0;
2399                 bpf_buf_reclaimed(d);
2400         }
2401
2402         /*
2403          * Figure out how many bytes to move.  If the packet is
2404          * greater or equal to the snapshot length, transfer that
2405          * much.  Otherwise, transfer the whole packet (unless
2406          * we hit the buffer size limit).
2407          */
2408         hdrlen = bpf_hdrlen(d);
2409         totlen = hdrlen + min(snaplen, pktlen);
2410         if (totlen > d->bd_bufsize)
2411                 totlen = d->bd_bufsize;
2412
2413         /*
2414          * Round up the end of the previous packet to the next longword.
2415          *
2416          * Drop the packet if there's no room and no hope of room
2417          * If the packet would overflow the storage buffer or the storage
2418          * buffer is considered immutable by the buffer model, try to rotate
2419          * the buffer and wakeup pending processes.
2420          */
2421 #ifdef COMPAT_FREEBSD32
2422         if (d->bd_compat32)
2423                 curlen = BPF_WORDALIGN32(d->bd_slen);
2424         else
2425 #endif
2426                 curlen = BPF_WORDALIGN(d->bd_slen);
2427         if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
2428                 if (d->bd_fbuf == NULL) {
2429                         /*
2430                          * There's no room in the store buffer, and no
2431                          * prospect of room, so drop the packet.  Notify the
2432                          * buffer model.
2433                          */
2434                         bpf_buffull(d);
2435                         ++d->bd_dcount;
2436                         return;
2437                 }
2438                 KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
2439                 ROTATE_BUFFERS(d);
2440                 do_wakeup = 1;
2441                 curlen = 0;
2442         } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
2443                 /*
2444                  * Immediate mode is set, or the read timeout has already
2445                  * expired during a select call.  A packet arrived, so the
2446                  * reader should be woken up.
2447                  */
2448                 do_wakeup = 1;
2449         caplen = totlen - hdrlen;
2450         tstype = d->bd_tstamp;
2451         do_timestamp = tstype != BPF_T_NONE;
2452 #ifndef BURN_BRIDGES
2453         if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
2454                 struct bpf_ts ts;
2455                 if (do_timestamp)
2456                         bpf_bintime2ts(bt, &ts, tstype);
2457 #ifdef COMPAT_FREEBSD32
2458                 if (d->bd_compat32) {
2459                         bzero(&hdr32_old, sizeof(hdr32_old));
2460                         if (do_timestamp) {
2461                                 hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
2462                                 hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
2463                         }
2464                         hdr32_old.bh_datalen = pktlen;
2465                         hdr32_old.bh_hdrlen = hdrlen;
2466                         hdr32_old.bh_caplen = caplen;
2467                         bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
2468                             sizeof(hdr32_old));
2469                         goto copy;
2470                 }
2471 #endif
2472                 bzero(&hdr_old, sizeof(hdr_old));
2473                 if (do_timestamp) {
2474                         hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
2475                         hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
2476                 }
2477                 hdr_old.bh_datalen = pktlen;
2478                 hdr_old.bh_hdrlen = hdrlen;
2479                 hdr_old.bh_caplen = caplen;
2480                 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
2481                     sizeof(hdr_old));
2482                 goto copy;
2483         }
2484 #endif
2485
2486         /*
2487          * Append the bpf header.  Note we append the actual header size, but
2488          * move forward the length of the header plus padding.
2489          */
2490         bzero(&hdr, sizeof(hdr));
2491         if (do_timestamp)
2492                 bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
2493         hdr.bh_datalen = pktlen;
2494         hdr.bh_hdrlen = hdrlen;
2495         hdr.bh_caplen = caplen;
2496         bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
2497
2498         /*
2499          * Copy the packet data into the store buffer and update its length.
2500          */
2501 #ifndef BURN_BRIDGES
2502 copy:
2503 #endif
2504         (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
2505         d->bd_slen = curlen + totlen;
2506
2507         if (do_wakeup)
2508                 bpf_wakeup(d);
2509 }
2510
2511 /*
2512  * Free buffers currently in use by a descriptor.
2513  * Called on close.
2514  */
2515 static void
2516 bpf_freed(struct bpf_d *d)
2517 {
2518
2519         /*
2520          * We don't need to lock out interrupts since this descriptor has
2521          * been detached from its interface and it yet hasn't been marked
2522          * free.
2523          */
2524         bpf_free(d);
2525         if (d->bd_rfilter != NULL) {
2526                 free((caddr_t)d->bd_rfilter, M_BPF);
2527 #ifdef BPF_JITTER
2528                 if (d->bd_bfilter != NULL)
2529                         bpf_destroy_jit_filter(d->bd_bfilter);
2530 #endif
2531         }
2532         if (d->bd_wfilter != NULL)
2533                 free((caddr_t)d->bd_wfilter, M_BPF);
2534         mtx_destroy(&d->bd_lock);
2535 }
2536
2537 /*
2538  * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
2539  * fixed size of the link header (variable length headers not yet supported).
2540  */
2541 void
2542 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2543 {
2544
2545         bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
2546 }
2547
2548 /*
2549  * Attach an interface to bpf.  ifp is a pointer to the structure
2550  * defining the interface to be attached, dlt is the link layer type,
2551  * and hdrlen is the fixed size of the link header (variable length
2552  * headers are not yet supporrted).
2553  */
2554 void
2555 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
2556 {
2557         struct bpf_if *bp;
2558
2559         bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
2560         if (bp == NULL)
2561                 panic("bpfattach");
2562
2563         LIST_INIT(&bp->bif_dlist);
2564         LIST_INIT(&bp->bif_wlist);
2565         bp->bif_ifp = ifp;
2566         bp->bif_dlt = dlt;
2567         rw_init(&bp->bif_lock, "bpf interface lock");
2568         KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
2569         bp->bif_bpf = driverp;
2570         *driverp = bp;
2571
2572         BPF_LOCK();
2573         LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
2574         BPF_UNLOCK();
2575
2576         bp->bif_hdrlen = hdrlen;
2577
2578         if (bootverbose && IS_DEFAULT_VNET(curvnet))
2579                 if_printf(ifp, "bpf attached\n");
2580 }
2581
2582 #ifdef VIMAGE
2583 /*
2584  * When moving interfaces between vnet instances we need a way to
2585  * query the dlt and hdrlen before detach so we can re-attch the if_bpf
2586  * after the vmove.  We unfortunately have no device driver infrastructure
2587  * to query the interface for these values after creation/attach, thus
2588  * add this as a workaround.
2589  */
2590 int
2591 bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen)
2592 {
2593
2594         if (bp == NULL)
2595                 return (ENXIO);
2596         if (bif_dlt == NULL && bif_hdrlen == NULL)
2597                 return (0);
2598
2599         if (bif_dlt != NULL)
2600                 *bif_dlt = bp->bif_dlt;
2601         if (bif_hdrlen != NULL)
2602                 *bif_hdrlen = bp->bif_hdrlen;
2603
2604         return (0);
2605 }
2606 #endif
2607
2608 /*
2609  * Detach bpf from an interface. This involves detaching each descriptor
2610  * associated with the interface. Notify each descriptor as it's detached
2611  * so that any sleepers wake up and get ENXIO.
2612  */
2613 void
2614 bpfdetach(struct ifnet *ifp)
2615 {
2616         struct bpf_if   *bp, *bp_temp;
2617         struct bpf_d    *d;
2618         int ndetached;
2619
2620         ndetached = 0;
2621
2622         BPF_LOCK();
2623         /* Find all bpf_if struct's which reference ifp and detach them. */
2624         LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
2625                 if (ifp != bp->bif_ifp)
2626                         continue;
2627
2628                 LIST_REMOVE(bp, bif_next);
2629                 /* Add to to-be-freed list */
2630                 LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);
2631
2632                 ndetached++;
2633                 /*
2634                  * Delay freeing bp till interface is detached
2635                  * and all routes through this interface are removed.
2636                  * Mark bp as detached to restrict new consumers.
2637                  */
2638                 BPFIF_WLOCK(bp);
2639                 bp->bif_flags |= BPFIF_FLAG_DYING;
2640                 *bp->bif_bpf = NULL;
2641                 BPFIF_WUNLOCK(bp);
2642
2643                 CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p",
2644                     __func__, bp->bif_dlt, bp, ifp);
2645
2646                 /* Free common descriptors */
2647                 while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
2648                         bpf_detachd_locked(d);
2649                         BPFD_LOCK(d);
2650                         bpf_wakeup(d);
2651                         BPFD_UNLOCK(d);
2652                 }
2653
2654                 /* Free writer-only descriptors */
2655                 while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
2656                         bpf_detachd_locked(d);
2657                         BPFD_LOCK(d);
2658                         bpf_wakeup(d);
2659                         BPFD_UNLOCK(d);
2660                 }
2661         }
2662         BPF_UNLOCK();
2663
2664 #ifdef INVARIANTS
2665         if (ndetached == 0)
2666                 printf("bpfdetach: %s was not attached\n", ifp->if_xname);
2667 #endif
2668 }
2669
2670 /*
2671  * Interface departure handler.
2672  * Note departure event does not guarantee interface is going down.
2673  * Interface renaming is currently done via departure/arrival event set.
2674  *
2675  * Departure handled is called after all routes pointing to
2676  * given interface are removed and interface is in down state
2677  * restricting any packets to be sent/received. We assume it is now safe
2678  * to free data allocated by BPF.
2679  */
2680 static void
2681 bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
2682 {
2683         struct bpf_if *bp, *bp_temp;
2684         int nmatched = 0;
2685
2686         /* Ignore ifnet renaming. */
2687         if (ifp->if_flags & IFF_RENAMING)
2688                 return;
2689
2690         BPF_LOCK();
2691         /*
2692          * Find matching entries in free list.
2693          * Nothing should be found if bpfdetach() was not called.
2694          */
2695         LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
2696                 if (ifp != bp->bif_ifp)
2697                         continue;
2698
2699                 CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
2700                     __func__, bp, ifp);
2701
2702                 LIST_REMOVE(bp, bif_next);
2703
2704                 rw_destroy(&bp->bif_lock);
2705                 free(bp, M_BPF);
2706
2707                 nmatched++;
2708         }
2709         BPF_UNLOCK();
2710 }
2711
2712 /*
2713  * Get a list of available data link type of the interface.
2714  */
2715 static int
2716 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
2717 {
2718         struct ifnet *ifp;
2719         struct bpf_if *bp;
2720         u_int *lst;
2721         int error, n, n1;
2722
2723         BPF_LOCK_ASSERT();
2724
2725         ifp = d->bd_bif->bif_ifp;
2726 again:
2727         n1 = 0;
2728         LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2729                 if (bp->bif_ifp == ifp)
2730                         n1++;
2731         }
2732         if (bfl->bfl_list == NULL) {
2733                 bfl->bfl_len = n1;
2734                 return (0);
2735         }
2736         if (n1 > bfl->bfl_len)
2737                 return (ENOMEM);
2738         BPF_UNLOCK();
2739         lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
2740         n = 0;
2741         BPF_LOCK();
2742         LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2743                 if (bp->bif_ifp != ifp)
2744                         continue;
2745                 if (n >= n1) {
2746                         free(lst, M_TEMP);
2747                         goto again;
2748                 }
2749                 lst[n] = bp->bif_dlt;
2750                 n++;
2751         }
2752         BPF_UNLOCK();
2753         error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
2754         free(lst, M_TEMP);
2755         BPF_LOCK();
2756         bfl->bfl_len = n;
2757         return (error);
2758 }
2759
2760 /*
2761  * Set the data link type of a BPF instance.
2762  */
2763 static int
2764 bpf_setdlt(struct bpf_d *d, u_int dlt)
2765 {
2766         int error, opromisc;
2767         struct ifnet *ifp;
2768         struct bpf_if *bp;
2769
2770         BPF_LOCK_ASSERT();
2771
2772         if (d->bd_bif->bif_dlt == dlt)
2773                 return (0);
2774         ifp = d->bd_bif->bif_ifp;
2775
2776         LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2777                 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
2778                         break;
2779         }
2780
2781         if (bp != NULL) {
2782                 opromisc = d->bd_promisc;
2783                 bpf_attachd(d, bp);
2784                 BPFD_LOCK(d);
2785                 reset_d(d);
2786                 BPFD_UNLOCK(d);
2787                 if (opromisc) {
2788                         error = ifpromisc(bp->bif_ifp, 1);
2789                         if (error)
2790                                 if_printf(bp->bif_ifp,
2791                                         "bpf_setdlt: ifpromisc failed (%d)\n",
2792                                         error);
2793                         else
2794                                 d->bd_promisc = 1;
2795                 }
2796         }
2797         return (bp == NULL ? EINVAL : 0);
2798 }
2799
2800 static void
2801 bpf_drvinit(void *unused)
2802 {
2803         struct cdev *dev;
2804
2805         mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
2806         LIST_INIT(&bpf_iflist);
2807         LIST_INIT(&bpf_freelist);
2808
2809         dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
2810         /* For compatibility */
2811         make_dev_alias(dev, "bpf0");
2812
2813         /* Register interface departure handler */
2814         bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
2815                     ifnet_departure_event, bpf_ifdetach, NULL,
2816                     EVENTHANDLER_PRI_ANY);
2817 }
2818
2819 /*
2820  * Zero out the various packet counters associated with all of the bpf
2821  * descriptors.  At some point, we will probably want to get a bit more
2822  * granular and allow the user to specify descriptors to be zeroed.
2823  */
2824 static void
2825 bpf_zero_counters(void)
2826 {
2827         struct bpf_if *bp;
2828         struct bpf_d *bd;
2829
2830         BPF_LOCK();
2831         LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2832                 BPFIF_RLOCK(bp);
2833                 LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
2834                         BPFD_LOCK(bd);
2835                         bd->bd_rcount = 0;
2836                         bd->bd_dcount = 0;
2837                         bd->bd_fcount = 0;
2838                         bd->bd_wcount = 0;
2839                         bd->bd_wfcount = 0;
2840                         bd->bd_zcopy = 0;
2841                         BPFD_UNLOCK(bd);
2842                 }
2843                 BPFIF_RUNLOCK(bp);
2844         }
2845         BPF_UNLOCK();
2846 }
2847
2848 /*
2849  * Fill filter statistics
2850  */
2851 static void
2852 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
2853 {
2854
2855         bzero(d, sizeof(*d));
2856         BPFD_LOCK_ASSERT(bd);
2857         d->bd_structsize = sizeof(*d);
2858         /* XXX: reading should be protected by global lock */
2859         d->bd_immediate = bd->bd_immediate;
2860         d->bd_promisc = bd->bd_promisc;
2861         d->bd_hdrcmplt = bd->bd_hdrcmplt;
2862         d->bd_direction = bd->bd_direction;
2863         d->bd_feedback = bd->bd_feedback;
2864         d->bd_async = bd->bd_async;
2865         d->bd_rcount = bd->bd_rcount;
2866         d->bd_dcount = bd->bd_dcount;
2867         d->bd_fcount = bd->bd_fcount;
2868         d->bd_sig = bd->bd_sig;
2869         d->bd_slen = bd->bd_slen;
2870         d->bd_hlen = bd->bd_hlen;
2871         d->bd_bufsize = bd->bd_bufsize;
2872         d->bd_pid = bd->bd_pid;
2873         strlcpy(d->bd_ifname,
2874             bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
2875         d->bd_locked = bd->bd_locked;
2876         d->bd_wcount = bd->bd_wcount;
2877         d->bd_wdcount = bd->bd_wdcount;
2878         d->bd_wfcount = bd->bd_wfcount;
2879         d->bd_zcopy = bd->bd_zcopy;
2880         d->bd_bufmode = bd->bd_bufmode;
2881 }
2882
2883 /*
2884  * Handle `netstat -B' stats request
2885  */
2886 static int
2887 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
2888 {
2889         static const struct xbpf_d zerostats;
2890         struct xbpf_d *xbdbuf, *xbd, tempstats;
2891         int index, error;
2892         struct bpf_if *bp;
2893         struct bpf_d *bd;
2894
2895         /*
2896          * XXX This is not technically correct. It is possible for non
2897          * privileged users to open bpf devices. It would make sense
2898          * if the users who opened the devices were able to retrieve
2899          * the statistics for them, too.
2900          */
2901         error = priv_check(req->td, PRIV_NET_BPF);
2902         if (error)
2903                 return (error);
2904         /*
2905          * Check to see if the user is requesting that the counters be
2906          * zeroed out.  Explicitly check that the supplied data is zeroed,
2907          * as we aren't allowing the user to set the counters currently.
2908          */
2909         if (req->newptr != NULL) {
2910                 if (req->newlen != sizeof(tempstats))
2911                         return (EINVAL);
2912                 memset(&tempstats, 0, sizeof(tempstats));
2913                 error = SYSCTL_IN(req, &tempstats, sizeof(tempstats));
2914                 if (error)
2915                         return (error);
2916                 if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0)
2917                         return (EINVAL);
2918                 bpf_zero_counters();
2919                 return (0);
2920         }
2921         if (req->oldptr == NULL)
2922                 return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
2923         if (bpf_bpfd_cnt == 0)
2924                 return (SYSCTL_OUT(req, 0, 0));
2925         xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
2926         BPF_LOCK();
2927         if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
2928                 BPF_UNLOCK();
2929                 free(xbdbuf, M_BPF);
2930                 return (ENOMEM);
2931         }
2932         index = 0;
2933         LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2934                 BPFIF_RLOCK(bp);
2935                 /* Send writers-only first */
2936                 LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
2937                         xbd = &xbdbuf[index++];
2938                         BPFD_LOCK(bd);
2939                         bpfstats_fill_xbpf(xbd, bd);
2940                         BPFD_UNLOCK(bd);
2941                 }
2942                 LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
2943                         xbd = &xbdbuf[index++];
2944                         BPFD_LOCK(bd);
2945                         bpfstats_fill_xbpf(xbd, bd);
2946                         BPFD_UNLOCK(bd);
2947                 }
2948                 BPFIF_RUNLOCK(bp);
2949         }
2950         BPF_UNLOCK();
2951         error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
2952         free(xbdbuf, M_BPF);
2953         return (error);
2954 }
2955
2956 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL);
2957
2958 #else /* !DEV_BPF && !NETGRAPH_BPF */
2959 /*
2960  * NOP stubs to allow bpf-using drivers to load and function.
2961  *
2962  * A 'better' implementation would allow the core bpf functionality
2963  * to be loaded at runtime.
2964  */
2965 static struct bpf_if bp_null;
2966
2967 void
2968 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
2969 {
2970 }
2971
2972 void
2973 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
2974 {
2975 }
2976
2977 void
2978 bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
2979 {
2980 }
2981
2982 void
2983 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2984 {
2985
2986         bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
2987 }
2988
2989 void
2990 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
2991 {
2992
2993         *driverp = &bp_null;
2994 }
2995
2996 void
2997 bpfdetach(struct ifnet *ifp)
2998 {
2999 }
3000
3001 u_int
3002 bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
3003 {
3004         return -1;      /* "no filter" behaviour */
3005 }
3006
3007 int
3008 bpf_validate(const struct bpf_insn *f, int len)
3009 {
3010         return 0;               /* false */
3011 }
3012
3013 #endif /* !DEV_BPF && !NETGRAPH_BPF */
3014
3015 #ifdef DDB
3016 static void
3017 bpf_show_bpf_if(struct bpf_if *bpf_if)
3018 {
3019
3020         if (bpf_if == NULL)
3021                 return;
3022         db_printf("%p:\n", bpf_if);
3023 #define BPF_DB_PRINTF(f, e)     db_printf("   %s = " f "\n", #e, bpf_if->e);
3024         /* bif_ext.bif_next */
3025         /* bif_ext.bif_dlist */
3026         BPF_DB_PRINTF("%#x", bif_dlt);
3027         BPF_DB_PRINTF("%u", bif_hdrlen);
3028         BPF_DB_PRINTF("%p", bif_ifp);
3029         /* bif_lock */
3030         /* bif_wlist */
3031         BPF_DB_PRINTF("%#x", bif_flags);
3032 }
3033
3034 DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
3035 {
3036
3037         if (!have_addr) {
3038                 db_printf("usage: show bpf_if <struct bpf_if *>\n");
3039                 return;
3040         }
3041
3042         bpf_show_bpf_if((struct bpf_if *)addr);
3043 }
3044 #endif