]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/net/if_tuntap.c
tun/tap: merge and rename to `tuntap`
[FreeBSD/FreeBSD.git] / sys / net / if_tuntap.c
1 /*      $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $  */
2 /*-
3  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4  *
5  * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
6  * All rights reserved.
7  * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  * BASED ON:
32  * -------------------------------------------------------------------------
33  *
34  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
35  * Nottingham University 1987.
36  *
37  * This source may be freely distributed, however I would be interested
38  * in any changes that are made.
39  *
40  * This driver takes packets off the IP i/f and hands them up to a
41  * user process to have its wicked way with. This driver has its
42  * roots in a similar driver written by Phil Cockcroft (formerly) at
43  * UCL. This driver is based much more on read/write/poll mode of
44  * operation though.
45  *
46  * $FreeBSD$
47  */
48
49 #include "opt_inet.h"
50 #include "opt_inet6.h"
51
52 #include <sys/param.h>
53 #include <sys/lock.h>
54 #include <sys/priv.h>
55 #include <sys/proc.h>
56 #include <sys/systm.h>
57 #include <sys/jail.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/socket.h>
61 #include <sys/fcntl.h>
62 #include <sys/filio.h>
63 #include <sys/sockio.h>
64 #include <sys/sx.h>
65 #include <sys/ttycom.h>
66 #include <sys/poll.h>
67 #include <sys/selinfo.h>
68 #include <sys/signalvar.h>
69 #include <sys/filedesc.h>
70 #include <sys/kernel.h>
71 #include <sys/sysctl.h>
72 #include <sys/conf.h>
73 #include <sys/uio.h>
74 #include <sys/malloc.h>
75 #include <sys/random.h>
76 #include <sys/ctype.h>
77
78 #include <net/ethernet.h>
79 #include <net/if.h>
80 #include <net/if_var.h>
81 #include <net/if_clone.h>
82 #include <net/if_dl.h>
83 #include <net/if_media.h>
84 #include <net/if_types.h>
85 #include <net/netisr.h>
86 #include <net/route.h>
87 #include <net/vnet.h>
88 #ifdef INET
89 #include <netinet/in.h>
90 #endif
91 #include <net/bpf.h>
92 #include <net/if_tap.h>
93 #include <net/if_tun.h>
94
95 #include <sys/queue.h>
96 #include <sys/condvar.h>
97 #include <security/mac/mac_framework.h>
98
99 struct tuntap_driver;
100
101 /*
102  * tun_list is protected by global tunmtx.  Other mutable fields are
103  * protected by tun->tun_mtx, or by their owning subsystem.  tun_dev is
104  * static for the duration of a tunnel interface.
105  */
106 struct tuntap_softc {
107         TAILQ_ENTRY(tuntap_softc)       tun_list;
108         struct cdev *tun_dev;
109         u_short tun_flags;              /* misc flags */
110 #define TUN_OPEN        0x0001
111 #define TUN_INITED      0x0002
112 #define TUN_RCOLL       0x0004
113 #define TUN_IASET       0x0008
114 #define TUN_DSTADDR     0x0010
115 #define TUN_LMODE       0x0020
116 #define TUN_RWAIT       0x0040
117 #define TUN_ASYNC       0x0080
118 #define TUN_IFHEAD      0x0100
119 #define TUN_DYING       0x0200
120 #define TUN_L2          0x0400
121 #define TUN_VMNET       0x0800
122
123 #define TUN_READY       (TUN_OPEN | TUN_INITED)
124
125         pid_t   tun_pid;                /* owning pid */
126         struct  ifnet *tun_ifp;         /* the interface */
127         struct  sigio *tun_sigio;       /* information for async I/O */
128         struct  tuntap_driver *tun_drv; /* appropriate driver */
129         struct  selinfo tun_rsel;       /* read select */
130         struct mtx      tun_mtx;        /* protect mutable softc fields */
131         struct cv       tun_cv;         /* protect against ref'd dev destroy */
132         struct ether_addr       tun_ether;      /* remote address */
133 };
/* Fetch the ifnet that belongs to a softc. */
#define TUN2IFP(sc)     ((sc)->tun_ifp)

/*
 * Debug printf, emitted through if_printf() only when tundebug is non-zero.
 * The do/while wrapper makes the macro a single statement, so it is safe
 * as the body of an unbraced if/else; a bare "if (tundebug) if_printf"
 * expansion would capture a following "else".
 */
#define TUNDEBUG(...)   do {                                            \
        if (tundebug)                                                   \
                if_printf(__VA_ARGS__);                                 \
} while (0)

/* Acquire / release the per-softc mutex. */
#define TUN_LOCK(tp)    mtx_lock(&(tp)->tun_mtx)
#define TUN_UNLOCK(tp)  mtx_unlock(&(tp)->tun_mtx)

#define TUN_VMIO_FLAG_MASK      0x0fff
142
143 /*
144  * All mutable global variables in if_tun are locked using tunmtx, with
145  * the exception of tundebug, which is used unlocked, and the drivers' *clones,
146  * which are static after setup.
147  */
148 static struct mtx tunmtx;
149 static eventhandler_tag tag;
150 static const char tunname[] = "tun";
151 static const char tapname[] = "tap";
152 static const char vmnetname[] = "vmnet";
153 static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
154 static int tundebug = 0;
155 static int tundclone = 1;
156 static int tap_allow_uopen = 0;        /* allow user open() */
157 static int tapuponopen = 0;    /* IFF_UP on open() */
158 static int tapdclone = 1;       /* enable devfs cloning */
159
160 static TAILQ_HEAD(,tuntap_softc)        tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
161 SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
162
163 static struct sx tun_ioctl_sx;
164 SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");
165
166 SYSCTL_DECL(_net_link);
167 /* tun */
168 static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
169     "IP tunnel software network interface.");
170 SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
171     "Enable legacy devfs interface creation.");
172
173 /* tap */
174 static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
175     "Ethernet tunnel software network interface");
176 SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0,
177         "Allow user to open /dev/tap (based on node permissions)");
178 SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
179         "Bring interface up when /dev/tap is opened");
180 SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
181         "Enable legacy devfs interface creation");
182 SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, "");
183
184 static int      tuntap_name2info(const char *name, int *unit, int *flags);
185 static void     tunclone(void *arg, struct ucred *cred, char *name,
186                     int namelen, struct cdev **dev);
187 static void     tuncreate(struct cdev *dev, struct tuntap_driver *);
188 static int      tunifioctl(struct ifnet *, u_long, caddr_t);
189 static void     tuninit(struct ifnet *);
190 static void     tunifinit(void *xtp);
191 static int      tuntapmodevent(module_t, int, void *);
192 static int      tunoutput(struct ifnet *, struct mbuf *,
193                     const struct sockaddr *, struct route *ro);
194 static void     tunstart(struct ifnet *);
195 static void     tunstart_l2(struct ifnet *);
196
197 static int      tun_clone_match(struct if_clone *ifc, const char *name);
198 static int      tap_clone_match(struct if_clone *ifc, const char *name);
199 static int      vmnet_clone_match(struct if_clone *ifc, const char *name);
200 static int      tun_clone_create(struct if_clone *, char *, size_t, caddr_t);
201 static int      tun_clone_destroy(struct if_clone *, struct ifnet *);
202
203 static d_open_t         tunopen;
204 static d_close_t        tunclose;
205 static d_read_t         tunread;
206 static d_write_t        tunwrite;
207 static d_ioctl_t        tunioctl;
208 static d_poll_t         tunpoll;
209 static d_kqfilter_t     tunkqfilter;
210
211 static int              tunkqread(struct knote *, long);
212 static int              tunkqwrite(struct knote *, long);
213 static void             tunkqdetach(struct knote *);
214
215 static struct filterops tun_read_filterops = {
216         .f_isfd =       1,
217         .f_attach =     NULL,
218         .f_detach =     tunkqdetach,
219         .f_event =      tunkqread,
220 };
221
222 static struct filterops tun_write_filterops = {
223         .f_isfd =       1,
224         .f_attach =     NULL,
225         .f_detach =     tunkqdetach,
226         .f_event =      tunkqwrite,
227 };
228
229 #define TUN_DRIVER_IDENT_MASK   (TUN_L2 | TUN_VMNET)
230
231 static struct tuntap_driver {
232         int                      tun_flags;
233         struct unrhdr           *unrhdr;
234         struct cdevsw            cdevsw;
235         struct clonedevs        *clones;
236         ifc_match_t             *clone_match_fn;
237         ifc_create_t            *clone_create_fn;
238         ifc_destroy_t           *clone_destroy_fn;
239 } tuntap_drivers[] = {
240         {
241                 .tun_flags =    0,
242                 .cdevsw =       {
243                     .d_version =        D_VERSION,
244                     .d_flags =          D_NEEDMINOR,
245                     .d_open =           tunopen,
246                     .d_close =          tunclose,
247                     .d_read =           tunread,
248                     .d_write =          tunwrite,
249                     .d_ioctl =          tunioctl,
250                     .d_poll =           tunpoll,
251                     .d_kqfilter =       tunkqfilter,
252                     .d_name =           tunname,
253                 },
254                 .clone_match_fn =       tun_clone_match,
255                 .clone_create_fn =      tun_clone_create,
256                 .clone_destroy_fn =     tun_clone_destroy,
257         },
258         {
259                 .tun_flags =    TUN_L2,
260                 .cdevsw =       {
261                     .d_version =        D_VERSION,
262                     .d_flags =          D_NEEDMINOR,
263                     .d_open =           tunopen,
264                     .d_close =          tunclose,
265                     .d_read =           tunread,
266                     .d_write =          tunwrite,
267                     .d_ioctl =          tunioctl,
268                     .d_poll =           tunpoll,
269                     .d_kqfilter =       tunkqfilter,
270                     .d_name =           tapname,
271                 },
272                 .clone_match_fn =       tap_clone_match,
273                 .clone_create_fn =      tun_clone_create,
274                 .clone_destroy_fn =     tun_clone_destroy,
275         },
276         {
277                 .tun_flags =    TUN_L2 | TUN_VMNET,
278                 .cdevsw =       {
279                     .d_version =        D_VERSION,
280                     .d_flags =          D_NEEDMINOR,
281                     .d_open =           tunopen,
282                     .d_close =          tunclose,
283                     .d_read =           tunread,
284                     .d_write =          tunwrite,
285                     .d_ioctl =          tunioctl,
286                     .d_poll =           tunpoll,
287                     .d_kqfilter =       tunkqfilter,
288                     .d_name =           vmnetname,
289                 },
290                 .clone_match_fn =       vmnet_clone_match,
291                 .clone_create_fn =      tun_clone_create,
292                 .clone_destroy_fn =     tun_clone_destroy,
293         },
294 };
295
296 struct tuntap_driver_cloner {
297         SLIST_ENTRY(tuntap_driver_cloner)                link;
298         struct tuntap_driver                    *drv;
299         struct if_clone                         *cloner;
300 };
301
302 VNET_DEFINE_STATIC(SLIST_HEAD(, tuntap_driver_cloner), tuntap_driver_cloners) =
303     SLIST_HEAD_INITIALIZER(tuntap_driver_cloners);
304
305 #define V_tuntap_driver_cloners VNET(tuntap_driver_cloners)
306
307 /*
308  * Sets unit and/or flags given the device name.  Must be called with correct
309  * vnet context.
310  */
311 static int
312 tuntap_name2info(const char *name, int *outunit, int *outflags)
313 {
314         struct tuntap_driver *drv;
315         struct tuntap_driver_cloner *drvc;
316         char *dname;
317         int flags, unit;
318         bool found;
319
320         if (name == NULL)
321                 return (EINVAL);
322
323         /*
324          * Needed for dev_stdclone, but dev_stdclone will not modify, it just
325          * wants to be able to pass back a char * through the second param. We
326          * will always set that as NULL here, so we'll fake it.
327          */
328         dname = __DECONST(char *, name);
329         found = false;
330
331         KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
332             ("tuntap_driver_cloners failed to initialize"));
333         SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
334                 KASSERT(drvc->drv != NULL,
335                     ("tuntap_driver_cloners entry not properly initialized"));
336                 drv = drvc->drv;
337
338                 if (strcmp(name, drv->cdevsw.d_name) == 0) {
339                         found = true;
340                         unit = -1;
341                         flags = drv->tun_flags;
342                         break;
343                 }
344
345                 if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) {
346                         found = true;
347                         flags = drv->tun_flags;
348                         break;
349                 }
350         }
351
352         if (!found)
353                 return (ENXIO);
354
355         if (outunit != NULL)
356                 *outunit = unit;
357         if (outflags != NULL)
358                 *outflags = flags;
359         return (0);
360 }
361
362 /*
363  * Get driver information from a set of flags specified.  Masks the identifying
364  * part of the flags and compares it against all of the available
365  * tuntap_drivers. Must be called with correct vnet context.
366  */
367 static struct tuntap_driver *
368 tuntap_driver_from_flags(int tun_flags)
369 {
370         struct tuntap_driver *drv;
371         struct tuntap_driver_cloner *drvc;
372
373         KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
374             ("tuntap_driver_cloners failed to initialize"));
375         SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
376                 KASSERT(drvc->drv != NULL,
377                     ("tuntap_driver_cloners entry not properly initialized"));
378                 drv = drvc->drv;
379                 if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->tun_flags)
380                         return (drv);
381         }
382
383         return (NULL);
384 }
385
386
387
388 static int
389 tun_clone_match(struct if_clone *ifc, const char *name)
390 {
391         int tunflags;
392
393         if (tuntap_name2info(name, NULL, &tunflags) == 0) {
394                 if ((tunflags & TUN_L2) == 0)
395                         return (1);
396         }
397
398         return (0);
399 }
400
401 static int
402 tap_clone_match(struct if_clone *ifc, const char *name)
403 {
404         int tunflags;
405
406         if (tuntap_name2info(name, NULL, &tunflags) == 0) {
407                 if ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2)
408                         return (1);
409         }
410
411         return (0);
412 }
413
414 static int
415 vmnet_clone_match(struct if_clone *ifc, const char *name)
416 {
417         int tunflags;
418
419         if (tuntap_name2info(name, NULL, &tunflags) == 0) {
420                 if ((tunflags & TUN_VMNET) != 0)
421                         return (1);
422         }
423
424         return (0);
425 }
426
427 static int
428 tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
429 {
430         struct tuntap_driver *drv;
431         struct cdev *dev;
432         int err, i, tunflags, unit;
433
434         tunflags = 0;
435         /* The name here tells us exactly what we're creating */
436         err = tuntap_name2info(name, &unit, &tunflags);
437         if (err != 0)
438                 return (err);
439
440         drv = tuntap_driver_from_flags(tunflags);
441         if (drv == NULL)
442                 return (ENXIO);
443
444         if (unit != -1) {
445                 /* If this unit number is still available that/s okay. */
446                 if (alloc_unr_specific(drv->unrhdr, unit) == -1)
447                         return (EEXIST);
448         } else {
449                 unit = alloc_unr(drv->unrhdr);
450         }
451
452         snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit);
453
454         /* find any existing device, or allocate new unit number */
455         i = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0);
456         if (i) {
457                 /* No preexisting struct cdev *, create one */
458                 dev = make_dev(&drv->cdevsw, unit, UID_UUCP, GID_DIALER, 0600,
459                     "%s%d", drv->cdevsw.d_name, unit);
460         }
461
462         tuncreate(dev, drv);
463
464         return (0);
465 }
466
467 static void
468 tunclone(void *arg, struct ucred *cred, char *name, int namelen,
469     struct cdev **dev)
470 {
471         char devname[SPECNAMELEN + 1];
472         struct tuntap_driver *drv;
473         int append_unit, i, u, tunflags;
474         bool mayclone;
475
476         if (*dev != NULL)
477                 return;
478
479         tunflags = 0;
480         CURVNET_SET(CRED_TO_VNET(cred));
481         if (tuntap_name2info(name, &u, &tunflags) != 0)
482                 goto out;       /* Not recognized */
483
484         if (u != -1 && u > IF_MAXUNIT)
485                 goto out;       /* Unit number too high */
486
487         mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE) == 0;
488         if ((tunflags & TUN_L2) != 0) {
489                 /* tap/vmnet allow user open with a sysctl */
490                 mayclone = (mayclone || tap_allow_uopen) && tapdclone;
491         } else {
492                 mayclone = mayclone && tundclone;
493         }
494
495         /*
496          * If tun cloning is enabled, only the superuser can create an
497          * interface.
498          */
499         if (!mayclone)
500                 goto out;
501
502         if (u == -1)
503                 append_unit = 1;
504         else
505                 append_unit = 0;
506
507         drv = tuntap_driver_from_flags(tunflags);
508         if (drv == NULL)
509                 goto out;
510
511         /* find any existing device, or allocate new unit number */
512         i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0);
513         if (i) {
514                 if (append_unit) {
515                         namelen = snprintf(devname, sizeof(devname), "%s%d",
516                             name, u);
517                         name = devname;
518                 }
519                 /* No preexisting struct cdev *, create one */
520                 *dev = make_dev_credf(MAKEDEV_REF, &drv->cdevsw, u, cred,
521                     UID_UUCP, GID_DIALER, 0600, "%s", name);
522         }
523
524         if_clone_create(name, namelen, NULL);
525 out:
526         CURVNET_RESTORE();
527 }
528
529 static void
530 tun_destroy(struct tuntap_softc *tp)
531 {
532         struct cdev *dev;
533
534         TUN_LOCK(tp);
535         tp->tun_flags |= TUN_DYING;
536         if ((tp->tun_flags & TUN_OPEN) != 0)
537                 cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
538         else
539                 TUN_UNLOCK(tp);
540
541         CURVNET_SET(TUN2IFP(tp)->if_vnet);
542         sx_xlock(&tun_ioctl_sx);
543         TUN2IFP(tp)->if_softc = NULL;
544         sx_xunlock(&tun_ioctl_sx);
545
546         dev = tp->tun_dev;
547         bpfdetach(TUN2IFP(tp));
548         if_detach(TUN2IFP(tp));
549         free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit);
550         if_free(TUN2IFP(tp));
551         destroy_dev(dev);
552         seldrain(&tp->tun_rsel);
553         knlist_clear(&tp->tun_rsel.si_note, 0);
554         knlist_destroy(&tp->tun_rsel.si_note);
555         mtx_destroy(&tp->tun_mtx);
556         cv_destroy(&tp->tun_cv);
557         free(tp, M_TUN);
558         CURVNET_RESTORE();
559 }
560
561 static int
562 tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp)
563 {
564         struct tuntap_softc *tp = ifp->if_softc;
565
566         mtx_lock(&tunmtx);
567         TAILQ_REMOVE(&tunhead, tp, tun_list);
568         mtx_unlock(&tunmtx);
569         tun_destroy(tp);
570
571         return (0);
572 }
573
574 static void
575 vnet_tun_init(const void *unused __unused)
576 {
577         struct tuntap_driver *drv;
578         struct tuntap_driver_cloner *drvc;
579         int i;
580
581         for (i = 0; i < nitems(tuntap_drivers); ++i) {
582                 drv = &tuntap_drivers[i];
583                 drvc = malloc(sizeof(*drvc), M_TUN, M_WAITOK | M_ZERO);
584
585                 drvc->drv = drv;
586                 drvc->cloner = if_clone_advanced(drv->cdevsw.d_name, 0,
587                     drv->clone_match_fn, drv->clone_create_fn,
588                     drv->clone_destroy_fn);
589                 SLIST_INSERT_HEAD(&V_tuntap_driver_cloners, drvc, link);
590         };
591 }
592 VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
593                 vnet_tun_init, NULL);
594
595 static void
596 vnet_tun_uninit(const void *unused __unused)
597 {
598         struct tuntap_driver_cloner *drvc;
599
600         while (!SLIST_EMPTY(&V_tuntap_driver_cloners)) {
601                 drvc = SLIST_FIRST(&V_tuntap_driver_cloners);
602                 SLIST_REMOVE_HEAD(&V_tuntap_driver_cloners, link);
603
604                 if_clone_detach(drvc->cloner);
605                 free(drvc, M_TUN);
606         }
607 }
608 VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
609     vnet_tun_uninit, NULL);
610
611 static void
612 tun_uninit(const void *unused __unused)
613 {
614         struct tuntap_driver *drv;
615         struct tuntap_softc *tp;
616         int i;
617
618         EVENTHANDLER_DEREGISTER(dev_clone, tag);
619         drain_dev_clone_events();
620
621         mtx_lock(&tunmtx);
622         while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
623                 TAILQ_REMOVE(&tunhead, tp, tun_list);
624                 mtx_unlock(&tunmtx);
625                 tun_destroy(tp);
626                 mtx_lock(&tunmtx);
627         }
628         mtx_unlock(&tunmtx);
629         for (i = 0; i < nitems(tuntap_drivers); ++i) {
630                 drv = &tuntap_drivers[i];
631                 delete_unrhdr(drv->unrhdr);
632                 clone_cleanup(&drv->clones);
633         }
634         mtx_destroy(&tunmtx);
635 }
636 SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL);
637
638 static int
639 tuntapmodevent(module_t mod, int type, void *data)
640 {
641         struct tuntap_driver *drv;
642         int i;
643
644         switch (type) {
645         case MOD_LOAD:
646                 mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
647                 for (i = 0; i < nitems(tuntap_drivers); ++i) {
648                         drv = &tuntap_drivers[i];
649                         clone_setup(&drv->clones);
650                         drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
651                 }
652                 tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
653                 if (tag == NULL)
654                         return (ENOMEM);
655                 break;
656         case MOD_UNLOAD:
657                 /* See tun_uninit, so it's done after the vnet_sysuninit() */
658                 break;
659         default:
660                 return EOPNOTSUPP;
661         }
662         return 0;
663 }
664
665 static moduledata_t tuntap_mod = {
666         "if_tuntap",
667         tuntapmodevent,
668         0
669 };
670
671 DECLARE_MODULE(if_tuntap, tuntap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
672 MODULE_VERSION(if_tuntap, 1);
673
674 static void
675 tunstart(struct ifnet *ifp)
676 {
677         struct tuntap_softc *tp = ifp->if_softc;
678         struct mbuf *m;
679
680         TUNDEBUG(ifp, "starting\n");
681         if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
682                 IFQ_LOCK(&ifp->if_snd);
683                 IFQ_POLL_NOLOCK(&ifp->if_snd, m);
684                 if (m == NULL) {
685                         IFQ_UNLOCK(&ifp->if_snd);
686                         return;
687                 }
688                 IFQ_UNLOCK(&ifp->if_snd);
689         }
690
691         TUN_LOCK(tp);
692         if (tp->tun_flags & TUN_RWAIT) {
693                 tp->tun_flags &= ~TUN_RWAIT;
694                 wakeup(tp);
695         }
696         selwakeuppri(&tp->tun_rsel, PZERO + 1);
697         KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
698         if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
699                 TUN_UNLOCK(tp);
700                 pgsigio(&tp->tun_sigio, SIGIO, 0);
701         } else
702                 TUN_UNLOCK(tp);
703 }
704
705 /*
706  * tunstart_l2
707  *
708  * queue packets from higher level ready to put out
709  */
710 static void
711 tunstart_l2(struct ifnet *ifp)
712 {
713         struct tuntap_softc     *tp = ifp->if_softc;
714
715         TUNDEBUG(ifp, "starting\n");
716
717         /*
718          * do not junk pending output if we are in VMnet mode.
719          * XXX: can this do any harm because of queue overflow?
720          */
721
722         TUN_LOCK(tp);
723         if (((tp->tun_flags & TUN_VMNET) == 0) &&
724             ((tp->tun_flags & TUN_READY) != TUN_READY)) {
725                 struct mbuf *m;
726
727                 /* Unlocked read. */
728                 TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags);
729
730                 for (;;) {
731                         IF_DEQUEUE(&ifp->if_snd, m);
732                         if (m != NULL) {
733                                 m_freem(m);
734                                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
735                         } else
736                                 break;
737                 }
738                 TUN_UNLOCK(tp);
739
740                 return;
741         }
742
743         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
744
745         if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
746                 if (tp->tun_flags & TUN_RWAIT) {
747                         tp->tun_flags &= ~TUN_RWAIT;
748                         wakeup(tp);
749                 }
750
751                 if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) {
752                         TUN_UNLOCK(tp);
753                         pgsigio(&tp->tun_sigio, SIGIO, 0);
754                         TUN_LOCK(tp);
755                 }
756
757                 selwakeuppri(&tp->tun_rsel, PZERO+1);
758                 KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
759                 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
760         }
761
762         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
763         TUN_UNLOCK(tp);
764 } /* tunstart_l2 */
765
766
767 /* XXX: should return an error code so it can fail. */
768 static void
769 tuncreate(struct cdev *dev, struct tuntap_driver *drv)
770 {
771         struct tuntap_softc *sc;
772         struct ifnet *ifp;
773         struct ether_addr eaddr;
774         int iflags;
775         u_char type;
776
777         sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
778         mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
779         cv_init(&sc->tun_cv, "tun_condvar");
780         sc->tun_flags = drv->tun_flags;
781         sc->tun_dev = dev;
782         sc->tun_drv = drv;
783         mtx_lock(&tunmtx);
784         TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
785         mtx_unlock(&tunmtx);
786
787         iflags = IFF_MULTICAST;
788         if ((sc->tun_flags & TUN_L2) != 0) {
789                 type = IFT_ETHER;
790                 iflags |= IFF_BROADCAST | IFF_SIMPLEX;
791         } else {
792                 type = IFT_PPP;
793                 iflags |= IFF_POINTOPOINT;
794         }
795         ifp = sc->tun_ifp = if_alloc(type);
796         if (ifp == NULL)
797                 panic("%s%d: failed to if_alloc() interface.\n",
798                     drv->cdevsw.d_name, dev2unit(dev));
799         ifp->if_softc = sc;
800         if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev));
801         ifp->if_ioctl = tunifioctl;
802         ifp->if_flags = iflags;
803         IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
804         knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx);
805         ifp->if_capabilities |= IFCAP_LINKSTATE;
806         ifp->if_capenable |= IFCAP_LINKSTATE;
807
808         if ((sc->tun_flags & TUN_L2) != 0) {
809                 ifp->if_mtu = ETHERMTU;
810                 ifp->if_init = tunifinit;
811                 ifp->if_start = tunstart_l2;
812
813                 ether_gen_addr(ifp, &eaddr);
814                 ether_ifattach(ifp, eaddr.octet);
815         } else {
816                 ifp->if_mtu = TUNMTU;
817                 ifp->if_start = tunstart;
818                 ifp->if_output = tunoutput;
819
820                 ifp->if_snd.ifq_drv_maxlen = 0;
821                 IFQ_SET_READY(&ifp->if_snd);
822
823                 if_attach(ifp);
824                 bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
825         }
826         dev->si_drv1 = sc;
827
828         TUN_LOCK(sc);
829         sc->tun_flags |= TUN_INITED;
830         TUN_UNLOCK(sc);
831
832         TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
833             ifp->if_xname, dev2unit(dev));
834 }
835
836 static int
837 tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
838 {
839         struct ifnet    *ifp;
840         struct tuntap_driver *drv;
841         struct tuntap_softc *tp;
842         int error, tunflags;
843
844         tunflags = 0;
845         CURVNET_SET(TD_TO_VNET(td));
846         error = tuntap_name2info(dev->si_name, NULL, &tunflags);
847         if (error != 0) {
848                 CURVNET_RESTORE();
849                 return (error); /* Shouldn't happen */
850         }
851
852         if ((tunflags & TUN_L2) != 0) {
853                 /* Restrict? */
854                 if (tap_allow_uopen == 0) {
855                         error = priv_check(td, PRIV_NET_TAP);
856                         if (error != 0) {
857                                 CURVNET_RESTORE();
858                                 return (error);
859                         }
860                 }
861         }
862
863         /*
864          * XXXRW: Non-atomic test and set of dev->si_drv1 requires
865          * synchronization.
866          */
867         tp = dev->si_drv1;
868         if (!tp) {
869                 drv = tuntap_driver_from_flags(tunflags);
870                 if (drv == NULL) {
871                         CURVNET_RESTORE();
872                         return (ENXIO);
873                 }
874                 tuncreate(dev, drv);
875                 tp = dev->si_drv1;
876         }
877
878         TUN_LOCK(tp);
879         if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) {
880                 TUN_UNLOCK(tp);
881                 CURVNET_RESTORE();
882                 return (EBUSY);
883         }
884
885         ifp = TUN2IFP(tp);
886
887         if ((tp->tun_flags & TUN_L2) != 0) {
888                 bcopy(IF_LLADDR(ifp), tp->tun_ether.octet,
889                     sizeof(tp->tun_ether.octet));
890
891                 ifp->if_drv_flags |= IFF_DRV_RUNNING;
892                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
893
894                 if (tapuponopen)
895                         ifp->if_flags |= IFF_UP;
896         }
897
898         tp->tun_pid = td->td_proc->p_pid;
899         tp->tun_flags |= TUN_OPEN;
900
901         if_link_state_change(ifp, LINK_STATE_UP);
902         TUNDEBUG(ifp, "open\n");
903         TUN_UNLOCK(tp);
904         CURVNET_RESTORE();
905         return (0);
906 }
907
908 /*
909  * tunclose - close the device - mark i/f down & delete
910  * routing info
911  */
912 static  int
913 tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
914 {
915         struct tuntap_softc *tp;
916         struct ifnet *ifp;
917         bool l2tun;
918
919         tp = dev->si_drv1;
920         ifp = TUN2IFP(tp);
921
922         TUN_LOCK(tp);
923         /*
924          * Simply close the device if this isn't the controlling process.  This
925          * may happen if, for instance, the tunnel has been handed off to
926          * another process.  The original controller should be able to close it
927          * without putting us into an inconsistent state.
928          */
929         if (td->td_proc->p_pid != tp->tun_pid) {
930                 TUN_UNLOCK(tp);
931                 return (0);
932         }
933
934         /*
935          * junk all pending output
936          */
937         CURVNET_SET(ifp->if_vnet);
938
939         l2tun = false;
940         if ((tp->tun_flags & TUN_L2) != 0) {
941                 l2tun = true;
942                 IF_DRAIN(&ifp->if_snd);
943         } else {
944                 IFQ_PURGE(&ifp->if_snd);
945         }
946
947         /* For vmnet, we won't do most of the address/route bits */
948         if ((tp->tun_flags & TUN_VMNET) != 0)
949                 goto out;
950
951         if (ifp->if_flags & IFF_UP) {
952                 TUN_UNLOCK(tp);
953                 if_down(ifp);
954                 TUN_LOCK(tp);
955         }
956
957         /* Delete all addresses and routes which reference this interface. */
958         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
959                 struct ifaddr *ifa;
960
961                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
962                 TUN_UNLOCK(tp);
963                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
964                         /* deal w/IPv4 PtP destination; unlocked read */
965                         if (!l2tun && ifa->ifa_addr->sa_family == AF_INET) {
966                                 rtinit(ifa, (int)RTM_DELETE,
967                                     tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
968                         } else {
969                                 rtinit(ifa, (int)RTM_DELETE, 0);
970                         }
971                 }
972                 if_purgeaddrs(ifp);
973                 TUN_LOCK(tp);
974         }
975
976 out:
977         if_link_state_change(ifp, LINK_STATE_DOWN);
978         CURVNET_RESTORE();
979
980         funsetown(&tp->tun_sigio);
981         selwakeuppri(&tp->tun_rsel, PZERO + 1);
982         KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
983         TUNDEBUG (ifp, "closed\n");
984         tp->tun_flags &= ~TUN_OPEN;
985         tp->tun_pid = 0;
986
987         cv_broadcast(&tp->tun_cv);
988         TUN_UNLOCK(tp);
989         return (0);
990 }
991
992 static void
993 tuninit(struct ifnet *ifp)
994 {
995         struct tuntap_softc *tp = ifp->if_softc;
996 #ifdef INET
997         struct ifaddr *ifa;
998 #endif
999
1000         TUNDEBUG(ifp, "tuninit\n");
1001
1002         TUN_LOCK(tp);
1003         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1004         if ((tp->tun_flags & TUN_L2) == 0) {
1005                 ifp->if_flags |= IFF_UP;
1006                 getmicrotime(&ifp->if_lastchange);
1007 #ifdef INET
1008                 if_addr_rlock(ifp);
1009                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1010                         if (ifa->ifa_addr->sa_family == AF_INET) {
1011                                 struct sockaddr_in *si;
1012
1013                                 si = (struct sockaddr_in *)ifa->ifa_addr;
1014                                 if (si->sin_addr.s_addr)
1015                                         tp->tun_flags |= TUN_IASET;
1016
1017                                 si = (struct sockaddr_in *)ifa->ifa_dstaddr;
1018                                 if (si && si->sin_addr.s_addr)
1019                                         tp->tun_flags |= TUN_DSTADDR;
1020                         }
1021                 }
1022                 if_addr_runlock(ifp);
1023 #endif
1024                 TUN_UNLOCK(tp);
1025         } else {
1026                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1027                 TUN_UNLOCK(tp);
1028                 /* attempt to start output */
1029                 tunstart_l2(ifp);
1030         }
1031
1032 }
1033
1034 /*
1035  * Used only for l2 tunnel.
1036  */
1037 static void
1038 tunifinit(void *xtp)
1039 {
1040         struct tuntap_softc *tp;
1041
1042         tp = (struct tuntap_softc *)xtp;
1043         tuninit(tp->tun_ifp);
1044 }
1045
1046 /*
1047  * Process an ioctl request.
1048  */
1049 static int
1050 tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1051 {
1052         struct ifreq *ifr = (struct ifreq *)data;
1053         struct tuntap_softc *tp;
1054         struct ifstat *ifs;
1055         struct ifmediareq       *ifmr;
1056         int             dummy, error = 0;
1057         bool            l2tun;
1058
1059         ifmr = NULL;
1060         sx_xlock(&tun_ioctl_sx);
1061         tp = ifp->if_softc;
1062         if (tp == NULL) {
1063                 error = ENXIO;
1064                 goto bad;
1065         }
1066         l2tun = (tp->tun_flags & TUN_L2) != 0;
1067         switch(cmd) {
1068         case SIOCGIFSTATUS:
1069                 ifs = (struct ifstat *)data;
1070                 TUN_LOCK(tp);
1071                 if (tp->tun_pid)
1072                         snprintf(ifs->ascii, sizeof(ifs->ascii),
1073                             "\tOpened by PID %d\n", tp->tun_pid);
1074                 else
1075                         ifs->ascii[0] = '\0';
1076                 TUN_UNLOCK(tp);
1077                 break;
1078         case SIOCSIFADDR:
1079                 if (l2tun)
1080                         error = ether_ioctl(ifp, cmd, data);
1081                 else
1082                         tuninit(ifp);
1083                 if (error == 0)
1084                     TUNDEBUG(ifp, "address set\n");
1085                 break;
1086         case SIOCSIFMTU:
1087                 ifp->if_mtu = ifr->ifr_mtu;
1088                 TUNDEBUG(ifp, "mtu set\n");
1089                 break;
1090         case SIOCSIFFLAGS:
1091         case SIOCADDMULTI:
1092         case SIOCDELMULTI:
1093                 break;
1094         case SIOCGIFMEDIA:
1095                 if (!l2tun) {
1096                         error = EINVAL;
1097                         break;
1098                 }
1099
1100                 ifmr = (struct ifmediareq *)data;
1101                 dummy = ifmr->ifm_count;
1102                 ifmr->ifm_count = 1;
1103                 ifmr->ifm_status = IFM_AVALID;
1104                 ifmr->ifm_active = IFM_ETHER;
1105                 if (tp->tun_flags & TUN_OPEN)
1106                         ifmr->ifm_status |= IFM_ACTIVE;
1107                 ifmr->ifm_current = ifmr->ifm_active;
1108                 if (dummy >= 1) {
1109                         int media = IFM_ETHER;
1110                         error = copyout(&media, ifmr->ifm_ulist, sizeof(int));
1111                 }
1112                 break;
1113         default:
1114                 if (l2tun) {
1115                         error = ether_ioctl(ifp, cmd, data);
1116                 } else {
1117                         error = EINVAL;
1118                 }
1119         }
1120 bad:
1121         sx_xunlock(&tun_ioctl_sx);
1122         return (error);
1123 }
1124
1125 /*
1126  * tunoutput - queue packets from higher level ready to put out.
1127  */
1128 static int
1129 tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
1130     struct route *ro)
1131 {
1132         struct tuntap_softc *tp = ifp->if_softc;
1133         u_short cached_tun_flags;
1134         int error;
1135         u_int32_t af;
1136
1137         TUNDEBUG (ifp, "tunoutput\n");
1138
1139 #ifdef MAC
1140         error = mac_ifnet_check_transmit(ifp, m0);
1141         if (error) {
1142                 m_freem(m0);
1143                 return (error);
1144         }
1145 #endif
1146
1147         /* Could be unlocked read? */
1148         TUN_LOCK(tp);
1149         cached_tun_flags = tp->tun_flags;
1150         TUN_UNLOCK(tp);
1151         if ((cached_tun_flags & TUN_READY) != TUN_READY) {
1152                 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
1153                 m_freem (m0);
1154                 return (EHOSTDOWN);
1155         }
1156
1157         if ((ifp->if_flags & IFF_UP) != IFF_UP) {
1158                 m_freem (m0);
1159                 return (EHOSTDOWN);
1160         }
1161
1162         /* BPF writes need to be handled specially. */
1163         if (dst->sa_family == AF_UNSPEC)
1164                 bcopy(dst->sa_data, &af, sizeof(af));
1165         else
1166                 af = dst->sa_family;
1167
1168         if (bpf_peers_present(ifp->if_bpf))
1169                 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
1170
1171         /* prepend sockaddr? this may abort if the mbuf allocation fails */
1172         if (cached_tun_flags & TUN_LMODE) {
1173                 /* allocate space for sockaddr */
1174                 M_PREPEND(m0, dst->sa_len, M_NOWAIT);
1175
1176                 /* if allocation failed drop packet */
1177                 if (m0 == NULL) {
1178                         if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
1179                         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
1180                         return (ENOBUFS);
1181                 } else {
1182                         bcopy(dst, m0->m_data, dst->sa_len);
1183                 }
1184         }
1185
1186         if (cached_tun_flags & TUN_IFHEAD) {
1187                 /* Prepend the address family */
1188                 M_PREPEND(m0, 4, M_NOWAIT);
1189
1190                 /* if allocation failed drop packet */
1191                 if (m0 == NULL) {
1192                         if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
1193                         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
1194                         return (ENOBUFS);
1195                 } else
1196                         *(u_int32_t *)m0->m_data = htonl(af);
1197         } else {
1198 #ifdef INET
1199                 if (af != AF_INET)
1200 #endif
1201                 {
1202                         m_freem(m0);
1203                         return (EAFNOSUPPORT);
1204                 }
1205         }
1206
1207         error = (ifp->if_transmit)(ifp, m0);
1208         if (error)
1209                 return (ENOBUFS);
1210         if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
1211         return (0);
1212 }
1213
1214 /*
1215  * the cdevsw interface is now pretty minimal.
1216  */
1217 static  int
1218 tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
1219     struct thread *td)
1220 {
1221         struct ifreq ifr, *ifrp;
1222         struct tuntap_softc *tp = dev->si_drv1;
1223         struct tuninfo *tunp;
1224         int error, iflags;
1225 #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
1226     defined(COMPAT_FREEBSD4)
1227         int     ival;
1228 #endif
1229         bool    l2tun;
1230
1231         l2tun = (tp->tun_flags & TUN_L2) != 0;
1232         if (l2tun) {
1233                 /* tap specific ioctls */
1234                 switch(cmd) {
1235                 case TAPGIFNAME:
1236                         ifrp = (struct ifreq *)data;
1237                         strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname,
1238                             IFNAMSIZ);
1239
1240                         return (0);
1241                 /* VMware/VMnet port ioctl's */
1242 #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
1243     defined(COMPAT_FREEBSD4)
1244                 case _IO('V', 0):
1245                         ival = IOCPARM_IVAL(data);
1246                         data = (caddr_t)&ival;
1247                         /* FALLTHROUGH */
1248 #endif
1249                 case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
1250                         iflags = *(int *)data;
1251                         iflags &= TUN_VMIO_FLAG_MASK;
1252                         iflags &= ~IFF_CANTCHANGE;
1253                         iflags |= IFF_UP;
1254
1255                         TUN_LOCK(tp);
1256                         TUN2IFP(tp)->if_flags = iflags |
1257                             (TUN2IFP(tp)->if_flags & IFF_CANTCHANGE);
1258                         TUN_UNLOCK(tp);
1259
1260                         return (0);
1261                 case SIOCGIFADDR:       /* get MAC address of the remote side */
1262                         TUN_LOCK(tp);
1263                         bcopy(&tp->tun_ether.octet, data,
1264                             sizeof(tp->tun_ether.octet));
1265                         TUN_UNLOCK(tp);
1266
1267                         return (0);
1268                 case SIOCSIFADDR:       /* set MAC address of the remote side */
1269                         TUN_LOCK(tp);
1270                         bcopy(data, &tp->tun_ether.octet,
1271                             sizeof(tp->tun_ether.octet));
1272                         TUN_UNLOCK(tp);
1273
1274                         return (0);
1275                 }
1276
1277                 /* Fall through to the common ioctls if unhandled */
1278         } else {
1279                 switch (cmd) {
1280                 case TUNSLMODE:
1281                         TUN_LOCK(tp);
1282                         if (*(int *)data) {
1283                                 tp->tun_flags |= TUN_LMODE;
1284                                 tp->tun_flags &= ~TUN_IFHEAD;
1285                         } else
1286                                 tp->tun_flags &= ~TUN_LMODE;
1287                         TUN_UNLOCK(tp);
1288
1289                         return (0);
1290                 case TUNSIFHEAD:
1291                         TUN_LOCK(tp);
1292                         if (*(int *)data) {
1293                                 tp->tun_flags |= TUN_IFHEAD;
1294                                 tp->tun_flags &= ~TUN_LMODE;
1295                         } else
1296                                 tp->tun_flags &= ~TUN_IFHEAD;
1297                         TUN_UNLOCK(tp);
1298
1299                         return (0);
1300                 case TUNGIFHEAD:
1301                         TUN_LOCK(tp);
1302                         *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
1303                         TUN_UNLOCK(tp);
1304
1305                         return (0);
1306                 case TUNSIFMODE:
1307                         /* deny this if UP */
1308                         if (TUN2IFP(tp)->if_flags & IFF_UP)
1309                                 return (EBUSY);
1310
1311                         switch (*(int *)data & ~IFF_MULTICAST) {
1312                         case IFF_POINTOPOINT:
1313                         case IFF_BROADCAST:
1314                                 TUN_LOCK(tp);
1315                                 TUN2IFP(tp)->if_flags &=
1316                                     ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
1317                                 TUN2IFP(tp)->if_flags |= *(int *)data;
1318                                 TUN_UNLOCK(tp);
1319
1320                                 break;
1321                         default:
1322                                 return (EINVAL);
1323                         }
1324
1325                         return (0);
1326                 case TUNSIFPID:
1327                         TUN_LOCK(tp);
1328                         tp->tun_pid = curthread->td_proc->p_pid;
1329                         TUN_UNLOCK(tp);
1330
1331                         return (0);
1332                 }
1333                 /* Fall through to the common ioctls if unhandled */
1334         }
1335
1336         switch (cmd) {
1337         case TUNSIFINFO:
1338                 tunp = (struct tuninfo *)data;
1339                 if (TUN2IFP(tp)->if_type != tunp->type)
1340                         return (EPROTOTYPE);
1341                 TUN_LOCK(tp);
1342                 if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
1343                         strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
1344                         ifr.ifr_mtu = tunp->mtu;
1345                         CURVNET_SET(TUN2IFP(tp)->if_vnet);
1346                         error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
1347                             (caddr_t)&ifr, td);
1348                         CURVNET_RESTORE();
1349                         if (error) {
1350                                 TUN_UNLOCK(tp);
1351                                 return (error);
1352                         }
1353                 }
1354                 TUN2IFP(tp)->if_baudrate = tunp->baudrate;
1355                 TUN_UNLOCK(tp);
1356                 break;
1357         case TUNGIFINFO:
1358                 tunp = (struct tuninfo *)data;
1359                 TUN_LOCK(tp);
1360                 tunp->mtu = TUN2IFP(tp)->if_mtu;
1361                 tunp->type = TUN2IFP(tp)->if_type;
1362                 tunp->baudrate = TUN2IFP(tp)->if_baudrate;
1363                 TUN_UNLOCK(tp);
1364                 break;
1365         case TUNSDEBUG:
1366                 tundebug = *(int *)data;
1367                 break;
1368         case TUNGDEBUG:
1369                 *(int *)data = tundebug;
1370                 break;
1371         case FIONBIO:
1372                 break;
1373         case FIOASYNC:
1374                 TUN_LOCK(tp);
1375                 if (*(int *)data)
1376                         tp->tun_flags |= TUN_ASYNC;
1377                 else
1378                         tp->tun_flags &= ~TUN_ASYNC;
1379                 TUN_UNLOCK(tp);
1380                 break;
1381         case FIONREAD:
1382                 if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
1383                         struct mbuf *mb;
1384                         IFQ_LOCK(&TUN2IFP(tp)->if_snd);
1385                         IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
1386                         for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
1387                                 *(int *)data += mb->m_len;
1388                         IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
1389                 } else
1390                         *(int *)data = 0;
1391                 break;
1392         case FIOSETOWN:
1393                 return (fsetown(*(int *)data, &tp->tun_sigio));
1394
1395         case FIOGETOWN:
1396                 *(int *)data = fgetown(&tp->tun_sigio);
1397                 return (0);
1398
1399         /* This is deprecated, FIOSETOWN should be used instead. */
1400         case TIOCSPGRP:
1401                 return (fsetown(-(*(int *)data), &tp->tun_sigio));
1402
1403         /* This is deprecated, FIOGETOWN should be used instead. */
1404         case TIOCGPGRP:
1405                 *(int *)data = -fgetown(&tp->tun_sigio);
1406                 return (0);
1407
1408         default:
1409                 return (ENOTTY);
1410         }
1411         return (0);
1412 }
1413
1414 /*
1415  * The cdevsw read interface - reads a packet at a time, or at
1416  * least as much of a packet as can be read.
1417  */
1418 static  int
1419 tunread(struct cdev *dev, struct uio *uio, int flag)
1420 {
1421         struct tuntap_softc *tp = dev->si_drv1;
1422         struct ifnet    *ifp = TUN2IFP(tp);
1423         struct mbuf     *m;
1424         int             error=0, len;
1425
1426         TUNDEBUG (ifp, "read\n");
1427         TUN_LOCK(tp);
1428         if ((tp->tun_flags & TUN_READY) != TUN_READY) {
1429                 TUN_UNLOCK(tp);
1430                 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
1431                 return (EHOSTDOWN);
1432         }
1433
1434         tp->tun_flags &= ~TUN_RWAIT;
1435
1436         do {
1437                 IFQ_DEQUEUE(&ifp->if_snd, m);
1438                 if (m == NULL) {
1439                         if (flag & O_NONBLOCK) {
1440                                 TUN_UNLOCK(tp);
1441                                 return (EWOULDBLOCK);
1442                         }
1443                         tp->tun_flags |= TUN_RWAIT;
1444                         error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
1445                             "tunread", 0);
1446                         if (error != 0) {
1447                                 TUN_UNLOCK(tp);
1448                                 return (error);
1449                         }
1450                 }
1451         } while (m == NULL);
1452         TUN_UNLOCK(tp);
1453
1454         if ((tp->tun_flags & TUN_L2) != 0)
1455                 BPF_MTAP(ifp, m);
1456
1457         while (m && uio->uio_resid > 0 && error == 0) {
1458                 len = min(uio->uio_resid, m->m_len);
1459                 if (len != 0)
1460                         error = uiomove(mtod(m, void *), len, uio);
1461                 m = m_free(m);
1462         }
1463
1464         if (m) {
1465                 TUNDEBUG(ifp, "Dropping mbuf\n");
1466                 m_freem(m);
1467         }
1468         return (error);
1469 }
1470
1471 static int
1472 tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m)
1473 {
1474         struct ether_header *eh;
1475         struct ifnet *ifp;
1476
1477         ifp = TUN2IFP(tp);
1478
1479         /*
1480          * Only pass a unicast frame to ether_input(), if it would
1481          * actually have been received by non-virtual hardware.
1482          */
1483         if (m->m_len < sizeof(struct ether_header)) {
1484                 m_freem(m);
1485                 return (0);
1486         }
1487
1488         eh = mtod(m, struct ether_header *);
1489
1490         if (eh && (ifp->if_flags & IFF_PROMISC) == 0 &&
1491             !ETHER_IS_MULTICAST(eh->ether_dhost) &&
1492             bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
1493                 m_freem(m);
1494                 return (0);
1495         }
1496
1497         /* Pass packet up to parent. */
1498         CURVNET_SET(ifp->if_vnet);
1499         (*ifp->if_input)(ifp, m);
1500         CURVNET_RESTORE();
1501         /* ibytes are counted in parent */
1502         if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
1503         return (0);
1504 }
1505
1506 static int
1507 tunwrite_l3(struct tuntap_softc *tp, struct mbuf *m)
1508 {
1509         struct ifnet *ifp;
1510         int family, isr;
1511
1512         ifp = TUN2IFP(tp);
1513         /* Could be unlocked read? */
1514         TUN_LOCK(tp);
1515         if (tp->tun_flags & TUN_IFHEAD) {
1516                 TUN_UNLOCK(tp);
1517                 if (m->m_len < sizeof(family) &&
1518                 (m = m_pullup(m, sizeof(family))) == NULL)
1519                         return (ENOBUFS);
1520                 family = ntohl(*mtod(m, u_int32_t *));
1521                 m_adj(m, sizeof(family));
1522         } else {
1523                 TUN_UNLOCK(tp);
1524                 family = AF_INET;
1525         }
1526
1527         BPF_MTAP2(ifp, &family, sizeof(family), m);
1528
1529         switch (family) {
1530 #ifdef INET
1531         case AF_INET:
1532                 isr = NETISR_IP;
1533                 break;
1534 #endif
1535 #ifdef INET6
1536         case AF_INET6:
1537                 isr = NETISR_IPV6;
1538                 break;
1539 #endif
1540         default:
1541                 m_freem(m);
1542                 return (EAFNOSUPPORT);
1543         }
1544         random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
1545         if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
1546         if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
1547         CURVNET_SET(ifp->if_vnet);
1548         M_SETFIB(m, ifp->if_fib);
1549         netisr_dispatch(isr, m);
1550         CURVNET_RESTORE();
1551         return (0);
1552 }
1553
1554 /*
1555  * the cdevsw write interface - an atomic write is a packet - or else!
1556  */
1557 static  int
1558 tunwrite(struct cdev *dev, struct uio *uio, int flag)
1559 {
1560         struct tuntap_softc *tp;
1561         struct ifnet    *ifp;
1562         struct mbuf     *m;
1563         uint32_t        mru;
1564         int             align;
1565         bool            l2tun;
1566
1567         tp = dev->si_drv1;
1568         ifp = TUN2IFP(tp);
1569         TUNDEBUG(ifp, "tunwrite\n");
1570         if ((ifp->if_flags & IFF_UP) != IFF_UP)
1571                 /* ignore silently */
1572                 return (0);
1573
1574         if (uio->uio_resid == 0)
1575                 return (0);
1576
1577         l2tun = (tp->tun_flags & TUN_L2) != 0;
1578         align = 0;
1579         mru = l2tun ? TAPMRU : TUNMRU;
1580         if (l2tun)
1581                 align = ETHER_ALIGN;
1582         else if ((tp->tun_flags & TUN_IFHEAD) != 0)
1583                 mru += sizeof(uint32_t);        /* family */
1584         if (uio->uio_resid < 0 || uio->uio_resid > mru) {
1585                 TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
1586                 return (EIO);
1587         }
1588
1589         if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) {
1590                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1591                 return (ENOBUFS);
1592         }
1593
1594         m->m_pkthdr.rcvif = ifp;
1595 #ifdef MAC
1596         mac_ifnet_create_mbuf(ifp, m);
1597 #endif
1598
1599         if (l2tun)
1600                 return (tunwrite_l2(tp, m));
1601
1602         return (tunwrite_l3(tp, m));
1603 }
1604
1605 /*
1606  * tunpoll - the poll interface, this is only useful on reads
1607  * really. The write detect always returns true, write never blocks
1608  * anyway, it either accepts the packet or drops it.
1609  */
1610 static  int
1611 tunpoll(struct cdev *dev, int events, struct thread *td)
1612 {
1613         struct tuntap_softc *tp = dev->si_drv1;
1614         struct ifnet    *ifp = TUN2IFP(tp);
1615         int             revents = 0;
1616
1617         TUNDEBUG(ifp, "tunpoll\n");
1618
1619         if (events & (POLLIN | POLLRDNORM)) {
1620                 IFQ_LOCK(&ifp->if_snd);
1621                 if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
1622                         TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
1623                         revents |= events & (POLLIN | POLLRDNORM);
1624                 } else {
1625                         TUNDEBUG(ifp, "tunpoll waiting\n");
1626                         selrecord(td, &tp->tun_rsel);
1627                 }
1628                 IFQ_UNLOCK(&ifp->if_snd);
1629         }
1630         if (events & (POLLOUT | POLLWRNORM))
1631                 revents |= events & (POLLOUT | POLLWRNORM);
1632
1633         return (revents);
1634 }
1635
1636 /*
1637  * tunkqfilter - support for the kevent() system call.
1638  */
1639 static int
1640 tunkqfilter(struct cdev *dev, struct knote *kn)
1641 {
1642         struct tuntap_softc     *tp = dev->si_drv1;
1643         struct ifnet    *ifp = TUN2IFP(tp);
1644
1645         switch(kn->kn_filter) {
1646         case EVFILT_READ:
1647                 TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
1648                     ifp->if_xname, dev2unit(dev));
1649                 kn->kn_fop = &tun_read_filterops;
1650                 break;
1651
1652         case EVFILT_WRITE:
1653                 TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
1654                     ifp->if_xname, dev2unit(dev));
1655                 kn->kn_fop = &tun_write_filterops;
1656                 break;
1657
1658         default:
1659                 TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
1660                     ifp->if_xname, dev2unit(dev));
1661                 return(EINVAL);
1662         }
1663
1664         kn->kn_hook = tp;
1665         knlist_add(&tp->tun_rsel.si_note, kn, 0);
1666
1667         return (0);
1668 }
1669
1670 /*
1671  * Return true of there is data in the interface queue.
1672  */
1673 static int
1674 tunkqread(struct knote *kn, long hint)
1675 {
1676         int                     ret;
1677         struct tuntap_softc     *tp = kn->kn_hook;
1678         struct cdev             *dev = tp->tun_dev;
1679         struct ifnet    *ifp = TUN2IFP(tp);
1680
1681         if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
1682                 TUNDEBUG(ifp,
1683                     "%s have data in the queue.  Len = %d, minor = %#x\n",
1684                     ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
1685                 ret = 1;
1686         } else {
1687                 TUNDEBUG(ifp,
1688                     "%s waiting for data, minor = %#x\n", ifp->if_xname,
1689                     dev2unit(dev));
1690                 ret = 0;
1691         }
1692
1693         return (ret);
1694 }
1695
1696 /*
1697  * Always can write, always return MTU in kn->data.
1698  */
1699 static int
1700 tunkqwrite(struct knote *kn, long hint)
1701 {
1702         struct tuntap_softc     *tp = kn->kn_hook;
1703         struct ifnet    *ifp = TUN2IFP(tp);
1704
1705         kn->kn_data = ifp->if_mtu;
1706
1707         return (1);
1708 }
1709
1710 static void
1711 tunkqdetach(struct knote *kn)
1712 {
1713         struct tuntap_softc     *tp = kn->kn_hook;
1714
1715         knlist_remove(&tp->tun_rsel.si_note, kn, 0);
1716 }