]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/net/if_tuntap.c
Implement pci_enable_msi() and pci_disable_msi() in the LinuxKPI.
[FreeBSD/FreeBSD.git] / sys / net / if_tuntap.c
1 /*      $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $  */
2 /*-
3  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4  *
5  * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
6  * All rights reserved.
7  * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  * BASED ON:
32  * -------------------------------------------------------------------------
33  *
34  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
35  * Nottingham University 1987.
36  *
37  * This source may be freely distributed, however I would be interested
38  * in any changes that are made.
39  *
40  * This driver takes packets off the IP i/f and hands them up to a
41  * user process to have its wicked way with. This driver has its
42  * roots in a similar driver written by Phil Cockcroft (formerly) at
43  * UCL. This driver is based much more on read/write/poll mode of
44  * operation though.
45  *
46  * $FreeBSD$
47  */
48
49 #include "opt_inet.h"
50 #include "opt_inet6.h"
51
52 #include <sys/param.h>
53 #include <sys/lock.h>
54 #include <sys/priv.h>
55 #include <sys/proc.h>
56 #include <sys/systm.h>
57 #include <sys/jail.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/socket.h>
61 #include <sys/eventhandler.h>
62 #include <sys/fcntl.h>
63 #include <sys/filio.h>
64 #include <sys/sockio.h>
65 #include <sys/sx.h>
66 #include <sys/ttycom.h>
67 #include <sys/poll.h>
68 #include <sys/selinfo.h>
69 #include <sys/signalvar.h>
70 #include <sys/filedesc.h>
71 #include <sys/kernel.h>
72 #include <sys/sysctl.h>
73 #include <sys/conf.h>
74 #include <sys/uio.h>
75 #include <sys/malloc.h>
76 #include <sys/random.h>
77 #include <sys/ctype.h>
78
79 #include <net/ethernet.h>
80 #include <net/if.h>
81 #include <net/if_var.h>
82 #include <net/if_clone.h>
83 #include <net/if_dl.h>
84 #include <net/if_media.h>
85 #include <net/if_types.h>
86 #include <net/netisr.h>
87 #include <net/route.h>
88 #include <net/vnet.h>
89 #ifdef INET
90 #include <netinet/in.h>
91 #endif
92 #include <net/bpf.h>
93 #include <net/if_tap.h>
94 #include <net/if_tun.h>
95
96 #include <sys/queue.h>
97 #include <sys/condvar.h>
98 #include <security/mac/mac_framework.h>
99
100 struct tuntap_driver;
101
/*
 * Per-interface software state, shared by every driver flavour listed in
 * tuntap_drivers[].
 *
 * tun_list is protected by global tunmtx.  Other mutable fields are
 * protected by tun->tun_mtx, or by their owning subsystem.  tun_dev is
 * static for the duration of a tunnel interface.
 */
struct tuntap_softc {
        TAILQ_ENTRY(tuntap_softc)        tun_list;
        struct cdev                     *tun_dev;
        u_short                          tun_flags;     /* misc flags */
#define TUN_OPEN        0x0001  /* set by tunopen(); blocks a second open */
#define TUN_INITED      0x0002  /* set once tuncreate() has attached the ifnet */
#define TUN_RCOLL       0x0004
#define TUN_IASET       0x0008
#define TUN_DSTADDR     0x0010
#define TUN_LMODE       0x0020
#define TUN_RWAIT       0x0040  /* cleared, with a wakeup(), by the start routines */
#define TUN_ASYNC       0x0080  /* start routines post SIGIO when tun_sigio is set */
#define TUN_IFHEAD      0x0100
#define TUN_DYING       0x0200  /* set by tun_destroy(); makes tunopen() fail */
#define TUN_L2          0x0400  /* Ethernet-attached flavour (tap and vmnet) */
#define TUN_VMNET       0x0800  /* vmnet flavour; always combined with TUN_L2 */

/* Bits that select the owning entry in tuntap_drivers[]. */
#define TUN_DRIVER_IDENT_MASK   (TUN_L2 | TUN_VMNET)
/* Both bits are required before tunstart_l2() keeps queued output. */
#define TUN_READY               (TUN_OPEN | TUN_INITED)

        pid_t                    tun_pid;       /* owning pid */
        struct ifnet            *tun_ifp;       /* the interface */
        struct sigio            *tun_sigio;     /* async I/O info */
        struct tuntap_driver    *tun_drv;       /* appropriate driver */
        struct selinfo           tun_rsel;      /* read select */
        struct mtx               tun_mtx;       /* softc field mutex */
        struct cv                tun_cv;        /* for ref'd dev destroy */
        struct ether_addr        tun_ether;     /* remote address */
};
/* Fetch the ifnet that belongs to a softc. */
#define TUN2IFP(sc)     ((sc)->tun_ifp)

/*
 * Conditional debug printf, enabled by the tundebug knob.
 *
 * Wrapped in do/while so that the macro behaves as a single statement: the
 * previous bare "if (tundebug) if_printf" form captured any "else" that
 * followed an invocation.
 */
#define TUNDEBUG(...)   do {                                            \
        if (tundebug)                                                   \
                if_printf(__VA_ARGS__);                                 \
} while (0)

/* Acquire/release the per-softc mutex. */
#define TUN_LOCK(tp)    mtx_lock(&(tp)->tun_mtx)
#define TUN_UNLOCK(tp)  mtx_unlock(&(tp)->tun_mtx)

#define TUN_VMIO_FLAG_MASK      0x0fff
144
/*
 * All mutable global variables in if_tun are locked using tunmtx, with
 * the exception of tundebug, which is used unlocked, and the drivers' *clones,
 * which are static after setup.
 */
static struct mtx tunmtx;
static eventhandler_tag tag;            /* dev_clone handler registration */
static const char tunname[] = "tun";
static const char tapname[] = "tap";
static const char vmnetname[] = "vmnet";
static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
static int tundebug = 0;        /* gates TUNDEBUG output */
static int tundclone = 1;       /* enable devfs cloning for tun */
static int tap_allow_uopen = 0; /* allow user open() */
static int tapuponopen = 0;     /* IFF_UP on open() */
static int tapdclone = 1;       /* enable devfs cloning */

/* Every live softc, regardless of driver flavour. */
static TAILQ_HEAD(,tuntap_softc)        tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");

/* Taken exclusively by tun_destroy() while it clears if_softc. */
static struct sx tun_ioctl_sx;
SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");

SYSCTL_DECL(_net_link);
/* tun */
static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
    "IP tunnel software network interface.");
SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
    "Enable legacy devfs interface creation.");

/* tap */
static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
    "Ethernet tunnel software network interface");
SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0,
    "Allow user to open /dev/tap (based on node permissions)");
SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
    "Bring interface up when /dev/tap is opened");
SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
    "Enable legacy devfs interface creation");
/* Same variable as debug.if_tun_debug above. */
SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, "");
185
/* Name parsing, devfs cloning and interface-side entry points. */
static int      tuntap_name2info(const char *name, int *unit, int *flags);
static void     tunclone(void *arg, struct ucred *cred, char *name,
                    int namelen, struct cdev **dev);
static void     tuncreate(struct cdev *dev, struct tuntap_driver *);
static int      tunifioctl(struct ifnet *, u_long, caddr_t);
static void     tuninit(struct ifnet *);
static void     tunifinit(void *xtp);
static int      tuntapmodevent(module_t, int, void *);
static int      tunoutput(struct ifnet *, struct mbuf *,
                    const struct sockaddr *, struct route *ro);
static void     tunstart(struct ifnet *);
static void     tunstart_l2(struct ifnet *);

/* if_clone callbacks; create/destroy are shared by all three flavours. */
static int      tun_clone_match(struct if_clone *ifc, const char *name);
static int      tap_clone_match(struct if_clone *ifc, const char *name);
static int      vmnet_clone_match(struct if_clone *ifc, const char *name);
static int      tun_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int      tun_clone_destroy(struct if_clone *, struct ifnet *);

/* Character device switch entry points. */
static d_open_t         tunopen;
static d_close_t        tunclose;
static d_read_t         tunread;
static d_write_t        tunwrite;
static d_ioctl_t        tunioctl;
static d_poll_t         tunpoll;
static d_kqfilter_t     tunkqfilter;

/* kqueue filter callbacks. */
static int              tunkqread(struct knote *, long);
static int              tunkqwrite(struct knote *, long);
static void             tunkqdetach(struct knote *);
216
/* kqueue filter operations that report events through tunkqread(). */
static struct filterops tun_read_filterops = {
        .f_isfd =       1,
        .f_attach =     NULL,
        .f_detach =     tunkqdetach,
        .f_event =      tunkqread,
};

/* Same as above but events come from tunkqwrite(); detach is shared. */
static struct filterops tun_write_filterops = {
        .f_isfd =       1,
        .f_attach =     NULL,
        .f_detach =     tunkqdetach,
        .f_event =      tunkqwrite,
};
230
/*
 * One entry per driver flavour.  The three entries share every cdevsw method
 * and the create/destroy cloner callbacks; they differ only in ident_flags,
 * the device name and the cloner match function.  unrhdr and clones are
 * filled in at MOD_LOAD by tuntapmodevent().
 */
static struct tuntap_driver {
        struct cdevsw            cdevsw;
        int                      ident_flags;   /* compared under TUN_DRIVER_IDENT_MASK */
        struct unrhdr           *unrhdr;        /* unit number allocator */
        struct clonedevs        *clones;
        ifc_match_t             *clone_match_fn;
        ifc_create_t            *clone_create_fn;
        ifc_destroy_t           *clone_destroy_fn;
} tuntap_drivers[] = {
        /* tun */
        {
                .ident_flags =  0,
                .cdevsw =       {
                    .d_version =        D_VERSION,
                    .d_flags =          D_NEEDMINOR,
                    .d_open =           tunopen,
                    .d_close =          tunclose,
                    .d_read =           tunread,
                    .d_write =          tunwrite,
                    .d_ioctl =          tunioctl,
                    .d_poll =           tunpoll,
                    .d_kqfilter =       tunkqfilter,
                    .d_name =           tunname,
                },
                .clone_match_fn =       tun_clone_match,
                .clone_create_fn =      tun_clone_create,
                .clone_destroy_fn =     tun_clone_destroy,
        },
        /* tap */
        {
                .ident_flags =  TUN_L2,
                .cdevsw =       {
                    .d_version =        D_VERSION,
                    .d_flags =          D_NEEDMINOR,
                    .d_open =           tunopen,
                    .d_close =          tunclose,
                    .d_read =           tunread,
                    .d_write =          tunwrite,
                    .d_ioctl =          tunioctl,
                    .d_poll =           tunpoll,
                    .d_kqfilter =       tunkqfilter,
                    .d_name =           tapname,
                },
                .clone_match_fn =       tap_clone_match,
                .clone_create_fn =      tun_clone_create,
                .clone_destroy_fn =     tun_clone_destroy,
        },
        /* vmnet */
        {
                .ident_flags =  TUN_L2 | TUN_VMNET,
                .cdevsw =       {
                    .d_version =        D_VERSION,
                    .d_flags =          D_NEEDMINOR,
                    .d_open =           tunopen,
                    .d_close =          tunclose,
                    .d_read =           tunread,
                    .d_write =          tunwrite,
                    .d_ioctl =          tunioctl,
                    .d_poll =           tunpoll,
                    .d_kqfilter =       tunkqfilter,
                    .d_name =           vmnetname,
                },
                .clone_match_fn =       vmnet_clone_match,
                .clone_create_fn =      tun_clone_create,
                .clone_destroy_fn =     tun_clone_destroy,
        },
};
295
/*
 * Pairs a driver table entry with the if_clone handle created for it in
 * vnet_tun_init(); one list of these exists per vnet.
 */
struct tuntap_driver_cloner {
        SLIST_ENTRY(tuntap_driver_cloner)        link;
        struct tuntap_driver                    *drv;
        struct if_clone                         *cloner;
};

VNET_DEFINE_STATIC(SLIST_HEAD(, tuntap_driver_cloner), tuntap_driver_cloners) =
    SLIST_HEAD_INITIALIZER(tuntap_driver_cloners);

/* Accessor for the current vnet's cloner list. */
#define V_tuntap_driver_cloners VNET(tuntap_driver_cloners)
306
307 /*
308  * Sets unit and/or flags given the device name.  Must be called with correct
309  * vnet context.
310  */
311 static int
312 tuntap_name2info(const char *name, int *outunit, int *outflags)
313 {
314         struct tuntap_driver *drv;
315         struct tuntap_driver_cloner *drvc;
316         char *dname;
317         int flags, unit;
318         bool found;
319
320         if (name == NULL)
321                 return (EINVAL);
322
323         /*
324          * Needed for dev_stdclone, but dev_stdclone will not modify, it just
325          * wants to be able to pass back a char * through the second param. We
326          * will always set that as NULL here, so we'll fake it.
327          */
328         dname = __DECONST(char *, name);
329         found = false;
330
331         KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
332             ("tuntap_driver_cloners failed to initialize"));
333         SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
334                 KASSERT(drvc->drv != NULL,
335                     ("tuntap_driver_cloners entry not properly initialized"));
336                 drv = drvc->drv;
337
338                 if (strcmp(name, drv->cdevsw.d_name) == 0) {
339                         found = true;
340                         unit = -1;
341                         flags = drv->ident_flags;
342                         break;
343                 }
344
345                 if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) {
346                         found = true;
347                         flags = drv->ident_flags;
348                         break;
349                 }
350         }
351
352         if (!found)
353                 return (ENXIO);
354
355         if (outunit != NULL)
356                 *outunit = unit;
357         if (outflags != NULL)
358                 *outflags = flags;
359         return (0);
360 }
361
362 /*
363  * Get driver information from a set of flags specified.  Masks the identifying
364  * part of the flags and compares it against all of the available
365  * tuntap_drivers. Must be called with correct vnet context.
366  */
367 static struct tuntap_driver *
368 tuntap_driver_from_flags(int tun_flags)
369 {
370         struct tuntap_driver *drv;
371         struct tuntap_driver_cloner *drvc;
372
373         KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
374             ("tuntap_driver_cloners failed to initialize"));
375         SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
376                 KASSERT(drvc->drv != NULL,
377                     ("tuntap_driver_cloners entry not properly initialized"));
378                 drv = drvc->drv;
379                 if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->ident_flags)
380                         return (drv);
381         }
382
383         return (NULL);
384 }
385
386
387
388 static int
389 tun_clone_match(struct if_clone *ifc, const char *name)
390 {
391         int tunflags;
392
393         if (tuntap_name2info(name, NULL, &tunflags) == 0) {
394                 if ((tunflags & TUN_L2) == 0)
395                         return (1);
396         }
397
398         return (0);
399 }
400
401 static int
402 tap_clone_match(struct if_clone *ifc, const char *name)
403 {
404         int tunflags;
405
406         if (tuntap_name2info(name, NULL, &tunflags) == 0) {
407                 if ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2)
408                         return (1);
409         }
410
411         return (0);
412 }
413
414 static int
415 vmnet_clone_match(struct if_clone *ifc, const char *name)
416 {
417         int tunflags;
418
419         if (tuntap_name2info(name, NULL, &tunflags) == 0) {
420                 if ((tunflags & TUN_VMNET) != 0)
421                         return (1);
422         }
423
424         return (0);
425 }
426
427 static int
428 tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
429 {
430         struct tuntap_driver *drv;
431         struct cdev *dev;
432         int err, i, tunflags, unit;
433
434         tunflags = 0;
435         /* The name here tells us exactly what we're creating */
436         err = tuntap_name2info(name, &unit, &tunflags);
437         if (err != 0)
438                 return (err);
439
440         drv = tuntap_driver_from_flags(tunflags);
441         if (drv == NULL)
442                 return (ENXIO);
443
444         if (unit != -1) {
445                 /* If this unit number is still available that/s okay. */
446                 if (alloc_unr_specific(drv->unrhdr, unit) == -1)
447                         return (EEXIST);
448         } else {
449                 unit = alloc_unr(drv->unrhdr);
450         }
451
452         snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit);
453
454         /* find any existing device, or allocate new unit number */
455         i = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0);
456         if (i) {
457                 /* No preexisting struct cdev *, create one */
458                 dev = make_dev(&drv->cdevsw, unit, UID_UUCP, GID_DIALER, 0600,
459                     "%s%d", drv->cdevsw.d_name, unit);
460         }
461
462         tuncreate(dev, drv);
463
464         return (0);
465 }
466
467 static void
468 tunclone(void *arg, struct ucred *cred, char *name, int namelen,
469     struct cdev **dev)
470 {
471         char devname[SPECNAMELEN + 1];
472         struct tuntap_driver *drv;
473         int append_unit, i, u, tunflags;
474         bool mayclone;
475
476         if (*dev != NULL)
477                 return;
478
479         tunflags = 0;
480         CURVNET_SET(CRED_TO_VNET(cred));
481         if (tuntap_name2info(name, &u, &tunflags) != 0)
482                 goto out;       /* Not recognized */
483
484         if (u != -1 && u > IF_MAXUNIT)
485                 goto out;       /* Unit number too high */
486
487         mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE) == 0;
488         if ((tunflags & TUN_L2) != 0) {
489                 /* tap/vmnet allow user open with a sysctl */
490                 mayclone = (mayclone || tap_allow_uopen) && tapdclone;
491         } else {
492                 mayclone = mayclone && tundclone;
493         }
494
495         /*
496          * If tun cloning is enabled, only the superuser can create an
497          * interface.
498          */
499         if (!mayclone)
500                 goto out;
501
502         if (u == -1)
503                 append_unit = 1;
504         else
505                 append_unit = 0;
506
507         drv = tuntap_driver_from_flags(tunflags);
508         if (drv == NULL)
509                 goto out;
510
511         /* find any existing device, or allocate new unit number */
512         i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0);
513         if (i) {
514                 if (append_unit) {
515                         namelen = snprintf(devname, sizeof(devname), "%s%d",
516                             name, u);
517                         name = devname;
518                 }
519                 /* No preexisting struct cdev *, create one */
520                 *dev = make_dev_credf(MAKEDEV_REF, &drv->cdevsw, u, cred,
521                     UID_UUCP, GID_DIALER, 0600, "%s", name);
522         }
523
524         if_clone_create(name, namelen, NULL);
525 out:
526         CURVNET_RESTORE();
527 }
528
529 static void
530 tun_destroy(struct tuntap_softc *tp)
531 {
532
533         TUN_LOCK(tp);
534         tp->tun_flags |= TUN_DYING;
535         if ((tp->tun_flags & TUN_OPEN) != 0)
536                 cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
537         else
538                 TUN_UNLOCK(tp);
539
540         CURVNET_SET(TUN2IFP(tp)->if_vnet);
541
542         destroy_dev(tp->tun_dev);
543         seldrain(&tp->tun_rsel);
544         knlist_clear(&tp->tun_rsel.si_note, 0);
545         knlist_destroy(&tp->tun_rsel.si_note);
546         if ((tp->tun_flags & TUN_L2) != 0) {
547                 ether_ifdetach(TUN2IFP(tp));
548         } else {
549                 bpfdetach(TUN2IFP(tp));
550                 if_detach(TUN2IFP(tp));
551         }
552         sx_xlock(&tun_ioctl_sx);
553         TUN2IFP(tp)->if_softc = NULL;
554         sx_xunlock(&tun_ioctl_sx);
555         free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit);
556         if_free(TUN2IFP(tp));
557         mtx_destroy(&tp->tun_mtx);
558         cv_destroy(&tp->tun_cv);
559         free(tp, M_TUN);
560         CURVNET_RESTORE();
561 }
562
563 static int
564 tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp)
565 {
566         struct tuntap_softc *tp = ifp->if_softc;
567
568         mtx_lock(&tunmtx);
569         TAILQ_REMOVE(&tunhead, tp, tun_list);
570         mtx_unlock(&tunmtx);
571         tun_destroy(tp);
572
573         return (0);
574 }
575
576 static void
577 vnet_tun_init(const void *unused __unused)
578 {
579         struct tuntap_driver *drv;
580         struct tuntap_driver_cloner *drvc;
581         int i;
582
583         for (i = 0; i < nitems(tuntap_drivers); ++i) {
584                 drv = &tuntap_drivers[i];
585                 drvc = malloc(sizeof(*drvc), M_TUN, M_WAITOK | M_ZERO);
586
587                 drvc->drv = drv;
588                 drvc->cloner = if_clone_advanced(drv->cdevsw.d_name, 0,
589                     drv->clone_match_fn, drv->clone_create_fn,
590                     drv->clone_destroy_fn);
591                 SLIST_INSERT_HEAD(&V_tuntap_driver_cloners, drvc, link);
592         };
593 }
594 VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
595                 vnet_tun_init, NULL);
596
597 static void
598 vnet_tun_uninit(const void *unused __unused)
599 {
600         struct tuntap_driver_cloner *drvc;
601
602         while (!SLIST_EMPTY(&V_tuntap_driver_cloners)) {
603                 drvc = SLIST_FIRST(&V_tuntap_driver_cloners);
604                 SLIST_REMOVE_HEAD(&V_tuntap_driver_cloners, link);
605
606                 if_clone_detach(drvc->cloner);
607                 free(drvc, M_TUN);
608         }
609 }
610 VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
611     vnet_tun_uninit, NULL);
612
613 static void
614 tun_uninit(const void *unused __unused)
615 {
616         struct tuntap_driver *drv;
617         struct tuntap_softc *tp;
618         int i;
619
620         EVENTHANDLER_DEREGISTER(dev_clone, tag);
621         drain_dev_clone_events();
622
623         mtx_lock(&tunmtx);
624         while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
625                 TAILQ_REMOVE(&tunhead, tp, tun_list);
626                 mtx_unlock(&tunmtx);
627                 tun_destroy(tp);
628                 mtx_lock(&tunmtx);
629         }
630         mtx_unlock(&tunmtx);
631         for (i = 0; i < nitems(tuntap_drivers); ++i) {
632                 drv = &tuntap_drivers[i];
633                 delete_unrhdr(drv->unrhdr);
634                 clone_cleanup(&drv->clones);
635         }
636         mtx_destroy(&tunmtx);
637 }
638 SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL);
639
640 static int
641 tuntapmodevent(module_t mod, int type, void *data)
642 {
643         struct tuntap_driver *drv;
644         int i;
645
646         switch (type) {
647         case MOD_LOAD:
648                 mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
649                 for (i = 0; i < nitems(tuntap_drivers); ++i) {
650                         drv = &tuntap_drivers[i];
651                         clone_setup(&drv->clones);
652                         drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
653                 }
654                 tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
655                 if (tag == NULL)
656                         return (ENOMEM);
657                 break;
658         case MOD_UNLOAD:
659                 /* See tun_uninit, so it's done after the vnet_sysuninit() */
660                 break;
661         default:
662                 return EOPNOTSUPP;
663         }
664         return 0;
665 }
666
/* Module descriptor: name, event handler, and no extra argument. */
static moduledata_t tuntap_mod = {
        "if_tuntap",
        tuntapmodevent,
        0
};

DECLARE_MODULE(if_tuntap, tuntap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_tuntap, 1);
675
676 static void
677 tunstart(struct ifnet *ifp)
678 {
679         struct tuntap_softc *tp = ifp->if_softc;
680         struct mbuf *m;
681
682         TUNDEBUG(ifp, "starting\n");
683         if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
684                 IFQ_LOCK(&ifp->if_snd);
685                 IFQ_POLL_NOLOCK(&ifp->if_snd, m);
686                 if (m == NULL) {
687                         IFQ_UNLOCK(&ifp->if_snd);
688                         return;
689                 }
690                 IFQ_UNLOCK(&ifp->if_snd);
691         }
692
693         TUN_LOCK(tp);
694         if (tp->tun_flags & TUN_RWAIT) {
695                 tp->tun_flags &= ~TUN_RWAIT;
696                 wakeup(tp);
697         }
698         selwakeuppri(&tp->tun_rsel, PZERO + 1);
699         KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
700         if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
701                 TUN_UNLOCK(tp);
702                 pgsigio(&tp->tun_sigio, SIGIO, 0);
703         } else
704                 TUN_UNLOCK(tp);
705 }
706
707 /*
708  * tunstart_l2
709  *
710  * queue packets from higher level ready to put out
711  */
712 static void
713 tunstart_l2(struct ifnet *ifp)
714 {
715         struct tuntap_softc     *tp = ifp->if_softc;
716
717         TUNDEBUG(ifp, "starting\n");
718
719         /*
720          * do not junk pending output if we are in VMnet mode.
721          * XXX: can this do any harm because of queue overflow?
722          */
723
724         TUN_LOCK(tp);
725         if (((tp->tun_flags & TUN_VMNET) == 0) &&
726             ((tp->tun_flags & TUN_READY) != TUN_READY)) {
727                 struct mbuf *m;
728
729                 /* Unlocked read. */
730                 TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags);
731
732                 for (;;) {
733                         IF_DEQUEUE(&ifp->if_snd, m);
734                         if (m != NULL) {
735                                 m_freem(m);
736                                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
737                         } else
738                                 break;
739                 }
740                 TUN_UNLOCK(tp);
741
742                 return;
743         }
744
745         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
746
747         if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
748                 if (tp->tun_flags & TUN_RWAIT) {
749                         tp->tun_flags &= ~TUN_RWAIT;
750                         wakeup(tp);
751                 }
752
753                 if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) {
754                         TUN_UNLOCK(tp);
755                         pgsigio(&tp->tun_sigio, SIGIO, 0);
756                         TUN_LOCK(tp);
757                 }
758
759                 selwakeuppri(&tp->tun_rsel, PZERO+1);
760                 KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
761                 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
762         }
763
764         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
765         TUN_UNLOCK(tp);
766 } /* tunstart_l2 */
767
768
769 /* XXX: should return an error code so it can fail. */
770 static void
771 tuncreate(struct cdev *dev, struct tuntap_driver *drv)
772 {
773         struct tuntap_softc *sc;
774         struct ifnet *ifp;
775         struct ether_addr eaddr;
776         int iflags;
777         u_char type;
778
779         sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
780         mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
781         cv_init(&sc->tun_cv, "tun_condvar");
782         sc->tun_flags = drv->ident_flags;
783         sc->tun_dev = dev;
784         sc->tun_drv = drv;
785         mtx_lock(&tunmtx);
786         TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
787         mtx_unlock(&tunmtx);
788
789         iflags = IFF_MULTICAST;
790         if ((sc->tun_flags & TUN_L2) != 0) {
791                 type = IFT_ETHER;
792                 iflags |= IFF_BROADCAST | IFF_SIMPLEX;
793         } else {
794                 type = IFT_PPP;
795                 iflags |= IFF_POINTOPOINT;
796         }
797         ifp = sc->tun_ifp = if_alloc(type);
798         if (ifp == NULL)
799                 panic("%s%d: failed to if_alloc() interface.\n",
800                     drv->cdevsw.d_name, dev2unit(dev));
801         ifp->if_softc = sc;
802         if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev));
803         ifp->if_ioctl = tunifioctl;
804         ifp->if_flags = iflags;
805         IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
806         knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx);
807         ifp->if_capabilities |= IFCAP_LINKSTATE;
808         ifp->if_capenable |= IFCAP_LINKSTATE;
809
810         if ((sc->tun_flags & TUN_L2) != 0) {
811                 ifp->if_mtu = ETHERMTU;
812                 ifp->if_init = tunifinit;
813                 ifp->if_start = tunstart_l2;
814
815                 ether_gen_addr(ifp, &eaddr);
816                 ether_ifattach(ifp, eaddr.octet);
817         } else {
818                 ifp->if_mtu = TUNMTU;
819                 ifp->if_start = tunstart;
820                 ifp->if_output = tunoutput;
821
822                 ifp->if_snd.ifq_drv_maxlen = 0;
823                 IFQ_SET_READY(&ifp->if_snd);
824
825                 if_attach(ifp);
826                 bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
827         }
828         dev->si_drv1 = sc;
829
830         TUN_LOCK(sc);
831         sc->tun_flags |= TUN_INITED;
832         TUN_UNLOCK(sc);
833
834         TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
835             ifp->if_xname, dev2unit(dev));
836 }
837
838 static int
839 tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
840 {
841         struct ifnet    *ifp;
842         struct tuntap_driver *drv;
843         struct tuntap_softc *tp;
844         int error, tunflags;
845
846         tunflags = 0;
847         CURVNET_SET(TD_TO_VNET(td));
848         error = tuntap_name2info(dev->si_name, NULL, &tunflags);
849         if (error != 0) {
850                 CURVNET_RESTORE();
851                 return (error); /* Shouldn't happen */
852         }
853
854         if ((tunflags & TUN_L2) != 0) {
855                 /* Restrict? */
856                 if (tap_allow_uopen == 0) {
857                         error = priv_check(td, PRIV_NET_TAP);
858                         if (error != 0) {
859                                 CURVNET_RESTORE();
860                                 return (error);
861                         }
862                 }
863         }
864
865         /*
866          * XXXRW: Non-atomic test and set of dev->si_drv1 requires
867          * synchronization.
868          */
869         tp = dev->si_drv1;
870         if (!tp) {
871                 drv = tuntap_driver_from_flags(tunflags);
872                 if (drv == NULL) {
873                         CURVNET_RESTORE();
874                         return (ENXIO);
875                 }
876                 tuncreate(dev, drv);
877                 tp = dev->si_drv1;
878         }
879
880         TUN_LOCK(tp);
881         if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) {
882                 TUN_UNLOCK(tp);
883                 CURVNET_RESTORE();
884                 return (EBUSY);
885         }
886
887         ifp = TUN2IFP(tp);
888
889         if ((tp->tun_flags & TUN_L2) != 0) {
890                 bcopy(IF_LLADDR(ifp), tp->tun_ether.octet,
891                     sizeof(tp->tun_ether.octet));
892
893                 ifp->if_drv_flags |= IFF_DRV_RUNNING;
894                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
895
896                 if (tapuponopen)
897                         ifp->if_flags |= IFF_UP;
898         }
899
900         tp->tun_pid = td->td_proc->p_pid;
901         tp->tun_flags |= TUN_OPEN;
902
903         if_link_state_change(ifp, LINK_STATE_UP);
904         TUNDEBUG(ifp, "open\n");
905         TUN_UNLOCK(tp);
906         CURVNET_RESTORE();
907         return (0);
908 }
909
910 /*
911  * tunclose - close the device - mark i/f down & delete
912  * routing info
913  */
914 static  int
915 tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
916 {
917         struct tuntap_softc *tp;
918         struct ifnet *ifp;
919         bool l2tun;
920
921         tp = dev->si_drv1;
922         ifp = TUN2IFP(tp);
923
924         TUN_LOCK(tp);
925         /*
926          * Simply close the device if this isn't the controlling process.  This
927          * may happen if, for instance, the tunnel has been handed off to
928          * another process.  The original controller should be able to close it
929          * without putting us into an inconsistent state.
930          */
931         if (td->td_proc->p_pid != tp->tun_pid) {
932                 TUN_UNLOCK(tp);
933                 return (0);
934         }
935
936         /*
937          * junk all pending output
938          */
939         CURVNET_SET(ifp->if_vnet);
940
941         l2tun = false;
942         if ((tp->tun_flags & TUN_L2) != 0) {
943                 l2tun = true;
944                 IF_DRAIN(&ifp->if_snd);
945         } else {
946                 IFQ_PURGE(&ifp->if_snd);
947         }
948
949         /* For vmnet, we won't do most of the address/route bits */
950         if ((tp->tun_flags & TUN_VMNET) != 0 ||
951             (l2tun && (ifp->if_flags & IFF_LINK0) != 0))
952                 goto out;
953
954         if (ifp->if_flags & IFF_UP) {
955                 TUN_UNLOCK(tp);
956                 if_down(ifp);
957                 TUN_LOCK(tp);
958         }
959
960         /* Delete all addresses and routes which reference this interface. */
961         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
962                 struct ifaddr *ifa;
963
964                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
965                 TUN_UNLOCK(tp);
966                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
967                         /* deal w/IPv4 PtP destination; unlocked read */
968                         if (!l2tun && ifa->ifa_addr->sa_family == AF_INET) {
969                                 rtinit(ifa, (int)RTM_DELETE,
970                                     tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
971                         } else {
972                                 rtinit(ifa, (int)RTM_DELETE, 0);
973                         }
974                 }
975                 if_purgeaddrs(ifp);
976                 TUN_LOCK(tp);
977         }
978
979 out:
980         if_link_state_change(ifp, LINK_STATE_DOWN);
981         CURVNET_RESTORE();
982
983         funsetown(&tp->tun_sigio);
984         selwakeuppri(&tp->tun_rsel, PZERO + 1);
985         KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
986         TUNDEBUG (ifp, "closed\n");
987         tp->tun_flags &= ~TUN_OPEN;
988         tp->tun_pid = 0;
989
990         cv_broadcast(&tp->tun_cv);
991         TUN_UNLOCK(tp);
992         return (0);
993 }
994
995 static void
996 tuninit(struct ifnet *ifp)
997 {
998         struct tuntap_softc *tp = ifp->if_softc;
999 #ifdef INET
1000         struct ifaddr *ifa;
1001 #endif
1002
1003         TUNDEBUG(ifp, "tuninit\n");
1004
1005         TUN_LOCK(tp);
1006         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1007         if ((tp->tun_flags & TUN_L2) == 0) {
1008                 ifp->if_flags |= IFF_UP;
1009                 getmicrotime(&ifp->if_lastchange);
1010 #ifdef INET
1011                 if_addr_rlock(ifp);
1012                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1013                         if (ifa->ifa_addr->sa_family == AF_INET) {
1014                                 struct sockaddr_in *si;
1015
1016                                 si = (struct sockaddr_in *)ifa->ifa_addr;
1017                                 if (si->sin_addr.s_addr)
1018                                         tp->tun_flags |= TUN_IASET;
1019
1020                                 si = (struct sockaddr_in *)ifa->ifa_dstaddr;
1021                                 if (si && si->sin_addr.s_addr)
1022                                         tp->tun_flags |= TUN_DSTADDR;
1023                         }
1024                 }
1025                 if_addr_runlock(ifp);
1026 #endif
1027                 TUN_UNLOCK(tp);
1028         } else {
1029                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1030                 TUN_UNLOCK(tp);
1031                 /* attempt to start output */
1032                 tunstart_l2(ifp);
1033         }
1034
1035 }
1036
1037 /*
1038  * Used only for l2 tunnel.
1039  */
1040 static void
1041 tunifinit(void *xtp)
1042 {
1043         struct tuntap_softc *tp;
1044
1045         tp = (struct tuntap_softc *)xtp;
1046         tuninit(tp->tun_ifp);
1047 }
1048
1049 /*
1050  * Process an ioctl request.
1051  */
1052 static int
1053 tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1054 {
1055         struct ifreq *ifr = (struct ifreq *)data;
1056         struct tuntap_softc *tp;
1057         struct ifstat *ifs;
1058         struct ifmediareq       *ifmr;
1059         int             dummy, error = 0;
1060         bool            l2tun;
1061
1062         ifmr = NULL;
1063         sx_xlock(&tun_ioctl_sx);
1064         tp = ifp->if_softc;
1065         if (tp == NULL) {
1066                 error = ENXIO;
1067                 goto bad;
1068         }
1069         l2tun = (tp->tun_flags & TUN_L2) != 0;
1070         switch(cmd) {
1071         case SIOCGIFSTATUS:
1072                 ifs = (struct ifstat *)data;
1073                 TUN_LOCK(tp);
1074                 if (tp->tun_pid)
1075                         snprintf(ifs->ascii, sizeof(ifs->ascii),
1076                             "\tOpened by PID %d\n", tp->tun_pid);
1077                 else
1078                         ifs->ascii[0] = '\0';
1079                 TUN_UNLOCK(tp);
1080                 break;
1081         case SIOCSIFADDR:
1082                 if (l2tun)
1083                         error = ether_ioctl(ifp, cmd, data);
1084                 else
1085                         tuninit(ifp);
1086                 if (error == 0)
1087                     TUNDEBUG(ifp, "address set\n");
1088                 break;
1089         case SIOCSIFMTU:
1090                 ifp->if_mtu = ifr->ifr_mtu;
1091                 TUNDEBUG(ifp, "mtu set\n");
1092                 break;
1093         case SIOCSIFFLAGS:
1094         case SIOCADDMULTI:
1095         case SIOCDELMULTI:
1096                 break;
1097         case SIOCGIFMEDIA:
1098                 if (!l2tun) {
1099                         error = EINVAL;
1100                         break;
1101                 }
1102
1103                 ifmr = (struct ifmediareq *)data;
1104                 dummy = ifmr->ifm_count;
1105                 ifmr->ifm_count = 1;
1106                 ifmr->ifm_status = IFM_AVALID;
1107                 ifmr->ifm_active = IFM_ETHER;
1108                 if (tp->tun_flags & TUN_OPEN)
1109                         ifmr->ifm_status |= IFM_ACTIVE;
1110                 ifmr->ifm_current = ifmr->ifm_active;
1111                 if (dummy >= 1) {
1112                         int media = IFM_ETHER;
1113                         error = copyout(&media, ifmr->ifm_ulist, sizeof(int));
1114                 }
1115                 break;
1116         default:
1117                 if (l2tun) {
1118                         error = ether_ioctl(ifp, cmd, data);
1119                 } else {
1120                         error = EINVAL;
1121                 }
1122         }
1123 bad:
1124         sx_xunlock(&tun_ioctl_sx);
1125         return (error);
1126 }
1127
1128 /*
1129  * tunoutput - queue packets from higher level ready to put out.
1130  */
1131 static int
1132 tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
1133     struct route *ro)
1134 {
1135         struct tuntap_softc *tp = ifp->if_softc;
1136         u_short cached_tun_flags;
1137         int error;
1138         u_int32_t af;
1139
1140         TUNDEBUG (ifp, "tunoutput\n");
1141
1142 #ifdef MAC
1143         error = mac_ifnet_check_transmit(ifp, m0);
1144         if (error) {
1145                 m_freem(m0);
1146                 return (error);
1147         }
1148 #endif
1149
1150         /* Could be unlocked read? */
1151         TUN_LOCK(tp);
1152         cached_tun_flags = tp->tun_flags;
1153         TUN_UNLOCK(tp);
1154         if ((cached_tun_flags & TUN_READY) != TUN_READY) {
1155                 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
1156                 m_freem (m0);
1157                 return (EHOSTDOWN);
1158         }
1159
1160         if ((ifp->if_flags & IFF_UP) != IFF_UP) {
1161                 m_freem (m0);
1162                 return (EHOSTDOWN);
1163         }
1164
1165         /* BPF writes need to be handled specially. */
1166         if (dst->sa_family == AF_UNSPEC)
1167                 bcopy(dst->sa_data, &af, sizeof(af));
1168         else
1169                 af = dst->sa_family;
1170
1171         if (bpf_peers_present(ifp->if_bpf))
1172                 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
1173
1174         /* prepend sockaddr? this may abort if the mbuf allocation fails */
1175         if (cached_tun_flags & TUN_LMODE) {
1176                 /* allocate space for sockaddr */
1177                 M_PREPEND(m0, dst->sa_len, M_NOWAIT);
1178
1179                 /* if allocation failed drop packet */
1180                 if (m0 == NULL) {
1181                         if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
1182                         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
1183                         return (ENOBUFS);
1184                 } else {
1185                         bcopy(dst, m0->m_data, dst->sa_len);
1186                 }
1187         }
1188
1189         if (cached_tun_flags & TUN_IFHEAD) {
1190                 /* Prepend the address family */
1191                 M_PREPEND(m0, 4, M_NOWAIT);
1192
1193                 /* if allocation failed drop packet */
1194                 if (m0 == NULL) {
1195                         if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
1196                         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
1197                         return (ENOBUFS);
1198                 } else
1199                         *(u_int32_t *)m0->m_data = htonl(af);
1200         } else {
1201 #ifdef INET
1202                 if (af != AF_INET)
1203 #endif
1204                 {
1205                         m_freem(m0);
1206                         return (EAFNOSUPPORT);
1207                 }
1208         }
1209
1210         error = (ifp->if_transmit)(ifp, m0);
1211         if (error)
1212                 return (ENOBUFS);
1213         if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
1214         return (0);
1215 }
1216
1217 /*
1218  * the cdevsw interface is now pretty minimal.
1219  */
1220 static  int
1221 tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
1222     struct thread *td)
1223 {
1224         struct ifreq ifr, *ifrp;
1225         struct tuntap_softc *tp = dev->si_drv1;
1226         struct tuninfo *tunp;
1227         int error, iflags;
1228 #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
1229     defined(COMPAT_FREEBSD4)
1230         int     ival;
1231 #endif
1232         bool    l2tun;
1233
1234         l2tun = (tp->tun_flags & TUN_L2) != 0;
1235         if (l2tun) {
1236                 /* tap specific ioctls */
1237                 switch(cmd) {
1238                 /* VMware/VMnet port ioctl's */
1239 #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
1240     defined(COMPAT_FREEBSD4)
1241                 case _IO('V', 0):
1242                         ival = IOCPARM_IVAL(data);
1243                         data = (caddr_t)&ival;
1244                         /* FALLTHROUGH */
1245 #endif
1246                 case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
1247                         iflags = *(int *)data;
1248                         iflags &= TUN_VMIO_FLAG_MASK;
1249                         iflags &= ~IFF_CANTCHANGE;
1250                         iflags |= IFF_UP;
1251
1252                         TUN_LOCK(tp);
1253                         TUN2IFP(tp)->if_flags = iflags |
1254                             (TUN2IFP(tp)->if_flags & IFF_CANTCHANGE);
1255                         TUN_UNLOCK(tp);
1256
1257                         return (0);
1258                 case SIOCGIFADDR:       /* get MAC address of the remote side */
1259                         TUN_LOCK(tp);
1260                         bcopy(&tp->tun_ether.octet, data,
1261                             sizeof(tp->tun_ether.octet));
1262                         TUN_UNLOCK(tp);
1263
1264                         return (0);
1265                 case SIOCSIFADDR:       /* set MAC address of the remote side */
1266                         TUN_LOCK(tp);
1267                         bcopy(data, &tp->tun_ether.octet,
1268                             sizeof(tp->tun_ether.octet));
1269                         TUN_UNLOCK(tp);
1270
1271                         return (0);
1272                 }
1273
1274                 /* Fall through to the common ioctls if unhandled */
1275         } else {
1276                 switch (cmd) {
1277                 case TUNSLMODE:
1278                         TUN_LOCK(tp);
1279                         if (*(int *)data) {
1280                                 tp->tun_flags |= TUN_LMODE;
1281                                 tp->tun_flags &= ~TUN_IFHEAD;
1282                         } else
1283                                 tp->tun_flags &= ~TUN_LMODE;
1284                         TUN_UNLOCK(tp);
1285
1286                         return (0);
1287                 case TUNSIFHEAD:
1288                         TUN_LOCK(tp);
1289                         if (*(int *)data) {
1290                                 tp->tun_flags |= TUN_IFHEAD;
1291                                 tp->tun_flags &= ~TUN_LMODE;
1292                         } else
1293                                 tp->tun_flags &= ~TUN_IFHEAD;
1294                         TUN_UNLOCK(tp);
1295
1296                         return (0);
1297                 case TUNGIFHEAD:
1298                         TUN_LOCK(tp);
1299                         *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
1300                         TUN_UNLOCK(tp);
1301
1302                         return (0);
1303                 case TUNSIFMODE:
1304                         /* deny this if UP */
1305                         if (TUN2IFP(tp)->if_flags & IFF_UP)
1306                                 return (EBUSY);
1307
1308                         switch (*(int *)data & ~IFF_MULTICAST) {
1309                         case IFF_POINTOPOINT:
1310                         case IFF_BROADCAST:
1311                                 TUN_LOCK(tp);
1312                                 TUN2IFP(tp)->if_flags &=
1313                                     ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
1314                                 TUN2IFP(tp)->if_flags |= *(int *)data;
1315                                 TUN_UNLOCK(tp);
1316
1317                                 break;
1318                         default:
1319                                 return (EINVAL);
1320                         }
1321
1322                         return (0);
1323                 case TUNSIFPID:
1324                         TUN_LOCK(tp);
1325                         tp->tun_pid = curthread->td_proc->p_pid;
1326                         TUN_UNLOCK(tp);
1327
1328                         return (0);
1329                 }
1330                 /* Fall through to the common ioctls if unhandled */
1331         }
1332
1333         switch (cmd) {
1334         case TUNGIFNAME:
1335                 ifrp = (struct ifreq *)data;
1336                 strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ);
1337
1338                 return (0);
1339         case TUNSIFINFO:
1340                 tunp = (struct tuninfo *)data;
1341                 if (TUN2IFP(tp)->if_type != tunp->type)
1342                         return (EPROTOTYPE);
1343                 TUN_LOCK(tp);
1344                 if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
1345                         strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
1346                         ifr.ifr_mtu = tunp->mtu;
1347                         CURVNET_SET(TUN2IFP(tp)->if_vnet);
1348                         error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
1349                             (caddr_t)&ifr, td);
1350                         CURVNET_RESTORE();
1351                         if (error) {
1352                                 TUN_UNLOCK(tp);
1353                                 return (error);
1354                         }
1355                 }
1356                 TUN2IFP(tp)->if_baudrate = tunp->baudrate;
1357                 TUN_UNLOCK(tp);
1358                 break;
1359         case TUNGIFINFO:
1360                 tunp = (struct tuninfo *)data;
1361                 TUN_LOCK(tp);
1362                 tunp->mtu = TUN2IFP(tp)->if_mtu;
1363                 tunp->type = TUN2IFP(tp)->if_type;
1364                 tunp->baudrate = TUN2IFP(tp)->if_baudrate;
1365                 TUN_UNLOCK(tp);
1366                 break;
1367         case TUNSDEBUG:
1368                 tundebug = *(int *)data;
1369                 break;
1370         case TUNGDEBUG:
1371                 *(int *)data = tundebug;
1372                 break;
1373         case FIONBIO:
1374                 break;
1375         case FIOASYNC:
1376                 TUN_LOCK(tp);
1377                 if (*(int *)data)
1378                         tp->tun_flags |= TUN_ASYNC;
1379                 else
1380                         tp->tun_flags &= ~TUN_ASYNC;
1381                 TUN_UNLOCK(tp);
1382                 break;
1383         case FIONREAD:
1384                 if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
1385                         struct mbuf *mb;
1386                         IFQ_LOCK(&TUN2IFP(tp)->if_snd);
1387                         IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
1388                         for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
1389                                 *(int *)data += mb->m_len;
1390                         IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
1391                 } else
1392                         *(int *)data = 0;
1393                 break;
1394         case FIOSETOWN:
1395                 return (fsetown(*(int *)data, &tp->tun_sigio));
1396
1397         case FIOGETOWN:
1398                 *(int *)data = fgetown(&tp->tun_sigio);
1399                 return (0);
1400
1401         /* This is deprecated, FIOSETOWN should be used instead. */
1402         case TIOCSPGRP:
1403                 return (fsetown(-(*(int *)data), &tp->tun_sigio));
1404
1405         /* This is deprecated, FIOGETOWN should be used instead. */
1406         case TIOCGPGRP:
1407                 *(int *)data = -fgetown(&tp->tun_sigio);
1408                 return (0);
1409
1410         default:
1411                 return (ENOTTY);
1412         }
1413         return (0);
1414 }
1415
1416 /*
1417  * The cdevsw read interface - reads a packet at a time, or at
1418  * least as much of a packet as can be read.
1419  */
1420 static  int
1421 tunread(struct cdev *dev, struct uio *uio, int flag)
1422 {
1423         struct tuntap_softc *tp = dev->si_drv1;
1424         struct ifnet    *ifp = TUN2IFP(tp);
1425         struct mbuf     *m;
1426         int             error=0, len;
1427
1428         TUNDEBUG (ifp, "read\n");
1429         TUN_LOCK(tp);
1430         if ((tp->tun_flags & TUN_READY) != TUN_READY) {
1431                 TUN_UNLOCK(tp);
1432                 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
1433                 return (EHOSTDOWN);
1434         }
1435
1436         tp->tun_flags &= ~TUN_RWAIT;
1437
1438         do {
1439                 IFQ_DEQUEUE(&ifp->if_snd, m);
1440                 if (m == NULL) {
1441                         if (flag & O_NONBLOCK) {
1442                                 TUN_UNLOCK(tp);
1443                                 return (EWOULDBLOCK);
1444                         }
1445                         tp->tun_flags |= TUN_RWAIT;
1446                         error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
1447                             "tunread", 0);
1448                         if (error != 0) {
1449                                 TUN_UNLOCK(tp);
1450                                 return (error);
1451                         }
1452                 }
1453         } while (m == NULL);
1454         TUN_UNLOCK(tp);
1455
1456         if ((tp->tun_flags & TUN_L2) != 0)
1457                 BPF_MTAP(ifp, m);
1458
1459         while (m && uio->uio_resid > 0 && error == 0) {
1460                 len = min(uio->uio_resid, m->m_len);
1461                 if (len != 0)
1462                         error = uiomove(mtod(m, void *), len, uio);
1463                 m = m_free(m);
1464         }
1465
1466         if (m) {
1467                 TUNDEBUG(ifp, "Dropping mbuf\n");
1468                 m_freem(m);
1469         }
1470         return (error);
1471 }
1472
1473 static int
1474 tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m)
1475 {
1476         struct ether_header *eh;
1477         struct ifnet *ifp;
1478
1479         ifp = TUN2IFP(tp);
1480
1481         /*
1482          * Only pass a unicast frame to ether_input(), if it would
1483          * actually have been received by non-virtual hardware.
1484          */
1485         if (m->m_len < sizeof(struct ether_header)) {
1486                 m_freem(m);
1487                 return (0);
1488         }
1489
1490         eh = mtod(m, struct ether_header *);
1491
1492         if (eh && (ifp->if_flags & IFF_PROMISC) == 0 &&
1493             !ETHER_IS_MULTICAST(eh->ether_dhost) &&
1494             bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
1495                 m_freem(m);
1496                 return (0);
1497         }
1498
1499         /* Pass packet up to parent. */
1500         CURVNET_SET(ifp->if_vnet);
1501         (*ifp->if_input)(ifp, m);
1502         CURVNET_RESTORE();
1503         /* ibytes are counted in parent */
1504         if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
1505         return (0);
1506 }
1507
1508 static int
1509 tunwrite_l3(struct tuntap_softc *tp, struct mbuf *m)
1510 {
1511         struct ifnet *ifp;
1512         int family, isr;
1513
1514         ifp = TUN2IFP(tp);
1515         /* Could be unlocked read? */
1516         TUN_LOCK(tp);
1517         if (tp->tun_flags & TUN_IFHEAD) {
1518                 TUN_UNLOCK(tp);
1519                 if (m->m_len < sizeof(family) &&
1520                 (m = m_pullup(m, sizeof(family))) == NULL)
1521                         return (ENOBUFS);
1522                 family = ntohl(*mtod(m, u_int32_t *));
1523                 m_adj(m, sizeof(family));
1524         } else {
1525                 TUN_UNLOCK(tp);
1526                 family = AF_INET;
1527         }
1528
1529         BPF_MTAP2(ifp, &family, sizeof(family), m);
1530
1531         switch (family) {
1532 #ifdef INET
1533         case AF_INET:
1534                 isr = NETISR_IP;
1535                 break;
1536 #endif
1537 #ifdef INET6
1538         case AF_INET6:
1539                 isr = NETISR_IPV6;
1540                 break;
1541 #endif
1542         default:
1543                 m_freem(m);
1544                 return (EAFNOSUPPORT);
1545         }
1546         random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
1547         if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
1548         if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
1549         CURVNET_SET(ifp->if_vnet);
1550         M_SETFIB(m, ifp->if_fib);
1551         netisr_dispatch(isr, m);
1552         CURVNET_RESTORE();
1553         return (0);
1554 }
1555
1556 /*
1557  * the cdevsw write interface - an atomic write is a packet - or else!
1558  */
1559 static  int
1560 tunwrite(struct cdev *dev, struct uio *uio, int flag)
1561 {
1562         struct tuntap_softc *tp;
1563         struct ifnet    *ifp;
1564         struct mbuf     *m;
1565         uint32_t        mru;
1566         int             align;
1567         bool            l2tun;
1568
1569         tp = dev->si_drv1;
1570         ifp = TUN2IFP(tp);
1571         TUNDEBUG(ifp, "tunwrite\n");
1572         if ((ifp->if_flags & IFF_UP) != IFF_UP)
1573                 /* ignore silently */
1574                 return (0);
1575
1576         if (uio->uio_resid == 0)
1577                 return (0);
1578
1579         l2tun = (tp->tun_flags & TUN_L2) != 0;
1580         align = 0;
1581         mru = l2tun ? TAPMRU : TUNMRU;
1582         if (l2tun)
1583                 align = ETHER_ALIGN;
1584         else if ((tp->tun_flags & TUN_IFHEAD) != 0)
1585                 mru += sizeof(uint32_t);        /* family */
1586         if (uio->uio_resid < 0 || uio->uio_resid > mru) {
1587                 TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
1588                 return (EIO);
1589         }
1590
1591         if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) {
1592                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1593                 return (ENOBUFS);
1594         }
1595
1596         m->m_pkthdr.rcvif = ifp;
1597 #ifdef MAC
1598         mac_ifnet_create_mbuf(ifp, m);
1599 #endif
1600
1601         if (l2tun)
1602                 return (tunwrite_l2(tp, m));
1603
1604         return (tunwrite_l3(tp, m));
1605 }
1606
1607 /*
1608  * tunpoll - the poll interface, this is only useful on reads
1609  * really. The write detect always returns true, write never blocks
1610  * anyway, it either accepts the packet or drops it.
1611  */
1612 static  int
1613 tunpoll(struct cdev *dev, int events, struct thread *td)
1614 {
1615         struct tuntap_softc *tp = dev->si_drv1;
1616         struct ifnet    *ifp = TUN2IFP(tp);
1617         int             revents = 0;
1618
1619         TUNDEBUG(ifp, "tunpoll\n");
1620
1621         if (events & (POLLIN | POLLRDNORM)) {
1622                 IFQ_LOCK(&ifp->if_snd);
1623                 if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
1624                         TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
1625                         revents |= events & (POLLIN | POLLRDNORM);
1626                 } else {
1627                         TUNDEBUG(ifp, "tunpoll waiting\n");
1628                         selrecord(td, &tp->tun_rsel);
1629                 }
1630                 IFQ_UNLOCK(&ifp->if_snd);
1631         }
1632         if (events & (POLLOUT | POLLWRNORM))
1633                 revents |= events & (POLLOUT | POLLWRNORM);
1634
1635         return (revents);
1636 }
1637
1638 /*
1639  * tunkqfilter - support for the kevent() system call.
1640  */
1641 static int
1642 tunkqfilter(struct cdev *dev, struct knote *kn)
1643 {
1644         struct tuntap_softc     *tp = dev->si_drv1;
1645         struct ifnet    *ifp = TUN2IFP(tp);
1646
1647         switch(kn->kn_filter) {
1648         case EVFILT_READ:
1649                 TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
1650                     ifp->if_xname, dev2unit(dev));
1651                 kn->kn_fop = &tun_read_filterops;
1652                 break;
1653
1654         case EVFILT_WRITE:
1655                 TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
1656                     ifp->if_xname, dev2unit(dev));
1657                 kn->kn_fop = &tun_write_filterops;
1658                 break;
1659
1660         default:
1661                 TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
1662                     ifp->if_xname, dev2unit(dev));
1663                 return(EINVAL);
1664         }
1665
1666         kn->kn_hook = tp;
1667         knlist_add(&tp->tun_rsel.si_note, kn, 0);
1668
1669         return (0);
1670 }
1671
1672 /*
1673  * Return true of there is data in the interface queue.
1674  */
1675 static int
1676 tunkqread(struct knote *kn, long hint)
1677 {
1678         int                     ret;
1679         struct tuntap_softc     *tp = kn->kn_hook;
1680         struct cdev             *dev = tp->tun_dev;
1681         struct ifnet    *ifp = TUN2IFP(tp);
1682
1683         if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
1684                 TUNDEBUG(ifp,
1685                     "%s have data in the queue.  Len = %d, minor = %#x\n",
1686                     ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
1687                 ret = 1;
1688         } else {
1689                 TUNDEBUG(ifp,
1690                     "%s waiting for data, minor = %#x\n", ifp->if_xname,
1691                     dev2unit(dev));
1692                 ret = 0;
1693         }
1694
1695         return (ret);
1696 }
1697
1698 /*
1699  * Always can write, always return MTU in kn->data.
1700  */
1701 static int
1702 tunkqwrite(struct knote *kn, long hint)
1703 {
1704         struct tuntap_softc     *tp = kn->kn_hook;
1705         struct ifnet    *ifp = TUN2IFP(tp);
1706
1707         kn->kn_data = ifp->if_mtu;
1708
1709         return (1);
1710 }
1711
1712 static void
1713 tunkqdetach(struct knote *kn)
1714 {
1715         struct tuntap_softc     *tp = kn->kn_hook;
1716
1717         knlist_remove(&tp->tun_rsel.si_note, kn, 0);
1718 }