2 /* $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include "opt_inet6.h"
36 #include <sys/param.h>
37 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/socket.h>
44 #include <sys/sockio.h>
45 #include <sys/errno.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
51 #include <sys/protosw.h>
53 #include <machine/cpu.h>
56 #include <net/if_clone.h>
57 #include <net/if_types.h>
58 #include <net/netisr.h>
59 #include <net/route.h>
63 #include <netinet/in.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/ip.h>
67 #include <netinet/in_var.h>
68 #include <netinet/in_gif.h>
69 #include <netinet/ip_var.h>
74 #include <netinet/in.h>
76 #include <netinet6/in6_var.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/in6_gif.h>
81 #include <netinet6/ip6protosw.h>
84 #include <netinet/ip_encap.h>
85 #include <net/ethernet.h>
86 #include <net/if_bridgevar.h>
87 #include <net/if_gif.h>
89 #include <security/mac/mac_framework.h>
94 * gif_mtx protects the global gif_softc_list.
/* Mutex for the gif softc list; set up and torn down in gifmodevent(). */
96 static struct mtx gif_mtx;
/* malloc type used for the softc allocation in gif_clone_create(). */
97 static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
/* Per-vnet list of gif softcs, reached through V_gif_softc_list. */
98 static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
99 #define V_gif_softc_list VNET(gif_softc_list)
/*
 * Optional hook pointers. Every visible call site in this file tests the
 * pointer against NULL before calling through it.
 * NOTE(review): presumably installed by a netgraph gif node - confirm.
 */
101 void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
102 void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
103 void (*ng_gif_attach_p)(struct ifnet *ifp);
104 void (*ng_gif_detach_p)(struct ifnet *ifp);
/* Forward declarations: the if_start handler and the two cloner callbacks. */
106 static void gif_start(struct ifnet *);
107 static int gif_clone_create(struct if_clone *, int, caddr_t);
108 static void gif_clone_destroy(struct ifnet *);
/*
 * NOTE(review): presumably this macro defines gif_cloner, which
 * gifmodevent() attaches and detaches - confirm against if_clone.h.
 */
110 IFC_SIMPLE_DECLARE(gif, 0);
/* Module event handler, defined further down in this file. */
112 static int gifmodevent(module_t, int, void *);
/* sysctl node gif under _net_link; parent of the two gif tunables below. */
114 SYSCTL_DECL(_net_link);
115 static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
116 "Generic Tunnel Interface");
119 * This macro controls the default upper limit on nesting of gif tunnels.
120 * Because setting a large value for this macro with a careless configuration
121 * may crash the system, we don't allow any nesting by default.
122 * If you need to configure nested gif tunnels, you can define this macro
123 * in your kernel configuration file. However, if you do so, please be
124 * careful to configure the tunnels so that they don't form a loop.
/* Initial value for the per-vnet nesting limit defined just below. */
126 #define MAX_GIF_NEST 1
/*
 * Per-vnet nesting limit. gif_output() compares its gif_called counter
 * against V_max_gif_nesting and fails with EIO when the counter is larger.
 */
128 static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
129 #define V_max_gif_nesting VNET(max_gif_nesting)
/* Exposed read-write (CTLFLAG_RW) as max_nesting under _net_link_gif. */
130 SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_RW,
131 &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
134 * By default, we disallow creation of multiple tunnels between the same
135 * pair of addresses. Some applications require this functionality so
136 * we allow control over this check here.
/*
 * Per-vnet switch read by gif_set_tunnel(): when it is zero, a tunnel
 * whose source and destination both match an existing gif is refused
 * with EADDRNOTAVAIL.
 * NOTE(review): the two definitions below are presumably alternatives
 * selected by a preprocessor conditional that is not visible in this
 * excerpt - confirm which default applies to the build.
 */
139 static VNET_DEFINE(int, parallel_tunnels) = 1;
141 static VNET_DEFINE(int, parallel_tunnels) = 0;
143 #define V_parallel_tunnels VNET(parallel_tunnels)
/* Exposed read-write (CTLFLAG_RW) as parallel_tunnels under _net_link_gif. */
144 SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_RW,
145 &VNET_NAME(parallel_tunnels), 0, "Allow parallel tunnels?");
/* All-ones Ethernet address, used by the fallback macro below. */
147 /* copy from src/sys/net/if_ethersubr.c */
148 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
149 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
/*
 * Fallback used only when ETHER_IS_BROADCAST is not already defined:
 * true when the ETHER_ADDR_LEN bytes at addr equal etherbroadcastaddr.
 * gif_input() uses it to choose between M_BCAST and M_MCAST.
 */
150 #ifndef ETHER_IS_BROADCAST
151 #define ETHER_IS_BROADCAST(addr) \
152 (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
/*
 * Cloner create callback: allocate a gif softc and its ifnet, fill in the
 * interface defaults and handlers, attach the interface, and link the
 * softc into the per-vnet gif list.
 *
 * ifc    - cloner; ifc->ifc_name is passed to if_initname().
 * unit   - unit number, also passed to if_initname().
 * params - not referenced in the visible lines.
 *
 * NOTE(review): the return type, the braces, the handling of an
 * if_alloc() failure and the return statements are not visible in this
 * excerpt.
 */
156 gif_clone_create(ifc, unit, params)
157 struct if_clone *ifc;
161 struct gif_softc *sc;
/* Softc comes from the M_GIF malloc type and is zero-filled (M_ZERO). */
163 sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
/* Remember the FIB of the creating process; gif_output() applies it. */
164 sc->gif_fibnum = curthread->td_proc->p_fibnum;
165 GIF2IFP(sc) = if_alloc(IFT_GIF);
166 if (GIF2IFP(sc) == NULL) {
173 GIF2IFP(sc)->if_softc = sc;
174 if_initname(GIF2IFP(sc), ifc->ifc_name, unit);
/* No encap attachment yet; both cookies start out NULL. */
176 sc->encap_cookie4 = sc->encap_cookie6 = NULL;
/* Default option: accept the reversed EtherIP header, see gif_input(). */
177 sc->gif_options = GIF_ACCEPT_REVETHIP;
/* No link-layer address, default MTU, point-to-point with multicast. */
179 GIF2IFP(sc)->if_addrlen = 0;
180 GIF2IFP(sc)->if_mtu = GIF_MTU;
181 GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
183 /* turn off ingress filter */
184 GIF2IFP(sc)->if_flags |= IFF_LINK2;
/* Install the ioctl, start and output handlers defined in this file. */
186 GIF2IFP(sc)->if_ioctl = gif_ioctl;
187 GIF2IFP(sc)->if_start = gif_start;
188 GIF2IFP(sc)->if_output = gif_output;
189 GIF2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
190 if_attach(GIF2IFP(sc));
/* BPF sees DLT_NULL frames with a header of sizeof(u_int32_t) bytes. */
191 bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
/* Call the optional attach hook only when one is installed. */
192 if (ng_gif_attach_p != NULL)
193 (*ng_gif_attach_p)(GIF2IFP(sc));
/*
 * Publish the softc on the per-vnet list.
 * NOTE(review): the matching mtx_lock(&gif_mtx) is not visible here;
 * presumably it immediately precedes the insert - confirm.
 */
196 LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
197 mtx_unlock(&gif_mtx);
/*
 * Cloner destroy callback: unlink the softc from the gif list, drop the
 * tunnel addresses, detach any encap cookies, and call the optional
 * detach hook.
 *
 * ifp - interface being destroyed; its if_softc is the gif softc.
 *
 * NOTE(review): the return type, the braces, the declaration of err, the
 * mtx_lock() call and the final interface and softc release are not
 * visible in this excerpt.
 */
203 gif_clone_destroy(ifp)
/* NOTE(review): presumably guards the declaration of err - confirm. */
206 #if defined(INET) || defined(INET6)
209 struct gif_softc *sc = ifp->if_softc;
/* Take the softc off the list before tearing anything down. */
212 LIST_REMOVE(sc, gif_list);
213 mtx_unlock(&gif_mtx);
/* Frees gif_psrc and gif_pdst and clears IFF_DRV_RUNNING. */
215 gif_delete_tunnel(ifp);
/* Detach each encap cookie that is set; a failure trips the KASSERT. */
217 if (sc->encap_cookie6 != NULL) {
218 err = encap_detach(sc->encap_cookie6);
219 KASSERT(err == 0, ("Unexpected error detaching encap_cookie6"));
223 if (sc->encap_cookie4 != NULL) {
224 err = encap_detach(sc->encap_cookie4);
225 KASSERT(err == 0, ("Unexpected error detaching encap_cookie4"));
/* Call the optional detach hook only when one is installed. */
229 if (ng_gif_detach_p != NULL)
230 (*ng_gif_detach_p)(ifp);
235 GIF_LOCK_DESTROY(sc);
/*
 * Per-vnet initializer: makes the gif softc list empty.
 * The argument is unused.
 */
241 vnet_gif_init(const void *unused __unused)
244 LIST_INIT(&V_gif_softc_list);
/*
 * Register vnet_gif_init at SI_SUB_PSEUDO / SI_ORDER_MIDDLE.
 * NOTE(review): the remaining macro arguments are not visible here.
 */
246 VNET_SYSINIT(vnet_gif_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_gif_init,
/*
 * Module event handler for the gif driver.
 *
 * NOTE(review): the parameter declarations, the dispatch on type and its
 * case labels are not visible in this excerpt. The first pair of calls
 * below is presumably the load branch and the second pair the unload
 * branch - confirm.
 */
250 gifmodevent(mod, type, data)
/* Create the list mutex, then make the gif cloner available. */
258 mtx_init(&gif_mtx, "gif_mtx", NULL, MTX_DEF);
259 if_clone_attach(&gif_cloner);
/* Reverse order: withdraw the cloner, then destroy the mutex. */
263 if_clone_detach(&gif_cloner);
264 mtx_destroy(&gif_mtx);
/*
 * Module descriptor for if_gif.
 * NOTE(review): the initializer fields are not visible in this excerpt;
 * presumably they name the module and point at gifmodevent - confirm.
 */
272 static moduledata_t gif_mod = {
/* Register the module at SI_SUB_PSEUDO / SI_ORDER_ANY, version 1. */
278 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
279 MODULE_VERSION(if_gif, 1);
/*
 * Encapsulation check for a gif interface. After some generic checks it
 * hands the decision to gif_encapcheck4() or gif_encapcheck6().
 *
 * m     - candidate packet (read only).
 * off   - passed through unchanged to the per-family check.
 * proto - protocol number; IPPROTO_ETHERIP is one of the visible cases.
 * arg   - the gif softc, passed as an opaque pointer.
 *
 * NOTE(review): the return type, several declarations, the switch
 * statements and the statements taken when a check fails are not visible
 * in this excerpt; each failed check presumably rejects the packet -
 * confirm.
 */
282 gif_encapcheck(m, off, proto, arg)
283 const struct mbuf *m;
289 struct gif_softc *sc;
291 sc = (struct gif_softc *)arg;
/* Interface must be administratively up. */
295 if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
298 /* no physical address */
299 if (!sc->gif_psrc || !sc->gif_pdst)
311 case IPPROTO_ETHERIP:
318 /* Bail on short packets */
319 if (m->m_pkthdr.len < sizeof(ip))
/* Copy the outer IP header from the start of the mbuf chain into ip. */
322 m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
/* IPv4 outer header: both tunnel endpoints must be AF_INET. */
327 if (sc->gif_psrc->sa_family != AF_INET ||
328 sc->gif_pdst->sa_family != AF_INET)
330 return gif_encapcheck4(m, off, proto, arg);
/* IPv6 outer header: length check, then both endpoints must be AF_INET6. */
334 if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
336 if (sc->gif_psrc->sa_family != AF_INET6 ||
337 sc->gif_pdst->sa_family != AF_INET6)
339 return gif_encapcheck6(m, off, proto, arg);
/*
 * if_start handler: takes packets off the interface send queue and hands
 * them to gif_output() with the tunnel destination as the address.
 *
 * NOTE(review): the loop around the dequeue, the assignment of sc and
 * the declaration of m are not visible in this excerpt.
 */
347 gif_start(struct ifnet *ifp)
349 struct gif_softc *sc;
/* Mark the interface as busy transmitting while the queue is drained. */
354 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
356 IFQ_DEQUEUE(&ifp->if_snd, m);
/* The return value of gif_output() is not used on this visible line. */
360 gif_output(ifp, m, sc->gif_pdst, NULL);
363 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
/*
 * if_output handler: guard against tunnel loops and excessive nesting,
 * tap the packet to BPF, then pass it to the IPv4 or IPv6 gif output
 * routine chosen by the family of the outer source address.
 *
 * ifp - the gif interface.
 * m   - packet to send.
 * dst - destination; a family of AF_UNSPEC marks a BPF write.
 * ro  - not referenced in the visible lines.
 *
 * NOTE(review): the return type, several declarations (mtag, af, error,
 * gif_called), the cleanup and return paths and the counting of
 * gif_called are not visible in this excerpt.
 */
369 gif_output(ifp, m, dst, ro)
372 struct sockaddr *dst;
375 struct gif_softc *sc = ifp->if_softc;
/* MAC framework transmit check for this interface and packet. */
382 error = mac_ifnet_check_transmit(ifp, m);
/*
 * Loop and nesting guard. Each MTAG_GIF_CALLED tag stores, directly
 * after the tag header (mtag + 1), the ifnet of a gif that has already
 * handled this packet.
 */
390 * gif may cause infinite recursion calls when misconfigured.
391 * We'll prevent this by detecting loops.
393 * High nesting level may cause stack exhaustion.
394 * We'll prevent this by introducing upper limit.
397 mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
398 while (mtag != NULL) {
/* This interface is already recorded on the packet: a loop. */
399 if (*(struct ifnet **)(mtag + 1) == ifp) {
401 "gif_output: loop detected on %s\n",
402 (*(struct ifnet **)(mtag + 1))->if_xname);
404 error = EIO; /* is there better errno? */
/* Continue the search after the tag just examined. */
407 mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
/* Refuse when the counter exceeds the per-vnet nesting limit. */
410 if (gif_called > V_max_gif_nesting) {
412 "gif_output: recursively called too many times(%d)\n",
415 error = EIO; /* is there better errno? */
/* Record this interface on the packet with a new tag. */
418 mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
425 *(struct ifnet **)(mtag + 1) = ifp;
426 m_tag_prepend(m, mtag);
428 m->m_flags &= ~(M_BCAST|M_MCAST);
/* The interface must be up and both tunnel endpoints configured. */
432 if (!(ifp->if_flags & IFF_UP) ||
433 sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
440 /* BPF writes need to be handled specially. */
441 if (dst->sa_family == AF_UNSPEC) {
/* The inner address family is carried in the first bytes of sa_data. */
442 bcopy(dst->sa_data, &af, sizeof(af));
/* Tap with the address family prepended as the pseudo header. */
447 BPF_MTAP2(ifp, &af, sizeof(af), m);
449 ifp->if_obytes += m->m_pkthdr.len;
451 /* override to IPPROTO_ETHERIP for bridged traffic */
/* Route the outer packet in the FIB recorded at interface creation. */
455 M_SETFIB(m, sc->gif_fibnum);
456 /* inner AF-specific encapsulation */
458 /* XXX should we check if our outer source is legal? */
460 /* dispatch to output logic based on outer AF */
461 switch (sc->gif_psrc->sa_family) {
464 error = in_gif_output(ifp, af, m);
469 error = in6_gif_output(ifp, af, m);
/*
 * Input path for a decapsulated packet: tap it to BPF, offer it to the
 * optional input hook, handle EtherIP frames (bridge input), and queue
 * everything else to the network layer through netisr_dispatch().
 *
 * m   - decapsulated packet.
 * af  - address family of the inner packet.
 * ifp - receiving gif interface.
 *
 * NOTE(review): the return type, several declarations (isr, n, af1), the
 * switch on af with its case labels, and the drop and return paths are
 * not visible in this excerpt.
 */
485 gif_input(m, af, ifp)
491 struct gif_softc *sc;
492 struct etherip_header *eip;
493 struct ether_header *eh;
494 struct ifnet *oldifp;
502 m->m_pkthdr.rcvif = ifp;
/* MAC framework hook for a packet received on this interface. */
505 mac_ifnet_create_mbuf(ifp, m);
/* Tap only when a BPF listener is present; af1 is the prepended header. */
508 if (bpf_peers_present(ifp->if_bpf)) {
510 bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
/*
 * Optional input hook. It receives the address of m.
 * NOTE(review): presumably it may consume or replace the mbuf - confirm.
 */
513 if (ng_gif_input_p != NULL) {
514 (*ng_gif_input_p)(ifp, &m, af);
520 * Put the packet to the network layer input queue according to the
521 * specified address family.
522 * Note: older versions of gif_input directly called network layer
523 * input functions, e.g. ip6_input, here. We changed the policy to
524 * prevent too many recursive calls of such input functions, which
525 * might cause kernel panic. But the change may introduce another
526 * problem; if the input queue is full, packets are discarded.
527 * The kernel stack overflow really happened, and we believed
528 * queue-full rarely occurs, so we changed the policy.
/*
 * EtherIP handling. n is the combined size of the EtherIP and Ethernet
 * headers.
 * NOTE(review): how n is used is not visible; presumably it is a minimum
 * length check or pullup - confirm.
 */
542 n = sizeof(struct etherip_header) + sizeof(struct ether_header);
551 eip = mtod(m, struct etherip_header *);
/* NOTE(review): revered in the text below presumably means reversed. */
553 * GIF_ACCEPT_REVETHIP (enabled by default) intentionally
554 * accepts an EtherIP packet with revered version field in
555 * the header. This is a knob for backward compatibility
556 * with FreeBSD 7.2R or prior.
/*
 * With the option set, a packet is discarded only when neither eip_resvl
 * nor eip_ver holds ETHERIP_VERSION. The second, stricter test accepts
 * eip_ver only.
 * NOTE(review): the stricter test is presumably the else branch.
 */
558 if (sc->gif_options & GIF_ACCEPT_REVETHIP) {
559 if (eip->eip_resvl != ETHERIP_VERSION
560 && eip->eip_ver != ETHERIP_VERSION) {
561 /* discard unknown versions */
566 if (eip->eip_ver != ETHERIP_VERSION) {
567 /* discard unknown versions */
/* Strip the EtherIP header, leaving the inner Ethernet frame. */
572 m_adj(m, sizeof(struct etherip_header));
574 m->m_flags &= ~(M_BCAST|M_MCAST);
575 m->m_pkthdr.rcvif = ifp;
/* Only bridge members hand the inner frame to the bridge. */
577 if (ifp->if_bridge) {
579 eh = mtod(m, struct ether_header *);
/* Re-derive the broadcast or multicast flag from the inner destination. */
580 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
581 if (ETHER_IS_BROADCAST(eh->ether_dhost))
582 m->m_flags |= M_BCAST;
584 m->m_flags |= M_MCAST;
587 BRIDGE_INPUT(ifp, m);
589 if (m != NULL && ifp != oldifp) {
591 * The bridge gave us back itself or one of the
592 * members for which the frame is addressed.
/* Optional orphan hook, called only when one is installed. */
603 if (ng_gif_input_orphan_p != NULL)
604 (*ng_gif_input_orphan_p)(ifp, m, af);
/* Count the bytes, tag the packet with the interface FIB, and queue it. */
611 ifp->if_ibytes += m->m_pkthdr.len;
612 M_SETFIB(m, ifp->if_fib);
613 netisr_dispatch(isr, m);
616 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
/*
 * ioctl handler for gif interfaces. The visible lines cover: setting
 * IFF_UP, MTU range checking, setting, deleting and reading the tunnel
 * endpoint addresses, and reading and writing the gif option bits.
 *
 * ifp  - the gif interface.
 * cmd  - ioctl command.
 * data - command argument; cast to the request structure for each cmd.
 *
 * NOTE(review): the return type, the main switch, many case labels, the
 * break and return statements and the error values assigned after failed
 * checks are not visible in this excerpt.
 */
618 gif_ioctl(ifp, cmd, data)
623 struct gif_softc *sc = ifp->if_softc;
624 struct ifreq *ifr = (struct ifreq*)data;
627 struct sockaddr *dst, *src;
628 #ifdef SIOCSIFMTU /* xxx */
/* NOTE(review): the case label that reaches this line is not visible. */
634 ifp->if_flags |= IFF_UP;
644 #ifdef SIOCSIFMTU /* xxx */
/* A requested MTU outside GIF_MTU_MIN..GIF_MTU_MAX fails this test. */
650 if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX)
654 #endif /* SIOCSIFMTU */
/*
 * Set the tunnel endpoints. Step one: locate src and dst inside the
 * request structure that belongs to the command.
 */
660 case SIOCSIFPHYADDR_IN6:
662 case SIOCSLIFPHYADDR:
/* struct in_aliasreq layout (the case label is not visible). */
666 src = (struct sockaddr *)
667 &(((struct in_aliasreq *)data)->ifra_addr);
668 dst = (struct sockaddr *)
669 &(((struct in_aliasreq *)data)->ifra_dstaddr);
673 case SIOCSIFPHYADDR_IN6:
674 src = (struct sockaddr *)
675 &(((struct in6_aliasreq *)data)->ifra_addr);
676 dst = (struct sockaddr *)
677 &(((struct in6_aliasreq *)data)->ifra_dstaddr);
680 case SIOCSLIFPHYADDR:
681 src = (struct sockaddr *)
682 &(((struct if_laddrreq *)data)->addr);
683 dst = (struct sockaddr *)
684 &(((struct if_laddrreq *)data)->dstaddr);
690 /* sa_family must be equal */
691 if (src->sa_family != dst->sa_family)
694 /* validate sa_len */
695 switch (src->sa_family) {
698 if (src->sa_len != sizeof(struct sockaddr_in))
704 if (src->sa_len != sizeof(struct sockaddr_in6))
/* Same length validation for the destination address. */
711 switch (dst->sa_family) {
714 if (dst->sa_len != sizeof(struct sockaddr_in))
720 if (dst->sa_len != sizeof(struct sockaddr_in6))
728 /* check sa_family looks sane for the cmd */
731 if (src->sa_family == AF_INET)
735 case SIOCSIFPHYADDR_IN6:
736 if (src->sa_family == AF_INET6)
740 case SIOCSLIFPHYADDR:
741 /* checks done in the above */
/* All checks passed: install the new endpoints. */
745 error = gif_set_tunnel(GIF2IFP(sc), src, dst);
748 #ifdef SIOCDIFPHYADDR
750 gif_delete_tunnel(GIF2IFP(sc));
/* Read the tunnel source address into the caller request structure. */
754 case SIOCGIFPSRCADDR:
756 case SIOCGIFPSRCADDR_IN6:
758 if (sc->gif_psrc == NULL) {
759 error = EADDRNOTAVAIL;
/* dst and size describe the output field for the given command. */
765 case SIOCGIFPSRCADDR:
766 dst = &ifr->ifr_addr;
767 size = sizeof(ifr->ifr_addr);
771 case SIOCGIFPSRCADDR_IN6:
772 dst = (struct sockaddr *)
773 &(((struct in6_ifreq *)data)->ifr_addr);
774 size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
778 error = EADDRNOTAVAIL;
/* Never copy more than the output field can hold. */
781 if (src->sa_len > size)
783 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
/* IPv6 result: sa6_recoverscope() is applied to the copied address. */
785 if (dst->sa_family == AF_INET6) {
786 error = sa6_recoverscope((struct sockaddr_in6 *)dst);
/* Read the tunnel destination address; same shape as the source case. */
793 case SIOCGIFPDSTADDR:
795 case SIOCGIFPDSTADDR_IN6:
797 if (sc->gif_pdst == NULL) {
798 error = EADDRNOTAVAIL;
804 case SIOCGIFPDSTADDR:
805 dst = &ifr->ifr_addr;
806 size = sizeof(ifr->ifr_addr);
810 case SIOCGIFPDSTADDR_IN6:
811 dst = (struct sockaddr *)
812 &(((struct in6_ifreq *)data)->ifr_addr);
813 size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
817 error = EADDRNOTAVAIL;
820 if (src->sa_len > size)
/*
 * Jail visibility checks on the addresses.
 * NOTE(review): dst is the caller output field and has not been filled
 * in yet when it is passed to prison_if(); the source address case above
 * shows no prison_if() call at all - confirm both are intended.
 */
822 error = prison_if(curthread->td_ucred, src);
825 error = prison_if(curthread->td_ucred, dst);
828 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
830 if (dst->sa_family == AF_INET6) {
831 error = sa6_recoverscope((struct sockaddr_in6 *)dst);
/* Read both endpoints into a struct if_laddrreq. */
838 case SIOCGLIFPHYADDR:
839 if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
840 error = EADDRNOTAVAIL;
/* First the addr field (the assignment of src is not visible). */
846 dst = (struct sockaddr *)
847 &(((struct if_laddrreq *)data)->addr);
848 size = sizeof(((struct if_laddrreq *)data)->addr);
849 if (src->sa_len > size)
851 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
/* Then the dstaddr field. */
855 dst = (struct sockaddr *)
856 &(((struct if_laddrreq *)data)->dstaddr);
857 size = sizeof(((struct if_laddrreq *)data)->dstaddr);
858 if (src->sa_len > size)
860 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
864 /* if_ioctl() takes care of it */
/* Get options: copy the current option bits out to ifr_data. */
868 options = sc->gif_options;
869 error = copyout(&options, ifr->ifr_data,
/*
 * Set options: requires PRIV_NET_GIF; bits outside GIF_OPTMASK fail the
 * mask test below before the new value is stored.
 */
874 if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
876 error = copyin(ifr->ifr_data, &options, sizeof(options));
879 if (options & ~GIF_OPTMASK)
882 sc->gif_options = options;
894 * XXXRW: There's a general event-ordering issue here: the code to check
895 * if a given tunnel is already present happens before we perform a
896 * potentially blocking setup of the tunnel. This code needs to be
897 * re-ordered so that the check and replacement can be atomic using
/*
 * Install new outer source and destination addresses on a gif interface.
 *
 * ifp - the gif interface.
 * src - new outer source address.
 * dst - new outer destination address.
 *
 * Visible steps: scan the per-vnet gif list for a tunnel with the same
 * endpoints, detach from the previous address family, copy src and dst
 * into newly allocated sockaddrs, attach for the new family, then free
 * addresses and update IFF_DRV_RUNNING.
 *
 * NOTE(review): the return type, the mtx_lock() call, the saving of osrc
 * and odst, the assignments of the new copies to sc->gif_psrc and
 * sc->gif_pdst, the error branches and the return are not visible in
 * this excerpt.
 */
901 gif_set_tunnel(ifp, src, dst)
903 struct sockaddr *src;
904 struct sockaddr *dst;
906 struct gif_softc *sc = ifp->if_softc;
907 struct gif_softc *sc2;
908 struct sockaddr *osrc, *odst, *sa;
912 LIST_FOREACH(sc2, &V_gif_softc_list, gif_list) {
/* Entries without both endpoints cannot conflict. */
915 if (!sc2->gif_pdst || !sc2->gif_psrc)
/* Different family or length on either end: cannot be the same tunnel. */
917 if (sc2->gif_pdst->sa_family != dst->sa_family ||
918 sc2->gif_pdst->sa_len != dst->sa_len ||
919 sc2->gif_psrc->sa_family != src->sa_family ||
920 sc2->gif_psrc->sa_len != src->sa_len)
924 * Disallow parallel tunnels unless instructed
/* Byte-wise comparison of both endpoints against the existing tunnel. */
927 if (!V_parallel_tunnels &&
928 bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 &&
929 bcmp(sc2->gif_psrc, src, src->sa_len) == 0) {
930 error = EADDRNOTAVAIL;
931 mtx_unlock(&gif_mtx);
935 /* XXX both ends must be valid? (I mean, not 0.0.0.0) */
937 mtx_unlock(&gif_mtx);
939 /* XXX we can detach from both, but be polite just in case */
941 switch (sc->gif_psrc->sa_family) {
944 (void)in_gif_detach(sc);
949 (void)in6_gif_detach(sc);
/* Private copies of the new addresses, allocated from M_IFADDR. */
955 sa = (struct sockaddr *)malloc(src->sa_len, M_IFADDR, M_WAITOK);
956 bcopy((caddr_t)src, (caddr_t)sa, src->sa_len);
960 sa = (struct sockaddr *)malloc(dst->sa_len, M_IFADDR, M_WAITOK);
961 bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len);
/* Attach for the family of the newly installed source address. */
964 switch (sc->gif_psrc->sa_family) {
967 error = in_gif_attach(sc);
973 * Check validity of the scope zone ID of the addresses, and
974 * convert it into the kernel internal form if necessary.
976 error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_psrc, 0);
979 error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_pdst, 0);
982 error = in6_gif_attach(sc);
/*
 * NOTE(review): presumably the failure path, releasing the new copies
 * before the old addresses are restored - confirm.
 */
988 free((caddr_t)sc->gif_psrc, M_IFADDR);
989 free((caddr_t)sc->gif_pdst, M_IFADDR);
/* NOTE(review): presumably the success path, releasing the old ones. */
996 free((caddr_t)osrc, M_IFADDR);
998 free((caddr_t)odst, M_IFADDR);
/* The interface counts as running only with both endpoints set. */
1001 if (sc->gif_psrc && sc->gif_pdst)
1002 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1004 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
/*
 * Remove the tunnel endpoints from a gif interface: free both address
 * copies, detach from the IPv4 and IPv6 encapsulation code, and clear
 * IFF_DRV_RUNNING.
 *
 * ifp - the gif interface.
 *
 * NOTE(review): the return type, the braces and any conditions guarding
 * the free() and detach calls are not visible in this excerpt.
 */
1010 gif_delete_tunnel(ifp)
1013 struct gif_softc *sc = ifp->if_softc;
/* Each pointer is set to NULL right after it is freed. */
1016 free((caddr_t)sc->gif_psrc, M_IFADDR);
1017 sc->gif_psrc = NULL;
1020 free((caddr_t)sc->gif_pdst, M_IFADDR);
1021 sc->gif_pdst = NULL;
1023 /* it is safe to detach from both */
1025 (void)in_gif_detach(sc);
1028 (void)in6_gif_detach(sc);
1030 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;