2 /* $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include "opt_inet6.h"
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/socket.h>
44 #include <sys/sockio.h>
45 #include <sys/errno.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
50 #include <sys/protosw.h>
52 #include <sys/vimage.h>
53 #include <machine/cpu.h>
56 #include <net/if_clone.h>
57 #include <net/if_types.h>
58 #include <net/netisr.h>
59 #include <net/route.h>
62 #include <netinet/in.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/ip.h>
66 #include <netinet/in_var.h>
67 #include <netinet/in_gif.h>
68 #include <netinet/ip_var.h>
73 #include <netinet/in.h>
75 #include <netinet6/in6_var.h>
76 #include <netinet/ip6.h>
77 #include <netinet6/ip6_var.h>
78 #include <netinet6/scope6_var.h>
79 #include <netinet6/in6_gif.h>
80 #include <netinet6/ip6protosw.h>
83 #include <netinet/ip_encap.h>
84 #include <net/ethernet.h>
85 #include <net/if_bridgevar.h>
86 #include <net/if_gif.h>
88 #include <security/mac/mac_framework.h>
93 * gif_mtx protects the global gif_softc_list.
/*
 * File-scope state for the gif driver.  gif_mtx is released right after the
 * softc-list insert/remove and after the list scan in gif_set_tunnel().
 */
95 static struct mtx gif_mtx;
/* malloc type passed when gif_clone_create() allocates a gif_softc. */
96 static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
/*
 * Without VIMAGE_GLOBALS a vnet_gif instance is defined; with VIMAGE_GLOBALS
 * the softc list and the two tunables are plain file-scope statics.
 */
99 #ifndef VIMAGE_GLOBALS
100 struct vnet_gif vnet_gif_0;
104 #ifdef VIMAGE_GLOBALS
105 static LIST_HEAD(, gif_softc) gif_softc_list;
106 static int max_gif_nesting;
107 static int parallel_tunnels;
/*
 * Optional hook pointers.  Every call site in this file tests the pointer
 * against NULL before calling it; nothing visible in this file assigns them.
 * NOTE(review): the ng_ prefix suggests netgraph -- confirm.
 */
116 void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
117 void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
118 void (*ng_gif_attach_p)(struct ifnet *ifp);
119 void (*ng_gif_detach_p)(struct ifnet *ifp);
/* Forward declarations of static functions defined later in this file. */
121 static void gif_start(struct ifnet *);
122 static int gif_clone_create(struct if_clone *, int, caddr_t);
123 static void gif_clone_destroy(struct ifnet *);
124 static int vnet_gif_iattach(const void *);
/* vnet module descriptor, handed to vnet_mod_register() in gifmodevent(). */
126 #ifndef VIMAGE_GLOBALS
127 static const vnet_modinfo_t vnet_gif_modinfo = {
128 .vmi_id = VNET_MOD_GIF,
130 .vmi_dependson = VNET_MOD_NET,
131 .vmi_iattach = vnet_gif_iattach
/*
 * Cloner declaration; gifmodevent() attaches and detaches &gif_cloner
 * (presumably the object this macro defines -- confirm).
 */
135 IFC_SIMPLE_DECLARE(gif, 0);
137 static int gifmodevent(module_t, int, void *);
/* sysctl node "gif" under _net_link; the _net_link_gif knobs hang off it. */
139 SYSCTL_DECL(_net_link);
140 SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
141 "Generic Tunnel Interface");
144 * This macro controls the default upper limitation on nesting of gif tunnels.
145 * Since setting a large value for this macro with a careless configuration
146 * may crash the system, we do not allow any nesting by default.
147 * If you need to configure nested gif tunnels, you can define this macro
148 * in your kernel configuration file. However, if you do so, please be
149 * careful to configure the tunnels so that they do not form a loop.
/* Default nesting limit; vnet_gif_iattach() copies it to V_max_gif_nesting. */
151 #define MAX_GIF_NEST 1
/* Read-write integer knob "max_nesting" backed by max_gif_nesting. */
153 SYSCTL_V_INT(V_NET, vnet_gif, _net_link_gif, OID_AUTO, max_nesting,
154 CTLFLAG_RW, max_gif_nesting, 0, "Max nested tunnels");
/* Read-write integer knob "gifhlim" under _net_inet6_ip6, backed by ip6_gif_hlim. */
157 SYSCTL_DECL(_net_inet6_ip6);
158 SYSCTL_V_INT(V_NET, vnet_gif, _net_inet6_ip6, IPV6CTL_GIF_HLIM,
159 gifhlim, CTLFLAG_RW, ip6_gif_hlim, 0, "");
163 * By default, we disallow creation of multiple tunnels between the same
164 * pair of addresses. Some applications require this functionality so
165 * we allow control over this check here.
/* Read-write integer knob "parallel_tunnels"; consulted in gif_set_tunnel(). */
167 SYSCTL_V_INT(V_NET, vnet_gif, _net_link_gif, OID_AUTO, parallel_tunnels,
168 CTLFLAG_RW, parallel_tunnels, 0, "Allow parallel tunnels?");
170 /* copy from src/sys/net/if_ethersubr.c */
/* All-ones Ethernet address used by the fallback macro below. */
171 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
172 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
/*
 * Fallback definition, used only when ETHER_IS_BROADCAST is not already
 * defined: true when the ETHER_ADDR_LEN bytes at addr equal
 * etherbroadcastaddr.
 */
173 #ifndef ETHER_IS_BROADCAST
174 #define ETHER_IS_BROADCAST(addr) \
175 (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
/*
 * Clone-create handler: build and attach one gif interface.
 *
 * Allocates a zeroed softc, records the creating process's FIB number,
 * allocates an IFT_GIF ifnet, names it from the cloner name and unit,
 * fills in interface defaults and handlers, attaches the interface and BPF,
 * calls the optional ng attach hook, and links the softc onto
 * V_gif_softc_list.
 *
 * ifc:  cloner whose ifc_name is used for the interface name.
 * unit: unit number passed to if_initname().
 */
179 gif_clone_create(ifc, unit, params)
180 struct if_clone *ifc;
184 INIT_VNET_GIF(curvnet);
185 struct gif_softc *sc;
/* M_ZERO: every softc field starts out zeroed. */
187 sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
/* Remember the creator's FIB; gif_output() applies it with M_SETFIB(). */
188 sc->gif_fibnum = curthread->td_proc->p_fibnum;
189 GIF2IFP(sc) = if_alloc(IFT_GIF);
/* if_alloc() failure branch; its body is not visible here. */
190 if (GIF2IFP(sc) == NULL) {
197 GIF2IFP(sc)->if_softc = sc;
198 if_initname(GIF2IFP(sc), ifc->ifc_name, unit);
/* No encap registrations yet. */
200 sc->encap_cookie4 = sc->encap_cookie6 = NULL;
/* Point-to-point, multicast-capable, no link-level address. */
202 GIF2IFP(sc)->if_addrlen = 0;
203 GIF2IFP(sc)->if_mtu = GIF_MTU;
204 GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
206 /* turn off ingress filter */
207 GIF2IFP(sc)->if_flags |= IFF_LINK2;
/* Install the driver entry points defined in this file. */
209 GIF2IFP(sc)->if_ioctl = gif_ioctl;
210 GIF2IFP(sc)->if_start = gif_start;
211 GIF2IFP(sc)->if_output = gif_output;
212 GIF2IFP(sc)->if_snd.ifq_maxlen = IFQ_MAXLEN;
213 if_attach(GIF2IFP(sc));
/* BPF link type DLT_NULL with a u_int32_t-sized pseudo header. */
214 bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
215 if (ng_gif_attach_p != NULL)
216 (*ng_gif_attach_p)(GIF2IFP(sc));
/* Publish the softc on the global list; gif_mtx is released afterwards. */
219 LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
220 mtx_unlock(&gif_mtx);
/*
 * Clone-destroy handler: tear down one gif interface.
 *
 * Unlinks the softc from the global list, deletes the tunnel endpoints,
 * detaches any IPv6 and IPv4 encap registrations (asserting that each
 * detach succeeds), calls the optional ng detach hook, and destroys the
 * softc lock.
 */
226 gif_clone_destroy(ifp)
229 #if defined(INET) || defined(INET6)
232 struct gif_softc *sc = ifp->if_softc;
/* Remove from the global list; gif_mtx is released afterwards. */
235 LIST_REMOVE(sc, gif_list);
236 mtx_unlock(&gif_mtx);
238 gif_delete_tunnel(ifp);
/* A non-NULL cookie means an encap registration is still present. */
240 if (sc->encap_cookie6 != NULL) {
241 err = encap_detach(sc->encap_cookie6);
242 KASSERT(err == 0, ("Unexpected error detaching encap_cookie6"));
246 if (sc->encap_cookie4 != NULL) {
247 err = encap_detach(sc->encap_cookie4);
248 KASSERT(err == 0, ("Unexpected error detaching encap_cookie4"));
252 if (ng_gif_detach_p != NULL)
253 (*ng_gif_detach_p)(ifp);
258 GIF_LOCK_DESTROY(sc);
/*
 * Initialise the gif state of the current vnet: an empty softc list, the
 * nesting limit (MAX_GIF_NEST), the parallel-tunnels flag, the IPv4 TTL
 * (GIF_TTL) and the IPv6 hop limit (GIF_HLIM).  The argument is unused.
 */
264 vnet_gif_iattach(const void *unused __unused)
266 INIT_VNET_GIF(curvnet);
268 LIST_INIT(&V_gif_softc_list);
269 V_max_gif_nesting = MAX_GIF_NEST;
/*
 * Two alternative defaults for parallel tunnels; the condition that selects
 * between them is not visible here.
 */
271 V_parallel_tunnels = 1;
273 V_parallel_tunnels = 0;
275 V_ip_gif_ttl = GIF_TTL;
277 V_ip6_gif_hlim = GIF_HLIM;
/*
 * Module event handler.
 *
 * Setup side: initialise gif_mtx, register the vnet module descriptor or
 * call vnet_gif_iattach() directly, then attach the cloner.
 * Teardown side: detach the cloner, destroy gif_mtx and zero the IPv6 hop
 * limit.
 * NOTE(review): the case labels are not visible here; presumably these are
 * the MOD_LOAD and MOD_UNLOAD branches -- confirm.
 */
284 gifmodevent(mod, type, data)
292 mtx_init(&gif_mtx, "gif_mtx", NULL, MTX_DEF);
/*
 * Without VIMAGE_GLOBALS the vnet framework is given the descriptor; the
 * direct vnet_gif_iattach(NULL) call is presumably the alternative branch.
 */
294 #ifndef VIMAGE_GLOBALS
295 vnet_mod_register(&vnet_gif_modinfo);
297 vnet_gif_iattach(NULL);
299 if_clone_attach(&gif_cloner);
303 if_clone_detach(&gif_cloner);
304 mtx_destroy(&gif_mtx);
306 V_ip6_gif_hlim = 0; /* XXX -> vnet_gif_idetach() */
/*
 * Module glue: the moduledata record (initialiser not visible here) and the
 * declaration of module if_gif, version 1, at SI_SUB_PSEUDO / SI_ORDER_ANY.
 */
315 static moduledata_t gif_mod = {
321 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
322 MODULE_VERSION(if_gif, 1);
/*
 * Encapsulation match callback: decide whether packet m belongs to the
 * tunnel whose softc is passed in arg.
 *
 * Checks that the interface is up and both endpoints are configured, then
 * hands off to gif_encapcheck4() or gif_encapcheck6() once both endpoints
 * are confirmed to be of the matching address family.
 * NOTE(review): the values returned by the failing checks are not visible
 * here.
 */
325 gif_encapcheck(m, off, proto, arg)
326 const struct mbuf *m;
332 struct gif_softc *sc;
334 sc = (struct gif_softc *)arg;
/* Interface must be administratively up. */
338 if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
341 /* no physical address */
342 if (!sc->gif_psrc || !sc->gif_pdst)
/* EtherIP is one of the accepted inner protocols. */
354 case IPPROTO_ETHERIP:
361 /* Bail on short packets */
362 if (m->m_pkthdr.len < sizeof(ip))
/* Copy the leading header out of the mbuf chain into the local ip. */
365 m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
/* IPv4 path: both endpoints must be AF_INET. */
370 if (sc->gif_psrc->sa_family != AF_INET ||
371 sc->gif_pdst->sa_family != AF_INET)
373 return gif_encapcheck4(m, off, proto, arg);
/* IPv6 path: need a full ip6_hdr and AF_INET6 endpoints. */
377 if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
379 if (sc->gif_psrc->sa_family != AF_INET6 ||
380 sc->gif_pdst->sa_family != AF_INET6)
382 return gif_encapcheck6(m, off, proto, arg);
/*
 * if_start handler: sets IFF_DRV_OACTIVE, dequeues from the interface send
 * queue and passes the packet to gif_output() with the tunnel destination
 * as dst, then clears IFF_DRV_OACTIVE.
 * NOTE(review): the loop around the dequeue is not visible here.
 */
390 gif_start(struct ifnet *ifp)
392 struct gif_softc *sc;
397 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
399 IFQ_DEQUEUE(&ifp->if_snd, m);
403 gif_output(ifp, m, sc->gif_pdst, NULL);
406 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
/*
 * if_output handler: send packet m through the tunnel.
 *
 * Runs the MAC transmit check, detects tunnel loops and excessive nesting
 * using MTAG_GIF_CALLED mbuf tags, requires the interface to be up with
 * both endpoints set, taps BPF, accounts output bytes, applies the softc's
 * FIB number and finally dispatches to in_gif_output() or in6_gif_output()
 * according to the outer source address family.
 *
 * ifp: gif interface.
 * m:   packet to send.
 * dst: inner destination; AF_UNSPEC marks a BPF write whose real address
 *      family is stored in sa_data.
 */
412 gif_output(ifp, m, dst, ro)
415 struct sockaddr *dst;
418 INIT_VNET_GIF(ifp->if_vnet);
419 struct gif_softc *sc = ifp->if_softc;
426 error = mac_ifnet_check_transmit(ifp, m);
434 * gif may cause infinite recursion calls when misconfigured.
435 * We'll prevent this by detecting loops.
437 * High nesting level may cause stack exhaustion.
438 * We'll prevent this by introducing upper limit.
/*
 * Each MTAG_GIF_CALLED tag carries, right after the tag header, the ifnet
 * pointer of a gif interface this packet already went through.
 */
441 mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
442 while (mtag != NULL) {
/* Seeing our own ifp again means the packet is looping. */
443 if (*(struct ifnet **)(mtag + 1) == ifp) {
445 "gif_output: loop detected on %s\n",
446 (*(struct ifnet **)(mtag + 1))->if_xname);
448 error = EIO; /* is there better errno? */
/* Advance to the next tag of the same kind. */
451 mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
/*
 * Nesting limit check.  gif_called is presumably the number of tags seen
 * in the loop above; its update is not visible here.
 */
454 if (gif_called > V_max_gif_nesting) {
456 "gif_output: recursively called too many times(%d)\n",
459 error = EIO; /* is there better errno? */
/* Record this interface on the packet for later loop detection. */
462 mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
469 *(struct ifnet **)(mtag + 1) = ifp;
470 m_tag_prepend(m, mtag);
472 m->m_flags &= ~(M_BCAST|M_MCAST);
/* Refuse to send unless the interface is up and fully configured. */
476 if (!(ifp->if_flags & IFF_UP) ||
477 sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
484 /* BPF writes need to be handled specially. */
485 if (dst->sa_family == AF_UNSPEC) {
486 bcopy(dst->sa_data, &af, sizeof(af));
491 BPF_MTAP2(ifp, &af, sizeof(af), m);
493 ifp->if_obytes += m->m_pkthdr.len;
495 /* override to IPPROTO_ETHERIP for bridged traffic */
499 M_SETFIB(m, sc->gif_fibnum);
500 /* inner AF-specific encapsulation */
502 /* XXX should we check if our outer source is legal? */
504 /* dispatch to output logic based on outer AF */
505 switch (sc->gif_psrc->sa_family) {
508 error = in_gif_output(ifp, af, m);
513 error = in6_gif_output(ifp, af, m);
/*
 * Input path for a decapsulated packet.
 *
 * Sets the receive interface, labels the mbuf for MAC, taps BPF, offers the
 * packet to the optional ng input hook, and then queues it to the network
 * layer with netisr_dispatch().  EtherIP payloads get their EtherIP header
 * validated and stripped and, when the interface belongs to a bridge, are
 * handed to BRIDGE_INPUT().
 *
 * m:   received packet.
 * af:  address family of the inner packet.
 * ifp: gif interface the packet arrived on.
 */
529 gif_input(m, af, ifp)
535 struct etherip_header *eip;
536 struct ether_header *eh;
537 struct ifnet *oldifp;
545 m->m_pkthdr.rcvif = ifp;
548 mac_ifnet_create_mbuf(ifp, m);
/* Only build the BPF pseudo header when a listener is attached. */
551 if (bpf_peers_present(ifp->if_bpf)) {
553 bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
/* The hook receives &m, so it may replace or consume the mbuf. */
556 if (ng_gif_input_p != NULL) {
557 (*ng_gif_input_p)(ifp, &m, af);
563 * Put the packet to the network layer input queue according to the
564 * specified address family.
565 * Note: older versions of gif_input directly called network layer
566 * input functions, e.g. ip6_input, here. We changed the policy to
567 * prevent too many recursive calls of such input functions, which
568 * might cause kernel panic. But the change may introduce another
569 * problem; if the input queue is full, packets are discarded.
570 * The kernel stack overflow really happened, and we believed
571 * queue-full rarely occurs, so we changed the policy.
/* EtherIP: n is the combined EtherIP + Ethernet header size. */
585 n = sizeof(struct etherip_header) + sizeof(struct ether_header);
594 eip = mtod(m, struct etherip_header *);
596 (ETHERIP_VERSION & ETHERIP_VER_VERS_MASK)) {
597 /* discard unknown versions */
/* Strip the EtherIP header, leaving the Ethernet frame. */
601 m_adj(m, sizeof(struct etherip_header));
603 m->m_flags &= ~(M_BCAST|M_MCAST);
604 m->m_pkthdr.rcvif = ifp;
/* Bridged interface: flag the frame by destination and pass it on. */
606 if (ifp->if_bridge) {
608 eh = mtod(m, struct ether_header *);
609 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
610 if (ETHER_IS_BROADCAST(eh->ether_dhost))
611 m->m_flags |= M_BCAST;
613 m->m_flags |= M_MCAST;
616 BRIDGE_INPUT(ifp, m);
618 if (m != NULL && ifp != oldifp) {
620 * The bridge gave us back itself or one of the
621 * members for which the frame is addressed.
/* Optional hook for packets that reach this point. */
632 if (ng_gif_input_orphan_p != NULL)
633 (*ng_gif_input_orphan_p)(ifp, m, af);
/* Account input bytes and hand the packet to the selected netisr. */
640 ifp->if_ibytes += m->m_pkthdr.len;
641 netisr_dispatch(isr, m);
644 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
/*
 * Interface ioctl handler.
 *
 * Commands visible in this view:
 *  - set the tunnel endpoints (SIOCSIFPHYADDR_IN6, SIOCSLIFPHYADDR and an
 *    in_aliasreq-based variant whose case label is not visible);
 *  - delete the tunnel (code guarded by SIOCDIFPHYADDR);
 *  - read the source (SIOCGIFPSRCADDR, SIOCGIFPSRCADDR_IN6), the destination
 *    (SIOCGIFPDSTADDR, SIOCGIFPDSTADDR_IN6) or both (SIOCGLIFPHYADDR);
 *  - MTU validation under SIOCSIFMTU.
 * Unconfigured endpoints yield EADDRNOTAVAIL on the read commands.
 *
 * ifp:  gif interface.
 * cmd:  ioctl command.
 * data: command argument; its layout depends on cmd.
 */
646 gif_ioctl(ifp, cmd, data)
651 struct gif_softc *sc = ifp->if_softc;
652 struct ifreq *ifr = (struct ifreq*)data;
654 struct sockaddr *dst, *src;
655 #ifdef SIOCSIFMTU /* xxx */
/* Mark the interface up (the owning case label is not visible here). */
661 ifp->if_flags |= IFF_UP;
671 #ifdef SIOCSIFMTU /* xxx */
/* Accept only MTUs within [GIF_MTU_MIN, GIF_MTU_MAX]. */
677 if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX)
681 #endif /* SIOCSIFMTU */
/* ---- Set tunnel endpoints: locate src/dst inside the request ---- */
687 case SIOCSIFPHYADDR_IN6:
689 case SIOCSLIFPHYADDR:
/* in_aliasreq layout. */
693 src = (struct sockaddr *)
694 &(((struct in_aliasreq *)data)->ifra_addr);
695 dst = (struct sockaddr *)
696 &(((struct in_aliasreq *)data)->ifra_dstaddr);
/* in6_aliasreq layout. */
700 case SIOCSIFPHYADDR_IN6:
701 src = (struct sockaddr *)
702 &(((struct in6_aliasreq *)data)->ifra_addr);
703 dst = (struct sockaddr *)
704 &(((struct in6_aliasreq *)data)->ifra_dstaddr);
/* if_laddrreq layout. */
707 case SIOCSLIFPHYADDR:
708 src = (struct sockaddr *)
709 &(((struct if_laddrreq *)data)->addr);
710 dst = (struct sockaddr *)
711 &(((struct if_laddrreq *)data)->dstaddr);
717 /* sa_family must be equal */
718 if (src->sa_family != dst->sa_family)
721 /* validate sa_len */
722 switch (src->sa_family) {
725 if (src->sa_len != sizeof(struct sockaddr_in))
731 if (src->sa_len != sizeof(struct sockaddr_in6))
738 switch (dst->sa_family) {
741 if (dst->sa_len != sizeof(struct sockaddr_in))
747 if (dst->sa_len != sizeof(struct sockaddr_in6))
755 /* check sa_family looks sane for the cmd */
758 if (src->sa_family == AF_INET)
762 case SIOCSIFPHYADDR_IN6:
763 if (src->sa_family == AF_INET6)
767 case SIOCSLIFPHYADDR:
768 /* checks done in the above */
/* All checks passed: install the new endpoints. */
772 error = gif_set_tunnel(GIF2IFP(sc), src, dst);
/* ---- Delete tunnel ---- */
775 #ifdef SIOCDIFPHYADDR
777 gif_delete_tunnel(GIF2IFP(sc));
/* ---- Read the tunnel source address ---- */
781 case SIOCGIFPSRCADDR:
783 case SIOCGIFPSRCADDR_IN6:
785 if (sc->gif_psrc == NULL) {
786 error = EADDRNOTAVAIL;
/* Pick the output buffer (dst) and its capacity (size) per command. */
792 case SIOCGIFPSRCADDR:
793 dst = &ifr->ifr_addr;
794 size = sizeof(ifr->ifr_addr);
798 case SIOCGIFPSRCADDR_IN6:
799 dst = (struct sockaddr *)
800 &(((struct in6_ifreq *)data)->ifr_addr);
801 size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
805 error = EADDRNOTAVAIL;
/* The stored address must fit in the caller's buffer before copying. */
808 if (src->sa_len > size)
810 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
/* For IPv6 results, run sa6_recoverscope() on the copied-out address. */
812 if (dst->sa_family == AF_INET6) {
813 error = sa6_recoverscope((struct sockaddr_in6 *)dst);
/* ---- Read the tunnel destination address ---- */
820 case SIOCGIFPDSTADDR:
822 case SIOCGIFPDSTADDR_IN6:
824 if (sc->gif_pdst == NULL) {
825 error = EADDRNOTAVAIL;
831 case SIOCGIFPDSTADDR:
832 dst = &ifr->ifr_addr;
833 size = sizeof(ifr->ifr_addr);
837 case SIOCGIFPDSTADDR_IN6:
838 dst = (struct sockaddr *)
839 &(((struct in6_ifreq *)data)->ifr_addr);
840 size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
844 error = EADDRNOTAVAIL;
847 if (src->sa_len > size)
849 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
851 if (dst->sa_family == AF_INET6) {
852 error = sa6_recoverscope((struct sockaddr_in6 *)dst);
/* ---- Read both endpoints into an if_laddrreq ---- */
859 case SIOCGLIFPHYADDR:
860 if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
861 error = EADDRNOTAVAIL;
/* First copy targets the addr field ... */
867 dst = (struct sockaddr *)
868 &(((struct if_laddrreq *)data)->addr);
869 size = sizeof(((struct if_laddrreq *)data)->addr);
870 if (src->sa_len > size)
872 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
/* ... second copy targets the dstaddr field. */
876 dst = (struct sockaddr *)
877 &(((struct if_laddrreq *)data)->dstaddr);
878 size = sizeof(((struct if_laddrreq *)data)->dstaddr);
879 if (src->sa_len > size)
881 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
885 /* if_ioctl() takes care of it */
897 * XXXRW: There's a general event-ordering issue here: the code to check
898 * if a given tunnel is already present happens before we perform a
899 * potentially blocking setup of the tunnel. This code needs to be
900 * re-ordered so that the check and replacement can be atomic using
/*
 * Install src/dst as the outer endpoints of the tunnel on ifp.
 *
 * Scans the global softc list and, unless parallel tunnels are allowed,
 * fails with EADDRNOTAVAIL when another configured tunnel already uses the
 * same address pair.  It then detaches the current encap registration,
 * stores private copies of src and dst in the softc, and attaches for the
 * new address family (embedding IPv6 scope first).  On the error path the
 * new copies are freed; otherwise the old addresses are freed.  Finally
 * IFF_DRV_RUNNING is set or cleared to reflect whether both endpoints are
 * set.
 *
 * ifp: gif interface.
 * src: new outer source address (copied, not retained).
 * dst: new outer destination address (copied, not retained).
 */
904 gif_set_tunnel(ifp, src, dst)
906 struct sockaddr *src;
907 struct sockaddr *dst;
909 INIT_VNET_GIF(ifp->if_vnet);
910 struct gif_softc *sc = ifp->if_softc;
911 struct gif_softc *sc2;
912 struct sockaddr *osrc, *odst, *sa;
/* Look for an existing tunnel with the same endpoints. */
916 LIST_FOREACH(sc2, &V_gif_softc_list, gif_list) {
/* Tunnels without both endpoints cannot conflict. */
919 if (!sc2->gif_pdst || !sc2->gif_psrc)
/* Family and length must match before the byte compare is meaningful. */
921 if (sc2->gif_pdst->sa_family != dst->sa_family ||
922 sc2->gif_pdst->sa_len != dst->sa_len ||
923 sc2->gif_psrc->sa_family != src->sa_family ||
924 sc2->gif_psrc->sa_len != src->sa_len)
928 * Disallow parallel tunnels unless instructed
931 if (!V_parallel_tunnels &&
932 bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 &&
933 bcmp(sc2->gif_psrc, src, src->sa_len) == 0) {
934 error = EADDRNOTAVAIL;
935 mtx_unlock(&gif_mtx);
939 /* XXX both ends must be valid? (I mean, not 0.0.0.0) */
941 mtx_unlock(&gif_mtx);
943 /* XXX we can detach from both, but be polite just in case */
945 switch (sc->gif_psrc->sa_family) {
948 (void)in_gif_detach(sc);
953 (void)in6_gif_detach(sc);
/* Private copy of the new source address. */
959 sa = (struct sockaddr *)malloc(src->sa_len, M_IFADDR, M_WAITOK);
960 bcopy((caddr_t)src, (caddr_t)sa, src->sa_len);
/* Private copy of the new destination address. */
964 sa = (struct sockaddr *)malloc(dst->sa_len, M_IFADDR, M_WAITOK);
965 bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len);
/* Attach the encap registration for the new outer family. */
968 switch (sc->gif_psrc->sa_family) {
971 error = in_gif_attach(sc);
977 * Check validity of the scope zone ID of the addresses, and
978 * convert it into the kernel internal form if necessary.
980 error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_psrc, 0);
983 error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_pdst, 0);
986 error = in6_gif_attach(sc);
/* Error path: free the newly installed copies. */
992 free((caddr_t)sc->gif_psrc, M_IFADDR);
993 free((caddr_t)sc->gif_pdst, M_IFADDR);
/* Success path: the previous addresses are no longer needed. */
1000 free((caddr_t)osrc, M_IFADDR);
1002 free((caddr_t)odst, M_IFADDR);
/* RUNNING mirrors "both endpoints configured". */
1005 if (sc->gif_psrc && sc->gif_pdst)
1006 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1008 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
/*
 * Remove the tunnel configuration from ifp: free and clear both endpoint
 * addresses, detach the IPv4 and IPv6 encap registrations (return values
 * deliberately ignored), and clear IFF_DRV_RUNNING.
 */
1014 gif_delete_tunnel(ifp)
1017 struct gif_softc *sc = ifp->if_softc;
/* Pointers are reset to NULL after free so later checks see "unset". */
1020 free((caddr_t)sc->gif_psrc, M_IFADDR);
1021 sc->gif_psrc = NULL;
1024 free((caddr_t)sc->gif_pdst, M_IFADDR);
1025 sc->gif_pdst = NULL;
1027 /* it is safe to detach from both */
1029 (void)in_gif_detach(sc);
1032 (void)in6_gif_detach(sc);
1034 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;