2 /* $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include "opt_inet6.h"
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
41 #include <sys/malloc.h>
43 #include <sys/module.h>
44 #include <sys/socket.h>
45 #include <sys/sockio.h>
46 #include <sys/errno.h>
48 #include <sys/sysctl.h>
49 #include <sys/syslog.h>
50 #include <sys/protosw.h>
52 #include <machine/cpu.h>
55 #include <net/if_clone.h>
56 #include <net/if_types.h>
57 #include <net/netisr.h>
58 #include <net/route.h>
61 #include <netinet/in.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/ip.h>
65 #include <netinet/in_var.h>
66 #include <netinet/in_gif.h>
67 #include <netinet/ip_var.h>
72 #include <netinet/in.h>
74 #include <netinet6/in6_var.h>
75 #include <netinet/ip6.h>
76 #include <netinet6/ip6_var.h>
77 #include <netinet6/scope6_var.h>
78 #include <netinet6/in6_gif.h>
79 #include <netinet6/ip6protosw.h>
82 #include <netinet/ip_encap.h>
83 #include <net/ethernet.h>
84 #include <net/if_bridgevar.h>
85 #include <net/if_gif.h>
87 #include <net/net_osdep.h>
92 * gif_mtx protects the global gif_softc_list.
93 * XXX: Per-softc locking is still required.
/* File-scope state: the list mutex, the malloc type used for softcs, and the list of all gif softcs. */
95 static struct mtx gif_mtx;
96 static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
97 static LIST_HEAD(, gif_softc) gif_softc_list;
/* Optional hook pointers; every call site visible in this file tests them against NULL before calling. */
99 void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
100 void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
101 void (*ng_gif_attach_p)(struct ifnet *ifp);
102 void (*ng_gif_detach_p)(struct ifnet *ifp);
/* Forward declarations for the static entry points defined further down. */
104 static void gif_start(struct ifnet *);
105 static int gif_clone_create(struct if_clone *, int);
106 static void gif_clone_destroy(struct ifnet *);
/* NOTE(review): presumably this is what defines gif_cloner, which gifmodevent attaches and detaches - confirm against IFC_SIMPLE_DECLARE. */
108 IFC_SIMPLE_DECLARE(gif, 0);
110 static int gifmodevent(module_t, int, void *);
/* Sysctl node under _net_link that the tunables declared below hang off (_net_link_gif). */
112 SYSCTL_DECL(_net_link);
113 SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
114 "Generic Tunnel Interface");
117 * This macro controls the default upper limitation on nesting of gif tunnels.
118 * Since setting a large value for this macro with a careless configuration
119 * may crash the system, we don't allow any nesting by default.
120 * If you need to configure nested gif tunnels, you can define this macro
121 * in your kernel configuration file. However, if you do so, please be
122 * careful to configure the tunnels so that they do not form a loop.
/* Compile-time default for the nesting limit. */
124 #define MAX_GIF_NEST 1
/* Run-time limit, initialised from MAX_GIF_NEST; gif_output compares gif_called against it. */
126 static int max_gif_nesting = MAX_GIF_NEST;
/* Exported read-write (CTLFLAG_RW) under _net_link_gif as "max_nesting". */
127 SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_RW,
128 &max_gif_nesting, 0, "Max nested tunnels");
131 * By default, we disallow creation of multiple tunnels between the same
132 * pair of addresses. Some applications require this functionality so
133 * we allow control over this check here.
/*
 * Flag consulted by gif_set_tunnel: when it is zero, a tunnel whose source
 * and destination both match an existing tunnel is refused.
 * NOTE(review): two initialisers are visible here; the preprocessor
 * conditional that presumably selects between them is not visible in this
 * extract - confirm against the full file.
 */
136 static int parallel_tunnels = 1;
138 static int parallel_tunnels = 0;
/*
 * Exported read-write (CTLFLAG_RW) under _net_link_gif.  The second line
 * passes the address of the variable, in the same form as the max_nesting
 * sysctl above.
 */
140 SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_RW,
141 &parallel_tunnels, 0, "Allow parallel tunnels?");
/*
 * gif_clone_create: allocate a gif_softc and its ifnet, name the interface
 * after the cloner and unit number, and put the softc on gif_softc_list.
 * NOTE(review): the return type, braces, the body of the allocation-failure
 * branch, the matching mtx_lock and the return statements are not visible
 * in this extract.
 */
144 gif_clone_create(ifc, unit)
145 struct if_clone *ifc;
148 struct gif_softc *sc;
/* The softc is requested with M_ZERO, so fields not set explicitly start out as zero. */
150 sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
151 GIF2IFP(sc) = if_alloc(IFT_GIF);
/* if_alloc failure branch; what it does is not visible here. */
152 if (GIF2IFP(sc) == NULL) {
/* Link ifnet back to its softc and name it "<ifc_name><unit>". */
157 GIF2IFP(sc)->if_softc = sc;
158 if_initname(GIF2IFP(sc), ifc->ifc_name, unit);
/* Publish the softc on the global list; gif_mtx is released right after the insert. */
163 LIST_INSERT_HEAD(&gif_softc_list, sc, gif_list);
164 mtx_unlock(&gif_mtx);
/*
 * NOTE(review): the repeated declaration of sc below suggests a separate
 * definition starts here whose header line is missing from this extract.
 * The statements that follow set up the interface and attach it.
 */
170 struct gif_softc *sc;
/* No encapsulation attachment yet for either outer address family. */
173 sc->encap_cookie4 = sc->encap_cookie6 = NULL;
175 GIF2IFP(sc)->if_addrlen = 0;
176 GIF2IFP(sc)->if_mtu = GIF_MTU;
177 GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
179 /* turn off ingress filter */
180 GIF2IFP(sc)->if_flags |= IFF_LINK2;
/* Install this driver's ioctl, start and output handlers, then attach the interface. */
182 GIF2IFP(sc)->if_ioctl = gif_ioctl;
183 GIF2IFP(sc)->if_start = gif_start;
184 GIF2IFP(sc)->if_output = gif_output;
185 GIF2IFP(sc)->if_snd.ifq_maxlen = IFQ_MAXLEN;
186 if_attach(GIF2IFP(sc));
/* Register with BPF as DLT_NULL with a u_int32_t-sized header. */
187 bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
/* Call the attach hook only when one has been installed. */
188 if (ng_gif_attach_p != NULL)
189 (*ng_gif_attach_p)(GIF2IFP(sc));
/*
 * gif_clone_destroy: unlink the interface's softc from gif_softc_list,
 * delete its tunnel, detach any encap cookies and call the detach hook.
 * NOTE(review): the declaration of err, the matching mtx_lock and whatever
 * releases the ifnet and softc are not visible in this extract.
 */
193 gif_clone_destroy(ifp)
197 struct gif_softc *sc = ifp->if_softc;
/* Remove from the global list; gif_mtx is released right after. */
200 LIST_REMOVE(sc, gif_list);
201 mtx_unlock(&gif_mtx);
203 gif_delete_tunnel(ifp);
/* Detach each encap cookie that is still set; a non-zero result trips the KASSERT. */
205 if (sc->encap_cookie6 != NULL) {
206 err = encap_detach(sc->encap_cookie6);
207 KASSERT(err == 0, ("Unexpected error detaching encap_cookie6"));
211 if (sc->encap_cookie4 != NULL) {
212 err = encap_detach(sc->encap_cookie4);
213 KASSERT(err == 0, ("Unexpected error detaching encap_cookie4"));
/* Call the detach hook only when one has been installed. */
217 if (ng_gif_detach_p != NULL)
218 (*ng_gif_detach_p)(ifp);
/*
 * gifmodevent: module event handler.
 * NOTE(review): the switch on the event type and its case labels are not
 * visible in this extract; the first group of statements looks like the
 * load path and the second like the unload path - confirm.
 */
227 gifmodevent(mod, type, data)
/* Set up the list mutex and the empty softc list before making the cloner available. */
235 mtx_init(&gif_mtx, "gif_mtx", NULL, MTX_DEF);
236 LIST_INIT(&gif_softc_list);
237 if_clone_attach(&gif_cloner);
240 ip6_gif_hlim = GIF_HLIM;
/* Tear down in the reverse order: cloner first, then the mutex. */
245 if_clone_detach(&gif_cloner);
246 mtx_destroy(&gif_mtx);
/* Module descriptor; its initialiser fields are not visible in this extract. */
257 static moduledata_t gif_mod = {
/* Register the module as if_gif and declare it as version 1. */
263 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
264 MODULE_VERSION(if_gif, 1);
/*
 * gif_encapcheck: decide whether mbuf m belongs to the tunnel whose softc
 * is passed as arg, handing the final decision to gif_encapcheck4 or
 * gif_encapcheck6.
 * NOTE(review): the return statements of the rejection branches, the
 * declaration of ip and the switch statements are not visible here.
 */
267 gif_encapcheck(m, off, proto, arg)
268 const struct mbuf *m;
274 struct gif_softc *sc;
276 sc = (struct gif_softc *)arg;
/* Interface is not up. */
280 if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
283 /* no physical address */
284 if (!sc->gif_psrc || !sc->gif_pdst)
296 case IPPROTO_ETHERIP:
303 /* Bail on short packets */
304 if (m->m_pkthdr.len < sizeof(ip))
/* Copy the first sizeof(ip) bytes of the packet into the local ip. */
307 m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
/* The IPv4 check is used only when both tunnel endpoints are AF_INET. */
312 if (sc->gif_psrc->sa_family != AF_INET ||
313 sc->gif_pdst->sa_family != AF_INET)
315 return gif_encapcheck4(m, off, proto, arg);
/* The IPv6 check needs a full ip6_hdr and both endpoints to be AF_INET6. */
319 if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
321 if (sc->gif_psrc->sa_family != AF_INET6 ||
322 sc->gif_pdst->sa_family != AF_INET6)
324 return gif_encapcheck6(m, off, proto, arg);
/*
 * gif_start: if_start handler; takes packets off ifp->if_snd and passes
 * them to gif_output with the tunnel destination as dst.
 * NOTE(review): the loop construct, the assignment of sc and the
 * declaration of m are not visible in this extract.
 */
332 gif_start(struct ifnet *ifp)
334 struct gif_softc *sc;
/* IFF_DRV_OACTIVE is set before dequeuing and cleared again at the end. */
339 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
341 IFQ_DEQUEUE(&ifp->if_snd, m);
345 gif_output(ifp, m, sc->gif_pdst, NULL);
348 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
/*
 * gif_output: if_output handler.  Checks for tunnel loops and excessive
 * nesting using mbuf tags, taps BPF, counts output bytes and hands the
 * packet to in_gif_output or in6_gif_output.
 * NOTE(review): many lines are missing from this extract, including the
 * declarations of error, mtag, af and gif_called, the counting of
 * gif_called, the error exits and the switch case labels.
 */
354 gif_output(ifp, m, dst, rt)
357 struct sockaddr *dst;
358 struct rtentry *rt; /* added in net2 */
360 struct gif_softc *sc = ifp->if_softc;
/* MAC check on the outgoing packet; how a non-zero error is handled is not visible here. */
367 error = mac_check_ifnet_transmit(ifp, m);
375 * gif may cause infinite recursion calls when misconfigured.
376 * We'll prevent this by detecting loops.
378 * High nesting level may cause stack exhaustion.
379 * We'll prevent this by introducing upper limit.
/* Walk every MTAG_GIF_CALLED tag on the mbuf; each stores an ifnet pointer right after the tag header. */
382 mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
383 while (mtag != NULL) {
/* A tag that already records this interface means a loop. */
384 if (*(struct ifnet **)(mtag + 1) == ifp) {
386 "gif_output: loop detected on %s\n",
387 (*(struct ifnet **)(mtag + 1))->if_xname);
389 error = EIO; /* is there better errno? */
392 mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
/* Enforce the nesting limit from the max_nesting sysctl. */
395 if (gif_called > max_gif_nesting) {
397 "gif_output: recursively called too many times(%d)\n",
400 error = EIO; /* is there better errno? */
/* Record this interface in a new tag so a later pass through this function can detect the loop. */
403 mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
410 *(struct ifnet **)(mtag + 1) = ifp;
411 m_tag_prepend(m, mtag);
413 m->m_flags &= ~(M_BCAST|M_MCAST);
/* Interface down or tunnel endpoints not configured; the body of this branch is not visible. */
414 if (!(ifp->if_flags & IFF_UP) ||
415 sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
421 /* BPF writes need to be handled specially. */
/* For AF_UNSPEC the address family is read out of dst->sa_data. */
422 if (dst->sa_family == AF_UNSPEC) {
423 bcopy(dst->sa_data, &af, sizeof(af));
429 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
432 ifp->if_obytes += m->m_pkthdr.len;
434 /* override to IPPROTO_ETHERIP for bridged traffic */
438 /* inner AF-specific encapsulation */
440 /* XXX should we check if our outer source is legal? */
442 /* dispatch to output logic based on outer AF */
443 switch (sc->gif_psrc->sa_family) {
446 error = in_gif_output(ifp, af, m);
451 error = in6_gif_output(ifp, af, m);
/*
 * gif_input: receive path for a decapsulated packet m of address family af
 * arriving on ifp.  Taps BPF, offers the packet to the input hook, handles
 * EtherIP frames via BRIDGE_INPUT and otherwise dispatches to a netisr.
 * NOTE(review): the declarations of n, isr and af1, the switch on af that
 * selects isr, and the early-return paths are not visible in this extract.
 */
467 gif_input(m, af, ifp)
473 struct etherip_header *eip;
/* Mark the packet as received on the gif interface. */
481 m->m_pkthdr.rcvif = ifp;
484 mac_create_mbuf_from_ifnet(ifp, m);
489 bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
/* The input hook gets &m, so it can replace or clear the mbuf pointer; the follow-up check is not visible. */
492 if (ng_gif_input_p != NULL) {
493 (*ng_gif_input_p)(ifp, &m, af);
499 * Put the packet to the network layer input queue according to the
500 * specified address family.
501 * Note: older versions of gif_input directly called network layer
502 * input functions, e.g. ip6_input, here. We changed the policy to
503 * prevent too many recursive calls of such input functions, which
504 * might cause kernel panic. But the change may introduce another
505 * problem; if the input queue is full, packets are discarded.
506 * The kernel stack overflow really happened, and we believed
507 * queue-full rarely occurs, so we changed the policy.
/* EtherIP path: n is the combined size of the EtherIP and Ethernet headers. */
521 n = sizeof(struct etherip_header) + sizeof(struct ether_header);
530 eip = mtod(m, struct etherip_header *);
532 (ETHERIP_VERSION & ETHERIP_VER_VERS_MASK)) {
533 /* discard unknown versions */
/* Strip the EtherIP header before the frame goes to the bridge. */
537 m_adj(m, sizeof(struct etherip_header));
539 m->m_flags &= ~(M_BCAST|M_MCAST);
540 m->m_pkthdr.rcvif = ifp;
543 BRIDGE_INPUT(ifp, m);
/* The orphan hook is called only when one has been installed; the condition that leads here is not visible. */
550 if (ng_gif_input_orphan_p != NULL)
551 (*ng_gif_input_orphan_p)(ifp, m, af);
/* Count input bytes and hand the packet to the chosen netisr. */
558 ifp->if_ibytes += m->m_pkthdr.len;
559 netisr_dispatch(isr, m);
562 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
/*
 * gif_ioctl: if_ioctl handler.  The visible code handles setting the
 * tunnel endpoints (SIOCSIFPHYADDR_IN6, SIOCSLIFPHYADDR and an in_aliasreq
 * variant), deleting them, and reading them back (SIOCGIFPSRCADDR*,
 * SIOCGIFPDSTADDR*, SIOCGLIFPHYADDR), plus an MTU range check.
 * NOTE(review): the outer switch, most case labels, the declarations of
 * error, size and mtu, the error assignments after failed checks and the
 * final return are not visible in this extract.
 */
564 gif_ioctl(ifp, cmd, data)
569 struct gif_softc *sc = ifp->if_softc;
570 struct ifreq *ifr = (struct ifreq*)data;
572 struct sockaddr *dst, *src;
573 #ifdef SIOCSIFMTU /* xxx */
/* The case this statement belongs to is not visible. */
579 ifp->if_flags |= IFF_UP;
589 #ifdef SIOCSIFMTU /* xxx */
/* A requested MTU must lie within [GIF_MTU_MIN, GIF_MTU_MAX]. */
595 if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX)
599 #endif /* SIOCSIFMTU */
605 case SIOCSIFPHYADDR_IN6:
607 case SIOCSLIFPHYADDR:
/* Pick src/dst out of the request structure; the layout depends on the command. */
611 src = (struct sockaddr *)
612 &(((struct in_aliasreq *)data)->ifra_addr);
613 dst = (struct sockaddr *)
614 &(((struct in_aliasreq *)data)->ifra_dstaddr);
618 case SIOCSIFPHYADDR_IN6:
619 src = (struct sockaddr *)
620 &(((struct in6_aliasreq *)data)->ifra_addr);
621 dst = (struct sockaddr *)
622 &(((struct in6_aliasreq *)data)->ifra_dstaddr);
625 case SIOCSLIFPHYADDR:
626 src = (struct sockaddr *)
627 &(((struct if_laddrreq *)data)->addr);
628 dst = (struct sockaddr *)
629 &(((struct if_laddrreq *)data)->dstaddr);
635 /* sa_family must be equal */
636 if (src->sa_family != dst->sa_family)
639 /* validate sa_len */
640 switch (src->sa_family) {
643 if (src->sa_len != sizeof(struct sockaddr_in))
649 if (src->sa_len != sizeof(struct sockaddr_in6))
656 switch (dst->sa_family) {
659 if (dst->sa_len != sizeof(struct sockaddr_in))
665 if (dst->sa_len != sizeof(struct sockaddr_in6))
673 /* check sa_family looks sane for the cmd */
676 if (src->sa_family == AF_INET)
680 case SIOCSIFPHYADDR_IN6:
681 if (src->sa_family == AF_INET6)
685 case SIOCSLIFPHYADDR:
686 /* checks done in the above */
/* Checked addresses are installed through gif_set_tunnel. */
690 error = gif_set_tunnel(GIF2IFP(sc), src, dst);
693 #ifdef SIOCDIFPHYADDR
695 gif_delete_tunnel(GIF2IFP(sc));
/* Read back the tunnel source address. */
699 case SIOCGIFPSRCADDR:
701 case SIOCGIFPSRCADDR_IN6:
703 if (sc->gif_psrc == NULL) {
704 error = EADDRNOTAVAIL;
/* dst and size describe the output field in the caller's request structure. */
710 case SIOCGIFPSRCADDR:
711 dst = &ifr->ifr_addr;
712 size = sizeof(ifr->ifr_addr);
716 case SIOCGIFPSRCADDR_IN6:
717 dst = (struct sockaddr *)
718 &(((struct in6_ifreq *)data)->ifr_addr);
719 size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
723 error = EADDRNOTAVAIL;
/* The stored address is checked against the output field size before the copy. */
726 if (src->sa_len > size)
728 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
/* IPv6 results additionally go through sa6_recoverscope. */
730 if (dst->sa_family == AF_INET6) {
731 error = sa6_recoverscope((struct sockaddr_in6 *)dst);
/* Read back the tunnel destination address; same structure as the source case. */
738 case SIOCGIFPDSTADDR:
740 case SIOCGIFPDSTADDR_IN6:
742 if (sc->gif_pdst == NULL) {
743 error = EADDRNOTAVAIL;
749 case SIOCGIFPDSTADDR:
750 dst = &ifr->ifr_addr;
751 size = sizeof(ifr->ifr_addr);
755 case SIOCGIFPDSTADDR_IN6:
756 dst = (struct sockaddr *)
757 &(((struct in6_ifreq *)data)->ifr_addr);
758 size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
762 error = EADDRNOTAVAIL;
765 if (src->sa_len > size)
767 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
769 if (dst->sa_family == AF_INET6) {
770 error = sa6_recoverscope((struct sockaddr_in6 *)dst);
/* Read back both endpoints at once; both must be configured. */
777 case SIOCGLIFPHYADDR:
778 if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
779 error = EADDRNOTAVAIL;
/* First copy fills the addr field; the assignment of src before each copy is not visible. */
785 dst = (struct sockaddr *)
786 &(((struct if_laddrreq *)data)->addr);
787 size = sizeof(((struct if_laddrreq *)data)->addr);
788 if (src->sa_len > size)
790 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
/* Second copy fills the dstaddr field. */
794 dst = (struct sockaddr *)
795 &(((struct if_laddrreq *)data)->dstaddr);
796 size = sizeof(((struct if_laddrreq *)data)->dstaddr);
797 if (src->sa_len > size)
799 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
803 /* if_ioctl() takes care of it */
815 * XXXRW: There's a general event-ordering issue here: the code to check
816 * if a given tunnel is already present happens before we perform a
817 * potentially blocking setup of the tunnel. This code needs to be
818 * re-ordered so that the check and replacement can be atomic using
/*
 * gif_set_tunnel: install src and dst as the tunnel endpoints of ifp.
 * Refuses a pair already used by another gif when parallel tunnels are
 * disallowed, detaches the old encapsulation, stores private copies of the
 * addresses, attaches the new encapsulation and updates IFF_DRV_RUNNING.
 * NOTE(review): the declaration of error, the matching mtx_lock, the
 * saving of the old pointers into osrc/odst, the assignment of sa into the
 * softc, the case labels, the gotos/labels and the return are not visible
 * in this extract.
 */
822 gif_set_tunnel(ifp, src, dst)
824 struct sockaddr *src;
825 struct sockaddr *dst;
827 struct gif_softc *sc = ifp->if_softc;
828 struct gif_softc *sc2;
829 struct sockaddr *osrc, *odst, *sa;
/* Scan every gif softc for one that already uses this endpoint pair. */
836 LIST_FOREACH(sc2, &gif_softc_list, gif_list) {
/* Unconfigured tunnels and tunnels of a different family or length are tested for; the skip itself is not visible. */
839 if (!sc2->gif_pdst || !sc2->gif_psrc)
841 if (sc2->gif_pdst->sa_family != dst->sa_family ||
842 sc2->gif_pdst->sa_len != dst->sa_len ||
843 sc2->gif_psrc->sa_family != src->sa_family ||
844 sc2->gif_psrc->sa_len != src->sa_len)
848 * Disallow parallel tunnels unless instructed
/* A byte-wise match on both endpoints gives EADDRNOTAVAIL; the mutex is dropped on this path too. */
851 if (!parallel_tunnels &&
852 bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 &&
853 bcmp(sc2->gif_psrc, src, src->sa_len) == 0) {
854 error = EADDRNOTAVAIL;
855 mtx_unlock(&gif_mtx);
859 /* XXX both end must be valid? (I mean, not 0.0.0.0) */
861 mtx_unlock(&gif_mtx);
863 /* XXX we can detach from both, but be polite just in case */
865 switch (sc->gif_psrc->sa_family) {
868 (void)in_gif_detach(sc);
873 (void)in6_gif_detach(sc);
/* Make private M_IFADDR copies of the caller's addresses. */
879 sa = (struct sockaddr *)malloc(src->sa_len, M_IFADDR, M_WAITOK);
880 bcopy((caddr_t)src, (caddr_t)sa, src->sa_len);
884 sa = (struct sockaddr *)malloc(dst->sa_len, M_IFADDR, M_WAITOK);
885 bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len);
/* Attach the encapsulation that matches the new outer address family. */
888 switch (sc->gif_psrc->sa_family) {
891 error = in_gif_attach(sc);
897 * Check validity of the scope zone ID of the addresses, and
898 * convert it into the kernel internal form if necessary.
900 error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_psrc, 0);
903 error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_pdst, 0);
906 error = in6_gif_attach(sc);
/* NOTE(review): presumably the failure path - the new copies are freed; confirm against the full file. */
912 free((caddr_t)sc->gif_psrc, M_IFADDR);
913 free((caddr_t)sc->gif_pdst, M_IFADDR);
/* The previous addresses are released. */
920 free((caddr_t)osrc, M_IFADDR);
922 free((caddr_t)odst, M_IFADDR);
/* The RUNNING-flag update appears twice; which exit path each copy belongs to is not visible. */
924 if (sc->gif_psrc && sc->gif_pdst)
925 ifp->if_drv_flags |= IFF_DRV_RUNNING;
927 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
933 if (sc->gif_psrc && sc->gif_pdst)
934 ifp->if_drv_flags |= IFF_DRV_RUNNING;
936 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
/*
 * gif_delete_tunnel: release the stored tunnel endpoints of ifp, detach
 * from the IPv4 and IPv6 encapsulation code and update IFF_DRV_RUNNING.
 * NOTE(review): the NULL checks around the frees and the resetting of
 * gif_psrc/gif_pdst are not visible in this extract.
 */
943 gif_delete_tunnel(ifp)
946 struct gif_softc *sc = ifp->if_softc;
/* Both endpoint copies live in M_IFADDR. */
952 free((caddr_t)sc->gif_psrc, M_IFADDR);
956 free((caddr_t)sc->gif_pdst, M_IFADDR);
959 /* it is safe to detach from both */
961 (void)in_gif_detach(sc);
964 (void)in6_gif_detach(sc);
/* RUNNING is set only while both endpoints are configured. */
967 if (sc->gif_psrc && sc->gif_pdst)
968 ifp->if_drv_flags |= IFF_DRV_RUNNING;
970 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;