2 /* $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include "opt_inet6.h"
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/socket.h>
44 #include <sys/sockio.h>
45 #include <sys/errno.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/protosw.h>
51 #include <machine/cpu.h>
54 #include <net/if_clone.h>
55 #include <net/if_types.h>
56 #include <net/netisr.h>
57 #include <net/route.h>
60 #include <netinet/in.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/ip.h>
64 #include <netinet/in_var.h>
65 #include <netinet/in_gif.h>
66 #include <netinet/ip_var.h>
71 #include <netinet/in.h>
73 #include <netinet6/in6_var.h>
74 #include <netinet/ip6.h>
75 #include <netinet6/ip6_var.h>
76 #include <netinet6/scope6_var.h>
77 #include <netinet6/in6_gif.h>
78 #include <netinet6/ip6protosw.h>
81 #include <netinet/ip_encap.h>
82 #include <net/ethernet.h>
83 #include <net/if_bridgevar.h>
84 #include <net/if_gif.h>
86 #include <security/mac/mac_framework.h>
91 * gif_mtx protects the global gif_softc_list.
/* Global mutex; this file unlocks it right after gif_softc_list updates. */
93 static struct mtx gif_mtx;
/* Allocation type tag passed to malloc() when a gif_softc is created. */
94 static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
/* Head of the list of gif_softc entries, linked through their gif_list field. */
95 static LIST_HEAD(, gif_softc) gif_softc_list;
/*
 * Optional hook function pointers. Every call site in this file tests the
 * pointer against NULL before calling through it.
 * NOTE(review): the ng_ prefix suggests these are installed by a netgraph
 * node - confirm against the code that assigns them.
 */
/* Called from gif_input with the receiving ifnet, a pointer to the mbuf pointer, and af. */
97 void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
/* Called from gif_input with the ifnet, the mbuf itself, and af. */
98 void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
/* Called from gif_clone_create after the interface and BPF have been attached. */
99 void (*ng_gif_attach_p)(struct ifnet *ifp);
/* Called from gif_clone_destroy for the interface being destroyed. */
100 void (*ng_gif_detach_p)(struct ifnet *ifp);
/* Forward declarations of file-local functions defined further down. */
102 static void gif_start(struct ifnet *);
103 static int gif_clone_create(struct if_clone *, int, caddr_t);
104 static void gif_clone_destroy(struct ifnet *);
/*
 * NOTE(review): presumably this macro declares the gif_cloner object that
 * gifmodevent passes to if_clone_attach()/if_clone_detach() and binds it to
 * gif_clone_create/gif_clone_destroy - confirm against <net/if_clone.h>.
 */
106 IFC_SIMPLE_DECLARE(gif, 0);
/* Module event handler, defined below. */
108 static int gifmodevent(module_t, int, void *);
/* Parent sysctl node declared elsewhere; referenced here as _net_link. */
110 SYSCTL_DECL(_net_link);
/*
 * Create the "gif" sysctl node under _net_link, using IFT_GIF as its number.
 * The SYSCTL_INT entries in this file hang off it as _net_link_gif.
 */
111 SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
112 "Generic Tunnel Interface");
115 * This macro controls the default upper limit on the nesting of gif tunnels.
116 * Because a large value combined with a careless configuration
117 * may crash the system, we do not allow any nesting by default.
118 * If you need to configure nested gif tunnels, you can define this macro
119 * in your kernel configuration file. However, if you do so, please be
120 * careful to configure the tunnels so that they do not form a loop.
/* Compile-time default for the nesting limit. */
122 #define MAX_GIF_NEST 1
/* Run-time nesting limit; gif_output compares its call depth against it. */
124 static int max_gif_nesting = MAX_GIF_NEST;
/* Export the limit as the read/write integer sysctl "max_nesting" under _net_link_gif. */
125 SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_RW,
126 &max_gif_nesting, 0, "Max nested tunnels");
129 * By default, we disallow creation of multiple tunnels between the same
130 * pair of addresses. Some applications require this functionality so
131 * we allow control over this check here.
/*
 * Flag consulted by gif_set_tunnel: when zero, a tunnel whose source and
 * destination both match an existing tunnel is rejected.
 * NOTE(review): two definitions with different initial values appear back
 * to back; presumably a conditional-compilation guard that is not visible in
 * this view selects exactly one of them - confirm.
 */
134 static int parallel_tunnels = 1;
136 static int parallel_tunnels = 0;
/*
 * Export parallel_tunnels as the read/write integer sysctl
 * "parallel_tunnels" under _net_link_gif. The second line passes the address
 * of the variable, in the same form as the max_nesting sysctl above.
 */
138 SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_RW,
139 &parallel_tunnels, 0, "Allow parallel tunnels?");
/*
 * Cloner create hook: build and attach a new gif interface for `unit`.
 *
 * Visible steps: allocate a softc, allocate an IFT_GIF ifnet for it, set the
 * interface name, flags, MTU and handlers, attach the interface and BPF,
 * call the optional ng_gif_attach_p hook, and insert the softc at the head
 * of gif_softc_list.
 *
 * NOTE(review): parts of this definition (remaining parameter declarations,
 * braces, the if_alloc failure path, the lock acquisition and the return)
 * are not visible in this view.
 */
142 gif_clone_create(ifc, unit, params)
143 struct if_clone *ifc;
147 struct gif_softc *sc;
/* M_ZERO requests a zero-filled softc; M_WAITOK is the wait flag. */
149 sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
150 GIF2IFP(sc) = if_alloc(IFT_GIF);
/* if_alloc() result is checked for NULL; the handling itself is not visible here. */
151 if (GIF2IFP(sc) == NULL) {
/* Back-pointer so ifnet callbacks can recover the softc via ifp->if_softc. */
158 GIF2IFP(sc)->if_softc = sc;
159 if_initname(GIF2IFP(sc), ifc->ifc_name, unit);
/* No encap registrations yet for either address family. */
161 sc->encap_cookie4 = sc->encap_cookie6 = NULL;
163 GIF2IFP(sc)->if_addrlen = 0;
164 GIF2IFP(sc)->if_mtu = GIF_MTU;
165 GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
167 /* turn off ingress filter */
168 GIF2IFP(sc)->if_flags |= IFF_LINK2;
/* Install this driver's ioctl, start and output handlers. */
170 GIF2IFP(sc)->if_ioctl = gif_ioctl;
171 GIF2IFP(sc)->if_start = gif_start;
172 GIF2IFP(sc)->if_output = gif_output;
173 GIF2IFP(sc)->if_snd.ifq_maxlen = IFQ_MAXLEN;
174 if_attach(GIF2IFP(sc));
/* BPF is attached with link type DLT_NULL and a u_int32_t-sized header. */
175 bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
/* Optional attach hook, only called when it has been set. */
176 if (ng_gif_attach_p != NULL)
177 (*ng_gif_attach_p)(GIF2IFP(sc));
/* Publish the softc on the global list; the matching lock call is not visible here. */
180 LIST_INSERT_HEAD(&gif_softc_list, sc, gif_list);
181 mtx_unlock(&gif_mtx);
/*
 * Cloner destroy hook for the interface `ifp`.
 *
 * Visible steps: remove the softc from gif_softc_list, delete the tunnel
 * via gif_delete_tunnel, detach the IPv6 and IPv4 encap cookies when they
 * are set, call the optional ng_gif_detach_p hook, and run GIF_LOCK_DESTROY
 * on the softc.
 *
 * NOTE(review): braces, the lock acquisition, and any interface detach/free
 * calls are not visible in this view.
 */
187 gif_clone_destroy(ifp)
191 struct gif_softc *sc = ifp->if_softc;
/* Unlink from the global list before tearing the softc down. */
194 LIST_REMOVE(sc, gif_list);
195 mtx_unlock(&gif_mtx);
197 gif_delete_tunnel(ifp);
/* encap_detach() is expected to succeed here; a failure trips the KASSERT. */
199 if (sc->encap_cookie6 != NULL) {
200 err = encap_detach(sc->encap_cookie6);
201 KASSERT(err == 0, ("Unexpected error detaching encap_cookie6"));
205 if (sc->encap_cookie4 != NULL) {
206 err = encap_detach(sc->encap_cookie4);
207 KASSERT(err == 0, ("Unexpected error detaching encap_cookie4"));
/* Optional detach hook, only called when it has been set. */
211 if (ng_gif_detach_p != NULL)
212 (*ng_gif_detach_p)(ifp);
217 GIF_LOCK_DESTROY(sc);
/*
 * Module event handler.
 *
 * Visible work: initialise gif_mtx and gif_softc_list, attach gif_cloner and
 * set ip6_gif_hlim to GIF_HLIM; then detach gif_cloner and destroy gif_mtx.
 *
 * NOTE(review): the switch on `type` and its case labels are not visible in
 * this view; presumably the first group runs on module load and the second
 * on unload - confirm.
 */
223 gifmodevent(mod, type, data)
/* The mutex and list are set up before the cloner is attached. */
231 mtx_init(&gif_mtx, "gif_mtx", NULL, MTX_DEF);
232 LIST_INIT(&gif_softc_list);
233 if_clone_attach(&gif_cloner);
236 ip6_gif_hlim = GIF_HLIM;
/* The cloner is detached before the mutex is destroyed. */
241 if_clone_detach(&gif_cloner);
242 mtx_destroy(&gif_mtx);
/*
 * Module description record passed to DECLARE_MODULE below.
 * NOTE(review): the initializer fields are not visible in this view.
 */
253 static moduledata_t gif_mod = {
/* Register the module as if_gif at SI_SUB_PSEUDO / SI_ORDER_ANY, version 1. */
259 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
260 MODULE_VERSION(if_gif, 1);
/*
 * Encapsulation match check for the gif softc passed in `arg`.
 *
 * Visible checks: the interface must be IFF_UP, both physical addresses
 * must be set, and the packet must be long enough for its outer header.
 * The outer header's address family must match the family of both
 * configured addresses; the decision is then delegated to gif_encapcheck4
 * or gif_encapcheck6.
 *
 * NOTE(review): the return statements for the rejecting branches and the
 * switch statements are not visible in this view.
 */
263 gif_encapcheck(m, off, proto, arg)
264 const struct mbuf *m;
270 struct gif_softc *sc;
272 sc = (struct gif_softc *)arg;
/* Interface is not up. */
276 if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
279 /* no physical address */
280 if (!sc->gif_psrc || !sc->gif_pdst)
292 case IPPROTO_ETHERIP:
299 /* Bail on short packets */
300 if (m->m_pkthdr.len < sizeof(ip))
/* Copy the leading sizeof(ip) bytes out of the mbuf chain into the local ip. */
303 m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
/* IPv4 path: both tunnel endpoints must be AF_INET. */
308 if (sc->gif_psrc->sa_family != AF_INET ||
309 sc->gif_pdst->sa_family != AF_INET)
311 return gif_encapcheck4(m, off, proto, arg);
/* IPv6 path: length check, then both endpoints must be AF_INET6. */
315 if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
317 if (sc->gif_psrc->sa_family != AF_INET6 ||
318 sc->gif_pdst->sa_family != AF_INET6)
320 return gif_encapcheck6(m, off, proto, arg);
/*
 * if_start handler: mark the interface output-active, dequeue from
 * ifp->if_snd and hand each mbuf to gif_output with the tunnel destination
 * sc->gif_pdst, then clear the output-active flag.
 *
 * NOTE(review): the loop construct, its termination test and the assignment
 * of sc are not visible in this view.
 */
328 gif_start(struct ifnet *ifp)
330 struct gif_softc *sc;
335 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
337 IFQ_DEQUEUE(&ifp->if_snd, m);
341 gif_output(ifp, m, sc->gif_pdst, NULL);
344 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
/*
 * if_output handler: encapsulate `m` and send it through the tunnel.
 *
 * Visible steps: MAC transmit check, loop detection and nesting limit using
 * MTAG_GIF_CALLED mbuf tags, tagging the mbuf with this ifp, clearing
 * broadcast/multicast flags, rejecting when the interface is down or the
 * tunnel endpoints are unset, recovering the address family for BPF writes,
 * BPF tap, byte accounting, and dispatch to in_gif_output or in6_gif_output
 * by the family of the outer source address.
 *
 * NOTE(review): error-path cleanup, the gif_called counter updates, case
 * labels and the return are not visible in this view.
 */
350 gif_output(ifp, m, dst, rt)
353 struct sockaddr *dst;
354 struct rtentry *rt; /* added in net2 */
356 struct gif_softc *sc = ifp->if_softc;
363 error = mac_check_ifnet_transmit(ifp, m);
371 * gif may cause infinite recursion calls when misconfigured.
372 * We'll prevent this by detecting loops.
374 * High nesting level may cause stack exhaustion.
375 * We'll prevent this by introducing upper limit.
/*
 * Walk every MTAG_GIF/MTAG_GIF_CALLED tag on the mbuf. The ifnet pointer is
 * stored immediately after the tag header (mtag + 1); a stored pointer equal
 * to ifp means this packet already passed through this interface.
 */
378 mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
379 while (mtag != NULL) {
380 if (*(struct ifnet **)(mtag + 1) == ifp) {
382 "gif_output: loop detected on %s\n",
383 (*(struct ifnet **)(mtag + 1))->if_xname);
385 error = EIO; /* is there better errno? */
388 mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
/* NOTE(review): gif_called is presumably counted in the loop above - not visible here. */
391 if (gif_called > max_gif_nesting) {
393 "gif_output: recursively called too many times(%d)\n",
396 error = EIO; /* is there better errno? */
/* Record this interface on the mbuf: a new tag with room for one ifnet pointer. */
399 mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
406 *(struct ifnet **)(mtag + 1) = ifp;
407 m_tag_prepend(m, mtag);
409 m->m_flags &= ~(M_BCAST|M_MCAST);
/* Interface down, or tunnel endpoints not configured. */
413 if (!(ifp->if_flags & IFF_UP) ||
414 sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
421 /* BPF writes need to be handled specially. */
/* For AF_UNSPEC the address family is read from the start of sa_data. */
422 if (dst->sa_family == AF_UNSPEC) {
423 bcopy(dst->sa_data, &af, sizeof(af));
428 BPF_MTAP2(ifp, &af, sizeof(af), m);
430 ifp->if_obytes += m->m_pkthdr.len;
432 /* override to IPPROTO_ETHERIP for bridged traffic */
436 /* inner AF-specific encapsulation */
438 /* XXX should we check if our outer source is legal? */
440 /* dispatch to output logic based on outer AF */
441 switch (sc->gif_psrc->sa_family) {
444 error = in_gif_output(ifp, af, m);
449 error = in6_gif_output(ifp, af, m);
/*
 * Input path for a decapsulated packet `m` of address family `af` received
 * on gif interface `ifp`.
 *
 * Visible steps: set the receive interface, MAC labelling, BPF tap when
 * listeners are present, the optional ng_gif_input_p hook, EtherIP header
 * validation and stripping followed by BRIDGE_INPUT, the optional
 * ng_gif_input_orphan_p hook, and finally byte accounting and
 * netisr_dispatch to the selected isr.
 *
 * NOTE(review): the switch on af, the selection of isr, the drop paths and
 * the braces are not visible in this view.
 */
465 gif_input(m, af, ifp)
471 struct etherip_header *eip;
479 m->m_pkthdr.rcvif = ifp;
482 mac_create_mbuf_from_ifnet(ifp, m);
/* Tap only when a BPF listener is attached; af1 is passed as the header. */
485 if (bpf_peers_present(ifp->if_bpf)) {
487 bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
/* The hook receives &m, so it is able to replace the mbuf pointer. */
490 if (ng_gif_input_p != NULL) {
491 (*ng_gif_input_p)(ifp, &m, af);
497 * Put the packet to the network layer input queue according to the
498 * specified address family.
499 * Note: older versions of gif_input directly called network layer
500 * input functions, e.g. ip6_input, here. We changed the policy to
501 * prevent too many recursive calls of such input functions, which
502 * might cause kernel panic. But the change may introduce another
503 * problem; if the input queue is full, packets are discarded.
504 * The kernel stack overflow really happened, and we believed
505 * queue-full rarely occurs, so we changed the policy.
/* EtherIP payload: n is the combined size of the EtherIP and Ethernet headers. */
519 n = sizeof(struct etherip_header) + sizeof(struct ether_header);
528 eip = mtod(m, struct etherip_header *);
530 (ETHERIP_VERSION & ETHERIP_VER_VERS_MASK)) {
531 /* discard unknown versions */
/* Strip the EtherIP header, leaving the inner frame at the front of m. */
535 m_adj(m, sizeof(struct etherip_header));
537 m->m_flags &= ~(M_BCAST|M_MCAST);
538 m->m_pkthdr.rcvif = ifp;
541 BRIDGE_INPUT(ifp, m);
/* Optional orphan hook, only called when it has been set. */
548 if (ng_gif_input_orphan_p != NULL)
549 (*ng_gif_input_orphan_p)(ifp, m, af);
556 ifp->if_ibytes += m->m_pkthdr.len;
557 netisr_dispatch(isr, m);
560 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
/*
 * if_ioctl handler for gif interfaces.
 *
 * Visible command groups:
 *  - MTU change, range-checked against GIF_MTU_MIN/GIF_MTU_MAX;
 *  - setting the tunnel endpoints (SIOCSIFPHYADDR_IN6, SIOCSLIFPHYADDR and
 *    an in_aliasreq-based variant), validated and passed to gif_set_tunnel;
 *  - deleting the tunnel via gif_delete_tunnel;
 *  - reading the source endpoint (SIOCGIFPSRCADDR[_IN6]);
 *  - reading the destination endpoint (SIOCGIFPDSTADDR[_IN6]);
 *  - reading both endpoints (SIOCGLIFPHYADDR).
 *
 * NOTE(review): the outer switch, several case labels, break/return
 * statements, and the assignments of src in the read paths are not visible
 * in this view.
 */
562 gif_ioctl(ifp, cmd, data)
567 struct gif_softc *sc = ifp->if_softc;
568 struct ifreq *ifr = (struct ifreq*)data;
570 struct sockaddr *dst, *src;
571 #ifdef SIOCSIFMTU /* xxx */
577 ifp->if_flags |= IFF_UP;
587 #ifdef SIOCSIFMTU /* xxx */
/* Reject MTU values outside the supported range. */
593 if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX)
597 #endif /* SIOCSIFMTU */
603 case SIOCSIFPHYADDR_IN6:
605 case SIOCSLIFPHYADDR:
/* Locate src/dst inside the request; the structure layout depends on cmd. */
609 src = (struct sockaddr *)
610 &(((struct in_aliasreq *)data)->ifra_addr);
611 dst = (struct sockaddr *)
612 &(((struct in_aliasreq *)data)->ifra_dstaddr);
616 case SIOCSIFPHYADDR_IN6:
617 src = (struct sockaddr *)
618 &(((struct in6_aliasreq *)data)->ifra_addr);
619 dst = (struct sockaddr *)
620 &(((struct in6_aliasreq *)data)->ifra_dstaddr);
623 case SIOCSLIFPHYADDR:
624 src = (struct sockaddr *)
625 &(((struct if_laddrreq *)data)->addr);
626 dst = (struct sockaddr *)
627 &(((struct if_laddrreq *)data)->dstaddr);
633 /* sa_family must be equal */
634 if (src->sa_family != dst->sa_family)
637 /* validate sa_len */
638 switch (src->sa_family) {
641 if (src->sa_len != sizeof(struct sockaddr_in))
647 if (src->sa_len != sizeof(struct sockaddr_in6))
/* The same length validation is repeated for the destination address. */
654 switch (dst->sa_family) {
657 if (dst->sa_len != sizeof(struct sockaddr_in))
663 if (dst->sa_len != sizeof(struct sockaddr_in6))
671 /* check sa_family looks sane for the cmd */
674 if (src->sa_family == AF_INET)
678 case SIOCSIFPHYADDR_IN6:
679 if (src->sa_family == AF_INET6)
683 case SIOCSLIFPHYADDR:
684 /* checks done in the above */
/* All checks passed: install the new endpoints. */
688 error = gif_set_tunnel(GIF2IFP(sc), src, dst);
691 #ifdef SIOCDIFPHYADDR
693 gif_delete_tunnel(GIF2IFP(sc));
/* Read the tunnel source address; fails when none is configured. */
697 case SIOCGIFPSRCADDR:
699 case SIOCGIFPSRCADDR_IN6:
701 if (sc->gif_psrc == NULL) {
702 error = EADDRNOTAVAIL;
/* Pick the output buffer and its capacity according to the request type. */
708 case SIOCGIFPSRCADDR:
709 dst = &ifr->ifr_addr;
710 size = sizeof(ifr->ifr_addr);
714 case SIOCGIFPSRCADDR_IN6:
715 dst = (struct sockaddr *)
716 &(((struct in6_ifreq *)data)->ifr_addr);
717 size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
721 error = EADDRNOTAVAIL;
/* The stored address must fit in the caller's buffer before it is copied. */
724 if (src->sa_len > size)
726 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
/* For IPv6 the copied address is passed through sa6_recoverscope(). */
728 if (dst->sa_family == AF_INET6) {
729 error = sa6_recoverscope((struct sockaddr_in6 *)dst);
/* Read the tunnel destination address; same shape as the source path above. */
736 case SIOCGIFPDSTADDR:
738 case SIOCGIFPDSTADDR_IN6:
740 if (sc->gif_pdst == NULL) {
741 error = EADDRNOTAVAIL;
747 case SIOCGIFPDSTADDR:
748 dst = &ifr->ifr_addr;
749 size = sizeof(ifr->ifr_addr);
753 case SIOCGIFPDSTADDR_IN6:
754 dst = (struct sockaddr *)
755 &(((struct in6_ifreq *)data)->ifr_addr);
756 size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
760 error = EADDRNOTAVAIL;
763 if (src->sa_len > size)
765 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
767 if (dst->sa_family == AF_INET6) {
768 error = sa6_recoverscope((struct sockaddr_in6 *)dst);
/* Read both endpoints at once; requires both to be configured. */
775 case SIOCGLIFPHYADDR:
776 if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
777 error = EADDRNOTAVAIL;
/* First copy goes into the request's addr field. */
783 dst = (struct sockaddr *)
784 &(((struct if_laddrreq *)data)->addr);
785 size = sizeof(((struct if_laddrreq *)data)->addr);
786 if (src->sa_len > size)
788 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
/* Second copy goes into the request's dstaddr field. */
792 dst = (struct sockaddr *)
793 &(((struct if_laddrreq *)data)->dstaddr);
794 size = sizeof(((struct if_laddrreq *)data)->dstaddr);
795 if (src->sa_len > size)
797 bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
801 /* if_ioctl() takes care of it */
813 * XXXRW: There's a general event-ordering issue here: the code to check
814 * if a given tunnel is already present happens before we perform a
815 * potentially blocking setup of the tunnel. This code needs to be
816 * re-ordered so that the check and replacement can be atomic using
/*
 * Configure the tunnel endpoints of `ifp` to `src` and `dst`.
 *
 * Visible steps: scan gif_softc_list for an existing tunnel with the same
 * endpoints (rejected with EADDRNOTAVAIL unless parallel_tunnels is set),
 * detach the current encap registration according to the old source family,
 * store freshly allocated copies of src and dst, attach according to the new
 * family (embedding the IPv6 scope first), free addresses, and update
 * IFF_DRV_RUNNING.
 *
 * NOTE(review): the lock acquisition, the assignments to osrc/odst and
 * sc->gif_psrc/gif_pdst, case labels, goto/return statements and braces are
 * not visible in this view.
 */
820 gif_set_tunnel(ifp, src, dst)
822 struct sockaddr *src;
823 struct sockaddr *dst;
825 struct gif_softc *sc = ifp->if_softc;
826 struct gif_softc *sc2;
827 struct sockaddr *osrc, *odst, *sa;
/* Compare the requested endpoints against every softc on the list. */
831 LIST_FOREACH(sc2, &gif_softc_list, gif_list) {
/* Entries without both endpoints configured are tested here. */
834 if (!sc2->gif_pdst || !sc2->gif_psrc)
/* Family or length mismatch on either endpoint is tested here. */
836 if (sc2->gif_pdst->sa_family != dst->sa_family ||
837 sc2->gif_pdst->sa_len != dst->sa_len ||
838 sc2->gif_psrc->sa_family != src->sa_family ||
839 sc2->gif_psrc->sa_len != src->sa_len)
843 * Disallow parallel tunnels unless instructed
/* Byte-wise equal source and destination means a duplicate tunnel. */
846 if (!parallel_tunnels &&
847 bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 &&
848 bcmp(sc2->gif_psrc, src, src->sa_len) == 0) {
849 error = EADDRNOTAVAIL;
850 mtx_unlock(&gif_mtx);
854 /* XXX both end must be valid? (I mean, not 0.0.0.0) */
856 mtx_unlock(&gif_mtx);
858 /* XXX we can detach from both, but be polite just in case */
860 switch (sc->gif_psrc->sa_family) {
863 (void)in_gif_detach(sc);
868 (void)in6_gif_detach(sc);
/* Private copies of the caller's addresses, allocated with M_IFADDR / M_WAITOK. */
874 sa = (struct sockaddr *)malloc(src->sa_len, M_IFADDR, M_WAITOK);
875 bcopy((caddr_t)src, (caddr_t)sa, src->sa_len);
879 sa = (struct sockaddr *)malloc(dst->sa_len, M_IFADDR, M_WAITOK);
880 bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len);
/* Register with the encap layer for the family of the new source address. */
883 switch (sc->gif_psrc->sa_family) {
886 error = in_gif_attach(sc);
892 * Check validity of the scope zone ID of the addresses, and
893 * convert it into the kernel internal form if necessary.
895 error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_psrc, 0);
898 error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_pdst, 0);
901 error = in6_gif_attach(sc);
/* NOTE(review): presumably the failure path, releasing the new copies - confirm. */
907 free((caddr_t)sc->gif_psrc, M_IFADDR);
908 free((caddr_t)sc->gif_pdst, M_IFADDR);
/* Release the previous endpoint addresses. */
915 free((caddr_t)osrc, M_IFADDR);
917 free((caddr_t)odst, M_IFADDR);
/* RUNNING is set when both endpoints are present and cleared below. */
920 if (sc->gif_psrc && sc->gif_pdst)
921 ifp->if_drv_flags |= IFF_DRV_RUNNING;
923 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
/*
 * Remove the tunnel configuration from `ifp`: free the stored source and
 * destination addresses, detach from both the IPv4 and IPv6 encap handlers,
 * and clear IFF_DRV_RUNNING.
 *
 * NOTE(review): the NULL checks around the free() calls and the resetting of
 * the pointers are not visible in this view.
 */
929 gif_delete_tunnel(ifp)
932 struct gif_softc *sc = ifp->if_softc;
935 free((caddr_t)sc->gif_psrc, M_IFADDR);
939 free((caddr_t)sc->gif_pdst, M_IFADDR);
942 /* it is safe to detach from both */
944 (void)in_gif_detach(sc);
947 (void)in6_gif_detach(sc);
/* With no endpoints configured the interface is no longer marked running. */
949 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;