2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
31 * Copyright (c) 1982, 1986, 1988, 1993
32 * The Regents of the University of California.
33 * All rights reserved.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
59 * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD$");
65 #include "opt_ipsec.h"
66 #include "opt_inet6.h"
67 #include "opt_route.h"
69 #include <sys/param.h>
70 #include <sys/errno.h>
73 #include <sys/malloc.h>
77 #include <sys/protosw.h>
78 #include <sys/signalvar.h>
79 #include <sys/socket.h>
80 #include <sys/socketvar.h>
82 #include <sys/syslog.h>
83 #include <sys/vimage.h>
86 #include <net/if_types.h>
87 #include <net/route.h>
90 #include <netinet/in.h>
91 #include <netinet/in_var.h>
92 #include <netinet/in_systm.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet/vinet.h>
96 #include <netinet/icmp6.h>
97 #include <netinet/ip6.h>
98 #include <netinet6/ip6protosw.h>
99 #include <netinet6/ip6_mroute.h>
100 #include <netinet6/in6_pcb.h>
101 #include <netinet6/ip6_var.h>
102 #include <netinet6/nd6.h>
103 #include <netinet6/raw_ip6.h>
104 #include <netinet6/scope6_var.h>
105 #include <netinet6/vinet6.h>
108 #include <netipsec/ipsec.h>
109 #include <netipsec/ipsec6.h>
112 #include <machine/stdarg.h>
/*
 * Cast helpers: satosin6() casts its argument to struct sockaddr_in6 *,
 * ifatoia6() casts its argument to struct in6_ifaddr *.  No checking is done.
 */
114 #define satosin6(sa) ((struct sockaddr_in6 *)(sa))
115 #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
118 * Raw interface to IP6 protocol.
/*
 * Raw-socket globals.  ripcb / ripcbinfo (the PCB list and its info/lock
 * structure) and rip_sendspace / rip_recvspace are only declared here
 * (extern); rip6stat, the raw IPv6 statistics block, is defined in this file.
 * rip_sendspace and rip_recvspace are the values passed to soreserve() in
 * rip6_attach().
 *
 * NOTE(review): the #endif matching VIMAGE_GLOBALS is not visible in this
 * listing, so it is unclear which of these declarations it covers -- confirm
 * against the full source.
 */
121 #ifdef VIMAGE_GLOBALS
122 extern struct inpcbhead ripcb;
123 extern struct inpcbinfo ripcbinfo;
124 struct rip6stat rip6stat;
127 extern u_long rip_sendspace;
128 extern u_long rip_recvspace;
131 * Hooks for multicast forwarding.
/*
 * ip6_mrouter is explicitly initialised to NULL; the function pointers below
 * have no initialiser.  Within this file ip6_mrouter_get / ip6_mrouter_set are
 * tested for NULL before being called (rip6_ctloutput) and ip6_mrouter_done is
 * tested in rip6_detach.  Whoever installs the hooks is not visible here
 * (presumably the IPv6 multicast routing code -- TODO confirm).
 */
133 struct socket *ip6_mrouter = NULL;
134 int (*ip6_mrouter_set)(struct socket *, struct sockopt *);
135 int (*ip6_mrouter_get)(struct socket *, struct sockopt *);
136 int (*ip6_mrouter_done)(void);
137 int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *);
138 int (*mrt6_ioctl)(int, caddr_t);
141 * Setup generic address and protocol structures for raw_input routine, then
142 * pass them along with mbuf chain.
/*
 * rip6_input: offer a received IPv6 packet to the raw-socket PCBs on V_ripcb.
 *
 *   mp    - *mp is the mbuf chain holding the packet; its data starts with
 *           the IPv6 header (read via mtod()).
 *   offp  - *offp is used as the byte offset at which checksumming starts
 *           (the checksum covers m_pkthdr.len - *offp bytes) and is passed to
 *           ip6_get_prevhdr() when building the ICMPv6 error.
 *   proto - protocol number compared against each PCB's inp_ip_p.
 *
 * Every visible return yields IPPROTO_DONE.
 *
 * NOTE(review): this listing is missing lines (return type, braces, and the
 * short statements taken when a test fails, e.g. continue/m_freem).  Comments
 * below describe only what is visible; confirm control flow against the full
 * source.
 */
145 rip6_input(struct mbuf **mp, int *offp, int proto)
147 INIT_VNET_INET(curvnet);
148 INIT_VNET_INET6(curvnet);
150 INIT_VNET_IPSEC(curvnet);
152 struct mbuf *m = *mp;
153 register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
154 register struct inpcb *in6p;
155 struct inpcb *last = 0;
156 struct mbuf *opts = NULL;
157 struct sockaddr_in6 fromsa;
159 V_rip6stat.rip6s_ipackets++;
/*
 * If the optional faithprefix_p hook is set and matches the destination
 * address, return before any PCB is examined.
 */
161 if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
162 /* XXX Send icmp6 host/port unreach? */
164 return (IPPROTO_DONE);
167 init_sin6(&fromsa, m); /* general init */
/* Walk the raw PCB list under the pcbinfo read lock (dropped at RUNLOCK). */
169 INP_INFO_RLOCK(&V_ripcbinfo);
170 LIST_FOREACH(in6p, &V_ripcb, inp_list) {
171 /* XXX inp locking */
/*
 * Per-PCB filters, in order: IPv6 flag set, protocol (a PCB with inp_ip_p == 0
 * is not filtered by protocol), bound local address vs. packet destination,
 * connected foreign address vs. packet source, and the jail check on the
 * destination.  The action taken on a mismatch is not visible (presumably
 * `continue`).
 */
172 if ((in6p->inp_vflag & INP_IPV6) == 0)
174 if (in6p->inp_ip_p &&
175 in6p->inp_ip_p != proto)
177 if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
178 !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
180 if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
181 !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
183 if (prison_check_ip6(in6p->inp_cred, &ip6->ip6_dst) != 0)
/*
 * The checksum is verified only when the PCB's in6p_cksum is not -1
 * (rip6_attach() initialises it to -1).  A non-zero in6_cksum() result is
 * counted in rip6s_badsum.
 */
186 if (in6p->in6p_cksum != -1) {
187 V_rip6stat.rip6s_isum++;
188 if (in6_cksum(m, proto, *offp,
189 m->m_pkthdr.len - *offp)) {
191 V_rip6stat.rip6s_badsum++;
/*
 * n is a copy of the entire chain (offset 0, M_COPYALL); the statements below
 * operate on n together with `last`.  The guard around this section and the
 * place where `last` is updated are not visible -- TODO confirm.
 */
196 struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
200 * Check AH/ESP integrity.
202 if (n && ipsec6_in_reject(n, last)) {
204 V_ipsec6stat.in_polvio++;
205 /* Do not inject data into pcb. */
209 if (last->inp_flags & INP_CONTROLOPTS ||
210 last->inp_socket->so_options & SO_TIMESTAMP)
211 ip6_savecontrol(last, n, &opts);
212 /* strip intermediate headers */
214 if (sbappendaddr(&last->inp_socket->so_rcv,
215 (struct sockaddr *)&fromsa,
220 V_rip6stat.rip6s_fullsock++;
222 sorwakeup(last->inp_socket);
229 INP_INFO_RUNLOCK(&V_ripcbinfo);
/*
 * After the loop the original chain m (not a copy) is used with `last`:
 * IPsec policy check, optional control data, then sbappendaddr() onto the
 * socket's receive buffer and a reader wakeup.
 */
232 * Check AH/ESP integrity.
234 if ((last != NULL) && ipsec6_in_reject(m, last)) {
236 V_ipsec6stat.in_polvio++;
237 V_ip6stat.ip6s_delivered--;
238 /* Do not inject data into pcb. */
243 if (last->inp_flags & INP_CONTROLOPTS ||
244 last->inp_socket->so_options & SO_TIMESTAMP)
245 ip6_savecontrol(last, m, &opts);
246 /* Strip intermediate headers. */
248 if (sbappendaddr(&last->inp_socket->so_rcv,
249 (struct sockaddr *)&fromsa, m, opts) == 0) {
253 V_rip6stat.rip6s_fullsock++;
255 sorwakeup(last->inp_socket);
/*
 * Presumably the "no PCB matched" path (the guarding else is not visible):
 * counters are bumped, and an ICMPv6 parameter-problem / next-header error is
 * generated whose pointer is the offset of the byte returned by
 * ip6_get_prevhdr() from the start of the mbuf data.  What happens for
 * proto == IPPROTO_NONE is not visible.
 */
258 V_rip6stat.rip6s_nosock++;
259 if (m->m_flags & M_MCAST)
260 V_rip6stat.rip6s_nosockmcast++;
261 if (proto == IPPROTO_NONE)
264 char *prvnxtp = ip6_get_prevhdr(m, *offp); /* XXX */
265 icmp6_error(m, ICMP6_PARAM_PROB,
266 ICMP6_PARAMPROB_NEXTHEADER,
267 prvnxtp - mtod(m, char *));
269 V_ip6stat.ip6s_delivered--;
271 return (IPPROTO_DONE);
/*
 * rip6_ctlinput: control-input handler; forwards a PRC_* notification to the
 * raw PCBs through in6_pcbnotify().
 *
 *   cmd - PRC_* command; used as an index into inet6ctlerrmap[].
 *   sa  - address the notification concerns; must be an AF_INET6 sockaddr of
 *         exactly sizeof(struct sockaddr_in6).
 *   d   - optional struct ip6ctlparam * with extra context; set to NULL for
 *         redirect commands.
 *
 * NOTE(review): the statements executed when a validation test fails, the
 * PRC_HOSTDEAD branch body and the guard around the ip6ctlparam decoding are
 * missing from this listing -- confirm against the full source.
 */
275 rip6_ctlinput(int cmd, struct sockaddr *sa, void *d)
277 INIT_VNET_INET(curvnet);
281 struct ip6ctlparam *ip6cp = NULL;
282 const struct sockaddr_in6 *sa6_src = NULL;
284 struct inpcb *(*notify)(struct inpcb *, int) = in6_rtchange;
286 if (sa->sa_family != AF_INET6 ||
287 sa->sa_len != sizeof(struct sockaddr_in6))
/*
 * The unsigned cast makes a negative cmd fail this range test as well; the
 * test precedes the inet6ctlerrmap[cmd] lookup below.
 */
290 if ((unsigned)cmd >= PRC_NCMDS)
292 if (PRC_IS_REDIRECT(cmd))
293 notify = in6_rtchange, d = NULL;
294 else if (cmd == PRC_HOSTDEAD)
296 else if (inet6ctlerrmap[cmd] == 0)
300 * If the parameter is from icmp6, decode it.
303 ip6cp = (struct ip6ctlparam *)d;
305 ip6 = ip6cp->ip6c_ip6;
306 off = ip6cp->ip6c_off;
307 cmdarg = ip6cp->ip6c_cmdarg;
308 sa6_src = ip6cp->ip6c_src;
/* Both port arguments are 0; the result of in6_pcbnotify() is discarded. */
316 (void) in6_pcbnotify(&V_ripcbinfo, sa, 0,
317 (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
321 * Generate IPv6 header and pass packet to ip6_output. Tack on options user
322 * may have set up with control call.
/*
 * rip6_output: prepend an IPv6 header to m and send it with ip6_output().
 *
 * Variadic arguments, read in this order with va_arg():
 *   struct socket *so            - sending socket (its PCB supplies options,
 *                                  flow label, protocol and checksum offset)
 *   struct sockaddr_in6 *dstsock - destination; may be modified in place by
 *                                  sa6_embedscope() / in6_setscope()
 *   struct mbuf *control         - optional per-packet options, parsed with
 *                                  ip6_setpktopts(); may be NULL
 *
 * NOTE(review): two signature lines are visible (a prototyped one and an
 * old-style va_alist one); presumably they sit under a preprocessor
 * conditional that is missing from this listing.  Several declarations, the
 * error-path statements (goto/free) and the final return are also missing, so
 * ownership of m and control on failure cannot be determined from here.
 */
326 rip6_output(struct mbuf *m, ...)
328 rip6_output(m, va_alist)
333 INIT_VNET_INET6(curvnet);
334 struct mbuf *control;
336 struct sockaddr_in6 *dstsock;
337 struct in6_addr *dst;
/* plen is sampled before M_PREPEND below, so it excludes the IPv6 header. */
340 u_int plen = m->m_pkthdr.len;
342 struct ip6_pktopts opt, *optp;
343 struct ifnet *oifp = NULL;
344 int type = 0, code = 0; /* for ICMPv6 output statistics only */
345 int scope_ambiguous = 0;
346 struct in6_addr *in6a;
350 so = va_arg(ap, struct socket *);
351 dstsock = va_arg(ap, struct sockaddr_in6 *);
352 control = va_arg(ap, struct mbuf *);
355 in6p = sotoinpcb(so);
358 dst = &dstsock->sin6_addr;
/*
 * With control data, options are parsed into the local `opt`; the visible
 * alternative is the PCB's sticky options (in6p_outputopts).  The line that
 * assigns optp in the control != NULL case is not visible.
 */
359 if (control != NULL) {
360 if ((error = ip6_setpktopts(control, &opt,
361 in6p->in6p_outputopts, so->so_cred,
362 so->so_proto->pr_protocol)) != 0) {
367 optp = in6p->in6p_outputopts;
370 * Check and convert scope zone ID into internal form.
372 * XXX: we may still need to determine the zone later.
/* Scope embedding is done only for sockets that are not connected. */
374 if (!(so->so_state & SS_ISCONNECTED)) {
375 if (dstsock->sin6_scope_id == 0 && !V_ip6_use_defzone)
377 if ((error = sa6_embedscope(dstsock, V_ip6_use_defzone)) != 0)
382 * For an ICMPv6 packet, we should know its type and code to update
/*
 * m_pullup() is called only when the first mbuf holds less than a full
 * icmp6_hdr; it replaces m and yields NULL on failure.
 */
385 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
386 struct icmp6_hdr *icmp6;
387 if (m->m_len < sizeof(struct icmp6_hdr) &&
388 (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) {
392 icmp6 = mtod(m, struct icmp6_hdr *);
393 type = icmp6->icmp6_type;
394 code = icmp6->icmp6_code;
/* Non-sleeping prepend (M_DONTWAIT); the failure check is not visible. */
397 M_PREPEND(m, sizeof(*ip6), M_DONTWAIT);
402 ip6 = mtod(m, struct ip6_hdr *);
405 * Source address selection.
407 if ((in6a = in6_selectsrc(dstsock, optp, in6p, NULL, so->so_cred,
408 &oifp, &error)) == NULL) {
410 error = EADDRNOTAVAIL;
413 error = prison_get_ip6(in6p->inp_cred, in6a);
416 ip6->ip6_src = *in6a;
418 if (oifp && scope_ambiguous) {
420 * Application should provide a proper zone ID or the use of
421 * default zone IDs should be enabled. Unfortunately, some
422 * applications do not behave as it should, so we need a
423 * workaround. Even if an appropriate ID is not determined
424 * (when it's required), if we can determine the outgoing
425 * interface. determine the zone ID based on the interface.
427 error = in6_setscope(&dstsock->sin6_addr, oifp, NULL);
431 ip6->ip6_dst = dstsock->sin6_addr;
434 * Fill in the rest of the IPv6 header fields.
/*
 * Only the masked bits are replaced: flow-info bits come from the PCB, the
 * version bits from IPV6_VERSION; other bits of ip6_flow/ip6_vfc are kept.
 */
436 ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
437 (in6p->inp_flow & IPV6_FLOWINFO_MASK);
438 ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
439 (IPV6_VERSION & IPV6_VERSION_MASK);
442 * ip6_plen will be filled in ip6_output, so not fill it here.
444 ip6->ip6_nxt = in6p->inp_ip_p;
445 ip6->ip6_hlim = in6_selecthlim(in6p, oifp);
/*
 * A checksum is written for ICMPv6 sockets always, and for other protocols
 * when the PCB's in6p_cksum is not -1.  `off` is the checksum field's offset
 * within the payload (fixed for ICMPv6, otherwise in6p_cksum); it is checked
 * against plen, then advanced by the size of the prepended IPv6 header so it
 * is relative to the start of the packet.
 */
447 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 ||
448 in6p->in6p_cksum != -1) {
453 /* Compute checksum. */
454 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
455 off = offsetof(struct icmp6_hdr, icmp6_cksum);
457 off = in6p->in6p_cksum;
458 if (plen < off + 1) {
462 off += sizeof(struct ip6_hdr);
/*
 * Loop body not visible; presumably it steps n along the chain and reduces
 * off until n is the mbuf containing the checksum field -- TODO confirm.
 */
465 while (n && n->m_len <= off) {
471 p = (u_int16_t *)(mtod(n, caddr_t) + off);
/* Checksum covers plen bytes starting right after the IPv6 header. */
473 *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen);
476 error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p);
/* ICMPv6 statistics use the type/code captured before the header prepend. */
477 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
479 icmp6_ifoutstat_inc(oifp, type, code);
480 V_icmp6stat.icp6s_outhist[type]++;
482 V_rip6stat.rip6s_opackets++;
/* Release per-packet options parsed from control into the local `opt`. */
491 if (control != NULL) {
492 ip6_clearpktopts(&opt, -1);
500 * Raw IPv6 socket option processing.
/*
 * rip6_ctloutput: socket-option handler for raw IPv6 sockets.
 *
 *   so   - socket the option applies to.
 *   sopt - option request; sopt_level, sopt_dir and sopt_name drive dispatch.
 *
 * Options at level IPPROTO_ICMPV6 are handed straight to icmp6_ctloutput().
 * For level IPPROTO_IPV6 the request is dispatched on sopt_dir and then on
 * sopt_name to one of: the multicast-router hook (ip6_mrouter_get or
 * ip6_mrouter_set, called only when the pointer is non-NULL),
 * ip6_raw_ctloutput(), or ip6_ctloutput().
 *
 * NOTE(review): the case labels, the value used when a hook is NULL, the
 * result for other levels and the final return are missing from this listing.
 */
503 rip6_ctloutput(struct socket *so, struct sockopt *sopt)
507 if (sopt->sopt_level == IPPROTO_ICMPV6)
509 * XXX: is it better to call icmp6_ctloutput() directly
512 return (icmp6_ctloutput(so, sopt));
513 else if (sopt->sopt_level != IPPROTO_IPV6)
518 switch (sopt->sopt_dir) {
520 switch (sopt->sopt_name) {
528 error = ip6_mrouter_get ? ip6_mrouter_get(so, sopt) :
532 error = ip6_raw_ctloutput(so, sopt);
535 error = ip6_ctloutput(so, sopt);
541 switch (sopt->sopt_name) {
549 error = ip6_mrouter_set ? ip6_mrouter_set(so, sopt) :
553 error = ip6_raw_ctloutput(so, sopt);
556 error = ip6_ctloutput(so, sopt);
/*
 * rip6_attach: create the protocol control block for a new raw IPv6 socket.
 *
 *   so    - socket being created; must not already have a PCB (KASSERT).
 *   proto - protocol number stored in the PCB's inp_ip_p.
 *   td    - calling thread; checked for PRIV_NETINET_RAW.
 *
 * Visible steps: privilege check, socket buffer reservation, allocation of an
 * ICMPv6 filter, PCB allocation under the pcbinfo write lock, then PCB
 * initialisation.
 *
 * NOTE(review): the error checks after priv_check(), soreserve(), malloc()
 * and in_pcballoc() are missing from this listing.  malloc() is called with
 * M_NOWAIT and can return NULL, and `filter` must be released if
 * in_pcballoc() fails -- confirm both are handled in the full source.
 */
566 rip6_attach(struct socket *so, int proto, struct thread *td)
568 INIT_VNET_INET(so->so_vnet);
570 struct icmp6_filter *filter;
574 KASSERT(inp == NULL, ("rip6_attach: inp != NULL"));
576 error = priv_check(td, PRIV_NETINET_RAW);
579 error = soreserve(so, rip_sendspace, rip_recvspace);
582 filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT);
585 INP_INFO_WLOCK(&V_ripcbinfo);
586 error = in_pcballoc(so, &V_ripcbinfo);
/* Two unlocks are visible: presumably one per path (failure / success). */
588 INP_INFO_WUNLOCK(&V_ripcbinfo);
592 inp = (struct inpcb *)so->so_pcb;
593 INP_INFO_WUNLOCK(&V_ripcbinfo);
594 inp->inp_vflag |= INP_IPV6;
595 inp->inp_ip_p = (long)proto;
596 inp->in6p_hops = -1; /* use kernel default */
/* -1: rip6_input()/rip6_output() skip checksum handling for this value. */
597 inp->in6p_cksum = -1;
598 inp->in6p_icmp6filt = filter;
599 ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt);
/*
 * rip6_detach: tear down the PCB of a raw IPv6 socket.
 *
 *   so - socket being destroyed; must have a PCB (KASSERT).
 *
 * If this socket is the registered multicast router socket and the
 * ip6_mrouter_done hook is set, that branch is taken first (its body is not
 * visible; presumably it calls the hook).  The ICMPv6 filter allocated in
 * rip6_attach() is freed while the pcbinfo write lock is held.
 *
 * NOTE(review): the PCB release calls themselves are missing from this
 * listing.
 */
605 rip6_detach(struct socket *so)
607 INIT_VNET_INET(so->so_vnet);
611 KASSERT(inp != NULL, ("rip6_detach: inp == NULL"));
613 if (so == ip6_mrouter && ip6_mrouter_done)
616 INP_INFO_WLOCK(&V_ripcbinfo);
618 free(inp->in6p_icmp6filt, M_PCB);
621 INP_INFO_WUNLOCK(&V_ripcbinfo);
624 /* XXXRW: This can't ever be called. */
/*
 * rip6_abort: abort handler; asserts the socket has a PCB and marks the
 * socket disconnected.
 */
626 rip6_abort(struct socket *so)
631 KASSERT(inp != NULL, ("rip6_abort: inp == NULL"));
633 soisdisconnected(so);
/*
 * rip6_close: close handler; asserts the socket has a PCB and marks the
 * socket disconnected.  The visible statements match rip6_abort().
 */
637 rip6_close(struct socket *so)
642 KASSERT(inp != NULL, ("rip6_close: inp == NULL"));
644 soisdisconnected(so);
/*
 * rip6_disconnect: drop the association of a connected raw IPv6 socket by
 * resetting the PCB's foreign address to the unspecified address.
 *
 * NOTE(review): the result returned when the socket is not connected
 * (SS_ISCONNECTED clear) and the final return are missing from this listing.
 */
648 rip6_disconnect(struct socket *so)
653 KASSERT(inp != NULL, ("rip6_disconnect: inp == NULL"));
655 if ((so->so_state & SS_ISCONNECTED) == 0)
657 inp->in6p_faddr = in6addr_any;
/*
 * rip6_bind: bind a raw IPv6 socket to a local address.
 *
 *   so  - socket to bind; must have a PCB (KASSERT).
 *   nam - requested address, interpreted as struct sockaddr_in6; its sa_len
 *         must equal sizeof(struct sockaddr_in6).  sa6_embedscope() may modify
 *         it in place.
 *   td  - calling thread; its credential is used for the jail address check.
 *
 * Visible results: EADDRNOTAVAIL when there are no interfaces, when the
 * family is not AF_INET6, when a specified address is not configured on any
 * interface, or when the matching interface address carries one of the
 * anycast / not-ready / detached / deprecated flags.
 *
 * NOTE(review): the returns for the sa_len, prison_check_ip6() and
 * sa6_embedscope() failures, the first line of the flag test, and the final
 * return are missing from this listing.
 */
663 rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
665 INIT_VNET_NET(so->so_vnet);
666 INIT_VNET_INET(so->so_vnet);
667 INIT_VNET_INET6(so->so_vnet);
669 struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
670 struct ifaddr *ia = NULL;
674 KASSERT(inp != NULL, ("rip6_bind: inp == NULL"));
676 if (nam->sa_len != sizeof(*addr))
678 if ((error = prison_check_ip6(td->td_ucred, &addr->sin6_addr)) != 0)
680 if (TAILQ_EMPTY(&V_ifnet) || addr->sin6_family != AF_INET6)
681 return (EADDRNOTAVAIL);
682 if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0)
/*
 * The unspecified address is accepted without an interface lookup, so ia can
 * still be NULL after this test.
 */
685 if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) &&
686 (ia = ifa_ifwithaddr((struct sockaddr *)addr)) == 0)
687 return (EADDRNOTAVAIL);
689 ((struct in6_ifaddr *)ia)->ia6_flags &
690 (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|
691 IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) {
692 return (EADDRNOTAVAIL);
/* The local address is stored while the pcbinfo write lock is held. */
694 INP_INFO_WLOCK(&V_ripcbinfo);
696 inp->in6p_laddr = addr->sin6_addr;
698 INP_INFO_WUNLOCK(&V_ripcbinfo);
/*
 * rip6_connect: fix the foreign (and selected local) address of a raw IPv6
 * socket.
 *
 *   so  - socket to connect; must have a PCB (KASSERT).
 *   nam - destination, interpreted as struct sockaddr_in6; its sa_len must
 *         equal sizeof(struct sockaddr_in6).  sa6_embedscope() and
 *         in6_setscope() may modify it in place.
 *   td  - calling thread (not referenced in the visible lines).
 *
 * Visible results: EADDRNOTAVAIL when there are no interfaces, EAFNOSUPPORT
 * for a non-AF_INET6 family, and `error ? error : EADDRNOTAVAIL` on the
 * source-address selection failure path.
 *
 * NOTE(review): the returns for the sa_len and scope failures, the statement
 * that sets scope_ambiguous, the tail of the in6_selectsrc() call and the
 * final return are missing from this listing.
 */
703 rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
705 INIT_VNET_NET(so->so_vnet);
706 INIT_VNET_INET(so->so_vnet);
707 INIT_VNET_INET6(so->so_vnet);
709 struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
710 struct in6_addr *in6a = NULL;
711 struct ifnet *ifp = NULL;
712 int error = 0, scope_ambiguous = 0;
715 KASSERT(inp != NULL, ("rip6_connect: inp == NULL"));
717 if (nam->sa_len != sizeof(*addr))
719 if (TAILQ_EMPTY(&V_ifnet))
720 return (EADDRNOTAVAIL);
721 if (addr->sin6_family != AF_INET6)
722 return (EAFNOSUPPORT);
725 * Application should provide a proper zone ID or the use of default
726 * zone IDs should be enabled. Unfortunately, some applications do
727 * not behave as it should, so we need a workaround. Even if an
728 * appropriate ID is not determined, we'll see if we can determine
729 * the outgoing interface. If we can, determine the zone ID based on
730 * the interface below.
732 if (addr->sin6_scope_id == 0 && !V_ip6_use_defzone)
734 if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0)
/*
 * From here on the pcbinfo write lock is held; each visible exit path below
 * is preceded by an unlock.
 */
737 INP_INFO_WLOCK(&V_ripcbinfo);
739 /* Source address selection. XXX: need pcblookup? */
740 in6a = in6_selectsrc(addr, inp->in6p_outputopts,
741 inp, NULL, so->so_cred,
745 INP_INFO_WUNLOCK(&V_ripcbinfo);
746 return (error ? error : EADDRNOTAVAIL);
/* Derive the zone from the chosen interface only if the scope was ambiguous. */
750 if (ifp && scope_ambiguous &&
751 (error = in6_setscope(&addr->sin6_addr, ifp, NULL)) != 0) {
753 INP_INFO_WUNLOCK(&V_ripcbinfo);
756 inp->in6p_faddr = addr->sin6_addr;
757 inp->in6p_laddr = *in6a;
760 INP_INFO_WUNLOCK(&V_ripcbinfo);
/*
 * rip6_shutdown: shutdown handler; asserts that the socket has a PCB.
 *
 * NOTE(review): the remaining statements of this function are missing from
 * this listing.
 */
765 rip6_shutdown(struct socket *so)
770 KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL"));
/*
 * rip6_send: send handler; works out the destination address and passes the
 * packet to rip6_output().
 *
 *   so      - sending socket; must have a PCB (KASSERT).
 *   flags   - send flags (not referenced in the visible lines).
 *   m       - packet payload, passed on to rip6_output().
 *   nam     - destination for an unconnected send; its sa_len must equal
 *             sizeof(struct sockaddr_in6).
 *   control - optional control mbuf, passed on to rip6_output().
 *   td      - calling thread (not referenced in the visible lines).
 *
 * The destination is always staged in the local `tmp`, so the caller's
 * sockaddr is not modified.  For a connected socket, tmp is built from the
 * PCB's foreign address; otherwise it is a copy of nam.  A family of
 * AF_UNSPEC is logged and treated as AF_INET6; any other non-AF_INET6 family
 * yields EAFNOSUPPORT.
 *
 * NOTE(review): the assignment of dst (presumably &tmp), the error returns
 * for the connected-with-nam and bad-length cases, any mbuf freeing on those
 * paths, and the final return are missing from this listing.
 */
779 rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
780 struct mbuf *control, struct thread *td)
783 struct sockaddr_in6 tmp;
784 struct sockaddr_in6 *dst;
788 KASSERT(inp != NULL, ("rip6_send: inp == NULL"));
790 /* Always copy sockaddr to avoid overwrites. */
792 if (so->so_state & SS_ISCONNECTED) {
798 bzero(&tmp, sizeof(tmp));
799 tmp.sin6_family = AF_INET6;
800 tmp.sin6_len = sizeof(struct sockaddr_in6);
802 bcopy(&inp->in6p_faddr, &tmp.sin6_addr,
803 sizeof(struct in6_addr));
811 if (nam->sa_len != sizeof(struct sockaddr_in6)) {
815 tmp = *(struct sockaddr_in6 *)nam;
818 if (dst->sin6_family == AF_UNSPEC) {
820 * XXX: we allow this case for backward
821 * compatibility to buggy applications that
822 * rely on old (and wrong) kernel behavior.
824 log(LOG_INFO, "rip6 SEND: address family is "
825 "unspec. Assume AF_INET6\n");
826 dst->sin6_family = AF_INET6;
827 } else if (dst->sin6_family != AF_INET6) {
829 return(EAFNOSUPPORT);
/* Argument order matches the va_arg() sequence in rip6_output(). */
832 ret = rip6_output(m, so, dst, control);
/*
 * User-request dispatch table for raw IPv6 sockets.  Handlers named rip6_*
 * are defined in this file; in6_control, in6_getpeeraddr and in6_getsockaddr
 * are defined elsewhere.
 *
 * NOTE(review): the closing of this initialiser is not visible in this
 * listing.
 */
836 struct pr_usrreqs rip6_usrreqs = {
837 .pru_abort = rip6_abort,
838 .pru_attach = rip6_attach,
839 .pru_bind = rip6_bind,
840 .pru_connect = rip6_connect,
841 .pru_control = in6_control,
842 .pru_detach = rip6_detach,
843 .pru_disconnect = rip6_disconnect,
844 .pru_peeraddr = in6_getpeeraddr,
845 .pru_send = rip6_send,
846 .pru_shutdown = rip6_shutdown,
847 .pru_sockaddr = in6_getsockaddr,
848 .pru_close = rip6_close,